Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Evergreen:11.1:kernel-2.6.32
xen
19293-vcpu-migration-delay.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 19293-vcpu-migration-delay.patch of Package xen
# HG changeset patch # User Keir Fraser <keir.fraser@citrix.com> # Date 1236591472 0 # Node ID 95e3cd67add21637b3580091bc016a16fa46fbf4 # Parent c4760d58ee6f859eda3b9650fa9bf556c53a1d92 Add vcpu_migration_delay=<microsecs> boot option to scheduler The idea is borrowed from Linux kernel: if the vCPU is just scheduled out and put to run-queue, it's likely cache-hot on its current pCPU, and it may be scheduled in in a short period of time; however, if vCPU is migrated to another pCPU, it needs to re-warm the cache. The patch introduces an option vcpu_migration_delay to avoid aggressive vCPU migration (in practice we see that the migration frequency is very high most of the time), while in the meantime keeping load balancing over slightly longer time scales. Linux kernel uses 0.5ms by default. Considering the cost may be higher (e.g. VMCS impact) than in native, vcpu_migration_delay=1000 is chosen for our tests, which are performed on a 4x 6-core Dunnington platform. In 24-VM case, there is ~2% stable performance gain for enterprise workloads like SPECjbb and sysbench. If HVM is with stubdom, the gain is more: 4% for the same workloads. Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com> Signed-off-by: Keir Fraser <keir.fraser@citrix.com> # HG changeset patch # User Keir Fraser <keir.fraser@citrix.com> # Date 1236766334 0 # Node ID cfacba42091c651290b025e6de7716ec860f448e # Parent 2cd96ef839966d18a977fa4e32bd823e8a63ca33 Improve vcpu_migration_delay handling. Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com> --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -785,6 +785,7 @@ static void schedule(void) (test_bit(_VPF_blocked, &prev->pause_flags) ? RUNSTATE_blocked : (vcpu_runnable(prev) ? 
RUNSTATE_runnable : RUNSTATE_offline)), now); + prev->last_run_time = now; ASSERT(next->runstate.state != RUNSTATE_running); vcpu_runstate_change(next, RUNSTATE_running, now); --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -123,7 +123,8 @@ _MACRO(dom_init) \ _MACRO(dom_destroy) \ _MACRO(vcpu_init) \ - _MACRO(vcpu_destroy) + _MACRO(vcpu_destroy) \ + _MACRO(vcpu_hot) #ifndef NDEBUG #define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \ @@ -395,14 +396,37 @@ __csched_vcpu_check(struct vcpu *vc) #define CSCHED_VCPU_CHECK(_vc) #endif +/* + * Delay, in microseconds, between migrations of a VCPU between PCPUs. + * This prevents rapid fluttering of a VCPU between CPUs, and reduces the + * implicit overheads such as cache-warming. 1ms (1000) has been measured + * as a good value. + */ +static unsigned int vcpu_migration_delay; +integer_param("vcpu_migration_delay", vcpu_migration_delay); + +static inline int +__csched_vcpu_is_cache_hot(struct vcpu *v) +{ + int hot = ((NOW() - v->last_run_time) < + ((uint64_t)vcpu_migration_delay * 1000u)); + + if ( hot ) + CSCHED_STAT_CRANK(vcpu_hot); + + return hot; +} + static inline int __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu) { /* - * Don't pick up work that's in the peer's scheduling tail. Also only pick - * up work that's allowed to run on our CPU. + * Don't pick up work that's in the peer's scheduling tail or hot on + * peer PCPU. Only pick up work that's allowed to run on our CPU. 
*/ - return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity); + return !vc->is_running && + !__csched_vcpu_is_cache_hot(vc) && + cpu_isset(dest_cpu, vc->cpu_affinity); } static int @@ -1297,7 +1321,8 @@ csched_dump(void) "\tmsecs per tick = %dms\n" "\tcredits per tick = %d\n" "\tticks per tslice = %d\n" - "\tticks per acct = %d\n", + "\tticks per acct = %d\n" + "\tmigration delay = %uus\n", csched_priv.ncpus, csched_priv.master, csched_priv.credit, @@ -1308,7 +1333,8 @@ csched_dump(void) CSCHED_MSECS_PER_TICK, CSCHED_CREDITS_PER_TICK, CSCHED_TICKS_PER_TSLICE, - CSCHED_TICKS_PER_ACCT); + CSCHED_TICKS_PER_ACCT, + vcpu_migration_delay); cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers); printk("idlers: %s\n", idlers_buf); --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -101,6 +101,9 @@ struct vcpu } runstate_guest; /* guest address */ #endif + /* last time when vCPU is scheduled out */ + uint64_t last_run_time; + /* Has the FPU been initialised? */ bool_t fpu_initialised; /* Has the FPU been used since it was last saved? */
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor