sched: Rewrite tg_shares_up)
By tracking a per-cpu load-avg for each cfs_rq and folding it into a global task_group load on each tick we can rework tg_shares_up to be strictly per-cpu. This should improve cpu-cgroup performance for smp systems significantly. [ Paul: changed to use queueing cfs_rq + bug fixes ] Signed-off-by: Paul Turner <pjt@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20101115234937.580480400@google.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
48c5ccae88
commit
2069dd75c7
|
@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
|
|||
extern unsigned int sysctl_sched_latency;
|
||||
extern unsigned int sysctl_sched_min_granularity;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_shares_ratelimit;
|
||||
extern unsigned int sysctl_sched_shares_thresh;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
|
||||
enum sched_tunable_scaling {
|
||||
|
|
173
kernel/sched.c
173
kernel/sched.c
|
@ -253,6 +253,8 @@ struct task_group {
|
|||
/* runqueue "owned" by this group on each cpu */
|
||||
struct cfs_rq **cfs_rq;
|
||||
unsigned long shares;
|
||||
|
||||
atomic_t load_weight;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
@ -359,15 +361,11 @@ struct cfs_rq {
|
|||
*/
|
||||
unsigned long h_load;
|
||||
|
||||
/*
|
||||
* this cpu's part of tg->shares
|
||||
*/
|
||||
unsigned long shares;
|
||||
u64 load_avg;
|
||||
u64 load_period;
|
||||
u64 load_stamp;
|
||||
|
||||
/*
|
||||
* load.weight at the time we set shares
|
||||
*/
|
||||
unsigned long rq_weight;
|
||||
unsigned long load_contribution;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
@ -806,20 +804,6 @@ late_initcall(sched_init_debug);
|
|||
*/
|
||||
const_debug unsigned int sysctl_sched_nr_migrate = 32;
|
||||
|
||||
/*
|
||||
* ratelimit for updating the group shares.
|
||||
* default: 0.25ms
|
||||
*/
|
||||
unsigned int sysctl_sched_shares_ratelimit = 250000;
|
||||
unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
|
||||
|
||||
/*
|
||||
* Inject some fuzzyness into changing the per-cpu group shares
|
||||
* this avoids remote rq-locks at the expense of fairness.
|
||||
* default: 4
|
||||
*/
|
||||
unsigned int sysctl_sched_shares_thresh = 4;
|
||||
|
||||
/*
|
||||
* period over which we average the RT time consumption, measured
|
||||
* in ms.
|
||||
|
@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
|
|||
lw->inv_weight = 0;
|
||||
}
|
||||
|
||||
static inline void update_load_set(struct load_weight *lw, unsigned long w)
|
||||
{
|
||||
lw->weight = w;
|
||||
lw->inv_weight = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* To aid in avoiding the subversion of "niceness" due to uneven distribution
|
||||
* of tasks with abnormal "nice" values across CPUs the contribution that
|
||||
|
@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
|||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
static __read_mostly unsigned long __percpu *update_shares_data;
|
||||
|
||||
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
|
||||
static void update_cfs_load(struct cfs_rq *cfs_rq);
|
||||
static void update_cfs_shares(struct cfs_rq *cfs_rq);
|
||||
|
||||
/*
|
||||
* Calculate and set the cpu's group shares.
|
||||
*/
|
||||
static void update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
unsigned long sd_shares,
|
||||
unsigned long sd_rq_weight,
|
||||
unsigned long *usd_rq_weight)
|
||||
{
|
||||
unsigned long shares, rq_weight;
|
||||
int boost = 0;
|
||||
|
||||
rq_weight = usd_rq_weight[cpu];
|
||||
if (!rq_weight) {
|
||||
boost = 1;
|
||||
rq_weight = NICE_0_LOAD;
|
||||
}
|
||||
|
||||
/*
|
||||
* \Sum_j shares_j * rq_weight_i
|
||||
* shares_i = -----------------------------
|
||||
* \Sum_j rq_weight_j
|
||||
*/
|
||||
shares = (sd_shares * rq_weight) / sd_rq_weight;
|
||||
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
|
||||
|
||||
if (abs(shares - tg->se[cpu]->load.weight) >
|
||||
sysctl_sched_shares_thresh) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
|
||||
tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
|
||||
__set_se_shares(tg->se[cpu], shares);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-compute the task group their per cpu shares over the given domain.
|
||||
* This needs to be done in a bottom-up fashion because the rq weight of a
|
||||
* parent group depends on the shares of its child groups.
|
||||
* update tg->load_weight by folding this cpu's load_avg
|
||||
*/
|
||||
static int tg_shares_up(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
|
||||
unsigned long *usd_rq_weight;
|
||||
struct sched_domain *sd = data;
|
||||
long load_avg;
|
||||
struct cfs_rq *cfs_rq;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
int cpu = (long)data;
|
||||
struct rq *rq;
|
||||
|
||||
if (!tg->se[0])
|
||||
if (!tg->se[cpu])
|
||||
return 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
|
||||
rq = cpu_rq(cpu);
|
||||
cfs_rq = tg->cfs_rq[cpu];
|
||||
|
||||
for_each_cpu(i, sched_domain_span(sd)) {
|
||||
weight = tg->cfs_rq[i]->load.weight;
|
||||
usd_rq_weight[i] = weight;
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
rq_weight += weight;
|
||||
/*
|
||||
* If there are currently no tasks on the cpu pretend there
|
||||
* is one of average load so that when a new task gets to
|
||||
* run here it will not get delayed by group starvation.
|
||||
*/
|
||||
if (!weight)
|
||||
weight = NICE_0_LOAD;
|
||||
update_rq_clock(rq);
|
||||
update_cfs_load(cfs_rq);
|
||||
|
||||
sum_weight += weight;
|
||||
shares += tg->cfs_rq[i]->shares;
|
||||
}
|
||||
load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
|
||||
load_avg -= cfs_rq->load_contribution;
|
||||
|
||||
if (!rq_weight)
|
||||
rq_weight = sum_weight;
|
||||
atomic_add(load_avg, &tg->load_weight);
|
||||
cfs_rq->load_contribution += load_avg;
|
||||
|
||||
if ((!shares && rq_weight) || shares > tg->shares)
|
||||
shares = tg->shares;
|
||||
/*
|
||||
* We need to update shares after updating tg->load_weight in
|
||||
* order to adjust the weight of groups with long running tasks.
|
||||
*/
|
||||
update_cfs_shares(cfs_rq);
|
||||
|
||||
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
|
||||
shares = tg->shares;
|
||||
|
||||
for_each_cpu(i, sched_domain_span(sd))
|
||||
update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
|
||||
|
||||
local_irq_restore(flags);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data)
|
|||
load = cpu_rq(cpu)->load.weight;
|
||||
} else {
|
||||
load = tg->parent->cfs_rq[cpu]->h_load;
|
||||
load *= tg->cfs_rq[cpu]->shares;
|
||||
load *= tg->se[cpu]->load.weight;
|
||||
load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
|
||||
}
|
||||
|
||||
|
@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void update_shares(struct sched_domain *sd)
|
||||
static void update_shares(long cpu)
|
||||
{
|
||||
s64 elapsed;
|
||||
u64 now;
|
||||
|
||||
if (root_task_group_empty())
|
||||
return;
|
||||
|
||||
now = local_clock();
|
||||
elapsed = now - sd->last_update;
|
||||
/*
|
||||
* XXX: replace with an on-demand list
|
||||
*/
|
||||
|
||||
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
|
||||
sd->last_update = now;
|
||||
walk_tg_tree(tg_nop, tg_shares_up, sd);
|
||||
}
|
||||
walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
|
||||
}
|
||||
|
||||
static void update_h_load(long cpu)
|
||||
|
@ -1699,7 +1631,7 @@ static void update_h_load(long cpu)
|
|||
|
||||
#else
|
||||
|
||||
static inline void update_shares(struct sched_domain *sd)
|
||||
static inline void update_shares(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
cfs_rq->shares = shares;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static void calc_load_account_idle(struct rq *this_rq);
|
||||
static void update_sysctl(void);
|
||||
static int get_update_sysctl_factor(void);
|
||||
|
@ -5551,7 +5474,6 @@ static void update_sysctl(void)
|
|||
SET_SYSCTL(sched_min_granularity);
|
||||
SET_SYSCTL(sched_latency);
|
||||
SET_SYSCTL(sched_wakeup_granularity);
|
||||
SET_SYSCTL(sched_shares_ratelimit);
|
||||
#undef SET_SYSCTL
|
||||
}
|
||||
|
||||
|
@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
|||
se->cfs_rq = parent->my_q;
|
||||
|
||||
se->my_q = cfs_rq;
|
||||
se->load.weight = tg->shares;
|
||||
se->load.inv_weight = 0;
|
||||
update_load_set(&se->load, tg->shares);
|
||||
se->parent = parent;
|
||||
}
|
||||
#endif
|
||||
|
@ -7881,10 +7802,6 @@ void __init sched_init(void)
|
|||
|
||||
#endif /* CONFIG_CGROUP_SCHED */
|
||||
|
||||
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
|
||||
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
|
||||
__alignof__(unsigned long));
|
||||
#endif
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq;
|
||||
|
||||
|
@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
|
|||
if (on_rq)
|
||||
dequeue_entity(cfs_rq, se, 0);
|
||||
|
||||
se->load.weight = shares;
|
||||
se->load.inv_weight = 0;
|
||||
update_load_set(&se->load, shares);
|
||||
|
||||
if (on_rq)
|
||||
enqueue_entity(cfs_rq, se, 0);
|
||||
|
@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
|
|||
/*
|
||||
* force a rebalance
|
||||
*/
|
||||
cfs_rq_set_shares(tg->cfs_rq[i], 0);
|
||||
set_se_shares(tg->se[i], shares);
|
||||
}
|
||||
|
||||
|
|
|
@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
|||
spread0 = min_vruntime - rq0_min_vruntime;
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
|
||||
SPLIT_NS(spread0));
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
||||
|
||||
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
|
||||
cfs_rq->nr_spread_over);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
#ifdef CONFIG_SMP
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
|
||||
SPLIT_NS(cfs_rq->load_avg));
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
|
||||
SPLIT_NS(cfs_rq->load_period));
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
|
||||
cfs_rq->load_contribution);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "load_tg",
|
||||
atomic_read(&tg->load_weight));
|
||||
#endif
|
||||
|
||||
print_cfs_group_stats(m, cpu, cfs_rq->tg);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
|
|||
WRT_SYSCTL(sched_min_granularity);
|
||||
WRT_SYSCTL(sched_latency);
|
||||
WRT_SYSCTL(sched_wakeup_granularity);
|
||||
WRT_SYSCTL(sched_shares_ratelimit);
|
||||
#undef WRT_SYSCTL
|
||||
|
||||
return 0;
|
||||
|
@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
cfs_rq->nr_running++;
|
||||
se->on_rq = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
list_del_init(&se->group_node);
|
||||
}
|
||||
cfs_rq->nr_running--;
|
||||
se->on_rq = 0;
|
||||
}
|
||||
|
||||
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
|
||||
static void update_cfs_load(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
u64 period = sched_avg_period();
|
||||
u64 now, delta;
|
||||
|
||||
if (!cfs_rq)
|
||||
return;
|
||||
|
||||
now = rq_of(cfs_rq)->clock;
|
||||
delta = now - cfs_rq->load_stamp;
|
||||
|
||||
cfs_rq->load_stamp = now;
|
||||
cfs_rq->load_period += delta;
|
||||
cfs_rq->load_avg += delta * cfs_rq->load.weight;
|
||||
|
||||
while (cfs_rq->load_period > period) {
|
||||
/*
|
||||
* Inline assembly required to prevent the compiler
|
||||
* optimising this loop into a divmod call.
|
||||
* See __iter_div_u64_rem() for another example of this.
|
||||
*/
|
||||
asm("" : "+rm" (cfs_rq->load_period));
|
||||
cfs_rq->load_period /= 2;
|
||||
cfs_rq->load_avg /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||
unsigned long weight)
|
||||
{
|
||||
if (se->on_rq)
|
||||
account_entity_dequeue(cfs_rq, se);
|
||||
|
||||
update_load_set(&se->load, weight);
|
||||
|
||||
if (se->on_rq)
|
||||
account_entity_enqueue(cfs_rq, se);
|
||||
}
|
||||
|
||||
static void update_cfs_shares(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct task_group *tg;
|
||||
struct sched_entity *se;
|
||||
long load_weight, load, shares;
|
||||
|
||||
if (!cfs_rq)
|
||||
return;
|
||||
|
||||
tg = cfs_rq->tg;
|
||||
se = tg->se[cpu_of(rq_of(cfs_rq))];
|
||||
if (!se)
|
||||
return;
|
||||
|
||||
load = cfs_rq->load.weight;
|
||||
|
||||
load_weight = atomic_read(&tg->load_weight);
|
||||
load_weight -= cfs_rq->load_contribution;
|
||||
load_weight += load;
|
||||
|
||||
shares = (tg->shares * load);
|
||||
if (load_weight)
|
||||
shares /= load_weight;
|
||||
|
||||
if (shares < MIN_SHARES)
|
||||
shares = MIN_SHARES;
|
||||
if (shares > tg->shares)
|
||||
shares = tg->shares;
|
||||
|
||||
reweight_entity(cfs_rq_of(se), se, shares);
|
||||
}
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
static inline void update_cfs_load(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
|
@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
* Update run-time statistics of the 'current'.
|
||||
*/
|
||||
update_curr(cfs_rq);
|
||||
update_cfs_load(cfs_rq);
|
||||
account_entity_enqueue(cfs_rq, se);
|
||||
update_cfs_shares(cfs_rq);
|
||||
|
||||
if (flags & ENQUEUE_WAKEUP) {
|
||||
place_entity(cfs_rq, se, 0);
|
||||
|
@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
check_spread(cfs_rq, se);
|
||||
if (se != cfs_rq->curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
}
|
||||
|
||||
static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
|
@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
|
||||
if (se != cfs_rq->curr)
|
||||
__dequeue_entity(cfs_rq, se);
|
||||
se->on_rq = 0;
|
||||
update_cfs_load(cfs_rq);
|
||||
account_entity_dequeue(cfs_rq, se);
|
||||
update_min_vruntime(cfs_rq);
|
||||
update_cfs_shares(cfs_rq);
|
||||
|
||||
/*
|
||||
* Normalize the entity after updating the min_vruntime because the
|
||||
|
@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
flags = ENQUEUE_WAKEUP;
|
||||
}
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
update_cfs_load(cfs_rq);
|
||||
update_cfs_shares(cfs_rq);
|
||||
}
|
||||
|
||||
hrtick_update(rq);
|
||||
}
|
||||
|
||||
|
@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
for_each_sched_entity(se) {
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
dequeue_entity(cfs_rq, se, flags);
|
||||
|
||||
/* Don't dequeue parent if it has other entities besides us */
|
||||
if (cfs_rq->load.weight)
|
||||
break;
|
||||
flags |= DEQUEUE_SLEEP;
|
||||
}
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
update_cfs_load(cfs_rq);
|
||||
update_cfs_shares(cfs_rq);
|
||||
}
|
||||
|
||||
hrtick_update(rq);
|
||||
}
|
||||
|
||||
|
@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
|
|||
* Adding load to a group doesn't make a group heavier, but can cause movement
|
||||
* of group shares between cpus. Assuming the shares were perfectly aligned one
|
||||
* can calculate the shift in shares.
|
||||
*
|
||||
* The problem is that perfectly aligning the shares is rather expensive, hence
|
||||
* we try to avoid doing that too often - see update_shares(), which ratelimits
|
||||
* this change.
|
||||
*
|
||||
* We compensate this by not only taking the current delta into account, but
|
||||
* also considering the delta between when the shares were last adjusted and
|
||||
* now.
|
||||
*
|
||||
* We still saw a performance dip, some tracing learned us that between
|
||||
* cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
|
||||
* significantly. Therefore try to bias the error in direction of failing
|
||||
* the affine wakeup.
|
||||
*
|
||||
*/
|
||||
static long effective_load(struct task_group *tg, int cpu,
|
||||
long wl, long wg)
|
||||
static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
|
||||
{
|
||||
struct sched_entity *se = tg->se[cpu];
|
||||
|
||||
if (!tg->parent)
|
||||
return wl;
|
||||
|
||||
/*
|
||||
* By not taking the decrease of shares on the other cpu into
|
||||
* account our error leans towards reducing the affine wakeups.
|
||||
*/
|
||||
if (!wl && sched_feat(ASYM_EFF_LOAD))
|
||||
return wl;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
long S, rw, s, a, b;
|
||||
long more_w;
|
||||
|
||||
/*
|
||||
* Instead of using this increment, also add the difference
|
||||
* between when the shares were last updated and now.
|
||||
*/
|
||||
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||
wl += more_w;
|
||||
wg += more_w;
|
||||
|
||||
S = se->my_q->tg->shares;
|
||||
s = se->my_q->shares;
|
||||
rw = se->my_q->rq_weight;
|
||||
s = se->load.weight;
|
||||
rw = se->my_q->load.weight;
|
||||
|
||||
a = S*(rw + wl);
|
||||
b = S*rw + s*wg;
|
||||
|
@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
|
|||
sd = tmp;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
if (sched_feat(LB_SHARES_UPDATE)) {
|
||||
/*
|
||||
* Pick the largest domain to update shares over
|
||||
*/
|
||||
tmp = sd;
|
||||
if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
|
||||
tmp = affine_sd;
|
||||
|
||||
if (tmp) {
|
||||
raw_spin_unlock(&rq->lock);
|
||||
update_shares(tmp);
|
||||
raw_spin_lock(&rq->lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (affine_sd) {
|
||||
if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
|
||||
return select_idle_sibling(p, cpu);
|
||||
|
@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
|||
schedstat_inc(sd, lb_count[idle]);
|
||||
|
||||
redo:
|
||||
update_shares(sd);
|
||||
group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
|
||||
cpus, balance);
|
||||
|
||||
|
@ -3156,8 +3206,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
|||
else
|
||||
ld_moved = 0;
|
||||
out:
|
||||
if (ld_moved)
|
||||
update_shares(sd);
|
||||
return ld_moved;
|
||||
}
|
||||
|
||||
|
@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
|||
int update_next_balance = 0;
|
||||
int need_serialize;
|
||||
|
||||
update_shares(cpu);
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (!(sd->flags & SD_LOAD_BALANCE))
|
||||
continue;
|
||||
|
|
|
@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
|
|||
SCHED_FEAT(HRTICK, 0)
|
||||
SCHED_FEAT(DOUBLE_TICK, 0)
|
||||
SCHED_FEAT(LB_BIAS, 1)
|
||||
SCHED_FEAT(LB_SHARES_UPDATE, 1)
|
||||
SCHED_FEAT(ASYM_EFF_LOAD, 1)
|
||||
|
||||
/*
|
||||
* Spin-wait on mutex acquisition when the mutex owner is running on
|
||||
|
|
|
@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */
|
|||
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
|
||||
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
|
||||
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
|
||||
static int min_sched_shares_ratelimit = 100000; /* 100 usec */
|
||||
static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
|
@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
|
|||
.extra1 = &min_wakeup_granularity_ns,
|
||||
.extra2 = &max_wakeup_granularity_ns,
|
||||
},
|
||||
{
|
||||
.procname = "sched_shares_ratelimit",
|
||||
.data = &sysctl_sched_shares_ratelimit,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = sched_proc_update_handler,
|
||||
.extra1 = &min_sched_shares_ratelimit,
|
||||
.extra2 = &max_sched_shares_ratelimit,
|
||||
},
|
||||
{
|
||||
.procname = "sched_tunable_scaling",
|
||||
.data = &sysctl_sched_tunable_scaling,
|
||||
|
@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
|
|||
.extra1 = &min_sched_tunable_scaling,
|
||||
.extra2 = &max_sched_tunable_scaling,
|
||||
},
|
||||
{
|
||||
.procname = "sched_shares_thresh",
|
||||
.data = &sysctl_sched_shares_thresh,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
},
|
||||
{
|
||||
.procname = "sched_migration_cost",
|
||||
.data = &sysctl_sched_migration_cost,
|
||||
|
|
Loading…
Reference in New Issue
Block a user