forked from luck/tmp_suning_uos_patched
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The main changes in this cycle are: - Make schedstats a runtime tunable (disabled by default) and optimize it via static keys. As most distributions enable CONFIG_SCHEDSTATS=y due to its instrumentation value, this is a nice performance enhancement. (Mel Gorman) - Implement 'simple waitqueues' (swait): these are just pure waitqueues without any of the more complex features of full-blown waitqueues (callbacks, wake flags, wake keys, etc.). Simple waitqueues have less memory overhead and are faster. Use simple waitqueues in the RCU code (in 4 different places) and for handling KVM vCPU wakeups. (Peter Zijlstra, Daniel Wagner, Thomas Gleixner, Paul Gortmaker, Marcelo Tosatti) - sched/numa enhancements (Rik van Riel) - NOHZ performance enhancements (Rik van Riel) - Various sched/deadline enhancements (Steven Rostedt) - Various fixes (Peter Zijlstra) - ... and a number of other fixes, cleanups and smaller enhancements" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits) sched/cputime: Fix steal_account_process_tick() to always return jiffies sched/deadline: Remove dl_new from struct sched_dl_entity Revert "kbuild: Add option to turn incompatible pointer check into error" sched/deadline: Remove superfluous call to switched_to_dl() sched/debug: Fix preempt_disable_ip recording for preempt_disable() sched, time: Switch VIRT_CPU_ACCOUNTING_GEN to jiffy granularity time, acct: Drop irq save & restore from __acct_update_integrals() acct, time: Change indentation in __acct_update_integrals() sched, time: Remove non-power-of-two divides from __acct_update_integrals() sched/rt: Kick RT bandwidth timer immediately on start up sched/debug: Add deadline scheduler bandwidth ratio to /proc/sched_debug sched/debug: Move sched_domain_sysctl to debug.c sched/debug: Move the /sys/kernel/debug/sched_features file setup into debug.c sched/rt: Fix PI handling vs. 
sched_setscheduler() sched/core: Remove duplicated sched_group_set_shares() prototype sched/fair: Consolidate nohz CPU load update code sched/fair: Avoid using decay_load_missed() with a negative value sched/deadline: Always calculate end of period on sched_yield() sched/cgroup: Fix cgroup entity load tracking tear-down rcu: Use simple wait queues where possible in rcutree ...
This commit is contained in:
commit
d4e796152a
|
@ -3532,6 +3532,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
|
||||
sched_debug [KNL] Enables verbose scheduler debug messages.
|
||||
|
||||
schedstats= [KNL,X86] Enable or disable scheduler statistics.
|
||||
Allowed values are enable and disable. This feature
|
||||
incurs a small amount of overhead in the scheduler
|
||||
but is useful for debugging and performance tuning.
|
||||
|
||||
skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
|
||||
xtime_lock contention on larger systems, and/or RCU lock
|
||||
contention on all systems with CONFIG_MAXSMP set.
|
||||
|
|
|
@ -773,6 +773,14 @@ rtsig-nr shows the number of RT signals currently queued.
|
|||
|
||||
==============================================================
|
||||
|
||||
sched_schedstats:
|
||||
|
||||
Enables/disables scheduler statistics. Enabling this feature
|
||||
incurs a small amount of overhead in the scheduler but is
|
||||
useful for debugging and performance tuning.
|
||||
|
||||
==============================================================
|
||||
|
||||
sg-big-buff:
|
||||
|
||||
This file shows the size of the generic SCSI (sg) buffer.
|
||||
|
|
|
@ -506,18 +506,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm)
|
|||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
|
||||
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
|
||||
|
||||
vcpu->arch.pause = false;
|
||||
wake_up_interruptible(wq);
|
||||
swake_up(wq);
|
||||
}
|
||||
}
|
||||
|
||||
static void vcpu_sleep(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
|
||||
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
|
||||
|
||||
wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
|
||||
swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
|
||||
(!vcpu->arch.pause)));
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
|||
{
|
||||
struct kvm *kvm = source_vcpu->kvm;
|
||||
struct kvm_vcpu *vcpu = NULL;
|
||||
wait_queue_head_t *wq;
|
||||
struct swait_queue_head *wq;
|
||||
unsigned long cpu_id;
|
||||
unsigned long context_id;
|
||||
phys_addr_t target_pc;
|
||||
|
@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
|||
smp_mb(); /* Make sure the above is visible */
|
||||
|
||||
wq = kvm_arch_vcpu_wq(vcpu);
|
||||
wake_up_interruptible(wq);
|
||||
swake_up(wq);
|
||||
|
||||
return PSCI_RET_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
|||
|
||||
dvcpu->arch.wait = 0;
|
||||
|
||||
if (waitqueue_active(&dvcpu->wq))
|
||||
wake_up_interruptible(&dvcpu->wq);
|
||||
if (swait_active(&dvcpu->wq))
|
||||
swake_up(&dvcpu->wq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(unsigned long data)
|
|||
kvm_mips_callbacks->queue_timer_int(vcpu);
|
||||
|
||||
vcpu->arch.wait = 0;
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
if (swait_active(&vcpu->wq))
|
||||
swake_up(&vcpu->wq);
|
||||
}
|
||||
|
||||
/* low level hrtimer wake routine */
|
||||
|
|
|
@ -289,7 +289,7 @@ struct kvmppc_vcore {
|
|||
struct list_head runnable_threads;
|
||||
struct list_head preempt_list;
|
||||
spinlock_t lock;
|
||||
wait_queue_head_t wq;
|
||||
struct swait_queue_head wq;
|
||||
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
|
||||
u64 stolen_tb;
|
||||
u64 preempt_tb;
|
||||
|
@ -629,7 +629,7 @@ struct kvm_vcpu_arch {
|
|||
u8 prodded;
|
||||
u32 last_inst;
|
||||
|
||||
wait_queue_head_t *wqp;
|
||||
struct swait_queue_head *wqp;
|
||||
struct kvmppc_vcore *vcore;
|
||||
int ret;
|
||||
int trap;
|
||||
|
|
|
@ -114,11 +114,11 @@ static bool kvmppc_ipi_thread(int cpu)
|
|||
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int cpu;
|
||||
wait_queue_head_t *wqp;
|
||||
struct swait_queue_head *wqp;
|
||||
|
||||
wqp = kvm_arch_vcpu_wq(vcpu);
|
||||
if (waitqueue_active(wqp)) {
|
||||
wake_up_interruptible(wqp);
|
||||
if (swait_active(wqp)) {
|
||||
swake_up(wqp);
|
||||
++vcpu->stat.halt_wakeup;
|
||||
}
|
||||
|
||||
|
@ -701,8 +701,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
|||
tvcpu->arch.prodded = 1;
|
||||
smp_mb();
|
||||
if (vcpu->arch.ceded) {
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
if (swait_active(&vcpu->wq)) {
|
||||
swake_up(&vcpu->wq);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
|
@ -1459,7 +1459,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
|
|||
INIT_LIST_HEAD(&vcore->runnable_threads);
|
||||
spin_lock_init(&vcore->lock);
|
||||
spin_lock_init(&vcore->stoltb_lock);
|
||||
init_waitqueue_head(&vcore->wq);
|
||||
init_swait_queue_head(&vcore->wq);
|
||||
vcore->preempt_tb = TB_NIL;
|
||||
vcore->lpcr = kvm->arch.lpcr;
|
||||
vcore->first_vcpuid = core * threads_per_subcore;
|
||||
|
@ -2531,10 +2531,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
|
|||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int do_sleep = 1;
|
||||
DECLARE_SWAITQUEUE(wait);
|
||||
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* Check one last time for pending exceptions and ceded state after
|
||||
|
@ -2548,7 +2547,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
|
|||
}
|
||||
|
||||
if (!do_sleep) {
|
||||
finish_wait(&vc->wq, &wait);
|
||||
finish_swait(&vc->wq, &wait);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2556,7 +2555,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
|
|||
trace_kvmppc_vcore_blocked(vc, 0);
|
||||
spin_unlock(&vc->lock);
|
||||
schedule();
|
||||
finish_wait(&vc->wq, &wait);
|
||||
finish_swait(&vc->wq, &wait);
|
||||
spin_lock(&vc->lock);
|
||||
vc->vcore_state = VCORE_INACTIVE;
|
||||
trace_kvmppc_vcore_blocked(vc, 1);
|
||||
|
@ -2612,7 +2611,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
kvmppc_start_thread(vcpu, vc);
|
||||
trace_kvm_guest_enter(vcpu);
|
||||
} else if (vc->vcore_state == VCORE_SLEEPING) {
|
||||
wake_up(&vc->wq);
|
||||
swake_up(&vc->wq);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -467,7 +467,7 @@ struct kvm_s390_irq_payload {
|
|||
struct kvm_s390_local_interrupt {
|
||||
spinlock_t lock;
|
||||
struct kvm_s390_float_interrupt *float_int;
|
||||
wait_queue_head_t *wq;
|
||||
struct swait_queue_head *wq;
|
||||
atomic_t *cpuflags;
|
||||
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
|
||||
struct kvm_s390_irq_payload irq;
|
||||
|
|
|
@ -966,13 +966,13 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
|
|||
|
||||
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
if (swait_active(&vcpu->wq)) {
|
||||
/*
|
||||
* The vcpu gave up the cpu voluntarily, mark it as a good
|
||||
* yield-candidate.
|
||||
*/
|
||||
vcpu->preempted = true;
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
swake_up(&vcpu->wq);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1195,7 +1195,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
|
|||
static void apic_timer_expired(struct kvm_lapic *apic)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = apic->vcpu;
|
||||
wait_queue_head_t *q = &vcpu->wq;
|
||||
struct swait_queue_head *q = &vcpu->wq;
|
||||
struct kvm_timer *ktimer = &apic->lapic_timer;
|
||||
|
||||
if (atomic_read(&apic->lapic_timer.pending))
|
||||
|
@ -1204,8 +1204,8 @@ static void apic_timer_expired(struct kvm_lapic *apic)
|
|||
atomic_inc(&apic->lapic_timer.pending);
|
||||
kvm_set_pending_timer(vcpu);
|
||||
|
||||
if (waitqueue_active(q))
|
||||
wake_up_interruptible(q);
|
||||
if (swait_active(q))
|
||||
swake_up(q);
|
||||
|
||||
if (apic_lvtt_tscdeadline(apic))
|
||||
ktimer->expired_tscdeadline = ktimer->tscdeadline;
|
||||
|
|
|
@ -713,6 +713,18 @@ static inline void __ftrace_enabled_restore(int enabled)
|
|||
#define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
|
||||
#define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
|
||||
|
||||
/*
 * Pick a caller address that lies outside the lock functions.
 *
 * CALLER_ADDR0 and then CALLER_ADDR1 are tried in turn; the first one for
 * which in_lock_functions() is false is returned. If both are inside lock
 * functions, CALLER_ADDR2 is returned without being checked.
 */
static inline unsigned long get_lock_parent_ip(void)
|
||||
{
|
||||
unsigned long addr = CALLER_ADDR0;
|
||||
|
||||
if (!in_lock_functions(addr))
|
||||
return addr;
|
||||
addr = CALLER_ADDR1;
|
||||
if (!in_lock_functions(addr))
|
||||
return addr;
|
||||
return CALLER_ADDR2;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IRQSOFF_TRACER
|
||||
extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
|
||||
extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <linux/irqflags.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <linux/swait.h>
|
||||
#include <asm/signal.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
|
@ -218,7 +219,7 @@ struct kvm_vcpu {
|
|||
int fpu_active;
|
||||
int guest_fpu_loaded, guest_xcr0_loaded;
|
||||
unsigned char fpu_counter;
|
||||
wait_queue_head_t wq;
|
||||
struct swait_queue_head wq;
|
||||
struct pid *pid;
|
||||
int sigset_active;
|
||||
sigset_t sigset;
|
||||
|
@ -782,7 +783,7 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
|
|||
}
|
||||
#endif
|
||||
|
||||
static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
|
||||
static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef __KVM_HAVE_ARCH_WQP
|
||||
return vcpu->arch.wqp;
|
||||
|
|
|
@ -37,6 +37,9 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
|
|||
|
||||
void clear_all_latency_tracing(struct task_struct *p);
|
||||
|
||||
extern int sysctl_latencytop(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
|
||||
#else
|
||||
|
||||
static inline void
|
||||
|
|
|
@ -182,8 +182,6 @@ extern void update_cpu_load_nohz(int active);
|
|||
static inline void update_cpu_load_nohz(int active) { }
|
||||
#endif
|
||||
|
||||
extern unsigned long get_parent_ip(unsigned long addr);
|
||||
|
||||
extern void dump_cpu_task(int cpu);
|
||||
|
||||
struct seq_file;
|
||||
|
@ -920,6 +918,10 @@ static inline int sched_info_on(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
void force_schedstat_enabled(void);
|
||||
#endif
|
||||
|
||||
enum cpu_idle_type {
|
||||
CPU_IDLE,
|
||||
CPU_NOT_IDLE,
|
||||
|
@ -1289,6 +1291,8 @@ struct sched_rt_entity {
|
|||
unsigned long timeout;
|
||||
unsigned long watchdog_stamp;
|
||||
unsigned int time_slice;
|
||||
unsigned short on_rq;
|
||||
unsigned short on_list;
|
||||
|
||||
struct sched_rt_entity *back;
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
@ -1329,10 +1333,6 @@ struct sched_dl_entity {
|
|||
* task has to wait for a replenishment to be performed at the
|
||||
* next firing of dl_timer.
|
||||
*
|
||||
* @dl_new tells if a new instance arrived. If so we must
|
||||
* start executing it with full runtime and reset its absolute
|
||||
* deadline;
|
||||
*
|
||||
* @dl_boosted tells if we are boosted due to DI (deadline inheritance). If so we are
|
||||
* outside bandwidth enforcement mechanism (but only until we
|
||||
* exit the critical section);
|
||||
|
@ -1340,7 +1340,7 @@ struct sched_dl_entity {
|
|||
* @dl_yielded tells if task gave up the cpu before consuming
|
||||
* all its available runtime during the last job.
|
||||
*/
|
||||
int dl_throttled, dl_new, dl_boosted, dl_yielded;
|
||||
int dl_throttled, dl_boosted, dl_yielded;
|
||||
|
||||
/*
|
||||
* Bandwidth enforcement timer. Each -deadline task has its
|
||||
|
|
|
@ -95,4 +95,8 @@ extern int sysctl_numa_balancing(struct ctl_table *table, int write,
|
|||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
extern int sysctl_schedstats(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
#endif /* _SCHED_SYSCTL_H */
|
||||
|
|
172
include/linux/swait.h
Normal file
172
include/linux/swait.h
Normal file
|
@ -0,0 +1,172 @@
|
|||
#ifndef _LINUX_SWAIT_H
|
||||
#define _LINUX_SWAIT_H
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <asm/current.h>
|
||||
|
||||
/*
|
||||
* Simple wait queues
|
||||
*
|
||||
* While these are very similar to the other/complex wait queues (wait.h) the
|
||||
* most important difference is that the simple waitqueue allows for
|
||||
* deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
|
||||
* times.
|
||||
*
|
||||
* In order to make this so, we had to drop a fair number of features of the
|
||||
* other waitqueue code; notably:
|
||||
*
|
||||
* - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
|
||||
* all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
|
||||
* sleeper state.
|
||||
*
|
||||
* - the exclusive mode; because this requires preserving the list order
|
||||
* and this is hard.
|
||||
*
|
||||
* - custom wake functions; because you cannot give any guarantees about
|
||||
* random code.
|
||||
*
|
||||
* As a side effect of this; the data structures are slimmer.
|
||||
*
|
||||
* One would recommend using this wait queue where possible.
|
||||
*/
|
||||
|
||||
struct task_struct;
|
||||
|
||||
struct swait_queue_head {
|
||||
raw_spinlock_t lock;
|
||||
struct list_head task_list;
|
||||
};
|
||||
|
||||
struct swait_queue {
|
||||
struct task_struct *task;
|
||||
struct list_head task_list;
|
||||
};
|
||||
|
||||
#define __SWAITQUEUE_INITIALIZER(name) { \
|
||||
.task = current, \
|
||||
.task_list = LIST_HEAD_INIT((name).task_list), \
|
||||
}
|
||||
|
||||
#define DECLARE_SWAITQUEUE(name) \
|
||||
struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
|
||||
|
||||
#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \
|
||||
.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
|
||||
.task_list = LIST_HEAD_INIT((name).task_list), \
|
||||
}
|
||||
|
||||
#define DECLARE_SWAIT_QUEUE_HEAD(name) \
|
||||
struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
|
||||
|
||||
extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
|
||||
struct lock_class_key *key);
|
||||
|
||||
#define init_swait_queue_head(q) \
|
||||
do { \
|
||||
static struct lock_class_key __key; \
|
||||
__init_swait_queue_head((q), #q, &__key); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
|
||||
({ init_swait_queue_head(&name); name; })
|
||||
# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
|
||||
struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
|
||||
#else
|
||||
# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
|
||||
DECLARE_SWAIT_QUEUE_HEAD(name)
|
||||
#endif
|
||||
|
||||
/*
 * Return non-zero when @q's task_list is not empty, zero otherwise.
 * q->lock is not taken here.
 * NOTE(review): callers presumably need their own ordering against
 * concurrent queueing -- confirm against the call sites.
 */
static inline int swait_active(struct swait_queue_head *q)
|
||||
{
|
||||
return !list_empty(&q->task_list);
|
||||
}
|
||||
|
||||
extern void swake_up(struct swait_queue_head *q);
|
||||
extern void swake_up_all(struct swait_queue_head *q);
|
||||
extern void swake_up_locked(struct swait_queue_head *q);
|
||||
|
||||
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
|
||||
extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
|
||||
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
|
||||
|
||||
extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
|
||||
extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
|
||||
|
||||
/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
|
||||
/*
 * Core wait loop used by the swait_event*() macros in this header.
 *
 * __ret starts out as @ret. Each pass calls prepare_to_swait_event() and
 * then re-tests @condition, leaving the loop if it holds. Otherwise, if
 * ___wait_is_interruptible(@state) is true and prepare_to_swait_event()
 * returned non-zero, that value is stored in __ret and the loop ends.
 * Failing both, @cmd is executed (it may assign __ret) and the loop
 * repeats. finish_swait() runs on exit and the statement expression
 * evaluates to __ret.
 *
 * @wq and @condition are expanded inside the loop, so they are evaluated
 * more than once; @wq is used as "&wq" and must therefore be an lvalue.
 */
#define ___swait_event(wq, condition, state, ret, cmd) \
|
||||
({ \
|
||||
struct swait_queue __wait; \
|
||||
long __ret = ret; \
|
||||
\
|
||||
INIT_LIST_HEAD(&__wait.task_list); \
|
||||
for (;;) { \
|
||||
long __int = prepare_to_swait_event(&wq, &__wait, state);\
|
||||
\
|
||||
if (condition) \
|
||||
break; \
|
||||
\
|
||||
if (___wait_is_interruptible(state) && __int) { \
|
||||
__ret = __int; \
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
cmd; \
|
||||
} \
|
||||
finish_swait(&wq, &__wait); \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __swait_event(wq, condition) \
|
||||
(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
|
||||
schedule())
|
||||
|
||||
#define swait_event(wq, condition) \
|
||||
do { \
|
||||
if (condition) \
|
||||
break; \
|
||||
__swait_event(wq, condition); \
|
||||
} while (0)
|
||||
|
||||
#define __swait_event_timeout(wq, condition, timeout) \
|
||||
___swait_event(wq, ___wait_cond_timeout(condition), \
|
||||
TASK_UNINTERRUPTIBLE, timeout, \
|
||||
__ret = schedule_timeout(__ret))
|
||||
|
||||
#define swait_event_timeout(wq, condition, timeout) \
|
||||
({ \
|
||||
long __ret = timeout; \
|
||||
if (!___wait_cond_timeout(condition)) \
|
||||
__ret = __swait_event_timeout(wq, condition, timeout); \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __swait_event_interruptible(wq, condition) \
|
||||
___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
|
||||
schedule())
|
||||
|
||||
#define swait_event_interruptible(wq, condition) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (!(condition)) \
|
||||
__ret = __swait_event_interruptible(wq, condition); \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __swait_event_interruptible_timeout(wq, condition, timeout) \
|
||||
___swait_event(wq, ___wait_cond_timeout(condition), \
|
||||
TASK_INTERRUPTIBLE, timeout, \
|
||||
__ret = schedule_timeout(__ret))
|
||||
|
||||
#define swait_event_interruptible_timeout(wq, condition, timeout) \
|
||||
({ \
|
||||
long __ret = timeout; \
|
||||
if (!___wait_cond_timeout(condition)) \
|
||||
__ret = __swait_event_interruptible_timeout(wq, \
|
||||
condition, timeout); \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#endif /* _LINUX_SWAIT_H */
|
|
@ -338,7 +338,7 @@ do { \
|
|||
schedule(); try_to_freeze())
|
||||
|
||||
/**
|
||||
* wait_event - sleep (or freeze) until a condition gets true
|
||||
* wait_event_freezable - sleep (or freeze) until a condition gets true
|
||||
* @wq: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
*
|
||||
|
|
|
@ -47,12 +47,12 @@
|
|||
* of times)
|
||||
*/
|
||||
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/list.h>
|
||||
|
@ -289,4 +289,16 @@ static int __init init_lstats_procfs(void)
|
|||
proc_create("latency_stats", 0644, NULL, &lstats_fops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * sysctl handler: forwards the request to proc_dointvec() and afterwards
 * calls force_schedstat_enabled() whenever latencytop_enabled is non-zero.
 * That check runs regardless of @write and of proc_dointvec()'s result.
 *
 * Returns the value proc_dointvec() returned.
 */
int sysctl_latencytop(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = proc_dointvec(table, write, buffer, lenp, ppos);
|
||||
if (latencytop_enabled)
|
||||
force_schedstat_enabled();
|
||||
|
||||
return err;
|
||||
}
|
||||
device_initcall(init_lstats_procfs);
|
||||
|
|
|
@ -59,6 +59,7 @@ int profile_setup(char *str)
|
|||
|
||||
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
force_schedstat_enabled();
|
||||
prof_on = SLEEP_PROFILING;
|
||||
if (str[strlen(sleepstr)] == ',')
|
||||
str += strlen(sleepstr) + 1;
|
||||
|
|
|
@ -1614,7 +1614,6 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
|||
int needmore;
|
||||
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
|
||||
|
||||
rcu_nocb_gp_cleanup(rsp, rnp);
|
||||
rnp->need_future_gp[c & 0x1] = 0;
|
||||
needmore = rnp->need_future_gp[(c + 1) & 0x1];
|
||||
trace_rcu_future_gp(rnp, rdp, c,
|
||||
|
@ -1635,7 +1634,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
|
|||
!READ_ONCE(rsp->gp_flags) ||
|
||||
!rsp->gp_kthread)
|
||||
return;
|
||||
wake_up(&rsp->gp_wq);
|
||||
swake_up(&rsp->gp_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2010,6 +2009,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
|||
int nocb = 0;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
struct swait_queue_head *sq;
|
||||
|
||||
WRITE_ONCE(rsp->gp_activity, jiffies);
|
||||
raw_spin_lock_irq_rcu_node(rnp);
|
||||
|
@ -2046,7 +2046,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
|||
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
|
||||
/* smp_mb() provided by prior unlock-lock pair. */
|
||||
nocb += rcu_future_gp_cleanup(rsp, rnp);
|
||||
sq = rcu_nocb_gp_get(rnp);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
rcu_nocb_gp_cleanup(sq);
|
||||
cond_resched_rcu_qs();
|
||||
WRITE_ONCE(rsp->gp_activity, jiffies);
|
||||
rcu_gp_slow(rsp, gp_cleanup_delay);
|
||||
|
@ -2092,7 +2094,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
|||
READ_ONCE(rsp->gpnum),
|
||||
TPS("reqwait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_GPS;
|
||||
wait_event_interruptible(rsp->gp_wq,
|
||||
swait_event_interruptible(rsp->gp_wq,
|
||||
READ_ONCE(rsp->gp_flags) &
|
||||
RCU_GP_FLAG_INIT);
|
||||
rsp->gp_state = RCU_GP_DONE_GPS;
|
||||
|
@ -2122,7 +2124,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
|||
READ_ONCE(rsp->gpnum),
|
||||
TPS("fqswait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_FQS;
|
||||
ret = wait_event_interruptible_timeout(rsp->gp_wq,
|
||||
ret = swait_event_interruptible_timeout(rsp->gp_wq,
|
||||
rcu_gp_fqs_check_wake(rsp, &gf), j);
|
||||
rsp->gp_state = RCU_GP_DOING_FQS;
|
||||
/* Locking provides needed memory barriers. */
|
||||
|
@ -2246,7 +2248,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
|
|||
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
|
||||
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2900,7 +2902,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
|||
}
|
||||
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3529,7 +3531,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
|
|||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
if (wake) {
|
||||
smp_mb(); /* EGP done before wake_up(). */
|
||||
wake_up(&rsp->expedited_wq);
|
||||
swake_up(&rsp->expedited_wq);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -3780,7 +3782,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
|
|||
jiffies_start = jiffies;
|
||||
|
||||
for (;;) {
|
||||
ret = wait_event_interruptible_timeout(
|
||||
ret = swait_event_timeout(
|
||||
rsp->expedited_wq,
|
||||
sync_rcu_preempt_exp_done(rnp_root),
|
||||
jiffies_stall);
|
||||
|
@ -3788,7 +3790,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
|
|||
return;
|
||||
if (ret < 0) {
|
||||
/* Hit a signal, disable CPU stall warnings. */
|
||||
wait_event(rsp->expedited_wq,
|
||||
swait_event(rsp->expedited_wq,
|
||||
sync_rcu_preempt_exp_done(rnp_root));
|
||||
return;
|
||||
}
|
||||
|
@ -4482,8 +4484,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
|
|||
}
|
||||
}
|
||||
|
||||
init_waitqueue_head(&rsp->gp_wq);
|
||||
init_waitqueue_head(&rsp->expedited_wq);
|
||||
init_swait_queue_head(&rsp->gp_wq);
|
||||
init_swait_queue_head(&rsp->expedited_wq);
|
||||
rnp = rsp->level[rcu_num_lvls - 1];
|
||||
for_each_possible_cpu(i) {
|
||||
while (i > rnp->grphi)
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <linux/threads.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/swait.h>
|
||||
#include <linux/stop_machine.h>
|
||||
|
||||
/*
|
||||
|
@ -243,7 +244,7 @@ struct rcu_node {
|
|||
/* Refused to boost: not sure why, though. */
|
||||
/* This can happen due to race conditions. */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
wait_queue_head_t nocb_gp_wq[2];
|
||||
struct swait_queue_head nocb_gp_wq[2];
|
||||
/* Place for rcu_nocb_kthread() to wait GP. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
int need_future_gp[2];
|
||||
|
@ -399,7 +400,7 @@ struct rcu_data {
|
|||
atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
|
||||
struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
|
||||
struct rcu_head **nocb_follower_tail;
|
||||
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct task_struct *nocb_kthread;
|
||||
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
|
||||
|
||||
|
@ -478,7 +479,7 @@ struct rcu_state {
|
|||
unsigned long gpnum; /* Current gp number. */
|
||||
unsigned long completed; /* # of last completed gp. */
|
||||
struct task_struct *gp_kthread; /* Task for grace periods. */
|
||||
wait_queue_head_t gp_wq; /* Where GP task waits. */
|
||||
struct swait_queue_head gp_wq; /* Where GP task waits. */
|
||||
short gp_flags; /* Commands for GP task. */
|
||||
short gp_state; /* GP kthread sleep state. */
|
||||
|
||||
|
@ -506,7 +507,7 @@ struct rcu_state {
|
|||
unsigned long expedited_sequence; /* Take a ticket. */
|
||||
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
||||
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
||||
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
||||
struct swait_queue_head expedited_wq; /* Wait for check-ins. */
|
||||
int ncpus_snap; /* # CPUs seen last time. */
|
||||
|
||||
unsigned long jiffies_force_qs; /* Time at which to invoke */
|
||||
|
@ -621,7 +622,8 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
|
|||
static void increment_cpu_stall_ticks(void);
|
||||
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
|
||||
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp);
|
||||
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
bool lazy, unsigned long flags);
|
||||
|
|
|
@ -1811,9 +1811,9 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
|
|||
* Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
|
||||
* grace period.
|
||||
*/
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
|
||||
{
|
||||
wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
|
||||
swake_up_all(sq);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1829,10 +1829,15 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
|||
rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
|
||||
}
|
||||
|
||||
/*
 * Return the address of the @rnp->nocb_gp_wq[] entry selected by the low
 * bit of @rnp->completed.
 */
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
|
||||
{
|
||||
return &rnp->nocb_gp_wq[rnp->completed & 0x1];
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[0]);
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[1]);
|
||||
init_swait_queue_head(&rnp->nocb_gp_wq[0]);
|
||||
init_swait_queue_head(&rnp->nocb_gp_wq[1]);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_RCU_NOCB_CPU_ALL
|
||||
|
@ -1857,7 +1862,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
|
|||
if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
|
||||
/* Prior smp_mb__after_atomic() orders against prior enqueue. */
|
||||
WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
|
||||
wake_up(&rdp_leader->nocb_wq);
|
||||
swake_up(&rdp_leader->nocb_wq);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2069,7 +2074,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
|||
*/
|
||||
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
|
||||
for (;;) {
|
||||
wait_event_interruptible(
|
||||
swait_event_interruptible(
|
||||
rnp->nocb_gp_wq[c & 0x1],
|
||||
(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
|
||||
if (likely(d))
|
||||
|
@ -2097,7 +2102,7 @@ static void nocb_leader_wait(struct rcu_data *my_rdp)
|
|||
/* Wait for callbacks to appear. */
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
|
||||
wait_event_interruptible(my_rdp->nocb_wq,
|
||||
swait_event_interruptible(my_rdp->nocb_wq,
|
||||
!READ_ONCE(my_rdp->nocb_leader_sleep));
|
||||
/* Memory barrier handled by smp_mb() calls below and repoll. */
|
||||
} else if (firsttime) {
|
||||
|
@ -2172,7 +2177,7 @@ static void nocb_leader_wait(struct rcu_data *my_rdp)
|
|||
* List was empty, wake up the follower.
|
||||
* Memory barriers supplied by atomic_long_add().
|
||||
*/
|
||||
wake_up(&rdp->nocb_wq);
|
||||
swake_up(&rdp->nocb_wq);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2193,7 +2198,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
|
|||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"FollowerSleep");
|
||||
wait_event_interruptible(rdp->nocb_wq,
|
||||
swait_event_interruptible(rdp->nocb_wq,
|
||||
READ_ONCE(rdp->nocb_follower_head));
|
||||
} else if (firsttime) {
|
||||
/* Don't drown trace log with "Poll"! */
|
||||
|
@ -2352,7 +2357,7 @@ void __init rcu_init_nohz(void)
|
|||
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
{
|
||||
rdp->nocb_tail = &rdp->nocb_head;
|
||||
init_waitqueue_head(&rdp->nocb_wq);
|
||||
init_swait_queue_head(&rdp->nocb_wq);
|
||||
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
|
||||
}
|
||||
|
||||
|
@ -2502,7 +2507,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -2510,6 +2515,11 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
|||
{
|
||||
}
|
||||
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ endif
|
|||
|
||||
obj-y += core.o loadavg.o clock.o cputime.o
|
||||
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
|
||||
obj-y += wait.o completion.o idle.o
|
||||
obj-y += wait.o swait.o completion.o idle.o
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
|
|
|
@ -67,12 +67,10 @@
|
|||
#include <linux/pagemap.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
|
@ -125,138 +123,6 @@ const_debug unsigned int sysctl_sched_features =
|
|||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
#name ,
|
||||
|
||||
static const char * const sched_feat_names[] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static int sched_feat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (!(sysctl_sched_features & (1UL << i)))
|
||||
seq_puts(m, "NO_");
|
||||
seq_printf(m, "%s ", sched_feat_names[i]);
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JUMP_LABEL
|
||||
|
||||
#define jump_label_key__true STATIC_KEY_INIT_TRUE
|
||||
#define jump_label_key__false STATIC_KEY_INIT_FALSE
|
||||
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
jump_label_key__##enabled ,
|
||||
|
||||
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static void sched_feat_disable(int i)
|
||||
{
|
||||
static_key_disable(&sched_feat_keys[i]);
|
||||
}
|
||||
|
||||
static void sched_feat_enable(int i)
|
||||
{
|
||||
static_key_enable(&sched_feat_keys[i]);
|
||||
}
|
||||
#else
|
||||
static void sched_feat_disable(int i) { };
|
||||
static void sched_feat_enable(int i) { };
|
||||
#endif /* HAVE_JUMP_LABEL */
|
||||
|
||||
static int sched_feat_set(char *cmp)
|
||||
{
|
||||
int i;
|
||||
int neg = 0;
|
||||
|
||||
if (strncmp(cmp, "NO_", 3) == 0) {
|
||||
neg = 1;
|
||||
cmp += 3;
|
||||
}
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (strcmp(cmp, sched_feat_names[i]) == 0) {
|
||||
if (neg) {
|
||||
sysctl_sched_features &= ~(1UL << i);
|
||||
sched_feat_disable(i);
|
||||
} else {
|
||||
sysctl_sched_features |= (1UL << i);
|
||||
sched_feat_enable(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
sched_feat_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
char *cmp;
|
||||
int i;
|
||||
struct inode *inode;
|
||||
|
||||
if (cnt > 63)
|
||||
cnt = 63;
|
||||
|
||||
if (copy_from_user(&buf, ubuf, cnt))
|
||||
return -EFAULT;
|
||||
|
||||
buf[cnt] = 0;
|
||||
cmp = strstrip(buf);
|
||||
|
||||
/* Ensure the static_key remains in a consistent state */
|
||||
inode = file_inode(filp);
|
||||
inode_lock(inode);
|
||||
i = sched_feat_set(cmp);
|
||||
inode_unlock(inode);
|
||||
if (i == __SCHED_FEAT_NR)
|
||||
return -EINVAL;
|
||||
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static int sched_feat_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, sched_feat_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations sched_feat_fops = {
|
||||
.open = sched_feat_open,
|
||||
.write = sched_feat_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static __init int sched_init_debug(void)
|
||||
{
|
||||
debugfs_create_file("sched_features", 0644, NULL, NULL,
|
||||
&sched_feat_fops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(sched_init_debug);
|
||||
#endif /* CONFIG_SCHED_DEBUG */
|
||||
|
||||
/*
|
||||
* Number of tasks to iterate in a single balance run.
|
||||
* Limited because this is done with IRQs disabled.
|
||||
|
@ -2094,7 +1960,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
|||
|
||||
ttwu_queue(p, cpu);
|
||||
stat:
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
if (schedstat_enabled())
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
|
@ -2142,7 +2009,8 @@ static void try_to_wake_up_local(struct task_struct *p)
|
|||
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
|
||||
|
||||
ttwu_do_wakeup(rq, p, 0);
|
||||
ttwu_stat(p, smp_processor_id(), 0);
|
||||
if (schedstat_enabled())
|
||||
ttwu_stat(p, smp_processor_id(), 0);
|
||||
out:
|
||||
raw_spin_unlock(&p->pi_lock);
|
||||
}
|
||||
|
@ -2184,7 +2052,6 @@ void __dl_clear_params(struct task_struct *p)
|
|||
dl_se->dl_bw = 0;
|
||||
|
||||
dl_se->dl_throttled = 0;
|
||||
dl_se->dl_new = 1;
|
||||
dl_se->dl_yielded = 0;
|
||||
}
|
||||
|
||||
|
@ -2211,6 +2078,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
/* Even if schedstat is disabled, there should not be garbage */
|
||||
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
|
||||
#endif
|
||||
|
||||
|
@ -2219,6 +2087,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
__dl_clear_params(p);
|
||||
|
||||
INIT_LIST_HEAD(&p->rt.run_list);
|
||||
p->rt.timeout = 0;
|
||||
p->rt.time_slice = sched_rr_timeslice;
|
||||
p->rt.on_rq = 0;
|
||||
p->rt.on_list = 0;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
INIT_HLIST_HEAD(&p->preempt_notifiers);
|
||||
|
@ -2282,6 +2154,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
|
|||
#endif
|
||||
#endif
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(sched_schedstats);
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
static void set_schedstats(bool enabled)
|
||||
{
|
||||
if (enabled)
|
||||
static_branch_enable(&sched_schedstats);
|
||||
else
|
||||
static_branch_disable(&sched_schedstats);
|
||||
}
|
||||
|
||||
void force_schedstat_enabled(void)
|
||||
{
|
||||
if (!schedstat_enabled()) {
|
||||
pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
|
||||
static_branch_enable(&sched_schedstats);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init setup_schedstats(char *str)
|
||||
{
|
||||
int ret = 0;
|
||||
if (!str)
|
||||
goto out;
|
||||
|
||||
if (!strcmp(str, "enable")) {
|
||||
set_schedstats(true);
|
||||
ret = 1;
|
||||
} else if (!strcmp(str, "disable")) {
|
||||
set_schedstats(false);
|
||||
ret = 1;
|
||||
}
|
||||
out:
|
||||
if (!ret)
|
||||
pr_warn("Unable to parse schedstats=\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
__setup("schedstats=", setup_schedstats);
|
||||
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
int sysctl_schedstats(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
struct ctl_table t;
|
||||
int err;
|
||||
int state = static_branch_likely(&sched_schedstats);
|
||||
|
||||
if (write && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
t = *table;
|
||||
t.data = &state;
|
||||
err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (write)
|
||||
set_schedstats(state);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* fork()/clone()-time setup:
|
||||
*/
|
||||
|
@ -3011,16 +2946,6 @@ u64 scheduler_tick_max_deferment(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
notrace unsigned long get_parent_ip(unsigned long addr)
|
||||
{
|
||||
if (in_lock_functions(addr)) {
|
||||
addr = CALLER_ADDR2;
|
||||
if (in_lock_functions(addr))
|
||||
addr = CALLER_ADDR3;
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
defined(CONFIG_PREEMPT_TRACER))
|
||||
|
||||
|
@ -3042,7 +2967,7 @@ void preempt_count_add(int val)
|
|||
PREEMPT_MASK - 10);
|
||||
#endif
|
||||
if (preempt_count() == val) {
|
||||
unsigned long ip = get_parent_ip(CALLER_ADDR1);
|
||||
unsigned long ip = get_lock_parent_ip();
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
current->preempt_disable_ip = ip;
|
||||
#endif
|
||||
|
@ -3069,7 +2994,7 @@ void preempt_count_sub(int val)
|
|||
#endif
|
||||
|
||||
if (preempt_count() == val)
|
||||
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
|
||||
__preempt_count_sub(val);
|
||||
}
|
||||
EXPORT_SYMBOL(preempt_count_sub);
|
||||
|
@ -3281,7 +3206,6 @@ static void __sched notrace __schedule(bool preempt)
|
|||
|
||||
trace_sched_switch(preempt, prev, next);
|
||||
rq = context_switch(rq, prev, next); /* unlocks the rq */
|
||||
cpu = cpu_of(rq);
|
||||
} else {
|
||||
lockdep_unpin_lock(&rq->lock);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
|
@ -3467,7 +3391,7 @@ EXPORT_SYMBOL(default_wake_function);
|
|||
*/
|
||||
void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
{
|
||||
int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
|
||||
int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
struct rq *rq;
|
||||
const struct sched_class *prev_class;
|
||||
|
||||
|
@ -3495,11 +3419,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
|
||||
trace_sched_pi_setprio(p, prio);
|
||||
oldprio = p->prio;
|
||||
|
||||
if (oldprio == prio)
|
||||
queue_flag &= ~DEQUEUE_MOVE;
|
||||
|
||||
prev_class = p->sched_class;
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, queue_flag);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
|
@ -3517,7 +3445,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.dl_boosted = 1;
|
||||
enqueue_flag |= ENQUEUE_REPLENISH;
|
||||
queue_flag |= ENQUEUE_REPLENISH;
|
||||
} else
|
||||
p->dl.dl_boosted = 0;
|
||||
p->sched_class = &dl_sched_class;
|
||||
|
@ -3525,7 +3453,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
if (dl_prio(oldprio))
|
||||
p->dl.dl_boosted = 0;
|
||||
if (oldprio < prio)
|
||||
enqueue_flag |= ENQUEUE_HEAD;
|
||||
queue_flag |= ENQUEUE_HEAD;
|
||||
p->sched_class = &rt_sched_class;
|
||||
} else {
|
||||
if (dl_prio(oldprio))
|
||||
|
@ -3540,7 +3468,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, p, enqueue_flag);
|
||||
enqueue_task(rq, p, queue_flag);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
out_unlock:
|
||||
|
@ -3896,6 +3824,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
|||
const struct sched_class *prev_class;
|
||||
struct rq *rq;
|
||||
int reset_on_fork;
|
||||
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
|
||||
/* may grab non-irq protected spin_locks */
|
||||
BUG_ON(in_interrupt());
|
||||
|
@ -4078,17 +4007,14 @@ static int __sched_setscheduler(struct task_struct *p,
|
|||
* itself.
|
||||
*/
|
||||
new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
|
||||
if (new_effective_prio == oldprio) {
|
||||
__setscheduler_params(p, attr);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
return 0;
|
||||
}
|
||||
if (new_effective_prio == oldprio)
|
||||
queue_flags &= ~DEQUEUE_MOVE;
|
||||
}
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, queue_flags);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
|
@ -4098,15 +4024,14 @@ static int __sched_setscheduler(struct task_struct *p,
|
|||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued) {
|
||||
int enqueue_flags = ENQUEUE_RESTORE;
|
||||
/*
|
||||
* We enqueue to tail when the priority of a task is
|
||||
* increased (user space view).
|
||||
*/
|
||||
if (oldprio <= p->prio)
|
||||
enqueue_flags |= ENQUEUE_HEAD;
|
||||
if (oldprio < p->prio)
|
||||
queue_flags |= ENQUEUE_HEAD;
|
||||
|
||||
enqueue_task(rq, p, enqueue_flags);
|
||||
enqueue_task(rq, p, queue_flags);
|
||||
}
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
|
@ -5408,183 +5333,6 @@ static void migrate_tasks(struct rq *dead_rq)
|
|||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
||||
|
||||
static struct ctl_table sd_ctl_dir[] = {
|
||||
{
|
||||
.procname = "sched_domain",
|
||||
.mode = 0555,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table sd_ctl_root[] = {
|
||||
{
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = sd_ctl_dir,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_entry(int n)
|
||||
{
|
||||
struct ctl_table *entry =
|
||||
kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
{
|
||||
struct ctl_table *entry;
|
||||
|
||||
/*
|
||||
* In the intermediate directories, both the child directory and
|
||||
* procname are dynamically allocated and could fail but the mode
|
||||
* will always be set. In the lowest directory the names are
|
||||
* static strings and all have proc handlers.
|
||||
*/
|
||||
for (entry = *tablep; entry->mode; entry++) {
|
||||
if (entry->child)
|
||||
sd_free_ctl_entry(&entry->child);
|
||||
if (entry->proc_handler == NULL)
|
||||
kfree(entry->procname);
|
||||
}
|
||||
|
||||
kfree(*tablep);
|
||||
*tablep = NULL;
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
const char *procname, void *data, int maxlen,
|
||||
umode_t mode, proc_handler *proc_handler,
|
||||
bool load_idx)
|
||||
{
|
||||
entry->procname = procname;
|
||||
entry->data = data;
|
||||
entry->maxlen = maxlen;
|
||||
entry->mode = mode;
|
||||
entry->proc_handler = proc_handler;
|
||||
|
||||
if (load_idx) {
|
||||
entry->extra1 = &min_load_idx;
|
||||
entry->extra2 = &max_load_idx;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(14);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9], "cache_nice_tries",
|
||||
&sd->cache_nice_tries,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost",
|
||||
&sd->max_newidle_lb_cost,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
|
||||
{
|
||||
struct ctl_table *entry, *table;
|
||||
struct sched_domain *sd;
|
||||
int domain_num = 0, i;
|
||||
char buf[32];
|
||||
|
||||
for_each_domain(cpu, sd)
|
||||
domain_num++;
|
||||
entry = table = sd_alloc_ctl_entry(domain_num + 1);
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for_each_domain(cpu, sd) {
|
||||
snprintf(buf, 32, "domain%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_domain_table(sd);
|
||||
entry++;
|
||||
i++;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
static void register_sched_domain_sysctl(void)
|
||||
{
|
||||
int i, cpu_num = num_possible_cpus();
|
||||
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
|
||||
char buf[32];
|
||||
|
||||
WARN_ON(sd_ctl_dir[0].child);
|
||||
sd_ctl_dir[0].child = entry;
|
||||
|
||||
if (entry == NULL)
|
||||
return;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
snprintf(buf, 32, "cpu%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_cpu_table(i);
|
||||
entry++;
|
||||
}
|
||||
|
||||
WARN_ON(sd_sysctl_header);
|
||||
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
|
||||
}
|
||||
|
||||
/* may be called multiple times per register */
|
||||
static void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
unregister_sysctl_table(sd_sysctl_header);
|
||||
sd_sysctl_header = NULL;
|
||||
if (sd_ctl_dir[0].child)
|
||||
sd_free_ctl_entry(&sd_ctl_dir[0].child);
|
||||
}
|
||||
#else
|
||||
static void register_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
static void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
|
||||
|
||||
static void set_rq_online(struct rq *rq)
|
||||
{
|
||||
if (!rq->online) {
|
||||
|
@ -6176,11 +5924,16 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|||
/* Setup the mask of cpus configured for isolated domains */
|
||||
static int __init isolated_cpu_setup(char *str)
|
||||
{
|
||||
int ret;
|
||||
|
||||
alloc_bootmem_cpumask_var(&cpu_isolated_map);
|
||||
cpulist_parse(str, cpu_isolated_map);
|
||||
ret = cpulist_parse(str, cpu_isolated_map);
|
||||
if (ret) {
|
||||
pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("isolcpus=", isolated_cpu_setup);
|
||||
|
||||
struct s_data {
|
||||
|
@ -7863,11 +7616,9 @@ void sched_destroy_group(struct task_group *tg)
|
|||
void sched_offline_group(struct task_group *tg)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
/* end participation in shares distribution */
|
||||
for_each_possible_cpu(i)
|
||||
unregister_fair_sched_group(tg, i);
|
||||
unregister_fair_sched_group(tg);
|
||||
|
||||
spin_lock_irqsave(&task_group_lock, flags);
|
||||
list_del_rcu(&tg->list);
|
||||
|
@ -7893,7 +7644,7 @@ void sched_move_task(struct task_struct *tsk)
|
|||
queued = task_on_rq_queued(tsk);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
|
||||
if (unlikely(running))
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
|
@ -7917,7 +7668,7 @@ void sched_move_task(struct task_struct *tsk)
|
|||
if (unlikely(running))
|
||||
tsk->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE);
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
|
||||
|
||||
task_rq_unlock(rq, tsk, &flags);
|
||||
}
|
||||
|
|
|
@ -262,21 +262,21 @@ static __always_inline bool steal_account_process_tick(void)
|
|||
#ifdef CONFIG_PARAVIRT
|
||||
if (static_key_false(&paravirt_steal_enabled)) {
|
||||
u64 steal;
|
||||
cputime_t steal_ct;
|
||||
unsigned long steal_jiffies;
|
||||
|
||||
steal = paravirt_steal_clock(smp_processor_id());
|
||||
steal -= this_rq()->prev_steal_time;
|
||||
|
||||
/*
|
||||
* cputime_t may be less precise than nsecs (eg: if it's
|
||||
* based on jiffies). Lets cast the result to cputime
|
||||
* steal is in nsecs but our caller is expecting steal
|
||||
* time in jiffies. Lets cast the result to jiffies
|
||||
* granularity and account the rest on the next rounds.
|
||||
*/
|
||||
steal_ct = nsecs_to_cputime(steal);
|
||||
this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
|
||||
steal_jiffies = nsecs_to_jiffies(steal);
|
||||
this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
|
||||
|
||||
account_steal_time(steal_ct);
|
||||
return steal_ct;
|
||||
account_steal_time(jiffies_to_cputime(steal_jiffies));
|
||||
return steal_jiffies;
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
|
@ -668,26 +668,25 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
|
|||
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
static unsigned long long vtime_delta(struct task_struct *tsk)
|
||||
static cputime_t vtime_delta(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long long clock;
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
|
||||
clock = local_clock();
|
||||
if (clock < tsk->vtime_snap)
|
||||
if (time_before(now, (unsigned long)tsk->vtime_snap))
|
||||
return 0;
|
||||
|
||||
return clock - tsk->vtime_snap;
|
||||
return jiffies_to_cputime(now - tsk->vtime_snap);
|
||||
}
|
||||
|
||||
static cputime_t get_vtime_delta(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long long delta = vtime_delta(tsk);
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
unsigned long delta = now - tsk->vtime_snap;
|
||||
|
||||
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
|
||||
tsk->vtime_snap += delta;
|
||||
tsk->vtime_snap = now;
|
||||
|
||||
/* CHECKME: always safe to convert nsecs to cputime? */
|
||||
return nsecs_to_cputime(delta);
|
||||
return jiffies_to_cputime(delta);
|
||||
}
|
||||
|
||||
static void __vtime_account_system(struct task_struct *tsk)
|
||||
|
@ -699,6 +698,9 @@ static void __vtime_account_system(struct task_struct *tsk)
|
|||
|
||||
void vtime_account_system(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_delta(tsk))
|
||||
return;
|
||||
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
|
@ -707,7 +709,8 @@ void vtime_account_system(struct task_struct *tsk)
|
|||
void vtime_gen_account_irq_exit(struct task_struct *tsk)
|
||||
{
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
if (context_tracking_in_user())
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
|
@ -718,16 +721,19 @@ void vtime_account_user(struct task_struct *tsk)
|
|||
cputime_t delta_cpu;
|
||||
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_SYS;
|
||||
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
|
||||
if (vtime_delta(tsk)) {
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
|
||||
}
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
|
||||
void vtime_user_enter(struct task_struct *tsk)
|
||||
{
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
|
@ -742,7 +748,8 @@ void vtime_guest_enter(struct task_struct *tsk)
|
|||
* that can thus safely catch up with a tickless delta.
|
||||
*/
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
current->flags |= PF_VCPU;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
|
@ -772,7 +779,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
|
|||
|
||||
write_seqcount_begin(&current->vtime_seqcount);
|
||||
current->vtime_snap_whence = VTIME_SYS;
|
||||
current->vtime_snap = sched_clock_cpu(smp_processor_id());
|
||||
current->vtime_snap = jiffies;
|
||||
write_seqcount_end(&current->vtime_seqcount);
|
||||
}
|
||||
|
||||
|
@ -783,7 +790,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
|
|||
local_irq_save(flags);
|
||||
write_seqcount_begin(&t->vtime_seqcount);
|
||||
t->vtime_snap_whence = VTIME_SYS;
|
||||
t->vtime_snap = sched_clock_cpu(cpu);
|
||||
t->vtime_snap = jiffies;
|
||||
write_seqcount_end(&t->vtime_seqcount);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
|
|
@ -352,7 +352,15 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
|
|||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
|
||||
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
|
||||
|
||||
/*
|
||||
* We are racing with the deadline timer. So, do nothing because
|
||||
* the deadline timer handler will take care of properly recharging
|
||||
* the runtime and postponing the deadline
|
||||
*/
|
||||
if (dl_se->dl_throttled)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We use the regular wall clock time to set deadlines in the
|
||||
|
@ -361,7 +369,6 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
|
|||
*/
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
dl_se->runtime = pi_se->dl_runtime;
|
||||
dl_se->dl_new = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -399,6 +406,9 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
|||
dl_se->runtime = pi_se->dl_runtime;
|
||||
}
|
||||
|
||||
if (dl_se->dl_yielded && dl_se->runtime > 0)
|
||||
dl_se->runtime = 0;
|
||||
|
||||
/*
|
||||
* We keep moving the deadline away until we get some
|
||||
* available runtime for the entity. This ensures correct
|
||||
|
@ -500,15 +510,6 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
|
|||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
/*
|
||||
* The arrival of a new instance needs special treatment, i.e.,
|
||||
* the actual scheduling parameters have to be "renewed".
|
||||
*/
|
||||
if (dl_se->dl_new) {
|
||||
setup_new_dl_entity(dl_se, pi_se);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
|
||||
dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
|
@ -604,16 +605,6 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
|||
goto unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is possible if switched_from_dl() raced against a running
|
||||
* callback that took the above !dl_task() path and we've since then
|
||||
* switched back into SCHED_DEADLINE.
|
||||
*
|
||||
* There's nothing to do except drop our task reference.
|
||||
*/
|
||||
if (dl_se->dl_new)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* The task might have been boosted by someone else and might be in the
|
||||
* boosting/deboosting path, its not throttled.
|
||||
|
@ -735,8 +726,11 @@ static void update_curr_dl(struct rq *rq)
|
|||
* approach need further study.
|
||||
*/
|
||||
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
|
||||
if (unlikely((s64)delta_exec <= 0))
|
||||
if (unlikely((s64)delta_exec <= 0)) {
|
||||
if (unlikely(dl_se->dl_yielded))
|
||||
goto throttle;
|
||||
return;
|
||||
}
|
||||
|
||||
schedstat_set(curr->se.statistics.exec_max,
|
||||
max(curr->se.statistics.exec_max, delta_exec));
|
||||
|
@ -749,8 +743,10 @@ static void update_curr_dl(struct rq *rq)
|
|||
|
||||
sched_rt_avg_update(rq, delta_exec);
|
||||
|
||||
dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
|
||||
if (dl_runtime_exceeded(dl_se)) {
|
||||
dl_se->runtime -= delta_exec;
|
||||
|
||||
throttle:
|
||||
if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
|
||||
dl_se->dl_throttled = 1;
|
||||
__dequeue_task_dl(rq, curr, 0);
|
||||
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
|
||||
|
@ -917,7 +913,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
|
|||
* parameters of the task might need updating. Otherwise,
|
||||
* we want a replenishment of its runtime.
|
||||
*/
|
||||
if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
|
||||
if (flags & ENQUEUE_WAKEUP)
|
||||
update_dl_entity(dl_se, pi_se);
|
||||
else if (flags & ENQUEUE_REPLENISH)
|
||||
replenish_dl_entity(dl_se, pi_se);
|
||||
|
@ -994,18 +990,14 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
|||
*/
|
||||
static void yield_task_dl(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p = rq->curr;
|
||||
|
||||
/*
|
||||
* We make the task go to sleep until its current deadline by
|
||||
* forcing its runtime to zero. This way, update_curr_dl() stops
|
||||
* it and the bandwidth timer will wake it up and will give it
|
||||
* new scheduling parameters (thanks to dl_yielded=1).
|
||||
*/
|
||||
if (p->dl.runtime > 0) {
|
||||
rq->curr->dl.dl_yielded = 1;
|
||||
p->dl.runtime = 0;
|
||||
}
|
||||
rq->curr->dl.dl_yielded = 1;
|
||||
|
||||
update_rq_clock(rq);
|
||||
update_curr_dl(rq);
|
||||
/*
|
||||
|
@ -1722,6 +1714,9 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
|
|||
*/
|
||||
static void switched_to_dl(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (dl_time_before(p->dl.deadline, rq_clock(rq)))
|
||||
setup_new_dl_entity(&p->dl, &p->dl);
|
||||
|
||||
if (task_on_rq_queued(p) && rq->curr != p) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
|
||||
|
@ -1768,8 +1763,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
|
|||
*/
|
||||
resched_curr(rq);
|
||||
#endif /* CONFIG_SMP */
|
||||
} else
|
||||
switched_to_dl(rq, p);
|
||||
}
|
||||
}
|
||||
|
||||
const struct sched_class dl_sched_class = {
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <linux/kallsyms.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
|
@ -58,6 +59,309 @@ static unsigned long nsec_low(unsigned long long nsec)
|
|||
|
||||
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
|
||||
|
||||
/*
 * Table of scheduler feature names: every SCHED_FEAT() line in features.h
 * expands to its stringified name, in the order features.h lists them.
 */
#define SCHED_FEAT(name, enabled) #name,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT
|
||||
|
||||
static int sched_feat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (!(sysctl_sched_features & (1UL << i)))
|
||||
seq_puts(m, "NO_");
|
||||
seq_printf(m, "%s ", sched_feat_names[i]);
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

/* One static key per feature, initialised from its default in features.h. */
#define SCHED_FEAT(name, enabled) jump_label_key__##enabled,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

/* Turn off the static key that backs feature number @i. */
static void sched_feat_disable(int i)
{
	static_key_disable(&sched_feat_keys[i]);
}

/* Turn on the static key that backs feature number @i. */
static void sched_feat_enable(int i)
{
	static_key_enable(&sched_feat_keys[i]);
}

#else /* !HAVE_JUMP_LABEL */

/* No static keys exist in this configuration, so there is nothing to flip. */
static void sched_feat_disable(int i) { }
static void sched_feat_enable(int i) { }

#endif /* HAVE_JUMP_LABEL */
|
||||
|
||||
static int sched_feat_set(char *cmp)
|
||||
{
|
||||
int i;
|
||||
int neg = 0;
|
||||
|
||||
if (strncmp(cmp, "NO_", 3) == 0) {
|
||||
neg = 1;
|
||||
cmp += 3;
|
||||
}
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (strcmp(cmp, sched_feat_names[i]) == 0) {
|
||||
if (neg) {
|
||||
sysctl_sched_features &= ~(1UL << i);
|
||||
sched_feat_disable(i);
|
||||
} else {
|
||||
sysctl_sched_features |= (1UL << i);
|
||||
sched_feat_enable(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
sched_feat_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
char *cmp;
|
||||
int i;
|
||||
struct inode *inode;
|
||||
|
||||
if (cnt > 63)
|
||||
cnt = 63;
|
||||
|
||||
if (copy_from_user(&buf, ubuf, cnt))
|
||||
return -EFAULT;
|
||||
|
||||
buf[cnt] = 0;
|
||||
cmp = strstrip(buf);
|
||||
|
||||
/* Ensure the static_key remains in a consistent state */
|
||||
inode = file_inode(filp);
|
||||
inode_lock(inode);
|
||||
i = sched_feat_set(cmp);
|
||||
inode_unlock(inode);
|
||||
if (i == __SCHED_FEAT_NR)
|
||||
return -EINVAL;
|
||||
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static int sched_feat_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, sched_feat_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations sched_feat_fops = {
|
||||
.open = sched_feat_open,
|
||||
.write = sched_feat_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static __init int sched_init_debug(void)
|
||||
{
|
||||
debugfs_create_file("sched_features", 0644, NULL, NULL,
|
||||
&sched_feat_fops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(sched_init_debug);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
|
||||
/*
 * "sched_domain" directory entry. Its .child is left unset here and is
 * filled in by register_sched_domain_sysctl().
 */
static struct ctl_table sd_ctl_dir[] = {
	{
		.mode		= 0555,
		.procname	= "sched_domain",
	},
	{}
};

/* Top-level "kernel" directory entry whose child is sd_ctl_dir. */
static struct ctl_table sd_ctl_root[] = {
	{
		.mode		= 0555,
		.procname	= "kernel",
		.child		= sd_ctl_dir,
	},
	{}
};
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_entry(int n)
|
||||
{
|
||||
struct ctl_table *entry =
|
||||
kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
{
|
||||
struct ctl_table *entry;
|
||||
|
||||
/*
|
||||
* In the intermediate directories, both the child directory and
|
||||
* procname are dynamically allocated and could fail but the mode
|
||||
* will always be set. In the lowest directory the names are
|
||||
* static strings and all have proc handlers.
|
||||
*/
|
||||
for (entry = *tablep; entry->mode; entry++) {
|
||||
if (entry->child)
|
||||
sd_free_ctl_entry(&entry->child);
|
||||
if (entry->proc_handler == NULL)
|
||||
kfree(entry->procname);
|
||||
}
|
||||
|
||||
kfree(*tablep);
|
||||
*tablep = NULL;
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
const char *procname, void *data, int maxlen,
|
||||
umode_t mode, proc_handler *proc_handler,
|
||||
bool load_idx)
|
||||
{
|
||||
entry->procname = procname;
|
||||
entry->data = data;
|
||||
entry->maxlen = maxlen;
|
||||
entry->mode = mode;
|
||||
entry->proc_handler = proc_handler;
|
||||
|
||||
if (load_idx) {
|
||||
entry->extra1 = &min_load_idx;
|
||||
entry->extra2 = &max_load_idx;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(14);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9], "cache_nice_tries",
|
||||
&sd->cache_nice_tries,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost",
|
||||
&sd->max_newidle_lb_cost,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
|
||||
{
|
||||
struct ctl_table *entry, *table;
|
||||
struct sched_domain *sd;
|
||||
int domain_num = 0, i;
|
||||
char buf[32];
|
||||
|
||||
for_each_domain(cpu, sd)
|
||||
domain_num++;
|
||||
entry = table = sd_alloc_ctl_entry(domain_num + 1);
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for_each_domain(cpu, sd) {
|
||||
snprintf(buf, 32, "domain%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_domain_table(sd);
|
||||
entry++;
|
||||
i++;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
void register_sched_domain_sysctl(void)
|
||||
{
|
||||
int i, cpu_num = num_possible_cpus();
|
||||
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
|
||||
char buf[32];
|
||||
|
||||
WARN_ON(sd_ctl_dir[0].child);
|
||||
sd_ctl_dir[0].child = entry;
|
||||
|
||||
if (entry == NULL)
|
||||
return;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
snprintf(buf, 32, "cpu%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_cpu_table(i);
|
||||
entry++;
|
||||
}
|
||||
|
||||
WARN_ON(sd_sysctl_header);
|
||||
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
|
||||
}
|
||||
|
||||
/* may be called multiple times per register */
|
||||
void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
unregister_sysctl_table(sd_sysctl_header);
|
||||
sd_sysctl_header = NULL;
|
||||
if (sd_ctl_dir[0].child)
|
||||
sd_free_ctl_entry(&sd_ctl_dir[0].child);
|
||||
}
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
|
||||
{
|
||||
|
@ -75,16 +379,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
|
|||
PN(se->vruntime);
|
||||
PN(se->sum_exec_runtime);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
PN(se->statistics.wait_start);
|
||||
PN(se->statistics.sleep_start);
|
||||
PN(se->statistics.block_start);
|
||||
PN(se->statistics.sleep_max);
|
||||
PN(se->statistics.block_max);
|
||||
PN(se->statistics.exec_max);
|
||||
PN(se->statistics.slice_max);
|
||||
PN(se->statistics.wait_max);
|
||||
PN(se->statistics.wait_sum);
|
||||
P(se->statistics.wait_count);
|
||||
if (schedstat_enabled()) {
|
||||
PN(se->statistics.wait_start);
|
||||
PN(se->statistics.sleep_start);
|
||||
PN(se->statistics.block_start);
|
||||
PN(se->statistics.sleep_max);
|
||||
PN(se->statistics.block_max);
|
||||
PN(se->statistics.exec_max);
|
||||
PN(se->statistics.slice_max);
|
||||
PN(se->statistics.wait_max);
|
||||
PN(se->statistics.wait_sum);
|
||||
P(se->statistics.wait_count);
|
||||
}
|
||||
#endif
|
||||
P(se->load.weight);
|
||||
#ifdef CONFIG_SMP
|
||||
|
@ -122,10 +428,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
|||
(long long)(p->nvcsw + p->nivcsw),
|
||||
p->prio);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
SPLIT_NS(p->se.statistics.wait_sum),
|
||||
SPLIT_NS(p->se.sum_exec_runtime),
|
||||
SPLIT_NS(p->se.statistics.sum_sleep_runtime));
|
||||
if (schedstat_enabled()) {
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
SPLIT_NS(p->se.statistics.wait_sum),
|
||||
SPLIT_NS(p->se.sum_exec_runtime),
|
||||
SPLIT_NS(p->se.statistics.sum_sleep_runtime));
|
||||
}
|
||||
#else
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
0LL, 0L,
|
||||
|
@ -258,8 +566,17 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
|
|||
|
||||
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
|
||||
{
|
||||
struct dl_bw *dl_bw;
|
||||
|
||||
SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
|
||||
#ifdef CONFIG_SMP
|
||||
dl_bw = &cpu_rq(cpu)->rd->dl_bw;
|
||||
#else
|
||||
dl_bw = &dl_rq->dl_bw;
|
||||
#endif
|
||||
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
|
||||
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
|
||||
}
|
||||
|
||||
extern __read_mostly int sched_clock_running;
|
||||
|
@ -313,17 +630,18 @@ do { \
|
|||
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
|
||||
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
|
||||
|
||||
P(yld_count);
|
||||
|
||||
P(sched_count);
|
||||
P(sched_goidle);
|
||||
#ifdef CONFIG_SMP
|
||||
P64(avg_idle);
|
||||
P64(max_idle_balance_cost);
|
||||
#endif
|
||||
|
||||
P(ttwu_count);
|
||||
P(ttwu_local);
|
||||
if (schedstat_enabled()) {
|
||||
P(yld_count);
|
||||
P(sched_count);
|
||||
P(sched_goidle);
|
||||
P(ttwu_count);
|
||||
P(ttwu_local);
|
||||
}
|
||||
|
||||
#undef P
|
||||
#undef P64
|
||||
|
@ -569,38 +887,39 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
|||
nr_switches = p->nvcsw + p->nivcsw;
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
PN(se.statistics.sum_sleep_runtime);
|
||||
PN(se.statistics.wait_start);
|
||||
PN(se.statistics.sleep_start);
|
||||
PN(se.statistics.block_start);
|
||||
PN(se.statistics.sleep_max);
|
||||
PN(se.statistics.block_max);
|
||||
PN(se.statistics.exec_max);
|
||||
PN(se.statistics.slice_max);
|
||||
PN(se.statistics.wait_max);
|
||||
PN(se.statistics.wait_sum);
|
||||
P(se.statistics.wait_count);
|
||||
PN(se.statistics.iowait_sum);
|
||||
P(se.statistics.iowait_count);
|
||||
P(se.nr_migrations);
|
||||
P(se.statistics.nr_migrations_cold);
|
||||
P(se.statistics.nr_failed_migrations_affine);
|
||||
P(se.statistics.nr_failed_migrations_running);
|
||||
P(se.statistics.nr_failed_migrations_hot);
|
||||
P(se.statistics.nr_forced_migrations);
|
||||
P(se.statistics.nr_wakeups);
|
||||
P(se.statistics.nr_wakeups_sync);
|
||||
P(se.statistics.nr_wakeups_migrate);
|
||||
P(se.statistics.nr_wakeups_local);
|
||||
P(se.statistics.nr_wakeups_remote);
|
||||
P(se.statistics.nr_wakeups_affine);
|
||||
P(se.statistics.nr_wakeups_affine_attempts);
|
||||
P(se.statistics.nr_wakeups_passive);
|
||||
P(se.statistics.nr_wakeups_idle);
|
||||
|
||||
{
|
||||
if (schedstat_enabled()) {
|
||||
u64 avg_atom, avg_per_cpu;
|
||||
|
||||
PN(se.statistics.sum_sleep_runtime);
|
||||
PN(se.statistics.wait_start);
|
||||
PN(se.statistics.sleep_start);
|
||||
PN(se.statistics.block_start);
|
||||
PN(se.statistics.sleep_max);
|
||||
PN(se.statistics.block_max);
|
||||
PN(se.statistics.exec_max);
|
||||
PN(se.statistics.slice_max);
|
||||
PN(se.statistics.wait_max);
|
||||
PN(se.statistics.wait_sum);
|
||||
P(se.statistics.wait_count);
|
||||
PN(se.statistics.iowait_sum);
|
||||
P(se.statistics.iowait_count);
|
||||
P(se.statistics.nr_migrations_cold);
|
||||
P(se.statistics.nr_failed_migrations_affine);
|
||||
P(se.statistics.nr_failed_migrations_running);
|
||||
P(se.statistics.nr_failed_migrations_hot);
|
||||
P(se.statistics.nr_forced_migrations);
|
||||
P(se.statistics.nr_wakeups);
|
||||
P(se.statistics.nr_wakeups_sync);
|
||||
P(se.statistics.nr_wakeups_migrate);
|
||||
P(se.statistics.nr_wakeups_local);
|
||||
P(se.statistics.nr_wakeups_remote);
|
||||
P(se.statistics.nr_wakeups_affine);
|
||||
P(se.statistics.nr_wakeups_affine_attempts);
|
||||
P(se.statistics.nr_wakeups_passive);
|
||||
P(se.statistics.nr_wakeups_idle);
|
||||
|
||||
avg_atom = p->se.sum_exec_runtime;
|
||||
if (nr_switches)
|
||||
avg_atom = div64_ul(avg_atom, nr_switches);
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
||||
*/
|
||||
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -755,7 +755,9 @@ static void
|
|||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
struct task_struct *p;
|
||||
u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
|
||||
u64 delta;
|
||||
|
||||
delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
|
||||
|
||||
if (entity_is_task(se)) {
|
||||
p = task_of(se);
|
||||
|
@ -776,22 +778,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
se->statistics.wait_sum += delta;
|
||||
se->statistics.wait_start = 0;
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Task is being enqueued - update stats:
|
||||
*/
|
||||
static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
static inline void
|
||||
update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
/*
|
||||
* Are we enqueueing a waiting task? (for current tasks
|
||||
|
@ -802,7 +794,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
}
|
||||
|
||||
static inline void
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
/*
|
||||
* Mark the end of the wait period if dequeueing a
|
||||
|
@ -810,7 +802,40 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
*/
|
||||
if (se != cfs_rq->curr)
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
|
||||
if (flags & DEQUEUE_SLEEP) {
|
||||
if (entity_is_task(se)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if (tsk->state & TASK_INTERRUPTIBLE)
|
||||
se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
|
||||
if (tsk->state & TASK_UNINTERRUPTIBLE)
|
||||
se->statistics.block_start = rq_clock(rq_of(cfs_rq));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are picking a new current task - update its stats:
|
||||
|
@ -907,10 +932,11 @@ struct numa_group {
|
|||
spinlock_t lock; /* nr_tasks, tasks */
|
||||
int nr_tasks;
|
||||
pid_t gid;
|
||||
int active_nodes;
|
||||
|
||||
struct rcu_head rcu;
|
||||
nodemask_t active_nodes;
|
||||
unsigned long total_faults;
|
||||
unsigned long max_faults_cpu;
|
||||
/*
|
||||
* Faults_cpu is used to decide whether memory should move
|
||||
* towards the CPU. As a consequence, these stats are weighted
|
||||
|
@ -969,6 +995,18 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
|
|||
group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
|
||||
}
|
||||
|
||||
/*
|
||||
* A node triggering more than 1/3 as many NUMA faults as the maximum is
|
||||
* considered part of a numa group's pseudo-interleaving set. Migrations
|
||||
* between these nodes are slowed down, to allow things to settle down.
|
||||
*/
|
||||
#define ACTIVE_NODE_FRACTION 3
|
||||
|
||||
static bool numa_is_active_node(int nid, struct numa_group *ng)
|
||||
{
|
||||
return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
|
||||
}
|
||||
|
||||
/* Handle placement on systems where not all nodes are directly connected. */
|
||||
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
int maxdist, bool task)
|
||||
|
@ -1118,27 +1156,23 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
|
|||
return true;
|
||||
|
||||
/*
|
||||
* Do not migrate if the destination is not a node that
|
||||
* is actively used by this numa group.
|
||||
* Destination node is much more heavily used than the source
|
||||
* node? Allow migration.
|
||||
*/
|
||||
if (!node_isset(dst_nid, ng->active_nodes))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Source is a node that is not actively used by this
|
||||
* numa group, while the destination is. Migrate.
|
||||
*/
|
||||
if (!node_isset(src_nid, ng->active_nodes))
|
||||
if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
|
||||
ACTIVE_NODE_FRACTION)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Both source and destination are nodes in active
|
||||
* use by this numa group. Maximize memory bandwidth
|
||||
* by migrating from more heavily used groups, to less
|
||||
* heavily used ones, spreading the load around.
|
||||
* Use a 1/4 hysteresis to avoid spurious page movement.
|
||||
* Distribute memory according to CPU & memory use on each node,
|
||||
* with 3/4 hysteresis to avoid unnecessary memory migrations:
|
||||
*
|
||||
* faults_cpu(dst) 3 faults_cpu(src)
|
||||
* --------------- * - > ---------------
|
||||
* faults_mem(dst) 4 faults_mem(src)
|
||||
*/
|
||||
return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
|
||||
return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
|
||||
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
|
||||
}
|
||||
|
||||
static unsigned long weighted_cpuload(const int cpu);
|
||||
|
@ -1484,7 +1518,7 @@ static int task_numa_migrate(struct task_struct *p)
|
|||
|
||||
.best_task = NULL,
|
||||
.best_imp = 0,
|
||||
.best_cpu = -1
|
||||
.best_cpu = -1,
|
||||
};
|
||||
struct sched_domain *sd;
|
||||
unsigned long taskweight, groupweight;
|
||||
|
@ -1536,8 +1570,7 @@ static int task_numa_migrate(struct task_struct *p)
|
|||
* multiple NUMA nodes; in order to better consolidate the group,
|
||||
* we need to check other locations.
|
||||
*/
|
||||
if (env.best_cpu == -1 || (p->numa_group &&
|
||||
nodes_weight(p->numa_group->active_nodes) > 1)) {
|
||||
if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
|
||||
for_each_online_node(nid) {
|
||||
if (nid == env.src_nid || nid == p->numa_preferred_nid)
|
||||
continue;
|
||||
|
@ -1572,12 +1605,14 @@ static int task_numa_migrate(struct task_struct *p)
|
|||
* trying for a better one later. Do not set the preferred node here.
|
||||
*/
|
||||
if (p->numa_group) {
|
||||
struct numa_group *ng = p->numa_group;
|
||||
|
||||
if (env.best_cpu == -1)
|
||||
nid = env.src_nid;
|
||||
else
|
||||
nid = env.dst_nid;
|
||||
|
||||
if (node_isset(nid, p->numa_group->active_nodes))
|
||||
if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng))
|
||||
sched_setnuma(p, env.dst_nid);
|
||||
}
|
||||
|
||||
|
@ -1627,20 +1662,15 @@ static void numa_migrate_preferred(struct task_struct *p)
|
|||
}
|
||||
|
||||
/*
|
||||
* Find the nodes on which the workload is actively running. We do this by
|
||||
* Find out how many nodes the workload is actively running on. Do this by
|
||||
* tracking the nodes from which NUMA hinting faults are triggered. This can
|
||||
* be different from the set of nodes where the workload's memory is currently
|
||||
* located.
|
||||
*
|
||||
* The bitmask is used to make smarter decisions on when to do NUMA page
|
||||
* migrations, To prevent flip-flopping, and excessive page migrations, nodes
|
||||
* are added when they cause over 6/16 of the maximum number of faults, but
|
||||
* only removed when they drop below 3/16.
|
||||
*/
|
||||
static void update_numa_active_node_mask(struct numa_group *numa_group)
|
||||
static void numa_group_count_active_nodes(struct numa_group *numa_group)
|
||||
{
|
||||
unsigned long faults, max_faults = 0;
|
||||
int nid;
|
||||
int nid, active_nodes = 0;
|
||||
|
||||
for_each_online_node(nid) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
|
@ -1650,12 +1680,12 @@ static void update_numa_active_node_mask(struct numa_group *numa_group)
|
|||
|
||||
for_each_online_node(nid) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
if (!node_isset(nid, numa_group->active_nodes)) {
|
||||
if (faults > max_faults * 6 / 16)
|
||||
node_set(nid, numa_group->active_nodes);
|
||||
} else if (faults < max_faults * 3 / 16)
|
||||
node_clear(nid, numa_group->active_nodes);
|
||||
if (faults * ACTIVE_NODE_FRACTION > max_faults)
|
||||
active_nodes++;
|
||||
}
|
||||
|
||||
numa_group->max_faults_cpu = max_faults;
|
||||
numa_group->active_nodes = active_nodes;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1946,7 +1976,7 @@ static void task_numa_placement(struct task_struct *p)
|
|||
update_task_scan_period(p, fault_types[0], fault_types[1]);
|
||||
|
||||
if (p->numa_group) {
|
||||
update_numa_active_node_mask(p->numa_group);
|
||||
numa_group_count_active_nodes(p->numa_group);
|
||||
spin_unlock_irq(group_lock);
|
||||
max_nid = preferred_group_nid(p, max_group_nid);
|
||||
}
|
||||
|
@ -1990,14 +2020,14 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
|
|||
return;
|
||||
|
||||
atomic_set(&grp->refcount, 1);
|
||||
grp->active_nodes = 1;
|
||||
grp->max_faults_cpu = 0;
|
||||
spin_lock_init(&grp->lock);
|
||||
grp->gid = p->pid;
|
||||
/* Second half of the array tracks nids where faults happen */
|
||||
grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
|
||||
nr_node_ids;
|
||||
|
||||
node_set(task_node(current), grp->active_nodes);
|
||||
|
||||
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
|
||||
grp->faults[i] = p->numa_faults[i];
|
||||
|
||||
|
@ -2111,6 +2141,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
|
|||
bool migrated = flags & TNF_MIGRATED;
|
||||
int cpu_node = task_node(current);
|
||||
int local = !!(flags & TNF_FAULT_LOCAL);
|
||||
struct numa_group *ng;
|
||||
int priv;
|
||||
|
||||
if (!static_branch_likely(&sched_numa_balancing))
|
||||
|
@ -2151,9 +2182,10 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
|
|||
* actively using should be counted as local. This allows the
|
||||
* scan rate to slow down when a workload has settled down.
|
||||
*/
|
||||
if (!priv && !local && p->numa_group &&
|
||||
node_isset(cpu_node, p->numa_group->active_nodes) &&
|
||||
node_isset(mem_node, p->numa_group->active_nodes))
|
||||
ng = p->numa_group;
|
||||
if (!priv && !local && ng && ng->active_nodes > 1 &&
|
||||
numa_is_active_node(cpu_node, ng) &&
|
||||
numa_is_active_node(mem_node, ng))
|
||||
local = 1;
|
||||
|
||||
task_numa_placement(p);
|
||||
|
@ -3102,6 +3134,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
|||
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
|
||||
|
||||
/*
 * Warn once when a sched_stat_* tracepoint is active while schedstats are
 * disabled at runtime. Per the message below, those tracepoints need
 * schedstats to be switched on. Does nothing when CONFIG_SCHEDSTATS is off
 * or schedstats are already enabled.
 */
static inline void check_schedstat_required(void)
{
#ifdef CONFIG_SCHEDSTATS
	if (schedstat_enabled())
		return;

	/*
	 * Nothing is force-enabled here: we only tell the user which knob to
	 * flip if a dependent tracepoint is active.
	 *
	 * NOTE(review): the visible caller is enqueue_entity(); confirm that
	 * printing from that context is safe, or switch to a deferred printk.
	 */
	if (trace_sched_stat_wait_enabled()    ||
	    trace_sched_stat_sleep_enabled()   ||
	    trace_sched_stat_iowait_enabled()  ||
	    trace_sched_stat_blocked_enabled() ||
	    trace_sched_stat_runtime_enabled()) {
		pr_warn_once("Scheduler tracepoints stat_wait, stat_sleep, "
			     "stat_iowait, stat_blocked and stat_runtime "
			     "require the kernel parameter schedstats=enable "
			     "or kernel.sched_schedstats=1\n");
	}
#endif
}
|
||||
|
||||
static void
|
||||
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
|
@ -3122,11 +3174,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
|
||||
if (flags & ENQUEUE_WAKEUP) {
|
||||
place_entity(cfs_rq, se, 0);
|
||||
enqueue_sleeper(cfs_rq, se);
|
||||
if (schedstat_enabled())
|
||||
enqueue_sleeper(cfs_rq, se);
|
||||
}
|
||||
|
||||
update_stats_enqueue(cfs_rq, se);
|
||||
check_spread(cfs_rq, se);
|
||||
check_schedstat_required();
|
||||
if (schedstat_enabled()) {
|
||||
update_stats_enqueue(cfs_rq, se);
|
||||
check_spread(cfs_rq, se);
|
||||
}
|
||||
if (se != cfs_rq->curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
|
@ -3193,19 +3249,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
update_curr(cfs_rq);
|
||||
dequeue_entity_load_avg(cfs_rq, se);
|
||||
|
||||
update_stats_dequeue(cfs_rq, se);
|
||||
if (flags & DEQUEUE_SLEEP) {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (entity_is_task(se)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if (tsk->state & TASK_INTERRUPTIBLE)
|
||||
se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
|
||||
if (tsk->state & TASK_UNINTERRUPTIBLE)
|
||||
se->statistics.block_start = rq_clock(rq_of(cfs_rq));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (schedstat_enabled())
|
||||
update_stats_dequeue(cfs_rq, se, flags);
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
|
@ -3279,7 +3324,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
* a CPU. So account for the time it spent waiting on the
|
||||
* runqueue.
|
||||
*/
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
if (schedstat_enabled())
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
__dequeue_entity(cfs_rq, se);
|
||||
update_load_avg(se, 1);
|
||||
}
|
||||
|
@ -3292,7 +3338,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
* least twice that of our own weight (i.e. dont track it
|
||||
* when there are only lesser-weight tasks around):
|
||||
*/
|
||||
if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
|
||||
if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
|
||||
se->statistics.slice_max = max(se->statistics.slice_max,
|
||||
se->sum_exec_runtime - se->prev_sum_exec_runtime);
|
||||
}
|
||||
|
@ -3375,9 +3421,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
|||
/* throttle cfs_rqs exceeding runtime */
|
||||
check_cfs_rq_runtime(cfs_rq);
|
||||
|
||||
check_spread(cfs_rq, prev);
|
||||
if (schedstat_enabled()) {
|
||||
check_spread(cfs_rq, prev);
|
||||
if (prev->on_rq)
|
||||
update_stats_wait_start(cfs_rq, prev);
|
||||
}
|
||||
|
||||
if (prev->on_rq) {
|
||||
update_stats_wait_start(cfs_rq, prev);
|
||||
/* Put 'current' back into the tree. */
|
||||
__enqueue_entity(cfs_rq, prev);
|
||||
/* in !on_rq case, update occurred at dequeue */
|
||||
|
@ -4459,9 +4509,17 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
|||
|
||||
/* scale is effectively 1 << i now, and >> i divides by scale */
|
||||
|
||||
old_load = this_rq->cpu_load[i] - tickless_load;
|
||||
old_load = this_rq->cpu_load[i];
|
||||
old_load = decay_load_missed(old_load, pending_updates - 1, i);
|
||||
old_load += tickless_load;
|
||||
if (tickless_load) {
|
||||
old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
|
||||
/*
|
||||
* old_load can never be a negative value because a
|
||||
* decayed tickless_load cannot be greater than the
|
||||
* original tickless_load.
|
||||
*/
|
||||
old_load += tickless_load;
|
||||
}
|
||||
new_load = this_load;
|
||||
/*
|
||||
* Round up the averaging division if load is increasing. This
|
||||
|
@ -4484,6 +4542,25 @@ static unsigned long weighted_cpuload(const int cpu)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
static void __update_cpu_load_nohz(struct rq *this_rq,
|
||||
unsigned long curr_jiffies,
|
||||
unsigned long load,
|
||||
int active)
|
||||
{
|
||||
unsigned long pending_updates;
|
||||
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
if (pending_updates) {
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
/*
|
||||
* In the regular NOHZ case, we were idle, this means load 0.
|
||||
* In the NOHZ_FULL case, we were non-idle, we should consider
|
||||
* its weighted load.
|
||||
*/
|
||||
__update_cpu_load(this_rq, load, pending_updates, active);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no sane way to deal with nohz on smp when using jiffies because the
|
||||
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
|
||||
|
@ -4501,22 +4578,15 @@ static unsigned long weighted_cpuload(const int cpu)
|
|||
* Called from nohz_idle_balance() to update the load ratings before doing the
|
||||
* idle balance.
|
||||
*/
|
||||
static void update_idle_cpu_load(struct rq *this_rq)
|
||||
static void update_cpu_load_idle(struct rq *this_rq)
|
||||
{
|
||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
||||
unsigned long pending_updates;
|
||||
|
||||
/*
|
||||
* bail if there's load or we're actually up-to-date.
|
||||
*/
|
||||
if (load || curr_jiffies == this_rq->last_load_update_tick)
|
||||
if (weighted_cpuload(cpu_of(this_rq)))
|
||||
return;
|
||||
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
|
||||
__update_cpu_load(this_rq, load, pending_updates, 0);
|
||||
__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4527,22 +4597,12 @@ void update_cpu_load_nohz(int active)
|
|||
struct rq *this_rq = this_rq();
|
||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||
unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
|
||||
unsigned long pending_updates;
|
||||
|
||||
if (curr_jiffies == this_rq->last_load_update_tick)
|
||||
return;
|
||||
|
||||
raw_spin_lock(&this_rq->lock);
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
if (pending_updates) {
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
/*
|
||||
* In the regular NOHZ case, we were idle, this means load 0.
|
||||
* In the NOHZ_FULL case, we were non-idle, we should consider
|
||||
* its weighted load.
|
||||
*/
|
||||
__update_cpu_load(this_rq, load, pending_updates, active);
|
||||
}
|
||||
__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
|
||||
raw_spin_unlock(&this_rq->lock);
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
@ -4554,7 +4614,7 @@ void update_cpu_load_active(struct rq *this_rq)
|
|||
{
|
||||
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
||||
/*
|
||||
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
|
||||
* See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
|
||||
*/
|
||||
this_rq->last_load_update_tick = jiffies;
|
||||
__update_cpu_load(this_rq, load, 1, 1);
|
||||
|
@ -7848,7 +7908,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
|
|||
if (time_after_eq(jiffies, rq->next_balance)) {
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
update_rq_clock(rq);
|
||||
update_idle_cpu_load(rq);
|
||||
update_cpu_load_idle(rq);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
rebalance_domains(rq, CPU_IDLE);
|
||||
}
|
||||
|
@ -8234,11 +8294,8 @@ void free_fair_sched_group(struct task_group *tg)
|
|||
for_each_possible_cpu(i) {
|
||||
if (tg->cfs_rq)
|
||||
kfree(tg->cfs_rq[i]);
|
||||
if (tg->se) {
|
||||
if (tg->se[i])
|
||||
remove_entity_load_avg(tg->se[i]);
|
||||
if (tg->se)
|
||||
kfree(tg->se[i]);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(tg->cfs_rq);
|
||||
|
@ -8286,21 +8343,29 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void unregister_fair_sched_group(struct task_group *tg, int cpu)
|
||||
void unregister_fair_sched_group(struct task_group *tg)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* Only empty task groups can be destroyed; so we can speculatively
|
||||
* check on_list without danger of it being re-added.
|
||||
*/
|
||||
if (!tg->cfs_rq[cpu]->on_list)
|
||||
return;
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (tg->se[cpu])
|
||||
remove_entity_load_avg(tg->se[cpu]);
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
/*
|
||||
* Only empty task groups can be destroyed; so we can speculatively
|
||||
* check on_list without danger of it being re-added.
|
||||
*/
|
||||
if (!tg->cfs_rq[cpu]->on_list)
|
||||
continue;
|
||||
|
||||
rq = cpu_rq(cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
|
@ -8382,7 +8447,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||
return 1;
|
||||
}
|
||||
|
||||
void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
|
||||
void unregister_fair_sched_group(struct task_group *tg) { }
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
|
|
|
@ -58,7 +58,15 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
|||
raw_spin_lock(&rt_b->rt_runtime_lock);
|
||||
if (!rt_b->rt_period_active) {
|
||||
rt_b->rt_period_active = 1;
|
||||
hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
|
||||
/*
|
||||
* SCHED_DEADLINE updates the bandwidth, as a run away
|
||||
* RT task with a DL task could hog a CPU. But DL does
|
||||
* not reset the period. If a deadline task was running
|
||||
* without an RT task running, it can cause RT tasks to
|
||||
* throttle when they start up. Kick the timer right away
|
||||
* to update the period.
|
||||
*/
|
||||
hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
|
||||
hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
|
||||
}
|
||||
raw_spin_unlock(&rt_b->rt_runtime_lock);
|
||||
|
@ -436,7 +444,7 @@ static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
|
|||
|
||||
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return !list_empty(&rt_se->run_list);
|
||||
return rt_se->on_rq;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
@ -482,8 +490,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
|
|||
return rt_se->my_q;
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
|
||||
|
||||
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
{
|
||||
|
@ -499,7 +507,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
|||
if (!rt_se)
|
||||
enqueue_top_rt_rq(rt_rq);
|
||||
else if (!on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se, false);
|
||||
enqueue_rt_entity(rt_se, 0);
|
||||
|
||||
if (rt_rq->highest_prio.curr < curr->prio)
|
||||
resched_curr(rq);
|
||||
|
@ -516,7 +524,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
|
|||
if (!rt_se)
|
||||
dequeue_top_rt_rq(rt_rq);
|
||||
else if (on_rt_rq(rt_se))
|
||||
dequeue_rt_entity(rt_se);
|
||||
dequeue_rt_entity(rt_se, 0);
|
||||
}
|
||||
|
||||
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
|
||||
|
@ -1166,7 +1174,30 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
|||
dec_rt_group(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
/*
|
||||
* Change rt_se->run_list location unless SAVE && !MOVE
|
||||
*
|
||||
* assumes ENQUEUE/DEQUEUE flags match
|
||||
*/
|
||||
static inline bool move_entity(unsigned int flags)
|
||||
{
|
||||
if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
|
||||
{
|
||||
list_del_init(&rt_se->run_list);
|
||||
|
||||
if (list_empty(array->queue + rt_se_prio(rt_se)))
|
||||
__clear_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
|
||||
rt_se->on_list = 0;
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
|
@ -1179,26 +1210,37 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
|||
* get throttled and the current group doesn't have any other
|
||||
* active members.
|
||||
*/
|
||||
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
|
||||
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
|
||||
if (rt_se->on_list)
|
||||
__delist_rt_entity(rt_se, array);
|
||||
return;
|
||||
}
|
||||
|
||||
if (head)
|
||||
list_add(&rt_se->run_list, queue);
|
||||
else
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
__set_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
if (move_entity(flags)) {
|
||||
WARN_ON_ONCE(rt_se->on_list);
|
||||
if (flags & ENQUEUE_HEAD)
|
||||
list_add(&rt_se->run_list, queue);
|
||||
else
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
|
||||
__set_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
rt_se->on_list = 1;
|
||||
}
|
||||
rt_se->on_rq = 1;
|
||||
|
||||
inc_rt_tasks(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
|
||||
list_del_init(&rt_se->run_list);
|
||||
if (list_empty(array->queue + rt_se_prio(rt_se)))
|
||||
__clear_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
if (move_entity(flags)) {
|
||||
WARN_ON_ONCE(!rt_se->on_list);
|
||||
__delist_rt_entity(rt_se, array);
|
||||
}
|
||||
rt_se->on_rq = 0;
|
||||
|
||||
dec_rt_tasks(rt_se, rt_rq);
|
||||
}
|
||||
|
@ -1207,7 +1249,7 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
|||
* Because the prio of an upper entry depends on the lower
|
||||
* entries, we must remove entries top - down.
|
||||
*/
|
||||
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
|
||||
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct sched_rt_entity *back = NULL;
|
||||
|
||||
|
@ -1220,31 +1262,31 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
|
|||
|
||||
for (rt_se = back; rt_se; rt_se = rt_se->back) {
|
||||
if (on_rt_rq(rt_se))
|
||||
__dequeue_rt_entity(rt_se);
|
||||
__dequeue_rt_entity(rt_se, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_se(rt_se);
|
||||
|
||||
dequeue_rt_stack(rt_se);
|
||||
dequeue_rt_stack(rt_se, flags);
|
||||
for_each_sched_rt_entity(rt_se)
|
||||
__enqueue_rt_entity(rt_se, head);
|
||||
__enqueue_rt_entity(rt_se, flags);
|
||||
enqueue_top_rt_rq(&rq->rt);
|
||||
}
|
||||
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_se(rt_se);
|
||||
|
||||
dequeue_rt_stack(rt_se);
|
||||
dequeue_rt_stack(rt_se, flags);
|
||||
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
struct rt_rq *rt_rq = group_rt_rq(rt_se);
|
||||
|
||||
if (rt_rq && rt_rq->rt_nr_running)
|
||||
__enqueue_rt_entity(rt_se, false);
|
||||
__enqueue_rt_entity(rt_se, flags);
|
||||
}
|
||||
enqueue_top_rt_rq(&rq->rt);
|
||||
}
|
||||
|
@ -1260,7 +1302,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
|
|||
if (flags & ENQUEUE_WAKEUP)
|
||||
rt_se->timeout = 0;
|
||||
|
||||
enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
|
||||
enqueue_rt_entity(rt_se, flags);
|
||||
|
||||
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
|
@ -1271,7 +1313,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
|
|||
struct sched_rt_entity *rt_se = &p->rt;
|
||||
|
||||
update_curr_rt(rq);
|
||||
dequeue_rt_entity(rt_se);
|
||||
dequeue_rt_entity(rt_se, flags);
|
||||
|
||||
dequeue_pushable_task(rq, p);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/sched/deadline.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/stop_machine.h>
|
||||
|
@ -313,12 +314,11 @@ extern int tg_nop(struct task_group *tg, void *data);
|
|||
|
||||
extern void free_fair_sched_group(struct task_group *tg);
|
||||
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
|
||||
extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
|
||||
extern void unregister_fair_sched_group(struct task_group *tg);
|
||||
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
struct sched_entity *se, int cpu,
|
||||
struct sched_entity *parent);
|
||||
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
||||
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
|
||||
|
||||
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
|
||||
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
||||
|
@ -909,6 +909,18 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
|
|||
|
||||
extern int group_balance_cpu(struct sched_group *sg);
|
||||
|
||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
||||
void register_sched_domain_sysctl(void);
|
||||
void unregister_sched_domain_sysctl(void);
|
||||
#else
|
||||
static inline void register_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
static inline void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
static inline void sched_ttwu_pending(void) { }
|
||||
|
@ -1022,6 +1034,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
|
|||
#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
|
||||
|
||||
extern struct static_key_false sched_numa_balancing;
|
||||
extern struct static_key_false sched_schedstats;
|
||||
|
||||
static inline u64 global_rt_period(void)
|
||||
{
|
||||
|
@ -1130,18 +1143,40 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
|||
extern const int sched_prio_to_weight[40];
|
||||
extern const u32 sched_prio_to_wmult[40];
|
||||
|
||||
/*
|
||||
* {de,en}queue flags:
|
||||
*
|
||||
* DEQUEUE_SLEEP - task is no longer runnable
|
||||
* ENQUEUE_WAKEUP - task just became runnable
|
||||
*
|
||||
* SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
|
||||
* are in a known state which allows modification. Such pairs
|
||||
* should preserve as much state as possible.
|
||||
*
|
||||
* MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
|
||||
* in the runqueue.
|
||||
*
|
||||
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
|
||||
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
|
||||
* ENQUEUE_WAKING - sched_class::task_waking was called
|
||||
*
|
||||
*/
|
||||
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
|
||||
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
|
||||
|
||||
#define ENQUEUE_WAKEUP 0x01
|
||||
#define ENQUEUE_HEAD 0x02
|
||||
#define ENQUEUE_RESTORE 0x02
|
||||
#define ENQUEUE_MOVE 0x04
|
||||
|
||||
#define ENQUEUE_HEAD 0x08
|
||||
#define ENQUEUE_REPLENISH 0x10
|
||||
#ifdef CONFIG_SMP
|
||||
#define ENQUEUE_WAKING 0x04 /* sched_class::task_waking was called */
|
||||
#define ENQUEUE_WAKING 0x20
|
||||
#else
|
||||
#define ENQUEUE_WAKING 0x00
|
||||
#endif
|
||||
#define ENQUEUE_REPLENISH 0x08
|
||||
#define ENQUEUE_RESTORE 0x10
|
||||
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
|
|
|
@ -29,9 +29,10 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
|||
if (rq)
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
}
|
||||
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
|
||||
# define schedstat_set(var, val) do { var = (val); } while (0)
|
||||
# define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
|
||||
# define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
|
||||
# define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
|
||||
#else /* !CONFIG_SCHEDSTATS */
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
|
@ -42,6 +43,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
|||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
# define schedstat_enabled() 0
|
||||
# define schedstat_inc(rq, field) do { } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { } while (0)
|
||||
# define schedstat_set(var, val) do { } while (0)
|
||||
|
|
123
kernel/sched/swait.c
Normal file
123
kernel/sched/swait.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/swait.h>
|
||||
|
||||
void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
raw_spin_lock_init(&q->lock);
|
||||
lockdep_set_class_and_name(&q->lock, key, name);
|
||||
INIT_LIST_HEAD(&q->task_list);
|
||||
}
|
||||
EXPORT_SYMBOL(__init_swait_queue_head);
|
||||
|
||||
/*
|
||||
* The thing about the wake_up_state() return value; I think we can ignore it.
|
||||
*
|
||||
* If for some reason it would return 0, that means the previously waiting
|
||||
* task is already running, so it will observe condition true (or has already).
|
||||
*/
|
||||
void swake_up_locked(struct swait_queue_head *q)
|
||||
{
|
||||
struct swait_queue *curr;
|
||||
|
||||
if (list_empty(&q->task_list))
|
||||
return;
|
||||
|
||||
curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
|
||||
wake_up_process(curr->task);
|
||||
list_del_init(&curr->task_list);
|
||||
}
|
||||
EXPORT_SYMBOL(swake_up_locked);
|
||||
|
||||
void swake_up(struct swait_queue_head *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!swait_active(q))
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&q->lock, flags);
|
||||
swake_up_locked(q);
|
||||
raw_spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(swake_up);
|
||||
|
||||
/*
|
||||
* Does not allow usage from IRQ disabled, since we must be able to
|
||||
* release IRQs to guarantee bounded hold time.
|
||||
*/
|
||||
void swake_up_all(struct swait_queue_head *q)
|
||||
{
|
||||
struct swait_queue *curr;
|
||||
LIST_HEAD(tmp);
|
||||
|
||||
if (!swait_active(q))
|
||||
return;
|
||||
|
||||
raw_spin_lock_irq(&q->lock);
|
||||
list_splice_init(&q->task_list, &tmp);
|
||||
while (!list_empty(&tmp)) {
|
||||
curr = list_first_entry(&tmp, typeof(*curr), task_list);
|
||||
|
||||
wake_up_state(curr->task, TASK_NORMAL);
|
||||
list_del_init(&curr->task_list);
|
||||
|
||||
if (list_empty(&tmp))
|
||||
break;
|
||||
|
||||
raw_spin_unlock_irq(&q->lock);
|
||||
raw_spin_lock_irq(&q->lock);
|
||||
}
|
||||
raw_spin_unlock_irq(&q->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(swake_up_all);
|
||||
|
||||
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
||||
{
|
||||
wait->task = current;
|
||||
if (list_empty(&wait->task_list))
|
||||
list_add(&wait->task_list, &q->task_list);
|
||||
}
|
||||
|
||||
void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&q->lock, flags);
|
||||
__prepare_to_swait(q, wait);
|
||||
set_current_state(state);
|
||||
raw_spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_swait);
|
||||
|
||||
long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
|
||||
{
|
||||
if (signal_pending_state(state, current))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
prepare_to_swait(q, wait, state);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_swait_event);
|
||||
|
||||
void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
||||
{
|
||||
__set_current_state(TASK_RUNNING);
|
||||
if (!list_empty(&wait->task_list))
|
||||
list_del_init(&wait->task_list);
|
||||
}
|
||||
|
||||
void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
if (!list_empty_careful(&wait->task_list)) {
|
||||
raw_spin_lock_irqsave(&q->lock, flags);
|
||||
list_del_init(&wait->task_list);
|
||||
raw_spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(finish_swait);
|
|
@ -116,9 +116,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
|||
|
||||
if (preempt_count() == cnt) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
|
||||
current->preempt_disable_ip = get_lock_parent_ip();
|
||||
#endif
|
||||
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__local_bh_disable_ip);
|
||||
|
|
|
@ -350,6 +350,17 @@ static struct ctl_table kern_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
{
|
||||
.procname = "sched_schedstats",
|
||||
.data = NULL,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = sysctl_schedstats,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#endif /* CONFIG_SCHEDSTATS */
|
||||
#endif /* CONFIG_SMP */
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
{
|
||||
|
@ -505,7 +516,7 @@ static struct ctl_table kern_table[] = {
|
|||
.data = &latencytop_enabled,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = sysctl_latencytop,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
|
|
|
@ -93,9 +93,11 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
|
|||
{
|
||||
struct mm_struct *mm;
|
||||
|
||||
/* convert pages-usec to Mbyte-usec */
|
||||
stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
|
||||
stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
|
||||
/* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */
|
||||
stats->coremem = p->acct_rss_mem1 * PAGE_SIZE;
|
||||
do_div(stats->coremem, 1000 * KB);
|
||||
stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE;
|
||||
do_div(stats->virtmem, 1000 * KB);
|
||||
mm = get_task_mm(p);
|
||||
if (mm) {
|
||||
/* adjust to KB unit */
|
||||
|
@ -123,27 +125,28 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
|
|||
static void __acct_update_integrals(struct task_struct *tsk,
|
||||
cputime_t utime, cputime_t stime)
|
||||
{
|
||||
if (likely(tsk->mm)) {
|
||||
cputime_t time, dtime;
|
||||
struct timeval value;
|
||||
unsigned long flags;
|
||||
u64 delta;
|
||||
cputime_t time, dtime;
|
||||
u64 delta;
|
||||
|
||||
local_irq_save(flags);
|
||||
time = stime + utime;
|
||||
dtime = time - tsk->acct_timexpd;
|
||||
jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
|
||||
delta = value.tv_sec;
|
||||
delta = delta * USEC_PER_SEC + value.tv_usec;
|
||||
if (!likely(tsk->mm))
|
||||
return;
|
||||
|
||||
if (delta == 0)
|
||||
goto out;
|
||||
tsk->acct_timexpd = time;
|
||||
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
|
||||
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
time = stime + utime;
|
||||
dtime = time - tsk->acct_timexpd;
|
||||
/* Avoid division: cputime_t is often in nanoseconds already. */
|
||||
delta = cputime_to_nsecs(dtime);
|
||||
|
||||
if (delta < TICK_NSEC)
|
||||
return;
|
||||
|
||||
tsk->acct_timexpd = time;
|
||||
/*
|
||||
* Divide by 1024 to avoid overflow, and to avoid division.
|
||||
* The final unit reported to userspace is Mbyte-usecs,
|
||||
* the rest of the math is done in xacct_add_tsk.
|
||||
*/
|
||||
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
|
||||
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -153,9 +156,12 @@ static void __acct_update_integrals(struct task_struct *tsk,
|
|||
void acct_update_integrals(struct task_struct *tsk)
|
||||
{
|
||||
cputime_t utime, stime;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
task_cputime(tsk, &utime, &stime);
|
||||
__acct_update_integrals(tsk, utime, stime);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -97,8 +97,8 @@ static void async_pf_execute(struct work_struct *work)
|
|||
* This memory barrier pairs with prepare_to_wait's set_current_state()
|
||||
*/
|
||||
smp_mb();
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
if (swait_active(&vcpu->wq))
|
||||
swake_up(&vcpu->wq);
|
||||
|
||||
mmput(mm);
|
||||
kvm_put_kvm(vcpu->kvm);
|
||||
|
|
|
@ -216,8 +216,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
|
|||
vcpu->kvm = kvm;
|
||||
vcpu->vcpu_id = id;
|
||||
vcpu->pid = NULL;
|
||||
vcpu->halt_poll_ns = 0;
|
||||
init_waitqueue_head(&vcpu->wq);
|
||||
init_swait_queue_head(&vcpu->wq);
|
||||
kvm_async_pf_vcpu_init(vcpu);
|
||||
|
||||
vcpu->pre_pcpu = -1;
|
||||
|
@ -1993,7 +1992,7 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
|
|||
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ktime_t start, cur;
|
||||
DEFINE_WAIT(wait);
|
||||
DECLARE_SWAITQUEUE(wait);
|
||||
bool waited = false;
|
||||
u64 block_ns;
|
||||
|
||||
|
@ -2018,7 +2017,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
|||
kvm_arch_vcpu_blocking(vcpu);
|
||||
|
||||
for (;;) {
|
||||
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
if (kvm_vcpu_check_block(vcpu) < 0)
|
||||
break;
|
||||
|
@ -2027,7 +2026,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
|||
schedule();
|
||||
}
|
||||
|
||||
finish_wait(&vcpu->wq, &wait);
|
||||
finish_swait(&vcpu->wq, &wait);
|
||||
cur = ktime_get();
|
||||
|
||||
kvm_arch_vcpu_unblocking(vcpu);
|
||||
|
@ -2059,11 +2058,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
int me;
|
||||
int cpu = vcpu->cpu;
|
||||
wait_queue_head_t *wqp;
|
||||
struct swait_queue_head *wqp;
|
||||
|
||||
wqp = kvm_arch_vcpu_wq(vcpu);
|
||||
if (waitqueue_active(wqp)) {
|
||||
wake_up_interruptible(wqp);
|
||||
if (swait_active(wqp)) {
|
||||
swake_up(wqp);
|
||||
++vcpu->stat.halt_wakeup;
|
||||
}
|
||||
|
||||
|
@ -2164,7 +2163,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
|||
continue;
|
||||
if (vcpu == me)
|
||||
continue;
|
||||
if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
|
||||
if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
|
||||
continue;
|
||||
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
|
||||
continue;
|
||||
|
|
Loading…
Reference in New Issue
Block a user