arm64: entry: fix non-NMI user<->kernel transitions

When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE,
the kernel will WARN() at boot time that interrupts are enabled when we
call context_tracking_user_enter(), despite the DAIF flags indicating
that IRQs are masked.

The problem is that we're not tracking IRQ flag changes accurately, and
so lockdep believes interrupts are enabled when they are not (and
vice-versa). We can shuffle things so as to make this more accurate. For
kernel->user transitions there are a number of constraints we need to
consider:

1) When we call __context_tracking_user_enter() HW IRQs must be disabled
   and lockdep must be up-to-date with this.

2) Userspace should be treated as having IRQs enabled from the PoV of
   both lockdep and tracing.

3) As context_tracking_user_enter() stops RCU from watching, we cannot
   use RCU after calling it.

4) IRQ flag tracing and lockdep have state that must be manipulated
   before RCU is disabled.

... with similar constraints applying for user->kernel transitions, with
the ordering reversed.

The generic entry code has enter_from_user_mode() and
exit_to_user_mode() helpers to handle this. We can't use those directly,
so we add arm64 copies for now (without the instrumentation markers
which aren't used on arm64). These replace the existing user_exit() and
user_exit_irqoff() calls spread throughout handlers, and the exception
unmasking is left as-is.

Note that:

* The accounting for debug exceptions from userspace now happens in
  el0_dbg() and ret_to_user(), so this is removed from
  debug_exception_enter() and debug_exception_exit(). As
  user_exit_irqoff() wakes RCU, the userspace-specific check is removed.

* The accounting for syscalls now happens in el0_svc(),
  el0_svc_compat(), and ret_to_user(), so this is removed from
  el0_svc_common(). This does not adversely affect the workaround for
  erratum 1463225, as this does not depend on any of the state tracking.

* In ret_to_user() we mask interrupts with local_daif_mask(), and so we
  need to inform lockdep and tracing. Here a trace_hardirqs_off() is
  sufficient and safe as we have not yet exited kernel context and RCU
  is usable.

* As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only
  needs to check for the latter.

* EL0 SError handling will be dealt with in a subsequent patch, as this
  needs to be treated as an NMI.

Prior to this patch, booting an appropriately-configured kernel would
result in splats as below:

| DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())
| WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0
| Modules linked in:
| CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--)
| pc : check_flags.part.54+0x1dc/0x1f0
| lr : check_flags.part.54+0x1dc/0x1f0
| sp : ffff80001003bd80
| x29: ffff80001003bd80 x28: ffff66ce801e0000
| x27: 00000000ffffffff x26: 00000000000003c0
| x25: 0000000000000000 x24: ffffc31842527258
| x23: ffffc31842491368 x22: ffffc3184282d000
| x21: 0000000000000000 x20: 0000000000000001
| x19: ffffc318432ce000 x18: 0080000000000000
| x17: 0000000000000000 x16: ffffc31840f18a78
| x15: 0000000000000001 x14: ffffc3184285c810
| x13: 0000000000000001 x12: 0000000000000000
| x11: ffffc318415857a0 x10: ffffc318406614c0
| x9 : ffffc318415857a0 x8 : ffffc31841f1d000
| x7 : 647261685f706564 x6 : ffffc3183ff7c66c
| x5 : ffff66ce801e0000 x4 : 0000000000000000
| x3 : ffffc3183fe00000 x2 : ffffc31841500000
| x1 : e956dc24146b3500 x0 : 0000000000000000
| Call trace:
|  check_flags.part.54+0x1dc/0x1f0
|  lock_is_held_type+0x10c/0x188
|  rcu_read_lock_sched_held+0x70/0x98
|  __context_tracking_enter+0x310/0x350
|  context_tracking_enter.part.3+0x5c/0xc8
|  context_tracking_user_enter+0x6c/0x80
|  finish_ret_to_user+0x2c/0x13c

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Mark Rutland 2020-11-30 11:59:46 +00:00 committed by Will Deacon
parent 105fc33520
commit 23529049c6
5 changed files with 51 additions and 48 deletions

View File

@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr)
asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
asmlinkage void enter_from_user_mode(void); asmlinkage void enter_from_user_mode(void);
asmlinkage void exit_to_user_mode(void);
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs);
void do_bti(struct pt_regs *regs); void do_bti(struct pt_regs *regs);

View File

@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
asmlinkage void noinstr enter_from_user_mode(void) asmlinkage void noinstr enter_from_user_mode(void)
{ {
lockdep_hardirqs_off(CALLER_ADDR0);
CT_WARN_ON(ct_state() != CONTEXT_USER); CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff(); user_exit_irqoff();
trace_hardirqs_off_finish();
}
asmlinkage void noinstr exit_to_user_mode(void)
{
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
user_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
} }
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{ {
unsigned long far = read_sysreg(far_el1); unsigned long far = read_sysreg(far_el1);
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
far = untagged_addr(far); far = untagged_addr(far);
do_mem_abort(far, esr, regs); do_mem_abort(far, esr, regs);
@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far)) if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening(); arm64_apply_bp_hardening();
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs); do_mem_abort(far, esr, regs);
} }
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs); do_fpsimd_acc(esr, regs);
} }
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs); do_sve_acc(esr, regs);
} }
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs); do_fpsimd_exc(esr, regs);
} }
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_sysinstr(esr, regs); do_sysinstr(esr, regs);
} }
@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(instruction_pointer(regs))) if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening(); arm64_apply_bp_hardening();
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs); do_sp_pc_abort(far, esr, regs);
} }
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs); do_sp_pc_abort(regs->sp, esr, regs);
} }
static void noinstr el0_undef(struct pt_regs *regs) static void noinstr el0_undef(struct pt_regs *regs)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_undefinstr(regs); do_undefinstr(regs);
} }
static void noinstr el0_bti(struct pt_regs *regs) static void noinstr el0_bti(struct pt_regs *regs)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_bti(regs); do_bti(regs);
} }
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr); bad_el0_sync(regs, 0, esr);
} }
@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
if (system_uses_irq_prio_masking()) if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
user_exit_irqoff(); enter_from_user_mode();
do_debug_exception(far, esr, regs); do_debug_exception(far, esr, regs);
local_daif_restore(DAIF_PROCCTX_NOIRQ); local_daif_restore(DAIF_PROCCTX_NOIRQ);
} }
@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs)
if (system_uses_irq_prio_masking()) if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
do_el0_svc(regs); do_el0_svc(regs);
} }
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_ptrauth_fault(regs, esr); do_ptrauth_fault(regs, esr);
} }
@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{ {
user_exit_irqoff(); enter_from_user_mode();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
do_cp15instr(esr, regs); do_cp15instr(esr, regs);
} }
@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
if (system_uses_irq_prio_masking()) if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
do_el0_svc_compat(regs); do_el0_svc_compat(regs);
} }

View File

@ -30,18 +30,18 @@
#include <asm/unistd.h> #include <asm/unistd.h>
/* /*
* Context tracking subsystem. Used to instrument transitions * Context tracking and irqflag tracing need to instrument transitions between
* between user and kernel mode. * user and kernel mode.
*/ */
.macro ct_user_exit_irqoff .macro user_exit_irqoff
#ifdef CONFIG_CONTEXT_TRACKING #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
bl enter_from_user_mode bl enter_from_user_mode
#endif #endif
.endm .endm
.macro ct_user_enter .macro user_enter_irqoff
#ifdef CONFIG_CONTEXT_TRACKING #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
bl context_tracking_user_enter bl exit_to_user_mode
#endif #endif
.endm .endm
@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
alternative_else_nop_endif alternative_else_nop_endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
.endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN #ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN alternative_if_not ARM64_HAS_PAN
@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
kernel_entry 0 kernel_entry 0
el0_irq_naked: el0_irq_naked:
gic_prio_irq_setup pmr=x20, tmp=x0 gic_prio_irq_setup pmr=x20, tmp=x0
ct_user_exit_irqoff user_exit_irqoff
enable_da_f enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
tbz x22, #55, 1f tbz x22, #55, 1f
bl do_el0_irq_bp_hardening bl do_el0_irq_bp_hardening
1: 1:
irq_handler irq_handler
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
#endif
b ret_to_user b ret_to_user
SYM_CODE_END(el0_irq) SYM_CODE_END(el0_irq)
@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error)
el0_error_naked: el0_error_naked:
mrs x25, esr_el1 mrs x25, esr_el1
gic_prio_kentry_setup tmp=x2 gic_prio_kentry_setup tmp=x2
ct_user_exit_irqoff user_exit_irqoff
enable_dbg enable_dbg
mov x0, sp mov x0, sp
mov x1, x25 mov x1, x25
@ -748,10 +738,14 @@ SYM_CODE_END(el0_error)
SYM_CODE_START_LOCAL(ret_to_user) SYM_CODE_START_LOCAL(ret_to_user)
disable_daif disable_daif
gic_prio_kentry_setup tmp=x3 gic_prio_kentry_setup tmp=x3
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
ldr x19, [tsk, #TSK_TI_FLAGS] ldr x19, [tsk, #TSK_TI_FLAGS]
and x2, x19, #_TIF_WORK_MASK and x2, x19, #_TIF_WORK_MASK
cbnz x2, work_pending cbnz x2, work_pending
finish_ret_to_user: finish_ret_to_user:
user_enter_irqoff
/* Ignore asynchronous tag check faults in the uaccess routines */ /* Ignore asynchronous tag check faults in the uaccess routines */
clear_mte_async_tcf clear_mte_async_tcf
enable_step_tsk x19, x2 enable_step_tsk x19, x2
@ -767,9 +761,6 @@ work_pending:
mov x0, sp // 'regs' mov x0, sp // 'regs'
mov x1, x19 mov x1, x19
bl do_notify_resume bl do_notify_resume
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace
#endif
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user b finish_ret_to_user
SYM_CODE_END(ret_to_user) SYM_CODE_END(ret_to_user)

View File

@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
*/ */
cortex_a76_erratum_1463225_svc_handler(); cortex_a76_erratum_1463225_svc_handler();
user_exit_irqoff();
local_daif_restore(DAIF_PROCCTX); local_daif_restore(DAIF_PROCCTX);
if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {

View File

@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr,
*/ */
static void debug_exception_enter(struct pt_regs *regs) static void debug_exception_enter(struct pt_regs *regs)
{ {
/* if (!user_mode(regs)) {
* Tell lockdep we disabled irqs in entry.S. Do nothing if they were /*
* already disabled to preserve the last enabled/disabled addresses. * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
*/ * already disabled to preserve the last enabled/disabled addresses.
if (interrupts_enabled(regs)) */
trace_hardirqs_off(); if (interrupts_enabled(regs))
trace_hardirqs_off();
if (user_mode(regs)) {
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
} else {
/* /*
* We might have interrupted pretty much anything. In * We might have interrupted pretty much anything. In
* fact, if we're a debug exception, we can even interrupt * fact, if we're a debug exception, we can even interrupt
@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs)
{ {
preempt_enable_no_resched(); preempt_enable_no_resched();
if (!user_mode(regs)) if (user_mode(regs))
rcu_nmi_exit(); return;
rcu_nmi_exit();
if (interrupts_enabled(regs)) if (interrupts_enabled(regs))
trace_hardirqs_on(); trace_hardirqs_on();