x86/sev: Split up runtime #VC handler for correct state tracking
[ Upstream commit be1a5408868af341f61f93c191b5e346ee88c82a ]

Split up the #VC handler code into a from-user and a from-kernel part.
This allows clean and correct state tracking, as the #VC handler needs
to enter NMI-state when raised from kernel mode and plain IRQ state when
raised from user-mode.

Fixes: 62441a1fb532 ("x86/sev-es: Correctly track IRQ states in runtime #VC handler")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210618115409.22735-3-joro@8bytes.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent 2e1003f3ee
commit 67f66d48bd
|
@ -508,7 +508,7 @@ SYM_CODE_START(\asmsym)
|
||||||
|
|
||||||
movq %rsp, %rdi /* pt_regs pointer */
|
movq %rsp, %rdi /* pt_regs pointer */
|
||||||
|
|
||||||
call \cfunc
|
call kernel_\cfunc
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to switch back to the IST stack. The current stack is either
|
* No need to switch back to the IST stack. The current stack is either
|
||||||
|
@ -519,7 +519,7 @@ SYM_CODE_START(\asmsym)
|
||||||
|
|
||||||
/* Switch to the regular task stack */
|
/* Switch to the regular task stack */
|
||||||
.Lfrom_usermode_switch_stack_\@:
|
.Lfrom_usermode_switch_stack_\@:
|
||||||
idtentry_body safe_stack_\cfunc, has_error_code=1
|
idtentry_body user_\cfunc, has_error_code=1
|
||||||
|
|
||||||
_ASM_NOKPROBE(\asmsym)
|
_ASM_NOKPROBE(\asmsym)
|
||||||
SYM_CODE_END(\asmsym)
|
SYM_CODE_END(\asmsym)
|
||||||
|
|
|
@ -315,8 +315,8 @@ static __always_inline void __##func(struct pt_regs *regs)
|
||||||
*/
|
*/
|
||||||
#define DECLARE_IDTENTRY_VC(vector, func) \
|
#define DECLARE_IDTENTRY_VC(vector, func) \
|
||||||
DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
|
DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
|
||||||
__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \
|
__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
|
||||||
__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
|
__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
|
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
|
||||||
|
@ -358,33 +358,24 @@ static __always_inline void __##func(struct pt_regs *regs)
|
||||||
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
|
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
|
* DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
|
||||||
which runs on a safe stack.
|
when raised from kernel mode
|
||||||
* @func: Function name of the entry point
|
* @func: Function name of the entry point
|
||||||
*
|
*
|
||||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
||||||
*/
|
*/
|
||||||
#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
|
#define DEFINE_IDTENTRY_VC_KERNEL(func) \
|
||||||
DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
|
DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
|
* DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
|
||||||
which runs on the VC fall-back stack
|
when raised from user mode
|
||||||
* @func: Function name of the entry point
|
* @func: Function name of the entry point
|
||||||
*
|
*
|
||||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
||||||
*/
|
*/
|
||||||
#define DEFINE_IDTENTRY_VC_IST(func) \
|
#define DEFINE_IDTENTRY_VC_USER(func) \
|
||||||
DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
|
DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
|
||||||
|
|
||||||
/**
|
|
||||||
* DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
|
|
||||||
* @func: Function name of the entry point
|
|
||||||
*
|
|
||||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
|
||||||
*/
|
|
||||||
#define DEFINE_IDTENTRY_VC(func) \
|
|
||||||
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
|
|
||||||
|
|
||||||
#else /* CONFIG_X86_64 */
|
#else /* CONFIG_X86_64 */
|
||||||
|
|
||||||
|
|
|
@ -759,7 +759,7 @@ void __init sev_es_init_vc_handling(void)
|
||||||
sev_es_setup_play_dead();
|
sev_es_setup_play_dead();
|
||||||
|
|
||||||
/* Secondary CPUs use the runtime #VC handler */
|
/* Secondary CPUs use the runtime #VC handler */
|
||||||
initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
|
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
|
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
|
||||||
|
@ -1197,14 +1197,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
|
||||||
return ES_EXCEPTION;
|
return ES_EXCEPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
if (user_mode(regs))
|
|
||||||
noist_exc_debug(regs);
|
|
||||||
else
|
|
||||||
exc_debug(regs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
|
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
|
||||||
struct ghcb *ghcb,
|
struct ghcb *ghcb,
|
||||||
unsigned long exit_code)
|
unsigned long exit_code)
|
||||||
|
@ -1300,41 +1292,13 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
|
||||||
return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
|
return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
|
||||||
* Main #VC exception handler. It is called when the entry code was able to
|
|
||||||
* switch off the IST to a safe kernel stack.
|
|
||||||
*
|
|
||||||
* With the current implementation it is always possible to switch to a safe
|
|
||||||
* stack because #VC exceptions only happen at known places, like intercepted
|
|
||||||
* instructions or accesses to MMIO areas/IO ports. They can also happen with
|
|
||||||
* code instrumentation when the hypervisor intercepts #DB, but the critical
|
|
||||||
* paths are forbidden to be instrumented, so #DB exceptions currently also
|
|
||||||
* only happen in safe places.
|
|
||||||
*/
|
|
||||||
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|
||||||
{
|
{
|
||||||
irqentry_state_t irq_state;
|
|
||||||
struct ghcb_state state;
|
struct ghcb_state state;
|
||||||
struct es_em_ctxt ctxt;
|
struct es_em_ctxt ctxt;
|
||||||
enum es_result result;
|
enum es_result result;
|
||||||
struct ghcb *ghcb;
|
struct ghcb *ghcb;
|
||||||
|
bool ret = true;
|
||||||
/*
|
|
||||||
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
|
||||||
*/
|
|
||||||
if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
|
|
||||||
vc_handle_trap_db(regs);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
irq_state = irqentry_nmi_enter(regs);
|
|
||||||
instrumentation_begin();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is invoked through an interrupt gate, so IRQs are disabled. The
|
|
||||||
* code below might walk page-tables for user or kernel addresses, so
|
|
||||||
* keep the IRQs disabled to protect us against concurrent TLB flushes.
|
|
||||||
*/
|
|
||||||
|
|
||||||
ghcb = __sev_get_ghcb(&state);
|
ghcb = __sev_get_ghcb(&state);
|
||||||
|
|
||||||
|
@ -1354,15 +1318,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
||||||
case ES_UNSUPPORTED:
|
case ES_UNSUPPORTED:
|
||||||
pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
|
pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
|
||||||
error_code, regs->ip);
|
error_code, regs->ip);
|
||||||
goto fail;
|
ret = false;
|
||||||
|
break;
|
||||||
case ES_VMM_ERROR:
|
case ES_VMM_ERROR:
|
||||||
pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
|
pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
|
||||||
error_code, regs->ip);
|
error_code, regs->ip);
|
||||||
goto fail;
|
ret = false;
|
||||||
|
break;
|
||||||
case ES_DECODE_FAILED:
|
case ES_DECODE_FAILED:
|
||||||
pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
|
pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
|
||||||
error_code, regs->ip);
|
error_code, regs->ip);
|
||||||
goto fail;
|
ret = false;
|
||||||
|
break;
|
||||||
case ES_EXCEPTION:
|
case ES_EXCEPTION:
|
||||||
vc_forward_exception(&ctxt);
|
vc_forward_exception(&ctxt);
|
||||||
break;
|
break;
|
||||||
|
@ -1378,24 +1345,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
return ret;
|
||||||
instrumentation_end();
|
}
|
||||||
irqentry_nmi_exit(regs, irq_state);
|
|
||||||
|
|
||||||
return;
|
static __always_inline bool vc_is_db(unsigned long error_code)
|
||||||
|
{
|
||||||
|
return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
|
||||||
|
}
|
||||||
|
|
||||||
fail:
|
/*
|
||||||
if (user_mode(regs)) {
|
* Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
|
||||||
/*
|
* and will panic when an error happens.
|
||||||
* Do not kill the machine if user-space triggered the
|
*/
|
||||||
* exception. Send SIGBUS instead and let user-space deal with
|
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
|
||||||
* it.
|
{
|
||||||
*/
|
irqentry_state_t irq_state;
|
||||||
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
|
|
||||||
} else {
|
|
||||||
pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
|
|
||||||
result);
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* With the current implementation it is always possible to switch to a
|
||||||
|
* safe stack because #VC exceptions only happen at known places, like
|
||||||
|
* intercepted instructions or accesses to MMIO areas/IO ports. They can
|
||||||
|
* also happen with code instrumentation when the hypervisor intercepts
|
||||||
|
* #DB, but the critical paths are forbidden to be instrumented, so #DB
|
||||||
|
* exceptions currently also only happen in safe places.
|
||||||
|
*
|
||||||
|
* But keep this here in case the noinstr annotations are violated due
|
||||||
|
* to bug elsewhere.
|
||||||
|
*/
|
||||||
|
if (unlikely(on_vc_fallback_stack(regs))) {
|
||||||
|
instrumentation_begin();
|
||||||
|
panic("Can't handle #VC exception from unsupported context\n");
|
||||||
|
instrumentation_end();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||||
|
*/
|
||||||
|
if (vc_is_db(error_code)) {
|
||||||
|
exc_debug(regs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
irq_state = irqentry_nmi_enter(regs);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
|
||||||
|
if (!vc_raw_handle_exception(regs, error_code)) {
|
||||||
/* Show some debug info */
|
/* Show some debug info */
|
||||||
show_regs(regs);
|
show_regs(regs);
|
||||||
|
|
||||||
|
@ -1406,23 +1401,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
||||||
panic("Returned from Terminate-Request to Hypervisor\n");
|
panic("Returned from Terminate-Request to Hypervisor\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
|
|
||||||
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
|
|
||||||
{
|
|
||||||
instrumentation_begin();
|
|
||||||
panic("Can't handle #VC exception from unsupported context\n");
|
|
||||||
instrumentation_end();
|
instrumentation_end();
|
||||||
|
irqentry_nmi_exit(regs, irq_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_IDTENTRY_VC(exc_vmm_communication)
|
/*
|
||||||
|
* Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
|
||||||
|
* and will kill the current task with SIGBUS when an error happens.
|
||||||
|
*/
|
||||||
|
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
|
||||||
{
|
{
|
||||||
if (likely(!on_vc_fallback_stack(regs)))
|
/*
|
||||||
safe_stack_exc_vmm_communication(regs, error_code);
|
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||||
else
|
*/
|
||||||
ist_exc_vmm_communication(regs, error_code);
|
if (vc_is_db(error_code)) {
|
||||||
|
noist_exc_debug(regs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
irqentry_enter_from_user_mode(regs);
|
||||||
|
instrumentation_begin();
|
||||||
|
|
||||||
|
if (!vc_raw_handle_exception(regs, error_code)) {
|
||||||
|
/*
|
||||||
|
* Do not kill the machine if user-space triggered the
|
||||||
|
* exception. Send SIGBUS instead and let user-space deal with
|
||||||
|
* it.
|
||||||
|
*/
|
||||||
|
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
|
||||||
|
}
|
||||||
|
|
||||||
|
instrumentation_end();
|
||||||
|
irqentry_exit_to_user_mode(regs);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
|
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user