forked from luck/tmp_suning_uos_patched
Merge branch 'x86/paravirt' into x86/apic
Conflicts: arch/x86/mach-voyager/voyager_smp.c
This commit is contained in:
commit
eca217b36e
@ -12,21 +12,38 @@
|
||||
#define CLBR_EAX (1 << 0)
|
||||
#define CLBR_ECX (1 << 1)
|
||||
#define CLBR_EDX (1 << 2)
|
||||
#define CLBR_EDI (1 << 3)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define CLBR_RSI (1 << 3)
|
||||
#define CLBR_RDI (1 << 4)
|
||||
#ifdef CONFIG_X86_32
|
||||
/* CLBR_ANY should match all regs platform has. For i386, that's just it */
|
||||
#define CLBR_ANY ((1 << 4) - 1)
|
||||
|
||||
#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
|
||||
#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
|
||||
#define CLBR_SCRATCH (0)
|
||||
#else
|
||||
#define CLBR_RAX CLBR_EAX
|
||||
#define CLBR_RCX CLBR_ECX
|
||||
#define CLBR_RDX CLBR_EDX
|
||||
#define CLBR_RDI CLBR_EDI
|
||||
#define CLBR_RSI (1 << 4)
|
||||
#define CLBR_R8 (1 << 5)
|
||||
#define CLBR_R9 (1 << 6)
|
||||
#define CLBR_R10 (1 << 7)
|
||||
#define CLBR_R11 (1 << 8)
|
||||
|
||||
#define CLBR_ANY ((1 << 9) - 1)
|
||||
|
||||
#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
|
||||
CLBR_RCX | CLBR_R8 | CLBR_R9)
|
||||
#define CLBR_RET_REG (CLBR_RAX)
|
||||
#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
|
||||
|
||||
#include <asm/desc_defs.h>
|
||||
#else
|
||||
/* CLBR_ANY should match all regs platform has. For i386, that's just it */
|
||||
#define CLBR_ANY ((1 << 3) - 1)
|
||||
#endif /* X86_64 */
|
||||
|
||||
#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/types.h>
|
||||
#include <linux/cpumask.h>
|
||||
@ -40,6 +57,14 @@ struct tss_struct;
|
||||
struct mm_struct;
|
||||
struct desc_struct;
|
||||
|
||||
/*
|
||||
* Wrapper type for pointers to code which uses the non-standard
|
||||
* calling convention. See PV_CALL_SAVE_REGS_THUNK below.
|
||||
*/
|
||||
struct paravirt_callee_save {
|
||||
void *func;
|
||||
};
|
||||
|
||||
/* general info */
|
||||
struct pv_info {
|
||||
unsigned int kernel_rpl;
|
||||
@ -189,11 +214,15 @@ struct pv_irq_ops {
|
||||
* expected to use X86_EFLAGS_IF; all other bits
|
||||
* returned from save_fl are undefined, and may be ignored by
|
||||
* restore_fl.
|
||||
*
|
||||
* NOTE: Callers of these functions expect the callee to preserve
|
||||
* more registers than the standard C calling convention.
|
||||
*/
|
||||
unsigned long (*save_fl)(void);
|
||||
void (*restore_fl)(unsigned long);
|
||||
void (*irq_disable)(void);
|
||||
void (*irq_enable)(void);
|
||||
struct paravirt_callee_save save_fl;
|
||||
struct paravirt_callee_save restore_fl;
|
||||
struct paravirt_callee_save irq_disable;
|
||||
struct paravirt_callee_save irq_enable;
|
||||
|
||||
void (*safe_halt)(void);
|
||||
void (*halt)(void);
|
||||
|
||||
@ -279,11 +308,11 @@ struct pv_mmu_ops {
|
||||
void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte);
|
||||
|
||||
pteval_t (*pte_val)(pte_t);
|
||||
pte_t (*make_pte)(pteval_t pte);
|
||||
struct paravirt_callee_save pte_val;
|
||||
struct paravirt_callee_save make_pte;
|
||||
|
||||
pgdval_t (*pgd_val)(pgd_t);
|
||||
pgd_t (*make_pgd)(pgdval_t pgd);
|
||||
struct paravirt_callee_save pgd_val;
|
||||
struct paravirt_callee_save make_pgd;
|
||||
|
||||
#if PAGETABLE_LEVELS >= 3
|
||||
#ifdef CONFIG_X86_PAE
|
||||
@ -298,12 +327,12 @@ struct pv_mmu_ops {
|
||||
|
||||
void (*set_pud)(pud_t *pudp, pud_t pudval);
|
||||
|
||||
pmdval_t (*pmd_val)(pmd_t);
|
||||
pmd_t (*make_pmd)(pmdval_t pmd);
|
||||
struct paravirt_callee_save pmd_val;
|
||||
struct paravirt_callee_save make_pmd;
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pudval_t (*pud_val)(pud_t);
|
||||
pud_t (*make_pud)(pudval_t pud);
|
||||
struct paravirt_callee_save pud_val;
|
||||
struct paravirt_callee_save make_pud;
|
||||
|
||||
void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
|
||||
#endif /* PAGETABLE_LEVELS == 4 */
|
||||
@ -388,6 +417,8 @@ extern struct pv_lock_ops pv_lock_ops;
|
||||
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
|
||||
|
||||
unsigned paravirt_patch_nop(void);
|
||||
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
|
||||
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
|
||||
unsigned paravirt_patch_ignore(unsigned len);
|
||||
unsigned paravirt_patch_call(void *insnbuf,
|
||||
const void *target, u16 tgt_clobbers,
|
||||
@ -479,25 +510,45 @@ int paravirt_disable_iospace(void);
|
||||
* makes sure the incoming and outgoing types are always correct.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx
|
||||
#define PVOP_VCALL_ARGS \
|
||||
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
|
||||
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
|
||||
|
||||
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
|
||||
#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
|
||||
#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
|
||||
|
||||
#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
|
||||
"=c" (__ecx)
|
||||
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
|
||||
|
||||
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
|
||||
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
|
||||
|
||||
#define EXTRA_CLOBBERS
|
||||
#define VEXTRA_CLOBBERS
|
||||
#else
|
||||
#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx
|
||||
#else /* CONFIG_X86_64 */
|
||||
#define PVOP_VCALL_ARGS \
|
||||
unsigned long __edi = __edi, __esi = __esi, \
|
||||
__edx = __edx, __ecx = __ecx
|
||||
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
|
||||
|
||||
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
|
||||
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
|
||||
#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
|
||||
#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
|
||||
|
||||
#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
|
||||
"=S" (__esi), "=d" (__edx), \
|
||||
"=c" (__ecx)
|
||||
|
||||
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
|
||||
|
||||
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
|
||||
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
|
||||
|
||||
#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
|
||||
#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
|
||||
#endif
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_DEBUG
|
||||
#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
|
||||
@ -505,7 +556,8 @@ int paravirt_disable_iospace(void);
|
||||
#define PVOP_TEST_NULL(op) ((void)op)
|
||||
#endif
|
||||
|
||||
#define __PVOP_CALL(rettype, op, pre, post, ...) \
|
||||
#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
|
||||
pre, post, ...) \
|
||||
({ \
|
||||
rettype __ret; \
|
||||
PVOP_CALL_ARGS; \
|
||||
@ -516,70 +568,113 @@ int paravirt_disable_iospace(void);
|
||||
asm volatile(pre \
|
||||
paravirt_alt(PARAVIRT_CALL) \
|
||||
post \
|
||||
: PVOP_CALL_CLOBBERS \
|
||||
: call_clbr \
|
||||
: paravirt_type(op), \
|
||||
paravirt_clobber(CLBR_ANY), \
|
||||
paravirt_clobber(clbr), \
|
||||
##__VA_ARGS__ \
|
||||
: "memory", "cc" EXTRA_CLOBBERS); \
|
||||
: "memory", "cc" extra_clbr); \
|
||||
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
|
||||
} else { \
|
||||
asm volatile(pre \
|
||||
paravirt_alt(PARAVIRT_CALL) \
|
||||
post \
|
||||
: PVOP_CALL_CLOBBERS \
|
||||
: call_clbr \
|
||||
: paravirt_type(op), \
|
||||
paravirt_clobber(CLBR_ANY), \
|
||||
paravirt_clobber(clbr), \
|
||||
##__VA_ARGS__ \
|
||||
: "memory", "cc" EXTRA_CLOBBERS); \
|
||||
: "memory", "cc" extra_clbr); \
|
||||
__ret = (rettype)__eax; \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
#define __PVOP_VCALL(op, pre, post, ...) \
|
||||
|
||||
#define __PVOP_CALL(rettype, op, pre, post, ...) \
|
||||
____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
|
||||
EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
|
||||
|
||||
#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
|
||||
____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
|
||||
PVOP_CALLEE_CLOBBERS, , \
|
||||
pre, post, ##__VA_ARGS__)
|
||||
|
||||
|
||||
#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
|
||||
({ \
|
||||
PVOP_VCALL_ARGS; \
|
||||
PVOP_TEST_NULL(op); \
|
||||
asm volatile(pre \
|
||||
paravirt_alt(PARAVIRT_CALL) \
|
||||
post \
|
||||
: PVOP_VCALL_CLOBBERS \
|
||||
: call_clbr \
|
||||
: paravirt_type(op), \
|
||||
paravirt_clobber(CLBR_ANY), \
|
||||
paravirt_clobber(clbr), \
|
||||
##__VA_ARGS__ \
|
||||
: "memory", "cc" VEXTRA_CLOBBERS); \
|
||||
: "memory", "cc" extra_clbr); \
|
||||
})
|
||||
|
||||
#define __PVOP_VCALL(op, pre, post, ...) \
|
||||
____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
|
||||
VEXTRA_CLOBBERS, \
|
||||
pre, post, ##__VA_ARGS__)
|
||||
|
||||
#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
|
||||
____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
|
||||
PVOP_VCALLEE_CLOBBERS, , \
|
||||
pre, post, ##__VA_ARGS__)
|
||||
|
||||
|
||||
|
||||
#define PVOP_CALL0(rettype, op) \
|
||||
__PVOP_CALL(rettype, op, "", "")
|
||||
#define PVOP_VCALL0(op) \
|
||||
__PVOP_VCALL(op, "", "")
|
||||
|
||||
#define PVOP_CALLEE0(rettype, op) \
|
||||
__PVOP_CALLEESAVE(rettype, op, "", "")
|
||||
#define PVOP_VCALLEE0(op) \
|
||||
__PVOP_VCALLEESAVE(op, "", "")
|
||||
|
||||
|
||||
#define PVOP_CALL1(rettype, op, arg1) \
|
||||
__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
|
||||
__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
|
||||
#define PVOP_VCALL1(op, arg1) \
|
||||
__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
|
||||
__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
|
||||
|
||||
#define PVOP_CALLEE1(rettype, op, arg1) \
|
||||
__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
|
||||
#define PVOP_VCALLEE1(op, arg1) \
|
||||
__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
|
||||
|
||||
|
||||
#define PVOP_CALL2(rettype, op, arg1, arg2) \
|
||||
__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1" ((unsigned long)(arg2)))
|
||||
__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2))
|
||||
#define PVOP_VCALL2(op, arg1, arg2) \
|
||||
__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1" ((unsigned long)(arg2)))
|
||||
__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2))
|
||||
|
||||
#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
|
||||
__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2))
|
||||
#define PVOP_VCALLEE2(op, arg1, arg2) \
|
||||
__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2))
|
||||
|
||||
|
||||
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
|
||||
__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
|
||||
__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
|
||||
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
|
||||
__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
|
||||
__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
|
||||
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
|
||||
|
||||
/* This is the only difference in x86_64. We can make it much simpler */
|
||||
#ifdef CONFIG_X86_32
|
||||
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
|
||||
__PVOP_CALL(rettype, op, \
|
||||
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
|
||||
"0" ((u32)(arg1)), "1" ((u32)(arg2)), \
|
||||
"2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
|
||||
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
|
||||
PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
|
||||
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
|
||||
__PVOP_VCALL(op, \
|
||||
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
|
||||
@ -587,13 +682,13 @@ int paravirt_disable_iospace(void);
|
||||
"2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
|
||||
#else
|
||||
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
|
||||
__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
|
||||
"3"((unsigned long)(arg4)))
|
||||
__PVOP_CALL(rettype, op, "", "", \
|
||||
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
|
||||
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
|
||||
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
|
||||
__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
|
||||
"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
|
||||
"3"((unsigned long)(arg4)))
|
||||
__PVOP_VCALL(op, "", "", \
|
||||
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
|
||||
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
|
||||
#endif
|
||||
|
||||
static inline int paravirt_enabled(void)
|
||||
@ -1060,11 +1155,11 @@ static inline pte_t __pte(pteval_t val)
|
||||
pteval_t ret;
|
||||
|
||||
if (sizeof(pteval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pteval_t,
|
||||
ret = PVOP_CALLEE2(pteval_t,
|
||||
pv_mmu_ops.make_pte,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pteval_t,
|
||||
ret = PVOP_CALLEE1(pteval_t,
|
||||
pv_mmu_ops.make_pte,
|
||||
val);
|
||||
|
||||
@ -1076,10 +1171,10 @@ static inline pteval_t pte_val(pte_t pte)
|
||||
pteval_t ret;
|
||||
|
||||
if (sizeof(pteval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
|
||||
ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
|
||||
pte.pte, (u64)pte.pte >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
|
||||
ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
|
||||
pte.pte);
|
||||
|
||||
return ret;
|
||||
@ -1090,10 +1185,10 @@ static inline pgd_t __pgd(pgdval_t val)
|
||||
pgdval_t ret;
|
||||
|
||||
if (sizeof(pgdval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
|
||||
ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
|
||||
ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
|
||||
val);
|
||||
|
||||
return (pgd_t) { ret };
|
||||
@ -1104,10 +1199,10 @@ static inline pgdval_t pgd_val(pgd_t pgd)
|
||||
pgdval_t ret;
|
||||
|
||||
if (sizeof(pgdval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
|
||||
ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
|
||||
pgd.pgd, (u64)pgd.pgd >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
|
||||
ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
|
||||
pgd.pgd);
|
||||
|
||||
return ret;
|
||||
@ -1172,10 +1267,10 @@ static inline pmd_t __pmd(pmdval_t val)
|
||||
pmdval_t ret;
|
||||
|
||||
if (sizeof(pmdval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
|
||||
ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
|
||||
ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
|
||||
val);
|
||||
|
||||
return (pmd_t) { ret };
|
||||
@ -1186,10 +1281,10 @@ static inline pmdval_t pmd_val(pmd_t pmd)
|
||||
pmdval_t ret;
|
||||
|
||||
if (sizeof(pmdval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
|
||||
ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
|
||||
pmd.pmd, (u64)pmd.pmd >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
|
||||
ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
|
||||
pmd.pmd);
|
||||
|
||||
return ret;
|
||||
@ -1212,10 +1307,10 @@ static inline pud_t __pud(pudval_t val)
|
||||
pudval_t ret;
|
||||
|
||||
if (sizeof(pudval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
|
||||
ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
|
||||
val, (u64)val >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
|
||||
ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
|
||||
val);
|
||||
|
||||
return (pud_t) { ret };
|
||||
@ -1226,10 +1321,10 @@ static inline pudval_t pud_val(pud_t pud)
|
||||
pudval_t ret;
|
||||
|
||||
if (sizeof(pudval_t) > sizeof(long))
|
||||
ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
|
||||
ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
|
||||
pud.pud, (u64)pud.pud >> 32);
|
||||
else
|
||||
ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
|
||||
ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
|
||||
pud.pud);
|
||||
|
||||
return ret;
|
||||
@ -1371,6 +1466,9 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
|
||||
}
|
||||
|
||||
void _paravirt_nop(void);
|
||||
u32 _paravirt_ident_32(u32);
|
||||
u64 _paravirt_ident_64(u64);
|
||||
|
||||
#define paravirt_nop ((void *)_paravirt_nop)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@ -1420,12 +1518,37 @@ extern struct paravirt_patch_site __parainstructions[],
|
||||
__parainstructions_end[];
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
|
||||
#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
|
||||
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
|
||||
#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
|
||||
|
||||
/* save and restore all caller-save registers, except return value */
|
||||
#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
|
||||
#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
|
||||
|
||||
#define PV_FLAGS_ARG "0"
|
||||
#define PV_EXTRA_CLOBBERS
|
||||
#define PV_VEXTRA_CLOBBERS
|
||||
#else
|
||||
/* save and restore all caller-save registers, except return value */
|
||||
#define PV_SAVE_ALL_CALLER_REGS \
|
||||
"push %rcx;" \
|
||||
"push %rdx;" \
|
||||
"push %rsi;" \
|
||||
"push %rdi;" \
|
||||
"push %r8;" \
|
||||
"push %r9;" \
|
||||
"push %r10;" \
|
||||
"push %r11;"
|
||||
#define PV_RESTORE_ALL_CALLER_REGS \
|
||||
"pop %r11;" \
|
||||
"pop %r10;" \
|
||||
"pop %r9;" \
|
||||
"pop %r8;" \
|
||||
"pop %rdi;" \
|
||||
"pop %rsi;" \
|
||||
"pop %rdx;" \
|
||||
"pop %rcx;"
|
||||
|
||||
/* We save some registers, but not all of them; that would be too much. We clobber all
|
||||
* caller saved registers but the argument parameter */
|
||||
#define PV_SAVE_REGS "pushq %%rdi;"
|
||||
@ -1435,52 +1558,76 @@ extern struct paravirt_patch_site __parainstructions[],
|
||||
#define PV_FLAGS_ARG "D"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Generate a thunk around a function which saves all caller-save
|
||||
* registers except for the return value. This allows C functions to
|
||||
* be called from assembler code where fewer than normal registers are
|
||||
* available. It may also help code generation around calls from C
|
||||
* code if the common case doesn't use many registers.
|
||||
*
|
||||
* When a callee is wrapped in a thunk, the caller can assume that all
|
||||
* arg regs and all scratch registers are preserved across the
|
||||
* call. The return value in rax/eax will not be saved, even for void
|
||||
* functions.
|
||||
*/
|
||||
#define PV_CALLEE_SAVE_REGS_THUNK(func) \
|
||||
extern typeof(func) __raw_callee_save_##func; \
|
||||
static void *__##func##__ __used = func; \
|
||||
\
|
||||
asm(".pushsection .text;" \
|
||||
"__raw_callee_save_" #func ": " \
|
||||
PV_SAVE_ALL_CALLER_REGS \
|
||||
"call " #func ";" \
|
||||
PV_RESTORE_ALL_CALLER_REGS \
|
||||
"ret;" \
|
||||
".popsection")
|
||||
|
||||
/* Get a reference to a callee-save function */
|
||||
#define PV_CALLEE_SAVE(func) \
|
||||
((struct paravirt_callee_save) { __raw_callee_save_##func })
|
||||
|
||||
/* Promise that "func" already uses the right calling convention */
|
||||
#define __PV_IS_CALLEE_SAVE(func) \
|
||||
((struct paravirt_callee_save) { func })
|
||||
|
||||
static inline unsigned long __raw_local_save_flags(void)
|
||||
{
|
||||
unsigned long f;
|
||||
|
||||
asm volatile(paravirt_alt(PV_SAVE_REGS
|
||||
PARAVIRT_CALL
|
||||
PV_RESTORE_REGS)
|
||||
asm volatile(paravirt_alt(PARAVIRT_CALL)
|
||||
: "=a"(f)
|
||||
: paravirt_type(pv_irq_ops.save_fl),
|
||||
paravirt_clobber(CLBR_EAX)
|
||||
: "memory", "cc" PV_VEXTRA_CLOBBERS);
|
||||
: "memory", "cc");
|
||||
return f;
|
||||
}
|
||||
|
||||
static inline void raw_local_irq_restore(unsigned long f)
|
||||
{
|
||||
asm volatile(paravirt_alt(PV_SAVE_REGS
|
||||
PARAVIRT_CALL
|
||||
PV_RESTORE_REGS)
|
||||
asm volatile(paravirt_alt(PARAVIRT_CALL)
|
||||
: "=a"(f)
|
||||
: PV_FLAGS_ARG(f),
|
||||
paravirt_type(pv_irq_ops.restore_fl),
|
||||
paravirt_clobber(CLBR_EAX)
|
||||
: "memory", "cc" PV_EXTRA_CLOBBERS);
|
||||
: "memory", "cc");
|
||||
}
|
||||
|
||||
static inline void raw_local_irq_disable(void)
|
||||
{
|
||||
asm volatile(paravirt_alt(PV_SAVE_REGS
|
||||
PARAVIRT_CALL
|
||||
PV_RESTORE_REGS)
|
||||
asm volatile(paravirt_alt(PARAVIRT_CALL)
|
||||
:
|
||||
: paravirt_type(pv_irq_ops.irq_disable),
|
||||
paravirt_clobber(CLBR_EAX)
|
||||
: "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
|
||||
: "memory", "eax", "cc");
|
||||
}
|
||||
|
||||
static inline void raw_local_irq_enable(void)
|
||||
{
|
||||
asm volatile(paravirt_alt(PV_SAVE_REGS
|
||||
PARAVIRT_CALL
|
||||
PV_RESTORE_REGS)
|
||||
asm volatile(paravirt_alt(PARAVIRT_CALL)
|
||||
:
|
||||
: paravirt_type(pv_irq_ops.irq_enable),
|
||||
paravirt_clobber(CLBR_EAX)
|
||||
: "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
|
||||
: "memory", "eax", "cc");
|
||||
}
|
||||
|
||||
static inline unsigned long __raw_local_irq_save(void)
|
||||
@ -1523,33 +1670,49 @@ static inline unsigned long __raw_local_irq_save(void)
|
||||
.popsection
|
||||
|
||||
|
||||
#define COND_PUSH(set, mask, reg) \
|
||||
.if ((~(set)) & mask); push %reg; .endif
|
||||
#define COND_POP(set, mask, reg) \
|
||||
.if ((~(set)) & mask); pop %reg; .endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define PV_SAVE_REGS \
|
||||
push %rax; \
|
||||
push %rcx; \
|
||||
push %rdx; \
|
||||
push %rsi; \
|
||||
push %rdi; \
|
||||
push %r8; \
|
||||
push %r9; \
|
||||
push %r10; \
|
||||
push %r11
|
||||
#define PV_RESTORE_REGS \
|
||||
pop %r11; \
|
||||
pop %r10; \
|
||||
pop %r9; \
|
||||
pop %r8; \
|
||||
pop %rdi; \
|
||||
pop %rsi; \
|
||||
pop %rdx; \
|
||||
pop %rcx; \
|
||||
pop %rax
|
||||
|
||||
#define PV_SAVE_REGS(set) \
|
||||
COND_PUSH(set, CLBR_RAX, rax); \
|
||||
COND_PUSH(set, CLBR_RCX, rcx); \
|
||||
COND_PUSH(set, CLBR_RDX, rdx); \
|
||||
COND_PUSH(set, CLBR_RSI, rsi); \
|
||||
COND_PUSH(set, CLBR_RDI, rdi); \
|
||||
COND_PUSH(set, CLBR_R8, r8); \
|
||||
COND_PUSH(set, CLBR_R9, r9); \
|
||||
COND_PUSH(set, CLBR_R10, r10); \
|
||||
COND_PUSH(set, CLBR_R11, r11)
|
||||
#define PV_RESTORE_REGS(set) \
|
||||
COND_POP(set, CLBR_R11, r11); \
|
||||
COND_POP(set, CLBR_R10, r10); \
|
||||
COND_POP(set, CLBR_R9, r9); \
|
||||
COND_POP(set, CLBR_R8, r8); \
|
||||
COND_POP(set, CLBR_RDI, rdi); \
|
||||
COND_POP(set, CLBR_RSI, rsi); \
|
||||
COND_POP(set, CLBR_RDX, rdx); \
|
||||
COND_POP(set, CLBR_RCX, rcx); \
|
||||
COND_POP(set, CLBR_RAX, rax)
|
||||
|
||||
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
|
||||
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
|
||||
#define PARA_INDIRECT(addr) *addr(%rip)
|
||||
#else
|
||||
#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
|
||||
#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
|
||||
#define PV_SAVE_REGS(set) \
|
||||
COND_PUSH(set, CLBR_EAX, eax); \
|
||||
COND_PUSH(set, CLBR_EDI, edi); \
|
||||
COND_PUSH(set, CLBR_ECX, ecx); \
|
||||
COND_PUSH(set, CLBR_EDX, edx)
|
||||
#define PV_RESTORE_REGS(set) \
|
||||
COND_POP(set, CLBR_EDX, edx); \
|
||||
COND_POP(set, CLBR_ECX, ecx); \
|
||||
COND_POP(set, CLBR_EDI, edi); \
|
||||
COND_POP(set, CLBR_EAX, eax)
|
||||
|
||||
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
|
||||
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
|
||||
#define PARA_INDIRECT(addr) *%cs:addr
|
||||
@ -1561,15 +1724,15 @@ static inline unsigned long __raw_local_irq_save(void)
|
||||
|
||||
#define DISABLE_INTERRUPTS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
|
||||
PV_SAVE_REGS; \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
|
||||
PV_RESTORE_REGS;) \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
|
||||
#define ENABLE_INTERRUPTS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
|
||||
PV_SAVE_REGS; \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
|
||||
PV_RESTORE_REGS;)
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
|
||||
#define USERGS_SYSRET32 \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
|
||||
@ -1599,11 +1762,15 @@ static inline unsigned long __raw_local_irq_save(void)
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
|
||||
swapgs)
|
||||
|
||||
/*
|
||||
* Note: swapgs is very special, and in practice is either going to be
|
||||
* implemented with a single "swapgs" instruction or something very
|
||||
* special. Either way, we don't need to save any registers for
|
||||
* it.
|
||||
*/
|
||||
#define SWAPGS \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
|
||||
PV_SAVE_REGS; \
|
||||
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
|
||||
PV_RESTORE_REGS \
|
||||
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
|
||||
)
|
||||
|
||||
#define GET_CR2_INTO_RCX \
|
||||
|
@ -768,7 +768,8 @@ extern int sysenter_setup(void);
|
||||
extern struct desc_ptr early_gdt_descr;
|
||||
|
||||
extern void cpu_set_gdt(int);
|
||||
extern void switch_to_new_gdt(void);
|
||||
extern void switch_to_new_gdt(int);
|
||||
extern void load_percpu_segment(int);
|
||||
extern void cpu_init(void);
|
||||
|
||||
static inline unsigned long get_debugctlmsr(void)
|
||||
|
@ -296,17 +296,8 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
|
||||
|
||||
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
||||
|
||||
/* Current gdt points %fs at the "master" per-cpu area: after this,
|
||||
* it's on the real one. */
|
||||
void switch_to_new_gdt(void)
|
||||
void load_percpu_segment(int cpu)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_table(cpu);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
/* Reload the per-cpu base */
|
||||
#ifdef CONFIG_X86_32
|
||||
loadsegment(fs, __KERNEL_PERCPU);
|
||||
#else
|
||||
@ -315,6 +306,20 @@ void switch_to_new_gdt(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Current gdt points %fs at the "master" per-cpu area: after this,
|
||||
* it's on the real one. */
|
||||
void switch_to_new_gdt(int cpu)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_table(cpu);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
/* Reload the per-cpu base */
|
||||
|
||||
load_percpu_segment(cpu);
|
||||
}
|
||||
|
||||
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
||||
|
||||
static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
||||
@ -1029,7 +1034,7 @@ void __cpuinit cpu_init(void)
|
||||
* and set up the GDT descriptor:
|
||||
*/
|
||||
|
||||
switch_to_new_gdt();
|
||||
switch_to_new_gdt(cpu);
|
||||
loadsegment(fs, 0);
|
||||
|
||||
load_idt((const struct desc_ptr *)&idt_descr);
|
||||
@ -1131,7 +1136,7 @@ void __cpuinit cpu_init(void)
|
||||
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
||||
|
||||
load_idt(&idt_descr);
|
||||
switch_to_new_gdt();
|
||||
switch_to_new_gdt(cpu);
|
||||
|
||||
/*
|
||||
* Set up and load the per-CPU TSS and LDT
|
||||
|
@ -1143,7 +1143,7 @@ ENTRY(native_load_gs_index)
|
||||
CFI_STARTPROC
|
||||
pushf
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
|
||||
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
|
||||
SWAPGS
|
||||
gs_change:
|
||||
movl %edi,%gs
|
||||
|
@ -44,6 +44,17 @@ void _paravirt_nop(void)
|
||||
{
|
||||
}
|
||||
|
||||
/* identity function, which can be inlined */
|
||||
u32 _paravirt_ident_32(u32 x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
u64 _paravirt_ident_64(u64 x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
static void __init default_banner(void)
|
||||
{
|
||||
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
||||
@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
|
||||
if (opfunc == NULL)
|
||||
/* If there's no function, patch it with a ud2a (BUG) */
|
||||
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
|
||||
else if (opfunc == paravirt_nop)
|
||||
else if (opfunc == _paravirt_nop)
|
||||
/* If the operation is a nop, then nop the callsite */
|
||||
ret = paravirt_patch_nop();
|
||||
|
||||
/* identity functions just return their single argument */
|
||||
else if (opfunc == _paravirt_ident_32)
|
||||
ret = paravirt_patch_ident_32(insnbuf, len);
|
||||
else if (opfunc == _paravirt_ident_64)
|
||||
ret = paravirt_patch_ident_64(insnbuf, len);
|
||||
|
||||
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
|
||||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
|
||||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
|
||||
@ -292,10 +310,10 @@ struct pv_time_ops pv_time_ops = {
|
||||
|
||||
struct pv_irq_ops pv_irq_ops = {
|
||||
.init_IRQ = native_init_IRQ,
|
||||
.save_fl = native_save_fl,
|
||||
.restore_fl = native_restore_fl,
|
||||
.irq_disable = native_irq_disable,
|
||||
.irq_enable = native_irq_enable,
|
||||
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
|
||||
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
|
||||
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
|
||||
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
|
||||
.safe_halt = native_safe_halt,
|
||||
.halt = native_halt,
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -373,6 +391,14 @@ struct pv_apic_ops pv_apic_ops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
|
||||
/* 32-bit pagetable entries */
|
||||
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
|
||||
#else
|
||||
/* 64-bit pagetable entries */
|
||||
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
|
||||
#endif
|
||||
|
||||
struct pv_mmu_ops pv_mmu_ops = {
|
||||
#ifndef CONFIG_X86_64
|
||||
.pagetable_setup_start = native_pagetable_setup_start,
|
||||
@ -424,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = {
|
||||
.pmd_clear = native_pmd_clear,
|
||||
#endif
|
||||
.set_pud = native_set_pud,
|
||||
.pmd_val = native_pmd_val,
|
||||
.make_pmd = native_make_pmd,
|
||||
|
||||
.pmd_val = PTE_IDENT,
|
||||
.make_pmd = PTE_IDENT,
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
.pud_val = native_pud_val,
|
||||
.make_pud = native_make_pud,
|
||||
.pud_val = PTE_IDENT,
|
||||
.make_pud = PTE_IDENT,
|
||||
|
||||
.set_pgd = native_set_pgd,
|
||||
#endif
|
||||
#endif /* PAGETABLE_LEVELS >= 3 */
|
||||
|
||||
.pte_val = native_pte_val,
|
||||
.pgd_val = native_pgd_val,
|
||||
.pte_val = PTE_IDENT,
|
||||
.pgd_val = PTE_IDENT,
|
||||
|
||||
.make_pte = native_make_pte,
|
||||
.make_pgd = native_make_pgd,
|
||||
.make_pte = PTE_IDENT,
|
||||
.make_pgd = PTE_IDENT,
|
||||
|
||||
.dup_mmap = paravirt_nop,
|
||||
.exit_mmap = paravirt_nop,
|
||||
|
@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
|
||||
DEF_NATIVE(pv_cpu_ops, clts, "clts");
|
||||
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
|
||||
|
||||
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
|
||||
{
|
||||
/* arg in %eax, return in %eax */
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
|
||||
{
|
||||
/* arg in %edx:%eax, return in %edx:%eax */
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
|
@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
|
||||
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
|
||||
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
|
||||
|
||||
DEF_NATIVE(, mov32, "mov %edi, %eax");
|
||||
DEF_NATIVE(, mov64, "mov %rdi, %rax");
|
||||
|
||||
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
|
||||
{
|
||||
return paravirt_patch_insns(insnbuf, len,
|
||||
start__mov32, end__mov32);
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
|
||||
{
|
||||
return paravirt_patch_insns(insnbuf, len,
|
||||
start__mov64, end__mov64);
|
||||
}
|
||||
|
||||
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
|
@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void)
|
||||
* area. Reload any changed state for the boot CPU.
|
||||
*/
|
||||
if (cpu == boot_cpu_id)
|
||||
switch_to_new_gdt();
|
||||
switch_to_new_gdt(cpu);
|
||||
|
||||
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
||||
}
|
||||
|
@ -1188,7 +1188,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
void __init native_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
int me = smp_processor_id();
|
||||
switch_to_new_gdt();
|
||||
switch_to_new_gdt(me);
|
||||
/* already set me in cpu_online_mask in boot_cpu_init() */
|
||||
cpumask_set_cpu(me, cpu_callout_mask);
|
||||
per_cpu(cpu_state, me) = CPU_ONLINE;
|
||||
|
@ -259,7 +259,7 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
|
||||
* the cpu's, all of which are still in the mask.
|
||||
*/
|
||||
__get_cpu_var(ptcstats).ptc_i++;
|
||||
return 0;
|
||||
return flush_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -670,10 +670,11 @@ static inline int __init activate_vmi(void)
|
||||
para_fill(pv_mmu_ops.write_cr2, SetCR2);
|
||||
para_fill(pv_mmu_ops.write_cr3, SetCR3);
|
||||
para_fill(pv_cpu_ops.write_cr4, SetCR4);
|
||||
para_fill(pv_irq_ops.save_fl, GetInterruptMask);
|
||||
para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
|
||||
para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
|
||||
para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
|
||||
|
||||
para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
|
||||
para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
|
||||
para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
|
||||
para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
|
||||
|
||||
para_fill(pv_cpu_ops.wbinvd, WBINVD);
|
||||
para_fill(pv_cpu_ops.read_tsc, RDTSC);
|
||||
|
@ -22,6 +22,7 @@ PHDRS {
|
||||
#ifdef CONFIG_SMP
|
||||
percpu PT_LOAD FLAGS(7); /* RWE */
|
||||
#endif
|
||||
data.init2 PT_LOAD FLAGS(7); /* RWE */
|
||||
note PT_NOTE FLAGS(0); /* ___ */
|
||||
}
|
||||
SECTIONS
|
||||
@ -215,7 +216,7 @@ SECTIONS
|
||||
/*
|
||||
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
|
||||
* output PHDR, so the next output section - __data_nosave - should
|
||||
* switch it back to data.init. Also, pda should be at the head of
|
||||
* start another section data.init2. Also, pda should be at the head of
|
||||
* percpu area. Preallocate it and define the percpu offset symbol
|
||||
* so that it can be accessed as a percpu variable.
|
||||
*/
|
||||
@ -232,7 +233,7 @@ SECTIONS
|
||||
__nosave_begin = .;
|
||||
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
|
||||
*(.data.nosave)
|
||||
} :data.init /* switch back to data.init, see PERCPU_VADDR() above */
|
||||
} :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
__nosave_end = .;
|
||||
|
||||
|
@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void)
|
||||
flags &= ~X86_EFLAGS_IF;
|
||||
return flags;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
|
||||
|
||||
static void vsmp_restore_fl(unsigned long flags)
|
||||
{
|
||||
@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags)
|
||||
flags |= X86_EFLAGS_AC;
|
||||
native_restore_fl(flags);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
|
||||
|
||||
static void vsmp_irq_disable(void)
|
||||
{
|
||||
@ -53,6 +55,7 @@ static void vsmp_irq_disable(void)
|
||||
|
||||
native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
|
||||
|
||||
static void vsmp_irq_enable(void)
|
||||
{
|
||||
@ -60,6 +63,7 @@ static void vsmp_irq_enable(void)
|
||||
|
||||
native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
|
||||
|
||||
static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
unsigned long addr, unsigned len)
|
||||
@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void)
|
||||
cap, ctl);
|
||||
if (cap & ctl & (1 << 4)) {
|
||||
/* Setup irq ops and turn on vSMP IRQ fastpath handling */
|
||||
pv_irq_ops.irq_disable = vsmp_irq_disable;
|
||||
pv_irq_ops.irq_enable = vsmp_irq_enable;
|
||||
pv_irq_ops.save_fl = vsmp_save_fl;
|
||||
pv_irq_ops.restore_fl = vsmp_restore_fl;
|
||||
pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
|
||||
pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
|
||||
pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
|
||||
pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
|
||||
pv_init_ops.patch = vsmp_patch;
|
||||
|
||||
ctl &= ~(1 << 4);
|
||||
|
@ -173,24 +173,29 @@ static unsigned long save_fl(void)
|
||||
{
|
||||
return lguest_data.irq_enabled;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(save_fl);
|
||||
|
||||
/* restore_flags() just sets the flags back to the value given. */
|
||||
static void restore_fl(unsigned long flags)
|
||||
{
|
||||
lguest_data.irq_enabled = flags;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
|
||||
|
||||
/* Interrupts go off... */
|
||||
static void irq_disable(void)
|
||||
{
|
||||
lguest_data.irq_enabled = 0;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
|
||||
|
||||
/* Interrupts go on... */
|
||||
static void irq_enable(void)
|
||||
{
|
||||
lguest_data.irq_enabled = X86_EFLAGS_IF;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
|
||||
|
||||
/*:*/
|
||||
/*M:003 Note that we don't check for outstanding interrupts when we re-enable
|
||||
* them (or when we unmask an interrupt). This seems to work for the moment,
|
||||
@ -984,10 +989,10 @@ __init void lguest_init(void)
|
||||
|
||||
/* interrupt-related operations */
|
||||
pv_irq_ops.init_IRQ = lguest_init_IRQ;
|
||||
pv_irq_ops.save_fl = save_fl;
|
||||
pv_irq_ops.restore_fl = restore_fl;
|
||||
pv_irq_ops.irq_disable = irq_disable;
|
||||
pv_irq_ops.irq_enable = irq_enable;
|
||||
pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
|
||||
pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
|
||||
pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
|
||||
pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
|
||||
pv_irq_ops.safe_halt = lguest_safe_halt;
|
||||
|
||||
/* init-time operations */
|
||||
|
@ -1744,13 +1744,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
|
||||
|
||||
static void __cpuinit voyager_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
switch_to_new_gdt();
|
||||
int cpu = smp_processor_id();
|
||||
switch_to_new_gdt(cpu);
|
||||
|
||||
cpu_online_map = cpumask_of_cpu(smp_processor_id());
|
||||
cpu_callout_map = cpumask_of_cpu(smp_processor_id());
|
||||
cpu_callin_map = CPU_MASK_NONE;
|
||||
cpu_present_map = cpumask_of_cpu(smp_processor_id());
|
||||
|
||||
}
|
||||
|
||||
static int __cpuinit voyager_cpu_up(unsigned int cpu)
|
||||
|
@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg
|
||||
endif
|
||||
|
||||
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
|
||||
time.o xen-asm_$(BITS).o grant-table.o suspend.o
|
||||
time.o xen-asm.o xen-asm_$(BITS).o \
|
||||
grant-table.o suspend.o
|
||||
|
||||
obj-$(CONFIG_SMP) += smp.o spinlock.o
|
||||
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
|
@ -61,40 +61,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
||||
enum xen_domain_type xen_domain_type = XEN_NATIVE;
|
||||
EXPORT_SYMBOL_GPL(xen_domain_type);
|
||||
|
||||
/*
|
||||
* Identity map, in addition to plain kernel map. This needs to be
|
||||
* large enough to allocate page table pages to allocate the rest.
|
||||
* Each page can map 2MB.
|
||||
*/
|
||||
static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* l3 pud for userspace vsyscall mapping */
|
||||
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* Note about cr3 (pagetable base) values:
|
||||
*
|
||||
* xen_cr3 contains the current logical cr3 value; it contains the
|
||||
* last set cr3. This may not be the current effective cr3, because
|
||||
* its update may be being lazily deferred. However, a vcpu looking
|
||||
* at its own cr3 can use this value knowing that everything will
|
||||
* be self-consistent.
|
||||
*
|
||||
* xen_current_cr3 contains the actual vcpu cr3; it is set once the
|
||||
* hypercall to set the vcpu cr3 is complete (so it may be a little
|
||||
* out of date, but it will never be set early). If one vcpu is
|
||||
* looking at another vcpu's cr3 value, it should use this variable.
|
||||
*/
|
||||
DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
|
||||
DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
|
||||
|
||||
struct start_info *xen_start_info;
|
||||
EXPORT_SYMBOL_GPL(xen_start_info);
|
||||
|
||||
struct shared_info xen_dummy_shared_info;
|
||||
|
||||
void *xen_initial_gdt;
|
||||
|
||||
/*
|
||||
* Point at some empty memory to start with. We map the real shared_info
|
||||
* page as soon as fixmap is up and running.
|
||||
@ -114,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
|
||||
*
|
||||
* 0: not available, 1: available
|
||||
*/
|
||||
static int have_vcpu_info_placement =
|
||||
#ifdef CONFIG_X86_32
|
||||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
;
|
||||
|
||||
static int have_vcpu_info_placement = 1;
|
||||
|
||||
static void xen_vcpu_setup(int cpu)
|
||||
{
|
||||
@ -237,7 +203,7 @@ static unsigned long xen_get_debugreg(int reg)
|
||||
return HYPERVISOR_get_debugreg(reg);
|
||||
}
|
||||
|
||||
static void xen_leave_lazy(void)
|
||||
void xen_leave_lazy(void)
|
||||
{
|
||||
paravirt_leave_lazy(paravirt_get_lazy_mode());
|
||||
xen_mc_flush();
|
||||
@ -598,76 +564,6 @@ static struct apic_ops xen_basic_apic_ops = {
|
||||
|
||||
#endif
|
||||
|
||||
static void xen_flush_tlb(void)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*op));
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void xen_flush_tlb_single(unsigned long addr)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*op));
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_INVLPG_LOCAL;
|
||||
op->arg1.linear_addr = addr & PAGE_MASK;
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void xen_flush_tlb_others(const struct cpumask *cpus,
|
||||
struct mm_struct *mm, unsigned long va)
|
||||
{
|
||||
struct {
|
||||
struct mmuext_op op;
|
||||
DECLARE_BITMAP(mask, NR_CPUS);
|
||||
} *args;
|
||||
struct multicall_space mcs;
|
||||
|
||||
BUG_ON(cpumask_empty(cpus));
|
||||
BUG_ON(!mm);
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*args));
|
||||
args = mcs.args;
|
||||
args->op.arg2.vcpumask = to_cpumask(args->mask);
|
||||
|
||||
/* Remove us, and any offline CPUS. */
|
||||
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
|
||||
if (unlikely(cpumask_empty(to_cpumask(args->mask))))
|
||||
goto issue;
|
||||
|
||||
if (va == TLB_FLUSH_ALL) {
|
||||
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
|
||||
} else {
|
||||
args->op.cmd = MMUEXT_INVLPG_MULTI;
|
||||
args->op.arg1.linear_addr = va;
|
||||
}
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
|
||||
|
||||
issue:
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
}
|
||||
|
||||
static void xen_clts(void)
|
||||
{
|
||||
@ -693,21 +589,6 @@ static void xen_write_cr0(unsigned long cr0)
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
}
|
||||
|
||||
static void xen_write_cr2(unsigned long cr2)
|
||||
{
|
||||
percpu_read(xen_vcpu)->arch.cr2 = cr2;
|
||||
}
|
||||
|
||||
static unsigned long xen_read_cr2(void)
|
||||
{
|
||||
return percpu_read(xen_vcpu)->arch.cr2;
|
||||
}
|
||||
|
||||
static unsigned long xen_read_cr2_direct(void)
|
||||
{
|
||||
return percpu_read(xen_vcpu_info.arch.cr2);
|
||||
}
|
||||
|
||||
static void xen_write_cr4(unsigned long cr4)
|
||||
{
|
||||
cr4 &= ~X86_CR4_PGE;
|
||||
@ -716,71 +597,6 @@ static void xen_write_cr4(unsigned long cr4)
|
||||
native_write_cr4(cr4);
|
||||
}
|
||||
|
||||
static unsigned long xen_read_cr3(void)
|
||||
{
|
||||
return percpu_read(xen_cr3);
|
||||
}
|
||||
|
||||
static void set_current_cr3(void *v)
|
||||
{
|
||||
percpu_write(xen_current_cr3, (unsigned long)v);
|
||||
}
|
||||
|
||||
static void __xen_write_cr3(bool kernel, unsigned long cr3)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
unsigned long mfn;
|
||||
|
||||
if (cr3)
|
||||
mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
||||
else
|
||||
mfn = 0;
|
||||
|
||||
WARN_ON(mfn == 0 && kernel);
|
||||
|
||||
mcs = __xen_mc_entry(sizeof(*op));
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
|
||||
op->arg1.mfn = mfn;
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
if (kernel) {
|
||||
percpu_write(xen_cr3, cr3);
|
||||
|
||||
/* Update xen_current_cr3 once the batch has actually
|
||||
been submitted. */
|
||||
xen_mc_callback(set_current_cr3, (void *)cr3);
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_write_cr3(unsigned long cr3)
|
||||
{
|
||||
BUG_ON(preemptible());
|
||||
|
||||
xen_mc_batch(); /* disables interrupts */
|
||||
|
||||
/* Update while interrupts are disabled, so it's atomic with
|
||||
respect to ipis */
|
||||
percpu_write(xen_cr3, cr3);
|
||||
|
||||
__xen_write_cr3(true, cr3);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
|
||||
if (user_pgd)
|
||||
__xen_write_cr3(false, __pa(user_pgd));
|
||||
else
|
||||
__xen_write_cr3(false, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
|
||||
}
|
||||
|
||||
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
|
||||
{
|
||||
int ret;
|
||||
@ -822,185 +638,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Early in boot, while setting up the initial pagetable, assume
|
||||
everything is pinned. */
|
||||
static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
#ifdef CONFIG_FLATMEM
|
||||
BUG_ON(mem_map); /* should only be used early */
|
||||
#endif
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
|
||||
/* Early release_pte assumes that all pts are pinned, since there's
|
||||
only init_mm and anything attached to that is pinned. */
|
||||
static void xen_release_pte_init(unsigned long pfn)
|
||||
{
|
||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
|
||||
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
|
||||
{
|
||||
struct mmuext_op op;
|
||||
op.cmd = cmd;
|
||||
op.arg1.mfn = pfn_to_mfn(pfn);
|
||||
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* This needs to make sure the new pte page is pinned iff it's being
|
||||
attached to a pinned pagetable. */
|
||||
static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (PagePinned(virt_to_page(mm->pgd))) {
|
||||
SetPagePinned(page);
|
||||
|
||||
vm_unmap_aliases();
|
||||
if (!PageHighMem(page)) {
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
|
||||
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
|
||||
} else {
|
||||
/* make sure there are no stray mappings of
|
||||
this page */
|
||||
kmap_flush_unused();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PTE);
|
||||
}
|
||||
|
||||
static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PMD);
|
||||
}
|
||||
|
||||
static int xen_pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
pgd_t *pgd = mm->pgd;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(PagePinned(virt_to_page(pgd)));
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
struct page *page = virt_to_page(pgd);
|
||||
pgd_t *user_pgd;
|
||||
|
||||
BUG_ON(page->private != 0);
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
||||
page->private = (unsigned long)user_pgd;
|
||||
|
||||
if (user_pgd != NULL) {
|
||||
user_pgd[pgd_index(VSYSCALL_START)] =
|
||||
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
pgd_t *user_pgd = xen_get_user_pgd(pgd);
|
||||
|
||||
if (user_pgd)
|
||||
free_page((unsigned long)user_pgd);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* This should never happen until we're OK to use struct page */
|
||||
static void xen_release_ptpage(unsigned long pfn, unsigned level)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (PagePinned(page)) {
|
||||
if (!PageHighMem(page)) {
|
||||
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
|
||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
ClearPagePinned(page);
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_release_pte(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PTE);
|
||||
}
|
||||
|
||||
static void xen_release_pmd(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PMD);
|
||||
}
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PUD);
|
||||
}
|
||||
|
||||
static void xen_release_pud(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PUD);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
if (PagePinned(page))
|
||||
prot = PAGE_KERNEL_RO;
|
||||
|
||||
if (0 && PageHighMem(page))
|
||||
printk("mapping highpte %lx type %d prot %s\n",
|
||||
page_to_pfn(page), type,
|
||||
(unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
|
||||
|
||||
return kmap_atomic_prot(page, type, prot);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
|
||||
if (pte_val_ma(*ptep) & _PAGE_PRESENT)
|
||||
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
|
||||
pte_val_ma(pte));
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
/* Init-time set_pte while constructing initial pagetables, which
|
||||
doesn't allow RO pagetable pages to be remapped RW */
|
||||
static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
pte = mask_rw_pte(ptep, pte);
|
||||
|
||||
xen_set_pte(ptep, pte);
|
||||
}
|
||||
#endif
|
||||
|
||||
static __init void xen_pagetable_setup_start(pgd_t *base)
|
||||
{
|
||||
}
|
||||
|
||||
void xen_setup_shared_info(void)
|
||||
{
|
||||
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
||||
@ -1021,37 +658,6 @@ void xen_setup_shared_info(void)
|
||||
xen_setup_mfn_list_list();
|
||||
}
|
||||
|
||||
static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||
{
|
||||
xen_setup_shared_info();
|
||||
}
|
||||
|
||||
static __init void xen_post_allocator_init(void)
|
||||
{
|
||||
pv_mmu_ops.set_pte = xen_set_pte;
|
||||
pv_mmu_ops.set_pmd = xen_set_pmd;
|
||||
pv_mmu_ops.set_pud = xen_set_pud;
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pv_mmu_ops.set_pgd = xen_set_pgd;
|
||||
#endif
|
||||
|
||||
/* This will work as long as patching hasn't happened yet
|
||||
(which it hasn't) */
|
||||
pv_mmu_ops.alloc_pte = xen_alloc_pte;
|
||||
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
|
||||
pv_mmu_ops.release_pte = xen_release_pte;
|
||||
pv_mmu_ops.release_pmd = xen_release_pmd;
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pv_mmu_ops.alloc_pud = xen_alloc_pud;
|
||||
pv_mmu_ops.release_pud = xen_release_pud;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
SetPagePinned(virt_to_page(level3_user_vsyscall));
|
||||
#endif
|
||||
xen_mark_init_mm_pinned();
|
||||
}
|
||||
|
||||
/* This is called once we have the cpu_possible_map */
|
||||
void xen_setup_vcpu_info_placement(void)
|
||||
{
|
||||
@ -1065,10 +671,10 @@ void xen_setup_vcpu_info_placement(void)
|
||||
if (have_vcpu_info_placement) {
|
||||
printk(KERN_INFO "Xen: using vcpu_info placement\n");
|
||||
|
||||
pv_irq_ops.save_fl = xen_save_fl_direct;
|
||||
pv_irq_ops.restore_fl = xen_restore_fl_direct;
|
||||
pv_irq_ops.irq_disable = xen_irq_disable_direct;
|
||||
pv_irq_ops.irq_enable = xen_irq_enable_direct;
|
||||
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
|
||||
pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
|
||||
pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
|
||||
pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
|
||||
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
|
||||
}
|
||||
}
|
||||
@ -1126,49 +732,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
|
||||
{
|
||||
pte_t pte;
|
||||
|
||||
phys >>= PAGE_SHIFT;
|
||||
|
||||
switch (idx) {
|
||||
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
case FIX_F00F_IDT:
|
||||
#endif
|
||||
#ifdef CONFIG_X86_32
|
||||
case FIX_WP_TEST:
|
||||
case FIX_VDSO:
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
|
||||
# endif
|
||||
#else
|
||||
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
|
||||
#endif
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
case FIX_APIC_BASE: /* maps dummy local APIC */
|
||||
#endif
|
||||
pte = pfn_pte(phys, prot);
|
||||
break;
|
||||
|
||||
default:
|
||||
pte = mfn_pte(phys, prot);
|
||||
break;
|
||||
}
|
||||
|
||||
__native_set_fixmap(idx, pte);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Replicate changes to map the vsyscall page into the user
|
||||
pagetable vsyscall mapping. */
|
||||
if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
|
||||
unsigned long vaddr = __fix_to_virt(idx);
|
||||
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static const struct pv_info xen_info __initdata = {
|
||||
.paravirt_enabled = 1,
|
||||
.shared_kernel_pmd = 0,
|
||||
@ -1264,86 +827,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = {
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct pv_mmu_ops xen_mmu_ops __initdata = {
|
||||
.pagetable_setup_start = xen_pagetable_setup_start,
|
||||
.pagetable_setup_done = xen_pagetable_setup_done,
|
||||
|
||||
.read_cr2 = xen_read_cr2,
|
||||
.write_cr2 = xen_write_cr2,
|
||||
|
||||
.read_cr3 = xen_read_cr3,
|
||||
.write_cr3 = xen_write_cr3,
|
||||
|
||||
.flush_tlb_user = xen_flush_tlb,
|
||||
.flush_tlb_kernel = xen_flush_tlb,
|
||||
.flush_tlb_single = xen_flush_tlb_single,
|
||||
.flush_tlb_others = xen_flush_tlb_others,
|
||||
|
||||
.pte_update = paravirt_nop,
|
||||
.pte_update_defer = paravirt_nop,
|
||||
|
||||
.pgd_alloc = xen_pgd_alloc,
|
||||
.pgd_free = xen_pgd_free,
|
||||
|
||||
.alloc_pte = xen_alloc_pte_init,
|
||||
.release_pte = xen_release_pte_init,
|
||||
.alloc_pmd = xen_alloc_pte_init,
|
||||
.alloc_pmd_clone = paravirt_nop,
|
||||
.release_pmd = xen_release_pte_init,
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
.kmap_atomic_pte = xen_kmap_atomic_pte,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
.set_pte = xen_set_pte,
|
||||
#else
|
||||
.set_pte = xen_set_pte_init,
|
||||
#endif
|
||||
.set_pte_at = xen_set_pte_at,
|
||||
.set_pmd = xen_set_pmd_hyper,
|
||||
|
||||
.ptep_modify_prot_start = __ptep_modify_prot_start,
|
||||
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
|
||||
|
||||
.pte_val = xen_pte_val,
|
||||
.pgd_val = xen_pgd_val,
|
||||
|
||||
.make_pte = xen_make_pte,
|
||||
.make_pgd = xen_make_pgd,
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
.set_pte_atomic = xen_set_pte_atomic,
|
||||
.set_pte_present = xen_set_pte_at,
|
||||
.pte_clear = xen_pte_clear,
|
||||
.pmd_clear = xen_pmd_clear,
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
.set_pud = xen_set_pud_hyper,
|
||||
|
||||
.make_pmd = xen_make_pmd,
|
||||
.pmd_val = xen_pmd_val,
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
.pud_val = xen_pud_val,
|
||||
.make_pud = xen_make_pud,
|
||||
.set_pgd = xen_set_pgd_hyper,
|
||||
|
||||
.alloc_pud = xen_alloc_pte_init,
|
||||
.release_pud = xen_release_pte_init,
|
||||
#endif /* PAGETABLE_LEVELS == 4 */
|
||||
|
||||
.activate_mm = xen_activate_mm,
|
||||
.dup_mmap = xen_dup_mmap,
|
||||
.exit_mmap = xen_exit_mmap,
|
||||
|
||||
.lazy_mode = {
|
||||
.enter = paravirt_enter_lazy_mmu,
|
||||
.leave = xen_leave_lazy,
|
||||
},
|
||||
|
||||
.set_fixmap = xen_set_fixmap,
|
||||
};
|
||||
|
||||
static void xen_reboot(int reason)
|
||||
{
|
||||
struct sched_shutdown r = { .reason = reason };
|
||||
@ -1386,223 +869,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
|
||||
};
|
||||
|
||||
|
||||
static void __init xen_reserve_top(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long top = HYPERVISOR_VIRT_START;
|
||||
struct xen_platform_parameters pp;
|
||||
|
||||
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
|
||||
top = pp.virt_start;
|
||||
|
||||
reserve_top_address(-top);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
}
|
||||
|
||||
/*
|
||||
* Like __va(), but returns address in the kernel mapping (which is
|
||||
* all we have until the physical memory mapping has been set up).
|
||||
*/
|
||||
static void *__ka(phys_addr_t paddr)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return (void *)(paddr + __START_KERNEL_map);
|
||||
#else
|
||||
return __va(paddr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Convert a machine address to physical address */
|
||||
static unsigned long m2p(phys_addr_t maddr)
|
||||
{
|
||||
phys_addr_t paddr;
|
||||
|
||||
maddr &= PTE_PFN_MASK;
|
||||
paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
|
||||
|
||||
return paddr;
|
||||
}
|
||||
|
||||
/* Convert a machine address to kernel virtual */
|
||||
static void *m2v(phys_addr_t maddr)
|
||||
{
|
||||
return __ka(m2p(maddr));
|
||||
}
|
||||
|
||||
static void set_page_prot(void *addr, pgprot_t prot)
|
||||
{
|
||||
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
|
||||
pte_t pte = pfn_pte(pfn, prot);
|
||||
|
||||
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
|
||||
BUG();
|
||||
}
|
||||
|
||||
static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
|
||||
{
|
||||
unsigned pmdidx, pteidx;
|
||||
unsigned ident_pte;
|
||||
unsigned long pfn;
|
||||
|
||||
ident_pte = 0;
|
||||
pfn = 0;
|
||||
for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
|
||||
pte_t *pte_page;
|
||||
|
||||
/* Reuse or allocate a page of ptes */
|
||||
if (pmd_present(pmd[pmdidx]))
|
||||
pte_page = m2v(pmd[pmdidx].pmd);
|
||||
else {
|
||||
/* Check for free pte pages */
|
||||
if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
|
||||
break;
|
||||
|
||||
pte_page = &level1_ident_pgt[ident_pte];
|
||||
ident_pte += PTRS_PER_PTE;
|
||||
|
||||
pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
|
||||
}
|
||||
|
||||
/* Install mappings */
|
||||
for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
|
||||
pte_t pte;
|
||||
|
||||
if (pfn > max_pfn_mapped)
|
||||
max_pfn_mapped = pfn;
|
||||
|
||||
if (!pte_none(pte_page[pteidx]))
|
||||
continue;
|
||||
|
||||
pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
|
||||
pte_page[pteidx] = pte;
|
||||
}
|
||||
}
|
||||
|
||||
for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
|
||||
set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
|
||||
|
||||
set_page_prot(pmd, PAGE_KERNEL_RO);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static void convert_pfn_mfn(void *v)
|
||||
{
|
||||
pte_t *pte = v;
|
||||
int i;
|
||||
|
||||
/* All levels are converted the same way, so just treat them
|
||||
as ptes. */
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
pte[i] = xen_make_pte(pte[i].pte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the initial kernel pagetable.
|
||||
*
|
||||
* We can construct this by grafting the Xen provided pagetable into
|
||||
* head_64.S's preconstructed pagetables. We copy the Xen L2's into
|
||||
* level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
|
||||
* means that only the kernel has a physical mapping to start with -
|
||||
* but that's enough to get __va working. We need to fill in the rest
|
||||
* of the physical mapping once some sort of allocator has been set
|
||||
* up.
|
||||
*/
|
||||
static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
{
|
||||
pud_t *l3;
|
||||
pmd_t *l2;
|
||||
|
||||
/* Zap identity mapping */
|
||||
init_level4_pgt[0] = __pgd(0);
|
||||
|
||||
/* Pre-constructed entries are in pfn, so convert to mfn */
|
||||
convert_pfn_mfn(init_level4_pgt);
|
||||
convert_pfn_mfn(level3_ident_pgt);
|
||||
convert_pfn_mfn(level3_kernel_pgt);
|
||||
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
|
||||
|
||||
memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
|
||||
memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
/* Set up identity map */
|
||||
xen_map_identity_early(level2_ident_pgt, max_pfn);
|
||||
|
||||
/* Make pagetable pieces RO */
|
||||
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
|
||||
|
||||
/* Pin down new L4 */
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
|
||||
PFN_DOWN(__pa_symbol(init_level4_pgt)));
|
||||
|
||||
/* Unpin Xen-provided one */
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
/* Switch over */
|
||||
pgd = init_level4_pgt;
|
||||
|
||||
/*
|
||||
* At this stage there can be no user pgd, and no page
|
||||
* structure to attach it to, so make sure we just set kernel
|
||||
* pgd.
|
||||
*/
|
||||
xen_mc_batch();
|
||||
__xen_write_cr3(true, __pa(pgd));
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
|
||||
reserve_early(__pa(xen_start_info->pt_base),
|
||||
__pa(xen_start_info->pt_base +
|
||||
xen_start_info->nr_pt_frames * PAGE_SIZE),
|
||||
"XEN PAGETABLES");
|
||||
|
||||
return pgd;
|
||||
}
|
||||
#else /* !CONFIG_X86_64 */
|
||||
static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
|
||||
|
||||
static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
{
|
||||
pmd_t *kernel_pmd;
|
||||
|
||||
init_pg_tables_start = __pa(pgd);
|
||||
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
|
||||
max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
|
||||
|
||||
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
|
||||
memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
xen_map_identity_early(level2_kernel_pgt, max_pfn);
|
||||
|
||||
memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||
set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
|
||||
__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
|
||||
|
||||
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
|
||||
set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
|
||||
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
xen_write_cr3(__pa(swapper_pg_dir));
|
||||
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
|
||||
|
||||
return swapper_pg_dir;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/* First C function to be called on Xen boot */
|
||||
asmlinkage void __init xen_start_kernel(void)
|
||||
{
|
||||
@ -1642,9 +908,18 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
machine_ops = xen_machine_ops;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Disable until direct per-cpu data access. */
|
||||
have_vcpu_info_placement = 0;
|
||||
/*
|
||||
* Setup percpu state. We only need to do this for 64-bit
|
||||
* because 32-bit already has %fs set properly.
|
||||
*/
|
||||
load_percpu_segment(0);
|
||||
#endif
|
||||
/*
|
||||
* The only reliable way to retain the initial address of the
|
||||
* percpu gdt_page is to remember it here, so we can go and
|
||||
* mark it RW later, when the initial percpu area is freed.
|
||||
*/
|
||||
xen_initial_gdt = &per_cpu(gdt_page, 0);
|
||||
|
||||
xen_smp_init();
|
||||
|
||||
|
@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void)
|
||||
*/
|
||||
return (-flags) & X86_EFLAGS_IF;
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
|
||||
|
||||
static void xen_restore_fl(unsigned long flags)
|
||||
{
|
||||
@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags)
|
||||
xen_force_evtchn_callback();
|
||||
}
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
|
||||
|
||||
static void xen_irq_disable(void)
|
||||
{
|
||||
@ -86,6 +88,7 @@ static void xen_irq_disable(void)
|
||||
percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
|
||||
|
||||
static void xen_irq_enable(void)
|
||||
{
|
||||
@ -106,6 +109,7 @@ static void xen_irq_enable(void)
|
||||
if (unlikely(vcpu->evtchn_upcall_pending))
|
||||
xen_force_evtchn_callback();
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
|
||||
|
||||
static void xen_safe_halt(void)
|
||||
{
|
||||
@ -124,10 +128,12 @@ static void xen_halt(void)
|
||||
|
||||
static const struct pv_irq_ops xen_irq_ops __initdata = {
|
||||
.init_IRQ = __xen_init_IRQ,
|
||||
.save_fl = xen_save_fl,
|
||||
.restore_fl = xen_restore_fl,
|
||||
.irq_disable = xen_irq_disable,
|
||||
.irq_enable = xen_irq_enable,
|
||||
|
||||
.save_fl = PV_CALLEE_SAVE(xen_save_fl),
|
||||
.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
|
||||
.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
|
||||
.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
|
||||
|
||||
.safe_halt = xen_safe_halt,
|
||||
.halt = xen_halt,
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/linkage.h>
|
||||
|
||||
@ -55,6 +56,8 @@
|
||||
|
||||
#include <xen/page.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/version.h>
|
||||
#include <xen/hvc-console.h>
|
||||
|
||||
#include "multicalls.h"
|
||||
#include "mmu.h"
|
||||
@ -114,6 +117,37 @@ static inline void check_zero(void)
|
||||
|
||||
#endif /* CONFIG_XEN_DEBUG_FS */
|
||||
|
||||
|
||||
/*
|
||||
* Identity map, in addition to plain kernel map. This needs to be
|
||||
* large enough to allocate page table pages to allocate the rest.
|
||||
* Each page can map 2MB.
|
||||
*/
|
||||
static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* l3 pud for userspace vsyscall mapping */
|
||||
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* Note about cr3 (pagetable base) values:
|
||||
*
|
||||
* xen_cr3 contains the current logical cr3 value; it contains the
|
||||
* last set cr3. This may not be the current effective cr3, because
|
||||
* its update may be being lazily deferred. However, a vcpu looking
|
||||
* at its own cr3 can use this value knowing that everything will
|
||||
* be self-consistent.
|
||||
*
|
||||
* xen_current_cr3 contains the actual vcpu cr3; it is set once the
|
||||
* hypercall to set the vcpu cr3 is complete (so it may be a little
|
||||
* out of date, but it will never be set early). If one vcpu is
|
||||
* looking at another vcpu's cr3 value, it should use this variable.
|
||||
*/
|
||||
DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
|
||||
DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
|
||||
|
||||
|
||||
/*
|
||||
* Just beyond the highest usermode address. STACK_TOP_MAX has a
|
||||
* redzone above it, so round it up to a PGD boundary.
|
||||
@ -458,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte)
|
||||
{
|
||||
return pte_mfn_to_pfn(pte.pte);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
|
||||
|
||||
pgdval_t xen_pgd_val(pgd_t pgd)
|
||||
{
|
||||
return pte_mfn_to_pfn(pgd.pgd);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
|
||||
|
||||
pte_t xen_make_pte(pteval_t pte)
|
||||
{
|
||||
pte = pte_pfn_to_mfn(pte);
|
||||
return native_make_pte(pte);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
|
||||
|
||||
pgd_t xen_make_pgd(pgdval_t pgd)
|
||||
{
|
||||
pgd = pte_pfn_to_mfn(pgd);
|
||||
return native_make_pgd(pgd);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
|
||||
|
||||
pmdval_t xen_pmd_val(pmd_t pmd)
|
||||
{
|
||||
return pte_mfn_to_pfn(pmd.pmd);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
|
||||
|
||||
void xen_set_pud_hyper(pud_t *ptr, pud_t val)
|
||||
{
|
||||
@ -556,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd)
|
||||
pmd = pte_pfn_to_mfn(pmd);
|
||||
return native_make_pmd(pmd);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pudval_t xen_pud_val(pud_t pud)
|
||||
{
|
||||
return pte_mfn_to_pfn(pud.pud);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
|
||||
|
||||
pud_t xen_make_pud(pudval_t pud)
|
||||
{
|
||||
@ -569,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud)
|
||||
|
||||
return native_make_pud(pud);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
|
||||
|
||||
pgd_t *xen_get_user_pgd(pgd_t *pgd)
|
||||
{
|
||||
@ -1152,6 +1194,709 @@ void xen_exit_mmap(struct mm_struct *mm)
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
}
|
||||
|
||||
static __init void xen_pagetable_setup_start(pgd_t *base)
|
||||
{
|
||||
}
|
||||
|
||||
static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||
{
|
||||
xen_setup_shared_info();
|
||||
}
|
||||
|
||||
static void xen_write_cr2(unsigned long cr2)
|
||||
{
|
||||
percpu_read(xen_vcpu)->arch.cr2 = cr2;
|
||||
}
|
||||
|
||||
static unsigned long xen_read_cr2(void)
|
||||
{
|
||||
return percpu_read(xen_vcpu)->arch.cr2;
|
||||
}
|
||||
|
||||
unsigned long xen_read_cr2_direct(void)
|
||||
{
|
||||
return percpu_read(xen_vcpu_info.arch.cr2);
|
||||
}
|
||||
|
||||
static void xen_flush_tlb(void)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*op));
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void xen_flush_tlb_single(unsigned long addr)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*op));
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_INVLPG_LOCAL;
|
||||
op->arg1.linear_addr = addr & PAGE_MASK;
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void xen_flush_tlb_others(const struct cpumask *cpus,
|
||||
struct mm_struct *mm, unsigned long va)
|
||||
{
|
||||
struct {
|
||||
struct mmuext_op op;
|
||||
DECLARE_BITMAP(mask, NR_CPUS);
|
||||
} *args;
|
||||
struct multicall_space mcs;
|
||||
|
||||
BUG_ON(cpumask_empty(cpus));
|
||||
BUG_ON(!mm);
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*args));
|
||||
args = mcs.args;
|
||||
args->op.arg2.vcpumask = to_cpumask(args->mask);
|
||||
|
||||
/* Remove us, and any offline CPUS. */
|
||||
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
|
||||
if (unlikely(cpumask_empty(to_cpumask(args->mask))))
|
||||
goto issue;
|
||||
|
||||
if (va == TLB_FLUSH_ALL) {
|
||||
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
|
||||
} else {
|
||||
args->op.cmd = MMUEXT_INVLPG_MULTI;
|
||||
args->op.arg1.linear_addr = va;
|
||||
}
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
|
||||
|
||||
issue:
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
}
|
||||
|
||||
static unsigned long xen_read_cr3(void)
|
||||
{
|
||||
return percpu_read(xen_cr3);
|
||||
}
|
||||
|
||||
static void set_current_cr3(void *v)
|
||||
{
|
||||
percpu_write(xen_current_cr3, (unsigned long)v);
|
||||
}
|
||||
|
||||
static void __xen_write_cr3(bool kernel, unsigned long cr3)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
unsigned long mfn;
|
||||
|
||||
if (cr3)
|
||||
mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
||||
else
|
||||
mfn = 0;
|
||||
|
||||
WARN_ON(mfn == 0 && kernel);
|
||||
|
||||
mcs = __xen_mc_entry(sizeof(*op));
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
|
||||
op->arg1.mfn = mfn;
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
if (kernel) {
|
||||
percpu_write(xen_cr3, cr3);
|
||||
|
||||
/* Update xen_current_cr3 once the batch has actually
|
||||
been submitted. */
|
||||
xen_mc_callback(set_current_cr3, (void *)cr3);
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_write_cr3(unsigned long cr3)
|
||||
{
|
||||
BUG_ON(preemptible());
|
||||
|
||||
xen_mc_batch(); /* disables interrupts */
|
||||
|
||||
/* Update while interrupts are disabled, so its atomic with
|
||||
respect to ipis */
|
||||
percpu_write(xen_cr3, cr3);
|
||||
|
||||
__xen_write_cr3(true, cr3);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
|
||||
if (user_pgd)
|
||||
__xen_write_cr3(false, __pa(user_pgd));
|
||||
else
|
||||
__xen_write_cr3(false, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
|
||||
}
|
||||
|
||||
static int xen_pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
pgd_t *pgd = mm->pgd;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(PagePinned(virt_to_page(pgd)));
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
{
|
||||
struct page *page = virt_to_page(pgd);
|
||||
pgd_t *user_pgd;
|
||||
|
||||
BUG_ON(page->private != 0);
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
||||
page->private = (unsigned long)user_pgd;
|
||||
|
||||
if (user_pgd != NULL) {
|
||||
user_pgd[pgd_index(VSYSCALL_START)] =
|
||||
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
pgd_t *user_pgd = xen_get_user_pgd(pgd);
|
||||
|
||||
if (user_pgd)
|
||||
free_page((unsigned long)user_pgd);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
if (PagePinned(page))
|
||||
prot = PAGE_KERNEL_RO;
|
||||
|
||||
if (0 && PageHighMem(page))
|
||||
printk("mapping highpte %lx type %d prot %s\n",
|
||||
page_to_pfn(page), type,
|
||||
(unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
|
||||
|
||||
return kmap_atomic_prot(page, type, prot);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
|
||||
if (pte_val_ma(*ptep) & _PAGE_PRESENT)
|
||||
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
|
||||
pte_val_ma(pte));
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
/* Init-time set_pte while constructing initial pagetables, which
|
||||
doesn't allow RO pagetable pages to be remapped RW */
|
||||
static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
pte = mask_rw_pte(ptep, pte);
|
||||
|
||||
xen_set_pte(ptep, pte);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Early in boot, while setting up the initial pagetable, assume
|
||||
everything is pinned. */
|
||||
static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
#ifdef CONFIG_FLATMEM
|
||||
BUG_ON(mem_map); /* should only be used early */
|
||||
#endif
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
|
||||
/* Early release_pte assumes that all pts are pinned, since there's
|
||||
only init_mm and anything attached to that is pinned. */
|
||||
static void xen_release_pte_init(unsigned long pfn)
|
||||
{
|
||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
|
||||
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
|
||||
{
|
||||
struct mmuext_op op;
|
||||
op.cmd = cmd;
|
||||
op.arg1.mfn = pfn_to_mfn(pfn);
|
||||
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* This needs to make sure the new pte page is pinned iff it's being
|
||||
attached to a pinned pagetable. */
|
||||
static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (PagePinned(virt_to_page(mm->pgd))) {
|
||||
SetPagePinned(page);
|
||||
|
||||
vm_unmap_aliases();
|
||||
if (!PageHighMem(page)) {
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
|
||||
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
|
||||
} else {
|
||||
/* make sure there are no stray mappings of
|
||||
this page */
|
||||
kmap_flush_unused();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PTE);
|
||||
}
|
||||
|
||||
static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PMD);
|
||||
}
|
||||
|
||||
/* This should never happen until we're OK to use struct page */
|
||||
static void xen_release_ptpage(unsigned long pfn, unsigned level)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (PagePinned(page)) {
|
||||
if (!PageHighMem(page)) {
|
||||
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
|
||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
ClearPagePinned(page);
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_release_pte(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PTE);
|
||||
}
|
||||
|
||||
static void xen_release_pmd(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PMD);
|
||||
}
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
|
||||
{
|
||||
xen_alloc_ptpage(mm, pfn, PT_PUD);
|
||||
}
|
||||
|
||||
static void xen_release_pud(unsigned long pfn)
|
||||
{
|
||||
xen_release_ptpage(pfn, PT_PUD);
|
||||
}
|
||||
#endif
|
||||
|
||||
void __init xen_reserve_top(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long top = HYPERVISOR_VIRT_START;
|
||||
struct xen_platform_parameters pp;
|
||||
|
||||
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
|
||||
top = pp.virt_start;
|
||||
|
||||
reserve_top_address(-top);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
}
|
||||
|
||||
/*
|
||||
* Like __va(), but returns address in the kernel mapping (which is
|
||||
* all we have until the physical memory mapping has been set up).
|
||||
*/
|
||||
static void *__ka(phys_addr_t paddr)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return (void *)(paddr + __START_KERNEL_map);
|
||||
#else
|
||||
return __va(paddr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Convert a machine address to physical address */
|
||||
static unsigned long m2p(phys_addr_t maddr)
|
||||
{
|
||||
phys_addr_t paddr;
|
||||
|
||||
maddr &= PTE_PFN_MASK;
|
||||
paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
|
||||
|
||||
return paddr;
|
||||
}
|
||||
|
||||
/* Convert a machine address to kernel virtual */
|
||||
static void *m2v(phys_addr_t maddr)
|
||||
{
|
||||
return __ka(m2p(maddr));
|
||||
}
|
||||
|
||||
static void set_page_prot(void *addr, pgprot_t prot)
|
||||
{
|
||||
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
|
||||
pte_t pte = pfn_pte(pfn, prot);
|
||||
|
||||
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
|
||||
BUG();
|
||||
}
|
||||
|
||||
static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
|
||||
{
|
||||
unsigned pmdidx, pteidx;
|
||||
unsigned ident_pte;
|
||||
unsigned long pfn;
|
||||
|
||||
ident_pte = 0;
|
||||
pfn = 0;
|
||||
for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
|
||||
pte_t *pte_page;
|
||||
|
||||
/* Reuse or allocate a page of ptes */
|
||||
if (pmd_present(pmd[pmdidx]))
|
||||
pte_page = m2v(pmd[pmdidx].pmd);
|
||||
else {
|
||||
/* Check for free pte pages */
|
||||
if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
|
||||
break;
|
||||
|
||||
pte_page = &level1_ident_pgt[ident_pte];
|
||||
ident_pte += PTRS_PER_PTE;
|
||||
|
||||
pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
|
||||
}
|
||||
|
||||
/* Install mappings */
|
||||
for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
|
||||
pte_t pte;
|
||||
|
||||
if (pfn > max_pfn_mapped)
|
||||
max_pfn_mapped = pfn;
|
||||
|
||||
if (!pte_none(pte_page[pteidx]))
|
||||
continue;
|
||||
|
||||
pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
|
||||
pte_page[pteidx] = pte;
|
||||
}
|
||||
}
|
||||
|
||||
for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
|
||||
set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
|
||||
|
||||
set_page_prot(pmd, PAGE_KERNEL_RO);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static void convert_pfn_mfn(void *v)
|
||||
{
|
||||
pte_t *pte = v;
|
||||
int i;
|
||||
|
||||
/* All levels are converted the same way, so just treat them
|
||||
as ptes. */
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
pte[i] = xen_make_pte(pte[i].pte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the initial kernel pagetable.
|
||||
*
|
||||
* We can construct this by grafting the Xen provided pagetable into
|
||||
* head_64.S's preconstructed pagetables. We copy the Xen L2's into
|
||||
* level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
|
||||
* means that only the kernel has a physical mapping to start with -
|
||||
* but that's enough to get __va working. We need to fill in the rest
|
||||
* of the physical mapping once some sort of allocator has been set
|
||||
* up.
|
||||
*/
|
||||
__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
{
|
||||
pud_t *l3;
|
||||
pmd_t *l2;
|
||||
|
||||
/* Zap identity mapping */
|
||||
init_level4_pgt[0] = __pgd(0);
|
||||
|
||||
/* Pre-constructed entries are in pfn, so convert to mfn */
|
||||
convert_pfn_mfn(init_level4_pgt);
|
||||
convert_pfn_mfn(level3_ident_pgt);
|
||||
convert_pfn_mfn(level3_kernel_pgt);
|
||||
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
|
||||
|
||||
memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
|
||||
memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
/* Set up identity map */
|
||||
xen_map_identity_early(level2_ident_pgt, max_pfn);
|
||||
|
||||
/* Make pagetable pieces RO */
|
||||
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
|
||||
|
||||
/* Pin down new L4 */
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
|
||||
PFN_DOWN(__pa_symbol(init_level4_pgt)));
|
||||
|
||||
/* Unpin Xen-provided one */
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
/* Switch over */
|
||||
pgd = init_level4_pgt;
|
||||
|
||||
/*
|
||||
* At this stage there can be no user pgd, and no page
|
||||
* structure to attach it to, so make sure we just set kernel
|
||||
* pgd.
|
||||
*/
|
||||
xen_mc_batch();
|
||||
__xen_write_cr3(true, __pa(pgd));
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
|
||||
reserve_early(__pa(xen_start_info->pt_base),
|
||||
__pa(xen_start_info->pt_base +
|
||||
xen_start_info->nr_pt_frames * PAGE_SIZE),
|
||||
"XEN PAGETABLES");
|
||||
|
||||
return pgd;
|
||||
}
|
||||
#else /* !CONFIG_X86_64 */
|
||||
static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
|
||||
|
||||
__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
{
|
||||
pmd_t *kernel_pmd;
|
||||
|
||||
init_pg_tables_start = __pa(pgd);
|
||||
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
|
||||
max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
|
||||
|
||||
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
|
||||
memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
xen_map_identity_early(level2_kernel_pgt, max_pfn);
|
||||
|
||||
memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||
set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
|
||||
__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
|
||||
|
||||
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
|
||||
set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
|
||||
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
xen_write_cr3(__pa(swapper_pg_dir));
|
||||
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
|
||||
|
||||
return swapper_pg_dir;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
|
||||
{
|
||||
pte_t pte;
|
||||
|
||||
phys >>= PAGE_SHIFT;
|
||||
|
||||
switch (idx) {
|
||||
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
case FIX_F00F_IDT:
|
||||
#endif
|
||||
#ifdef CONFIG_X86_32
|
||||
case FIX_WP_TEST:
|
||||
case FIX_VDSO:
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
|
||||
# endif
|
||||
#else
|
||||
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
|
||||
#endif
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
case FIX_APIC_BASE: /* maps dummy local APIC */
|
||||
#endif
|
||||
pte = pfn_pte(phys, prot);
|
||||
break;
|
||||
|
||||
default:
|
||||
pte = mfn_pte(phys, prot);
|
||||
break;
|
||||
}
|
||||
|
||||
__native_set_fixmap(idx, pte);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Replicate changes to map the vsyscall page into the user
|
||||
pagetable vsyscall mapping. */
|
||||
if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
|
||||
unsigned long vaddr = __fix_to_virt(idx);
|
||||
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
__init void xen_post_allocator_init(void)
|
||||
{
|
||||
pv_mmu_ops.set_pte = xen_set_pte;
|
||||
pv_mmu_ops.set_pmd = xen_set_pmd;
|
||||
pv_mmu_ops.set_pud = xen_set_pud;
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pv_mmu_ops.set_pgd = xen_set_pgd;
|
||||
#endif
|
||||
|
||||
/* This will work as long as patching hasn't happened yet
|
||||
(which it hasn't) */
|
||||
pv_mmu_ops.alloc_pte = xen_alloc_pte;
|
||||
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
|
||||
pv_mmu_ops.release_pte = xen_release_pte;
|
||||
pv_mmu_ops.release_pmd = xen_release_pmd;
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
pv_mmu_ops.alloc_pud = xen_alloc_pud;
|
||||
pv_mmu_ops.release_pud = xen_release_pud;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
SetPagePinned(virt_to_page(level3_user_vsyscall));
|
||||
#endif
|
||||
xen_mark_init_mm_pinned();
|
||||
}
|
||||
|
||||
|
||||
const struct pv_mmu_ops xen_mmu_ops __initdata = {
|
||||
.pagetable_setup_start = xen_pagetable_setup_start,
|
||||
.pagetable_setup_done = xen_pagetable_setup_done,
|
||||
|
||||
.read_cr2 = xen_read_cr2,
|
||||
.write_cr2 = xen_write_cr2,
|
||||
|
||||
.read_cr3 = xen_read_cr3,
|
||||
.write_cr3 = xen_write_cr3,
|
||||
|
||||
.flush_tlb_user = xen_flush_tlb,
|
||||
.flush_tlb_kernel = xen_flush_tlb,
|
||||
.flush_tlb_single = xen_flush_tlb_single,
|
||||
.flush_tlb_others = xen_flush_tlb_others,
|
||||
|
||||
.pte_update = paravirt_nop,
|
||||
.pte_update_defer = paravirt_nop,
|
||||
|
||||
.pgd_alloc = xen_pgd_alloc,
|
||||
.pgd_free = xen_pgd_free,
|
||||
|
||||
.alloc_pte = xen_alloc_pte_init,
|
||||
.release_pte = xen_release_pte_init,
|
||||
.alloc_pmd = xen_alloc_pte_init,
|
||||
.alloc_pmd_clone = paravirt_nop,
|
||||
.release_pmd = xen_release_pte_init,
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
.kmap_atomic_pte = xen_kmap_atomic_pte,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
.set_pte = xen_set_pte,
|
||||
#else
|
||||
.set_pte = xen_set_pte_init,
|
||||
#endif
|
||||
.set_pte_at = xen_set_pte_at,
|
||||
.set_pmd = xen_set_pmd_hyper,
|
||||
|
||||
.ptep_modify_prot_start = __ptep_modify_prot_start,
|
||||
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
|
||||
|
||||
.pte_val = PV_CALLEE_SAVE(xen_pte_val),
|
||||
.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
|
||||
|
||||
.make_pte = PV_CALLEE_SAVE(xen_make_pte),
|
||||
.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
.set_pte_atomic = xen_set_pte_atomic,
|
||||
.set_pte_present = xen_set_pte_at,
|
||||
.pte_clear = xen_pte_clear,
|
||||
.pmd_clear = xen_pmd_clear,
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
.set_pud = xen_set_pud_hyper,
|
||||
|
||||
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
|
||||
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
|
||||
|
||||
#if PAGETABLE_LEVELS == 4
|
||||
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
|
||||
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
|
||||
.set_pgd = xen_set_pgd_hyper,
|
||||
|
||||
.alloc_pud = xen_alloc_pte_init,
|
||||
.release_pud = xen_release_pte_init,
|
||||
#endif /* PAGETABLE_LEVELS == 4 */
|
||||
|
||||
.activate_mm = xen_activate_mm,
|
||||
.dup_mmap = xen_dup_mmap,
|
||||
.exit_mmap = xen_exit_mmap,
|
||||
|
||||
.lazy_mode = {
|
||||
.enter = paravirt_enter_lazy_mmu,
|
||||
.leave = xen_leave_lazy,
|
||||
},
|
||||
|
||||
.set_fixmap = xen_set_fixmap,
|
||||
};
|
||||
|
||||
|
||||
#ifdef CONFIG_XEN_DEBUG_FS
|
||||
|
||||
static struct dentry *d_mmu_debug;
|
||||
|
@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t
|
||||
void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte);
|
||||
|
||||
unsigned long xen_read_cr2_direct(void);
|
||||
|
||||
extern const struct pv_mmu_ops xen_mmu_ops;
|
||||
#endif /* _XEN_MMU_H */
|
||||
|
@ -170,7 +170,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
|
||||
|
||||
/* We've switched to the "real" per-cpu gdt, so make sure the
|
||||
old memory can be recycled */
|
||||
make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
|
||||
make_lowmem_page_readwrite(xen_initial_gdt);
|
||||
|
||||
xen_setup_vcpu_info_placement();
|
||||
}
|
||||
@ -235,6 +235,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
||||
ctxt->user_regs.ss = __KERNEL_DS;
|
||||
#ifdef CONFIG_X86_32
|
||||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
#else
|
||||
ctxt->gs_base_kernel = per_cpu_offset(cpu);
|
||||
#endif
|
||||
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
|
||||
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
|
||||
@ -284,6 +286,9 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
|
||||
irq_ctx_init(cpu);
|
||||
#else
|
||||
clear_tsk_thread_flag(idle, TIF_FORK);
|
||||
per_cpu(kernel_stack, cpu) =
|
||||
(unsigned long)task_stack_page(idle) -
|
||||
KERNEL_STACK_OFFSET + THREAD_SIZE;
|
||||
#endif
|
||||
xen_setup_timer(cpu);
|
||||
xen_init_lock_cpu(cpu);
|
||||
|
140
arch/x86/xen/xen-asm.S
Normal file
140
arch/x86/xen/xen-asm.S
Normal file
@ -0,0 +1,140 @@
|
||||
/*
|
||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
||||
The inline versions are the same as the direct-use versions, with the
|
||||
pre- and post-amble chopped off.
|
||||
|
||||
This code is encoded for size rather than absolute efficiency,
|
||||
with a view to being able to inline as much as possible.
|
||||
|
||||
We only bother with direct forms (ie, vcpu in percpu data) of
|
||||
the operations here; the indirect forms are better handled in
|
||||
C, since they're generally too large to inline anyway.
|
||||
*/
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
#include "xen-asm.h"
|
||||
|
||||
/*
|
||||
Enable events. This clears the event mask and tests the pending
|
||||
event status with one and operation. If there are pending
|
||||
events, then enter the hypervisor to get them handled.
|
||||
*/
|
||||
ENTRY(xen_irq_enable_direct)
|
||||
/* Unmask events */
|
||||
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* Test for pending */
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_irq_enable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_enable_direct)
|
||||
RELOC(xen_irq_enable_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Disabling events is simply a matter of making the event mask
|
||||
non-zero.
|
||||
*/
|
||||
ENTRY(xen_irq_disable_direct)
|
||||
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
ENDPATCH(xen_irq_disable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_disable_direct)
|
||||
RELOC(xen_irq_disable_direct, 0)
|
||||
|
||||
/*
|
||||
(xen_)save_fl is used to get the current interrupt enable status.
|
||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||
may be set in the return value. We take advantage of this by
|
||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||
in that byte are 0), but other bits in the return value are
|
||||
undefined. We need to toggle the state of the bit, because
|
||||
Xen and x86 use opposite senses (mask vs enable).
|
||||
*/
|
||||
ENTRY(xen_save_fl_direct)
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
setz %ah
|
||||
addb %ah,%ah
|
||||
ENDPATCH(xen_save_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_save_fl_direct)
|
||||
RELOC(xen_save_fl_direct, 0)
|
||||
|
||||
|
||||
/*
|
||||
In principle the caller should be passing us a value returned
|
||||
from xen_save_fl_direct, but for robustness' sake we test only
|
||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
||||
setting the interrupt mask state, it checks for unmasked
|
||||
pending events and enters the hypervisor to get them delivered
|
||||
if so.
|
||||
*/
|
||||
/*
 * xen_restore_fl_direct: set this CPU's event mask from a saved flags
 * value, then force an event check if that leaves an event both pending
 * and unmasked.
 *
 * Input: the flags value is read from %di on 64-bit and from %ah on
 * 32-bit; only the X86_EFLAGS_IF bit is examined.
 */
ENTRY(xen_restore_fl_direct)
|
||||
#ifdef CONFIG_X86_64
|
||||
testw $X86_EFLAGS_IF, %di
|
||||
#else
|
||||
testb $X86_EFLAGS_IF>>8, %ah
|
||||
#endif
|
||||
/*
 * IF clear in the saved flags -> ZF set -> mask byte = 1 (masked);
 * IF set -> ZF clear -> mask byte = 0 (unmasked).
 */
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* check for unmasked and pending */
|
||||
/*
 * 16-bit compare starting at the pending byte: it is equal to 0x0001 only
 * when that byte is 1 and the byte after it is 0. This assumes the mask
 * byte sits directly after the pending byte -- TODO confirm against the
 * vcpu_info layout.
 */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||
/*
 * Not equal means there is no pending-and-unmasked event, so skip the
 * hypercall. (With jz the call was skipped in exactly the case that
 * needs it and made in every other case.)
 */
jnz 1f
|
||||
/* Label 2 marks the call so the RELOC at the bottom can refer to it */
2: call check_events
|
||||
1:
|
||||
/* ENDPATCH places the xen_restore_fl_direct_end symbol here, before the ret */
ENDPATCH(xen_restore_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_restore_fl_direct)
|
||||
/* Sets xen_restore_fl_direct_reloc to one byte past label 2 */
RELOC(xen_restore_fl_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
/*
 * check_events: call xen_force_evtchn_callback with the registers listed
 * below saved before the call and restored after it, so the routines above
 * can call this without their callers seeing those registers change.
 */
check_events:
|
||||
#ifdef CONFIG_X86_32
|
||||
/* 32-bit: save %eax, %ecx and %edx around the call */
push %eax
|
||||
push %ecx
|
||||
push %edx
|
||||
call xen_force_evtchn_callback
|
||||
/* Restore in the reverse order of the pushes */
pop %edx
|
||||
pop %ecx
|
||||
pop %eax
|
||||
#else
|
||||
/* 64-bit: save %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 around the call */
push %rax
|
||||
push %rcx
|
||||
push %rdx
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %r8
|
||||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
call xen_force_evtchn_callback
|
||||
/* Restore in the reverse order of the pushes */
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
pop %rdx
|
||||
pop %rcx
|
||||
pop %rax
|
||||
#endif
|
||||
ret
|
||||
|
12
arch/x86/xen/xen-asm.h
Normal file
12
arch/x86/xen/xen-asm.h
Normal file
@ -0,0 +1,12 @@
|
||||
/* Assembler helper macros for the Xen assembly routines that include this header. */
#ifndef _XEN_XEN_ASM_H
|
||||
#define _XEN_XEN_ASM_H
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
/* RELOC(x, v): define the global symbol x_reloc with the value v */
#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
|
||||
/* ENDPATCH(x): define the global symbol x_end at the current location */
#define ENDPATCH(x) .globl x##_end; x##_end=.
|
||||
|
||||
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
|
||||
#define XEN_EFLAGS_NMI 0x80000000
|
||||
|
||||
#endif /* _XEN_XEN_ASM_H */
|
@ -11,101 +11,28 @@
|
||||
generally too large to inline anyway.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
//#include <asm/asm-offsets.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
|
||||
#define ENDPATCH(x) .globl x##_end; x##_end=.
|
||||
|
||||
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
|
||||
#define XEN_EFLAGS_NMI 0x80000000
|
||||
#include "xen-asm.h"
|
||||
|
||||
/*
|
||||
Enable events. This clears the event mask and tests the pending
|
||||
event status with one and operation. If there are pending
|
||||
events, then enter the hypervisor to get them handled.
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
ENTRY(xen_irq_enable_direct)
|
||||
/* Unmask events */
|
||||
movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* Test for pending */
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_irq_enable_direct)
|
||||
check_events:
|
||||
push %eax
|
||||
push %ecx
|
||||
push %edx
|
||||
call xen_force_evtchn_callback
|
||||
pop %edx
|
||||
pop %ecx
|
||||
pop %eax
|
||||
ret
|
||||
ENDPROC(xen_irq_enable_direct)
|
||||
RELOC(xen_irq_enable_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Disabling events is simply a matter of making the event mask
|
||||
non-zero.
|
||||
*/
|
||||
ENTRY(xen_irq_disable_direct)
|
||||
movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
ENDPATCH(xen_irq_disable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_disable_direct)
|
||||
RELOC(xen_irq_disable_direct, 0)
|
||||
|
||||
/*
|
||||
(xen_)save_fl is used to get the current interrupt enable status.
|
||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||
may be set in the return value. We take advantage of this by
|
||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||
in that byte are 0), but other bits in the return value are
|
||||
undefined. We need to toggle the state of the bit, because
|
||||
Xen and x86 use opposite senses (mask vs enable).
|
||||
*/
|
||||
ENTRY(xen_save_fl_direct)
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
setz %ah
|
||||
addb %ah,%ah
|
||||
ENDPATCH(xen_save_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_save_fl_direct)
|
||||
RELOC(xen_save_fl_direct, 0)
|
||||
|
||||
|
||||
/*
|
||||
In principle the caller should be passing us a value return
|
||||
from xen_save_fl_direct, but for robustness sake we test only
|
||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
||||
setting the interrupt mask state, it checks for unmasked
|
||||
pending events and enters the hypervisor to get them delivered
|
||||
if so.
|
||||
*/
|
||||
ENTRY(xen_restore_fl_direct)
|
||||
testb $X86_EFLAGS_IF>>8, %ah
|
||||
setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* check for unmasked and pending */
|
||||
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_restore_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_restore_fl_direct)
|
||||
RELOC(xen_restore_fl_direct, 2b+1)
|
||||
|
||||
/*
|
||||
We can't use sysexit directly, because we're not running in ring0.
|
||||
@ -289,17 +216,3 @@ ENTRY(xen_iret_crit_fixup)
|
||||
lea 4(%edi),%esp /* point esp to new frame */
|
||||
2: jmp xen_do_upcall
|
||||
|
||||
|
||||
/*
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
check_events:
|
||||
push %eax
|
||||
push %ecx
|
||||
push %edx
|
||||
call xen_force_evtchn_callback
|
||||
pop %edx
|
||||
pop %ecx
|
||||
pop %eax
|
||||
ret
|
||||
|
@ -11,142 +11,14 @@
|
||||
generally too large to inline anyway.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
|
||||
#define ENDPATCH(x) .globl x##_end; x##_end=.
|
||||
|
||||
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
|
||||
#define XEN_EFLAGS_NMI 0x80000000
|
||||
|
||||
#if 1
|
||||
/*
|
||||
FIXME: x86_64 now can support direct access to percpu variables
|
||||
via a segment override. Update xen accordingly.
|
||||
*/
|
||||
#define BUG ud2a
|
||||
#endif
|
||||
|
||||
/*
|
||||
Enable events. This clears the event mask and tests the pending
|
||||
event status with one and operation. If there are pending
|
||||
events, then enter the hypervisor to get them handled.
|
||||
*/
|
||||
ENTRY(xen_irq_enable_direct)
|
||||
BUG
|
||||
|
||||
/* Unmask events */
|
||||
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* Test for pending */
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_irq_enable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_enable_direct)
|
||||
RELOC(xen_irq_enable_direct, 2b+1)
|
||||
|
||||
/*
|
||||
Disabling events is simply a matter of making the event mask
|
||||
non-zero.
|
||||
*/
|
||||
ENTRY(xen_irq_disable_direct)
|
||||
BUG
|
||||
|
||||
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
ENDPATCH(xen_irq_disable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_disable_direct)
|
||||
RELOC(xen_irq_disable_direct, 0)
|
||||
|
||||
/*
|
||||
(xen_)save_fl is used to get the current interrupt enable status.
|
||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||
may be set in the return value. We take advantage of this by
|
||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||
in that byte are 0), but other bits in the return value are
|
||||
undefined. We need to toggle the state of the bit, because
|
||||
Xen and x86 use opposite senses (mask vs enable).
|
||||
*/
|
||||
ENTRY(xen_save_fl_direct)
|
||||
BUG
|
||||
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
setz %ah
|
||||
addb %ah,%ah
|
||||
ENDPATCH(xen_save_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_save_fl_direct)
|
||||
RELOC(xen_save_fl_direct, 0)
|
||||
|
||||
/*
|
||||
In principle the caller should be passing us a value return
|
||||
from xen_save_fl_direct, but for robustness sake we test only
|
||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
||||
setting the interrupt mask state, it checks for unmasked
|
||||
pending events and enters the hypervisor to get them delivered
|
||||
if so.
|
||||
*/
|
||||
ENTRY(xen_restore_fl_direct)
|
||||
BUG
|
||||
|
||||
testb $X86_EFLAGS_IF>>8, %ah
|
||||
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* check for unmasked and pending */
|
||||
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_restore_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_restore_fl_direct)
|
||||
RELOC(xen_restore_fl_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
check_events:
|
||||
push %rax
|
||||
push %rcx
|
||||
push %rdx
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %r8
|
||||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
call xen_force_evtchn_callback
|
||||
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
pop %rdx
|
||||
pop %rcx
|
||||
pop %rax
|
||||
ret
|
||||
#include "xen-asm.h"
|
||||
|
||||
ENTRY(xen_adjust_exception_frame)
|
||||
mov 8+0(%rsp),%rcx
|
||||
|
@ -10,9 +10,12 @@
|
||||
extern const char xen_hypervisor_callback[];
|
||||
extern const char xen_failsafe_callback[];
|
||||
|
||||
extern void *xen_initial_gdt;
|
||||
|
||||
struct trap_info;
|
||||
void xen_copy_trap_info(struct trap_info *traps);
|
||||
|
||||
DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
||||
DECLARE_PER_CPU(unsigned long, xen_cr3);
|
||||
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
|
||||
|
||||
@ -22,6 +25,13 @@ extern struct shared_info *HYPERVISOR_shared_info;
|
||||
|
||||
void xen_setup_mfn_list_list(void);
|
||||
void xen_setup_shared_info(void);
|
||||
void xen_setup_machphys_mapping(void);
|
||||
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
||||
void xen_ident_map_ISA(void);
|
||||
void xen_reserve_top(void);
|
||||
|
||||
void xen_leave_lazy(void);
|
||||
void xen_post_allocator_init(void);
|
||||
|
||||
char * __init xen_memory_setup(void);
|
||||
void __init xen_arch_setup(void);
|
||||
|
@ -445,24 +445,22 @@
|
||||
* section in the linker script will go there too. @phdr should have
|
||||
* a leading colon.
|
||||
*
|
||||
* This macro defines three symbols, __per_cpu_load, __per_cpu_start
|
||||
* and __per_cpu_end. The first one is the vaddr of loaded percpu
|
||||
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
|
||||
* end offset.
|
||||
* Note that this macro defines __per_cpu_load as an absolute symbol.
|
||||
* If there is no need to put the percpu section at a predetermined
|
||||
* address, use PERCPU().
|
||||
*/
|
||||
#define PERCPU_VADDR(vaddr, phdr) \
|
||||
VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \
|
||||
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \
|
||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
|
||||
- LOAD_OFFSET) { \
|
||||
VMLINUX_SYMBOL(__per_cpu_start) = .; \
|
||||
VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\
|
||||
*(.data.percpu.first) \
|
||||
*(.data.percpu.page_aligned) \
|
||||
*(.data.percpu) \
|
||||
*(.data.percpu.shared_aligned) \
|
||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||
} phdr \
|
||||
. = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu);
|
||||
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
|
||||
|
||||
/**
|
||||
* PERCPU - define output section for percpu area, simple version
|
||||
@ -471,7 +469,20 @@
|
||||
* Align to @align and outputs output section for percpu area. This
|
||||
* macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
|
||||
* __per_cpu_start will be identical.
|
||||
*
|
||||
* This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
|
||||
* that __per_cpu_load is defined as a relative symbol against
|
||||
* .data.percpu which is required for relocatable x86_32
|
||||
* configuration.
|
||||
*/
|
||||
#define PERCPU(align) \
|
||||
. = ALIGN(align); \
|
||||
PERCPU_VADDR( , )
|
||||
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
|
||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||
VMLINUX_SYMBOL(__per_cpu_start) = .; \
|
||||
*(.data.percpu.first) \
|
||||
*(.data.percpu.page_aligned) \
|
||||
*(.data.percpu) \
|
||||
*(.data.percpu.shared_aligned) \
|
||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user