x86-64: Clean up vdso/kernel shared variables
Variables that are shared between the vdso and the kernel are currently a bit of a mess. They are each defined with their own magic, they are accessed differently in the kernel, the vsyscall page, and the vdso, and one of them (vsyscall_clock) doesn't even really exist. This changes them all to use a common mechanism. All of them are declared in vvar.h with a fixed address (validated by the linker script). In the kernel (as before), they look like ordinary read-write variables. In the vsyscall page and the vdso, they are accessed through a new macro VVAR, which gives read-only access. The vdso is now loaded verbatim into memory without any fixups. As a side bonus, access from the vdso is faster because a level of indirection is removed. While we're at it, pack jiffies and vgetcpu_mode into the same cacheline. Signed-off-by: Andy Lutomirski <luto@mit.edu> Cc: Andi Kleen <andi@firstfloor.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Borislav Petkov <bp@amd64.org> Link: http://lkml.kernel.org/r/%3C7357882fbb51fa30491636a7b6528747301b7ee9.1306156808.git.luto%40mit.edu%3E Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
d762f43831
commit
8c49d9a74b
@ -1,20 +1,6 @@
|
||||
#ifndef _ASM_X86_VDSO_H
|
||||
#define _ASM_X86_VDSO_H
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
extern const char VDSO64_PRELINK[];
|
||||
|
||||
/*
|
||||
* Given a pointer to the vDSO image, find the pointer to VDSO64_name
|
||||
* as that symbol is defined in the vDSO sources or linker script.
|
||||
*/
|
||||
#define VDSO64_SYMBOL(base, name) \
|
||||
({ \
|
||||
extern const char VDSO64_##name[]; \
|
||||
(void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
|
||||
extern const char VDSO32_PRELINK[];
|
||||
|
||||
|
@ -23,8 +23,6 @@ struct vsyscall_gtod_data {
|
||||
struct timespec wall_to_monotonic;
|
||||
struct timespec wall_time_coarse;
|
||||
};
|
||||
extern struct vsyscall_gtod_data __vsyscall_gtod_data
|
||||
__section_vsyscall_gtod_data;
|
||||
extern struct vsyscall_gtod_data vsyscall_gtod_data;
|
||||
|
||||
#endif /* _ASM_X86_VGTOD_H */
|
||||
|
@ -16,27 +16,19 @@ enum vsyscall_num {
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
|
||||
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
|
||||
|
||||
/* Definitions for CONFIG_GENERIC_TIME */
|
||||
#define __section_vsyscall_gtod_data __attribute__ \
|
||||
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
|
||||
#define __section_vsyscall_clock __attribute__ \
|
||||
((unused, __section__ (".vsyscall_clock"),aligned(16)))
|
||||
#define __vsyscall_fn \
|
||||
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
|
||||
|
||||
#define VGETCPU_RDTSCP 1
|
||||
#define VGETCPU_LSL 2
|
||||
|
||||
extern int __vgetcpu_mode;
|
||||
extern volatile unsigned long __jiffies;
|
||||
|
||||
/* kernel space (writeable) */
|
||||
extern int vgetcpu_mode;
|
||||
extern struct timezone sys_tz;
|
||||
|
||||
#include <asm/vvar.h>
|
||||
|
||||
extern void map_vsyscall(void);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
52
arch/x86/include/asm/vvar.h
Normal file
52
arch/x86/include/asm/vvar.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* vvar.h: Shared vDSO/kernel variable declarations
|
||||
* Copyright (c) 2011 Andy Lutomirski
|
||||
* Subject to the GNU General Public License, version 2
|
||||
*
|
||||
* A handful of variables are accessible (read-only) from userspace
|
||||
* code in the vsyscall page and the vdso. They are declared here.
|
||||
* Some other file must define them with DEFINE_VVAR.
|
||||
*
|
||||
* In normal kernel code, they are used like any other variable.
|
||||
* In user code, they are accessed through the VVAR macro.
|
||||
*
|
||||
* Each of these variables lives in the vsyscall page, and each
|
||||
* one needs a unique offset within the little piece of the page
|
||||
* reserved for vvars. Specify that offset in DECLARE_VVAR.
|
||||
* (There are 896 bytes available. If you mess up, the linker will
|
||||
* catch it.)
|
||||
*/
|
||||
|
||||
/* Offset of vars within vsyscall page */
|
||||
#define VSYSCALL_VARS_OFFSET (3072 + 128)
|
||||
|
||||
#if defined(__VVAR_KERNEL_LDS)
|
||||
|
||||
/* The kernel linker script defines its own magic to put vvars in the
|
||||
* right place.
|
||||
*/
|
||||
#define DECLARE_VVAR(offset, type, name) \
|
||||
EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset)
|
||||
|
||||
#else
|
||||
|
||||
/* Outside the kernel linker script: declare vvaraddr_<name>, a constant
 * pointer-to-const aimed at the variable's fixed address, computed as
 * VSYSCALL_START + VSYSCALL_VARS_OFFSET + offset.
 */
#define DECLARE_VVAR(offset, type, name) \
|
||||
static type const * const vvaraddr_ ## name = \
|
||||
(void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset));
|
||||
|
||||
/* Define the storage for a vvar as __vvar_<name>, placed in its own
 * section .vsyscall_var_<name> with 16-byte alignment. An initializer
 * may follow the macro invocation.
 */
#define DEFINE_VVAR(type, name) \
|
||||
type __vvar_ ## name \
|
||||
__attribute__((section(".vsyscall_var_" #name), aligned(16)))
|
||||
|
||||
/* Access a vvar by dereferencing its const-qualified vvaraddr_<name> pointer. */
#define VVAR(name) (*vvaraddr_ ## name)
|
||||
|
||||
#endif
|
||||
|
||||
/* DECLARE_VVAR(offset, type, name) */
|
||||
|
||||
DECLARE_VVAR(0, volatile unsigned long, jiffies)
|
||||
DECLARE_VVAR(8, int, vgetcpu_mode)
|
||||
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
|
||||
|
||||
#undef DECLARE_VVAR
|
||||
#undef VSYSCALL_VARS_OFFSET
|
@ -23,7 +23,7 @@
|
||||
#include <asm/time.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
|
||||
DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
|
||||
#endif
|
||||
|
||||
unsigned long profile_pc(struct pt_regs *regs)
|
||||
|
@ -777,8 +777,8 @@ static cycle_t __vsyscall_fn vread_tsc(void)
|
||||
ret = (cycle_t)vget_cycles();
|
||||
rdtsc_barrier();
|
||||
|
||||
return ret >= __vsyscall_gtod_data.clock.cycle_last ?
|
||||
ret : __vsyscall_gtod_data.clock.cycle_last;
|
||||
return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
|
||||
ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -161,6 +161,12 @@ SECTIONS
|
||||
|
||||
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
|
||||
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
|
||||
/* Emit output section .vsyscall_var_<x> at ADDR(.vsyscall_0) + offset,
 * loaded at VLOAD(.vsyscall_var_<x>) and collecting the input section of
 * the same name, then define symbol x as that section's VVIRT address.
 */
#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \
|
||||
ADDR(.vsyscall_0) + offset \
|
||||
: AT(VLOAD(.vsyscall_var_ ## x)) { \
|
||||
*(.vsyscall_var_ ## x) \
|
||||
} \
|
||||
x = VVIRT(.vsyscall_var_ ## x);
|
||||
|
||||
. = ALIGN(4096);
|
||||
__vsyscall_0 = .;
|
||||
@ -175,18 +181,6 @@ SECTIONS
|
||||
*(.vsyscall_fn)
|
||||
}
|
||||
|
||||
. = ALIGN(L1_CACHE_BYTES);
|
||||
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
|
||||
*(.vsyscall_gtod_data)
|
||||
}
|
||||
|
||||
vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
|
||||
.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
|
||||
*(.vsyscall_clock)
|
||||
}
|
||||
vsyscall_clock = VVIRT(.vsyscall_clock);
|
||||
|
||||
|
||||
.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
|
||||
*(.vsyscall_1)
|
||||
}
|
||||
@ -194,21 +188,14 @@ SECTIONS
|
||||
*(.vsyscall_2)
|
||||
}
|
||||
|
||||
.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
|
||||
*(.vgetcpu_mode)
|
||||
}
|
||||
vgetcpu_mode = VVIRT(.vgetcpu_mode);
|
||||
|
||||
. = ALIGN(L1_CACHE_BYTES);
|
||||
.jiffies : AT(VLOAD(.jiffies)) {
|
||||
*(.jiffies)
|
||||
}
|
||||
jiffies = VVIRT(.jiffies);
|
||||
|
||||
.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
|
||||
*(.vsyscall_3)
|
||||
}
|
||||
|
||||
#define __VVAR_KERNEL_LDS
|
||||
#include <asm/vvar.h>
|
||||
#undef __VVAR_KERNEL_LDS
|
||||
|
||||
. = __vsyscall_0 + PAGE_SIZE;
|
||||
|
||||
#undef VSYSCALL_ADDR
|
||||
@ -216,6 +203,7 @@ SECTIONS
|
||||
#undef VLOAD
|
||||
#undef VVIRT_OFFSET
|
||||
#undef VVIRT
|
||||
#undef EMIT_VVAR
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
|
@ -49,15 +49,8 @@
|
||||
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
|
||||
#define __syscall_clobber "r11","cx","memory"
|
||||
|
||||
/*
|
||||
* vsyscall_gtod_data contains data that is :
|
||||
* - readonly from vsyscalls
|
||||
* - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
|
||||
* Try to keep this structure as small as possible to avoid cache line ping pongs
|
||||
*/
|
||||
int __vgetcpu_mode __section_vgetcpu_mode;
|
||||
|
||||
struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
|
||||
DEFINE_VVAR(int, vgetcpu_mode);
|
||||
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
|
||||
{
|
||||
.lock = SEQLOCK_UNLOCKED,
|
||||
.sysctl_enabled = 1,
|
||||
@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
|
||||
*/
|
||||
static __always_inline void do_get_tz(struct timezone * tz)
|
||||
{
|
||||
*tz = __vsyscall_gtod_data.sys_tz;
|
||||
*tz = VVAR(vsyscall_gtod_data).sys_tz;
|
||||
}
|
||||
|
||||
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
|
||||
unsigned long mult, shift, nsec;
|
||||
cycle_t (*vread)(void);
|
||||
do {
|
||||
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
|
||||
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
|
||||
|
||||
vread = __vsyscall_gtod_data.clock.vread;
|
||||
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
|
||||
vread = VVAR(vsyscall_gtod_data).clock.vread;
|
||||
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
|
||||
!vread)) {
|
||||
gettimeofday(tv,NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
now = vread();
|
||||
base = __vsyscall_gtod_data.clock.cycle_last;
|
||||
mask = __vsyscall_gtod_data.clock.mask;
|
||||
mult = __vsyscall_gtod_data.clock.mult;
|
||||
shift = __vsyscall_gtod_data.clock.shift;
|
||||
base = VVAR(vsyscall_gtod_data).clock.cycle_last;
|
||||
mask = VVAR(vsyscall_gtod_data).clock.mask;
|
||||
mult = VVAR(vsyscall_gtod_data).clock.mult;
|
||||
shift = VVAR(vsyscall_gtod_data).clock.shift;
|
||||
|
||||
tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
|
||||
nsec = __vsyscall_gtod_data.wall_time_nsec;
|
||||
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
|
||||
tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
|
||||
nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
|
||||
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
|
||||
|
||||
/* calculate interval: */
|
||||
cycle_delta = (now - base) & mask;
|
||||
@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
|
||||
{
|
||||
unsigned seq;
|
||||
time_t result;
|
||||
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
|
||||
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
|
||||
return time_syscall(t);
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
|
||||
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
|
||||
|
||||
result = __vsyscall_gtod_data.wall_time_sec;
|
||||
result = VVAR(vsyscall_gtod_data).wall_time_sec;
|
||||
|
||||
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
|
||||
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
|
||||
|
||||
if (t)
|
||||
*t = result;
|
||||
@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
|
||||
We do this here because otherwise user space would do it on
|
||||
its own in a likely inferior way (no access to jiffies).
|
||||
If you don't like it pass NULL. */
|
||||
if (tcache && tcache->blob[0] == (j = __jiffies)) {
|
||||
if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
|
||||
p = tcache->blob[1];
|
||||
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
|
||||
} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
|
||||
/* Load per CPU data from RDTSCP */
|
||||
native_read_tscp(&p);
|
||||
} else {
|
||||
|
@ -11,7 +11,7 @@ vdso-install-$(VDSO32-y) += $(vdso32-images)
|
||||
|
||||
|
||||
# files to link into the vdso
|
||||
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
|
||||
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
|
||||
|
||||
# files to link into kernel
|
||||
obj-$(VDSO64-y) += vma.o vdso.o
|
||||
|
@ -22,9 +22,8 @@
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/io.h>
|
||||
#include "vextern.h"
|
||||
|
||||
#define gtod vdso_vsyscall_gtod_data
|
||||
#define gtod (&VVAR(vsyscall_gtod_data))
|
||||
|
||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||
{
|
||||
|
@ -28,10 +28,3 @@ VERSION {
|
||||
}
|
||||
|
||||
VDSO64_PRELINK = VDSO_PRELINK;
|
||||
|
||||
/*
|
||||
* Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
|
||||
*/
|
||||
#define VEXTERN(x) VDSO64_ ## x = vdso_ ## x;
|
||||
#include "vextern.h"
|
||||
#undef VEXTERN
|
||||
|
@ -1,16 +0,0 @@
|
||||
#ifndef VEXTERN
|
||||
#include <asm/vsyscall.h>
|
||||
#define VEXTERN(x) \
|
||||
extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
|
||||
#endif
|
||||
|
||||
#define VMAGIC 0xfeedbabeabcdefabUL
|
||||
|
||||
/* Any kernel variables used in the vDSO must be exported in the main
|
||||
kernel's vmlinux.lds.S/vsyscall.h/proper __section and
|
||||
put into vextern.h and be referenced as a pointer with vdso prefix.
|
||||
The main kernel later fills in the values. */
|
||||
|
||||
VEXTERN(jiffies)
|
||||
VEXTERN(vgetcpu_mode)
|
||||
VEXTERN(vsyscall_gtod_data)
|
@ -11,14 +11,13 @@
|
||||
#include <linux/time.h>
|
||||
#include <asm/vsyscall.h>
|
||||
#include <asm/vgtod.h>
|
||||
#include "vextern.h"
|
||||
|
||||
notrace long
|
||||
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
|
||||
{
|
||||
unsigned int p;
|
||||
|
||||
if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
|
||||
if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
|
||||
/* Load per CPU data from RDTSCP */
|
||||
native_read_tscp(&p);
|
||||
} else {
|
||||
|
@ -15,9 +15,6 @@
|
||||
#include <asm/proto.h>
|
||||
#include <asm/vdso.h>
|
||||
|
||||
#include "vextern.h" /* Just for VMAGIC. */
|
||||
#undef VEXTERN
|
||||
|
||||
unsigned int __read_mostly vdso_enabled = 1;
|
||||
|
||||
extern char vdso_start[], vdso_end[];
|
||||
@ -26,20 +23,10 @@ extern unsigned short vdso_sync_cpuid;
|
||||
static struct page **vdso_pages;
|
||||
static unsigned vdso_size;
|
||||
|
||||
/*
 * Check that the pointer-sized slot at @p still holds the VMAGIC
 * placeholder. If it does not, log @name as broken and clear
 * vdso_enabled. Returns @p unchanged in either case.
 */
static inline void *var_ref(void *p, char *name)
|
||||
{
|
||||
if (*(void **)p != (void *)VMAGIC) {
|
||||
printk("VDSO: variable %s broken\n", name);
|
||||
vdso_enabled = 0;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static int __init init_vdso_vars(void)
|
||||
{
|
||||
int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
int i;
|
||||
char *vbase;
|
||||
|
||||
vdso_size = npages << PAGE_SHIFT;
|
||||
vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
|
||||
@ -54,20 +41,6 @@ static int __init init_vdso_vars(void)
|
||||
copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
|
||||
}
|
||||
|
||||
vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
|
||||
if (!vbase)
|
||||
goto oom;
|
||||
|
||||
if (memcmp(vbase, "\177ELF", 4)) {
|
||||
printk("VDSO: I'm broken; not ELF\n");
|
||||
vdso_enabled = 0;
|
||||
}
|
||||
|
||||
#define VEXTERN(x) \
|
||||
*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
|
||||
#include "vextern.h"
|
||||
#undef VEXTERN
|
||||
vunmap(vbase);
|
||||
return 0;
|
||||
|
||||
oom:
|
||||
|
@ -1,12 +0,0 @@
|
||||
/* Define pointer to external vDSO variables.
|
||||
These are part of the vDSO. The kernel fills in the real addresses
|
||||
at boot time. This is done because when the vdso is linked the
|
||||
kernel isn't yet and we don't know the final addresses. */
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/time.h>
|
||||
#include <asm/vsyscall.h>
|
||||
#include <asm/timex.h>
|
||||
#include <asm/vgtod.h>
|
||||
|
||||
#define VEXTERN(x) typeof (__ ## x) *const vdso_ ## x = (void *)VMAGIC;
|
||||
#include "vextern.h"
|
Loading…
Reference in New Issue
Block a user