kernel_optimize_test/arch/arm/kernel/traps.c
Nicolas Pitre b49c0f24cf [ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space.  It however can produce spurious false negative if a
processor exception occurs in the middle of the operation.  Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.

Some use cases which don't involve a loop require that the operation be
100% reliable though.  This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.

Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):

	#include <stdio.h>

	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)

	int main()
	{
		int i, x = 0;
		for (i = 0; i < 100000000; i++) {
			int v = x;
			if (__kernel_cmpxchg(v, v+1, &x))
				printf("failed at %d: %d vs %d\n", i, v, x);
		}
		printf("done with %d vs %d\n", i, x);
		return 0;
	}

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-26 19:43:58 +00:00

733 lines
18 KiB
C

/*
* linux/arch/arm/kernel/traps.c
*
* Copyright (C) 1995-2002 Russell King
* Fragments that appear the same as linux/arch/i386/kernel/traps.c (C) Linus Torvalds
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 'traps.c' handles hardware exceptions after we have saved some state in
* 'linux/arch/arm/lib/traps.S'. Mostly a debugging aid, but will probably
* kill the offending process.
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/personality.h>
#include <linux/kallsyms.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/traps.h>
#include <asm/io.h>
#include "ptrace.h"
#include "signal.h"
static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
#ifdef CONFIG_DEBUG_USER
unsigned int user_debug;
static int __init user_debug_setup(char *str)
{
get_option(&str, &user_debug);
return 1;
}
__setup("user_debug=", user_debug_setup);
#endif
static void dump_mem(const char *str, unsigned long bottom, unsigned long top);
static inline int in_exception_text(unsigned long ptr)
{
extern char __exception_text_start[];
extern char __exception_text_end[];
return ptr >= (unsigned long)&__exception_text_start &&
ptr < (unsigned long)&__exception_text_end;
}
void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame)
{
#ifdef CONFIG_KALLSYMS
printk("[<%08lx>] ", where);
print_symbol("(%s) ", where);
printk("from [<%08lx>] ", from);
print_symbol("(%s)\n", from);
#else
printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
#endif
if (in_exception_text(where))
dump_mem("Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
}
/*
* Stack pointers should always be within the kernels view of
* physical memory. If it is not there, then we can't dump
* out any information relating to the stack.
*/
static int verify_stack(unsigned long sp)
{
if (sp < PAGE_OFFSET || (sp > (unsigned long)high_memory && high_memory != 0))
return -EFAULT;
return 0;
}
/*
* Dump out the contents of some memory nicely...
*/
static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
{
unsigned long p = bottom & ~31;
mm_segment_t fs;
int i;
/*
* We need to switch to kernel mode so that we can use __get_user
* to safely read from kernel space. Note that we now dump the
* code first, just in case the backtrace kills us.
*/
fs = get_fs();
set_fs(KERNEL_DS);
printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top);
for (p = bottom & ~31; p < top;) {
printk("%04lx: ", p & 0xffff);
for (i = 0; i < 8; i++, p += 4) {
unsigned int val;
if (p < bottom || p >= top)
printk(" ");
else {
__get_user(val, (unsigned long *)p);
printk("%08x ", val);
}
}
printk ("\n");
}
set_fs(fs);
}
static void dump_instr(struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
const int thumb = thumb_mode(regs);
const int width = thumb ? 4 : 8;
mm_segment_t fs;
int i;
/*
* We need to switch to kernel mode so that we can use __get_user
* to safely read from kernel space. Note that we now dump the
* code first, just in case the backtrace kills us.
*/
fs = get_fs();
set_fs(KERNEL_DS);
printk("Code: ");
for (i = -4; i < 1; i++) {
unsigned int val, bad;
if (thumb)
bad = __get_user(val, &((u16 *)addr)[i]);
else
bad = __get_user(val, &((u32 *)addr)[i]);
if (!bad)
printk(i == 0 ? "(%0*x) " : "%0*x ", width, val);
else {
printk("bad PC value.");
break;
}
}
printk("\n");
set_fs(fs);
}
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
unsigned int fp;
int ok = 1;
printk("Backtrace: ");
fp = regs->ARM_fp;
if (!fp) {
printk("no frame pointer");
ok = 0;
} else if (verify_stack(fp)) {
printk("invalid frame pointer 0x%08x", fp);
ok = 0;
} else if (fp < (unsigned long)end_of_stack(tsk))
printk("frame pointer underflow");
printk("\n");
if (ok)
c_backtrace(fp, processor_mode(regs));
}
void dump_stack(void)
{
__backtrace();
}
EXPORT_SYMBOL(dump_stack);
void show_stack(struct task_struct *tsk, unsigned long *sp)
{
unsigned long fp;
if (!tsk)
tsk = current;
if (tsk != current)
fp = thread_saved_fp(tsk);
else
asm("mov %0, fp" : "=r" (fp) : : "cc");
c_backtrace(fp, 0x10);
barrier();
}
#ifdef CONFIG_PREEMPT
#define S_PREEMPT " PREEMPT"
#else
#define S_PREEMPT ""
#endif
#ifdef CONFIG_SMP
#define S_SMP " SMP"
#else
#define S_SMP ""
#endif
static void __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs)
{
struct task_struct *tsk = thread->task;
static int die_counter;
printk("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
str, err, ++die_counter);
print_modules();
__show_regs(regs);
printk("Process %s (pid: %d, stack limit = 0x%p)\n",
tsk->comm, task_pid_nr(tsk), thread + 1);
if (!user_mode(regs) || in_interrupt()) {
dump_mem("Stack: ", regs->ARM_sp,
THREAD_SIZE + (unsigned long)task_stack_page(tsk));
dump_backtrace(regs, tsk);
dump_instr(regs);
}
}
DEFINE_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
*/
NORET_TYPE void die(const char *str, struct pt_regs *regs, int err)
{
struct thread_info *thread = current_thread_info();
oops_enter();
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
__die(str, err, thread, regs);
bust_spinlocks(0);
add_taint(TAINT_DIE);
spin_unlock_irq(&die_lock);
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
do_exit(SIGSEGV);
}
void arm_notify_die(const char *str, struct pt_regs *regs,
struct siginfo *info, unsigned long err, unsigned long trap)
{
if (user_mode(regs)) {
current->thread.error_code = err;
current->thread.trap_no = trap;
force_sig_info(info->si_signo, info, current);
} else {
die(str, regs, err);
}
}
static LIST_HEAD(undef_hook);
static DEFINE_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
unsigned long flags;
spin_lock_irqsave(&undef_lock, flags);
list_add(&hook->node, &undef_hook);
spin_unlock_irqrestore(&undef_lock, flags);
}
void unregister_undef_hook(struct undef_hook *hook)
{
unsigned long flags;
spin_lock_irqsave(&undef_lock, flags);
list_del(&hook->node);
spin_unlock_irqrestore(&undef_lock, flags);
}
asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
{
unsigned int correction = thumb_mode(regs) ? 2 : 4;
unsigned int instr;
struct undef_hook *hook;
siginfo_t info;
void __user *pc;
unsigned long flags;
/*
* According to the ARM ARM, PC is 2 or 4 bytes ahead,
* depending whether we're in Thumb mode or not.
* Correct this offset.
*/
regs->ARM_pc -= correction;
pc = (void __user *)instruction_pointer(regs);
if (processor_mode(regs) == SVC_MODE) {
instr = *(u32 *) pc;
} else if (thumb_mode(regs)) {
get_user(instr, (u16 __user *)pc);
} else {
get_user(instr, (u32 __user *)pc);
}
spin_lock_irqsave(&undef_lock, flags);
list_for_each_entry(hook, &undef_hook, node) {
if ((instr & hook->instr_mask) == hook->instr_val &&
(regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) {
if (hook->fn(regs, instr) == 0) {
spin_unlock_irqrestore(&undef_lock, flags);
return;
}
}
}
spin_unlock_irqrestore(&undef_lock, flags);
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
current->comm, task_pid_nr(current), pc);
dump_instr(regs);
}
#endif
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
info.si_addr = pc;
arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
}
asmlinkage void do_unexp_fiq (struct pt_regs *regs)
{
printk("Hmm. Unexpected FIQ received, but trying to continue\n");
printk("You may have a hardware problem...\n");
}
/*
* bad_mode handles the impossible case in the vectors. If you see one of
* these, then it's extremely serious, and could mean you have buggy hardware.
* It never returns, and never tries to sync. We hope that we can at least
* dump out some state information...
*/
asmlinkage void bad_mode(struct pt_regs *regs, int reason)
{
console_verbose();
printk(KERN_CRIT "Bad mode in %s handler detected\n", handler[reason]);
die("Oops - bad mode", regs, 0);
local_irq_disable();
panic("bad mode");
}
static int bad_syscall(int n, struct pt_regs *regs)
{
struct thread_info *thread = current_thread_info();
siginfo_t info;
if (current->personality != PER_LINUX &&
current->personality != PER_LINUX_32BIT &&
thread->exec_domain->handler) {
thread->exec_domain->handler(n, regs);
return regs->ARM_r0;
}
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_SYSCALL) {
printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
task_pid_nr(current), current->comm, n);
dump_instr(regs);
}
#endif
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLTRP;
info.si_addr = (void __user *)instruction_pointer(regs) -
(thumb_mode(regs) ? 2 : 4);
arm_notify_die("Oops - bad syscall", regs, &info, n, 0);
return regs->ARM_r0;
}
static inline void
do_cache_op(unsigned long start, unsigned long end, int flags)
{
struct vm_area_struct *vma;
if (end < start || flags)
return;
vma = find_vma(current->active_mm, start);
if (vma && vma->vm_start < end) {
if (start < vma->vm_start)
start = vma->vm_start;
if (end > vma->vm_end)
end = vma->vm_end;
flush_cache_user_range(vma, start, end);
}
}
/*
* Handle all unrecognised system calls.
* 0x9f0000 - 0x9fffff are some more esoteric system calls
*/
#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
asmlinkage int arm_syscall(int no, struct pt_regs *regs)
{
struct thread_info *thread = current_thread_info();
siginfo_t info;
if ((no >> 16) != (__ARM_NR_BASE>> 16))
return bad_syscall(no, regs);
switch (no & 0xffff) {
case 0: /* branch through 0 */
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
info.si_addr = NULL;
arm_notify_die("branch through zero", regs, &info, 0, 0);
return 0;
case NR(breakpoint): /* SWI BREAK_POINT */
regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
ptrace_break(current, regs);
return regs->ARM_r0;
/*
* Flush a region from virtual address 'r0' to virtual address 'r1'
* _exclusive_. There is no alignment requirement on either address;
* user space does not need to know the hardware cache layout.
*
* r2 contains flags. It should ALWAYS be passed as ZERO until it
* is defined to be something else. For now we ignore it, but may
* the fires of hell burn in your belly if you break this rule. ;)
*
* (at a later date, we may want to allow this call to not flush
* various aspects of the cache. Passing '0' will guarantee that
* everything necessary gets flushed to maintain consistency in
* the specified region).
*/
case NR(cacheflush):
do_cache_op(regs->ARM_r0, regs->ARM_r1, regs->ARM_r2);
return 0;
case NR(usr26):
if (!(elf_hwcap & HWCAP_26BIT))
break;
regs->ARM_cpsr &= ~MODE32_BIT;
return regs->ARM_r0;
case NR(usr32):
if (!(elf_hwcap & HWCAP_26BIT))
break;
regs->ARM_cpsr |= MODE32_BIT;
return regs->ARM_r0;
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
#if defined(CONFIG_HAS_TLS_REG)
asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
#elif !defined(CONFIG_TLS_REG_EMUL)
/*
* User space must never try to access this directly.
* Expect your app to break eventually if you do so.
* The user helper at 0xffff0fe0 must be used instead.
* (see entry-armv.S for details)
*/
*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
#endif
return 0;
#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG
/*
* Atomically store r1 in *r2 if *r2 is equal to r0 for user space.
* Return zero in r0 if *MEM was changed or non-zero if no exchange
* happened. Also set the user C flag accordingly.
* If access permissions have to be fixed up then non-zero is
* returned and the operation has to be re-attempted.
*
* *NOTE*: This is a ghost syscall private to the kernel. Only the
* __kuser_cmpxchg code in entry-armv.S should be aware of its
* existence. Don't ever use this from user code.
*/
case 0xfff0:
for (;;) {
extern void do_DataAbort(unsigned long addr, unsigned int fsr,
struct pt_regs *regs);
unsigned long val;
unsigned long addr = regs->ARM_r2;
struct mm_struct *mm = current->mm;
pgd_t *pgd; pmd_t *pmd; pte_t *pte;
spinlock_t *ptl;
regs->ARM_cpsr &= ~PSR_C_BIT;
down_read(&mm->mmap_sem);
pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd))
goto bad_access;
pmd = pmd_offset(pgd, addr);
if (!pmd_present(*pmd))
goto bad_access;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!pte_present(*pte) || !pte_dirty(*pte)) {
pte_unmap_unlock(pte, ptl);
goto bad_access;
}
val = *(unsigned long *)addr;
val -= regs->ARM_r0;
if (val == 0) {
*(unsigned long *)addr = regs->ARM_r1;
regs->ARM_cpsr |= PSR_C_BIT;
}
pte_unmap_unlock(pte, ptl);
up_read(&mm->mmap_sem);
return val;
bad_access:
up_read(&mm->mmap_sem);
/* simulate a write access fault */
do_DataAbort(addr, 15 + (1 << 11), regs);
}
#endif
default:
/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
if not implemented, rather than raising SIGILL. This
way the calling program can gracefully determine whether
a feature is supported. */
if (no <= 0x7ff)
return -ENOSYS;
break;
}
#ifdef CONFIG_DEBUG_USER
/*
* experience shows that these seem to indicate that
* something catastrophic has happened
*/
if (user_debug & UDBG_SYSCALL) {
printk("[%d] %s: arm syscall %d\n",
task_pid_nr(current), current->comm, no);
dump_instr(regs);
if (user_mode(regs)) {
__show_regs(regs);
c_backtrace(regs->ARM_fp, processor_mode(regs));
}
}
#endif
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLTRP;
info.si_addr = (void __user *)instruction_pointer(regs) -
(thumb_mode(regs) ? 2 : 4);
arm_notify_die("Oops - bad syscall(2)", regs, &info, no, 0);
return 0;
}
#ifdef CONFIG_TLS_REG_EMUL
/*
* We might be running on an ARMv6+ processor which should have the TLS
* register but for some reason we can't use it, or maybe an SMP system
* using a pre-ARMv6 processor (there are apparently a few prototypes like
* that in existence) and therefore access to that register must be
* emulated.
*/
static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
{
int reg = (instr >> 12) & 15;
if (reg == 15)
return 1;
regs->uregs[reg] = current_thread_info()->tp_value;
regs->ARM_pc += 4;
return 0;
}
static struct undef_hook arm_mrc_hook = {
.instr_mask = 0x0fff0fff,
.instr_val = 0x0e1d0f70,
.cpsr_mask = PSR_T_BIT,
.cpsr_val = 0,
.fn = get_tp_trap,
};
static int __init arm_mrc_hook_init(void)
{
register_undef_hook(&arm_mrc_hook);
return 0;
}
late_initcall(arm_mrc_hook_init);
#endif
void __bad_xchg(volatile void *ptr, int size)
{
printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
__builtin_return_address(0), ptr, size);
BUG();
}
EXPORT_SYMBOL(__bad_xchg);
/*
* A data abort trap was taken, but we did not handle the instruction.
* Try to abort the user program, or panic if it was the kernel.
*/
asmlinkage void
baddataabort(int code, unsigned long instr, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
siginfo_t info;
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_BADABORT) {
printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
task_pid_nr(current), current->comm, code, instr);
dump_instr(regs);
show_pte(current->mm, addr);
}
#endif
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
info.si_addr = (void __user *)addr;
arm_notify_die("unknown data abort code", regs, &info, instr, 0);
}
void __attribute__((noreturn)) __bug(const char *file, int line)
{
printk(KERN_CRIT"kernel BUG at %s:%d!\n", file, line);
*(int *)0 = 0;
/* Avoid "noreturn function does return" */
for (;;);
}
EXPORT_SYMBOL(__bug);
void __readwrite_bug(const char *fn)
{
printk("%s called, but not implemented\n", fn);
BUG();
}
EXPORT_SYMBOL(__readwrite_bug);
void __pte_error(const char *file, int line, unsigned long val)
{
printk("%s:%d: bad pte %08lx.\n", file, line, val);
}
void __pmd_error(const char *file, int line, unsigned long val)
{
printk("%s:%d: bad pmd %08lx.\n", file, line, val);
}
void __pgd_error(const char *file, int line, unsigned long val)
{
printk("%s:%d: bad pgd %08lx.\n", file, line, val);
}
asmlinkage void __div0(void)
{
printk("Division by zero in kernel.\n");
dump_stack();
}
EXPORT_SYMBOL(__div0);
void abort(void)
{
BUG();
/* if that doesn't kill us, halt */
panic("Oops failed to kill thread");
}
EXPORT_SYMBOL(abort);
void __init trap_init(void)
{
unsigned long vectors = CONFIG_VECTORS_BASE;
extern char __stubs_start[], __stubs_end[];
extern char __vectors_start[], __vectors_end[];
extern char __kuser_helper_start[], __kuser_helper_end[];
int kuser_sz = __kuser_helper_end - __kuser_helper_start;
/*
* Copy the vectors, stubs and kuser helpers (in entry-armv.S)
* into the vector page, mapped at 0xffff0000, and ensure these
* are visible to the instruction stream.
*/
memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
/*
* Copy signal return handlers into the vector page, and
* set sigreturn to be a pointer to these.
*/
memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
sizeof(sigreturn_codes));
flush_icache_range(vectors, vectors + PAGE_SIZE);
modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
}