kernel_optimize_test/arch/um/kernel/skas/mmu.c
Hugh Dickins 4c21e2f244 [PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with
a many-threaded application which concurrently initializes different parts of
a large anonymous area.

This patch corrects that, by using a separate spinlock per page table page, to
guard the page table entries in that page, instead of using the mm's single
page_table_lock.  (But even then, page_table_lock is still used to guard page
table allocation, and anon_vma allocation.)

In this implementation, the spinlock is tucked inside the struct page of the
page table page: with a BUILD_BUG_ON in case it overflows - which it would in
the case of 32-bit PA-RISC with spinlock debugging enabled.

Splitting the lock is not quite for free: another cacheline access.  Ideally,
I suppose we would use split ptlock only for multi-threaded processes on
multi-cpu machines; but deciding that dynamically would have its own costs.
So for now enable it by config, at some number of cpus - since the Kconfig
language doesn't support inequalities, let preprocessor compare that with
NR_CPUS.  But I don't think it's worth being user-configurable: for good
testing of both split and unsplit configs, split now at 4 cpus, and perhaps
change that to 8 later.

There is a benefit even for singly threaded processes: kswapd can be attacking
one part of the mm while another part is busy faulting.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-29 21:40:42 -07:00

155 lines
3.7 KiB
C

/*
* Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
* Licensed under the GPL
*/
#include "linux/config.h"
#include "linux/sched.h"
#include "linux/list.h"
#include "linux/spinlock.h"
#include "linux/slab.h"
#include "linux/errno.h"
#include "linux/mm.h"
#include "asm/current.h"
#include "asm/segment.h"
#include "asm/mmu.h"
#include "asm/pgalloc.h"
#include "asm/pgtable.h"
#include "os.h"
#include "skas.h"
extern int __syscall_stub_start;
static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
unsigned long kernel)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
pud = pud_alloc(mm, pgd, proc);
if (!pud)
goto out;
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
pte = pte_alloc_map(mm, pmd, proc);
if (!pte)
goto out_pte;
/* There's an interaction between the skas0 stub pages, stack
* randomization, and the BUG at the end of exit_mmap. exit_mmap
* checks that the number of page tables freed is the same as had
* been allocated. If the stack is on the last page table page,
* then the stack pte page will be freed, and if not, it won't. To
* avoid having to know where the stack is, or if the process mapped
* something at the top of its address space for some other reason,
* we set TASK_SIZE to end at the start of the last page table.
* This keeps exit_mmap off the last page, but introduces a leak
* of that page. So, we hang onto it here and free it in
* destroy_context_skas.
*/
mm->context.skas.last_page_table = pmd_page_kernel(*pmd);
#ifdef CONFIG_3_LEVEL_PGTABLES
mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
#endif
*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
*pte = pte_mkexec(*pte);
*pte = pte_wrprotect(*pte);
return(0);
out_pmd:
pud_free(pud);
out_pte:
pmd_free(pmd);
out:
return(-ENOMEM);
}
int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
{
struct mm_struct *cur_mm = current->mm;
struct mm_id *cur_mm_id = &cur_mm->context.skas.id;
struct mm_id *mm_id = &mm->context.skas.id;
unsigned long stack = 0;
int from, ret = -ENOMEM;
if(!proc_mm || !ptrace_faultinfo){
stack = get_zeroed_page(GFP_KERNEL);
if(stack == 0)
goto out;
/* This zeros the entry that pgd_alloc didn't, needed since
* we are about to reinitialize it, and want mm.nr_ptes to
* be accurate.
*/
mm->pgd[USER_PTRS_PER_PGD] = __pgd(0);
ret = init_stub_pte(mm, CONFIG_STUB_CODE,
(unsigned long) &__syscall_stub_start);
if(ret)
goto out_free;
ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack);
if(ret)
goto out_free;
mm->nr_ptes--;
}
mm_id->stack = stack;
if(proc_mm){
if((cur_mm != NULL) && (cur_mm != &init_mm))
from = cur_mm_id->u.mm_fd;
else from = -1;
ret = new_mm(from, stack);
if(ret < 0){
printk("init_new_context_skas - new_mm failed, "
"errno = %d\n", ret);
goto out_free;
}
mm_id->u.mm_fd = ret;
}
else {
if((cur_mm != NULL) && (cur_mm != &init_mm))
mm_id->u.pid = copy_context_skas0(stack,
cur_mm_id->u.pid);
else mm_id->u.pid = start_userspace(stack);
}
return 0;
out_free:
if(mm_id->stack != 0)
free_page(mm_id->stack);
out:
return ret;
}
void destroy_context_skas(struct mm_struct *mm)
{
struct mmu_context_skas *mmu = &mm->context.skas;
if(proc_mm)
os_close_file(mmu->id.u.mm_fd);
else
os_kill_ptraced_process(mmu->id.u.pid, 1);
if(!proc_mm || !ptrace_faultinfo){
free_page(mmu->id.stack);
pte_lock_deinit(virt_to_page(mmu->last_page_table));
pte_free_kernel((pte_t *) mmu->last_page_table);
dec_page_state(nr_page_table_pages);
#ifdef CONFIG_3_LEVEL_PGTABLES
pmd_free((pmd_t *) mmu->last_pmd);
#endif
}
}