kernel_optimize_test/arch/sparc64/lib/copy_page.S
David S. Miller c4bce90ea2 [SPARC64]: Deal with PTE layout differences in SUN4V.
Yes, you heard it right, they changed the PTE layout for
SUN4V.  Ho hum...

This is the simple and inefficient way to support this.
It'll get optimized, don't worry.

Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-20 01:12:25 -08:00

251 lines
5.8 KiB
ArmAsm

/* clear_page.S: UltraSparc optimized copy page.
*
* Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com)
* Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
*/
#include <asm/visasm.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/spitfire.h>
#include <asm/head.h>
/* What we used to do was lock a TLB entry into a specific
* TLB slot, clear the page with interrupts disabled, then
* restore the original TLB entry. This was great for
* disturbing the TLB as little as possible, but it meant
* we had to keep interrupts disabled for a long time.
*
* Now, we simply use the normal TLB loading mechanism,
* and this makes the cpu choose a slot all by itself.
* Then we do a normal TLB flush on exit. We need only
* disable preemption during the clear.
*/
#define DCACHE_SIZE (PAGE_SIZE * 2)
#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
#define PAGE_SIZE_REM 0x80
#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
#define PAGE_SIZE_REM 0x100
#else
#error Wrong PAGE_SHIFT specified
#endif
#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
fmovd %reg0, %f48; fmovd %reg1, %f50; \
fmovd %reg2, %f52; fmovd %reg3, %f54; \
fmovd %reg4, %f56; fmovd %reg5, %f58; \
fmovd %reg6, %f60; fmovd %reg7, %f62;
.text
.align 32
.globl copy_user_page
.type copy_user_page,#function
copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
lduw [%g6 + TI_PRE_COUNT], %o4
sethi %uhi(PAGE_OFFSET), %g2
sethi %hi(PAGE_SIZE), %o3
sllx %g2, 32, %g2
sethi %hi(PAGE_KERNEL_LOCKED), %g3
ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
sub %o0, %g2, %g1 ! dest paddr
sub %o1, %g2, %g2 ! src paddr
and %o2, %o3, %o0 ! vaddr D-cache alias bit
or %g1, %g3, %g1 ! dest TTE data
or %g2, %g3, %g2 ! src TTE data
sethi %hi(TLBTEMP_BASE), %o3
sethi %hi(DCACHE_SIZE), %o1
add %o0, %o3, %o0 ! dest TTE vaddr
add %o4, 1, %o2
add %o0, %o1, %o1 ! src TTE vaddr
/* Disable preemption. */
mov TLB_TAG_ACCESS, %g3
stw %o2, [%g6 + TI_PRE_COUNT]
/* Load TLB entries. */
rdpr %pstate, %o2
wrpr %o2, PSTATE_IE, %pstate
stxa %o0, [%g3] ASI_DMMU
stxa %g1, [%g0] ASI_DTLB_DATA_IN
membar #Sync
stxa %o1, [%g3] ASI_DMMU
stxa %g2, [%g0] ASI_DTLB_DATA_IN
membar #Sync
wrpr %o2, 0x0, %pstate
cheetah_copy_page_insn:
ba,pt %xcc, 9f
nop
1:
VISEntryHalf
membar #StoreLoad | #StoreStore | #LoadStore
sethi %hi((PAGE_SIZE/64)-2), %o2
mov %o0, %g1
prefetch [%o1 + 0x000], #one_read
or %o2, %lo((PAGE_SIZE/64)-2), %o2
prefetch [%o1 + 0x040], #one_read
prefetch [%o1 + 0x080], #one_read
prefetch [%o1 + 0x0c0], #one_read
ldd [%o1 + 0x000], %f0
prefetch [%o1 + 0x100], #one_read
ldd [%o1 + 0x008], %f2
prefetch [%o1 + 0x140], #one_read
ldd [%o1 + 0x010], %f4
prefetch [%o1 + 0x180], #one_read
fmovd %f0, %f16
ldd [%o1 + 0x018], %f6
fmovd %f2, %f18
ldd [%o1 + 0x020], %f8
fmovd %f4, %f20
ldd [%o1 + 0x028], %f10
fmovd %f6, %f22
ldd [%o1 + 0x030], %f12
fmovd %f8, %f24
ldd [%o1 + 0x038], %f14
fmovd %f10, %f26
ldd [%o1 + 0x040], %f0
1: ldd [%o1 + 0x048], %f2
fmovd %f12, %f28
ldd [%o1 + 0x050], %f4
fmovd %f14, %f30
stda %f16, [%o0] ASI_BLK_P
ldd [%o1 + 0x058], %f6
fmovd %f0, %f16
ldd [%o1 + 0x060], %f8
fmovd %f2, %f18
ldd [%o1 + 0x068], %f10
fmovd %f4, %f20
ldd [%o1 + 0x070], %f12
fmovd %f6, %f22
ldd [%o1 + 0x078], %f14
fmovd %f8, %f24
ldd [%o1 + 0x080], %f0
prefetch [%o1 + 0x180], #one_read
fmovd %f10, %f26
subcc %o2, 1, %o2
add %o0, 0x40, %o0
bne,pt %xcc, 1b
add %o1, 0x40, %o1
ldd [%o1 + 0x048], %f2
fmovd %f12, %f28
ldd [%o1 + 0x050], %f4
fmovd %f14, %f30
stda %f16, [%o0] ASI_BLK_P
ldd [%o1 + 0x058], %f6
fmovd %f0, %f16
ldd [%o1 + 0x060], %f8
fmovd %f2, %f18
ldd [%o1 + 0x068], %f10
fmovd %f4, %f20
ldd [%o1 + 0x070], %f12
fmovd %f6, %f22
add %o0, 0x40, %o0
ldd [%o1 + 0x078], %f14
fmovd %f8, %f24
fmovd %f10, %f26
fmovd %f12, %f28
fmovd %f14, %f30
stda %f16, [%o0] ASI_BLK_P
membar #Sync
VISExitHalf
ba,pt %xcc, 5f
nop
9:
VISEntry
ldub [%g6 + TI_FAULT_CODE], %g3
mov %o0, %g1
cmp %g3, 0
rd %asi, %g3
be,a,pt %icc, 1f
wr %g0, ASI_BLK_P, %asi
wr %g0, ASI_BLK_COMMIT_P, %asi
1: ldda [%o1] ASI_BLK_P, %f0
add %o1, 0x40, %o1
ldda [%o1] ASI_BLK_P, %f16
add %o1, 0x40, %o1
sethi %hi(PAGE_SIZE), %o2
1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
ldda [%o1] ASI_BLK_P, %f32
stda %f48, [%o0] %asi
add %o1, 0x40, %o1
sub %o2, 0x40, %o2
add %o0, 0x40, %o0
TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
ldda [%o1] ASI_BLK_P, %f0
stda %f48, [%o0] %asi
add %o1, 0x40, %o1
sub %o2, 0x40, %o2
add %o0, 0x40, %o0
TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
ldda [%o1] ASI_BLK_P, %f16
stda %f48, [%o0] %asi
sub %o2, 0x40, %o2
add %o1, 0x40, %o1
cmp %o2, PAGE_SIZE_REM
bne,pt %xcc, 1b
add %o0, 0x40, %o0
#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
ldda [%o1] ASI_BLK_P, %f32
stda %f48, [%o0] %asi
add %o1, 0x40, %o1
sub %o2, 0x40, %o2
add %o0, 0x40, %o0
TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
ldda [%o1] ASI_BLK_P, %f0
stda %f48, [%o0] %asi
add %o1, 0x40, %o1
sub %o2, 0x40, %o2
add %o0, 0x40, %o0
membar #Sync
stda %f32, [%o0] %asi
add %o0, 0x40, %o0
stda %f0, [%o0] %asi
#else
membar #Sync
stda %f0, [%o0] %asi
add %o0, 0x40, %o0
stda %f16, [%o0] %asi
#endif
membar #Sync
wr %g3, 0x0, %asi
VISExit
5:
stxa %g0, [%g1] ASI_DMMU_DEMAP
membar #Sync
sethi %hi(DCACHE_SIZE), %g2
stxa %g0, [%g1 + %g2] ASI_DMMU_DEMAP
membar #Sync
retl
stw %o4, [%g6 + TI_PRE_COUNT]
.size copy_user_page, .-copy_user_page
.globl cheetah_patch_copy_page
cheetah_patch_copy_page:
sethi %hi(0x01000000), %o1 ! NOP
sethi %hi(cheetah_copy_page_insn), %o0
or %o0, %lo(cheetah_copy_page_insn), %o0
stw %o1, [%o0]
membar #StoreStore
flush %o0
retl
nop