kernel_optimize_test/arch/arm/lib/memmove.S
Russell King 8478132a87 Revert "arm: move exports to definitions"
This reverts commit 4dd1837d75.

Moving the exports for assembly code into the assembly files breaks
KSYM trimming, but also breaks modversions.

While fixing the KSYM trimming is trivial, fixing modversions brings
us to a technically worse position that we had prior to the above
change:

- We end up with the prototype definitions divorsed from everything
  else, which means that adding or removing assembly level ksyms
  become more fragile:
  * if adding a new assembly ksyms export, a missed prototype in
    asm-prototypes.h results in a successful build if no module in
    the selected configuration makes use of the symbol.
  * when removing a ksyms export, asm-prototypes.h will get forgotten,
    with armksyms.c, you'll get a build error if you forget to touch
    the file.

- We end up with the same amount of include files and prototypes,
  they're just in a header file instead of a .c file with their
  exports.

As for lines of code, we don't get much of a size reduction:
 (original commit)
 47 files changed, 131 insertions(+), 208 deletions(-)
 (fix for ksyms trimming)
 7 files changed, 18 insertions(+), 5 deletions(-)
 (two fixes for modversions)
 1 file changed, 34 insertions(+)
 3 files changed, 7 insertions(+), 2 deletions(-)
which results in a net total of only 25 lines deleted.

As there does not seem to be much benefit from this change of approach,
revert the change.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2016-11-23 10:00:03 +00:00

228 lines
4.8 KiB
ArmAsm

/*
* linux/arch/arm/lib/memmove.S
*
* Author: Nicolas Pitre
* Created: Sep 28, 2005
* Copyright: (C) MontaVista Software Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
.text
/*
* Prototype: void *memmove(void *dest, const void *src, size_t n);
*
* Note:
*
* If the memory regions don't overlap, we simply branch to memcpy which is
* normally a bit faster. Otherwise the copy is done going downwards. This
* is a transposition of the code from copy_template.S but with the copy
* occurring in the opposite direction.
*/
ENTRY(memmove)
UNWIND( .fnstart )
subs ip, r0, r1
cmphi r2, ip
bls memcpy
stmfd sp!, {r0, r4, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} ) @ in first stmfd block
add r1, r1, r2
add r0, r0, r2
subs r2, r2, #4
blt 8f
ands ip, r0, #3
PLD( pld [r1, #-4] )
bne 9f
ands ip, r1, #3
bne 10f
1: subs r2, r2, #(28)
stmfd sp!, {r5 - r8}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} )
UNWIND( .save {r5 - r8} ) @ in second stmfd block
blt 5f
CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
CALGN( rsb ip, ip, #32 )
CALGN( add pc, r4, ip )
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
PLD( pld [r1, #-32] )
PLD( blt 4f )
PLD( pld [r1, #-64] )
PLD( pld [r1, #-96] )
3: PLD( pld [r1, #-128] )
4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
subs r2, r2, #32
stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
bge 3b
PLD( cmn r2, #96 )
PLD( bge 4b )
5: ands ip, r2, #28
rsb ip, ip, #32
addne pc, pc, ip @ C is always clear here
b 7f
6: W(nop)
W(ldr) r3, [r1, #-4]!
W(ldr) r4, [r1, #-4]!
W(ldr) r5, [r1, #-4]!
W(ldr) r6, [r1, #-4]!
W(ldr) r7, [r1, #-4]!
W(ldr) r8, [r1, #-4]!
W(ldr) lr, [r1, #-4]!
add pc, pc, ip
nop
W(nop)
W(str) r3, [r0, #-4]!
W(str) r4, [r0, #-4]!
W(str) r5, [r0, #-4]!
W(str) r6, [r0, #-4]!
W(str) r7, [r0, #-4]!
W(str) r8, [r0, #-4]!
W(str) lr, [r0, #-4]!
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
UNWIND( .fnend ) @ end of second stmfd block
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
8: movs r2, r2, lsl #31
ldrneb r3, [r1, #-1]!
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
strneb r3, [r0, #-1]!
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
9: cmp ip, #2
ldrgtb r3, [r1, #-1]!
ldrgeb r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
strgtb r3, [r0, #-1]!
strgeb r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
blt 8b
ands ip, r1, #3
beq 1b
10: bic r1, r1, #3
cmp ip, #2
ldr r3, [r1, #0]
beq 17f
blt 18f
UNWIND( .fnend )
.macro backward_copy_shift push pull
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
subs r2, r2, #28
blt 14f
CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
11: stmfd sp!, {r5 - r9}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} )
UNWIND( .save {r5 - r9} ) @ in new second stmfd block
PLD( pld [r1, #-4] )
PLD( subs r2, r2, #96 )
PLD( pld [r1, #-32] )
PLD( blt 13f )
PLD( pld [r1, #-64] )
PLD( pld [r1, #-96] )
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
mov lr, r3, lspush #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
orr lr, lr, ip, lspull #\pull
mov ip, ip, lspush #\push
orr ip, ip, r9, lspull #\pull
mov r9, r9, lspush #\push
orr r9, r9, r8, lspull #\pull
mov r8, r8, lspush #\push
orr r8, r8, r7, lspull #\pull
mov r7, r7, lspush #\push
orr r7, r7, r6, lspull #\pull
mov r6, r6, lspush #\push
orr r6, r6, r5, lspull #\pull
mov r5, r5, lspush #\push
orr r5, r5, r4, lspull #\pull
mov r4, r4, lspush #\push
orr r4, r4, r3, lspull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
PLD( bge 13b )
ldmfd sp!, {r5 - r9}
UNWIND( .fnend ) @ end of the second stmfd block
UNWIND( .fnstart )
UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
14: ands ip, r2, #28
beq 16f
15: mov lr, r3, lspush #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
orr lr, lr, r3, lspull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
16: add r1, r1, #(\pull / 8)
b 8b
UNWIND( .fnend )
.endm
backward_copy_shift push=8 pull=24
17: backward_copy_shift push=16 pull=16
18: backward_copy_shift push=24 pull=8
ENDPROC(memmove)