kernel_optimize_test/arch/c6x/lib/csum_64plus.S
Al Viro cc44c17baf csum_partial_copy_nocheck(): drop the last argument
It's always 0.  Note that we theoretically could use ~0U as well -
result will be the same modulo 0xffff, _if_ the damn thing did the
right thing for any value of initial sum; later we'll make use of
that when convenient.

However, unlike csum_and_copy_..._user(), there are instances that
did not work for arbitrary initial sums; c6x is one such.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2020-08-20 15:45:14 -04:00

415 lines
7.3 KiB
ArmAsm

; SPDX-License-Identifier: GPL-2.0-only
;
; linux/arch/c6x/lib/csum_64plus.s
;
; Port on Texas Instruments TMS320C6x architecture
;
; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
;
#include <linux/linkage.h>
;
;unsigned int csum_partial_copy_nocheck(const char *src, char * dst,
; int len)
;
; A4: src
; B4: dst
; A6: len
; (no initial sum is taken: both accumulators are zeroed on entry)
; return csum in A4
;
.text
ENTRY(csum_partial_copy_nocheck)
; Copy len (A6) bytes from src (A4) to dst (B4) and sum the copied data
; as 16-bit quantities. The sum is folded to 16 bits and returned in A4.
; NOTE(review): the scheduling below relies on C6x branch/load delay
; slots; check the ISA manual before reordering any instruction.
;
; Save ILC in B30; it is written back just before returning.
MVC .S2 ILC,B30
ZERO .D1 A9 ; csum (a side)
|| ZERO .D2 B9 ; csum (b side)
|| SHRU .S2X A6,2,B5 ; len / 4
;; Check alignment and size
; A1 = src & 3, B0 = dst & 3
AND .S1 3,A4,A1
|| AND .S2 3,B4,B0
; B0 ends up non-zero when either pointer is not word aligned. The word
; count is written to ILC and to A1. B2 = 1 is the MPYU operand used by
; the aligned loop.
OR .L2X B0,A1,B0 ; non aligned condition
|| MVC .S2 B5,ILC
|| MVK .D2 1,B2
|| MV .D1X B5,A1 ; words condition
; No full word to copy: go to the tail handling at L8.
[!A1] B .S1 L8
; At least one pointer is unaligned: use the LDNW/STNW loop after L6.
[B0] BNOP .S1 L6,5
SPLOOP 1
;; Main loop for aligned words
; Each word is loaded into A7 and mirrored into B7 for the store.
; EXTU with (0,16) keeps the upper half-word, which is added to A9.
; MPYU by B2 (= 1) presumably isolates the lower half-word, which is
; added to B9 -- TODO confirm MPYU operand width in the ISA manual.
LDW .D1T1 *A4++,A7
NOP 4
MV .S2X A7,B7
|| EXTU .S1 A7,0,16,A16
STW .D2T2 B7,*B4++
|| MPYU .M2 B7,B2,B8
|| ADD .L1 A16,A9,A9
NOP
SPKERNEL 8,0
|| ADD .L2 B8,B9,B9
; A1 is cleared so that the test at L6 skips the non-aligned loop.
ZERO .D1 A1
|| ADD .L1X A9,B9,A9 ; add csum from a and b sides
L6:
; Reached by fall-through with A1 = 0, or from the unaligned branch above
; with A1 = word count.
[!A1] BNOP .S1 L8,5
;; Main loop for non-aligned words
SPLOOP 2
|| MVK .L1 1,A2
; Same accumulation as the aligned loop, but using LDNW/STNW and adding
; both half-words (A16 and A8) into A9 only.
LDNW .D1T1 *A4++,A7
NOP 3
NOP
MV .S2X A7,B7
|| EXTU .S1 A7,0,16,A16
|| MPYU .M1 A7,A2,A8
ADD .L1 A16,A9,A9
SPKERNEL 6,0
|| STNW .D2T2 B7,*B4++
|| ADD .L1 A8,A9,A9
; Tail handling: B5 = len & 2, B0 = (B5 > 0). Skip to L82 when there is
; no trailing half-word.
L8: AND .S2X 2,A6,B5
CMPGT .L2 B5,0,B0
[!B0] BNOP .S1 L82,4
;; Manage half-word
ZERO .L1 A7
|| ZERO .D1 A8
#ifdef CONFIG_CPU_BIG_ENDIAN
; Copy two bytes one at a time; the sum gets (first << 8) + second.
LDBU .D1T1 *A4++,A7
LDBU .D1T1 *A4++,A8
NOP 3
SHL .S1 A7,8,A0
ADD .S1 A8,A9,A9
STB .D2T1 A7,*B4++
|| ADD .S1 A0,A9,A9
STB .D2T1 A8,*B4++
#else
; Copy two bytes one at a time; the sum gets first + (second << 8).
LDBU .D1T1 *A4++,A7
LDBU .D1T1 *A4++,A8
NOP 3
ADD .S1 A7,A9,A9
SHL .S1 A8,8,A0
STB .D2T1 A7,*B4++
|| ADD .S1 A0,A9,A9
STB .D2T1 A8,*B4++
#endif
;; Manage eventually the last byte
; B0 = len & 1; skip to the fold at L9 when len is even.
L82: AND .S2X 1,A6,B0
[!B0] BNOP .S1 L9,5
|| ZERO .L1 A7
; Copy the final byte through A7/B7.
L83: LDBU .D1T1 *A4++,A7
NOP 4
MV .L2X A7,B7
#ifdef CONFIG_CPU_BIG_ENDIAN
; The last byte is added shifted left by 8.
STB .D2T2 B7,*B4++
|| SHL .S1 A7,8,A7
ADD .S1 A7,A9,A9
#else
; The last byte is added unshifted.
STB .D2T2 B7,*B4++
|| ADD .S1 A7,A9,A9
#endif
;; Fold the csum
; Skip the fold when the upper 16 bits of A9 are already zero.
L9: SHRU .S2X A9,16,B0
[!B0] BNOP .S1 L10,5
; A9 = (A9 & 0xffff) + (A9 >> 16), repeated while the upper half is
; non-zero.
L91: SHRU .S2X A9,16,B4
|| EXTU .S1 A9,16,16,A3
ADD .D1X A3,B4,A9
SHRU .S1 A9,16,A0
[A0] BNOP .S1 L91,5
; Result goes to A4. Return through B3; the ILC restore follows the
; BNOP and presumably executes in the last branch delay slot.
L10: MV .D1 A9,A4
BNOP .S2 B3,4
MVC .S2 B30,ILC
ENDPROC(csum_partial_copy_nocheck)
;
;unsigned short
;ip_fast_csum(unsigned char *iph, unsigned int ihl)
;{
; unsigned int checksum = 0;
; unsigned short *tosum = (unsigned short *) iph;
; int len;
;
; len = ihl*4;
;
; if (len <= 0)
; return 0;
;
; while(len) {
; len -= 2;
; checksum += *tosum++;
; }
; if (len & 1)
; checksum += *(unsigned char*) tosum;
;
; while(checksum >> 16)
; checksum = (checksum & 0xffff) + (checksum >> 16);
;
; return ~checksum;
;}
;
; A4: iph
; B4: ihl
; return checksum in A4
;
.text
ENTRY(ip_fast_csum)
; Sum ihl*4 bytes at iph (A4) as unsigned half-words, fold the sum to
; 16 bits and return its complement (masked to 16 bits) in A4.
; Returns 0 when ihl*4 is not greater than zero.
;
; A5 is the accumulator; ILC is saved in B30 and written back on exit.
ZERO .D1 A5
|| MVC .S2 ILC,B30
; B0 = ihl * 4 (length in bytes), B1 = (B0 > 0)
SHL .S2 B4,2,B0
CMPGT .L2 B0,0,B1
; Length not positive: branch to the return at L15. The ZERO after the
; BNOP sets the return value to 0 for that path.
[!B1] BNOP .S1 L15,4
[!B1] ZERO .D1 A3
; NOTE(review): B0 > 0 has already been established when this line
; executes, so this branch looks like it can never be taken -- confirm.
[!B0] B .S1 L12
; B0 = number of half-words, written to ILC as the loop count.
SHRU .S2 B0,1,B0
MVC .S2 B0,ILC
NOP 3
SPLOOP 1
; Load one unsigned half-word per iteration and add it to A5.
LDHU .D1T1 *A4++,A3
NOP 3
NOP
SPKERNEL 5,0
|| ADD .L1 A3,A5,A5
; Skip the fold when the upper 16 bits of A5 are already zero.
L12: SHRU .S1 A5,16,A0
[!A0] BNOP .S1 L14,5
; A5 = (A5 & 0xffff) + (A5 >> 16), repeated while the upper half is
; non-zero.
L13: SHRU .S2X A5,16,B4
EXTU .S1 A5,16,16,A3
ADD .D1X A3,B4,A5
SHRU .S1 A5,16,A0
[A0] BNOP .S1 L13,5
; A3 = ~A5, then keep only the low 16 bits.
L14: NOT .D1 A5,A3
EXTU .S1 A3,16,16,A3
; Return through B3. The ILC restore and the move of the result into A4
; follow the BNOP and presumably execute in the branch delay slots.
L15: BNOP .S2 B3,3
MVC .S2 B30,ILC
MV .D1 A3,A4
ENDPROC(ip_fast_csum)
;
;unsigned short
;do_csum(unsigned char *buff, unsigned int len)
;{
; int odd, count;
; unsigned int result = 0;
;
; if (len <= 0)
; goto out;
; odd = 1 & (unsigned long) buff;
; if (odd) {
;#ifdef __LITTLE_ENDIAN
; result += (*buff << 8);
;#else
; result = *buff;
;#endif
; len--;
; buff++;
; }
; count = len >> 1; /* nr of 16-bit words.. */
; if (count) {
; if (2 & (unsigned long) buff) {
; result += *(unsigned short *) buff;
; count--;
; len -= 2;
; buff += 2;
; }
; count >>= 1; /* nr of 32-bit words.. */
; if (count) {
; unsigned int carry = 0;
; do {
; unsigned int w = *(unsigned int *) buff;
; count--;
; buff += 4;
; result += carry;
; result += w;
; carry = (w > result);
; } while (count);
; result += carry;
; result = (result & 0xffff) + (result >> 16);
; }
; if (len & 2) {
; result += *(unsigned short *) buff;
; buff += 2;
; }
; }
; if (len & 1)
;#ifdef __LITTLE_ENDIAN
; result += *buff;
;#else
; result += (*buff << 8);
;#endif
; result = (result & 0xffff) + (result >> 16);
; /* add up carry.. */
; result = (result & 0xffff) + (result >> 16);
; if (odd)
; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
;out:
; return result;
;}
;
; A4: buff
; B4: len
; return checksum in A4
;
ENTRY(do_csum)
; Assembly version of the C routine shown in the comment above:
; sum len (B4) bytes at buff (A4) and return the folded 16-bit result
; in A4. Register roles in this routine:
;   A1 = running result     A3 = odd flag (buff & 1)
;   A5 = saved return addr  B3 = remaining length
; NOTE(review): the scheduling below relies on C6x branch/load delay
; slots; check the ISA manual before reordering any instruction.
;
; len <= 0: branch to the exit at L26. The two packets after the BNOP
; presumably still execute in its delay slots, so A5 and A1 are set up
; on that path too -- TODO confirm.
CMPGT .L2 B4,0,B0
[!B0] BNOP .S1 L26,3
; A0 = bit 0 of buff (odd start address)
EXTU .S1 A4,31,31,A0
; Save odd flag and return address, move len to B3, clear the result.
MV .L1 A0,A3
|| MV .S1X B3,A5
|| MV .L2 B4,B3
|| ZERO .D1 A1
#ifdef CONFIG_CPU_BIG_ENDIAN
; Odd start: consume one byte, loaded directly into the result.
[A0] SUB .L2 B3,1,B3
|| [A0] LDBU .D1T1 *A4++,A1
#else
; Even start: skip to L21. Odd start: consume one byte and put it in
; the result shifted left by 8.
[!A0] BNOP .S1 L21,5
|| [A0] LDBU .D1T1 *A4++,A0
SUB .L2 B3,1,B3
|| SHL .S1 A0,8,A1
L21:
#endif
; B0 = count of 16-bit units; none left -> go to the last-byte code.
SHR .S2 B3,1,B0
[!B0] BNOP .S1 L24,3
; A0 = buff & 2 (is the pointer half-word but not word aligned?)
MVK .L1 2,A0
AND .L1 A4,A0,A0
; If so, consume one half-word so that the word loop starts aligned.
[!A0] BNOP .S1 L22,5
|| [A0] LDHU .D1T1 *A4++,A0
SUB .L2 B0,1,B0
|| SUB .S2 B3,2,B3
|| ADD .L1 A0,A1,A1
L22:
; B0 = count of 32-bit words; A0 = carry, initially 0.
SHR .S2 B0,1,B0
|| ZERO .L1 A0
; No full word: skip the loop. Otherwise the word count goes to ILC.
[!B0] BNOP .S1 L23,5
|| [B0] MVC .S2 B0,ILC
SPLOOP 3
; Word loop, accumulating in A2 with carry tracking:
;   A0 = carry + w; A2 += A0; carry = (w > A2) unsigned.
; The MV that seeds A2 from A1 is under SPMASK, presumably so that it
; runs only once and is not part of the repeated loop body -- TODO
; confirm against the SPLOOP documentation.
SPMASK L1
|| MV .L1 A1,A2
|| LDW .D1T1 *A4++,A1
NOP 4
ADD .L1 A0,A1,A0
ADD .L1 A2,A0,A2
SPKERNEL 1,2
|| CMPGTU .L1 A1,A2,A0
; Add the final carry, then fold once:
; A1 = (A6 & 0xffff) + (A6 >> 16).
ADD .L1 A0,A2,A6
EXTU .S1 A6,16,16,A7
SHRU .S2X A6,16,B0
NOP 1
ADD .L1X A7,B0,A1
L23:
; B0 = len & 2: add one trailing half-word if present.
MVK .L2 2,B0
AND .L2 B3,B0,B0
[B0] LDHU .D1T1 *A4++,A0
NOP 4
[B0] ADD .L1 A0,A1,A1
L24:
; B0 = bit 0 of the remaining length: one trailing byte if set.
EXTU .S2 B3,31,31,B0
#ifdef CONFIG_CPU_BIG_ENDIAN
; The trailing byte is added shifted left by 8.
[!B0] BNOP .S1 L25,4
|| [B0] LDBU .D1T1 *A4,A0
SHL .S1 A0,8,A0
ADD .L1 A0,A1,A1
L25:
#else
; The trailing byte is added unshifted.
[B0] LDBU .D1T1 *A4,A0
NOP 4
[B0] ADD .L1 A0,A1,A1
#endif
; Fold: A0 = (A1 & 0xffff) + (A1 >> 16), then add the carry out of that
; addition and keep the low 16 bits in A1.
EXTU .S1 A1,16,16,A0
SHRU .S2X A1,16,B0
NOP 1
ADD .L1X A0,B0,A0
SHRU .S1 A0,16,A1
ADD .L1 A0,A1,A0
EXTU .S1 A0,16,16,A1
; Prepare the byte-swapped result: A2 = (A1 >> 8) & 0xff and
; A0 = (A1 & 0xff) << 8. It replaces A1 only when the odd flag saved
; in A3 is set.
EXTU .S1 A1,16,24,A2
EXTU .S1 A1,24,16,A0
|| MV .L2X A3,B0
[B0] OR .L1 A0,A2,A1
L26:
; Return through the address saved in A5. The MV of the result into A4
; follows the BNOP and presumably executes in the last delay slot.
NOP 1
BNOP .S2X A5,4
MV .L1 A1,A4
ENDPROC(do_csum)
;__wsum csum_partial(const void *buff, int len, __wsum wsum)
;{
; unsigned int sum = (__force unsigned int)wsum;
; unsigned int result = do_csum(buff, len);
;
; /* add in old sum, and carry.. */
; result += sum;
; if (sum > result)
; result += 1;
; return (__force __wsum)result;
;}
;
ENTRY(csum_partial)
; Call do_csum on (buff, len), then add the incoming wsum (A6) to its
; result with end-around carry. The result is returned in A4.
;
; The return address is kept in A9 and wsum in A8 across the call;
; do_csum as written in this file does not touch A8 or A9.
MV .L1X B3,A9
|| CALLP .S2 do_csum,B3
|| MV .S1 A6,A8
; Return through A9. The three adds below follow the BNOP and presumably
; execute in its delay slots:
;   A1 = wsum + result; A0 = (wsum > A1) unsigned; A4 = A1 + A0.
BNOP .S2X A9,2
ADD .L1 A8,A4,A1
CMPGTU .L1 A8,A1,A0
ADD .L1 A1,A0,A4
ENDPROC(csum_partial)
;unsigned short
;ip_compute_csum(unsigned char *buff, unsigned int len)
;
; A4: buff
; B4: len
; return checksum in A4
ENTRY(ip_compute_csum)
; Call do_csum on (buff, len) and return the complement of its result,
; with bits 16..31 cleared, in A4.
;
; The return address is kept in A9 across the call.
MV .L1X B3,A9
|| CALLP .S2 do_csum,B3
; Return through A9. The NOT and CLR follow the BNOP and presumably
; execute in its delay slots.
BNOP .S2X A9,3
NOT .S1 A4,A4
CLR .S1 A4,16,31,A4
ENDPROC(ip_compute_csum)