9412b23450
The generic implementation of strlen() reads strings byte per byte. This patch implements strlen() in assembly based on a read of entire words, in the same spirit as what some other arches and glibc do. On a 8xx the time spent in strlen is reduced by 3/4 for long strings. strlen() selftest on an 8xx provides the following values: Before the patch (ie with the generic strlen() in lib/string.c): len 256 : time = 1.195055 len 016 : time = 0.083745 len 008 : time = 0.046828 len 004 : time = 0.028390 After the patch: len 256 : time = 0.272185 ==> 78% improvment len 016 : time = 0.040632 ==> 51% improvment len 008 : time = 0.033060 ==> 29% improvment len 004 : time = 0.029149 ==> 2% degradation On a 832x: Before the patch: len 256 : time = 0.236125 len 016 : time = 0.018136 len 008 : time = 0.011000 len 004 : time = 0.007229 After the patch: len 256 : time = 0.094950 ==> 60% improvment len 016 : time = 0.013357 ==> 26% improvment len 008 : time = 0.010586 ==> 4% improvment len 004 : time = 0.008784 Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
49 lines
1.4 KiB
Makefile
49 lines
1.4 KiB
Makefile
# SPDX-License-Identifier: GPL-2.0
|
|
#
|
|
# Makefile for ppc-specific library files..
|
|
#
|
|
|
|
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
|
|
|
|
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
|
|
|
|
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
|
|
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
|
|
|
|
obj-y += string.o alloc.o code-patching.o feature-fixups.o
|
|
|
|
obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
|
|
|
|
# See corresponding test in arch/powerpc/Makefile
|
|
# 64-bit linker creates .sfpr on demand for final link (vmlinux),
|
|
# so it is only needed for modules, and only for older linkers which
|
|
# do not support --save-restore-funcs
|
|
ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
|
|
extra-$(CONFIG_PPC64) += crtsavres.o
|
|
endif
|
|
|
|
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
|
|
memcpy_power7.o
|
|
|
|
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
|
|
memcpy_64.o pmem.o
|
|
|
|
obj64-$(CONFIG_SMP) += locks.o
|
|
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
|
|
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
|
|
|
|
obj-y += checksum_$(BITS).o checksum_wrappers.o \
|
|
string_$(BITS).o memcmp_$(BITS).o
|
|
|
|
obj-y += sstep.o ldstfp.o quad.o
|
|
obj64-y += quad.o
|
|
|
|
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
|
|
|
|
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
|
|
|
|
obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
|
|
CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
|
|
|
|
obj-$(CONFIG_PPC64) += $(obj64-y)
|