libc/machine: introduce optimized memchr/memcpy/memmove/strcmp/strlen

from newlib/bionic for armv7-a

N/A

Signed-off-by: Huang Qi <huangqi3@xiaomi.com>
Change-Id: I079d929020c353f73033a7e2189f34430e2b9c09
This commit is contained in:
Huang Qi
2021-03-08 15:41:19 +08:00
parent aedf823224
commit 44ec147ff7
9 changed files with 1640 additions and 451 deletions
+53
View File
@@ -3,9 +3,62 @@
# see the file kconfig-language.txt in the NuttX tools repository.
#
config ARMV7A_MEMCHR
bool "Enable optimized memchr() for ARMv7-A"
default n
select MACHINE_OPTS_ARMV7A
select LIBC_ARCH_MEMCHR
depends on ARCH_TOOLCHAIN_GNU
depends on ARM_THUMB
---help---
Enable optimized ARMv7-A specific memchr() library function
config ARMV7A_MEMCPY
bool "Enable optimized memcpy() for ARMv7-A"
select LIBC_ARCH_MEMCPY
depends on ARCH_TOOLCHAIN_GNU
depends on ARM_THUMB
---help---
Enable optimized ARMv7-A specific memcpy() library function
config ARMV7A_MEMMOVE
bool "Enable optimized memmove() for ARMv7-A"
default n
select MACHINE_OPTS_ARMV7A
select LIBC_ARCH_MEMMOVE
depends on ARCH_TOOLCHAIN_GNU
depends on ARM_THUMB
---help---
Enable optimized ARMv7-A specific memmove() library function
config ARMV7A_MEMSET
bool "Enable optimized memset() for ARMv7-A"
default n
select MACHINE_OPTS_ARMV7A
select LIBC_ARCH_MEMSET
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific memset() library function
config ARMV7A_STRCMP
bool "Enable optimized strcmp() for ARMv7-A"
default n
select MACHINE_OPTS_ARMV7A
select LIBC_ARCH_STRCMP
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific strcmp() library function
config ARMV7A_STRLEN
bool "Enable optimized strlen() for ARMv7-A"
default n
select MACHINE_OPTS_ARMV7A
select LIBC_ARCH_STRLEN
depends on ARCH_TOOLCHAIN_GNU
depends on ARM_THUMB
---help---
Enable optimized ARMv7-A specific strlen() library function
config MACHINE_OPTS_ARMV7A
bool
default n
+22 -2
View File
@@ -33,13 +33,33 @@
#
############################################################################
ifeq ($(CONFIG_ARMV7A_MEMCHR),y)
ASRCS += arch_memchr.S
endif
ifeq ($(CONFIG_ARMV7A_MEMCPY),y)
ASRCS += arch_memcpy.S
endif
ifeq ($(CONFIG_ARMV7A_MEMMOVE),y)
ASRCS += arch_memmove.S
endif
ifeq ($(CONFIG_ARMV7A_MEMSET),y)
ASRCS += arch_memset.S
endif
ifeq ($(CONFIG_ARMV7A_STRCMP),y)
ASRCS += arch_strcmp.S
endif
ifeq ($(CONFIG_ARMV7A_STRLEN),y)
ASRCS += arch_strlen.S
endif
ifeq ($(CONFIG_MACHINE_OPTS_ARMV7A),y)
DEPPATH += --dep-path machine/arm/armv7-a/gnu
VPATH += :machine/arm/armv7-a/gnu
endif
ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
@@ -0,0 +1,181 @@
/*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARM_ARCH
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
These were mostly implemented in GCC around GCC-4.8; older versions
have no, or only partial support. To provide a level of backwards
compatibility we try to work out what the definitions should be, given
the older pre-defines that GCC did produce. This isn't complete, but
it should be enough for use by routines that depend on this header. */
/* No need to handle ARMv8, GCC had ACLE support before that. */
# ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# endif
# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
# endif
# ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
# endif
# ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
# endif
# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
# endif
# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
# endif
# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# endif
# ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
# endif
# ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# endif
# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
# endif
/* If we still don't know what the target architecture is, then we're
probably not using GCC. */
# ifndef __ARM_ARCH
# error Unable to determine architecture version.
# endif
#endif /* __ARM_ARCH */
@@ -0,0 +1,386 @@
/* Copyright (c) 2010-2011, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Written by Dave Gilbert <david.gilbert@linaro.org>
This memchr routine is optimised on a Cortex-A9 and should work on
all ARMv7 processors. It has a fast path for short sizes, and has
an optimised path for large data sets; the worst case is finding the
match early in a large data set. */
/* Copyright (c) 2015 ARM Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Linaro nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
@ 2011-02-07 david.gilbert@linaro.org
@ Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@ Import endianness fix from local git ea786f1b
@ 2011-10-11 david.gilbert@linaro.org
@ Import from cortex-strings bzr rev 63
@ Flip to ldrd (as suggested by Greta Yorsh)
@ Make conditional on CPU type
@ tidy
@ This code requires armv6t2 or later. Uses Thumb2.
.syntax unified
#include "acle-compat.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
.arch armv8-r
#else
.arch armv7-a
#endif
.fpu neon
/* Arguments */
#define srcin r0
#define chrin r1
#define cntin r2
/* Retval */
#define result r0 /* Live range does not overlap with srcin */
/* Working registers */
#define src r1 /* Live range does not overlap with chrin */
#define tmp r3
#define synd r0 /* No overlap with srcin or result */
#define soff r12
/* Working NEON registers */
#define vrepchr q0
#define vdata0 q1
#define vdata0_0 d2 /* Lower half of vdata0 */
#define vdata0_1 d3 /* Upper half of vdata0 */
#define vdata1 q2
#define vdata1_0 d4 /* Lower half of vhas_chr0 */
#define vdata1_1 d5 /* Upper half of vhas_chr0 */
#define vrepmask q3
#define vrepmask0 d6
#define vrepmask1 d7
#define vend q4
#define vend0 d8
#define vend1 d9
/*
* Core algorithm:
*
* For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
* byte. Each bit is set if the relevant byte matched the requested character
* and cleared otherwise. Since the bits in the syndrome reflect exactly the
* order in which things occur in the original string, counting trailing zeros
* allows to identify exactly which byte has matched.
*/
.text
.thumb_func
.align 4
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
.cfi_sections .debug_frame
.cfi_startproc
/* Use a simple loop if there are less than 8 bytes to search. */
cmp cntin, #7
bhi .Llargestr
and chrin, chrin, #0xff
.Lsmallstr:
subs cntin, cntin, #1
blo .Lnotfound /* Return not found if reached end. */
ldrb tmp, [srcin], #1
cmp tmp, chrin
bne .Lsmallstr /* Loop again if not found. */
/* Otherwise fixup address and return. */
sub result, result, #1
bx lr
.Llargestr:
vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
/*
* Magic constant 0x8040201008040201 allows us to identify which lane
* matches the requested byte.
*/
movw tmp, #0x0201
movt tmp, #0x0804
lsl soff, tmp, #4
vmov vrepmask0, tmp, soff
vmov vrepmask1, tmp, soff
/* Work with aligned 32-byte chunks */
bic src, srcin, #31
ands soff, srcin, #31
beq .Lloopintro /* Go straight to main loop if it's aligned. */
/*
* Input string is not 32-byte aligned. We calculate the syndrome
* value for the aligned 32 bytes block containing the first bytes
* and mask the irrelevant part.
*/
vld1.8 {vdata0, vdata1}, [src:256]!
sub tmp, soff, #32
adds cntin, cntin, tmp
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
/* Clear the soff lower bits */
lsr synd, synd, soff
lsl synd, synd, soff
/* The first block can also be the last */
bls .Lmasklast
/* Have we found something already? */
cbnz synd, .Ltail
.Lloopintro:
vpush {vend}
/* 264/265 correspond to d8/d9 for q4 */
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 264, 0
.cfi_rel_offset 265, 8
.p2align 3,,7
.Lloop:
vld1.8 {vdata0, vdata1}, [src:256]!
subs cntin, cntin, #32
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
/* If we're out of data we finish regardless of the result. */
bls .Lend
/* Use a fast check for the termination condition. */
vorr vend, vdata0, vdata1
vorr vend0, vend0, vend1
vmov synd, tmp, vend0
orrs synd, synd, tmp
/* We're not out of data, loop if we haven't found the character. */
beq .Lloop
.Lend:
vpop {vend}
.cfi_adjust_cfa_offset -16
.cfi_restore 264
.cfi_restore 265
/* Termination condition found, let's calculate the syndrome value. */
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
cbz synd, .Lnotfound
bhi .Ltail
.Lmasklast:
/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
neg cntin, cntin
lsl synd, synd, cntin
lsrs synd, synd, cntin
it eq
moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
.Ltail:
/* Count the trailing zeros using bit reversing */
rbit synd, synd
/* Compensate the last post-increment */
sub src, src, #32
/* Count the leading zeros */
clz synd, synd
/* Compute the potential result and return */
add result, src, synd
bx lr
.Lnotfound:
/* Set result to NULL if not found and return */
mov result, #0
bx lr
.cfi_endproc
.size memchr, . - memchr
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.text
.thumb
@ ---------------------------------------------------------------------------
.thumb_func
.align 2
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
blt 20f
tst r0, #7 @ If it's already aligned skip the next bit
beq 10f
@ Work up to an aligned point
5:
ldrb r3, [r0],#1
subs r2, r2, #1
cmp r3, r1
beq 50f @ If it matches exit found
tst r0, #7
cbz r2, 40f @ If we run off the end, exit not found
bne 5b @ If not aligned yet then do next byte
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
mvns r7, #0 @ all F's
movs r3, #0
15:
ldrd r5,r6,[r0],#8
subs r4, r4, #8
eor r5,r5, r1 @ r5,r6 have 00's where bytes match the target
eor r6,r6, r1
uadd8 r5, r5, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r5, r3, r7 @ bytes are 00 for none-00 bytes,
@ or ff for 00 bytes - NOTE INVERSION
uadd8 r6, r6, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes
@ or ff for 00 bytes - NOTE INVERSION
cbnz r6, 60f
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
20:
cbz r2, 40f @ 0 length or hit the end already then not found
21: @ Post aligned section, or just a short call
ldrb r3,[r0],#1
subs r2,r2,#1
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
cbz r3, 50f
bne 21b @ on r2 flags
40:
movs r0,#0 @ not found
bx lr
50:
subs r0,r0,#1 @ found
bx lr
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
subne r0,r0,#7 @ or 2nd byte of 1st word
@ r0 currently points to the 2nd byte of the word containing the hit
tst r5, # CHARTSTMASK(0) @ 1st character
bne 61f
adds r0,r0,#1
tst r5, # CHARTSTMASK(1) @ 2nd character
ittt eq
addeq r0,r0,#1
tsteq r5, # (3<<15) @ 2nd & 3rd character
@ If not the 3rd must be the last one
addeq r0,r0,#1
61:
pop {r4,r5,r6,r7}
subs r0,r0,#1
bx lr
#else
/* Defined in memchr-stub.c. */
#endif
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
cmp r0, r1
push {r4}
bls 3f
adds r3, r1, r2
cmp r0, r3
bcs 3f
adds r1, r0, r2
cbz r2, 2f
subs r2, r3, r2
1:
ldrb r4, [r3, #-1]!
cmp r2, r3
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
3:
cmp r2, #0
beq 2b
add r2, r2, r1
subs r3, r0, #1
4:
ldrb r4, [r1], #1
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
.size memmove, . - memmove
@@ -0,0 +1,139 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
.arm
.syntax unified
.global memset
.type memset, %function
memset:
mov r3, r0
// At this point only d0, d1 are going to be used below.
vdup.8 q0, r1
cmp r2, #16
blo .L_set_less_than_16_unknown_align
.L_check_alignment:
// Align destination to a double word to avoid the store crossing
// a cache line boundary.
ands ip, r3, #7
bne .L_do_double_word_align
.L_double_word_aligned:
// Duplicate since the less than 64 can use d2, d3.
vmov q1, q0
subs r2, #64
blo .L_set_less_than_64
// Duplicate the copy value so that we can store 64 bytes at a time.
vmov q2, q0
vmov q3, q0
1: // Main loop stores 64 bytes at a time.
subs r2, #64
vstmia r3!, {d0 - d7}
bge 1b
.L_set_less_than_64:
// Restore r2 to the count of bytes left to set.
add r2, #64
lsls ip, r2, #27
bcc .L_set_less_than_32
// Set 32 bytes.
vstmia r3!, {d0 - d3}
.L_set_less_than_32:
bpl .L_set_less_than_16
// Set 16 bytes.
vstmia r3!, {d0, d1}
.L_set_less_than_16:
// Less than 16 bytes to set.
lsls ip, r2, #29
bcc .L_set_less_than_8
// Set 8 bytes.
vstmia r3!, {d0}
.L_set_less_than_8:
bpl .L_set_less_than_4
// Set 4 bytes
vst1.32 {d0[0]}, [r3]!
.L_set_less_than_4:
lsls ip, r2, #31
it ne
strbne r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3]
bx lr
.L_do_double_word_align:
rsb ip, ip, #8
sub r2, r2, ip
// Do this comparison now, otherwise we'll need to save a
// register to the stack since we've used all available
// registers.
cmp ip, #4
blo 1f
// Need to do a four byte copy.
movs ip, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
vst1.32 {d0[0]}, [r3]!
b .L_double_word_aligned
1:
// No four byte copy.
movs ip, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
b .L_double_word_aligned
.L_set_less_than_16_unknown_align:
// Set up to 15 bytes.
movs ip, r2, lsl #29
bcc 1f
vst1.8 {d0}, [r3]!
1: bge 2f
vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
bx lr
.size memset, . - memset
@@ -0,0 +1,300 @@
/*
* Copyright (c) 2011 The Android Open Source Project
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __ARMEB__
#define SHFT2LSB lsl
#define SHFT2LSBEQ lsleq
#define SHFT2MSB lsr
#define SHFT2MSBEQ lsreq
#define MSB 0x000000ff
#define LSB 0xff000000
#else
#define SHFT2LSB lsr
#define SHFT2LSBEQ lsreq
#define SHFT2MSB lsl
#define SHFT2MSBEQ lsleq
#define MSB 0xff000000
#define LSB 0x000000ff
#endif
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7
.arm
.syntax unified
.global strcmp
.type strcmp, %function
strcmp:
pld [r0, #0]
pld [r1, #0]
eor r2, r0, r1
tst r2, #3
/* Strings not at same byte offset from a word boundary. */
bne .Lstrcmp_unaligned
ands r2, r0, #3
bic r0, r0, #3
bic r1, r1, #3
ldr ip, [r0], #4
it eq
ldreq r3, [r1], #4
beq 1f
/* Although s1 and s2 have identical initial alignment, they are
* not currently word aligned. Rather than comparing bytes,
* make sure that any bytes fetched from before the addressed
* bytes are forced to 0xff. Then they will always compare
* equal.
*/
eor r2, r2, #3
lsl r2, r2, #3
mvn r3, #MSB
SHFT2LSB r2, r3, r2
ldr r3, [r1], #4
orr ip, ip, r2
orr r3, r3, r2
1:
/* Load the 'magic' constant 0x01010101. */
str r4, [sp, #-4]!
mov r4, #1
orr r4, r4, r4, lsl #8
orr r4, r4, r4, lsl #16
.p2align 2
4:
pld [r0, #8]
pld [r1, #8]
sub r2, ip, magic1(r4)
cmp ip, r3
itttt eq
/* check for any zero bytes in first word */
biceq r2, r2, ip
tsteq r2, magic2(r4)
ldreq ip, [r0], #4
ldreq r3, [r1], #4
beq 4b
2:
/* There's a zero or a different byte in the word */
SHFT2MSB r0, ip, #24
SHFT2LSB ip, ip, #8
cmp r0, #1
it cs
cmpcs r0, r3, SHFT2MSB #24
it eq
SHFT2LSBEQ r3, r3, #8
beq 2b
/* On a big-endian machine, r0 contains the desired byte in bits
* 0-7; on a little-endian machine they are in bits 24-31. In
* both cases the other bits in r0 are all zero. For r3 the
* interesting byte is at the other end of the word, but the
* other bits are not necessarily zero. We need a signed result
* representing the differnece in the unsigned bytes, so for the
* little-endian case we can't just shift the interesting bits up.
*/
#ifdef __ARMEB__
sub r0, r0, r3, lsr #24
#else
and r3, r3, #255
/* No RSB instruction in Thumb2 */
#ifdef __thumb2__
lsr r0, r0, #24
sub r0, r0, r3
#else
rsb r0, r3, r0, lsr #24
#endif
#endif
ldr r4, [sp], #4
bx lr
.Lstrcmp_unaligned:
wp1 .req r0
wp2 .req r1
b1 .req r2
w1 .req r4
w2 .req r5
t1 .req ip
@ r3 is scratch
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
1:
tst wp1, #3
beq 2f
ldrb r2, [wp1], #1
ldrb r3, [wp2], #1
cmp r2, #1
it cs
cmpcs r2, r3
beq 1b
sub r0, r2, r3
bx lr
2:
str r5, [sp, #-4]!
str r4, [sp, #-4]!
mov b1, #1
orr b1, b1, b1, lsl #8
orr b1, b1, b1, lsl #16
and t1, wp2, #3
bic wp2, wp2, #3
ldr w1, [wp1], #4
ldr w2, [wp2], #4
cmp t1, #2
beq 2f
bhi 3f
/* Critical inner Loop: Block with 3 bytes initial overlap */
.p2align 2
1:
bic t1, w1, #MSB
cmp t1, w2, SHFT2LSB #8
sub r3, w1, b1
bic r3, r3, w1
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #24
bne 6f
ldr w1, [wp1], #4
b 1b
4:
SHFT2LSB w2, w2, #8
b 8f
5:
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #0xff000000
itt ne
tstne w1, #0x00ff0000
tstne w1, #0x0000ff00
beq 7f
#else
bics r3, r3, #0xff000000
bne 7f
#endif
ldrb w2, [wp2]
SHFT2LSB t1, w1, #24
#ifdef __ARMEB__
lsl w2, w2, #24
#endif
b 8f
6:
SHFT2LSB t1, w1, #24
and w2, w2, #LSB
b 8f
/* Critical inner Loop: Block with 2 bytes initial overlap */
.p2align 2
2:
SHFT2MSB t1, w1, #16
sub r3, w1, b1
SHFT2LSB t1, t1, #16
bic r3, r3, w1
cmp t1, w2, SHFT2LSB #16
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #16
bne 6f
ldr w1, [wp1], #4
b 2b
5:
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #0xff000000
it ne
tstne w1, #0x00ff0000
beq 7f
#else
lsls r3, r3, #16
bne 7f
#endif
ldrh w2, [wp2]
SHFT2LSB t1, w1, #16
#ifdef __ARMEB__
lsl w2, w2, #16
#endif
b 8f
6:
SHFT2MSB w2, w2, #16
SHFT2LSB t1, w1, #16
4:
SHFT2LSB w2, w2, #16
b 8f
/* Critical inner Loop: Block with 1 byte initial overlap */
.p2align 2
3:
and t1, w1, #LSB
cmp t1, w2, SHFT2LSB #24
sub r3, w1, b1
bic r3, r3, w1
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #8
bne 6f
ldr w1, [wp1], #4
b 3b
4:
SHFT2LSB w2, w2, #24
b 8f
5:
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #LSB
beq 7f
ldr w2, [wp2], #4
6:
SHFT2LSB t1, w1, #8
bic w2, w2, #MSB
b 8f
7:
mov r0, #0
ldr r4, [sp], #4
ldr r5, [sp], #4
bx lr
8:
and r2, t1, #LSB
and r0, w2, #LSB
cmp r0, #1
it cs
cmpcs r0, r2
itt eq
SHFT2LSBEQ t1, t1, #8
SHFT2LSBEQ w2, w2, #8
beq 8b
sub r0, r2, r0
ldr r4, [sp], #4
ldr r5, [sp], #4
bx lr
.size strcmp, . - strcmp
@@ -0,0 +1,179 @@
/* Copyright (c) 2010-2011,2013 Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Assumes:
ARMv6T2 or ARMv7E-M, AArch32
*/
/* Copyright (c) 2015 ARM Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Linaro nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#include "acle-compat.h"
.macro def_fn f p2align=0
.text
.p2align \p2align
.global \f
.type \f, %function
\f:
.endm
#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
.eabi_attribute Tag_ARM_ISA_use, 0
.thumb
.syntax unified
/* Parameters and result. */
#define srcin r0
#define result r0
/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5
def_fn strlen p2align=6
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned
.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
.Lmisaligned8:
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.size strlen, . - strlen