newlib: libc: memcpy M-profile PACBTI-enablement

Add function prologue/epilogue to conditionally add BTI landing pads
and/or PAC code generation & authentication instructions depending on
compilation flags.

Signed-off-by: chao an <anchao@xiaomi.com>
This commit is contained in:
chao an
2023-04-26 17:37:30 +08:00
committed by Xiang Xiao
parent d197ca9967
commit 9fa097ab69
16 changed files with 980 additions and 270 deletions
+3
View File
@@ -65,3 +65,6 @@ endif
DEPPATH += --dep-path machine/arm DEPPATH += --dep-path machine/arm
VPATH += :machine/arm VPATH += :machine/arm
AFLAGS += ${INCDIR_PREFIX}$(TOPDIR)$(DELIM)libs$(DELIM)libc$(DELIM)machine$(DELIM)arm
CFLAGS += ${INCDIR_PREFIX}$(TOPDIR)$(DELIM)libs$(DELIM)libc$(DELIM)machine$(DELIM)arm
+188
View File
@@ -0,0 +1,188 @@
/*
* libs/libc/machine/arm/arm-acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H
#ifndef __ARM_ARCH
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
These were mostly implemented in GCC around GCC-4.8; older versions
have no, or only partial support. To provide a level of backwards
compatibility we try to work out what the definitions should be, given
the older pre-defines that GCC did produce. This isn't complete, but
it should be enough for use by routines that depend on this header. */
/* No need to handle ARMv8, GCC had ACLE support before that. */
/* ARMv7 family.  Each section below is selected by one legacy per-variant
   predefine and supplies the ACLE-style macros (__ARM_ARCH,
   __ARM_ARCH_ISA_*, __ARM_FEATURE_*, __ARM_ARCH_PROFILE) for it.  */

/* Generic ARMv7 predefine.  Note that this section defines neither
   __ARM_ARCH_PROFILE nor __ARM_ARCH_ISA_ARM.  */
# ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
/* NOTE(review): the LDREX value is presumably the ACLE bitmask of supported
   exclusive-access widths -- confirm against the ACLE specification.  */
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# endif
/* ARMv7-A / ARMv7-R predefines: both ARM and Thumb ISA macros are defined,
   together with the SIMD32/DSP/QBIT/SAT feature macros.  The profile
   character is chosen by the nested test at the end of the section.  */
# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
/* 'A' when __ARM_ARCH_7A__ is defined; otherwise this section was selected
   by __ARM_ARCH_7R__, hence 'R'.  */
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
# endif
/* __ARM_ARCH_7EM__: profile 'M', Thumb only (no __ARM_ARCH_ISA_ARM).
   Compared with the __ARM_ARCH_7M__ section below, this one additionally
   defines __ARM_FEATURE_SIMD32 and __ARM_FEATURE_DSP.  */
# ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
/* __ARM_ARCH_7M__: profile 'M', Thumb only, without the SIMD32/DSP feature
   macros.  */
# ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
/* ARMv6 family.  */

/* __ARM_ARCH_6T2__: architecture 6 with the ARM ISA and Thumb level 2.
   No __ARM_ARCH_PROFILE is defined for this variant.  */
# ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
# endif
/* __ARM_ARCH_6M__: profile 'M', Thumb level 1 only; none of the
   __ARM_FEATURE_* macros are defined for it.  */
# ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
# endif
/* Remaining ARMv6 variants (6, 6J, 6K, 6Z, 6ZK): ARM ISA plus Thumb
   level 1, with the CLZ/SIMD32/DSP/QBIT/SAT/UNALIGNED feature macros.  */
# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
/* __ARM_FEATURE_LDREX is only defined when __thumb__ is not defined, i.e.
   it is left undefined for Thumb compilation on these variants.  The 6K
   and 6ZK variants get 15, the others 4.
   NOTE(review): presumably because Thumb-1 has no exclusive-access
   instructions -- confirm against the architecture manual.  */
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
# endif
/* __ARM_ARCH_5TE__ or __ARM_ARCH_5E__: architecture 5 with the ARM ISA and
   the CLZ and DSP feature macros.  */
# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
/* Thumb (level 1) is advertised only for the 5TE variant; for
   __ARM_ARCH_5E__ the macro stays undefined.  */
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
# endif
/* __ARM_ARCH_5T__ or __ARM_ARCH_5__: architecture 5 with the ARM ISA and
   the CLZ feature macro (no DSP, unlike the 5TE/5E section).
   Thumb (level 1) is advertised only for the 5T variant.  The nested test
   must name __ARM_ARCH_5T__: this section is selected by __ARM_ARCH_5T__ or
   __ARM_ARCH_5__, so testing __ARM_ARCH_5TE__ here would never match the
   selecting variant and would leave __ARM_ARCH_ISA_THUMB undefined for
   ARMv5T targets.  */
# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5T__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# endif
/* Architectures 4 and older.  None of the __ARM_FEATURE_* macros are
   defined for these; only the architecture number and the ISA macros.  */

/* __ARM_ARCH_4T__: ARM ISA plus Thumb level 1.  */
# ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
# endif
/* __ARM_ARCH_4__: ARM ISA only.  */
# ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# endif
/* __ARM_ARCH_3__ or __ARM_ARCH_3M__: ARM ISA only.  */
# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
# endif
/* __ARM_ARCH_2__: ARM ISA only.  */
# ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
# endif
/* Endianness: map the legacy __ARMEB__ predefine onto __ARM_BIG_ENDIAN.
   This is independent of the architecture sections above.  */
# ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
# endif
/* If we still don't know what the target architecture is, then we're
probably not using GCC.
   None of the legacy per-variant predefines tested above matched, so
   __ARM_ARCH is still undefined; stop the build here rather than let
   dependent code see an undefined architecture macro.  */
# ifndef __ARM_ARCH
# error Unable to determine architecture version.
# endif
#endif /* __ARM_ARCH */
#endif /* __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H */
File diff suppressed because it is too large Load Diff
@@ -1,183 +0,0 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
#ifndef __ARM_ARCH
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
* These were mostly implemented in GCC around GCC-4.8; older versions
* have no, or only partial support. To provide a level of backwards
* compatibility we try to work out what the definitions should be, given
* the older pre-defines that GCC did produce. This isn't complete, but
* it should be enough for use by routines that depend on this header.
*/
/* No need to handle ARMv8, GCC had ACLE support before that. */
# ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# endif
# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
# endif
# ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
# endif
# ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
# endif
# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
# endif
# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
# endif
# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# endif
# ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
# endif
# ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# endif
# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
# endif
#endif
#endif /* __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H */
@@ -80,7 +80,8 @@
.syntax unified .syntax unified
#include "acle-compat.h" #include "arm-acle-compat.h"
#include "arm_asm.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c @ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON) #if defined (__ARM_NEON__) || defined (__ARM_NEON)
@@ -272,10 +273,14 @@ memchr:
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP) #elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M' #if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m #if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else #else
.arch armv6t2 .arch armv7e-m
#endif #endif /* __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif /* __ARM_ARCH_PROFILE == 'M' */
@ this lets us check a flag in a 00/ff byte easily in either endianness @ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__ #ifdef __ARMEB__
@@ -292,11 +297,14 @@ memchr:
.p2align 4,,15 .p2align 4,,15
.global memchr .global memchr
.type memchr,%function .type memchr,%function
.fnstart
.cfi_startproc
memchr: memchr:
@ r0 = start of memory to scan @ r0 = start of memory to scan
@ r1 = character to look for @ r1 = character to look for
@ r2 = length @ r2 = length
@ returns r0 = pointer to character or NULL if not found @ returns r0 = pointer to character or NULL if not found
prologue
and r1,r1,#0xff @ Don't trust the caller to pass a char and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever cmp r2,#16 @ If short don't bother with anything clever
@@ -318,6 +326,11 @@ memchr:
10: 10:
@ We are aligned, we know we have at least 8 bytes to work with @ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7} push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16 orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8 bic r4, r2, #7 @ Number of double words to work with * 8
@@ -339,6 +352,11 @@ memchr:
bne 15b @ (Flags from the subs above) bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7} pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
and r1,r1,#0xff @ r1 back to a single character and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done @ after the double words have been done
@@ -354,17 +372,29 @@ memchr:
bne 21b @ on r2 flags bne 21b @ on r2 flags
40: 40:
.cfi_remember_state
movs r0,#0 @ not found movs r0,#0 @ not found
bx lr epilogue
50: 50:
.cfi_restore_state
.cfi_remember_state
subs r0,r0,#1 @ found subs r0,r0,#1 @ found
bx lr epilogue
60: @ We're here because the fast path found a hit 60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was @ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested @ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value @ r5 has the 00/ff pattern for the first word, r6 has the chained value
@ This point is reached from cbnz midway through label 15 prior to
@ popping r4-r7 off the stack. .cfi_restore_state alone disregards
@ this, so we manually correct this.
.cfi_restore_state @ Standard post-prologue state
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
cmp r5, #0 cmp r5, #0
itte eq itte eq
moveq r5, r6 @ the end is in the 2nd word moveq r5, r6 @ the end is in the 2nd word
@@ -384,8 +414,16 @@ memchr:
61: 61:
pop {r4,r5,r6,r7} pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
subs r0,r0,#1 subs r0,r0,#1
bx lr epilogue
.cfi_endproc
.cantunwind
.fnend
#else #else
/* Defined in memchr-stub.c. */ /* Defined in memchr-stub.c. */
#endif #endif
+25 -10
View File
@@ -49,6 +49,8 @@
__OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64. __OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64.
__OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16. __OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16.
*/ */
#include "arm_asm.h"
#ifndef __OPT_BIG_BLOCK_SIZE #ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16) #define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif #endif
@@ -88,6 +90,8 @@
.global memcpy .global memcpy
.thumb .thumb
.thumb_func .thumb_func
.fnstart
.cfi_startproc
.type memcpy, %function .type memcpy, %function
memcpy: memcpy:
@ r0: dst @ r0: dst
@@ -96,10 +100,11 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in /* In case of UNALIGNED access supported, ip is not used in
function body. */ function body. */
prologue push_ip=HAVE_PAC_LEAF
mov ip, r0 mov ip, r0
#else #else
push {r0} prologue 0 push_ip=HAVE_PAC_LEAF
#endif #endif /* __ARM_FEATURE_UNALIGNED */
orr r3, r1, r0 orr r3, r1, r0
ands r3, r3, #3 ands r3, r3, #3
bne .Lmisaligned_copy bne .Lmisaligned_copy
@@ -181,15 +186,17 @@ memcpy:
#endif /* __ARM_FEATURE_UNALIGNED */ #endif /* __ARM_FEATURE_UNALIGNED */
.Ldone: .Ldone:
.cfi_remember_state
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else #else
pop {r0} epilogue 0 push_ip=HAVE_PAC_LEAF
#endif #endif /* __ARM_FEATURE_UNALIGNED */
bx lr
.align 2 .align 2
.Lmisaligned_copy: .Lmisaligned_copy:
.cfi_restore_state
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
/* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy /* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy
once destination is adjusted to aligned. */ once destination is adjusted to aligned. */
@@ -250,6 +257,9 @@ memcpy:
/* dst is aligned, but src isn't. Misaligned copy. */ /* dst is aligned, but src isn't. Misaligned copy. */
push {r4, r5} push {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
subs r2, #4 subs r2, #4
/* Backward r1 by misaligned bytes, to make r1 aligned. /* Backward r1 by misaligned bytes, to make r1 aligned.
@@ -302,6 +312,9 @@ memcpy:
adds r2, #4 adds r2, #4
subs r1, ip subs r1, ip
pop {r4, r5} pop {r4, r5}
.cfi_restore 4
.cfi_restore 5
.cfi_adjust_cfa_offset -8
#endif /* __ARM_FEATURE_UNALIGNED */ #endif /* __ARM_FEATURE_UNALIGNED */
@@ -324,9 +337,11 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else #else
pop {r0} epilogue 0 push_ip=HAVE_PAC_LEAF
#endif #endif /* __ARM_FEATURE_UNALIGNED */
bx lr .cfi_endproc
.cantunwind
.size memcpy, .-memcpy .fnend
.size memcpy, .-memcpy
@@ -29,13 +29,17 @@
* *
****************************************************************************/ ****************************************************************************/
#include "arm_asm.h"
.thumb .thumb
.syntax unified .syntax unified
.global memmove .global memmove
.type memmove, %function .type memmove, %function
memmove: memmove:
.fnstart
.cfi_startproc
prologue 4
cmp r0, r1 cmp r0, r1
push {r4}
bls 3f bls 3f
adds r3, r1, r2 adds r3, r1, r2
cmp r0, r3 cmp r0, r3
@@ -49,9 +53,10 @@ memmove:
strb r4, [r1, #-1]! strb r4, [r1, #-1]!
bne 1b bne 1b
2: 2:
pop {r4} .cfi_remember_state
bx lr epilogue 4
3: 3:
.cfi_restore_state
cmp r2, #0 cmp r2, #0
beq 2b beq 2b
add r2, r2, r1 add r2, r2, r1
@@ -61,6 +66,8 @@ memmove:
cmp r2, r1 cmp r2, r1
strb r4, [r3, #1]! strb r4, [r3, #1]!
bne 4b bne 4b
pop {r4} epilogue 4
bx lr .cfi_endproc
.size memmove, . - memmove .cantunwind
.fnend
.size memmove, . - memmove
@@ -29,12 +29,16 @@
* *
****************************************************************************/ ****************************************************************************/
#include "arm_asm.h"
.thumb .thumb
.syntax unified .syntax unified
.global memset .global memset
.type memset, %function .type memset, %function
.fnstart
.cfi_startproc
memset: memset:
push {r4, r5, r6} prologue 4 6
lsls r4, r0, #30 lsls r4, r0, #30
beq 10f beq 10f
subs r4, r2, #1 subs r4, r2, #1
@@ -99,10 +103,14 @@ memset:
cmp r3, r4 cmp r3, r4
bne 8b bne 8b
9: 9:
pop {r4, r5, r6} .cfi_remember_state
bx lr epilogue 4 6
10: 10:
.cfi_restore_state
mov r4, r2 mov r4, r2
mov r3, r0 mov r3, r0
b 3b b 3b
.size memset, . - memset .cfi_endproc
.cantunwind
.fnend
.size memset, . - memset
+21 -11
View File
@@ -32,6 +32,8 @@
/* Very similar to the generic code, but uses Thumb2 as implemented /* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */ in ARMv7-M. */
#include "arm_asm.h"
/* Parameters and result. */ /* Parameters and result. */
#define src1 r0 #define src1 r0
#define src2 r1 #define src2 r1
@@ -47,8 +49,10 @@
.thumb .thumb
.syntax unified .syntax unified
def_fn strcmp def_fn strcmp
.fnstart
.cfi_sections .debug_frame .cfi_sections .debug_frame
.cfi_startproc .cfi_startproc
prologue push_ip=HAVE_PAC_LEAF
eor tmp1, src1, src2 eor tmp1, src1, src2
tst tmp1, #3 tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */ /* Strings not at same byte offset from a word boundary. */
@@ -85,6 +89,7 @@ def_fn strcmp
ldreq data2, [src2], #4 ldreq data2, [src2], #4
beq 4b beq 4b
2: 2:
.cfi_remember_state
/* There's a zero or a different byte in the word */ /* There's a zero or a different byte in the word */
S2HI result, data1, #24 S2HI result, data1, #24
S2LO data1, data1, #8 S2LO data1, data1, #8
@@ -99,7 +104,7 @@ def_fn strcmp
both cases the other bits in RESULT are all zero. For DATA2 the both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits little-endian case we can't just shift the interesting bits
up. */ up. */
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
@@ -109,11 +114,11 @@ def_fn strcmp
lsrs result, result, #24 lsrs result, result, #24
subs result, result, data2 subs result, result, data2
#endif #endif
bx lr epilogue push_ip=HAVE_PAC_LEAF
#if 0 #if 0
/* The assembly code below is based on the following algorithm. */ /* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
#define RSHIFT << #define RSHIFT <<
#define LSHIFT >> #define LSHIFT >>
@@ -208,8 +213,10 @@ def_fn strcmp
/* First of all, compare bytes until src1(sp1) is word-aligned. */ /* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned: .Lstrcmp_unaligned:
.cfi_restore_state
tst src1, #3 tst src1, #3
beq 2f beq 2f
.cfi_remember_state
ldrb data1, [src1], #1 ldrb data1, [src1], #1
ldrb data2, [src2], #1 ldrb data2, [src2], #1
cmp data1, #1 cmp data1, #1
@@ -217,12 +224,13 @@ def_fn strcmp
cmpcs data1, data2 cmpcs data1, data2
beq .Lstrcmp_unaligned beq .Lstrcmp_unaligned
sub result, data1, data2 sub result, data1, data2
bx lr epilogue push_ip=HAVE_PAC_LEAF
2: 2:
.cfi_restore_state
stmfd sp!, {r5} stmfd sp!, {r5}
.cfi_def_cfa_offset 4 .cfi_adjust_cfa_offset 4
.cfi_offset 5, -4 .cfi_rel_offset 5, 0
ldr data1, [src1], #4 ldr data1, [src1], #4
and tmp2, src2, #3 and tmp2, src2, #3
@@ -358,8 +366,8 @@ def_fn strcmp
.cfi_remember_state .cfi_remember_state
ldmfd sp!, {r5} ldmfd sp!, {r5}
.cfi_restore 5 .cfi_restore 5
.cfi_def_cfa_offset 0 .cfi_adjust_cfa_offset -4
bx lr epilogue push_ip=HAVE_PAC_LEAF
.Lstrcmp_tail: .Lstrcmp_tail:
.cfi_restore_state .cfi_restore_state
@@ -375,7 +383,9 @@ def_fn strcmp
sub result, r2, result sub result, r2, result
ldmfd sp!, {r5} ldmfd sp!, {r5}
.cfi_restore 5 .cfi_restore 5
.cfi_def_cfa_offset 0 .cfi_adjust_cfa_offset -4
bx lr epilogue push_ip=HAVE_PAC_LEAF
.cfi_endproc .cfi_endproc
.size strcmp, . - strcmp .cantunwind
.fnend
.size strcmp, . - strcmp
@@ -62,7 +62,8 @@
* *
****************************************************************************/ ****************************************************************************/
#include "acle-compat.h" #include "arm-acle-compat.h"
#include "arm_asm.h"
.macro def_fn f p2align=0 .macro def_fn f p2align=0
.text .text
@@ -82,7 +83,11 @@
/* This code requires Thumb. */ /* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M' #if __ARM_ARCH_PROFILE == 'M'
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv7e-m .arch armv7e-m
#endif /* if __ARM_ARCH >= 8 */
#else #else
.arch armv6t2 .arch armv6t2
#endif #endif
@@ -104,8 +109,10 @@
#define tmp2 r5 #define tmp2 r5
def_fn strlen p2align=6 def_fn strlen p2align=6
.fnstart
.cfi_startproc
prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0] pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7 bic src, srcin, #7
mvn const_m1, #0 mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@@ -155,6 +162,7 @@ def_fn strlen p2align=6
beq .Lloop_aligned beq .Lloop_aligned
.Lnull_found: .Lnull_found:
.cfi_remember_state
cmp data1a, #0 cmp data1a, #0
itt eq itt eq
addeq result, result, #4 addeq result, result, #4
@@ -163,11 +171,11 @@ def_fn strlen p2align=6
rev data1a, data1a rev data1a, data1a
#endif #endif
clz data1a, data1a clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */ add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr epilogue 4 5 push_ip=HAVE_PAC_LEAF
.Lmisaligned8: .Lmisaligned8:
.cfi_restore_state
ldrd data1a, data1b, [src] ldrd data1a, data1b, [src]
and tmp2, tmp1, #3 and tmp2, tmp1, #3
rsb result, tmp1, #0 rsb result, tmp1, #0
@@ -181,4 +189,7 @@ def_fn strlen p2align=6
movne data1a, const_m1 movne data1a, const_m1
mov const_0, #0 mov const_0, #0
b .Lstart_realigned b .Lstart_realigned
.size strlen, . - strlen .cfi_endproc
.cantunwind
.fnend
.size strlen, . - strlen
@@ -80,6 +80,9 @@
.syntax unified .syntax unified
#include "arm-acle-compat.h"
#include "arm_asm.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c @ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON) #if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R' #if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
@@ -270,10 +273,14 @@ memchr:
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP) #elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M' #if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m #if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else #else
.arch armv6t2 .arch armv7e-m
#endif #endif /* __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif /* __ARM_ARCH_PROFILE == 'M' */
@ this lets us check a flag in a 00/ff byte easily in either endianness @ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__ #ifdef __ARMEB__
@@ -290,11 +297,14 @@ memchr:
.p2align 4,,15 .p2align 4,,15
.global memchr .global memchr
.type memchr,%function .type memchr,%function
.fnstart
.cfi_startproc
memchr: memchr:
@ r0 = start of memory to scan @ r0 = start of memory to scan
@ r1 = character to look for @ r1 = character to look for
@ r2 = length @ r2 = length
@ returns r0 = pointer to character or NULL if not found @ returns r0 = pointer to character or NULL if not found
prologue
and r1,r1,#0xff @ Don't trust the caller to pass a char and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever cmp r2,#16 @ If short don't bother with anything clever
@@ -316,6 +326,11 @@ memchr:
10: 10:
@ We are aligned, we know we have at least 8 bytes to work with @ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7} push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16 orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8 bic r4, r2, #7 @ Number of double words to work with * 8
@@ -337,6 +352,11 @@ memchr:
bne 15b @ (Flags from the subs above) bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7} pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
and r1,r1,#0xff @ r1 back to a single character and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done @ after the double words have been done
@@ -352,17 +372,29 @@ memchr:
bne 21b @ on r2 flags bne 21b @ on r2 flags
40: 40:
.cfi_remember_state
movs r0,#0 @ not found movs r0,#0 @ not found
bx lr epilogue
50: 50:
.cfi_restore_state
.cfi_remember_state
subs r0,r0,#1 @ found subs r0,r0,#1 @ found
bx lr epilogue
60: @ We're here because the fast path found a hit 60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was @ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested @ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value @ r5 has the 00/ff pattern for the first word, r6 has the chained value
@ This point is reached from cbnz midway through label 15 prior to
@ popping r4-r7 off the stack. .cfi_restore_state alone disregards
@ this, so we manually correct this.
.cfi_restore_state @ Standard post-prologue state
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
cmp r5, #0 cmp r5, #0
itte eq itte eq
moveq r5, r6 @ the end is in the 2nd word moveq r5, r6 @ the end is in the 2nd word
@@ -382,8 +414,16 @@ memchr:
61: 61:
pop {r4,r5,r6,r7} pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
subs r0,r0,#1 subs r0,r0,#1
bx lr epilogue
.cfi_endproc
.cantunwind
.fnend
#else #else
/* Defined in memchr-stub.c. */ /* Defined in memchr-stub.c. */
#endif #endif
+35 -13
View File
@@ -50,6 +50,8 @@
__OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64. __OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64.
__OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16. __OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16.
*/ */
#include "arm_asm.h"
#ifndef __OPT_BIG_BLOCK_SIZE #ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16) #define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif #endif
@@ -90,11 +92,21 @@
.global memcpy .global memcpy
.thumb .thumb
.thumb_func .thumb_func
.fnstart
.cfi_startproc
.type memcpy, %function .type memcpy, %function
memcpy: memcpy:
@ r0: dst @ r0: dst
@ r1: src @ r1: src
@ r2: len @ r2: len
#ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in
function body. */
prologue push_ip=HAVE_PAC_LEAF
mov ip, r0
#else
prologue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
#ifdef __ARM_FEATURE_MVE #ifdef __ARM_FEATURE_MVE
mov r3, lr mov r3, lr
wlstp.8 lr, r2, 2f wlstp.8 lr, r2, 2f
@@ -104,15 +116,14 @@ memcpy:
vstrb.8 q0, [r2], #16 vstrb.8 q0, [r2], #16
letp lr, 1b letp lr, 1b
2: 2:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
bx r3 bx r3
#else #else
#ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in
function body. */
mov ip, r0
#else
push {r0}
#endif
orr r3, r1, r0 orr r3, r1, r0
ands r3, r3, #3 ands r3, r3, #3
bne .Lmisaligned_copy bne .Lmisaligned_copy
@@ -194,15 +205,17 @@ memcpy:
#endif /* __ARM_FEATURE_UNALIGNED */ #endif /* __ARM_FEATURE_UNALIGNED */
.Ldone: .Ldone:
.cfi_remember_state
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else #else
pop {r0} epilogue 0 push_ip=HAVE_PAC_LEAF
#endif #endif /* __ARM_FEATURE_UNALIGNED */
bx lr
.align 2 .align 2
.Lmisaligned_copy: .Lmisaligned_copy:
.cfi_restore_state
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
/* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy /* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy
once destination is adjusted to aligned. */ once destination is adjusted to aligned. */
@@ -263,6 +276,9 @@ memcpy:
/* dst is aligned, but src isn't. Misaligned copy. */ /* dst is aligned, but src isn't. Misaligned copy. */
push {r4, r5} push {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
subs r2, #4 subs r2, #4
/* Backward r1 by misaligned bytes, to make r1 aligned. /* Backward r1 by misaligned bytes, to make r1 aligned.
@@ -315,6 +331,9 @@ memcpy:
adds r2, #4 adds r2, #4
subs r1, ip subs r1, ip
pop {r4, r5} pop {r4, r5}
.cfi_restore 4
.cfi_restore 5
.cfi_adjust_cfa_offset -8
#endif /* __ARM_FEATURE_UNALIGNED */ #endif /* __ARM_FEATURE_UNALIGNED */
@@ -337,9 +356,12 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED #ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else #else
pop {r0} epilogue 0 push_ip=HAVE_PAC_LEAF
#endif #endif /* __ARM_FEATURE_UNALIGNED */
bx lr
#endif #endif
.cfi_endproc
.cantunwind
.fnend
.size memcpy, .-memcpy .size memcpy, .-memcpy
@@ -29,13 +29,17 @@
* *
****************************************************************************/ ****************************************************************************/
#include "arm_asm.h"
.thumb .thumb
.syntax unified .syntax unified
.global memmove .global memmove
.type memmove, %function .type memmove, %function
memmove: memmove:
.fnstart
.cfi_startproc
prologue 4
cmp r0, r1 cmp r0, r1
push {r4}
bls 3f bls 3f
adds r3, r1, r2 adds r3, r1, r2
cmp r0, r3 cmp r0, r3
@@ -49,9 +53,10 @@ memmove:
strb r4, [r1, #-1]! strb r4, [r1, #-1]!
bne 1b bne 1b
2: 2:
pop {r4} .cfi_remember_state
bx lr epilogue 4
3: 3:
.cfi_restore_state
cmp r2, #0 cmp r2, #0
beq 2b beq 2b
add r2, r2, r1 add r2, r2, r1
@@ -61,6 +66,8 @@ memmove:
cmp r2, r1 cmp r2, r1
strb r4, [r3, #1]! strb r4, [r3, #1]!
bne 4b bne 4b
pop {r4} epilogue 4
bx lr .cfi_endproc
.cantunwind
.fnend
.size memmove, . - memmove .size memmove, . - memmove
@@ -29,11 +29,16 @@
* *
****************************************************************************/ ****************************************************************************/
#include "arm_asm.h"
.thumb .thumb
.syntax unified .syntax unified
.global memset .global memset
.type memset, %function .type memset, %function
.fnstart
.cfi_startproc
memset: memset:
prologue 4 6
#ifdef __ARM_FEATURE_MVE #ifdef __ARM_FEATURE_MVE
vdup.8 q0, r1 vdup.8 q0, r1
mov r3, lr mov r3, lr
@@ -43,9 +48,11 @@ memset:
vstrb.8 q0, [r1], #16 vstrb.8 q0, [r1], #16
letp lr, 1b letp lr, 1b
2: 2:
.cfi_remember_state
epilogue 4 6
.cfi_restore_state
bx r3 bx r3
#else #else
push {r4, r5, r6}
lsls r4, r0, #30 lsls r4, r0, #30
beq 10f beq 10f
subs r4, r2, #1 subs r4, r2, #1
@@ -110,11 +117,15 @@ memset:
cmp r3, r4 cmp r3, r4
bne 8b bne 8b
9: 9:
pop {r4, r5, r6} .cfi_remember_state
bx lr epilogue 4 6
10: 10:
.cfi_restore_state
mov r4, r2 mov r4, r2
mov r3, r0 mov r3, r0
b 3b b 3b
#endif #endif
.cfi_endproc
.cantunwind
.fnend
.size memset, . - memset .size memset, . - memset
+23 -13
View File
@@ -29,6 +29,11 @@
* *
****************************************************************************/ ****************************************************************************/
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
#include "arm_asm.h"
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
#define S2LO lsl #define S2LO lsl
#define S2LOEQ lsleq #define S2LOEQ lsleq
@@ -59,9 +64,6 @@
\f: \f:
.endm .endm
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
/* Parameters and result. */ /* Parameters and result. */
#define src1 r0 #define src1 r0
#define src2 r1 #define src2 r1
@@ -77,8 +79,10 @@
.thumb .thumb
.syntax unified .syntax unified
def_fn strcmp def_fn strcmp
.fnstart
.cfi_sections .debug_frame .cfi_sections .debug_frame
.cfi_startproc .cfi_startproc
prologue push_ip=HAVE_PAC_LEAF
eor tmp1, src1, src2 eor tmp1, src1, src2
tst tmp1, #3 tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */ /* Strings not at same byte offset from a word boundary. */
@@ -115,6 +119,7 @@ def_fn strcmp
ldreq data2, [src2], #4 ldreq data2, [src2], #4
beq 4b beq 4b
2: 2:
.cfi_remember_state
/* There's a zero or a different byte in the word */ /* There's a zero or a different byte in the word */
S2HI result, data1, #24 S2HI result, data1, #24
S2LO data1, data1, #8 S2LO data1, data1, #8
@@ -129,7 +134,7 @@ def_fn strcmp
both cases the other bits in RESULT are all zero. For DATA2 the both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits little-endian case we can't just shift the interesting bits
up. */ up. */
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
@@ -139,11 +144,11 @@ def_fn strcmp
lsrs result, result, #24 lsrs result, result, #24
subs result, result, data2 subs result, result, data2
#endif #endif
bx lr epilogue push_ip=HAVE_PAC_LEAF
#if 0 #if 0
/* The assembly code below is based on the following algorithm. */ /* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
#define RSHIFT << #define RSHIFT <<
#define LSHIFT >> #define LSHIFT >>
@@ -238,8 +243,10 @@ def_fn strcmp
/* First of all, compare bytes until src1(sp1) is word-aligned. */ /* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned: .Lstrcmp_unaligned:
.cfi_restore_state
tst src1, #3 tst src1, #3
beq 2f beq 2f
.cfi_remember_state
ldrb data1, [src1], #1 ldrb data1, [src1], #1
ldrb data2, [src2], #1 ldrb data2, [src2], #1
cmp data1, #1 cmp data1, #1
@@ -247,12 +254,13 @@ def_fn strcmp
cmpcs data1, data2 cmpcs data1, data2
beq .Lstrcmp_unaligned beq .Lstrcmp_unaligned
sub result, data1, data2 sub result, data1, data2
bx lr epilogue push_ip=HAVE_PAC_LEAF
2: 2:
.cfi_restore_state
stmfd sp!, {r5} stmfd sp!, {r5}
.cfi_def_cfa_offset 4 .cfi_adjust_cfa_offset 4
.cfi_offset 5, -4 .cfi_rel_offset 5, 0
ldr data1, [src1], #4 ldr data1, [src1], #4
and tmp2, src2, #3 and tmp2, src2, #3
@@ -388,8 +396,8 @@ def_fn strcmp
.cfi_remember_state .cfi_remember_state
ldmfd sp!, {r5} ldmfd sp!, {r5}
.cfi_restore 5 .cfi_restore 5
.cfi_def_cfa_offset 0 .cfi_adjust_cfa_offset -4
bx lr epilogue push_ip=HAVE_PAC_LEAF
.Lstrcmp_tail: .Lstrcmp_tail:
.cfi_restore_state .cfi_restore_state
@@ -405,7 +413,9 @@ def_fn strcmp
sub result, r2, result sub result, r2, result
ldmfd sp!, {r5} ldmfd sp!, {r5}
.cfi_restore 5 .cfi_restore 5
.cfi_def_cfa_offset 0 .cfi_adjust_cfa_offset -4
bx lr epilogue push_ip=HAVE_PAC_LEAF
.cfi_endproc .cfi_endproc
.cantunwind
.fnend
.size strcmp, . - strcmp .size strcmp, . - strcmp
@@ -62,6 +62,9 @@
* *
****************************************************************************/ ****************************************************************************/
#include "arm-acle-compat.h"
#include "arm_asm.h"
.macro def_fn f p2align=0 .macro def_fn f p2align=0
.text .text
.p2align \p2align .p2align \p2align
@@ -80,7 +83,11 @@
/* This code requires Thumb. */ /* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M' #if __ARM_ARCH_PROFILE == 'M'
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv7e-m .arch armv7e-m
#endif /* if __ARM_ARCH >= 8 */
#else #else
.arch armv6t2 .arch armv6t2
#endif #endif
@@ -102,8 +109,10 @@
#define tmp2 r5 #define tmp2 r5
def_fn strlen p2align=6 def_fn strlen p2align=6
.fnstart
.cfi_startproc
prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0] pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7 bic src, srcin, #7
mvn const_m1, #0 mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@@ -153,6 +162,7 @@ def_fn strlen p2align=6
beq .Lloop_aligned beq .Lloop_aligned
.Lnull_found: .Lnull_found:
.cfi_remember_state
cmp data1a, #0 cmp data1a, #0
itt eq itt eq
addeq result, result, #4 addeq result, result, #4
@@ -161,11 +171,11 @@ def_fn strlen p2align=6
rev data1a, data1a rev data1a, data1a
#endif #endif
clz data1a, data1a clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */ add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr epilogue 4 5 push_ip=HAVE_PAC_LEAF
.Lmisaligned8: .Lmisaligned8:
.cfi_restore_state
ldrd data1a, data1b, [src] ldrd data1a, data1b, [src]
and tmp2, tmp1, #3 and tmp2, tmp1, #3
rsb result, tmp1, #0 rsb result, tmp1, #0
@@ -179,4 +189,7 @@ def_fn strlen p2align=6
movne data1a, const_m1 movne data1a, const_m1
mov const_0, #0 mov const_0, #0
b .Lstart_realigned b .Lstart_realigned
.cfi_endproc
.cantunwind
.fnend
.size strlen, . - strlen .size strlen, . - strlen