mirror of
https://github.com/apache/nuttx.git
synced 2026-05-28 11:56:10 +08:00
libc: add arm64 libc function
Porting optimized memory and string functions from newlib and bionic. Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
This commit is contained in:
@@ -490,6 +490,7 @@ libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
|
|||||||
libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
|
libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
|
||||||
libs/libc/machine/arm/armv7-r/gnu/arch_memcpy.S
|
libs/libc/machine/arm/armv7-r/gnu/arch_memcpy.S
|
||||||
libs/libc/machine/arm/armv7-r/gnu/arch_strlen.S
|
libs/libc/machine/arm/armv7-r/gnu/arch_strlen.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strnlen.S
|
||||||
================================================
|
================================================
|
||||||
|
|
||||||
Copyright (c) 2013, Linaro Limited
|
Copyright (c) 2013, Linaro Limited
|
||||||
@@ -739,6 +740,286 @@ libs/libc/machine/arm/armv8-m/gnu/arch_strlen.S
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_memchr.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strchr.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strchrnul.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strrchr.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2014, ARM Limited
|
||||||
|
All rights Reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the company nor the names of its contributors
|
||||||
|
may be used to endorse or promote products derived from this
|
||||||
|
software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_memcmp.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2018, Linaro Limited
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Linaro Limited nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
Copyright (c) 2017 ARM Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the company may not be used to endorse or promote
|
||||||
|
products derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_memcpy.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_memmove.S
|
||||||
|
libs/libc/machine/arm64/gnu/arch_memset.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2012-2013, Linaro Limited
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Linaro Limited nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
Copyright (c) 2015 ARM Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the company may not be used to endorse or promote
|
||||||
|
products derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strcmp.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2012-2018, Linaro Limited
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Linaro Limited nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strcpy.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2013, 2014, 2015 ARM Ltd.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the Linaro nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strlen.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2013-2015, Linaro Limited
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Linaro Limited nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
libs/libc/machine/arm64/gnu/arch_strncmp.S
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Copyright (c) 2013, 2018, Linaro Limited
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Linaro Limited nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
libs/libc/machine/risc-v/gnu/arch_memset.S
|
libs/libc/machine/risc-v/gnu/arch_memset.S
|
||||||
libs/libc/machine/risc-v/gnu/arch_strcmp.S
|
libs/libc/machine/risc-v/gnu/arch_strcmp.S
|
||||||
================================================
|
================================================
|
||||||
@@ -5543,7 +5824,8 @@ drivers/mtd/at24xx.c
|
|||||||
POSSIBILITY OF SUCH DAMAGE.
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
libs/libc/machine/arm/armv8-m
|
libs/libc/machine/arm/armv8-m
|
||||||
=============================
|
libs/libc/machine/arm64/gnu/arch_setjmp.S
|
||||||
|
==============================================
|
||||||
|
|
||||||
Copyright (c) 2011, 2012 ARM Ltd. All rights reserved.
|
Copyright (c) 2011, 2012 ARM Ltd. All rights reserved.
|
||||||
|
|
||||||
|
|||||||
@@ -72,10 +72,18 @@ config LIBC_ARCH_STRCHR
|
|||||||
bool
|
bool
|
||||||
default n
|
default n
|
||||||
|
|
||||||
|
config LIBC_ARCH_STRCHRNUL
|
||||||
|
bool
|
||||||
|
default n
|
||||||
|
|
||||||
config LIBC_ARCH_STRCMP
|
config LIBC_ARCH_STRCMP
|
||||||
bool
|
bool
|
||||||
default n
|
default n
|
||||||
|
|
||||||
|
config LIBC_ARCH_STRNCMP
|
||||||
|
bool
|
||||||
|
default n
|
||||||
|
|
||||||
config LIBC_ARCH_STRCPY
|
config LIBC_ARCH_STRCPY
|
||||||
bool
|
bool
|
||||||
default n
|
default n
|
||||||
@@ -100,6 +108,10 @@ config LIBC_ARCH_STRNLEN
|
|||||||
bool
|
bool
|
||||||
default n
|
default n
|
||||||
|
|
||||||
|
config LIBC_ARCH_STRRCHR
|
||||||
|
bool
|
||||||
|
default n
|
||||||
|
|
||||||
config LIBC_ARCH_ELF
|
config LIBC_ARCH_ELF
|
||||||
bool
|
bool
|
||||||
default n
|
default n
|
||||||
@@ -174,6 +186,9 @@ config LIBM_ARCH_TRUNCF
|
|||||||
if ARCH_ARM
|
if ARCH_ARM
|
||||||
source "libs/libc/machine/arm/Kconfig"
|
source "libs/libc/machine/arm/Kconfig"
|
||||||
endif
|
endif
|
||||||
|
if ARCH_ARM64
|
||||||
|
source "libs/libc/machine/arm64/Kconfig"
|
||||||
|
endif
|
||||||
if ARCH_RISCV
|
if ARCH_RISCV
|
||||||
source "libs/libc/machine/risc-v/Kconfig"
|
source "libs/libc/machine/risc-v/Kconfig"
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -25,6 +25,9 @@ endif
|
|||||||
ifeq ($(CONFIG_ARCH_ARM),y)
|
ifeq ($(CONFIG_ARCH_ARM),y)
|
||||||
include $(TOPDIR)/libs/libc/machine/arm/Make.defs
|
include $(TOPDIR)/libs/libc/machine/arm/Make.defs
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(CONFIG_ARCH_ARM64),y)
|
||||||
|
include $(TOPDIR)/libs/libc/machine/arm64/Make.defs
|
||||||
|
endif
|
||||||
ifeq ($(CONFIG_ARCH_RISCV),y)
|
ifeq ($(CONFIG_ARCH_RISCV),y)
|
||||||
include $(TOPDIR)/libs/libc/machine/risc-v/Make.defs
|
include $(TOPDIR)/libs/libc/machine/risc-v/Make.defs
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -0,0 +1,106 @@
|
|||||||
|
#
|
||||||
|
# For a description of the syntax of this configuration file,
|
||||||
|
# see the file kconfig-language.txt in the NuttX tools repository.
|
||||||
|
#
|
||||||
|
|
||||||
|
config ARM64_MEMCHR
|
||||||
|
bool "Enable optimized memchr() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_MEMCHR
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific memchr() library function
|
||||||
|
|
||||||
|
config ARM64_MEMCMP
|
||||||
|
bool "Enable optimized memcmp() for ARM64"
|
||||||
|
select LIBC_ARCH_MEMCMP
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific memcmp() library function
|
||||||
|
|
||||||
|
config ARM64_MEMCPY
|
||||||
|
bool "Enable optimized memcpy() for ARM64"
|
||||||
|
select LIBC_ARCH_MEMCPY
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific memcpy() library function
|
||||||
|
|
||||||
|
config ARM64_MEMSET
|
||||||
|
bool "Enable optimized memset() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_MEMSET
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific memset() library function
|
||||||
|
|
||||||
|
config ARM64_MEMMOVE
|
||||||
|
bool "Enable optimized memmove() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_MEMMOVE
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific memmove() library function
|
||||||
|
|
||||||
|
config ARM64_STRCHR
|
||||||
|
bool "Enable optimized strchr() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRCHR
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strchr() library function
|
||||||
|
|
||||||
|
config ARM64_STRCHRNUL
|
||||||
|
bool "Enable optimized strchrnul() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRCHRNUL
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strchrnul() library function
|
||||||
|
|
||||||
|
config ARM64_STRCMP
|
||||||
|
bool "Enable optimized strcmp() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRCMP
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strcmp() library function
|
||||||
|
|
||||||
|
config ARM64_STRCPY
|
||||||
|
bool "Enable optimized strcpy() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRCPY
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strcpy() library function
|
||||||
|
|
||||||
|
config ARM64_STRLEN
|
||||||
|
bool "Enable optimized strlen() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRLEN
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strlen() library function
|
||||||
|
|
||||||
|
config ARM64_STRNCMP
|
||||||
|
bool "Enable optimized strncmp() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRNCMP
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strncmp() library function
|
||||||
|
|
||||||
|
config ARM64_STRNLEN
|
||||||
|
bool "Enable optimized strnlen() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRNLEN
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strnlen() library function
|
||||||
|
|
||||||
|
config ARM64_STRRCHR
|
||||||
|
bool "Enable optimized strrchr() for ARM64"
|
||||||
|
default n
|
||||||
|
select LIBC_ARCH_STRRCHR
|
||||||
|
depends on ARCH_TOOLCHAIN_GNU
|
||||||
|
---help---
|
||||||
|
Enable optimized ARM64 specific strrchr() library function
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
############################################################################
|
||||||
|
# libs/libc/machine/arm64/Make.defs
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership. The
|
||||||
|
# ASF licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance with the
|
||||||
|
# License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
############################################################################
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_MEMCHR),y)
|
||||||
|
ASRCS += arch_memchr.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_MEMCMP),y)
|
||||||
|
ASRCS += arch_memcmp.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_MEMCPY),y)
|
||||||
|
ASRCS += arch_memcpy.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_MEMMOVE),y)
|
||||||
|
ASRCS += arch_memmove.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_MEMSET),y)
|
||||||
|
ASRCS += arch_memset.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRCHR),y)
|
||||||
|
ASRCS += arch_strchr.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRCHRNUL),y)
|
||||||
|
ASRCS += arch_strchrnul.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRCMP),y)
|
||||||
|
ASRCS += arch_strcmp.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRCPY),y)
|
||||||
|
ASRCS += arch_strcpy.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRLEN),y)
|
||||||
|
ASRCS += arch_strlen.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRNCMP),y)
|
||||||
|
ASRCS += arch_strncmp.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRNLEN),y)
|
||||||
|
ASRCS += arch_strnlen.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARM64_STRRCHR),y)
|
||||||
|
ASRCS += arch_strrchr.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARCH_SETJMP_H),y)
|
||||||
|
ASRCS += arch_setjmp.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
|
||||||
|
DEPPATH += --dep-path machine/arm64/gnu
|
||||||
|
VPATH += :machine/arm64/gnu
|
||||||
|
endif
|
||||||
|
|
||||||
|
DEPPATH += --dep-path machine/arm64
|
||||||
|
VPATH += :machine/arm64
|
||||||
@@ -0,0 +1,173 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_memchr.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2014, ARM Limited
|
||||||
|
* All rights Reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the company nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
* Neon Available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Register aliases used by memchr. */
/* Arguments and results. */
|
||||||
|
#define srcin x0
|
||||||
|
#define chrin w1
|
||||||
|
#define cntin x2
|
||||||
|
|
||||||
|
/* The result is produced in x0, the same register srcin arrives in. */
#define result x0
|
||||||
|
|
||||||
|
/* General-purpose working registers. */
#define src x3
|
||||||
|
#define tmp x4
|
||||||
|
#define wtmp2 w5
|
||||||
|
#define synd x6
|
||||||
|
#define soff x9
|
||||||
|
#define cntrem x10
|
||||||
|
|
||||||
|
/* Vector registers. */
#define vrepchr v0
|
||||||
|
#define vdata1 v1
|
||||||
|
#define vdata2 v2
|
||||||
|
#define vhas_chr1 v3
|
||||||
|
#define vhas_chr2 v4
|
||||||
|
#define vrepmask v5
|
||||||
|
#define vend v6
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Core algorithm:
|
||||||
|
*
|
||||||
|
* For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
|
||||||
|
* per byte. For each tuple, bit 0 is set if the relevant byte matched the
|
||||||
|
* requested character and bit 1 is not used (faster than using a 32bit
|
||||||
|
* syndrome). Since the bits in the syndrome reflect exactly the order in which
|
||||||
|
* things occur in the original string, counting trailing zeros allows us to
|
||||||
|
* identify exactly which byte has matched.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: open the definition of the global function NAME.
 * Switches to .text, aligns to 2^N bytes (N defaults to 0), exports NAME,
 * types it as a function symbol and emits its label.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memchr: search the first cntin (x2) bytes starting at srcin (x0) for the
 * low byte of chrin (w1).  On return x0 holds the address of the first
 * matching byte, or 0 when nothing matches or cntin is zero.
 */
def_fn memchr
|
||||||
|
/* Do not dereference srcin if no bytes to compare. */
|
||||||
|
cbz cntin, .Lzero_length
|
||||||
|
/*
|
||||||
|
* Magic constant 0x40100401 allows us to identify which lane matches
|
||||||
|
* the requested byte.
|
||||||
|
*/
|
||||||
|
mov wtmp2, #0x0401
|
||||||
|
movk wtmp2, #0x4010, lsl #16
|
||||||
|
dup vrepchr.16b, chrin
|
||||||
|
/* Work with aligned 32-byte chunks */
|
||||||
|
bic src, srcin, #31
|
||||||
|
dup vrepmask.4s, wtmp2
|
||||||
|
/* Z is set when srcin is already 32-byte aligned; b.eq below tests it. */
ands soff, srcin, #31
|
||||||
|
and cntrem, cntin, #31
|
||||||
|
b.eq .Lloop
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Input string is not 32-byte aligned. We calculate the syndrome
|
||||||
|
* value for the aligned 32 bytes block containing the first bytes
|
||||||
|
* and mask the irrelevant part.
|
||||||
|
*/
|
||||||
|
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
sub tmp, soff, #32
|
||||||
|
/* cntin -= (32 - soff), the bytes of the request that lie in this block.
 * The flags set here are the ones tested by the b.ls further down.
 */
adds cntin, cntin, tmp
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||||
|
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||||
|
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||||
|
mov synd, vend.2d[0]
|
||||||
|
/* Clear the soff*2 lower bits */
|
||||||
|
lsl tmp, soff, #1
|
||||||
|
lsr synd, synd, tmp
|
||||||
|
lsl synd, synd, tmp
|
||||||
|
/* The first block can also be the last */
|
||||||
|
b.ls .Lmasklast
|
||||||
|
/* Have we found something already? */
|
||||||
|
cbnz synd, .Ltail
|
||||||
|
|
||||||
|
.Lloop:
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
subs cntin, cntin, #32
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* If we're out of data we finish regardless of the result */
|
||||||
|
b.ls .Lend
|
||||||
|
/* Use a fast check for the termination condition */
|
||||||
|
orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
|
||||||
|
addp vend.2d, vend.2d, vend.2d
|
||||||
|
mov synd, vend.2d[0]
|
||||||
|
/* We're not out of data, loop if we haven't found the character */
|
||||||
|
cbz synd, .Lloop
|
||||||
|
|
||||||
|
.Lend:
|
||||||
|
/* Termination condition found, let's calculate the syndrome value */
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||||
|
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||||
|
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||||
|
mov synd, vend.2d[0]
|
||||||
|
/* Only do the clear for the last possible block */
|
||||||
|
/* (the flags are still those of the subs in .Lloop) */
b.hi .Ltail
|
||||||
|
|
||||||
|
.Lmasklast:
|
||||||
|
/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
|
||||||
|
add tmp, cntrem, soff
|
||||||
|
and tmp, tmp, #31
|
||||||
|
sub tmp, tmp, #32
|
||||||
|
neg tmp, tmp, lsl #1
|
||||||
|
lsl synd, synd, tmp
|
||||||
|
lsr synd, synd, tmp
|
||||||
|
|
||||||
|
.Ltail:
|
||||||
|
/* Count the trailing zeros using bit reversing */
|
||||||
|
rbit synd, synd
|
||||||
|
/* Compensate the last post-increment */
|
||||||
|
sub src, src, #32
|
||||||
|
/* Check that we have found a character */
|
||||||
|
cmp synd, #0
|
||||||
|
/* And count the leading zeros */
|
||||||
|
clz synd, synd
|
||||||
|
/* Compute the potential result */
|
||||||
|
add result, src, synd, lsr #1
|
||||||
|
/* Select result or NULL */
|
||||||
|
csel result, xzr, result, eq
|
||||||
|
ret
|
||||||
|
|
||||||
|
.Lzero_length:
|
||||||
|
mov result, #0
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size memchr, . - memchr
|
||||||
@@ -0,0 +1,196 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_memcmp.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 ARM Ltd
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The name of the company may not be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* L(name) expands to the assembler-local label .Lname. */
#define L(l) .L ## l
|
||||||
|
|
||||||
|
/* Parameters and result. */
|
||||||
|
#define src1 x0
|
||||||
|
#define src2 x1
|
||||||
|
#define limit x2
|
||||||
|
#define result w0
|
||||||
|
|
||||||
|
/* Internal variables. */
|
||||||
|
/* data1/data1w and data2/data2w are the 64-bit and 32-bit names of x3 and x5. */
#define data1 x3
|
||||||
|
#define data1w w3
|
||||||
|
#define data1h x4
|
||||||
|
#define data2 x5
|
||||||
|
#define data2w w5
|
||||||
|
#define data2h x6
|
||||||
|
#define tmp1 x7
|
||||||
|
#define tmp2 x8
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: open the definition of the global function NAME.
 * Switches to .text, aligns to 2^N bytes (N defaults to 0), exports NAME,
 * types it as a function symbol and emits its label.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp: compare limit (x2) bytes at src1 (x0) with those at src2 (x1).
 * w0 is 0 when all bytes match.  Otherwise its sign follows the unsigned
 * order of the first differing bytes: -1 or 1 when leaving through
 * L(return), or the byte difference when leaving through L(byte_loop).
 */
def_fn memcmp p2align=6
|
||||||
|
/* limit = n - 8; a borrow (lo) means fewer than 8 bytes to compare. */
subs limit, limit, 8
|
||||||
|
b.lo L(less8)
|
||||||
|
|
||||||
|
ldr data1, [src1], 8
|
||||||
|
ldr data2, [src2], 8
|
||||||
|
cmp data1, data2
|
||||||
|
b.ne L(return)
|
||||||
|
|
||||||
|
subs limit, limit, 8
|
||||||
|
b.gt L(more16)
|
||||||
|
|
||||||
|
/* 8..16 bytes: limit is now n - 16 (<= 0) and both pointers have advanced
 * by 8, so these loads fetch the final 8 bytes, possibly overlapping the
 * 8 bytes already compared.
 */
ldr data1, [src1, limit]
|
||||||
|
ldr data2, [src2, limit]
|
||||||
|
b L(return)
|
||||||
|
|
||||||
|
L(more16):
|
||||||
|
ldr data1, [src1], 8
|
||||||
|
ldr data2, [src2], 8
|
||||||
|
cmp data1, data2
|
||||||
|
bne L(return)
|
||||||
|
|
||||||
|
/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
|
||||||
|
strings. */
|
||||||
|
subs limit, limit, 16
|
||||||
|
b.ls L(last_bytes)
|
||||||
|
|
||||||
|
/* We overlap loads between 0-32 bytes at either side of SRC1 when we
|
||||||
|
try to align, so limit it only to strings larger than 128 bytes. */
|
||||||
|
cmp limit, 96
|
||||||
|
b.ls L(loop16)
|
||||||
|
|
||||||
|
/* Align src1 and adjust src2 with bytes not yet done. */
|
||||||
|
and tmp1, src1, 15
|
||||||
|
add limit, limit, tmp1
|
||||||
|
sub src1, src1, tmp1
|
||||||
|
sub src2, src2, tmp1
|
||||||
|
|
||||||
|
/* Loop performing 16 bytes per iteration using aligned src1.
|
||||||
|
Limit is pre-decremented by 16 and must be larger than zero.
|
||||||
|
Exit if <= 16 bytes left to do or if the data is not equal. */
|
||||||
|
.p2align 4
|
||||||
|
L(loop16):
|
||||||
|
ldp data1, data1h, [src1], 16
|
||||||
|
ldp data2, data2h, [src2], 16
|
||||||
|
subs limit, limit, 16
|
||||||
|
/* Chain of conditions: limit still above zero (hi), low words equal, high
 * words equal.  When a link fails the ccmp writes nzcv = 0, which clears Z
 * and makes the b.eq fall through.
 */
ccmp data1, data2, 0, hi
|
||||||
|
ccmp data1h, data2h, 0, eq
|
||||||
|
b.eq L(loop16)
|
||||||
|
|
||||||
|
cmp data1, data2
|
||||||
|
bne L(return)
|
||||||
|
mov data1, data1h
|
||||||
|
mov data2, data2h
|
||||||
|
cmp data1, data2
|
||||||
|
bne L(return)
|
||||||
|
|
||||||
|
/* Compare last 1-16 bytes using unaligned access. */
|
||||||
|
L(last_bytes):
|
||||||
|
add src1, src1, limit
|
||||||
|
add src2, src2, limit
|
||||||
|
ldp data1, data1h, [src1]
|
||||||
|
ldp data2, data2h, [src2]
|
||||||
|
cmp data1, data2
|
||||||
|
bne L(return)
|
||||||
|
mov data1, data1h
|
||||||
|
mov data2, data2h
|
||||||
|
cmp data1, data2
|
||||||
|
|
||||||
|
/* Compare data bytes and set return value to 0, -1 or 1. */
|
||||||
|
L(return):
|
||||||
|
#ifndef __AARCH64EB__
|
||||||
|
/* Little-endian: byte-reverse so that the lowest-addressed byte becomes the
 * most significant one before the unsigned compare.
 */
rev data1, data1
|
||||||
|
rev data2, data2
|
||||||
|
#endif
|
||||||
|
cmp data1, data2
|
||||||
|
L(ret_eq):
|
||||||
|
cset result, ne
|
||||||
|
cneg result, result, lo
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
/* Compare up to 8 bytes. Limit is [-8..-1]. */
|
||||||
|
L(less8):
|
||||||
|
adds limit, limit, 4
|
||||||
|
b.lo L(less4)
|
||||||
|
ldr data1w, [src1], 4
|
||||||
|
ldr data2w, [src2], 4
|
||||||
|
cmp data1w, data2w
|
||||||
|
b.ne L(return)
|
||||||
|
sub limit, limit, 4
|
||||||
|
L(less4):
|
||||||
|
/* After this add, limit is the number of bytes still to compare (0..3). */
adds limit, limit, 4
|
||||||
|
beq L(ret_eq)
|
||||||
|
L(byte_loop):
|
||||||
|
ldrb data1w, [src1], 1
|
||||||
|
ldrb data2w, [src2], 1
|
||||||
|
subs limit, limit, 1
|
||||||
|
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
|
||||||
|
b.eq L(byte_loop)
|
||||||
|
sub result, data1w, data2w
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size memcmp, . - memcmp
|
||||||
@@ -0,0 +1,232 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_memcpy.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2012-2013, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015 ARM Ltd
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The name of the company may not be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Register aliases used by memcpy.
 * dstin/src/count are the incoming arguments; A..D name 16-byte register
 * pairs (_l/_h halves, with _lw/_hw being the 32-bit names of the same
 * registers).  E_l/E_h/F_l/F_h reuse src/count/srcend/dst, and tmp1 shares
 * x9 with B_h, so those aliases are only usable once the underlying value
 * is no longer needed.
 */
#define dstin x0
|
||||||
|
#define src x1
|
||||||
|
#define count x2
|
||||||
|
#define dst x3
|
||||||
|
#define srcend x4
|
||||||
|
#define dstend x5
|
||||||
|
#define A_l x6
|
||||||
|
#define A_lw w6
|
||||||
|
#define A_h x7
|
||||||
|
#define A_hw w7
|
||||||
|
#define B_l x8
|
||||||
|
#define B_lw w8
|
||||||
|
#define B_h x9
|
||||||
|
#define C_l x10
|
||||||
|
#define C_h x11
|
||||||
|
#define D_l x12
|
||||||
|
#define D_h x13
|
||||||
|
#define E_l src
|
||||||
|
#define E_h count
|
||||||
|
#define F_l srcend
|
||||||
|
#define F_h dst
|
||||||
|
#define tmp1 x9
|
||||||
|
|
||||||
|
/* L(name) expands to the assembler-local label .Lname. */
#define L(l) .L ## l
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: open the definition of the global function NAME.
 * Switches to .text, aligns to 2^N bytes (N defaults to 0), exports NAME,
 * types it as a function symbol and emits its label.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* Copies are split into 3 main cases: small copies of up to 16 bytes,
|
||||||
|
medium copies of 17..96 bytes which are fully unrolled. Large copies
|
||||||
|
of more than 96 bytes align the destination and use an unrolled loop
|
||||||
|
processing 64 bytes per iteration.
|
||||||
|
Small and medium copies read all data before writing, allowing any
|
||||||
|
kind of overlap, and memmove tailcalls memcpy for these cases as
|
||||||
|
well as non-overlapping copies.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* memcpy: copy count (x2) bytes from src (x1) to dstin (x0).
 * x0 is not written anywhere in this routine, so it still holds dstin on
 * return.
 */
def_fn memcpy p2align=6
|
||||||
|
prfm PLDL1KEEP, [src]
|
||||||
|
add srcend, src, count
|
||||||
|
add dstend, dstin, count
|
||||||
|
cmp count, 16
|
||||||
|
b.ls L(copy16)
|
||||||
|
cmp count, 96
|
||||||
|
b.hi L(copy_long)
|
||||||
|
|
||||||
|
/* Medium copies: 17..96 bytes. */
|
||||||
|
/* tmp1 = count - 1 lies in 16..95 here: bit 6 set means count >= 65. */
sub tmp1, count, 1
|
||||||
|
ldp A_l, A_h, [src]
|
||||||
|
tbnz tmp1, 6, L(copy96)
|
||||||
|
ldp D_l, D_h, [srcend, -16]
|
||||||
|
/* Bit 5 clear means count <= 32: the first and last 16 bytes cover it all. */
tbz tmp1, 5, 1f
|
||||||
|
ldp B_l, B_h, [src, 16]
|
||||||
|
ldp C_l, C_h, [srcend, -32]
|
||||||
|
stp B_l, B_h, [dstin, 16]
|
||||||
|
stp C_l, C_h, [dstend, -32]
|
||||||
|
1:
|
||||||
|
stp A_l, A_h, [dstin]
|
||||||
|
stp D_l, D_h, [dstend, -16]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
/* Small copies: 0..16 bytes. */
|
||||||
|
L(copy16):
|
||||||
|
cmp count, 8
|
||||||
|
b.lo 1f
|
||||||
|
ldr A_l, [src]
|
||||||
|
ldr A_h, [srcend, -8]
|
||||||
|
str A_l, [dstin]
|
||||||
|
str A_h, [dstend, -8]
|
||||||
|
ret
|
||||||
|
.p2align 4
|
||||||
|
1:
|
||||||
|
/* count < 8 here; bit 2 set means 4..7 bytes, copied as two 4-byte words. */
tbz count, 2, 1f
|
||||||
|
ldr A_lw, [src]
|
||||||
|
ldr A_hw, [srcend, -4]
|
||||||
|
str A_lw, [dstin]
|
||||||
|
str A_hw, [dstend, -4]
|
||||||
|
ret
|
||||||
|
|
||||||
|
/* Copy 0..3 bytes. Use a branchless sequence that copies the same
|
||||||
|
byte 3 times if count==1, or the 2nd byte twice if count==2. */
|
||||||
|
1:
|
||||||
|
cbz count, 2f
|
||||||
|
lsr tmp1, count, 1
|
||||||
|
ldrb A_lw, [src]
|
||||||
|
ldrb A_hw, [srcend, -1]
|
||||||
|
ldrb B_lw, [src, tmp1]
|
||||||
|
strb A_lw, [dstin]
|
||||||
|
strb B_lw, [dstin, tmp1]
|
||||||
|
strb A_hw, [dstend, -1]
|
||||||
|
2: ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
/* Copy 65..96 bytes. Copy 64 bytes from the start and
|
||||||
|
32 bytes from the end. */
|
||||||
|
L(copy96):
|
||||||
|
ldp B_l, B_h, [src, 16]
|
||||||
|
ldp C_l, C_h, [src, 32]
|
||||||
|
ldp D_l, D_h, [src, 48]
|
||||||
|
/* E_l/E_h reuse src/count and F_l/F_h reuse srcend/dst; none of those
 * values is read again after these two loads.
 */
ldp E_l, E_h, [srcend, -32]
|
||||||
|
ldp F_l, F_h, [srcend, -16]
|
||||||
|
stp A_l, A_h, [dstin]
|
||||||
|
stp B_l, B_h, [dstin, 16]
|
||||||
|
stp C_l, C_h, [dstin, 32]
|
||||||
|
stp D_l, D_h, [dstin, 48]
|
||||||
|
stp E_l, E_h, [dstend, -32]
|
||||||
|
stp F_l, F_h, [dstend, -16]
|
||||||
|
ret
|
||||||
|
|
||||||
|
/* Align DST to 16 byte alignment so that we don't cross cache line
|
||||||
|
boundaries on both loads and stores. There are at least 96 bytes
|
||||||
|
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||||
|
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(copy_long):
|
||||||
|
and tmp1, dstin, 15
|
||||||
|
bic dst, dstin, 15
|
||||||
|
ldp D_l, D_h, [src]
|
||||||
|
sub src, src, tmp1
|
||||||
|
add count, count, tmp1 /* Count is now 16 too large. */
|
||||||
|
ldp A_l, A_h, [src, 16]
|
||||||
|
stp D_l, D_h, [dstin]
|
||||||
|
ldp B_l, B_h, [src, 32]
|
||||||
|
ldp C_l, C_h, [src, 48]
|
||||||
|
ldp D_l, D_h, [src, 64]!
|
||||||
|
subs count, count, 128 + 16 /* Test and readjust count. */
|
||||||
|
b.ls 2f
|
||||||
|
1:
|
||||||
|
stp A_l, A_h, [dst, 16]
|
||||||
|
ldp A_l, A_h, [src, 16]
|
||||||
|
stp B_l, B_h, [dst, 32]
|
||||||
|
ldp B_l, B_h, [src, 32]
|
||||||
|
stp C_l, C_h, [dst, 48]
|
||||||
|
ldp C_l, C_h, [src, 48]
|
||||||
|
stp D_l, D_h, [dst, 64]!
|
||||||
|
ldp D_l, D_h, [src, 64]!
|
||||||
|
subs count, count, 64
|
||||||
|
b.hi 1b
|
||||||
|
|
||||||
|
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||||
|
bytes, so it is safe to always copy 64 bytes from the end even if
|
||||||
|
there is just 1 byte left. */
|
||||||
|
2:
|
||||||
|
/* E_l/E_h overwrite src/count, which are not read again from here on. */
ldp E_l, E_h, [srcend, -64]
|
||||||
|
stp A_l, A_h, [dst, 16]
|
||||||
|
ldp A_l, A_h, [srcend, -48]
|
||||||
|
stp B_l, B_h, [dst, 32]
|
||||||
|
ldp B_l, B_h, [srcend, -32]
|
||||||
|
stp C_l, C_h, [dst, 48]
|
||||||
|
ldp C_l, C_h, [srcend, -16]
|
||||||
|
stp D_l, D_h, [dst, 64]
|
||||||
|
stp E_l, E_h, [dstend, -64]
|
||||||
|
stp A_l, A_h, [dstend, -48]
|
||||||
|
stp B_l, B_h, [dstend, -32]
|
||||||
|
stp C_l, C_h, [dstend, -16]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size memcpy, . - memcpy
|
||||||
@@ -0,0 +1,157 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_memmove.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015 ARM Ltd
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The name of the company may not be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: open the definition of the global function NAME.
 * Switches to .text, aligns to 2^N bytes (N defaults to 0), exports NAME,
 * types it as a function symbol and emits its label.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* Register aliases used by memmove.  A..D name 16-byte register pairs
 * (_l/_h halves); E_l/E_h reuse count/tmp1 and are therefore only usable
 * once those two values are no longer needed.
 */
/* Parameters and result. */
|
||||||
|
#define dstin x0
|
||||||
|
#define src x1
|
||||||
|
#define count x2
|
||||||
|
#define srcend x3
|
||||||
|
#define dstend x4
|
||||||
|
#define tmp1 x5
|
||||||
|
#define A_l x6
|
||||||
|
#define A_h x7
|
||||||
|
#define B_l x8
|
||||||
|
#define B_h x9
|
||||||
|
#define C_l x10
|
||||||
|
#define C_h x11
|
||||||
|
#define D_l x12
|
||||||
|
#define D_h x13
|
||||||
|
#define E_l count
|
||||||
|
#define E_h tmp1
|
||||||
|
|
||||||
|
/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
|
||||||
|
Larger backwards copies are also handled by memcpy. The only remaining
|
||||||
|
case is forward large copies. The destination is aligned, and an
|
||||||
|
unrolled loop processes 64 bytes per iteration.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* memmove: copy count (x2) bytes from src (x1) to dstin (x0); the regions
 * may overlap.  Branches to memcpy when count <= 96 or when dstin - src,
 * taken as an unsigned value, is >= count.  Otherwise it copies 64-byte
 * blocks from the end of the buffers downwards and finishes with the first
 * 64 bytes.  x0 is not written on this path.
 */
def_fn memmove, 6
|
||||||
|
sub tmp1, dstin, src
|
||||||
|
cmp count, 96
|
||||||
|
/* count <= 96: nzcv = 2 sets C, so the b.hs below is taken.
 * count > 96: compare tmp1 with count (b.hs is an unsigned >=).
 */
ccmp tmp1, count, 2, hi
|
||||||
|
b.hs memcpy
|
||||||
|
|
||||||
|
/* dstin == src: nothing to copy. */
cbz tmp1, 3f
|
||||||
|
add dstend, dstin, count
|
||||||
|
add srcend, src, count
|
||||||
|
|
||||||
|
/* Align dstend to 16 byte alignment so that we don't cross cache line
|
||||||
|
boundaries on both loads and stores. There are at least 96 bytes
|
||||||
|
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||||
|
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||||
|
|
||||||
|
and tmp1, dstend, 15
|
||||||
|
ldp D_l, D_h, [srcend, -16]
|
||||||
|
sub srcend, srcend, tmp1
|
||||||
|
sub count, count, tmp1
|
||||||
|
ldp A_l, A_h, [srcend, -16]
|
||||||
|
stp D_l, D_h, [dstend, -16]
|
||||||
|
ldp B_l, B_h, [srcend, -32]
|
||||||
|
ldp C_l, C_h, [srcend, -48]
|
||||||
|
ldp D_l, D_h, [srcend, -64]!
|
||||||
|
sub dstend, dstend, tmp1
|
||||||
|
subs count, count, 128
|
||||||
|
b.ls 2f
|
||||||
|
nop
|
||||||
|
1:
|
||||||
|
stp A_l, A_h, [dstend, -16]
|
||||||
|
ldp A_l, A_h, [srcend, -16]
|
||||||
|
stp B_l, B_h, [dstend, -32]
|
||||||
|
ldp B_l, B_h, [srcend, -32]
|
||||||
|
stp C_l, C_h, [dstend, -48]
|
||||||
|
ldp C_l, C_h, [srcend, -48]
|
||||||
|
stp D_l, D_h, [dstend, -64]!
|
||||||
|
ldp D_l, D_h, [srcend, -64]!
|
||||||
|
subs count, count, 64
|
||||||
|
b.hi 1b
|
||||||
|
|
||||||
|
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||||
|
bytes, so it is safe to always copy 64 bytes from the start even if
|
||||||
|
there is just 1 byte left. */
|
||||||
|
2:
|
||||||
|
/* E_l/E_h overwrite count/tmp1, which are not read again from here on. */
ldp E_l, E_h, [src, 48]
|
||||||
|
stp A_l, A_h, [dstend, -16]
|
||||||
|
ldp A_l, A_h, [src, 32]
|
||||||
|
stp B_l, B_h, [dstend, -32]
|
||||||
|
ldp B_l, B_h, [src, 16]
|
||||||
|
stp C_l, C_h, [dstend, -48]
|
||||||
|
ldp C_l, C_h, [src]
|
||||||
|
stp D_l, D_h, [dstend, -64]
|
||||||
|
stp E_l, E_h, [dstin, 48]
|
||||||
|
stp A_l, A_h, [dstin, 32]
|
||||||
|
stp B_l, B_h, [dstin, 16]
|
||||||
|
stp C_l, C_h, [dstin]
|
||||||
|
3: ret
|
||||||
|
|
||||||
|
.size memmove, . - memmove
|
||||||
@@ -0,0 +1,242 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_memset.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2012-2013, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015 ARM Ltd
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The name of the company may not be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Register aliases used by memset.
 *
 * dstin, val/valw and count are read as the three incoming arguments
 * (x0, x1/w1, x2).  dst, dstend, tmp1, tmp2 and zva_len are scratch
 * registers; the "w" names are the 32-bit views of the same registers.
 */
#define dstin x0
|
||||||
|
#define val x1
|
||||||
|
#define valw w1
|
||||||
|
#define count x2
|
||||||
|
#define dst x3
|
||||||
|
#define dstend x4
|
||||||
|
#define tmp1 x5
|
||||||
|
#define tmp1w w5
|
||||||
|
#define tmp2 x6
|
||||||
|
#define tmp2w w6
|
||||||
|
#define zva_len x7
|
||||||
|
#define zva_lenw w7
|
||||||
|
|
||||||
|
/* L(l) pastes ".L" in front of l to form an assembler-local label name. */
#define L(l) .L ## l
|
||||||
|
|
||||||
|
/* def_fn f [p2align=N]
 *
 * Switch to .text, align to 2^N bytes (N defaults to 0), declare f as a
 * global symbol of type function and emit its label.  The user of the
 * macro supplies the matching .size directive.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memset
 *
 * Fill count (x2) bytes starting at dstin (x0) with the low byte of
 * val (w1).  x0 is never written, so it still holds the destination
 * pointer on return.
 *
 * The byte is first replicated across q0, then the size is dispatched:
 *   0..15 bytes   scalar stores selected by the bits of count
 *   16..96 bytes  L(set_medium) / L(set96)
 *   > 96 bytes    L(set_long), optionally using DC ZVA for zero fills
 *
 * The short and medium paths store from both dstin and dstend, so the
 * stores may overlap instead of looping over single bytes.
 */
def_fn memset p2align=6
|
||||||
|
|
||||||
|
dup v0.16B, valw
|
||||||
|
add dstend, dstin, count
|
||||||
|
|
||||||
|
cmp count, 96
|
||||||
|
b.hi L(set_long)
|
||||||
|
cmp count, 16
|
||||||
|
b.hs L(set_medium)
|
||||||
|
/* Fewer than 16 bytes: copy the replicated byte pattern back into val
   so that the scalar stores below write the fill byte in every lane. */
mov val, v0.D[0]
|
||||||
|
|
||||||
|
/* Set 0..15 bytes. */
|
||||||
|
/* Bits 3, 2 and 1 of count pick 8-, 4- and 2-byte stores. */
tbz count, 3, 1f
|
||||||
|
str val, [dstin]
|
||||||
|
str val, [dstend, -8]
|
||||||
|
ret
|
||||||
|
nop
|
||||||
|
1: tbz count, 2, 2f
|
||||||
|
str valw, [dstin]
|
||||||
|
str valw, [dstend, -4]
|
||||||
|
ret
|
||||||
|
2: cbz count, 3f
|
||||||
|
strb valw, [dstin]
|
||||||
|
tbz count, 1, 3f
|
||||||
|
strh valw, [dstend, -2]
|
||||||
|
3: ret
|
||||||
|
|
||||||
|
/* Set 16..96 bytes. */
|
||||||
|
L(set_medium):
|
||||||
|
str q0, [dstin]
|
||||||
|
tbnz count, 6, L(set96)
|
||||||
|
str q0, [dstend, -16]
|
||||||
|
tbz count, 5, 1f
|
||||||
|
str q0, [dstin, 16]
|
||||||
|
str q0, [dstend, -32]
|
||||||
|
1: ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
/* Set 64..96 bytes. Write 64 bytes from the start and
|
||||||
|
32 bytes from the end. */
|
||||||
|
L(set96):
|
||||||
|
str q0, [dstin, 16]
|
||||||
|
stp q0, q0, [dstin, 32]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 3
|
||||||
|
nop
|
||||||
|
L(set_long):
|
||||||
|
and valw, valw, 255
|
||||||
|
bic dst, dstin, 15
|
||||||
|
str q0, [dstin]
|
||||||
|
/* Take the DC ZVA path only when count >= 256 and the fill byte is
   zero; otherwise ccmp leaves the flags as "not equal". */
cmp count, 256
|
||||||
|
ccmp valw, 0, 0, cs
|
||||||
|
b.eq L(try_zva)
|
||||||
|
L(no_zva):
|
||||||
|
sub count, dstend, dst /* Count is 16 too large. */
|
||||||
|
sub dst, dst, 16 /* Dst is biased by -32. */
|
||||||
|
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
|
||||||
|
1: stp q0, q0, [dst, 32]
|
||||||
|
stp q0, q0, [dst, 64]!
|
||||||
|
L(tail64):
|
||||||
|
subs count, count, 64
|
||||||
|
b.hi 1b
|
||||||
|
/* The last 64 bytes are always written relative to dstend and may
   overlap what the loop already stored. */
2: stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 3
|
||||||
|
L(try_zva):
|
||||||
|
/* DCZID_EL0 bit 4 (DZP) set means DC ZVA is prohibited; the low four
   bits (BS) encode the block size, 4 meaning 64 bytes and 5 meaning
   128 bytes as tested below. */
mrs tmp1, dczid_el0
|
||||||
|
tbnz tmp1w, 4, L(no_zva)
|
||||||
|
and tmp1w, tmp1w, 15
|
||||||
|
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||||
|
b.ne L(zva_128)
|
||||||
|
|
||||||
|
/* Write the first and last 64 byte aligned block using stp rather
|
||||||
|
than using DC ZVA. This is faster on some cores.
|
||||||
|
*/
|
||||||
|
L(zva_64):
|
||||||
|
str q0, [dst, 16]
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
bic dst, dst, 63
|
||||||
|
stp q0, q0, [dst, 64]
|
||||||
|
stp q0, q0, [dst, 96]
|
||||||
|
sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
|
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||||
|
add dst, dst, 128
|
||||||
|
nop
|
||||||
|
1: dc zva, dst
|
||||||
|
add dst, dst, 64
|
||||||
|
subs count, count, 64
|
||||||
|
b.hi 1b
|
||||||
|
stp q0, q0, [dst, 0]
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 3
|
||||||
|
L(zva_128):
|
||||||
|
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||||
|
b.ne L(zva_other)
|
||||||
|
|
||||||
|
str q0, [dst, 16]
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
stp q0, q0, [dst, 64]
|
||||||
|
stp q0, q0, [dst, 96]
|
||||||
|
bic dst, dst, 127
|
||||||
|
sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
|
sub count, count, 128+128 /* Adjust count and bias for loop. */
|
||||||
|
add dst, dst, 128
|
||||||
|
1: dc zva, dst
|
||||||
|
add dst, dst, 128
|
||||||
|
subs count, count, 128
|
||||||
|
b.hi 1b
|
||||||
|
stp q0, q0, [dstend, -128]
|
||||||
|
stp q0, q0, [dstend, -96]
|
||||||
|
stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
L(zva_other):
|
||||||
|
/* Any other block size: zva_len = 4 << BS bytes. */
mov tmp2w, 4
|
||||||
|
lsl zva_lenw, tmp2w, tmp1w
|
||||||
|
add tmp1, zva_len, 64 /* Max alignment bytes written. */
|
||||||
|
cmp count, tmp1
|
||||||
|
blo L(no_zva)
|
||||||
|
|
||||||
|
sub tmp2, zva_len, 1
|
||||||
|
add tmp1, dst, zva_len
|
||||||
|
add dst, dst, 16
|
||||||
|
subs count, tmp1, dst /* Actual alignment bytes to write. */
|
||||||
|
bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
|
||||||
|
beq 2f
|
||||||
|
1: stp q0, q0, [dst], 64
|
||||||
|
stp q0, q0, [dst, -32]
|
||||||
|
subs count, count, 64
|
||||||
|
b.hi 1b
|
||||||
|
2: mov dst, tmp1
|
||||||
|
sub count, dstend, tmp1 /* Remaining bytes to write. */
|
||||||
|
subs count, count, zva_len
|
||||||
|
b.lo 4f
|
||||||
|
3: dc zva, dst
|
||||||
|
add dst, dst, zva_len
|
||||||
|
subs count, count, zva_len
|
||||||
|
b.hs 3b
|
||||||
|
4: add count, count, zva_len
|
||||||
|
sub dst, dst, 32 /* Bias dst for tail loop. */
|
||||||
|
b L(tail64)
|
||||||
|
|
||||||
|
.size memset, . - memset
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_setjmp.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2011, 2012 ARM Ltd
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The name of the company may not be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
// jmp_buf layout shared by setjmp and longjmp.  Each entry names the
// register(s) and the byte offset from x0 they occupy.  REG_PAIR and
// REG_ONE are defined by each user immediately before expansion, so the
// same list expands to stores in setjmp and to loads in longjmp.
// The x16 slot at offset 96 carries the stack pointer.
#define GPR_LAYOUT \
|
||||||
|
REG_PAIR (x19, x20, 0); \
|
||||||
|
REG_PAIR (x21, x22, 16); \
|
||||||
|
REG_PAIR (x23, x24, 32); \
|
||||||
|
REG_PAIR (x25, x26, 48); \
|
||||||
|
REG_PAIR (x27, x28, 64); \
|
||||||
|
REG_PAIR (x29, x30, 80); \
|
||||||
|
REG_ONE (x16, 96)
|
||||||
|
|
||||||
|
// Floating-point part of the layout: d8-d15, starting at offset 112
// (the eight bytes at offset 104 are not used by either layout).
#define FPR_LAYOUT \
|
||||||
|
REG_PAIR ( d8, d9, 112); \
|
||||||
|
REG_PAIR (d10, d11, 128); \
|
||||||
|
REG_PAIR (d12, d13, 144); \
|
||||||
|
REG_PAIR (d14, d15, 160);
|
||||||
|
|
||||||
|
// int setjmp (jmp_buf)
|
||||||
|
// Stores x19-x30, the stack pointer and d8-d15 into the buffer at x0
// using GPR_LAYOUT / FPR_LAYOUT, then returns 0 in w0.
.global setjmp
|
||||||
|
.type setjmp, %function
|
||||||
|
setjmp:
|
||||||
|
// Copy sp into x16 so it is saved through the common layout.
mov x16, sp
|
||||||
|
#define REG_PAIR(REG1, REG2, OFFS) stp REG1, REG2, [x0, OFFS]
|
||||||
|
#define REG_ONE(REG1, OFFS) str REG1, [x0, OFFS]
|
||||||
|
GPR_LAYOUT
|
||||||
|
FPR_LAYOUT
|
||||||
|
#undef REG_PAIR
|
||||||
|
#undef REG_ONE
|
||||||
|
mov w0, #0
|
||||||
|
ret
|
||||||
|
.size setjmp, .-setjmp
|
||||||
|
|
||||||
|
// void longjmp (jmp_buf, int) __attribute__ ((noreturn))
|
||||||
|
// Reloads x19-x30, x16 and d8-d15 from the buffer at x0, restores sp
// from x16 and branches to the reloaded x30.  w0 is set to w1, or to 1
// when w1 is 0.
.global longjmp
|
||||||
|
.type longjmp, %function
|
||||||
|
longjmp:
|
||||||
|
#define REG_PAIR(REG1, REG2, OFFS) ldp REG1, REG2, [x0, OFFS]
|
||||||
|
#define REG_ONE(REG1, OFFS) ldr REG1, [x0, OFFS]
|
||||||
|
GPR_LAYOUT
|
||||||
|
FPR_LAYOUT
|
||||||
|
#undef REG_PAIR
|
||||||
|
#undef REG_ONE
|
||||||
|
mov sp, x16
|
||||||
|
// cinc adds one to w1 only when w1 == 0, so the result is never 0.
cmp w1, #0
|
||||||
|
cinc w0, w1, eq
|
||||||
|
// use br not ret, as ret is guaranteed to mispredict
|
||||||
|
br x30
|
||||||
|
.size longjmp, .-longjmp
|
||||||
@@ -0,0 +1,161 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strchr.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2014, ARM Limited
|
||||||
|
* All rights Reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the company nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
* Neon Available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
#define srcin x0
|
||||||
|
#define chrin w1
|
||||||
|
|
||||||
|
#define result x0
|
||||||
|
|
||||||
|
/* Scalar working registers: src walks the string in 32-byte steps,
   tmp1/tmp3 hold syndrome values, wtmp2 builds the lane mask constant. */
#define src x2
|
||||||
|
#define tmp1 x3
|
||||||
|
#define wtmp2 w4
|
||||||
|
#define tmp3 x5
|
||||||
|
|
||||||
|
/* Vector working registers: vrepchr is chrin replicated to every byte,
   vdata1/2 the current 32 bytes, vhas_* the per-byte compare results,
   vrepmask_c/vrepmask_0 the character and NUL lane masks, vend1/2 the
   combined results. */
#define vrepchr v0
|
||||||
|
#define vdata1 v1
|
||||||
|
#define vdata2 v2
|
||||||
|
#define vhas_nul1 v3
|
||||||
|
#define vhas_nul2 v4
|
||||||
|
#define vhas_chr1 v5
|
||||||
|
#define vhas_chr2 v6
|
||||||
|
#define vrepmask_0 v7
|
||||||
|
#define vrepmask_c v16
|
||||||
|
#define vend1 v17
|
||||||
|
#define vend2 v18
|
||||||
|
|
||||||
|
/* Core algorithm.
|
||||||
|
|
||||||
|
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||||
|
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||||
|
and little-endian systems). For each tuple, bit 0 is set iff
|
||||||
|
the relevant byte matched the requested character; bit 1 is set
|
||||||
|
iff the relevant byte matched the NUL end of string (we trigger
|
||||||
|
off bit0 for the special case of looking for NUL). Since the bits
|
||||||
|
in the syndrome reflect exactly the order in which things occur
|
||||||
|
in the original string a count_trailing_zeros() operation will
|
||||||
|
identify exactly which byte is causing the termination, and why. */
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
|
||||||
|
/* def_fn f [p2align=N]
 *
 * Switch to .text, align to 2^N bytes (N defaults to 0), declare f as a
 * global symbol of type function and emit its label.  The user of the
 * macro supplies the matching .size directive.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* strchr
 *
 * Scan the string at srcin (x0) for the low byte of chrin (w1).
 * Returns in x0 the address of the first matching byte, or 0 when the
 * NUL terminator is reached first.
 *
 * The string is read in 32-byte hunks aligned down from srcin, so bytes
 * before srcin and after the terminator inside a hunk are loaded but
 * never contribute to the result.
 */
def_fn strchr
|
||||||
|
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||||
|
matches the requested byte. Magic constant 0x80200802 used
|
||||||
|
similarly for NUL termination. */
|
||||||
|
mov wtmp2, #0x0401
|
||||||
|
movk wtmp2, #0x4010, lsl #16
|
||||||
|
dup vrepchr.16b, chrin
|
||||||
|
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||||
|
dup vrepmask_c.4s, wtmp2
|
||||||
|
ands tmp1, srcin, #31
|
||||||
|
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
|
||||||
|
b.eq .Lloop
|
||||||
|
|
||||||
|
/* Input string is not 32-byte aligned. Rather than forcing
|
||||||
|
the padding bytes to a safe value, we calculate the syndrome
|
||||||
|
for all the bytes, but then mask off those bits of the
|
||||||
|
syndrome that are related to the padding. */
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
neg tmp1, tmp1
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||||
|
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||||
|
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||||
|
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||||
|
lsl tmp1, tmp1, #1
|
||||||
|
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||||
|
mov tmp3, #~0
|
||||||
|
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||||
|
/* tmp1 holds -2 * offset; the register shift uses it modulo 64, which
   leaves ones in the low 2 * offset bits, i.e. the syndrome bits of
   the bytes that precede srcin. */
lsr tmp1, tmp3, tmp1
|
||||||
|
|
||||||
|
mov tmp3, vend1.2d[0]
|
||||||
|
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||||
|
cbnz tmp1, .Ltail
|
||||||
|
|
||||||
|
.Lloop:
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* Use a fast check for the termination condition. */
|
||||||
|
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||||
|
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||||
|
orr vend1.16b, vend1.16b, vend2.16b
|
||||||
|
addp vend1.2d, vend1.2d, vend1.2d
|
||||||
|
mov tmp1, vend1.2d[0]
|
||||||
|
cbz tmp1, .Lloop
|
||||||
|
|
||||||
|
/* Termination condition found. Now need to establish exactly why
|
||||||
|
we terminated. */
|
||||||
|
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||||
|
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||||
|
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||||
|
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||||
|
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||||
|
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||||
|
|
||||||
|
mov tmp1, vend1.2d[0]
|
||||||
|
.Ltail:
|
||||||
|
/* Count the trailing zeros, by bit reversing... */
|
||||||
|
rbit tmp1, tmp1
|
||||||
|
/* Re-bias source. */
|
||||||
|
sub src, src, #32
|
||||||
|
clz tmp1, tmp1 /* And counting the leading zeros. */
|
||||||
|
/* Tmp1 is even if the target character was found first. Otherwise
|
||||||
|
we've found the end of string and we weren't looking for NUL. */
|
||||||
|
tst tmp1, #1
|
||||||
|
add result, src, tmp1, lsr #1
|
||||||
|
/* Keep the computed address only for an even bit index; otherwise
   return 0. */
csel result, result, xzr, eq
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size strchr, . - strchr
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strchrnul.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2014, ARM Limited
|
||||||
|
* All rights Reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the company nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
* Neon Available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
#define srcin x0
|
||||||
|
#define chrin w1
|
||||||
|
|
||||||
|
#define result x0
|
||||||
|
|
||||||
|
/* Scalar working registers: src walks the string in 32-byte steps,
   tmp1/tmp3 hold syndrome values, wtmp2 builds the lane mask constant. */
#define src x2
|
||||||
|
#define tmp1 x3
|
||||||
|
#define wtmp2 w4
|
||||||
|
#define tmp3 x5
|
||||||
|
|
||||||
|
/* Vector working registers: vrepchr is chrin replicated to every byte,
   vdata1/2 the current 32 bytes, vhas_* the per-byte compare results,
   vrepmask the lane mask, vend1 the combined result. */
#define vrepchr v0
|
||||||
|
#define vdata1 v1
|
||||||
|
#define vdata2 v2
|
||||||
|
#define vhas_nul1 v3
|
||||||
|
#define vhas_nul2 v4
|
||||||
|
#define vhas_chr1 v5
|
||||||
|
#define vhas_chr2 v6
|
||||||
|
#define vrepmask v7
|
||||||
|
#define vend1 v16
|
||||||
|
|
||||||
|
/* Core algorithm.
|
||||||
|
|
||||||
|
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||||
|
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||||
|
and little-endian systems). For each tuple, bit 0 is set iff
|
||||||
|
the relevant byte matched the requested character or nul. Since the
|
||||||
|
bits in the syndrome reflect exactly the order in which things occur
|
||||||
|
in the original string a count_trailing_zeros() operation will
|
||||||
|
identify exactly which byte is causing the termination. */
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
|
||||||
|
/* def_fn f [p2align=N]
 *
 * Switch to .text, align to 2^N bytes (N defaults to 0), declare f as a
 * global symbol of type function and emit its label.  The user of the
 * macro supplies the matching .size directive.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* strchrnul
 *
 * Scan the string at srcin (x0) for the low byte of chrin (w1).
 * Returns in x0 the address of the first byte that equals either that
 * character or NUL, whichever comes first; the result is never 0-ed.
 *
 * The string is read in 32-byte hunks aligned down from srcin, so bytes
 * before srcin and after the terminator inside a hunk are loaded but
 * never contribute to the result.
 */
def_fn strchrnul
|
||||||
|
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||||
|
matches the termination condition. */
|
||||||
|
mov wtmp2, #0x0401
|
||||||
|
movk wtmp2, #0x4010, lsl #16
|
||||||
|
dup vrepchr.16b, chrin
|
||||||
|
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||||
|
dup vrepmask.4s, wtmp2
|
||||||
|
ands tmp1, srcin, #31
|
||||||
|
b.eq .Lloop
|
||||||
|
|
||||||
|
/* Input string is not 32-byte aligned. Rather than forcing
|
||||||
|
the padding bytes to a safe value, we calculate the syndrome
|
||||||
|
for all the bytes, but then mask off those bits of the
|
||||||
|
syndrome that are related to the padding. */
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
neg tmp1, tmp1
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* A match on the character and a match on NUL are merged here: both
   end the search in the same way. */
orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
|
||||||
|
orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||||
|
lsl tmp1, tmp1, #1
|
||||||
|
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||||
|
mov tmp3, #~0
|
||||||
|
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||||
|
/* tmp1 holds -2 * offset; the register shift uses it modulo 64, which
   leaves ones in the low 2 * offset bits, i.e. the syndrome bits of
   the bytes that precede srcin. */
lsr tmp1, tmp3, tmp1
|
||||||
|
|
||||||
|
mov tmp3, vend1.2d[0]
|
||||||
|
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||||
|
cbnz tmp1, .Ltail
|
||||||
|
|
||||||
|
.Lloop:
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* Use a fast check for the termination condition. */
|
||||||
|
orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||||
|
orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||||
|
orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
|
||||||
|
addp vend1.2d, vend1.2d, vend1.2d
|
||||||
|
mov tmp1, vend1.2d[0]
|
||||||
|
cbz tmp1, .Lloop
|
||||||
|
|
||||||
|
/* Termination condition found. Now need to establish exactly why
|
||||||
|
we terminated. */
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||||
|
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||||
|
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||||
|
|
||||||
|
mov tmp1, vend1.2d[0]
|
||||||
|
.Ltail:
|
||||||
|
/* Count the trailing zeros, by bit reversing... */
|
||||||
|
rbit tmp1, tmp1
|
||||||
|
/* Re-bias source. */
|
||||||
|
sub src, src, #32
|
||||||
|
clz tmp1, tmp1 /* ... and counting the leading zeros. */
|
||||||
|
/* tmp1 is twice the offset into the fragment. */
|
||||||
|
add result, src, tmp1, lsr #1
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size strchrnul, . - strchrnul
|
||||||
@@ -0,0 +1,205 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strcmp.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2012-2018, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* def_fn f [p2align=N]
 *
 * Switch to .text, align to 2^N bytes (N defaults to 0), declare f as a
 * global symbol of type function and emit its label.  The user of the
 * macro supplies the matching .size directive.
 */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* L(label) pastes ".L" in front of label to form an assembler-local
   label name. */
#define L(label) .L ## label
|
||||||
|
|
||||||
|
/* Byte patterns replicated across a 64-bit word.  REP8_01 and REP8_7f
   feed the NUL-detection arithmetic in strcmp; REP8_80 is defined but
   not referenced by the strcmp code in this file. */
#define REP8_01 0x0101010101010101
|
||||||
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||||
|
#define REP8_80 0x8080808080808080
|
||||||
|
|
||||||
|
/* Parameters and result. */
|
||||||
|
#define src1 x0
|
||||||
|
#define src2 x1
|
||||||
|
#define result x0
|
||||||
|
|
||||||
|
/* Internal variables. */
|
||||||
|
#define data1 x2
|
||||||
|
#define data1w w2
|
||||||
|
#define data2 x3
|
||||||
|
#define data2w w3
|
||||||
|
#define has_nul x4
|
||||||
|
#define diff x5
|
||||||
|
#define syndrome x6
|
||||||
|
#define tmp1 x7
|
||||||
|
#define tmp2 x8
|
||||||
|
#define tmp3 x9
|
||||||
|
#define zeroones x10
|
||||||
|
#define pos x11
|
||||||
|
|
||||||
|
/* strcmp
 *
 * Compare the NUL-terminated strings at src1 (x0) and src2 (x1).
 * Returns in x0 zero when the strings are equal, otherwise a negative or
 * positive value according to the first differing byte, compared as
 * unsigned.
 *
 * Three entry cases are distinguished from the low three address bits:
 *   both 8-byte aligned             -> L(loop_aligned)
 *   same non-zero offset            -> L(mutual_align)
 *   different offsets               -> L(misaligned8)
 */
/* Start of performance-critical section -- one 64B cache line. */
|
||||||
|
def_fn strcmp p2align=6
|
||||||
|
eor tmp1, src1, src2
|
||||||
|
mov zeroones, #REP8_01
|
||||||
|
tst tmp1, #7
|
||||||
|
b.ne L(misaligned8)
|
||||||
|
ands tmp1, src1, #7
|
||||||
|
b.ne L(mutual_align)
|
||||||
|
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||||
|
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||||
|
can be done in parallel across the entire word. */
|
||||||
|
L(loop_aligned):
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
L(start_realigned):
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||||
|
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||||
|
orr syndrome, diff, has_nul
|
||||||
|
cbz syndrome, L(loop_aligned)
|
||||||
|
/* End of performance-critical section -- one 64B cache line. */
|
||||||
|
|
||||||
|
L(end):
|
||||||
|
#ifndef __AARCH64EB__
|
||||||
|
rev syndrome, syndrome
|
||||||
|
rev data1, data1
|
||||||
|
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||||
|
that is different, or the top bit of the first zero byte.
|
||||||
|
Shifting left now will bring the critical information into the
|
||||||
|
top bits. */
|
||||||
|
clz pos, syndrome
|
||||||
|
rev data2, data2
|
||||||
|
lsl data1, data1, pos
|
||||||
|
lsl data2, data2, pos
|
||||||
|
/* But we need to zero-extend (char is unsigned) the value and then
|
||||||
|
perform a signed 32-bit subtraction. */
|
||||||
|
lsr data1, data1, #56
|
||||||
|
sub result, data1, data2, lsr #56
|
||||||
|
ret
|
||||||
|
#else
|
||||||
|
/* For big-endian we cannot use the trick with the syndrome value
|
||||||
|
as carry-propagation can corrupt the upper bits if the trailing
|
||||||
|
bytes in the string contain 0x01. */
|
||||||
|
/* However, if there is no NUL byte in the dword, we can generate
|
||||||
|
the result directly. We can't just subtract the bytes as the
|
||||||
|
MSB might be significant. */
|
||||||
|
cbnz has_nul, 1f
|
||||||
|
/* Produces 0, 1 or -1 from an unsigned compare of the two words. */
cmp data1, data2
|
||||||
|
cset result, ne
|
||||||
|
cneg result, result, lo
|
||||||
|
ret
|
||||||
|
1:
|
||||||
|
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||||
|
rev tmp3, data1
|
||||||
|
sub tmp1, tmp3, zeroones
|
||||||
|
orr tmp2, tmp3, #REP8_7f
|
||||||
|
bic has_nul, tmp1, tmp2
|
||||||
|
rev has_nul, has_nul
|
||||||
|
orr syndrome, diff, has_nul
|
||||||
|
clz pos, syndrome
|
||||||
|
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||||
|
that is different, or the top bit of the first zero byte.
|
||||||
|
Shifting left now will bring the critical information into the
|
||||||
|
top bits. */
|
||||||
|
lsl data1, data1, pos
|
||||||
|
lsl data2, data2, pos
|
||||||
|
/* But we need to zero-extend (char is unsigned) the value and then
|
||||||
|
perform a signed 32-bit subtraction. */
|
||||||
|
lsr data1, data1, #56
|
||||||
|
sub result, data1, data2, lsr #56
|
||||||
|
ret
|
||||||
|
#endif
|
||||||
|
|
||||||
|
L(mutual_align):
|
||||||
|
/* Sources are mutually aligned, but are not currently at an
|
||||||
|
alignment boundary. Round down the addresses and then mask off
|
||||||
|
the bytes that precede the start point. */
|
||||||
|
bic src1, src1, #7
|
||||||
|
bic src2, src2, #7
|
||||||
|
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
neg tmp1, tmp1 /* Bits to alignment -64. */
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
mov tmp2, #~0
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* Big-endian. Early bytes are at MSB. */
|
||||||
|
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#else
|
||||||
|
/* Little-endian. Early bytes are at LSB. */
|
||||||
|
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#endif
|
||||||
|
/* Force the bytes before the start point to 0xff in both words so they
   compare equal and cannot be mistaken for a NUL. */
orr data1, data1, tmp2
|
||||||
|
orr data2, data2, tmp2
|
||||||
|
b L(start_realigned)
|
||||||
|
|
||||||
|
L(misaligned8):
|
||||||
|
/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
|
||||||
|
checking to make sure that we don't access beyond page boundary in
|
||||||
|
SRC2. */
|
||||||
|
tst src1, #7
|
||||||
|
b.eq L(loop_misaligned)
|
||||||
|
L(do_misaligned):
|
||||||
|
ldrb data1w, [src1], #1
|
||||||
|
ldrb data2w, [src2], #1
|
||||||
|
/* Leave the byte loop when data1 is NUL (cmp clears carry, so ccmp
   forces "not equal") or when the two bytes differ. */
cmp data1w, #1
|
||||||
|
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||||
|
b.ne L(done)
|
||||||
|
tst src1, #7
|
||||||
|
b.ne L(do_misaligned)
|
||||||
|
|
||||||
|
L(loop_misaligned):
|
||||||
|
/* Test if we are within the last dword of the end of a 4K page. If
|
||||||
|
yes then jump back to the misaligned loop to compare a byte at a time. */
|
||||||
|
and tmp1, src2, #0xff8
|
||||||
|
eor tmp1, tmp1, #0xff8
|
||||||
|
cbz tmp1, L(do_misaligned)
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||||
|
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||||
|
orr syndrome, diff, has_nul
|
||||||
|
cbz syndrome, L(loop_misaligned)
|
||||||
|
b L(end)
|
||||||
|
|
||||||
|
L(done):
|
||||||
|
/* Reached from the byte loop: data1/data2 each hold one zero-extended
   byte, so the difference is the result directly. */
sub result, data1, data2
|
||||||
|
ret
|
||||||
|
.size strcmp, .-strcmp
|
||||||
@@ -0,0 +1,338 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strcpy.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013, 2014, 2015 ARM Ltd.
|
||||||
|
* All rights Reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the company nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
|
||||||
|
|
||||||
|
To test the page crossing code path more thoroughly, compile with
|
||||||
|
-DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
|
||||||
|
entry path. This option is not intended for production use. */
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
#define dstin x0
|
||||||
|
#define srcin x1
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
#define src x2
|
||||||
|
#define dst x3
|
||||||
|
#define data1 x4
|
||||||
|
#define data1w w4
|
||||||
|
#define data2 x5
|
||||||
|
#define data2w w5
|
||||||
|
#define has_nul1 x6
|
||||||
|
#define has_nul2 x7
|
||||||
|
#define tmp1 x8
|
||||||
|
#define tmp2 x9
|
||||||
|
#define tmp3 x10
|
||||||
|
#define tmp4 x11
|
||||||
|
#define zeroones x12
|
||||||
|
#define data1a x13
|
||||||
|
#define data2a x14
|
||||||
|
#define pos x15
|
||||||
|
#define len x16
|
||||||
|
#define to_align x17
|
||||||
|
|
||||||
|
#ifdef BUILD_STPCPY
|
||||||
|
#define STRCPY stpcpy
|
||||||
|
#else
|
||||||
|
#define STRCPY strcpy
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* def_fn f [p2align=0]: open a global function named f in .text. */
.macro def_fn f p2align=0
|
||||||
|
.text /* Emit into the .text section. */
|
||||||
|
.p2align \p2align /* Align the entry to 2^p2align bytes. */
|
||||||
|
.global \f /* Export the symbol. */
|
||||||
|
.type \f, %function /* Mark the symbol as a function. */
|
||||||
|
\f: /* The entry label itself. */
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||||
|
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||||
|
can be done in parallel across the entire word. */
|
||||||
|
|
||||||
|
#define REP8_01 0x0101010101010101
|
||||||
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||||
|
#define REP8_80 0x8080808080808080
|
||||||
|
|
||||||
|
/* AArch64 systems have a minimum page size of 4k. We can do a quick
|
||||||
|
page size check for crossing this boundary on entry and if we
|
||||||
|
do not, then we can short-circuit much of the entry code. We
|
||||||
|
expect early page-crossing strings to be rare (probability of
|
||||||
|
16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
|
||||||
|
predictable, even with random strings.
|
||||||
|
|
||||||
|
We don't bother checking for larger page sizes - the cost of setting
|
||||||
|
up the correct page size is just not worth the extra gain from
|
||||||
|
a small reduction in the cases taking the slow path. Note that
|
||||||
|
we only care about whether the first fetch, which may be
|
||||||
|
misaligned, crosses a page boundary - after that we move to aligned
|
||||||
|
fetches for the remainder of the string. */
|
||||||
|
|
||||||
|
#ifdef STRCPY_TEST_PAGE_CROSS
|
||||||
|
/* Make everything that isn't Qword aligned look like a page cross. */
|
||||||
|
#define MIN_PAGE_P2 4
|
||||||
|
#else
|
||||||
|
#define MIN_PAGE_P2 12
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
|
||||||
|
|
||||||
|
def_fn STRCPY p2align=6 /* Entry point, aligned to 2^6 = 64 bytes. */
|
||||||
|
/* For moderately short strings, the fastest way to do the copy is to
|
||||||
|
calculate the length of the string in the same way as strlen, then
|
||||||
|
essentially do a memcpy of the result. This avoids the need for
|
||||||
|
multiple byte copies and further means that by the time we
|
||||||
|
reach the bulk copy loop we know we can always use DWord
|
||||||
|
accesses. We expect strcpy to rarely be called repeatedly
|
||||||
|
with the same source string, so branch prediction is likely to
|
||||||
|
always be difficult - we mitigate this by preferring
|
||||||
|
conditional select operations over branches whenever this is
|
||||||
|
feasible. */
|
||||||
|
and tmp2, srcin, #(MIN_PAGE_SIZE - 1) /* Offset of srcin within a minimum-size page. */
|
||||||
|
mov zeroones, #REP8_01
|
||||||
|
and to_align, srcin, #15 /* Offset of srcin within its 16-byte granule. */
|
||||||
|
cmp tmp2, #(MIN_PAGE_SIZE - 16)
|
||||||
|
neg tmp1, to_align /* -to_align; read only by .Lpage_cross (cmp's flags are untouched). */
|
||||||
|
/* The first fetch will straddle a (possible) page boundary iff
|
||||||
|
srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
|
||||||
|
aligned string will never fail the page align check, so will
|
||||||
|
always take the fast path. */
|
||||||
|
b.gt .Lpage_cross
|
||||||
|
|
||||||
|
.Lpage_cross_ok:
|
||||||
|
ldp data1, data2, [srcin] /* Load the first 16 bytes of the string. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* Because we expect the end to be found within 16 characters
|
||||||
|
(profiling shows this is the most common case), it's worth
|
||||||
|
swapping the bytes now to save having to recalculate the
|
||||||
|
termination syndrome later. We preserve data1 and data2
|
||||||
|
so that we can re-use the values later on. */
|
||||||
|
rev tmp2, data1
|
||||||
|
sub tmp1, tmp2, zeroones
|
||||||
|
orr tmp2, tmp2, #REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2
|
||||||
|
b.ne .Lfp_le8
|
||||||
|
rev tmp4, data2
|
||||||
|
sub tmp3, tmp4, zeroones
|
||||||
|
orr tmp4, tmp4, #REP8_7f
|
||||||
|
#else
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2 /* Non-zero if the first dword holds a NUL. */
|
||||||
|
b.ne .Lfp_le8
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, #REP8_7f
|
||||||
|
#endif
|
||||||
|
bics has_nul2, tmp3, tmp4 /* Non-zero if the second dword holds a NUL. */
|
||||||
|
b.eq .Lbulk_entry /* No NUL in the first 16 bytes. */
|
||||||
|
|
||||||
|
/* The string is short (<=16 bytes). We don't know exactly how
|
||||||
|
short though, yet. Work out the exact length so that we can
|
||||||
|
quickly select the optimal copy strategy. */
|
||||||
|
.Lfp_gt8: /* First NUL is in data2 (string bytes 8..15). */
|
||||||
|
rev has_nul2, has_nul2
|
||||||
|
clz pos, has_nul2 /* pos = 8 * (index of the first NUL within data2). */
|
||||||
|
mov tmp2, #56
|
||||||
|
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||||
|
sub pos, tmp2, pos /* 56 - pos: shift that puts the NUL in the last byte stored. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
lsr data2, data2, pos
|
||||||
|
#else
|
||||||
|
lsl data2, data2, pos
|
||||||
|
#endif
|
||||||
|
str data2, [dst, #1] /* 8 bytes ending exactly at the NUL. */
|
||||||
|
str data1, [dstin] /* First 8 bytes. */
|
||||||
|
#ifdef BUILD_STPCPY
|
||||||
|
add dstin, dst, #8 /* stpcpy result: address of the NUL just written. */
|
||||||
|
#endif
|
||||||
|
ret
|
||||||
|
|
||||||
|
.Lfp_le8: /* First NUL is in data1 (string bytes 0..7). */
|
||||||
|
rev has_nul1, has_nul1
|
||||||
|
clz pos, has_nul1 /* pos = 8 * (index of the first NUL within data1). */
|
||||||
|
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||||
|
subs tmp2, pos, #24 /* Pos in bits. */
|
||||||
|
b.lt .Lfp_lt4 /* NUL is within the first three bytes. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
mov tmp2, #56
|
||||||
|
sub pos, tmp2, pos
|
||||||
|
lsr data2, data1, pos
|
||||||
|
lsr data1, data1, #32
|
||||||
|
#else
|
||||||
|
lsr data2, data1, tmp2
|
||||||
|
#endif
|
||||||
|
/* 4->7 bytes to copy. */
|
||||||
|
str data2w, [dst, #-3] /* 4 bytes ending exactly at the NUL. */
|
||||||
|
str data1w, [dstin] /* First 4 bytes (may overlap the store above). */
|
||||||
|
#ifdef BUILD_STPCPY
|
||||||
|
mov dstin, dst /* stpcpy result: address of the NUL. */
|
||||||
|
#endif
|
||||||
|
ret
|
||||||
|
.Lfp_lt4: /* NUL index is 0, 1 or 2. */
|
||||||
|
cbz pos, .Lfp_lt2 /* NUL is the very first byte: empty string. */
|
||||||
|
/* 2->3 bytes to copy. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
lsr data1, data1, #48
|
||||||
|
#endif
|
||||||
|
strh data1w, [dstin] /* First two bytes. */
|
||||||
|
/* Fall-through, one byte (max) to go. */
|
||||||
|
.Lfp_lt2:
|
||||||
|
/* Null-terminated string. Last character must be zero! */
|
||||||
|
strb wzr, [dst] /* dst already points at the NUL's position. */
|
||||||
|
#ifdef BUILD_STPCPY
|
||||||
|
mov dstin, dst /* stpcpy result: address of the NUL. */
|
||||||
|
#endif
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 6
|
||||||
|
/* Aligning here ensures that the entry code and main loop all lie
|
||||||
|
within one 64-byte cache line. */
|
||||||
|
.Lbulk_entry:
|
||||||
|
sub to_align, to_align, #16 /* (srcin & 15) - 16: minus the distance to the next 16-byte boundary. */
|
||||||
|
stp data1, data2, [dstin] /* Store the first 16 bytes (no NUL among them). */
|
||||||
|
sub src, srcin, to_align /* src = next 16-byte boundary strictly above srcin. */
|
||||||
|
sub dst, dstin, to_align /* Advance dst by the same distance. */
|
||||||
|
b .Lentry_no_page_cross
|
||||||
|
|
||||||
|
/* The inner loop deals with two Dwords at a time. This has a
|
||||||
|
slightly higher start-up cost, but we should win quite quickly,
|
||||||
|
especially on cores with a high number of issue slots per
|
||||||
|
cycle, as we get much better parallelism out of the operations. */
|
||||||
|
.Lmain_loop:
|
||||||
|
stp data1, data2, [dst], #16 /* Store the previous NUL-free block, then dst += 16. */
|
||||||
|
.Lentry_no_page_cross:
|
||||||
|
ldp data1, data2, [src], #16 /* Load the next aligned block, then src += 16. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, #REP8_7f
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
bics has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||||
|
b.eq .Lmain_loop /* Loop while neither dword contains a NUL. */
|
||||||
|
|
||||||
|
/* Since we know we are copying at least 16 bytes, the fastest way
|
||||||
|
to deal with the tail is to determine the location of the
|
||||||
|
trailing NUL, then (re)copy the 16 bytes leading up to that. */
|
||||||
|
cmp has_nul1, #0 /* ne: the NUL is in data1; eq: it is in data2. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* For big-endian, carry propagation (if the final byte in the
|
||||||
|
string is 0x01) means we cannot use has_nul directly. The
|
||||||
|
easiest way to get the correct byte is to byte-swap the data
|
||||||
|
and calculate the syndrome a second time. */
|
||||||
|
csel data1, data1, data2, ne
|
||||||
|
rev data1, data1
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
#else
|
||||||
|
csel has_nul1, has_nul1, has_nul2, ne
|
||||||
|
#endif
|
||||||
|
rev has_nul1, has_nul1
|
||||||
|
clz pos, has_nul1
|
||||||
|
add tmp1, pos, #72
|
||||||
|
add pos, pos, #8
|
||||||
|
csel pos, pos, tmp1, ne /* pos + 8 if the NUL is in data1, pos + 72 if in data2. */
|
||||||
|
add src, src, pos, lsr #3
|
||||||
|
add dst, dst, pos, lsr #3
|
||||||
|
ldp data1, data2, [src, #-32] /* The 16 source bytes that end at the NUL. */
|
||||||
|
stp data1, data2, [dst, #-16] /* Written so that the last byte stored is the NUL. */
|
||||||
|
#ifdef BUILD_STPCPY
|
||||||
|
sub dstin, dst, #1 /* stpcpy result: address of the NUL. */
|
||||||
|
#endif
|
||||||
|
ret
|
||||||
|
|
||||||
|
.Lpage_cross: /* Taken when srcin's page offset exceeds MIN_PAGE_SIZE - 16. */
|
||||||
|
bic src, srcin, #15 /* Round srcin down to a 16-byte boundary. */
|
||||||
|
/* Start by loading two words at [srcin & ~15], then forcing the
|
||||||
|
bytes that precede srcin to 0xff. This means they never look
|
||||||
|
like termination bytes. */
|
||||||
|
ldp data1, data2, [src]
|
||||||
|
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||||
|
tst to_align, #7
|
||||||
|
csetm tmp2, ne /* All ones, or zero when srcin is 8-byte aligned. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#else
|
||||||
|
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#endif
|
||||||
|
orr data1, data1, tmp2
|
||||||
|
orr data2a, data2, tmp2
|
||||||
|
cmp to_align, #8
|
||||||
|
csinv data1, data1, xzr, lt /* to_align >= 8: data1 wholly precedes srcin, make it all ones. */
|
||||||
|
csel data2, data2, data2a, lt /* to_align < 8: data2 is all string data, keep it unmasked. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, #REP8_7f
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
bics has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||||
|
b.eq .Lpage_cross_ok /* No NUL in this aligned 16-byte block: use the normal entry. */
|
||||||
|
/* We now need to make data1 and data2 look like they've been
|
||||||
|
loaded directly from srcin. Do a rotate on the 128-bit value. */
|
||||||
|
lsl tmp1, to_align, #3 /* Bytes->bits. */
|
||||||
|
neg tmp2, to_align, lsl #3
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
lsl data1a, data1, tmp1
|
||||||
|
lsr tmp4, data2, tmp2
|
||||||
|
lsl data2, data2, tmp1
|
||||||
|
orr tmp4, tmp4, data1a
|
||||||
|
cmp to_align, #8
|
||||||
|
csel data1, tmp4, data2, lt
|
||||||
|
rev tmp2, data1
|
||||||
|
rev tmp4, data2
|
||||||
|
sub tmp1, tmp2, zeroones
|
||||||
|
orr tmp2, tmp2, #REP8_7f
|
||||||
|
sub tmp3, tmp4, zeroones
|
||||||
|
orr tmp4, tmp4, #REP8_7f
|
||||||
|
#else
|
||||||
|
lsr data1a, data1, tmp1
|
||||||
|
lsl tmp4, data2, tmp2
|
||||||
|
lsr data2, data2, tmp1
|
||||||
|
orr tmp4, tmp4, data1a
|
||||||
|
cmp to_align, #8
|
||||||
|
csel data1, tmp4, data2, lt
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, #REP8_7f
|
||||||
|
#endif
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
cbnz has_nul1, .Lfp_le8 /* NUL within the first 8 string bytes. */
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
b .Lfp_gt8 /* Otherwise finish via the 8..15 byte path. */
|
||||||
|
|
||||||
|
.size STRCPY, . - STRCPY
|
||||||
@@ -0,0 +1,242 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strlen.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013-2015, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* To test the page crossing code path more thoroughly, compile with
|
||||||
|
-DTEST_PAGE_CROSS - this will force all calls through the slower
|
||||||
|
entry path. This option is not intended for production use. */
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
#define srcin x0
|
||||||
|
#define len x0
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
#define src x1
|
||||||
|
#define data1 x2
|
||||||
|
#define data2 x3
|
||||||
|
#define has_nul1 x4
|
||||||
|
#define has_nul2 x5
|
||||||
|
#define tmp1 x4
|
||||||
|
#define tmp2 x5
|
||||||
|
#define tmp3 x6
|
||||||
|
#define tmp4 x7
|
||||||
|
#define zeroones x8
|
||||||
|
|
||||||
|
#define L(l) .L ## l
|
||||||
|
|
||||||
|
/* def_fn f [p2align=0]: open a global function named f in .text. */
.macro def_fn f p2align=0
|
||||||
|
.text /* Emit into the .text section. */
|
||||||
|
.p2align \p2align /* Align the entry to 2^p2align bytes. */
|
||||||
|
.global \f /* Export the symbol. */
|
||||||
|
.type \f, %function /* Mark the symbol as a function. */
|
||||||
|
\f: /* The entry label itself. */
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||||
|
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||||
|
can be done in parallel across the entire word. A faster check
|
||||||
|
(X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
|
||||||
|
false hits for characters 129..255. */
|
||||||
|
|
||||||
|
#define REP8_01 0x0101010101010101
|
||||||
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||||
|
#define REP8_80 0x8080808080808080
|
||||||
|
|
||||||
|
#ifdef TEST_PAGE_CROSS
|
||||||
|
# define MIN_PAGE_SIZE 15 /* NOTE(review): strlen then compares (srcin & 14) with -1 using b.gt, which looks always-taken, i.e. every call goes through the page-cross path - confirm. */
|
||||||
|
#else
|
||||||
|
# define MIN_PAGE_SIZE 4096 /* Minimum AArch64 page size (4k), per the comment below. */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Since strings are short on average, we check the first 16 bytes
|
||||||
|
of the string for a NUL character. In order to do an unaligned ldp
|
||||||
|
safely we have to do a page cross check first. If there is a NUL
|
||||||
|
byte we calculate the length from the 2 8-byte words using
|
||||||
|
conditional select to reduce branch mispredictions (it is unlikely
|
||||||
|
strlen will be repeatedly called on strings with the same length).
|
||||||
|
|
||||||
|
If the string is longer than 16 bytes, we align src so we don't need
|
||||||
|
further page cross checks, and process 32 bytes per iteration
|
||||||
|
using the fast NUL check. If we encounter non-ASCII characters,
|
||||||
|
fallback to a second loop using the full NUL check.
|
||||||
|
|
||||||
|
If the page cross check fails, we read 16 bytes from an aligned
|
||||||
|
address, remove any characters before the string, and continue
|
||||||
|
in the main loop using aligned loads. Since strings crossing a
|
||||||
|
page in the first 16 bytes are rare (probability of
|
||||||
|
16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
|
||||||
|
|
||||||
|
AArch64 systems have a minimum page size of 4k. We don't bother
|
||||||
|
checking for larger page sizes - the cost of setting up the correct
|
||||||
|
page size is just not worth the extra gain from a small reduction in
|
||||||
|
the cases taking the slow path. Note that we only care about
|
||||||
|
whether the first fetch, which may be misaligned, crosses a page
|
||||||
|
boundary. */
|
||||||
|
|
||||||
|
def_fn strlen p2align=6 /* Entry point, aligned to 2^6 = 64 bytes. */
|
||||||
|
and tmp1, srcin, MIN_PAGE_SIZE - 1 /* Offset of srcin within a minimum-size page. */
|
||||||
|
mov zeroones, REP8_01
|
||||||
|
cmp tmp1, MIN_PAGE_SIZE - 16
|
||||||
|
b.gt L(page_cross) /* A 16-byte load from srcin would run past the page end. */
|
||||||
|
ldp data1, data2, [srcin]
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* For big-endian, carry propagation (if the final byte in the
|
||||||
|
string is 0x01) means we cannot use has_nul1/2 directly.
|
||||||
|
Since we expect strings to be small and early-exit,
|
||||||
|
byte-swap the data now so has_nul1/2 will be correct. */
|
||||||
|
rev data1, data1
|
||||||
|
rev data2, data2
|
||||||
|
#endif
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2 /* has_nul1 shares x4 with tmp1; has_nul2 shares x5 with tmp2. */
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul2, 0, 0, eq /* Z ends up set only if both syndromes are zero. */
|
||||||
|
beq L(main_loop_entry) /* No NUL in the first 16 bytes. */
|
||||||
|
|
||||||
|
/* Enter with C = has_nul1 == 0. */
|
||||||
|
csel has_nul1, has_nul1, has_nul2, cc
|
||||||
|
mov len, 8 /* len shares x0 with srcin, which is not read again on this path. */
|
||||||
|
rev has_nul1, has_nul1
|
||||||
|
clz tmp1, has_nul1
|
||||||
|
csel len, xzr, len, cc /* Base of 0 if the NUL is in data1, else 8. */
|
||||||
|
add len, len, tmp1, lsr 3 /* Plus the NUL's byte index within that dword. */
|
||||||
|
ret
|
||||||
|
|
||||||
|
/* The inner loop processes 32 bytes per iteration and uses the fast
|
||||||
|
NUL check. If we encounter non-ASCII characters, use a second
|
||||||
|
loop with the accurate NUL check. */
|
||||||
|
.p2align 4
|
||||||
|
L(main_loop_entry):
|
||||||
|
bic src, srcin, 15 /* Round srcin down to a 16-byte boundary. */
|
||||||
|
sub src, src, 16 /* Bias so the pre-indexed +32 below lands on the next aligned block. */
|
||||||
|
L(main_loop):
|
||||||
|
ldp data1, data2, [src, 32]! /* Pre-index: src += 32, then load 16 bytes. */
|
||||||
|
.Lpage_cross_entry: /* Also entered from L(page_cross) with pre-masked data. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp2, tmp1, tmp3
|
||||||
|
tst tmp2, zeroones, lsl 7 /* zeroones << 7 is REP8_80: the fast (X - 1) & 0x80 check. */
|
||||||
|
bne 1f
|
||||||
|
ldp data1, data2, [src, 16] /* Second 16 bytes of this iteration; src is not advanced. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp2, tmp1, tmp3
|
||||||
|
tst tmp2, zeroones, lsl 7
|
||||||
|
beq L(main_loop)
|
||||||
|
add src, src, 16 /* Make src point at the block that triggered the hit. */
|
||||||
|
1:
|
||||||
|
/* The fast check failed, so do the slower, accurate NUL check. */
|
||||||
|
orr tmp2, data1, REP8_7f
|
||||||
|
orr tmp4, data2, REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul2, 0, 0, eq
|
||||||
|
beq L(nonascii_loop) /* False hit, no real NUL: continue with the accurate loop. */
|
||||||
|
|
||||||
|
/* Enter with C = has_nul1 == 0. */
|
||||||
|
L(tail):
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* For big-endian, carry propagation (if the final byte in the
|
||||||
|
string is 0x01) means we cannot use has_nul1/2 directly. The
|
||||||
|
easiest way to get the correct byte is to byte-swap the data
|
||||||
|
and calculate the syndrome a second time. */
|
||||||
|
csel data1, data1, data2, cc
|
||||||
|
rev data1, data1
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, REP8_7f
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
#else
|
||||||
|
csel has_nul1, has_nul1, has_nul2, cc /* Keep the syndrome of the dword holding the first NUL. */
|
||||||
|
#endif
|
||||||
|
sub len, src, srcin /* Offset of the current 16-byte block from srcin (len shares x0 with srcin). */
|
||||||
|
rev has_nul1, has_nul1
|
||||||
|
add tmp2, len, 8 /* Alternative base used when the NUL is in data2. */
|
||||||
|
clz tmp1, has_nul1
|
||||||
|
csel len, len, tmp2, cc /* C clear: NUL in data1; C set: NUL in data2, so use base + 8. */
|
||||||
|
add len, len, tmp1, lsr 3 /* Plus the NUL's byte index within that dword. */
|
||||||
|
ret
|
||||||
|
|
||||||
|
L(nonascii_loop): /* Accurate NUL check, 16 bytes per step, unrolled twice. */
|
||||||
|
ldp data1, data2, [src, 16]! /* Pre-index: src += 16, then load. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul2, 0, 0, eq
|
||||||
|
bne L(tail) /* NUL found in this block. */
|
||||||
|
ldp data1, data2, [src, 16]! /* Second unrolled step, same pattern. */
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, REP8_7f
|
||||||
|
bics has_nul1, tmp1, tmp2
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
ccmp has_nul2, 0, 0, eq
|
||||||
|
beq L(nonascii_loop)
|
||||||
|
b L(tail)
|
||||||
|
|
||||||
|
/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
|
||||||
|
srcin to 0x7f, so we ignore any NUL bytes before the string.
|
||||||
|
Then continue in the aligned loop. */
|
||||||
|
L(page_cross):
|
||||||
|
bic src, srcin, 15 /* Round srcin down to a 16-byte boundary. */
|
||||||
|
ldp data1, data2, [src]
|
||||||
|
lsl tmp1, srcin, 3 /* Byte offset -> bit count; only the low 6 bits affect the shift. */
|
||||||
|
mov tmp4, -1
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* Big-endian. Early bytes are at MSB. */
|
||||||
|
lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#else
|
||||||
|
/* Little-endian. Early bytes are at LSB. */
|
||||||
|
lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||||
|
#endif
|
||||||
|
orr tmp1, tmp1, REP8_80 /* Keep bit 7 of every byte out of the inverted mask. */
|
||||||
|
orn data1, data1, tmp1 /* OR 0x7f into each byte covered by the inverted mask. */
|
||||||
|
orn tmp2, data2, tmp1
|
||||||
|
tst srcin, 8
|
||||||
|
csel data1, data1, tmp4, eq /* Bit 3 set: data1 lies wholly before srcin, make it all ones. */
|
||||||
|
csel data2, data2, tmp2, eq /* Bit 3 clear: data2 is all string data, leave it unmasked. */
|
||||||
|
b L(page_cross_entry)
|
||||||
|
|
||||||
|
.size strlen, . - strlen
|
||||||
@@ -0,0 +1,294 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strncmp.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013, 2018, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* def_fn f [p2align=0]: open a global function named f in .text. */
.macro def_fn f p2align=0
|
||||||
|
.text /* Emit into the .text section. */
|
||||||
|
.p2align \p2align /* Align the entry to 2^p2align bytes. */
|
||||||
|
.global \f /* Export the symbol. */
|
||||||
|
.type \f, %function /* Mark the symbol as a function. */
|
||||||
|
\f: /* The entry label itself. */
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#define REP8_01 0x0101010101010101
|
||||||
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||||
|
#define REP8_80 0x8080808080808080
|
||||||
|
|
||||||
|
/* Parameters and result. */
|
||||||
|
#define src1 x0
|
||||||
|
#define src2 x1
|
||||||
|
#define limit x2
|
||||||
|
#define result x0
|
||||||
|
|
||||||
|
/* Internal variables. */
|
||||||
|
#define data1 x3
|
||||||
|
#define data1w w3
|
||||||
|
#define data2 x4
|
||||||
|
#define data2w w4
|
||||||
|
#define has_nul x5
|
||||||
|
#define diff x6
|
||||||
|
#define syndrome x7
|
||||||
|
#define tmp1 x8
|
||||||
|
#define tmp2 x9
|
||||||
|
#define tmp3 x10
|
||||||
|
#define zeroones x11
|
||||||
|
#define pos x12
|
||||||
|
#define limit_wd x13
|
||||||
|
#define mask x14
|
||||||
|
#define endloop x15
|
||||||
|
#define count mask
|
||||||
|
|
||||||
|
.text
|
||||||
|
.p2align 6
|
||||||
|
.rep 7
|
||||||
|
nop /* Pad so that the loop below fits a cache line. */
|
||||||
|
.endr
|
||||||
|
def_fn strncmp
|
||||||
|
cbz limit, .Lret0
|
||||||
|
eor tmp1, src1, src2
|
||||||
|
mov zeroones, #REP8_01
|
||||||
|
tst tmp1, #7
|
||||||
|
and count, src1, #7
|
||||||
|
b.ne .Lmisaligned8
|
||||||
|
cbnz count, .Lmutual_align
|
||||||
|
/* Calculate the number of full and partial words -1. */
|
||||||
|
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||||
|
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
|
||||||
|
|
||||||
|
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||||
|
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||||
|
can be done in parallel across the entire word. */
|
||||||
|
/* Start of performance-critical section -- one 64B cache line. */
|
||||||
|
.Lloop_aligned:
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
.Lstart_realigned:
|
||||||
|
subs limit_wd, limit_wd, #1
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||||
|
csinv endloop, diff, xzr, pl /* Last Dword or differences. */
|
||||||
|
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||||
|
ccmp endloop, #0, #0, eq
|
||||||
|
b.eq .Lloop_aligned
|
||||||
|
/* End of performance-critical section -- one 64B cache line. */
|
||||||
|
|
||||||
|
/* Not reached the limit, must have found the end or a diff. */
|
||||||
|
tbz limit_wd, #63, .Lnot_limit
|
||||||
|
|
||||||
|
/* Limit % 8 == 0 => all bytes significant. */
|
||||||
|
ands limit, limit, #7
|
||||||
|
b.eq .Lnot_limit
|
||||||
|
|
||||||
|
lsl limit, limit, #3 /* Bytes -> bits. */
|
||||||
|
mov mask, #~0
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
lsr mask, mask, limit
|
||||||
|
#else
|
||||||
|
lsl mask, mask, limit
|
||||||
|
#endif
|
||||||
|
bic data1, data1, mask
|
||||||
|
bic data2, data2, mask
|
||||||
|
|
||||||
|
/* Make sure that the NUL byte is marked in the syndrome. */
|
||||||
|
orr has_nul, has_nul, mask
|
||||||
|
|
||||||
|
.Lnot_limit:
|
||||||
|
orr syndrome, diff, has_nul
|
||||||
|
|
||||||
|
#ifndef __AARCH64EB__
|
||||||
|
rev syndrome, syndrome
|
||||||
|
rev data1, data1
|
||||||
|
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||||
|
that is different, or the top bit of the first zero byte.
|
||||||
|
Shifting left now will bring the critical information into the
|
||||||
|
top bits. */
|
||||||
|
clz pos, syndrome
|
||||||
|
rev data2, data2
|
||||||
|
lsl data1, data1, pos
|
||||||
|
lsl data2, data2, pos
|
||||||
|
/* But we need to zero-extend (char is unsigned) the value and then
|
||||||
|
perform a signed 32-bit subtraction. */
|
||||||
|
lsr data1, data1, #56
|
||||||
|
sub result, data1, data2, lsr #56
|
||||||
|
ret
|
||||||
|
#else
|
||||||
|
/* For big-endian we cannot use the trick with the syndrome value
|
||||||
|
as carry-propagation can corrupt the upper bits if the trailing
|
||||||
|
bytes in the string contain 0x01. */
|
||||||
|
/* However, if there is no NUL byte in the dword, we can generate
|
||||||
|
the result directly. We can't just subtract the bytes as the
|
||||||
|
MSB might be significant. */
|
||||||
|
cbnz has_nul, 1f
|
||||||
|
cmp data1, data2
|
||||||
|
cset result, ne
|
||||||
|
cneg result, result, lo
|
||||||
|
ret
|
||||||
|
1:
|
||||||
|
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||||
|
rev tmp3, data1
|
||||||
|
sub tmp1, tmp3, zeroones
|
||||||
|
orr tmp2, tmp3, #REP8_7f
|
||||||
|
bic has_nul, tmp1, tmp2
|
||||||
|
rev has_nul, has_nul
|
||||||
|
orr syndrome, diff, has_nul
|
||||||
|
clz pos, syndrome
|
||||||
|
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||||
|
that is different, or the top bit of the first zero byte.
|
||||||
|
Shifting left now will bring the critical information into the
|
||||||
|
top bits. */
|
||||||
|
lsl data1, data1, pos
|
||||||
|
lsl data2, data2, pos
|
||||||
|
/* But we need to zero-extend (char is unsigned) the value and then
|
||||||
|
perform a signed 32-bit subtraction. */
|
||||||
|
lsr data1, data1, #56
|
||||||
|
sub result, data1, data2, lsr #56
|
||||||
|
ret
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.Lmutual_align:
|
||||||
|
/* Sources are mutually aligned, but are not currently at an
|
||||||
|
alignment boundary. Round down the addresses and then mask off
|
||||||
|
the bytes that precede the start point.
|
||||||
|
We also need to adjust the limit calculations, but without
|
||||||
|
overflowing if the limit is near ULONG_MAX. */
|
||||||
|
bic src1, src1, #7
|
||||||
|
bic src2, src2, #7
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
mov tmp2, #~0
|
||||||
|
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* Big-endian. Early bytes are at MSB. */
|
||||||
|
lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
|
||||||
|
#else
|
||||||
|
/* Little-endian. Early bytes are at LSB. */
|
||||||
|
lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
|
||||||
|
#endif
|
||||||
|
and tmp3, limit_wd, #7
|
||||||
|
lsr limit_wd, limit_wd, #3
|
||||||
|
/* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
|
||||||
|
add limit, limit, count
|
||||||
|
add tmp3, tmp3, count
|
||||||
|
orr data1, data1, tmp2
|
||||||
|
orr data2, data2, tmp2
|
||||||
|
add limit_wd, limit_wd, tmp3, lsr #3
|
||||||
|
b .Lstart_realigned
|
||||||
|
|
||||||
|
.p2align 6
|
||||||
|
/* Don't bother with dwords for up to 16 bytes. */
|
||||||
|
.Lmisaligned8:
|
||||||
|
cmp limit, #16
|
||||||
|
b.hs .Ltry_misaligned_words
|
||||||
|
|
||||||
|
.Lbyte_loop:
|
||||||
|
/* Perhaps we can do better than this. */
|
||||||
|
ldrb data1w, [src1], #1
|
||||||
|
ldrb data2w, [src2], #1
|
||||||
|
subs limit, limit, #1
|
||||||
|
ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
|
||||||
|
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||||
|
b.eq .Lbyte_loop
|
||||||
|
.Ldone:
|
||||||
|
sub result, data1, data2
|
||||||
|
ret
|
||||||
|
/* Align the SRC1 to a dword by doing a bytewise compare and then do
|
||||||
|
the dword loop. */
|
||||||
|
.Ltry_misaligned_words:
|
||||||
|
lsr limit_wd, limit, #3
|
||||||
|
cbz count, .Ldo_misaligned
|
||||||
|
|
||||||
|
neg count, count
|
||||||
|
and count, count, #7
|
||||||
|
sub limit, limit, count
|
||||||
|
lsr limit_wd, limit, #3
|
||||||
|
|
||||||
|
.Lpage_end_loop:
|
||||||
|
ldrb data1w, [src1], #1
|
||||||
|
ldrb data2w, [src2], #1
|
||||||
|
cmp data1w, #1
|
||||||
|
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||||
|
b.ne .Ldone
|
||||||
|
subs count, count, #1
|
||||||
|
b.hi .Lpage_end_loop
|
||||||
|
|
||||||
|
.Ldo_misaligned:
|
||||||
|
/* Prepare ourselves for the next page crossing. Unlike the aligned
|
||||||
|
loop, we fetch 1 less dword because we risk crossing bounds on
|
||||||
|
SRC2. */
|
||||||
|
mov count, #8
|
||||||
|
subs limit_wd, limit_wd, #1
|
||||||
|
b.lo .Ldone_loop
|
||||||
|
.Lloop_misaligned:
|
||||||
|
and tmp2, src2, #0xff8
|
||||||
|
eor tmp2, tmp2, #0xff8
|
||||||
|
cbz tmp2, .Lpage_end_loop
|
||||||
|
|
||||||
|
ldr data1, [src1], #8
|
||||||
|
ldr data2, [src2], #8
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||||
|
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||||
|
ccmp diff, #0, #0, eq
|
||||||
|
b.ne .Lnot_limit
|
||||||
|
subs limit_wd, limit_wd, #1
|
||||||
|
b.pl .Lloop_misaligned
|
||||||
|
|
||||||
|
.Ldone_loop:
|
||||||
|
/* No difference or NUL was found in the dwords counted by limit_wd; check whether any bytes of the limit remain (limit & 7). */
|
||||||
|
and limit, limit, #7
|
||||||
|
cbz limit, .Lnot_limit
|
||||||
|
/* Read the last word. */
|
||||||
|
sub src1, src1, 8
|
||||||
|
sub src2, src2, 8
|
||||||
|
ldr data1, [src1, limit]
|
||||||
|
ldr data2, [src2, limit]
|
||||||
|
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||||
|
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||||
|
ccmp diff, #0, #0, eq
|
||||||
|
b.ne .Lnot_limit
|
||||||
|
|
||||||
|
.Lret0:
|
||||||
|
mov result, #0
|
||||||
|
ret
|
||||||
|
.size strncmp, . - strncmp
|
||||||
@@ -0,0 +1,188 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strnlen.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013, Linaro Limited
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Linaro nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
/* srcin and len both name x0: the register that carries the string
   pointer in is reused for the length handed back. */
#define srcin x0
|
||||||
|
#define len x0
|
||||||
|
#define limit x1
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
#define src x2
|
||||||
|
#define data1 x3
|
||||||
|
#define data2 x4
|
||||||
|
/* data2a receives data2 OR-ed with the padding mask on the misaligned
   entry path. */
#define data2a x5
|
||||||
|
#define has_nul1 x6
|
||||||
|
#define has_nul2 x7
|
||||||
|
#define tmp1 x8
|
||||||
|
#define tmp2 x9
|
||||||
|
#define tmp3 x10
|
||||||
|
#define tmp4 x11
|
||||||
|
/* zeroones is loaded with REP8_01 at function entry. */
#define zeroones x12
|
||||||
|
#define pos x13
|
||||||
|
/* limit_wd counts 16-byte Qwords, starting from (limit - 1) >> 4. */
#define limit_wd x14
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: start a function -- select .text, apply
   .p2align N (default 0), make NAME global, type it as a function and
   place its label. */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* 64-bit constants with one byte value repeated in all eight bytes
   (0x01, 0x7f, 0x80), used by the parallel NUL-byte test.
   NOTE(review): REP8_80 is not referenced by the strnlen code in this
   file. */
#define REP8_01 0x0101010101010101
|
||||||
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||||
|
#define REP8_80 0x8080808080808080
|
||||||
|
|
||||||
|
/* Pre-padding plus the shared "return limit" exit, emitted ahead of the
   strnlen entry point. */
.text
|
||||||
|
.p2align 6
|
||||||
|
.Lstart:
|
||||||
|
/* Pre-pad to ensure the critical loop begins on an icache line. */
|
||||||
|
/* NOTE(review): 7 nops + the 2 instructions of .Lhit_limit + the 7
   instructions between the strnlen label and .Lloop come to 16; if each
   assembles to one 4-byte instruction, .Lloop lands 64 bytes after
   .Lstart -- confirm when changing any of them. */
.rep 7
|
||||||
|
nop
|
||||||
|
.endr
|
||||||
|
/* Put this code here to avoid wasting more space with pre-padding. */
|
||||||
|
.Lhit_limit:
|
||||||
|
/* Reached when limit is zero, or when no NUL was detected before the
   Qword budget ran out: the answer is limit itself. */
mov len, limit
|
||||||
|
ret
|
||||||
|
|
||||||
|
/* strnlen: srcin (x0) is the string, limit (x1) the maximum number of
   bytes to examine; the length, capped at limit, is returned in len (x0). */
def_fn strnlen
|
||||||
|
cbz limit, .Lhit_limit
|
||||||
|
mov zeroones, #REP8_01
|
||||||
|
/* Work from the enclosing 16-byte boundary; tmp1 is the offset of
   srcin within that Qword (non-zero means a partial first Qword). */
bic src, srcin, #15
|
||||||
|
ands tmp1, srcin, #15
|
||||||
|
b.ne .Lmisaligned
|
||||||
|
/* Calculate the number of full and partial words -1. */
|
||||||
|
sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
|
||||||
|
lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
|
||||||
|
|
||||||
|
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||||
|
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||||
|
can be done in parallel across the entire word. */
|
||||||
|
/* The inner loop deals with two Dwords at a time. This has a
|
||||||
|
slightly higher start-up cost, but we should win quite quickly,
|
||||||
|
especially on cores with a high number of issue slots per
|
||||||
|
cycle, as we get much better parallelism out of the operations. */
|
||||||
|
|
||||||
|
/* Start of critical section -- keep to one 64Byte cache line. */
|
||||||
|
.Lloop:
|
||||||
|
ldp data1, data2, [src], #16
|
||||||
|
.Lrealigned:
|
||||||
|
/* .Lrealigned is also entered from .Lmisaligned, with data1/data2
   already loaded and their leading padding bytes forced to 0xff. */
sub tmp1, data1, zeroones
|
||||||
|
orr tmp2, data1, #REP8_7f
|
||||||
|
sub tmp3, data2, zeroones
|
||||||
|
orr tmp4, data2, #REP8_7f
|
||||||
|
bic has_nul1, tmp1, tmp2
|
||||||
|
bic has_nul2, tmp3, tmp4
|
||||||
|
subs limit_wd, limit_wd, #1
|
||||||
|
orr tmp1, has_nul1, has_nul2
|
||||||
|
/* While limit_wd is still non-negative (pl) this compares tmp1 with 0;
   otherwise the flags are forced to 0000.  b.eq therefore loops only
   when no NUL was seen and Qwords remain. */
ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
|
||||||
|
b.eq .Lloop
|
||||||
|
/* End of critical section -- keep to one 64Byte cache line. */
|
||||||
|
|
||||||
|
/* Loop exit: either a NUL was detected in the last Qword or the Qword
   budget ran out.  Work out which, then compute the result. */
orr tmp1, has_nul1, has_nul2
|
||||||
|
cbz tmp1, .Lhit_limit /* No null in final Qword. */
|
||||||
|
|
||||||
|
/* We know there's a null in the final Qword. The easiest thing
|
||||||
|
to do now is work out the length of the string and return
|
||||||
|
MIN (len, limit). */
|
||||||
|
|
||||||
|
/* src was post-incremented by the load, so len starts 16 bytes past the
   first byte of the Qword; each "sub len, len, #8" below steps back one
   dword towards the one that holds the NUL. */
sub len, src, srcin
|
||||||
|
cbz has_nul1, .Lnul_in_data2
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
mov data2, data1
|
||||||
|
#endif
|
||||||
|
sub len, len, #8
|
||||||
|
mov has_nul2, has_nul1
|
||||||
|
.Lnul_in_data2:
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* For big-endian, carry propagation (if the final byte in the
|
||||||
|
string is 0x01) means we cannot use has_nul directly. The
|
||||||
|
easiest way to get the correct byte is to byte-swap the data
|
||||||
|
and calculate the syndrome a second time. */
|
||||||
|
rev data2, data2
|
||||||
|
sub tmp1, data2, zeroones
|
||||||
|
orr tmp2, data2, #REP8_7f
|
||||||
|
bic has_nul2, tmp1, tmp2
|
||||||
|
#endif
|
||||||
|
sub len, len, #8
|
||||||
|
/* After this byte reversal the marker of the earliest NUL byte is the
   most significant set bit, so clz / 8 is that byte's index. */
rev has_nul2, has_nul2
|
||||||
|
clz pos, has_nul2
|
||||||
|
add len, len, pos, lsr #3 /* Bits to bytes. */
|
||||||
|
cmp len, limit
|
||||||
|
csel len, len, limit, ls /* Return the lower value. */
|
||||||
|
ret
|
||||||
|
|
||||||
|
.Lmisaligned:
|
||||||
|
/* Deal with a partial first word.
|
||||||
|
We're doing two things in parallel here;
|
||||||
|
1) Calculate the number of words (but avoiding overflow if
|
||||||
|
limit is near ULONG_MAX) - to do this we need to work out
|
||||||
|
limit + tmp1 - 1 as a 65-bit value before shifting it;
|
||||||
|
2) Load and mask the initial data words - we force the bytes
|
||||||
|
before the ones we are interested in to 0xff - this ensures
|
||||||
|
early bytes will not hit any zero detection. */
|
||||||
|
sub limit_wd, limit, #1
|
||||||
|
neg tmp4, tmp1
|
||||||
|
/* The flags set here are consumed by the csinv/csel at the end of this
   block; no instruction in between writes the flags. */
cmp tmp1, #8
|
||||||
|
|
||||||
|
and tmp3, limit_wd, #15
|
||||||
|
lsr limit_wd, limit_wd, #4
|
||||||
|
mov tmp2, #~0
|
||||||
|
|
||||||
|
ldp data1, data2, [src], #16
|
||||||
|
lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
|
||||||
|
add tmp3, tmp3, tmp1
|
||||||
|
|
||||||
|
#ifdef __AARCH64EB__
|
||||||
|
/* Big-endian. Early bytes are at MSB. */
|
||||||
|
lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
|
||||||
|
#else
|
||||||
|
/* Little-endian. Early bytes are at LSB. */
|
||||||
|
lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
|
||||||
|
#endif
|
||||||
|
/* tmp3 = ((limit - 1) & 15) + tmp1; its bit 4 is the carry that adds
   one more Qword to the count. */
add limit_wd, limit_wd, tmp3, lsr #4
|
||||||
|
|
||||||
|
orr data1, data1, tmp2
|
||||||
|
orr data2a, data2, tmp2
|
||||||
|
|
||||||
|
/* tmp1 <= 8 (le): the string starts in the first dword, so keep the
   masked data1 and the unmasked data2.  Otherwise the whole first dword
   is padding: force data1 to all-ones and use the masked copy data2a. */
csinv data1, data1, xzr, le
|
||||||
|
csel data2, data2, data2a, le
|
||||||
|
b .Lrealigned
|
||||||
|
.size strnlen, . - .Lstart /* Include pre-padding in size. */
|
||||||
@@ -0,0 +1,179 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* libs/libc/machine/arm64/gnu/arch_strrchr.S
|
||||||
|
*
|
||||||
|
* Copyright (c) 2014, ARM Limited
|
||||||
|
* All rights Reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the company nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
* ARMv8-a, AArch64
|
||||||
|
* Neon Available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Arguments and results. */
|
||||||
|
#define srcin x0
|
||||||
|
#define chrin w1
|
||||||
|
|
||||||
|
/* result shares x0 with srcin. */
#define result x0
|
||||||
|
|
||||||
|
#define src x2
|
||||||
|
#define tmp1 x3
|
||||||
|
#define wtmp2 w4
|
||||||
|
#define tmp3 x5
|
||||||
|
/* src_match / src_offset remember the most recent hunk that contained a
   character match: the value of src after loading it, and its match
   syndrome. */
#define src_match x6
|
||||||
|
#define src_offset x7
|
||||||
|
#define const_m1 x8
|
||||||
|
#define tmp4 x9
|
||||||
|
#define nul_match x10
|
||||||
|
#define chr_match x11
|
||||||
|
|
||||||
|
#define vrepchr v0
|
||||||
|
#define vdata1 v1
|
||||||
|
#define vdata2 v2
|
||||||
|
#define vhas_nul1 v3
|
||||||
|
#define vhas_nul2 v4
|
||||||
|
#define vhas_chr1 v5
|
||||||
|
#define vhas_chr2 v6
|
||||||
|
/* vrepmask_c holds the per-byte bit used for character matches;
   vrepmask_0 is that value doubled, used for NUL matches. */
#define vrepmask_0 v7
|
||||||
|
#define vrepmask_c v16
|
||||||
|
#define vend1 v17
|
||||||
|
/* NOTE(review): vend2 is not referenced by the strrchr code in this
   file. */
#define vend2 v18
|
||||||
|
|
||||||
|
/* Core algorithm.
|
||||||
|
|
||||||
|
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||||
|
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||||
|
and little-endian systems). For each tuple, bit 0 is set iff
|
||||||
|
the relevant byte matched the requested character; bit 1 is set
|
||||||
|
iff the relevant byte matched the NUL end of string (we trigger
|
||||||
|
off bit0 for the special case of looking for NUL). Since the bits
|
||||||
|
in the syndrome reflect exactly the order in which things occur
|
||||||
|
in the original string a count_trailing_zeros() operation will
|
||||||
|
identify exactly which byte is causing the termination, and why. */
|
||||||
|
|
||||||
|
/* Locals and temporaries. */
|
||||||
|
|
||||||
|
/* def_fn NAME [p2align=N]: start a function -- select .text, apply
   .p2align N (default 0), make NAME global, type it as a function and
   place its label. */
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* strrchr: srcin (x0) is the string, chrin (w1) the character to find;
   result (x0) is the address of its last occurrence, or zero if there is
   none. */
def_fn strrchr
|
||||||
|
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||||
|
matches the requested byte. Magic constant 0x80200802 used
|
||||||
|
similarly for NUL termination. */
|
||||||
|
mov wtmp2, #0x0401
|
||||||
|
movk wtmp2, #0x4010, lsl #16
|
||||||
|
dup vrepchr.16b, chrin
|
||||||
|
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||||
|
dup vrepmask_c.4s, wtmp2
|
||||||
|
/* src_offset doubles as the "match seen" flag: it stays zero until a
   hunk containing a match is recorded. */
mov src_offset, #0
|
||||||
|
ands tmp1, srcin, #31
|
||||||
|
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
|
||||||
|
/* Flags still come from the ands above: Z set means srcin is already
   32-byte aligned. */
b.eq .Laligned
|
||||||
|
|
||||||
|
/* Input string is not 32-byte aligned. Rather than forcing
|
||||||
|
the padding bytes to a safe value, we calculate the syndrome
|
||||||
|
for all the bytes, but then mask off those bits of the
|
||||||
|
syndrome that are related to the padding. */
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
neg tmp1, tmp1
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* Reduce each 0x00/0xff compare result to that byte's single lane bit
   before the pairwise adds pack the 32 bytes into 64 bits. */
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||||
|
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||||
|
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
|
||||||
|
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||||
|
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
|
||||||
|
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
|
||||||
|
mov nul_match, vhas_nul1.2d[0]
|
||||||
|
lsl tmp1, tmp1, #1
|
||||||
|
mov const_m1, #~0
|
||||||
|
mov chr_match, vhas_chr1.2d[0]
|
||||||
|
/* tmp1 is now -2 * (srcin & 31).  The register shift uses it modulo 64,
   so tmp3 ends up with the low 2 * (srcin & 31) bits set: the syndrome
   bits of the bytes that precede the string. */
lsr tmp3, const_m1, tmp1
|
||||||
|
|
||||||
|
bic nul_match, nul_match, tmp3 // Mask padding bits.
|
||||||
|
bic chr_match, chr_match, tmp3 // Mask padding bits.
|
||||||
|
/* A NUL in the first hunk goes straight to the tail; otherwise fall
   through into .Lloop, which records this hunk's matches. */
cbnz nul_match, .Ltail
|
||||||
|
|
||||||
|
.Lloop:
|
||||||
|
/* If the hunk just examined contained a match, remember it: src already
   points past that hunk, and chr_match is its syndrome. */
cmp chr_match, #0
|
||||||
|
csel src_match, src, src_match, ne
|
||||||
|
csel src_offset, chr_match, src_offset, ne
|
||||||
|
.Laligned:
|
||||||
|
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||||
|
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||||
|
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||||
|
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||||
|
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||||
|
/* vend1 folds the raw NUL compare results without a lane mask; it is
   only tested for zero / non-zero by the cbz below. */
addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
|
||||||
|
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||||
|
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||||
|
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||||
|
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||||
|
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
|
||||||
|
mov nul_match, vend1.2d[0]
|
||||||
|
mov chr_match, vhas_chr1.2d[0]
|
||||||
|
cbz nul_match, .Lloop
|
||||||
|
|
||||||
|
/* A NUL is somewhere in this hunk: rebuild nul_match with the lane mask
   applied so that each set bit identifies one byte. */
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||||
|
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||||
|
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
|
||||||
|
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
|
||||||
|
mov nul_match, vhas_nul1.2d[0]
|
||||||
|
|
||||||
|
.Ltail:
|
||||||
|
/* Work out exactly where the string ends. */
|
||||||
|
/* tmp4 = nul_match ^ (nul_match - 1): every bit up to and including the
   lowest set bit of nul_match.  AND-ing chr_match with it discards
   matches that lie beyond the first NUL. */
sub tmp4, nul_match, #1
|
||||||
|
eor tmp4, tmp4, nul_match
|
||||||
|
ands chr_match, chr_match, tmp4
|
||||||
|
/* And pick the values corresponding to the last match. */
|
||||||
|
csel src_match, src, src_match, ne
|
||||||
|
csel src_offset, chr_match, src_offset, ne
|
||||||
|
|
||||||
|
/* Count down from the top of the syndrome to find the last match. */
|
||||||
|
clz tmp3, src_offset
|
||||||
|
/* Src_match points beyond the word containing the match, so we can
|
||||||
|
simply subtract half the bit-offset into the syndrome. Because
|
||||||
|
we are counting down, we need to go back one more character. */
|
||||||
|
add tmp3, tmp3, #2
|
||||||
|
sub result, src_match, tmp3, lsr #1
|
||||||
|
/* But if the syndrome shows no match was found, then return NULL. */
|
||||||
|
cmp src_offset, #0
|
||||||
|
csel result, result, xzr, ne
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
.size strrchr, . - strrchr
|
||||||
@@ -44,6 +44,7 @@
|
|||||||
*
|
*
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef CONFIG_LIBC_ARCH_STRCHRNUL
|
||||||
FAR char *strchrnul(FAR const char *s, int c)
|
FAR char *strchrnul(FAR const char *s, int c)
|
||||||
{
|
{
|
||||||
if (s)
|
if (s)
|
||||||
@@ -56,3 +57,4 @@ FAR char *strchrnul(FAR const char *s, int c)
|
|||||||
|
|
||||||
return (FAR char *)s;
|
return (FAR char *)s;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
* Public Functions
|
* Public Functions
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
#ifndef CONFIG_ARCH_STRNCMP
|
#ifndef CONFIG_LIBC_ARCH_STRNCMP
|
||||||
#undef strncmp /* See mm/README.txt */
|
#undef strncmp /* See mm/README.txt */
|
||||||
int strncmp(FAR const char *cs, FAR const char *ct, size_t nb)
|
int strncmp(FAR const char *cs, FAR const char *ct, size_t nb)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -34,6 +34,7 @@
|
|||||||
* occurrence of the character c in the string s.
|
* occurrence of the character c in the string s.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef CONFIG_LIBC_ARCH_STRRCHR
|
||||||
#undef strrchr /* See mm/README.txt */
|
#undef strrchr /* See mm/README.txt */
|
||||||
FAR char *strrchr(FAR const char *s, int c)
|
FAR char *strrchr(FAR const char *s, int c)
|
||||||
{
|
{
|
||||||
@@ -50,3 +51,4 @@ FAR char *strrchr(FAR const char *s, int c)
|
|||||||
|
|
||||||
return (FAR char *)r;
|
return (FAR char *)r;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user