mirror of
https://github.com/apache/nuttx.git
synced 2025-12-06 09:01:15 +08:00
libc: add arm64 libc function
Porting memory and string optimize functions from newlib and bionic Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
This commit is contained in:
284
LICENSE
284
LICENSE
@@ -490,6 +490,7 @@ libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
|
||||
libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
|
||||
libs/libc/machine/arm/armv7-r/gnu/arch_memcpy.S
|
||||
libs/libc/machine/arm/armv7-r/gnu/arch_strlen.S
|
||||
libs/libc/machine/arm64/gnu/arch_strnlen.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2013, Linaro Limited
|
||||
@@ -739,6 +740,286 @@ libs/libc/machine/arm/armv8-m/gnu/arch_strlen.S
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_memchr.S
|
||||
libs/libc/machine/arm64/gnu/arch_strchr.S
|
||||
libs/libc/machine/arm64/gnu/arch_strchrnul.S
|
||||
libs/libc/machine/arm64/gnu/arch_strrchr.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2014, ARM Limited
|
||||
All rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the company nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_memcmp.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2018, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Copyright (c) 2017 ARM Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the company may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_memcpy.S
|
||||
libs/libc/machine/arm64/gnu/arch_memmove.S
|
||||
libs/libc/machine/arm64/gnu/arch_memset.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2012-2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Copyright (c) 2015 ARM Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the company may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_strcmp.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2012-2018, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_strcpy.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2013, 2014, 2015 ARM Ltd.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_strlen.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2013-2015, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm64/gnu/arch_strncmp.S
|
||||
================================================
|
||||
|
||||
Copyright (c) 2013, 2018, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/risc-v/gnu/arch_memset.S
|
||||
libs/libc/machine/risc-v/gnu/arch_strcmp.S
|
||||
================================================
|
||||
@@ -5543,7 +5824,8 @@ drivers/mtd/at24xx.c
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
libs/libc/machine/arm/armv8-m
|
||||
=============================
|
||||
libs/libc/machine/arm64/gnu/arch_setjmp.S
|
||||
==============================================
|
||||
|
||||
Copyright (c) 2011, 2012 ARM Ltd. All rights reserved.
|
||||
|
||||
|
||||
@@ -72,10 +72,18 @@ config LIBC_ARCH_STRCHR
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_STRCHRNUL
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_STRCMP
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_STRNCMP
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_STRCPY
|
||||
bool
|
||||
default n
|
||||
@@ -100,6 +108,10 @@ config LIBC_ARCH_STRNLEN
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_STRRCHR
|
||||
bool
|
||||
default n
|
||||
|
||||
config LIBC_ARCH_ELF
|
||||
bool
|
||||
default n
|
||||
@@ -174,6 +186,9 @@ config LIBM_ARCH_TRUNCF
|
||||
if ARCH_ARM
|
||||
source "libs/libc/machine/arm/Kconfig"
|
||||
endif
|
||||
if ARCH_ARM64
|
||||
source "libs/libc/machine/arm64/Kconfig"
|
||||
endif
|
||||
if ARCH_RISCV
|
||||
source "libs/libc/machine/risc-v/Kconfig"
|
||||
endif
|
||||
|
||||
@@ -25,6 +25,9 @@ endif
|
||||
ifeq ($(CONFIG_ARCH_ARM),y)
|
||||
include $(TOPDIR)/libs/libc/machine/arm/Make.defs
|
||||
endif
|
||||
ifeq ($(CONFIG_ARCH_ARM64),y)
|
||||
include $(TOPDIR)/libs/libc/machine/arm64/Make.defs
|
||||
endif
|
||||
ifeq ($(CONFIG_ARCH_RISCV),y)
|
||||
include $(TOPDIR)/libs/libc/machine/risc-v/Make.defs
|
||||
endif
|
||||
|
||||
106
libs/libc/machine/arm64/Kconfig
Normal file
106
libs/libc/machine/arm64/Kconfig
Normal file
@@ -0,0 +1,106 @@
|
||||
#
|
||||
# For a description of the syntax of this configuration file,
|
||||
# see the file kconfig-language.txt in the NuttX tools repository.
|
||||
#
|
||||
|
||||
config ARM64_MEMCHR
|
||||
bool "Enable optimized memchr() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_MEMCHR
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific memchr() library function
|
||||
|
||||
config ARM64_MEMCMP
|
||||
bool "Enable optimized memcmp() for ARM64"
|
||||
select LIBC_ARCH_MEMCMP
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific memcmp() library function
|
||||
|
||||
config ARM64_MEMCPY
|
||||
bool "Enable optimized memcpy() for ARM64"
|
||||
select LIBC_ARCH_MEMCPY
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific memcpy() library function
|
||||
|
||||
config ARM64_MEMSET
|
||||
bool "Enable optimized memset() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_MEMSET
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific memset() library function
|
||||
|
||||
config ARM64_MEMMOVE
|
||||
bool "Enable optimized memmove() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_MEMMOVE
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific memmove() library function
|
||||
|
||||
config ARM64_STRCHR
|
||||
bool "Enable optimized strchr() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRCHR
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strchr() library function
|
||||
|
||||
config ARM64_STRCHRNUL
|
||||
bool "Enable optimized strchrnul() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRCHRNUL
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strchrnul() library function
|
||||
|
||||
config ARM64_STRCMP
|
||||
bool "Enable optimized strcmp() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRCMP
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strcmp() library function
|
||||
|
||||
config ARM64_STRCPY
|
||||
bool "Enable optimized strcpy() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRCPY
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strcpy() library function
|
||||
|
||||
config ARM64_STRLEN
|
||||
bool "Enable optimized strlen() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRLEN
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strlen() library function
|
||||
|
||||
config ARM64_STRNCMP
|
||||
bool "Enable optimized strncmp() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRNCMP
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strncmp() library function
|
||||
|
||||
config ARM64_STRNLEN
|
||||
bool "Enable optimized strnlen() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRNLEN
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strnlen() library function
|
||||
|
||||
config ARM64_STRRCHR
|
||||
bool "Enable optimized strrchr() for ARM64"
|
||||
default n
|
||||
select LIBC_ARCH_STRRCHR
|
||||
depends on ARCH_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARM64 specific strrchr() library function
|
||||
83
libs/libc/machine/arm64/Make.defs
Normal file
83
libs/libc/machine/arm64/Make.defs
Normal file
@@ -0,0 +1,83 @@
|
||||
############################################################################
|
||||
# libs/libc/machine/arm64/Make.defs
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership. The
|
||||
# ASF licenses this file to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance with the
|
||||
# License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
############################################################################
|
||||
|
||||
ifeq ($(CONFIG_ARM64_MEMCHR),y)
|
||||
ASRCS += arch_memchr.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_MEMCMP),y)
|
||||
ASRCS += arch_memcmp.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_MEMCPY),y)
|
||||
ASRCS += arch_memcpy.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_MEMMOVE),y)
|
||||
ASRCS += arch_memmove.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_MEMSET),y)
|
||||
ASRCS += arch_memset.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRCHR),y)
|
||||
ASRCS += arch_strchr.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRCHRNUL),y)
|
||||
ASRCS += arch_strchrnul.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRCMP),y)
|
||||
ASRCS += arch_strcmp.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRCPY),y)
|
||||
ASRCS += arch_strcpy.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRLEN),y)
|
||||
ASRCS += arch_strlen.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRNCMP),y)
|
||||
ASRCS += arch_strncmp.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRNLEN),y)
|
||||
ASRCS += arch_strnlen.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARM64_STRRCHR),y)
|
||||
ASRCS += arch_strrchr.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_SETJMP_H),y)
|
||||
ASRCS += arch_setjmp.S
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
|
||||
DEPPATH += --dep-path machine/arm64/gnu
|
||||
VPATH += :machine/arm64/gnu
|
||||
endif
|
||||
|
||||
DEPPATH += --dep-path machine/arm64
|
||||
VPATH += :machine/arm64
|
||||
173
libs/libc/machine/arm64/gnu/arch_memchr.S
Normal file
173
libs/libc/machine/arm64/gnu/arch_memchr.S
Normal file
@@ -0,0 +1,173 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_memchr.S
|
||||
*
|
||||
* Copyright (c) 2014, ARM Limited
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. Register aliases used by memchr below. */
|
||||
#define srcin x0 /* in: address of the first byte to search */
|
||||
#define chrin w1 /* in: value to look for; its low byte is replicated into vrepchr */
|
||||
#define cntin x2 /* in: number of bytes to search; decremented as blocks are consumed */
|
||||
|
||||
#define result x0 /* out: address of the matching byte, or 0 when there is none */
|
||||
|
||||
#define src x3 /* read pointer, always a multiple of 32; post-incremented by each load */
|
||||
#define tmp x4 /* scratch: count adjustment and variable shift amounts */
|
||||
#define wtmp2 w5 /* holds the 0x40100401 constant before it is replicated into vrepmask */
|
||||
#define synd x6 /* 64-bit syndrome: two bits per byte of the current 32-byte block */
|
||||
#define soff x9 /* srcin & 31, offset of srcin inside its 32-byte block */
|
||||
#define cntrem x10 /* cntin & 31, taken from the count as passed in */
|
||||
|
||||
#define vrepchr v0 /* search byte replicated into all 16 byte lanes */
|
||||
#define vdata1 v1 /* first 16 bytes of the current block */
|
||||
#define vdata2 v2 /* second 16 bytes of the current block */
|
||||
#define vhas_chr1 v3 /* cmeq result for vdata1 */
|
||||
#define vhas_chr2 v4 /* cmeq result for vdata2 */
|
||||
#define vrepmask v5 /* 0x40100401 replicated into each 32-bit lane */
|
||||
#define vend v6 /* reduction of vhas_chr1/vhas_chr2 that synd is read from */
|
||||
|
||||
/*
|
||||
* Core algorithm:
|
||||
*
|
||||
* For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
|
||||
* per byte. For each tuple, bit 0 is set if the relevant byte matched the
|
||||
* requested character and bit 1 is not used (faster than using a 32bit
|
||||
* syndrome). Since the bits in the syndrome reflect exactly the order in which
|
||||
* things occur in the original string, counting trailing zeros allows us to
|
||||
* identify exactly which byte has matched.
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0 /* open a global function named f; p2align is the power-of-two alignment and is 0 when omitted */
|
||||
.text /* place the function in the .text section */
|
||||
.p2align \p2align /* align the entry point to 2^p2align bytes */
|
||||
.global \f /* make the symbol visible outside this object file */
|
||||
.type \f, %function /* give the symbol the function type */
|
||||
\f: /* entry label; the function body follows the macro invocation */
|
||||
.endm
|
||||
|
||||
def_fn memchr
|
||||
/* memchr: scan cntin bytes starting at srcin for the low byte of chrin; result is the address of the first match or 0. Do not dereference srcin if no bytes to compare. */
|
||||
cbz cntin, .Lzero_length
|
||||
/*
|
||||
* Magic constant 0x40100401 allows us to identify which lane matches
|
||||
* the requested byte: within every 4 bytes the lanes keep bits 0, 2, 4 and 6.
|
||||
*/
|
||||
mov wtmp2, #0x0401 /* low half of the constant */
|
||||
movk wtmp2, #0x4010, lsl #16 /* high half, wtmp2 = 0x40100401 */
|
||||
dup vrepchr.16b, chrin /* replicate the search byte into all 16 lanes */
|
||||
/* Work with aligned 32-byte chunks */
|
||||
bic src, srcin, #31 /* round srcin down to a 32-byte boundary */
|
||||
dup vrepmask.4s, wtmp2 /* replicate the constant into each 32-bit lane */
|
||||
ands soff, srcin, #31 /* offset inside the block; sets Z when srcin is already aligned */
|
||||
and cntrem, cntin, #31 /* remember the count modulo 32 (does not touch the flags) */
|
||||
b.eq .Lloop /* aligned start: go straight to the main loop */
|
||||
|
||||
/*
|
||||
* Input string is not 32-byte aligned. We calculate the syndrome
|
||||
* value for the aligned 32 bytes block containing the first bytes
|
||||
* and mask the irrelevant part.
|
||||
*/
|
||||
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32 /* load the whole aligned block, including the bytes before srcin */
|
||||
sub tmp, soff, #32 /* tmp = -(32 - soff), minus the bytes of this block at or after srcin */
|
||||
adds cntin, cntin, tmp /* bytes left after this block; these flags feed b.ls below */
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b /* 0xff in every lane equal to the search byte */
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b /* keep one distinct bit per matching lane */
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||
mov synd, vend.2d[0] /* syndrome = low 64 bits of the reduction */
|
||||
/* Clear the soff*2 lower bits */
|
||||
lsl tmp, soff, #1 /* two syndrome bits per byte */
|
||||
lsr synd, synd, tmp /* shift out the bits of the bytes before srcin ... */
|
||||
lsl synd, synd, tmp /* ... and shift back, leaving zeros in their place */
|
||||
/* The first block can also be the last */
|
||||
b.ls .Lmasklast /* flags are still those of the adds above: the count ends inside this block */
|
||||
/* Have we found something already? */
|
||||
cbnz synd, .Ltail
|
||||
|
||||
.Lloop: /* main loop: one aligned 32-byte block per iteration */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
subs cntin, cntin, #32 /* consume the block; these flags feed b.ls here and b.hi in .Lend */
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* If we're out of data we finish regardless of the result */
|
||||
b.ls .Lend
|
||||
/* Use a fast check for the termination condition */
|
||||
orr vend.16b, vhas_chr1.16b, vhas_chr2.16b /* any match in either half */
|
||||
addp vend.2d, vend.2d, vend.2d /* fold 128 bits into 64 */
|
||||
mov synd, vend.2d[0] /* non-zero if some lane matched; not yet a positional syndrome */
|
||||
/* We're not out of data, loop if we haven't found the character */
|
||||
cbz synd, .Lloop
|
||||
|
||||
.Lend: /* reached when the count ran out or a match was seen */
|
||||
/* Termination condition found, let's calculate the syndrome value */
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||
mov synd, vend.2d[0]
|
||||
/* Only do the clear for the last possible block */
|
||||
b.hi .Ltail /* flags from the subs in the loop: data remains, so no masking is needed */
|
||||
|
||||
.Lmasklast: /* last block: drop matches located past the end of the buffer */
|
||||
/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
|
||||
add tmp, cntrem, soff
|
||||
and tmp, tmp, #31 /* x = (cntrem + soff) % 32, valid bytes in the last block */
|
||||
sub tmp, tmp, #32 /* x - 32 */
|
||||
neg tmp, tmp, lsl #1 /* tmp = (32 - x) * 2; for x == 0 this is 64, which a register shift applies modulo 64, i.e. as 0 */
|
||||
lsl synd, synd, tmp /* shift the out-of-range bits off the top ... */
|
||||
lsr synd, synd, tmp /* ... and shift back, leaving zeros in their place */
|
||||
|
||||
.Ltail: /* turn the syndrome into a pointer or 0 */
|
||||
/* Count the trailing zeros using bit reversing */
|
||||
rbit synd, synd
|
||||
/* Compensate the last post-increment */
|
||||
sub src, src, #32
|
||||
/* Check that we have found a character */
|
||||
cmp synd, #0 /* Z set when no bit survived the masking */
|
||||
/* And count the leading zeros */
|
||||
clz synd, synd
|
||||
/* Compute the potential result */
|
||||
add result, src, synd, lsr #1 /* two syndrome bits per byte, so halve the bit index */
|
||||
/* Select result or NULL */
|
||||
csel result, xzr, result, eq
|
||||
ret
|
||||
|
||||
.Lzero_length: /* cntin was 0: nothing to search */
|
||||
mov result, #0
|
||||
ret
|
||||
|
||||
.size memchr, . - memchr
|
||||
196
libs/libc/machine/arm64/gnu/arch_memcmp.S
Normal file
196
libs/libc/machine/arm64/gnu/arch_memcmp.S
Normal file
@@ -0,0 +1,196 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_memcmp.S
|
||||
*
|
||||
* Copyright (c) 2018 Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Copyright (c) 2017 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses.
|
||||
*/
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define limit x2
|
||||
#define result w0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x3
|
||||
#define data1w w3
|
||||
#define data1h x4
|
||||
#define data2 x5
|
||||
#define data2w w5
|
||||
#define data2h x6
|
||||
#define tmp1 x7
|
||||
#define tmp2 x8
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* memcmp: compare the first `limit` bytes at src1 and src2. */
/* The result is 0 when they match; otherwise its sign follows the first */
/* differing byte, compared as unsigned values. */
def_fn memcmp p2align=6
|
||||
/* Fewer than 8 bytes: limit is now [-8..-1] and L(less8) takes over. */
subs limit, limit, 8
|
||||
b.lo L(less8)
|
||||
|
||||
ldr data1, [src1], 8
|
||||
ldr data2, [src2], 8
|
||||
cmp data1, data2
|
||||
b.ne L(return)
|
||||
|
||||
/* limit = total - 16; a positive value means more than 16 bytes in total. */
subs limit, limit, 8
|
||||
b.gt L(more16)
|
||||
|
||||
/* 8..16 bytes: limit is [-8..0], so this loads the final 8 bytes, */
/* possibly overlapping the 8 bytes already compared. */
ldr data1, [src1, limit]
|
||||
ldr data2, [src2, limit]
|
||||
b L(return)
|
||||
|
||||
L(more16):
|
||||
ldr data1, [src1], 8
|
||||
ldr data2, [src2], 8
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
|
||||
/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
|
||||
strings. */
|
||||
subs limit, limit, 16
|
||||
b.ls L(last_bytes)
|
||||
|
||||
/* We overlap loads between 0-32 bytes at either side of SRC1 when we
|
||||
try to align, so limit it only to strings larger than 128 bytes. */
|
||||
cmp limit, 96
|
||||
b.ls L(loop16)
|
||||
|
||||
/* Align src1 and adjust src2 with bytes not yet done. */
|
||||
and tmp1, src1, 15
|
||||
add limit, limit, tmp1
|
||||
sub src1, src1, tmp1
|
||||
sub src2, src2, tmp1
|
||||
|
||||
/* Loop performing 16 bytes per iteration using aligned src1.
|
||||
Limit is pre-decremented by 16 and must be larger than zero.
|
||||
Exit if <= 16 bytes left to do or if the data is not equal. */
|
||||
.p2align 4
|
||||
L(loop16):
|
||||
ldp data1, data1h, [src1], 16
|
||||
ldp data2, data2h, [src2], 16
|
||||
subs limit, limit, 16
|
||||
/* Flags end up EQ only if limit is still above zero (hi) and both */
/* 8-byte halves match; otherwise the ccmp immediate 0 leaves NE. */
ccmp data1, data2, 0, hi
|
||||
ccmp data1h, data2h, 0, eq
|
||||
b.eq L(loop16)
|
||||
|
||||
/* Loop exited: check the pair just loaded, low half first. */
cmp data1, data2
|
||||
bne L(return)
|
||||
mov data1, data1h
|
||||
mov data2, data2h
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
|
||||
/* Compare last 1-16 bytes using unaligned access. */
|
||||
L(last_bytes):
|
||||
/* limit is <= 0 here, so this steps back to 16 bytes before the end. */
add src1, src1, limit
|
||||
add src2, src2, limit
|
||||
ldp data1, data1h, [src1]
|
||||
ldp data2, data2h, [src2]
|
||||
cmp data1, data2
|
||||
bne L(return)
|
||||
mov data1, data1h
|
||||
mov data2, data2h
|
||||
cmp data1, data2
|
||||
|
||||
/* Compare data bytes and set return value to 0, -1 or 1. */
|
||||
L(return):
|
||||
/* Unless built big-endian, byte-reverse both words so the unsigned */
/* compare below is decided by the lowest-addressed differing byte. */
#ifndef __AARCH64EB__
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
#endif
|
||||
cmp data1, data2
|
||||
L(ret_eq):
|
||||
/* result = 0 if equal, 1 if different; negated to -1 when data1 is lower. */
cset result, ne
|
||||
cneg result, result, lo
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Compare up to 8 bytes. Limit is [-8..-1]. */
|
||||
L(less8):
|
||||
/* No carry out of the add means fewer than 4 bytes remain. */
adds limit, limit, 4
|
||||
b.lo L(less4)
|
||||
ldr data1w, [src1], 4
|
||||
ldr data2w, [src2], 4
|
||||
cmp data1w, data2w
|
||||
b.ne L(return)
|
||||
sub limit, limit, 4
|
||||
L(less4):
|
||||
/* limit becomes the number of bytes still to compare (0..3). */
adds limit, limit, 4
|
||||
beq L(ret_eq)
|
||||
L(byte_loop):
|
||||
ldrb data1w, [src1], 1
|
||||
ldrb data2w, [src2], 1
|
||||
subs limit, limit, 1
|
||||
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
|
||||
b.eq L(byte_loop)
|
||||
/* Difference of the last pair of bytes read; zero if they matched. */
sub result, data1w, data2w
|
||||
ret
|
||||
|
||||
.size memcmp, . - memcmp
|
||||
232
libs/libc/machine/arm64/gnu/arch_memcpy.S
Normal file
232
libs/libc/machine/arm64/gnu/arch_memcpy.S
Normal file
@@ -0,0 +1,232 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_memcpy.S
|
||||
*
|
||||
* Copyright (c) 2012-2013, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses.
|
||||
*
|
||||
*/
|
||||
|
||||
#define dstin x0
|
||||
#define src x1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define srcend x4
|
||||
#define dstend x5
|
||||
#define A_l x6
|
||||
#define A_lw w6
|
||||
#define A_h x7
|
||||
#define A_hw w7
|
||||
#define B_l x8
|
||||
#define B_lw w8
|
||||
#define B_h x9
|
||||
#define C_l x10
|
||||
#define C_h x11
|
||||
#define D_l x12
|
||||
#define D_h x13
|
||||
#define E_l src
|
||||
#define E_h count
|
||||
#define F_l srcend
|
||||
#define F_h dst
|
||||
#define tmp1 x9
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Copies are split into 3 main cases: small copies of up to 16 bytes,
|
||||
medium copies of 17..96 bytes which are fully unrolled. Large copies
|
||||
of more than 96 bytes align the destination and use an unrolled loop
|
||||
processing 64 bytes per iteration.
|
||||
Small and medium copies read all data before writing, allowing any
|
||||
kind of overlap, and memmove tailcalls memcpy for these cases as
|
||||
well as non-overlapping copies.
|
||||
*/
|
||||
|
||||
/* memcpy: copy `count` bytes from src to dstin. */
/* dstin (x0) is never written by this routine, so it is still the */
/* destination pointer on return. */
def_fn memcpy p2align=6
|
||||
prfm PLDL1KEEP, [src]
|
||||
/* srcend/dstend point one past the last source/destination byte. */
add srcend, src, count
|
||||
add dstend, dstin, count
|
||||
cmp count, 16
|
||||
b.ls L(copy16)
|
||||
cmp count, 96
|
||||
b.hi L(copy_long)
|
||||
|
||||
/* Medium copies: 17..96 bytes. */
|
||||
/* tmp1 = count - 1 is 16..95 here: bit 6 set means 65..96 bytes, */
/* bit 5 set (with bit 6 clear) means 33..64 bytes. */
sub tmp1, count, 1
|
||||
ldp A_l, A_h, [src]
|
||||
tbnz tmp1, 6, L(copy96)
|
||||
/* 17..64 bytes: always the first and last 16 bytes; 33..64 bytes also */
/* copies the second and second-to-last 16 bytes. */
ldp D_l, D_h, [srcend, -16]
|
||||
tbz tmp1, 5, 1f
|
||||
ldp B_l, B_h, [src, 16]
|
||||
ldp C_l, C_h, [srcend, -32]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstend, -32]
|
||||
1:
|
||||
stp A_l, A_h, [dstin]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Small copies: 0..16 bytes. */
|
||||
L(copy16):
|
||||
/* 8..16 bytes: one 8-byte word from each end; the two may overlap. */
cmp count, 8
|
||||
b.lo 1f
|
||||
ldr A_l, [src]
|
||||
ldr A_h, [srcend, -8]
|
||||
str A_l, [dstin]
|
||||
str A_h, [dstend, -8]
|
||||
ret
|
||||
.p2align 4
|
||||
1:
|
||||
/* count < 8 here; bit 2 set means 4..7 bytes: one 4-byte word from each end. */
tbz count, 2, 1f
|
||||
ldr A_lw, [src]
|
||||
ldr A_hw, [srcend, -4]
|
||||
str A_lw, [dstin]
|
||||
str A_hw, [dstend, -4]
|
||||
ret
|
||||
|
||||
/* Copy 0..3 bytes. Use a branchless sequence that copies the same
|
||||
byte 3 times if count==1, or the 2nd byte twice if count==2. */
|
||||
1:
|
||||
cbz count, 2f
|
||||
/* tmp1 = count / 2 is the offset of the middle byte. */
lsr tmp1, count, 1
|
||||
ldrb A_lw, [src]
|
||||
ldrb A_hw, [srcend, -1]
|
||||
ldrb B_lw, [src, tmp1]
|
||||
strb A_lw, [dstin]
|
||||
strb B_lw, [dstin, tmp1]
|
||||
strb A_hw, [dstend, -1]
|
||||
2: ret
|
||||
|
||||
.p2align 4
|
||||
/* Copy 65..96 bytes. Copy 64 bytes from the start and
|
||||
32 bytes from the end. */
|
||||
L(copy96):
|
||||
ldp B_l, B_h, [src, 16]
|
||||
ldp C_l, C_h, [src, 32]
|
||||
ldp D_l, D_h, [src, 48]
|
||||
/* E_l/E_h alias src/count and F_l/F_h alias srcend/dst, so these two */
/* loads come last: the F load overwrites srcend itself. */
ldp E_l, E_h, [srcend, -32]
|
||||
ldp F_l, F_h, [srcend, -16]
|
||||
stp A_l, A_h, [dstin]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstin, 32]
|
||||
stp D_l, D_h, [dstin, 48]
|
||||
stp E_l, E_h, [dstend, -32]
|
||||
stp F_l, F_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
/* Align DST to 16 byte alignment so that we don't cross cache line
|
||||
boundaries on both loads and stores. There are at least 96 bytes
|
||||
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||
|
||||
.p2align 4
|
||||
L(copy_long):
|
||||
/* tmp1 = dstin & 15; dst = dstin rounded down to a 16-byte boundary. */
and tmp1, dstin, 15
|
||||
bic dst, dstin, 15
|
||||
ldp D_l, D_h, [src]
|
||||
/* Move src back by the same amount so src and dst offsets stay in step. */
sub src, src, tmp1
|
||||
add count, count, tmp1 /* Count is now 16 too large. */
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp D_l, D_h, [dstin]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 128 + 16 /* Test and readjust count. */
|
||||
b.ls 2f
|
||||
1:
|
||||
/* Store the 64 bytes loaded on the previous pass while loading the next 64. */
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
stp D_l, D_h, [dst, 64]!
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
|
||||
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||
bytes, so it is safe to always copy 64 bytes from the end even if
|
||||
there is just 1 byte left. */
|
||||
2:
|
||||
/* E_l/E_h reuse src/count, which are not read again after this load. */
ldp E_l, E_h, [srcend, -64]
|
||||
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [srcend, -48]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [srcend, -16]
|
||||
stp D_l, D_h, [dst, 64]
|
||||
stp E_l, E_h, [dstend, -64]
|
||||
stp A_l, A_h, [dstend, -48]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
stp C_l, C_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.size memcpy, . - memcpy
|
||||
157
libs/libc/machine/arm64/gnu/arch_memmove.S
Normal file
157
libs/libc/machine/arm64/gnu/arch_memmove.S
Normal file
@@ -0,0 +1,157 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_memmove.S
|
||||
*
|
||||
* Copyright (c) 2013, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Parameters and result. */
|
||||
#define dstin x0
|
||||
#define src x1
|
||||
#define count x2
|
||||
#define srcend x3
|
||||
#define dstend x4
|
||||
#define tmp1 x5
|
||||
#define A_l x6
|
||||
#define A_h x7
|
||||
#define B_l x8
|
||||
#define B_h x9
|
||||
#define C_l x10
|
||||
#define C_h x11
|
||||
#define D_l x12
|
||||
#define D_h x13
|
||||
#define E_l count
|
||||
#define E_h tmp1
|
||||
|
||||
/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
|
||||
Larger backwards copies are also handled by memcpy. The only remaining
|
||||
case is forward large copies. The destination is aligned, and an
|
||||
unrolled loop processes 64 bytes per iteration.
|
||||
*/
|
||||
|
||||
/* memmove: copy `count` bytes from src to dstin; the regions may overlap. */
/* dstin (x0) is never written by this routine. */
def_fn memmove, 6
|
||||
/* tmp1 = dstin - src.  Tail-call memcpy when count <= 96 (the ccmp */
/* immediate 2 sets C, so b.hs is taken) or when tmp1, compared as an */
/* unsigned value, is >= count. */
sub tmp1, dstin, src
|
||||
cmp count, 96
|
||||
ccmp tmp1, count, 2, hi
|
||||
b.hs memcpy
|
||||
|
||||
/* dstin == src: nothing to copy. */
cbz tmp1, 3f
|
||||
add dstend, dstin, count
|
||||
add srcend, src, count
|
||||
|
||||
/* Align dstend to 16 byte alignment so that we don't cross cache line
|
||||
boundaries on both loads and stores. There are at least 96 bytes
|
||||
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||
|
||||
/* tmp1 = dstend & 15.  The final 16 bytes are copied unaligned, then */
/* srcend, count and dstend are all reduced by tmp1. */
and tmp1, dstend, 15
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
sub srcend, srcend, tmp1
|
||||
sub count, count, tmp1
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
sub dstend, dstend, tmp1
|
||||
subs count, count, 128
|
||||
b.ls 2f
|
||||
nop
|
||||
1:
|
||||
/* Work downwards from the end: store the previous 64 bytes while */
/* loading the next 64 below them. */
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
stp D_l, D_h, [dstend, -64]!
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
|
||||
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||
bytes, so it is safe to always copy 64 bytes from the start even if
|
||||
there is just 1 byte left. */
|
||||
2:
|
||||
/* E_l/E_h reuse count/tmp1, which are not read again after this load. */
ldp E_l, E_h, [src, 48]
|
||||
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [src, 32]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [src, 16]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [src]
|
||||
stp D_l, D_h, [dstend, -64]
|
||||
stp E_l, E_h, [dstin, 48]
|
||||
stp A_l, A_h, [dstin, 32]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstin]
|
||||
3: ret
|
||||
|
||||
.size memmove, . - memmove
|
||||
242
libs/libc/machine/arm64/gnu/arch_memset.S
Normal file
242
libs/libc/machine/arm64/gnu/arch_memset.S
Normal file
@@ -0,0 +1,242 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_memset.S
|
||||
*
|
||||
* Copyright (c) 2012-2013, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses
|
||||
*
|
||||
*/
|
||||
|
||||
#define dstin x0
|
||||
#define val x1
|
||||
#define valw w1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define dstend x4
|
||||
#define tmp1 x5
|
||||
#define tmp1w w5
|
||||
#define tmp2 x6
|
||||
#define tmp2w w6
|
||||
#define zva_len x7
|
||||
#define zva_lenw w7
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* memset: store the low byte of valw into `count` bytes starting at dstin. */
/* dstin (x0) is never written by this routine. */
def_fn memset p2align=6
|
||||
|
||||
/* q0 = fill byte replicated into all 16 lanes. */
dup v0.16B, valw
|
||||
add dstend, dstin, count
|
||||
|
||||
cmp count, 96
|
||||
b.hi L(set_long)
|
||||
cmp count, 16
|
||||
b.hs L(set_medium)
|
||||
/* val = the replicated fill byte in a general register for small stores. */
mov val, v0.D[0]
|
||||
|
||||
/* Set 0..15 bytes. */
|
||||
/* Bit 3 set: 8..15 bytes, one 8-byte store from each end (may overlap). */
tbz count, 3, 1f
|
||||
str val, [dstin]
|
||||
str val, [dstend, -8]
|
||||
ret
|
||||
nop
|
||||
/* Bit 2 set: 4..7 bytes, one 4-byte store from each end. */
1: tbz count, 2, 2f
|
||||
str valw, [dstin]
|
||||
str valw, [dstend, -4]
|
||||
ret
|
||||
/* 1..3 bytes: the first byte, plus the last two bytes when bit 1 is set. */
2: cbz count, 3f
|
||||
strb valw, [dstin]
|
||||
tbz count, 1, 3f
|
||||
strh valw, [dstend, -2]
|
||||
3: ret
|
||||
|
||||
/* Set 16..96 bytes. */
|
||||
L(set_medium):
|
||||
str q0, [dstin]
|
||||
tbnz count, 6, L(set96)
|
||||
/* 16..63 bytes: first and last 16; bit 5 set adds the second and */
/* second-to-last 16. */
str q0, [dstend, -16]
|
||||
tbz count, 5, 1f
|
||||
str q0, [dstin, 16]
|
||||
str q0, [dstend, -32]
|
||||
1: ret
|
||||
|
||||
.p2align 4
|
||||
/* Set 64..96 bytes. Write 64 bytes from the start and
|
||||
32 bytes from the end. */
|
||||
L(set96):
|
||||
str q0, [dstin, 16]
|
||||
stp q0, q0, [dstin, 32]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
nop
|
||||
L(set_long):
|
||||
and valw, valw, 255
|
||||
bic dst, dstin, 15
|
||||
str q0, [dstin]
|
||||
/* Try DC ZVA only when count >= 256 and the fill byte is zero; when */
/* count < 256 the ccmp immediate 0 leaves NE and the branch is not taken. */
cmp count, 256
|
||||
ccmp valw, 0, 0, cs
|
||||
b.eq L(try_zva)
|
||||
L(no_zva):
|
||||
sub count, dstend, dst /* Count is 16 too large. */
|
||||
sub dst, dst, 16 /* Dst is biased by -32. */
|
||||
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
|
||||
1: stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dst, 64]!
|
||||
L(tail64):
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
/* Finish with the last 64 bytes, addressed from the end. */
2: stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
L(try_zva):
|
||||
/* DCZID_EL0 bit 4 set: fall back to plain stores (the Arm ARM names */
/* this bit DZP, "DC ZVA prohibited").  The low 4 bits select the block */
/* size, tested below as 4 -> 64 bytes and 5 -> 128 bytes. */
mrs tmp1, dczid_el0
|
||||
tbnz tmp1w, 4, L(no_zva)
|
||||
and tmp1w, tmp1w, 15
|
||||
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||
b.ne L(zva_128)
|
||||
|
||||
/* Write the first and last 64 byte aligned block using stp rather
|
||||
than using DC ZVA. This is faster on some cores.
|
||||
*/
|
||||
L(zva_64):
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
bic dst, dst, 63
|
||||
stp q0, q0, [dst, 64]
|
||||
stp q0, q0, [dst, 96]
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
nop
|
||||
1: dc zva, dst
|
||||
add dst, dst, 64
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
stp q0, q0, [dst, 0]
|
||||
stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
L(zva_128):
|
||||
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||
b.ne L(zva_other)
|
||||
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dst, 64]
|
||||
stp q0, q0, [dst, 96]
|
||||
bic dst, dst, 127
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+128 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
1: dc zva, dst
|
||||
add dst, dst, 128
|
||||
subs count, count, 128
|
||||
b.hi 1b
|
||||
stp q0, q0, [dstend, -128]
|
||||
stp q0, q0, [dstend, -96]
|
||||
stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
L(zva_other):
|
||||
/* zva_len = 4 << (low 4 bits of DCZID_EL0), in bytes. */
mov tmp2w, 4
|
||||
lsl zva_lenw, tmp2w, tmp1w
|
||||
add tmp1, zva_len, 64 /* Max alignment bytes written. */
|
||||
/* Smaller than zva_len + 64: use the plain store loop instead. */
cmp count, tmp1
|
||||
blo L(no_zva)
|
||||
|
||||
sub tmp2, zva_len, 1
|
||||
add tmp1, dst, zva_len
|
||||
add dst, dst, 16
|
||||
subs count, tmp1, dst /* Actual alignment bytes to write. */
|
||||
bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
|
||||
beq 2f
|
||||
1: stp q0, q0, [dst], 64
|
||||
stp q0, q0, [dst, -32]
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
2: mov dst, tmp1
|
||||
sub count, dstend, tmp1 /* Remaining bytes to write. */
|
||||
subs count, count, zva_len
|
||||
b.lo 4f
|
||||
3: dc zva, dst
|
||||
add dst, dst, zva_len
|
||||
subs count, count, zva_len
|
||||
b.hs 3b
|
||||
/* Undo the last subtraction so count is the number of bytes left. */
4: add count, count, zva_len
|
||||
sub dst, dst, 32 /* Bias dst for tail loop. */
|
||||
b L(tail64)
|
||||
|
||||
.size memset, . - memset
|
||||
77
libs/libc/machine/arm64/gnu/arch_setjmp.S
Normal file
77
libs/libc/machine/arm64/gnu/arch_setjmp.S
Normal file
@@ -0,0 +1,77 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_setjmp.S
|
||||
*
|
||||
* Copyright (c) 2011, 2012 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Offsets, in bytes from the jmp_buf base in x0, of each saved register.
   setjmp and longjmp define REG_PAIR and REG_ONE as stores or loads before
   expanding these lists, so both routines use one layout. The x16 slot at
   offset 96 holds the stack pointer, which both routines move through x16. */
#define GPR_LAYOUT \
|
||||
REG_PAIR (x19, x20, 0); \
|
||||
REG_PAIR (x21, x22, 16); \
|
||||
REG_PAIR (x23, x24, 32); \
|
||||
REG_PAIR (x25, x26, 48); \
|
||||
REG_PAIR (x27, x28, 64); \
|
||||
REG_PAIR (x29, x30, 80); \
|
||||
REG_ONE (x16, 96)
|
||||
|
||||
/* d8-d15 occupy offsets 112 to 175, after the x16 slot at offset 96. */
#define FPR_LAYOUT \
|
||||
REG_PAIR ( d8, d9, 112); \
|
||||
REG_PAIR (d10, d11, 128); \
|
||||
REG_PAIR (d12, d13, 144); \
|
||||
REG_PAIR (d14, d15, 160);
|
||||
|
||||
// int setjmp (jmp_buf)
// Saves x19-x30, the stack pointer (staged through x16) and d8-d15 into the
// buffer addressed by x0, at the offsets listed in GPR_LAYOUT and FPR_LAYOUT,
// then returns 0 in w0.
|
||||
.global setjmp
|
||||
.type setjmp, %function
|
||||
setjmp:
|
||||
mov x16, sp // stage sp in x16; REG_ONE in GPR_LAYOUT stores x16 at offset 96
|
||||
#define REG_PAIR(REG1, REG2, OFFS) stp REG1, REG2, [x0, OFFS]
|
||||
#define REG_ONE(REG1, OFFS) str REG1, [x0, OFFS]
|
||||
GPR_LAYOUT
|
||||
FPR_LAYOUT
|
||||
#undef REG_PAIR
|
||||
#undef REG_ONE
|
||||
mov w0, #0 // the direct call returns 0
|
||||
ret
|
||||
.size setjmp, .-setjmp
|
||||
|
||||
// void longjmp (jmp_buf, int) __attribute__ ((noreturn))
// Reloads x19-x30, x16 and d8-d15 from the buffer addressed by x0 using the
// same GPR_LAYOUT/FPR_LAYOUT offsets as setjmp, restores sp from x16, and
// branches to the reloaded x30. w0 carries the second argument, replaced by
// 1 when that argument is 0.
|
||||
.global longjmp
|
||||
.type longjmp, %function
|
||||
longjmp:
|
||||
#define REG_PAIR(REG1, REG2, OFFS) ldp REG1, REG2, [x0, OFFS]
|
||||
#define REG_ONE(REG1, OFFS) ldr REG1, [x0, OFFS]
|
||||
GPR_LAYOUT
|
||||
FPR_LAYOUT
|
||||
#undef REG_PAIR
|
||||
#undef REG_ONE
|
||||
mov sp, x16 // x16 was reloaded from offset 96, where setjmp stored sp
|
||||
cmp w1, #0
|
||||
cinc w0, w1, eq // w0 = w1, or w1 + 1 (that is, 1) when w1 == 0
|
||||
// use br not ret, as ret is guaranteed to mispredict
|
||||
br x30
|
||||
.size longjmp, .-longjmp
|
||||
161
libs/libc/machine/arm64/gnu/arch_strchr.S
Normal file
161
libs/libc/machine/arm64/gnu/arch_strchr.S
Normal file
@@ -0,0 +1,161 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strchr.S
|
||||
*
|
||||
* Copyright (c) 2014, ARM Limited
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
|
||||
#define result x0
|
||||
|
||||
#define src x2
|
||||
#define tmp1 x3
|
||||
#define wtmp2 w4
|
||||
#define tmp3 x5
|
||||
|
||||
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_nul1 v3
|
||||
#define vhas_nul2 v4
|
||||
#define vhas_chr1 v5
|
||||
#define vhas_chr2 v6
|
||||
#define vrepmask_0 v7
|
||||
#define vrepmask_c v16
|
||||
#define vend1 v17
|
||||
#define vend2 v18
|
||||
|
||||
/* Core algorithm.
|
||||
|
||||
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||
and little-endian systems). For each tuple, bit 0 is set iff
|
||||
the relevant byte matched the requested character; bit 1 is set
|
||||
iff the relevant byte matched the NUL end of string (we trigger
|
||||
off bit0 for the special case of looking for NUL). Since the bits
|
||||
in the syndrome reflect exactly the order in which things occur
|
||||
in the original string a count_trailing_zeros() operation will
|
||||
identify exactly which byte is causing the termination, and why. */
|
||||
|
||||
/* Locals and temporaries. */
|
||||
|
||||
/* def_fn NAME [p2align=N]: switch to the .text section, align to 2^N bytes
   (N defaults to 0), and define NAME as a global symbol of type %function
   at the current location. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* strchr: x0 (srcin) points to the string, w1 (chrin) holds the byte to
   find. Returns in x0 (result) the address of the first byte equal to
   chrin, or 0 when the NUL terminator comes first. A search for NUL
   itself returns the address of the terminator, because a NUL byte then
   also sets the even (character-match) bit of its syndrome pair. */
def_fn strchr
|
||||
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||
matches the requested byte. Magic constant 0x80200802 used
|
||||
similarly for NUL termination. */
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16 /* wtmp2 = 0x40100401 */
|
||||
dup vrepchr.16b, chrin /* Low byte of chrin copied to all 16 lanes. */
|
||||
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||
dup vrepmask_c.4s, wtmp2
|
||||
ands tmp1, srcin, #31 /* Offset of srcin within its 32-byte hunk. */
|
||||
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
|
||||
b.eq .Lloop /* Offset 0: srcin is already aligned. */
|
||||
|
||||
/* Input string is not 32-byte aligned. Rather than forcing
|
||||
the padding bytes to a safe value, we calculate the syndrome
|
||||
for all the bytes, but then mask off those bits of the
|
||||
syndrome that are related to the padding. */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
neg tmp1, tmp1
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
lsl tmp1, tmp1, #1 /* -2 * offset: two syndrome bits per byte. */
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||
mov tmp3, #~0
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||
lsr tmp1, tmp3, tmp1 /* Shift count is taken modulo 64: low 2 * offset bits set. */
|
||||
|
||||
mov tmp3, vend1.2d[0] /* 64-bit syndrome for this hunk. */
|
||||
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||
cbnz tmp1, .Ltail /* Match or NUL within the first hunk. */
|
||||
|
||||
.Lloop:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* Use a fast check for the termination condition. */
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
orr vend1.16b, vend1.16b, vend2.16b
|
||||
addp vend1.2d, vend1.2d, vend1.2d
|
||||
mov tmp1, vend1.2d[0]
|
||||
cbz tmp1, .Lloop /* Neither chrin nor NUL in this hunk. */
|
||||
|
||||
/* Termination condition found. Now need to establish exactly why
|
||||
we terminated. */
|
||||
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||
|
||||
mov tmp1, vend1.2d[0]
|
||||
.Ltail:
|
||||
/* Count the trailing zeros, by bit reversing... */
|
||||
rbit tmp1, tmp1
|
||||
/* Re-bias source. */
|
||||
sub src, src, #32
|
||||
clz tmp1, tmp1 /* And counting the leading zeros. */
|
||||
/* Tmp1 is even if the target character was found first. Otherwise
|
||||
we've found the end of string and we weren't looking for NUL. */
|
||||
tst tmp1, #1
|
||||
add result, src, tmp1, lsr #1 /* Two syndrome bits per byte. */
|
||||
csel result, result, xzr, eq /* Odd bit index: NUL came first, return 0. */
|
||||
ret
|
||||
|
||||
.size strchr, . - strchr
|
||||
146
libs/libc/machine/arm64/gnu/arch_strchrnul.S
Normal file
146
libs/libc/machine/arm64/gnu/arch_strchrnul.S
Normal file
@@ -0,0 +1,146 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strchrnul.S
|
||||
*
|
||||
* Copyright (c) 2014, ARM Limited
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
|
||||
#define result x0
|
||||
|
||||
#define src x2
|
||||
#define tmp1 x3
|
||||
#define wtmp2 w4
|
||||
#define tmp3 x5
|
||||
|
||||
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_nul1 v3
|
||||
#define vhas_nul2 v4
|
||||
#define vhas_chr1 v5
|
||||
#define vhas_chr2 v6
|
||||
#define vrepmask v7
|
||||
#define vend1 v16
|
||||
|
||||
/* Core algorithm.
|
||||
|
||||
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||
and little-endian systems). For each tuple, bit 0 is set iff
|
||||
the relevant byte matched the requested character or nul. Since the
|
||||
bits in the syndrome reflect exactly the order in which things occur
|
||||
in the original string a count_trailing_zeros() operation will
|
||||
identify exactly which byte is causing the termination. */
|
||||
|
||||
/* Locals and temporaries. */
|
||||
|
||||
/* def_fn NAME [p2align=N]: switch to the .text section, align to 2^N bytes
   (N defaults to 0), and define NAME as a global symbol of type %function
   at the current location. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* strchrnul: x0 (srcin) points to the string, w1 (chrin) holds the byte to
   find. Returns in x0 (result) the address of the first byte that equals
   chrin or is NUL, whichever comes first. */
def_fn strchrnul
|
||||
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||
matches the termination condition. */
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16 /* wtmp2 = 0x40100401 */
|
||||
dup vrepchr.16b, chrin /* Low byte of chrin copied to all 16 lanes. */
|
||||
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||
dup vrepmask.4s, wtmp2
|
||||
ands tmp1, srcin, #31 /* Offset of srcin within its 32-byte hunk. */
|
||||
b.eq .Lloop /* Offset 0: srcin is already aligned. */
|
||||
|
||||
/* Input string is not 32-byte aligned. Rather than forcing
|
||||
the padding bytes to a safe value, we calculate the syndrome
|
||||
for all the bytes, but then mask off those bits of the
|
||||
syndrome that are related to the padding. */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
neg tmp1, tmp1
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
|
||||
orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
lsl tmp1, tmp1, #1 /* -2 * offset: two syndrome bits per byte. */
|
||||
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
mov tmp3, #~0
|
||||
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||
lsr tmp1, tmp3, tmp1 /* Shift count is taken modulo 64: low 2 * offset bits set. */
|
||||
|
||||
mov tmp3, vend1.2d[0] /* 64-bit syndrome for this hunk. */
|
||||
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||
cbnz tmp1, .Ltail /* Match or NUL within the first hunk. */
|
||||
|
||||
.Lloop:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* Use a fast check for the termination condition. */
|
||||
orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
|
||||
addp vend1.2d, vend1.2d, vend1.2d
|
||||
mov tmp1, vend1.2d[0]
|
||||
cbz tmp1, .Lloop /* Neither chrin nor NUL in this hunk. */
|
||||
|
||||
/* Termination condition found. Now need to establish exactly why
|
||||
we terminated. */
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||
|
||||
mov tmp1, vend1.2d[0]
|
||||
.Ltail:
|
||||
/* Count the trailing zeros, by bit reversing... */
|
||||
rbit tmp1, tmp1
|
||||
/* Re-bias source. */
|
||||
sub src, src, #32
|
||||
clz tmp1, tmp1 /* ... and counting the leading zeros. */
|
||||
/* tmp1 is twice the offset into the fragment. */
|
||||
add result, src, tmp1, lsr #1
|
||||
ret
|
||||
|
||||
.size strchrnul, . - strchrnul
|
||||
205
libs/libc/machine/arm64/gnu/arch_strcmp.S
Normal file
205
libs/libc/machine/arm64/gnu/arch_strcmp.S
Normal file
@@ -0,0 +1,205 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strcmp.S
|
||||
*
|
||||
* Copyright (c) 2012-2018, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
/* def_fn NAME [p2align=N]: switch to the .text section, align to 2^N bytes
   (N defaults to 0), and define NAME as a global symbol of type %function
   at the current location. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
#define L(label) .L ## label
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define result x0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x2
|
||||
#define data1w w2
|
||||
#define data2 x3
|
||||
#define data2w w3
|
||||
#define has_nul x4
|
||||
#define diff x5
|
||||
#define syndrome x6
|
||||
#define tmp1 x7
|
||||
#define tmp2 x8
|
||||
#define tmp3 x9
|
||||
#define zeroones x10
|
||||
#define pos x11
|
||||
|
||||
/* strcmp: x0 (src1) and x1 (src2) point to the two strings. Returns in x0
   (result) zero when the strings are equal, otherwise a value whose sign
   reflects the order of the first differing bytes compared as unsigned
   chars. */
/* Start of performance-critical section -- one 64B cache line. */
|
||||
def_fn strcmp p2align=6
|
||||
eor tmp1, src1, src2 /* Low 3 bits are zero iff both pointers share alignment mod 8. */
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
b.ne L(misaligned8)
|
||||
ands tmp1, src1, #7 /* Offset of src1 (and src2) within a dword. */
|
||||
b.ne L(mutual_align)
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
L(loop_aligned):
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
L(start_realigned):
|
||||
sub tmp1, data1, zeroones /* X - 1 per byte. */
|
||||
orr tmp2, data1, #REP8_7f /* X | 0x7f per byte. */
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, L(loop_aligned)
|
||||
/* End of performance-critical section -- one 64B cache line. */
|
||||
|
||||
L(end):
|
||||
#ifndef __AARCH64EB__
|
||||
rev syndrome, syndrome
|
||||
rev data1, data1
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
clz pos, syndrome
|
||||
rev data2, data2
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#else
|
||||
/* For big-endian we cannot use the trick with the syndrome value
|
||||
as carry-propagation can corrupt the upper bits if the trailing
|
||||
bytes in the string contain 0x01. */
|
||||
/* However, if there is no NUL byte in the dword, we can generate
|
||||
the result directly. We can't just subtract the bytes as the
|
||||
MSB might be significant. */
|
||||
cbnz has_nul, 1f
|
||||
cmp data1, data2
|
||||
cset result, ne /* 1 if the dwords differ, else 0. */
|
||||
cneg result, result, lo /* Negate when data1 is below data2 (unsigned). */
|
||||
ret
|
||||
1:
|
||||
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||
rev tmp3, data1
|
||||
sub tmp1, tmp3, zeroones
|
||||
orr tmp2, tmp3, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
rev has_nul, has_nul
|
||||
orr syndrome, diff, has_nul
|
||||
clz pos, syndrome
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#endif
|
||||
|
||||
L(mutual_align):
|
||||
/* Sources are mutually aligned, but are not currently at an
|
||||
alignment boundary. Round down the addresses and then mask off
|
||||
the bytes that precede the start point. */
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
ldr data1, [src1], #8
|
||||
neg tmp1, tmp1 /* Bits to alignment -64. */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
orr data1, data1, tmp2 /* Force the bytes before the start to 0xff in both */
|
||||
orr data2, data2, tmp2 /* words: they compare equal and are never NUL. */
|
||||
b L(start_realigned)
|
||||
|
||||
L(misaligned8):
|
||||
/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
|
||||
checking to make sure that we don't access beyond page boundary in
|
||||
SRC2. */
|
||||
tst src1, #7
|
||||
b.eq L(loop_misaligned)
|
||||
L(do_misaligned):
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
cmp data1w, #1 /* Carry set iff data1w != 0. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.ne L(done) /* NUL in src1 or the bytes differ. */
|
||||
tst src1, #7
|
||||
b.ne L(do_misaligned) /* Repeat until src1 is 8-byte aligned. */
|
||||
|
||||
L(loop_misaligned):
|
||||
/* Test if we are within the last dword of the end of a 4K page. If
|
||||
yes then jump back to the misaligned loop to compare a byte at a time. */
|
||||
and tmp1, src2, #0xff8
|
||||
eor tmp1, tmp1, #0xff8 /* Zero iff bits [11:3] of src2 are all ones. */
|
||||
cbz tmp1, L(do_misaligned)
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, L(loop_misaligned)
|
||||
b L(end)
|
||||
|
||||
L(done):
|
||||
sub result, data1, data2 /* Difference of the two zero-extended bytes. */
|
||||
ret
|
||||
.size strcmp, .-strcmp
|
||||
338
libs/libc/machine/arm64/gnu/arch_strcpy.S
Normal file
338
libs/libc/machine/arm64/gnu/arch_strcpy.S
Normal file
@@ -0,0 +1,338 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strcpy.S
|
||||
*
|
||||
* Copyright (c) 2013, 2014, 2015 ARM Ltd.
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||
*/
|
||||
|
||||
/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
|
||||
|
||||
To test the page crossing code path more thoroughly, compile with
|
||||
-DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
|
||||
entry path. This option is not intended for production use. */
|
||||
|
||||
/* Arguments and results. */
|
||||
#define dstin x0
|
||||
#define srcin x1
|
||||
|
||||
/* Locals and temporaries. */
|
||||
#define src x2
|
||||
#define dst x3
|
||||
#define data1 x4
|
||||
#define data1w w4
|
||||
#define data2 x5
|
||||
#define data2w w5
|
||||
#define has_nul1 x6
|
||||
#define has_nul2 x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define tmp4 x11
|
||||
#define zeroones x12
|
||||
#define data1a x13
|
||||
#define data2a x14
|
||||
#define pos x15
|
||||
#define len x16
|
||||
#define to_align x17
|
||||
|
||||
#ifdef BUILD_STPCPY
|
||||
#define STRCPY stpcpy
|
||||
#else
|
||||
#define STRCPY strcpy
|
||||
#endif
|
||||
|
||||
/* def_fn NAME [p2align=N]: switch to the .text section, align to 2^N bytes
   (N defaults to 0), and define NAME as a global symbol of type %function
   at the current location. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* AArch64 systems have a minimum page size of 4k. We can do a quick
|
||||
page size check for crossing this boundary on entry and if we
|
||||
do not, then we can short-circuit much of the entry code. We
|
||||
expect early page-crossing strings to be rare (probability of
|
||||
16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
|
||||
predictable, even with random strings.
|
||||
|
||||
We don't bother checking for larger page sizes, the cost of setting
|
||||
up the correct page size is just not worth the extra gain from
|
||||
a small reduction in the cases taking the slow path. Note that
|
||||
we only care about whether the first fetch, which may be
|
||||
misaligned, crosses a page boundary - after that we move to aligned
|
||||
fetches for the remainder of the string. */
|
||||
|
||||
#ifdef STRCPY_TEST_PAGE_CROSS
|
||||
/* Make everything that isn't Qword aligned look like a page cross. */
|
||||
#define MIN_PAGE_P2 4
|
||||
#else
|
||||
#define MIN_PAGE_P2 12
|
||||
#endif
|
||||
|
||||
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
|
||||
|
||||
def_fn STRCPY p2align=6
|
||||
/* For moderately short strings, the fastest way to do the copy is to
|
||||
calculate the length of the string in the same way as strlen, then
|
||||
essentially do a memcpy of the result. This avoids the need for
|
||||
multiple byte copies and further means that by the time we
|
||||
reach the bulk copy loop we know we can always use DWord
|
||||
accesses. We expect strcpy to rarely be called repeatedly
|
||||
with the same source string, so branch prediction is likely to
|
||||
always be difficult - we mitigate against this by preferring
|
||||
conditional select operations over branches whenever this is
|
||||
feasible. */
|
||||
and tmp2, srcin, #(MIN_PAGE_SIZE - 1)
|
||||
mov zeroones, #REP8_01
|
||||
and to_align, srcin, #15
|
||||
cmp tmp2, #(MIN_PAGE_SIZE - 16)
|
||||
neg tmp1, to_align
|
||||
/* The first fetch will straddle a (possible) page boundary iff
|
||||
srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
|
||||
aligned string will never fail the page align check, so will
|
||||
always take the fast path. */
|
||||
b.gt .Lpage_cross
|
||||
|
||||
.Lpage_cross_ok:
|
||||
ldp data1, data2, [srcin]
|
||||
#ifdef __AARCH64EB__
|
||||
/* Because we expect the end to be found within 16 characters
|
||||
(profiling shows this is the most common case), it's worth
|
||||
swapping the bytes now to save having to recalculate the
|
||||
termination syndrome later. We preserve data1 and data2
|
||||
so that we can re-use the values later on. */
|
||||
rev tmp2, data1
|
||||
sub tmp1, tmp2, zeroones
|
||||
orr tmp2, tmp2, #REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
b.ne .Lfp_le8
|
||||
rev tmp4, data2
|
||||
sub tmp3, tmp4, zeroones
|
||||
orr tmp4, tmp4, #REP8_7f
|
||||
#else
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
b.ne .Lfp_le8
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
#endif
|
||||
bics has_nul2, tmp3, tmp4
|
||||
b.eq .Lbulk_entry
|
||||
|
||||
/* The string is short (<=16 bytes). We don't know exactly how
|
||||
short though, yet. Work out the exact length so that we can
|
||||
quickly select the optimal copy strategy. */
|
||||
.Lfp_gt8:
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
mov tmp2, #56
|
||||
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||
sub pos, tmp2, pos
|
||||
#ifdef __AARCH64EB__
|
||||
lsr data2, data2, pos
|
||||
#else
|
||||
lsl data2, data2, pos
|
||||
#endif
|
||||
str data2, [dst, #1]
|
||||
str data1, [dstin]
|
||||
#ifdef BUILD_STPCPY
|
||||
add dstin, dst, #8
|
||||
#endif
|
||||
ret
|
||||
|
||||
.Lfp_le8:
|
||||
rev has_nul1, has_nul1
|
||||
clz pos, has_nul1
|
||||
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||
subs tmp2, pos, #24 /* Pos in bits. */
|
||||
b.lt .Lfp_lt4
|
||||
#ifdef __AARCH64EB__
|
||||
mov tmp2, #56
|
||||
sub pos, tmp2, pos
|
||||
lsr data2, data1, pos
|
||||
lsr data1, data1, #32
|
||||
#else
|
||||
lsr data2, data1, tmp2
|
||||
#endif
|
||||
/* 4->7 bytes to copy. */
|
||||
str data2w, [dst, #-3]
|
||||
str data1w, [dstin]
|
||||
#ifdef BUILD_STPCPY
|
||||
mov dstin, dst
|
||||
#endif
|
||||
ret
|
||||
.Lfp_lt4:
|
||||
cbz pos, .Lfp_lt2
|
||||
/* 2->3 bytes to copy. */
|
||||
#ifdef __AARCH64EB__
|
||||
lsr data1, data1, #48
|
||||
#endif
|
||||
strh data1w, [dstin]
|
||||
/* Fall-through, one byte (max) to go. */
|
||||
.Lfp_lt2:
|
||||
/* Null-terminated string. Last character must be zero! */
|
||||
strb wzr, [dst]
|
||||
#ifdef BUILD_STPCPY
|
||||
mov dstin, dst
|
||||
#endif
|
||||
ret
|
||||
|
||||
.p2align 6
|
||||
/* Aligning here ensures that the entry code and main loop all lies
|
||||
within one 64-byte cache line. */
|
||||
.Lbulk_entry:
|
||||
sub to_align, to_align, #16
|
||||
stp data1, data2, [dstin]
|
||||
sub src, srcin, to_align
|
||||
sub dst, dstin, to_align
|
||||
b .Lentry_no_page_cross
|
||||
|
||||
/* The inner loop deals with two Dwords at a time. This has a
|
||||
slightly higher start-up cost, but we should win quite quickly,
|
||||
especially on cores with a high number of issue slots per
|
||||
cycle, as we get much better parallelism out of the operations. */
|
||||
.Lmain_loop:
|
||||
stp data1, data2, [dst], #16
|
||||
.Lentry_no_page_cross:
|
||||
ldp data1, data2, [src], #16
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bics has_nul2, tmp3, tmp4
|
||||
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||
b.eq .Lmain_loop
|
||||
|
||||
/* Since we know we are copying at least 16 bytes, the fastest way
|
||||
to deal with the tail is to determine the location of the
|
||||
trailing NUL, then (re)copy the 16 bytes leading up to that. */
|
||||
cmp has_nul1, #0
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
csel data1, data1, data2, ne
|
||||
rev data1, data1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
#else
|
||||
csel has_nul1, has_nul1, has_nul2, ne
|
||||
#endif
|
||||
rev has_nul1, has_nul1
|
||||
clz pos, has_nul1
|
||||
add tmp1, pos, #72
|
||||
add pos, pos, #8
|
||||
csel pos, pos, tmp1, ne
|
||||
add src, src, pos, lsr #3
|
||||
add dst, dst, pos, lsr #3
|
||||
ldp data1, data2, [src, #-32]
|
||||
stp data1, data2, [dst, #-16]
|
||||
#ifdef BUILD_STPCPY
|
||||
sub dstin, dst, #1
|
||||
#endif
|
||||
ret
|
||||
|
||||
.Lpage_cross:
|
||||
bic src, srcin, #15
|
||||
/* Start by loading two words at [srcin & ~15], then forcing the
|
||||
bytes that precede srcin to 0xff. This means they never look
|
||||
like termination bytes. */
|
||||
ldp data1, data2, [src]
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
tst to_align, #7
|
||||
csetm tmp2, ne
|
||||
#ifdef __AARCH64EB__
|
||||
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
cmp to_align, #8
|
||||
csinv data1, data1, xzr, lt
|
||||
csel data2, data2, data2a, lt
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bics has_nul2, tmp3, tmp4
|
||||
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||
b.eq .Lpage_cross_ok
|
||||
/* We now need to make data1 and data2 look like they've been
|
||||
loaded directly from srcin. Do a rotate on the 128-bit value. */
|
||||
lsl tmp1, to_align, #3 /* Bytes->bits. */
|
||||
neg tmp2, to_align, lsl #3
|
||||
#ifdef __AARCH64EB__
|
||||
lsl data1a, data1, tmp1
|
||||
lsr tmp4, data2, tmp2
|
||||
lsl data2, data2, tmp1
|
||||
orr tmp4, tmp4, data1a
|
||||
cmp to_align, #8
|
||||
csel data1, tmp4, data2, lt
|
||||
rev tmp2, data1
|
||||
rev tmp4, data2
|
||||
sub tmp1, tmp2, zeroones
|
||||
orr tmp2, tmp2, #REP8_7f
|
||||
sub tmp3, tmp4, zeroones
|
||||
orr tmp4, tmp4, #REP8_7f
|
||||
#else
|
||||
lsr data1a, data1, tmp1
|
||||
lsl tmp4, data2, tmp2
|
||||
lsr data2, data2, tmp1
|
||||
orr tmp4, tmp4, data1a
|
||||
cmp to_align, #8
|
||||
csel data1, tmp4, data2, lt
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
#endif
|
||||
bic has_nul1, tmp1, tmp2
|
||||
cbnz has_nul1, .Lfp_le8
|
||||
bic has_nul2, tmp3, tmp4
|
||||
b .Lfp_gt8
|
||||
|
||||
.size STRCPY, . - STRCPY
|
||||
242
libs/libc/machine/arm64/gnu/arch_strlen.S
Normal file
242
libs/libc/machine/arm64/gnu/arch_strlen.S
Normal file
@@ -0,0 +1,242 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strlen.S
|
||||
*
|
||||
* Copyright (c) 2013-2015, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||
*/
|
||||
|
||||
/* To test the page crossing code path more thoroughly, compile with
|
||||
-DTEST_PAGE_CROSS - this will force all calls through the slower
|
||||
entry path. This option is not intended for production use. */
|
||||
|
||||
/* Arguments and results. */
/* srcin and len are both x0: writing len overwrites the input pointer. */
|
||||
#define srcin x0
|
||||
#define len x0
|
||||
|
||||
/* Locals and temporaries. */
/* has_nul1/tmp1 share x4 and has_nul2/tmp2 share x5, so computing a
   syndrome overwrites the matching temporary. */
|
||||
#define src x1
|
||||
#define data1 x2
|
||||
#define data2 x3
|
||||
#define has_nul1 x4
|
||||
#define has_nul2 x5
|
||||
#define tmp1 x4
|
||||
#define tmp2 x5
|
||||
#define tmp3 x6
|
||||
#define tmp4 x7
|
||||
#define zeroones x8
|
||||
|
||||
/* L(name) pastes the ".L" prefix onto a label name, e.g. L(tail)
   expands to .Ltail. */
#define L(l) .L ## l
|
||||
|
||||
/* def_fn f [p2align=N]: switch to .text, apply .p2align N (default 0),
   and define f as a global symbol of type %function at this point. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. A faster check
|
||||
(X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
|
||||
false hits for characters 129..255. */
|
||||
|
||||
/* 64-bit constants with the same byte value repeated in all 8 lanes;
   used by the NUL-detection arithmetic. */
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* strlen takes its page-cross path when
   (srcin & (MIN_PAGE_SIZE - 1)) > MIN_PAGE_SIZE - 16 (signed compare).
   With the test value 15 the right-hand side is -1 and the masked
   offset is never negative, so that branch is always taken. */
#ifdef TEST_PAGE_CROSS
|
||||
# define MIN_PAGE_SIZE 15
|
||||
#else
|
||||
# define MIN_PAGE_SIZE 4096
|
||||
#endif
|
||||
|
||||
/* Since strings are short on average, we check the first 16 bytes
|
||||
of the string for a NUL character. In order to do an unaligned ldp
|
||||
safely we have to do a page cross check first. If there is a NUL
|
||||
byte we calculate the length from the 2 8-byte words using
|
||||
conditional select to reduce branch mispredictions (it is unlikely
|
||||
strlen will be repeatedly called on strings with the same length).
|
||||
|
||||
If the string is longer than 16 bytes, we align src so don't need
|
||||
further page cross checks, and process 32 bytes per iteration
|
||||
using the fast NUL check. If we encounter non-ASCII characters,
|
||||
fallback to a second loop using the full NUL check.
|
||||
|
||||
If the page cross check fails, we read 16 bytes from an aligned
|
||||
address, remove any characters before the string, and continue
|
||||
in the main loop using aligned loads. Since strings crossing a
|
||||
page in the first 16 bytes are rare (probability of
|
||||
16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
|
||||
|
||||
AArch64 systems have a minimum page size of 4k. We don't bother
|
||||
checking for larger page sizes - the cost of setting up the correct
|
||||
page size is just not worth the extra gain from a small reduction in
|
||||
the cases taking the slow path. Note that we only care about
|
||||
whether the first fetch, which may be misaligned, crosses a page
|
||||
boundary. */
|
||||
|
||||
/* strlen: srcin (x0) points at the string; the length is returned in
   len (also x0).  This first part handles a NUL within the first 16
   bytes and otherwise branches to the main loop. */
def_fn strlen p2align=6
|
||||
/* tmp1 = offset of srcin within a MIN_PAGE_SIZE-sized page. */
and tmp1, srcin, MIN_PAGE_SIZE - 1
|
||||
mov zeroones, REP8_01
|
||||
/* An offset above MIN_PAGE_SIZE - 16 means the 16-byte load below
   would run past the end of the page: take the slow path. */
cmp tmp1, MIN_PAGE_SIZE - 16
|
||||
b.gt L(page_cross)
|
||||
ldp data1, data2, [srcin]
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul1/2 directly.
|
||||
Since we expect strings to be small and early-exit,
|
||||
byte-swap the data now so has_nul1/2 will be correct. */
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
#endif
|
||||
/* has_nulN = (dataN - REP8_01) & ~(dataN | REP8_7f). */
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
/* bics sets Z when has_nul1 is zero; only then does ccmp compare
   has_nul2 with 0, otherwise it forces NZCV = 0000.  EQ afterwards
   therefore means both syndromes are zero. */
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
beq L(main_loop_entry)
|
||||
|
||||
/* Enter with C = has_nul1 == 0. */
|
||||
/* C clear: NUL is in the first word, base length 0.
   C set: NUL is in the second word, base length 8. */
csel has_nul1, has_nul1, has_nul2, cc
|
||||
mov len, 8
|
||||
rev has_nul1, has_nul1
|
||||
clz tmp1, has_nul1
|
||||
csel len, xzr, len, cc
|
||||
/* tmp1 is a bit count; >> 3 turns it into the byte index in the word. */
add len, len, tmp1, lsr 3
|
||||
ret
|
||||
|
||||
/* The inner loop processes 32 bytes per iteration and uses the fast
|
||||
NUL check. If we encounter non-ASCII characters, use a second
|
||||
loop with the accurate NUL check. */
|
||||
.p2align 4
|
||||
L(main_loop_entry):
|
||||
/* src = (srcin & ~15) - 16, so the pre-indexed load below first reads
   the aligned block at (srcin & ~15) + 16. */
bic src, srcin, 15
|
||||
sub src, src, 16
|
||||
L(main_loop):
|
||||
/* Pre-index: src += 32, then load 16 bytes from the new src. */
ldp data1, data2, [src, 32]!
|
||||
/* This label is what L(page_cross_entry) expands to. */
.Lpage_cross_entry:
|
||||
/* Fast check: test bit 7 of every byte of
   (data1 - REP8_01) | (data2 - REP8_01); zeroones lsl 7 is the
   0x80-per-byte mask. */
sub tmp1, data1, zeroones
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp2, tmp1, tmp3
|
||||
tst tmp2, zeroones, lsl 7
|
||||
bne 1f
|
||||
/* Second half of the iteration: the 16 bytes at src + 16 (src itself
   is not updated by this load). */
ldp data1, data2, [src, 16]
|
||||
sub tmp1, data1, zeroones
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp2, tmp1, tmp3
|
||||
tst tmp2, zeroones, lsl 7
|
||||
beq L(main_loop)
|
||||
/* The hit was in the second half: make src point at that block. */
add src, src, 16
|
||||
1:
|
||||
/* The fast check failed, so do the slower, accurate NUL check. */
|
||||
/* tmp1/tmp3 still hold dataN - REP8_01 from the fast check. */
orr tmp2, data1, REP8_7f
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
/* EQ: neither word has a NUL, so the fast check was a false hit. */
beq L(nonascii_loop)
|
||||
|
||||
/* Enter with C = has_nul1 == 0. */
|
||||
/* Also expects src to point at the 16-byte block held in data1/data2. */
L(tail):
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul1/2 directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
csel data1, data1, data2, cc
|
||||
rev data1, data1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
#else
|
||||
/* Pick the syndrome of the word that contains the NUL. */
csel has_nul1, has_nul1, has_nul2, cc
|
||||
#endif
|
||||
/* len = byte offset of the current block from the start of the string
   (this overwrites srcin, which shares x0). */
sub len, src, srcin
|
||||
rev has_nul1, has_nul1
|
||||
/* tmp2 = offset of the second word of the block. */
add tmp2, len, 8
|
||||
clz tmp1, has_nul1
|
||||
csel len, len, tmp2, cc
|
||||
/* Add the byte index within the selected word (bit count >> 3). */
add len, len, tmp1, lsr 3
|
||||
ret
|
||||
|
||||
/* Loop that applies the accurate NUL check to every 16-byte block,
   unrolled twice.  Each ldp pre-increments src by 16.  Leaves for
   L(tail) with the flags produced by ccmp (C set iff has_nul1 == 0). */
L(nonascii_loop):
|
||||
ldp data1, data2, [src, 16]!
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
bne L(tail)
|
||||
/* Second copy of the same check for the next 16 bytes. */
ldp data1, data2, [src, 16]!
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
beq L(nonascii_loop)
|
||||
b L(tail)
|
||||
|
||||
/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
|
||||
srcin to 0x7f, so we ignore any NUL bytes before the string.
|
||||
Then continue in the aligned loop. */
|
||||
L(page_cross):
|
||||
bic src, srcin, 15
|
||||
ldp data1, data2, [src]
|
||||
/* tmp1 = srcin * 8; only its low 6 bits are used as the shift amount,
   i.e. 8 * (srcin & 7). */
lsl tmp1, srcin, 3
|
||||
mov tmp4, -1
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
/* tmp1 is now 0xff in byte lanes belonging to the string and 0x80 in
   the lanes before it, so ~tmp1 is 0x7f in the earlier lanes and 0
   elsewhere; orn ORs ~tmp1 into the data. */
orr tmp1, tmp1, REP8_80
|
||||
orn data1, data1, tmp1
|
||||
orn tmp2, data2, tmp1
|
||||
/* srcin bit 3 clear: the string starts in the first word, so use the
   masked data1 and the untouched data2.  Otherwise the whole first
   word precedes the string: replace it with all-ones (tmp4) and use
   the masked second word. */
tst srcin, 8
|
||||
csel data1, data1, tmp4, eq
|
||||
csel data2, data2, tmp2, eq
|
||||
b L(page_cross_entry)
|
||||
|
||||
.size strlen, . - strlen
|
||||
294
libs/libc/machine/arm64/gnu/arch_strncmp.S
Normal file
294
libs/libc/machine/arm64/gnu/arch_strncmp.S
Normal file
@@ -0,0 +1,294 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strncmp.S
|
||||
*
|
||||
* Copyright (c) 2013, 2018, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
/* def_fn f [p2align=N]: switch to .text, apply .p2align N (default 0),
   and define f as a global symbol of type %function at this point. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* 64-bit constants with the same byte value repeated in all 8 lanes.
   REP8_01 and REP8_7f feed the NUL-detection arithmetic; REP8_80 is
   not referenced by the strncmp code in this file. */
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
/* result shares x0 with src1. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define limit x2
|
||||
#define result x0
|
||||
|
||||
/* Internal variables. */
/* data1w/data2w are the 32-bit (w) names of data1/data2. */
|
||||
#define data1 x3
|
||||
#define data1w w3
|
||||
#define data2 x4
|
||||
#define data2w w4
|
||||
#define has_nul x5
|
||||
#define diff x6
|
||||
#define syndrome x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define zeroones x11
|
||||
#define pos x12
|
||||
#define limit_wd x13
|
||||
#define mask x14
|
||||
#define endloop x15
|
||||
/* count is a second name for mask (x14); the two must not be live at
   the same time. */
#define count mask
|
||||
|
||||
/* strncmp(src1 = x0, src2 = x1, limit = x2); result is returned in x0.
   The 7 nops follow a 64-byte alignment, and def_fn is used with its
   default p2align=0, so strncmp itself starts right after the padding.
   NOTE(review): presumably sized so that .Lloop_aligned begins a new
   64-byte line -- confirm by counting the encoded instructions. */
.text
|
||||
.p2align 6
|
||||
.rep 7
|
||||
nop /* Pad so that the loop below fits a cache line. */
|
||||
.endr
|
||||
def_fn strncmp
|
||||
/* limit == 0: nothing to compare. */
cbz limit, .Lret0
|
||||
/* Low 3 bits of src1 ^ src2 are non-zero when the two pointers have
   different offsets within a dword. */
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
/* count = offset of src1 within its dword. */
and count, src1, #7
|
||||
b.ne .Lmisaligned8
|
||||
cbnz count, .Lmutual_align
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
/* Start of performance-critical section -- one 64B cache line. */
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
/* limit_wd goes negative (N set) on the dword that contains the last
   byte allowed by the limit. */
subs limit_wd, limit_wd, #1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
/* endloop = diff while limit_wd >= 0 (pl), otherwise all-ones so the
   loop is forced to exit. */
csinv endloop, diff, xzr, pl /* Last Dword or differences. */
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
/* Continue only when has_nul == 0 and endloop == 0. */
ccmp endloop, #0, #0, eq
|
||||
b.eq .Lloop_aligned
|
||||
/* End of performance-critical section -- one 64B cache line. */
|
||||
|
||||
/* Not reached the limit, must have found the end or a diff. */
|
||||
/* Bit 63 of limit_wd clear means the counter did not go negative. */
tbz limit_wd, #63, .Lnot_limit
|
||||
|
||||
/* Limit % 8 == 0 => all bytes significant. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lnot_limit
|
||||
|
||||
lsl limit, limit, #3 /* Bytes -> bits. */
|
||||
mov mask, #~0
|
||||
/* mask = all-ones in the byte lanes at and beyond the limit. */
#ifdef __AARCH64EB__
|
||||
lsr mask, mask, limit
|
||||
#else
|
||||
lsl mask, mask, limit
|
||||
#endif
|
||||
/* Zero those lanes in both words so they compare equal. */
bic data1, data1, mask
|
||||
bic data2, data2, mask
|
||||
|
||||
/* Make sure that the NUL byte is marked in the syndrome. */
|
||||
orr has_nul, has_nul, mask
|
||||
|
||||
/* Compute the return value from data1/data2, given diff and has_nul
   for that pair of dwords. */
.Lnot_limit:
|
||||
orr syndrome, diff, has_nul
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev syndrome, syndrome
|
||||
rev data1, data1
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
clz pos, syndrome
|
||||
rev data2, data2
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
/* The instructions below operate on the 64-bit x registers. */
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#else
|
||||
/* For big-endian we cannot use the trick with the syndrome value
|
||||
as carry-propagation can corrupt the upper bits if the trailing
|
||||
bytes in the string contain 0x01. */
|
||||
/* However, if there is no NUL byte in the dword, we can generate
|
||||
the result directly. We can't just subtract the bytes as the
|
||||
MSB might be significant. */
|
||||
cbnz has_nul, 1f
|
||||
/* result = 0 if equal, else 1, negated to -1 when data1 is lower
   (unsigned) than data2. */
cmp data1, data2
|
||||
cset result, ne
|
||||
cneg result, result, lo
|
||||
ret
|
||||
1:
|
||||
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||
rev tmp3, data1
|
||||
sub tmp1, tmp3, zeroones
|
||||
orr tmp2, tmp3, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
rev has_nul, has_nul
|
||||
orr syndrome, diff, has_nul
|
||||
clz pos, syndrome
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#endif
|
||||
|
||||
/* Entered with count = src1 & 7 (non-zero) and both pointers at the
   same offset within a dword. */
.Lmutual_align:
|
||||
/* Sources are mutually aligned, but are not currently at an
|
||||
alignment boundary. Round down the addresses and then mask off
|
||||
the bytes that precede the start point.
|
||||
We also need to adjust the limit calculations, but without
|
||||
overflowing if the limit is near ULONG_MAX. */
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
ldr data1, [src1], #8
|
||||
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
/* tmp2 becomes 0xff in the `count` byte lanes that precede the start
   point and 0 elsewhere. */
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp3 /* Shift amount is tmp3 & 63. */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp3 /* Shift amount is tmp3 & 63. */
|
||||
#endif
|
||||
/* limit_wd = (limit - 1) / 8 + ((limit - 1) % 8 + count) / 8, built
   from the two halves so the sum cannot overflow. */
and tmp3, limit_wd, #7
|
||||
lsr limit_wd, limit_wd, #3
|
||||
/* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
|
||||
add limit, limit, count
|
||||
add tmp3, tmp3, count
|
||||
/* Force the pre-start bytes to 0xff in both words: they then compare
   equal and are never seen as NUL. */
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
add limit_wd, limit_wd, tmp3, lsr #3
|
||||
b .Lstart_realigned
|
||||
|
||||
.p2align 6
|
||||
/* Don't bother with dwords for up to 16 bytes. */
|
||||
/* Entered when src1 and src2 differ in their low 3 bits.  limit >= 16
   (b.hs) goes to the word-based path; smaller limits use the byte
   loop. */
.Lmisaligned8:
|
||||
cmp limit, #16
|
||||
b.hs .Ltry_misaligned_words
|
||||
|
||||
.Lbyte_loop:
|
||||
/* Perhaps we can do better than this. */
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
/* Keep looping only while (1) bytes remain after this one (hi),
   (2) the src1 byte is non-zero (cs after comparing with 1) and
   (3) the two bytes are equal.  A failed condition forces
   NZCV = 0000, which ends the loop. */
subs limit, limit, #1
|
||||
ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq .Lbyte_loop
|
||||
/* Return the difference of the last pair of bytes loaded. */
.Ldone:
|
||||
sub result, data1, data2
|
||||
ret
|
||||
/* Align the SRC1 to a dword by doing a bytewise compare and then do
|
||||
the dword loop. */
|
||||
.Ltry_misaligned_words:
|
||||
lsr limit_wd, limit, #3
|
||||
/* count == 0: src1 is already dword aligned. */
cbz count, .Ldo_misaligned
|
||||
|
||||
/* count = (-count) & 7 = bytes needed to reach the next dword boundary
   of src1; they are deducted from limit up front and limit_wd is
   recomputed. */
neg count, count
|
||||
and count, count, #7
|
||||
sub limit, limit, count
|
||||
lsr limit_wd, limit, #3
|
||||
|
||||
/* Compare `count` bytes one at a time, leaving for .Ldone on a NUL in
   src1 or on a mismatch.  .Lloop_misaligned also branches here. */
.Lpage_end_loop:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
/* cs after comparing with 1 means the src1 byte is non-zero; only then
   are the two bytes compared. */
cmp data1w, #1
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.ne .Ldone
|
||||
subs count, count, #1
|
||||
b.hi .Lpage_end_loop
|
||||
|
||||
.Ldo_misaligned:
|
||||
/* Prepare ourselves for the next page crossing. Unlike the aligned
|
||||
loop, we fetch 1 less dword because we risk crossing bounds on
|
||||
SRC2. */
|
||||
/* count = 8 is the byte count .Lpage_end_loop will use if the loop
   below branches to it. */
mov count, #8
|
||||
subs limit_wd, limit_wd, #1
|
||||
b.lo .Ldone_loop
|
||||
.Lloop_misaligned:
|
||||
/* tmp2 == 0 iff bits 3..11 of src2 are all ones, i.e. src2 lies in the
   last 8 bytes before a 4096-byte boundary; handle those bytes one at
   a time instead of with a dword load. */
and tmp2, src2, #0xff8
|
||||
eor tmp2, tmp2, #0xff8
|
||||
cbz tmp2, .Lpage_end_loop
|
||||
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
/* NE afterwards means a NUL or a difference was found. */
ccmp diff, #0, #0, eq
|
||||
b.ne .Lnot_limit
|
||||
subs limit_wd, limit_wd, #1
|
||||
b.pl .Lloop_misaligned
|
||||
|
||||
.Ldone_loop:
|
||||
/* We found a difference or a NULL before the limit was reached. */
|
||||
/* NOTE(review): the comment above does not match the control flow
   visible here.  This label is reached via b.lo in .Ldo_misaligned or
   by falling out of .Lloop_misaligned, both when the dword counter
   limit_wd is exhausted -- confirm and reword. */
and limit, limit, #7
|
||||
/* No partial dword left. */
cbz limit, .Lnot_limit
|
||||
/* Read the last word. */
|
||||
/* Load the 8 bytes at src - 8 + limit (limit is 1..7 here), a window
   that overlaps bytes already compared. */
sub src1, src1, 8
|
||||
sub src2, src2, 8
|
||||
ldr data1, [src1, limit]
|
||||
ldr data2, [src2, limit]
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
ccmp diff, #0, #0, eq
|
||||
b.ne .Lnot_limit
|
||||
|
||||
/* Strings are equal over the compared range (or limit was 0). */
.Lret0:
|
||||
mov result, #0
|
||||
ret
|
||||
.size strncmp, . - strncmp
|
||||
188
libs/libc/machine/arm64/gnu/arch_strnlen.S
Normal file
188
libs/libc/machine/arm64/gnu/arch_strnlen.S
Normal file
@@ -0,0 +1,188 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strnlen.S
|
||||
*
|
||||
* Copyright (c) 2013, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Linaro nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
/* srcin and len are both x0: writing len overwrites the input pointer. */
|
||||
#define srcin x0
|
||||
#define len x0
|
||||
#define limit x1
|
||||
|
||||
/* Locals and temporaries. */
|
||||
#define src x2
|
||||
#define data1 x3
|
||||
#define data2 x4
|
||||
#define data2a x5
|
||||
#define has_nul1 x6
|
||||
#define has_nul2 x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define tmp4 x11
|
||||
#define zeroones x12
|
||||
#define pos x13
|
||||
#define limit_wd x14
|
||||
|
||||
/* def_fn f [p2align=N]: switch to .text, apply .p2align N (default 0),
   and define f as a global symbol of type %function at this point. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* 64-bit constants with the same byte value repeated in all 8 lanes.
   REP8_01 and REP8_7f feed the NUL-detection arithmetic; REP8_80 is
   not referenced by the strnlen code in this file. */
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Padding and the shared "return limit" exit, placed ahead of the
   strnlen entry point.  .Lstart marks the 64-byte-aligned beginning
   and is used by the .size directive at the end of the function. */
.text
|
||||
.p2align 6
|
||||
.Lstart:
|
||||
/* Pre-pad to ensure critical loop begins an icache line. */
|
||||
.rep 7
|
||||
nop
|
||||
.endr
|
||||
/* Put this code here to avoid wasting more space with pre-padding. */
|
||||
/* Return limit: no NUL was found within the first limit bytes (also
   used for limit == 0). */
.Lhit_limit:
|
||||
mov len, limit
|
||||
ret
|
||||
|
||||
/* strnlen(srcin = x0, limit = x1); the result is returned in len (x0). */
def_fn strnlen
|
||||
/* limit == 0: return 0 through the shared exit. */
cbz limit, .Lhit_limit
|
||||
mov zeroones, #REP8_01
|
||||
/* src = srcin rounded down to 16 bytes; tmp1 = srcin & 15. */
bic src, srcin, #15
|
||||
ands tmp1, srcin, #15
|
||||
b.ne .Lmisaligned
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
/* The inner loop deals with two Dwords at a time. This has a
|
||||
slightly higher start-up cost, but we should win quite quickly,
|
||||
especially on cores with a high number of issue slots per
|
||||
cycle, as we get much better parallelism out of the operations. */
|
||||
|
||||
/* Start of critical section -- keep to one 64Byte cache line. */
|
||||
.Lloop:
|
||||
ldp data1, data2, [src], #16
|
||||
.Lrealigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
/* limit_wd goes negative (mi) on the last 16-byte block allowed by the
   limit. */
subs limit_wd, limit_wd, #1
|
||||
orr tmp1, has_nul1, has_nul2
|
||||
/* Continue only when limit_wd >= 0 (pl) and neither word has a NUL;
   when pl fails NZCV is forced to 0000, which ends the loop. */
ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
|
||||
b.eq .Lloop
|
||||
/* End of critical section -- keep to one 64Byte cache line. */
|
||||
|
||||
/* Loop exit: either a NUL was found or the limit was reached. */
orr tmp1, has_nul1, has_nul2
|
||||
cbz tmp1, .Lhit_limit /* No null in final Qword. */
|
||||
|
||||
/* We know there's a null in the final Qword. The easiest thing
|
||||
to do now is work out the length of the string and return
|
||||
MIN (len, limit). */
|
||||
|
||||
/* src was post-incremented past the block, so src - srcin is the
   offset of the END of the current 16-byte block (this overwrites
   srcin, which shares x0). */
sub len, src, srcin
|
||||
cbz has_nul1, .Lnul_in_data2
|
||||
#ifdef __AARCH64EB__
|
||||
mov data2, data1
|
||||
#endif
|
||||
/* NUL is in the first word: step back 8 more bytes and reuse the
   data2 path with the first word's syndrome. */
sub len, len, #8
|
||||
mov has_nul2, has_nul1
|
||||
.Lnul_in_data2:
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
rev data2, data2
|
||||
sub tmp1, data2, zeroones
|
||||
orr tmp2, data2, #REP8_7f
|
||||
bic has_nul2, tmp1, tmp2
|
||||
#endif
|
||||
/* len now becomes the offset of the start of the selected word. */
sub len, len, #8
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
add len, len, pos, lsr #3 /* Bits to bytes. */
|
||||
cmp len, limit
|
||||
csel len, len, limit, ls /* Return the lower value. */
|
||||
ret
|
||||
|
||||
/* Entered with tmp1 = srcin & 15 (non-zero) and src = srcin & ~15. */
.Lmisaligned:
|
||||
/* Deal with a partial first word.
|
||||
We're doing two things in parallel here;
|
||||
1) Calculate the number of words (but avoiding overflow if
|
||||
limit is near ULONG_MAX) - to do this we need to work out
|
||||
limit + tmp1 - 1 as a 65-bit value before shifting it;
|
||||
2) Load and mask the initial data words - we force the bytes
|
||||
before the ones we are interested in to 0xff - this ensures
|
||||
early bytes will not hit any zero detection. */
|
||||
sub limit_wd, limit, #1
|
||||
neg tmp4, tmp1
|
||||
/* The flags set here are consumed by the csinv/csel at the end of this
   block; no instruction in between writes the flags. */
cmp tmp1, #8
|
||||
|
||||
and tmp3, limit_wd, #15
|
||||
lsr limit_wd, limit_wd, #4
|
||||
mov tmp2, #~0
|
||||
|
||||
ldp data1, data2, [src], #16
|
||||
lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
|
||||
add tmp3, tmp3, tmp1
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
|
||||
#endif
|
||||
/* limit_wd = (limit - 1) / 16 + ((limit - 1) % 16 + tmp1) / 16. */
add limit_wd, limit_wd, tmp3, lsr #4
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
|
||||
/* tmp1 <= 8 (le): the string starts in the first word, so use the
   masked data1 and the untouched data2.  Otherwise the whole first
   word precedes the string: make it all-ones and use the masked
   second word (data2a). */
csinv data1, data1, xzr, le
|
||||
csel data2, data2, data2a, le
|
||||
b .Lrealigned
|
||||
.size strnlen, . - .Lstart /* Include pre-padding in size. */
|
||||
179
libs/libc/machine/arm64/gnu/arch_strrchr.S
Normal file
179
libs/libc/machine/arm64/gnu/arch_strrchr.S
Normal file
@@ -0,0 +1,179 @@
|
||||
/****************************************************************************
|
||||
* libs/libc/machine/arm64/gnu/arch_strrchr.S
|
||||
*
|
||||
* Copyright (c) 2014, ARM Limited
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
|
||||
/* result shares x0 with srcin: the return value overwrites the argument. */
#define result x0
|
||||
|
||||
/* Scalar working registers used by strrchr.
   src        - read pointer; post-incremented by 32 on every hunk load.
   src_match  - value of src captured after the latest hunk holding a match.
   src_offset - character syndrome of that hunk; zero while no match is known.
   nul_match  - NUL syndrome (or NUL-present indicator) of the current hunk.
   chr_match  - character syndrome of the current hunk.
   const_m1   - all-ones value used to build the padding mask.
   tmp1, wtmp2, tmp3, tmp4 - short-lived temporaries. */
#define src x2
|
||||
#define tmp1 x3
|
||||
#define wtmp2 w4
|
||||
#define tmp3 x5
|
||||
#define src_match x6
|
||||
#define src_offset x7
|
||||
#define const_m1 x8
|
||||
#define tmp4 x9
|
||||
#define nul_match x10
|
||||
#define chr_match x11
|
||||
|
||||
/* Vector working registers used by strrchr.
   vrepchr     - the search byte replicated into every byte lane.
   vdata1/2    - the two 16-byte halves of the current 32-byte hunk.
   vhas_nul1/2 - per-byte compare-against-zero results for each half.
   vhas_chr1/2 - per-byte compare-against-vrepchr results for each half.
   vrepmask_c  - 0x40100401 in every 32-bit lane (selects character bits).
   vrepmask_0  - vrepmask_c doubled, i.e. 0x80200802 (selects NUL bits).
   vend1       - reduced NUL indicator tested by the aligned loop.
   vend2       - not referenced by the strrchr code that follows. */
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_nul1 v3
|
||||
#define vhas_nul2 v4
|
||||
#define vhas_chr1 v5
|
||||
#define vhas_chr2 v6
|
||||
#define vrepmask_0 v7
|
||||
#define vrepmask_c v16
|
||||
#define vend1 v17
|
||||
#define vend2 v18
|
||||
|
||||
/* Core algorithm.
|
||||
|
||||
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||
and little-endian systems). For each tuple, bit 0 is set iff
|
||||
the relevant byte matched the requested character; bit 1 is set
|
||||
iff the relevant byte matched the NUL end of string (we trigger
|
||||
off bit0 for the special case of looking for NUL). Since the bits
|
||||
in the syndrome reflect exactly the order in which things occur
|
||||
in the original string a count_trailing_zeros() operation will
|
||||
identify exactly which byte is causing the termination, and why. */
|
||||
|
||||
/* Locals and temporaries. */
|
||||
|
||||
/* def_fn f [p2align]: open the definition of function f.
   Switches to the .text section, applies .p2align with the given argument
   (default 0), exports f as a global symbol of type %function and places
   its label.  The macro emits neither the body nor a .size directive;
   the user of the macro writes both after the invocation. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* strrchr: x0 = srcin (NUL-terminated string), w1 = chrin (only its low
   byte is replicated and compared).  Returns in x0 the address of the last
   byte equal to chrin at or before the terminator, or 0 when there is none;
   searching for 0 therefore yields the terminator itself.  The string is
   scanned forwards in aligned 32-byte hunks, remembering the most recent
   hunk that contained a match.  Because whole aligned hunks are loaded,
   bytes before the start of the string and after its terminator that share
   a hunk with it are read and then masked out of the syndromes. */
def_fn strrchr
|
||||
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||
matches the requested byte. Magic constant 0x80200802 used
|
||||
similarly for NUL termination. */
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16
|
||||
dup vrepchr.16b, chrin
|
||||
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||
dup vrepmask_c.4s, wtmp2
|
||||
/* A zero syndrome means "no match recorded yet"; tested before returning. */
mov src_offset, #0
|
||||
/* tmp1 = offset of srcin inside its hunk; Z is set when already aligned. */
ands tmp1, srcin, #31
|
||||
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
|
||||
b.eq .Laligned
|
||||
|
||||
/* Input string is not 32-byte aligned. Rather than forcing
|
||||
the padding bytes to a safe value, we calculate the syndrome
|
||||
for all the bytes, but then mask off those bits of the
|
||||
syndrome that are related to the padding. */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
neg tmp1, tmp1
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
|
||||
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
|
||||
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
|
||||
mov nul_match, vhas_nul1.2d[0]
|
||||
/* tmp1 = -2 * misalignment: two syndrome bits per padding byte. */
lsl tmp1, tmp1, #1
|
||||
mov const_m1, #~0
|
||||
mov chr_match, vhas_chr1.2d[0]
|
||||
/* A register-specified shift uses the amount modulo 64, so this is
   ~0 >> (64 - 2 * misalignment): ones exactly in the padding positions. */
lsr tmp3, const_m1, tmp1
|
||||
|
||||
bic nul_match, nul_match, tmp3 // Mask padding bits.
|
||||
bic chr_match, chr_match, tmp3 // Mask padding bits.
|
||||
/* The terminator is already inside the first hunk. */
cbnz nul_match, .Ltail
|
||||
|
||||
.Lloop:
|
||||
/* The hunk just examined had no NUL.  If it held a match, remember it:
   src was post-incremented by the load, so it points just past that hunk. */
cmp chr_match, #0
|
||||
csel src_match, src, src_match, ne
|
||||
csel src_offset, chr_match, src_offset, ne
|
||||
.Laligned:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* vend1 is reduced from the unmasked compare results: inside the loop it
   is only tested for zero / non-zero, not used as a positional syndrome. */
addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
|
||||
mov nul_match, vend1.2d[0]
|
||||
mov chr_match, vhas_chr1.2d[0]
|
||||
cbz nul_match, .Lloop
|
||||
|
||||
/* A NUL was seen in this hunk: now build the exact positional NUL
   syndrome from the compare results that are still in vhas_nul1/2. */
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
|
||||
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
|
||||
mov nul_match, vhas_nul1.2d[0]
|
||||
|
||||
.Ltail:
|
||||
/* Work out exactly where the string ends. */
|
||||
/* tmp4 = every bit up to and including the lowest set NUL bit.  That
   range contains the character bit of the terminator byte itself. */
sub tmp4, nul_match, #1
|
||||
eor tmp4, tmp4, nul_match
|
||||
/* Drop character matches that lie beyond the terminator. */
ands chr_match, chr_match, tmp4
|
||||
/* And pick the values corresponding to the last match. */
|
||||
csel src_match, src, src_match, ne
|
||||
csel src_offset, chr_match, src_offset, ne
|
||||
|
||||
/* Count down from the top of the syndrome to find the last match. */
|
||||
clz tmp3, src_offset
|
||||
/* Src_match points beyond the word containing the match, so we can
|
||||
simply subtract half the bit-offset into the syndrome. Because
|
||||
we are counting down, we need to go back one more character. */
|
||||
/* With the last match at byte i of the hunk the top set bit is 2 * i, so
   clz = 63 - 2 * i and (clz + 2) >> 1 = 32 - i, the distance back from
   src_match. */
add tmp3, tmp3, #2
|
||||
sub result, src_match, tmp3, lsr #1
|
||||
/* But if the syndrome shows no match was found, then return NULL. */
|
||||
cmp src_offset, #0
|
||||
csel result, result, xzr, ne
|
||||
|
||||
ret
|
||||
|
||||
.size strrchr, . - strrchr
|
||||
@@ -44,6 +44,7 @@
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef CONFIG_LIBC_ARCH_STRCHRNUL
|
||||
FAR char *strchrnul(FAR const char *s, int c)
|
||||
{
|
||||
if (s)
|
||||
@@ -56,3 +57,4 @@ FAR char *strchrnul(FAR const char *s, int c)
|
||||
|
||||
return (FAR char *)s;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
* Public Functions
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef CONFIG_ARCH_STRNCMP
|
||||
#ifndef CONFIG_LIBC_ARCH_STRNCMP
|
||||
#undef strncmp /* See mm/README.txt */
|
||||
int strncmp(FAR const char *cs, FAR const char *ct, size_t nb)
|
||||
{
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
* occurrence of the character c in the string s.
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_LIBC_ARCH_STRRCHR
|
||||
#undef strrchr /* See mm/README.txt */
|
||||
FAR char *strrchr(FAR const char *s, int c)
|
||||
{
|
||||
@@ -50,3 +51,4 @@ FAR char *strrchr(FAR const char *s, int c)
|
||||
|
||||
return (FAR char *)r;
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user