libc: add arm64 libc function

Port optimized memory and string functions from newlib and bionic.

Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
2026-05-16 22:21:04 +08:00 · 2023-01-05 14:36:21 +08:00
parent f8f29c8147
commit fae5aef4fe
22 changed files with 3325 additions and 2 deletions
@@ -490,6 +490,7 @@ libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
 libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
 libs/libc/machine/arm/armv7-r/gnu/arch_memcpy.S
 libs/libc/machine/arm/armv7-r/gnu/arch_strlen.S
+libs/libc/machine/arm64/gnu/arch_strnlen.S
 ================================================

   Copyright (c) 2013, Linaro Limited
@@ -739,6 +740,286 @@ libs/libc/machine/arm/armv8-m/gnu/arch_strlen.S
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+libs/libc/machine/arm64/gnu/arch_memchr.S
+libs/libc/machine/arm64/gnu/arch_strchr.S
+libs/libc/machine/arm64/gnu/arch_strchrnul.S
+libs/libc/machine/arm64/gnu/arch_strrchr.S
+================================================
+
+ Copyright (c) 2014, ARM Limited
+ All rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+     * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+     * Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+     * Neither the name of the company nor the names of its contributors
+       may be used to endorse or promote products derived from this
+       software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_memcmp.S
+================================================
+
+ Copyright (c) 2018, Linaro Limited
+  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Linaro Limited nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Copyright (c) 2017 ARM Ltd
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the company may not be used to endorse or promote
+    products derived from this software without specific prior written
+    permission.
+
+ THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_memcpy.S
+libs/libc/machine/arm64/gnu/arch_memmove.S
+libs/libc/machine/arm64/gnu/arch_memset.S
+================================================
+
+ Copyright (c) 2012-2013, Linaro Limited
+  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Linaro Limited nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Copyright (c) 2015 ARM Ltd
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the company may not be used to endorse or promote
+    products derived from this software without specific prior written
+    permission.
+
+ THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_strcmp.S
+================================================
+
+ Copyright (c) 2012-2018, Linaro Limited
+  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Linaro Limited nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_strcpy.S
+================================================
+
+   Copyright (c) 2013, 2014, 2015 ARM Ltd.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+	 notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+	 notice, this list of conditions and the following disclaimer in the
+	 documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+	 names of its contributors may be used to endorse or promote products
+	 derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_strlen.S
+================================================
+
+ Copyright (c) 2013-2015, Linaro Limited
+  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Linaro Limited nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+libs/libc/machine/arm64/gnu/arch_strncmp.S
+================================================
+
+ Copyright (c) 2013, 2018, Linaro Limited
+  All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Linaro Limited nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 libs/libc/machine/risc-v/gnu/arch_memset.S
 libs/libc/machine/risc-v/gnu/arch_strcmp.S
 ================================================
@@ -5543,7 +5824,8 @@ drivers/mtd/at24xx.c
 POSSIBILITY OF SUCH DAMAGE.

 libs/libc/machine/arm/armv8-m
-=============================
+libs/libc/machine/arm64/gnu/arch_setjmp.S
+==============================================

   Copyright (c) 2011, 2012 ARM Ltd.  All rights reserved.

@@ -72,10 +72,18 @@ config LIBC_ARCH_STRCHR
 	bool
 	default n

+config LIBC_ARCH_STRCHRNUL
+	bool
+	default n
+
 config LIBC_ARCH_STRCMP
 	bool
 	default n

+config LIBC_ARCH_STRNCMP
+	bool
+	default n
+
 config LIBC_ARCH_STRCPY
 	bool
 	default n
@@ -100,6 +108,10 @@ config LIBC_ARCH_STRNLEN
 	bool
 	default n

+config LIBC_ARCH_STRRCHR
+	bool
+	default n
+
 config LIBC_ARCH_ELF
 	bool
 	default n
@@ -174,6 +186,9 @@ config LIBM_ARCH_TRUNCF
 if ARCH_ARM
 source "libs/libc/machine/arm/Kconfig"
 endif
+if ARCH_ARM64
+source "libs/libc/machine/arm64/Kconfig"
+endif
 if ARCH_RISCV
 source "libs/libc/machine/risc-v/Kconfig"
 endif
@@ -25,6 +25,9 @@ endif
 ifeq ($(CONFIG_ARCH_ARM),y)
 include $(TOPDIR)/libs/libc/machine/arm/Make.defs
 endif
+ifeq ($(CONFIG_ARCH_ARM64),y)
+include $(TOPDIR)/libs/libc/machine/arm64/Make.defs
+endif
 ifeq ($(CONFIG_ARCH_RISCV),y)
 include $(TOPDIR)/libs/libc/machine/risc-v/Make.defs
 endif
@@ -0,0 +1,106 @@
+#
+# For a description of the syntax of this configuration file,
+# see the file kconfig-language.txt in the NuttX tools repository.
+#
+
+# Each option below selects the matching LIBC_ARCH_* symbol, is only
+# available with a GNU toolchain and is disabled by default.
+
+config ARM64_MEMCHR
+	bool "Enable optimized memchr() for ARM64"
+	default n
+	select LIBC_ARCH_MEMCHR
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific memchr() library function
+
+config ARM64_MEMCMP
+	bool "Enable optimized memcmp() for ARM64"
+	default n
+	select LIBC_ARCH_MEMCMP
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific memcmp() library function
+
+config ARM64_MEMCPY
+	bool "Enable optimized memcpy() for ARM64"
+	default n
+	select LIBC_ARCH_MEMCPY
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific memcpy() library function
+
+config ARM64_MEMSET
+	bool "Enable optimized memset() for ARM64"
+	default n
+	select LIBC_ARCH_MEMSET
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific memset() library function
+
+config ARM64_MEMMOVE
+	bool "Enable optimized memmove() for ARM64"
+	default n
+	select LIBC_ARCH_MEMMOVE
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific memmove() library function
+
+config ARM64_STRCHR
+	bool "Enable optimized strchr() for ARM64"
+	default n
+	select LIBC_ARCH_STRCHR
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strchr() library function
+
+config ARM64_STRCHRNUL
+	bool "Enable optimized strchrnul() for ARM64"
+	default n
+	select LIBC_ARCH_STRCHRNUL
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strchrnul() library function
+
+config ARM64_STRCMP
+	bool "Enable optimized strcmp() for ARM64"
+	default n
+	select LIBC_ARCH_STRCMP
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strcmp() library function
+
+config ARM64_STRCPY
+	bool "Enable optimized strcpy() for ARM64"
+	default n
+	select LIBC_ARCH_STRCPY
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strcpy() library function
+
+config ARM64_STRLEN
+	bool "Enable optimized strlen() for ARM64"
+	default n
+	select LIBC_ARCH_STRLEN
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strlen() library function
+
+config ARM64_STRNCMP
+	bool "Enable optimized strncmp() for ARM64"
+	default n
+	select LIBC_ARCH_STRNCMP
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strncmp() library function
+
+config ARM64_STRNLEN
+	bool "Enable optimized strnlen() for ARM64"
+	default n
+	select LIBC_ARCH_STRNLEN
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strnlen() library function
+
+config ARM64_STRRCHR
+	bool "Enable optimized strrchr() for ARM64"
+	default n
+	select LIBC_ARCH_STRRCHR
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARM64 specific strrchr() library function
@@ -0,0 +1,83 @@
+############################################################################
+# libs/libc/machine/arm64/Make.defs
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.  The
+# ASF licenses this file to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance with the
+# License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+############################################################################
+
+# Add the assembly source of each optimized routine whose CONFIG_ARM64_*
+# option is enabled.
+
+ifeq ($(CONFIG_ARM64_MEMCHR),y)
+ASRCS += arch_memchr.S
+endif
+
+ifeq ($(CONFIG_ARM64_MEMCMP),y)
+ASRCS += arch_memcmp.S
+endif
+
+ifeq ($(CONFIG_ARM64_MEMCPY),y)
+ASRCS += arch_memcpy.S
+endif
+
+ifeq ($(CONFIG_ARM64_MEMMOVE),y)
+ASRCS += arch_memmove.S
+endif
+
+ifeq ($(CONFIG_ARM64_MEMSET),y)
+ASRCS += arch_memset.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRCHR),y)
+ASRCS += arch_strchr.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRCHRNUL),y)
+ASRCS += arch_strchrnul.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRCMP),y)
+ASRCS += arch_strcmp.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRCPY),y)
+ASRCS += arch_strcpy.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRLEN),y)
+ASRCS += arch_strlen.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRNCMP),y)
+ASRCS += arch_strncmp.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRNLEN),y)
+ASRCS += arch_strnlen.S
+endif
+
+ifeq ($(CONFIG_ARM64_STRRCHR),y)
+ASRCS += arch_strrchr.S
+endif
+
+# Architecture-specific setjmp source, built when CONFIG_ARCH_SETJMP_H is set.
+
+ifeq ($(CONFIG_ARCH_SETJMP_H),y)
+ASRCS += arch_setjmp.S
+endif
+
+# The gnu/ subdirectory is only searched when building with a GNU toolchain.
+
+ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
+DEPPATH += --dep-path machine/arm64/gnu
+VPATH += :machine/arm64/gnu
+endif
+
+# The arm64 machine directory itself is always searched.
+
+DEPPATH += --dep-path machine/arm64
+VPATH += :machine/arm64
@@ -0,0 +1,173 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_memchr.S
+ *
+ * Copyright (c) 2014, ARM Limited
+ * All rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the company nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+#define cntin		x2
+
+/* The result is returned in the same register that carries srcin.  */
+#define result		x0
+
+/* General-purpose working registers.  */
+#define src		x3
+#define	tmp		x4
+#define wtmp2		w5
+#define synd		x6
+#define soff		x9
+#define cntrem		x10
+
+/* Vector working registers.  */
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_chr1	v3
+#define vhas_chr2	v4
+#define vrepmask	v5
+#define vend		v6
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
+ * per byte. For each tuple, bit 0 is set if the relevant byte matched the
+ * requested character and bit 1 is not used (faster than using a 32bit
+ * syndrome). Since the bits in the syndrome reflect exactly the order in which
+ * things occur in the original string, counting trailing zeros allows us to
+ * identify exactly which byte has matched.
+ */
+
+/*
+ * def_fn f [p2align=N]
+ *
+ * Start a function named \f: switch to .text, align to 2^N bytes (N
+ * defaults to 0), export the symbol, mark it as a function and place its
+ * label.  The .size directive is written separately after the body.
+ */
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+/*
+ * void *memchr(const void *s, int c, size_t n)
+ *
+ * In:   srcin (x0)  = start of the buffer
+ *       chrin (w1)  = byte to look for (only the low 8 bits are replicated)
+ *       cntin (x2)  = number of bytes to search
+ * Out:  result (x0) = address of the first matching byte, or 0 if there is
+ *                     no match or cntin is 0
+ * Uses: x3-x6, x9, x10, v0-v6 and the condition flags; sp is not touched.
+ *
+ * Data is always loaded as whole 32-byte aligned blocks, so bytes of the
+ * first and last block that lie outside the buffer are read and then
+ * masked out of the syndrome.
+ */
+def_fn memchr
+	/* Do not dereference srcin if no bytes to compare.  */
+	cbz	cntin, .Lzero_length
+	/*
+	 * Magic constant 0x40100401 allows us to identify which lane matches
+	 * the requested byte.
+	 */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16
+	dup	vrepchr.16b, chrin
+	/* Work with aligned 32-byte chunks */
+	bic	src, srcin, #31
+	dup	vrepmask.4s, wtmp2
+	/* soff = offset of srcin inside its 32-byte block; cntrem = cntin % 32 */
+	ands	soff, srcin, #31
+	and	cntrem, cntin, #31
+	/* Flags come from the ands above: srcin is already 32-byte aligned */
+	b.eq	.Lloop
+
+	/*
+	 * Input string is not 32-byte aligned. We calculate the syndrome
+	 * value for the aligned 32 bytes block containing the first bytes
+	 * and mask the irrelevant part.
+	 */
+
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	sub	tmp, soff, #32
+	/*
+	 * cntin -= (32 - soff), i.e. the bytes left after this block.  The
+	 * flags set here are consumed by the b.ls below; none of the
+	 * instructions in between modify them.
+	 */
+	adds	cntin, cntin, tmp
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
+	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
+	mov	synd, vend.2d[0]
+	/* Clear the soff*2 lower bits */
+	lsl	tmp, soff, #1
+	lsr	synd, synd, tmp
+	lsl	synd, synd, tmp
+	/* The first block can also be the last */
+	b.ls	.Lmasklast
+	/* Have we found something already? */
+	cbnz	synd, .Ltail
+
+.Lloop:
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	/* Flags from this subs drive both b.ls below and b.hi in .Lend */
+	subs	cntin, cntin, #32
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* If we're out of data we finish regardless of the result */
+	b.ls	.Lend
+	/* Use a fast check for the termination condition */
+	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
+	addp	vend.2d, vend.2d, vend.2d
+	mov	synd, vend.2d[0]
+	/* We're not out of data, loop if we haven't found the character */
+	cbz	synd, .Lloop
+
+.Lend:
+	/* Termination condition found, let's calculate the syndrome value */
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
+	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
+	mov	synd, vend.2d[0]
+	/* Only do the clear for the last possible block */
+	b.hi	.Ltail
+
+.Lmasklast:
+	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
+	add	tmp, cntrem, soff
+	and	tmp, tmp, #31
+	sub	tmp, tmp, #32
+	/* tmp = -(tmp << 1), the positive shift count described above */
+	neg	tmp, tmp, lsl #1
+	/*
+	 * When (cntrem + soff) is a multiple of 32 the count is 64; register
+	 * shifts use the count modulo 64, so nothing is cleared in that case.
+	 */
+	lsl	synd, synd, tmp
+	lsr	synd, synd, tmp
+
+.Ltail:
+	/* Count the trailing zeros using bit reversing */
+	rbit	synd, synd
+	/* Compensate the last post-increment */
+	sub	src, src, #32
+	/* Check that we have found a character */
+	cmp	synd, #0
+	/* And count the leading zeros */
+	clz	synd, synd
+	/* Compute the potential result (two syndrome bits per byte, hence >> 1) */
+	add	result, src, synd, lsr #1
+	/* Select result or NULL */
+	csel	result, xzr, result, eq
+	ret
+
+.Lzero_length:
+	mov	result, #0
+	ret
+
+	.size	memchr, . - memchr
@@ -0,0 +1,196 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_memcmp.S
+ *
+ * Copyright (c) 2018 Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2017 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+/* L(name) expands to the assembler-local label .Lname.  */
+#define L(l) .L ## l
+
+/* Parameters and result.  */
+#define src1		x0
+#define src2		x1
+#define limit		x2
+#define result		w0
+
+/* Internal variables.  data1w/data2w are the 32-bit views of data1/data2;
+   data1h/data2h hold the second word of a 16-byte pair load.  */
+#define data1		x3
+#define data1w		w3
+#define data1h		x4
+#define data2		x5
+#define data2w		w5
+#define data2h		x6
+#define tmp1		x7
+#define tmp2		x8
+
+/* def_fn f [p2align=N]
+ *
+ * Start a function named \f: switch to .text, align to 2^N bytes (N
+ * defaults to 0), export the symbol, mark it as a function and place its
+ * label.  The .size directive is written separately after the body.
+ */
+        .macro def_fn f p2align=0
+        .text
+        .p2align \p2align
+        .global \f
+        .type \f, %function
+\f:
+        .endm
+
+/* int memcmp(const void *s1, const void *s2, size_t n)
+ *
+ * In:   src1 (x0), src2 (x1) = buffers to compare
+ *       limit (x2)           = number of bytes to compare
+ * Out:  result (w0) = 0 when all bytes match.  On a mismatch the word
+ *       paths return -1 or 1 and the byte loop returns the difference of
+ *       the two differing bytes; the sign follows an unsigned byte compare.
+ * Uses: x3-x7 and the condition flags; sp is not touched.
+ *
+ * The entry point is aligned to 64 bytes (p2align=6).
+ */
+def_fn memcmp p2align=6
+	/* Fewer than 8 bytes are handled by the word/byte code at less8.  */
+	subs	limit, limit, 8
+	b.lo	L(less8)
+
+	/* Compare the first 8 bytes.  */
+	ldr	data1, [src1], 8
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	b.ne	L(return)
+
+	/* limit is now n - 16; more than 16 bytes in total go to more16.  */
+	subs	limit, limit, 8
+	b.gt	L(more16)
+
+	/* 8..16 bytes: limit is in [-8..0], so these loads fetch the final
+	   8 bytes of each buffer, overlapping the words compared above.  */
+	ldr	data1, [src1, limit]
+	ldr	data2, [src2, limit]
+	b	L(return)
+
+L(more16):
+	/* Compare bytes 8..15.  */
+	ldr	data1, [src1], 8
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+	   strings.  */
+	subs	limit, limit, 16
+	b.ls	L(last_bytes)
+
+	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
+	   try to align, so limit it only to strings larger than 128 bytes.  */
+	cmp	limit, 96
+	b.ls	L(loop16)
+
+	/* Align src1 and adjust src2 with bytes not yet done.  */
+	/* Both pointers step back by the same tmp1 bytes and limit grows by
+	   tmp1, so the same byte pairs keep being compared.  */
+	and	tmp1, src1, 15
+	add	limit, limit, tmp1
+	sub	src1, src1, tmp1
+	sub	src2, src2, tmp1
+
+	/* Loop performing 16 bytes per iteration using aligned src1.
+	   Limit is pre-decremented by 16 and must be larger than zero.
+	   Exit if <= 16 bytes left to do or if the data is not equal.  */
+	.p2align 4
+L(loop16):
+	ldp	data1, data1h, [src1], 16
+	ldp	data2, data2h, [src2], 16
+	subs	limit, limit, 16
+	/* If more data remains (hi) compare the low words, otherwise force
+	   NZCV = 0 (ne) so the loop ends.  */
+	ccmp	data1, data2, 0, hi
+	/* Compare the high words only when the low words matched.  */
+	ccmp	data1h, data2h, 0, eq
+	b.eq	L(loop16)
+
+	/* The loop ended: report a mismatch in either word pair, otherwise
+	   fall through to the tail.  */
+	cmp	data1, data2
+	bne	L(return)
+	mov	data1, data1h
+	mov	data2, data2h
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Compare last 1-16 bytes using unaligned access.  */
+L(last_bytes):
+	/* limit is zero or negative here, so both pointers move back onto
+	   the final 16 bytes of the buffers.  */
+	add	src1, src1, limit
+	add	src2, src2, limit
+	ldp	data1, data1h, [src1]
+	ldp	data2, data2h, [src2]
+	cmp     data1, data2
+	bne	L(return)
+	mov	data1, data1h
+	mov	data2, data2h
+	cmp	data1, data2
+
+	/* Compare data bytes and set return value to 0, -1 or 1.  */
+L(return):
+	/* On little-endian, reverse the bytes so that the unsigned compare
+	   is decided by the lowest-addressed differing byte.  */
+#ifndef __AARCH64EB__
+	rev	data1, data1
+	rev	data2, data2
+#endif
+	cmp     data1, data2
+L(ret_eq):
+	/* result = (data1 != data2), negated when data1 is lower (unsigned).  */
+	cset	result, ne
+	cneg	result, result, lo
+	ret
+
+	.p2align 4
+	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
+L(less8):
+	/* No carry out of the add means fewer than 4 bytes in total.  */
+	adds	limit, limit, 4
+	b.lo	L(less4)
+	ldr	data1w, [src1], 4
+	ldr	data2w, [src2], 4
+	cmp	data1w, data2w
+	b.ne	L(return)
+	sub	limit, limit, 4
+L(less4):
+	/* limit becomes the number of bytes still to compare (0..3).  */
+	adds	limit, limit, 4
+	beq	L(ret_eq)
+L(byte_loop):
+	ldrb	data1w, [src1], 1
+	ldrb	data2w, [src2], 1
+	subs	limit, limit, 1
+	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
+	b.eq	L(byte_loop)
+	/* Difference of the last byte pair loaded; zero if every byte matched.  */
+	sub	result, data1w, data2w
+	ret
+
+	.size	memcmp, . - memcmp
@@ -0,0 +1,232 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_memcpy.S
+ *
+ * Copyright (c) 2012-2013, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#define dstin	x0	/* Destination argument; never written by memcpy.  */
+#define src	x1	/* Source argument; advanced by the long-copy path.  */
+#define count	x2	/* Byte count argument.  */
+#define dst	x3	/* 16-byte aligned working destination (long copies).  */
+#define srcend	x4	/* src + count.  */
+#define dstend	x5	/* dstin + count.  */
+#define A_l	x6	/* A..D: 16-byte data pairs, _l/_h = first/second register.  */
+#define A_lw	w6	/* 32-bit view of A_l.  */
+#define A_h	x7
+#define A_hw	w7	/* 32-bit view of A_h.  */
+#define B_l	x8
+#define B_lw	w8	/* 32-bit view of B_l.  */
+#define B_h	x9	/* Same register as tmp1.  */
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	src	/* E and F reuse x1..x4; they are loaded only once  */
+#define E_h	count	/* the aliased pointer/count is no longer needed.  */
+#define F_l	srcend
+#define F_h	dst
+#define tmp1	x9	/* Shares x9 with B_h; used only before B is loaded.  */
+
+#define L(l) .L ## l	/* Local label helper: L(foo) expands to .Lfoo.  */
+
+	.macro def_fn f p2align=0
+	.text			/* def_fn f [p2align]: open function f in the code section.  */
+	.p2align \p2align	/* Align the entry to 2^p2align bytes (default 2^0).  */
+	.global \f		/* Make the symbol visible to the linker.  */
+	.type \f, %function	/* Mark the symbol as a function.  */
+\f:
+	.endm
+
+/* memcpy: copy count (x2) bytes from src (x1) to dstin (x0).  x0 is never
+   written, so the caller gets the destination pointer back unchanged.
+   Copies are split into 3 main cases: small copies of up to 16 bytes,
+   medium copies of 17..96 bytes which are fully unrolled, and large copies
+   of more than 96 bytes which align the destination and use an unrolled
+   loop processing 64 bytes per iteration.  Small and medium copies read
+   all data before writing, allowing any kind of overlap, and memmove
+   tailcalls memcpy for these cases as well as non-overlapping copies.  */
+
+def_fn memcpy p2align=6
+	prfm	PLDL1KEEP, [src]	/* Prefetch-for-load hint at the start of the source.  */
+	add	srcend, src, count	/* srcend = one past the last source byte.  */
+	add	dstend, dstin, count	/* dstend = one past the last destination byte.  */
+	cmp	count, 16
+	b.ls	L(copy16)		/* Unsigned count <= 16.  */
+	cmp	count, 96
+	b.hi	L(copy_long)		/* Unsigned count > 96.  */
+
+	/* Medium copies: 17..96 bytes; bits 6 and 5 of count - 1 select the variant.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src]
+	tbnz	tmp1, 6, L(copy96)	/* count - 1 >= 64, i.e. 65..96 bytes.  */
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f		/* 17..32 bytes: first and last 16 bytes cover it.  */
+	ldp	B_l, B_h, [src, 16]	/* 33..64 bytes; B_h overwrites tmp1, which is dead.  */
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Small copies: 0..16 bytes.  */
+L(copy16):
+	cmp	count, 8
+	b.lo	1f
+	ldr	A_l, [src]		/* 8..16 bytes: two possibly overlapping 8-byte moves.  */
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+	.p2align 4
+1:
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]		/* 4..7 bytes: two possibly overlapping 4-byte moves.  */
+	ldr	A_hw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	A_hw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
+1:
+	cbz	count, 2f
+	lsr	tmp1, count, 1		/* tmp1 = index of the middle byte (0 or 1).  */
+	ldrb	A_lw, [src]
+	ldrb	A_hw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	A_hw, [dstend, -1]
+2:	ret
+
+	.p2align 4
+	/* Copy 65..96 bytes.  Copy 64 bytes from the start and
+	   32 bytes from the end.  */
+L(copy96):
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [src, 32]
+	ldp	D_l, D_h, [src, 48]
+	ldp	E_l, E_h, [srcend, -32]	/* E reuses src and count, both dead now.  */
+	ldp	F_l, F_h, [srcend, -16]	/* F reuses srcend and dst; last use of srcend.  */
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin, 32]
+	stp	D_l, D_h, [dstin, 48]
+	stp	E_l, E_h, [dstend, -32]
+	stp	F_l, F_h, [dstend, -16]
+	ret
+
+	/* Align DST to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are more than 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and loads one iteration ahead.  */
+
+	.p2align 4
+L(copy_long):
+	and	tmp1, dstin, 15		/* tmp1 = dstin modulo 16.  */
+	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
+	ldp	D_l, D_h, [src]		/* First 16 bytes, stored unaligned below.  */
+	sub	src, src, tmp1		/* Bias src by the same amount as dst.  */
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!	/* Pre-index writeback: src += 64.  */
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	2f
+1:
+	stp	A_l, A_h, [dst, 16]	/* Store the previous 64 bytes while loading the next.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!	/* Pre-index writeback: dst += 64.  */
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the end even if
+	   there is just 1 byte left.  */
+2:
+	ldp	E_l, E_h, [srcend, -64]	/* E reuses src and count, both dead now.  */
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [srcend, -16]
+	stp	D_l, D_h, [dst, 64]
+	stp	E_l, E_h, [dstend, -64]
+	stp	A_l, A_h, [dstend, -48]
+	stp	B_l, B_h, [dstend, -32]
+	stp	C_l, C_h, [dstend, -16]
+	ret
+
+	.size	memcpy, . - memcpy
@@ -0,0 +1,157 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_memmove.S
+ *
+ * Copyright (c) 2013, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ */
+
+	.macro def_fn f p2align=0
+	.text			/* def_fn f [p2align]: open function f in the code section.  */
+	.p2align \p2align	/* Align the entry to 2^p2align bytes (default 2^0).  */
+	.global \f		/* Make the symbol visible to the linker.  */
+	.type \f, %function	/* Mark the symbol as a function.  */
+\f:
+	.endm
+
+/* Parameters, result and temporaries.  */
+#define dstin	x0	/* Destination argument; never written by memmove.  */
+#define src	x1	/* Source argument.  */
+#define count	x2	/* Byte count argument.  */
+#define srcend	x3	/* src + count, walked downwards by the loop.  */
+#define dstend	x4	/* dstin + count, walked downwards by the loop.  */
+#define tmp1	x5
+#define A_l	x6	/* A..D: 16-byte data pairs, _l/_h = first/second register.  */
+#define A_h	x7
+#define B_l	x8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	count	/* E reuses x2 and x5; it is loaded only in the final  */
+#define E_h	tmp1	/* block, after count and tmp1 have had their last use.  */
+
+/* memmove: x0 = dstin, x1 = src, x2 = count.  Moves of up to 96 bytes are
+   done by memcpy as it supports overlaps.  Larger moves are also handed to
+   memcpy whenever the unsigned difference dstin - src is at least count.
+   The remaining case, src < dstin < src + count, is copied from the end
+   towards the start by an unrolled loop moving 64 bytes per iteration.  */
+
+def_fn memmove, 6
+	sub	tmp1, dstin, src	/* tmp1 = dstin - src, compared as unsigned.  */
+	cmp	count, 96
+	ccmp	tmp1, count, 2, hi	/* count > 96: compare tmp1 with count; else force C set.  */
+	b.hs	memcpy			/* Tail call when count <= 96 or tmp1 >= count.  */
+
+	cbz	tmp1, 3f		/* dstin == src: nothing to move.  */
+	add	dstend, dstin, count
+	add	srcend, src, count
+
+	/* Align dstend to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are more than 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and loads one iteration ahead.  */
+
+	and	tmp1, dstend, 15	/* tmp1 = dstend modulo 16.  */
+	ldp	D_l, D_h, [srcend, -16]	/* Last 16 bytes, stored unaligned below.  */
+	sub	srcend, srcend, tmp1	/* Step srcend back by the same amount as dstend.  */
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]
+	stp	D_l, D_h, [dstend, -16]
+	ldp	B_l, B_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -48]
+	ldp	D_l, D_h, [srcend, -64]!	/* Pre-index writeback: srcend -= 64.  */
+	sub	dstend, dstend, tmp1	/* dstend is now 16-byte aligned.  */
+	subs	count, count, 128
+	b.ls	2f
+	nop
+1:
+	stp	A_l, A_h, [dstend, -16]	/* Store the previous 64 bytes while loading the next.  */
+	ldp	A_l, A_h, [srcend, -16]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [srcend, -48]
+	stp	D_l, D_h, [dstend, -64]!	/* Pre-index writeback: dstend -= 64.  */
+	ldp	D_l, D_h, [srcend, -64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the start even if
+	   there is just 1 byte left.  */
+2:
+	ldp	E_l, E_h, [src, 48]	/* E reuses count and tmp1, both dead now.  */
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [src, 32]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [src, 16]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [src]
+	stp	D_l, D_h, [dstend, -64]
+	stp	E_l, E_h, [dstin, 48]
+	stp	A_l, A_h, [dstin, 32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin]
+3:	ret
+
+	.size	memmove, . - memmove
@@ -0,0 +1,242 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_memset.S
+ *
+ * Copyright (c) 2012-2013, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ *
+ */
+
+#define dstin	x0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+#define tmp1	x5
+#define tmp1w	w5
+#define tmp2	x6
+#define tmp2w	w6
+#define zva_len x7
+#define zva_lenw w7
+
+#define L(l) .L ## l
+
+	.macro def_fn f p2align=0
+	.text			/* def_fn f [p2align]: open function f in the code section.  */
+	.p2align \p2align	/* Align the entry to 2^p2align bytes (default 2^0).  */
+	.global \f		/* Make the symbol visible to the linker.  */
+	.type \f, %function	/* Mark the symbol as a function.  */
+\f:
+	.endm
+
+def_fn memset p2align=6
+	/* memset: x0 = dstin, w1 = fill value, x2 = count.  x0 is never written.  */
+	dup	v0.16B, valw		/* Replicate the low byte of valw into all 16 bytes of v0.  */
+	add	dstend, dstin, count	/* dstend = one past the last byte to set.  */
+
+	cmp	count, 96
+	b.hi	L(set_long)		/* Unsigned count > 96.  */
+	cmp	count, 16
+	b.hs	L(set_medium)		/* 16 <= count <= 96.  */
+	mov	val, v0.D[0]		/* val = fill byte repeated 8 times.  */
+
+	/* Set 0..15 bytes.  */
+	tbz	count, 3, 1f
+	str	val, [dstin]		/* 8..15 bytes: two possibly overlapping 8-byte stores.  */
+	str	val, [dstend, -8]
+	ret
+	nop
+1:	tbz	count, 2, 2f
+	str	valw, [dstin]		/* 4..7 bytes: two possibly overlapping 4-byte stores.  */
+	str	valw, [dstend, -4]
+	ret
+2:	cbz	count, 3f
+	strb	valw, [dstin]		/* 1..3 bytes: first byte, plus the last two if count >= 2.  */
+	tbz	count, 1, 3f
+	strh	valw, [dstend, -2]
+3:	ret
+
+	/* Set 16..96 bytes.  */
+L(set_medium):
+	str	q0, [dstin]
+	tbnz	count, 6, L(set96)	/* 64..96 bytes.  */
+	str	q0, [dstend, -16]	/* 16..31 bytes: first and last 16 bytes cover it.  */
+	tbz	count, 5, 1f
+	str	q0, [dstin, 16]		/* 32..63 bytes: two more 16-byte stores.  */
+	str	q0, [dstend, -32]
+1:	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	str	q0, [dstin, 16]
+	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+	nop
+L(set_long):
+	and	valw, valw, 255		/* Keep only the fill byte for the zero test below.  */
+	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
+	str	q0, [dstin]		/* First 16 bytes, unaligned.  */
+	cmp	count, 256
+	ccmp	valw, 0, 0, cs		/* count >= 256: test fill byte == 0; else force NE.  */
+	b.eq	L(try_zva)		/* Taken only for zero fills of at least 256 bytes.  */
+L(no_zva):
+	sub	count, dstend, dst	/* Count is 16 too large.  */
+	sub	dst, dst, 16		/* Dst is biased by -32.  */
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+1:	stp	q0, q0, [dst, 32]	/* 64 bytes per iteration.  */
+	stp	q0, q0, [dst, 64]!	/* Pre-index writeback: dst += 64.  */
+L(tail64):
+	subs	count, count, 64
+	b.hi	1b
+2:	stp	q0, q0, [dstend, -64]	/* Final 64 bytes, addressed from the end.  */
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+L(try_zva):
+	mrs	tmp1, dczid_el0
+	tbnz	tmp1w, 4, L(no_zva)	/* Bit 4 (DZP) set: DC ZVA is prohibited.  */
+	and	tmp1w, tmp1w, 15	/* Low 4 bits (BS): log2 of the block size in words.  */
+	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
+	b.ne	 L(zva_128)
+
+	/* Write the first and last 64 byte aligned block using stp rather
+	   than using DC ZVA.  This is faster on some cores.
+	 */
+L(zva_64):
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	bic	dst, dst, 63		/* dst = rounded down to 64 bytes.  */
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+	nop
+1:	dc	zva, dst		/* Zero the 64-byte block at dst.  */
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	1b
+	stp	q0, q0, [dst, 0]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+L(zva_128):
+	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
+	b.ne	L(zva_other)
+
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	bic	dst, dst, 127		/* dst = rounded down to 128 bytes.  */
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+1:	dc	zva, dst		/* Zero the 128-byte block at dst.  */
+	add	dst, dst, 128
+	subs	count, count, 128
+	b.hi	1b
+	stp	q0, q0, [dstend, -128]
+	stp	q0, q0, [dstend, -96]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+L(zva_other):
+	mov	tmp2w, 4
+	lsl	zva_lenw, tmp2w, tmp1w	/* zva_len = 4 << BS, the block size in bytes.  */
+	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
+	cmp	count, tmp1
+	blo	L(no_zva)
+
+	sub	tmp2, zva_len, 1	/* tmp2 = alignment mask for the block size.  */
+	add	tmp1, dst, zva_len
+	add	dst, dst, 16
+	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
+	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
+	beq	2f
+1:	stp	q0, q0, [dst], 64	/* Post-index: store 32 bytes, then dst += 64.  */
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	1b
+2:	mov	dst, tmp1
+	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
+	subs	count, count, zva_len
+	b.lo	4f
+3:	dc	zva, dst		/* Zero one zva_len-byte block at dst.  */
+	add	dst, dst, zva_len
+	subs	count, count, zva_len
+	b.hs	3b
+4:	add	count, count, zva_len	/* Undo the last subtraction: bytes still to set.  */
+	sub	dst, dst, 32		/* Bias dst for tail loop.  */
+	b	L(tail64)
+
+	.size	memset, . - memset
@@ -0,0 +1,77 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_setjmp.S
+ *
+ * Copyright (c) 2011, 2012 ARM Ltd
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+#define GPR_LAYOUT			\
+	REG_PAIR (x19, x20,  0);	\
+	REG_PAIR (x21, x22, 16);	\
+	REG_PAIR (x23, x24, 32);	\
+	REG_PAIR (x25, x26, 48);	\
+	REG_PAIR (x27, x28, 64);	\
+	REG_PAIR (x29, x30, 80);	\
+	REG_ONE  (x16,      96)
+
+#define FPR_LAYOUT			\
+	REG_PAIR ( d8,  d9, 112);	\
+	REG_PAIR (d10, d11, 128);	\
+	REG_PAIR (d12, d13, 144);	\
+	REG_PAIR (d14, d15, 160);
+
+// int setjmp (jmp_buf): x0 = buffer; a direct call returns 0 in w0.
+	.global	setjmp
+	.type	setjmp, %function
+setjmp:
+	mov	x16, sp		/* Copy sp to x16; the layout stores x16 at offset 96.  */
+#define REG_PAIR(REG1, REG2, OFFS)	stp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS)		str REG1, [x0, OFFS]
+	GPR_LAYOUT		/* Store x19..x30 and x16 (the sp copy) into the buffer.  */
+	FPR_LAYOUT		/* Store d8..d15 into the buffer.  */
+#undef REG_PAIR
+#undef REG_ONE
+	mov	w0, #0		/* Return value of the direct call.  */
+	ret
+	.size	setjmp, .-setjmp
+
+// void longjmp (jmp_buf, int) __attribute__ ((noreturn)): x0 = buffer, w1 = value
+	.global	longjmp
+	.type	longjmp, %function
+longjmp:
+#define REG_PAIR(REG1, REG2, OFFS)	ldp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS)		ldr REG1, [x0, OFFS]
+	GPR_LAYOUT		/* Reload x19..x30 and x16 (the saved sp) from the buffer.  */
+	FPR_LAYOUT		/* Reload d8..d15 from the buffer.  */
+#undef REG_PAIR
+#undef REG_ONE
+	mov	sp, x16		/* Restore the stack pointer recorded by setjmp.  */
+	cmp	w1, #0
+	cinc	w0, w1, eq	/* w0 = w1, or 1 when w1 is 0.  */
+	// use br not ret, as ret is guaranteed to mispredict
+	br	x30
+	.size	longjmp, .-longjmp
@@ -0,0 +1,161 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strchr.S
+ *
+ * Copyright (c) 2014, ARM Limited
+ *  All rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the company nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+
+#define result		x0
+
+#define src		x2
+#define	tmp1		x3
+#define wtmp2		w4
+#define tmp3		x5
+
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask_0	v7
+#define vrepmask_c	v16
+#define vend1		v17
+#define vend2		v18
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character; bit 1 is set
+   iff the relevant byte matched the NUL end of string (we trigger
+   off bit0 for the special case of looking for NUL).  Since the bits
+   in the syndrome reflect exactly the order in which things occur
+   in the original string a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination, and why.  */
+
+/* Helper macro: def_fn f [p2align] emits the entry boilerplate for f.  */
+
+	.macro def_fn f p2align=0
+	.text			/* Place the function in the code section.  */
+	.p2align \p2align	/* Align the entry to 2^p2align bytes (default 2^0).  */
+	.global \f		/* Make the symbol visible to the linker.  */
+	.type \f, %function	/* Mark the symbol as a function.  */
+\f:
+	.endm
+
+def_fn strchr
+	/* strchr: x0 = srcin, w1 = chrin; result in x0.  Magic constant
+	   0x40100401 allows us to identify which lane matches the requested
+	   byte.  Magic constant 0x80200802 used similarly for NUL termination.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16	/* wtmp2 = 0x40100401.  */
+	dup	vrepchr.16b, chrin	/* Broadcast the low byte of chrin to all 16 lanes.  */
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask_c.4s, wtmp2
+	ands	tmp1, srcin, #31	/* tmp1 = offset of srcin inside its hunk; sets Z.  */
+	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
+	b.eq	.Lloop			/* srcin is already 32-byte aligned.  */
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	lsl	tmp1, tmp1, #1		/* tmp1 = -2 * offset (two syndrome bits per byte).  */
+	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
+	mov	tmp3, #~0
+	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
+	lsr	tmp1, tmp3, tmp1	/* Shift is taken modulo 64: low 2 * offset bits set.  */
+
+	mov	tmp3, vend1.2d[0]	/* Only the low 64 bits hold the syndrome.  */
+	bic	tmp1, tmp3, tmp1	// Mask padding bits.
+	cbnz	tmp1, .Ltail		/* Match or NUL inside the first hunk.  */
+
+.Lloop:
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* Use a fast check for the termination condition.  */
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	orr	vend1.16b, vend1.16b, vend2.16b
+	addp	vend1.2d, vend1.2d, vend1.2d
+	mov	tmp1, vend1.2d[0]
+	cbz	tmp1, .Lloop		/* Neither the character nor NUL seen in this hunk.  */
+
+	/* Termination condition found.  Now need to establish exactly why
+	   we terminated.  */
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
+	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
+
+	mov	tmp1, vend1.2d[0]
+.Ltail:
+	/* Count the trailing zeros, by bit reversing...  */
+	rbit	tmp1, tmp1
+	/* Re-bias source.  */
+	sub	src, src, #32
+	clz	tmp1, tmp1	/* And counting the leading zeros.  */
+	/* Tmp1 is even if the target character was found first.  Otherwise
+	   we've found the end of string and we weren't looking for NUL.  */
+	tst	tmp1, #1
+	add	result, src, tmp1, lsr #1	/* Hunk base + byte index (tmp1 / 2).  */
+	csel	result, result, xzr, eq		/* Odd tmp1: NUL came first, return 0.  */
+	ret
+
+	.size	strchr, . - strchr
@@ -0,0 +1,146 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strchrnul.S
+ *
+ * Copyright (c) 2014, ARM Limited
+ *  All rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the company nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+
+#define result		x0
+
+#define src		x2
+#define	tmp1		x3
+#define wtmp2		w4
+#define tmp3		x5
+
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask	v7
+#define vend1		v16
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character or nul.  Since the
+   bits in the syndrome reflect exactly the order in which things occur
+   in the original string a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination.  */
+
+/* Helper macro: def_fn f [p2align] emits the entry boilerplate for f.  */
+
+	.macro def_fn f p2align=0
+	.text			/* Place the function in the code section.  */
+	.p2align \p2align	/* Align the entry to 2^p2align bytes (default 2^0).  */
+	.global \f		/* Make the symbol visible to the linker.  */
+	.type \f, %function	/* Mark the symbol as a function.  */
+\f:
+	.endm
+
+def_fn strchrnul
+	/* strchrnul: x0 = srcin, w1 = chrin; result in x0.  Magic constant
+	   0x40100401 lets us identify which lane matches the termination condition.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16	/* wtmp2 = 0x40100401.  */
+	dup	vrepchr.16b, chrin	/* Broadcast the low byte of chrin to all 16 lanes.  */
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask.4s, wtmp2
+	ands	tmp1, srcin, #31	/* tmp1 = offset of srcin inside its hunk; sets Z.  */
+	b.eq	.Lloop			/* srcin is already 32-byte aligned.  */
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b	/* Character or NUL both terminate.  */
+	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	lsl	tmp1, tmp1, #1		/* tmp1 = -2 * offset (two syndrome bits per byte).  */
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	mov	tmp3, #~0
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+	lsr	tmp1, tmp3, tmp1	/* Shift is taken modulo 64: low 2 * offset bits set.  */
+
+	mov	tmp3, vend1.2d[0]
+	bic	tmp1, tmp3, tmp1	// Mask padding bits.
+	cbnz	tmp1, .Ltail		/* Terminator inside the first hunk.  */
+
+.Lloop:
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* Use a fast check for the termination condition.  */
+	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
+	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
+	addp	vend1.2d, vend1.2d, vend1.2d
+	mov	tmp1, vend1.2d[0]
+	cbz	tmp1, .Lloop		/* Neither the character nor NUL seen in this hunk.  */
+
+	/* Termination condition found.  Now need to establish exactly where
+	   we terminated.  */
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+
+	mov	tmp1, vend1.2d[0]
+.Ltail:
+	/* Count the trailing zeros, by bit reversing...  */
+	rbit	tmp1, tmp1
+	/* Re-bias source.  */
+	sub	src, src, #32
+	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
+	/* tmp1 is twice the offset into the fragment.  */
+	add	result, src, tmp1, lsr #1
+	ret
+
+	.size	strchrnul, . - strchrnul
@@ -0,0 +1,205 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strcmp.S
+ *
+ * Copyright (c) 2012-2018, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+	/* def_fn NAME [p2align=N]: switch to .text, align to 2^N bytes
+	   (N defaults to 0, i.e. no extra alignment) and open a global
+	   symbol NAME typed as a function.  The user of the macro supplies
+	   the body and the matching .size directive.  */
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+/* Local-label helper: L(foo) expands to .Lfoo.  */
+#define L(label) .L ## label
+
+/* Byte-replicated constants for the word-at-a-time NUL test.  REP8_80 is
+   not referenced by the code in this file.  */
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+#define src1		x0	/* First string; advanced while scanning.  */
+#define src2		x1	/* Second string; advanced while scanning.  */
+#define result		x0	/* Shares x0 with src1.  */
+
+/* Internal variables.  */
+#define data1		x2	/* Current dword (or byte) from src1.  */
+#define data1w		w2
+#define data2		x3	/* Current dword (or byte) from src2.  */
+#define data2w		w3
+#define has_nul		x4	/* Non-zero iff data1 holds a NUL byte.  */
+#define diff		x5	/* data1 ^ data2.  */
+#define syndrome	x6	/* diff | has_nul.  */
+#define tmp1		x7
+#define tmp2		x8
+#define tmp3		x9	/* Only used on the big-endian path.  */
+#define zeroones	x10	/* Holds REP8_01 for the whole routine.  */
+#define pos		x11	/* Leading-zero count of the syndrome.  */
+
+	/* strcmp: compare the NUL-terminated strings at src1 (x0) and src2
+	   (x1).  The value returned in x0 is zero when the strings are
+	   equal; otherwise its sign follows the unsigned comparison at the
+	   first point of difference.  Leaf routine: only x0-x11 and the
+	   flags are used and the stack is never touched.
+
+	   Three entry paths feed the same 8-bytes-per-iteration test:
+	     - both pointers 8-byte aligned: straight into L(loop_aligned);
+	     - same misalignment in both: L(mutual_align) rounds both
+	       pointers down and neutralises the bytes in front of the
+	       strings;
+	     - different misalignment: L(misaligned8) aligns src1 bytewise
+	       and guards every unaligned src2 load against running into
+	       the next 4K page.  */
+	/* Start of performance-critical section  -- one 64B cache line.  */
+def_fn strcmp p2align=6
+	eor	tmp1, src1, src2	/* Low 3 bits zero iff same alignment.  */
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	b.ne	L(misaligned8)
+	ands	tmp1, src1, #7		/* Bytes past the 8-byte boundary.  */
+	b.ne	L(mutual_align)
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+L(loop_aligned):
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+L(start_realigned):
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, L(loop_aligned)
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+	/* data1/data2 now hold the dword pair containing the first
+	   difference or the terminating NUL.  */
+L(end):
+#ifndef	__AARCH64EB__
+	/* Little-endian: byte-reverse so that the byte at the lowest
+	   address becomes the most significant one.  */
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne		/* 1 if the dwords differ ...  */
+	cneg	result, result, lo	/* ... negated if data1 < data2.  */
+	ret
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#endif
+
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that precede the start point.  On entry tmp1 holds
+	   src1 & 7 (set by the ands above).  */
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	ldr	data1, [src1], #8
+	neg	tmp1, tmp1		/* Bits to alignment -64.  */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	/* Force the bytes in front of the strings to 0xff in both words so
+	   that they compare equal and can never look like a NUL.  */
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	b	L(start_realigned)
+
+L(misaligned8):
+	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
+	   checking to make sure that we don't access beyond page boundary in
+	   SRC2.  */
+	tst	src1, #7
+	b.eq	L(loop_misaligned)
+L(do_misaligned):
+	/* Bytewise compare until src1 reaches the next 8-byte boundary.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	cmp	data1w, #1		/* C set iff the src1 byte is not NUL.  */
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.ne	L(done)			/* NUL in src1 or bytes differ.  */
+	tst	src1, #7
+	b.ne	L(do_misaligned)
+
+L(loop_misaligned):
+	/* Test if we are within the last dword of the end of a 4K page.  If
+	   yes then jump back to the misaligned loop to compare a byte at a
+	   time, so that the unaligned src2 load below cannot cross into the
+	   next page.  */
+	and	tmp1, src2, #0xff8
+	eor	tmp1, tmp1, #0xff8	/* Zero iff bits [11:3] are all ones.  */
+	cbz	tmp1, L(do_misaligned)
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, L(loop_misaligned)
+	b	L(end)
+
+L(done):
+	/* Bytewise exit: both registers hold zero-extended bytes.  */
+	sub	result, data1, data2
+	ret
+	.size	strcmp, .-strcmp
@@ -0,0 +1,338 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strcpy.S
+ *
+ * Copyright (c) 2013, 2014, 2015 ARM Ltd.
+ *  All rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the company nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
+ */
+
+/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
+
+   To test the page crossing code path more thoroughly, compile with
+   -DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
+   entry path.  This option is not intended for production use.  */
+
+/* Arguments and results.  */
+#define dstin		x0	/* Destination as passed in.  */
+#define srcin		x1	/* Source as passed in.  */
+
+/* Locals and temporaries.  */
+#define src		x2	/* Running source pointer (bulk copy).  */
+#define dst		x3	/* Running/derived destination pointer.  */
+#define data1		x4	/* First dword of the current 16 bytes.  */
+#define data1w		w4
+#define data2		x5	/* Second dword of the current 16 bytes.  */
+#define data2w		w5
+#define has_nul1	x6	/* Non-zero iff a NUL was seen in data1.  */
+#define has_nul2	x7	/* Non-zero iff a NUL was seen in data2.  */
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define tmp4		x11
+#define zeroones	x12	/* Holds REP8_01 for the whole routine.  */
+#define data1a		x13	/* Scratch copy used by .Lpage_cross.  */
+#define data2a		x14	/* Scratch copy used by .Lpage_cross.  */
+#define pos		x15	/* Bit position of the NUL marker.  */
+#define len		x16	/* Not referenced by the code below.  */
+#define to_align	x17	/* srcin & 15.  */
+
+/* Name of the exported entry point.  */
+#ifdef BUILD_STPCPY
+#define STRCPY stpcpy
+#else
+#define STRCPY strcpy
+#endif
+
+	/* def_fn NAME [p2align=N]: switch to .text, align to 2^N bytes
+	   (N defaults to 0) and open a global symbol NAME typed as a
+	   function.  */
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+
+/* Byte-replicated constants for the test above.  REP8_80 is not
+   referenced by the code in this file.  */
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+	/* AArch64 systems have a minimum page size of 4k.  We can do a quick
+	   page size check for crossing this boundary on entry and if we
+	   do not, then we can short-circuit much of the entry code.  We
+	   expect early page-crossing strings to be rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
+	   predictable, even with random strings.
+
+	   We don't bother checking for larger page sizes, the cost of setting
+	   up the correct page size is just not worth the extra gain from
+	   a small reduction in the cases taking the slow path.  Note that
+	   we only care about whether the first fetch, which may be
+	   misaligned, crosses a page boundary - after that we move to aligned
+	   fetches for the remainder of the string.  */
+
+#ifdef STRCPY_TEST_PAGE_CROSS
+	/* Make everything that isn't Qword aligned look like a page cross:
+	   with a 16-byte "page" the entry check fires whenever
+	   (srcin & 15) > 0.  */
+#define MIN_PAGE_P2 4
+#else
+#define MIN_PAGE_P2 12
+#endif
+
+#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
+
+	/* STRCPY (strcpy, or stpcpy when BUILD_STPCPY is defined): copy the
+	   NUL-terminated string at srcin (x1), terminator included, to
+	   dstin (x0).  The strcpy build never writes x0, so dstin is what
+	   the caller gets back; the stpcpy build rewrites x0 to the address
+	   of the copied NUL just before each return.  Leaf routine: no
+	   register above x17 is used and the stack is never touched.
+
+	   Strategy:
+	     - if the NUL lies in the first 16 bytes, finish with at most
+	       two (possibly overlapping) stores: .Lfp_gt8, .Lfp_le8,
+	       .Lfp_lt4, .Lfp_lt2;
+	     - otherwise copy 16 bytes per iteration from a 16-byte aligned
+	       source and, once the NUL shows up, re-copy the 16 bytes that
+	       end at the NUL;
+	     - if the first unaligned 16-byte load could cross a
+	       MIN_PAGE_SIZE boundary, .Lpage_cross inspects the enclosing
+	       aligned 16 bytes first.  */
+def_fn STRCPY p2align=6
+	/* For moderately short strings, the fastest way to do the copy is to
+	   calculate the length of the string in the same way as strlen, then
+	   essentially do a memcpy of the result.  This avoids the need for
+	   multiple byte copies and further means that by the time we
+	   reach the bulk copy loop we know we can always use DWord
+	   accesses.  We expect strcpy to rarely be called repeatedly
+	   with the same source string, so branch prediction is likely to
+	   always be difficult - we mitigate against this by preferring
+	   conditional select operations over branches whenever this is
+	   feasible.  */
+	and	tmp2, srcin, #(MIN_PAGE_SIZE - 1)	/* Offset within page.  */
+	mov	zeroones, #REP8_01
+	and	to_align, srcin, #15
+	cmp	tmp2, #(MIN_PAGE_SIZE - 16)
+	neg	tmp1, to_align		/* Consumed by .Lpage_cross only.  */
+	/* The first fetch will straddle a (possible) page boundary iff
+	   srcin + 15 causes bit[MIN_PAGE_P2] to change value.  A 16-byte
+	   aligned string will never fail the page align check, so will
+	   always take the fast path.  */
+	b.gt	.Lpage_cross
+
+.Lpage_cross_ok:
+	ldp	data1, data2, [srcin]	/* Unaligned 16-byte fetch.  */
+#ifdef __AARCH64EB__
+	/* Because we expect the end to be found within 16 characters
+	   (profiling shows this is the most common case), it's worth
+	   swapping the bytes now to save having to recalculate the
+	   termination syndrome later.  We preserve data1 and data2
+	   so that we can re-use the values later on.  */
+	rev	tmp2, data1
+	sub	tmp1, tmp2, zeroones
+	orr	tmp2, tmp2, #REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	b.ne	.Lfp_le8
+	rev	tmp4, data2
+	sub	tmp3, tmp4, zeroones
+	orr	tmp4, tmp4, #REP8_7f
+#else
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	b.ne	.Lfp_le8		/* NUL within bytes 0..7.  */
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+#endif
+	bics	has_nul2, tmp3, tmp4
+	b.eq	.Lbulk_entry		/* No NUL in the first 16 bytes.  */
+
+	/* The string is short (<=16 bytes).  We don't know exactly how
+	   short though, yet.  Work out the exact length so that we can
+	   quickly select the optimal copy strategy.  */
+.Lfp_gt8:
+	/* NUL is in data2.  Shift data2 so that its NUL lands in the last
+	   byte stored, then store the 8 bytes that end at the NUL followed
+	   by the first 8 bytes.  The second store repairs whatever part of
+	   the first one overlaps it, so the order of the two stores
+	   matters.  */
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2		/* 8 * (index of the NUL in data2).  */
+	mov	tmp2, #56
+	add	dst, dstin, pos, lsr #3		/* Bits to bytes.  */
+	sub	pos, tmp2, pos
+#ifdef __AARCH64EB__
+	lsr	data2, data2, pos
+#else
+	lsl	data2, data2, pos
+#endif
+	str	data2, [dst, #1]
+	str	data1, [dstin]
+#ifdef BUILD_STPCPY
+	add	dstin, dst, #8		/* Address of the copied NUL.  */
+#endif
+	ret
+
+.Lfp_le8:
+	/* NUL is in data1; dst is set to the address where it must go.  */
+	rev	has_nul1, has_nul1
+	clz	pos, has_nul1		/* 8 * (index of the NUL in data1).  */
+	add	dst, dstin, pos, lsr #3		/* Bits to bytes.  */
+	subs	tmp2, pos, #24			/* Pos in bits. */
+	b.lt	.Lfp_lt4		/* NUL index below 3.  */
+#ifdef __AARCH64EB__
+	mov	tmp2, #56
+	sub	pos, tmp2, pos
+	lsr	data2, data1, pos
+	lsr	data1, data1, #32
+#else
+	lsr	data2, data1, tmp2	/* Low word = 4 bytes ending at NUL.  */
+#endif
+	/* 4->7 bytes to copy.  Two 4-byte stores that may overlap: the last
+	   four bytes (ending at the NUL) and the first four.  */
+	str	data2w, [dst, #-3]
+	str	data1w, [dstin]
+#ifdef BUILD_STPCPY
+	mov	dstin, dst
+#endif
+	ret
+.Lfp_lt4:
+	cbz	pos, .Lfp_lt2		/* Empty string: only the NUL.  */
+	/* 2->3 bytes to copy.  */
+#ifdef __AARCH64EB__
+	lsr	data1, data1, #48
+#endif
+	strh	data1w, [dstin]
+	/* Fall-through, one byte (max) to go.  */
+.Lfp_lt2:
+	/* Null-terminated string.  Last character must be zero!  */
+	strb	wzr, [dst]
+#ifdef BUILD_STPCPY
+	mov	dstin, dst
+#endif
+	ret
+
+	.p2align 6
+	/* Aligning here ensures that the entry code and main loop all lies
+	   within one 64-byte cache line.  */
+.Lbulk_entry:
+	/* The first 16 bytes hold no NUL: store them, then advance both
+	   pointers by 16 - (srcin & 15) so that src is 16-byte aligned.
+	   The next block may therefore overlap the one just stored.  */
+	sub	to_align, to_align, #16
+	stp	data1, data2, [dstin]
+	sub	src, srcin, to_align
+	sub	dst, dstin, to_align
+	b	.Lentry_no_page_cross
+
+	/* The inner loop deals with two Dwords at a time.  This has a
+	   slightly higher start-up cost, but we should win quite quickly,
+	   especially on cores with a high number of issue slots per
+	   cycle, as we get much better parallelism out of the operations.  */
+.Lmain_loop:
+	stp	data1, data2, [dst], #16
+.Lentry_no_page_cross:
+	ldp	data1, data2, [src], #16
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	b.eq	.Lmain_loop		/* Neither dword holds a NUL.  */
+
+	/* Since we know we are copying at least 16 bytes, the fastest way
+	   to deal with the tail is to determine the location of the
+	   trailing NUL, then (re)copy the 16 bytes leading up to that.
+	   The flags set by the cmp below stay live until the second csel;
+	   none of the instructions in between writes them.  */
+	cmp	has_nul1, #0
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, ne
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, ne
+#endif
+	rev	has_nul1, has_nul1
+	clz	pos, has_nul1
+	add	tmp1, pos, #72		/* NUL in data2: 8 * (index + 9).  */
+	add	pos, pos, #8		/* NUL in data1: 8 * (index + 1).  */
+	csel	pos, pos, tmp1, ne
+	/* src is already 16 bytes past the block just loaded and dst still
+	   points at where that block belongs, hence the different offsets
+	   on the final load and store.  */
+	add	src, src, pos, lsr #3
+	add	dst, dst, pos, lsr #3
+	ldp	data1, data2, [src, #-32]
+	stp	data1, data2, [dst, #-16]
+#ifdef BUILD_STPCPY
+	sub	dstin, dst, #1		/* Address of the copied NUL.  */
+#endif
+	ret
+
+.Lpage_cross:
+	bic	src, srcin, #15
+	/* Start by loading two words at [srcin & ~15], then forcing the
+	   bytes that precede srcin to 0xff.  This means they never look
+	   like termination bytes.  On entry tmp1 holds -(srcin & 15).  */
+	ldp	data1, data2, [src]
+	lsl	tmp1, tmp1, #3	/* Bytes beyond alignment -> bits.  */
+	tst	to_align, #7
+	csetm	tmp2, ne	/* All ones unless to_align is 0 or 8.  */
+#ifdef __AARCH64EB__
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+	cmp	to_align, #8
+	csinv	data1, data1, xzr, lt	/* to_align >= 8: data1 = all ones.  */
+	csel	data2, data2, data2a, lt	/* ... and data2 is masked.  */
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	/* No NUL in this aligned block from srcin onwards: take the normal
+	   path, which re-fetches 16 bytes starting at srcin.  */
+	b.eq	.Lpage_cross_ok
+	/* We now need to make data1 and data2 look like they've been
+	   loaded directly from srcin.  Do a rotate on the 128-bit value.  */
+	lsl	tmp1, to_align, #3	/* Bytes->bits.  */
+	neg	tmp2, to_align, lsl #3
+#ifdef __AARCH64EB__
+	lsl	data1a, data1, tmp1
+	lsr	tmp4, data2, tmp2
+	lsl	data2, data2, tmp1
+	orr	tmp4, tmp4, data1a
+	cmp	to_align, #8
+	csel	data1, tmp4, data2, lt
+	rev	tmp2, data1
+	rev	tmp4, data2
+	sub	tmp1, tmp2, zeroones
+	orr	tmp2, tmp2, #REP8_7f
+	sub	tmp3, tmp4, zeroones
+	orr	tmp4, tmp4, #REP8_7f
+#else
+	lsr	data1a, data1, tmp1
+	lsl	tmp4, data2, tmp2
+	lsr	data2, data2, tmp1
+	orr	tmp4, tmp4, data1a
+	cmp	to_align, #8
+	csel	data1, tmp4, data2, lt
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+#endif
+	/* Re-run the NUL test on the realigned data and finish through the
+	   short-string exits.  */
+	bic	has_nul1, tmp1, tmp2
+	cbnz	has_nul1, .Lfp_le8
+	bic	has_nul2, tmp3, tmp4
+	b	.Lfp_gt8
+
+	.size	STRCPY, . - STRCPY
@@ -0,0 +1,242 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strlen.S
+ *
+ * Copyright (c) 2013-2015, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
+ */
+
+/* To test the page crossing code path more thoroughly, compile with
+   -DTEST_PAGE_CROSS - this will force all calls through the slower
+   entry path.  This option is not intended for production use.	 */
+
+/* Arguments and results.  */
+#define srcin		x0	/* String pointer as passed in.  */
+#define len		x0	/* Result; shares x0 with srcin.  */
+
+/* Locals and temporaries.  Note the deliberate register sharing:
+   tmp1/has_nul1 are both x4 and tmp2/has_nul2 are both x5, so the order of
+   the instructions that compute the syndromes must be preserved.  */
+#define src		x1	/* Address of the current 16-byte block.  */
+#define data1		x2	/* First dword of the current block.  */
+#define data2		x3	/* Second dword of the current block.  */
+#define has_nul1	x4	/* Non-zero iff a NUL was seen in data1.  */
+#define has_nul2	x5	/* Non-zero iff a NUL was seen in data2.  */
+#define tmp1		x4
+#define tmp2		x5
+#define tmp3		x6
+#define tmp4		x7
+#define zeroones	x8	/* Holds REP8_01 for the whole routine.  */
+
+/* Local-label helper: L(foo) expands to .Lfoo.  */
+#define L(l) .L ## l
+
+	/* def_fn NAME [p2align=N]: switch to .text, align to 2^N bytes
+	   (N defaults to 0) and open a global symbol NAME typed as a
+	   function.  */
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word. A faster check
+	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+	   false hits for characters 129..255.  */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* With TEST_PAGE_CROSS the entry check compares (srcin & 14) against -1
+   with a signed "greater than", which always holds, so every call goes
+   through L(page_cross).  */
+#ifdef TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 15
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
+	/* Since strings are short on average, we check the first 16 bytes
+	   of the string for a NUL character.  In order to do an unaligned ldp
+	   safely we have to do a page cross check first.  If there is a NUL
+	   byte we calculate the length from the 2 8-byte words using
+	   conditional select to reduce branch mispredictions (it is unlikely
+	   strlen will be repeatedly called on strings with the same length).
+
+	   If the string is longer than 16 bytes, we align src so don't need
+	   further page cross checks, and process 32 bytes per iteration
+	   using the fast NUL check.  If we encounter non-ASCII characters,
+	   fallback to a second loop using the full NUL check.
+
+	   If the page cross check fails, we read 16 bytes from an aligned
+	   address, remove any characters before the string, and continue
+	   in the main loop using aligned loads.  Since strings crossing a
+	   page in the first 16 bytes are rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+	   AArch64 systems have a minimum page size of 4k.  We don't bother
+	   checking for larger page sizes - the cost of setting up the correct
+	   page size is just not worth the extra gain from a small reduction in
+	   the cases taking the slow path.  Note that we only care about
+	   whether the first fetch, which may be misaligned, crosses a page
+	   boundary.  */
+
+	/* strlen: return in x0 the number of bytes that precede the first
+	   NUL of the string at srcin (x0).  Leaf routine: only x0-x8 and
+	   the flags are used and the stack is never touched.  */
+def_fn strlen p2align=6
+	and	tmp1, srcin, MIN_PAGE_SIZE - 1	/* Offset within the page.  */
+	mov	zeroones, REP8_01
+	cmp	tmp1, MIN_PAGE_SIZE - 16
+	b.gt	L(page_cross)		/* A 16-byte load would cross.  */
+	ldp	data1, data2, [srcin]	/* Unaligned 16-byte fetch.  */
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.
+	   Since we expect strings to be small and early-exit,
+	   byte-swap the data now so has_nul1/2 will be correct.  */
+	rev	data1, data1
+	rev	data2, data2
+#endif
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(main_loop_entry)	/* No NUL in the first 16 bytes.  */
+
+	/* Enter with C = has_nul1 == 0.  The ccmp above leaves C clear when
+	   it is skipped (NUL seen in data1) and set when it really compares
+	   has_nul2 with zero, so "cc" below selects the data1 case.  */
+	csel	has_nul1, has_nul1, has_nul2, cc
+	mov	len, 8
+	rev	has_nul1, has_nul1
+	clz	tmp1, has_nul1		/* 8 * (index of the NUL).  */
+	csel	len, xzr, len, cc	/* Base: 0 for data1, 8 for data2.  */
+	add	len, len, tmp1, lsr 3
+	ret
+
+	/* The inner loop processes 32 bytes per iteration and uses the fast
+	   NUL check.  If we encounter non-ASCII characters, use a second
+	   loop with the accurate NUL check.  */
+	.p2align 4
+L(main_loop_entry):
+	/* Bias src so that the first pre-indexed load below fetches from
+	   (srcin & ~15) + 16.  */
+	bic	src, srcin, 15
+	sub	src, src, 16
+L(main_loop):
+	ldp	data1, data2, [src, 32]!
+.Lpage_cross_entry:
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7	/* Fast check against 0x80 in each byte.  */
+	bne	1f
+	ldp	data1, data2, [src, 16]	/* Second half; src not updated.  */
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	beq	L(main_loop)
+	add	src, src, 16		/* Hit was in the second half.  */
+1:
+	/* The fast check failed, so do the slower, accurate NUL check.
+	   tmp1 and tmp3 still hold data1 - REP8_01 and data2 - REP8_01.  */
+	orr	tmp2, data1, REP8_7f
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)	/* False hit: no NUL in this block.  */
+
+	/* Enter with C = has_nul1 == 0.  */
+L(tail):
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, cc
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, cc
+#endif
+	sub	len, src, srcin		/* Offset of the current block.  */
+	rev	has_nul1, has_nul1
+	add	tmp2, len, 8
+	clz	tmp1, has_nul1		/* 8 * (index of the NUL).  */
+	csel	len, len, tmp2, cc	/* Add 8 if the NUL is in data2.  */
+	add	len, len, tmp1, lsr 3
+	ret
+
+	/* Accurate check, 16 bytes per step, unrolled twice.  */
+L(nonascii_loop):
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	bne	L(tail)
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+	b	L(tail)
+
+	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+	   srcin to 0x7f, so we ignore any NUL bytes before the string.
+	   Then continue in the aligned loop.  */
+L(page_cross):
+	bic	src, srcin, 15
+	ldp	data1, data2, [src]
+	lsl	tmp1, srcin, 3		/* Byte offset -> bit count.  */
+	mov	tmp4, -1
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	tmp1, tmp1, REP8_80
+	orn	data1, data1, tmp1
+	orn	tmp2, data2, tmp1
+	tst	srcin, 8
+	/* If srcin lies in the second dword, data1 is entirely in front of
+	   the string (set it to all ones) and the masked copy of data2 is
+	   used; otherwise the masked data1 and the untouched data2 are
+	   kept.  */
+	csel	data1, data1, tmp4, eq
+	csel	data2, data2, tmp2, eq
+	b	L(page_cross_entry)
+
+	.size	strlen, . - strlen
@@ -0,0 +1,294 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strncmp.S
+ *
+ * Copyright (c) 2013, 2018, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+	/* def_fn NAME [p2align=N]: switch to .text, align to 2^N bytes
+	   (N defaults to 0, i.e. no extra alignment) and open a global
+	   symbol NAME typed as a function.  */
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+/* Byte-replicated constants for the word-at-a-time NUL test.  REP8_80 is
+   not referenced by the code in this file.  */
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+#define src1		x0	/* First string; advanced while scanning.  */
+#define src2		x1	/* Second string; advanced while scanning.  */
+#define limit		x2	/* Maximum number of bytes to compare.  */
+#define result		x0	/* Shares x0 with src1.  */
+
+/* Internal variables.  */
+#define data1		x3	/* Current dword (or byte) from src1.  */
+#define data1w		w3
+#define data2		x4	/* Current dword (or byte) from src2.  */
+#define data2w		w4
+#define has_nul		x5	/* Non-zero iff data1 holds a NUL byte.  */
+#define diff		x6	/* data1 ^ data2.  */
+#define syndrome	x7	/* diff | has_nul.  */
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define zeroones	x11	/* Holds REP8_01 for the whole routine.  */
+#define pos		x12	/* Leading-zero count of the syndrome.  */
+#define limit_wd	x13	/* Remaining dword budget (see code).  */
+#define mask		x14	/* Selects the bytes beyond the limit.  */
+#define endloop		x15	/* Non-zero: leave the aligned loop.  */
+#define count		mask	/* Shares x14 with mask.  */
+
+	/* strncmp: compare at most limit (x2) bytes of the strings at src1
+	   (x0) and src2 (x1), stopping at the first NUL.  The value
+	   returned in x0 is zero when no difference is found within the
+	   limit (including limit == 0); otherwise its sign follows the
+	   unsigned comparison at the first point of difference.  Leaf
+	   routine: only x0-x15 and the flags are used and the stack is
+	   never touched.
+
+	   The entry point is deliberately placed seven NOPs after a 64-byte
+	   boundary (def_fn is used without a p2align argument) so that the
+	   aligned loop further down sits in one cache line.  */
+	.text
+	.p2align 6
+	.rep 7
+	nop	/* Pad so that the loop below fits a cache line.  */
+	.endr
+def_fn strncmp
+	cbz	limit, .Lret0
+	eor	tmp1, src1, src2	/* Low 3 bits zero iff same alignment.  */
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	and	count, src1, #7		/* Bytes past the 8-byte boundary.  */
+	b.ne	.Lmisaligned8
+	cbnz	count, .Lmutual_align
+	/* Calculate the number of full and partial words -1.  */
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	/* Start of performance-critical section  -- one 64B cache line.  */
+.Lloop_aligned:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned:
+	subs	limit_wd, limit_wd, #1	/* Goes negative on the last dword.  */
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	endloop, #0, #0, eq
+	b.eq	.Lloop_aligned
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+	/* Not reached the limit, must have found the end or a diff.  */
+	tbz	limit_wd, #63, .Lnot_limit
+
+	/* Limit % 8 == 0 => all bytes significant.  */
+	ands	limit, limit, #7
+	b.eq	.Lnot_limit
+
+	/* The last dword is only partly inside the limit: clear the bytes
+	   beyond it in both words and flag them in has_nul so that the
+	   comparison cannot run past the limit.  */
+	lsl	limit, limit, #3	/* Bytes -> bits.  */
+	mov	mask, #~0
+#ifdef __AARCH64EB__
+	lsr	mask, mask, limit
+#else
+	lsl	mask, mask, limit
+#endif
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	/* Make sure that the NUL byte is marked in the syndrome.  */
+	orr	has_nul, has_nul, mask
+
+.Lnot_limit:
+	orr	syndrome, diff, has_nul
+
+#ifndef	__AARCH64EB__
+	/* Little-endian: byte-reverse so that the byte at the lowest
+	   address becomes the most significant one.  */
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne		/* 1 if the dwords differ ...  */
+	cneg	result, result, lo	/* ... negated if data1 < data2.  */
+	ret
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#endif
+
+.Lmutual_align:
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that precede the start point.
+	   We also need to adjust the limit calculations, but without
+	   overflowing if the limit is near ULONG_MAX.  */
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	ldr	data1, [src1], #8
+	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+#endif
+	and	tmp3, limit_wd, #7
+	lsr	limit_wd, limit_wd, #3
+	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
+	add	limit, limit, count
+	add	tmp3, tmp3, count
+	/* Force the bytes in front of the strings to 0xff in both words so
+	   that they compare equal and can never look like a NUL.  */
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	add	limit_wd, limit_wd, tmp3, lsr #3
+	b	.Lstart_realigned
+
+	.p2align 6
+	/* Don't bother with dwords for up to 16 bytes.  */
+.Lmisaligned8:
+	cmp	limit, #16
+	b.hs	.Ltry_misaligned_words
+
+.Lbyte_loop:
+	/* Perhaps we can do better than this.  The loop continues only
+	   while bytes remain within the limit, the src1 byte is not NUL and
+	   the two bytes are equal.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	limit, limit, #1
+	ccmp	data1w, #1, #0, hi	/* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.eq	.Lbyte_loop
+.Ldone:
+	/* Bytewise exit: both registers hold zero-extended bytes.  */
+	sub	result, data1, data2
+	ret
+	/* Align the SRC1 to a dword by doing a bytewise compare and then do
+	   the dword loop.  */
+.Ltry_misaligned_words:
+	lsr	limit_wd, limit, #3
+	cbz	count, .Ldo_misaligned	/* src1 is already aligned.  */
+
+	neg	count, count
+	and	count, count, #7	/* Bytes needed to align src1.  */
+	sub	limit, limit, count
+	lsr	limit_wd, limit, #3
+
+.Lpage_end_loop:
+	/* Compare count bytes one at a time.  Entered with count = bytes
+	   needed to align src1, or with count = 8 from .Lloop_misaligned
+	   when src2 is close to the end of a 4K page.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	cmp	data1w, #1		/* C set iff the src1 byte is not NUL.  */
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.ne	.Ldone			/* NUL in src1 or bytes differ.  */
+	subs	count, count, #1
+	b.hi	.Lpage_end_loop
+
+.Ldo_misaligned:
+	/* Prepare ourselves for the next page crossing.  Unlike the aligned
+	   loop, we fetch 1 less dword because we risk crossing bounds on
+	   SRC2.  */
+	mov	count, #8
+	subs	limit_wd, limit_wd, #1
+	b.lo	.Ldone_loop		/* Dword budget already used up.  */
+.Lloop_misaligned:
+	/* If src2 lies within the last 8 bytes of a 4K page, compare the
+	   next 8 bytes bytewise instead of with an unaligned dword load.  */
+	and	tmp2, src2, #0xff8
+	eor	tmp2, tmp2, #0xff8	/* Zero iff bits [11:3] are all ones.  */
+	cbz	tmp2, .Lpage_end_loop
+
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	diff, #0, #0, eq
+	b.ne	.Lnot_limit
+	subs	limit_wd, limit_wd, #1
+	b.pl	.Lloop_misaligned
+
+.Ldone_loop:
+	/* The dword budget is used up and neither a difference nor a NUL
+	   has been seen.  Compare the remaining limit % 8 bytes, if any.  */
+	and	limit, limit, #7
+	cbz	limit, .Lnot_limit
+	/* Read the last word: step back one dword and load the 8 bytes that
+	   end exactly at the limit, re-reading bytes already compared.  */
+	sub	src1, src1, 8
+	sub	src2, src2, 8
+	ldr	data1, [src1, limit]
+	ldr	data2, [src2, limit]
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	diff, #0, #0, eq
+	b.ne	.Lnot_limit
+
+.Lret0:
+	mov	result, #0
+	ret
+	.size strncmp, . - strncmp
@@ -0,0 +1,188 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strnlen.S
+ *
+ * Copyright (c) 2013, Linaro Limited
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *    * Neither the name of the Linaro nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Arguments and results.  */
+#define srcin		x0	/* In: start of the string; x0 is reused for len.  */
+#define len		x0	/* Out: result; writing it overwrites srcin.  */
+#define limit		x1	/* In: maximum number of bytes to examine.  */
+
+/* Locals and temporaries.  */
+#define src		x2	/* Load cursor, kept 16-byte aligned.  */
+#define data1		x3	/* First Dword of the current Qword.  */
+#define data2		x4	/* Second Dword of the current Qword.  */
+#define data2a		x5	/* data2 with leading bytes forced to 0xff.  */
+#define has_nul1	x6	/* NUL syndrome computed from data1.  */
+#define has_nul2	x7	/* NUL syndrome computed from data2.  */
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define tmp4		x11
+#define zeroones	x12	/* Holds REP8_01.  */
+#define pos		x13	/* Leading-zero count of the reversed syndrome.  */
+#define limit_wd	x14	/* Qwords still to scan, minus one.  */
+
+	.macro def_fn f p2align=0	/* Open a global function; f = symbol name, p2align = log2 alignment.  */
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:	/* Entry label; the body follows the macro invocation.  */
+	.endm
+
+#define REP8_01 0x0101010101010101	/* 0x01 in every byte; subtracted in the NUL test.  */
+#define REP8_7f 0x7f7f7f7f7f7f7f7f	/* 0x7f in every byte; ORed in the NUL test.  */
+#define REP8_80 0x8080808080808080	/* 0x80 in every byte; not referenced in this file.  */
+
+	.text
+	.p2align	6	/* Put .Lstart on a 64-byte boundary.  */
+.Lstart:
+	/* Pre-pad to ensure critical loop begins an icache line.  */
+	.rep 7	/* 7 nops + 2 insns (.Lhit_limit) + 7 insns (entry) = 64 bytes to .Lloop, at 4 bytes per insn.  */
+	nop
+	.endr
+	/* Put this code here to avoid wasting more space with pre-padding.  */
+.Lhit_limit:
+	mov	len, limit	/* No NUL within limit bytes: the result is limit.  */
+	ret
+
+def_fn strnlen	/* In: x0 = srcin, x1 = limit.  Out: x0 = len, never above limit.  Uses x2-x14 and flags.  */
+	cbz	limit, .Lhit_limit	/* limit == 0: return 0 without reading memory.  */
+	mov	zeroones, #REP8_01
+	bic	src, srcin, #15	/* src = srcin rounded down to 16 bytes.  */
+	ands	tmp1, srcin, #15	/* tmp1 = offset of srcin within its Qword.  */
+	b.ne	.Lmisaligned
+	/* Calculate the number of full and partial words -1.  */
+	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords.  */
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	/* The inner loop deals with two Dwords at a time.  This has a
+	   slightly higher start-up cost, but we should win quite quickly,
+	   especially on cores with a high number of issue slots per
+	   cycle, as we get much better parallelism out of the operations.  */
+
+	/* Start of critical section -- keep to one 64Byte cache line.  */
+.Lloop:
+	ldp	data1, data2, [src], #16	/* Load the next 16 bytes and advance src.  */
+.Lrealigned:	/* Entry from .Lmisaligned with data1/data2 already masked.  */
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2	/* Non-zero iff data1 contains a zero byte.  */
+	bic	has_nul2, tmp3, tmp4	/* Non-zero iff data2 contains a zero byte.  */
+	subs	limit_wd, limit_wd, #1	/* Goes negative once the last Qword was read.  */
+	orr	tmp1, has_nul1, has_nul2
+	ccmp	tmp1, #0, #0, pl	/* Limit exhausted (mi): NZCV = 0000, i.e. ne.  */
+	b.eq	.Lloop	/* Continue while inside the limit and no NUL was seen.  */
+	/* End of critical section -- keep to one 64Byte cache line.  */
+
+	orr	tmp1, has_nul1, has_nul2	/* Same value tmp1 already holds from the loop.  */
+	cbz	tmp1, .Lhit_limit	/* No null in final Qword.  */
+
+	/* We know there's a null in the final Qword.  The easiest thing
+	   to do now is work out the length of the string and return
+	   MIN (len, limit).  */
+
+	sub	len, src, srcin	/* Bytes from srcin to the end of this Qword; srcin (x0) is dead from here.  */
+	cbz	has_nul1, .Lnul_in_data2
+#ifdef __AARCH64EB__
+	mov	data2, data1	/* The big-endian recomputation below reads data2.  */
+#endif
+	sub	len, len, #8	/* NUL is in data1: step back over data2.  */
+	mov	has_nul2, has_nul1	/* The common tail works on has_nul2.  */
+.Lnul_in_data2:
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	rev	data2, data2
+	sub	tmp1, data2, zeroones
+	orr	tmp2, data2, #REP8_7f
+	bic	has_nul2, tmp1, tmp2
+#endif
+	sub	len, len, #8	/* len = offset of the Dword that holds the NUL.  */
+	rev	has_nul2, has_nul2	/* Move the earliest byte's flag to the top byte.  */
+	clz	pos, has_nul2	/* Bit distance to the first flagged byte.  */
+	add	len, len, pos, lsr #3		/* Bits to bytes.  */
+	cmp	len, limit
+	csel	len, len, limit, ls		/* Return the lower value.  */
+	ret
+
+.Lmisaligned:
+	/* Deal with a partial first word.
+	   We're doing two things in parallel here;
+	   1) Calculate the number of words (but avoiding overflow if
+	      limit is near ULONG_MAX) - to do this we need to work out
+	      limit + tmp1 - 1 as a 65-bit value before shifting it;
+	   2) Load and mask the initial data words - we force the bytes
+	      before the ones we are interested in to 0xff - this ensures
+	      early bytes will not hit any zero detection.  */
+	sub	limit_wd, limit, #1	/* limit != 0 was checked at entry.  */
+	neg	tmp4, tmp1
+	cmp	tmp1, #8	/* Flags are consumed by the csinv/csel at the end.  */
+
+	and	tmp3, limit_wd, #15	/* tmp3 = (limit - 1) % 16.  */
+	lsr	limit_wd, limit_wd, #4	/* limit_wd = (limit - 1) / 16.  */
+	mov	tmp2, #~0	/* All-ones, shifted into a byte mask below.  */
+
+	ldp	data1, data2, [src], #16
+	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits.  */
+	add	tmp3, tmp3, tmp1	/* Low part of (limit - 1) plus the misalignment.  */
+
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp4	/* Shift by (tmp4 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp4	/* Shift by (tmp4 & 63).  */
+#endif
+	add	limit_wd, limit_wd, tmp3, lsr #4	/* Add the carry out of the low 4 bits.  */
+
+	orr	data1, data1, tmp2	/* Candidate for tmp1 <= 8: leading bytes forced to 0xff.  */
+	orr	data2a, data2, tmp2	/* Candidate for tmp1 > 8: leading bytes forced to 0xff.  */
+
+	csinv	data1, data1, xzr, le	/* tmp1 > 8: all of data1 precedes srcin, so use ~0.  */
+	csel	data2, data2, data2a, le	/* tmp1 > 8: take the masked copy of data2.  */
+	b	.Lrealigned
+	.size	strnlen, . - .Lstart	/* Include pre-padding in size.  */
@@ -0,0 +1,179 @@
+/****************************************************************************
+ * libs/libc/machine/arm64/gnu/arch_strrchr.S
+ *
+ * Copyright (c) 2014, ARM Limited
+ * All rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the company nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0	/* In: start of the string.  */
+#define chrin		w1	/* In: character to search for.  */
+
+#define result		x0	/* Out: address of the last match, or 0.  */
+
+#define src		x2	/* Load cursor; after each load it points past the hunk.  */
+#define	tmp1		x3
+#define wtmp2		w4	/* Used to build the 0x40100401 lane mask.  */
+#define tmp3		x5
+#define src_match	x6	/* Value of src after the latest hunk with a match.  */
+#define src_offset	x7	/* Character syndrome of that hunk; 0 = no match yet.  */
+#define const_m1	x8	/* All-ones constant.  */
+#define tmp4		x9
+#define nul_match	x10	/* NUL syndrome of the current hunk.  */
+#define chr_match	x11	/* Character syndrome of the current hunk.  */
+
+#define vrepchr		v0	/* chrin replicated into every byte lane.  */
+#define vdata1		v1	/* First 16 bytes of the hunk.  */
+#define vdata2		v2	/* Second 16 bytes of the hunk.  */
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask_0	v7	/* Lane mask for NUL bits (vrepmask_c doubled).  */
+#define vrepmask_c	v16	/* Lane mask for character bits (0x40100401).  */
+#define vend1		v17	/* Unmasked NUL reduction, tested only against zero.  */
+#define vend2		v18	/* Not referenced in this file.  */
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character; bit 1 is set
+   iff the relevant byte matched the NUL end of string (we trigger
+   off bit0 for the special case of looking for NUL).  Since the bits
+   in the syndrome reflect exactly the order in which things occur
+   in the original string a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination, and why.  */
+
+/* Helper macro used to open the function definition below.  */
+
+	.macro def_fn f p2align=0	/* Open a global function; f = symbol name, p2align = log2 alignment.  */
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:	/* Entry label; the body follows the macro invocation.  */
+	.endm
+
+def_fn strrchr	/* In: x0 = srcin, w1 = chrin.  Out: x0 = address of the last match, or 0.  */
+	/* Magic constant 0x40100401 to allow us to identify which lane
+	   matches the requested byte.  Magic constant 0x80200802 used
+	   similarly for NUL termination.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16	/* wtmp2 = 0x40100401.  */
+	dup	vrepchr.16b, chrin	/* Replicate the low byte of chrin into all 16 lanes.  */
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask_c.4s, wtmp2
+	mov	src_offset, #0	/* No match recorded yet.  */
+	ands	tmp1, srcin, #31	/* tmp1 = offset of srcin within its hunk.  */
+	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
+	b.eq	.Laligned
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b	// 128->64
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
+	mov	nul_match, vhas_nul1.2d[0]
+	lsl	tmp1, tmp1, #1	/* Two syndrome bits per byte.  */
+	mov	const_m1, #~0
+	mov	chr_match, vhas_chr1.2d[0]
+	lsr	tmp3, const_m1, tmp1	/* Shift is taken mod 64: the low 2 * (srcin & 31) bits stay set.  */
+
+	bic	nul_match, nul_match, tmp3	// Mask padding bits.
+	bic	chr_match, chr_match, tmp3	// Mask padding bits.
+	cbnz	nul_match, .Ltail	/* The string ends inside the first hunk.  */
+
+.Lloop:
+	cmp	chr_match, #0
+	csel	src_match, src, src_match, ne	/* On a match, remember where this hunk ended...  */
+	csel	src_offset, chr_match, src_offset, ne	/* ...and its character syndrome.  */
+.Laligned:
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	addp	vend1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128 (unmasked; only tested against zero)
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	addp	vend1.16b, vend1.16b, vend1.16b	// 128->64
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
+	mov	nul_match, vend1.2d[0]
+	mov	chr_match, vhas_chr1.2d[0]
+	cbz	nul_match, .Lloop	/* No NUL in this hunk: keep scanning.  */
+
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b	/* A NUL was seen: build the masked NUL syndrome.  */
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
+	mov	nul_match, vhas_nul1.2d[0]
+
+.Ltail:
+	/* Work out exactly where the string ends.  */
+	sub	tmp4, nul_match, #1
+	eor	tmp4, tmp4, nul_match	/* tmp4 = all bits up to and including the lowest set bit of nul_match.  */
+	ands	chr_match, chr_match, tmp4	/* Drop matches located after the terminator.  */
+	/* And pick the values corresponding to the last match.  */
+	csel	src_match, src, src_match, ne
+	csel	src_offset, chr_match, src_offset, ne
+
+	/* Count down from the top of the syndrome to find the last match.  */
+	clz	tmp3, src_offset
+	/* Src_match points beyond the word containing the match, so we can
+	   simply subtract half the bit-offset into the syndrome.  Because
+	   we are counting down, we need to go back one more character.  */
+	add	tmp3, tmp3, #2
+	sub	result, src_match, tmp3, lsr #1
+	/* But if the syndrome shows no match was found, then return NULL.  */
+	cmp	src_offset, #0
+	csel	result, result, xzr, ne
+
+	ret
+
+	.size	strrchr, . - strrchr
@@ -44,6 +44,7 @@
 *
 ****************************************************************************/

+#ifndef CONFIG_LIBC_ARCH_STRCHRNUL
 FAR char *strchrnul(FAR const char *s, int c)
 {
  if (s)
@@ -56,3 +57,4 @@ FAR char *strchrnul(FAR const char *s, int c)

  return (FAR char *)s;
 }
+#endif
@@ -30,7 +30,7 @@
 * Public Functions
 ****************************************************************************/

-#ifndef CONFIG_ARCH_STRNCMP
+#ifndef CONFIG_LIBC_ARCH_STRNCMP
 #undef strncmp /* See mm/README.txt */
 int strncmp(FAR const char *cs, FAR const char *ct, size_t nb)
 {
@@ -34,6 +34,7 @@
 * occurrence of the character c in the string s.
 */

+#ifndef CONFIG_LIBC_ARCH_STRRCHR
 #undef strrchr /* See mm/README.txt */
 FAR char *strrchr(FAR const char *s, int c)
 {
@@ -50,3 +51,4 @@ FAR char *strrchr(FAR const char *s, int c)

  return (FAR char *)r;
 }
+#endif