diff --git a/libs/libc/machine/arm/armv7-a/Kconfig b/libs/libc/machine/arm/armv7-a/Kconfig
index 64a769aa967..2884836c637 100644
--- a/libs/libc/machine/arm/armv7-a/Kconfig
+++ b/libs/libc/machine/arm/armv7-a/Kconfig
@@ -3,9 +3,63 @@
 # see the file kconfig-language.txt in the NuttX tools repository.
 #
 
+config ARMV7A_MEMCHR
+	bool "Enable optimized memchr() for ARMv7-A"
+	default n
+	select MACHINE_OPTS_ARMV7A
+	select LIBC_ARCH_MEMCHR
+	depends on ARCH_TOOLCHAIN_GNU
+	depends on ARM_THUMB
+	---help---
+		Enable optimized ARMv7-A specific memchr() library function
+
 config ARMV7A_MEMCPY
 	bool "Enable optimized memcpy() for ARMv7-A"
+	select MACHINE_OPTS_ARMV7A
 	select LIBC_ARCH_MEMCPY
 	depends on ARCH_TOOLCHAIN_GNU
+	depends on ARM_THUMB
 	---help---
 		Enable optimized ARMv7-A specific memcpy() library function
+
+config ARMV7A_MEMMOVE
+	bool "Enable optimized memmove() for ARMv7-A"
+	default n
+	select MACHINE_OPTS_ARMV7A
+	select LIBC_ARCH_MEMMOVE
+	depends on ARCH_TOOLCHAIN_GNU
+	depends on ARM_THUMB
+	---help---
+		Enable optimized ARMv7-A specific memmove() library function
+
+config ARMV7A_MEMSET
+	bool "Enable optimized memset() for ARMv7-A"
+	default n
+	select MACHINE_OPTS_ARMV7A
+	select LIBC_ARCH_MEMSET
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARMv7-A specific memset() library function
+
+config ARMV7A_STRCMP
+	bool "Enable optimized strcmp() for ARMv7-A"
+	default n
+	select MACHINE_OPTS_ARMV7A
+	select LIBC_ARCH_STRCMP
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARMv7-A specific strcmp() library function
+
+config ARMV7A_STRLEN
+	bool "Enable optimized strlen() for ARMv7-A"
+	default n
+	select MACHINE_OPTS_ARMV7A
+	select LIBC_ARCH_STRLEN
+	depends on ARCH_TOOLCHAIN_GNU
+	depends on ARM_THUMB
+	---help---
+		Enable optimized ARMv7-A specific strlen() library function
+
+config MACHINE_OPTS_ARMV7A
+	bool
+	default n
diff --git a/libs/libc/machine/arm/armv7-a/Make.defs b/libs/libc/machine/arm/armv7-a/Make.defs
index 64fb84798c3..324851834e8 100644
--- a/libs/libc/machine/arm/armv7-a/Make.defs
+++ b/libs/libc/machine/arm/armv7-a/Make.defs
@@ -33,13 +33,33 @@
 #
 ############################################################################
 
+ifeq ($(CONFIG_ARMV7A_MEMCHR),y)
+ASRCS += arch_memchr.S
+endif
+
 ifeq ($(CONFIG_ARMV7A_MEMCPY),y)
-
 ASRCS += arch_memcpy.S
+endif
 
+ifeq ($(CONFIG_ARMV7A_MEMMOVE),y)
+ASRCS += arch_memmove.S
+endif
+
+ifeq ($(CONFIG_ARMV7A_MEMSET),y)
+ASRCS += arch_memset.S
+endif
+
+ifeq ($(CONFIG_ARMV7A_STRCMP),y)
+ASRCS += arch_strcmp.S
+endif
+
+ifeq ($(CONFIG_ARMV7A_STRLEN),y)
+ASRCS += arch_strlen.S
+endif
+
+ifeq ($(CONFIG_MACHINE_OPTS_ARMV7A),y)
 DEPPATH += --dep-path machine/arm/armv7-a/gnu
 VPATH += :machine/arm/armv7-a/gnu
-
 endif
 
 ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
diff --git a/libs/libc/machine/arm/armv7-a/gnu/acle-compat.h b/libs/libc/machine/arm/armv7-a/gnu/acle-compat.h
new file mode 100644
index 00000000000..beb6df9dd3d
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/acle-compat.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2014 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARM_ARCH
+
+/* ACLE standardises a set of pre-defines that describe the ARM architecture.
+   These were mostly implemented in GCC around GCC-4.8; older versions
+   have no, or only partial support.  To provide a level of backwards
+   compatibility we try to work out what the definitions should be, given
+   the older pre-defines that GCC did produce.  This isn't complete, but
+   it should be enough for use by routines that depend on this header.  */
+
+/* No need to handle ARMv8, GCC had ACLE support before that.  */
+
+# ifdef __ARM_ARCH_7__
+/* The common subset of ARMv7 in all profiles.  */
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+# endif
+
+# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 15
+#  define __ARM_FEATURE_UNALIGNED
+#  ifdef __ARM_ARCH_7A__
+#   define __ARM_ARCH_PROFILE 'A'
+#  else
+#   define __ARM_ARCH_PROFILE 'R'
+#  endif
+# endif
+
+# ifdef __ARM_ARCH_7EM__
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# ifdef __ARM_ARCH_7M__
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# ifdef __ARM_ARCH_6T2__
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 4
+#  define __ARM_FEATURE_UNALIGNED
+# endif
+
+# ifdef __ARM_ARCH_6M__
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 1
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
+  || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
+  || defined (__ARM_ARCH_6ZK__)
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 1
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_UNALIGNED
+#  ifndef __thumb__
+#   if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
+#    define __ARM_FEATURE_LDREX 15
+#   else
+#    define __ARM_FEATURE_LDREX 4
+#   endif
+#  endif
+# endif
+
+# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
+#  define __ARM_ARCH 5
+#  define __ARM_ARCH_ISA_ARM
+#  ifdef __ARM_ARCH_5TE__
+#   define __ARM_ARCH_ISA_THUMB 1
+#  endif
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_DSP
+# endif
+
+# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
+#  define __ARM_ARCH 5
+#  define __ARM_ARCH_ISA_ARM
+#  ifdef __ARM_ARCH_5T__	/* Only the T variant provides the Thumb ISA.  */
+#   define __ARM_ARCH_ISA_THUMB 1
+#  endif
+#  define __ARM_FEATURE_CLZ
+# endif
+
+# ifdef __ARM_ARCH_4T__
+#  define __ARM_ARCH 4
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_ARCH_ISA_THUMB 1
+# endif
+
+# ifdef __ARM_ARCH_4__
+#  define __ARM_ARCH 4
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
+#  define __ARM_ARCH 3
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# ifdef __ARM_ARCH_2__
+#  define __ARM_ARCH 2
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# ifdef __ARMEB__
+#  define __ARM_BIG_ENDIAN
+# endif
+
+/* If we still don't know what the target architecture is, then we're
+   probably not using GCC.  */
+# ifndef __ARM_ARCH
+#  error Unable to determine architecture version.
+# endif
+
+#endif /* __ARM_ARCH  */
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S b/libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S
new file mode 100644
index 00000000000..3c2b7e53a2d
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S
@@ -0,0 +1,386 @@
+/* Copyright (c) 2010-2011, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+      * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+      * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+      * Neither the name of Linaro Limited nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   Written by Dave Gilbert <david.gilbert@linaro.org>
+
+   This memchr routine is optimised on a Cortex-A9 and should work on
+   all ARMv7 processors.   It has a fast path for short sizes, and has
+   an optimised path for large data sets; the worst case is finding the
+   match early in a large data set. */
+
+/* Copyright (c) 2015 ARM Ltd.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+	 notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+	 notice, this list of conditions and the following disclaimer in the
+	 documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+	 names of its contributors may be used to endorse or promote products
+	 derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+@ 2011-02-07 david.gilbert@linaro.org
+@    Extracted from local git a5b438d861
+@ 2011-07-14 david.gilbert@linaro.org
+@    Import endianness fix from local git ea786f1b
+@ 2011-10-11 david.gilbert@linaro.org
+@    Import from cortex-strings bzr rev 63
+@    Flip to ldrd (as suggested by Greta Yorsh)
+@    Make conditional on CPU type
+@    tidy
+
+@ This code requires armv6t2 or later.  Uses Thumb2.
+
+	.syntax unified
+
+#include "acle-compat.h"
+
+@ NOTE: This ifdef MUST match the one in memchr-stub.c
+#if defined (__ARM_NEON__) || defined (__ARM_NEON)
+#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
+	.arch	armv8-r
+#else
+	.arch	armv7-a
+#endif
+	.fpu	neon
+
+
+/* Arguments */
+#define srcin		r0
+#define chrin		r1
+#define cntin		r2
+
+/* Retval */
+#define result		r0	/* Live range does not overlap with srcin */
+
+/* Working registers */
+#define src		r1	/* Live range does not overlap with chrin */
+#define tmp		r3
+#define synd		r0	/* No overlap with srcin or result */
+#define soff		r12
+
+/* Working NEON registers */
+#define vrepchr		q0	/* Search byte duplicated into every lane */
+#define vdata0		q1
+#define vdata0_0	d2	/* Lower half of vdata0 */
+#define vdata0_1	d3	/* Upper half of vdata0 */
+#define vdata1		q2
+#define vdata1_0	d4	/* Lower half of vdata1 */
+#define vdata1_1	d5	/* Upper half of vdata1 */
+#define vrepmask	q3
+#define vrepmask0	d6	/* Lower half of vrepmask */
+#define vrepmask1	d7	/* Upper half of vrepmask */
+#define vend		q4	/* Saved with vpush/vpop around the main loop */
+#define vend0		d8	/* Lower half of vend */
+#define vend1		d9	/* Upper half of vend */
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
+ * byte. Each bit is set if the relevant byte matched the requested character
+ * and cleared otherwise. Since the bits in the syndrome reflect exactly the
+ * order in which things occur in the original string, counting trailing zeros
+ * allows to identify exactly which byte has matched.
+ */
+
+	.text
+	.thumb_func
+	.align 4
+	.p2align 4,,15
+	.global memchr
+	.type memchr,%function
+
+memchr:
+	.cfi_sections .debug_frame
+	.cfi_startproc
+	/* Use a simple byte loop if there are fewer than 8 bytes to search.  */
+	cmp	cntin, #7
+	bhi	.Llargestr
+	and	chrin, chrin, #0xff	/* Compare against the low byte only.  */
+
+.Lsmallstr:
+	subs	cntin, cntin, #1
+	blo	.Lnotfound	/* Return not found if reached end.  */
+	ldrb	tmp, [srcin], #1
+	cmp	tmp, chrin
+	bne	.Lsmallstr	/* Loop again if not found.  */
+	/* Match: undo the ldrb post-increment so result points at the byte.  */
+	sub	result, result, #1
+	bx	lr
+
+
+.Llargestr:
+	vdup.8	vrepchr, chrin	/* Duplicate char across all lanes. */
+	/*
+	 * Magic constant 0x8040201008040201 allows us to identify which lane
+	 * matches the requested byte.
+	 */
+	movw	tmp, #0x0201
+	movt	tmp, #0x0804
+	lsl	soff, tmp, #4	/* soff = 0x80402010, upper word of the constant. */
+	vmov	vrepmask0, tmp, soff
+	vmov	vrepmask1, tmp, soff
+	/* Work with aligned 32-byte chunks */
+	bic	src, srcin, #31
+	ands	soff, srcin, #31
+	beq	.Lloopintro	/* Go straight to main loop if it's aligned. */
+
+	/*
+	 * Input string is not 32-byte aligned. We calculate the syndrome
+	 * value for the aligned 32 bytes block containing the first bytes
+	 * and mask the irrelevant part.
+	 */
+	vld1.8		{vdata0, vdata1}, [src:256]!
+	sub		tmp, soff, #32
+	adds		cntin, cntin, tmp	/* cntin -= (32 - soff); flags feed the bls below. */
+	vceq.i8		vdata0, vdata0, vrepchr
+	vceq.i8		vdata1, vdata1, vrepchr
+	vand		vdata0, vdata0, vrepmask
+	vand		vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov		synd, vdata0_0[0]
+
+	/* Clear the soff lower bits (bytes located before srcin) */
+	lsr		synd, synd, soff
+	lsl		synd, synd, soff
+	/* The first block can also be the last */
+	bls		.Lmasklast
+	/* Have we found something already? */
+	cbnz		synd, .Ltail
+
+
+.Lloopintro:
+	vpush	{vend}
+	/* 264/265 correspond to d8/d9 for q4 */
+	.cfi_adjust_cfa_offset	16
+	.cfi_rel_offset	264, 0
+	.cfi_rel_offset	265, 8
+	.p2align 3,,7
+.Lloop:
+	vld1.8		{vdata0, vdata1}, [src:256]!
+	subs		cntin, cntin, #32
+	vceq.i8		vdata0, vdata0, vrepchr
+	vceq.i8		vdata1, vdata1, vrepchr
+	/* If we're out of data we finish regardless of the result. */
+	bls		.Lend
+	/* Use a fast check for the termination condition. */
+	vorr		vend, vdata0, vdata1
+	vorr		vend0, vend0, vend1
+	vmov		synd, tmp, vend0
+	orrs		synd, synd, tmp
+	/* We're not out of data, loop if we haven't found the character. */
+	beq		.Lloop
+
+.Lend:
+	vpop		{vend}
+	.cfi_adjust_cfa_offset	-16
+	.cfi_restore	264
+	.cfi_restore	265
+
+	/* Termination condition found, let's calculate the syndrome value. */
+	vand		vdata0, vdata0, vrepmask
+	vand		vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov		synd, vdata0_0[0]
+	cbz		synd, .Lnotfound
+	bhi		.Ltail	/* Taken only if the loop exited with data remaining. */
+
+
+.Lmasklast:
+	/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
+	neg	cntin, cntin
+	lsl	synd, synd, cntin
+	lsrs	synd, synd, cntin
+	it	eq
+	moveq	src, #0	/* If no match, set src to 0 so the retval is 0. */
+
+
+.Ltail:
+	/* Count the trailing zeros using bit reversing */
+	rbit	synd, synd
+	/* Compensate the last post-increment */
+	sub	src, src, #32
+	/* Count the leading zeros (32 if synd is 0, cancelling the -32 above) */
+	clz	synd, synd
+	/* Compute the potential result and return */
+	add	result, src, synd
+	bx	lr
+
+
+.Lnotfound:
+	/* Set result to NULL if not found and return */
+	mov	result, #0
+	bx	lr
+
+	.cfi_endproc
+	.size	memchr, . - memchr
+
+#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
+
+#if __ARM_ARCH_PROFILE == 'M'
+       .arch armv7e-m
+#else
+       .arch armv6t2
+#endif
+
+@ this lets us check a flag in a 00/ff byte easily in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) 1<<(31-(c*8))
+#else
+#define CHARTSTMASK(c) 1<<(c*8)
+#endif
+	.text
+	.thumb
+
+@ ---------------------------------------------------------------------------
+	.thumb_func
+	.align 2
+	.p2align 4,,15
+	.global memchr
+	.type memchr,%function
+memchr:
+	@ r0 = start of memory to scan
+	@ r1 = character to look for
+	@ r2 = length (size_t, so all comparisons on it are unsigned)
+	@ returns r0 = pointer to character or NULL if not found
+	and	r1,r1,#0xff	@ Don't trust the caller to pass a char
+
+	cmp	r2,#16		@ If short don't bother with anything clever
+	blo	20f		@ Unsigned compare: lengths >= 2GiB are not short
+
+	tst	r0, #7		@ If it's already aligned skip the next bit
+	beq	10f
+
+	@ Work up to an aligned point
+5:
+	ldrb	r3, [r0],#1
+	subs	r2, r2, #1
+	cmp	r3, r1
+	beq	50f		@ If it matches exit found
+	tst	r0, #7
+	cbz	r2, 40f		@ If we run off the end, exit not found
+	bne	5b		@ If not aligned yet then do next byte
+
+10:
+	@ We are aligned, we know we have at least 8 bytes to work with
+	push	{r4,r5,r6,r7}
+	orr	r1, r1, r1, lsl #8	@ expand the match word across all bytes
+	orr	r1, r1, r1, lsl #16
+	bic	r4, r2, #7	@ Number of double words to work with * 8
+	mvns	r7, #0		@ all F's
+	movs	r3, #0
+
+15:
+	ldrd    r5,r6,[r0],#8
+	subs	r4, r4, #8
+	eor	r5,r5, r1	@ r5,r6 have 00's where bytes match the target
+	eor	r6,r6, r1
+	uadd8	r5, r5, r7	@ Par add 0xff - sets GE bits for bytes!=0
+	sel	r5, r3, r7	@ bytes are 00 for none-00 bytes,
+				@ or ff for 00 bytes - NOTE INVERSION
+	uadd8	r6, r6, r7	@ Par add 0xff - sets GE bits for bytes!=0
+	sel	r6, r5, r7	@ chained....bytes are 00 for none-00 bytes
+				@ or ff for 00 bytes - NOTE INVERSION
+	cbnz	r6, 60f
+	bne	15b		@ (Flags from the subs above)
+
+	pop	{r4,r5,r6,r7}
+	and	r1,r1,#0xff	@ r1 back to a single character
+	and	r2,r2,#7	@ Leave the count remaining as the number
+				@ after the double words have been done
+
+20:
+	cbz	r2, 40f		@ 0 length or hit the end already then not found
+
+21:  @ Post aligned section, or just a short call
+	ldrb	r3,[r0],#1
+	subs	r2,r2,#1
+	eor	r3,r3,r1	@ r3 = 0 if match - doesn't break flags from sub
+	cbz	r3, 50f
+	bne	21b		@ on r2 flags
+
+40:
+	movs	r0,#0		@ not found
+	bx	lr
+
+50:
+	subs	r0,r0,#1	@ found: undo the ldrb post-increment
+	bx	lr
+
+60:  @ We're here because the fast path found a hit
+     @ now we have to track down exactly which word it was
+	@ r0 points to the start of the double word after the one tested
+	@ r5 has the 00/ff pattern for the first word, r6 has the chained value
+	cmp	r5, #0
+	itte	eq
+	moveq	r5, r6		@ the end is in the 2nd word
+	subeq	r0,r0,#3	@ Points to 2nd byte of 2nd word
+	subne	r0,r0,#7	@ or 2nd byte of 1st word
+
+	@ r0 currently points to the 2nd byte of the word containing the hit
+	tst	r5, # CHARTSTMASK(0)	@ 1st character
+	bne	61f
+	adds	r0,r0,#1
+	tst	r5, # CHARTSTMASK(1)	@ 2nd character
+	ittt	eq
+	addeq	r0,r0,#1
+	tsteq	r5, # (3<<15)		@ 2nd & 3rd character
+	@ If not the 3rd must be the last one
+	addeq	r0,r0,#1
+
+61:
+	pop	{r4,r5,r6,r7}	@ restore the callee-saved registers pushed at 10:
+	subs	r0,r0,#1	@ r0 was one past the matching byte
+	bx	lr
+#else
+  /* Defined in memchr-stub.c (NOTE(review): confirm it exists in this tree).  */
+#endif
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S b/libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
index 271b4140ad4..7e7c752c3b2 100644
--- a/libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
@@ -1,461 +1,329 @@
-/************************************************************************************
- * libs/libc/machine/arm/armv7-a/arch_memcpy.S
- * ARMv7-A optimized memcpy.
- *
- * Adapted for use with ARMv7-A and NuttX by:
- *
- *   Copyright (C) 2017 Gregory Nutt. All rights reserved.
- *   Author: Gregory Nutt <gnutt@nuttx.org>
- *
- * Based on the ARMv7-M version contributed by Mike Smith.  Apparently in the public
- * domain and is re-released here under the modified BSD license:
- *
- * Obtained via a posting on the Stellaris forum:
- *  http://e2e.ti.com/support/microcontrollers/\
- *       stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
- *
- * Posted by rocksoft on Jul 24, 2008 10:19 AM
- *
- *   Hi,
- *
- *   I recently finished a "memcpy" replacement and thought it might be useful for
- *   others...
- *
- *   I've put some instructions and the code here:
- *
- *   http://www.rock-software.net/downloads/memcpy/
- *
- *   Hope it works for you as well as it did for me.
- *
- *   Liam.
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
- *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name NuttX nor the names of its contributors may be
- *    used to endorse or promote products derived from this software
- *    without specific prior written permission.
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- ************************************************************************************/
-
-/************************************************************************************
- * Public Symbols
- ************************************************************************************/
-
-	.global	memcpy
-	.syntax	unified
-	.file	"arch_memcpy.S"
-
-/************************************************************************************
- * .text
- ************************************************************************************/
-
-	.text
-
-/************************************************************************************
- * Private Constant Data
- ************************************************************************************/
-
-/* We have 16 possible alignment combinations of src and dst, this jump table
- * directs the copy operation
- *
- * Bits:  Src=00, Dst=00 - Long to Long copy
- * Bits:  Src=00, Dst=01 - Long to Byte before half word
- * Bits:  Src=00, Dst=10 - Long to Half word
- * Bits:  Src=00, Dst=11 - Long to Byte before long word
- * Bits:  Src=01, Dst=00 - Byte before half word to long
- * Bits:  Src=01, Dst=01 - Byte before half word to byte before half word -
- *                         Same alignment
- * Bits:  Src=01, Dst=10 - Byte before half word to half word
- * Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
- * Bits:  Src=10, Dst=00 - Half word to long word
- * Bits:  Src=10, Dst=01 - Half word to byte before half word
- * Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
- * Bits:  Src=10, Dst=11 - Half word to byte before long word
- * Bits:  Src=11, Dst=00 - Byte before long word to long word
- * Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
- * Bits:  Src=11, Dst=11 - Byte before long word to half word
- * Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word -
- *                         Same alignment
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-MEM_DataCopyTable:
-	.byte	(MEM_DataCopy0  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy1  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy2  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy3  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy4  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy5  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy6  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy7  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy8  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy9  - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy10 - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy11 - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy12 - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy13 - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy14 - MEM_DataCopyJump) >> 2
-	.byte	(MEM_DataCopy15 - MEM_DataCopyJump) >> 2
+/* This memcpy routine is optimised for Cortex-M3/M4 cores with/without
+   unaligned access.
+
+   If compiled with GCC, this file should be enclosed within following
+   pre-processing check:
+   if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
+
+   Prototype: void *memcpy (void *dst, const void *src, size_t count);
+
+   The job will be done in 5 steps.
+   Step 1: Align src/dest pointers, copy mis-aligned if fail to align both
+   Step 2: Repeatedly copy big block size of __OPT_BIG_BLOCK_SIZE
+   Step 3: Repeatedly copy mid block size of __OPT_MID_BLOCK_SIZE
+   Step 4: Copy word by word
+   Step 5: Copy byte-to-byte
+
+   Tunable options:
+     __OPT_BIG_BLOCK_SIZE: Size of big block in bytes.  Default to 64.
+     __OPT_MID_BLOCK_SIZE: Size of mid block in bytes.  Default to 16.
+ */
+#ifndef __OPT_BIG_BLOCK_SIZE
+#define __OPT_BIG_BLOCK_SIZE (4 * 16)
+#endif
+
+#ifndef __OPT_MID_BLOCK_SIZE
+#define __OPT_MID_BLOCK_SIZE (4 * 4)
+#endif
+
+#if __OPT_BIG_BLOCK_SIZE == 16
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12
+#elif __OPT_BIG_BLOCK_SIZE == 32
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12,16,20,24,28
+#elif __OPT_BIG_BLOCK_SIZE == 64
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
+#else
+#error "Illegal __OPT_BIG_BLOCK_SIZE"
+#endif
+
+#if __OPT_MID_BLOCK_SIZE == 8
+#define BEGIN_UNROLL_MID_BLOCK \
+  .irp offset, 0,4
+#elif __OPT_MID_BLOCK_SIZE == 16
+#define BEGIN_UNROLL_MID_BLOCK \
+  .irp offset, 0,4,8,12
+#else
+#error "Illegal __OPT_MID_BLOCK_SIZE"
+#endif
+
+#define END_UNROLL .endr
+
+	.syntax unified
+	.text
+	.align	2
+	.global	memcpy
+	.thumb
+	.thumb_func
+	.type	memcpy, %function
+memcpy:
+	@ r0: dst
+	@ r1: src
+	@ r2: len
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* In case of UNALIGNED access supported, ip is not used in
+	   function body.  */
+	mov	ip, r0
+#else
+	push	{r0}
+#endif
+	orr	r3, r1, r0
+	ands	r3, r3, #3
+	bne	.Lmisaligned_copy
+
+.Lbig_block:
+	subs	r2, __OPT_BIG_BLOCK_SIZE
+	blo	.Lmid_block
+
+	/* Kernel loop for big block copy */
+	.align 2
+.Lbig_block_loop:
+	BEGIN_UNROLL_BIG_BLOCK
+#ifdef __ARM_ARCH_7EM__
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	END_UNROLL
+#else /* __ARM_ARCH_7M__ */
+	ldr	r3, [r1, \offset]
+	str	r3, [r0, \offset]
+	END_UNROLL
+	adds	r0, __OPT_BIG_BLOCK_SIZE
+	adds	r1, __OPT_BIG_BLOCK_SIZE
+#endif
+	subs	r2, __OPT_BIG_BLOCK_SIZE
+	bhs .Lbig_block_loop
+
+.Lmid_block:
+	adds	r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE
+	blo	.Lcopy_word_by_word
+
+	/* Kernel loop for mid-block copy */
+	.align 2
+.Lmid_block_loop:
+	BEGIN_UNROLL_MID_BLOCK
+#ifdef __ARM_ARCH_7EM__
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	END_UNROLL
+#else /* __ARM_ARCH_7M__ */
+	ldr	r3, [r1, \offset]
+	str	r3, [r0, \offset]
+	END_UNROLL
+	adds    r0, __OPT_MID_BLOCK_SIZE
+	adds    r1, __OPT_MID_BLOCK_SIZE
+#endif
+	subs	r2, __OPT_MID_BLOCK_SIZE
+	bhs	.Lmid_block_loop
+
+.Lcopy_word_by_word:
+	adds	r2, __OPT_MID_BLOCK_SIZE - 4
+	blo	.Lcopy_less_than_4
+
+	/* Kernel loop for small block copy */
+	.align 2
+.Lcopy_word_by_word_loop:
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	subs	r2, #4
+	bhs	.Lcopy_word_by_word_loop
+
+.Lcopy_less_than_4:
+	adds	r2, #4
+	beq	.Ldone
+
+	lsls	r2, r2, #31
+	itt ne
+	ldrbne  r3, [r1], #1
+	strbne  r3, [r0], #1
+
+	bcc	.Ldone
+#ifdef __ARM_FEATURE_UNALIGNED
+	ldrh	r3, [r1]
+	strh	r3, [r0]
+#else
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	ldrb	r3, [r1, #1]
+	strb	r3, [r0, #1]
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+.Ldone:
+#ifdef __ARM_FEATURE_UNALIGNED
+	mov	r0, ip
+#else
+	pop	{r0}
+#endif
+	bx	lr
 
 	.align 2
-
-MEM_LongCopyTable:
-	.byte	(MEM_LongCopyEnd   - MEM_LongCopyJump) >> 2	/* 0 bytes left */
-	.byte	(MEM_LongCopyJump0 - MEM_LongCopyJump) >> 2	/* 4 bytes left */
-	.byte	(MEM_LongCopyJump1 - MEM_LongCopyJump) >> 2	/* 8 bytes left */
-	.byte	(MEM_LongCopyJump2 - MEM_LongCopyJump) >> 2	/* 12 bytes left */
-	.byte	(MEM_LongCopyJump3 - MEM_LongCopyJump) >> 2	/* 16 bytes left */
-	.byte	(MEM_LongCopyJump4 - MEM_LongCopyJump) >> 2	/* 20 bytes left */
-	.byte	(MEM_LongCopyJump5 - MEM_LongCopyJump) >> 2	/* 24 bytes left */
-	.byte	(MEM_LongCopyJump6 - MEM_LongCopyJump) >> 2	/* 28 bytes left */
-	.byte	(MEM_LongCopyJump7 - MEM_LongCopyJump) >> 2	/* 32 bytes left */
-	.byte	(MEM_LongCopyJump8 - MEM_LongCopyJump) >> 2	/* 36 bytes left */
-
-/************************************************************************************
- * Public Functions
- ************************************************************************************/
-/************************************************************************************
- * Name: memcpy
- *
- * Description:
- *   Optimized "general" copy routine
- *
- * Input Parameters:
- *   r0 = destination, r1 = source, r2 = length
- *
- * Returned Value:
- *   r0 = destination r1-r3 burned
- *
- ************************************************************************************/
-
-	.align 4
-
-memcpy:
-	push	{r14}
-	push    {r0}
-	bl      _do_memcpy
-	pop     {r0}
-	pop     {pc}
-
-	.align 4
-
-_do_memcpy:
-	push    {r14}
-	push    {r4}
-
-	/* This allows the inner workings to "assume" a minimum amount of bytes */
-	/* Quickly check for very short copies */
-
-	cmp		r2, #4
-	blt		MEM_DataCopyBytes
-
-	and		r14, r0, #3		 		/* Get destination alignment bits */
-	bfi		r14, r1, #2, #2	 		/* Get source alignment bits */
-
-	ldr		r3, =MEM_DataCopyTable	/* Jump table base address */
-	ldrb	r4, [r3, r14]			/* DWord offset for this alignment combination */
-	ldr		r3, =MEM_DataCopyJump	/* Base of branch table anchor */
-	add		r3, r3, r4, lsl #2		/* Absolute address of logic */
-	bx		r3
-
-	/* data copy branch table anchor */
-
-	.align 4
-MEM_DataCopyJump:
-
-/* Bits:  Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
- * 3 bytes to read for long word aligning
- */
-
-MEM_DataCopy5:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
- * 2 bytes to read for long word aligning
- */
-
-MEM_DataCopy10:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
- * 1 bytes to read for long word aligning
- */
-
-MEM_DataCopy15:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=00, Dst=00 - Long to Long copy */
-
-MEM_DataCopy0:
-	/* Save regs that may be used by memcpy */
-
-	push	{r5-r12}
-
-	/* Check for short word-aligned copy */
-
-	cmp		r2, #0x28
-	blt		MEM_DataCopy0_2
-
-	/* Bulk copy loop */
-
-MEM_DataCopy0_1:
-	ldmia	r1!, {r3-r12}
-	stmia	r0!, {r3-r12}
-	sub		r2, r2, #0x28
-	cmp		r2, #0x28
-	bge		MEM_DataCopy0_1
-
-	/* Copy remaining long words */
-
-MEM_DataCopy0_2:
-	ldr		r14, =MEM_LongCopyTable	/* Jump table base address */
-	lsr		r11, r2, 2				/* Convert byte count to word count */
-	add		r14, r14, r11			/* Jump table offset address */
-	ldrb	r3, [r14]				/* DWord offset from branch table anchor */
-	ldr		r11, =MEM_LongCopyJump	/* Address of branch table anchor */
-	add		r11, r11, r3, lsl #2	/* Absolute address into branch table */
-	bx		r11						/* Go there */
-
-	/* longword copy branch table anchor */
-
-MEM_LongCopyJump:
-
-MEM_LongCopyJump0:
-	ldr		r3, [r1], #0x04		/* 4 bytes remain */
-	str		r3, [r0], #0x04
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump1:
-	ldmia	r1!, {r3-r4}		/* 8 bytes remain */
-	stmia	r0!, {r3-r4}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump2:
-	ldmia	r1!, {r3-r5}		/* 12 bytes remain */
-	stmia	r0!, {r3-r5}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump3:
-	ldmia	r1!, {r3-r6}		/* 16 bytes remain */
-	stmia	r0!, {r3-r6}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump4:
-	ldmia	r1!, {r3-r7}		/* 20 bytes remain */
-	stmia	r0!, {r3-r7}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump5:
-	ldmia	r1!, {r3-r8}		/* 24 bytes remain */
-	stmia	r0!, {r3-r8}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump6:
-	ldmia	r1!, {r3-r9}		/* 28 bytes remain */
-	stmia	r0!, {r3-r9}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump7:
-	ldmia	r1!, {r3-r10}		/* 32 bytes remain */
-	stmia	r0!, {r3-r10}
-	b		MEM_LongCopyEnd
-
-MEM_LongCopyJump8:
-	ldmia	r1!, {r3-r11}		/* 36 bytes remain */
-	stmia	r0!, {r3-r11}
-
-MEM_LongCopyEnd:
-	pop		{r5-r12}
-	and		r2, r2, #0x03		/* All the longs have been copied */
-
-	/* Deal with up to 3 remaining bytes */
-
-MEM_DataCopyBytes:
-	/* Deal with up to 3 remaining bytes */
-
-	pop		{r4}
-	cmp		r2, #0x00
-	it		eq
-	popeq	{pc}
-
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	subs	r2, r2, #0x01
-	it		eq
-	popeq	{pc}
-
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	subs	r2, r2, #0x01
-	it		eq
-	popeq	{pc}
-
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	pop		{pc}
-
- .align 4
-
-/* Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
- * 3 bytes to read for long word aligning the source
- */
-
-MEM_DataCopy7:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=10, Dst=00 - Half word to long word
- * 2 bytes to read for long word aligning the source
- */
-
-MEM_DataCopy8:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
- * 1 byte to read for long word aligning the source
- */
-
-MEM_DataCopy13:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=00, Dst=10 - Long to Half word */
-
-MEM_DataCopy2:
-	cmp		r2, #0x28
-	blt		MEM_DataCopy2_1
-
-	/* Save regs */
-
-	push	{r5-r12}
-
-	/* Bulk copy loop */
-
-MEM_DataCopy2_2:
-	ldmia	r1!, {r3-r12}
-
-	strh	r3, [r0], #0x02
-
-	lsr		r3, r3, #0x10
-	bfi		r3, r4, #0x10, #0x10
-	lsr		r4, r4, #0x10
-	bfi		r4, r5, #0x10, #0x10
-	lsr		r5, r5, #0x10
-	bfi		r5, r6, #0x10, #0x10
-	lsr		r6, r6, #0x10
-	bfi		r6, r7, #0x10, #0x10
-	lsr		r7, r7, #0x10
-	bfi		r7, r8, #0x10, #0x10
-	lsr		r8, r8, #0x10
-	bfi		r8, r9, #0x10, #0x10
-	lsr		r9, r9, #0x10
-	bfi		r9, r10, #0x10, #0x10
-	lsr		r10, r10, #0x10
-	bfi		r10, r11, #0x10, #0x10
-	lsr		r11, r11, #0x10
-	bfi		r11, r12, #0x10, #0x10
-	stmia	r0!, {r3-r11}
-	lsr		r12, r12, #0x10
-	strh	r12, [r0], #0x02
-
-	sub		r2, r2, #0x28
-	cmp		r2, #0x28
-	bge		MEM_DataCopy2_2
-	pop		{r5-r12}
-
-MEM_DataCopy2_1: /* Read longs and write 2 x half words */
-	cmp		r2, #4
-	blt		MEM_DataCopyBytes
-	ldr		r3, [r1], #0x04
-	strh	r3, [r0], #0x02
-	lsr		r3, r3, #0x10
-	strh	r3, [r0], #0x02
-	sub		r2, r2, #0x04
-	b		MEM_DataCopy2
-
-/* Bits:  Src=01, Dst=00 - Byte before half word to long
- * Bits:  Src=01, Dst=10 - Byte before half word to half word
- * 3 bytes to read for long word aligning the source
- */
-
-MEM_DataCopy4:
-MEM_DataCopy6:
-	/* Read B and write B */
-
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=10, Dst=01 - Half word to byte before half word
- * Bits:  Src=10, Dst=11 - Half word to byte before long word
- * 2 bytes to read for long word aligning the source
- */
-
-MEM_DataCopy9:
-MEM_DataCopy11:
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=11, Dst=00 -chm Byte before long word to long word
- * Bits:  Src=11, Dst=11 - Byte before long word to half word
- * 1 byte to read for long word aligning the source
- */
-
-MEM_DataCopy12:
-MEM_DataCopy14:
-	/* Read B and write B */
-
-	ldrb	r3, [r1], #0x01
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x01
-
-/* Bits:  Src=00, Dst=01 - Long to Byte before half word
- * Bits:  Src=00, Dst=11 - Long to Byte before long word
- */
-
-MEM_DataCopy1: /* Read longs, write B->H->B */
-MEM_DataCopy3:
-	cmp		r2, #4
-	blt		MEM_DataCopyBytes
-	ldr		r3, [r1], #0x04
-	strb	r3, [r0], #0x01
-	lsr		r3, r3, #0x08
-	strh	r3, [r0], #0x02
-	lsr		r3, r3, #0x10
-	strb	r3, [r0], #0x01
-	sub		r2, r2, #0x04
-	b		MEM_DataCopy3
-
-	.size	memcpy, .-memcpy
-	.end
+.Lmisaligned_copy:
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* Define label DST_ALIGNED to BIG_BLOCK.  It will go to aligned copy
+	   once destination is adjusted to aligned.  */
+#define Ldst_aligned Lbig_block
+
+	/* Copy word by word using LDR when alignment can be done in hardware,
+	i.e., SCTLR.A is clear, so LDR and STR support unaligned access.  */
+
+	cmp	r2, #8
+	blo	.Lbyte_copy
+
+	/* if src is aligned, just go to the big block loop.  */
+	lsls	r3, r1, #30
+	beq	.Ldst_aligned
+#else
+	/* If len < 12, misalignment adjustment has more overhead than
+	a plain byte-by-byte copy.  Also, len must be >= 8 to guarantee
+	that the code afterward works correctly.  */
+	cmp	r2, #12
+	blo	.Lbyte_copy
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+	/* Align dst only, without trying to align src.  That is because
+	handling an aligned src with a misaligned dst needs more overhead than
+	the other way round.  The worst case is when the initial src is aligned:
+	an additional copy of up to 4 bytes will then be executed, which is
+	acceptable.  */
+
+	ands	r3, r0, #3
+	beq	.Ldst_aligned
+
+	rsb	r3, #4
+	subs	r2, r3
+
+	lsls    r3, r3, #31
+	itt ne
+	ldrbne  r3, [r1], #1
+	strbne  r3, [r0], #1
+
+	bcc .Ldst_aligned
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	ldrh    r3, [r1], #2
+	strh    r3, [r0], #2
+	b	.Ldst_aligned
+#else
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	/* Now that dst is aligned */
+.Ldst_aligned:
+	/* if r1 is aligned now, it means r0/r1 has the same misalignment,
+	and they are both aligned now.  Go aligned copy.  */
+	ands	r3, r1, #3
+	beq	.Lbig_block
+
+	/* dst is aligned, but src isn't.  Misaligned copy.  */
+
+	push	{r4, r5}
+	subs	r2, #4
+
+	/* Move r1 backward by the misaligned bytes to make r1 aligned.
+	Since we need to restore r1 to the unaligned address after the loop,
+	we keep the offset bytes in ip and subtract it from r1 afterward.  */
+	subs	r1, r3
+	rsb	ip, r3, #4
+
+	/* Pre-load one word */
+	ldr	r4, [r1], #4
+
+	cmp	r3, #2
+	beq	.Lmisaligned_copy_2_2
+	cmp	r3, #3
+	beq	.Lmisaligned_copy_3_1
+
+	.macro mis_src_copy shift
+1:
+#ifdef __ARM_BIG_ENDIAN
+	lsls	r4, r4, \shift
+#else
+	lsrs	r4, r4, \shift
+#endif
+	ldr	r3, [r1], #4
+#ifdef __ARM_BIG_ENDIAN
+	lsrs	r5, r3, 32-\shift
+#else
+	lsls	r5, r3, 32-\shift
+#endif
+	orr	r4, r4, r5
+	str	r4, [r0], #4
+	mov	r4, r3
+	subs	r2, #4
+	bhs	1b
+	.endm
+
+.Lmisaligned_copy_1_3:
+	mis_src_copy shift=8
+	b	.Lsrc_misaligned_tail
+
+.Lmisaligned_copy_3_1:
+	mis_src_copy shift=24
+	b	.Lsrc_misaligned_tail
+
+.Lmisaligned_copy_2_2:
+	/* For 2_2 misalignment, ldr is still faster than 2 x ldrh.  */
+	mis_src_copy shift=16
+
+.Lsrc_misaligned_tail:
+	adds	r2, #4
+	subs	r1, ip
+	pop	{r4, r5}
+
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+.Lbyte_copy:
+	subs	r2, #4
+	blo	.Lcopy_less_than_4
+
+.Lbyte_copy_loop:
+	subs    r2, #1
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	bhs	.Lbyte_copy_loop
+
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	ldrb	r3, [r1, #1]
+	strb	r3, [r0, #1]
+	ldrb	r3, [r1, #2]
+	strb	r3, [r0, #2]
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	mov	r0, ip
+#else
+	pop	{r0}
+#endif
+	bx	lr
+
+	.size	memcpy, .-memcpy
\ No newline at end of file
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S b/libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S
new file mode 100644
index 00000000000..8a54e908a82
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+	.syntax unified
+	.thumb
+	.globl	memmove
+	.type	memmove, %function
+memmove:	/* void *memmove(void *dst, const void *src, size_t n): r0 = dst, r1 = src, r2 = n; r0 is never written, so dst is returned */
+	push	{r4}			/* r4 carries each byte; push leaves the flags alone */
+	cmp	r0, r1
+	bls	.Lmemmove_forward	/* dst <= src: an ascending copy is safe */
+	adds	r3, r1, r2		/* r3 = src + n */
+	cmp	r0, r3
+	bhs	.Lmemmove_forward	/* dst >= src + n: the regions do not overlap */
+	adds	r1, r0, r2		/* r1 = dst + n, one past the last destination byte */
+	cbz	r2, .Lmemmove_return	/* n == 0: nothing to copy */
+	subs	r2, r3, r2		/* r2 = src, the address at which the descent stops */
+.Lmemmove_backward_loop:
+	ldrb	r4, [r3, #-1]!		/* pre-decrement the source cursor, then load */
+	cmp	r2, r3			/* source cursor back at src yet? */
+	strb	r4, [r1, #-1]!		/* pre-decrement the destination cursor, then store; flags untouched */
+	bne	.Lmemmove_backward_loop
+.Lmemmove_return:
+	pop	{r4}
+	bx	lr
+.Lmemmove_forward:
+	cmp	r2, #0
+	beq	.Lmemmove_return	/* n == 0: nothing to copy */
+	add	r2, r2, r1		/* r2 = src + n, the address at which the ascent stops */
+	subs	r3, r0, #1		/* r3 = dst - 1 so the pre-indexed store hits dst first */
+.Lmemmove_forward_loop:
+	ldrb	r4, [r1], #1		/* load, then advance the source cursor */
+	cmp	r2, r1			/* source cursor reached src + n yet? */
+	strb	r4, [r3, #1]!		/* advance the destination cursor, then store; flags untouched */
+	bne	.Lmemmove_forward_loop
+	pop	{r4}
+	bx	lr
+	.size memmove, . - memmove
\ No newline at end of file
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_memset.S b/libs/libc/machine/arm/armv7-a/gnu/arch_memset.S
new file mode 100644
index 00000000000..65d6d89dec0
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_memset.S
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+	.arm
+	.syntax unified
+	.global memset
+	.type	memset, %function
+memset:  // In: r0 = dst, r1 = fill byte, r2 = byte count.  r0 is never written, so dst is returned.  Uses r3, ip, q0-q3.
+        mov         r3, r0  // r3 is the write cursor
+        // At this point only d0, d1 are going to be used below.
+        vdup.8      q0, r1  // replicate the low byte of r1 into all 16 byte lanes of q0
+        cmp         r2, #16
+        blo         .L_set_less_than_16_unknown_align
+
+.L_check_alignment:
+        // Align destination to a double word to avoid the store crossing
+        // a cache line boundary.
+        ands        ip, r3, #7  // ip = dst & 7; zero means dst is already 8-byte aligned
+        bne         .L_do_double_word_align
+
+.L_double_word_aligned:
+        // Duplicate since the less than 64 can use d2, d3.
+        vmov        q1, q0
+        subs        r2, #64  // r2 = count - 64; a borrow means fewer than 64 bytes remain
+        blo         .L_set_less_than_64
+
+        // Duplicate the copy value so that we can store 64 bytes at a time.
+        vmov        q2, q0
+        vmov        q3, q0
+
+1:      // Main loop stores 64 bytes at a time.
+        subs        r2, #64  // flags set here are tested by the bge after the store
+        vstmia      r3!, {d0 - d7}
+        bge         1b
+
+.L_set_less_than_64:
+        // Restore r2 to the count of bytes left to set.
+        add         r2, #64
+        lsls        ip, r2, #27  // C = bit 5 of r2 (32 bytes pending), N = bit 4 (16 bytes pending)
+        bcc         .L_set_less_than_32
+        // Set 32 bytes.
+        vstmia      r3!, {d0 - d3}
+
+.L_set_less_than_32:
+        bpl         .L_set_less_than_16  // N is still the one produced by the lsls #27 above
+        // Set 16 bytes.
+        vstmia      r3!, {d0, d1}
+
+.L_set_less_than_16:
+        // Less than 16 bytes to set.
+        lsls        ip, r2, #29  // C = bit 3 of r2 (8 bytes pending), N = bit 2 (4 bytes pending)
+        bcc         .L_set_less_than_8
+
+        // Set 8 bytes.
+        vstmia      r3!, {d0}
+
+.L_set_less_than_8:
+        bpl         .L_set_less_than_4  // N is still the one produced by the lsls #29 above
+        // Set 4 bytes
+        vst1.32     {d0[0]}, [r3]!
+
+.L_set_less_than_4:
+        lsls        ip, r2, #31  // C = bit 1 of r2 (2 bytes pending); result is non-zero iff bit 0 is set (1 byte pending)
+        it          ne
+        strbne      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3]
+        bx          lr
+
+.L_do_double_word_align:
+        rsb         ip, ip, #8  // ip = bytes needed to reach the next 8-byte boundary (1..7)
+        sub         r2, r2, ip  // take them off the remaining count (count was >= 16 on this path)
+
+        // Do this comparison now, otherwise we'll need to save a
+        // register to the stack since we've used all available
+        // registers.
+        cmp         ip, #4
+        blo         1f
+
+        // Need to do a four byte copy.
+        movs        ip, ip, lsl #31  // N = bit 0 of ip (1 byte), C = bit 1 of ip (2 bytes)
+        it          mi
+        strbmi      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
+        vst1.32     {d0[0]}, [r3]!
+        b           .L_double_word_aligned
+
+1:
+        // No four byte copy.
+        movs        ip, ip, lsl #31  // N = bit 0 of ip (1 byte), C = bit 1 of ip (2 bytes)
+        it          mi
+        strbmi      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
+        b           .L_double_word_aligned
+
+.L_set_less_than_16_unknown_align:
+        // Set up to 15 bytes.
+        movs        ip, r2, lsl #29  // C = bit 3 of r2 (8 bytes), N = bit 2 of r2 (4 bytes)
+        bcc         1f
+        vst1.8      {d0}, [r3]!
+1:      bge         2f  // bge tests N == V; V is 0 here (left by the cmp r2, #16 at entry), so this skips when bit 2 is clear
+        vst1.32     {d0[0]}, [r3]!
+2:      movs        ip, r2, lsl #31  // N = bit 0 of r2 (1 byte), C = bit 1 of r2 (2 bytes)
+        it          mi
+        strbmi      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
+        bx          lr
+	.size memset, . - memset
\ No newline at end of file
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S b/libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S
new file mode 100644
index 00000000000..ad1b1d12c9e
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2011 The Android Open Source Project
+ * Copyright (c) 2008 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __ARMEB__	/* big-endian: the first byte in memory is the top byte of a loaded word */
+#define SHFT2LSB lsl
+#define SHFT2LSBEQ lsleq
+#define SHFT2MSB lsr
+#define SHFT2MSBEQ lsreq
+#define MSB 0x000000ff
+#define LSB 0xff000000
+#else	/* little-endian: the first byte in memory is the bottom byte of a loaded word */
+#define SHFT2LSB lsr
+#define SHFT2LSBEQ lsreq
+#define SHFT2MSB lsl
+#define SHFT2MSBEQ lsleq
+#define MSB 0xff000000
+#define LSB 0x000000ff
+#endif
+#define magic1(REG) REG
+#define magic2(REG) REG, lsl #7
+
+	.arm
+	.syntax unified
+	.global strcmp
+	.type	strcmp, %function
+strcmp:	/* In: r0 = s1, r1 = s2.  Out: r0 = 0 when equal, else (s1 byte - s2 byte) at the first difference. */
+	pld	[r0, #0]
+	pld	[r1, #0]
+	eor	r2, r0, r1
+	tst	r2, #3	/* non-zero iff s1 and s2 differ in their low two address bits */
+	/* Strings not at same byte offset from a word boundary.  */
+	bne	.Lstrcmp_unaligned
+	ands	r2, r0, #3	/* r2 = byte offset inside the word; its Z flag drives the ldreq/beq below */
+	bic	r0, r0, #3
+	bic	r1, r1, #3
+	ldr	ip, [r0], #4
+	it	eq
+	ldreq	r3, [r1], #4
+	beq	1f
+	/* Although s1 and s2 have identical initial alignment, they are
+	 * not currently word aligned.  Rather than comparing bytes,
+	 * make sure that any bytes fetched from before the addressed
+	 * bytes are forced to 0xff.  Then they will always compare
+	 * equal.
+	 */
+	eor	r2, r2, #3
+	lsl	r2, r2, #3	/* r2 = (3 - offset) * 8, a shift count in bits */
+	mvn	r3, #MSB
+	SHFT2LSB	r2, r3, r2	/* r2 = 0xff in every byte that precedes the string start */
+	ldr	r3, [r1], #4
+	orr	ip, ip, r2
+	orr	r3, r3, r2
+1:
+	/* Load the 'magic' constant 0x01010101. */
+	str	r4, [sp, #-4]!	/* save r4; reloaded right before this path's bx lr */
+	mov	r4, #1
+	orr	r4, r4, r4, lsl #8
+	orr	r4, r4, r4, lsl #16
+	.p2align	2
+4:
+	pld	[r0, #8]
+	pld	[r1, #8]
+	sub	r2, ip, magic1(r4)	/* r2 = s1 word - 0x01010101 */
+	cmp	ip, r3
+	itttt	eq
+	/* check for any zero bytes in first word */
+	biceq	r2, r2, ip
+	tsteq	r2, magic2(r4)	/* magic2(r4) = 0x80808080; Z stays set only when no byte of the word is zero */
+	ldreq	ip, [r0], #4
+	ldreq	r3, [r1], #4
+	beq	4b
+2:
+	/* There's a zero or a different byte in the word */
+	SHFT2MSB	r0, ip, #24	/* r0 = next s1 byte, all other bits zero */
+	SHFT2LSB	ip, ip, #8	/* consume that byte from ip */
+	cmp	r0, #1	/* carry clear iff the s1 byte is NUL */
+	it	cs
+	cmpcs	r0, r3, SHFT2MSB #24
+	it	eq
+	SHFT2LSBEQ r3, r3, #8
+	beq	2b
+	/* On a big-endian machine, r0 contains the desired byte in bits
+	 * 0-7; on a little-endian machine they are in bits 24-31.  In
+	 * both cases the other bits in r0 are all zero.  For r3 the
+	 * interesting byte is at the other end of the word, but the
+	 * other bits are not necessarily zero.  We need a signed result
+	 * representing the difference in the unsigned bytes, so for the
+	 * little-endian case we can't just shift the interesting bits up.
+	 */
+#ifdef __ARMEB__
+	sub	r0, r0, r3, lsr #24
+#else
+	and	r3, r3, #255
+	/* No RSB instruction in Thumb2 */
+#ifdef __thumb2__
+	lsr	r0, r0, #24
+	sub	r0, r0, r3
+#else
+	rsb	r0, r3, r0, lsr #24	/* r0 = (s1 byte) - (s2 byte) */
+#endif
+#endif
+	ldr	r4, [sp], #4
+	bx	lr
+.Lstrcmp_unaligned:
+	wp1 .req r0	/* cursor into s1 */
+	wp2 .req r1	/* cursor into s2 */
+	b1  .req r2	/* 0x01010101 during the word loops */
+	w1  .req r4	/* current word loaded through wp1 */
+	w2  .req r5	/* current word loaded through wp2 */
+	t1  .req ip
+	@ r3 is scratch
+	/* First of all, compare bytes until wp1(sp1) is word-aligned. */
+1:
+	tst	wp1, #3
+	beq	2f
+	ldrb	r2, [wp1], #1
+	ldrb	r3, [wp2], #1
+	cmp	r2, #1	/* carry clear iff the s1 byte is NUL */
+	it	cs
+	cmpcs	r2, r3
+	beq	1b
+	sub	r0, r2, r3	/* r4/r5 have not been pushed on this path, so nothing to restore */
+	bx	lr
+2:
+	str	r5, [sp, #-4]!
+	str	r4, [sp, #-4]!	/* r4 lands at the lower address, so the exits reload r4 first, then r5 */
+	mov	b1, #1
+	orr	b1, b1, b1, lsl #8
+	orr	b1, b1, b1, lsl #16	/* b1 = 0x01010101 */
+	and	t1, wp2, #3	/* t1 = byte offset of wp2 inside its word */
+	bic	wp2, wp2, #3
+	ldr	w1, [wp1], #4
+	ldr	w2, [wp2], #4
+	cmp	t1, #2
+	beq	2f	/* offset 2 */
+	bhi	3f	/* offset 3; lower offsets fall through */
+	/* Critical inner Loop: Block with 3 bytes initial overlap */
+	.p2align	2
+1:
+	bic	t1, w1, #MSB
+	cmp	t1, w2, SHFT2LSB #8
+	sub	r3, w1, b1
+	bic	r3, r3, w1	/* r3 = (w1 - 0x01010101) & ~w1, masked with 0x80808080 below to spot a NUL */
+	bne	4f
+	ands	r3, r3, b1, lsl #7
+	it	eq
+	ldreq	w2, [wp2], #4
+	bne	5f
+	eor	t1, t1, w1
+	cmp	t1, w2, SHFT2MSB #24
+	bne	6f
+	ldr	w1, [wp1], #4
+	b	1b
+4:
+	SHFT2LSB	w2, w2, #8
+	b	8f
+5:
+#ifdef __ARMEB__
+	/* The syndrome value may contain false ones if the string ends
+	 * with the bytes 0x01 0x00
+	 */
+	tst	w1, #0xff000000
+	itt	ne
+	tstne	w1, #0x00ff0000
+	tstne	w1, #0x0000ff00
+	beq	7f
+#else
+	bics	r3, r3, #0xff000000
+	bne	7f
+#endif
+	ldrb	w2, [wp2]
+	SHFT2LSB	t1, w1, #24
+#ifdef __ARMEB__
+	lsl	w2, w2, #24
+#endif
+	b	8f
+6:
+	SHFT2LSB	t1, w1, #24
+	and	w2, w2, #LSB
+	b	8f
+	/* Critical inner Loop: Block with 2 bytes initial overlap */
+	.p2align	2
+2:
+	SHFT2MSB	t1, w1, #16
+	sub	r3, w1, b1
+	SHFT2LSB	t1, t1, #16
+	bic	r3, r3, w1
+	cmp	t1, w2, SHFT2LSB #16
+	bne	4f
+	ands	r3, r3, b1, lsl #7
+	it	eq
+	ldreq	w2, [wp2], #4
+	bne	5f
+	eor	t1, t1, w1
+	cmp	t1, w2, SHFT2MSB #16
+	bne	6f
+	ldr	w1, [wp1], #4
+	b	2b
+5:
+#ifdef __ARMEB__
+	/* The syndrome value may contain false ones if the string ends
+	 * with the bytes 0x01 0x00
+	 */
+	tst	w1, #0xff000000
+	it	ne
+	tstne	w1, #0x00ff0000
+	beq	7f
+#else
+	lsls	r3, r3, #16
+	bne	7f
+#endif
+	ldrh	w2, [wp2]
+	SHFT2LSB	t1, w1, #16
+#ifdef __ARMEB__
+	lsl	w2, w2, #16
+#endif
+	b	8f
+6:
+	SHFT2MSB	w2, w2, #16
+	SHFT2LSB	t1, w1, #16
+4:
+	SHFT2LSB	w2, w2, #16
+	b	8f
+	/* Critical inner Loop: Block with 1 byte initial overlap */
+	.p2align	2
+3:
+	and	t1, w1, #LSB
+	cmp	t1, w2, SHFT2LSB #24
+	sub	r3, w1, b1
+	bic	r3, r3, w1
+	bne	4f
+	ands	r3, r3, b1, lsl #7
+	it	eq
+	ldreq	w2, [wp2], #4
+	bne	5f
+	eor	t1, t1, w1
+	cmp	t1, w2, SHFT2MSB #8
+	bne	6f
+	ldr	w1, [wp1], #4
+	b	3b
+4:
+	SHFT2LSB	w2, w2, #24
+	b	8f
+5:
+	/* The syndrome value may contain false ones if the string ends
+	 * with the bytes 0x01 0x00
+	 */
+	tst	w1, #LSB
+	beq	7f
+	ldr	w2, [wp2], #4
+6:
+	SHFT2LSB	t1, w1, #8
+	bic	w2, w2, #MSB
+	b	8f
+7:
+	mov	r0, #0	/* strings compared equal */
+	ldr	r4, [sp], #4
+	ldr	r5, [sp], #4
+	bx	lr
+8:
+	and	r2, t1, #LSB	/* r2 = next s1 byte still pending in t1 */
+	and	r0, w2, #LSB	/* r0 = next s2 byte still pending in w2 */
+	cmp	r0, #1	/* carry clear iff the s2 byte is NUL */
+	it	cs
+	cmpcs	r0, r2
+	itt	eq
+	SHFT2LSBEQ	t1, t1, #8
+	SHFT2LSBEQ	w2, w2, #8
+	beq	8b
+	sub	r0, r2, r0	/* result = (s1 byte) - (s2 byte) */
+	ldr	r4, [sp], #4
+	ldr	r5, [sp], #4
+	bx	lr
+	.size strcmp, . - strcmp
\ No newline at end of file
diff --git a/libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S b/libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
new file mode 100644
index 00000000000..a1e6c8b2bde
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
@@ -0,0 +1,179 @@
+/* Copyright (c) 2010-2011,2013 Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+      * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+      * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+      * Neither the name of Linaro Limited nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   Assumes:
+   ARMv6T2 or ARMv7E-M, AArch32
+ */
+
+/* Copyright (c) 2015 ARM Ltd.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+	 notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+	 notice, this list of conditions and the following disclaimer in the
+	 documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+	 names of its contributors may be used to endorse or promote products
+	 derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#include "acle-compat.h"
+
+	.macro def_fn f p2align=0	/* open a global function symbol in .text, aligned to 2^p2align bytes */
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+#ifdef __ARMEB__
+#define S2LO		lsl
+#define S2HI		lsr
+#else
+#define S2LO		lsr
+#define S2HI		lsl
+#endif
+
+	/* This code requires Thumb.  */
+#if __ARM_ARCH_PROFILE == 'M'
+	.arch   armv7e-m
+#else
+	.arch	armv6t2
+#endif
+	.eabi_attribute Tag_ARM_ISA_use, 0
+	.thumb
+	.syntax unified
+
+/* Parameters and result.  */
+#define srcin		r0
+#define result		r0
+
+/* Internal variables.  */
+#define src		r1
+#define data1a		r2
+#define data1b		r3
+#define const_m1	r12
+#define const_0		r4
+#define tmp1		r4		/* Overlaps const_0  */
+#define tmp2		r5
+
+def_fn	strlen p2align=6	/* In: r0 = string.  Out: r0 = length.  r4/r5 are saved on the stack and restored before returning. */
+	pld	[srcin, #0]
+	strd	r4, r5, [sp, #-8]!	/* save r4 (tmp1/const_0) and r5 (tmp2) */
+	bic	src, srcin, #7	/* src = string address rounded down to an 8-byte boundary */
+	mvn	const_m1, #0	/* const_m1 = 0xffffffff */
+	ands	tmp1, srcin, #7		/* (8 - bytes) to alignment.  */
+	pld	[src, #32]
+	bne.w	.Lmisaligned8
+	mov	const_0, #0
+	mov	result, #-8	/* biased so that the first add #8 in the loop brings it to 0 */
+.Lloop_aligned:
+	/* Bytes 0-7.  */
+	ldrd	data1a, data1b, [src]
+	pld	[src, #64]
+	add	result, result, #8
+.Lstart_realigned:
+	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for each byte that is non-zero.  */
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 8-15.  */
+	ldrd	data1a, data1b, [src, #8]
+	uadd8	data1a, data1a, const_m1	/* Saturating GE<0:3> set.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 16-23.  */
+	ldrd	data1a, data1b, [src, #16]
+	uadd8	data1a, data1a, const_m1	/* Saturating GE<0:3> set.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 24-31.  */
+	ldrd	data1a, data1b, [src, #24]
+	add	src, src, #32	/* src only advances once per 32-byte iteration */
+	uadd8	data1a, data1a, const_m1	/* Saturating GE<0:3> set.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cmp	data1b, #0
+	beq	.Lloop_aligned
+
+.Lnull_found:
+	cmp	data1a, #0	/* data1a == 0 means the NUL is in the second word of the pair */
+	itt	eq
+	addeq	result, result, #4
+	moveq	data1a, data1b
+#ifndef __ARMEB__
+	rev	data1a, data1a	/* little-endian: put the first memory byte at the top so clz counts from it */
+#endif
+	clz	data1a, data1a
+	ldrd	r4, r5, [sp], #8	/* restore r4/r5 saved at entry */
+	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
+	bx	lr
+
+.Lmisaligned8:
+	ldrd	data1a, data1b, [src]
+	and	tmp2, tmp1, #3
+	rsb	result, tmp1, #0	/* result = -(misalignment), cancelling the bytes that precede the string */
+	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
+	tst	tmp1, #4	/* non-zero when the string starts in the second word of the pair */
+	pld	[src, #64]
+	S2HI	tmp2, const_m1, tmp2
+	orn	data1a, data1a, tmp2	/* force the bytes before the start to 0xff so they cannot look like a NUL */
+	itt	ne
+	ornne	data1b, data1b, tmp2
+	movne	data1a, const_m1
+	mov	const_0, #0	/* tmp1 is no longer needed; r4 now serves as const_0 */
+	b	.Lstart_realigned
+	.size	strlen, . - strlen
\ No newline at end of file