diff --git a/libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S b/libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S index 6c1b62f7fb2..7e7c752c3b2 100644 --- a/libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S +++ b/libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S @@ -1,429 +1,329 @@ -/************************************************************************************ - * libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S - * - * armv7m-optimized memcpy, contributed by Mike Smith. Apparently in the public - * domain and is re-released here under the modified BSD license: - * - * Obtained via a posting on the Stellaris forum: - * http://e2e.ti.com/support/microcontrollers/\ - * stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx - * - * Posted by rocksoft on Jul 24, 2008 10:19 AM - * - * Hi, - * - * I recently finished a "memcpy" replacement and thought it might be useful for - * others... - * - * I've put some instructions and the code here: - * - * http://www.rock-software.net/downloads/memcpy/ - * - * Hope it works for you as well as it did for me. - * - * Liam. +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name NuttX nor the names of its contributors may be - * used to endorse or promote products derived from this software - * without specific prior written permission. + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - ************************************************************************************/ + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ -/************************************************************************************ - * Public Symbols - ************************************************************************************/ +/* This memcpy routine is optimised for Cortex-M3/M4 cores with/without + unaligned access. - .global memcpy - .syntax unified - .thumb - .file "arch_memcpy.S" + If compiled with GCC, this file should be enclosed within the following + pre-processing check: + if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__) -/************************************************************************************ - * .text - ************************************************************************************/ + Prototype: void *memcpy (void *dst, const void *src, size_t count); + The job will be done in 5 steps. + Step 1: Align src/dest pointers; use the misaligned copy if both cannot be aligned + Step 2: Repeatedly copy big blocks of __OPT_BIG_BLOCK_SIZE bytes + Step 3: Repeatedly copy mid blocks of __OPT_MID_BLOCK_SIZE bytes + Step 4: Copy word by word + Step 5: Copy byte by byte + + Tunable options: + __OPT_BIG_BLOCK_SIZE: Size of big block in bytes. Defaults to 64. + __OPT_MID_BLOCK_SIZE: Size of mid block in bytes. Defaults to 16.
+ */ +#ifndef __OPT_BIG_BLOCK_SIZE +#define __OPT_BIG_BLOCK_SIZE (4 * 16) +#endif + +#ifndef __OPT_MID_BLOCK_SIZE +#define __OPT_MID_BLOCK_SIZE (4 * 4) +#endif + +#if __OPT_BIG_BLOCK_SIZE == 16 +#define BEGIN_UNROLL_BIG_BLOCK \ + .irp offset, 0,4,8,12 +#elif __OPT_BIG_BLOCK_SIZE == 32 +#define BEGIN_UNROLL_BIG_BLOCK \ + .irp offset, 0,4,8,12,16,20,24,28 +#elif __OPT_BIG_BLOCK_SIZE == 64 +#define BEGIN_UNROLL_BIG_BLOCK \ + .irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60 +#else +#error "Illegal __OPT_BIG_BLOCK_SIZE" +#endif + +#if __OPT_MID_BLOCK_SIZE == 8 +#define BEGIN_UNROLL_MID_BLOCK \ + .irp offset, 0,4 +#elif __OPT_MID_BLOCK_SIZE == 16 +#define BEGIN_UNROLL_MID_BLOCK \ + .irp offset, 0,4,8,12 +#else +#error "Illegal __OPT_MID_BLOCK_SIZE" +#endif + +#define END_UNROLL .endr + + .syntax unified .text - -/************************************************************************************ - * Private Constant Data - ************************************************************************************/ - -/* We have 16 possible alignment combinations of src and dst, this jump table - * directs the copy operation - * - * Bits: Src=00, Dst=00 - Long to Long copy - * Bits: Src=00, Dst=01 - Long to Byte before half word - * Bits: Src=00, Dst=10 - Long to Half word - * Bits: Src=00, Dst=11 - Long to Byte before long word - * Bits: Src=01, Dst=00 - Byte before half word to long - * Bits: Src=01, Dst=01 - Byte before half word to byte before half word - - * Same alignment - * Bits: Src=01, Dst=10 - Byte before half word to half word - * Bits: Src=01, Dst=11 - Byte before half word to byte before long word - * Bits: Src=10, Dst=00 - Half word to long word - * Bits: Src=10, Dst=01 - Half word to byte before half word - * Bits: Src=10, Dst=10 - Half word to half word - Same Alignment - * Bits: Src=10, Dst=11 - Half word to byte before long word - * Bits: Src=11, Dst=00 - Byte before long word to long word - * Bits: Src=11, Dst=01 - Byte before long word to byte 
before half word - * Bits: Src=11, Dst=11 - Byte before long word to half word - * Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - - * Same alignment - */ - -MEM_DataCopyTable: - .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1 - - .align 2 - -MEM_LongCopyTable: - .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ - .byte 0 /* 4 bytes left */ - .byte (1 * 10) >> 1 /* 8 bytes left */ - .byte (2 * 10) >> 1 /* 12 bytes left */ - .byte (3 * 10) >> 1 /* 16 bytes left */ - .byte (4 * 10) >> 1 /* 20 bytes left */ - .byte (5 * 10) >> 1 /* 24 bytes left */ - .byte (6 * 10) >> 1 /* 28 bytes left */ - .byte (7 * 10) >> 1 /* 32 bytes left */ - .byte (8 * 10) >> 1 /* 36 bytes left */ - - .align 2 - -/************************************************************************************ - * Public Functions - ************************************************************************************/ -/************************************************************************************ - * Name: memcpy - * - * Description: - * Optimized "general" copy routine - * - * Input Parameters: - * r0 = destination, r1 = source, r2 = length - * - * Returned Value: - * r0 = destination r1-r3 burned - * - 
************************************************************************************/ - - .align 4 + .align 2 + .global memcpy + .thumb .thumb_func - + .type memcpy, %function memcpy: - push {r14} - push {r0} - bl _do_memcpy - pop {r0} - pop {pc} + @ r0: dst + @ r1: src + @ r2: len +#ifdef __ARM_FEATURE_UNALIGNED + /* When unaligned access is supported, ip is not otherwise used in the + function body, so it keeps the original dst to return in r0. */ + mov ip, r0 +#else + push {r0} /* ip is used as scratch on this path, so keep the original dst on the stack */ +#endif + orr r3, r1, r0 + ands r3, r3, #3 + bne .Lmisaligned_copy /* taken unless both src and dst are word-aligned */ - .align 4 +.Lbig_block: + subs r2, __OPT_BIG_BLOCK_SIZE + blo .Lmid_block /* fewer than __OPT_BIG_BLOCK_SIZE bytes remain; r2 stays biased by that amount */ - .thumb_func -_do_memcpy: - push {r14} + /* Kernel loop for big block copy */ + .align 2 +.Lbig_block_loop: + BEGIN_UNROLL_BIG_BLOCK +#ifdef __ARM_ARCH_7EM__ + ldr r3, [r1], #4 + str r3, [r0], #4 + END_UNROLL +#else /* __ARM_ARCH_7M__ */ + ldr r3, [r1, \offset] + str r3, [r0, \offset] + END_UNROLL + adds r0, __OPT_BIG_BLOCK_SIZE + adds r1, __OPT_BIG_BLOCK_SIZE +#endif + subs r2, __OPT_BIG_BLOCK_SIZE + bhs .Lbig_block_loop - /* This allows the inner workings to "assume" a minimum amount of bytes */ - /* Quickly check for very short copies */ +.Lmid_block: + adds r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE /* undo the big-block bias and apply the mid-block bias in one step */ + blo .Lcopy_word_by_word - cmp r2, #4 - blt.n MEM_DataCopyBytes + /* Kernel loop for mid-block copy */ + .align 2 +.Lmid_block_loop: + BEGIN_UNROLL_MID_BLOCK +#ifdef __ARM_ARCH_7EM__ + ldr r3, [r1], #4 + str r3, [r0], #4 + END_UNROLL +#else /* __ARM_ARCH_7M__ */ + ldr r3, [r1, \offset] + str r3, [r0, \offset] + END_UNROLL + adds r0, __OPT_MID_BLOCK_SIZE + adds r1, __OPT_MID_BLOCK_SIZE +#endif + subs r2, __OPT_MID_BLOCK_SIZE + bhs .Lmid_block_loop - and r14, r0, #3 /* Get destination alignment bits */ - bfi r14, r1, #2, #2 /* Get source alignment bits */ - ldr r3, =MEM_DataCopyTable /* Jump table base */ - tbb [r3, r14] /* Perform jump on src/dst alignment bits */ -MEM_DataCopyJump: +.Lcopy_word_by_word: + adds r2, __OPT_MID_BLOCK_SIZE - 4 + blo .Lcopy_less_than_4 - .align 4 + /* Kernel loop for
word-by-word copy */ + .align 2 +.Lcopy_word_by_word_loop: + ldr r3, [r1], #4 + str r3, [r0], #4 + subs r2, #4 + bhs .Lcopy_word_by_word_loop -/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment - * 3 bytes to read for long word aligning - */ +.Lcopy_less_than_4: + adds r2, #4 + beq .Ldone -MEM_DataCopy5: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 + lsls r2, r2, #31 /* NE if bit 0 of the remaining count is set; C = bit 1 of the remaining count */ + itt ne + ldrbne r3, [r1], #1 + strbne r3, [r0], #1 -/* Bits: Src=10, Dst=10 - Half word to half word - Same Alignment - * 2 bytes to read for long word aligning - */ + bcc .Ldone +#ifdef __ARM_FEATURE_UNALIGNED + ldrh r3, [r1] + strh r3, [r0] +#else + ldrb r3, [r1] + strb r3, [r0] + ldrb r3, [r1, #1] + strb r3, [r0, #1] +#endif /* __ARM_FEATURE_UNALIGNED */ -MEM_DataCopy10: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 +.Ldone: +#ifdef __ARM_FEATURE_UNALIGNED + mov r0, ip +#else + pop {r0} +#endif + bx lr -/* Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment - * 1 bytes to read for long word aligning - */ + .align 2 +.Lmisaligned_copy: +#ifdef __ARM_FEATURE_UNALIGNED + /* Alias label Ldst_aligned to Lbig_block, so control goes to the aligned copy + once the destination has been aligned. */ +#define Ldst_aligned Lbig_block -MEM_DataCopy15: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 + /* Copy word by word using LDR when unaligned access is handled in hardware, + i.e. LDR and STR accept unaligned addresses. NOTE(review): this comment used to say "SCTLR.A is set"; on ARMv7-M the control is presumably CCR.UNALIGN_TRP being clear - confirm. */ -/* Bits: Src=00, Dst=00 - Long to Long copy */ + cmp r2, #8 + blo .Lbyte_copy -MEM_DataCopy0: - /* Save regs that may be used by memcpy */ + /* If src is aligned, just go to the big block loop. */ + lsls r3, r1, #30 + beq .Ldst_aligned +#else + /* If len < 12, the misalignment adjustment has more overhead than + a plain byte-by-byte copy. Also, len must be >= 8 for the code + that follows to work correctly.
*/ + cmp r2, #12 + blo .Lbyte_copy +#endif /* __ARM_FEATURE_UNALIGNED */ - push {r4-r12} + /* Align dst only, not trying to align src. That is because + handling an aligned src with a misaligned dst needs more overhead than + the opposite case. The worst case is when the initial src is aligned: + up to 3 additional bytes are then copied individually, which is + acceptable. */ - /* Check for short word-aligned copy */ + ands r3, r0, #3 + beq .Ldst_aligned - cmp r2, #0x28 - blt.n MEM_DataCopy0_2 + rsb r3, #4 + subs r2, r3 - /* Bulk copy loop */ + lsls r3, r3, #31 /* NE if one byte must be copied; C set if two more bytes must be copied */ + itt ne + ldrbne r3, [r1], #1 + strbne r3, [r0], #1 -MEM_DataCopy0_1: - ldmia r1!, {r3-r12} - stmia r0!, {r3-r12} - sub r2, r2, #0x28 - cmp r2, #0x28 - bge.n MEM_DataCopy0_1 + bcc .Ldst_aligned - /* Copy remaining long words */ +#ifdef __ARM_FEATURE_UNALIGNED + ldrh r3, [r1], #2 + strh r3, [r0], #2 + b .Ldst_aligned +#else + ldrb r3, [r1], #1 + strb r3, [r0], #1 + ldrb r3, [r1], #1 + strb r3, [r0], #1 + /* Now that dst is aligned */ +.Ldst_aligned: + /* If r1 is aligned now, r0 and r1 had the same misalignment + and are both aligned now. Go to the aligned copy. */ + ands r3, r1, #3 + beq .Lbig_block -MEM_DataCopy0_2: - /* Copy remaining long words */ + /* dst is aligned, but src isn't. Misaligned copy. */ - ldr r14, =MEM_LongCopyTable - lsr r11, r2, #0x02 - tbb [r14, r11] + push {r4, r5} + subs r2, #4 - /* longword copy branch table anchor */ + /* Move r1 back by its misalignment (r3) to make r1 aligned. + Since r1 must be restored to the unaligned address after the loop, + keep (4 - misalignment) in ip and subtract it from r1 afterward.
*/ + subs r1, r3 + rsb ip, r3, #4 -MEM_LongCopyJump: - ldr.w r3, [r1], #0x04 /* 4 bytes remain */ - str.w r3, [r0], #0x04 - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r4} /* 8 bytes remain */ - stmia.w r0!, {r3-r4} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r5} /* 12 bytes remain */ - stmia.w r0!, {r3-r5} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r6} /* 16 bytes remain */ - stmia.w r0!, {r3-r6} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r7} /* 20 bytes remain */ - stmia.w r0!, {r3-r7} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r8} /* 24 bytes remain */ - stmia.w r0!, {r3-r8} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r9} /* 28 bytes remain */ - stmia.w r0!, {r3-r9} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r10} /* 32 bytes remain */ - stmia.w r0!, {r3-r10} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r11} /* 36 bytes remain */ - stmia.w r0!, {r3-r11} + /* Pre-load one word */ + ldr r4, [r1], #4 -MEM_LongCopyEnd: - pop {r4-r12} - and r2, r2, #0x03 /* All the longs have been copied */ + cmp r3, #2 + beq .Lmisaligned_copy_2_2 + cmp r3, #3 + beq .Lmisaligned_copy_3_1 - /* Deal with up to 3 remaining bytes */ + .macro mis_src_copy shift +1: +#ifdef __ARM_BIG_ENDIAN + lsls r4, r4, \shift +#else + lsrs r4, r4, \shift +#endif + ldr r3, [r1], #4 +#ifdef __ARM_BIG_ENDIAN + lsrs r5, r3, 32-\shift +#else + lsls r5, r3, 32-\shift +#endif + orr r4, r4, r5 + str r4, [r0], #4 + mov r4, r3 + subs r2, #4 + bhs 1b + .endm -MEM_DataCopyBytes: - /* Deal with up to 3 remaining bytes */ +.Lmisaligned_copy_1_3: + mis_src_copy shift=8 + b .Lsrc_misaligned_tail - cmp r2, #0x00 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - subs r2, r2, #0x01 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - subs r2, r2, #0x01 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - pop {pc} +.Lmisaligned_copy_3_1: + mis_src_copy shift=24 + b .Lsrc_misaligned_tail - .align 4 +.Lmisaligned_copy_2_2: + /* For 2_2 misalignment, ldr is still faster than 2 x ldrh.
*/ + mis_src_copy shift=16 -/* Bits: Src=01, Dst=11 - Byte before half word to byte before long word - * 3 bytes to read for long word aligning the source - */ +.Lsrc_misaligned_tail: + adds r2, #4 + subs r1, ip + pop {r4, r5} -MEM_DataCopy7: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 +#endif /* __ARM_FEATURE_UNALIGNED */ -/* Bits: Src=10, Dst=00 - Half word to long word - * 2 bytes to read for long word aligning the source - */ +.Lbyte_copy: + subs r2, #4 + blo .Lcopy_less_than_4 -MEM_DataCopy8: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 +.Lbyte_copy_loop: + subs r2, #1 + ldrb r3, [r1], #1 + strb r3, [r0], #1 + bhs .Lbyte_copy_loop /* tests the flags of the subs above; the loop leaves exactly 3 bytes for the code below */ -/* Bits: Src=11, Dst=01 - Byte before long word to byte before half word - * 1 byte to read for long word aligning the source - */ + ldrb r3, [r1] + strb r3, [r0] + ldrb r3, [r1, #1] + strb r3, [r0, #1] + ldrb r3, [r1, #2] + strb r3, [r0, #2] -MEM_DataCopy13: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 +#ifdef __ARM_FEATURE_UNALIGNED + mov r0, ip +#else + pop {r0} +#endif + bx lr -/* Bits: Src=00, Dst=10 - Long to Half word */ - -MEM_DataCopy2: - cmp r2, #0x28 - blt.n MEM_DataCopy2_1 - - /* Save regs */ - - push {r4-r12} - - /* Bulk copy loop */ - -MEM_DataCopy2_2: - ldmia r1!, {r3-r12} - - strh r3, [r0], #0x02 - - lsr r3, r3, #0x10 - bfi r3, r4, #0x10, #0x10 - lsr r4, r4, #0x10 - bfi r4, r5, #0x10, #0x10 - lsr r5, r5, #0x10 - bfi r5, r6, #0x10, #0x10 - lsr r6, r6, #0x10 - bfi r6, r7, #0x10, #0x10 - lsr r7, r7, #0x10 - bfi r7, r8, #0x10, #0x10 - lsr r8, r8, #0x10 - bfi r8, r9, #0x10, #0x10 - lsr r9, r9, #0x10 - bfi r9, r10, #0x10, #0x10 - lsr r10, r10, #0x10 - bfi r10, r11, #0x10, #0x10 - lsr r11, r11, #0x10 - bfi r11, r12, #0x10, #0x10 - stmia r0!, {r3-r11} - lsr r12, r12, #0x10 - strh r12, [r0], #0x02 - - sub r2, r2, #0x28 - cmp r2, #0x28 - bge.n MEM_DataCopy2_2 - pop {r4-r12} - -MEM_DataCopy2_1: /* Read longs and write 2 x half words */ - cmp r2, #4 - blt.n
MEM_DataCopyBytes - ldr r3, [r1], #0x04 - strh r3, [r0], #0x02 - lsr r3, r3, #0x10 - strh r3, [r0], #0x02 - sub r2, r2, #0x04 - b.n MEM_DataCopy2 - -/* Bits: Src=01, Dst=00 - Byte before half word to long - * Bits: Src=01, Dst=10 - Byte before half word to half word - * 3 bytes to read for long word aligning the source - */ - -MEM_DataCopy4: -MEM_DataCopy6: - /* Read B and write B */ - - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -/* Bits: Src=10, Dst=01 - Half word to byte before half word - * Bits: Src=10, Dst=11 - Half word to byte before long word - * 2 bytes to read for long word aligning the source - */ - -MEM_DataCopy9: -MEM_DataCopy11: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -/* Bits: Src=11, Dst=00 -chm Byte before long word to long word - * Bits: Src=11, Dst=11 - Byte before long word to half word - * 1 byte to read for long word aligning the source - */ - -MEM_DataCopy12: -MEM_DataCopy14: - /* Read B and write B */ - - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -/* Bits: Src=00, Dst=01 - Long to Byte before half word - * Bits: Src=00, Dst=11 - Long to Byte before long word - */ - -MEM_DataCopy1: /* Read longs, write B->H->B */ -MEM_DataCopy3: - cmp r2, #4 - blt MEM_DataCopyBytes - ldr r3, [r1], #0x04 - strb r3, [r0], #0x01 - lsr r3, r3, #0x08 - strh r3, [r0], #0x02 - lsr r3, r3, #0x10 - strb r3, [r0], #0x01 - sub r2, r2, #0x04 - b.n MEM_DataCopy3 - - .size memcpy, .-memcpy - .end + .size memcpy, .-memcpy \ No newline at end of file