diff --git a/libc/machine/armv7-a/Kconfig b/libc/machine/armv7-a/Kconfig index d63d564e0d7..7a216e711cb 100644 --- a/libc/machine/armv7-a/Kconfig +++ b/libc/machine/armv7-a/Kconfig @@ -6,9 +6,6 @@ config ARMV7A_MEMCPY bool "Enable optimized memcpy() for ARMv7-A" select LIBC_ARCH_MEMCPY - depends on ARM_TOOLCHAIN_GNU && EXPERIMENTAL + depends on ARM_TOOLCHAIN_GNU ---help--- Enable optimized ARMv7-A specific memcpy() library function - - Marked EXPERIMENTAL because it did not build for me the last time I - tried. diff --git a/libc/machine/armv7-a/gnu/arch_memcpy.S b/libc/machine/armv7-a/gnu/arch_memcpy.S index f5078c7481a..4e1fa3f0a8b 100644 --- a/libc/machine/armv7-a/gnu/arch_memcpy.S +++ b/libc/machine/armv7-a/gnu/arch_memcpy.S @@ -58,15 +58,6 @@ ************************************************************************************/ .global memcpy - -#if defined(CONFIG_ARCH_CORTEXA5) - .cpu cortex-a5 -#elif defined(CONFIG_ARCH_CORTEXA8) - .cpu cortex-a8 -#elif defined(CONFIG_ARCH_CORTEXA9) - .cpu cortex-a9 -#endif - .syntax unified .file "arch_memcpy.S" @@ -104,16 +95,16 @@ */ MEM_DataCopyTable: - .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1 @@ -124,18 +115,16 @@ MEM_DataCopyTable: .align 2 MEM_LongCopyTable: - .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ - .byte 0 /* 4 bytes left */ - .byte (1 * 10) >> 1 /* 8 bytes left */ - .byte (2 * 10) >> 1 /* 12 bytes left */ - .byte (3 * 10) >> 1 /* 16 bytes left */ - .byte (4 * 10) >> 1 /* 20 bytes left */ - .byte (5 * 10) >> 1 /* 24 bytes left */ - .byte (6 * 10) >> 1 /* 28 bytes left */ - .byte (7 * 10) >> 1 /* 32 bytes left */ - .byte (8 * 10) >> 1 /* 36 bytes left */ - - .align 2 + .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ + .byte (MEM_LongCopyJump0 - MEM_LongCopyJump) >> 1 /* 4 bytes left */ + .byte (MEM_LongCopyJump1 - MEM_LongCopyJump) >> 1 /* 8 bytes left */ + .byte (MEM_LongCopyJump2 - MEM_LongCopyJump) >> 1 /* 12 bytes left */ + .byte (MEM_LongCopyJump3 - MEM_LongCopyJump) >> 1 /* 16 bytes left */ + .byte (MEM_LongCopyJump4 - MEM_LongCopyJump) >> 1 /* 20 bytes left */ + .byte (MEM_LongCopyJump5 - MEM_LongCopyJump) >> 1 /* 24 bytes left */ + .byte (MEM_LongCopyJump6 - MEM_LongCopyJump) >> 1 /* 28 bytes left */ + .byte (MEM_LongCopyJump7 - MEM_LongCopyJump) >> 1 /* 32 bytes left */ + .byte (MEM_LongCopyJump8 - MEM_LongCopyJump) >> 1 /* 36 bytes left */ /************************************************************************************ * Public Functions @@ -167,20 +156,27 @@ memcpy: _do_memcpy: push {r14} + push {r4} /* This allows the inner workings to "assume" a minimum amount of bytes */ /* Quickly check for very short copies */ cmp r2, #4 - blt.n MEM_DataCopyBytes + blt MEM_DataCopyBytes and r14, r0, #3 /* Get destination alignment bits */ bfi r14, r1, #2, #2 /* Get source alignment bits */ - ldr r3, =MEM_DataCopyTable /* Jump table base */ - tbb [r3, r14] /* Perform jump on src/dst alignment bits */ -MEM_DataCopyJump: + + ldr r3, =MEM_DataCopyTable /* Jump table base address */ + ldrb r4, [r3, r14] /* Hword offset for this alignment combination */ + ldr r3, =MEM_DataCopyJump /* Base of branch table anchor */ + add r3, r3, r4, lsl #1 /* Absolute address of logic */ + bx r3 + + /* data copy branch table anchor */ .align 4 +MEM_DataCopyJump: /* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment * 3 bytes to read for long word aligning @@ -214,12 +210,12 @@ MEM_DataCopy15: MEM_DataCopy0: /* Save regs that may be used by memcpy */ - push {r4-r12} + push {r5-r12} /* Check for short word-aligned copy */ cmp r2, #0x28 - blt.n MEM_DataCopy0_2 + blt MEM_DataCopy0_2 /* Bulk copy loop */ @@ -228,49 +224,69 @@ MEM_DataCopy0_1: stmia r0!, {r3-r12} sub r2, r2, #0x28 cmp r2, #0x28 - bge.n MEM_DataCopy0_1 + bge MEM_DataCopy0_1 /* Copy remaining long words */ MEM_DataCopy0_2: - /* Copy remaining long words */ - - ldr r14, =MEM_LongCopyTable - lsr r11, r2, #0x02 - tbb [r14, r11] + ldr r14, =MEM_LongCopyTable /* Jump table base address */ + lsr r11, r2, 2 /* Convert byte count to word count */ + add r14, r14, r11 /* Jump table offset address */ + ldrb r3, [r14] /* HWord offset from branch table anchor */ + ldr r11, =MEM_LongCopyJump /* Address of branch table anchor */ + add r11, r11, r3, lsl #1 /* Absolute address into branch table */ + bx r11 /* Go there */ /* longword copy branch table anchor */ MEM_LongCopyJump: - ldr.w r3, [r1], #0x04 /* 4 bytes remain */ - str.w r3, [r0], #0x04 - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r4} /* 8 bytes remain */ - stmia.w r0!, {r3-r4} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r5} /* 12 bytes remain */ - stmia.w r0!, {r3-r5} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r6} /* 16 bytes remain */ - stmia.w r0!, {r3-r6} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r7} /* 20 bytes remain */ - stmia.w r0!, {r3-r7} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r8} /* 24 bytes remain */ - stmia.w r0!, {r3-r8} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r9} /* 28 bytes remain */ - stmia.w r0!, {r3-r9} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r10} /* 32 bytes remain */ - stmia.w r0!, {r3-r10} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r11} /* 36 bytes remain */ - stmia.w r0!, {r3-r11} + +MEM_LongCopyJump0: + ldr r3, [r1], #0x04 /* 4 bytes remain */ + str r3, [r0], #0x04 + b MEM_LongCopyEnd + +MEM_LongCopyJump1: + ldmia r1!, {r3-r4} /* 8 bytes remain */ + stmia r0!, {r3-r4} + b MEM_LongCopyEnd + +MEM_LongCopyJump2: + ldmia r1!, {r3-r5} /* 12 bytes remain */ + stmia r0!, {r3-r5} + b MEM_LongCopyEnd + +MEM_LongCopyJump3: + ldmia r1!, {r3-r6} /* 16 bytes remain */ + stmia r0!, {r3-r6} + b MEM_LongCopyEnd + +MEM_LongCopyJump4: + ldmia r1!, {r3-r7} /* 20 bytes remain */ + stmia r0!, {r3-r7} + b MEM_LongCopyEnd + +MEM_LongCopyJump5: + ldmia r1!, {r3-r8} /* 24 bytes remain */ + stmia r0!, {r3-r8} + b MEM_LongCopyEnd + +MEM_LongCopyJump6: + ldmia r1!, {r3-r9} /* 28 bytes remain */ + stmia r0!, {r3-r9} + b MEM_LongCopyEnd + +MEM_LongCopyJump7: + ldmia r1!, {r3-r10} /* 32 bytes remain */ + stmia r0!, {r3-r10} + b MEM_LongCopyEnd + +MEM_LongCopyJump8: + ldmia r1!, {r3-r11} /* 36 bytes remain */ + stmia r0!, {r3-r11} MEM_LongCopyEnd: - pop {r4-r12} + pop {r5-r12} and r2, r2, #0x03 /* All the longs have been copied */ /* Deal with up to 3 remaining bytes */ @@ -278,19 +294,23 @@ MEM_LongCopyEnd: MEM_DataCopyBytes: /* Deal with up to 3 remaining bytes */ + pop {r4} cmp r2, #0x00 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 subs r2, r2, #0x01 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 subs r2, r2, #0x01 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 pop {pc} @@ -328,11 +348,11 @@ MEM_DataCopy13: MEM_DataCopy2: cmp r2, #0x28 - blt.n MEM_DataCopy2_1 + blt MEM_DataCopy2_1 /* Save regs */ - push {r4-r12} + push {r5-r12} /* Bulk copy loop */ @@ -365,18 +385,18 @@ MEM_DataCopy2_2: sub r2, r2, #0x28 cmp r2, #0x28 - bge.n MEM_DataCopy2_2 - pop {r4-r12} + bge MEM_DataCopy2_2 + pop {r5-r12} MEM_DataCopy2_1: /* Read longs and write 2 x half words */ cmp r2, #4 - blt.n MEM_DataCopyBytes + blt MEM_DataCopyBytes ldr r3, [r1], #0x04 strh r3, [r0], #0x02 lsr r3, r3, #0x10 strh r3, [r0], #0x02 sub r2, r2, #0x04 - b.n MEM_DataCopy2 + b MEM_DataCopy2 /* Bits: Src=01, Dst=00 - Byte before half word to long * Bits: Src=01, Dst=10 - Byte before half word to half word @@ -430,7 +450,7 @@ MEM_DataCopy3: lsr r3, r3, #0x10 strb r3, [r0], #0x01 sub r2, r2, #0x04 - b.n MEM_DataCopy3 + b MEM_DataCopy3 .size memcpy, .-memcpy .end diff --git a/libc/machine/armv7-m/gnu/arch_memcpy.S b/libc/machine/armv7-m/gnu/arch_memcpy.S index 2b93ee65044..791a925d20d 100644 --- a/libc/machine/armv7-m/gnu/arch_memcpy.S +++ b/libc/machine/armv7-m/gnu/arch_memcpy.S @@ -57,18 +57,8 @@ ************************************************************************************/ .global memcpy - .syntax unified .thumb - -#if defined(CONFIG_ARCH_CORTEXM3) - .cpu cortex-m3 -#elif defined(CONFIG_ARCH_CORTEXM4) - .cpu cortex-m4 -#elif defined(CONFIG_ARCH_CORTEXM7) - .cpu cortex-m7 -#endif - .file "arch_memcpy.S" /************************************************************************************ diff --git a/libc/machine/armv7-r/gnu/arch_memcpy.S b/libc/machine/armv7-r/gnu/arch_memcpy.S index 46bb835addc..1f6efcb8826 100644 --- a/libc/machine/armv7-r/gnu/arch_memcpy.S +++ b/libc/machine/armv7-r/gnu/arch_memcpy.S @@ -58,21 +58,6 @@ ************************************************************************************/ .global memcpy - -#if defined(CONFIG_ARCH_CORTEXR4) - .cpu cortex-r4 -#elif defined(CONFIG_ARCH_CORTEXR4F) - .cpu cortex-r4f -#elif defined(CONFIG_ARCH_CORTEXR5) - .cpu cortex-r5 -#elif defined(CONFIG_ARCH_CORTEXR6F) - .cpu cortex-r5f -#elif defined(CONFIG_ARCH_CORTEXR7) - .cpu cortex-r7 -#elif defined(CONFIG_ARCH_CORTEXR7F) - .cpu cortex-r7f -endif - .syntax unified .file "arch_memcpy.S" @@ -110,16 +95,16 @@ endif */ MEM_DataCopyTable: - .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1 .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1 @@ -130,18 +115,16 @@ MEM_DataCopyTable: .align 2 MEM_LongCopyTable: - .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ - .byte 0 /* 4 bytes left */ - .byte (1 * 10) >> 1 /* 8 bytes left */ - .byte (2 * 10) >> 1 /* 12 bytes left */ - .byte (3 * 10) >> 1 /* 16 bytes left */ - .byte (4 * 10) >> 1 /* 20 bytes left */ - .byte (5 * 10) >> 1 /* 24 bytes left */ - .byte (6 * 10) >> 1 /* 28 bytes left */ - .byte (7 * 10) >> 1 /* 32 bytes left */ - .byte (8 * 10) >> 1 /* 36 bytes left */ - - .align 2 + .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ + .byte (MEM_LongCopyJump0 - MEM_LongCopyJump) >> 1 /* 4 bytes left */ + .byte (MEM_LongCopyJump1 - MEM_LongCopyJump) >> 1 /* 8 bytes left */ + .byte (MEM_LongCopyJump2 - MEM_LongCopyJump) >> 1 /* 12 bytes left */ + .byte (MEM_LongCopyJump3 - MEM_LongCopyJump) >> 1 /* 16 bytes left */ + .byte (MEM_LongCopyJump4 - MEM_LongCopyJump) >> 1 /* 20 bytes left */ + .byte (MEM_LongCopyJump5 - MEM_LongCopyJump) >> 1 /* 24 bytes left */ + .byte (MEM_LongCopyJump6 - MEM_LongCopyJump) >> 1 /* 28 bytes left */ + .byte (MEM_LongCopyJump7 - MEM_LongCopyJump) >> 1 /* 32 bytes left */ + .byte (MEM_LongCopyJump8 - MEM_LongCopyJump) >> 1 /* 36 bytes left */ /************************************************************************************ * Public Functions @@ -173,20 +156,27 @@ memcpy: _do_memcpy: push {r14} + push {r4} /* This allows the inner workings to "assume" a minimum amount of bytes */ /* Quickly check for very short copies */ cmp r2, #4 - blt.n MEM_DataCopyBytes + blt MEM_DataCopyBytes and r14, r0, #3 /* Get destination alignment bits */ bfi r14, r1, #2, #2 /* Get source alignment bits */ - ldr r3, =MEM_DataCopyTable /* Jump table base */ - tbb [r3, r14] /* Perform jump on src/dst alignment bits */ -MEM_DataCopyJump: + + ldr r3, =MEM_DataCopyTable /* Jump table base address */ + ldrb r4, [r3, r14] /* Hword offset for this alignment combination */ + ldr r3, =MEM_DataCopyJump /* Base of branch table anchor */ + add r3, r3, r4, lsl #1 /* Absolute address of logic */ + bx r3 + + /* data copy branch table anchor */ .align 4 +MEM_DataCopyJump: /* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment * 3 bytes to read for long word aligning @@ -220,12 +210,12 @@ MEM_DataCopy15: MEM_DataCopy0: /* Save regs that may be used by memcpy */ - push {r4-r12} + push {r5-r12} /* Check for short word-aligned copy */ cmp r2, #0x28 - blt.n MEM_DataCopy0_2 + blt MEM_DataCopy0_2 /* Bulk copy loop */ @@ -234,49 +224,69 @@ MEM_DataCopy0_1: stmia r0!, {r3-r12} sub r2, r2, #0x28 cmp r2, #0x28 - bge.n MEM_DataCopy0_1 + bge MEM_DataCopy0_1 /* Copy remaining long words */ MEM_DataCopy0_2: - /* Copy remaining long words */ - - ldr r14, =MEM_LongCopyTable - lsr r11, r2, #0x02 - tbb [r14, r11] + ldr r14, =MEM_LongCopyTable /* Jump table base address */ + lsr r11, r2, 2 /* Convert byte count to word count */ + add r14, r14, r11 /* Jump table offset address */ + ldrb r3, [r14] /* HWord offset from branch table anchor */ + ldr r11, =MEM_LongCopyJump /* Address of branch table anchor */ + add r11, r11, r3, lsl #1 /* Absolute address into branch table */ + bx r11 /* Go there */ /* longword copy branch table anchor */ MEM_LongCopyJump: - ldr.w r3, [r1], #0x04 /* 4 bytes remain */ - str.w r3, [r0], #0x04 - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r4} /* 8 bytes remain */ - stmia.w r0!, {r3-r4} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r5} /* 12 bytes remain */ - stmia.w r0!, {r3-r5} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r6} /* 16 bytes remain */ - stmia.w r0!, {r3-r6} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r7} /* 20 bytes remain */ - stmia.w r0!, {r3-r7} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r8} /* 24 bytes remain */ - stmia.w r0!, {r3-r8} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r9} /* 28 bytes remain */ - stmia.w r0!, {r3-r9} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r10} /* 32 bytes remain */ - stmia.w r0!, {r3-r10} - b.n MEM_LongCopyEnd - ldmia.w r1!, {r3-r11} /* 36 bytes remain */ - stmia.w r0!, {r3-r11} + +MEM_LongCopyJump0: + ldr r3, [r1], #0x04 /* 4 bytes remain */ + str r3, [r0], #0x04 + b MEM_LongCopyEnd + +MEM_LongCopyJump1: + ldmia r1!, {r3-r4} /* 8 bytes remain */ + stmia r0!, {r3-r4} + b MEM_LongCopyEnd + +MEM_LongCopyJump2: + ldmia r1!, {r3-r5} /* 12 bytes remain */ + stmia r0!, {r3-r5} + b MEM_LongCopyEnd + +MEM_LongCopyJump3: + ldmia r1!, {r3-r6} /* 16 bytes remain */ + stmia r0!, {r3-r6} + b MEM_LongCopyEnd + +MEM_LongCopyJump4: + ldmia r1!, {r3-r7} /* 20 bytes remain */ + stmia r0!, {r3-r7} + b MEM_LongCopyEnd + +MEM_LongCopyJump5: + ldmia r1!, {r3-r8} /* 24 bytes remain */ + stmia r0!, {r3-r8} + b MEM_LongCopyEnd + +MEM_LongCopyJump6: + ldmia r1!, {r3-r9} /* 28 bytes remain */ + stmia r0!, {r3-r9} + b MEM_LongCopyEnd + +MEM_LongCopyJump7: + ldmia r1!, {r3-r10} /* 32 bytes remain */ + stmia r0!, {r3-r10} + b MEM_LongCopyEnd + +MEM_LongCopyJump8: + ldmia r1!, {r3-r11} /* 36 bytes remain */ + stmia r0!, {r3-r11} MEM_LongCopyEnd: - pop {r4-r12} + pop {r5-r12} and r2, r2, #0x03 /* All the longs have been copied */ /* Deal with up to 3 remaining bytes */ @@ -284,19 +294,23 @@ MEM_LongCopyEnd: MEM_DataCopyBytes: /* Deal with up to 3 remaining bytes */ + pop {r4} cmp r2, #0x00 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 subs r2, r2, #0x01 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 subs r2, r2, #0x01 it eq popeq {pc} + ldrb r3, [r1], #0x01 strb r3, [r0], #0x01 pop {pc} @@ -334,11 +348,11 @@ MEM_DataCopy13: MEM_DataCopy2: cmp r2, #0x28 - blt.n MEM_DataCopy2_1 + blt MEM_DataCopy2_1 /* Save regs */ - push {r4-r12} + push {r5-r12} /* Bulk copy loop */ @@ -371,18 +385,18 @@ MEM_DataCopy2_2: sub r2, r2, #0x28 cmp r2, #0x28 - bge.n MEM_DataCopy2_2 - pop {r4-r12} + bge MEM_DataCopy2_2 + pop {r5-r12} MEM_DataCopy2_1: /* Read longs and write 2 x half words */ cmp r2, #4 - blt.n MEM_DataCopyBytes + blt MEM_DataCopyBytes ldr r3, [r1], #0x04 strh r3, [r0], #0x02 lsr r3, r3, #0x10 strh r3, [r0], #0x02 sub r2, r2, #0x04 - b.n MEM_DataCopy2 + b MEM_DataCopy2 /* Bits: Src=01, Dst=00 - Byte before half word to long * Bits: Src=01, Dst=10 - Byte before half word to half word @@ -436,7 +450,7 @@ MEM_DataCopy3: lsr r3, r3, #0x10 strb r3, [r0], #0x01 sub r2, r2, #0x04 - b.n MEM_DataCopy3 + b MEM_DataCopy3 .size memcpy, .-memcpy .end