Mirror of https://github.com/apache/nuttx.git (synced 2026-06-07 01:05:54 +08:00)
libc: Fix ARMv7-A/R memcpy assembly.
This commit is contained in:
@@ -6,9 +6,6 @@
|
||||
config ARMV7A_MEMCPY
|
||||
bool "Enable optimized memcpy() for ARMv7-A"
|
||||
select LIBC_ARCH_MEMCPY
|
||||
depends on ARM_TOOLCHAIN_GNU && EXPERIMENTAL
|
||||
depends on ARM_TOOLCHAIN_GNU
|
||||
---help---
|
||||
Enable optimized ARMv7-A specific memcpy() library function
|
||||
|
||||
Marked EXPERIMENTAL because it did not build for me the last time I
|
||||
tried.
|
||||
|
||||
@@ -58,15 +58,6 @@
|
||||
************************************************************************************/
|
||||
|
||||
.global memcpy
|
||||
|
||||
#if defined(CONFIG_ARCH_CORTEXA5)
|
||||
.cpu cortex-a5
|
||||
#elif defined(CONFIG_ARCH_CORTEXA8)
|
||||
.cpu cortex-a8
|
||||
#elif defined(CONFIG_ARCH_CORTEXA9)
|
||||
.cpu cortex-a9
|
||||
#endif
|
||||
|
||||
.syntax unified
|
||||
.file "arch_memcpy.S"
|
||||
|
||||
@@ -104,16 +95,16 @@
|
||||
*/
|
||||
|
||||
MEM_DataCopyTable:
|
||||
.byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1
|
||||
@@ -124,18 +115,16 @@ MEM_DataCopyTable:
|
||||
.align 2
|
||||
|
||||
MEM_LongCopyTable:
|
||||
.byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
|
||||
.byte 0 /* 4 bytes left */
|
||||
.byte (1 * 10) >> 1 /* 8 bytes left */
|
||||
.byte (2 * 10) >> 1 /* 12 bytes left */
|
||||
.byte (3 * 10) >> 1 /* 16 bytes left */
|
||||
.byte (4 * 10) >> 1 /* 20 bytes left */
|
||||
.byte (5 * 10) >> 1 /* 24 bytes left */
|
||||
.byte (6 * 10) >> 1 /* 28 bytes left */
|
||||
.byte (7 * 10) >> 1 /* 32 bytes left */
|
||||
.byte (8 * 10) >> 1 /* 36 bytes left */
|
||||
|
||||
.align 2
|
||||
.byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
|
||||
.byte (MEM_LongCopyJump0 - MEM_LongCopyJump) >> 1 /* 4 bytes left */
|
||||
.byte (MEM_LongCopyJump1 - MEM_LongCopyJump) >> 1 /* 8 bytes left */
|
||||
.byte (MEM_LongCopyJump2 - MEM_LongCopyJump) >> 1 /* 12 bytes left */
|
||||
.byte (MEM_LongCopyJump3 - MEM_LongCopyJump) >> 1 /* 16 bytes left */
|
||||
.byte (MEM_LongCopyJump4 - MEM_LongCopyJump) >> 1 /* 20 bytes left */
|
||||
.byte (MEM_LongCopyJump5 - MEM_LongCopyJump) >> 1 /* 24 bytes left */
|
||||
.byte (MEM_LongCopyJump6 - MEM_LongCopyJump) >> 1 /* 28 bytes left */
|
||||
.byte (MEM_LongCopyJump7 - MEM_LongCopyJump) >> 1 /* 32 bytes left */
|
||||
.byte (MEM_LongCopyJump8 - MEM_LongCopyJump) >> 1 /* 36 bytes left */
|
||||
|
||||
/************************************************************************************
|
||||
* Public Functions
|
||||
@@ -167,20 +156,27 @@ memcpy:
|
||||
|
||||
_do_memcpy:
|
||||
push {r14}
|
||||
push {r4}
|
||||
|
||||
/* This allows the inner workings to "assume" a minimum number of bytes */
|
||||
/* Quickly check for very short copies */
|
||||
|
||||
cmp r2, #4
|
||||
blt.n MEM_DataCopyBytes
|
||||
blt MEM_DataCopyBytes
|
||||
|
||||
and r14, r0, #3 /* Get destination alignment bits */
|
||||
bfi r14, r1, #2, #2 /* Get source alignment bits */
|
||||
ldr r3, =MEM_DataCopyTable /* Jump table base */
|
||||
tbb [r3, r14] /* Perform jump on src/dst alignment bits */
|
||||
MEM_DataCopyJump:
|
||||
|
||||
ldr r3, =MEM_DataCopyTable /* Jump table base address */
|
||||
ldrb r4, [r3, r14] /* Hword offset for this alignment combination */
|
||||
ldr r3, =MEM_DataCopyJump /* Base of branch table anchor */
|
||||
add r3, r3, r4, lsl #1 /* Absolute address of logic */
|
||||
bx r3
|
||||
|
||||
/* data copy branch table anchor */
|
||||
|
||||
.align 4
|
||||
MEM_DataCopyJump:
|
||||
|
||||
/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
|
||||
* 3 bytes to read for long word aligning
|
||||
@@ -214,12 +210,12 @@ MEM_DataCopy15:
|
||||
MEM_DataCopy0:
|
||||
/* Save regs that may be used by memcpy */
|
||||
|
||||
push {r4-r12}
|
||||
push {r5-r12}
|
||||
|
||||
/* Check for short word-aligned copy */
|
||||
|
||||
cmp r2, #0x28
|
||||
blt.n MEM_DataCopy0_2
|
||||
blt MEM_DataCopy0_2
|
||||
|
||||
/* Bulk copy loop */
|
||||
|
||||
@@ -228,49 +224,69 @@ MEM_DataCopy0_1:
|
||||
stmia r0!, {r3-r12}
|
||||
sub r2, r2, #0x28
|
||||
cmp r2, #0x28
|
||||
bge.n MEM_DataCopy0_1
|
||||
bge MEM_DataCopy0_1
|
||||
|
||||
/* Copy remaining long words */
|
||||
|
||||
MEM_DataCopy0_2:
|
||||
/* Copy remaining long words */
|
||||
|
||||
ldr r14, =MEM_LongCopyTable
|
||||
lsr r11, r2, #0x02
|
||||
tbb [r14, r11]
|
||||
ldr r14, =MEM_LongCopyTable /* Jump table base address */
|
||||
lsr r11, r2, 2 /* Convert byte count to word count */
|
||||
add r14, r14, r11 /* Jump table offset address */
|
||||
ldrb r3, [r14] /* HWord offset from branch table anchor */
|
||||
ldr r11, =MEM_LongCopyJump /* Address of branch table anchor */
|
||||
add r11, r11, r3, lsl #1 /* Absolute address into branch table */
|
||||
bx r11 /* Go there */
|
||||
|
||||
/* longword copy branch table anchor */
|
||||
|
||||
MEM_LongCopyJump:
|
||||
ldr.w r3, [r1], #0x04 /* 4 bytes remain */
|
||||
str.w r3, [r0], #0x04
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r4} /* 8 bytes remain */
|
||||
stmia.w r0!, {r3-r4}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r5} /* 12 bytes remain */
|
||||
stmia.w r0!, {r3-r5}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r6} /* 16 bytes remain */
|
||||
stmia.w r0!, {r3-r6}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r7} /* 20 bytes remain */
|
||||
stmia.w r0!, {r3-r7}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r8} /* 24 bytes remain */
|
||||
stmia.w r0!, {r3-r8}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r9} /* 28 bytes remain */
|
||||
stmia.w r0!, {r3-r9}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r10} /* 32 bytes remain */
|
||||
stmia.w r0!, {r3-r10}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r11} /* 36 bytes remain */
|
||||
stmia.w r0!, {r3-r11}
|
||||
|
||||
MEM_LongCopyJump0:
|
||||
ldr r3, [r1], #0x04 /* 4 bytes remain */
|
||||
str r3, [r0], #0x04
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump1:
|
||||
ldmia r1!, {r3-r4} /* 8 bytes remain */
|
||||
stmia r0!, {r3-r4}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump2:
|
||||
ldmia r1!, {r3-r5} /* 12 bytes remain */
|
||||
stmia r0!, {r3-r5}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump3:
|
||||
ldmia r1!, {r3-r6} /* 16 bytes remain */
|
||||
stmia r0!, {r3-r6}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump4:
|
||||
ldmia r1!, {r3-r7} /* 20 bytes remain */
|
||||
stmia r0!, {r3-r7}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump5:
|
||||
ldmia r1!, {r3-r8} /* 24 bytes remain */
|
||||
stmia r0!, {r3-r8}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump6:
|
||||
ldmia r1!, {r3-r9} /* 28 bytes remain */
|
||||
stmia r0!, {r3-r9}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump7:
|
||||
ldmia r1!, {r3-r10} /* 32 bytes remain */
|
||||
stmia r0!, {r3-r10}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump8:
|
||||
ldmia r1!, {r3-r11} /* 36 bytes remain */
|
||||
stmia r0!, {r3-r11}
|
||||
|
||||
MEM_LongCopyEnd:
|
||||
pop {r4-r12}
|
||||
pop {r5-r12}
|
||||
and r2, r2, #0x03 /* All the longs have been copied */
|
||||
|
||||
/* Deal with up to 3 remaining bytes */
|
||||
@@ -278,19 +294,23 @@ MEM_LongCopyEnd:
|
||||
MEM_DataCopyBytes:
|
||||
/* Deal with up to 3 remaining bytes */
|
||||
|
||||
pop {r4}
|
||||
cmp r2, #0x00
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
subs r2, r2, #0x01
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
subs r2, r2, #0x01
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
pop {pc}
|
||||
@@ -328,11 +348,11 @@ MEM_DataCopy13:
|
||||
|
||||
MEM_DataCopy2:
|
||||
cmp r2, #0x28
|
||||
blt.n MEM_DataCopy2_1
|
||||
blt MEM_DataCopy2_1
|
||||
|
||||
/* Save regs */
|
||||
|
||||
push {r4-r12}
|
||||
push {r5-r12}
|
||||
|
||||
/* Bulk copy loop */
|
||||
|
||||
@@ -365,18 +385,18 @@ MEM_DataCopy2_2:
|
||||
|
||||
sub r2, r2, #0x28
|
||||
cmp r2, #0x28
|
||||
bge.n MEM_DataCopy2_2
|
||||
pop {r4-r12}
|
||||
bge MEM_DataCopy2_2
|
||||
pop {r5-r12}
|
||||
|
||||
MEM_DataCopy2_1: /* Read longs and write 2 x half words */
|
||||
cmp r2, #4
|
||||
blt.n MEM_DataCopyBytes
|
||||
blt MEM_DataCopyBytes
|
||||
ldr r3, [r1], #0x04
|
||||
strh r3, [r0], #0x02
|
||||
lsr r3, r3, #0x10
|
||||
strh r3, [r0], #0x02
|
||||
sub r2, r2, #0x04
|
||||
b.n MEM_DataCopy2
|
||||
b MEM_DataCopy2
|
||||
|
||||
/* Bits: Src=01, Dst=00 - Byte before half word to long
|
||||
* Bits: Src=01, Dst=10 - Byte before half word to half word
|
||||
@@ -430,7 +450,7 @@ MEM_DataCopy3:
|
||||
lsr r3, r3, #0x10
|
||||
strb r3, [r0], #0x01
|
||||
sub r2, r2, #0x04
|
||||
b.n MEM_DataCopy3
|
||||
b MEM_DataCopy3
|
||||
|
||||
.size memcpy, .-memcpy
|
||||
.end
|
||||
|
||||
@@ -57,18 +57,8 @@
|
||||
************************************************************************************/
|
||||
|
||||
.global memcpy
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
|
||||
#if defined(CONFIG_ARCH_CORTEXM3)
|
||||
.cpu cortex-m3
|
||||
#elif defined(CONFIG_ARCH_CORTEXM4)
|
||||
.cpu cortex-m4
|
||||
#elif defined(CONFIG_ARCH_CORTEXM7)
|
||||
.cpu cortex-m7
|
||||
#endif
|
||||
|
||||
.file "arch_memcpy.S"
|
||||
|
||||
/************************************************************************************
|
||||
|
||||
@@ -58,21 +58,6 @@
|
||||
************************************************************************************/
|
||||
|
||||
.global memcpy
|
||||
|
||||
#if defined(CONFIG_ARCH_CORTEXR4)
|
||||
.cpu cortex-r4
|
||||
#elif defined(CONFIG_ARCH_CORTEXR4F)
|
||||
.cpu cortex-r4f
|
||||
#elif defined(CONFIG_ARCH_CORTEXR5)
|
||||
.cpu cortex-r5
|
||||
#elif defined(CONFIG_ARCH_CORTEXR6F)
|
||||
.cpu cortex-r5f
|
||||
#elif defined(CONFIG_ARCH_CORTEXR7)
|
||||
.cpu cortex-r7
|
||||
#elif defined(CONFIG_ARCH_CORTEXR7F)
|
||||
.cpu cortex-r7f
|
||||
endif
|
||||
|
||||
.syntax unified
|
||||
.file "arch_memcpy.S"
|
||||
|
||||
@@ -110,16 +95,16 @@ endif
|
||||
*/
|
||||
|
||||
MEM_DataCopyTable:
|
||||
.byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1
|
||||
.byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1
|
||||
@@ -130,18 +115,16 @@ MEM_DataCopyTable:
|
||||
.align 2
|
||||
|
||||
MEM_LongCopyTable:
|
||||
.byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
|
||||
.byte 0 /* 4 bytes left */
|
||||
.byte (1 * 10) >> 1 /* 8 bytes left */
|
||||
.byte (2 * 10) >> 1 /* 12 bytes left */
|
||||
.byte (3 * 10) >> 1 /* 16 bytes left */
|
||||
.byte (4 * 10) >> 1 /* 20 bytes left */
|
||||
.byte (5 * 10) >> 1 /* 24 bytes left */
|
||||
.byte (6 * 10) >> 1 /* 28 bytes left */
|
||||
.byte (7 * 10) >> 1 /* 32 bytes left */
|
||||
.byte (8 * 10) >> 1 /* 36 bytes left */
|
||||
|
||||
.align 2
|
||||
.byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
|
||||
.byte (MEM_LongCopyJump0 - MEM_LongCopyJump) >> 1 /* 4 bytes left */
|
||||
.byte (MEM_LongCopyJump1 - MEM_LongCopyJump) >> 1 /* 8 bytes left */
|
||||
.byte (MEM_LongCopyJump2 - MEM_LongCopyJump) >> 1 /* 12 bytes left */
|
||||
.byte (MEM_LongCopyJump3 - MEM_LongCopyJump) >> 1 /* 16 bytes left */
|
||||
.byte (MEM_LongCopyJump4 - MEM_LongCopyJump) >> 1 /* 20 bytes left */
|
||||
.byte (MEM_LongCopyJump5 - MEM_LongCopyJump) >> 1 /* 24 bytes left */
|
||||
.byte (MEM_LongCopyJump6 - MEM_LongCopyJump) >> 1 /* 28 bytes left */
|
||||
.byte (MEM_LongCopyJump7 - MEM_LongCopyJump) >> 1 /* 32 bytes left */
|
||||
.byte (MEM_LongCopyJump8 - MEM_LongCopyJump) >> 1 /* 36 bytes left */
|
||||
|
||||
/************************************************************************************
|
||||
* Public Functions
|
||||
@@ -173,20 +156,27 @@ memcpy:
|
||||
|
||||
_do_memcpy:
|
||||
push {r14}
|
||||
push {r4}
|
||||
|
||||
/* This allows the inner workings to "assume" a minimum number of bytes */
|
||||
/* Quickly check for very short copies */
|
||||
|
||||
cmp r2, #4
|
||||
blt.n MEM_DataCopyBytes
|
||||
blt MEM_DataCopyBytes
|
||||
|
||||
and r14, r0, #3 /* Get destination alignment bits */
|
||||
bfi r14, r1, #2, #2 /* Get source alignment bits */
|
||||
ldr r3, =MEM_DataCopyTable /* Jump table base */
|
||||
tbb [r3, r14] /* Perform jump on src/dst alignment bits */
|
||||
MEM_DataCopyJump:
|
||||
|
||||
ldr r3, =MEM_DataCopyTable /* Jump table base address */
|
||||
ldrb r4, [r3, r14] /* Hword offset for this alignment combination */
|
||||
ldr r3, =MEM_DataCopyJump /* Base of branch table anchor */
|
||||
add r3, r3, r4, lsl #1 /* Absolute address of logic */
|
||||
bx r3
|
||||
|
||||
/* data copy branch table anchor */
|
||||
|
||||
.align 4
|
||||
MEM_DataCopyJump:
|
||||
|
||||
/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
|
||||
* 3 bytes to read for long word aligning
|
||||
@@ -220,12 +210,12 @@ MEM_DataCopy15:
|
||||
MEM_DataCopy0:
|
||||
/* Save regs that may be used by memcpy */
|
||||
|
||||
push {r4-r12}
|
||||
push {r5-r12}
|
||||
|
||||
/* Check for short word-aligned copy */
|
||||
|
||||
cmp r2, #0x28
|
||||
blt.n MEM_DataCopy0_2
|
||||
blt MEM_DataCopy0_2
|
||||
|
||||
/* Bulk copy loop */
|
||||
|
||||
@@ -234,49 +224,69 @@ MEM_DataCopy0_1:
|
||||
stmia r0!, {r3-r12}
|
||||
sub r2, r2, #0x28
|
||||
cmp r2, #0x28
|
||||
bge.n MEM_DataCopy0_1
|
||||
bge MEM_DataCopy0_1
|
||||
|
||||
/* Copy remaining long words */
|
||||
|
||||
MEM_DataCopy0_2:
|
||||
/* Copy remaining long words */
|
||||
|
||||
ldr r14, =MEM_LongCopyTable
|
||||
lsr r11, r2, #0x02
|
||||
tbb [r14, r11]
|
||||
ldr r14, =MEM_LongCopyTable /* Jump table base address */
|
||||
lsr r11, r2, 2 /* Convert byte count to word count */
|
||||
add r14, r14, r11 /* Jump table offset address */
|
||||
ldrb r3, [r14] /* HWord offset from branch table anchor */
|
||||
ldr r11, =MEM_LongCopyJump /* Address of branch table anchor */
|
||||
add r11, r11, r3, lsl #1 /* Absolute address into branch table */
|
||||
bx r11 /* Go there */
|
||||
|
||||
/* longword copy branch table anchor */
|
||||
|
||||
MEM_LongCopyJump:
|
||||
ldr.w r3, [r1], #0x04 /* 4 bytes remain */
|
||||
str.w r3, [r0], #0x04
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r4} /* 8 bytes remain */
|
||||
stmia.w r0!, {r3-r4}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r5} /* 12 bytes remain */
|
||||
stmia.w r0!, {r3-r5}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r6} /* 16 bytes remain */
|
||||
stmia.w r0!, {r3-r6}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r7} /* 20 bytes remain */
|
||||
stmia.w r0!, {r3-r7}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r8} /* 24 bytes remain */
|
||||
stmia.w r0!, {r3-r8}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r9} /* 28 bytes remain */
|
||||
stmia.w r0!, {r3-r9}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r10} /* 32 bytes remain */
|
||||
stmia.w r0!, {r3-r10}
|
||||
b.n MEM_LongCopyEnd
|
||||
ldmia.w r1!, {r3-r11} /* 36 bytes remain */
|
||||
stmia.w r0!, {r3-r11}
|
||||
|
||||
MEM_LongCopyJump0:
|
||||
ldr r3, [r1], #0x04 /* 4 bytes remain */
|
||||
str r3, [r0], #0x04
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump1:
|
||||
ldmia r1!, {r3-r4} /* 8 bytes remain */
|
||||
stmia r0!, {r3-r4}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump2:
|
||||
ldmia r1!, {r3-r5} /* 12 bytes remain */
|
||||
stmia r0!, {r3-r5}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump3:
|
||||
ldmia r1!, {r3-r6} /* 16 bytes remain */
|
||||
stmia r0!, {r3-r6}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump4:
|
||||
ldmia r1!, {r3-r7} /* 20 bytes remain */
|
||||
stmia r0!, {r3-r7}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump5:
|
||||
ldmia r1!, {r3-r8} /* 24 bytes remain */
|
||||
stmia r0!, {r3-r8}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump6:
|
||||
ldmia r1!, {r3-r9} /* 28 bytes remain */
|
||||
stmia r0!, {r3-r9}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump7:
|
||||
ldmia r1!, {r3-r10} /* 32 bytes remain */
|
||||
stmia r0!, {r3-r10}
|
||||
b MEM_LongCopyEnd
|
||||
|
||||
MEM_LongCopyJump8:
|
||||
ldmia r1!, {r3-r11} /* 36 bytes remain */
|
||||
stmia r0!, {r3-r11}
|
||||
|
||||
MEM_LongCopyEnd:
|
||||
pop {r4-r12}
|
||||
pop {r5-r12}
|
||||
and r2, r2, #0x03 /* All the longs have been copied */
|
||||
|
||||
/* Deal with up to 3 remaining bytes */
|
||||
@@ -284,19 +294,23 @@ MEM_LongCopyEnd:
|
||||
MEM_DataCopyBytes:
|
||||
/* Deal with up to 3 remaining bytes */
|
||||
|
||||
pop {r4}
|
||||
cmp r2, #0x00
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
subs r2, r2, #0x01
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
subs r2, r2, #0x01
|
||||
it eq
|
||||
popeq {pc}
|
||||
|
||||
ldrb r3, [r1], #0x01
|
||||
strb r3, [r0], #0x01
|
||||
pop {pc}
|
||||
@@ -334,11 +348,11 @@ MEM_DataCopy13:
|
||||
|
||||
MEM_DataCopy2:
|
||||
cmp r2, #0x28
|
||||
blt.n MEM_DataCopy2_1
|
||||
blt MEM_DataCopy2_1
|
||||
|
||||
/* Save regs */
|
||||
|
||||
push {r4-r12}
|
||||
push {r5-r12}
|
||||
|
||||
/* Bulk copy loop */
|
||||
|
||||
@@ -371,18 +385,18 @@ MEM_DataCopy2_2:
|
||||
|
||||
sub r2, r2, #0x28
|
||||
cmp r2, #0x28
|
||||
bge.n MEM_DataCopy2_2
|
||||
pop {r4-r12}
|
||||
bge MEM_DataCopy2_2
|
||||
pop {r5-r12}
|
||||
|
||||
MEM_DataCopy2_1: /* Read longs and write 2 x half words */
|
||||
cmp r2, #4
|
||||
blt.n MEM_DataCopyBytes
|
||||
blt MEM_DataCopyBytes
|
||||
ldr r3, [r1], #0x04
|
||||
strh r3, [r0], #0x02
|
||||
lsr r3, r3, #0x10
|
||||
strh r3, [r0], #0x02
|
||||
sub r2, r2, #0x04
|
||||
b.n MEM_DataCopy2
|
||||
b MEM_DataCopy2
|
||||
|
||||
/* Bits: Src=01, Dst=00 - Byte before half word to long
|
||||
* Bits: Src=01, Dst=10 - Byte before half word to half word
|
||||
@@ -436,7 +450,7 @@ MEM_DataCopy3:
|
||||
lsr r3, r3, #0x10
|
||||
strb r3, [r0], #0x01
|
||||
sub r2, r2, #0x04
|
||||
b.n MEM_DataCopy3
|
||||
b MEM_DataCopy3
|
||||
|
||||
.size memcpy, .-memcpy
|
||||
.end
|
||||
|
||||
Reference in New Issue
Block a user