libc:machine:xtensa:add xtensa libc implement

N/A

Signed-off-by: zhuyanlin <zhuyanlin1@xiaomi.com>
This commit is contained in:
zhuyanlin
2021-10-28 11:56:18 +08:00
committed by Xiang Xiao
parent 580d17cc02
commit cfcff5f570
10 changed files with 2472 additions and 2 deletions
+43
View File
@@ -2,3 +2,46 @@
# For a description of the syntax of this configuration file,
# see the file kconfig-language.txt in the NuttX tools repository.
#
# Optimized Xtensa implementations of libc routines.  Each option selects
# the corresponding LIBC_ARCH_* symbol.

config XTENSA_MEMCPY
	bool "Enable optimized memcpy() for XTENSA"
	select LIBC_ARCH_MEMCPY
	---help---
		Enable optimized XTENSA specific memcpy() library function

config XTENSA_MEMMOVE
	bool "Enable optimized memmove() for XTENSA"
	select LIBC_ARCH_MEMMOVE
	---help---
		Enable optimized XTENSA specific memmove() library function

config XTENSA_MEMSET
	bool "Enable optimized memset() for XTENSA"
	select LIBC_ARCH_MEMSET
	---help---
		Enable optimized XTENSA specific memset() library function

config XTENSA_STRCMP
	bool "Enable optimized strcmp() for XTENSA"
	select LIBC_ARCH_STRCMP
	---help---
		Enable optimized XTENSA specific strcmp() library function

config XTENSA_STRCPY
	bool "Enable optimized strcpy() for XTENSA"
	select LIBC_ARCH_STRCPY
	---help---
		Enable optimized XTENSA specific strcpy() library function

config XTENSA_STRLEN
	bool "Enable optimized strlen() for XTENSA"
	select LIBC_ARCH_STRLEN
	---help---
		Enable optimized XTENSA specific strlen() library function

config XTENSA_STRNCPY
	bool "Enable optimized strncpy() for XTENSA"
	select LIBC_ARCH_STRNCPY
	---help---
		Enable optimized XTENSA specific strncpy() library function
+29 -2
View File
@@ -19,10 +19,37 @@
############################################################################
# Architecture-specific ELF support (C source).

ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
CSRCS += arch_elf.c
endif

# Optimized assembly routines; each one is added only when its
# CONFIG_XTENSA_* option is enabled.

ifeq ($(CONFIG_XTENSA_MEMCPY),y)
ASRCS += arch_memcpy.S
endif

ifeq ($(CONFIG_XTENSA_MEMMOVE),y)
ASRCS += arch_memmove.S
endif

ifeq ($(CONFIG_XTENSA_MEMSET),y)
ASRCS += arch_memset.S
endif

ifeq ($(CONFIG_XTENSA_STRCPY),y)
ASRCS += arch_strcpy.S
endif

ifeq ($(CONFIG_XTENSA_STRLEN),y)
ASRCS += arch_strlen.S
endif

ifeq ($(CONFIG_XTENSA_STRNCPY),y)
ASRCS += arch_strncpy.S
endif

ifeq ($(CONFIG_XTENSA_STRCMP),y)
ASRCS += arch_strcmp.S
endif

# The search paths are set unconditionally: every conditional above is
# already closed, so no further endif belongs here (an unmatched endif is
# a make syntax error).

DEPPATH += --dep-path machine/xtensa
VPATH += :machine/xtensa
+281
View File
@@ -0,0 +1,281 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_memcpy.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Pre-processor Macros
****************************************************************************/
/* set to 1 when running on ISS (simulator) with the
lint or ferret client, or 0 to save a few cycles */
#define SIM_CHECKS_ALIGNMENT 0
/****************************************************************************
* Public Functions
****************************************************************************/
/* memcpy entry point.
 *
 * Register roles (from the register comment below):
 *   a2 = dst, left unmodified so that it is also the return value
 *   a3 = src
 *   a4 = len
 *   a5 = working copy of dst that is advanced while copying
 *
 * ENTRY()/RET() are not defined in this file; presumably they are macros
 * from <arch/xtensa/xtensa_abi.h> -- TODO confirm.
 */
.section .text
.begin schedule
.literal_position
.local .Ldst1mod2
.local .Ldst2mod4
.local .Lbytecopy
.align 4
.global memcpy
.type memcpy, @function
memcpy:
ENTRY(16)
/* a2 = dst, a3 = src, a4 = len */
mov a5, a2 # copy dst so that a2 is return value
/* If bit 0 or bit 1 of dst is set, dst is not word-aligned: branch to the
 * fix-up code, which jumps back to .Ldstaligned once dst is aligned.
 */
bbsi.l a2, 0, .Ldst1mod2
bbsi.l a2, 1, .Ldst2mod4
/* Main path: a5 (dst) is word-aligned here.  a7 = len / 16 is the number
 * of 16-byte iterations; the low four bits of a4 (len) drive the tail.
 */
.Ldstaligned:
/* Get number of loop iterations with 16B per iteration. */
srli a7, a4, 4
/* Check if source is aligned. */
/* Shifting left by 30 keeps only the two low address bits of src. */
slli a8, a3, 30
bnez a8, .Lsrcunaligned
/* Destination and source are word-aligned, use word copy. */
#if XCHAL_HAVE_LOOPS
loopnez a7, 2f
#else
beqz a7, 2f
slli a8, a7, 4
add a8, a8, a3 # a8 = end of last 16B source chunk
#endif
/* Loop body: a6 and a7 alternate as data registers, with loads and stores
 * interleaved.  a7 is free to be reused because the iteration count has
 * already been consumed by loopnez, or turned into the end address in a8.
 */
1: l32i a6, a3, 0
l32i a7, a3, 4
s32i a6, a5, 0
l32i a6, a3, 8
s32i a7, a5, 4
l32i a7, a3, 12
s32i a6, a5, 8
addi a3, a3, 16
s32i a7, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
bltu a3, a8, 1b
#endif
/* Copy any leftover pieces smaller than 16B. */
/* Bits 3, 2, 1 and 0 of len select the 8-, 4-, 2- and 1-byte copies. */
2: bbci.l a4, 3, 3f
/* Copy 8 bytes. */
l32i a6, a3, 0
l32i a7, a3, 4
addi a3, a3, 8
s32i a6, a5, 0
s32i a7, a5, 4
addi a5, a5, 8
3: bbsi.l a4, 2, 4f
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
RET(16)
# .align 4
/* Copy 4 bytes. */
4: l32i a6, a3, 0
addi a3, a3, 4
s32i a6, a5, 0
addi a5, a5, 4
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
RET(16)
/* Copy 2 bytes. */
5: l16ui a6, a3, 0
addi a3, a3, 2
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, 6f
RET(16)
/* Copy 1 byte. */
6: l8ui a6, a3, 0
s8i a6, a5, 0
/* .Ldone is also the target of the zero-length check in .Lsrcunaligned. */
.Ldone:
RET(16)
/* Destination is aligned; source is unaligned. */
/* Shifting copy: source words are loaded with l32i and adjacent pairs are
 * combined with src_b into destination words.  a6 always carries the most
 * recently loaded source word that has not been fully consumed yet.
 * On entry a7 = len / 16 and a8 = src << 30 (set at .Ldstaligned).
 * ssa8 and src_b are not defined in this file; presumably they come from
 * "xtensa_asm.h" -- TODO confirm.
 */
# .align 4
.Lsrcunaligned:
/* Avoid loading anything for zero-length copies. */
beqz a4, .Ldone
/* Copy 16 bytes per iteration for word-aligned dst and
unaligned src. */
ssa8 a3 # set shift amount from byte offset
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
/* a8 >> 30 recovers the two low bits of src; a3 is rounded down to a
 * word boundary and the offset is kept in a11 to be added back later.
 */
srli a11, a8, 30 # save unalignment offset for below
sub a3, a3, a11 # align a3
#endif
l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
loopnez a7, 2f
#else
beqz a7, 2f
slli a10, a7, 4
add a10, a10, a3 # a10 = end of last 16B source chunk
#endif
/* Loop body: loads words at offsets 4, 8, 12 and 16.  The word at offset
 * 16 becomes the carried word (a6) for the next iteration or for the tail.
 */
1: l32i a7, a3, 4
l32i a8, a3, 8
src_b a6, a6, a7
s32i a6, a5, 0
l32i a9, a3, 12
src_b a7, a7, a8
s32i a7, a5, 4
l32i a6, a3, 16
src_b a8, a8, a9
s32i a8, a5, 8
addi a3, a3, 16
src_b a9, a9, a6
s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
bltu a3, a10, 1b
#endif
/* Tail: bit 3 of len selects an 8-byte step, bit 2 a 4-byte step. */
2: bbci.l a4, 3, 3f
/* Copy 8 bytes. */
l32i a7, a3, 4
l32i a8, a3, 8
src_b a6, a6, a7
s32i a6, a5, 0
addi a3, a3, 8
src_b a7, a7, a8
s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
3: bbci.l a4, 2, 4f
/* Copy 4 bytes. */
l32i a7, a3, 4
addi a3, a3, 4
src_b a6, a6, a7
s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
4:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
add a3, a3, a11 # readjust a3 with correct misalignment
#endif
/* The last 2 and 1 bytes are copied with byte accesses from the true
 * (unaligned) source address.
 */
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
RET(16)
/* Copy 2 bytes. */
5: l8ui a6, a3, 0
l8ui a7, a3, 1
addi a3, a3, 2
s8i a6, a5, 0
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, 6f
RET(16)
/* Copy 1 byte. */
6: l8ui a6, a3, 0
s8i a6, a5, 0
RET(16)
/* Byte-at-a-time copy of a4 bytes from a3 (src) to a5 (dst); used for
 * short copies whose destination is not word-aligned.  Returns directly.
 *
 * The alignment directives below are kept disabled, as in the original:
 *   .align XCHAL_INST_FETCH_WIDTH
 *   .skip XCHAL_INST_FETCH_WIDTH - 3
 *   (skip bytes to get proper alignment for three-byte loop)
 */
__memcpy_aux:
.Lbytecopy:
#if !XCHAL_HAVE_LOOPS
	beqz	a4, .Lbytecopy_done	/* nothing to copy */
	add	a7, a3, a4		/* a7 = end address for source */
#else
	loopnez	a4, .Lbytecopy_done	/* hardware loop, a4 iterations */
#endif
.Lbytecopy_next:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bltu	a3, a7, .Lbytecopy_next	/* until src reaches the end */
#endif
.Lbytecopy_done:
	RET(16)
/* Destination is unaligned. */
/* Fix-up code reached from the memcpy entry: copies 1 and/or 2 leading
 * bytes so that a5 (dst) becomes word-aligned, reducing a4 (len)
 * accordingly, then rejoins the main algorithm at .Ldstaligned.
 * Short copies (len < 7 here, len < 6 below) go to .Lbytecopy instead.
 */
# .align 4
.Ldst1mod2: # dst is only byte aligned
/* Do short copies byte-by-byte. */
bltui a4, 7, .Lbytecopy
/* Copy 1 byte. */
l8ui a6, a3, 0
addi a3, a3, 1
addi a4, a4, -1
s8i a6, a5, 0
addi a5, a5, 1
/* Return to main algorithm if dst is now aligned. */
bbci.l a5, 1, .Ldstaligned
/* Otherwise fall through: dst is now 2 mod 4. */
.Ldst2mod4: # dst has 16-bit alignment
/* Do short copies byte-by-byte. */
bltui a4, 6, .Lbytecopy
/* Copy 2 bytes. */
/* Byte accesses are used, so no alignment of src is assumed. */
l8ui a6, a3, 0
l8ui a7, a3, 1
addi a3, a3, 2
addi a4, a4, -2
s8i a6, a5, 0
s8i a7, a5, 1
addi a5, a5, 2
/* dst is now aligned; return to main algorithm. */
j .Ldstaligned
.end schedule
.size memcpy, . - memcpy
+480
View File
@@ -0,0 +1,480 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_memmove.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Pre-processor Macros
****************************************************************************/
/* set to 1 when running on ISS (simulator) with the
lint or ferret client, or 0 to save a few cycles */
#define SIM_CHECKS_ALIGNMENT 0
/****************************************************************************
* Public Functions
****************************************************************************/
/* Section setup for memmove, followed by the forward byte-copy helper.
 * The helper code in this file is placed ahead of the memmove label; it
 * is reached only by branches and is not a separate entry point.
 *
 * .Lbytecopy copies a4 bytes forward from a3 (src) to a5 (dst), one byte
 * at a time, and returns.
 */
.text
.begin schedule
.global memmove
/*
* Byte by byte copy
*/
.align 4
.byte 0 # 1 mod 4 alignment for LOOPNEZ
# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
beqz a4, .Lbytecopydone
add a7, a3, a4 # a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
l8ui a6, a3, 0
addi a3, a3, 1
s8i a6, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
bne a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
RET(16)
/*
* Destination is unaligned
*/
/* Forward-copy fix-up reached from .Lcommon: copies 1 and/or 2 leading
 * bytes so that a5 (dst) becomes word-aligned, reduces a4 (len) to match,
 * then rejoins at .Ldstaligned.  Short copies are sent to .Lbytecopy.
 */
.align 4
.Ldst1mod2: # dst is only byte aligned
_bltui a4, 7, .Lbytecopy # do short copies byte by byte
# copy 1 byte
l8ui a6, a3, 0
addi a3, a3, 1
addi a4, a4, -1
s8i a6, a5, 0
addi a5, a5, 1
_bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
# return to main algorithm
/* Otherwise fall through: dst is now 2 mod 4. */
.Ldst2mod4: # dst 16-bit aligned
# copy 2 bytes
_bltui a4, 6, .Lbytecopy # do short copies byte by byte
l8ui a6, a3, 0
l8ui a7, a3, 1
addi a3, a3, 2
addi a4, a4, -2
s8i a6, a5, 0
s8i a7, a5, 1
addi a5, a5, 2
j .Ldstaligned # dst is now aligned, return to main algorithm
/* Forward copy.  memmove branches here when (dst - src) >= len as an
 * unsigned comparison.  a2 = dst (return value), a3 = src, a4 = len and
 * a5 = working dst.
 */
.Lcommon:
bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
.Ldstaligned: # return here from .Ldst?mod? once dst is aligned
srli a7, a4, 4 # number of loop iterations with 16B
# per iteration
/* a8 = 3 is the low-address-bit mask; .Lsrcunaligned reuses it. */
movi a8, 3 # if source is not aligned,
bany a3, a8, .Lsrcunaligned # then use shifting copy
/*
* Destination and source are word-aligned, use word copy.
*/
# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
loopnez a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
beqz a7, .Loop1done
slli a8, a7, 4
add a8, a8, a3 # a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
/* Loop body: a6 and a7 alternate as data registers; a7 no longer holds
 * the iteration count once the loop has been set up.
 */
.Loop1:
l32i a6, a3, 0
l32i a7, a3, 4
s32i a6, a5, 0
l32i a6, a3, 8
s32i a7, a5, 4
l32i a7, a3, 12
s32i a6, a5, 8
addi a3, a3, 16
s32i a7, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
bne a3, a8, .Loop1 # continue loop if a3:src != a8:src_end
#endif /* !XCHAL_HAVE_LOOPS */
/* Tail: bits 3, 2, 1 and 0 of len select 8-, 4-, 2- and 1-byte copies. */
.Loop1done:
bbci.l a4, 3, .L2
# copy 8 bytes
l32i a6, a3, 0
l32i a7, a3, 4
addi a3, a3, 8
s32i a6, a5, 0
s32i a7, a5, 4
addi a5, a5, 8
.L2:
bbsi.l a4, 2, .L3
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
RET(16)
.L3:
# copy 4 bytes
l32i a6, a3, 0
addi a3, a3, 4
s32i a6, a5, 0
addi a5, a5, 4
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
RET(16)
.L4:
# copy 2 bytes
l16ui a6, a3, 0
addi a3, a3, 2
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, .L5
RET(16)
.L5:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
RET(16)
/*
* Destination is aligned, Source is unaligned
*/
/* Forward shifting copy: source words are loaded with l32i and adjacent
 * pairs are merged with src_b into destination words.  a6 carries the
 * most recently loaded source word that is not yet fully consumed.
 * On entry a7 = len / 16 and a8 = 3 (both set at .Ldstaligned).
 * ssa8 and src_b are not defined in this file; presumably they come from
 * "xtensa_asm.h" -- TODO confirm.
 */
.align 4
.Lsrcunaligned:
_beqz a4, .Ldone # avoid loading anything for zero-length copies
# copy 16 bytes per iteration for word-aligned dst and unaligned src
ssa8 a3 # set shift amount from byte offset
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
/* a8 still holds the mask 3, so a11 = src & 3. */
and a11, a3, a8 # save unalignment offset for below
sub a3, a3, a11 # align a3
#endif
l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
loopnez a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
beqz a7, .Loop2done
slli a10, a7, 4
add a10, a10, a3 # a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
/* Loop body: the word loaded from offset 16 becomes the carried word
 * (a6) for the next iteration or for the tail.
 */
.Loop2:
l32i a7, a3, 4
l32i a8, a3, 8
src_b a6, a6, a7
s32i a6, a5, 0
l32i a9, a3, 12
src_b a7, a7, a8
s32i a7, a5, 4
l32i a6, a3, 16
src_b a8, a8, a9
s32i a8, a5, 8
addi a3, a3, 16
src_b a9, a9, a6
s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
bne a3, a10, .Loop2 # continue loop if a3:src != a10:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
bbci.l a4, 3, .L12
# copy 8 bytes
l32i a7, a3, 4
l32i a8, a3, 8
src_b a6, a6, a7
s32i a6, a5, 0
addi a3, a3, 8
src_b a7, a7, a8
s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
.L12:
bbci.l a4, 2, .L13
# copy 4 bytes
l32i a7, a3, 4
addi a3, a3, 4
src_b a6, a6, a7
s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
.L13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
add a3, a3, a11 # readjust a3 with correct misalignment
#endif
/* The last 2 and 1 bytes use byte accesses at the true source address. */
bbsi.l a4, 1, .L14
bbsi.l a4, 0, .L15
.Ldone: RET(16)
.L14:
# copy 2 bytes
l8ui a6, a3, 0
l8ui a7, a3, 1
addi a3, a3, 2
s8i a6, a5, 0
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, .L15
RET(16)
.L15:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
RET(16)
/*
* Byte by byte copy
*/
/* Backward variant: a3 and a5 point one past the end of the source and
 * destination ranges (memmove adds len to both before branching into the
 * backward code), so each pointer is decremented before its access.
 * Copies a4 bytes and returns.
 */
.align 4
.byte 0 # 1 mod 4 alignment for LOOPNEZ
# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lbackbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
beqz a4, .Lbackbytecopydone
sub a7, a3, a4 # a7 = start address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lbacknextbyte:
addi a3, a3, -1
l8ui a6, a3, 0
addi a5, a5, -1
s8i a6, a5, 0
#if !XCHAL_HAVE_LOOPS
bne a3, a7, .Lbacknextbyte # continue loop if
# $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
RET(16)
/*
* Destination is unaligned
*/
/* Backward-copy fix-up: a5 is the end of the destination range.  Copies 1
 * and/or 2 trailing bytes so that a5 becomes word-aligned, reduces a4
 * (len) to match, then rejoins at .Lbackdstaligned.  Short copies are
 * sent to .Lbackbytecopy.
 */
.align 4
.Lbackdst1mod2: # dst is only byte aligned
_bltui a4, 7, .Lbackbytecopy # do short copies byte by byte
# copy 1 byte
addi a3, a3, -1
l8ui a6, a3, 0
addi a5, a5, -1
s8i a6, a5, 0
addi a4, a4, -1
_bbci.l a5, 1, .Lbackdstaligned # if dst is now aligned, then
# return to main algorithm
/* Otherwise fall through: dst end is now 2 mod 4. */
.Lbackdst2mod4: # dst 16-bit aligned
# copy 2 bytes
_bltui a4, 6, .Lbackbytecopy # do short copies byte by byte
addi a3, a3, -2
l8ui a6, a3, 0
l8ui a7, a3, 1
addi a5, a5, -2
s8i a6, a5, 0
s8i a7, a5, 1
addi a4, a4, -2
j .Lbackdstaligned # dst is now aligned,
# return to main algorithm
/* memmove entry point.
 *
 * Register roles:
 *   a2 = dst, left unmodified so that it is also the return value
 *   a3 = src
 *   a4 = len
 *   a5 = working copy of dst
 *
 * Direction choice: a6 = dst - src.  If a6 >= len as an unsigned value,
 * the forward copy at .Lcommon is used.  Otherwise the copy runs backward:
 * a5 and a3 are advanced to one past the end of their ranges and the
 * alignment of the destination END address is examined.
 *
 * The symbol is marked as a function so that the .size directive at the
 * end of the file describes a properly typed ELF symbol, matching the
 * other routines in this directory.
 */
	.align	4
	.type	memmove, @function
memmove:
	ENTRY(16)
	/* a2/ dst, a3/ src, a4/ len */
	mov	a5, a2			# copy dst so that a2 is return value
.Lmovecommon:
	sub	a6, a5, a3		# a6 = dst - src
	bgeu	a6, a4, .Lcommon	# forward copy when (dst - src) >= len
	add	a5, a5, a4		# a5 = end of destination range
	add	a3, a3, a4		# a3 = end of source range
	bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
/* Backward main path: a5 (end of dst) is word-aligned here and a3 is the
 * end of the source range.  Both pointers are decremented before each
 * group of accesses.
 */
.Lbackdstaligned: # return here from .Lbackdst?mod? once dst is aligned
srli a7, a4, 4 # number of loop iterations with 16B
# per iteration
/* a8 = 3 is the low-address-bit mask; .Lbacksrcunaligned reuses it. */
movi a8, 3 # if source is not aligned,
bany a3, a8, .Lbacksrcunaligned # then use shifting copy
/*
* Destination and source are word-aligned, use word copy.
*/
# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
loopnez a7, .backLoop1done
#else /* !XCHAL_HAVE_LOOPS */
beqz a7, .backLoop1done
slli a8, a7, 4
sub a8, a3, a8 # a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
/* Loop body: words are copied from the highest offset (12) down to 0. */
.backLoop1:
addi a3, a3, -16
l32i a7, a3, 12
l32i a6, a3, 8
addi a5, a5, -16
s32i a7, a5, 12
l32i a7, a3, 4
s32i a6, a5, 8
l32i a6, a3, 0
s32i a7, a5, 4
s32i a6, a5, 0
#if !XCHAL_HAVE_LOOPS
bne a3, a8, .backLoop1 # continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
/* Tail: bits 3, 2, 1 and 0 of len select 8-, 4-, 2- and 1-byte copies. */
.backLoop1done:
bbci.l a4, 3, .Lback2
# copy 8 bytes
addi a3, a3, -8
l32i a6, a3, 0
l32i a7, a3, 4
addi a5, a5, -8
s32i a6, a5, 0
s32i a7, a5, 4
.Lback2:
bbsi.l a4, 2, .Lback3
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
RET(16)
.Lback3:
# copy 4 bytes
addi a3, a3, -4
l32i a6, a3, 0
addi a5, a5, -4
s32i a6, a5, 0
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
RET(16)
.Lback4:
# copy 2 bytes
addi a3, a3, -2
l16ui a6, a3, 0
addi a5, a5, -2
s16i a6, a5, 0
bbsi.l a4, 0, .Lback5
RET(16)
.Lback5:
# copy 1 byte
addi a3, a3, -1
l8ui a6, a3, 0
addi a5, a5, -1
s8i a6, a5, 0
RET(16)
/*
 * Backward copy: destination is aligned, source is unaligned.
 *
 * Source words are loaded with l32i and adjacent pairs are merged with
 * src_b into destination words, walking from the end of the buffers
 * toward the start.  a6 carries the most recently loaded source word that
 * is not yet fully consumed.
 *
 * On entry: a3 = end of source, a5 = end of destination (word-aligned),
 * a4 = len, a7 = len / 16 and a8 = 3 (both set at .Lbackdstaligned).
 * ssa8 and src_b are not defined in this file; presumably they come from
 * "xtensa_asm.h" -- TODO confirm.
 */
	.align	4
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	/* copy 16 bytes per iteration for word-aligned dst and unaligned src */
	ssa8	a3		# set shift amount from byte offset

/* The backward path forces the alignment fix-up on, as in the original
 * code.  The macro is already defined at the top of this file with a
 * different value, so it is undefined first to avoid a conflicting
 * redefinition diagnostic from the preprocessor.
 * NOTE(review): the forward path uses the file-level value; confirm
 * whether both directions are meant to share one setting.
 */
#undef SIM_CHECKS_ALIGNMENT
#define SIM_CHECKS_ALIGNMENT 1	/* set to 1 when running on ISS with
				 * the lint or ferret client, or 0
				 * to save a few cycles */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	/* a8 still holds the mask 3, so a11 = src_end & 3 */
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .backLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
	/* Loop body: the word loaded from offset 0 becomes the carried word
	 * (a6) for the next iteration or for the tail.
	 */
.backLoop2:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a8, a3, 8
	addi	a5, a5, -16
	src_b	a6, a7, a6
	s32i	a6, a5, 12
	l32i	a9, a3, 4
	src_b	a7, a8, a7
	s32i	a7, a5, 8
	l32i	a6, a3, 0
	src_b	a8, a9, a8
	s32i	a8, a5, 4
	src_b	a9, a6, a9
	s32i	a9, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .backLoop2	# continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.backLoop2done:
	bbci.l	a4, 3, .Lback12
	/* copy 8 bytes */
	addi	a3, a3, -8
	l32i	a7, a3, 4
	l32i	a8, a3, 0
	addi	a5, a5, -8
	src_b	a6, a7, a6
	s32i	a6, a5, 4
	src_b	a7, a8, a7
	s32i	a7, a5, 0
	mov	a6, a8
.Lback12:
	bbci.l	a4, 2, .Lback13
	/* copy 4 bytes */
	addi	a3, a3, -4
	l32i	a7, a3, 0
	addi	a5, a5, -4
	src_b	a6, a7, a6
	s32i	a6, a5, 0
	mov	a6, a7
.Lback13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	/* The last 2 and 1 bytes use byte accesses at the true source address */
	bbsi.l	a4, 1, .Lback14
	bbsi.l	a4, 0, .Lback15
.Lbackdone:
	RET(16)
.Lback14:
	/* copy 2 bytes */
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	bbsi.l	a4, 0, .Lback15
	RET(16)
.Lback15:
	/* copy 1 byte */
	addi	a3, a3, -1
	addi	a5, a5, -1
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	RET(16)
	.end schedule
	.size	memmove, . - memmove
+179
View File
@@ -0,0 +1,179 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_memset.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Public Functions
****************************************************************************/
/* void *memset (void *dst, int c, size_t length)
The algorithm is as follows:
Create a word with c in all byte positions.
If the destination is aligned, set 16B chunks with a loop, and then
finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
If the destination is unaligned, align it by conditionally
setting 1B and/or 2B and then go to aligned case.
This code tries to use fall-through branches for the common
case of an aligned destination (except for the branches to
the alignment labels). */
/* Section setup and entry point.  (The byte-by-byte loop itself is at
 * .Lbyteset near the end of the file.)
 * ENTRY()/RET() are not defined in this file; presumably they are macros
 * from <arch/xtensa/xtensa_abi.h> -- TODO confirm.
 */
.section .text
.begin schedule
.literal_position
.local .Lbyteset
.local .Ldst1mod2
.local .Ldst2mod4
.align 4
.global memset
.type memset, @function
memset:
ENTRY(16)
/* a2 = dst, a3 = c, a4 = length */
/* Duplicate character into all bytes of word. */
/* Keep the low 8 bits of c, then replicate to 16 and to 32 bits. */
extui a3, a3, 0, 8
slli a7, a3, 8
or a3, a3, a7
slli a7, a3, 16
or a3, a3, a7
mov a5, a2 // copy dst so that a2 is return value
/* Check if dst is unaligned. */
bbsi.l a2, 0, .Ldst1mod2
bbsi.l a2, 1, .Ldst2mod4
/* dst is word-aligned: skip over the alignment fix-up code. */
j .Ldstaligned
/* Alignment fix-up: stores 1 and/or 2 leading bytes so that a5 (dst)
 * becomes word-aligned, reduces a4 (length) to match, then continues at
 * .Ldstaligned.  Lengths below 8 are handled entirely by .Lbyteset.
 */
.Ldst1mod2: // dst is only byte aligned
/* Do short sizes byte-by-byte. */
bltui a4, 8, .Lbyteset
/* Set 1 byte. */
s8i a3, a5, 0
addi a5, a5, 1
addi a4, a4, -1
/* Now retest if dst is aligned. */
bbci.l a5, 1, .Ldstaligned
/* Otherwise fall through: dst is now 2 mod 4. */
.Ldst2mod4: // dst has 16-bit alignment
/* Do short sizes byte-by-byte. */
bltui a4, 8, .Lbyteset
/* Set 2 bytes. */
s16i a3, a5, 0
addi a5, a5, 2
addi a4, a4, -2
/* dst is now aligned; fall through to main algorithm */
/* Main path: a5 (dst) is word-aligned and a3 holds the fill byte
 * replicated into all four byte positions.  a7 = length / 16 iterations;
 * the low four bits of a4 drive the tail stores.
 */
.Ldstaligned:
/* Get number of loop iterations with 16B per iteration. */
srli a7, a4, 4
/* Destination is word-aligned. */
#if XCHAL_HAVE_LOOPS
loopnez a7, 2f
#else
beqz a7, 2f
slli a6, a7, 4
add a6, a6, a5 // a6 = end of last 16B chunk
#endif
/* Set 16 bytes per iteration. */
1: s32i a3, a5, 0
s32i a3, a5, 4
s32i a3, a5, 8
s32i a3, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
bltu a5, a6, 1b
#endif
/* Set any leftover pieces smaller than 16B. */
/* Each tail step is skipped when the matching bit of length is clear. */
2: bbci.l a4, 3, 3f
/* Set 8 bytes. */
s32i a3, a5, 0
s32i a3, a5, 4
addi a5, a5, 8
3: bbci.l a4, 2, 4f
/* Set 4 bytes. */
s32i a3, a5, 0
addi a5, a5, 4
4: bbci.l a4, 1, 5f
/* Set 2 bytes. */
s16i a3, a5, 0
addi a5, a5, 2
5: bbci.l a4, 0, 6f
/* Set 1 byte. */
s8i a3, a5, 0
6: RET(16)
/* Byte-at-a-time fill: stores the low byte of a3 into a4 consecutive
 * bytes starting at a5, then returns.  Used for short fills whose
 * destination is not word-aligned.
 *
 * The alignment directives below are kept disabled, as in the original:
 *   .align XCHAL_INST_FETCH_WIDTH
 *   .skip XCHAL_INST_FETCH_WIDTH - 3
 *   (skip bytes to get proper alignment for three-byte loop)
 */
__memset_aux:
.Lbyteset:
#if !XCHAL_HAVE_LOOPS
	beqz	a4, .Lbyteset_done	/* nothing to set */
	add	a6, a5, a4		/* a6 = ending address */
#else
	loopnez	a4, .Lbyteset_done	/* hardware loop, a4 iterations */
#endif
.Lbyteset_next:
	s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bltu	a5, a6, .Lbyteset_next	/* until dst reaches the end */
#endif
.Lbyteset_done:
	RET(16)
	.end schedule
	.size	memset, . - memset
File diff suppressed because it is too large Load Diff
+243
View File
@@ -0,0 +1,243 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_strcpy.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Public Functions
****************************************************************************/
/* strcpy entry point.
 *
 * Register roles:
 *   a2  = dst, left unmodified as the return value
 *   a3  = src, advanced while copying
 *   a10 = working dst pointer
 *   a4..a7 = MASK0..MASK3; the code below uses them to test byte 0..3 of
 *            a loaded word for zero.  The MASKn constants are not defined
 *            in this file; presumably they come from "xtensa_asm.h" --
 *            TODO confirm.
 */
.section .text
.begin schedule
.align 4
.literal_position
.global strcpy
.type strcpy, @function
strcpy:
ENTRY(16)
/* a2 = dst, a3 = src */
mov a10, a2 # leave dst in return value register
movi a4, MASK0
movi a5, MASK1
movi a6, MASK2
movi a7, MASK3
/* First bring src to a word boundary (1 or 2 leading bytes). */
bbsi.l a3, 0, .Lsrc1mod2
bbsi.l a3, 1, .Lsrc2mod4
.Lsrcaligned:
/* Check if the destination is aligned. */
/* Word copy is used only when dst is word-aligned as well. */
movi a8, 3
bnone a10, a8, .Laligned
j .Ldstunaligned
/* Source alignment fix-up: copies 1 and/or 2 leading bytes one at a time
 * until a3 (src) is word-aligned, returning immediately if one of them is
 * the terminating NUL (which has already been stored).
 */
.Lsrc1mod2: # src address is odd
l8ui a8, a3, 0 # get byte 0
addi a3, a3, 1 # advance src pointer
s8i a8, a10, 0 # store byte 0
beqz a8, 1f # if byte 0 is zero
addi a10, a10, 1 # advance dst pointer
bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned
.Lsrc2mod4: # src address is 2 mod 4
l8ui a8, a3, 0 # get byte 0
/* 1-cycle interlock */
s8i a8, a10, 0 # store byte 0
beqz a8, 1f # if byte 0 is zero
l8ui a8, a3, 1 # get byte 1
addi a3, a3, 2 # advance src pointer
s8i a8, a10, 1 # store byte 1
addi a10, a10, 2 # advance dst pointer
/* Continue with the main algorithm unless byte 1 was the terminator. */
bnez a8, .Lsrcaligned
1: RET(16)
/* dst is word-aligned; src is word-aligned. */
/* Word-at-a-time copy.  Each source word is tested byte by byte with the
 * masks in a4..a7.  A word is stored whole only when bytes 0..2 are all
 * nonzero; otherwise .Lz0/.Lz1/.Lz2 store just the bytes up to and
 * including the terminator.
 */
.align 4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
/* (2 mod 4) alignment for loop instruction */
#else
/* (1 mod 4) alignment for loop instruction */
.byte 0
.byte 0
#endif
.Laligned:
#if XCHAL_HAVE_DENSITY
_movi.n a8, 0 # set up for the maximum loop count
#else
_movi a8, 0 # set up for the maximum loop count
#endif
loop a8, .Lz3 # loop forever (almost anyway)
l32i a8, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a8, a4, .Lz0 # if byte 0 is zero
bnone a8, a5, .Lz1 # if byte 1 is zero
bnone a8, a6, .Lz2 # if byte 2 is zero
s32i a8, a10, 0 # store word to dst
bnone a8, a7, .Lz3 # if byte 3 is zero
addi a10, a10, 4 # advance dst pointer
#else /* !XCHAL_HAVE_LOOPS */
/* Without hardware loops the dst increment sits at the loop head (1:),
 * so the first pass enters at .Laligned and skips it.
 */
1: addi a10, a10, 4 # advance dst pointer
.Laligned:
l32i a8, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a8, a4, .Lz0 # if byte 0 is zero
bnone a8, a5, .Lz1 # if byte 1 is zero
bnone a8, a6, .Lz2 # if byte 2 is zero
s32i a8, a10, 0 # store word to dst
bany a8, a7, 1b # loop again if byte 3 is nonzero
#endif /* !XCHAL_HAVE_LOOPS */
/* The whole word, terminator included, has already been stored. */
.Lz3: /* Byte 3 is zero. */
RET(16)
.Lz0: /* Byte 0 is zero. */
#if XCHAL_HAVE_BE
movi a8, 0
#endif
s8i a8, a10, 0
RET(16)
.Lz1: /* Byte 1 is zero. */
#if XCHAL_HAVE_BE
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
RET(16)
.Lz2: /* Byte 2 is zero. */
#if XCHAL_HAVE_BE
extui a8, a8, 16, 16
#endif
/* Store bytes 0 and 1, then an explicit zero for byte 2. */
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
RET(16)
/* Unaligned-destination path.  The "#if 1" branch (a plain byte copy
 * loop) is the one that is assembled; the "#else" branch is disabled.
 */
#if 1
/* For now just use byte copy loop for the unaligned destination case. */
.align 4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
/* (2 mod 4) alignment for loop instruction */
#else
/* (1 mod 4) alignment for loop instruction */
.byte 0
.byte 0
#endif
#endif
.Ldstunaligned:
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
_movi.n a8, 0 # set up for the maximum loop count
#else
_movi a8, 0 # set up for the maximum loop count
#endif
loop a8, 2f # loop forever (almost anyway)
#endif
/* Copy one byte per iteration; the terminating zero byte is stored
 * before the loop exits.
 */
1: l8ui a8, a3, 0
addi a3, a3, 1
s8i a8, a10, 0
addi a10, a10, 1
#if XCHAL_HAVE_LOOPS
beqz a8, 2f
#else
bnez a8, 1b
#endif
2: RET(16)
#else /* 0 */
/* This code is not functional yet. */
/* NOTE(review): this disabled branch references labels (.Lu3 to .Lu6)
 * that are not defined in the visible source, so it would not assemble
 * if it were enabled as-is.
 */
.Ldstunaligned:
l32i a9, a2, 0 # load word from dst
#if XCHAL_HAVE_BE
ssa8b a9 # rotate by dst alignment so that
src a9, a9, a9 # shift in loop will put back in place
ssa8l a9 # shift left by byte*8
#else
ssa8l a9 # rotate by dst alignment so that
src a9, a9, a9 # shift in loop will put back in place
ssa8b a9 # shift left by 32-byte*8
#endif
/* dst is word-aligned; src is unaligned. */
.Ldstunalignedloop:
l32i a8, a3, 0 # get word from src
/* 1-cycle interlock */
bnone a8, a4, .Lu0 # if byte 0 is zero
bnone a8, a5, .Lu1 # if byte 1 is zero
bnone a8, a6, .Lu2 # if byte 2 is zero
src a9, a8, a9 # combine last word and this word
s32i a9, a10, 0 # store word to dst
bnone a8, a7, .Lu3 # if byte 3 is zero
l32i a9, a3, 4 # get word from src
addi a3, a3, 8 # advance src pointer
bnone a9, a4, .Lu4 # if byte 0 is zero
bnone a9, a5, .Lu5 # if byte 1 is zero
bnone a9, a6, .Lu6 # if byte 2 is zero
src a8, a9, a8 # combine last word and this word
s32i a8, a10, 4 # store word to dst
addi a10, a10, 8 # advance dst pointer
bany a8, a7, .Ldstunalignedloop # if byte 3 is nonzero, iterate
/* Byte 7 is zero. */
.Lu7: RET(16)
.Lu0: /* Byte 0 is zero. */
#if XCHAL_HAVE_BE
movi a8, 0
#endif
s8i a8, a10, 0
RET(16)
.Lu1: /* Byte 1 is zero. */
#if XCHAL_HAVE_BE
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
RET(16)
.Lu2: /* Byte 2 is zero. */
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
RET(16)
#endif /* 0 */
.end schedule
.size strcpy, . - strcpy
+123
View File
@@ -0,0 +1,123 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_strlen.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Public Functions
****************************************************************************/
/* strlen entry point.
 *
 * Register roles:
 *   a2 = s on entry; replaced by the length just before each return
 *   a3 = scan pointer, biased by -4: the main loop reads the word at
 *        a3 + 4 and then advances a3, so on exit a3 addresses the word
 *        that contains the terminator
 *   a4..a7 = MASK0..MASK3; the code below uses them to test byte 0..3 of
 *            a loaded word for zero.  The MASKn constants are not defined
 *            in this file; presumably they come from "xtensa_asm.h" --
 *            TODO confirm.
 */
.section .text
.begin schedule
.align 4
.literal_position
.global strlen
.type strlen, @function
strlen:
ENTRY(16)
/* a2 = s */
addi a3, a2, -4 # because we overincrement at the end
movi a4, MASK0
movi a5, MASK1
movi a6, MASK2
movi a7, MASK3
bbsi.l a2, 0, .L1mod2
bbsi.l a2, 1, .L2mod4
j .Laligned
/* The string starts at an odd address: examine its first byte alone. */
.L1mod2: # address is odd
l8ui a8, a3, 4 # get byte 0
addi a3, a3, 1 # advance string pointer
/* For an empty string a3 is now s - 3; .Lz3 adds 3, giving length 0. */
beqz a8, .Lz3 # if byte 0 is zero
bbci.l a3, 1, .Laligned # if string pointer is now word-aligned
/* The pointer is 2 mod 4: load the aligned word that contains the next
 * two string bytes in its byte positions 2 and 3, and test only those.
 */
.L2mod4: # address is 2 mod 4
addi a3, a3, 2 # advance ptr for aligned access
l32i a8, a3, 0 # get word with first two bytes of string
bnone a8, a6, .Lz2 # if byte 2 (of word, not string) is zero
bany a8, a7, .Laligned # if byte 3 (of word, not string) is nonzero
/* Byte 3 is zero. */
addi a3, a3, 3 # point to zero byte
sub a2, a3, a2 # subtract to get length
RET(16)
/* String is word-aligned. */
/* Main scan: reads one word per iteration at a3 + 4 and tests its four
 * bytes with the masks in a4..a7.  The .Lz0 to .Lz3 exits add the index
 * of the zero byte to a3 and subtract the original pointer (a2) to
 * produce the length.
 */
.align 4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
/* (2 mod 4) alignment for loop instruction */
#else
/* (1 mod 4) alignment for loop instruction */
.byte 0
.byte 0
#endif
#endif
.Laligned:
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
_movi.n a8, 0 # set up for the maximum loop count
#else
_movi a8, 0 # set up for the maximum loop count
#endif
loop a8, .Lz3 # loop forever (almost anyway)
#endif
1: l32i a8, a3, 4 # get next word of string
addi a3, a3, 4 # advance string pointer
bnone a8, a4, .Lz0 # if byte 0 is zero
bnone a8, a5, .Lz1 # if byte 1 is zero
bnone a8, a6, .Lz2 # if byte 2 is zero
#if XCHAL_HAVE_LOOPS
bnone a8, a7, .Lz3 # if byte 3 is zero
#else
bany a8, a7, 1b # repeat if byte 3 is non-zero
#endif
.Lz3: /* Byte 3 is zero. */
addi a3, a3, 3 # point to zero byte
/* Fall through.... */
.Lz0: /* Byte 0 is zero. */
sub a2, a3, a2 # subtract to get length
RET(16)
.Lz1: /* Byte 1 is zero. */
addi a3, a3, 1 # point to zero byte
sub a2, a3, a2 # subtract to get length
RET(16)
.Lz2: /* Byte 2 is zero. */
addi a3, a3, 2 # point to zero byte
sub a2, a3, a2 # subtract to get length
RET(16)
.end schedule
.size strlen, . - strlen
+265
View File
@@ -0,0 +1,265 @@
/****************************************************************************
* libs/libc/machine/xtensa/arch_strncpy.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include "xtensa_asm.h"
#include <arch/chip/core-isa.h>
#include <arch/xtensa/xtensa_abi.h>
/****************************************************************************
* Public Functions
****************************************************************************/
/* char *strncpy(char *dst, const char *src, size_t n)
 *
 * Copies bytes from src to dst up to and including the first zero byte,
 * writing at most n bytes; if the zero byte is found before n bytes have
 * been written, the rest of the n bytes in dst are filled with zeros.
 *
 * In:   a2 = dst, a3 = src, a4 = n
 * Out:  a2 = dst (a2 is never modified; a10 is the working dst pointer)
 *
 * Register usage inside the function:
 *   a3      running src pointer
 *   a4      bytes of n still to be written
 *   a10     running dst pointer
 *   a11, a5, a6, a7   MASK0..MASK3, the masks for byte 0..3 of a word
 *   a8      byte/word just loaded; also scratch and loop count operand
 *   a9      constant zero used by the fill code
 *
 * Once src is word-aligned it is read with aligned 32-bit loads, so bytes
 * following the terminator inside the same aligned word are read.
 *
 * ENTRY()/RET() are provided by xtensa_abi.h; presumably they expand to the
 * ABI-specific prologue/epilogue -- confirm against that header.
 */
.section .text
.begin schedule
.align 4
.literal_position
/* Out-of-line helpers placed before the entry point.  They are entered only
 * by branches from strncpy below, after ENTRY(16) has run.
 */
__strncpy_aux:
.Lsrc1mod2: # src address is odd
l8ui a8, a3, 0 # get byte 0
addi a3, a3, 1 # advance src pointer
s8i a8, a10, 0 # store byte 0
addi a4, a4, -1 # decrement n
beqz a4, .Lret # if n is zero
addi a10, a10, 1 # advance dst pointer
beqz a8, .Lfill # if byte 0 is zero
bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned
.Lsrc2mod4: # src address is 2 mod 4
/* Copy two bytes one at a time so that src becomes word-aligned. */
l8ui a8, a3, 0 # get byte 0
addi a4, a4, -1 # decrement n
s8i a8, a10, 0 # store byte 0
beqz a4, .Lret # if n is zero
addi a10, a10, 1 # advance dst pointer
beqz a8, .Lfill # if byte 0 is zero
l8ui a8, a3, 1 # get byte 1
addi a3, a3, 2 # advance src pointer
s8i a8, a10, 0 # store byte 1
addi a4, a4, -1 # decrement n
beqz a4, .Lret # if n is zero
addi a10, a10, 1 # advance dst pointer
bnez a8, .Lsrcaligned
j .Lfill
.Lret:
RET(16)
.align 4
.global strncpy
.type strncpy, @function
strncpy:
ENTRY(16)
/* a2 = dst, a3 = src */
mov a10, a2 # leave dst in return value register
beqz a4, .Lret # if n is zero
movi a11, MASK0
movi a5, MASK1
movi a6, MASK2
movi a7, MASK3
/* Dispatch on the low two address bits of src. */
bbsi.l a3, 0, .Lsrc1mod2
bbsi.l a3, 1, .Lsrc2mod4
.Lsrcaligned:
/* Check if the destination is aligned. */
movi a8, 3
bnone a10, a8, .Laligned
j .Ldstunaligned
/* Fill the dst with zeros -- n is at least 1. */
.Lfill:
movi a9, 0
/* Zero single bytes until dst is word-aligned, then whole words. */
bbsi.l a10, 0, .Lfill1mod2
bbsi.l a10, 1, .Lfill2mod4
.Lfillaligned:
blti a4, 4, .Lfillcleanup
/* Loop filling complete words with zero. */
#if XCHAL_HAVE_LOOPS
/* a8 = n / 4 words; afterwards subtract the bytes written (a8 * 4). */
srai a8, a4, 2
loop a8, 1f
s32i a9, a10, 0
addi a10, a10, 4
1: slli a8, a8, 2
sub a4, a4, a8
#else /* !XCHAL_HAVE_LOOPS */
1: s32i a9, a10, 0
addi a10, a10, 4
addi a4, a4, -4
bgei a4, 4, 1b
#endif /* !XCHAL_HAVE_LOOPS */
beqz a4, 2f
.Lfillcleanup:
/* Fill leftover (1 to 3) bytes with zero. */
s8i a9, a10, 0 # store byte 0
addi a4, a4, -1 # decrement n
addi a10, a10, 1
bnez a4, .Lfillcleanup
2: RET(16)
.Lfill1mod2: # dst address is odd
s8i a9, a10, 0 # store byte 0
addi a4, a4, -1 # decrement n
beqz a4, 2b # if n is zero
addi a10, a10, 1 # advance dst pointer
bbci.l a10, 1, .Lfillaligned # if dst is now word-aligned
.Lfill2mod4: # dst address is 2 mod 4
s8i a9, a10, 0 # store byte 0
addi a4, a4, -1 # decrement n
beqz a4, 2b # if n is zero
s8i a9, a10, 1 # store byte 1
addi a4, a4, -1 # decrement n
beqz a4, 2b # if n is zero
addi a10, a10, 2 # advance dst pointer
j .Lfillaligned
/* dst is word-aligned; src is word-aligned; n is at least 1. */
/* The padding below positions the loop instruction that follows
 * .Laligned at the alignment named in the comments; it sits after an
 * unconditional jump, so it is not on any execution path.
 */
.align 4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
/* (2 mod 4) alignment for loop instruction */
#else
/* (1 mod 4) alignment for loop instruction */
.byte 0
.byte 0
#endif
#endif
.Laligned:
/* Word copy loop.  A word is copied only while n >= 5, so that n is still
 * at least 1 after a 4-byte store, which is what .Lfill requires.  Bytes
 * 0..2 are tested before the store; a word whose only zero is byte 3 is
 * stored whole and the copy then continues at .Lfill.
 */
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
_movi.n a8, 0 # set up for the maximum loop count
#else
_movi a8, 0 # set up for the maximum loop count
#endif
loop a8, 1f # loop forever (almost anyway)
blti a4, 5, .Ldstunaligned # n is near limit; do one at a time
l32i a8, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a8, a11, .Lz0 # if byte 0 is zero
bnone a8, a5, .Lz1 # if byte 1 is zero
bnone a8, a6, .Lz2 # if byte 2 is zero
s32i a8, a10, 0 # store word to dst
addi a4, a4, -4 # decrement n
addi a10, a10, 4 # advance dst pointer
bnone a8, a7, .Lfill # if byte 3 is zero
1:
#else /* !XCHAL_HAVE_LOOPS */
1: blti a4, 5, .Ldstunaligned # n is near limit; do one at a time
l32i a8, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a8, a11, .Lz0 # if byte 0 is zero
bnone a8, a5, .Lz1 # if byte 1 is zero
bnone a8, a6, .Lz2 # if byte 2 is zero
s32i a8, a10, 0 # store word to dst
addi a4, a4, -4 # decrement n
addi a10, a10, 4 # advance dst pointer
bany a8, a7, 1b # no zeroes
#endif /* !XCHAL_HAVE_LOOPS */
j .Lfill
/* Partial-word tails: store only the bytes up to and including the zero
 * byte, then zero-fill the rest of dst.  On big-endian cores the leading
 * bytes are in the upper half of a8, hence the extui/movi adjustments.
 */
.Lz0: /* Byte 0 is zero. */
#if XCHAL_HAVE_BE
movi a8, 0
#endif
s8i a8, a10, 0
addi a4, a4, -1 # decrement n
addi a10, a10, 1 # advance dst pointer
j .Lfill
.Lz1: /* Byte 1 is zero. */
#if XCHAL_HAVE_BE
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
addi a4, a4, -2 # decrement n
addi a10, a10, 2 # advance dst pointer
j .Lfill
.Lz2: /* Byte 2 is zero. */
#if XCHAL_HAVE_BE
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
addi a4, a4, -3 # decrement n
addi a10, a10, 3 # advance dst pointer
j .Lfill
.align 4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
/* (2 mod 4) alignment for loop instruction */
#else
/* (1 mod 4) alignment for loop instruction */
.byte 0
.byte 0
#endif
#endif
.Ldstunaligned:
/* Byte-at-a-time copy, used when dst is not word-aligned or when fewer
 * than 5 bytes of n remain.  Returns directly when n reaches zero;
 * otherwise goes to .Lfill after copying the zero byte.
 */
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
_movi.n a8, 0 # set up for the maximum loop count
#else
_movi a8, 0 # set up for the maximum loop count
#endif
loop a8, 2f # loop forever (almost anyway)
#endif
1: l8ui a8, a3, 0
addi a3, a3, 1
s8i a8, a10, 0
addi a4, a4, -1
beqz a4, 3f
addi a10, a10, 1
#if XCHAL_HAVE_LOOPS
beqz a8, 2f
#else
bnez a8, 1b
#endif
2: j .Lfill
3: RET(16)
.end schedule
.size strncpy, . - strncpy
+62
View File
@@ -0,0 +1,62 @@
/****************************************************************************
* libs/libc/machine/xtensa/xtensa_asm.h
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
#ifndef __LIBS_LIBC_MACHINE_XTENSA_XTENSA_ASM_H
#define __LIBS_LIBC_MACHINE_XTENSA_XTENSA_ASM_H

/* The include guard matters here: this header defines assembler macros, and
 * the assembler rejects a second definition of an existing .macro name, so
 * including the file twice in one translation unit would fail to assemble.
 */

/****************************************************************************
 * Included Files
 ****************************************************************************/

#include <arch/chip/core-isa.h>

/****************************************************************************
 * Assembly Language Macros
 ****************************************************************************/

/* src_b r, w0, w1
 *
 * Emits a single "src" instruction writing to \r.  The two source operands
 * are passed as (w0, w1) on big-endian configurations and swapped to
 * (w1, w0) on little-endian ones, so callers can use one operand order for
 * both endiannesses.
 */

	.macro	src_b	r, w0, w1
#if XCHAL_HAVE_BE
	src	\r, \w0, \w1
#else
	src	\r, \w1, \w0
#endif
	.endm

/* ssa8 r
 *
 * Emits "ssa8b \r" on big-endian configurations and "ssa8l \r" on
 * little-endian ones.
 */

	.macro	ssa8	r
#if XCHAL_HAVE_BE
	ssa8b	\r
#else
	ssa8l	\r
#endif
	.endm

/****************************************************************************
 * Pre-processor Macros
 ****************************************************************************/

/* MASKn selects byte n of a 32-bit word loaded from memory, where byte 0 is
 * the byte at the lowest address.  On big-endian configurations byte 0 is
 * the most significant byte of the register; on little-endian ones it is
 * the least significant byte.
 */

#if XCHAL_HAVE_BE
#  define MASK0 0xff000000
#  define MASK1 0x00ff0000
#  define MASK2 0x0000ff00
#  define MASK3 0x000000ff
#else
#  define MASK0 0x000000ff
#  define MASK1 0x0000ff00
#  define MASK2 0x00ff0000
#  define MASK3 0xff000000
#endif

#endif /* __LIBS_LIBC_MACHINE_XTENSA_XTENSA_ASM_H */