[libcpu-riscv]: [support SMP]: Add SMP support for qemu-virt64-riscv

1. Add the function declarations required for SMP enablement and implement the corresponding
functionality, including rt_hw_secondary_cpu_up, secondary_cpu_entry, rt_hw_local_irq_disable,
rt_hw_local_irq_enable, rt_hw_secondary_cpu_idle_exec, rt_hw_spin_lock_init, rt_hw_spin_lock,
rt_hw_spin_unlock, rt_hw_ipi_send, rt_hw_interrupt_set_priority, rt_hw_interrupt_get_priority,
rt_hw_ipi_init, rt_hw_ipi_handler_install, and rt_hw_ipi_handler.

2. In rt_hw_context_switch_to and rt_hw_context_switch in context_gcc.S, add a call to
rt_cpus_lock_status_restore so the scheduler's per-CPU state is updated on every switch.

3. If the MMU is enabled, use the .percpu section and record each hartid there via hart-specific
page tables; if the MMU is not enabled, record the hartid directly in the satp register.
Additionally, start secondary harts dynamically according to the configured core count. The
.percpu section is only used when both ARCH_MM_MMU and RT_USING_SMP are enabled; because the
linker script currently has no macro guard for it, some space is wasted in other configurations.
(An illustration follows this list.)

4. The QEMU instance started in CI has 128MB of physical memory, so RT_HW_PAGE_END is reduced
from the original +256MB to +128MB. Also modify the SConscript under the common64 directory so
that common/atomic_riscv.c is included in the build.
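
For illustration, the .percpu mechanism from point 3 lets ordinary C code stay index-free. This is a hypothetical example, not part of this patch; only the section name and the per-hart aliasing set up in the diffs below are taken from the commit, and rt_ubase_t is assumed to come from rtthread.h:

    /* Each hart maps the same virtual address range of .percpu onto its own
     * physical clone, so a per-CPU variable needs no explicit indexing. */
    static rt_ubase_t percpu_tick __attribute__((section(".percpu")));

    void on_tick(void)
    {
        percpu_tick++; /* touches only the current hart's private copy */
    }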

Signed-off-by: Mengchen Teng <teng_mengchen@163.com>
Author: Tm-C-mT
Date: 2025-12-03 18:38:26 +08:00
Committed by: R b b666
Parent: dd19c0eb72
Commit: acef64ed2a
20 changed files with 947 additions and 167 deletions

View File

@@ -38,5 +38,25 @@ if GetDepend('__STACKSIZE__'): stack_size = GetDepend('__STACKSIZE__')
stack_lds.write('__STACKSIZE__ = %d;\n' % stack_size)
stack_lds.close()
# Obtain the number of harts from rtconfig.h and write
# it into link_cpus.lds for the linker script
try:
    with open('rtconfig.h', 'r') as f:
        rtconfig_content = f.readlines()
except FileNotFoundError:
    cpus_nr = 1
else:
    cpus_nr = 1  # default value
    for line in rtconfig_content:
        line = line.strip()
        if line.startswith('#define') and 'RT_CPUS_NR' in line:
            parts = line.split()
            if len(parts) >= 3 and parts[2].isdigit():
                cpus_nr = int(parts[2])
                break

with open('link_cpus.lds', 'w') as cpus_lds:
    cpus_lds.write(f'RT_CPUS_NR = {cpus_nr};\n')
# start the build
DoBuilding(TARGET, objs)
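
For example, with "#define RT_CPUS_NR 4" in rtconfig.h the script emits

    RT_CPUS_NR = 4;

into link_cpus.lds, which the linker scripts below INCLUDE to size the stack region and the .percpu clones; if rtconfig.h or the macro is missing, it falls back to a single hart.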

View File

@@ -24,6 +24,10 @@
#include "plic.h"
#include "stack.h"
#ifdef RT_USING_SMP
#include "interrupt.h"
#endif /* RT_USING_SMP */
#ifdef RT_USING_SMART
#include "riscv_mmu.h"
#include "mmu.h"
@@ -89,6 +93,11 @@ void rt_hw_board_init(void)
rt_hw_tick_init();
#ifdef RT_USING_SMP
/* ipi init */
rt_hw_ipi_init();
#endif /* RT_USING_SMP */
#ifdef RT_USING_COMPONENTS_INIT
rt_components_board_init();
#endif

View File

@@ -25,7 +25,7 @@ extern unsigned int __bss_end;
#define RT_HW_HEAP_BEGIN ((void *)&__bss_end)
#define RT_HW_HEAP_END ((void *)(RT_HW_HEAP_BEGIN + 64 * 1024 * 1024))
#define RT_HW_PAGE_START RT_HW_HEAP_END
#define RT_HW_PAGE_END ((void *)(KERNEL_VADDR_START + (256 * 1024 * 1024 - VIRT64_SBI_MEMSZ)))
#define RT_HW_PAGE_END ((void *)(KERNEL_VADDR_START + (128 * 1024 * 1024 - VIRT64_SBI_MEMSZ)))
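Schematically, the resulting layout under the CI configuration from point 4 of the commit message (QEMU with 128 MB of RAM) is:

    KERNEL_VADDR_START .. __bss_end                                 kernel image + bss
    __bss_end .. __bss_end + 64 MB                                  RT_HW_HEAP
    heap end .. KERNEL_VADDR_START + 128 MB - VIRT64_SBI_MEMSZ      page pool (RT_HW_PAGE_*)

Keeping RT_HW_PAGE_END inside the 128 MB actually present prevents the page allocator from managing nonexistent memory.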
void rt_hw_board_init(void);
void rt_init_user_mem(struct rt_thread *thread, const char *name,

View File

@@ -9,6 +9,7 @@
*/
INCLUDE "link_stacksize.lds"
INCLUDE "link_cpus.lds"
OUTPUT_ARCH( "riscv" )
@@ -121,12 +122,9 @@ SECTIONS
{
. = ALIGN(64);
__stack_start__ = .;
. += __STACKSIZE__;
__stack_cpu0 = .;
. += __STACKSIZE__;
__stack_cpu1 = .;
/* Dynamically allocate stack areas according to RT_CPUS_NR */
. += (__STACKSIZE__ * RT_CPUS_NR);
__stack_end__ = .;
} > SRAM
.sbss :
@@ -138,6 +136,24 @@ SECTIONS
*(.scommon)
} > SRAM
.percpu (NOLOAD) :
{
/* 2MB Align for MMU early map */
. = ALIGN(0x200000);
PROVIDE(__percpu_start = .);
*(.percpu)
/* 2MB Align for MMU early map */
. = ALIGN(0x200000);
PROVIDE(__percpu_end = .);
/* Reserve one clone of the area for each remaining hart */
. = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1);
PROVIDE(__percpu_real_end = .);
} > SRAM
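As a worked example of the reservation above, take RT_CPUS_NR = 8 and a .percpu payload that rounds up to one 2 MB block: __percpu_end - __percpu_start = 2 MB, the clone expression advances the location counter by 2 MB * (8 - 1) = 14 MB, and the whole window [__percpu_start, __percpu_real_end) spans 16 MB, one 2 MB-aligned copy per hart.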
.bss :
{
*(.bss)

View File

@@ -0,0 +1 @@
RT_CPUS_NR = 8;

View File

@@ -9,6 +9,7 @@
*/
INCLUDE "link_stacksize.lds"
INCLUDE "link_cpus.lds"
OUTPUT_ARCH( "riscv" )
@@ -122,12 +123,9 @@ SECTIONS
{
. = ALIGN(64);
__stack_start__ = .;
. += __STACKSIZE__;
__stack_cpu0 = .;
. += __STACKSIZE__;
__stack_cpu1 = .;
/* Dynamically allocate stack areas according to RT_CPUS_NR */
. += (__STACKSIZE__ * RT_CPUS_NR);
__stack_end__ = .;
} > SRAM
.sbss :
@@ -139,6 +137,24 @@ SECTIONS
*(.scommon)
} > SRAM
.percpu (NOLOAD) :
{
/* 2MB Align for MMU early map */
. = ALIGN(0x200000);
PROVIDE(__percpu_start = .);
*(.percpu)
/* 2MB Align for MMU early map */
. = ALIGN(0x200000);
PROVIDE(__percpu_end = .);
/* Reserve one clone of the area for each remaining hart */
. = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1);
PROVIDE(__percpu_real_end = .);
} > SRAM
.bss :
{
*(.bss)

View File

@@ -1,4 +1,16 @@
qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S \
QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S"
if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then
hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}')
if [ -z "$hart_num" ]; then
hart_num=1
fi
QEMU_CMD="$QEMU_CMD -smp $hart_num"
fi
QEMU_CMD="$QEMU_CMD \
-drive if=none,file=sd.bin,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
-netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0
-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0"
eval $QEMU_CMD
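
With RT_USING_SMP enabled and the default link_cpus.lds above (RT_CPUS_NR = 8), the assembled command line is equivalent to:

    qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S -smp 8 \
        -drive if=none,file=sd.bin,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
        -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
        -device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0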

View File

@@ -24,7 +24,20 @@ if [ ! -f $path_image ]; then
exit
fi
qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin \
QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin"
if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then
hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds 2>/dev/null | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}')
if [ -z "$hart_num" ] || [ "$hart_num" -lt 1 ]; then
echo "Warning: Invalid or missing RT_CPUS_NR, defaulting to 1"
hart_num=1
fi
QEMU_CMD="$QEMU_CMD -smp $hart_num"
fi
QEMU_CMD="$QEMU_CMD \
-drive if=none,file=$path_image,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
-netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0
-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0"
eval $QEMU_CMD

View File

@@ -7,6 +7,8 @@ CPPPATH = [cwd]
if not GetDepend('ARCH_USING_ASID'):
SrcRemove(src, ['asid.c'])
src.append('../common/atomic_riscv.c')
group = DefineGroup('CPU', src, depend = [''], CPPPATH = CPPPATH)
Return('group')

View File

@@ -69,29 +69,47 @@
.endm
/*
* #ifdef RT_USING_SMP
* void rt_hw_context_switch_to(rt_ubase_t to, struct rt_thread *to_thread);
* #else
* void rt_hw_context_switch_to(rt_ubase_t to);
*
* a0 --> to SP pointer
* #endif
* a0 --> to
* a1 --> to_thread
*/
.globl rt_hw_context_switch_to
rt_hw_context_switch_to:
LOAD sp, (a0)
#ifdef RT_USING_SMP
/* Pass the previous CPU lock status to rt_cpus_lock_status_restore for restoration */
mv a0, a1
call rt_cpus_lock_status_restore
#endif
call rt_thread_self
mv s1, a0
#ifndef RT_USING_SMP
// if RT_USING_SMP is enabled, this is handled by rt_cpus_lock_status_restore.
#ifdef RT_USING_SMART
call lwp_aspace_switch
call lwp_aspace_switch
#endif
#endif
RESTORE_CONTEXT
sret
/*
* #ifdef RT_USING_SMP
* void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread);
* #else
* void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
* #endif
*
* a0 --> from SP pointer
* a1 --> to SP pointer
* a2 --> to_thread
*
* It should only be called with local interrupts disabled
*/
@@ -103,13 +121,22 @@ rt_hw_context_switch:
// restore to thread SP
LOAD sp, (a1)
#ifdef RT_USING_SMP
/* Pass the previous CPU lock status to rt_cpus_lock_status_restore for restoration */
mv a0, a2
call rt_cpus_lock_status_restore
#endif /*RT_USING_SMP*/
// restore Address Space
call rt_thread_self
mv s1, a0
#ifndef RT_USING_SMP
// if RT_USING_SMP is enabled, this is handled by rt_cpus_lock_status_restore.
#ifdef RT_USING_SMART
call lwp_aspace_switch
call lwp_aspace_switch
#endif
#endif
RESTORE_CONTEXT
sret

View File

@@ -18,16 +18,25 @@
#include <sbi.h>
#include <encoding.h>
#ifdef ARCH_MM_MMU
#include "mmu.h"
#endif
#ifdef RT_USING_SMP
#include "tick.h"
#include "interrupt.h"
#endif /* RT_USING_SMP */
#ifdef ARCH_RISCV_FPU
#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
#else
#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
#endif
#ifdef ARCH_RISCV_VECTOR
#define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS)
#define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS)
#else
#define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE
#define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE
#endif
#ifdef RT_USING_SMART
#include <lwp_arch.h>
@@ -49,10 +58,13 @@ volatile rt_ubase_t rt_interrupt_to_thread = 0;
*/
volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0;
#ifdef ARCH_MM_MMU
static rt_ubase_t *percpu_hartid;
#endif
void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
{
rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)
((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
rt_memset(frame, 0, sizeof(struct rt_hw_switch_frame));
@@ -64,7 +76,21 @@ void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
int rt_hw_cpu_id(void)
{
#ifndef RT_USING_SMP
return 0;
#else
if (rt_kmem_pvoff() != 0)
{
return *percpu_hartid;
}
else
{
// if the MMU is not enabled or pvoff == 0, read the hartid from the satp register
rt_ubase_t hartid;
asm volatile("csrr %0, satp" : "=r"(hartid));
return hartid & 0xFFFF; // Assuming hartid fits in lower 16 bits
}
#endif /* RT_USING_SMP */
}
/**
@@ -117,6 +143,18 @@ void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t
return;
}
#else
void rt_hw_context_switch_interrupt(void *context, rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread)
{
/* Perform architecture-specific context switch. This call will
* restore the target thread context and should not return when a
* switch is performed. The caller (scheduler) invoked this function
* in a context where local IRQs are disabled. */
rt_uint32_t level;
level = rt_hw_local_irq_disable();
rt_hw_context_switch((rt_ubase_t)from, (rt_ubase_t)to, to_thread);
rt_hw_local_irq_enable(level);
}
#endif /* end of RT_USING_SMP */
/** shutdown CPU */
@@ -137,3 +175,80 @@ void rt_hw_set_process_id(int pid)
{
// TODO
}
#ifdef RT_USING_SMP
extern void _start(void);
extern int boot_hartid;
/* Boot secondary harts using the SBI HSM hart_start call. */
void rt_hw_secondary_cpu_up(void)
{
rt_uint64_t entry_pa;
int hart, ret;
/* translate kernel virtual _start to physical address. */
#ifdef ARCH_MM_MMU
if (rt_kmem_pvoff() != 0)
{
entry_pa = (rt_uint64_t)rt_kmem_v2p(&_start);
}
else
{
entry_pa = (rt_uint64_t)&_start;
}
#else
entry_pa = (rt_uint64_t)&_start;
#endif /* ARCH_MM_MMU */
/* Assumes hart IDs are in range [0, RT_CPUS_NR) */
RT_ASSERT(boot_hartid < RT_CPUS_NR);
for (hart = 0; hart < RT_CPUS_NR; hart++)
{
if (hart == boot_hartid)
continue;
ret = sbi_hsm_hart_start((unsigned long)hart,
(unsigned long)entry_pa,
0UL);
if (ret)
{
rt_kprintf("sbi_hsm_hart_start failed for hart %d: %d\n", hart, ret);
}
}
}
#ifdef ARCH_MM_MMU
void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid)
{
RT_ASSERT(hartid < RT_CPUS_NR);
rt_ubase_t *percpu_hartid_paddr;
rt_size_t percpu_size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
percpu_hartid = percpu_ptr;
// convert the virtual address to its physical address
percpu_ptr = (rt_ubase_t *)((rt_ubase_t)percpu_ptr + (rt_ubase_t)rt_kmem_pvoff());
percpu_hartid_paddr = percpu_ptr;
/* Save to the real area */
*(rt_ubase_t *)((void *)percpu_hartid_paddr + hartid * percpu_size) = hartid;
}
#endif /* ARCH_MM_MMU */
void secondary_cpu_entry(void)
{
#ifdef RT_USING_SMART
/* switch to kernel address space */
rt_hw_aspace_switch(&rt_kernel_space);
#endif
/* The PLIC peripheral interrupts are currently handled by the boot_hart. */
/* Enable the Supervisor-Timer bit in SIE */
rt_hw_tick_init();
/* ipi init */
rt_hw_ipi_init();
rt_hw_spin_lock(&_cpus_lock);
/* invoke system scheduler start for secondary CPU */
rt_system_scheduler_start();
}
#endif /* RT_USING_SMP */
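
A sketch of the physical layout rt_hw_percpu_hartid_init writes into, assuming RT_CPUS_NR = 2 and a .percpu section that rounds to one 2 MB block:

    pa(__percpu_start) + 0 MB : hart 0's clone, holds hartid 0  <- hart 0 maps VA __percpu_start here
    pa(__percpu_start) + 2 MB : hart 1's clone, holds hartid 1  <- hart 1 maps VA __percpu_start here

Each hart dereferences the same virtual percpu_hartid pointer and reads its own ID, which is what rt_hw_cpu_id() relies on when pvoff != 0.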

View File

@@ -43,6 +43,10 @@ rt_inline void rt_hw_isb(void)
__asm__ volatile(OPC_FENCE_I:::"memory");
}
#ifdef ARCH_MM_MMU
void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid);
#endif
#endif
#endif

View File

@@ -176,6 +176,8 @@
#define PTE_A 0x040 // Accessed
#define PTE_D 0x080 // Dirty
#define PTE_SOFT 0x300 // Reserved for Software
#define PTE_ATTR_RW (PTE_R | PTE_W)
#define PTE_ATTR_RWX (PTE_ATTR_RW | PTE_X)
#define PTE_PPN_SHIFT 10

View File

@@ -60,10 +60,17 @@ _handle_interrupt_and_exception:
call handle_trap
_interrupt_exit:
#ifndef RT_USING_SMP
la s0, rt_thread_switch_interrupt_flag
lw s2, 0(s0)
beqz s2, _resume_execution
sw zero, 0(s0)
#else
mv a0, sp
call rt_scheduler_do_irq_switch
// if no switch was performed, rt_scheduler_do_irq_switch returns and we continue at _resume_execution
j _resume_execution
#endif /* RT_USING_SMP */
_context_switch:
la t0, rt_interrupt_from_thread
@@ -88,6 +95,7 @@ _resume_kernel:
csrw sscratch, zero
sret
#ifndef RT_USING_SMP
.global rt_hw_interrupt_enable
rt_hw_interrupt_enable:
csrs sstatus, a0 /* restore to old csr */
@@ -97,3 +105,18 @@ rt_hw_interrupt_enable:
rt_hw_interrupt_disable:
csrrci a0, sstatus, 2 /* clear SIE */
jr ra
#else
.global rt_hw_local_irq_disable
rt_hw_local_irq_disable:
csrrci a0, sstatus, 2
jr ra
.global rt_hw_local_irq_enable
rt_hw_local_irq_enable:
csrs sstatus, a0
jr ra
.global rt_hw_secondary_cpu_idle_exec
rt_hw_secondary_cpu_idle_exec:
jr ra
#endif /* RT_USING_SMP */
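
These two routines follow RT-Thread's SMP porting contract: rt_hw_local_irq_disable() returns the previous sstatus value in a0 so the caller can restore it later. A minimal usage sketch in C:

    rt_base_t level;

    level = rt_hw_local_irq_disable(); /* csrrci clears SIE, returns old sstatus */
    /* per-hart critical section */
    rt_hw_local_irq_enable(level);     /* csrs restores the saved SIE bit */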

View File

@@ -38,10 +38,21 @@
static size_t _unmap_area(struct rt_aspace *aspace, void *v_addr);
/* Define the structure of early page table */
struct page_table
{
unsigned long page[ARCH_PAGE_SIZE / sizeof(unsigned long)];
};
static struct page_table *__init_page_array;
#ifndef RT_USING_SMP
static void *current_mmu_table = RT_NULL;
#else
static void *current_mmu_table[RT_CPUS_NR] = { RT_NULL };
#endif /* RT_USING_SMP */
volatile __attribute__((aligned(4 * 1024)))
rt_ubase_t MMUTable[__SIZE(VPN2_BIT)];
rt_ubase_t MMUTable[__SIZE(VPN2_BIT) * RT_CPUS_NR];
/**
* @brief Switch the current address space to the specified one.
@@ -69,8 +80,15 @@ void rt_hw_aspace_switch(rt_aspace_t aspace)
#else /* !ARCH_USING_ASID */
void rt_hw_aspace_switch(rt_aspace_t aspace)
{
uintptr_t page_table = (uintptr_t)rt_kmem_v2p(aspace->page_table);
// It is necessary to find the MMU page table specific to each core.
uint32_t hartid = rt_cpu_get_id();
uintptr_t ptr = (uintptr_t)aspace->page_table + (uintptr_t)(hartid * ARCH_PAGE_SIZE);
uintptr_t page_table = (uintptr_t)rt_kmem_v2p((void *)ptr);
#ifndef RT_USING_SMP
current_mmu_table = aspace->page_table;
#else
current_mmu_table[rt_hw_cpu_id()] = (void *)ptr;
#endif
write_csr(satp, (((size_t)SATP_MODE) << SATP_MODE_OFFSET) |
((rt_ubase_t)page_table >> PAGE_OFFSET_BIT));
@@ -85,7 +103,11 @@ void rt_hw_asid_init(void)
/* get current page table. */
void *rt_hw_mmu_tbl_get()
{
#ifndef RT_USING_SMP
return current_mmu_table;
#else
return current_mmu_table[rt_hw_cpu_id()];
#endif /* RT_USING_SMP */
}
/* Map a single virtual address page to a physical address page in the page table. */
@@ -98,66 +120,153 @@ static int _map_one_page(struct rt_aspace *aspace, void *va, void *pa,
l1_off = GET_L1((size_t)va);
l2_off = GET_L2((size_t)va);
l3_off = GET_L3((size_t)va);
mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off;
if (PTE_USED(*mmu_l1))
/* Create a separate page table for each hart to facilitate access to the .percpu section. */
for (int hartid = 0; hartid < RT_CPUS_NR; hartid++)
{
mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET);
}
else
{
mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0);
mmu_l1 = (rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(hartid * ARCH_PAGE_SIZE)) + l1_off;
if (mmu_l2)
if (PTE_USED(*mmu_l1))
{
rt_memset(mmu_l2, 0, PAGE_SIZE);
rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE);
*mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET),
PAGE_DEFAULT_ATTR_NEXT);
rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1));
mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET);
}
else
{
return -1;
mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0);
if (mmu_l2)
{
rt_memset(mmu_l2, 0, PAGE_SIZE);
rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE);
*mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET),
PAGE_DEFAULT_ATTR_NEXT);
rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1));
}
else
{
return -1;
}
}
}
if (PTE_USED(*(mmu_l2 + l2_off)))
{
RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off)));
mmu_l3 =
(rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET);
}
else
{
mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0);
if (mmu_l3)
if (PTE_USED(*(mmu_l2 + l2_off)))
{
rt_memset(mmu_l3, 0, PAGE_SIZE);
rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE);
*(mmu_l2 + l2_off) =
COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET),
PAGE_DEFAULT_ATTR_NEXT);
rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2));
/* declares a reference to parent page table */
rt_page_ref_inc((void *)mmu_l2, 0);
RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off)));
mmu_l3 =
(rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET);
}
else
{
return -1;
mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0);
if (mmu_l3)
{
rt_memset(mmu_l3, 0, PAGE_SIZE);
rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE);
*(mmu_l2 + l2_off) =
COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET),
PAGE_DEFAULT_ATTR_NEXT);
rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2));
/* declares a reference to parent page table */
rt_page_ref_inc((void *)mmu_l2, 0);
}
else
{
return -1;
}
}
RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
/* declares a reference to parent page table */
rt_page_ref_inc((void *)mmu_l3, 0);
*(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
}
RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
/* declares a reference to parent page table */
rt_page_ref_inc((void *)mmu_l3, 0);
*(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
return 0;
}
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
static int _map_percpu_area(rt_ubase_t *table, void *va, void *pa, int cpu_id)
{
unsigned long page;
rt_ubase_t off, level_shift;
level_shift = PPN2_SHIFT;
// map pages - 4KB
for (int level = 0; level < 2; ++level)
{
off = ((rt_ubase_t)va >> level_shift) & VPN_MASK;
if (table[off] & PTE_V)
{
/* Step into the next level page table */
table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
level_shift -= VPN_BITS;
continue;
}
if (!(page = get_free_page()))
{
return MMU_MAP_ERROR_NOPAGE;
}
rt_memset((void *)page, 0, ARCH_PAGE_SIZE);
table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_BITS) | PTE_V;
rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *));
/* Step into the next level page table */
table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
level_shift -= VPN_BITS;
}
off = ((rt_ubase_t)va >> level_shift) & VPN_MASK;
table[off] = (((rt_ubase_t)pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;
rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *));
return ARCH_PAGE_SIZE;
}
// Ensure the .percpu section is mapped to each core's own physical copy.
static void rt_hw_percpu_mmu_init_check(void)
{
size_t mapped, size;
void *page_table, *vaddr, *paddr;
static rt_bool_t inited = RT_FALSE;
if (inited)
{
return;
}
inited = RT_TRUE;
page_table = rt_kernel_space.page_table;
for (int hartid = 0; hartid < RT_CPUS_NR; ++hartid)
{
vaddr = &__percpu_start;
paddr = vaddr + rt_kmem_pvoff();
size = (size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
/* Offset to per-CPU partition for current CPU */
paddr += size * hartid;
while (size > 0)
{
MM_PGTBL_LOCK(&rt_kernel_space);
mapped = _map_percpu_area(page_table, vaddr, paddr, hartid);
MM_PGTBL_UNLOCK(&rt_kernel_space);
RT_ASSERT(mapped > 0);
size -= mapped;
vaddr += mapped;
paddr += mapped;
}
page_table += ARCH_PAGE_SIZE;
}
}
#endif /* RT_USING_SMP && ARCH_MM_MMU */
/**
* @brief Maps a virtual address space to a physical address space.
*
@@ -185,24 +294,35 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
int ret = -1;
void *unmap_va = v_addr;
size_t npages = size >> ARCH_PAGE_SHIFT;
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
// Map the memory of the .percpu section separately for each core.
rt_hw_percpu_mmu_init_check();
#endif
/* TODO trying with HUGEPAGE here */
while (npages--)
{
MM_PGTBL_LOCK(aspace);
ret = _map_one_page(aspace, v_addr, p_addr, attr);
MM_PGTBL_UNLOCK(aspace);
if (ret != 0)
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
// skip mapping .percpu section pages
if (v_addr < (void *)&__percpu_start ||
v_addr >= (void *)&__percpu_end)
#endif
{
/* error, undo map */
while (unmap_va != v_addr)
MM_PGTBL_LOCK(aspace);
ret = _map_one_page(aspace, v_addr, p_addr, attr);
MM_PGTBL_UNLOCK(aspace);
if (ret != 0)
{
MM_PGTBL_LOCK(aspace);
_unmap_area(aspace, unmap_va);
MM_PGTBL_UNLOCK(aspace);
unmap_va += ARCH_PAGE_SIZE;
/* error, undo map */
while (unmap_va != v_addr)
{
MM_PGTBL_LOCK(aspace);
_unmap_area(aspace, unmap_va);
MM_PGTBL_UNLOCK(aspace);
unmap_va += ARCH_PAGE_SIZE;
}
break;
}
break;
}
v_addr += ARCH_PAGE_SIZE;
p_addr += ARCH_PAGE_SIZE;
@@ -216,6 +336,99 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
return NULL;
}
#ifdef ARCH_MM_MMU
void set_free_page(void *page_array)
{
__init_page_array = page_array;
}
// Early-stage page allocator
unsigned long get_free_page(void)
{
static rt_atomic_t page_off = 0;
rt_atomic_t old_off = rt_hw_atomic_add(&page_off, 1);
if (old_off < ARCH_PAGE_SIZE / sizeof(unsigned long))
{
return (unsigned long)(__init_page_array[old_off].page);
}
return 0;
}
#ifdef RT_USING_SMP
// Perform early mapping for the .percpu section
static int rt_hw_mmu_map_percpu_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa)
{
unsigned long page;
rt_ubase_t off, level_shift;
level_shift = PPN2_SHIFT;
// page size 2MB
off = (va >> level_shift) & VPN_MASK;
// Step into the next level page table
tbl = (rt_ubase_t *)((tbl[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
level_shift -= VPN_BITS;
off = (va >> level_shift) & VPN_MASK;
tbl[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;
asm volatile("sfence.vma x0, x0");
return 0;
}
#endif /* RT_USING_SMP */
static int rt_hw_mmu_map_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa,
rt_ubase_t attr)
{
unsigned long page, *table;
rt_ubase_t off, level_shift;
if ((va & (L2_PAGE_SIZE - 1)) || (pa & (L2_PAGE_SIZE - 1)))
{
return MMU_MAP_ERROR_VANOTALIGN;
}
table = tbl;
level_shift = PPN2_SHIFT;
// page size 2MB
for (int level = 0; level < 1; ++level)
{
off = (va >> level_shift) & VPN_MASK;
if (!(table[off] & PTE_V))
{
if (!(page = get_free_page()))
{
return MMU_MAP_ERROR_NOPAGE;
}
rt_memset((void *)page, 0, ARCH_PAGE_SIZE);
table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_PPN_SHIFT) | PTE_V;
}
if ((table[off] & PTE_ATTR_RWX) != 0)
{
/* Not a next-level table entry but a leaf page: mapping conflict */
return MMU_MAP_ERROR_CONFLICT;
}
/* Step into the next level page table */
page = (table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT;
table = (unsigned long *)page;
level_shift -= VPN_BITS;
}
off = (va >> level_shift) & VPN_MASK;
table[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | attr;
return 0;
}
#endif
/* unmap page table entry */
static void _unmap_pte(rt_ubase_t *pentry, rt_ubase_t *lvl_entry[], int level)
{
@@ -456,7 +669,7 @@ static rt_ubase_t *_query(struct rt_aspace *aspace, void *vaddr, int *level)
return RT_NULL;
}
mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off;
mmu_l1 = ((rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(rt_hw_cpu_id() * ARCH_PAGE_SIZE))) + l1_off;
if (PTE_USED(*mmu_l1))
{
@@ -648,8 +861,6 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr)
#define SATP_BASE ((rt_ubase_t)SATP_MODE << SATP_MODE_OFFSET)
extern unsigned int __bss_end;
/**
* @brief Early memory setup function for hardware initialization.
*
@@ -662,12 +873,12 @@ extern unsigned int __bss_end;
* before the memory management system is fully operational.
* Here the identity mapping is implemented by a 1-stage page table, whose page size is 1GB.
*/
void rt_hw_mem_setup_early(void)
void rt_hw_mem_setup_early(void *pgtbl, rt_uint64_t hartid)
{
rt_ubase_t pv_off;
rt_ubase_t pv_off, size;
rt_ubase_t ps = 0x0;
rt_ubase_t vs = 0x0;
rt_ubase_t *early_pgtbl = (rt_ubase_t *)(((size_t)&__bss_end + 4095) & ~0xfff);
rt_ubase_t *early_pgtbl = (rt_ubase_t *)(pgtbl + hartid * ARCH_PAGE_SIZE);
/* calculate pv_offset */
void *symb_pc;
@@ -705,14 +916,39 @@ void rt_hw_mem_setup_early(void)
vs = ps - pv_off;
/* relocate region */
rt_ubase_t vs_idx = GET_L1(vs);
rt_ubase_t ve_idx = GET_L1(vs + 0x80000000);
for (size_t i = vs_idx; i < ve_idx; i++)
rt_ubase_t ve = vs + 0x80000000;
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
while (vs < ve)
{
rt_hw_mmu_map_early(early_pgtbl, vs, ps, MMU_MAP_EARLY);
vs += L2_PAGE_SIZE;
ps += L2_PAGE_SIZE;
}
#else
for (int i = GET_L1(vs); i < GET_L1(ve); i++)
{
early_pgtbl[i] = COMBINEPTE(ps, MMU_MAP_EARLY);
ps += L1_PAGE_SIZE;
}
#endif
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
// map .percpu section
ps = (rt_ubase_t)&__percpu_start;
vs = ps - rt_kmem_pvoff();
size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
/* Offset to per-CPU partition for current CPU */
ps += hartid * size;
ve = vs + size;
while (vs < ve)
{
/* Map physical address per-CPU partition */
rt_hw_mmu_map_percpu_early(early_pgtbl, vs, ps);
ps += L2_PAGE_SIZE;
vs += L2_PAGE_SIZE;
}
#endif
/* apply new mapping */
asm volatile("sfence.vma x0, x0");
write_csr(satp, SATP_BASE | ((size_t)early_pgtbl >> PAGE_OFFSET_BIT));
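
For reference, the PTE encoding both early-map helpers produce (ARCH_PAGE_SHIFT = 12, PTE_BITS = 10, i.e. the standard Sv39 layout): mapping pa = 0x80200000 gives ppn = pa >> 12 = 0x80200, and the stored entry is (0x80200 << 10) | attr = 0x20080000 | attr.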

View File

@@ -56,6 +56,14 @@ struct mem_desc
#define MMU_MAP_ERROR_NOPAGE -3
#define MMU_MAP_ERROR_CONFLICT -4
#define VPN_MASK 0x1ffUL
#define PTE_BITS 10
#define VPN_BITS 9
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
extern unsigned int __percpu_end, __percpu_start;
#endif /* RT_USING_SMP && ARCH_MM_MMU */
void *rt_hw_mmu_tbl_get(void);
int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, rt_ubase_t size,
rt_ubase_t *vtable, rt_ubase_t pv_off);
@@ -72,4 +80,5 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
void *rt_hw_mmu_pgtbl_create(void);
void rt_hw_mmu_pgtbl_delete(void *pgtbl);
unsigned long get_free_page(void);
#endif

View File

@@ -32,9 +32,30 @@ _start:
1:
/* save hartid */
la t0, boot_hartid /* global variable boot_hartid */
#ifdef RT_USING_SMP
lw t2, (t0)
li t3, 0xdeadbeef /* Sentinel value indicating uninitialized boot_hartid */
li t4, 0xffffffff
and t2, t2, t4 /* Extract the lower 32 bits. */
bne t2, t3, system_init /* If boot_hartid is already set (not the sentinel), skip the assignment. */
#endif
mv t1, a0 /* get the hartid passed by SBI in the a0 register (S-mode) */
sw t1, (t0) /* store the low 32 bits of t1 to the address held in t0 */
#ifdef RT_USING_SMP
system_init:
#endif
/*
* When ARCH_MM_MMU is not enabled or pvoff==0:
* Store hartid temporarily in the satp register because:
* 1. satp is not used for address translation when MMU is disabled or pvoff==0.
* 2. This value will be moved to percpu_hartid once MMU is initialized.
* This approach avoids using extra memory or registers during the critical boot phase,
* but developers should be aware that satp is overloaded for this purpose until MMU setup.
*/
csrw satp, a0
/* clear Interrupt Registers */
csrw sie, 0
csrw sip, 0
@@ -51,7 +72,10 @@ _start:
li x7, 0
li x8, 0
li x9, 0
#ifndef RT_USING_SMP
/* In the SMP architecture, a0 will be used again later */
li x10,0
#endif
li x11,0
li x12,0
li x13,0
@@ -85,17 +109,42 @@ _start:
la gp, __global_pointer$
.option pop
#ifndef RT_USING_SMP
/* removed SMP support here */
la sp, __stack_start__
li t0, __STACKSIZE__
add sp, sp, t0
#else
/* Initialize the sp pointer according to different hartids. */
mv t0, a0
/* calculate stack offset: hartid * __STACKSIZE__ */
li t1, __STACKSIZE__
mul t0, t0, t1 /* t0 = hartid * __STACKSIZE__ */
/* set stack pointer */
la sp, __stack_start__
add sp, sp, t0 /* sp = __stack_start__ + hartid * __STACKSIZE__ */
add sp, sp, t1 /* sp += __STACKSIZE__ (point to stack top) */
mv t0, a0
lw t1, boot_hartid
mv tp, a0
bne t0, t1, early_secondary_cpu_entry
#endif /* RT_USING_SMP */
/**
* sscratch is always zero on kernel mode
*/
csrw sscratch, zero
call init_bss
early_secondary_cpu_entry:
#ifdef ARCH_MM_MMU
// Manually manage pages in the early stage
la a0, .early_page_array
call set_free_page
la a0, .early_tbl_page
mv a1, tp
call rt_hw_mem_setup_early
call rt_kmem_pvoff
/* a0 := pvoff */
@@ -106,13 +155,37 @@ _start:
sub x1, x1, a0
ret
_after_pc_relocation:
#if defined(RT_USING_SMP)
/* If the MMU is enabled, save the hartid in percpu_hartid.
* -> .percpu_hartid (hartid_0)
* ...... align(2MB)
* -> (hartid_1)
* ......
*/
la a0, .percpu_hartid
mv a1, tp
call rt_hw_percpu_hartid_init
#endif
/* relocate gp */
sub gp, gp, a0
#ifndef RT_USING_SMP
/* relocate context: sp */
la sp, __stack_start__
li t0, __STACKSIZE__
add sp, sp, t0
#else
/* Initialize the sp pointer according to different hartids. */
mv t0, tp
/* calculate stack offset: hartid * __STACKSIZE__ */
li t1, __STACKSIZE__
mul t0, t0, t1 /* t0 = hartid * __STACKSIZE__ */
/* set stack pointer */
la sp, __stack_start__
add sp, sp, t0 /* sp = __stack_start__ + hartid * __STACKSIZE__ */
add sp, sp, t1 /* sp += __STACKSIZE__ (point to stack top) */
#endif /* RT_USING_SMP */
/* reset s0-fp */
mv s0, zero
@@ -121,7 +194,12 @@ _after_pc_relocation:
la t0, trap_entry
csrw stvec, t0
1:
#ifdef RT_USING_SMP
mv t0, tp
lw t1, boot_hartid
bne t0, t1, secondary_cpu_entry
#endif
#endif /* ARCH_MM_MMU */
call sbi_init
call primary_cpu_entry
@@ -131,3 +209,31 @@ _never_return_here:
.global _start_link_addr
_start_link_addr:
.dword __text_start
#ifdef ARCH_MM_MMU
#ifdef RT_USING_SMP
/*
* Per-CPU hartid storage (one slot per hart, in the .percpu section)
*/
.section ".percpu"
.percpu_hartid:
.space 16
#endif
.section ".bss"
.equ page_size, 4096
.balign page_size
.early_tbl_page:
.space 1 * page_size
#if defined(RT_USING_SMP) && RT_CPUS_NR > 1
.space (RT_CPUS_NR - 1) * page_size
#endif
.early_page_array:
.space (8 + 8) * page_size
#ifdef RT_USING_SMP
.space RT_CPUS_NR * 5 * page_size
#endif
#endif /* ARCH_MM_MMU */
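
As a sizing example for the reservations above, with RT_CPUS_NR = 8: .early_tbl_page reserves 1 + (8 - 1) = 8 pages (one early root table per hart), and .early_page_array reserves (8 + 8) + 8 * 5 = 56 pages for the early allocator that get_free_page() hands out.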

View File

@@ -76,44 +76,44 @@ void dump_regs(struct rt_hw_stack_frame *regs)
rt_kprintf("\tCurrent Page Table(Physical) = %p\n",
__MASKVALUE(satp_v, __MASK(44)) << PAGE_OFFSET_BIT);
rt_kprintf("\tCurrent ASID = %p\n", __MASKVALUE(satp_v >> 44, __MASK(16))
<< PAGE_OFFSET_BIT);
<< PAGE_OFFSET_BIT);
const char *mode_str = "Unknown Address Translation/Protection Mode";
switch (__MASKVALUE(satp_v >> 60, __MASK(4)))
{
case 0:
mode_str = "No Address Translation/Protection Mode";
break;
case 0:
mode_str = "No Address Translation/Protection Mode";
break;
case 8:
mode_str = "Page-based 39-bit Virtual Addressing Mode";
break;
case 8:
mode_str = "Page-based 39-bit Virtual Addressing Mode";
break;
case 9:
mode_str = "Page-based 48-bit Virtual Addressing Mode";
break;
case 9:
mode_str = "Page-based 48-bit Virtual Addressing Mode";
break;
}
rt_kprintf("\tMode = %s\n", mode_str);
rt_kprintf("-----------------Dump OK---------------------\n");
}
static const char *Exception_Name[] = {"Instruction Address Misaligned",
"Instruction Access Fault",
"Illegal Instruction",
"Breakpoint",
"Load Address Misaligned",
"Load Access Fault",
"Store/AMO Address Misaligned",
"Store/AMO Access Fault",
"Environment call from U-mode",
"Environment call from S-mode",
"Reserved-10",
"Reserved-11",
"Instruction Page Fault",
"Load Page Fault",
"Reserved-14",
"Store/AMO Page Fault"};
static const char *Exception_Name[] = { "Instruction Address Misaligned",
"Instruction Access Fault",
"Illegal Instruction",
"Breakpoint",
"Load Address Misaligned",
"Load Access Fault",
"Store/AMO Address Misaligned",
"Store/AMO Access Fault",
"Environment call from U-mode",
"Environment call from S-mode",
"Reserved-10",
"Reserved-11",
"Instruction Page Fault",
"Load Page Fault",
"Reserved-14",
"Store/AMO Page Fault" };
static const char *Interrupt_Name[] = {
"User Software Interrupt",
@@ -135,7 +135,16 @@ static volatile int nested = 0;
#define ENTER_TRAP nested += 1
#define EXIT_TRAP nested -= 1
#define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \
if (nested != 1) handle_nested_trap_panic(cause, tval, epc, eframe)
if (nested != 1) \
handle_nested_trap_panic(cause, tval, epc, eframe)
#else
/* Add trap nesting detection under the SMP architecture. */
static volatile int nested[RT_CPUS_NR] = { 0 };
#define ENTER_TRAP nested[rt_hw_cpu_id()] += 1
#define EXIT_TRAP nested[rt_hw_cpu_id()] -= 1
#define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \
if (nested[rt_hw_cpu_id()] != 1) \
handle_nested_trap_panic(cause, tval, epc, eframe)
#endif /* RT_USING_SMP */
static const char *get_exception_msg(int id)
@@ -165,44 +174,44 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
enum rt_mm_fault_type fault_type;
switch (id)
{
case EP_LOAD_PAGE_FAULT:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_LOAD_ACCESS_FAULT:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_LOAD_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_STORE_PAGE_FAULT:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_STORE_ACCESS_FAULT:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_STORE_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_INSTRUCTION_PAGE_FAULT:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_INSTRUCTION_ACCESS_FAULT:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_INSTRUCTION_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
default:
fault_op = 0;
case EP_LOAD_PAGE_FAULT:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_LOAD_ACCESS_FAULT:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_LOAD_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_READ;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_STORE_PAGE_FAULT:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_STORE_ACCESS_FAULT:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_STORE_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_WRITE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_INSTRUCTION_PAGE_FAULT:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_GENERIC_MMU;
break;
case EP_INSTRUCTION_ACCESS_FAULT:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
case EP_INSTRUCTION_ADDRESS_MISALIGNED:
fault_op = MM_FAULT_OP_EXECUTE;
fault_type = MM_FAULT_TYPE_BUS_ERROR;
break;
default:
fault_op = 0;
}
if (fault_op)
@@ -228,7 +237,7 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
dump_regs(sp);
rt_thread_t cur_thr = rt_thread_self();
struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc};
struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc };
rt_kprintf("fp = %p\n", frame.fp);
lwp_backtrace_frame(cur_thr, &frame);
@@ -260,12 +269,12 @@ static int illegal_inst_recoverable(rt_ubase_t stval,
switch (opcode)
{
case 0x57: // V
case 0x27: // scalar FLOAT
case 0x07:
case 0x73: // CSR
flag = 1;
break;
case 0x57: // V
case 0x27: // scalar FLOAT
case 0x07:
case 0x73: // CSR
flag = 1;
break;
}
if (flag)
@@ -314,6 +323,15 @@ void handle_trap(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
tick_isr();
rt_interrupt_leave();
}
#ifdef RT_USING_SMP
else if ((SCAUSE_INTERRUPT | SCAUSE_S_SOFTWARE_INTR) == scause)
{
/* supervisor software interrupt for ipi */
rt_interrupt_enter();
rt_hw_ipi_handler();
rt_interrupt_leave();
}
#endif /* RT_USING_SMP */
else
{
if (SCAUSE_INTERRUPT & scause)
@@ -364,7 +382,7 @@ void handle_trap(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
rt_kprintf("current thread: %s\n", cur_thr->parent.name);
rt_kprintf("--------------Backtrace--------------\n");
struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc};
struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc };
#ifdef RT_USING_SMART
if (!(sp->sstatus & 0x100))

View File

@@ -17,6 +17,12 @@
struct rt_irq_desc irq_desc[MAX_HANDLERS];
#ifdef RT_USING_SMP
#include "sbi.h"
struct rt_irq_desc ipi_desc[RT_MAX_IPI];
uint8_t ipi_vectors[RT_CPUS_NR] = { 0 };
#endif /* RT_USING_SMP */
static rt_isr_handler_t rt_hw_interrupt_handle(rt_uint32_t vector, void *param)
{
rt_kprintf("UN-handled interrupt %d occurred!!!\n", vector);
@@ -53,11 +59,11 @@ void rt_hw_interrupt_umask(int vector)
* @param old_handler the old interrupt service routine
*/
rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler,
void *param, const char *name)
void *param, const char *name)
{
rt_isr_handler_t old_handler = RT_NULL;
if(vector < MAX_HANDLERS)
if (vector < MAX_HANDLERS)
{
old_handler = irq_desc[vector].handler;
if (handler != RT_NULL)
@@ -92,3 +98,141 @@ void rt_hw_interrupt_init()
plic_set_threshold(0);
}
#ifdef RT_USING_SMP
void rt_hw_interrupt_set_priority(int vector, unsigned int priority)
{
plic_set_priority(vector, priority);
}
unsigned int rt_hw_interrupt_get_priority(int vector)
{
return (*(uint32_t *)PLIC_PRIORITY(vector));
}
rt_bool_t rt_hw_interrupt_is_disabled(void)
{
/* Determine the interrupt enable state */
rt_ubase_t sstatus;
__asm__ volatile("csrr %0, sstatus" : "=r"(sstatus));
return (sstatus & SSTATUS_SIE) == 0;
}
void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
{
_lock->slock = 0;
}
void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
{
/* Use ticket lock implemented on top of the 32/64-bit atomic AMO ops.
* The combined word layout (slock) maps two uint16_t fields:
* low 16 bits: owner
* high 16 bits: next (ticket allocator)
* We atomically increment the "next" field by (1 << 16) and use the
* returned old value to compute our ticket. Then wait until owner == ticket.
*/
rt_atomic_t prev;
rt_atomic_t ticket;
rt_atomic_t owner;
/* Allocate a ticket by adding (1 << 16) to slock, prev holds previous value */
prev = rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)(1UL << 16));
ticket = (prev >> 16) & 0xffffUL;
/* Wait until owner equals our ticket */
for (;;)
{
owner = rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & 0xffffUL;
if (owner == ticket)
break;
/* TODO: low-power wait for interrupt while spinning */
}
/* Ensure all following memory accesses are ordered after acquiring the lock */
__asm__ volatile("fence rw, rw" ::: "memory");
}
void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
{
/* Ensure memory operations before unlock are visible before owner increment */
__asm__ volatile("fence rw, rw" ::: "memory");
/* Increment owner (low 16 bits) to hand the lock to the next ticket.
* Atomically load the combined slock word and inspect the low 16-bit
* owner field. If the owner would overflow (0xffff), clear the owner field
* atomically by ANDing with 0xffff0000; otherwise increment owner by 1.
*/
if ((rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & (rt_atomic_t)0xffffUL) == (rt_atomic_t)0xffffUL)
{
/* Atomic clear owner (low 16 bits) when it overflows. Keep next ticket field. */
rt_hw_atomic_and((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)0xffff0000UL);
}
else
{
rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1);
}
// TODO: IPI interrupt to wake up other harts waiting for the lock
/* Make the increment visible to other harts */
__asm__ volatile("fence rw, rw" ::: "memory");
}
void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask)
{
int cpuid = __builtin_ctz(cpu_mask); // get the bit position of the lowest set bit
ipi_vectors[cpuid] |= (uint8_t)ipi_vector;
sbi_send_ipi((const unsigned long *)&cpu_mask);
}
void rt_hw_ipi_init(void)
{
int idx = 0, cpuid = rt_cpu_get_id();
ipi_vectors[cpuid] = 0;
/* init exceptions table */
for (idx = 0; idx < RT_MAX_IPI; idx++)
{
ipi_desc[idx].handler = RT_NULL;
ipi_desc[idx].param = RT_NULL;
#ifdef RT_USING_INTERRUPT_INFO
rt_snprintf(ipi_desc[idx].name, RT_NAME_MAX - 1, "default");
ipi_desc[idx].counter = 0;
#endif
}
set_csr(sie, SIP_SSIP);
}
void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler)
{
if (ipi_vector < RT_MAX_IPI)
{
if (ipi_isr_handler != RT_NULL)
{
ipi_desc[ipi_vector].handler = (rt_isr_handler_t)ipi_isr_handler;
ipi_desc[ipi_vector].param = RT_NULL;
}
}
}
void rt_hw_ipi_handler(void)
{
rt_uint32_t ipi_vector;
ipi_vector = ipi_vectors[rt_cpu_get_id()];
while (ipi_vector)
{
int bitpos = __builtin_ctz(ipi_vector);
ipi_vector &= ~(1 << bitpos);
if (bitpos < RT_MAX_IPI && ipi_desc[bitpos].handler != RT_NULL)
{
rt_hw_atomic_and((volatile rt_atomic_t *)&ipi_vectors[rt_cpu_get_id()], ~((rt_atomic_t)(1 << bitpos)));
/* call the irq service routine */
ipi_desc[bitpos].handler(bitpos, ipi_desc[bitpos].param);
}
}
// TODO: Clear the software interrupt pending bit in CLINT
clear_csr(sip, SIP_SSIP);
}
#endif /* RT_USING_SMP */
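
A short trace of the ticket lock above, starting from slock = 0 with two contending harts: hart A's rt_hw_atomic_add returns prev = 0 (ticket 0) and leaves slock = 0x00010000; hart B then gets prev = 0x00010000 (ticket 1) and leaves slock = 0x00020000. The owner field is 0, so A enters immediately while B spins; A's unlock adds 1, giving slock = 0x00020001 (owner = 1), and B proceeds.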

View File

@@ -42,5 +42,12 @@ void rt_hw_interrupt_init(void);
void rt_hw_interrupt_mask(int vector);
rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler, void *param, const char *name);
void handle_trap(rt_ubase_t xcause, rt_ubase_t xtval, rt_ubase_t xepc, struct rt_hw_stack_frame *sp);
#ifdef RT_USING_SMP
void rt_hw_interrupt_set_priority(int vector, unsigned int priority);
unsigned int rt_hw_interrupt_get_priority(int vector);
void rt_hw_ipi_handler(void);
void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler);
void rt_hw_ipi_init(void);
void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask);
#endif /* RT_USING_SMP */
#endif