diff --git a/arch/x86_64/include/intel64/arch.h b/arch/x86_64/include/intel64/arch.h index 0dc2a6fd9e9..d0e7a616079 100644 --- a/arch/x86_64/include/intel64/arch.h +++ b/arch/x86_64/include/intel64/arch.h @@ -114,6 +114,30 @@ #define X86_CR4_XMMEXCPT 0x00000400 #define X86_CR4_FGSBASE 0x00010000 #define X86_CR4_PCIDE 0x00020000 +#define X86_CR4_OSXSAVE 0x00040000 + +/* XCR0 */ + +#define X86_XCR0_X87 (1 << 0) +#define X86_XCR0_SSE (1 << 1) +#define X86_XCR0_AVX (1 << 2) +#define X86_XCR0_BNDREG (1 << 3) +#define X86_XCR0_BNDCSR (1 << 4) +#define X86_XCR0_OPMASK (1 << 5) +#define X86_XCR0_HI256 (1 << 6) +#define X86_XCR0_HI16 (1 << 7) +#define X86_XCR0_PT (1 << 8) +#define X86_XCR0_PKRU (1 << 9) +#define X86_XCR0_PASID (1 << 10) +#define X86_XCR0_CETU (1 << 11) +#define X86_XCR0_CETS (1 << 12) +#define X86_XCR0_HDC (1 << 13) +#define X86_XCR0_UINTR (1 << 14) +#define X86_XCR0_LBR (1 << 15) +#define X86_XCR0_HWP (1 << 16) +#define X86_XCR0_XTILECFG (1 << 17) +#define X86_XCR0_XTILEDATA (1 << 18) +#define X86_XCR0_APX (1 << 19) /* PAGE TABLE ENTRY Definitions */ @@ -161,6 +185,7 @@ # define X86_64_CPUID_07_AVX512CD (1 << 28) # define X86_64_CPUID_07_AVX512BW (1 << 30) # define X86_64_CPUID_07_AVX512VL (1 << 31) +#define X86_64_CPUID_XSAVE 0x0d #define X86_64_CPUID_TSC 0x15 /* MSR Definitions */ @@ -243,6 +268,7 @@ #define MSR_X2APIC_TMICT 0x838 #define MSR_X2APIC_TMCCT 0x839 #define MSR_X2APIC_TDCR 0x83e +#define MSR_IA32_XSS 0xda0 /* IOAPIC related Definitions */ @@ -281,6 +307,42 @@ # define X86_RST_CNT_CPU_RST 0x04 # define X86_RST_CNT_FULL_RST 0x08 +/* XSAVE state component bitmap */ + +#define X86_XSAVE_X87 (1 << 0) /* Bit 0: X87 state */ +#define X86_XSAVE_SSE (1 << 1) /* Bit 1: SSE state (512 bytes) */ +#define X86_XSAVE_AVX (1 << 2) /* Bit 2: AVX state (256 bytes) */ +#define X86_XSAVE_MPX_BNDREGS (1 << 3) /* Bit 3: MPX BNDREGS (64 bytes) */ +#define X86_XSAVE_MPX_BNDCSR (1 << 4) /* Bit 4: MPX BNDCSR (16 bytes) */ +#define X86_XSAVE_AVX512_OPMASK (1 << 5) /* 
Bit 5: AVX-512 opmask (64 bytes) */ +#define X86_XSAVE_AVX512_HI256 (1 << 6) /* Bit 6: AVX-512 ZMM_Hi256 (512 bytes) */ +#define X86_XSAVE_AVX512_HI16 (1 << 7) /* Bit 7: AVX-512 Hi16_ZMM (1024 bytes) */ +#define X86_XSAVE_PT (1 << 8) /* Bit 8: PT (72 bytes) */ +#define X86_XSAVE_PKRU (1 << 9) /* Bit 9: PKRU (4 bytes) */ +#define X86_XSAVE_PASID (1 << 10) /* Bit 10: PASID state */ +#define X86_XSAVE_CET_U (1 << 11) /* Bit 11: CET_U state */ +#define X86_XSAVE_CET_S (1 << 12) /* Bit 12: CET_S state */ +#define X86_XSAVE_HDC (1 << 13) /* Bit 13: HDC */ +#define X86_XSAVE_UINTR (1 << 14) /* Bit 14: UINTR state */ +#define X86_XSAVE_LBR (1 << 15) /* Bit 15: LBR state */ +#define X86_XSAVE_HWP (1 << 16) /* Bit 16: HWP state */ +#define X86_XSAVE_AMX_TILECFG (1 << 17) /* Bit 17: AMX TILECFG state (64 bytes) */ +#define X86_XSAVE_AMX_TILEDATA (1 << 18) /* Bit 18: AMX TILEDATA state (8192 bytes) */ + +/* XSAVE area size */ + +#define XSAVE_LEGACY_SIZE (512) /* X87 + SSE */ +#define XSAVE_HEADER_SIZE (64) /* XSAVE header */ +#define XSAVE_AVX_SIZE (256) +#define XSAVE_MXP_BNDREGS_SIZE (64) +#define XSAVE_MXP_BNDCSR_SIZE (16) +#define XSAVE_AVX512OPMASK_SIZE (64) +#define XSAVE_AVX512HI256_SIZE (512) +#define XSAVE_AVX512HI16_SIZE (1024) +#define XSAVE_PT_SIZE (72) +#define XSAVE_PKRU_SIZE (4) +#define XSAVE_HDC_SIZE (8) + #ifndef __ASSEMBLY__ /**************************************************************************** diff --git a/arch/x86_64/include/intel64/irq.h b/arch/x86_64/include/intel64/irq.h index dd9ab028766..3f3d27c2d91 100644 --- a/arch/x86_64/include/intel64/irq.h +++ b/arch/x86_64/include/intel64/irq.h @@ -34,10 +34,11 @@ #ifndef __ASSEMBLY__ # include # include -# include # include #endif +#include + /**************************************************************************** * Pre-processor Definitions ****************************************************************************/ @@ -353,53 +354,120 @@ * ISR/IRQ interrupt processing. 
*/ -#define XCPTCONTEXT_XMM_AREA_SIZE 512 -#define XMMAREA_OFFSET (XCPTCONTEXT_XMM_AREA_SIZE / 8) +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + +/* Only legacy area if XSAVE not supported */ + +# define XCPTCONTEXT_XMM_AREA_SIZE 512 +#else + +/* XSAVE state depends on enabled features */ + +# ifdef CONFIG_ARCH_X86_64_AVX +# define XSTATE_AVX_STATE X86_XSAVE_AVX +# define XSTATE_AVX_SIZE XSAVE_AVX_SIZE +# else +# define XSTATE_AVX_STATE 0 +# define XSTATE_AVX_SIZE 0 +# endif + +# ifdef CONFIG_ARCH_X86_64_AVX512 +# define XSTATE_AVX512_STATE (X86_XSAVE_AVX512_OPMASK | \ + X86_XSAVE_AVX512_HI256 | \ + X86_XSAVE_AVX512_HI16) +# define XSTATE_AVX512_SIZE (XSAVE_MXP_BNDREGS_SIZE + \ + XSAVE_MXP_BNDCSR_SIZE + \ + XSAVE_AVX512OPMASK_SIZE + \ + XSAVE_AVX512HI256_SIZE + \ + XSAVE_AVX512HI16_SIZE) +# else +# define XSTATE_AVX512_STATE 0 +# define XSTATE_AVX512_SIZE 0 +# endif + +/* State component bitmap */ + +# define XSAVE_STATE_COMPONENTS (X86_XSAVE_X87 | \ + X86_XSAVE_SSE | \ + XSTATE_AVX_STATE | \ + XSTATE_AVX512_STATE) + +/* Area for XSAVE - standard area format */ + +# define XCPTCONTEXT_XMM_AREA_SIZE (XSAVE_LEGACY_SIZE + \ + XSAVE_HEADER_SIZE + \ + XSTATE_AVX_SIZE + \ + XSTATE_AVX512_SIZE) +#endif + +/* Align registers to 64-bytes */ + +#ifdef CONFIG_ARCH_X86_64_AVX512 +# define XMMAREA_REG_ALIGN (13) +#else +# define XMMAREA_REG_ALIGN (7) +#endif + +/* Register offset in XMMAREA */ + +#define XMMAREA_OFFSET (XCPTCONTEXT_XMM_AREA_SIZE / 8) +#define XMMAREA_REG_OFFSET (XMMAREA_REG_ALIGN + XMMAREA_OFFSET) /* Data segments */ -#define REG_ALIGN (0 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_FS (1 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_GS (2 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_ES (3 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_DS (4 + XMMAREA_OFFSET) /* Data segment selector */ +#define REG_FS (0 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ +#define REG_GS (1 + XMMAREA_REG_OFFSET) /* " " "" " " "" " "
" " */ +#define REG_ES (2 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ +#define REG_DS (3 + XMMAREA_REG_OFFSET) /* Data segment selector */ /* Remaining regs */ -#define REG_RAX (5 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RBX (6 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RBP (7 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R10 (8 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R11 (9 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R12 (10 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R13 (11 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R14 (12 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R15 (13 + XMMAREA_OFFSET) /* " " "" " " */ +#define REG_RAX (4 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RBX (5 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RBP (6 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R10 (7 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R11 (8 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R12 (9 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R13 (10 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R14 (11 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R15 (12 + XMMAREA_REG_OFFSET) /* " " "" " " */ /* ABI calling convention */ -#define REG_R9 (14 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_R8 (15 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RCX (16 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RDX (17 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RSI (18 + XMMAREA_OFFSET) /* " " "" " " */ -#define REG_RDI (19 + XMMAREA_OFFSET) /* " " "" " " */ +#define REG_R9 (13 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_R8 (14 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RCX (15 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RDX (16 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RSI (17 + XMMAREA_REG_OFFSET) /* " " "" " " */ +#define REG_RDI (18 + XMMAREA_REG_OFFSET) /* " " "" " " */ /* IRQ saved */ -#define REG_ERRCODE (20 + XMMAREA_OFFSET) /* Error code */ 
-#define REG_RIP (21 + XMMAREA_OFFSET) /* Pushed by process on interrupt processing */ -#define REG_CS (22 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_RFLAGS (23 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_RSP (24 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ -#define REG_SS (25 + XMMAREA_OFFSET) /* " " "" " " "" " " " " */ +#define REG_ERRCODE (19 + XMMAREA_REG_OFFSET) /* Error code */ +#define REG_RIP (20 + XMMAREA_REG_OFFSET) /* Pushed by process on interrupt processing */ +#define REG_CS (21 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ +#define REG_RFLAGS (22 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ +#define REG_RSP (23 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ +#define REG_SS (24 + XMMAREA_REG_OFFSET) /* " " "" " " "" " " " " */ + +#define XMMAREA_REGS (25) /* NOTE 2: This is not really state data. Rather, this is just a convenient * way to pass parameters from the interrupt handler to C code. */ -#define XCPTCONTEXT_REGS (26 + XCPTCONTEXT_XMM_AREA_SIZE / 8) -#define XCPTCONTEXT_SIZE (8 * XCPTCONTEXT_REGS + XCPTCONTEXT_XMM_AREA_SIZE) +#define XCPTCONTEXT_REGS (XMMAREA_REGS + XMMAREA_REG_ALIGN + \ + XCPTCONTEXT_XMM_AREA_SIZE / 8) + +#define XCPTCONTEXT_SIZE (8 * XCPTCONTEXT_REGS) + +/* Always align XCPTCONTEXT to 64-bytes to support XSAVE */ + +#define XCPTCONTEXT_ALIGN (64) + +#define XCP_ALIGN_MASK (XCPTCONTEXT_ALIGN - 1) +#define XCP_ALIGN_DOWN(a) ((a) & ~XCP_ALIGN_MASK) +#define XCP_ALIGN_UP(a) (((a) + XCP_ALIGN_MASK) & ~XCP_ALIGN_MASK) /**************************************************************************** * Public Types @@ -432,9 +500,9 @@ struct xcptcontext uint64_t saved_rflags; uint64_t saved_rsp; - /* Register save area */ + /* Register save area - allocated from stack in up_initial_state() */ - uint64_t regs[XCPTCONTEXT_REGS] aligned_data(16); + uint64_t *regs; }; #endif diff --git a/arch/x86_64/src/common/Kconfig b/arch/x86_64/src/common/Kconfig index fc4235a69b0..fbe6661d184 100644 --- 
a/arch/x86_64/src/common/Kconfig +++ b/arch/x86_64/src/common/Kconfig @@ -28,6 +28,13 @@ config ARCH_X86_64_ACPI_BIOS endif # ARCH_X86_64_ACPI +config ARCH_X86_64_HAVE_XSAVE + bool "XSAVE support" + default y + ---help--- + Select to enable the use of XSAVE and FPU/SSE/AVX functions + of x86_64 + config ARCH_X86_64_MMX bool "MMX support" depends on ARCH_HAVE_MMX diff --git a/arch/x86_64/src/intel64/Kconfig b/arch/x86_64/src/intel64/Kconfig index 4c62c4e69bf..86d96138e21 100644 --- a/arch/x86_64/src/intel64/Kconfig +++ b/arch/x86_64/src/intel64/Kconfig @@ -150,13 +150,6 @@ config INTEL64_ONESHOT_MAXTIMERS endif -config ARCH_INTEL64_HAVE_XSAVE - bool "XSAVE support" - default y - ---help--- - Select to enable the use of XSAVE and FPU/SSE/AVX functions - of x86_64 - config ARCH_INTEL64_HAVE_PCID bool "PCID support" default y diff --git a/arch/x86_64/src/intel64/intel64_check_capability.c b/arch/x86_64/src/intel64/intel64_check_capability.c index e777b8554b2..87dad14a7ef 100644 --- a/arch/x86_64/src/intel64/intel64_check_capability.c +++ b/arch/x86_64/src/intel64/intel64_check_capability.c @@ -108,10 +108,24 @@ void x86_64_check_and_enable_capability(void) goto err; } -#ifdef CONFIG_ARCH_INTEL64_HAVE_XSAVE +#if defined(CONFIG_ARCH_HAVE_SSE) || defined(CONFIG_ARCH_X86_64_AVX) || \ + defined(CONFIG_ARCH_X86_64_AVX512) __enable_sse_avx(); #endif +#ifdef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Check XSAVE state area size for the current XCR0 state */ + + asm volatile("cpuid" : "=b" (ebx) + : "a" (X86_64_CPUID_XSAVE), "c" (0) + : "rdx", "memory"); + + if (XCPTCONTEXT_XMM_AREA_SIZE < ebx) + { + goto err; + } +#endif + #ifdef CONFIG_ARCH_INTEL64_HAVE_PCID __enable_pcid(); #endif diff --git a/arch/x86_64/src/intel64/intel64_cpu.c b/arch/x86_64/src/intel64/intel64_cpu.c index 17f7497ae83..d46ebd98c08 100644 --- a/arch/x86_64/src/intel64/intel64_cpu.c +++ b/arch/x86_64/src/intel64/intel64_cpu.c @@ -64,8 +64,8 @@ struct intel64_cpu_s g_cpu_priv[CONFIG_SMP_NCPUS]; /* Allocate stack for 
interrupts and isr */ -uint8_t g_intstackalloc[IRQ_STACK_ALLOC] aligned_data(16); -uint8_t g_isrstackalloc[IRQ_STACK_ALLOC] aligned_data(16); +uint8_t g_intstackalloc[IRQ_STACK_ALLOC] aligned_data(64); +uint8_t g_isrstackalloc[IRQ_STACK_ALLOC] aligned_data(64); /**************************************************************************** * Private Functions diff --git a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S index 545bb3bdd00..d52b7da744e 100644 --- a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S +++ b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S @@ -53,6 +53,15 @@ x86_64_fullcontextrestore: cli +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Restore xmm registers */ + fxrstorq (%rdi) +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xrstor (%rdi) +#endif + /* Create an interrupt stack frame for the final iret. * * @@ -115,9 +124,6 @@ x86_64_fullcontextrestore: * XXX: Should use wrgsbase and wrfsbase to restore the gs and fs register */ - /* restore xmm registers */ - fxrstorq (%rdi) - /* Restore the correct value of EAX and then return */ popq %rdi diff --git a/arch/x86_64/src/intel64/intel64_head.S b/arch/x86_64/src/intel64/intel64_head.S index f6f294f2b54..54f63f3b5b6 100644 --- a/arch/x86_64/src/intel64/intel64_head.S +++ b/arch/x86_64/src/intel64/intel64_head.S @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -33,6 +34,25 @@ * Pre-processor definitions ****************************************************************************/ +/* Get XCR0 register value */ + +#if defined(CONFIG_ARCH_X86_64_AVX512) +# define X86_XCR0_VAL (X86_XCR0_X87 | X86_XCR0_SSE | X86_XCR0_AVX | \ + X86_XCR0_OPMASK | X86_XCR0_HI256 | X86_XCR0_HI16) +#elif defined(CONFIG_ARCH_X86_64_AVX) +# define X86_XCR0_VAL (X86_XCR0_X87 | X86_XCR0_SSE | X86_XCR0_AVX) +#else +# define X86_XCR0_VAL (X86_XCR0_X87 | X86_XCR0_SSE) +#endif + +/* Get CR4 register value */ + +#ifdef 
CONFIG_ARCH_X86_64_HAVE_XSAVE +# define X86_CR4_FPU_VAL (X86_CR4_OSXSAVE | X86_CR4_OSXFSR | X86_CR4_XMMEXCPT) +#else +# define X86_CR4_FPU_VAL (X86_CR4_OSXFSR | X86_CR4_XMMEXCPT) +#endif + /* Memory Map: _sbss is the start of the BSS region (see ld.script) _ebss is * the end of the BSS region (see ld.script). The idle task stack starts at * the end of BSS and is of size CONFIG_IDLETHREAD_STACKSIZE. The IDLE thread @@ -319,6 +339,9 @@ start64: movabs $g_idle_topstack, %rbx mov (%rbx), %rsp + /* Allocate space for XCPTCONTEXT */ + leaq -XCPTCONTEXT_SIZE(%rsp), %rsp + /* Set bsp_done flag */ movl $1, bsp_done @@ -352,7 +375,7 @@ ap_start: .size __pmode_entry, . - __pmode_entry g_cpu_count: - .long 1 + .long 1 /**************************************************************************** * Name: __revoke_low_memory @@ -408,11 +431,21 @@ __enable_sse_avx: /* Enable Saving XMM context */ mov %cr4, %rax - or $(X86_CR4_OSXFSR | X86_CR4_XMMEXCPT), %rax + or $X86_CR4_FPU_VAL, %rax mov %rax, %cr4 +#ifdef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Configure XSAVE/XRSTOR for user state */ + mov $X86_XCR0_VAL, %eax + + xor %rdx, %rdx + xor %rcx, %rcx + xsetbv + +#else /* Setup MXCSR, masking all SSE precision exception */ ldmxcsr mxcsr_mem +#endif ret diff --git a/arch/x86_64/src/intel64/intel64_initialstate.c b/arch/x86_64/src/intel64/intel64_initialstate.c index 5aa9fa8c494..389b6d95b9e 100644 --- a/arch/x86_64/src/intel64/intel64_initialstate.c +++ b/arch/x86_64/src/intel64/intel64_initialstate.c @@ -33,6 +33,14 @@ #include "x86_64_internal.h" #include "sched/sched.h" +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#if XCPTCONTEXT_SIZE % XCPTCONTEXT_ALIGN != 0 +# error XCPTCONTEXT_SIZE must be aligned to XCPTCONTEXT_ALIGN !
+#endif + /**************************************************************************** * Public Functions ****************************************************************************/ @@ -83,22 +91,39 @@ void up_initial_state(struct tcb_s *tcb) memset(xcp, 0, sizeof(struct xcptcontext)); - /* set the FCW to 1f80 */ + /* Allocate area for XCPTCONTEXT */ + + xcp->regs = (uint64_t *)XCP_ALIGN_DOWN((uintptr_t)tcb->stack_base_ptr + + tcb->adj_stack_size - + XCPTCONTEXT_SIZE); + + /* Reset the xcp registers */ + + memset(xcp->regs, 0, XCPTCONTEXT_SIZE); + +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Set the FCW to 037f */ xcp->regs[1] = (uint64_t)0x0000037f00000000; - /* set the MXCSR to 1f80 */ + /* Set the MXCSR to 1f80 */ xcp->regs[3] = (uint64_t)0x0000000000001f80; +#else + /* Initialize XSAVE region with a valid state */ + + asm volatile("xsave %0" + : "=m" (*xcp->regs) + : "a" (XSAVE_STATE_COMPONENTS), "d" (0) + : "memory"); +#endif /* Save the initial stack pointer... the value of the stackpointer before * the "interrupt occurs."
*/ - xcp->regs[REG_RSP] = (uint64_t)tcb->stack_base_ptr + - tcb->adj_stack_size; - xcp->regs[REG_RBP] = (uint64_t)tcb->stack_base_ptr + - tcb->adj_stack_size; + xcp->regs[REG_RSP] = (uint64_t)xcp->regs - 8; + xcp->regs[REG_RBP] = (uint64_t)xcp->regs - 8; /* Save the task entry point */ diff --git a/arch/x86_64/src/intel64/intel64_saveusercontext.S b/arch/x86_64/src/intel64/intel64_saveusercontext.S index 1c5a14a9f5e..b5bad4fbf15 100644 --- a/arch/x86_64/src/intel64/intel64_saveusercontext.S +++ b/arch/x86_64/src/intel64/intel64_saveusercontext.S @@ -68,8 +68,14 @@ up_saveusercontext: movq %r14, (8*REG_R14)(%rdi) movq %r15, (8*REG_R15)(%rdi) - /* save xmm registers */ +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Save xmm registers */ fxsaveq (%rdi) +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xsave (%rdi) +#endif /* Save the value of SP as will be at the time of the IRET that will * appear to be the return from this function. diff --git a/arch/x86_64/src/intel64/intel64_sigdeliver.c b/arch/x86_64/src/intel64/intel64_sigdeliver.c index 819d689c87d..a7f5763b424 100644 --- a/arch/x86_64/src/intel64/intel64_sigdeliver.c +++ b/arch/x86_64/src/intel64/intel64_sigdeliver.c @@ -54,7 +54,7 @@ void x86_64_sigdeliver(void) { struct tcb_s *rtcb = this_task(); - uint64_t regs_area[XCPTCONTEXT_REGS + 2]; + uint64_t regs_area[XCPTCONTEXT_REGS + 8]; uint64_t *regs; #ifdef CONFIG_SMP @@ -72,9 +72,9 @@ void x86_64_sigdeliver(void) rtcb, rtcb->xcp.sigdeliver, rtcb->sigpendactionq.head); DEBUGASSERT(rtcb->xcp.sigdeliver != NULL); - /* Align regs to 16 byte boundary for SSE instructions. */ + /* Align regs to 64 byte boundary for XSAVE */ - regs = (uint64_t *)(((uint64_t)(regs_area) + 15) & (~(uint64_t)15)); + regs = (uint64_t *)(((uint64_t)(regs_area) + 63) & (~(uint64_t)63)); /* Save the real return state on the stack ASAP before any chance we went * sleeping and break the register profile. 
We entered this function with diff --git a/arch/x86_64/src/intel64/intel64_vectors.S b/arch/x86_64/src/intel64/intel64_vectors.S index 7abaa88bfe7..113a2855cc2 100644 --- a/arch/x86_64/src/intel64/intel64_vectors.S +++ b/arch/x86_64/src/intel64/intel64_vectors.S @@ -729,12 +729,18 @@ isr_common: mov %fs, %ax /* Lower 16-bits of rax. */ pushq %rax /* Save the data segment descriptor */ - /* align to 16-bytes boundary */ - leaq -8(%rsp), %rsp + /* Align to 64-bytes boundary */ + leaq -(XMMAREA_REG_ALIGN * 8)(%rsp), %rsp - /* save xmm registers */ - leaq -512(%rsp), %rsp + /* Save xmm registers */ + leaq -XCPTCONTEXT_XMM_AREA_SIZE(%rsp), %rsp +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE fxsaveq (%rsp) +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xsave (%rsp) +#endif /* The current value of the SP points to the beginning of the state save * structure. Save that in RDI as the input parameter to isr_handler. @@ -786,12 +792,18 @@ irq_common: mov %fs, %ax /* Lower 16-bits of rax. */ pushq %rax /* Save the data segment descriptor */ - /* align to 16-bytes boundary */ - leaq -8(%rsp), %rsp + /* Align to 64-bytes boundary */ + leaq -(XMMAREA_REG_ALIGN * 8)(%rsp), %rsp - /* save xmm registers */ - leaq -512(%rsp), %rsp + /* Save xmm registers */ + leaq -XCPTCONTEXT_XMM_AREA_SIZE(%rsp), %rsp +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE fxsaveq (%rsp) +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xsave (%rsp) +#endif /* The current value of the SP points to the beginning of the state save * structure. Save that in RDI as the input parameter to irq_handler. @@ -811,20 +823,28 @@ irq_common: je .Lnoswitch /* A context swith will be performed. EAX holds the address of the new - * register save structure. + * register save structure. * * Jump to x86_64_fullcontextrestore(). We perform a call here, but that function * never returns. The address of the new register save block is the argument * to the x86_64_fullcontextrestore(). 
- */ + */ mov %rax, %rdi call x86_64_fullcontextrestore .Lnoswitch: +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE fxrstorq (%rsp) - leaq 512(%rsp), %rsp - leaq 8(%rsp), %rsp +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xrstor (%rsp) +#endif + leaq XCPTCONTEXT_XMM_AREA_SIZE(%rsp), %rsp + + /* Align to 64-bytes boundary */ + leaq (XMMAREA_REG_ALIGN * 8)(%rsp), %rsp popq %rax mov %fs, %ax diff --git a/sched/misc/assert.c b/sched/misc/assert.c index 6d87199eb14..fa9b8c89165 100644 --- a/sched/misc/assert.c +++ b/sched/misc/assert.c @@ -79,11 +79,18 @@ # define DUMP_FORMAT " %016" PRIxPTR "" #endif +/* Architecture can overwrite the default XCPTCONTEXT alignment */ + +#ifndef XCPTCONTEXT_ALIGN +# define XCPTCONTEXT_ALIGN 16 +#endif + /**************************************************************************** * Private Data ****************************************************************************/ -static uintptr_t g_last_regs[XCPTCONTEXT_REGS] aligned_data(16); +static uintptr_t +g_last_regs[XCPTCONTEXT_REGS] aligned_data(XCPTCONTEXT_ALIGN); static FAR const char *g_policy[4] = { "FIFO", "RR", "SPORADIC"