diff options
author | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2020-11-28 16:30:29 +0100 |
---|---|---|
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2020-11-28 17:46:32 +0100 |
commit | b885c5ea26fb3c2f2d91b6e9a1495070da429ea4 (patch) | |
tree | 787f167688bf7518b62241fa55c539da3fa53ed5 | |
parent | 44e3fb20a17dac403ec022a9c4d41dc237f3c79d (diff) | |
download | gnumach-b885c5ea26fb3c2f2d91b6e9a1495070da429ea4.tar.gz gnumach-b885c5ea26fb3c2f2d91b6e9a1495070da429ea4.tar.bz2 gnumach-b885c5ea26fb3c2f2d91b6e9a1495070da429ea4.zip |
x86: Add XSAVE support
* i386/i386/fpu.h (CPU_XCR0_X87, CPU_XCR0_SSE, CPU_XCR0_AVX,
CPU_XCR0_MPX, CPU_XCR0_AVX512): New macros.
(xsave): Pass fp_xsave_support to xsave.
(fpu_save_context): When fp_kind is FP_387X, use xsave.
(fp_xsave_support): New variable declaration.
* i386/i386/proc_reg.h (cpuid): New macro.
* linux/src/include/asm-i386/processor.h (cpuid): Disable inline function.
* i386/include/mach/i386/fp_reg.h: Include <stdint.h>
(i386_xfp_xstate_header): New structure.
(i386_xfp_save): Add xsave fields.
* i386/i386/fpu.c (fp_xsave_support): New variable.
(init_fpu): Look for XSAVE feature; if available, get the supported
parts and set fp_kind to FP_387X.
(fpu_module_init): Set ifps_cache alignment to alignof(struct
i386_fpsave_state).
(fpu_set_state): Make sure to clear the whole ifps structure.
Reuse the FP_387FX code path for the FP_387X case.
(fpu_get_state, fpexterrflt, fpastintr, fp_state_alloc): Reuse the FP_387FX
code path for the FP_387X case.
(fp_save): When fp_kind is FP_387X, use xsave.
(fp_load): When fp_kind is FP_387X, use xrstor.
Reuse the FP_387FX code path for the FP_387X case.
-rw-r--r-- | i386/i386/fpu.c | 81 | ||||
-rw-r--r-- | i386/i386/fpu.h | 19 | ||||
-rw-r--r-- | i386/i386/proc_reg.h | 22 | ||||
-rw-r--r-- | i386/include/mach/i386/fp_reg.h | 27 | ||||
-rw-r--r-- | linux/src/include/asm-i386/processor.h | 2 |
5 files changed, 124 insertions, 27 deletions
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c index 5cbfcb2f..517764d6 100644 --- a/i386/i386/fpu.c +++ b/i386/i386/fpu.c @@ -70,6 +70,7 @@ #endif int fp_kind = FP_387; /* 80387 present */ +uint64_t fp_xsave_support; /* Bitmap of supported XSAVE save areas */ struct kmem_cache ifps_cache; /* cache for FPU save area */ static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */ @@ -148,21 +149,58 @@ init_fpu(void) /* * We have a 387. */ - if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) { - static /* because we _need_ alignment */ - struct i386_xfp_save save; - unsigned long mask; - fp_kind = FP_387FX; + fp_kind = FP_387; + + if (CPU_HAS_FEATURE(CPU_FEATURE_XSAVE)) { + unsigned eax, ebx, ecx, edx; + + eax = 0xd; + ecx = 0x0; + cpuid(eax, ebx, ecx, edx); + + fp_xsave_support = eax + (((uint64_t) edx) << 32); + fp_xsave_support &= CPU_XCR0_SUPPORTED; +#ifndef MACH_RING1 + set_cr4(get_cr4() | CR4_OSFXSR | CR4_OSXSAVE); + set_xcr0(fp_xsave_support); +#endif /* MACH_RING1 */ + + eax = 0xd; + ecx = 0x0; + cpuid(eax, ebx, ecx, edx); + + if (ebx > sizeof(struct i386_xfp_save)) { + /* TODO: rather make struct unbound and set size + * in fpu_module_init */ + printf("XSAVE area size %u larger than provisioned " + "%u, not enabling XSAVE\n", + ebx, sizeof(struct i386_xfp_save)); +#ifndef MACH_RING1 + set_cr4(get_cr4() & ~(CR4_OSFXSR | CR4_OSXSAVE)); +#endif /* MACH_RING1 */ + } else { + fp_kind = FP_387X; + } + } + + if (fp_kind == FP_387 && CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) { #ifndef MACH_RING1 set_cr4(get_cr4() | CR4_OSFXSR); #endif /* MACH_RING1 */ + fp_kind = FP_387FX; + } + + if (fp_kind == FP_387X || fp_kind == FP_387FX) { + /* Compute mxcsr_feature_mask. 
*/ + static /* because we _need_ alignment */ + struct i386_xfp_save save; + unsigned long mask; fxsave(&save); mask = save.fp_mxcsr_mask; if (!mask) mask = 0x0000ffbf; mxcsr_feature_mask &= mask; - } else - fp_kind = FP_387; + } } #ifdef MACH_RING1 set_ts(); @@ -188,7 +226,8 @@ void fpu_module_init(void) { kmem_cache_init(&ifps_cache, "i386_fpsave_state", - sizeof(struct i386_fpsave_state), 16, + sizeof(struct i386_fpsave_state), + alignof(struct i386_fpsave_state), NULL, 0); } @@ -359,9 +398,9 @@ ASSERT_IPL(SPL0); /* * Ensure that reserved parts of the environment are 0. */ - memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save)); + memset(ifps, 0, sizeof(*ifps)); - if (fp_kind == FP_387FX) { + if (fp_kind == FP_387X || fp_kind == FP_387FX) { int i; ifps->xfp_save_state.fp_control = user_fp_state->fp_control; @@ -454,7 +493,7 @@ ASSERT_IPL(SPL0); */ memset(user_fp_state, 0, sizeof(struct i386_fp_save)); - if (fp_kind == FP_387FX) { + if (fp_kind == FP_387X || fp_kind == FP_387FX) { int i; user_fp_state->fp_control = ifps->xfp_save_state.fp_control; @@ -697,7 +736,7 @@ fpexterrflt(void) */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - fp_kind == FP_387FX ? + fp_kind == FP_387X || fp_kind == FP_387FX ? thread->pcb->ims.ifps->xfp_save_state.fp_status : thread->pcb->ims.ifps->fp_save_state.fp_status); /*NOTREACHED*/ @@ -755,7 +794,7 @@ ASSERT_IPL(SPL0); */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - fp_kind == FP_387FX ? + fp_kind == FP_387X || fp_kind == FP_387FX ? 
thread->pcb->ims.ifps->xfp_save_state.fp_status : thread->pcb->ims.ifps->fp_save_state.fp_status); /*NOTREACHED*/ @@ -779,10 +818,12 @@ fp_save(thread_t thread) if (ifps != 0 && !ifps->fp_valid) { /* registers are in FPU */ ifps->fp_valid = TRUE; - if (fp_kind == FP_387FX) - fxsave(&ifps->xfp_save_state); + if (fp_kind == FP_387X) + xsave(&ifps->xfp_save_state); + else if (fp_kind == FP_387FX) + fxsave(&ifps->xfp_save_state); else - fnsave(&ifps->fp_save_state); + fnsave(&ifps->fp_save_state); } } @@ -822,7 +863,7 @@ ASSERT_IPL(SPL0); */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - fp_kind == FP_387FX ? + fp_kind == FP_387X || fp_kind == FP_387FX ? thread->pcb->ims.ifps->xfp_save_state.fp_status : thread->pcb->ims.ifps->fp_save_state.fp_status); /*NOTREACHED*/ @@ -831,7 +872,9 @@ ASSERT_IPL(SPL0); printf("fp_load: invalid FPU state!\n"); fninit (); } else { - if (fp_kind == FP_387FX) + if (fp_kind == FP_387X) + xrstor(ifps->xfp_save_state); + else if (fp_kind == FP_387FX) fxrstor(ifps->xfp_save_state); else frstor(ifps->fp_save_state); @@ -857,7 +900,7 @@ fp_state_alloc(void) ifps->fp_valid = TRUE; - if (fp_kind == FP_387FX) { + if (fp_kind == FP_387X || fp_kind == FP_387FX) { ifps->xfp_save_state.fp_control = (0x037f & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) | (FPC_PC_64|FPC_IC_AFF); diff --git a/i386/i386/fpu.h b/i386/i386/fpu.h index 3c5b73c5..dc503606 100644 --- a/i386/i386/fpu.h +++ b/i386/i386/fpu.h @@ -96,8 +96,20 @@ static inline void set_xcr0(uint64_t value) { xsetbv(0, value); } +#define CPU_XCR0_X87 (1 << 0) +#define CPU_XCR0_SSE (1 << 1) +#define CPU_XCR0_AVX (1 << 2) +#define CPU_XCR0_MPX (3 << 3) +#define CPU_XCR0_AVX512 (7 << 5) + +/* This is the set we support for now in our struct i386_xfp_save */ +#define CPU_XCR0_SUPPORTED (CPU_XCR0_X87 | CPU_XCR0_SSE | CPU_XCR0_AVX) + #define xsave(state) \ - asm volatile("xsave %0" : "=m" (*state)) + asm volatile("xsave %0" \ + : "=m" (*state) \ + : "a" ((unsigned) fp_xsave_support) \ + , "d" ((unsigned) 
(fp_xsave_support >> 32))) \ #define xrstor(state) \ asm volatile("xrstor %0" : : "m" (state)) @@ -121,7 +133,9 @@ static inline void set_xcr0(uint64_t value) { if (ifps != 0 && !ifps->fp_valid) { \ /* registers are in FPU - save to memory */ \ ifps->fp_valid = TRUE; \ - if (fp_kind == FP_387FX) \ + if (fp_kind == FP_387X) \ + xsave(&ifps->xfp_save_state); \ + else if (fp_kind == FP_387FX) \ fxsave(&ifps->xfp_save_state); \ else \ fnsave(&ifps->fp_save_state); \ @@ -138,6 +152,7 @@ static inline void set_xcr0(uint64_t value) { #endif /* NCPUS == 1 */ extern int fp_kind; +extern uint64_t fp_xsave_support; extern void fp_save(thread_t thread); extern void fp_load(thread_t thread); extern void fp_free(struct i386_fpsave_state *fps); diff --git a/i386/i386/proc_reg.h b/i386/i386/proc_reg.h index 624819c6..a83ca0d8 100644 --- a/i386/i386/proc_reg.h +++ b/i386/i386/proc_reg.h @@ -379,6 +379,28 @@ extern unsigned long cr3; }) #endif +/* Note: gcc might want to use bx or the stack for %1 addressing, so we can't + * use them :/ */ +#ifdef __x86_64__ +#define cpuid(eax, ebx, ecx, edx) \ +{ \ + uint64_t sav_rbx; \ + asm( "mov %%rbx,%2\n\t" \ + "cpuid\n\t" \ + "xchg %2,%%rbx\n\t" \ + "movl %k2,%1\n\t" \ + : "+a" (eax), "=m" (ebx), "=&r" (sav_rbx), "+c" (ecx), "=&d" (edx)); \ +} +#else +#define cpuid(eax, ebx, ecx, edx) \ +{ \ + asm ( "mov %%ebx,%1\n\t" \ + "cpuid\n\t" \ + "xchg %%ebx,%1\n\t" \ + : "+a" (eax), "=&SD" (ebx), "+c" (ecx), "=&d" (edx)); \ +} +#endif + #endif /* __GNUC__ */ #endif /* __ASSEMBLER__ */ diff --git a/i386/include/mach/i386/fp_reg.h b/i386/include/mach/i386/fp_reg.h index 648511ad..5992ea78 100644 --- a/i386/include/mach/i386/fp_reg.h +++ b/i386/include/mach/i386/fp_reg.h @@ -26,6 +26,9 @@ #ifndef _MACH_I386_FP_REG_H_ #define _MACH_I386_FP_REG_H_ + +#include <stdint.h> + /* * Floating point registers and status, as saved * and restored by FP save/restore instructions. 
@@ -50,17 +53,24 @@ struct i386_fp_regs { /* space for 8 80-bit FP registers */ }; +struct i386_xfp_xstate_header { + uint64_t xfp_features; + uint64_t xcomp_bv; + uint64_t reserved[6]; +} __attribute__((packed, aligned(64))); +_Static_assert(sizeof(struct i386_xfp_xstate_header) == 8*8); + struct i386_xfp_save { unsigned short fp_control; /* control */ unsigned short fp_status; /* status */ unsigned short fp_tag; /* register tags */ unsigned short fp_opcode; /* opcode of failed instruction */ unsigned int fp_eip; /* eip at failed instruction */ - unsigned short fp_cs; /* cs at failed instruction */ - unsigned short fp_unused_1; + unsigned short fp_cs; /* cs at failed instruction / eip high */ + unsigned short fp_eip3; /* eip higher */ unsigned int fp_dp; /* data address */ - unsigned short fp_ds; /* data segment */ - unsigned short fp_unused_2; + unsigned short fp_ds; /* data segment / dp high */ + unsigned short fp_dp3; /* dp higher */ unsigned int fp_mxcsr; /* MXCSR */ unsigned int fp_mxcsr_mask; /* MXCSR_MASK */ unsigned char fp_reg_word[8][16]; @@ -68,8 +78,13 @@ struct i386_xfp_save { unsigned char fp_xreg_word[16][16]; /* space for 16 128-bit XMM registers */ unsigned int padding[24]; -} __attribute__((aligned(16))); -_Static_assert(sizeof(struct i386_xfp_save) == 512); + struct i386_xfp_xstate_header header; + + unsigned char fp_yreg_word[16][16]; + /* space for the high part of the + * 16 256-bit YMM registers */ +} __attribute__((packed, aligned(64))); +_Static_assert(sizeof(struct i386_xfp_save) == 512 + 8*8 + 16*16); /* * Control register diff --git a/linux/src/include/asm-i386/processor.h b/linux/src/include/asm-i386/processor.h index 2bcacea0..b0679402 100644 --- a/linux/src/include/asm-i386/processor.h +++ b/linux/src/include/asm-i386/processor.h @@ -29,6 +29,7 @@ extern int have_cpuid; /* We have a CPUID */ extern unsigned long cpu_hz; /* CPU clock frequency from time.c */ +#if 0 /* * Detection of CPU model (CPUID). 
*/ @@ -42,6 +43,7 @@ extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) : "a" (op) : "cc"); } +#endif /* * Cyrix CPU register indexes (use special macros to access these) |