From 7ce8464b3acca2b283842f8cb42f97ea50e227cf Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Fri, 6 Mar 2015 14:09:35 +0100 Subject: [PATCH] hw_x86_64: Enable FPU support * Enable the use of the FXSAVE and FXRSTOR instructions, see Intel SDM Vol. 3C, section 2.5. * The state of the x87 floating point unit (FPU) is loaded and saved on demand. * Make the cr0 control register accessible in the Cpu class. This is in preparation for the upcoming FPU management. * Access to the FPU is disabled by setting the Task Switched flag in the cr0 register. * Access to the FPU is enabled by clearing the Task Switched flag in the cr0 register. * Implement FPU initialization * Add is_fpu_enabled helper function * Add pointer to CPU lazy state to CPU class * Init FPU when finishing kernel initialization * Add function to retry FPU instruction: Similar to the ARM mechanism to retry undefined instructions, implement a function for retrying an FPU instruction. If a floating-point instruction causes an #NM exception due to the FPU being disabled, it can be retried after the correct FPU state is restored, saving the current state and enabling the FPU in the process. * Disable FPU when switching to a different user context: This enables lazy save/restore of the FPU since trying to execute a floating-point instruction when the FPU is disabled will cause an #NM exception. * Declare constant for #NM exception * Retry FPU instruction on #NM exception * Ensure alignment of FXSAVE area: The FXSAVE area is a 512-byte memory region that must be 16-byte aligned. As it turns out, the alignment attribute is not honored in all cases so add a workaround to ensure the alignment constraint is met by manually rounding the start of the FXSAVE area to the next 16-byte boundary if necessary.
--- repos/base-hw/src/core/include/spec/x86/cpu.h | 155 +++++++++++++++++- .../src/core/spec/x86/kernel/thread.cc | 5 + .../src/core/spec/x86_64/kernel/crt0.s | 3 +- repos/base/include/x86_64/cpu/cpu_state.h | 1 + 4 files changed, 155 insertions(+), 9 deletions(-) diff --git a/repos/base-hw/src/core/include/spec/x86/cpu.h b/repos/base-hw/src/core/include/spec/x86/cpu.h index 1b1bd8ac2..8ef89302c 100644 --- a/repos/base-hw/src/core/include/spec/x86/cpu.h +++ b/repos/base-hw/src/core/include/spec/x86/cpu.h @@ -31,7 +31,7 @@ namespace Genode /** * Part of CPU state that is not switched on every mode transition */ - class Cpu_lazy_state { }; + class Cpu_lazy_state; /** * CPU driver for core @@ -41,15 +41,121 @@ namespace Genode namespace Kernel { using Genode::Cpu_lazy_state; } -class Genode::Cpu +class Genode::Cpu_lazy_state { + friend class Cpu; + private: - Idt *_idt; - Tss *_tss; + + /** + * FXSAVE area providing storage for x87 FPU, MMX, XMM, and MXCSR + * registers. + * + * For further details see Intel SDM Vol. 2A, 'FXSAVE instruction'. + */ + char fxsave_area[527]; + + /** + * 16-byte aligned start of FXSAVE area. + */ + char *start; + + /** + * Load x87 FPU State from fxsave area. + */ + inline void load() { + asm volatile ("fxrstor %0" : : "m" (*start)); } + + /** + * Save x87 FPU State to fxsave area. + */ + inline void save() { + asm volatile ("fxsave %0" : "=m" (*start)); } public: - Cpu() + /** + * Constructor + * + * Calculate 16-byte aligned start of FXSAVE area if necessary. 
+ */ + inline Cpu_lazy_state() + { + start = fxsave_area; + if((addr_t)start & 15) + start = (char *)((addr_t)start & ~15) + 16; + }; +} __attribute__((aligned(16))); + + +class Genode::Cpu +{ + friend class Cpu_lazy_state; + + private: + Idt *_idt; + Tss *_tss; + Cpu_lazy_state *_fpu_state; + + /** + * Control register 0 + */ + struct Cr0 : Register<64> + { + struct Pe : Bitfield<0, 1> { }; /* Protection Enable */ + struct Mp : Bitfield<1, 1> { }; /* Monitor Coprocessor */ + struct Em : Bitfield<2, 1> { }; /* Emulation */ + struct Ts : Bitfield<3, 1> { }; /* Task Switched */ + struct Et : Bitfield<4, 1> { }; /* Extension Type */ + struct Ne : Bitfield<5, 1> { }; /* Numeric Error */ + struct Wp : Bitfield<16, 1> { }; /* Write Protect */ + struct Am : Bitfield<18, 1> { }; /* Alignment Mask */ + struct Nw : Bitfield<29, 1> { }; /* Not Write-through */ + struct Cd : Bitfield<30, 1> { }; /* Cache Disable */ + struct Pg : Bitfield<31, 1> { }; /* Paging */ + + static void write(access_t const v) { + asm volatile ("mov %0, %%cr0" :: "r" (v) : ); } + + static access_t read() + { + access_t v; + asm volatile ("mov %%cr0, %0" : "=r" (v) :: ); + return v; + } + }; + + /** + * Disable FPU by setting the TS flag in CR0. + */ + static void _disable_fpu() + { + Cr0::write(Cr0::read() | Cr0::Ts::bits(1)); + } + + /** + * Enable FPU by clearing the TS flag in CR0. + */ + static void _enable_fpu() { + asm volatile ("clts"); } + + /** + * Initialize FPU without checking for pending unmasked floating-point + * exceptions. + */ + static void _init_fpu() { + asm volatile ("fninit"); + } + + /** + * Returns True if the FPU is enabled. 
+ */ + static bool is_fpu_enabled() { + return !Cr0::Ts::get(Cr0::read()); } + + public: + + Cpu() : _fpu_state(0) { if (primary_id() == executing_id()) { _idt = new (&_mt_idt) Idt(); @@ -259,8 +365,8 @@ class Genode::Cpu init_virt_kernel(addr_t const table, unsigned const process_id) { Cr3::write(Cr3::init(table)); } - inline static void finish_init_phys_kernel() - { } + inline static void finish_init_phys_kernel() { + _init_fpu(); } /** * Configure this module appropriately for the first kernel run @@ -290,6 +396,25 @@ class Genode::Cpu */ bool retry_undefined_instr(Cpu_lazy_state *) { return false; } + /** + * Return whether to retry an FPU instruction after this call + */ + bool retry_fpu_instr(Cpu_lazy_state * const state) + { + if (is_fpu_enabled()) + return false; + + _enable_fpu(); + if (_fpu_state != state) { + if (_fpu_state) + _fpu_state->save(); + + state->load(); + _fpu_state = state; + } + return true; + } + /** * Return kernel name of the executing CPU */ @@ -300,13 +425,27 @@ class Genode::Cpu */ static unsigned primary_id() { return 0; } + /** + * Prepare for the proceeding of a user + * + * \param old_state CPU state of the last user + * \param new_state CPU state of the next user + */ + static void prepare_proceeding(Cpu_lazy_state * const old_state, + Cpu_lazy_state * const new_state) + { + if (old_state == new_state) + return; + + _disable_fpu(); + } + /************* ** Dummies ** *************/ static void tlb_insertions() { inval_branch_predicts(); } static void translation_added(addr_t, size_t) { } - static void prepare_proceeding(Cpu_lazy_state *, Cpu_lazy_state *) { } }; diff --git a/repos/base-hw/src/core/spec/x86/kernel/thread.cc b/repos/base-hw/src/core/spec/x86/kernel/thread.cc index 46a8d270b..b202ec02e 100644 --- a/repos/base-hw/src/core/spec/x86/kernel/thread.cc +++ b/repos/base-hw/src/core/spec/x86/kernel/thread.cc @@ -29,6 +29,11 @@ void Thread::exception(unsigned const cpu) if (trapno == PAGE_FAULT) { _mmu_exception(); return; + 
} else if (trapno == NO_MATH_COPROC) { + if (_cpu->retry_fpu_instr(&_lazy_state)) { return; } + PWRN("fpu error"); + _stop(); + return; } if (trapno == SUPERVISOR_CALL) { _call(); diff --git a/repos/base-hw/src/core/spec/x86_64/kernel/crt0.s b/repos/base-hw/src/core/spec/x86_64/kernel/crt0.s index 517ba0058..a61f5635c 100644 --- a/repos/base-hw/src/core/spec/x86_64/kernel/crt0.s +++ b/repos/base-hw/src/core/spec/x86_64/kernel/crt0.s @@ -29,9 +29,10 @@ .global _start _start: - /* Enable PAE (prerequisite for IA-32e mode) */ + /* Enable PAE (prerequisite for IA-32e mode) and OSFXSR */ movl %cr4, %eax btsl $5, %eax + btsl $9, %eax movl %eax, %cr4 /* Load initial pagetables */ diff --git a/repos/base/include/x86_64/cpu/cpu_state.h b/repos/base/include/x86_64/cpu/cpu_state.h index a55ffe68e..5ee2c054d 100644 --- a/repos/base/include/x86_64/cpu/cpu_state.h +++ b/repos/base/include/x86_64/cpu/cpu_state.h @@ -24,6 +24,7 @@ namespace Genode { struct Cpu_state; } struct Genode::Cpu_state { enum Cpu_exception { + NO_MATH_COPROC = 0x07, PAGE_FAULT = 0x0e, SUPERVISOR_CALL = 0x80, INTERRUPTS_START = 0x20,