From ef130a3bf9776ad35ea7ddc466a455604ecfab4e Mon Sep 17 00:00:00 2001 From: Alexander Boettcher Date: Wed, 26 Sep 2018 11:00:01 +0200 Subject: [PATCH] sel4/x86: implement vm_session interface Issue #3111 - enable vt-x in kernel configuration Kernel patches: - add unrestricted guest support - avoid kernel boot failure when vt-x is not available - avoid nullpointer in kernel when vcpu is not fully setup - avoid vcpu scheduling bug which causes starvation on same/below prio level - save efer register correctly from guest --- repos/base-sel4/lib/mk/base-sel4-common.inc | 14 + repos/base-sel4/lib/mk/base-sel4-common.mk | 9 +- .../lib/mk/spec/x86/base-sel4-common.mk | 9 + .../base-sel4/lib/mk/spec/x86_32/core-sel4.mk | 4 +- .../lib/mk/spec/x86_32/syscall-sel4.mk | 2 +- .../base-sel4/lib/mk/spec/x86_64/core-sel4.mk | 4 +- .../lib/mk/spec/x86_64/syscall-sel4.mk | 2 +- repos/base-sel4/patches/autoconf_32.patch | 4 +- repos/base-sel4/patches/autoconf_64.patch | 4 +- repos/base-sel4/patches/intel_efer.patch | 9 + repos/base-sel4/patches/intel_ug.patch | 28 + repos/base-sel4/patches/intel_vtx_check.patch | 85 ++ repos/base-sel4/patches/sched_bug_x86.patch | 27 + .../base-sel4/patches/vcpu_nullptr_bug.patch | 13 + repos/base-sel4/ports/sel4.hash | 2 +- .../base-sel4/src/core/include/platform_pd.h | 2 + .../src/core/include/platform_thread.h | 4 + .../base-sel4/src/core/include/thread_sel4.h | 10 +- .../src/core/include/vm_session_component.h | 110 +++ repos/base-sel4/src/core/include/vm_space.h | 111 ++- repos/base-sel4/src/core/platform.cc | 5 +- repos/base-sel4/src/core/platform_pd.cc | 14 +- repos/base-sel4/src/core/platform_thread.cc | 16 + repos/base-sel4/src/core/spec/arm/vm_space.cc | 3 +- .../src/core/spec/x86/platform_services.cc | 35 + .../src/core/spec/x86/platform_thread.cc | 31 + .../src/core/spec/x86/vm_session_component.cc | 247 +++++ repos/base-sel4/src/core/spec/x86/vm_space.cc | 96 +- .../src/core/spec/x86_32/arch_kernel_object.h | 14 + .../src/core/spec/x86_32/platform.cc | 18 +- .../src/core/spec/x86_64/arch_kernel_object.h | 15 + .../src/core/spec/x86_64/platform.cc | 18 + .../src/include/base/internal/native_utcb.h | 12 +- .../base-sel4/src/lib/base/x86/vm_session.cc | 845 ++++++++++++++++++ repos/os/run/vmm_x86.run | 2 +- 35 files changed, 1759 insertions(+), 65 deletions(-) create mode 100644 repos/base-sel4/lib/mk/base-sel4-common.inc create mode 100644 repos/base-sel4/lib/mk/spec/x86/base-sel4-common.mk create mode 100644 repos/base-sel4/patches/intel_efer.patch create mode 100644 repos/base-sel4/patches/intel_ug.patch create mode 100644 repos/base-sel4/patches/intel_vtx_check.patch create mode 100644 repos/base-sel4/patches/sched_bug_x86.patch create mode 100644 repos/base-sel4/patches/vcpu_nullptr_bug.patch create mode 100644 repos/base-sel4/src/core/include/vm_session_component.h create mode 100644 repos/base-sel4/src/core/spec/x86/platform_services.cc create mode 100644 repos/base-sel4/src/core/spec/x86/vm_session_component.cc create mode 100644 repos/base-sel4/src/lib/base/x86/vm_session.cc diff --git a/repos/base-sel4/lib/mk/base-sel4-common.inc b/repos/base-sel4/lib/mk/base-sel4-common.inc new file mode 100644 index 000000000..a6606a746 --- /dev/null +++ b/repos/base-sel4/lib/mk/base-sel4-common.inc @@ -0,0 +1,14 @@ +# +# \brief Portions of base library shared by core and non-core processes +# \author Norman Feske +# \date 2013-02-14 +# + +include $(BASE_DIR)/lib/mk/base-common.inc + +LIBS += startup-sel4 syscall-sel4 + +SRC_CC += rpc_dispatch_loop.cc +SRC_CC += thread.cc thread_myself.cc thread_bootstrap.cc +SRC_CC += capability.cc capability_raw.cc +SRC_CC += stack_area_addr.cc diff --git a/repos/base-sel4/lib/mk/base-sel4-common.mk b/repos/base-sel4/lib/mk/base-sel4-common.mk index a6606a746..b7c19fce6 100644 --- a/repos/base-sel4/lib/mk/base-sel4-common.mk +++ b/repos/base-sel4/lib/mk/base-sel4-common.mk @@ -4,11 +4,4 @@ # \date 2013-02-14 # -include $(BASE_DIR)/lib/mk/base-common.inc - -LIBS += startup-sel4 syscall-sel4 - -SRC_CC += rpc_dispatch_loop.cc -SRC_CC += thread.cc thread_myself.cc thread_bootstrap.cc -SRC_CC += capability.cc capability_raw.cc -SRC_CC += stack_area_addr.cc +include $(REP_DIR)/lib/mk/base-sel4-common.inc diff --git a/repos/base-sel4/lib/mk/spec/x86/base-sel4-common.mk b/repos/base-sel4/lib/mk/spec/x86/base-sel4-common.mk new file mode 100644 index 000000000..432f4d73a --- /dev/null +++ b/repos/base-sel4/lib/mk/spec/x86/base-sel4-common.mk @@ -0,0 +1,9 @@ +# +# \brief Portions of base library shared by core and non-core processes +# \author Norman Feske +# \date 2013-02-14 +# + +vpath vm_session.cc $(REP_DIR)/src/lib/base/x86 + +include $(REP_DIR)/lib/mk/base-sel4-common.inc diff --git a/repos/base-sel4/lib/mk/spec/x86_32/core-sel4.mk b/repos/base-sel4/lib/mk/spec/x86_32/core-sel4.mk index 78432e553..05987a1d2 100644 --- a/repos/base-sel4/lib/mk/spec/x86_32/core-sel4.mk +++ b/repos/base-sel4/lib/mk/spec/x86_32/core-sel4.mk @@ -1,18 +1,20 @@ GEN_SRC_CC = \ - spec/x86/platform_services.cc \ spec/x86/io_port_session_component.cc REP_SRC_CC = \ spec/x86/io_port_session_support.cc \ spec/x86/irq.cc \ + spec/x86/platform_services.cc \ spec/x86/platform_thread.cc \ spec/x86/vm_space.cc \ + spec/x86/vm_session_component.cc \ spec/x86_32/boot_info.cc \ spec/x86_32/platform.cc \ spec/x86_32/platform_pd.cc \ spec/x86_32/thread.cc \ spec/x86_32/vm_space.cc +INC_DIR += $(REP_DIR)/src/core/spec/x86_32 INC_DIR += $(REP_DIR)/src/core/spec/x86 include $(REP_DIR)/lib/mk/core-sel4.inc diff --git a/repos/base-sel4/lib/mk/spec/x86_32/syscall-sel4.mk b/repos/base-sel4/lib/mk/spec/x86_32/syscall-sel4.mk index 0a201ccb9..9f49056d0 100644 --- a/repos/base-sel4/lib/mk/spec/x86_32/syscall-sel4.mk +++ b/repos/base-sel4/lib/mk/spec/x86_32/syscall-sel4.mk @@ -5,6 +5,6 @@ SEL4_ARCH := ia32 PLAT_BOARD := /$(SEL4_ARCH) SEL4_WORDBITS := 32 -ARCH_INCLUDES := exIPC.h +ARCH_INCLUDES := exIPC.h vmenter.h include $(REP_DIR)/lib/mk/syscall-sel4.inc diff --git a/repos/base-sel4/lib/mk/spec/x86_64/core-sel4.mk b/repos/base-sel4/lib/mk/spec/x86_64/core-sel4.mk index 8e9ee6bf0..984ee4915 100644 --- a/repos/base-sel4/lib/mk/spec/x86_64/core-sel4.mk +++ b/repos/base-sel4/lib/mk/spec/x86_64/core-sel4.mk @@ -1,18 +1,20 @@ GEN_SRC_CC = \ - spec/x86/platform_services.cc \ spec/x86/io_port_session_component.cc REP_SRC_CC = \ spec/x86/io_port_session_support.cc \ spec/x86/irq.cc \ + spec/x86/platform_services.cc \ spec/x86/platform_thread.cc \ spec/x86/vm_space.cc \ + spec/x86/vm_session_component.cc \ spec/x86_64/boot_info.cc \ spec/x86_64/platform.cc \ spec/x86_64/platform_pd.cc \ spec/x86_64/thread.cc \ spec/x86_64/vm_space.cc +INC_DIR += $(REP_DIR)/src/core/spec/x86_64 INC_DIR += $(REP_DIR)/src/core/spec/x86 include $(REP_DIR)/lib/mk/core-sel4.inc diff --git a/repos/base-sel4/lib/mk/spec/x86_64/syscall-sel4.mk b/repos/base-sel4/lib/mk/spec/x86_64/syscall-sel4.mk index 2d3fac763..a7fec8905 100644 --- a/repos/base-sel4/lib/mk/spec/x86_64/syscall-sel4.mk +++ b/repos/base-sel4/lib/mk/spec/x86_64/syscall-sel4.mk @@ -5,7 +5,7 @@ SEL4_ARCH := x86_64 PLAT_BOARD := /$(SEL4_ARCH) SEL4_WORDBITS := 64 -ARCH_INCLUDES := exIPC.h +ARCH_INCLUDES := exIPC.h vmenter.h SEL4_ARCH_INCLUDES := syscalls_syscall.h include $(REP_DIR)/lib/mk/syscall-sel4.inc diff --git a/repos/base-sel4/patches/autoconf_32.patch b/repos/base-sel4/patches/autoconf_32.patch index f67143722..fc7daf1cc 100644 --- a/repos/base-sel4/patches/autoconf_32.patch +++ b/repos/base-sel4/patches/autoconf_32.patch @@ -32,7 +32,7 @@ #define CONFIG_NUM_PRIORITIES 256 #define CONFIG_TESTPRINTER_REGEX ".*" #define CONFIG_APP_SEL4TEST 1 -@@ -96,6 +96,11 @@ +@@ -96,6 +96,13 @@ #define CONFIG_CACHE_LN_SZ 64 #define CONFIG_LIB_SEL4_MUSLC_SYS_MORECORE_BYTES 1048576 #define CONFIG_BUILDSYS_USE_CCACHE 1 @@ -41,6 +41,8 @@ +#define CONFIG_PRINTING 1 +#define CONFIG_ENABLE_BENCHMARKS 1 +#define CONFIG_BENCHMARK_TRACK_UTILISATION 1 ++#define CONFIG_VTX 1 ++#define CONFIG_MAX_VPIDS 64 #else #define AUTOCONF_INCLUDED #define CONFIG_LIB_SEL4_SIMPLE 1 diff --git a/repos/base-sel4/patches/autoconf_64.patch b/repos/base-sel4/patches/autoconf_64.patch index 4437c0003..de290d449 100644 --- a/repos/base-sel4/patches/autoconf_64.patch +++ b/repos/base-sel4/patches/autoconf_64.patch @@ -42,7 +42,7 @@ #define CONFIG_NUM_PRIORITIES 256 #define CONFIG_TESTPRINTER_REGEX ".*" #define CONFIG_APP_SEL4TEST 1 -@@ -189,9 +187,14 @@ +@@ -189,9 +187,16 @@ #define CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES 128 #define CONFIG_CACHE_LN_SZ 64 #define CONFIG_ARCH_X86_64 1 @@ -58,4 +58,6 @@ +#define CONFIG_PRINTING 1 +#define CONFIG_ENABLE_BENCHMARKS 1 +#define CONFIG_BENCHMARK_TRACK_UTILISATION 1 ++#define CONFIG_VTX 1 ++#define CONFIG_MAX_VPIDS 64 #endif /* ARCH_IA32 */ diff --git a/repos/base-sel4/patches/intel_efer.patch b/repos/base-sel4/patches/intel_efer.patch new file mode 100644 index 000000000..b3ad9990a --- /dev/null +++ b/repos/base-sel4/patches/intel_efer.patch @@ -0,0 +1,9 @@ +--- src/kernel/sel4/src/arch/x86/object/vcpu.c ++++ src/kernel/sel4/src/arch/x86/object/vcpu.c +@@ -340,6 +340,7 @@ init_vtx_fixed_values(bool_t useTrueMsrs) + secondary_control_high |= secondary_control_mask; + exit_control_high |= exit_control_mask; + ++ entry_control_high |= BIT(15); + return true; + } diff --git a/repos/base-sel4/patches/intel_ug.patch b/repos/base-sel4/patches/intel_ug.patch new file mode 100644 index 000000000..4480bbb41 --- /dev/null +++ b/repos/base-sel4/patches/intel_ug.patch @@ -0,0 +1,28 @@ +--- src/kernel/sel4/src/arch/x86/object/vcpu.c ++++ src/kernel/sel4/src/arch/x86/object/vcpu.c +@@ -377,6 +379,12 @@ check_vtx_fixed_values(bool_t useTrueMsrs) + uint32_t local_cr4_high = x86_rdmsr_low(IA32_VMX_CR4_FIXED0_MSR); + uint32_t local_cr4_low = x86_rdmsr_low(IA32_VMX_CR4_FIXED1_MSR); + ++ /* if UG was switched on on boot CPU, do it also on all other CPUs */ ++ if (secondary_control_low & BIT(7)) { ++ local_secondary_control_high &= BIT(7); ++ local_cr0_high &= ~(BIT(31) | BIT(0)); ++ } ++ + /* We want to check that any bits that there are no bits that this core + * requires to be high, that the BSP did not require to be high. This can + * be checked with 'local_high & high == local_high'. +@@ -1067,6 +1075,12 @@ vtx_init(void) + printf("vt-x: lack of required features\n"); + return false; + } ++ ++ /* enable unrestricted guest support if available */ ++ if (secondary_control_low & BIT(7)) { ++ secondary_control_high |= BIT(7); ++ cr0_high &= ~(BIT(31) | BIT(0)); ++ } + } + if (!check_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) { + printf("vt-x: cores have inconsistent features\n"); diff --git a/repos/base-sel4/patches/intel_vtx_check.patch b/repos/base-sel4/patches/intel_vtx_check.patch new file mode 100644 index 000000000..3ad6455a2 --- /dev/null +++ b/repos/base-sel4/patches/intel_vtx_check.patch @@ -0,0 +1,85 @@ +--- src/kernel/sel4/include/arch/x86/arch/object/vcpu.h ++++ src/kernel/sel4/include/arch/x86/arch/object/vcpu.h +@@ -322,7 +322,7 @@ unverified_compile_assert(vcpu_fpu_state_alignment_valid, + + /* Initializes a VCPU object with default values. A VCPU object that is not inititlized + * must not be run/loaded with vmptrld */ +-void vcpu_init(vcpu_t *vcpu); ++bool_t vcpu_init(vcpu_t *vcpu); + + /* Cleans up the VCPU object such that its memory can be freed */ + void vcpu_finalise(vcpu_t *vcpu); +--- src/kernel/sel4/src/arch/x86/kernel/boot.c ++++ src/kernel/sel4/src/arch/x86/kernel/boot.c +@@ -582,9 +582,7 @@ init_cpu( + + #ifdef CONFIG_VTX + /* initialise Intel VT-x extensions */ +- if (!vtx_init()) { +- return false; +- } ++ vtx_init(); + #endif + + return true; +--- src/kernel/sel4/src/arch/x86/object/objecttype.c ++++ src/kernel/sel4/src/arch/x86/object/objecttype.c +@@ -476,7 +476,8 @@ Arch_createObject(object_t t, void *regionBase, word_t userSize, bool_t deviceMe + case seL4_X86_VCPUObject: { + vcpu_t *vcpu; + vcpu = VCPU_PTR((word_t)regionBase); +- vcpu_init(vcpu); ++ if (!vcpu_init(vcpu)) ++ return cap_null_cap_new(); + return cap_vcpu_cap_new(VCPU_REF(vcpu)); + } + case seL4_X86_EPTPML4Object: +--- src/kernel/sel4/src/arch/x86/object/vcpu.c ++++ src/kernel/sel4/src/arch/x86/object/vcpu.c +@@ -409,9 +416,14 @@ applyFixedBits(uint32_t original, uint32_t high, uint32_t low) + return original; + } + +-void ++static bool_t vcpu_support_available = false; ++ ++bool_t + vcpu_init(vcpu_t *vcpu) + { ++ if (!vcpu_support_available) ++ return false; ++ + vcpu->vcpuTCB = NULL; + vcpu->launched = false; + +@@ -481,6 +493,8 @@ vcpu_init(vcpu_t *vcpu) + memset(vcpu->io, ~(word_t)0, VCPU_IOBITMAP_SIZE); + vmwrite(VMX_CONTROL_IOA_ADDRESS, pptr_to_paddr(vcpu->io)); + vmwrite(VMX_CONTROL_IOB_ADDRESS, pptr_to_paddr((char *)vcpu->io + (VCPU_IOBITMAP_SIZE / 2))); ++ ++ return true; + } + + static void +@@ -1067,6 +1081,12 @@ vtx_init(void) + printf("vt-x: lack of required features\n"); + return false; + } ++ ++ /* enable unrestricted guest support if available */ ++ if (secondary_control_low & BIT(7)) { ++ secondary_control_high |= BIT(7); ++ cr0_high &= ~(BIT(31) | BIT(0)); ++ } + } + if (!check_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) { + printf("vt-x: cores have inconsistent features\n"); +@@ -1109,6 +1129,8 @@ vtx_init(void) + return false; + } + ++ vcpu_support_available = true; ++ + return true; + } + diff --git a/repos/base-sel4/patches/sched_bug_x86.patch b/repos/base-sel4/patches/sched_bug_x86.patch new file mode 100644 index 000000000..c52288886 --- /dev/null +++ b/repos/base-sel4/patches/sched_bug_x86.patch @@ -0,0 +1,27 @@ +--- src/kernel/sel4/src/kernel/thread.c ++++ src/kernel/sel4/src/kernel/thread.c +@@ -437,8 +437,11 @@ scheduleTCB(tcb_t *tptr) + void + timerTick(void) + { +- if (likely(thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState) == +- ThreadState_Running)) { ++ switch (thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState)) { ++ case ThreadState_Running: ++#ifdef CONFIG_VTX ++ case ThreadState_RunningVM: ++#endif + if (NODE_STATE(ksCurThread)->tcbTimeSlice > 1) { + NODE_STATE(ksCurThread)->tcbTimeSlice--; + } else { +@@ -446,6 +449,10 @@ timerTick(void) + SCHED_APPEND_CURRENT_TCB; + rescheduleRequired(); + } ++ break; ++ default: ++ /* no tick updates */ ++ break; + } + + if (CONFIG_NUM_DOMAINS > 1) { diff --git a/repos/base-sel4/patches/vcpu_nullptr_bug.patch b/repos/base-sel4/patches/vcpu_nullptr_bug.patch new file mode 100644 index 000000000..b90f722b4 --- /dev/null +++ b/repos/base-sel4/patches/vcpu_nullptr_bug.patch @@ -0,0 +1,13 @@ +Bug detected by Genode and patch provided by Anna Lyons: +http://sel4.systems/pipermail/devel/2018-September/002161.html +--- src/kernel/sel4/src/arch/x86/c_traps.c ++++ src/kernel/sel4/src/arch/x86/c_traps.c +@@ -111,7 +111,7 @@ slowpath(syscall_t syscall) + { + + #ifdef CONFIG_VTX +- if (syscall == SysVMEnter) { ++ if (syscall == SysVMEnter && NODE_STATE(ksCurThread)->tcbArch.tcbVCPU) { + vcpu_update_state_sysvmenter(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU); + if (NODE_STATE(ksCurThread)->tcbBoundNotification && notification_ptr_get_state(NODE_STATE(ksCurThread)->tcbBoundNotification) == NtfnState_Active) { + completeSignal(NODE_STATE(ksCurThread)->tcbBoundNotification, NODE_STATE(ksCurThread)); diff --git a/repos/base-sel4/ports/sel4.hash b/repos/base-sel4/ports/sel4.hash index eb7301c49..db7bb5378 100644 --- a/repos/base-sel4/ports/sel4.hash +++ b/repos/base-sel4/ports/sel4.hash @@ -1 +1 @@ -876d7aaf232c1b7dd548dd2b18474bb91f19ac23 +8518d37c4b819daba60648ca23a739ccf1f55460 diff --git a/repos/base-sel4/src/core/include/platform_pd.h b/repos/base-sel4/src/core/include/platform_pd.h index 6fe53d18a..bc2ae8044 100644 --- a/repos/base-sel4/src/core/include/platform_pd.h +++ b/repos/base-sel4/src/core/include/platform_pd.h @@ -123,6 +123,8 @@ class Genode::Platform_pd : public Address_space size_t cspace_size_log2() { return CSPACE_SIZE_LOG2; } bool install_mapping(Mapping const &mapping, const char * thread_name); + + static Bit_allocator<1024> &pd_id_alloc(); }; #endif /* _CORE__INCLUDE__PLATFORM_PD_H_ */ diff --git a/repos/base-sel4/src/core/include/platform_thread.h b/repos/base-sel4/src/core/include/platform_thread.h index 186061877..b38215c34 100644 --- a/repos/base-sel4/src/core/include/platform_thread.h +++ b/repos/base-sel4/src/core/include/platform_thread.h @@ -66,6 +66,8 @@ class Genode::Platform_thread : public List::Element Cap_sel _fault_handler_sel { 0 }; Cap_sel _ep_sel { 0 }; Cap_sel _lock_sel { 0 }; + Cap_sel _vcpu_sel { 0 }; + Cap_sel _vcpu_notify_sel { 0 }; friend class Platform_pd; @@ -188,6 +190,8 @@ class Genode::Platform_thread : public List::Element Cap_sel tcb_sel() const { return _info.tcb_sel; } bool install_mapping(Mapping const &mapping); + + void setup_vcpu(Cap_sel ept, Cap_sel notification); }; #endif /* _CORE__INCLUDE__PLATFORM_THREAD_H_ */ diff --git a/repos/base-sel4/src/core/include/thread_sel4.h b/repos/base-sel4/src/core/include/thread_sel4.h index ae146e08e..eae57818f 100644 --- a/repos/base-sel4/src/core/include/thread_sel4.h +++ b/repos/base-sel4/src/core/include/thread_sel4.h @@ -32,8 +32,10 @@ namespace Genode { Cap_sel tcb_sel { 0 }; Cap_sel ep_sel { 0 }; Cap_sel lock_sel { 0 }; + Cap_sel vcpu_sel { 0 }; - addr_t ipc_buffer_phys = 0; + addr_t ipc_buffer_phys { 0 }; + addr_t vcpu_state_phys { 0 }; inline void write_thread_info_to_ipc_buffer(Cap_sel pd_ep_sel); @@ -44,6 +46,7 @@ namespace Genode { inline void init(addr_t const utcb_virt_addr, unsigned const prio); inline void destruct(); + bool init_vcpu(Platform &, Cap_sel ept); }; /** @@ -129,6 +132,11 @@ void Genode::Thread_info::destruct() seL4_CNode_Delete(seL4_CapInitThreadCNode, tcb_sel.value(), 32); platform_specific().core_sel_alloc().free(tcb_sel); } + if (vcpu_sel.value()) { + /* XXX free 16k memory */ + seL4_CNode_Delete(seL4_CapInitThreadCNode, vcpu_sel.value(), 32); + platform_specific().core_sel_alloc().free(vcpu_sel); + } if (ipc_buffer_phys) { Platform &platform = platform_specific(); diff --git a/repos/base-sel4/src/core/include/vm_session_component.h b/repos/base-sel4/src/core/include/vm_session_component.h new file mode 100644 index 000000000..4767b5308 --- /dev/null +++ b/repos/base-sel4/src/core/include/vm_session_component.h @@ -0,0 +1,110 @@ +/* + * \brief Core-specific instance of the VM session interface + * \author Alexander Boettcher + * \date 2018-08-26 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _CORE__VM_SESSION_COMPONENT_H_ +#define _CORE__VM_SESSION_COMPONENT_H_ + +/* Genode includes */ +#include +#include +#include + +namespace Genode { class Vm_session_component; } + +class Genode::Vm_session_component +: + private Ram_quota_guard, + private Cap_quota_guard, + public Rpc_object +{ + private: + + class Vcpu : public Genode::List::Element + { + private: + + Constrained_ram_allocator &_ram_alloc; + Ram_dataspace_capability _ds_cap; + Cap_sel _notification { 0 }; + Vm_session::Vcpu_id _vcpu_id; + + void _free_up(); + + public: + + Vcpu(Constrained_ram_allocator &, Cap_quota_guard &, Vcpu_id, + seL4_Untyped); + ~Vcpu() { _free_up(); } + + Dataspace_capability ds_cap() const { return _ds_cap; } + bool match(Vcpu_id id) const { return id.id == _vcpu_id.id; } + void signal() const { seL4_Signal(_notification.value()); } + Cap_sel notification_cap() const { return _notification; } + }; + + Rpc_entrypoint &_ep; + Constrained_ram_allocator _constrained_md_ram_alloc; + Sliced_heap _sliced_heap; + List _vcpus { }; + unsigned _id_alloc { 0 }; + unsigned _pd_id { 0 }; + Cap_sel _vm_page_table; + Page_table_registry _page_table_registry { _sliced_heap }; + Vm_space _vm_space; + struct { + addr_t _phys; + seL4_Untyped _service; + } _ept { 0, 0 }; + struct { + addr_t _phys; + seL4_Untyped _service; + } _notifications { 0, 0 }; + + Vcpu * _lookup(Vcpu_id const vcpu_id) + { + for (Vcpu * vcpu = _vcpus.first(); vcpu; vcpu = vcpu->next()) + if (vcpu->match(vcpu_id)) return vcpu; + + return nullptr; + } + + protected: + + Ram_quota_guard &_ram_quota_guard() { return *this; } + Cap_quota_guard &_cap_quota_guard() { return *this; } + + public: + + using Ram_quota_guard::upgrade; + using Cap_quota_guard::upgrade; + + Vm_session_component(Rpc_entrypoint &, Resources, Label const &, + Diag, Ram_allocator &ram, Region_map &); + ~Vm_session_component(); + + /************************** + ** Vm session interface ** + **************************/ + + Dataspace_capability _cpu_state(Vcpu_id); + + void _exception_handler(Signal_context_capability, Vcpu_id) {} + void _run(Vcpu_id) {} + void _pause(Vcpu_id); + void attach(Dataspace_capability, addr_t) override; + void attach_pic(addr_t) override {} + void detach(addr_t, size_t) override {} + void _create_vcpu(Thread_capability); +}; + +#endif /* _CORE__VM_SESSION_COMPONENT_H_ */ diff --git a/repos/base-sel4/src/core/include/vm_space.h b/repos/base-sel4/src/core/include/vm_space.h index 342bc08f4..394690e90 100644 --- a/repos/base-sel4/src/core/include/vm_space.h +++ b/repos/base-sel4/src/core/include/vm_space.h @@ -157,7 +157,8 @@ class Genode::Vm_space addr_t _idx_to_sel(addr_t idx) const { return (_id << 20) | idx; } - void _flush(bool const flush_support) + template + void _flush(bool const flush_support, FN const &fn) { if (!flush_support) { warning("mapping cache full, but can't flush"); @@ -167,38 +168,18 @@ class Genode::Vm_space warning("flush page table entries - mapping cache full - PD: ", _pd_label.string()); - _page_table_registry.flush_pages([&] (Cap_sel const &idx, - addr_t const v_addr) - { - /* XXX - INITIAL_IPC_BUFFER can't be re-mapped currently */ - if (v_addr == 0x1000) - return false; - /* XXX - UTCB can't be re-mapped currently */ - if (stack_area_virtual_base() <= v_addr - && (v_addr < stack_area_virtual_base() + - stack_area_virtual_size()) - && !((v_addr + 0x1000) & (stack_virtual_size() - 1))) - return false; + _page_table_registry.flush_pages(fn); - long err = _unmap_page(idx); - if (err != seL4_NoError) - error("unmap failed, idx=", idx, " res=", err); - - _leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value())); - - _sel_alloc.free(idx.value()); - - return true; - }); } - bool _map_frame(addr_t const from_phys, addr_t const to_virt, + template + bool _map_frame(addr_t const from_phys, addr_t const to_dest, Cache_attribute const cacheability, bool const writable, bool const executable, - bool const flush_support) + bool const flush_support, bool guest, FN const &fn) { - if (_page_table_registry.page_frame_at(to_virt)) { + if (_page_table_registry.page_frame_at(to_dest)) { /* * Valid behaviour if multiple threads concurrently * causing the same page-fault. For the first thread the @@ -215,7 +196,7 @@ class Genode::Vm_space catch (Selector_allocator::Out_of_indices) { /* free all page-table-entry selectors and retry once */ - _flush(flush_support); + _flush(flush_support, fn); pte_idx = _sel_alloc.alloc(); } @@ -230,22 +211,22 @@ class Genode::Vm_space Cnode_index(_leaf_cnode_entry(pte_idx))); /* remember relationship between pte_sel and the virtual address */ - try { _page_table_registry.insert_page_frame(to_virt, Cap_sel(pte_idx)); } + try { _page_table_registry.insert_page_frame(to_dest, Cap_sel(pte_idx)); } catch (Page_table_registry::Mapping_cache_full) { /* free all entries of mapping cache and re-try once */ - _flush(flush_support); - _page_table_registry.insert_page_frame(to_virt, Cap_sel(pte_idx)); + _flush(flush_support, fn); + _page_table_registry.insert_page_frame(to_dest, Cap_sel(pte_idx)); } /* * Insert copy of page-frame selector into page table */ - long ret = _map_page(Cap_sel(pte_idx), to_virt, cacheability, - writable, executable); + long ret = _map_page(Cap_sel(pte_idx), to_dest, cacheability, + writable, executable, guest); if (ret != seL4_NoError) { error("seL4_*_Page_Map ", Hex(from_phys), "->", - Hex(to_virt), " returned ", ret); + Hex(to_dest), " returned ", ret); return false; } return true; @@ -256,7 +237,7 @@ class Genode::Vm_space */ long _map_page(Genode::Cap_sel const &idx, Genode::addr_t const virt, Cache_attribute const cacheability, bool const write, - bool const writable); + bool const writable, bool guest); long _unmap_page(Genode::Cap_sel const &idx); long _invalidate_page(Genode::Cap_sel const &, seL4_Word const, seL4_Word const); @@ -393,6 +374,29 @@ class Genode::Vm_space size_t const num_pages, Cache_attribute const cacheability, bool const writable, bool const executable, bool flush_support) { + auto fn_unmap = [&] (Cap_sel const &idx, addr_t const v_addr) + { + /* XXX - INITIAL_IPC_BUFFER can't be re-mapped currently */ + if (v_addr == 0x1000) + return false; + /* XXX - UTCB can't be re-mapped currently */ + if (stack_area_virtual_base() <= v_addr + && (v_addr < stack_area_virtual_base() + + stack_area_virtual_size()) + && !((v_addr + 0x1000) & (stack_virtual_size() - 1))) + return false; + + long err = _unmap_page(idx); + if (err != seL4_NoError) + error("unmap failed, idx=", idx, " res=", err); + + _leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value())); + + _sel_alloc.free(idx.value()); + + return true; + }; + Lock::Guard guard(_lock); bool ok = true; @@ -402,7 +406,8 @@ class Genode::Vm_space if (_map_frame(from_phys + offset, to_virt + offset, cacheability, writable, executable, - flush_support)) + flush_support, false /* host page table */, + fn_unmap)) continue; ok = false; @@ -415,6 +420,34 @@ class Genode::Vm_space return ok; } + void map_guest(addr_t const from_phys, addr_t const guest_phys, + size_t const num_pages, Cache_attribute const cacheability, + bool const writable, bool const executable, bool flush_support) + { + auto fn_unmap = [&] (Cap_sel const &idx, addr_t const) { + long err = _unmap_page(idx); + if (err != seL4_NoError) + error("unmap failed, idx=", idx, " res=", err); + + _leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value())); + + _sel_alloc.free(idx.value()); + + return true; + }; + + Lock::Guard guard(_lock); + + for (size_t i = 0; i < num_pages; i++) { + off_t const offset = i << get_page_size_log2(); + + _map_frame(from_phys + offset, guest_phys + offset, + cacheability, writable, executable, + flush_support, true /* guest page table */, + fn_unmap); + } + } + bool unmap(addr_t const virt, size_t const num_pages, bool const invalidate = false) { @@ -453,12 +486,20 @@ class Genode::Vm_space void unsynchronized_alloc_page_tables(addr_t const start, addr_t const size); + void unsynchronized_alloc_guest_page_tables(addr_t, addr_t); + void alloc_page_tables(addr_t const start, addr_t const size) { Lock::Guard guard(_lock); unsynchronized_alloc_page_tables(start, size); } + void alloc_guest_page_tables(addr_t const start, addr_t const size) + { + Lock::Guard guard(_lock); + unsynchronized_alloc_guest_page_tables(start, size); + } + Session_label const & pd_label() const { return _pd_label; } }; diff --git a/repos/base-sel4/src/core/platform.cc b/repos/base-sel4/src/core/platform.cc index 289fdaf09..f3d90f9ae 100644 --- a/repos/base-sel4/src/core/platform.cc +++ b/repos/base-sel4/src/core/platform.cc @@ -406,8 +406,11 @@ void Platform::_init_rom_modules() xml.node("hardware", [&] () { xml.node("features", [&] () { - xml.attribute("svm", false); + #ifdef CONFIG_VTX + xml.attribute("vmx", true); + #else xml.attribute("vmx", false); + #endif }); xml.node("tsc", [&] () { xml.attribute("freq_khz" , boot_freq->freq_mhz * 1000UL); diff --git a/repos/base-sel4/src/core/platform_pd.cc b/repos/base-sel4/src/core/platform_pd.cc index 2efe030b0..df4a068ad 100644 --- a/repos/base-sel4/src/core/platform_pd.cc +++ b/repos/base-sel4/src/core/platform_pd.cc @@ -45,7 +45,7 @@ struct Pd_id_alloc : Bit_allocator<1024> }; -static Pd_id_alloc &pd_id_alloc() +Bit_allocator<1024> &Platform_pd::pd_id_alloc() { static Pd_id_alloc inst; return inst; @@ -59,8 +59,10 @@ bool Platform_pd::bind_thread(Platform_thread &thread) thread._fault_handler_sel = alloc_sel(); /* allocate endpoint selector in the PD's CSpace */ thread._ep_sel = alloc_sel(); + thread._vcpu_sel = alloc_sel(); /* allocate asynchronous selector used for locks in the PD's CSpace */ thread._lock_sel = thread._utcb ? alloc_sel() : Cap_sel(INITIAL_SEL_LOCK); + thread._vcpu_notify_sel = alloc_sel(); } catch (Platform_pd::Sel_bit_alloc::Out_of_indices) { if (thread._fault_handler_sel.value()) { free_sel(thread._fault_handler_sel); @@ -70,6 +72,14 @@ bool Platform_pd::bind_thread(Platform_thread &thread) free_sel(thread._ep_sel); thread._ep_sel = Cap_sel(0); } + if (thread._vcpu_sel.value()) { + free_sel(thread._vcpu_sel); + thread._vcpu_sel = Cap_sel(0); + } + if (thread._vcpu_notify_sel.value()) { + free_sel(thread._vcpu_notify_sel); + thread._vcpu_notify_sel = Cap_sel(0); + } return false; } @@ -103,6 +113,8 @@ void Platform_pd::unbind_thread(Platform_thread &thread) free_sel(thread._fault_handler_sel); free_sel(thread._ep_sel); + free_sel(thread._vcpu_sel); + free_sel(thread._vcpu_notify_sel); if (thread._utcb) _vm_space.unmap(thread._utcb, 1); diff --git a/repos/base-sel4/src/core/platform_thread.cc b/repos/base-sel4/src/core/platform_thread.cc index c3185ccac..8a506a866 100644 --- a/repos/base-sel4/src/core/platform_thread.cc +++ b/repos/base-sel4/src/core/platform_thread.cc @@ -275,3 +275,19 @@ unsigned long long Platform_thread::execution_time() const uint64_t const execution_time = values[BENCHMARK_TCB_UTILISATION]; return execution_time; } + +void Platform_thread::setup_vcpu(Cap_sel ept, Cap_sel notification) +{ + if (!_info.init_vcpu(platform_specific(), ept)) { + Genode::error("creating vCPU failed"); + return; + } + + /* install the thread's endpoint selector to the PD's CSpace */ + _pd->cspace_cnode(_vcpu_sel).copy(platform_specific().core_cnode(), + _info.vcpu_sel, _vcpu_sel); + _pd->cspace_cnode(_vcpu_notify_sel).copy(platform_specific().core_cnode(), + notification, _vcpu_notify_sel); + + prepopulate_ipc_buffer(_info.ipc_buffer_phys, _vcpu_sel, _vcpu_notify_sel); +} diff --git a/repos/base-sel4/src/core/spec/arm/vm_space.cc b/repos/base-sel4/src/core/spec/arm/vm_space.cc index 2cda2c2fd..34f2f8342 100644 --- a/repos/base-sel4/src/core/spec/arm/vm_space.cc +++ b/repos/base-sel4/src/core/spec/arm/vm_space.cc @@ -28,7 +28,8 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx, Genode::addr_t const virt, Cache_attribute const cacheability, bool const writable, - bool const executable) + bool const executable, + bool) { seL4_ARM_Page const service = _idx_to_sel(idx.value()); seL4_ARM_PageDirectory const pd = _pd_sel.value(); diff --git a/repos/base-sel4/src/core/spec/x86/platform_services.cc b/repos/base-sel4/src/core/spec/x86/platform_services.cc new file mode 100644 index 000000000..2c2ae3ec5 --- /dev/null +++ b/repos/base-sel4/src/core/spec/x86/platform_services.cc @@ -0,0 +1,35 @@ +/* + * \brief Platform specific services for seL4 x86 + * \author Alexander Boettcher + * \date 2018-08-26 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +/* core includes */ +#include +#include +#include +#include + +/* + * Add x86 specific services + */ +void Genode::platform_add_local_services(Rpc_entrypoint &ep, + Sliced_heap &heap, + Registry &services) +{ + static Vm_root vm_root(ep, heap, core_env().ram_allocator(), + core_env().local_rm()); + static Core_service vm(services, vm_root); + + static Io_port_root io_root(*core_env().pd_session(), + platform().io_port_alloc(), heap); + + static Core_service io_port(services, io_root); +} diff --git a/repos/base-sel4/src/core/spec/x86/platform_thread.cc b/repos/base-sel4/src/core/spec/x86/platform_thread.cc index 0cb80b7b3..246282ccb 100644 --- a/repos/base-sel4/src/core/spec/x86/platform_thread.cc +++ b/repos/base-sel4/src/core/spec/x86/platform_thread.cc @@ -12,9 +12,40 @@ */ #include +#include + +using Genode::Phys_allocator; +using Genode::Allocator; + +Phys_allocator& Genode::phys_alloc_16k(Allocator * core_mem_alloc) +{ + static Genode::Phys_allocator phys_alloc_16k(core_mem_alloc); + return phys_alloc_16k; +} void Genode::Platform_thread::affinity(Affinity::Location location) { _location = location; seL4_TCB_SetAffinity(tcb_sel().value(), location.xpos()); } + +bool Genode::Thread_info::init_vcpu(Platform &platform, Cap_sel ept) +{ + enum { PAGES_16K = (1UL << Vcpu_kobj::SIZE_LOG2) / 4096 }; + + this->vcpu_state_phys = Untyped_memory::alloc_pages(phys_alloc_16k(), PAGES_16K); + this->vcpu_sel = platform.core_sel_alloc().alloc(); + + seL4_Untyped const service = Untyped_memory::_core_local_sel(Core_cspace::TOP_CNODE_UNTYPED_16K, vcpu_state_phys, Vcpu_kobj::SIZE_LOG2).value(); + + create(service, platform.core_cnode().sel(), vcpu_sel); + seL4_Error res = seL4_X86_VCPU_SetTCB(vcpu_sel.value(), tcb_sel.value()); + if (res != seL4_NoError) + return false; + + int error = seL4_TCB_SetEPTRoot(tcb_sel.value(), ept.value()); + if (error != seL4_NoError) + return false; + + return true; +} diff --git a/repos/base-sel4/src/core/spec/x86/vm_session_component.cc b/repos/base-sel4/src/core/spec/x86/vm_session_component.cc new file mode 100644 index 000000000..fc8d995e1 --- /dev/null +++ b/repos/base-sel4/src/core/spec/x86/vm_session_component.cc @@ -0,0 +1,247 @@ +/* + * \brief Core-specific instance of the VM session interface + * \author Alexander Boettcher + * \date 2018-08-26 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +/* core includes */ +#include +#include +#include +#include +#include + +using namespace Genode; + +void Vm_session_component::Vcpu::_free_up() +{ + if (_ds_cap.valid()) + _ram_alloc.free(_ds_cap); + + if (_notification.value()) { + int ret = seL4_CNode_Delete(seL4_CapInitThreadCNode, + _notification.value(), 32); + if (ret == seL4_NoError) + platform_specific().core_sel_alloc().free(_notification); + else + Genode::error(__func__, " cnode delete error ", ret); + } +} + +Vm_session_component::Vcpu::Vcpu(Constrained_ram_allocator &ram_alloc, + Cap_quota_guard &cap_alloc, + Vcpu_id const vcpu_id, + seL4_Untyped const service) +: + _ram_alloc(ram_alloc), + _ds_cap (_ram_alloc.alloc(4096, Cache_attribute::CACHED)), + _vcpu_id(vcpu_id) +{ + try { + /* notification cap */ + Cap_quota_guard::Reservation caps(cap_alloc, Cap_quota{1}); + + _notification = platform_specific().core_sel_alloc().alloc(); + create(service, + platform_specific().core_cnode().sel(), + _notification); + + caps.acknowledge(); + } catch (...) { + _free_up(); + throw; + } +} + +Vm_session_component::Vm_session_component(Rpc_entrypoint &ep, + Resources resources, + Label const &, + Diag, + Ram_allocator &ram, + Region_map &local_rm) +try +: + Ram_quota_guard(resources.ram_quota), + Cap_quota_guard(resources.cap_quota), + _ep(ep), + _constrained_md_ram_alloc(ram, _ram_quota_guard(), _cap_quota_guard()), + _sliced_heap(_constrained_md_ram_alloc, local_rm), + _pd_id(Platform_pd::pd_id_alloc().alloc()), + _vm_page_table(platform_specific().core_sel_alloc().alloc()), + _vm_space(_vm_page_table, + platform_specific().core_sel_alloc(), + platform().ram_alloc(), + platform_specific().top_cnode(), + platform_specific().core_cnode(), + platform_specific().phys_cnode(), + _pd_id, _page_table_registry, "VM") +{ + Platform &platform = platform_specific(); + Range_allocator &phys_alloc = platform.ram_alloc(); + + /* _pd_id && _vm_page_table */ + Cap_quota_guard::Reservation caps(_cap_quota_guard(), Cap_quota{2}); + /* ept object requires a page taken directly from core's phys_alloc */ + /* notifications requires a page taken directly from core's phys_alloc */ + Ram_quota_guard::Reservation ram(_ram_quota_guard(), Ram_quota{2 * 4096}); + + try { + _ept._phys = Untyped_memory::alloc_page(phys_alloc); + _ept._service = Untyped_memory::untyped_sel(_ept._phys).value(); + + create(_ept._service, platform.core_cnode().sel(), + _vm_page_table); + } catch (...) { + throw Service_denied(); + } + + long ret = seL4_X86_ASIDPool_Assign(platform.asid_pool().value(), + _vm_page_table.value()); + if (ret != seL4_NoError) + throw Service_denied(); + + try { + _notifications._phys = Untyped_memory::alloc_page(phys_alloc); + _notifications._service = Untyped_memory::untyped_sel(_notifications._phys).value(); + } catch (...) { + throw Service_denied(); + } + + caps.acknowledge(); + ram.acknowledge(); +} catch (...) { + + if (_notifications._service) + Untyped_memory::free_page(platform().ram_alloc(), _notifications._phys); + + if (_ept._service) { + int ret = seL4_CNode_Delete(seL4_CapInitThreadCNode, + _vm_page_table.value(), 32); + if (ret == seL4_NoError) + Untyped_memory::free_page(platform().ram_alloc(), _ept._phys); + + if (ret != seL4_NoError) + error(__FUNCTION__, ": could not free ASID entry, " + "leaking physical memory ", ret); + } + + if (_vm_page_table.value()) + platform_specific().core_sel_alloc().free(_vm_page_table); + + if (_pd_id) + Platform_pd::pd_id_alloc().free(_pd_id); + + throw; +} + +Vm_session_component::~Vm_session_component() +{ + for (;Vcpu * vcpu = _vcpus.first();) { + _vcpus.remove(vcpu); + destroy(_sliced_heap, vcpu); + } + + if (_vm_page_table.value()) + platform_specific().core_sel_alloc().free(_vm_page_table); + + if (_pd_id) + Platform_pd::pd_id_alloc().free(_pd_id); +} + +void Vm_session_component::_create_vcpu(Thread_capability cap) +{ + if (!cap.valid()) + return; + + auto lambda = [&] (Cpu_thread_component *thread) { + if (!thread) + return; + + /* allocate vCPU object */ + Vcpu * vcpu = nullptr; + + /* code to revert partial allocations in case of Out_of_ram/_quota */ + auto free_up = [&] () { if (vcpu) destroy(_sliced_heap, vcpu); }; + + try { + vcpu = new (_sliced_heap) Vcpu(_constrained_md_ram_alloc, + _cap_quota_guard(), + Vcpu_id{_id_alloc}, + _notifications._service); + + Platform_thread &pthread = thread->platform_thread(); + pthread.setup_vcpu(_vm_page_table, vcpu->notification_cap()); + + int ret = seL4_TCB_BindNotification(pthread.tcb_sel().value(), + vcpu->notification_cap().value()); + if (ret != seL4_NoError) + throw 0; + } catch (Out_of_ram) { + free_up(); + throw; + } catch (Out_of_caps) { + free_up(); + throw; + } catch (...) { + Genode::error("unexpected exception occurred"); + free_up(); + return; + } + + _vcpus.insert(vcpu); + _id_alloc++; + }; + + _ep.apply(cap, lambda); +} + +Dataspace_capability Vm_session_component::_cpu_state(Vcpu_id const vcpu_id) +{ + Vcpu * vcpu = _lookup(vcpu_id); + if (!vcpu) + return Dataspace_capability(); + + return vcpu->ds_cap(); +} + +void Vm_session_component::attach(Dataspace_capability cap, addr_t guest_phys) +{ + if (!cap.valid()) + throw Invalid_dataspace(); + + /* check dataspace validity */ + _ep.apply(cap, [&] (Dataspace_component *ptr) { + if (!ptr) + throw Invalid_dataspace(); + + Dataspace_component &dsc = *ptr; + + /* unsupported - deny otherwise arbitrary physical memory can be mapped to a VM */ + if (dsc.managed()) + throw Invalid_dataspace(); + + _vm_space.alloc_guest_page_tables(guest_phys, dsc.size()); + + enum { FLUSHABLE = true, EXECUTABLE = true }; + _vm_space.map_guest(dsc.phys_addr(), guest_phys, dsc.size() >> 12, + dsc.cacheability(), + dsc.writable(), EXECUTABLE, FLUSHABLE); + + }); +} + +void Vm_session_component::_pause(Vcpu_id const vcpu_id) +{ + Vcpu * vcpu = _lookup(vcpu_id); + if (!vcpu) + return; + + vcpu->signal(); +} diff --git a/repos/base-sel4/src/core/spec/x86/vm_space.cc b/repos/base-sel4/src/core/spec/x86/vm_space.cc index 82b47b87a..f2ba3a91f 100644 --- a/repos/base-sel4/src/core/spec/x86/vm_space.cc +++ b/repos/base-sel4/src/core/spec/x86/vm_space.cc @@ -18,7 +18,7 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx, Genode::addr_t const virt, Cache_attribute const cacheability, bool const writable, - bool const) + bool const, bool ept) { seL4_X86_Page const service = _idx_to_sel(idx.value()); seL4_X86_PageDirectory const pd = _pd_sel.value(); @@ -31,7 +31,10 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx, if (cacheability == WRITE_COMBINED) attr = seL4_X86_WriteCombining; - return seL4_X86_Page_Map(service, pd, virt, rights, attr); + if (ept) + return seL4_X86_Page_MapEPT(service, pd, virt, rights, attr); + else + return seL4_X86_Page_Map(service, pd, virt, rights, attr); } long Genode::Vm_space::_unmap_page(Genode::Cap_sel const &idx) @@ -45,3 +48,92 @@ long Genode::Vm_space::_invalidate_page(Genode::Cap_sel const &, { return seL4_NoError; } + +/******* + * EPT * + *******/ + +enum { + EPT_PAGE_TABLE_LOG2_SIZE = 21, /* 2M region */ + EPT_PAGE_DIR_LOG2_SIZE = 30, /* 1GB region */ + EPT_PAGE_PDPT_LOG2_SIZE = 39 /* 512GB region */ +}; + +struct Ept_page_table_kobj +{ + enum { SEL4_TYPE = seL4_X86_EPTPTObject, SIZE_LOG2 = 12 }; + static char const *name() { return "ept page table"; } +}; + + +struct Ept_page_directory_kobj +{ + enum { SEL4_TYPE = seL4_X86_EPTPDObject, SIZE_LOG2 = 12 }; + static char const *name() { return "ept page directory"; } +}; + +struct Ept_page_pointer_table_kobj +{ + enum { SEL4_TYPE = seL4_X86_EPTPDPTObject, SIZE_LOG2 = 12 }; + static char const *name() { return "ept page directory pointer table"; } +}; + +struct Ept_page_map_kobj +{ + enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG2 = 12 }; + static char const *name() { return "ept page-map level-4 table"; } +}; + +static long map_page_table(Genode::Cap_sel const pagetable, + Genode::Cap_sel const vroot, + Genode::addr_t const virt) +{ + return seL4_X86_EPTPT_Map(pagetable.value(), vroot.value(), virt, + seL4_X86_Default_VMAttributes); +} + +static long map_pdpt(Genode::Cap_sel const pdpt, + Genode::Cap_sel const vroot, + Genode::addr_t const virt) +{ + return seL4_X86_EPTPDPT_Map(pdpt.value(), vroot.value(), virt, + seL4_X86_Default_VMAttributes); +} + +static long map_directory(Genode::Cap_sel const pd, + Genode::Cap_sel const vroot, + Genode::addr_t const virt) +{ + return seL4_X86_EPTPD_Map(pd.value(), vroot.value(), virt, + seL4_X86_Default_VMAttributes); +} + +void Genode::Vm_space::unsynchronized_alloc_guest_page_tables(addr_t const start, + addr_t size) +{ + addr_t constexpr PAGE_TABLE_AREA = 1UL << EPT_PAGE_TABLE_LOG2_SIZE; + addr_t virt = start & ~(PAGE_TABLE_AREA - 1); + for (; size != 0; size -= min(size, PAGE_TABLE_AREA), virt += PAGE_TABLE_AREA) { + addr_t phys = 0; + + if (!_page_table_registry.page_level3_at(virt, EPT_PAGE_PDPT_LOG2_SIZE)) { + /* 512 GB range - page directory pointer table */ + Cap_sel const pd = _alloc_and_map(virt, map_pdpt, phys); + _page_table_registry.insert_page_level3(virt, pd, phys, EPT_PAGE_PDPT_LOG2_SIZE); + } + + if (!_page_table_registry.page_directory_at(virt, EPT_PAGE_DIR_LOG2_SIZE)) { + /* 1 GB range - page directory */ + Cap_sel const pd = _alloc_and_map(virt, map_directory, phys); + _page_table_registry.insert_page_directory(virt, pd, phys, + EPT_PAGE_DIR_LOG2_SIZE); + } + + if (!_page_table_registry.page_table_at(virt, EPT_PAGE_TABLE_LOG2_SIZE)) { + /* 2 MB range - page table */ + Cap_sel const pt = _alloc_and_map(virt, map_page_table, phys); + _page_table_registry.insert_page_table(virt, pt, phys, + EPT_PAGE_TABLE_LOG2_SIZE); + } + } +} diff --git a/repos/base-sel4/src/core/spec/x86_32/arch_kernel_object.h b/repos/base-sel4/src/core/spec/x86_32/arch_kernel_object.h index 8e3eb3f72..1075b4003 100644 --- a/repos/base-sel4/src/core/spec/x86_32/arch_kernel_object.h +++ b/repos/base-sel4/src/core/spec/x86_32/arch_kernel_object.h @@ -15,9 +15,12 @@ #define _CORE__X86_32_ARCH_KERNEL_OBJECT_H_ #include +#include namespace Genode { + Phys_allocator &phys_alloc_16k(Allocator * core_mem_alloc = nullptr); + enum { PAGE_TABLE_LOG2_SIZE = 22 /* 4M region */ }; @@ -35,6 +38,17 @@ namespace Genode { static char const *name() { return "page directory"; } }; + struct Vcpu_kobj + { + enum { SEL4_TYPE = seL4_X86_VCPUObject, SIZE_LOG2 = 14 }; + static char const *name() { return "vcpu"; } + }; + + struct Ept_kobj + { + enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG = 12 }; + static char const *name() { return "ept pml4"; } + }; }; #endif /* _CORE__X86_32_ARCH_KERNEL_OBJECT_H_ */ diff --git a/repos/base-sel4/src/core/spec/x86_32/platform.cc b/repos/base-sel4/src/core/spec/x86_32/platform.cc index 2a99e782c..006aa2560 100644 --- a/repos/base-sel4/src/core/spec/x86_32/platform.cc +++ b/repos/base-sel4/src/core/spec/x86_32/platform.cc @@ -18,6 +18,7 @@ #include #include +#include #include "arch_kernel_object.h" seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; } @@ -50,6 +51,21 @@ void Genode::Platform::_init_core_page_table_registry() virt_addr += 1024 * get_page_size(); } + /* initialize 16k memory allocator */ + phys_alloc_16k(&core_mem_alloc()); + + /* reserve some memory for VCPUs - must be 16k */ + enum { MAX_VCPU_COUNT = 16 }; + addr_t const max_pd_mem = MAX_VCPU_COUNT * (1UL << Vcpu_kobj::SIZE_LOG2); + + _initial_untyped_pool.turn_into_untyped_object(Core_cspace::TOP_CNODE_UNTYPED_16K, + [&] (addr_t const phys, addr_t const size, bool) { + phys_alloc_16k().add_range(phys, size); + _unused_phys_alloc.remove_range(phys, size); + }, + Vcpu_kobj::SIZE_LOG2, max_pd_mem); + + log(":phys_mem_16k: ", phys_alloc_16k()); /* * Register initial page frames * - actually we don't use them in core -> skip @@ -70,5 +86,3 @@ void Genode::Platform::_init_core_page_table_registry() } #endif } - - diff --git a/repos/base-sel4/src/core/spec/x86_64/arch_kernel_object.h b/repos/base-sel4/src/core/spec/x86_64/arch_kernel_object.h index 376412144..bc3ce04ae 100644 --- a/repos/base-sel4/src/core/spec/x86_64/arch_kernel_object.h +++ b/repos/base-sel4/src/core/spec/x86_64/arch_kernel_object.h @@ -15,9 +15,12 @@ #define _CORE__X86_64_ARCH_KERNEL_OBJECT_H_ #include +#include namespace Genode { + Phys_allocator &phys_alloc_16k(Allocator * core_mem_alloc = nullptr); + enum { PAGE_TABLE_LOG2_SIZE = 21, /* 2M region */ PAGE_DIR_LOG2_SIZE = 30, /* 1GB region */ @@ -48,6 +51,18 @@ namespace Genode { enum { SEL4_TYPE = seL4_X64_PML4Object, SIZE_LOG2 = 12 }; static char const *name() { return "page-map level-4 table"; } }; + + struct Vcpu_kobj + { + enum { SEL4_TYPE = seL4_X86_VCPUObject, SIZE_LOG2 = 14 }; + static char const *name() { return "vcpu"; } + }; + + struct Ept_kobj + { + enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG = 12 }; + static char const *name() { return "ept pml4"; } + }; }; #endif /* _CORE__X86_64_ARCH_KERNEL_OBJECT_H_ */ diff --git a/repos/base-sel4/src/core/spec/x86_64/platform.cc b/repos/base-sel4/src/core/spec/x86_64/platform.cc index 0db4b4440..230cd6c38 100644 --- a/repos/base-sel4/src/core/spec/x86_64/platform.cc +++ b/repos/base-sel4/src/core/spec/x86_64/platform.cc @@ -18,8 +18,10 @@ #include #include +#include #include "arch_kernel_object.h" + seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; } void Genode::Platform::init_sel4_ipc_buffer() { } @@ -59,6 +61,22 @@ void Genode::Platform::_init_core_page_table_registry() virt_addr += 512 * get_page_size(); } + /* initialize 16k memory allocator */ + phys_alloc_16k(&core_mem_alloc()); + + /* reserve some memory for VCPUs - must be 16k */ + enum { MAX_VCPU_COUNT = 16 }; + addr_t const max_pd_mem = MAX_VCPU_COUNT * (1UL << Vcpu_kobj::SIZE_LOG2); + + _initial_untyped_pool.turn_into_untyped_object(Core_cspace::TOP_CNODE_UNTYPED_16K, + [&] (addr_t const phys, addr_t const size, bool) { + phys_alloc_16k().add_range(phys, size); + _unused_phys_alloc.remove_range(phys, size); + }, + Vcpu_kobj::SIZE_LOG2, max_pd_mem); + + log(":phys_mem_16k: ", phys_alloc_16k()); + /* * Register initial page frames * - actually we don't use them in core -> skip diff --git a/repos/base-sel4/src/include/base/internal/native_utcb.h b/repos/base-sel4/src/include/base/internal/native_utcb.h index 0f6732be6..a38f761f8 100644 --- a/repos/base-sel4/src/include/base/internal/native_utcb.h +++ b/repos/base-sel4/src/include/base/internal/native_utcb.h @@ -24,17 +24,17 @@ struct Genode::Native_utcb * On seL4, the UTCB is called IPC buffer. We use one page * for each IPC buffer. */ - enum { IPC_BUFFER_SIZE = 4096 }; + enum { IPC_BUFFER_SIZE = 4096, ELEMENTS = IPC_BUFFER_SIZE/sizeof(addr_t) }; - addr_t _raw[IPC_BUFFER_SIZE/sizeof(addr_t)]; + addr_t _raw[ELEMENTS]; Native_utcb() { }; - addr_t ep_sel() const { return _raw[0]; } - addr_t lock_sel() const { return _raw[1]; } + addr_t ep_sel() const { return _raw[ELEMENTS - 1]; } + addr_t lock_sel() const { return _raw[ELEMENTS - 2]; } - void ep_sel (addr_t sel) { _raw[0] = sel; } - void lock_sel(addr_t sel) { _raw[1] = sel; } + void ep_sel (addr_t sel) { _raw[ELEMENTS - 1] = sel; } + void lock_sel(addr_t sel) { _raw[ELEMENTS - 2] = sel; } }; #endif /* _INCLUDE__BASE__INTERNAL__NATIVE_UTCB_H_ */ diff --git a/repos/base-sel4/src/lib/base/x86/vm_session.cc b/repos/base-sel4/src/lib/base/x86/vm_session.cc new file mode 100644 index 000000000..4af700958 --- /dev/null +++ b/repos/base-sel4/src/lib/base/x86/vm_session.cc @@ -0,0 +1,845 @@ +/* + * \brief Client-side VM session interface + * \author Alexander Boettcher + * \date 2018-08-27 + */ + +/* + * Copyright (C) 2018 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include +#include + +#include +using namespace Genode; + +struct Vcpu; + +static Genode::Registry > vcpus; +static unsigned vcpu_id = 0; + +struct Vcpu : Genode::Thread +{ + private: + + Signal_context_capability &_signal; + Semaphore _wake_up { 0 }; + Semaphore &_handler_ready; + Lock _startup { Genode::Lock::LOCKED }; + Vm_session_client::Vcpu_id _id; + addr_t _state { 0 }; + addr_t _recall { 0 }; + uint64_t _tsc_offset { 0 }; + + bool _show_error_unsupported_r { true }; + bool _show_error_unsupported_tpr { true }; + bool _show_error_unsupported_star { true }; + + enum { EXIT_ON_HLT = 1U << 7, EXIT_ON_RDTSC = 1U << 12 }; + + addr_t const _vmcs_ctrl0 = EXIT_ON_HLT; // | EXIT_ON_RDTSC; + + enum { STACK_SIZE = 0x3000 }; + + enum State { + NONE = 0, + PAUSE = 1, + RUN = 2 + } _remote { NONE }; + Lock _remote_lock { Lock::UNLOCKED }; + + enum { + VMEXIT_INVALID = 0x21, + VMEXIT_STARTUP = 0xfe, + VMEXIT_RECALL = 0xff + }; + + enum { + CR0_PE = 0, /* 1U << 0 - not needed in case of UG */ + CR0_CP = 1U << 1, + CR0_NE = 1U << 5, + CR0_NM = 1U << 29, + CR0_CD = 1U << 30, + CR0_PG = 0, /* 1U << 31 - not needed in case of UG */ + + CR4_VMX = 1 << 13, + }; + + addr_t const cr0_mask = CR0_PE | CR0_CP | CR0_NE | CR0_NM | CR0_CD | CR0_PG; + addr_t const cr0_set = 0; + addr_t const cr4_mask = CR4_VMX; + addr_t const cr4_set = CR4_VMX; + + void entry() override + { + /* trigger that thread is up */ + _startup.unlock(); + + /* wait until vcpu is assigned to us */ + _wake_up.down(); + + /* get selector to read/write VMCS */ + addr_t const service = _stack->utcb().ep_sel(); + /* get selector to call back a vCPU into VMM */ + _recall = _stack->utcb().lock_sel(); + + Vm_state &state = *reinterpret_cast(_state); + state = Vm_state {}; + + /* wait for first user resume() */ + _wake_up.down(); + + { + Lock::Guard guard(_remote_lock); + _remote = NONE; + } + + /* initial startup VM exit to get valid VM state */ + state.exit_reason = VMEXIT_STARTUP; + _read_sel4_state_async(service, state); + + Genode::Signal_transmitter(_signal).submit(); + + _handler_ready.down(); + _wake_up.down(); + + State local_state { NONE }; + + _write_vmcs(service, Vmcs::CR0_MASK, cr0_mask); + _write_vmcs(service, Vmcs::CR4_MASK, cr4_mask); + + while (true) { + /* read in requested state from remote threads */ + { + Lock::Guard guard(_remote_lock); + + local_state = _remote; + _remote = NONE; + } + + if (local_state == NONE) { + _wake_up.down(); + continue; + } + + if (local_state == PAUSE) { + + _write_sel4_state(service, state); + + seL4_Word badge = 0; + /* consume spurious notification - XXX better way ? */ + seL4_SetMR(0, state.ip.value()); + seL4_SetMR(1, _vmcs_ctrl0 | state.ctrl_primary.value()); + seL4_SetMR(2, state.inj_info.value() & ~0x3000); + if (seL4_VMEnter(&badge) == SEL4_VMENTER_RESULT_FAULT) + Genode::error("invalid state ahead ", badge); + + state = Vm_state {}; + + state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR)); + state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR)); + state.inj_info.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_ENTRY_MR)); + + state.exit_reason = VMEXIT_RECALL; + _read_sel4_state_async(service, state); + + /* notify VM handler */ + Genode::Signal_transmitter(_signal).submit(); + + /* + * Wait until VM handler is really really done, + * otherwise we lose state. + */ + _handler_ready.down(); + continue; + } + + if (local_state != RUN) { + Genode::error("unknown vcpu state ", (int)local_state); + while (true) { _remote_lock.lock(); } + } + + _write_sel4_state(service, state); + + seL4_SetMR(0, state.ip.value()); + seL4_SetMR(1, _vmcs_ctrl0 | state.ctrl_primary.value()); + seL4_SetMR(2, state.inj_info.value() & ~0x3000); + + seL4_Word badge = 0; + seL4_Word res = seL4_VMEnter(&badge); + + state = Vm_state {}; + + if (res == SEL4_VMENTER_RESULT_FAULT) { + state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR)); + state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR)); + state.exit_reason = seL4_GetMR(SEL4_VMENTER_FAULT_REASON_MR); + + state.ip_len.value(seL4_GetMR(SEL4_VMENTER_FAULT_INSTRUCTION_LEN_MR)); + state.qual_primary.value(seL4_GetMR(SEL4_VMENTER_FAULT_QUALIFICATION_MR)); + state.qual_secondary.value(seL4_GetMR(SEL4_VMENTER_FAULT_GUEST_PHYSICAL_MR)); + + state.flags.value(seL4_GetMR(SEL4_VMENTER_FAULT_RFLAGS_MR)); + state.intr_state.value(seL4_GetMR(SEL4_VMENTER_FAULT_GUEST_INT_MR)); + state.cr3.value(seL4_GetMR(SEL4_VMENTER_FAULT_CR3_MR)); + + state.ax.value(seL4_GetMR(SEL4_VMENTER_FAULT_EAX)); + state.bx.value(seL4_GetMR(SEL4_VMENTER_FAULT_EBX)); + state.cx.value(seL4_GetMR(SEL4_VMENTER_FAULT_ECX)); + state.dx.value(seL4_GetMR(SEL4_VMENTER_FAULT_EDX)); + state.si.value(seL4_GetMR(SEL4_VMENTER_FAULT_ESI)); + state.di.value(seL4_GetMR(SEL4_VMENTER_FAULT_EDI)); + state.bp.value(seL4_GetMR(SEL4_VMENTER_FAULT_EBP)); + + _read_sel4_state(service, state); + } else { + state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR)); + state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR)); + /* what about the other GPR stuff ? XXX */ + + state.exit_reason = VMEXIT_RECALL; + _read_sel4_state_async(service, state); + + Lock::Guard guard(_remote_lock); + if (_remote == PAUSE) { + _remote = NONE; + _wake_up.down(); + } + } + + /* notify VM handler */ + Genode::Signal_transmitter(_signal).submit(); + + /* + * Wait until VM handler is really really done, + * otherwise we lose state. + */ + _handler_ready.down(); + } + } + + enum Vmcs { + IRQ_WINDOW = 1U << 2, + + CR0 = 0x6800, + CR0_MASK = 0x6000, + CR0_SHADOW = 0x6004, + + CR3 = 0x6802, + + CR4 = 0x6804, + CR4_MASK = 0x6002, + CR4_SHADOW = 0x6006, + + DR7 = 0x681a, + + RFLAGS = 0x6820, + + RSP = 0x681c, + RIP = 0x681e, + + INST_LEN = 0x440c, + + EFER = 0x2806, + + CTRL_0 = 0x4002, + CTRL_1 = 0x401e, + + CS_SEL = 0x0802, + CS_LIMIT = 0x4802, + CS_AR = 0x4816, + CS_BASE = 0x6808, + + SS_SEL = 0x0804, + SS_LIMIT = 0x4804, + SS_AR = 0x4818, + SS_BASE = 0x680a, + + ES_SEL = 0x0800, + ES_LIMIT = 0x4800, + ES_AR = 0x4814, + ES_BASE = 0x6806, + + DS_SEL = 0x0806, + DS_LIMIT = 0x4806, + DS_AR = 0x481a, + DS_BASE = 0x680c, + + FS_SEL = 0x0808, + FS_LIMIT = 0x4808, + FS_AR = 0x481c, + FS_BASE = 0x680e, + + GS_SEL = 0x080a, + GS_LIMIT = 0x480a, + GS_AR = 0x481e, + GS_BASE = 0x6810, + + LDTR_SEL = 0x080c, + LDTR_LIMIT = 0x480c, + LDTR_AR = 0x4820, + LDTR_BASE = 0x6812, + + TR_SEL = 0x080e, + TR_LIMIT = 0x480e, + TR_AR = 0x4822, + TR_BASE = 0x6814, + + IDTR_LIMIT = 0x4812, + IDTR_BASE = 0x6818, + + GDTR_LIMIT = 0x4810, + GDTR_BASE = 0x6816, + + PDPTE_0 = 0x280a, + PDPTE_1 = 0x280c, + PDPTE_2 = 0x280e, + PDPTE_3 = 0x2810, + + SYSENTER_CS = 0x482a, + SYSENTER_SP = 0x6824, + SYSENTER_IP = 0x6826, + + STATE_INTR = 0x4824, + STATE_ACTV = 0x4826, + + INTR_INFO = 0x4016, + INTR_ERROR = 0x4018, + + IDT_INFO = 0x4408, + IDT_ERROR = 0x440a, + + TSC_OFF_LO = 0x2010, + TSC_OFF_HI = 0x2011, + }; + + void _write_vmcs(seL4_X86_VCPU const service, enum Vmcs const field, + seL4_Word const value) + { + seL4_X86_VCPU_WriteVMCS_t res; + res = seL4_X86_VCPU_WriteVMCS(service, field, value); + if (res.error != seL4_NoError) + Genode::error("field ", Hex(field), " - ", res.error, + " ", res.written); + } + + void _write_gpr(seL4_X86_VCPU const service, Vm_state &state) + { + seL4_VCPUContext regs; + regs.eax = state.ax.value(); + regs.ebx = state.bx.value(); + regs.ecx = state.cx.value(); + regs.edx = state.dx.value(); + regs.esi = state.si.value(); + regs.edi = state.di.value(); + regs.ebp = state.bp.value(); + + seL4_Error res = seL4_X86_VCPU_WriteRegisters(service, ®s); + if (res != seL4_NoError) + Genode::error("setting general purpose register failed ", + (int)res); + } + + /* + * Convert to Intel format comprising 32 bits. + */ + addr_t _convert_ar(addr_t value) { + return ((value << 4) & 0x1f000) | (value & 0xff); } + + /* + * Convert to AMD (and Genode) format comprising 16 bits. + */ + uint16_t _convert_ar_16(addr_t value) { + return ((value & 0x1f000) >> 4) | (value & 0xff); } + + void _write_sel4_state(seL4_X86_VCPU const service, Vm_state &state) + { + if (state.ax.valid() || state.cx.valid() || + state.dx.valid() || state.bx.valid() || + state.bp.valid() || state.di.valid() || + state.si.valid()) { + /* XXX read first all values and write back only the changed ones ... */ + _write_gpr(service, state); + } + + if (state.r8.valid() || state.r9.valid() || + state.r10.valid() || state.r11.valid() || + state.r12.valid() || state.r13.valid() || + state.r14.valid() || state.r15.valid()) + { + if (_show_error_unsupported_r) + { + _show_error_unsupported_r = false; + Genode::error("registers r8-15 not supported by seL4"); + } + } + + if (state.tsc.valid() || state.tsc_offset.valid()) + { + _tsc_offset += state.tsc_offset.value(); + /* not supported by seL4 */ +#if 0 + _write_vmcs(service, Vmcs::TSC_OFF_LO, _tsc_offset & 0xffffffffu); + _write_vmcs(service, Vmcs::TSC_OFF_HI, (_tsc_offset >> 32) & 0xffffffffu); +#endif + } + + if (state.star.valid() || state.lstar.valid() || + state.fmask.valid() || state.kernel_gs_base.valid()) + { + if (_show_error_unsupported_star) { + _show_error_unsupported_star = false; + Genode::error("star, lstar, fmask, gs_base not supported by seL4"); + } + } + + if (state.tpr.valid() || state.tpr_threshold.valid()) + { + if (_show_error_unsupported_tpr) + { + _show_error_unsupported_tpr = false; + Genode::error("tpr* not supported by seL4"); + } + } + + if (state.dr7.valid()) + _write_vmcs(service, Vmcs::DR7, state.dr7.value()); + + if (state.cr0.valid()) { + _write_vmcs(service, Vmcs::CR0, cr0_set | (~cr0_mask & state.cr0.value())); + _write_vmcs(service, Vmcs::CR0_SHADOW, state.cr0.value()); + } + + /* not supported on seL4 - state.cr2.valid() */ + + if (state.cr3.valid()) + _write_vmcs(service, Vmcs::CR3, state.cr3.value()); + + if (state.cr4.valid()) { + _write_vmcs(service, Vmcs::CR4, cr4_set | (~cr4_mask & state.cr4.value())); + _write_vmcs(service, Vmcs::CR4_SHADOW, state.cr4.value()); + } + + if (state.inj_info.valid()) { + addr_t ctrl_0 = _read_vmcs(service, Vmcs::CTRL_0); + + if (state.inj_info.value() & 0x2000) + Genode::warning("inj_info for NMI not supported"); + + if (state.inj_info.value() & 0x1000) + ctrl_0 |= Vmcs::IRQ_WINDOW; + else + ctrl_0 &= ~Vmcs::IRQ_WINDOW; + + state.ctrl_primary.value(ctrl_0); + } + + if (state.inj_error.valid()) { + /* not supported by seL4 */ + //_write_vmcs(service, Vmcs::INTR_ERROR, state.inj_error.value()); + } + + if (state.flags.valid()) + _write_vmcs(service, Vmcs::RFLAGS, state.flags.value()); + + if (state.sp.valid()) + _write_vmcs(service, Vmcs::RSP, state.sp.value()); + + if (state.ip.valid()) + _write_vmcs(service, Vmcs::RIP, state.ip.value()); + + if (state.efer.valid()) + _write_vmcs(service, Vmcs::EFER, state.efer.value()); + + /* state.ctrl_primary.valid() update on vmenter - see above */ + + if (state.ctrl_secondary.valid()) + _write_vmcs(service, Vmcs::CTRL_1, state.ctrl_secondary.value()); + + if (state.intr_state.valid()) + _write_vmcs(service, Vmcs::STATE_INTR, state.intr_state.value()); + + if (state.actv_state.valid()) + _write_vmcs(service, Vmcs::STATE_ACTV, state.actv_state.value()); + + if (state.cs.valid()) { + _write_vmcs(service, Vmcs::CS_SEL, state.cs.value().sel); + _write_vmcs(service, Vmcs::CS_LIMIT, state.cs.value().limit); + _write_vmcs(service, Vmcs::CS_AR, _convert_ar(state.cs.value().ar)); + _write_vmcs(service, Vmcs::CS_BASE, state.cs.value().base); + } + + if (state.ss.valid()) { + _write_vmcs(service, Vmcs::SS_SEL, state.ss.value().sel); + _write_vmcs(service, Vmcs::SS_LIMIT, state.ss.value().limit); + _write_vmcs(service, Vmcs::SS_AR, _convert_ar(state.ss.value().ar)); + _write_vmcs(service, Vmcs::SS_BASE, state.ss.value().base); + } + + if (state.es.valid()) { + _write_vmcs(service, Vmcs::ES_SEL, state.es.value().sel); + _write_vmcs(service, Vmcs::ES_LIMIT, state.es.value().limit); + _write_vmcs(service, Vmcs::ES_AR, _convert_ar(state.es.value().ar)); + _write_vmcs(service, Vmcs::ES_BASE, state.es.value().base); + } + + if (state.ds.valid()) { + _write_vmcs(service, Vmcs::DS_SEL, state.ds.value().sel); + _write_vmcs(service, Vmcs::DS_LIMIT, state.ds.value().limit); + _write_vmcs(service, Vmcs::DS_AR, _convert_ar(state.ds.value().ar)); + _write_vmcs(service, Vmcs::DS_BASE, state.ds.value().base); + } + + if (state.fs.valid()) { + _write_vmcs(service, Vmcs::FS_SEL, state.fs.value().sel); + _write_vmcs(service, Vmcs::FS_LIMIT, state.fs.value().limit); + _write_vmcs(service, Vmcs::FS_AR, _convert_ar(state.fs.value().ar)); + _write_vmcs(service, Vmcs::FS_BASE, state.fs.value().base); + } + + if (state.gs.valid()) { + _write_vmcs(service, Vmcs::GS_SEL, state.gs.value().sel); + _write_vmcs(service, Vmcs::GS_LIMIT, state.gs.value().limit); + _write_vmcs(service, Vmcs::GS_AR, _convert_ar(state.gs.value().ar)); + _write_vmcs(service, Vmcs::GS_BASE, state.gs.value().base); + } + + if (state.tr.valid()) { + _write_vmcs(service, Vmcs::TR_SEL, state.tr.value().sel); + _write_vmcs(service, Vmcs::TR_LIMIT, state.tr.value().limit); + _write_vmcs(service, Vmcs::TR_AR, _convert_ar(state.tr.value().ar)); + _write_vmcs(service, Vmcs::TR_BASE, state.tr.value().base); + } + + if (state.ldtr.valid()) { + _write_vmcs(service, Vmcs::LDTR_SEL, state.ldtr.value().sel); + _write_vmcs(service, Vmcs::LDTR_LIMIT, state.ldtr.value().limit); + _write_vmcs(service, Vmcs::LDTR_AR, _convert_ar(state.ldtr.value().ar)); + _write_vmcs(service, Vmcs::LDTR_BASE, state.ldtr.value().base); + } + + if (state.idtr.valid()) { + _write_vmcs(service, Vmcs::IDTR_BASE, state.idtr.value().base); + _write_vmcs(service, Vmcs::IDTR_LIMIT, state.idtr.value().limit); + } + + if (state.gdtr.valid()) { + _write_vmcs(service, Vmcs::GDTR_BASE, state.gdtr.value().base); + _write_vmcs(service, Vmcs::GDTR_LIMIT, state.gdtr.value().limit); + } + + if (state.pdpte_0.valid()) + _write_vmcs(service, Vmcs::PDPTE_0, state.pdpte_0.value()); + + if (state.pdpte_1.valid()) + _write_vmcs(service, Vmcs::PDPTE_1, state.pdpte_1.value()); + + if (state.pdpte_2.valid()) + _write_vmcs(service, Vmcs::PDPTE_2, state.pdpte_2.value()); + + if (state.pdpte_3.valid()) + _write_vmcs(service, Vmcs::PDPTE_3, state.pdpte_3.value()); + + if (state.sysenter_cs.valid()) + _write_vmcs(service, Vmcs::SYSENTER_CS, state.sysenter_cs.value()); + + if (state.sysenter_sp.valid()) + _write_vmcs(service, Vmcs::SYSENTER_SP, state.sysenter_sp.value()); + + if (state.sysenter_ip.valid()) + _write_vmcs(service, Vmcs::SYSENTER_IP, state.sysenter_ip.value()); + } + + seL4_Word _read_vmcs(seL4_X86_VCPU const service, enum Vmcs const field) + { + seL4_X86_VCPU_ReadVMCS_t res; + res = seL4_X86_VCPU_ReadVMCS(service, field); + if (res.error != seL4_NoError) + Genode::error("field ", Hex(field), " - ", res.error); + return res.value; + } + + template + T _read_vmcsX(seL4_X86_VCPU const service, enum Vmcs const field) + { + seL4_X86_VCPU_ReadVMCS_t res; + res = seL4_X86_VCPU_ReadVMCS(service, field); + if (res.error != seL4_NoError) + Genode::error("field ", Hex(field), " - ", res.error); + return (T)(res.value); + } + + uint16_t _read_vmcs_16(seL4_X86_VCPU const service, enum Vmcs const field) { + return _read_vmcsX(service, field); } + + uint16_t _read_vmcs_32(seL4_X86_VCPU const service, enum Vmcs const field) { + return _read_vmcsX(service, field); } + + void _read_sel4_state_async(seL4_X86_VCPU const service, Vm_state &state) + { + state.ax.value(state.ax.value()); /* XXX ? */ + state.cx.value(state.cx.value()); + state.dx.value(state.dx.value()); + state.bx.value(state.bx.value()); + + state.di.value(state.di.value()); /* XXX ? */ + state.si.value(state.si.value()); + state.bp.value(state.bp.value()); + + state.flags.value(_read_vmcs(service, Vmcs::RFLAGS)); + + state.ip.value(_read_vmcs(service, Vmcs::RIP)); + state.ip_len.value(_read_vmcs(service, Vmcs::INST_LEN)); + + state.cr3.value(_read_vmcs(service, Vmcs::CR3)); + + state.qual_primary.value(state.qual_primary.value()); /* XXX ? */ + state.qual_secondary.value(state.qual_secondary.value()); /* XXX ? */ + + state.ctrl_primary.value(_read_vmcs(service, Vmcs::CTRL_0)); + + _read_sel4_state(service, state); + } + + void _read_sel4_state(seL4_X86_VCPU const service, Vm_state &state) + { + state.sp.value(_read_vmcs(service, Vmcs::RSP)); + state.dr7.value(_read_vmcs(service, Vmcs::DR7)); + + /* r8 - r15 not supported on seL4 */ + + { + addr_t const cr0 = _read_vmcs(service, Vmcs::CR0); + addr_t const cr0_shadow = _read_vmcs(service, Vmcs::CR0_SHADOW); + state.cr0.value((cr0 & ~cr0_mask) | (cr0_shadow & cr0_mask)); + + if (state.cr0.value() != cr0_shadow) { + Genode::error("reset cr0_shadow to cr0 ", Genode::Hex(cr0), " ", Genode::Hex(cr0_shadow), "->", Genode::Hex(state.cr0.value())); + _write_vmcs(service, Vmcs::CR0_SHADOW, state.cr0.value()); + } + } + + /* cr2 not supported on seL4 */ + state.cr2.value(state.cr2.value()); + + { + addr_t const cr4 = _read_vmcs(service, Vmcs::CR4); + addr_t const cr4_shadow = _read_vmcs(service, Vmcs::CR4_SHADOW); + state.cr4.value((cr4 & ~cr4_mask) | (cr4_shadow & cr4_mask)); + + if (state.cr4.value() != cr4_shadow) { + Genode::error("reset cr4_shadow to cr4 ", Genode::Hex(cr4), " ", Genode::Hex(cr4_shadow), "->", Genode::Hex(state.cr4.value())); + _write_vmcs(service, Vmcs::CR4_SHADOW, state.cr4.value()); + } + } + + typedef Genode::Vm_state::Segment Segment; + typedef Genode::Vm_state::Range Range; + + state.cs.value(Segment{_read_vmcs_16(service, Vmcs::CS_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::CS_AR)), + _read_vmcs_32(service, Vmcs::CS_LIMIT), + _read_vmcs(service, Vmcs::CS_BASE)}); + + state.ss.value(Segment{_read_vmcs_16(service, Vmcs::SS_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::SS_AR)), + _read_vmcs_32(service, Vmcs::SS_LIMIT), + _read_vmcs(service, Vmcs::SS_BASE)}); + + state.es.value(Segment{_read_vmcs_16(service, Vmcs::ES_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::ES_AR)), + _read_vmcs_32(service, Vmcs::ES_LIMIT), + _read_vmcs(service, Vmcs::ES_BASE)}); + + state.ds.value(Segment{_read_vmcs_16(service, Vmcs::DS_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::DS_AR)), + _read_vmcs_32(service, Vmcs::DS_LIMIT), + _read_vmcs(service, Vmcs::DS_BASE)}); + + state.fs.value(Segment{_read_vmcs_16(service, Vmcs::FS_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::FS_AR)), + _read_vmcs_32(service, Vmcs::FS_LIMIT), + _read_vmcs(service, Vmcs::FS_BASE)}); + + state.gs.value(Segment{_read_vmcs_16(service, Vmcs::GS_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::GS_AR)), + _read_vmcs_32(service, Vmcs::GS_LIMIT), + _read_vmcs(service, Vmcs::GS_BASE)}); + + state.tr.value(Segment{_read_vmcs_16(service, Vmcs::TR_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::TR_AR)), + _read_vmcs_32(service, Vmcs::TR_LIMIT), + _read_vmcs(service, Vmcs::TR_BASE)}); + + state.ldtr.value(Segment{_read_vmcs_16(service, Vmcs::LDTR_SEL), + _convert_ar_16(_read_vmcs(service, Vmcs::LDTR_AR)), + _read_vmcs_32(service, Vmcs::LDTR_LIMIT), + _read_vmcs(service, Vmcs::LDTR_BASE)}); + + state.idtr.value(Range{_read_vmcs(service, Vmcs::IDTR_BASE), + _read_vmcs_32(service, Vmcs::IDTR_LIMIT)}); + + state.gdtr.value(Range{_read_vmcs(service, Vmcs::GDTR_BASE), + _read_vmcs_32(service, Vmcs::GDTR_LIMIT)}); + + state.sysenter_cs.value(_read_vmcs(service, Vmcs::SYSENTER_CS)); + state.sysenter_sp.value(_read_vmcs(service, Vmcs::SYSENTER_SP)); + state.sysenter_ip.value(_read_vmcs(service, Vmcs::SYSENTER_IP)); + + /* no support by seL4 to read this value */ + state.ctrl_secondary.value(state.ctrl_secondary.value()); + //state.ctrl_secondary.value(_read_vmcs(service, Vmcs::CTRL_1)); + + if (state.exit_reason == VMEXIT_INVALID || + state.exit_reason == VMEXIT_RECALL) + { + state.inj_info.value(_read_vmcs(service, Vmcs::INTR_INFO)); + /* no support by seL4 to read this value */ + state.inj_error.value(0); + //state.inj_error.value(_read_vmcs(service, Vmcs::INTR_ERROR)); + } else { + state.inj_info.value(_read_vmcs(service, Vmcs::IDT_INFO)); + state.inj_error.value(_read_vmcs(service, Vmcs::IDT_ERROR)); + } + + state.intr_state.value(_read_vmcs(service, Vmcs::STATE_INTR)); + state.actv_state.value(_read_vmcs(service, Vmcs::STATE_ACTV)); + + state.pdpte_0.value(_read_vmcs(service, Vmcs::PDPTE_0)); + state.pdpte_1.value(_read_vmcs(service, Vmcs::PDPTE_1)); + state.pdpte_2.value(_read_vmcs(service, Vmcs::PDPTE_2)); + state.pdpte_3.value(_read_vmcs(service, Vmcs::PDPTE_3)); + + /* tsc and tsc_offset not supported by seL4 */ + state.tsc.value(Trace::timestamp()); + state.tsc_offset.value(_tsc_offset); + + state.efer.value(_read_vmcs(service, Vmcs::EFER)); + + /* XXX star, lstar, fmask, kernel_gs_base not supported by seL4 */ + + /* XXX tpr and tpr_threshold not supported by seL4 */ + } + + public: + + Vcpu(Genode::Env &env, Genode::Signal_context_capability &cap, + Semaphore &handler_ready, unsigned id) + : + Thread(env, "vcpu_thread", STACK_SIZE), _signal(cap), + _handler_ready(handler_ready), _id({id}) + { } + + void start() override { + Thread::start(); + _startup.lock(); + } + + Genode::Vm_session_client::Vcpu_id id() const { return _id; } + + void assign_ds_state(Region_map &rm, Dataspace_capability cap) { + _state = rm.attach(cap); } + + void initial_resume() + { + _wake_up.up(); + } + + void resume() + { + Lock::Guard guard(_remote_lock); + + if (_remote == RUN) + return; + + _remote = RUN; + _wake_up.up(); + } + + void pause() + { + Lock::Guard guard(_remote_lock); + + if (_remote == PAUSE) + return; + + _remote = PAUSE; + + seL4_Signal(_recall); + + _wake_up.up(); + } +}; + +Genode::Vm_session_client::Vcpu_id +Genode::Vm_session_client::create_vcpu(Allocator &alloc, Env &env, + Vm_handler_base &handler) +{ + /* create thread that switches modes between thread/cpu */ + Vcpu * vcpu = new (alloc) Genode::Registered (vcpus, env, + handler._cap, + handler._done, + vcpu_id); + + try { + /* now it gets actually valid - vcpu->cap() becomes valid */ + vcpu->start(); + + /* instruct core to let it become a vCPU */ + call(handler._cap, vcpu->id()); + call(vcpu->cap()); + + vcpu->assign_ds_state(env.rm(), call(vcpu->id())); + } catch (...) { + destroy(alloc, vcpu); + throw; + } + + vcpu->initial_resume(); + + vcpu_id++; + return vcpu->id(); +} + +void Genode::Vm_session_client::run(Genode::Vm_session_client::Vcpu_id id) +{ + vcpus.for_each([&] (Vcpu &vcpu) { + if (vcpu.id().id == id.id) + vcpu.resume(); + }); +} + +void Vm_session_client::pause(Vm_session_client::Vcpu_id vcpu_id) +{ + vcpus.for_each([&] (Vcpu &vcpu) { + if (vcpu.id().id != vcpu_id.id) + return; + + vcpu.pause(); + }); +} + +Genode::Dataspace_capability Genode::Vm_session_client::cpu_state(Vcpu_id vcpu_id) +{ + Dataspace_capability cap; + + cap = call(vcpu_id); + + return cap; +} diff --git a/repos/os/run/vmm_x86.run b/repos/os/run/vmm_x86.run index 1299fd9a7..2927a0d74 100644 --- a/repos/os/run/vmm_x86.run +++ b/repos/os/run/vmm_x86.run @@ -18,7 +18,7 @@ if { [get_cmd_switch --autopilot] } { exit 0 } - if {[have_spec nova] || [have_spec foc]} { + if {[have_spec nova] || [have_spec foc] || [have_spec sel4]} { } else { puts "\n Run script is not supported on this platform. \n"; exit 0