genode/repos/ports/src/virtualbox/spec/nova/vcpu.h

/*
* \brief Genode/Nova specific VirtualBox SUPLib supplements
* \author Alexander Boettcher
* \author Norman Feske
* \author Christian Helmuth
*/
/*
* Copyright (C) 2013-2014 Genode Labs GmbH
*
* This file is distributed under the terms of the GNU General Public License
* version 2.
*/
#ifndef _VIRTUALBOX__SPEC__NOVA__VCPU_H_
#define _VIRTUALBOX__SPEC__NOVA__VCPU_H_
/* Genode includes */
#include <base/printf.h>
#include <base/semaphore.h>
#include <util/flex_iterator.h>
#include <rom_session/connection.h>
#include <timer_session/connection.h>
#include <vmm/vcpu_thread.h>
#include <vmm/vcpu_dispatcher.h>
#include <vmm/printf.h>
/* NOVA includes that come with Genode */
#include <nova/syscalls.h>
/* VirtualBox includes */
#include <VBox/vmm/vm.h>
#include <VBox/err.h>
#include <VBox/vmm/pdmapi.h>
/* Genode's VirtualBox includes */
#include "sup.h"
#include "guest_memory.h"
#include "vmm_memory.h"
/* Genode libc pthread binding */
#include "thread.h"
/* libc includes */
#include <setjmp.h>
/* VirtualBox recompiler (REM) includes */
#include <VBox/vmm/rem.h>
static bool debug_map_memory = false;
/*
* VirtualBox stores segment attributes in Intel format using a 32-bit
* value. NOVA represents the attributes in packed format using a 16-bit
* value.
*/
static inline Genode::uint16_t sel_ar_conv_to_nova(Genode::uint32_t v)
{
return (v & 0xff) | ((v & 0x1f000) >> 4);
}
static inline Genode::uint32_t sel_ar_conv_from_nova(Genode::uint16_t v)
{
return (v & 0xff) | (((Genode::uint32_t)v << 4) & 0x1f000);
}
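/*
* Example (assuming a 64-bit code segment with G=1 and L=1): the Intel
* attribute value 0xa09b (type/S/DPL/P in bits 0-7, AVL/L/D/G in bits
* 12-15) converts to the NOVA value 0xa9b - bits 12-16 move down to
* bits 8-12, while the segment-limit bits 19:16 (bits 8-11) are not
* transferred. Converting 0xa9b back yields 0xa09b again, with the
* limit-high nibble zeroed.
*/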
/*
* Used to map MMIO memory into the VM
*/
extern "C" int MMIO2_MAPPED_SYNC(PVM pVM, RTGCPHYS GCPhys, size_t cbWrite,
void **ppv, Genode::Flexpage_iterator &fli,
bool &writeable);
class Vcpu_handler : public Vmm::Vcpu_dispatcher<pthread>,
public Genode::List<Vcpu_handler>::Element
{
private:
X86FXSTATE _guest_fpu_state __attribute__((aligned(0x10)));
X86FXSTATE _emt_fpu_state __attribute__((aligned(0x10)));
Genode::Cap_connection _cap_connection;
Vmm::Vcpu_other_pd _vcpu;
Genode::addr_t _ec_sel;
bool _irq_win;
unsigned int _cpu_id;
Genode::Semaphore _halt_sem;
unsigned int _last_inj_info;
unsigned int _last_inj_error;
void fpu_save(char * data) {
Assert(!(reinterpret_cast<Genode::addr_t>(data) & 0xF));
asm volatile ("fxsave %0" : "=m" (*data));
}
void fpu_load(char * data) {
Assert(!(reinterpret_cast<Genode::addr_t>(data) & 0xF));
asm volatile ("fxrstor %0" : : "m" (*data));
}
enum {
NOVA_REQ_IRQWIN_EXIT = 0x1000U,
IRQ_INJ_VALID_MASK = 0x80000000UL,
IRQ_INJ_NONE = 0U,
/*
* Intel® 64 and IA-32 Architectures Software Developer's Manual
* Volume 3C, Chapter 24.4.2.
* May 2012
*/
BLOCKING_BY_STI = 1U << 0,
BLOCKING_BY_MOV_SS = 1U << 1,
ACTIVITY_STATE_ACTIVE = 0U,
INTERRUPT_STATE_NONE = 0U,
};
/*
* 'longjmp()' restores some FPU registers saved by 'setjmp()',
* so we need to save the guest FPU state before calling 'longjmp()'
*/
__attribute__((noreturn)) void _fpu_save_and_longjmp()
{
fpu_save(reinterpret_cast<char *>(&_guest_fpu_state));
longjmp(_env, 1);
}
protected:
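/* UTCB state to be applied on the next VM entry (see 'run_hw') */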
struct {
Nova::mword_t mtd;
unsigned intr_state;
unsigned ctrl[2];
} next_utcb;
PVM _current_vm;
PVMCPU _current_vcpu;
void * _stack_reply;
jmp_buf _env;
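/*
* Resume guest execution: 'setjmp()' records the EMT context, and
* 'Nova::reply()' answers the pending vCPU exit, passing the address
* of a local variable so that subsequent exit handlers start their
* frames just below the preserved EMT stack. Exit handlers either
* resume the guest via 'Nova::reply(_stack_reply)' or 'longjmp()'
* back here, which makes 'switch_to_hw()' return to its caller.
*/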
void switch_to_hw()
{
unsigned long value;
if (!setjmp(_env)) {
_stack_reply = reinterpret_cast<void *>(&value - 1);
Nova::reply(_stack_reply);
}
}
__attribute__((noreturn)) void _default_handler()
{
Nova::Utcb * utcb = reinterpret_cast<Nova::Utcb *>(Thread_base::utcb());
Assert(utcb->actv_state == ACTIVITY_STATE_ACTIVE);
Assert(!(utcb->inj_info & IRQ_INJ_VALID_MASK));
/* go back to VirtualBox */
_fpu_save_and_longjmp();
}
__attribute__((noreturn)) void _recall_handler()
{
Nova::Utcb * utcb = reinterpret_cast<Nova::Utcb *>(Thread_base::utcb());
Assert(utcb->actv_state == ACTIVITY_STATE_ACTIVE);
if (utcb->inj_info & IRQ_INJ_VALID_MASK) {
Assert(utcb->flags & X86_EFL_IF);
if (utcb->intr_state != INTERRUPT_STATE_NONE)
Vmm::printf("intr state %x %x\n", utcb->intr_state, utcb->intr_state & 0xF);
Assert(utcb->intr_state == INTERRUPT_STATE_NONE);
/*
if (!continue_hw_accelerated(utcb))
Vmm::printf("WARNING - recall ignored during IRQ delivery\n");
*/
/*
* Got recall during IRQ injection and the guest is ready for
* delivery of the IRQ - just continue.
*/
Nova::reply(_stack_reply);
}
/* are we forced to go back to emulation mode? */
if (!continue_hw_accelerated(utcb)) {
/* go back to emulation mode */
_fpu_save_and_longjmp();
}
/* check whether we have to request irq injection window */
utcb->mtd = Nova::Mtd::FPU;
if (check_to_request_irq_window(utcb, _current_vcpu)) {
_irq_win = true;
Nova::reply(_stack_reply);
}
/* nothing to do at all - continue hardware accelerated */
Assert(!_irq_win);
/*
* Print a debug message if there actually IS something to do now.
* This can happen, for example, if one of the worker threads has
* set a flag in the meantime. Usually, setting a flag is followed
* by a recall request, but we haven't verified this for each flag
* yet.
*/
continue_hw_accelerated(utcb, true);
Nova::reply(_stack_reply);
}
template <unsigned NPT_EPT>
__attribute__((noreturn)) inline
void _exc_memory(Genode::Thread_base * myself, Nova::Utcb * utcb,
bool unmap, Genode::addr_t reason)
{
using namespace Nova;
using namespace Genode;
Assert(utcb->actv_state == ACTIVITY_STATE_ACTIVE);
if (unmap) {
PERR("unmap not implemented\n");
Nova::reply(_stack_reply);
}
enum { MAP_SIZE = 0x1000UL };
bool writeable = true;
Flexpage_iterator fli;
void *pv = guest_memory()->lookup_ram(reason, MAP_SIZE, fli);
if (!pv) {
/*
* Check whether this is MMIO memory provided by the VMM that we
* can map, e.g., VMMDev memory or the framebuffer.
*/
int res = MMIO2_MAPPED_SYNC(_current_vm, reason, MAP_SIZE, &pv,
fli, writeable);
if (res != VINF_SUCCESS)
pv = 0;
}
/* emulator has to take over if fault region is not ram */
if (!pv) {
/* event re-injection is not handled yet for this case */
Assert(!(utcb->inj_info & IRQ_INJ_VALID_MASK));
_fpu_save_and_longjmp();
}
/* fault region can be mapped - prepare utcb */
utcb->set_msg_word(0);
utcb->mtd = Mtd::FPU;
if (utcb->inj_info & IRQ_INJ_VALID_MASK) {
/*
* The EPT violation occurred during event injection,
* so the event needs to be injected again.
*/
utcb->mtd |= Mtd::INJ;
utcb->inj_info = _last_inj_info;
utcb->inj_error = _last_inj_error;
}
enum {
USER_PD = false, GUEST_PGT = true,
READABLE = true, EXECUTABLE = true
};
Rights permission(READABLE, writeable, EXECUTABLE);
/* add map items until no space is left on the UTCB */
bool res;
do {
Flexpage flexpage = fli.page();
if (!flexpage.valid() || flexpage.log2_order < 12)
break;
/* touch memory - otherwise no mapping will take place */
addr_t touch_me = flexpage.addr;
while (touch_me < flexpage.addr + (1UL << flexpage.log2_order)) {
touch_read(reinterpret_cast<unsigned char *>(touch_me));
touch_me += 0x1000UL;
}
Crd crd = Mem_crd(flexpage.addr >> 12, flexpage.log2_order - 12,
permission);
res = utcb->append_item(crd, flexpage.hotspot, USER_PD, GUEST_PGT);
if (debug_map_memory)
Vmm::printf("map guest mem %lx+%lx -> %lx - reason %lx\n",
flexpage.addr, 1UL << flexpage.log2_order,
flexpage.hotspot, reason);
} while (res);
Nova::reply(_stack_reply);
}
/**
* Shortcut for calling 'Vmm::Vcpu_dispatcher::register_handler'
* with 'Vcpu_dispatcher' as template argument
*/
template <unsigned EV, void (Vcpu_handler::*FUNC)()>
void _register_handler(Genode::addr_t exc_base, Nova::Mtd mtd)
{
if (!register_handler<EV, Vcpu_handler, FUNC>(exc_base, mtd))
PERR("could not register handler %lx", exc_base + EV);
}
Vmm::Vcpu_other_pd &vcpu() { return _vcpu; }
inline bool vbox_to_utcb(Nova::Utcb * utcb, VM *pVM, PVMCPU pVCpu)
{
PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
using namespace Nova;
utcb->mtd |= Mtd::EIP;
utcb->ip = pCtx->rip;
utcb->mtd |= Mtd::ESP;
utcb->sp = pCtx->rsp;
utcb->mtd |= Mtd::ACDB;
utcb->ax = pCtx->rax;
utcb->bx = pCtx->rbx;
utcb->cx = pCtx->rcx;
utcb->dx = pCtx->rdx;
utcb->mtd |= Mtd::EBSD;
utcb->bp = pCtx->rbp;
utcb->si = pCtx->rsi;
utcb->di = pCtx->rdi;
utcb->mtd |= Mtd::R8_R15;
utcb->write_r8(pCtx->r8);
utcb->write_r9(pCtx->r9);
utcb->write_r10(pCtx->r10);
utcb->write_r11(pCtx->r11);
utcb->write_r12(pCtx->r12);
utcb->write_r13(pCtx->r13);
utcb->write_r14(pCtx->r14);
utcb->write_r15(pCtx->r15);
utcb->mtd |= Mtd::EFL;
utcb->flags = pCtx->rflags.u;
utcb->mtd |= Mtd::SYS;
utcb->sysenter_cs = pCtx->SysEnter.cs;
utcb->sysenter_sp = pCtx->SysEnter.esp;
utcb->sysenter_ip = pCtx->SysEnter.eip;
utcb->mtd |= Mtd::DR;
utcb->dr7 = pCtx->dr[7];
utcb->mtd |= Mtd::CR;
utcb->cr0 = pCtx->cr0;
utcb->cr2 = pCtx->cr2;
utcb->cr3 = pCtx->cr3;
utcb->cr4 = pCtx->cr4;
utcb->mtd |= Mtd::IDTR;
utcb->idtr.limit = pCtx->idtr.cbIdt;
utcb->idtr.base = pCtx->idtr.pIdt;
utcb->mtd |= Mtd::GDTR;
utcb->gdtr.limit = pCtx->gdtr.cbGdt;
utcb->gdtr.base = pCtx->gdtr.pGdt;
utcb->mtd |= Mtd::EFER;
utcb->write_efer(CPUMGetGuestEFER(pVCpu));
/*
* Update the PDPTE registers if necessary
*
* Intel manual sections 4.4.1 of Vol. 3A and 26.3.2.4 of Vol. 3C
* indicate the conditions when this is the case. The following
* code currently does not check if the recompiler modified any
* CR registers, which means the update can happen more often
* than really necessary.
*/
if (pVM->hm.s.vmx.fSupported &&
CPUMIsGuestPagingEnabledEx(pCtx) &&
CPUMIsGuestInPAEModeEx(pCtx)) {
utcb->mtd |= Mtd::PDPTE;
Genode::uint64_t *pdpte = (Genode::uint64_t*)
guest_memory()->lookup(utcb->cr3, sizeof(utcb->pdpte));
Assert(pdpte != 0);
utcb->pdpte[0] = pdpte[0];
utcb->pdpte[1] = pdpte[1];
utcb->pdpte[2] = pdpte[2];
utcb->pdpte[3] = pdpte[3];
}
utcb->mtd |= Mtd::SYSCALL_SWAPGS;
utcb->write_star(pCtx->msrSTAR);
utcb->write_lstar(pCtx->msrLSTAR);
utcb->write_fmask(pCtx->msrSFMASK);
utcb->write_kernel_gs_base(pCtx->msrKERNELGSBASE);
/* from HMVMXR0.cpp */
bool interrupt_pending = false;
uint8_t tpr = 0;
uint8_t pending_interrupt = 0;
PDMApicGetTPR(pVCpu, &tpr, &interrupt_pending, &pending_interrupt);
utcb->mtd |= Mtd::TPR;
utcb->write_tpr(tpr);
utcb->write_tpr_threshold(0);
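/*
* Program the TPR threshold (mirroring the logic in HMVMXR0.cpp) so
* that lowering the guest TPR below the priority of the pending
* interrupt causes a VM exit, allowing the interrupt to be delivered.
*/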
if (interrupt_pending) {
const uint8_t pending_priority = (pending_interrupt >> 4) & 0xf;
const uint8_t tpr_priority = (tpr >> 4) & 0xf;
if (pending_priority <= tpr_priority)
utcb->write_tpr_threshold(pending_priority);
else
utcb->write_tpr_threshold(tpr_priority);
}
Assert(!(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)));
return true;
}
inline bool utcb_to_vbox(Nova::Utcb * utcb, VM *pVM, PVMCPU pVCpu)
{
PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
pCtx->rip = utcb->ip;
pCtx->rsp = utcb->sp;
pCtx->rax = utcb->ax;
pCtx->rbx = utcb->bx;
pCtx->rcx = utcb->cx;
pCtx->rdx = utcb->dx;
pCtx->rbp = utcb->bp;
pCtx->rsi = utcb->si;
pCtx->rdi = utcb->di;
pCtx->rflags.u = utcb->flags;
pCtx->r8 = utcb->read_r8();
pCtx->r9 = utcb->read_r9();
pCtx->r10 = utcb->read_r10();
pCtx->r11 = utcb->read_r11();
pCtx->r12 = utcb->read_r12();
pCtx->r13 = utcb->read_r13();
pCtx->r14 = utcb->read_r14();
pCtx->r15 = utcb->read_r15();
pCtx->dr[7] = utcb->dr7;
if (pCtx->SysEnter.cs != utcb->sysenter_cs)
CPUMSetGuestMsr(pVCpu, MSR_IA32_SYSENTER_CS, utcb->sysenter_cs);
if (pCtx->SysEnter.esp != utcb->sysenter_sp)
CPUMSetGuestMsr(pVCpu, MSR_IA32_SYSENTER_ESP, utcb->sysenter_sp);
if (pCtx->SysEnter.eip != utcb->sysenter_ip)
CPUMSetGuestMsr(pVCpu, MSR_IA32_SYSENTER_EIP, utcb->sysenter_ip);
if (pCtx->idtr.cbIdt != utcb->idtr.limit ||
pCtx->idtr.pIdt != utcb->idtr.base)
CPUMSetGuestIDTR(pVCpu, utcb->idtr.base, utcb->idtr.limit);
if (pCtx->gdtr.cbGdt != utcb->gdtr.limit ||
pCtx->gdtr.pGdt != utcb->gdtr.base)
CPUMSetGuestGDTR(pVCpu, utcb->gdtr.base, utcb->gdtr.limit);
CPUMSetGuestEFER(pVCpu, utcb->read_efer());
if (pCtx->cr0 != utcb->cr0)
CPUMSetGuestCR0(pVCpu, utcb->cr0);
if (pCtx->cr2 != utcb->cr2)
CPUMSetGuestCR2(pVCpu, utcb->cr2);
if (pCtx->cr3 != utcb->cr3)
CPUMSetGuestCR3(pVCpu, utcb->cr3);
if (pCtx->cr4 != utcb->cr4)
CPUMSetGuestCR4(pVCpu, utcb->cr4);
if (pCtx->msrSTAR != utcb->read_star())
CPUMSetGuestMsr(pVCpu, MSR_K6_STAR, utcb->read_star());
if (pCtx->msrLSTAR != utcb->read_lstar())
CPUMSetGuestMsr(pVCpu, MSR_K8_LSTAR, utcb->read_lstar());
if (pCtx->msrSFMASK != utcb->read_fmask())
CPUMSetGuestMsr(pVCpu, MSR_K8_SF_MASK, utcb->read_fmask());
if (pCtx->msrKERNELGSBASE != utcb->read_kernel_gs_base())
CPUMSetGuestMsr(pVCpu, MSR_K8_KERNEL_GS_BASE, utcb->read_kernel_gs_base());
PDMApicSetTPR(pVCpu, utcb->read_tpr());
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
/* tell rem compiler that FPU register changed XXX optimizations ? */
CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_FPU_REM); /* redundant ? XXX */
pVCpu->cpum.s.fUseFlags |= (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM); /* redundant ? XXX */
if (utcb->intr_state != 0) {
Assert(utcb->intr_state == BLOCKING_BY_STI ||
utcb->intr_state == BLOCKING_BY_MOV_SS);
EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
} else
VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
return true;
}
inline bool check_to_request_irq_window(Nova::Utcb * utcb, PVMCPU pVCpu)
{
if (!TRPMHasTrap(pVCpu) &&
!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC |
VMCPU_FF_INTERRUPT_PIC)))
return false;
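/*
* The concrete vector does not matter at this point - the value
* merely requests a VM exit as soon as the guest becomes ready
* for interrupt delivery.
*/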
unsigned vector = 0;
utcb->inj_info = NOVA_REQ_IRQWIN_EXIT | vector;
utcb->mtd |= Nova::Mtd::INJ;
return true;
}
__attribute__((noreturn)) void _irq_window()
{
Nova::Utcb * utcb = reinterpret_cast<Nova::Utcb *>(Thread_base::utcb());
PVMCPU pVCpu = _current_vcpu;
Assert(utcb->intr_state == INTERRUPT_STATE_NONE);
Assert(utcb->flags & X86_EFL_IF);
Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
Assert(!(utcb->inj_info & IRQ_INJ_VALID_MASK));
Assert(_irq_win);
_irq_win = false;
if (!TRPMHasTrap(pVCpu)) {
bool res = VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
Assert(!res);
if (VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC |
VMCPU_FF_INTERRUPT_PIC))) {
uint8_t irq;
int rc = PDMGetInterrupt(pVCpu, &irq);
Assert(RT_SUCCESS(rc));
rc = TRPMAssertTrap(pVCpu, irq, TRPM_HARDWARE_INT);
Assert(RT_SUCCESS(rc));
}
}
/*
* If we have no IRQ for injection, something went wrong with the
* request of the IRQ window - it was probably not reset.
*/
Assert(TRPMHasTrap(pVCpu));
/* interrupt can be dispatched */
uint8_t u8Vector;
TRPMEVENT enmType;
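/* the SVMEVENT bit layout doubles as NOVA's 'inj_info' format here */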
SVMEVENT Event;
RTGCUINT u32ErrorCode;
RTGCUINTPTR GCPtrFaultAddress;
uint8_t cbInstr;
Event.u = 0;
/* If a new event is pending, then dispatch it now. */
int rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &u32ErrorCode, 0, 0);
AssertRC(rc);
Assert(enmType == TRPM_HARDWARE_INT);
Assert(u8Vector != X86_XCPT_NMI);
/* Clear the pending trap. */
rc = TRPMResetTrap(pVCpu);
AssertRC(rc);
Event.n.u8Vector = u8Vector;
Event.n.u1Valid = 1;
Event.n.u32ErrorCode = u32ErrorCode;
Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
utcb->inj_info = Event.u;
utcb->inj_error = Event.n.u32ErrorCode;
_last_inj_info = utcb->inj_info;
_last_inj_error = utcb->inj_error;
/*
Vmm::printf("type:info:vector %x:%x:%x intr:actv - %x:%x mtd %x\n",
Event.n.u3Type, utcb->inj_info, u8Vector, utcb->intr_state, utcb->actv_state, utcb->mtd);
*/
utcb->mtd = Nova::Mtd::INJ | Nova::Mtd::FPU;
Nova::reply(_stack_reply);
}
inline bool continue_hw_accelerated(Nova::Utcb * utcb, bool verbose = false)
{
Assert(!(VMCPU_FF_IS_SET(_current_vcpu, VMCPU_FF_INHIBIT_INTERRUPTS)));
uint32_t check_vm = VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST
| VM_FF_PGM_POOL_FLUSH_PENDING
| VM_FF_PDM_DMA;
uint32_t check_vcpu = VMCPU_FF_HM_TO_R3_MASK
| VMCPU_FF_PGM_SYNC_CR3
| VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
| VMCPU_FF_REQUEST;
if (!VM_FF_IS_PENDING(_current_vm, check_vm) &&
!VMCPU_FF_IS_PENDING(_current_vcpu, check_vcpu))
return true;
Assert(!(VM_FF_IS_PENDING(_current_vm, VM_FF_PGM_NO_MEMORY)));
#define VERBOSE_VM(flag) \
do { \
if (VM_FF_IS_PENDING(_current_vm, flag)) \
Vmm::printf("flag " #flag " pending\n"); \
} while (0)
#define VERBOSE_VMCPU(flag) \
do { \
if (VMCPU_FF_IS_PENDING(_current_vcpu, flag)) \
Vmm::printf("flag " #flag " pending\n"); \
} while (0)
if (verbose) {
/*
* VM_FF_HM_TO_R3_MASK
*/
VERBOSE_VM(VM_FF_TM_VIRTUAL_SYNC);
VERBOSE_VM(VM_FF_PGM_NEED_HANDY_PAGES);
/* handled by the assertion above */
/* VERBOSE_VM(VM_FF_PGM_NO_MEMORY); */
VERBOSE_VM(VM_FF_PDM_QUEUES);
VERBOSE_VM(VM_FF_EMT_RENDEZVOUS);
VERBOSE_VM(VM_FF_REQUEST);
VERBOSE_VM(VM_FF_PGM_POOL_FLUSH_PENDING);
VERBOSE_VM(VM_FF_PDM_DMA);
/*
* VMCPU_FF_HM_TO_R3_MASK
*/
VERBOSE_VMCPU(VMCPU_FF_TO_R3);
/* when this flag gets set, a recall request follows */
/* VERBOSE_VMCPU(VMCPU_FF_TIMER); */
VERBOSE_VMCPU(VMCPU_FF_PDM_CRITSECT);
VERBOSE_VMCPU(VMCPU_FF_PGM_SYNC_CR3);
VERBOSE_VMCPU(VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL);
VERBOSE_VMCPU(VMCPU_FF_REQUEST);
}
#undef VERBOSE_VMCPU
#undef VERBOSE_VM
return false;
}
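/* implemented by the SVM/VMX-specific vCPU handler subclasses */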
virtual bool hw_load_state(Nova::Utcb *, VM *, PVMCPU) = 0;
virtual bool hw_save_state(Nova::Utcb *, VM *, PVMCPU) = 0;
virtual bool vm_exit_requires_instruction_emulation() = 0;
public:
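/*
* Pseudo exit conditions, used in addition to the hardware-defined
* SVM/VMX exit reasons
*/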
enum Exit_condition
{
SVM_NPT = 0xfc,
SVM_INVALID = 0xfd,
VCPU_STARTUP = 0xfe,
RECALL = 0xff,
EMULATE_INSTR = 0x100
};
Vcpu_handler(size_t stack_size, const pthread_attr_t *attr,
void *(*start_routine) (void *), void *arg,
Genode::Cpu_session * cpu_session,
Genode::Affinity::Location location,
unsigned int cpu_id)
:
Vmm::Vcpu_dispatcher<pthread>(stack_size, _cap_connection,
cpu_session, location,
attr ? *attr : 0, start_routine, arg),
_vcpu(cpu_session, location),
_ec_sel(Genode::cap_map()->insert()),
_irq_win(false),
_cpu_id(cpu_id)
{ }
unsigned int cpu_id() { return _cpu_id; }
void start() {
_vcpu.start(_ec_sel);
}
void recall()
{
using namespace Nova;
if (ec_ctrl(EC_RECALL, _ec_sel) != NOVA_OK) {
PERR("recall failed");
Genode::Lock lock(Genode::Lock::LOCKED);
lock.lock();
}
}
void halt()
{
_halt_sem.down();
}
void wake_up()
{
_halt_sem.up();
}
inline void dump_register_state(PCPUMCTX pCtx)
{
PINF("pCtx");
PLOG("ip:sp:efl ax:bx:cx:dx:si:di %llx:%llx:%llx"
" %llx:%llx:%llx:%llx:%llx:%llx",
pCtx->rip, pCtx->rsp, pCtx->rflags.u, pCtx->rax, pCtx->rbx,
pCtx->rcx, pCtx->rdx, pCtx->rsi, pCtx->rdi);
PLOG("cs.attr.n.u4LimitHigh=0x%x", pCtx->cs.Attr.n.u4LimitHigh);
PLOG("cs base:limit:sel:ar %llx:%x:%x:%x", pCtx->cs.u64Base,
pCtx->cs.u32Limit, pCtx->cs.Sel, pCtx->cs.Attr.u);
PLOG("ds base:limit:sel:ar %llx:%x:%x:%x", pCtx->ds.u64Base,
pCtx->ds.u32Limit, pCtx->ds.Sel, pCtx->ds.Attr.u);
PLOG("es base:limit:sel:ar %llx:%x:%x:%x", pCtx->es.u64Base,
pCtx->es.u32Limit, pCtx->es.Sel, pCtx->es.Attr.u);
PLOG("fs base:limit:sel:ar %llx:%x:%x:%x", pCtx->fs.u64Base,
pCtx->fs.u32Limit, pCtx->fs.Sel, pCtx->fs.Attr.u);
PLOG("gs base:limit:sel:ar %llx:%x:%x:%x", pCtx->gs.u64Base,
pCtx->gs.u32Limit, pCtx->gs.Sel, pCtx->gs.Attr.u);
PLOG("ss base:limit:sel:ar %llx:%x:%x:%x", pCtx->ss.u64Base,
pCtx->ss.u32Limit, pCtx->ss.Sel, pCtx->ss.Attr.u);
PLOG("cr0:cr2:cr3:cr4 %llx:%llx:%llx:%llx",
pCtx->cr0, pCtx->cr2, pCtx->cr3, pCtx->cr4);
PLOG("ldtr base:limit:sel:ar %llx:%x:%x:%x", pCtx->ldtr.u64Base,
pCtx->ldtr.u32Limit, pCtx->ldtr.Sel, pCtx->ldtr.Attr.u);
PLOG("tr base:limit:sel:ar %llx:%x:%x:%x", pCtx->tr.u64Base,
pCtx->tr.u32Limit, pCtx->tr.Sel, pCtx->tr.Attr.u);
PLOG("gdtr base:limit %llx:%x", pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt);
PLOG("idtr base:limit %llx:%x", pCtx->idtr.pIdt, pCtx->idtr.cbIdt);
PLOG("dr 0:1:2:3:4:5:6:7 %llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
pCtx->dr[0], pCtx->dr[1], pCtx->dr[2], pCtx->dr[3],
pCtx->dr[4], pCtx->dr[5], pCtx->dr[6], pCtx->dr[7]);
PLOG("sysenter cs:eip:esp %llx %llx %llx", pCtx->SysEnter.cs,
pCtx->SysEnter.eip, pCtx->SysEnter.esp);
}
inline void dump_register_state(Nova::Utcb * utcb)
{
PINF("utcb");
PLOG("ip:sp:efl ax:bx:cx:dx:si:di %lx:%lx:%lx"
" %lx:%lx:%lx:%lx:%lx:%lx",
utcb->ip, utcb->sp, utcb->flags, utcb->ax, utcb->bx,
utcb->cx, utcb->dx, utcb->si, utcb->di);
PLOG("cs base:limit:sel:ar %lx:%x:%x:%x", utcb->cs.base,
utcb->cs.limit, utcb->cs.sel, utcb->cs.ar);
PLOG("ds base:limit:sel:ar %lx:%x:%x:%x", utcb->ds.base,
utcb->ds.limit, utcb->ds.sel, utcb->ds.ar);
PLOG("es base:limit:sel:ar %lx:%x:%x:%x", utcb->es.base,
utcb->es.limit, utcb->es.sel, utcb->es.ar);
PLOG("fs base:limit:sel:ar %lx:%x:%x:%x", utcb->fs.base,
utcb->fs.limit, utcb->fs.sel, utcb->fs.ar);
PLOG("gs base:limit:sel:ar %lx:%x:%x:%x", utcb->gs.base,
utcb->gs.limit, utcb->gs.sel, utcb->gs.ar);
PLOG("ss base:limit:sel:ar %lx:%x:%x:%x", utcb->ss.base,
utcb->ss.limit, utcb->ss.sel, utcb->ss.ar);
PLOG("cr0:cr2:cr3:cr4 %lx:%lx:%lx:%lx",
utcb->cr0, utcb->cr2, utcb->cr3, utcb->cr4);
PLOG("ldtr base:limit:sel:ar %lx:%x:%x:%x", utcb->ldtr.base,
utcb->ldtr.limit, utcb->ldtr.sel, utcb->ldtr.ar);
PLOG("tr base:limit:sel:ar %lx:%x:%x:%x", utcb->tr.base,
utcb->tr.limit, utcb->tr.sel, utcb->tr.ar);
PLOG("gdtr base:limit %lx:%x", utcb->gdtr.base, utcb->gdtr.limit);
PLOG("idtr base:limit %lx:%x", utcb->idtr.base, utcb->idtr.limit);
PLOG("dr 7 %lx", utcb->dr7);
PLOG("sysenter cs:eip:esp %lx %lx %lx", utcb->sysenter_cs,
utcb->sysenter_ip, utcb->sysenter_sp);
PLOG("%x %x %x", utcb->intr_state, utcb->actv_state, utcb->mtd);
}
int run_hw(PVMR0 pVMR0)
{
VM * pVM = reinterpret_cast<VM *>(pVMR0);
PVMCPU pVCpu = &pVM->aCpus[_cpu_id];
PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
Nova::Utcb *utcb = reinterpret_cast<Nova::Utcb *>(Thread_base::utcb());
Assert(Thread_base::utcb() == Thread_base::myself()->utcb());
/* take the utcb state prepared during the last exit */
utcb->mtd = next_utcb.mtd;
utcb->inj_info = IRQ_INJ_NONE;
utcb->intr_state = next_utcb.intr_state;
utcb->actv_state = ACTIVITY_STATE_ACTIVE;
utcb->ctrl[0] = next_utcb.ctrl[0];
utcb->ctrl[1] = next_utcb.ctrl[1];
using namespace Nova;
/* Transfer vCPU state from vBox to Nova format */
if (!vbox_to_utcb(utcb, pVM, pVCpu) ||
!hw_load_state(utcb, pVM, pVCpu)) {
PERR("loading vCPU state failed");
return VERR_INTERNAL_ERROR;
}
/* check whether to request interrupt window for injection */
_irq_win = check_to_request_irq_window(utcb, pVCpu);
/*
* Flag vCPU to be "pokeable" by external events such as interrupts
* from virtual devices. Only if this flag is set, the
* 'vmR3HaltGlobal1NotifyCpuFF' function calls 'SUPR3CallVMMR0Ex'
* with VMMR0_DO_GVMM_SCHED_POKE as argument to indicate such
* events. This function, in turn, will recall the vCPU.
*/
VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
/* save current FPU state */
fpu_save(reinterpret_cast<char *>(&_emt_fpu_state));
/* write FPU state from pCtx to FPU registers */
fpu_load(reinterpret_cast<char *>(&pCtx->fpu));
/* tell kernel to transfer current fpu registers to vCPU */
utcb->mtd |= Mtd::FPU;
_current_vm = pVM;
_current_vcpu = pVCpu;
/* switch to hardware accelerated mode */
switch_to_hw();
Assert(utcb->actv_state == ACTIVITY_STATE_ACTIVE);
_current_vm = 0;
_current_vcpu = 0;
/* write FPU state of vCPU (in current FPU registers) to pCtx */
Genode::memcpy(&pCtx->fpu, &_guest_fpu_state, sizeof(X86FXSTATE));
/* load saved FPU state of EMT thread */
fpu_load(reinterpret_cast<char *>(&_emt_fpu_state));
CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
/* Transfer vCPU state from Nova to vBox format */
if (!utcb_to_vbox(utcb, pVM, pVCpu) ||
!hw_save_state(utcb, pVM, pVCpu)) {
PERR("saving vCPU state failed");
return VERR_INTERNAL_ERROR;
}
/* reset message transfer descriptor for next invocation */
Assert (!(utcb->inj_info & IRQ_INJ_VALID_MASK));
/* Reset irq window next time if we are still requesting it */
next_utcb.mtd = _irq_win ? Mtd::INJ : 0;
next_utcb.intr_state = utcb->intr_state;
next_utcb.ctrl[0] = utcb->ctrl[0];
next_utcb.ctrl[1] = utcb->ctrl[1];
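/*
* Clear any sti/mov-ss interrupt shadow and transfer the cleared
* state on the next VM entry - the blocking state is tracked by
* VirtualBox via 'EMSetInhibitInterruptsPC' (see 'utcb_to_vbox').
*/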
if (next_utcb.intr_state & 3) {
next_utcb.intr_state &= ~3U;
next_utcb.mtd |= Mtd::STA;
}
#ifdef VBOX_WITH_REM
/* XXX see VMM/VMMR0/HMVMXR0.cpp - not necessary every time ! XXX */
REMFlushTBs(pVM);
#endif
return vm_exit_requires_instruction_emulation() ? VINF_EM_RAW_EMULATE_INSTR
: VINF_SUCCESS;
}
};
#endif /* _VIRTUALBOX__SPEC__NOVA__VCPU_H_ */