diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h index 9ee66acbfc..a20c3fd09c 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h @@ -31,9 +31,7 @@ namespace Board { using Vm_page_table = Hw::Page_table; using Vm_page_table_array = Vm_page_table::Allocator::Array; - struct Vcpu_context; - using Vcpu_data = Genode::Vcpu_data; using Vcpu_state = Genode::Vcpu_state; @@ -42,7 +40,7 @@ namespace Board { }; /* FIXME move into Vcpu_context as 'enum class' when we have C++20 */ - enum Platform_exitcodes : Genode::uint64_t { + enum Platform_exitcodes : uint64_t { EXIT_NPF = 0xfc, EXIT_INIT = 0xfd, EXIT_STARTUP = 0xfe, @@ -64,19 +62,21 @@ namespace Kernel { struct Board::Vcpu_context { - Vcpu_context(unsigned id, void *vcpu_data_ptr); - Vcpu_context(unsigned id, void *virt_area, addr_t vmcb_phys_addr); - void initialize_svm(Kernel::Cpu &cpu, void *table); + Vcpu_context(unsigned id, Vcpu_data &vcpu_data); + void initialize(Kernel::Cpu &cpu, addr_t table_phys_addr); void read_vcpu_state(Vcpu_state &state); void write_vcpu_state(Vcpu_state &state); - Vmcb &vmcb; - addr_t vmcb_phys_addr; + Vmcb *vmcb { nullptr }; Genode::Align_at regs; + Vcpu_data &vcpu_data; uint64_t tsc_aux_host = 0U; uint64_t tsc_aux_guest = 0U; uint64_t exitcode = EXIT_INIT; + + Vcpu_context(const Vcpu_context &) = delete; /* noncopyable */ + Vcpu_context &operator=(const Vcpu_context &) = delete; }; #endif /* _CORE__SPEC__PC__VIRTUALIZATION__BOARD_H_ */ diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/hypervisor.h b/repos/base-hw/src/core/spec/x86_64/virtualization/hypervisor.h index d57ca8b17f..86c3935257 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/hypervisor.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/hypervisor.h @@ -53,53 +53,6 @@ namespace Hypervisor { : "memory"); }; - - - inline void switch_world(Call_arg 
guest_state, Call_arg regs, - Call_arg fpu_context) - { - asm volatile( - "fxrstor (%[fpu_context]);" - "mov %[guest_state], %%rax;" - "mov %[regs], %%rsp;" - "popq %%r8;" - "popq %%r9;" - "popq %%r10;" - "popq %%r11;" - "popq %%r12;" - "popq %%r13;" - "popq %%r14;" - "popq %%r15;" - "add $8, %%rsp;" /* don't pop rax */ - "popq %%rbx;" - "popq %%rcx;" - "popq %%rdx;" - "popq %%rdi;" - "popq %%rsi;" - "popq %%rbp;" - "clgi;" - "sti;" - "vmload;" - "vmrun;" - "vmsave;" - "popq %%rax;" /* get the physical address of the host VMCB from - the stack */ - "vmload;" - "stgi;" /* maybe enter the kernel to handle an external interrupt - that occured ... */ - "nop;" - "cli;" /* ... otherwise, just disable interrupts again */ - "pushq $256;" /* make the stack point to trapno, the right place - to jump to _kernel_entry. We push 256 because - this is outside of the valid range for interrupts - */ - "jmp _kernel_entry;" /* jump to _kernel_entry to save the - GPRs without breaking any */ - : - : [regs] "r"(regs), [fpu_context] "r"(fpu_context), - [guest_state] "r"(guest_state) - : "rax", "memory"); - } } #endif /* _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_ */ diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc index 6a231ace0c..7abcc4ea21 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc @@ -18,13 +18,14 @@ #include #include -using Genode::addr_t; +using namespace Genode; + using Kernel::Cpu; using Kernel::Vm; using Board::Vmcb; -Vmcb::Vmcb(Genode::uint32_t id) +Vmcb::Vmcb(uint32_t id) : Mmio({(char *)this, Mmio::SIZE}) { @@ -40,9 +41,9 @@ Vmcb::Vmcb(Genode::uint32_t id) } -Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id) +Vmcb & Vmcb::host_vmcb(size_t cpu_id) { - static Genode::Constructible host_vmcb[NR_OF_CPUS]; + static Constructible host_vmcb[NR_OF_CPUS]; if (!host_vmcb[cpu_id].constructed()) { 
host_vmcb[cpu_id].construct(Vmcb::Asid_host); @@ -50,13 +51,21 @@ Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id) return *host_vmcb[cpu_id]; } - -void Vmcb::init(Genode::size_t cpu_id, void * table_ptr) +void Vmcb::initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr) { using Cpu = Hw::X86_64_cpu; - root_vmcb_phys = Core::Platform::core_phys_addr((addr_t) - &host_vmcb(cpu_id)); + Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read(); + Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1); + Cpu::Ia32_efer::write(ia32_efer_msr); + + Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr = + Cpu::Amd_vm_syscvg::read(); + Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1); + Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr); + + root_vmcb_phys = + Core::Platform::core_phys_addr((addr_t)&host_vmcb(cpu.id())); asm volatile ("vmsave" : : "a" (root_vmcb_phys) : "memory"); Cpu::Amd_vm_hsavepa::write((Cpu::Amd_vm_hsavepa::access_t) root_vmcb_phys); @@ -64,7 +73,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr) * enable nested paging */ write(1); - write((Genode::addr_t) table_ptr); + write(page_table_phys_addr); write(1); /* See 15.2 */ write(17); /* AC */ @@ -76,7 +85,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr) /* * Enforce SVM intercepts */ -void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t desired_secondary) +void Vmcb::enforce_intercepts(uint32_t desired_primary, uint32_t desired_secondary) { write( desired_primary | @@ -103,13 +112,11 @@ void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t * AMD Vol.2 15.11: MSR Permissions Map * All set to 1 since we want all MSRs to be intercepted. 
*/ -Genode::addr_t Vmcb::dummy_msrpm() +addr_t Vmcb::dummy_msrpm() { - static Genode::Constructible msrpm; - if (!msrpm.constructed()) - msrpm.construct(); + static Board::Msrpm msrpm; - return Core::Platform::core_phys_addr((addr_t) & *msrpm); + return Core::Platform::core_phys_addr((addr_t) &msrpm); } @@ -117,311 +124,271 @@ Genode::addr_t Vmcb::dummy_msrpm() * AMD Vol.2 15.10.1 I/O Permissions Map * All set to 1 since we want all IO port accesses to be intercepted. */ -Genode::addr_t Vmcb::dummy_iopm() +addr_t Vmcb::dummy_iopm() { - static Genode::Constructible iopm; - if (!iopm.constructed()) - iopm.construct(); + static Board::Iopm iopm; - return Core::Platform::core_phys_addr((addr_t) &*iopm); + return Core::Platform::core_phys_addr((addr_t) &iopm); } Board::Msrpm::Msrpm() { - Genode::memset(this, 0xFF, sizeof(*this)); + memset(this, 0xFF, sizeof(*this)); } Board::Iopm::Iopm() { - Genode::memset(this, 0xFF, sizeof(*this)); + memset(this, 0xFF, sizeof(*this)); } -void Board::Vcpu_context::initialize_svm(Kernel::Cpu & cpu, void * table) +void Vmcb::write_vcpu_state(Vcpu_state &state) { - using Cpu = Hw::X86_64_cpu; + typedef Vcpu_state::Range Range; - Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read(); - Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1); - Cpu::Ia32_efer::write(ia32_efer_msr); - - Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr = Cpu::Amd_vm_syscvg::read(); - Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1); - Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr); - - vmcb.init(cpu.id(), table); -} - - -void Board::Vcpu_context::write_vcpu_state(Genode::Vcpu_state &state) -{ - typedef Genode::Vcpu_state::Range Range; - - state.discharge(); - state.exit_reason = (unsigned) exitcode; - - state.fpu.charge([&] (Genode::Vcpu_state::Fpu::State &fpu) { - memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu)); - }); - - state.ax.charge(vmcb.rax); - state.cx.charge(regs->rcx); - state.dx.charge(regs->rdx); - state.bx.charge(regs->rbx); - - 
state.di.charge(regs->rdi); - state.si.charge(regs->rsi); - state.bp.charge(regs->rbp); - - - state.ip.charge(vmcb.rip); + state.ax.charge(rax); + state.ip.charge(rip); /* * SVM doesn't use ip_len, so just leave the old value. * We still have to charge it when charging ip. */ state.ip_len.set_charged(); - state.flags.charge(vmcb.rflags); - state.sp.charge(vmcb.rsp); + state.flags.charge(rflags); + state.sp.charge(rsp); - state.dr7.charge(vmcb.dr7); + state.dr7.charge(dr7); - state. r8.charge(regs->r8); - state. r9.charge(regs->r9); - state.r10.charge(regs->r10); - state.r11.charge(regs->r11); - state.r12.charge(regs->r12); - state.r13.charge(regs->r13); - state.r14.charge(regs->r14); - state.r15.charge(regs->r15); + state.cr0.charge(cr0); + state.cr2.charge(cr2); + state.cr3.charge(cr3); + state.cr4.charge(cr4); - state.cr0.charge(vmcb.cr0); - state.cr2.charge(vmcb.cr2); - state.cr3.charge(vmcb.cr3); - state.cr4.charge(vmcb.cr4); + state.cs.charge(cs); + state.ss.charge(ss); + state.es.charge(es); + state.ds.charge(ds); + state.fs.charge(fs); + state.gs.charge(gs); + state.tr.charge(tr); + state.ldtr.charge(ldtr); + state.gdtr.charge(Range { .limit = gdtr.limit, .base = gdtr.base }); - state.cs.charge(vmcb.cs); - state.ss.charge(vmcb.ss); - state.es.charge(vmcb.es); - state.ds.charge(vmcb.ds); - state.fs.charge(vmcb.fs); - state.gs.charge(vmcb.gs); - state.tr.charge(vmcb.tr); - state.ldtr.charge(vmcb.ldtr); - state.gdtr.charge(Range { .limit = vmcb.gdtr.limit, - .base = vmcb.gdtr.base }); + state.idtr.charge(Range { .limit = idtr.limit, .base = idtr.base }); - state.idtr.charge(Range { .limit = vmcb.idtr.limit, - .base = vmcb.idtr.base }); + state.sysenter_cs.charge(sysenter_cs); + state.sysenter_sp.charge(sysenter_esp); + state.sysenter_ip.charge(sysenter_eip); - state.sysenter_cs.charge(vmcb.sysenter_cs); - state.sysenter_sp.charge(vmcb.sysenter_esp); - state.sysenter_ip.charge(vmcb.sysenter_eip); + state.qual_primary.charge(read()); + 
state.qual_secondary.charge(read()); - state.qual_primary.charge(vmcb.read()); - state.qual_secondary.charge(vmcb.read()); + /* Charging ctrl_primary and ctrl_secondary breaks Virtualbox 6 */ - state.ctrl_primary.charge(vmcb.read()); - state.ctrl_secondary.charge(vmcb.read()); - - state.inj_info.charge(vmcb.read()& 0xFFFFFFFF); - state.inj_error.charge((Genode::uint32_t) - (vmcb.read() >> 32)); + state.inj_info.charge(read() & 0xFFFFFFFF); + state.inj_error.charge( + (uint32_t)(read() >> 32)); /* Guest is in an interrupt shadow, see 15.21.5 */ - state.intr_state.charge((unsigned) - vmcb.read()); + state.intr_state.charge( + (unsigned)read()); /* Guest activity state (actv) not used by SVM */ state.actv_state.set_charged(); state.tsc.charge(Hw::Lapic::rdtsc()); - state.tsc_offset.charge(vmcb.read()); + state.tsc_offset.charge(read()); - tsc_aux_guest = Cpu::Ia32_tsc_aux::read(); - state.tsc_aux.charge(tsc_aux_guest); - Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host); - - state.efer.charge(vmcb.efer); + state.efer.charge(efer); /* pdpte not used by SVM */ - state.star.charge(vmcb.star); - state.lstar.charge(vmcb.lstar); - state.cstar.charge(vmcb.cstar); - state.fmask.charge(vmcb.sfmask); - state.kernel_gs_base.charge(vmcb.kernel_gs_base); + state.star.charge(star); + state.lstar.charge(lstar); + state.cstar.charge(cstar); + state.fmask.charge(sfmask); + state.kernel_gs_base.charge(kernel_gs_base); /* Task Priority Register, see 15.24 */ - state.tpr.charge((unsigned) vmcb.read()); + state.tpr.charge((unsigned)read()); /* TPR threshold not used by SVM */ } -void Board::Vcpu_context::read_vcpu_state(Genode::Vcpu_state &state) +void Vmcb::read_vcpu_state(Vcpu_state &state) { - if (state.ax.charged() || state.cx.charged() || - state.dx.charged() || state.bx.charged()) { - vmcb.rax = state.ax.value(); - regs->rcx = state.cx.value(); - regs->rdx = state.dx.value(); - regs->rbx = state.bx.value(); - } + if (state.ax.charged()) rax = state.ax.value(); + 
if (state.flags.charged()) rflags = state.flags.value(); + if (state.sp.charged()) rsp = state.sp.value(); + if (state.ip.charged()) rip = state.ip.value(); + /* ip_len not used by SVM */ + if (state.dr7.charged()) dr7 = state.dr7.value(); - if (state.bp.charged() || state.di.charged() || state.si.charged()) { - regs->rdi = state.di.value(); - regs->rsi = state.si.value(); - regs->rbp = state.bp.value(); - } + if (state.cr0.charged()) cr0 = state.cr0.value(); + if (state.cr2.charged()) cr2 = state.cr2.value(); + if (state.cr3.charged()) cr3 = state.cr3.value(); + if (state.cr4.charged()) cr4 = state.cr4.value(); - if (state.flags.charged()) { - vmcb.rflags = state.flags.value(); - } + if (state.cs.charged()) cs = state.cs.value(); + if (state.ss.charged()) ss = state.ss.value(); - if (state.sp.charged()) { - vmcb.rsp = state.sp.value(); - } + if (state.es.charged()) es = state.es.value(); + if (state.ds.charged()) ds = state.ds.value(); - if (state.ip.charged()) { - vmcb.rip = state.ip.value(); - /* ip_len not used by SVM */ - } + if (state.fs.charged()) fs = state.fs.value(); + if (state.gs.charged()) gs = state.gs.value(); - if (state.dr7.charged()) { - vmcb.dr7 = state.dr7.value(); - } - - if (state.r8 .charged() || state.r9 .charged() || - state.r10.charged() || state.r11.charged() || - state.r12.charged() || state.r13.charged() || - state.r14.charged() || state.r15.charged()) { - - regs->r8 = state.r8.value(); - regs->r9 = state.r9.value(); - regs->r10 = state.r10.value(); - regs->r11 = state.r11.value(); - regs->r12 = state.r12.value(); - regs->r13 = state.r13.value(); - regs->r14 = state.r14.value(); - regs->r15 = state.r15.value(); - } - - if (state.cr0.charged() || state.cr2.charged() || - state.cr3.charged() || state.cr4.charged()) { - vmcb.cr0 = state.cr0.value(); - vmcb.cr2 = state.cr2.value(); - vmcb.cr3 = state.cr3.value(); - vmcb.cr4 = state.cr4.value(); - } - - if (state.cs.charged() || state.ss.charged()) { - vmcb.cs = state.cs.value(); - vmcb.ss = 
state.ss.value(); - } - - if (state.es.charged() || state.ds.charged()) { - vmcb.es = state.es.value(); - vmcb.ds = state.ds.value(); - } - - if (state.fs.charged() || state.gs.charged()) { - vmcb.fs = state.fs.value(); - vmcb.gs = state.gs.value(); - } - - if (state.tr.charged()) { - vmcb.tr = state.tr.value(); - } - - if (state.ldtr.charged()) { - vmcb.ldtr = state.ldtr.value(); - } + if (state.tr.charged()) tr = state.tr.value(); + if (state.ldtr.charged()) ldtr = state.ldtr.value(); if (state.gdtr.charged()) { - vmcb.gdtr.limit = state.gdtr.value().limit; - vmcb.gdtr.base = state.gdtr.value().base; + gdtr.limit = state.gdtr.value().limit; + gdtr.base = state.gdtr.value().base; } if (state.idtr.charged()) { - vmcb.idtr.limit = state.idtr.value().limit; - vmcb.idtr.base = state.idtr.value().base; + idtr.limit = state.idtr.value().limit; + idtr.base = state.idtr.value().base; } - if (state.sysenter_cs.charged() || state.sysenter_sp.charged() || - state.sysenter_ip.charged()) { - vmcb.sysenter_cs = state.sysenter_cs.value(); - vmcb.sysenter_esp = state.sysenter_sp.value(); - vmcb.sysenter_eip = state.sysenter_ip.value(); - } + if (state.sysenter_cs.charged()) sysenter_cs = state.sysenter_cs.value(); + if (state.sysenter_sp.charged()) sysenter_esp = state.sysenter_sp.value(); + if (state.sysenter_ip.charged()) sysenter_eip = state.sysenter_ip.value(); if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) { - vmcb.enforce_intercepts(state.ctrl_primary.value(), - state.ctrl_secondary.value()); + enforce_intercepts(state.ctrl_primary.value(), + state.ctrl_secondary.value()); } if (state.inj_info.charged() || state.inj_error.charged()) { /* Honor special signaling bit */ if (state.inj_info.value() & 0x1000) { - vmcb.write(1); - vmcb.write(1); - vmcb.write(1); + write(1); + write(1); + write(1); } else { - vmcb.write(0); - vmcb.write(0); - vmcb.write(0); + write(0); + write(0); + write(0); } - - vmcb.write( + write( /* Filter out special signaling bits */ 
(state.inj_info.value() & - (Genode::uint32_t) ~0x3000) | - (((Genode::uint64_t) state.inj_error.value()) << 32) + (uint32_t) ~0x3000) | + (((uint64_t) state.inj_error.value()) << 32) ); } if (state.intr_state.charged()) { - vmcb.write(state.intr_state.value()); + write( + state.intr_state.value()); } /* Guest activity state (actv) not used by SVM */ if (state.tsc_offset.charged()) { /* state.tsc not used by SVM */ - vmcb.write(vmcb.read() + - state.tsc_offset.value()); + write(read() + + state.tsc_offset.value()); } - tsc_aux_host = Cpu::Ia32_tsc_aux::read(); - if (state.tsc_aux.charged()) { - tsc_aux_guest = state.tsc_aux.value(); - } - Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_guest); - if (state.efer.charged()) { - vmcb.efer = state.efer.value(); + efer = state.efer.value(); } /* pdpte not used by SVM */ - if (state.star.charged() || state.lstar.charged() || - state.cstar.charged() || state.fmask.charged() || - state.kernel_gs_base.charged()) { - vmcb.star = state.star.value(); - vmcb.cstar = state.cstar.value(); - vmcb.lstar = state.lstar.value(); - vmcb.sfmask = state.lstar.value(); - vmcb.kernel_gs_base = state.kernel_gs_base.value(); - } + if (state.star.charged()) star = state.star.value(); + if (state.cstar.charged()) cstar = state.cstar.value(); + if (state.lstar.charged()) lstar = state.lstar.value(); + if (state.fmask.charged()) sfmask = state.fmask.value(); + if (state.kernel_gs_base.charged()) kernel_gs_base = state.kernel_gs_base.value(); if (state.tpr.charged()) { - vmcb.write(state.tpr.value()); + write(state.tpr.value()); /* TPR threshold not used on AMD */ } - - if (state.fpu.charged()) { - state.fpu.with_state([&] (Genode::Vcpu_state::Fpu::State const &fpu) { - memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu)); - }); - } +} + +uint64_t Vmcb::get_exitcode() +{ + enum Svm_exitcodes : uint64_t + { + SVM_EXIT_INVALID = -1ULL, + SVM_VMEXIT_INTR = 0x60, + SVM_VMEXIT_NPF = 0x400, + }; + + uint64_t exitcode = read(); + switch 
(exitcode) { + case SVM_EXIT_INVALID: + error("VM: invalid SVM state!"); + break; + case 0x40 ... 0x5f: + error("VM: unhandled SVM exception ", + Hex(exitcode)); + break; + case SVM_VMEXIT_INTR: + exitcode = EXIT_PAUSED; + break; + case SVM_VMEXIT_NPF: + exitcode = EXIT_NPF; + break; + default: + break; + } + + return exitcode; +} + +void Vmcb::switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context &regs) +{ + /* + * We push the host context's physical address to trapno so that + * we can pop it later + */ + regs.trapno = root_vmcb_phys; + asm volatile( + "fxrstor (%[fpu_context]);" + "mov %[guest_state], %%rax;" + "mov %[regs], %%rsp;" + "popq %%r8;" + "popq %%r9;" + "popq %%r10;" + "popq %%r11;" + "popq %%r12;" + "popq %%r13;" + "popq %%r14;" + "popq %%r15;" + "add $8, %%rsp;" /* don't pop rax */ + "popq %%rbx;" + "popq %%rcx;" + "popq %%rdx;" + "popq %%rdi;" + "popq %%rsi;" + "popq %%rbp;" + "clgi;" + "sti;" + "vmload;" + "vmrun;" + "vmsave;" + "popq %%rax;" /* get the physical address of the host VMCB from + the stack */ + "vmload;" + "stgi;" /* maybe enter the kernel to handle an external interrupt + that occurred ... */ + "nop;" + "cli;" /* ... otherwise, just disable interrupts again */ + "pushq %[trap_vmexit];" /* make the stack point to trapno, the right place + to jump to _kernel_entry. 
We push TRAP_VMEXIT, which + is expected to lie outside of the valid range for interrupts + */ + "jmp _kernel_entry;" /* jump to _kernel_entry to save the + GPRs without breaking any */ + : + : [regs] "r"(&regs.r8), [fpu_context] "r"(regs.fpu_context()), + [guest_state] "r"(vmcb_phys_addr), + [trap_vmexit] "i"(TRAP_VMEXIT) + : "rax", "memory"); } diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc index 02f58894cb..b1d47c64e2 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc @@ -29,7 +29,8 @@ #include #include -using Genode::addr_t; +using namespace Genode; + using Kernel::Cpu; using Kernel::Vm; using Board::Vmcb; @@ -37,7 +38,7 @@ using Board::Vmcb; Vm::Vm(Irq::Pool & user_irq_pool, Cpu & cpu, - Genode::Vcpu_data & data, + Vcpu_data & data, Kernel::Signal_context & context, Identity & id) : @@ -47,7 +48,7 @@ Vm::Vm(Irq::Pool & user_irq_pool, _state(*data.vcpu_state), _context(context), _id(id), - _vcpu_context(id.id, data.virt_area, data.phys_addr) + _vcpu_context(id.id, data) { affinity(cpu); } @@ -73,13 +74,8 @@ void Vm::proceed(Cpu & cpu) Cpu::Ia32_tsc_aux::write( (Cpu::Ia32_tsc_aux::access_t)_vcpu_context.tsc_aux_guest); - /* - * We push the host context's physical address to trapno so that - * we can pop it later - */ - _vcpu_context.regs->trapno = _vcpu_context.vmcb.root_vmcb_phys; - Hypervisor::switch_world( _vcpu_context.vmcb_phys_addr, - (addr_t)&_vcpu_context.regs->r8, _vcpu_context.regs->fpu_context()); + _vcpu_context.vmcb->switch_world(_vcpu_context.vcpu_data.phys_addr, + *_vcpu_context.regs); /* * This will fall into an interrupt or otherwise jump into * _kernel_entry @@ -90,7 +86,6 @@ void Vm::proceed(Cpu & cpu) void Vm::exception(Cpu & cpu) { using namespace Board; - using Genode::Cpu_state; switch (_vcpu_context.regs->trapno) { case Cpu_state::INTERRUPTS_START ... 
Cpu_state::INTERRUPTS_END: @@ -103,7 +98,7 @@ void Vm::exception(Cpu & cpu) /* exception method was entered without exception */ break; default: - Genode::error("VM: triggered unknown exception ", + error("VM: triggered unknown exception ", _vcpu_context.regs->trapno, " with error code ", _vcpu_context.regs->errcode, " at ip=", @@ -113,14 +108,10 @@ void Vm::exception(Cpu & cpu) return; }; - enum Svm_exitcodes : Genode::uint64_t { - VMEXIT_INVALID = -1ULL, - VMEXIT_INTR = 0x60, - VMEXIT_NPF = 0x400, - }; - if (_vcpu_context.exitcode == EXIT_INIT) { - _vcpu_context.initialize_svm(cpu, _id.table); + addr_t table_phys_addr = + reinterpret_cast(_id.table); + _vcpu_context.initialize(cpu, table_phys_addr); _vcpu_context.tsc_aux_host = cpu.id(); _vcpu_context.exitcode = EXIT_STARTUP; _pause_vcpu(); @@ -128,26 +119,11 @@ void Vm::exception(Cpu & cpu) return; } - _vcpu_context.exitcode = _vcpu_context.vmcb.read(); + _vcpu_context.exitcode = _vcpu_context.vmcb->get_exitcode(); - switch (_vcpu_context.exitcode) { - case VMEXIT_INVALID: - Genode::error("Vm::exception: invalid SVM state!"); - return; - case 0x40 ... 
0x5f: - Genode::error("Vm::exception: unhandled SVM exception ", - Genode::Hex(_vcpu_context.exitcode)); - return; - case VMEXIT_INTR: - _vcpu_context.exitcode = EXIT_PAUSED; - return; - case VMEXIT_NPF: - _vcpu_context.exitcode = EXIT_NPF; - [[fallthrough]]; - default: + if (_vcpu_context.exitcode != EXIT_PAUSED) { _pause_vcpu(); _context.submit(1); - return; } } @@ -174,13 +150,94 @@ void Vm::_sync_from_vmm() } -Board::Vcpu_context::Vcpu_context(unsigned id, - void *virt_area, - addr_t vmcb_phys_addr) +Board::Vcpu_context::Vcpu_context(unsigned id, Vcpu_data &vcpu_data) : - vmcb(*Genode::construct_at(virt_area, id)), - vmcb_phys_addr(vmcb_phys_addr), - regs(1) + regs(1), + vcpu_data(vcpu_data) { + vmcb = construct_at(vcpu_data.virt_area, id); regs->trapno = TRAP_VMEXIT; } + +void Board::Vcpu_context::read_vcpu_state(Vcpu_state &state) +{ + vmcb->read_vcpu_state(state); + + if (state.ax.charged() || state.cx.charged() || state.dx.charged() || state.bx.charged()) { + regs->rax = state.ax.value(); /* the VMCB's rax is updated by vmcb->read_vcpu_state() */ + regs->rcx = state.cx.value(); + regs->rdx = state.dx.value(); + regs->rbx = state.bx.value(); + } + + if (state.bp.charged() || state.di.charged() || state.si.charged()) { + regs->rdi = state.di.value(); + regs->rsi = state.si.value(); + regs->rbp = state.bp.value(); + } + + if (state.r8 .charged() || state.r9 .charged() || + state.r10.charged() || state.r11.charged() || + state.r12.charged() || state.r13.charged() || + state.r14.charged() || state.r15.charged()) { + + regs->r8 = state.r8.value(); + regs->r9 = state.r9.value(); + regs->r10 = state.r10.value(); + regs->r11 = state.r11.value(); + regs->r12 = state.r12.value(); + regs->r13 = state.r13.value(); + regs->r14 = state.r14.value(); + regs->r15 = state.r15.value(); + } + + if (state.fpu.charged()) { + state.fpu.with_state( + [&](Vcpu_state::Fpu::State const &fpu) { + memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu)); + }); + } +} + +void Board::Vcpu_context::write_vcpu_state(Vcpu_state &state) +{ + state.discharge(); + 
state.exit_reason = (unsigned) exitcode; + + state.fpu.charge([&](Vcpu_state::Fpu::State &fpu) { + memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu)); + }); + + /* SVM will overwrite rax but VMX doesn't. */ + state.ax.charge(regs->rax); + state.cx.charge(regs->rcx); + state.dx.charge(regs->rdx); + state.bx.charge(regs->rbx); + + state.di.charge(regs->rdi); + state.si.charge(regs->rsi); + state.bp.charge(regs->rbp); + + state.r8.charge(regs->r8); + state.r9.charge(regs->r9); + state.r10.charge(regs->r10); + state.r11.charge(regs->r11); + state.r12.charge(regs->r12); + state.r13.charge(regs->r13); + state.r14.charge(regs->r14); + state.r15.charge(regs->r15); + + state.tsc.charge(Hw::Lapic::rdtsc()); + + tsc_aux_guest = Cpu::Ia32_tsc_aux::read(); + state.tsc_aux.charge(tsc_aux_guest); + Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host); + + vmcb->write_vcpu_state(state); +} + + +void Board::Vcpu_context::initialize(Kernel::Cpu &cpu, addr_t table_phys_addr) +{ + vmcb->initialize(cpu, table_phys_addr); +} diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h b/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h index 6dce4cea24..c5d2032391 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h @@ -16,11 +16,28 @@ #include #include +#include #include #include #include #include +using Genode::addr_t; +using Genode::size_t; +using Genode::uint8_t; +using Genode::uint32_t; +using Genode::uint64_t; +using Genode::Mmio; +using Genode::Vcpu_data; +using Genode::Vcpu_state; +using Genode::get_page_size; +using Genode::memset; + +namespace Kernel +{ +class Cpu; +} + namespace Board { struct Msrpm; @@ -32,18 +49,18 @@ namespace Board } -struct alignas(Genode::get_page_size()) Board::Msrpm +struct alignas(get_page_size()) Board::Msrpm { - Genode::uint8_t pad[8192]; + uint8_t pad[8192]; Msrpm(); }; struct -alignas(Genode::get_page_size()) +alignas(get_page_size()) 
Board::Iopm { - Genode::uint8_t pad[12288]; + uint8_t pad[12288]; Iopm(); }; @@ -55,17 +72,17 @@ Board::Iopm */ struct Board::Vmcb_control_area { - enum : Genode::size_t { + enum : size_t { total_size = 1024U, used_guest_size = 0x3E0U }; /* The control area is padded and used via Mmio-like accesses. */ - Genode::uint8_t control_area[used_guest_size]; + uint8_t control_area[used_guest_size]; Vmcb_control_area() { - Genode::memset((void *) this, 0, sizeof(Vmcb_control_area)); + memset((void *) this, 0, sizeof(Vmcb_control_area)); } }; @@ -77,10 +94,10 @@ struct Board::Vmcb_control_area struct Board::Vmcb_reserved_for_host { /* 64bit used by the inherited Mmio class here */ - Genode::addr_t root_vmcb_phys = 0U; + addr_t root_vmcb_phys = 0U; }; static_assert(Board::Vmcb_control_area::total_size - - sizeof(Board::Vmcb_control_area) - sizeof(Genode::Mmio<0>) - + sizeof(Board::Vmcb_control_area) - sizeof(Mmio<0>) - sizeof(Board::Vmcb_reserved_for_host) == 0); @@ -89,28 +106,28 @@ static_assert(Board::Vmcb_control_area::total_size - */ struct Board::Vmcb_state_save_area { - typedef Genode::Vcpu_state::Segment Segment; + typedef Vcpu_state::Segment Segment; Segment es, cs, ss, ds, fs, gs, gdtr, ldtr, idtr, tr; - Genode::uint8_t reserved1[43]; - Genode::uint8_t cpl; - Genode::uint8_t reserved2[4]; - Genode::uint64_t efer; - Genode::uint8_t reserved3[112]; - Genode::uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip; - Genode::uint8_t reserved4[88]; - Genode::uint64_t rsp; - Genode::uint64_t s_cet, ssp, isst_addr; - Genode::uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base; - Genode::uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2; - Genode::uint8_t reserved5[32]; - Genode::uint64_t g_pat; - Genode::uint64_t dbgctl; - Genode::uint64_t br_from; - Genode::uint64_t br_to; - Genode::uint64_t lastexcpfrom; - Genode::uint8_t reserved6[72]; - Genode::uint64_t spec_ctrl; + uint8_t reserved1[43]; + uint8_t cpl; + uint8_t reserved2[4]; + uint64_t efer; + uint8_t reserved3[112]; 
+ uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip; + uint8_t reserved4[88]; + uint64_t rsp; + uint64_t s_cet, ssp, isst_addr; + uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base; + uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2; + uint8_t reserved5[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t lastexcpfrom; + uint8_t reserved6[72]; + uint64_t spec_ctrl; } __attribute__((packed)); @@ -132,10 +149,10 @@ struct Board::Vmcb_state_save_area * In total, this allows Register type access to the VMCB control area and easy * direct access to the VMCB state save area. */ -struct alignas(Genode::get_page_size()) Board::Vmcb +struct alignas(get_page_size()) Board::Vmcb : Board::Vmcb_control_area, - public Genode::Mmio, + public Mmio, Board::Vmcb_reserved_for_host, Board::Vmcb_state_save_area { @@ -143,14 +160,19 @@ struct alignas(Genode::get_page_size()) Board::Vmcb Asid_host = 0, }; - Vmcb(Genode::uint32_t id); - void init(Genode::size_t cpu_id, void * table_ptr); - static Vmcb & host_vmcb(Genode::size_t cpu_id); - static Genode::addr_t dummy_msrpm(); - void enforce_intercepts(Genode::uint32_t desired_primary = 0U, Genode::uint32_t desired_secondary = 0U); - static Genode::addr_t dummy_iopm(); + Vmcb(uint32_t id); + static Vmcb & host_vmcb(size_t cpu_id); + static addr_t dummy_msrpm(); + void enforce_intercepts(uint32_t desired_primary = 0U, uint32_t desired_secondary = 0U); + static addr_t dummy_iopm(); - Genode::uint8_t reserved[Genode::get_page_size() - + void initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr); + void write_vcpu_state(Vcpu_state &state); + void read_vcpu_state(Vcpu_state &state); + void switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context ®s); + uint64_t get_exitcode(); + + uint8_t reserved[get_page_size() - sizeof(Board::Vmcb_state_save_area) - Board::Vmcb_control_area::total_size];