diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc index 7abcc4ea21..213cc8dcda 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/svm.cc @@ -11,6 +11,7 @@ * under the terms of the GNU Affero General Public License version 3. */ +#include #include #include #include @@ -25,10 +26,12 @@ using Kernel::Vm; using Board::Vmcb; -Vmcb::Vmcb(uint32_t id) +Vmcb::Vmcb(addr_t vmcb_page_addr, uint32_t id) : - Mmio({(char *)this, Mmio::SIZE}) + Mmio({(char *)vmcb_page_addr, Mmio::SIZE}) { + memset((void *) vmcb_page_addr, 0, get_page_size()); + write(id); write(dummy_msrpm()); write(dummy_iopm()); @@ -37,16 +40,20 @@ Vmcb::Vmcb(uint32_t id) * Set the guest PAT register to the default value. * See: AMD Vol.2 7.8 Page-Attribute Table Mechanism */ - g_pat = 0x0007040600070406ULL; + write(0x0007040600070406ULL); } Vmcb & Vmcb::host_vmcb(size_t cpu_id) { + alignas(get_page_size()) static uint8_t host_vmcb_pages[get_page_size() * NR_OF_CPUS]; static Constructible host_vmcb[NR_OF_CPUS]; if (!host_vmcb[cpu_id].constructed()) { - host_vmcb[cpu_id].construct(Vmcb::Asid_host); + host_vmcb[cpu_id].construct( + (addr_t) host_vmcb_pages + + get_page_size() * cpu_id, + Asid_host); } return *host_vmcb[cpu_id]; } @@ -65,7 +72,7 @@ void Vmcb::initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr) Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr); root_vmcb_phys = - Core::Platform::core_phys_addr((addr_t)&host_vmcb(cpu.id())); + Core::Platform::core_phys_addr(host_vmcb(cpu.id()).base()); asm volatile ("vmsave" : : "a" (root_vmcb_phys) : "memory"); Cpu::Amd_vm_hsavepa::write((Cpu::Amd_vm_hsavepa::access_t) root_vmcb_phys); @@ -146,41 +153,94 @@ Board::Iopm::Iopm() void Vmcb::write_vcpu_state(Vcpu_state &state) { - typedef Vcpu_state::Range Range; - - state.ax.charge(rax); - state.ip.charge(rip); + state.ax.charge(read()); +
state.ip.charge(read()); /* * SVM doesn't use ip_len, so just leave the old value. * We still have to charge it when charging ip. */ state.ip_len.set_charged(); - state.flags.charge(rflags); - state.sp.charge(rsp); + state.flags.charge(read()); + state.sp.charge(read()); - state.dr7.charge(dr7); + state.dr7.charge(read()); - state.cr0.charge(cr0); - state.cr2.charge(cr2); - state.cr3.charge(cr3); - state.cr4.charge(cr4); + state.cr0.charge(read()); + state.cr2.charge(read()); + state.cr3.charge(read()); + state.cr4.charge(read()); - state.cs.charge(cs); - state.ss.charge(ss); - state.es.charge(es); - state.ds.charge(ds); - state.fs.charge(fs); - state.gs.charge(gs); - state.tr.charge(tr); - state.ldtr.charge(ldtr); - state.gdtr.charge(Range { .limit = gdtr.limit, .base = gdtr.base }); + state.cs.charge(Vcpu_state::Segment { + .sel = Vmcb::cs.read(), + .ar = Vmcb::cs.read(), + .limit = Vmcb::cs.read(), + .base = Vmcb::cs.read(), + }); - state.idtr.charge(Range { .limit = idtr.limit, .base = idtr.base }); + state.ss.charge(Vcpu_state::Segment { + .sel = Vmcb::ss.read(), + .ar = Vmcb::ss.read(), + .limit = Vmcb::ss.read(), + .base = Vmcb::ss.read(), + }); - state.sysenter_cs.charge(sysenter_cs); - state.sysenter_sp.charge(sysenter_esp); - state.sysenter_ip.charge(sysenter_eip); + state.es.charge(Vcpu_state::Segment { + .sel = Vmcb::es.read(), + .ar = Vmcb::es.read(), + .limit = Vmcb::es.read(), + .base = Vmcb::es.read(), + }); + + state.ds.charge(Vcpu_state::Segment { + .sel = Vmcb::ds.read(), + .ar = Vmcb::ds.read(), + .limit = Vmcb::ds.read(), + .base = Vmcb::ds.read(), + }); + + state.fs.charge(Vcpu_state::Segment { + .sel = Vmcb::fs.read(), + .ar = Vmcb::fs.read(), + .limit = Vmcb::fs.read(), + .base = Vmcb::fs.read(), + }); + + state.gs.charge(Vcpu_state::Segment { + .sel = Vmcb::gs.read(), + .ar = Vmcb::gs.read(), + .limit = Vmcb::gs.read(), + .base = Vmcb::gs.read(), + }); + + state.tr.charge(Vcpu_state::Segment { + .sel = Vmcb::tr.read(), + .ar = 
Vmcb::tr.read(), + .limit = Vmcb::tr.read(), + .base = Vmcb::tr.read(), + }); + + state.ldtr.charge(Vcpu_state::Segment { + .sel = Vmcb::ldtr.read(), + .ar = Vmcb::ldtr.read(), + .limit = Vmcb::ldtr.read(), + .base = Vmcb::ldtr.read(), + }); + + state.gdtr.charge(Vcpu_state::Range { + .limit = Vmcb::gdtr.read(), + .base = Vmcb::gdtr.read(), + }); + + state.idtr.charge(Vcpu_state::Range { + .limit = Vmcb::idtr.read(), + .base = Vmcb::idtr.read(), + }); + + + state.sysenter_cs.charge(read()); + state.sysenter_sp.charge(read()); + state.sysenter_ip.charge(read()); state.qual_primary.charge(read()); state.qual_secondary.charge(read()); @@ -200,15 +260,15 @@ void Vmcb::write_vcpu_state(Vcpu_state &state) state.tsc.charge(Hw::Lapic::rdtsc()); state.tsc_offset.charge(read()); - state.efer.charge(efer); + state.efer.charge(read()); /* pdpte not used by SVM */ - state.star.charge(star); - state.lstar.charge(lstar); - state.cstar.charge(cstar); - state.fmask.charge(sfmask); - state.kernel_gs_base.charge(kernel_gs_base); + state.star.charge(read()); + state.lstar.charge(read()); + state.cstar.charge(read()); + state.fmask.charge(read()); + state.kernel_gs_base.charge(read()); /* Task Priority Register, see 15.24 */ state.tpr.charge((unsigned)read()); @@ -218,43 +278,92 @@ void Vmcb::write_vcpu_state(Vcpu_state &state) void Vmcb::read_vcpu_state(Vcpu_state &state) { - if (state.ax.charged()) rax = state.ax.value(); - if (state.flags.charged()) rflags = state.flags.value(); - if (state.sp.charged()) rsp = state.sp.value(); - if (state.ip.charged()) rip = state.ip.value(); + if (state.ax.charged()) write(state.ax.value()); + if (state.flags.charged()) write(state.flags.value()); + if (state.sp.charged()) write(state.sp.value()); + if (state.ip.charged()) write(state.ip.value()); /* ip_len not used by SVM */ - if (state.dr7.charged()) dr7 = state.dr7.value(); + if (state.dr7.charged()) write(state.dr7.value()); - if (state.cr0.charged()) cr0 = state.cr0.value(); - if 
(state.cr2.charged()) cr2 = state.cr2.value(); - if (state.cr3.charged()) cr3 = state.cr3.value(); - if (state.cr4.charged()) cr4 = state.cr4.value(); + if (state.cr0.charged()) write(state.cr0.value()); + if (state.cr2.charged()) write(state.cr2.value()); + if (state.cr3.charged()) write(state.cr3.value()); + if (state.cr4.charged()) write(state.cr4.value()); - if (state.cs.charged()) cs = state.cs.value(); - if (state.ss.charged()) ss = state.ss.value(); + if (state.cs.charged()) { + Vmcb::cs.write(state.cs.value().sel); + Vmcb::cs.write(state.cs.value().ar); + Vmcb::cs.write(state.cs.value().limit); + Vmcb::cs.write(state.cs.value().base); + } - if (state.es.charged()) es = state.es.value(); - if (state.ds.charged()) ds = state.ds.value(); + if (state.ss.charged()) { + Vmcb::ss.write(state.ss.value().sel); + Vmcb::ss.write(state.ss.value().ar); + Vmcb::ss.write(state.ss.value().limit); + Vmcb::ss.write(state.ss.value().base); + } - if (state.fs.charged()) fs = state.fs.value(); - if (state.gs.charged()) gs = state.gs.value(); + if (state.es.charged()) { + Vmcb::es.write(state.es.value().sel); + Vmcb::es.write(state.es.value().ar); + Vmcb::es.write(state.es.value().limit); + Vmcb::es.write(state.es.value().base); + } - if (state.tr.charged()) tr = state.tr.value(); - if (state.ldtr.charged()) ldtr = state.ldtr.value(); + if (state.ds.charged()) { + Vmcb::ds.write(state.ds.value().sel); + Vmcb::ds.write(state.ds.value().ar); + Vmcb::ds.write(state.ds.value().limit); + Vmcb::ds.write(state.ds.value().base); + } + + if (state.fs.charged()) { + Vmcb::fs.write(state.fs.value().sel); + Vmcb::fs.write(state.fs.value().ar); + Vmcb::fs.write(state.fs.value().limit); + Vmcb::fs.write(state.fs.value().base); + } + + if (state.gs.charged()) { + Vmcb::gs.write(state.gs.value().sel); + Vmcb::gs.write(state.gs.value().ar); + Vmcb::gs.write(state.gs.value().limit); + Vmcb::gs.write(state.gs.value().base); + } + + if (state.tr.charged()) { + Vmcb::tr.write(state.tr.value().sel);
+ Vmcb::tr.write(state.tr.value().ar); + Vmcb::tr.write(state.tr.value().limit); + Vmcb::tr.write(state.tr.value().base); + } + + if (state.ldtr.charged()) { + Vmcb::ldtr.write(state.ldtr.value().sel); + Vmcb::ldtr.write(state.ldtr.value().ar); + Vmcb::ldtr.write(state.ldtr.value().limit); + Vmcb::ldtr.write(state.ldtr.value().base); + } if (state.gdtr.charged()) { - gdtr.limit = state.gdtr.value().limit; - gdtr.base = state.gdtr.value().base; + Vmcb::gdtr.write(state.gdtr.value().limit); + Vmcb::gdtr.write(state.gdtr.value().base); } if (state.idtr.charged()) { - idtr.limit = state.idtr.value().limit; - idtr.base = state.idtr.value().base; + Vmcb::idtr.write(state.idtr.value().limit); + Vmcb::idtr.write(state.idtr.value().base); } - if (state.sysenter_cs.charged()) sysenter_cs = state.sysenter_cs.value(); - if (state.sysenter_sp.charged()) sysenter_esp = state.sysenter_sp.value(); - if (state.sysenter_ip.charged()) sysenter_eip = state.sysenter_ip.value(); + if (state.sysenter_cs.charged()) + write(state.sysenter_cs.value()); + + if (state.sysenter_sp.charged()) + write(state.sysenter_sp.value()); + + if (state.sysenter_ip.charged()) + write(state.sysenter_ip.value()); if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) { enforce_intercepts(state.ctrl_primary.value(), @@ -294,21 +403,21 @@ void Vmcb::read_vcpu_state(Vcpu_state &state) } if (state.efer.charged()) { - efer = state.efer.value(); + write(state.efer.value()); } /* pdpte not used by SVM */ - if (state.star.charged()) star = state.star.value(); - if (state.cstar.charged()) cstar = state.cstar.value(); - if (state.lstar.charged()) lstar = state.lstar.value(); - if (state.fmask.charged()) sfmask = state.fmask.value(); - if (state.kernel_gs_base.charged()) kernel_gs_base = state.kernel_gs_base.value(); + if (state.star.charged()) write(state.star.value()); + if (state.cstar.charged()) write(state.cstar.value()); + if (state.lstar.charged()) write(state.lstar.value()); + if 
(state.fmask.charged()) write(state.fmask.value()); + if (state.kernel_gs_base.charged()) + write(state.kernel_gs_base.value()); - if (state.tpr.charged()) { + if (state.tpr.charged()) write(state.tpr.value()); - /* TPR threshold not used on AMD */ - } + /* TPR threshold not used on AMD */ } uint64_t Vmcb::get_exitcode() diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc index b1d47c64e2..4e094e2676 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc @@ -11,6 +11,7 @@ * under the terms of the GNU Affero General Public License version 3. */ +#include "base/internal/page_size.h" #include #include #include @@ -74,7 +75,7 @@ void Vm::proceed(Cpu & cpu) Cpu::Ia32_tsc_aux::write( (Cpu::Ia32_tsc_aux::access_t)_vcpu_context.tsc_aux_guest); - _vcpu_context.vmcb->switch_world(_vcpu_context.vcpu_data.phys_addr, + _vcpu_context.vmcb->switch_world(_vcpu_context.vcpu_data.phys_addr + get_page_size(), *_vcpu_context.regs); /* * This will fall into an interrupt or otherwise jump into @@ -155,7 +156,9 @@ Board::Vcpu_context::Vcpu_context(unsigned id, Vcpu_data &vcpu_data) regs(1), vcpu_data(vcpu_data) { - vmcb = construct_at(vcpu_data.virt_area, id); + vmcb = construct_at(vcpu_data.virt_area, + ((addr_t) vcpu_data.virt_area) + + get_page_size(), id); regs->trapno = TRAP_VMEXIT; } diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h b/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h index c5d2032391..e2f3c5b801 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/svm.h @@ -42,9 +42,6 @@ namespace Board { struct Msrpm; struct Iopm; - struct Vmcb_control_area; - struct Vmcb_reserved_for_host; - struct Vmcb_state_save_area; struct Vmcb; } @@ -66,101 +63,23 @@ Board::Iopm }; - -/* - * VMCB Control area, excluding
the reserved for host part - */ -struct Board::Vmcb_control_area -{ - enum : size_t { - total_size = 1024U, - used_guest_size = 0x3E0U - }; - - /* The control area is padded and used via Mmio-like accesses. */ - uint8_t control_area[used_guest_size]; - - Vmcb_control_area() - { - memset((void *) this, 0, sizeof(Vmcb_control_area)); - } -}; - - -/* - * Part of the VMCB control area that is reserved for host data. - * This uses 16 bytes less to accomodate for the size of the Mmio class. - */ -struct Board::Vmcb_reserved_for_host -{ - /* 64bit used by the inherited Mmio class here */ - addr_t root_vmcb_phys = 0U; -}; -static_assert(Board::Vmcb_control_area::total_size - - sizeof(Board::Vmcb_control_area) - sizeof(Mmio<0>) - - sizeof(Board::Vmcb_reserved_for_host) == - 0); - -/* - * AMD Manual Vol. 2, Table B-2: VMCB Layout, State Save Area - */ -struct Board::Vmcb_state_save_area -{ - typedef Vcpu_state::Segment Segment; - - Segment es, cs, ss, ds, fs, gs, gdtr, ldtr, idtr, tr; - uint8_t reserved1[43]; - uint8_t cpl; - uint8_t reserved2[4]; - uint64_t efer; - uint8_t reserved3[112]; - uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip; - uint8_t reserved4[88]; - uint64_t rsp; - uint64_t s_cet, ssp, isst_addr; - uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base; - uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2; - uint8_t reserved5[32]; - uint64_t g_pat; - uint64_t dbgctl; - uint64_t br_from; - uint64_t br_to; - uint64_t lastexcpfrom; - uint8_t reserved6[72]; - uint64_t spec_ctrl; -} __attribute__((packed)); - - /* * VMCB data structure * See: AMD Manual Vol. 2, Appendix B Layout of VMCB - * - * We construct the VMCB by inheriting from its components. Inheritance is used - * instead of making the components members of the overall structure in order to - * present the interface on the top level. Order of inheritance is important! 
- * - * The Mmio interface is inherited from on this level to achieve the desired - * placement of the Mmio data member address in the host data part and after the - * VMCB control area data. - * The remaining part of the VMCB control area that is reserved for host data - * is then inherited from after the Mmio interface. - * Lastly, the VMCB state save area is inherited from to make its members - * directly available in the VCMB structure. - * In total, this allows Register type access to the VMCB control area and easy - * direct access to the VMCB state save area. */ -struct alignas(get_page_size()) Board::Vmcb +struct Board::Vmcb : - Board::Vmcb_control_area, - public Mmio, - Board::Vmcb_reserved_for_host, - Board::Vmcb_state_save_area + public Mmio { enum { - Asid_host = 0, + Asid_host = 0, + State_off = 1024, }; - Vmcb(uint32_t id); + + addr_t root_vmcb_phys = { 0 }; + + Vmcb(addr_t vmcb_page_addr, uint32_t id); static Vmcb & host_vmcb(size_t cpu_id); static addr_t dummy_msrpm(); void enforce_intercepts(uint32_t desired_primary = 0U, uint32_t desired_secondary = 0U); @@ -172,10 +91,6 @@ struct alignas(get_page_size()) Board::Vmcb void switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context ®s); uint64_t get_exitcode(); - uint8_t reserved[get_page_size() - - sizeof(Board::Vmcb_state_save_area) - - Board::Vmcb_control_area::total_size]; - /* * AMD Manual Vol. 2, Table B-1: VMCB Layout, Control Area */ @@ -351,6 +266,56 @@ struct alignas(get_page_size()) Board::Vmcb struct Vmsa : Register<0x108,64> { struct Vmsa_ptr : Bitfield<12,52> { }; }; -} __attribute__((packed)); + + + /* + * AMD Manual Vol. 2, Table B-2: VMCB Layout, State Save Area + */ + + /* + * Segments are 128bit in size and therefore cannot be represented with + * the current Register Framework. 
+ */ + struct Segment : public Mmio<128> + { + using Mmio<128>::Mmio; + + struct Sel : Register<0x0,16> { }; + struct Ar : Register<0x2,16> { }; + struct Limit : Register<0x4,32> { }; + struct Base : Register<0x8,64> { }; + }; + + Segment es { range_at(State_off + 0x0) }; + Segment cs { range_at(State_off + 0x10) }; + Segment ss { range_at(State_off + 0x20) }; + Segment ds { range_at(State_off + 0x30) }; + Segment fs { range_at(State_off + 0x40) }; + Segment gs { range_at(State_off + 0x50) }; + Segment gdtr { range_at(State_off + 0x60) }; + Segment ldtr { range_at(State_off + 0x70) }; + Segment idtr { range_at(State_off + 0x80) }; + Segment tr { range_at(State_off + 0x90) }; + + struct Efer : Register { }; + struct Cr4 : Register { }; + struct Cr3 : Register { }; + struct Cr0 : Register { }; + struct Dr7 : Register { }; + struct Rflags : Register { }; + struct Rip : Register { }; + struct Rsp : Register { }; + struct Rax : Register { }; + struct Star : Register { }; + struct Lstar : Register { }; + struct Cstar : Register { }; + struct Sfmask : Register { }; + struct Kernel_gs_base : Register { }; + struct Sysenter_cs : Register { }; + struct Sysenter_esp : Register { }; + struct Sysenter_eip : Register { }; + struct Cr2 : Register { }; + struct G_pat : Register { }; +}; #endif /* _INCLUDE__SPEC__PC__SVM_H_ */