1 /*
2  * vmcs.c: VMCS management
3  * Copyright (c) 2004, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <xen/init.h>
19 #include <xen/mm.h>
20 #include <xen/lib.h>
21 #include <xen/param.h>
22 #include <xen/errno.h>
23 #include <xen/domain_page.h>
24 #include <xen/event.h>
25 #include <xen/kernel.h>
26 #include <xen/keyhandler.h>
27 #include <xen/vm_event.h>
28 #include <asm/current.h>
29 #include <asm/cpufeature.h>
30 #include <asm/processor.h>
31 #include <asm/msr.h>
32 #include <asm/xstate.h>
33 #include <asm/hvm/hvm.h>
34 #include <asm/hvm/io.h>
35 #include <asm/hvm/nestedhvm.h>
36 #include <asm/hvm/support.h>
37 #include <asm/hvm/vmx/vmx.h>
38 #include <asm/hvm/vmx/vvmx.h>
39 #include <asm/hvm/vmx/vmcs.h>
40 #include <asm/flushtlb.h>
41 #include <asm/monitor.h>
42 #include <asm/shadow.h>
43 #include <asm/spec_ctrl.h>
44 #include <asm/tboot.h>
45 #include <asm/apic.h>
46 
47 static bool_t __read_mostly opt_vpid_enabled = 1;
48 boolean_param("vpid", opt_vpid_enabled);
49 
50 static bool_t __read_mostly opt_unrestricted_guest_enabled = 1;
51 boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
52 
53 static bool_t __read_mostly opt_apicv_enabled = 1;
54 boolean_param("apicv", opt_apicv_enabled);
55 
56 /*
57  * These two parameters are used to configure the controls for Pause-Loop Exiting:
58  * ple_gap:    upper bound on the amount of time between two successive
59  *             executions of PAUSE in a loop.
60  * ple_window: upper bound on the amount of time a guest is allowed to execute
61  *             in a PAUSE loop.
62  * Time is measured based on a counter that runs at the same rate as the TSC,
63  * see SDM volume 3B, sections 21.6.13 & 22.1.3.
64  */
65 static unsigned int __read_mostly ple_gap = 128;
66 integer_param("ple_gap", ple_gap);
67 static unsigned int __read_mostly ple_window = 4096;
68 integer_param("ple_window", ple_window);
69 
70 static bool __read_mostly opt_ept_pml = true;
71 static s8 __read_mostly opt_ept_ad = -1;
72 int8_t __read_mostly opt_ept_exec_sp = -1;
73 
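/*
 * Parse the "ept=" command line option: a comma-separated list of the
 * boolean sub-options "ad", "pml" and "exec-sp" handled below.
 */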
74 static int __init parse_ept_param(const char *s)
75 {
76     const char *ss;
77     int val, rc = 0;
78 
79     do {
80         ss = strchr(s, ',');
81         if ( !ss )
82             ss = strchr(s, '\0');
83 
84         if ( (val = parse_boolean("ad", s, ss)) >= 0 )
85             opt_ept_ad = val;
86         else if ( (val = parse_boolean("pml", s, ss)) >= 0 )
87             opt_ept_pml = val;
88         else if ( (val = parse_boolean("exec-sp", s, ss)) >= 0 )
89             opt_ept_exec_sp = val;
90         else
91             rc = -EINVAL;
92 
93         s = ss + 1;
94     } while ( *ss );
95 
96     return rc;
97 }
98 custom_param("ept", parse_ept_param);
99 
100 #ifdef CONFIG_HYPFS
101 static char opt_ept_setting[10];
102 
103 static void update_ept_param(void)
104 {
105     if ( opt_ept_exec_sp >= 0 )
106         snprintf(opt_ept_setting, sizeof(opt_ept_setting), "exec-sp=%d",
107                  opt_ept_exec_sp);
108 }
109 
110 static void __init init_ept_param(struct param_hypfs *par)
111 {
112     update_ept_param();
113     custom_runtime_set_var(par, opt_ept_setting);
114 }
115 
116 static int parse_ept_param_runtime(const char *s);
117 custom_runtime_only_param("ept", parse_ept_param_runtime, init_ept_param);
118 
119 static int parse_ept_param_runtime(const char *s)
120 {
121     struct domain *d;
122     int val;
123 
124     if ( !cpu_has_vmx_ept || !hvm_funcs.hap_supported ||
125          !(hvm_funcs.hap_capabilities &
126            (HVM_HAP_SUPERPAGE_2MB | HVM_HAP_SUPERPAGE_1GB)) )
127     {
128         printk("VMX: EPT not available, or not in use - ignoring\n");
129         return 0;
130     }
131 
132     if ( (val = parse_boolean("exec-sp", s, NULL)) < 0 )
133         return -EINVAL;
134 
135     opt_ept_exec_sp = val;
136 
137     update_ept_param();
138     custom_runtime_set_var(param_2_parfs(parse_ept_param_runtime),
139                            opt_ept_setting);
140 
141     rcu_read_lock(&domlist_read_lock);
142     for_each_domain ( d )
143     {
144         /* PV, or HVM Shadow domain?  Not applicable. */
145         if ( !paging_mode_hap(d) )
146             continue;
147 
148         /* Hardware domain? Not applicable. */
149         if ( is_hardware_domain(d) )
150             continue;
151 
152         /* Nested Virt?  Broken and exec_sp forced on to avoid livelocks. */
153         if ( nestedhvm_enabled(d) )
154             continue;
155 
156         /* Setting already matches?  No need to rebuild the p2m. */
157         if ( d->arch.hvm.vmx.exec_sp == val )
158             continue;
159 
160         d->arch.hvm.vmx.exec_sp = val;
161         p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
162     }
163     rcu_read_unlock(&domlist_read_lock);
164 
165     printk("VMX: EPT executable superpages %sabled\n",
166            val ? "en" : "dis");
167 
168     return 0;
169 }
170 #endif
171 
172 /* Dynamic (run-time adjusted) execution control flags. */
173 u32 vmx_pin_based_exec_control __read_mostly;
174 u32 vmx_cpu_based_exec_control __read_mostly;
175 u32 vmx_secondary_exec_control __read_mostly;
176 u32 vmx_vmexit_control __read_mostly;
177 u32 vmx_vmentry_control __read_mostly;
178 u64 vmx_ept_vpid_cap __read_mostly;
179 u64 vmx_vmfunc __read_mostly;
180 bool_t vmx_virt_exception __read_mostly;
181 
182 static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
183 static DEFINE_PER_CPU(paddr_t, current_vmcs);
184 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
185 DEFINE_PER_CPU(bool_t, vmxon);
186 
187 static u32 vmcs_revision_id __read_mostly;
188 u64 __read_mostly vmx_basic_msr;
189 
190 static void __init vmx_display_features(void)
191 {
192     int printed = 0;
193 
194     printk("VMX: Supported advanced features:\n");
195 
196 #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
197     P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
198     P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
199     P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
200     P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
201     P(cpu_has_vmx_vnmi, "Virtual NMI");
202     P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
203     P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
204     P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
205     P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
206     P(cpu_has_vmx_posted_intr_processing, "Posted Interrupt Processing");
207     P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
208     P(cpu_has_vmx_vmfunc, "VM Functions");
209     P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions");
210     P(cpu_has_vmx_pml, "Page Modification Logging");
211     P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
212 #undef P
213 
214     if ( !printed )
215         printk(" - none\n");
216 }
217 
218 static u32 adjust_vmx_controls(
219     const char *name, u32 ctl_min, u32 ctl_opt, u32 msr, bool_t *mismatch)
220 {
221     u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
222 
223     rdmsr(msr, vmx_msr_low, vmx_msr_high);
224 
225     ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
226     ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */
227 
228     /* Ensure minimum (required) set of control bits are supported. */
229     if ( ctl_min & ~ctl )
230     {
231         *mismatch = 1;
232         printk("VMX: CPU%d has insufficient %s (%08x; requires %08x)\n",
233                smp_processor_id(), name, ctl, ctl_min);
234     }
235 
236     return ctl;
237 }
238 
239 static bool_t cap_check(const char *name, u32 expected, u32 saw)
240 {
241     if ( saw != expected )
242         printk("VMX %s: saw %#x expected %#x\n", name, saw, expected);
243     return saw != expected;
244 }
245 
246 static int vmx_init_vmcs_config(void)
247 {
248     u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt;
249     u32 _vmx_pin_based_exec_control;
250     u32 _vmx_cpu_based_exec_control;
251     u32 _vmx_secondary_exec_control = 0;
252     u64 _vmx_ept_vpid_cap = 0;
253     u64 _vmx_misc_cap = 0;
254     u32 _vmx_vmexit_control;
255     u32 _vmx_vmentry_control;
256     u64 _vmx_vmfunc = 0;
257     bool_t mismatch = 0;
258 
259     rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high);
260 
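    /*
     * For each control field, "min" holds the bits Xen strictly requires and
     * "opt" the bits it uses opportunistically; adjust_vmx_controls() drops
     * any optional bit the hardware lacks and flags a mismatch if a required
     * bit is missing.
     */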
261     min = (PIN_BASED_EXT_INTR_MASK |
262            PIN_BASED_NMI_EXITING);
263     opt = (PIN_BASED_VIRTUAL_NMIS |
264            PIN_BASED_POSTED_INTERRUPT);
265     _vmx_pin_based_exec_control = adjust_vmx_controls(
266         "Pin-Based Exec Control", min, opt,
267         MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
268 
269     min = (CPU_BASED_HLT_EXITING |
270            CPU_BASED_VIRTUAL_INTR_PENDING |
271            CPU_BASED_CR8_LOAD_EXITING |
272            CPU_BASED_CR8_STORE_EXITING |
273            CPU_BASED_INVLPG_EXITING |
274            CPU_BASED_CR3_LOAD_EXITING |
275            CPU_BASED_CR3_STORE_EXITING |
276            CPU_BASED_MONITOR_EXITING |
277            CPU_BASED_MWAIT_EXITING |
278            CPU_BASED_MOV_DR_EXITING |
279            CPU_BASED_ACTIVATE_IO_BITMAP |
280            CPU_BASED_USE_TSC_OFFSETING |
281            CPU_BASED_RDTSC_EXITING);
282     opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
283            CPU_BASED_TPR_SHADOW |
284            CPU_BASED_MONITOR_TRAP_FLAG |
285            CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
286     _vmx_cpu_based_exec_control = adjust_vmx_controls(
287         "CPU-Based Exec Control", min, opt,
288         MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
289     _vmx_cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
290     if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
291         _vmx_cpu_based_exec_control &=
292             ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
293 
294     if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
295     {
296         min = 0;
297         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
298                SECONDARY_EXEC_WBINVD_EXITING |
299                SECONDARY_EXEC_ENABLE_EPT |
300                SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
301                SECONDARY_EXEC_ENABLE_RDTSCP |
302                SECONDARY_EXEC_PAUSE_LOOP_EXITING |
303                SECONDARY_EXEC_ENABLE_INVPCID |
304                SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
305                SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
306                SECONDARY_EXEC_XSAVES |
307                SECONDARY_EXEC_TSC_SCALING);
308         rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
309         if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
310             opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
311         if ( opt_vpid_enabled )
312             opt |= SECONDARY_EXEC_ENABLE_VPID;
313         if ( opt_unrestricted_guest_enabled )
314             opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
315         if ( opt_ept_pml )
316             opt |= SECONDARY_EXEC_ENABLE_PML;
317 
318         /*
319          * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
320          * can be set only when "use TPR shadow" is set
321          */
322         if ( (_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) &&
323              opt_apicv_enabled )
324             opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
325                    SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
326                    SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
327 
328         _vmx_secondary_exec_control = adjust_vmx_controls(
329             "Secondary Exec Control", min, opt,
330             MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
331     }
332 
333     /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID is available. */
334     if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT |
335                                         SECONDARY_EXEC_ENABLE_VPID) )
336     {
337         rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
338 
339         if ( !opt_ept_ad )
340             _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
341 
342         /*
343          * Additional sanity checking before using EPT:
344          * 1) the CPU we are running on must support EPT WB, as we will set
345          *    the EPT paging-structure memory type to WB;
346          * 2) the CPU must support the EPT page-walk length of 4, according to
347          *    Intel SDM 25.2.2;
348          * 3) the CPU must support INVEPT all-context invalidation, because we
349          *    will use it as a final resort if other types are not supported.
350          *
351          * Or we just don't use EPT.
352          */
353         if ( !(_vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB) ||
354              !(_vmx_ept_vpid_cap & VMX_EPT_WALK_LENGTH_4_SUPPORTED) ||
355              !(_vmx_ept_vpid_cap & VMX_EPT_INVEPT_ALL_CONTEXT) )
356             _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
357 
358         /*
359          * The CPU must support INVVPID all-context invalidation, because we
360          * will use it as a final resort if other types are not supported.
361          *
362          * Or we just don't use VPID.
363          */
364         if ( !(_vmx_ept_vpid_cap & VMX_VPID_INVVPID_ALL_CONTEXT) )
365             _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
366 
367         /* EPT A/D bits are required for PML. */
368         if ( !(_vmx_ept_vpid_cap & VMX_EPT_AD_BIT) )
369             _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
370     }
371 
372     if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
373     {
374         /*
375          * To use EPT we expect to be able to clear certain intercepts.
376          * We check VMX_BASIC_MSR[55] to correctly handle default controls.
377          */
378         uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS;
379         if ( vmx_basic_msr_high & (VMX_BASIC_DEFAULT1_ZERO >> 32) )
380             msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
381         rdmsr(msr, must_be_one, must_be_zero);
382         if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
383                             CPU_BASED_CR3_LOAD_EXITING |
384                             CPU_BASED_CR3_STORE_EXITING) )
385             _vmx_secondary_exec_control &=
386                 ~(SECONDARY_EXEC_ENABLE_EPT |
387                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
388     }
389 
390     /* PML cannot be supported if EPT is not used */
391     if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) )
392         _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
393 
394     /* Turn off opt_ept_pml if PML feature is not present. */
395     if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) )
396         opt_ept_pml = false;
397 
398     if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
399           ple_gap == 0 )
400     {
401         if ( !vmx_pin_based_exec_control )
402             printk(XENLOG_INFO "Disabling Pause-Loop Exiting.\n");
403         _vmx_secondary_exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
404     }
405 
406     min = VM_EXIT_ACK_INTR_ON_EXIT;
407     opt = (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
408            VM_EXIT_LOAD_HOST_EFER | VM_EXIT_CLEAR_BNDCFGS);
409     min |= VM_EXIT_IA32E_MODE;
410     _vmx_vmexit_control = adjust_vmx_controls(
411         "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch);
412 
413     /*
414      * "Process posted interrupt" can be set only when "virtual-interrupt
415      * delivery" and "acknowledge interrupt on exit" is set. For the latter
416      * is a minimal requirement, only check the former, which is optional.
417      */
418     if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) )
419         _vmx_pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT;
420 
421     if ( iommu_intpost &&
422          !(_vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
423     {
424         printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted "
425                "Interrupt is not enabled\n");
426         iommu_intpost = 0;
427     }
428 
429     /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */
430     if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS )
431     {
432         rdmsrl(MSR_IA32_VMX_VMFUNC, _vmx_vmfunc);
433 
434         /*
435          * VMFUNC leaf 0 (EPTP switching) must be supported.
436          *
437          * Or we just don't use VMFUNC.
438          */
439         if ( !(_vmx_vmfunc & VMX_VMFUNC_EPTP_SWITCHING) )
440             _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VM_FUNCTIONS;
441     }
442 
443     /* Virtualization exceptions are only enabled if VMFUNC is enabled */
444     if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
445         _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS;
446 
447     min = 0;
448     opt = (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER |
449            VM_ENTRY_LOAD_BNDCFGS);
450     _vmx_vmentry_control = adjust_vmx_controls(
451         "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch);
452 
453     if ( mismatch )
454         return -EINVAL;
455 
456     if ( !vmx_pin_based_exec_control )
457     {
458         /* First time through. */
459         vmcs_revision_id           = vmx_basic_msr_low & VMX_BASIC_REVISION_MASK;
460         vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
461         vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
462         vmx_secondary_exec_control = _vmx_secondary_exec_control;
463         vmx_ept_vpid_cap           = _vmx_ept_vpid_cap;
464         vmx_vmexit_control         = _vmx_vmexit_control;
465         vmx_vmentry_control        = _vmx_vmentry_control;
466         vmx_basic_msr              = ((u64)vmx_basic_msr_high << 32) |
467                                      vmx_basic_msr_low;
468         vmx_vmfunc                 = _vmx_vmfunc;
469         vmx_virt_exception         = !!(_vmx_secondary_exec_control &
470                                        SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
471         vmx_display_features();
472 
473         /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
474         if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) >
475              PAGE_SIZE )
476         {
477             printk("VMX: CPU%d VMCS size is too big (%Lu bytes)\n",
478                    smp_processor_id(),
479                    vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
480             return -EINVAL;
481         }
482     }
483     else
484     {
485         /* Globals are already initialised: re-check them. */
486         mismatch |= cap_check(
487             "VMCS revision ID",
488             vmcs_revision_id, vmx_basic_msr_low & VMX_BASIC_REVISION_MASK);
489         mismatch |= cap_check(
490             "Pin-Based Exec Control",
491             vmx_pin_based_exec_control, _vmx_pin_based_exec_control);
492         mismatch |= cap_check(
493             "CPU-Based Exec Control",
494             vmx_cpu_based_exec_control, _vmx_cpu_based_exec_control);
495         mismatch |= cap_check(
496             "Secondary Exec Control",
497             vmx_secondary_exec_control, _vmx_secondary_exec_control);
498         mismatch |= cap_check(
499             "VMExit Control",
500             vmx_vmexit_control, _vmx_vmexit_control);
501         mismatch |= cap_check(
502             "VMEntry Control",
503             vmx_vmentry_control, _vmx_vmentry_control);
504         mismatch |= cap_check(
505             "EPT and VPID Capability",
506             vmx_ept_vpid_cap, _vmx_ept_vpid_cap);
507         mismatch |= cap_check(
508             "VMFUNC Capability",
509             vmx_vmfunc, _vmx_vmfunc);
510         if ( cpu_has_vmx_ins_outs_instr_info !=
511              !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)) )
512         {
513             printk("VMX INS/OUTS Instruction Info: saw %d expected %d\n",
514                    !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)),
515                    cpu_has_vmx_ins_outs_instr_info);
516             mismatch = 1;
517         }
518         if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) !=
519              ((vmx_basic_msr & VMX_BASIC_VMCS_SIZE_MASK) >> 32) )
520         {
521             printk("VMX: CPU%d unexpected VMCS size %Lu\n",
522                    smp_processor_id(),
523                    vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
524             mismatch = 1;
525         }
526         if ( mismatch )
527         {
528             printk("VMX: Capabilities fatally differ between CPU%d and CPU0\n",
529                    smp_processor_id());
530             return -EINVAL;
531         }
532     }
533 
534     /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
535     if ( vmx_basic_msr_high & (VMX_BASIC_32BIT_ADDRESSES >> 32) )
536     {
537         printk("VMX: CPU%d limits VMX structure pointers to 32 bits\n",
538                smp_processor_id());
539         return -EINVAL;
540     }
541 
542     /* Require Write-Back (WB) memory type for VMCS accesses. */
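    /*
     * MASK & -MASK isolates the lowest set bit of the mask, so the division
     * below right-justifies the memory-type field.
     */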
543     opt = (vmx_basic_msr_high & (VMX_BASIC_MEMORY_TYPE_MASK >> 32)) /
544           ((VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) >> 32);
545     if ( opt != MTRR_TYPE_WRBACK )
546     {
547         printk("VMX: CPU%d has unexpected VMCS access type %u\n",
548                smp_processor_id(), opt);
549         return -EINVAL;
550     }
551 
552     return 0;
553 }
554 
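/*
 * Allocate and zero one page for use as a VMCS (or VMXON region), stamping
 * it with the VMCS revision identifier reported by the hardware.
 */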
555 static paddr_t vmx_alloc_vmcs(void)
556 {
557     struct page_info *pg;
558     struct vmcs_struct *vmcs;
559 
560     if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
561     {
562         gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
563         return 0;
564     }
565 
566     vmcs = __map_domain_page(pg);
567     clear_page(vmcs);
568     vmcs->vmcs_revision_id = vmcs_revision_id;
569     unmap_domain_page(vmcs);
570 
571     return page_to_maddr(pg);
572 }
573 
574 static void vmx_free_vmcs(paddr_t pa)
575 {
576     free_domheap_page(maddr_to_page(pa));
577 }
578 
579 static void __vmx_clear_vmcs(void *info)
580 {
581     struct vcpu *v = info;
582     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
583 
584     /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
585     ASSERT(!local_irq_is_enabled());
586 
587     if ( vmx->active_cpu == smp_processor_id() )
588     {
589         __vmpclear(vmx->vmcs_pa);
590         if ( vmx->vmcs_shadow_maddr )
591             __vmpclear(vmx->vmcs_shadow_maddr);
592 
593         vmx->active_cpu = -1;
594         vmx->launched   = 0;
595 
596         list_del(&vmx->active_list);
597 
598         if ( vmx->vmcs_pa == this_cpu(current_vmcs) )
599             this_cpu(current_vmcs) = 0;
600     }
601 }
602 
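/*
 * Flush the VMCS of @v back to memory.  If it is currently active on a
 * remote CPU, do so via an IPI to that CPU.
 */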
603 static void vmx_clear_vmcs(struct vcpu *v)
604 {
605     int cpu = v->arch.hvm.vmx.active_cpu;
606 
607     if ( cpu != -1 )
608         on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1);
609 }
610 
611 static void vmx_load_vmcs(struct vcpu *v)
612 {
613     unsigned long flags;
614 
615     local_irq_save(flags);
616 
617     if ( v->arch.hvm.vmx.active_cpu == -1 )
618     {
619         list_add(&v->arch.hvm.vmx.active_list, &this_cpu(active_vmcs_list));
620         v->arch.hvm.vmx.active_cpu = smp_processor_id();
621     }
622 
623     ASSERT(v->arch.hvm.vmx.active_cpu == smp_processor_id());
624 
625     __vmptrld(v->arch.hvm.vmx.vmcs_pa);
626     this_cpu(current_vmcs) = v->arch.hvm.vmx.vmcs_pa;
627 
628     local_irq_restore(flags);
629 }
630 
631 void vmx_vmcs_reload(struct vcpu *v)
632 {
633     /*
634      * As we may be running with interrupts disabled, we can't acquire
635      * v->arch.hvm.vmx.vmcs_lock here. However, with interrupts disabled
636      * the VMCS can't be taken away from us anymore if we still own it.
637      */
638     ASSERT(v->is_running || !local_irq_is_enabled());
639     if ( v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs) )
640         return;
641 
642     vmx_load_vmcs(v);
643 }
644 
645 int vmx_cpu_up_prepare(unsigned int cpu)
646 {
647     /*
648      * If nvmx_cpu_up_prepare() fails, do not return failure; just fall back
649      * to legacy mode for vvmcs synchronization.
650      */
651     if ( nvmx_cpu_up_prepare(cpu) != 0 )
652         printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
653 
654     if ( per_cpu(vmxon_region, cpu) )
655         return 0;
656 
657     per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
658     if ( per_cpu(vmxon_region, cpu) )
659         return 0;
660 
661     printk("CPU%d: Could not allocate host VMCS\n", cpu);
662     nvmx_cpu_dead(cpu);
663     return -ENOMEM;
664 }
665 
666 void vmx_cpu_dead(unsigned int cpu)
667 {
668     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
669     per_cpu(vmxon_region, cpu) = 0;
670     nvmx_cpu_dead(cpu);
671     vmx_pi_desc_fixup(cpu);
672 }
673 
674 static int _vmx_cpu_up(bool bsp)
675 {
676     u32 eax, edx;
677     int rc, bios_locked, cpu = smp_processor_id();
678     u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;
679 
680     BUG_ON(!(read_cr4() & X86_CR4_VMXE));
681 
682     /*
683      * Ensure the current processor operating mode meets
684      * the required CR0 fixed bits in VMX operation.
685      */
686     cr0 = read_cr0();
687     rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
688     rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
689     if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
690     {
691         printk("CPU%d: some settings of host CR0 are "
692                "not allowed in VMX operation.\n", cpu);
693         return -EINVAL;
694     }
695 
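    /*
     * If the firmware has locked IA32_FEATURE_CONTROL, VMXON is only usable
     * in the mode(s) it enabled; otherwise set and lock the enable bits
     * ourselves below.
     */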
696     rdmsr(MSR_IA32_FEATURE_CONTROL, eax, edx);
697 
698     bios_locked = !!(eax & IA32_FEATURE_CONTROL_LOCK);
699     if ( bios_locked )
700     {
701         if ( !(eax & (tboot_in_measured_env()
702                       ? IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX
703                       : IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX)) )
704         {
705             printk("CPU%d: VMX disabled by BIOS.\n", cpu);
706             return -EINVAL;
707         }
708     }
709     else
710     {
711         eax  = IA32_FEATURE_CONTROL_LOCK;
712         eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
713         if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) )
714             eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX;
715         wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0);
716     }
717 
718     if ( (rc = vmx_init_vmcs_config()) != 0 )
719         return rc;
720 
721     INIT_LIST_HEAD(&this_cpu(active_vmcs_list));
722 
723     if ( bsp && (rc = vmx_cpu_up_prepare(cpu)) != 0 )
724         return rc;
725 
726     switch ( __vmxon(this_cpu(vmxon_region)) )
727     {
728     case -2: /* #UD or #GP */
729         if ( bios_locked &&
730              test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) &&
731              (!(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX) ||
732               !(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX)) )
733         {
734             printk("CPU%d: VMXON failed: perhaps because of TXT settings "
735                    "in your BIOS configuration?\n", cpu);
736             printk(" --> Disable TXT in your BIOS unless using a secure "
737                    "bootloader.\n");
738             return -EINVAL;
739         }
740         /* fall through */
741     case -1: /* CF==1 or ZF==1 */
742         printk("CPU%d: unexpected VMXON failure\n", cpu);
743         return -EINVAL;
744     case 0: /* success */
745         this_cpu(vmxon) = 1;
746         break;
747     default:
748         BUG();
749     }
750 
751     hvm_asid_init(cpu_has_vmx_vpid ? (1u << VMCS_VPID_WIDTH) : 0);
752 
753     if ( cpu_has_vmx_ept )
754         ept_sync_all();
755 
756     if ( cpu_has_vmx_vpid )
757         vpid_sync_all();
758 
759     vmx_pi_per_cpu_init(cpu);
760 
761     return 0;
762 }
763 
764 int vmx_cpu_up()
765 {
766     return _vmx_cpu_up(false);
767 }
768 
769 void vmx_cpu_down(void)
770 {
771     struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
772     unsigned long flags;
773 
774     if ( !this_cpu(vmxon) )
775         return;
776 
777     local_irq_save(flags);
778 
779     while ( !list_empty(active_vmcs_list) )
780         __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
781                                     struct vcpu, arch.hvm.vmx.active_list));
782 
783     BUG_ON(!(read_cr4() & X86_CR4_VMXE));
784     this_cpu(vmxon) = 0;
785     __vmxoff();
786 
787     local_irq_restore(flags);
788 }
789 
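/*
 * Per-CPU record of a remote vCPU whose VMCS is temporarily loaded on this
 * CPU via vmx_vmcs_enter(); "count" allows enter/exit pairs to nest.
 */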
790 struct foreign_vmcs {
791     struct vcpu *v;
792     unsigned int count;
793 };
794 static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
795 
796 bool_t vmx_vmcs_try_enter(struct vcpu *v)
797 {
798     struct foreign_vmcs *fv;
799 
800     /*
801      * NB. We must *always* run an HVM VCPU on its own VMCS, except for
802      * vmx_vmcs_enter/exit and scheduling tail critical regions.
803      */
804     if ( likely(v == current) )
805         return v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs);
806 
807     fv = &this_cpu(foreign_vmcs);
808 
809     if ( fv->v == v )
810     {
811         BUG_ON(fv->count == 0);
812     }
813     else
814     {
815         BUG_ON(fv->v != NULL);
816         BUG_ON(fv->count != 0);
817 
818         vcpu_pause(v);
819         spin_lock(&v->arch.hvm.vmx.vmcs_lock);
820 
821         vmx_clear_vmcs(v);
822         vmx_load_vmcs(v);
823 
824         fv->v = v;
825     }
826 
827     fv->count++;
828 
829     return 1;
830 }
831 
832 void vmx_vmcs_enter(struct vcpu *v)
833 {
834     bool_t okay = vmx_vmcs_try_enter(v);
835 
836     ASSERT(okay);
837 }
838 
839 void vmx_vmcs_exit(struct vcpu *v)
840 {
841     struct foreign_vmcs *fv;
842 
843     if ( likely(v == current) )
844         return;
845 
846     fv = &this_cpu(foreign_vmcs);
847     BUG_ON(fv->v != v);
848     BUG_ON(fv->count == 0);
849 
850     if ( --fv->count == 0 )
851     {
852         /* Don't confuse vmx_do_resume (for @v or @current!) */
853         vmx_clear_vmcs(v);
854         if ( is_hvm_vcpu(current) )
855             vmx_load_vmcs(current);
856 
857         spin_unlock(&v->arch.hvm.vmx.vmcs_lock);
858         vcpu_unpause(v);
859 
860         fv->v = NULL;
861     }
862 }
863 
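/*
 * Write the per-pCPU host state (GDT/IDT/TR bases, stack pointer) into the
 * VMCS; rerun after the vCPU moves to a different physical CPU (see the
 * hostenv_migrated handling in vmx_vmcs_switch()).
 */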
864 static void vmx_set_host_env(struct vcpu *v)
865 {
866     unsigned int cpu = smp_processor_id();
867 
868     __vmwrite(HOST_GDTR_BASE,
869               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
870     __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
871 
872     __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
873 
874     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
875 
876     /*
877      * Skip end of cpu_user_regs when entering the hypervisor because the
878      * CPU does not save context onto the stack.  SS, RSP, CS, RIP, RFLAGS, etc.
879      * all get saved into the VMCS instead.
880      */
881     __vmwrite(HOST_RSP,
882               (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
883 }
884 
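/*
 * The MSR bitmap covers two MSR ranges (0x00000000-0x00001fff and
 * 0xc0000000-0xc0001fff), each with separate read and write bitmaps; MSRs
 * outside these ranges are always intercepted.
 */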
885 void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr,
886                              enum vmx_msr_intercept_type type)
887 {
888     struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
889     struct domain *d = v->domain;
890 
891     /* VMX MSR bitmap supported? */
892     if ( msr_bitmap == NULL )
893         return;
894 
895     if ( unlikely(monitored_msr(d, msr)) )
896         return;
897 
898     if ( msr <= 0x1fff )
899     {
900         if ( type & VMX_MSR_R )
901             clear_bit(msr, msr_bitmap->read_low);
902         if ( type & VMX_MSR_W )
903             clear_bit(msr, msr_bitmap->write_low);
904     }
905     else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
906     {
907         msr &= 0x1fff;
908         if ( type & VMX_MSR_R )
909             clear_bit(msr, msr_bitmap->read_high);
910         if ( type & VMX_MSR_W )
911             clear_bit(msr, msr_bitmap->write_high);
912     }
913     else
914         ASSERT(!"MSR out of range for interception\n");
915 }
916 
917 void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr,
918                            enum vmx_msr_intercept_type type)
919 {
920     struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
921 
922     /* VMX MSR bitmap supported? */
923     if ( msr_bitmap == NULL )
924         return;
925 
926     if ( msr <= 0x1fff )
927     {
928         if ( type & VMX_MSR_R )
929             set_bit(msr, msr_bitmap->read_low);
930         if ( type & VMX_MSR_W )
931             set_bit(msr, msr_bitmap->write_low);
932     }
933     else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
934     {
935         msr &= 0x1fff;
936         if ( type & VMX_MSR_R )
937             set_bit(msr, msr_bitmap->read_high);
938         if ( type & VMX_MSR_W )
939             set_bit(msr, msr_bitmap->write_high);
940     }
941     else
942         ASSERT(!"MSR out of range for interception\n");
943 }
944 
945 bool vmx_msr_is_intercepted(struct vmx_msr_bitmap *msr_bitmap,
946                             unsigned int msr, bool is_write)
947 {
948     if ( msr <= 0x1fff )
949         return test_bit(msr, is_write ? msr_bitmap->write_low
950                                       : msr_bitmap->read_low);
951     else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
952         return test_bit(msr & 0x1fff, is_write ? msr_bitmap->write_high
953                                                : msr_bitmap->read_high);
954     else
955         /* MSRs outside the bitmap ranges are always intercepted. */
956         return true;
957 }
958 
959 
960 /*
961  * Switch the VMCS between the layer 1 and layer 2 guests.
962  */
963 void vmx_vmcs_switch(paddr_t from, paddr_t to)
964 {
965     struct vmx_vcpu *vmx = &current->arch.hvm.vmx;
966     spin_lock(&vmx->vmcs_lock);
967 
968     __vmpclear(from);
969     if ( vmx->vmcs_shadow_maddr )
970         __vmpclear(vmx->vmcs_shadow_maddr);
971     __vmptrld(to);
972 
973     vmx->vmcs_pa = to;
974     vmx->launched = 0;
975     this_cpu(current_vmcs) = to;
976 
977     if ( vmx->hostenv_migrated )
978     {
979         vmx->hostenv_migrated = 0;
980         vmx_set_host_env(current);
981     }
982 
983     spin_unlock(&vmx->vmcs_lock);
984 }
985 
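/*
 * Temporarily make the nested guest's shadow VMCS current so that the
 * VMREAD/VMWRITE helpers below operate on the virtual VMCS.
 */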
986 void virtual_vmcs_enter(const struct vcpu *v)
987 {
988     __vmptrld(v->arch.hvm.vmx.vmcs_shadow_maddr);
989 }
990 
991 void virtual_vmcs_exit(const struct vcpu *v)
992 {
993     paddr_t cur = this_cpu(current_vmcs);
994 
995     __vmpclear(v->arch.hvm.vmx.vmcs_shadow_maddr);
996     if ( cur )
997         __vmptrld(cur);
998 }
999 
1000 u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
1001 {
1002     u64 res;
1003 
1004     virtual_vmcs_enter(v);
1005     __vmread(vmcs_encoding, &res);
1006     virtual_vmcs_exit(v);
1007 
1008     return res;
1009 }
1010 
1011 enum vmx_insn_errno virtual_vmcs_vmread_safe(const struct vcpu *v,
1012                                              u32 vmcs_encoding, u64 *val)
1013 {
1014     enum vmx_insn_errno ret;
1015 
1016     virtual_vmcs_enter(v);
1017     ret = vmread_safe(vmcs_encoding, val);
1018     virtual_vmcs_exit(v);
1019 
1020     return ret;
1021 }
1022 
1023 void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
1024 {
1025     virtual_vmcs_enter(v);
1026     __vmwrite(vmcs_encoding, val);
1027     virtual_vmcs_exit(v);
1028 }
1029 
1030 enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v,
1031                                               u32 vmcs_encoding, u64 val)
1032 {
1033     enum vmx_insn_errno ret;
1034 
1035     virtual_vmcs_enter(v);
1036     ret = vmwrite_safe(vmcs_encoding, val);
1037     virtual_vmcs_exit(v);
1038 
1039     return ret;
1040 }
1041 
1042 /*
1043  * This function is only called in a vCPU's initialization phase,
1044  * so we can update the posted-interrupt descriptor in a non-atomic way.
1045  */
1046 static void pi_desc_init(struct vcpu *v)
1047 {
1048     v->arch.hvm.vmx.pi_desc.nv = posted_intr_vector;
1049 
1050     /*
1051      * Mark NDST as invalid; this invalid value is then used as a marker
1052      * of whether NDST needs updating in vmx_pi_hooks_assign().
1053      */
1054     v->arch.hvm.vmx.pi_desc.ndst = APIC_INVALID_DEST;
1055 }
1056 
1057 static int construct_vmcs(struct vcpu *v)
1058 {
1059     struct domain *d = v->domain;
1060     u32 vmexit_ctl = vmx_vmexit_control;
1061     u32 vmentry_ctl = vmx_vmentry_control;
1062     int rc = 0;
1063 
1064     vmx_vmcs_enter(v);
1065 
1066     /* VMCS controls. */
1067     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
1068 
1069     v->arch.hvm.vmx.exec_control = vmx_cpu_based_exec_control;
1070     if ( d->arch.vtsc && !cpu_has_vmx_tsc_scaling )
1071         v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
1072 
1073     v->arch.hvm.vmx.secondary_exec_control = vmx_secondary_exec_control;
1074 
1075     /*
1076      * Disable features which we don't want active by default:
1077      *  - Descriptor table exiting only if wanted by introspection
1078      *  - x2APIC - default is xAPIC mode
1079      *  - VPID settings chosen at VMEntry time
1080      *  - VMCS Shadowing only when in nested VMX mode
1081      *  - PML only when logdirty is active
1082      *  - VMFUNC/#VE only if wanted by altp2m
1083      */
1084     v->arch.hvm.vmx.secondary_exec_control &=
1085         ~(SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
1086           SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
1087           SECONDARY_EXEC_ENABLE_VPID |
1088           SECONDARY_EXEC_ENABLE_VMCS_SHADOWING |
1089           SECONDARY_EXEC_ENABLE_PML |
1090           SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
1091           SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
1092 
1093     if ( paging_mode_hap(d) )
1094     {
1095         v->arch.hvm.vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
1096                                           CPU_BASED_CR3_LOAD_EXITING |
1097                                           CPU_BASED_CR3_STORE_EXITING);
1098     }
1099     else
1100     {
1101         v->arch.hvm.vmx.secondary_exec_control &=
1102             ~(SECONDARY_EXEC_ENABLE_EPT |
1103               SECONDARY_EXEC_UNRESTRICTED_GUEST |
1104               SECONDARY_EXEC_ENABLE_INVPCID);
1105         vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT |
1106                         VM_EXIT_LOAD_HOST_PAT);
1107         vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT;
1108     }
1109 
1110     /* Do not enable Monitor Trap Flag unless single-step debugging is started. */
1111     v->arch.hvm.vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
1112 
1113     if ( !has_vlapic(d) )
1114     {
1115         /* Disable virtual apics, TPR */
1116         v->arch.hvm.vmx.secondary_exec_control &=
1117             ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES
1118               | SECONDARY_EXEC_APIC_REGISTER_VIRT
1119               | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
1120         v->arch.hvm.vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
1121 
1122         /* In turn, disable posted interrupts. */
1123         __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
1124                   vmx_pin_based_exec_control & ~PIN_BASED_POSTED_INTERRUPT);
1125     }
1126 
1127     vmx_update_cpu_exec_control(v);
1128 
1129     __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
1130     __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
1131 
1132     if ( cpu_has_vmx_ple )
1133     {
1134         __vmwrite(PLE_GAP, ple_gap);
1135         __vmwrite(PLE_WINDOW, ple_window);
1136     }
1137 
1138     if ( cpu_has_vmx_secondary_exec_control )
1139         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1140                   v->arch.hvm.vmx.secondary_exec_control);
1141 
1142     /* MSR access bitmap. */
1143     if ( cpu_has_vmx_msr_bitmap )
1144     {
1145         struct vmx_msr_bitmap *msr_bitmap = alloc_xenheap_page();
1146 
1147         if ( msr_bitmap == NULL )
1148         {
1149             rc = -ENOMEM;
1150             goto out;
1151         }
1152 
1153         memset(msr_bitmap, ~0, PAGE_SIZE);
1154         v->arch.hvm.vmx.msr_bitmap = msr_bitmap;
1155         __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
1156 
1157         vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW);
1158         vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW);
1159         vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW);
1160         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
1161         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
1162         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
1163         if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) )
1164             vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
1165         if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
1166              (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
1167             vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
1168     }
1169 
1170     /* I/O access bitmap. */
1171     __vmwrite(IO_BITMAP_A, __pa(d->arch.hvm.io_bitmap));
1172     __vmwrite(IO_BITMAP_B, __pa(d->arch.hvm.io_bitmap) + PAGE_SIZE);
1173 
1174     if ( cpu_has_vmx_virtual_intr_delivery )
1175     {
1176         unsigned int i;
1177 
1178         /* EOI-exit bitmap */
1179         bitmap_zero(v->arch.hvm.vmx.eoi_exit_bitmap, X86_NR_VECTORS);
1180         for ( i = 0; i < ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap); ++i )
1181             __vmwrite(EOI_EXIT_BITMAP(i), 0);
1182 
1183         /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
1184         __vmwrite(GUEST_INTR_STATUS, 0);
1185     }
1186 
1187     if ( cpu_has_vmx_posted_intr_processing )
1188     {
1189         if ( iommu_intpost )
1190             pi_desc_init(v);
1191 
1192         __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm.vmx.pi_desc));
1193         __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
1194     }
1195 
1196     /* Host data selectors. */
1197     __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
1198     __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
1199     __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
1200     __vmwrite(HOST_FS_SELECTOR, 0);
1201     __vmwrite(HOST_GS_SELECTOR, 0);
1202     __vmwrite(HOST_FS_BASE, 0);
1203     __vmwrite(HOST_GS_BASE, 0);
1204     __vmwrite(HOST_TR_SELECTOR, TSS_SELECTOR);
1205 
1206     /* Host control registers. */
1207     v->arch.hvm.vmx.host_cr0 = read_cr0() & ~X86_CR0_TS;
1208     if ( !v->arch.fully_eager_fpu )
1209         v->arch.hvm.vmx.host_cr0 |= X86_CR0_TS;
1210     __vmwrite(HOST_CR0, v->arch.hvm.vmx.host_cr0);
1211     __vmwrite(HOST_CR4, mmu_cr4_features);
1212     if ( cpu_has_vmx_efer )
1213         __vmwrite(HOST_EFER, read_efer());
1214 
1215     /* Host CS:RIP. */
1216     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
1217     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
1218 
1219     /* Host SYSENTER CS:RIP. */
1220     __vmwrite(HOST_SYSENTER_CS, IS_ENABLED(CONFIG_PV) ? __HYPERVISOR_CS : 0);
1221     __vmwrite(HOST_SYSENTER_EIP,
1222               IS_ENABLED(CONFIG_PV) ? (unsigned long)sysenter_entry : 0);
1223 
1224     /* MSR intercepts. */
1225     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
1226     __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
1227     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
1228 
1229     __vmwrite(VM_ENTRY_INTR_INFO, 0);
1230 
1231     __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
1232     __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
1233     v->arch.hvm.vmx.cr4_host_mask = ~0UL;
1234 
1235     __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
1236     __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
1237 
1238     __vmwrite(CR3_TARGET_COUNT, 0);
1239 
1240     __vmwrite(GUEST_ACTIVITY_STATE, 0);
1241 
1242     /* Guest segment bases. */
1243     __vmwrite(GUEST_ES_BASE, 0);
1244     __vmwrite(GUEST_SS_BASE, 0);
1245     __vmwrite(GUEST_DS_BASE, 0);
1246     __vmwrite(GUEST_FS_BASE, 0);
1247     __vmwrite(GUEST_GS_BASE, 0);
1248     __vmwrite(GUEST_CS_BASE, 0);
1249 
1250     /* Guest segment limits. */
1251     __vmwrite(GUEST_ES_LIMIT, ~0u);
1252     __vmwrite(GUEST_SS_LIMIT, ~0u);
1253     __vmwrite(GUEST_DS_LIMIT, ~0u);
1254     __vmwrite(GUEST_FS_LIMIT, ~0u);
1255     __vmwrite(GUEST_GS_LIMIT, ~0u);
1256     __vmwrite(GUEST_CS_LIMIT, ~0u);
1257 
1258     /* Guest segment AR bytes. */
1259     __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
1260     __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
1261     __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
1262     __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
1263     __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
1264     __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
1265 
1266     /* Guest IDT. */
1267     __vmwrite(GUEST_IDTR_BASE, 0);
1268     __vmwrite(GUEST_IDTR_LIMIT, 0);
1269 
1270     /* Guest GDT. */
1271     __vmwrite(GUEST_GDTR_BASE, 0);
1272     __vmwrite(GUEST_GDTR_LIMIT, 0);
1273 
1274     /* Guest LDT. */
1275     __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
1276     __vmwrite(GUEST_LDTR_SELECTOR, 0);
1277     __vmwrite(GUEST_LDTR_BASE, 0);
1278     __vmwrite(GUEST_LDTR_LIMIT, 0);
1279 
1280     /* Guest TSS. */
1281     __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
1282     __vmwrite(GUEST_TR_BASE, 0);
1283     __vmwrite(GUEST_TR_LIMIT, 0xff);
1284 
1285     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
1286     __vmwrite(GUEST_DR7, 0);
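    /* No shadow VMCS is linked, so the VMCS link pointer must read all-ones. */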
1287     __vmwrite(VMCS_LINK_POINTER, ~0UL);
1288 
1289     v->arch.hvm.vmx.exception_bitmap = HVM_TRAP_MASK
1290               | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
1291               | (v->arch.fully_eager_fpu ? 0 : (1U << TRAP_no_device));
1292     vmx_update_exception_bitmap(v);
1293 
1294     v->arch.hvm.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
1295     hvm_update_guest_cr(v, 0);
1296 
1297     v->arch.hvm.guest_cr[4] = 0;
1298     hvm_update_guest_cr(v, 4);
1299 
1300     if ( cpu_has_vmx_tpr_shadow )
1301     {
1302         __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
1303                   page_to_maddr(vcpu_vlapic(v)->regs_page));
1304         __vmwrite(TPR_THRESHOLD, 0);
1305     }
1306 
1307     if ( paging_mode_hap(d) )
1308     {
1309         struct p2m_domain *p2m = p2m_get_hostp2m(d);
1310         struct ept_data *ept = &p2m->ept;
1311 
1312         ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1313         __vmwrite(EPT_POINTER, ept->eptp);
1314 
1315         __vmwrite(HOST_PAT, XEN_MSR_PAT);
1316         __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
1317     }
1318     if ( cpu_has_vmx_mpx )
1319         __vmwrite(GUEST_BNDCFGS, 0);
1320     if ( cpu_has_vmx_xsaves )
1321         __vmwrite(XSS_EXIT_BITMAP, 0);
1322 
1323     if ( cpu_has_vmx_tsc_scaling )
1324         __vmwrite(TSC_MULTIPLIER, d->arch.hvm.tsc_scaling_ratio);
1325 
1326     /* Will update HOST_CR3 and GUEST_CR3 as required. */
1327     paging_update_paging_modes(v);
1328 
1329     vmx_vlapic_msr_changed(v);
1330 
1331     if ( opt_l1d_flush && paging_mode_hap(d) )
1332         rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D,
1333                          VMX_MSR_GUEST_LOADONLY);
1334 
1335  out:
1336     vmx_vmcs_exit(v);
1337 
1338     return rc;
1339 }
1340 
1341 /*
1342  * Search an MSR list looking for an MSR entry, or the slot in which it should
1343  * live (to keep the data sorted) if an entry is not found.
1344  *
1345  * The return pointer is guaranteed to be bounded by start and end.  However,
1346  * it may point at end, and may be invalid for the caller to dereference.
1347  */
1348 static struct vmx_msr_entry *locate_msr_entry(
1349     struct vmx_msr_entry *start, struct vmx_msr_entry *end, uint32_t msr)
1350 {
1351     while ( start < end )
1352     {
1353         struct vmx_msr_entry *mid = start + (end - start) / 2;
1354 
1355         if ( msr < mid->index )
1356             end = mid;
1357         else if ( msr > mid->index )
1358             start = mid + 1;
1359         else
1360             return mid;
1361     }
1362 
1363     return start;
1364 }
1365 
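/*
 * The guest MSR area is a single sorted page: the first msr_save_count
 * entries are both saved on VM-exit and loaded on VM-entry (VMX_MSR_GUEST),
 * followed by load-only entries up to msr_load_count
 * (VMX_MSR_GUEST_LOADONLY).  The host MSR area is a separate page.
 */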
1366 struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
1367                                    enum vmx_msr_list_type type)
1368 {
1369     const struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1370     struct vmx_msr_entry *start = NULL, *ent, *end;
1371     unsigned int substart = 0, subend = vmx->msr_save_count;
1372     unsigned int total = vmx->msr_load_count;
1373 
1374     ASSERT(v == current || !vcpu_runnable(v));
1375 
1376     switch ( type )
1377     {
1378     case VMX_MSR_HOST:
1379         start    = vmx->host_msr_area;
1380         subend   = vmx->host_msr_count;
1381         total    = subend;
1382         break;
1383 
1384     case VMX_MSR_GUEST:
1385         start    = vmx->msr_area;
1386         break;
1387 
1388     case VMX_MSR_GUEST_LOADONLY:
1389         start    = vmx->msr_area;
1390         substart = subend;
1391         subend   = total;
1392         break;
1393 
1394     default:
1395         ASSERT_UNREACHABLE();
1396     }
1397 
1398     if ( !start )
1399         return NULL;
1400 
1401     end = start + total;
1402     ent = locate_msr_entry(start + substart, start + subend, msr);
1403 
1404     return ((ent < end) && (ent->index == msr)) ? ent : NULL;
1405 }
1406 
1407 int vmx_add_msr(struct vcpu *v, uint32_t msr, uint64_t val,
1408                 enum vmx_msr_list_type type)
1409 {
1410     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1411     struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
1412     unsigned int substart, subend, total;
1413     int rc;
1414 
1415     ASSERT(v == current || !vcpu_runnable(v));
1416 
1417     switch ( type )
1418     {
1419     case VMX_MSR_HOST:
1420         ptr      = &vmx->host_msr_area;
1421         substart = 0;
1422         subend   = vmx->host_msr_count;
1423         total    = subend;
1424         break;
1425 
1426     case VMX_MSR_GUEST:
1427         ptr      = &vmx->msr_area;
1428         substart = 0;
1429         subend   = vmx->msr_save_count;
1430         total    = vmx->msr_load_count;
1431         break;
1432 
1433     case VMX_MSR_GUEST_LOADONLY:
1434         ptr      = &vmx->msr_area;
1435         substart = vmx->msr_save_count;
1436         subend   = vmx->msr_load_count;
1437         total    = subend;
1438         break;
1439 
1440     default:
1441         ASSERT_UNREACHABLE();
1442         return -EINVAL;
1443     }
1444 
1445     vmx_vmcs_enter(v);
1446 
1447     /* Allocate memory on first use. */
1448     if ( unlikely(!*ptr) )
1449     {
1450         paddr_t addr;
1451 
1452         if ( (*ptr = alloc_xenheap_page()) == NULL )
1453         {
1454             rc = -ENOMEM;
1455             goto out;
1456         }
1457 
1458         addr = virt_to_maddr(*ptr);
1459 
1460         switch ( type )
1461         {
1462         case VMX_MSR_HOST:
1463             __vmwrite(VM_EXIT_MSR_LOAD_ADDR, addr);
1464             break;
1465 
1466         case VMX_MSR_GUEST:
1467         case VMX_MSR_GUEST_LOADONLY:
1468             __vmwrite(VM_EXIT_MSR_STORE_ADDR, addr);
1469             __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, addr);
1470             break;
1471         }
1472     }
1473 
1474     start = *ptr;
1475     end   = start + total;
1476     ent   = locate_msr_entry(start + substart, start + subend, msr);
1477 
1478     if ( (ent < end) && (ent->index == msr) )
1479         goto found;
1480 
1481     /* If there isn't an existing entry for msr, insert room for one. */
1482     if ( total == (PAGE_SIZE / sizeof(*ent)) )
1483     {
1484         rc = -ENOSPC;
1485         goto out;
1486     }
1487 
1488     memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
1489 
1490     ent->index = msr;
1491     ent->mbz = 0;
1492 
1493     switch ( type )
1494     {
1495     case VMX_MSR_HOST:
1496         __vmwrite(VM_EXIT_MSR_LOAD_COUNT, ++vmx->host_msr_count);
1497         break;
1498 
1499     case VMX_MSR_GUEST:
1500         __vmwrite(VM_EXIT_MSR_STORE_COUNT, ++vmx->msr_save_count);
1501 
1502         /* Fallthrough */
1503     case VMX_MSR_GUEST_LOADONLY:
1504         __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, ++vmx->msr_load_count);
1505         break;
1506     }
1507 
1508     /* Set the msr's value. */
1509  found:
1510     ent->data = val;
1511     rc = 0;
1512 
1513  out:
1514     vmx_vmcs_exit(v);
1515 
1516     return rc;
1517 }
1518 
1519 int vmx_del_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
1520 {
1521     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1522     struct vmx_msr_entry *start = NULL, *ent, *end;
1523     unsigned int substart = 0, subend = vmx->msr_save_count;
1524     unsigned int total = vmx->msr_load_count;
1525 
1526     ASSERT(v == current || !vcpu_runnable(v));
1527 
1528     switch ( type )
1529     {
1530     case VMX_MSR_HOST:
1531         start    = vmx->host_msr_area;
1532         subend   = vmx->host_msr_count;
1533         total    = subend;
1534         break;
1535 
1536     case VMX_MSR_GUEST:
1537         start    = vmx->msr_area;
1538         break;
1539 
1540     case VMX_MSR_GUEST_LOADONLY:
1541         start    = vmx->msr_area;
1542         substart = subend;
1543         subend   = total;
1544         break;
1545 
1546     default:
1547         ASSERT_UNREACHABLE();
1548     }
1549 
1550     if ( !start )
1551         return -ESRCH;
1552 
1553     end = start + total;
1554     ent = locate_msr_entry(start + substart, start + subend, msr);
1555 
1556     if ( (ent == end) || (ent->index != msr) )
1557         return -ESRCH;
1558 
1559     memmove(ent, ent + 1, sizeof(*ent) * (end - ent - 1));
1560 
1561     vmx_vmcs_enter(v);
1562 
1563     switch ( type )
1564     {
1565     case VMX_MSR_HOST:
1566         __vmwrite(VM_EXIT_MSR_LOAD_COUNT, --vmx->host_msr_count);
1567         break;
1568 
1569     case VMX_MSR_GUEST:
1570         __vmwrite(VM_EXIT_MSR_STORE_COUNT, --vmx->msr_save_count);
1571 
1572         /* Fallthrough */
1573     case VMX_MSR_GUEST_LOADONLY:
1574         __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, --vmx->msr_load_count);
1575         break;
1576     }
1577 
1578     vmx_vmcs_exit(v);
1579 
1580     return 0;
1581 }
1582 
1583 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1584 {
1585     if ( !test_and_set_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1586         set_bit(vector / BITS_PER_LONG,
1587                 &v->arch.hvm.vmx.eoi_exitmap_changed);
1588 }
1589 
1590 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1591 {
1592     if ( test_and_clear_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1593         set_bit(vector / BITS_PER_LONG,
1594                 &v->arch.hvm.vmx.eoi_exitmap_changed);
1595 }
1596 
1597 bool_t vmx_vcpu_pml_enabled(const struct vcpu *v)
1598 {
1599     return !!(v->arch.hvm.vmx.secondary_exec_control &
1600               SECONDARY_EXEC_ENABLE_PML);
1601 }
1602 
1603 int vmx_vcpu_enable_pml(struct vcpu *v)
1604 {
1605     if ( vmx_vcpu_pml_enabled(v) )
1606         return 0;
1607 
1608     v->arch.hvm.vmx.pml_pg = v->domain->arch.paging.alloc_page(v->domain);
1609     if ( !v->arch.hvm.vmx.pml_pg )
1610         return -ENOMEM;
1611 
1612     vmx_vmcs_enter(v);
1613 
1614     __vmwrite(PML_ADDRESS, page_to_maddr(v->arch.hvm.vmx.pml_pg));
1615     __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1616 
1617     v->arch.hvm.vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_PML;
1618 
1619     __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1620               v->arch.hvm.vmx.secondary_exec_control);
1621 
1622     vmx_vmcs_exit(v);
1623 
1624     return 0;
1625 }
1626 
1627 void vmx_vcpu_disable_pml(struct vcpu *v)
1628 {
1629     if ( !vmx_vcpu_pml_enabled(v) )
1630         return;
1631 
1632     /* Make sure we don't lose any logged GPAs. */
1633     vmx_vcpu_flush_pml_buffer(v);
1634 
1635     vmx_vmcs_enter(v);
1636 
1637     v->arch.hvm.vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
1638     __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1639               v->arch.hvm.vmx.secondary_exec_control);
1640 
1641     vmx_vmcs_exit(v);
1642 
1643     v->domain->arch.paging.free_page(v->domain, v->arch.hvm.vmx.pml_pg);
1644     v->arch.hvm.vmx.pml_pg = NULL;
1645 }
1646 
1647 void vmx_vcpu_flush_pml_buffer(struct vcpu *v)
1648 {
1649     uint64_t *pml_buf;
1650     unsigned long pml_idx;
1651 
1652     ASSERT((v == current) || (!vcpu_runnable(v) && !v->is_running));
1653     ASSERT(vmx_vcpu_pml_enabled(v));
1654 
1655     vmx_vmcs_enter(v);
1656 
1657     __vmread(GUEST_PML_INDEX, &pml_idx);
1658 
1659     /* Do nothing if PML buffer is empty. */
1660     if ( pml_idx == (NR_PML_ENTRIES - 1) )
1661         goto out;
1662 
1663     pml_buf = __map_domain_page(v->arch.hvm.vmx.pml_pg);
1664 
1665     /*
1666      * The PML index is either 2^16-1 (the buffer is full) or in the range
1667      * 0 ~ NR_PML_ENTRIES-1 (the buffer is not full); in the latter case it
1668      * always points to the next available entry.
1669      */
1670     if ( pml_idx >= NR_PML_ENTRIES )
1671         pml_idx = 0;
1672     else
1673         pml_idx++;
1674 
1675     for ( ; pml_idx < NR_PML_ENTRIES; pml_idx++ )
1676     {
1677         unsigned long gfn = pml_buf[pml_idx] >> PAGE_SHIFT;
1678 
1679         /*
1680          * The type of each logged GFN needs changing from log-dirty back to
1681          * normal memory; hap_track_dirty_vram depends on this.  We also mark
1682          * every logged GFN as dirty, since we cannot be sure it is safe to
1683          * ignore GFNs for which p2m_change_type_one() fails.  Such failures
1684          * are very rare and the extra cost is negligible, whereas a missing
1685          * dirty mark is extremely difficult to debug.
1686          */
1687         p2m_change_type_one(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
1688 
1689         /* HVM guest: pfn == gfn */
1690         paging_mark_pfn_dirty(v->domain, _pfn(gfn));
1691     }
1692 
1693     unmap_domain_page(pml_buf);
1694 
1695     /* Reset PML index */
1696     __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1697 
1698  out:
1699     vmx_vmcs_exit(v);
1700 }
1701 
1702 bool_t vmx_domain_pml_enabled(const struct domain *d)
1703 {
1704     return d->arch.hvm.vmx.status & VMX_DOMAIN_PML_ENABLED;
1705 }
1706 
1707 /*
1708  * Enable PML for a particular domain.  Must be called while the domain is
1709  * paused.
1710  *
1711  * PML has to be enabled for all vcpus of the domain at once: the PML buffer
1712  * and PML index are per-vcpu, but the EPT tables are shared between vcpus,
1713  * so enabling PML on only a subset of vcpus cannot work.
1714  */
1715 int vmx_domain_enable_pml(struct domain *d)
1716 {
1717     struct vcpu *v;
1718     int rc;
1719 
1720     ASSERT(atomic_read(&d->pause_count));
1721 
1722     if ( vmx_domain_pml_enabled(d) )
1723         return 0;
1724 
1725     for_each_vcpu ( d, v )
1726         if ( (rc = vmx_vcpu_enable_pml(v)) != 0 )
1727             goto error;
1728 
1729     d->arch.hvm.vmx.status |= VMX_DOMAIN_PML_ENABLED;
1730 
1731     return 0;
1732 
1733  error:
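    /* Roll back PML on any vcpus for which it was already enabled. */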
1734     for_each_vcpu ( d, v )
1735         if ( vmx_vcpu_pml_enabled(v) )
1736             vmx_vcpu_disable_pml(v);
1737     return rc;
1738 }
1739 
1740 /*
1741  * Disable PML for a particular domain.  Must be called while the domain is
1742  * paused.
1743  *
1744  * As with enabling PML, this has to be done for all vcpus at once.
1745  */
1746 void vmx_domain_disable_pml(struct domain *d)
1747 {
1748     struct vcpu *v;
1749 
1750     ASSERT(atomic_read(&d->pause_count));
1751 
1752     if ( !vmx_domain_pml_enabled(d) )
1753         return;
1754 
1755     for_each_vcpu ( d, v )
1756         vmx_vcpu_disable_pml(v);
1757 
1758     d->arch.hvm.vmx.status &= ~VMX_DOMAIN_PML_ENABLED;
1759 }
1760 
1761 /*
1762  * Flush the PML buffers of all vcpus, propagating the logged dirty pages into
1763  * the log-dirty radix tree.  Must be called while the domain is paused.
1764  */
1765 void vmx_domain_flush_pml_buffers(struct domain *d)
1766 {
1767     struct vcpu *v;
1768 
1769     ASSERT(atomic_read(&d->pause_count));
1770 
1771     if ( !vmx_domain_pml_enabled(d) )
1772         return;
1773 
1774     for_each_vcpu ( d, v )
1775         vmx_vcpu_flush_pml_buffer(v);
1776 }
1777 
1778 static void vmx_vcpu_update_eptp(struct vcpu *v, u64 eptp)
1779 {
1780     vmx_vmcs_enter(v);
1781     __vmwrite(EPT_POINTER, eptp);
1782     vmx_vmcs_exit(v);
1783 }
1784 
1785 /*
1786  * Write the (updated) EPTP into the VMCS of every vcpu of the domain.  Must
1787  * be called while the domain is paused.
1788  */
1789 void vmx_domain_update_eptp(struct domain *d)
1790 {
1791     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1792     struct vcpu *v;
1793 
1794     ASSERT(atomic_read(&d->pause_count));
1795 
1796     for_each_vcpu ( d, v )
1797         vmx_vcpu_update_eptp(v, p2m->ept.eptp);
1798 
1799     ept_sync_domain(p2m);
1800 }
1801 
1802 int vmx_create_vmcs(struct vcpu *v)
1803 {
1804     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1805     int rc;
1806 
1807     if ( (vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
1808         return -ENOMEM;
1809 
1810     INIT_LIST_HEAD(&vmx->active_list);
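    /* VMCLEAR the new VMCS so it starts out inactive, with a clear launch state. */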
1811     __vmpclear(vmx->vmcs_pa);
1812     vmx->active_cpu = -1;
1813     vmx->launched   = 0;
1814 
1815     if ( (rc = construct_vmcs(v)) != 0 )
1816     {
1817         vmx_free_vmcs(vmx->vmcs_pa);
1818         return rc;
1819     }
1820 
1821     return 0;
1822 }
1823 
1824 void vmx_destroy_vmcs(struct vcpu *v)
1825 {
1826     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1827 
1828     vmx_clear_vmcs(v);
1829 
1830     vmx_free_vmcs(vmx->vmcs_pa);
1831 
1832     free_xenheap_page(v->arch.hvm.vmx.host_msr_area);
1833     free_xenheap_page(v->arch.hvm.vmx.msr_area);
1834     free_xenheap_page(v->arch.hvm.vmx.msr_bitmap);
1835 }
1836 
1837 void vmx_vmentry_failure(void)
1838 {
1839     struct vcpu *curr = current;
1840     unsigned long error;
1841 
1842     __vmread(VM_INSTRUCTION_ERROR, &error);
1843     gprintk(XENLOG_ERR, "VM%s error: %#lx\n",
1844             curr->arch.hvm.vmx.launched ? "RESUME" : "LAUNCH", error);
1845 
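    /*
     * For these two errors the VMCS contents are the most likely culprit, so
     * dump them to aid diagnosis.
     */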
1846     if ( error == VMX_INSN_INVALID_CONTROL_STATE ||
1847          error == VMX_INSN_INVALID_HOST_STATE )
1848         vmcs_dump_vcpu(curr);
1849 
1850     domain_crash(curr->domain);
1851 }
1852 
1853 void vmx_do_resume(void)
1854 {
1855     struct vcpu *v = current;
1856     bool_t debug_state;
1857     unsigned long host_cr4;
1858 
1859     if ( v->arch.hvm.vmx.active_cpu == smp_processor_id() )
1860         vmx_vmcs_reload(v);
1861     else
1862     {
1863         /*
1864          * For a pass-through domain, a guest PCIe device driver may use
1865          * non-snooped ("Non-Snoop") I/O and rely on explicit WBINVD or CLFLUSH
1866          * to make RAM coherent.  As the vcpu may migrate before the WBINVD or
1867          * CLFLUSH happens, data consistency is maintained either by:
1868          *  1: flushing the cache (wbinvd) when the guest is scheduled out, if
1869          *     there is no wbinvd exiting, or
1870          *  2: executing wbinvd on all dirtied pCPUs when the guest's wbinvd
1871          *     exits.  Neither is needed if the VT-d engine can force snooping.
1872          */
1873         if ( has_arch_pdevs(v->domain) && !iommu_snoop
1874                 && !cpu_has_wbinvd_exiting )
1875         {
1876             int cpu = v->arch.hvm.vmx.active_cpu;
1877             if ( cpu != -1 )
1878                 flush_mask(cpumask_of(cpu), FLUSH_CACHE);
1879         }
1880 
1881         vmx_clear_vmcs(v);
1882         vmx_load_vmcs(v);
1883         hvm_migrate_timers(v);
1884         hvm_migrate_pirqs(v);
1885         vmx_set_host_env(v);
1886         /*
1887          * Both the n1 and n2 VMCS need their host environment updated after
1888          * vCPU migration.  The currently loaded VMCS is updated in place above;
1889          * the update of the other VMCS is deferred until it is switched in.
1890          */
1891         v->arch.hvm.vmx.hostenv_migrated = 1;
1892 
1893         hvm_asid_flush_vcpu(v);
1894     }
1895 
1896     debug_state = v->domain->debugger_attached
1897                   || v->domain->arch.monitor.software_breakpoint_enabled
1898                   || v->domain->arch.monitor.singlestep_enabled;
1899 
1900     if ( unlikely(v->arch.hvm.debug_state_latch != debug_state) )
1901     {
1902         v->arch.hvm.debug_state_latch = debug_state;
1903         vmx_update_debug_state(v);
1904     }
1905 
1906     hvm_do_resume(v);
1907 
1908     /* Sync host CR4 in case its value has changed. */
1909     __vmread(HOST_CR4, &host_cr4);
1910     if ( host_cr4 != read_cr4() )
1911         __vmwrite(HOST_CR4, read_cr4());
1912 
1913     reset_stack_and_jump(vmx_asm_do_vmentry);
1914 }
1915 
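/*
 * Read a VMCS field for dumping purposes, yielding 0 rather than failing if
 * the field isn't accessible on the current hardware.
 */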
1916 static inline unsigned long vmr(unsigned long field)
1917 {
1918     unsigned long val;
1919 
1920     return vmread_safe(field, &val) ? 0 : val;
1921 }
1922 
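/*
 * Sanity checks on the VMCS field encoding: bits 14:13 encode the field width
 * (0 = 16-bit, 2 = 32-bit) and bit 0 selects the high half of a 64-bit field,
 * hence the 0x6001 masks below.
 */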
1923 #define vmr16(fld) ({             \
1924     BUILD_BUG_ON((fld) & 0x6001); \
1925     (uint16_t)vmr(fld);           \
1926 })
1927 
1928 #define vmr32(fld) ({                         \
1929     BUILD_BUG_ON(((fld) & 0x6001) != 0x4000); \
1930     (uint32_t)vmr(fld);                       \
1931 })
1932 
1933 static void vmx_dump_sel(char *name, uint32_t selector)
1934 {
1935     uint32_t sel, attr, limit;
1936     uint64_t base;
1937     sel = vmr(selector);
1938     attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
1939     limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
1940     base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
1941     printk("%s: %04x %05x %08x %016"PRIx64"\n", name, sel, attr, limit, base);
1942 }
1943 
1944 static void vmx_dump_sel2(char *name, uint32_t lim)
1945 {
1946     uint32_t limit;
1947     uint64_t base;
1948     limit = vmr(lim);
1949     base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
1950     printk("%s:            %08x %016"PRIx64"\n", name, limit, base);
1951 }
1952 
1953 void vmcs_dump_vcpu(struct vcpu *v)
1954 {
1955     struct cpu_user_regs *regs = &v->arch.user_regs;
1956     uint32_t vmentry_ctl, vmexit_ctl;
1957     unsigned long cr4;
1958     uint64_t efer;
1959     unsigned int i, n;
1960 
1961     if ( v == current )
1962         regs = guest_cpu_user_regs();
1963 
1964     vmx_vmcs_enter(v);
1965 
1966     vmentry_ctl = vmr32(VM_ENTRY_CONTROLS);
1967     vmexit_ctl = vmr32(VM_EXIT_CONTROLS);
1968     cr4 = vmr(GUEST_CR4);
1969 
1970     /*
1971      * The guest's EFER setting comes from the GUEST_EFER VMCS field whenever
1972      * that is available, or otherwise from the guest load-only MSR list on
1973      * Gen1 hardware; that list entry may be elided for performance reasons
1974      * when it is identical to Xen's own setting.
1975      */
1976     if ( cpu_has_vmx_efer )
1977         efer = vmr(GUEST_EFER);
1978     else if ( vmx_read_guest_loadonly_msr(v, MSR_EFER, &efer) )
1979         efer = read_efer();
1980 
1981     printk("*** Guest State ***\n");
1982     printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1983            vmr(GUEST_CR0), vmr(CR0_READ_SHADOW), vmr(CR0_GUEST_HOST_MASK));
1984     printk("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1985            cr4, vmr(CR4_READ_SHADOW), vmr(CR4_GUEST_HOST_MASK));
1986     printk("CR3 = 0x%016lx\n", vmr(GUEST_CR3));
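    /* Guest PDPTEs are only relevant for PAE paging outside long mode, with EPT. */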
1987     if ( (v->arch.hvm.vmx.secondary_exec_control &
1988           SECONDARY_EXEC_ENABLE_EPT) &&
1989          (cr4 & X86_CR4_PAE) && !(vmentry_ctl & VM_ENTRY_IA32E_MODE) )
1990     {
1991         printk("PDPTE0 = 0x%016lx  PDPTE1 = 0x%016lx\n",
1992                vmr(GUEST_PDPTE(0)), vmr(GUEST_PDPTE(1)));
1993         printk("PDPTE2 = 0x%016lx  PDPTE3 = 0x%016lx\n",
1994                vmr(GUEST_PDPTE(2)), vmr(GUEST_PDPTE(3)));
1995     }
1996     printk("RSP = 0x%016lx (0x%016lx)  RIP = 0x%016lx (0x%016lx)\n",
1997            vmr(GUEST_RSP), regs->rsp,
1998            vmr(GUEST_RIP), regs->rip);
1999     printk("RFLAGS=0x%08lx (0x%08lx)  DR7 = 0x%016lx\n",
2000            vmr(GUEST_RFLAGS), regs->rflags,
2001            vmr(GUEST_DR7));
2002     printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
2003            vmr(GUEST_SYSENTER_ESP),
2004            vmr32(GUEST_SYSENTER_CS), vmr(GUEST_SYSENTER_EIP));
2005     printk("       sel  attr  limit   base\n");
2006     vmx_dump_sel("  CS", GUEST_CS_SELECTOR);
2007     vmx_dump_sel("  DS", GUEST_DS_SELECTOR);
2008     vmx_dump_sel("  SS", GUEST_SS_SELECTOR);
2009     vmx_dump_sel("  ES", GUEST_ES_SELECTOR);
2010     vmx_dump_sel("  FS", GUEST_FS_SELECTOR);
2011     vmx_dump_sel("  GS", GUEST_GS_SELECTOR);
2012     vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
2013     vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
2014     vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
2015     vmx_dump_sel("  TR", GUEST_TR_SELECTOR);
2016     printk("EFER(%s) = 0x%016lx  PAT = 0x%016lx\n",
2017            cpu_has_vmx_efer ? "VMCS" : "MSR LL", efer, vmr(GUEST_PAT));
2018     printk("PreemptionTimer = 0x%08x  SM Base = 0x%08x\n",
2019            vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE));
2020     printk("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
2021            vmr(GUEST_IA32_DEBUGCTL), vmr(GUEST_PENDING_DBG_EXCEPTIONS));
2022     if ( vmentry_ctl & (VM_ENTRY_LOAD_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_BNDCFGS) )
2023         printk("PerfGlobCtl = 0x%016lx  BndCfgS = 0x%016lx\n",
2024                vmr(GUEST_PERF_GLOBAL_CTRL), vmr(GUEST_BNDCFGS));
2025     printk("Interruptibility = %08x  ActivityState = %08x\n",
2026            vmr32(GUEST_INTERRUPTIBILITY_INFO), vmr32(GUEST_ACTIVITY_STATE));
2027     if ( v->arch.hvm.vmx.secondary_exec_control &
2028          SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY )
2029         printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS));
2030 
2031     printk("*** Host State ***\n");
2032     printk("RIP = 0x%016lx (%ps)  RSP = 0x%016lx\n",
2033            vmr(HOST_RIP), (void *)vmr(HOST_RIP), vmr(HOST_RSP));
2034     printk("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
2035            vmr16(HOST_CS_SELECTOR), vmr16(HOST_SS_SELECTOR),
2036            vmr16(HOST_DS_SELECTOR), vmr16(HOST_ES_SELECTOR),
2037            vmr16(HOST_FS_SELECTOR), vmr16(HOST_GS_SELECTOR),
2038            vmr16(HOST_TR_SELECTOR));
2039     printk("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
2040            vmr(HOST_FS_BASE), vmr(HOST_GS_BASE), vmr(HOST_TR_BASE));
2041     printk("GDTBase=%016lx IDTBase=%016lx\n",
2042            vmr(HOST_GDTR_BASE), vmr(HOST_IDTR_BASE));
2043     printk("CR0=%016lx CR3=%016lx CR4=%016lx\n",
2044            vmr(HOST_CR0), vmr(HOST_CR3), vmr(HOST_CR4));
2045     printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
2046            vmr(HOST_SYSENTER_ESP),
2047            vmr32(HOST_SYSENTER_CS), vmr(HOST_SYSENTER_EIP));
2048     if ( vmexit_ctl & (VM_EXIT_LOAD_HOST_PAT | VM_EXIT_LOAD_HOST_EFER) )
2049         printk("EFER = 0x%016lx  PAT = 0x%016lx\n", vmr(HOST_EFER), vmr(HOST_PAT));
2050     if ( vmexit_ctl & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
2051         printk("PerfGlobCtl = 0x%016lx\n",
2052                vmr(HOST_PERF_GLOBAL_CTRL));
2053 
2054     printk("*** Control State ***\n");
2055     printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
2056            vmr32(PIN_BASED_VM_EXEC_CONTROL),
2057            vmr32(CPU_BASED_VM_EXEC_CONTROL),
2058            vmr32(SECONDARY_VM_EXEC_CONTROL));
2059     printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
2060     printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
2061            vmr32(EXCEPTION_BITMAP),
2062            vmr32(PAGE_FAULT_ERROR_CODE_MASK),
2063            vmr32(PAGE_FAULT_ERROR_CODE_MATCH));
2064     printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
2065            vmr32(VM_ENTRY_INTR_INFO),
2066            vmr32(VM_ENTRY_EXCEPTION_ERROR_CODE),
2067            vmr32(VM_ENTRY_INSTRUCTION_LEN));
2068     printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
2069            vmr32(VM_EXIT_INTR_INFO),
2070            vmr32(VM_EXIT_INTR_ERROR_CODE),
2071            vmr32(VM_EXIT_INSTRUCTION_LEN));
2072     printk("        reason=%08x qualification=%016lx\n",
2073            vmr32(VM_EXIT_REASON), vmr(EXIT_QUALIFICATION));
2074     printk("IDTVectoring: info=%08x errcode=%08x\n",
2075            vmr32(IDT_VECTORING_INFO), vmr32(IDT_VECTORING_ERROR_CODE));
2076     printk("TSC Offset = 0x%016lx  TSC Multiplier = 0x%016lx\n",
2077            vmr(TSC_OFFSET), vmr(TSC_MULTIPLIER));
2078     if ( (v->arch.hvm.vmx.exec_control & CPU_BASED_TPR_SHADOW) ||
2079          (vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
2080         printk("TPR Threshold = 0x%02x  PostedIntrVec = 0x%02x\n",
2081                vmr32(TPR_THRESHOLD), vmr16(POSTED_INTR_NOTIFICATION_VECTOR));
2082     if ( (v->arch.hvm.vmx.secondary_exec_control &
2083           SECONDARY_EXEC_ENABLE_EPT) )
2084         printk("EPT pointer = 0x%016lx  EPTP index = 0x%04x\n",
2085                vmr(EPT_POINTER), vmr16(EPTP_INDEX));
2086     n = vmr32(CR3_TARGET_COUNT);
2087     for ( i = 0; i + 1 < n; i += 2 )
2088         printk("CR3 target%u=%016lx target%u=%016lx\n",
2089                i, vmr(CR3_TARGET_VALUE(i)),
2090                i + 1, vmr(CR3_TARGET_VALUE(i + 1)));
2091     if ( i < n )
2092         printk("CR3 target%u=%016lx\n", i, vmr(CR3_TARGET_VALUE(i)));
2093     if ( v->arch.hvm.vmx.secondary_exec_control &
2094          SECONDARY_EXEC_PAUSE_LOOP_EXITING )
2095         printk("PLE Gap=%08x Window=%08x\n",
2096                vmr32(PLE_GAP), vmr32(PLE_WINDOW));
2097     if ( v->arch.hvm.vmx.secondary_exec_control &
2098          (SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
2099         printk("Virtual processor ID = 0x%04x VMfunc controls = %016lx\n",
2100                vmr16(VIRTUAL_PROCESSOR_ID), vmr(VM_FUNCTION_CONTROL));
2101 
2102     vmx_vmcs_exit(v);
2103 }
2104 
2105 static void vmcs_dump(unsigned char ch)
2106 {
2107     struct domain *d;
2108     struct vcpu *v;
2109 
2110     printk("*********** VMCS Areas **************\n");
2111 
2112     rcu_read_lock(&domlist_read_lock);
2113 
2114     for_each_domain ( d )
2115     {
2116         if ( !is_hvm_domain(d) )
2117             continue;
2118         printk("\n>>> Domain %d <<<\n", d->domain_id);
2119         for_each_vcpu ( d, v )
2120         {
2121             printk("\tVCPU %d\n", v->vcpu_id);
2122             vmcs_dump_vcpu(v);
2123         }
2124     }
2125 
2126     rcu_read_unlock(&domlist_read_lock);
2127 
2128     printk("**************************************\n");
2129 }
2130 
2131 int __init vmx_vmcs_init(void)
2132 {
2133     int ret;
2134 
2135     if ( opt_ept_ad < 0 )
2136         /* Work around Erratum AVR41 on Avoton processors. */
2137         opt_ept_ad = !(boot_cpu_data.x86 == 6 &&
2138                        boot_cpu_data.x86_model == 0x4d);
2139 
2140     ret = _vmx_cpu_up(true);
2141 
2142     if ( !ret )
2143         register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
2144 
2145     return ret;
2146 }
2147 
2148 static void __init __maybe_unused build_assertions(void)
2149 {
2150     struct vmx_msr_bitmap bitmap;
2151 
2152     /* Check the vmx_msr_bitmap layout against hardware expectations. */
2153     BUILD_BUG_ON(sizeof(bitmap)            != PAGE_SIZE);
2154     BUILD_BUG_ON(sizeof(bitmap.read_low)   != 1024);
2155     BUILD_BUG_ON(sizeof(bitmap.read_high)  != 1024);
2156     BUILD_BUG_ON(sizeof(bitmap.write_low)  != 1024);
2157     BUILD_BUG_ON(sizeof(bitmap.write_high) != 1024);
2158     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_low)   != 0);
2159     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_high)  != 1024);
2160     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_low)  != 2048);
2161     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_high) != 3072);
2162 }
2163 
2164 /*
2165  * Local variables:
2166  * mode: C
2167  * c-file-style: "BSD"
2168  * c-basic-offset: 4
2169  * tab-width: 4
2170  * indent-tabs-mode: nil
2171  * End:
2172  */
2173