1 /*
2 * vvmx.c: Support virtual VMX for nested virtualization.
3 *
4 * Copyright (c) 2010, Intel Corporation.
5 * Author: Qing He <qing.he@intel.com>
6 * Eddie Dong <eddie.dong@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; If not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21
22 #include <asm/types.h>
23 #include <asm/mtrr.h>
24 #include <asm/p2m.h>
25 #include <asm/hvm/ioreq.h>
26 #include <asm/hvm/vmx/vmx.h>
27 #include <asm/hvm/vmx/vvmx.h>
28 #include <asm/hvm/nestedhvm.h>
29
30 static DEFINE_PER_CPU(u64 *, vvmcs_buf);
31
32 static void nvmx_purge_vvmcs(struct vcpu *v);
33
static bool nvmx_vcpu_in_vmx(const struct vcpu *v)
35 {
36 return vcpu_2_nvmx(v).vmxon_region_pa != INVALID_PADDR;
37 }
38
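/*
 * Number of u64 slots in the per-CPU scratch buffer used by
 * vvmcs_to_shadow_bulk() and shadow_to_vvmcs_bulk(); it must be no smaller
 * than the largest field table handed to those helpers (currently
 * vmcs_gstate_field), or the bulk path falls back to per-field copies.
 */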
39 #define VMCS_BUF_SIZE 100
40
int nvmx_cpu_up_prepare(unsigned int cpu)
42 {
43 uint64_t **vvmcs_buf;
44
45 if ( cpu_has_vmx_vmcs_shadowing &&
46 *(vvmcs_buf = &per_cpu(vvmcs_buf, cpu)) == NULL )
47 {
48 void *ptr = xzalloc_array(uint64_t, VMCS_BUF_SIZE);
49
50 if ( !ptr )
51 return -ENOMEM;
52
53 *vvmcs_buf = ptr;
54 }
55
56 return 0;
57 }
58
void nvmx_cpu_dead(unsigned int cpu)
60 {
61 XFREE(per_cpu(vvmcs_buf, cpu));
62 }
63
int nvmx_vcpu_initialise(struct vcpu *v)
65 {
66 struct domain *d = v->domain;
67 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
68 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
69 struct page_info *pg = alloc_domheap_page(NULL, 0);
70
71 /*
72 * Gross bodge. The nested p2m logic can't cope with the CVE-2018-12207
73 * workaround of using NX EPT superpages, and livelocks. Nested HVM isn't
74 * security supported, so disable the workaround until the nested p2m
75 * logic can be improved.
76 */
77 if ( !d->arch.hvm.vmx.exec_sp )
78 {
79 d->arch.hvm.vmx.exec_sp = true;
80 p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
81 }
82
83 if ( !pg )
84 {
85 gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
86 return -ENOMEM;
87 }
88 nvcpu->nv_n2vmcx_pa = page_to_maddr(pg);
89
90 /* non-root VMREAD/VMWRITE bitmap. */
91 if ( cpu_has_vmx_vmcs_shadowing )
92 {
93 struct page_info *vmread_bitmap, *vmwrite_bitmap;
94 unsigned long *vw;
95
96 vmread_bitmap = alloc_domheap_page(NULL, 0);
97 if ( !vmread_bitmap )
98 {
99 gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap failed\n");
100 return -ENOMEM;
101 }
102 v->arch.hvm.vmx.vmread_bitmap = vmread_bitmap;
103
104 clear_domain_page(page_to_mfn(vmread_bitmap));
105
106 vmwrite_bitmap = alloc_domheap_page(NULL, 0);
107 if ( !vmwrite_bitmap )
108 {
109 gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap failed\n");
110 return -ENOMEM;
111 }
112 v->arch.hvm.vmx.vmwrite_bitmap = vmwrite_bitmap;
113
114 vw = __map_domain_page(vmwrite_bitmap);
115 clear_page(vw);
116
        /*
         * The following 6 encodings need to be handled by the VMM.
         * Let writes to them vmexit as usual.
         */
121 set_bit(IO_BITMAP_A, vw);
122 set_bit(VMCS_HIGH(IO_BITMAP_A), vw);
123 set_bit(IO_BITMAP_B, vw);
124 set_bit(VMCS_HIGH(IO_BITMAP_B), vw);
125 set_bit(MSR_BITMAP, vw);
126 set_bit(VMCS_HIGH(MSR_BITMAP), vw);
127
128 unmap_domain_page(vw);
129 }
130
131 if ( cpu_has_vmx_msr_bitmap )
132 {
133 nvmx->msr_merged = alloc_xenheap_page();
134 if ( !nvmx->msr_merged )
135 return -ENOMEM;
136 }
137
138 nvmx->ept.enabled = 0;
139 nvmx->guest_vpid = 0;
140 nvmx->vmxon_region_pa = INVALID_PADDR;
141 nvcpu->nv_vvmcx = NULL;
142 nvcpu->nv_vvmcxaddr = INVALID_PADDR;
143 nvmx->intr.intr_info = 0;
144 nvmx->intr.error_code = 0;
145 nvmx->iobitmap[0] = NULL;
146 nvmx->iobitmap[1] = NULL;
147 nvmx->msrbitmap = NULL;
148 INIT_LIST_HEAD(&nvmx->launched_list);
149 return 0;
150 }
151
void nvmx_vcpu_destroy(struct vcpu *v)
153 {
154 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
155 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
156 struct vvmcs_list *item, *n;
157
    /*
     * When destroying the vcpu, it may be running on behalf of the L2
     * guest.  Therefore we need to switch the VMCS pointer back to the L1
     * VMCS, to avoid a double free of the L2 VMCS and a possible memory
     * leak of the L1 VMCS page.
     */
164 if ( nvcpu->nv_n1vmcx_pa )
165 v->arch.hvm.vmx.vmcs_pa = nvcpu->nv_n1vmcx_pa;
166
167 if ( nvcpu->nv_n2vmcx_pa )
168 {
169 __vmpclear(nvcpu->nv_n2vmcx_pa);
170 free_domheap_page(maddr_to_page(nvcpu->nv_n2vmcx_pa));
171 nvcpu->nv_n2vmcx_pa = 0;
172 }
173
174 /* Must also cope with nvmx_vcpu_initialise() not having got called. */
175 if ( nvmx->launched_list.next )
176 list_for_each_entry_safe(item, n, &nvmx->launched_list, node)
177 {
178 list_del(&item->node);
179 xfree(item);
180 }
181
182 if ( v->arch.hvm.vmx.vmread_bitmap )
183 {
184 free_domheap_page(v->arch.hvm.vmx.vmread_bitmap);
185 v->arch.hvm.vmx.vmread_bitmap = NULL;
186 }
187 if ( v->arch.hvm.vmx.vmwrite_bitmap )
188 {
189 free_domheap_page(v->arch.hvm.vmx.vmwrite_bitmap);
190 v->arch.hvm.vmx.vmwrite_bitmap = NULL;
191 }
192 }
193
static void vcpu_relinquish_resources(struct vcpu *v)
195 {
196 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
197
198 FREE_XENHEAP_PAGE(nvmx->msr_merged);
199 }
200
void nvmx_domain_relinquish_resources(struct domain *d)
202 {
203 struct vcpu *v;
204
205 for_each_vcpu ( d, v )
206 {
207 nvmx_purge_vvmcs(v);
208 vcpu_relinquish_resources(v);
209 }
210 }
211
int nvmx_vcpu_reset(struct vcpu *v)
213 {
214 return 0;
215 }
216
uint64_t nvmx_vcpu_eptp_base(struct vcpu *v)
218 {
219 return get_vvmcs(v, EPT_POINTER) & PAGE_MASK;
220 }
221
bool_t nvmx_ept_enabled(struct vcpu *v)
223 {
224 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
225
226 return !!(nvmx->ept.enabled);
227 }
228
229 struct vmx_inst_decoded {
230 #define VMX_INST_MEMREG_TYPE_MEMORY 0
231 #define VMX_INST_MEMREG_TYPE_REG 1
232 int type;
233 union {
234 struct {
235 unsigned long mem;
236 unsigned int len;
237 };
238 unsigned int reg1;
239 };
240
241 unsigned int reg2;
242 };
243
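/*
 * Map a VMCS field encoding onto a u64 slot in the flat virtual VMCS page:
 * bits 0-4 take the field index, bits 5-6 the field type and bits 7-8 the
 * access width.  For example GUEST_CS_SELECTOR (encoding 0x802: width 0,
 * type 2, index 1) lands in slot (1 | 2 << 5) = 0x41.  Slot 0 coincides with
 * the guest page's vmcs_revision_id, so the only field that would map there
 * (VIRTUAL_PROCESSOR_ID, encoding 0x0000) is redirected to the otherwise
 * unused slot 0x3f.
 */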
static int vvmcs_offset(u32 width, u32 type, u32 index)
245 {
246 int offset;
247
248 offset = (index & 0x1f) | type << 5 | width << 7;
249
250 if ( offset == 0 ) /* vpid */
251 offset = 0x3f;
252
253 return offset;
254 }
255
uint64_t get_vvmcs_virtual(void *vvmcs, uint32_t vmcs_encoding)
257 {
258 union vmcs_encoding enc;
259 uint64_t *content = vvmcs;
260 int offset;
261 uint64_t res;
262
263 enc.word = vmcs_encoding;
264 offset = vvmcs_offset(enc.width, enc.type, enc.index);
265 res = content[offset];
266
267 switch ( enc.width ) {
268 case VVMCS_WIDTH_16:
269 res &= 0xffff;
270 break;
271 case VVMCS_WIDTH_64:
272 if ( enc.access_type )
273 res >>= 32;
274 break;
275 case VVMCS_WIDTH_32:
276 res &= 0xffffffff;
277 break;
278 case VVMCS_WIDTH_NATURAL:
279 default:
280 break;
281 }
282
283 return res;
284 }
285
u64 get_vvmcs_real(const struct vcpu *v, u32 encoding)
287 {
288 return virtual_vmcs_vmread(v, encoding);
289 }
290
enum vmx_insn_errno get_vvmcs_virtual_safe(void *vvmcs, u32 encoding, u64 *val)
292 {
293 *val = get_vvmcs_virtual(vvmcs, encoding);
294
295 /*
296 * TODO: This should not always succeed. Fields and values need to be
297 * audited against the features offered to the guest in the VT-x MSRs.
298 * This should be fixed when the MSR levelling work is started, at which
299 * point there will be a cpuid_policy-like object.
300 */
301 return VMX_INSN_SUCCEED;
302 }
303
enum vmx_insn_errno get_vvmcs_real_safe(const struct vcpu *v, u32 encoding,
305 u64 *val)
306 {
307 return virtual_vmcs_vmread_safe(v, encoding, val);
308 }
309
void set_vvmcs_virtual(void *vvmcs, uint32_t vmcs_encoding, uint64_t val)
311 {
312 union vmcs_encoding enc;
313 uint64_t *content = vvmcs;
314 int offset;
315 uint64_t res;
316
317 enc.word = vmcs_encoding;
318 offset = vvmcs_offset(enc.width, enc.type, enc.index);
319 res = content[offset];
320
321 switch ( enc.width ) {
322 case VVMCS_WIDTH_16:
323 res = val & 0xffff;
324 break;
325 case VVMCS_WIDTH_64:
326 if ( enc.access_type )
327 {
328 res &= 0xffffffff;
329 res |= val << 32;
330 }
331 else
332 res = val;
333 break;
334 case VVMCS_WIDTH_32:
335 res = val & 0xffffffff;
336 break;
337 case VVMCS_WIDTH_NATURAL:
338 default:
339 res = val;
340 break;
341 }
342
343 content[offset] = res;
344 }
345
void set_vvmcs_real(const struct vcpu *v, u32 encoding, u64 val)
347 {
348 virtual_vmcs_vmwrite(v, encoding, val);
349 }
350
enum vmx_insn_errno set_vvmcs_virtual_safe(void *vvmcs, u32 encoding, u64 val)
352 {
353 set_vvmcs_virtual(vvmcs, encoding, val);
354
355 /*
356 * TODO: This should not always succeed. Fields and values need to be
357 * audited against the features offered to the guest in the VT-x MSRs.
358 * This should be fixed when the MSR levelling work is started, at which
359 * point there will be a cpuid_policy-like object.
360 */
361 return VMX_INSN_SUCCEED;
362 }
363
enum vmx_insn_errno set_vvmcs_real_safe(const struct vcpu *v, u32 encoding,
365 u64 val)
366 {
367 return virtual_vmcs_vmwrite_safe(v, encoding, val);
368 }
369
static unsigned long reg_read(struct cpu_user_regs *regs,
371 unsigned int index)
372 {
373 return *decode_gpr(regs, index);
374 }
375
static void reg_write(struct cpu_user_regs *regs,
377 unsigned int index,
378 unsigned long value)
379 {
380 *decode_gpr(regs, index) = value;
381 }
382
static inline u32 __n2_pin_exec_control(struct vcpu *v)
384 {
385 return get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
386 }
387
static inline u32 __n2_exec_control(struct vcpu *v)
389 {
390 return get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL);
391 }
392
static inline u32 __n2_secondary_exec_control(struct vcpu *v)
394 {
395 u64 second_ctrl = 0;
396
397 if ( __n2_exec_control(v) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
398 second_ctrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
399
400 return second_ctrl;
401 }
402
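/*
 * Decode the operands of the vmexit-ing VMX instruction from the
 * VM-instruction information field (and the displacement held in the exit
 * qualification).  Register operands are read directly; memory operands are
 * formed as segment base + base + index * scale + displacement, checked
 * against canonical/segment limits, and (when requested) copied in from the
 * guest linear address space.
 */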
static int decode_vmx_inst(struct cpu_user_regs *regs,
404 struct vmx_inst_decoded *decode,
405 unsigned long *poperandS)
406 {
407 struct vcpu *v = current;
408 union vmx_inst_info info;
409 struct segment_register seg;
410 unsigned long base, index, seg_base, disp, offset;
411 int scale, size;
412
413 __vmread(VMX_INSTRUCTION_INFO, &offset);
414 info.word = offset;
415
416 if ( info.fields.memreg ) {
417 decode->type = VMX_INST_MEMREG_TYPE_REG;
418 decode->reg1 = info.fields.reg1;
419 if ( poperandS != NULL )
420 *poperandS = reg_read(regs, decode->reg1);
421 }
422 else
423 {
424 bool mode_64bit = (vmx_guest_x86_mode(v) == 8);
425
426 decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
427
428 if ( info.fields.segment > x86_seg_gs )
429 goto gp_fault;
430 hvm_get_segment_register(v, info.fields.segment, &seg);
431 seg_base = seg.base;
432
433 base = info.fields.base_reg_invalid ? 0 :
434 reg_read(regs, info.fields.base_reg);
435
436 index = info.fields.index_reg_invalid ? 0 :
437 reg_read(regs, info.fields.index_reg);
438
439 scale = 1 << info.fields.scaling;
440
441 __vmread(EXIT_QUALIFICATION, &disp);
442
443 size = 1 << (info.fields.addr_size + 1);
444
445 offset = base + index * scale + disp;
446 base = !mode_64bit || info.fields.segment >= x86_seg_fs ?
447 seg_base + offset : offset;
448 if ( offset + size - 1 < offset ||
449 (mode_64bit ?
450 !is_canonical_address((long)base < 0 ? base :
451 base + size - 1) :
452 offset + size - 1 > seg.limit) )
453 goto gp_fault;
454
455 if ( poperandS != NULL )
456 {
457 pagefault_info_t pfinfo;
458 int rc = hvm_copy_from_guest_linear(poperandS, base, size,
459 0, &pfinfo);
460
461 if ( rc == HVMTRANS_bad_linear_to_gfn )
462 hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
463 if ( rc != HVMTRANS_okay )
464 return X86EMUL_EXCEPTION;
465 }
466 decode->mem = base;
467 decode->len = size;
468 }
469
470 decode->reg2 = info.fields.reg2;
471
472 return X86EMUL_OKAY;
473
474 gp_fault:
475 hvm_inject_hw_exception(TRAP_gp_fault, 0);
476 return X86EMUL_EXCEPTION;
477 }
478
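/*
 * VMX instruction outcome helpers, following the SDM conventions:
 * VMsucceed clears all the arithmetic flags, VMfailInvalid sets CF (used
 * when there is no current VMCS), and VMfailValid sets ZF and deposits the
 * error number in the current VMCS's VM_INSTRUCTION_ERROR field.
 */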
static void vmsucceed(struct cpu_user_regs *regs)
480 {
481 regs->eflags &= ~X86_EFLAGS_ARITH_MASK;
482 }
483
static void vmfail_valid(struct cpu_user_regs *regs, enum vmx_insn_errno errno)
485 {
486 struct vcpu *v = current;
487 unsigned int eflags = regs->eflags;
488
489 ASSERT(vvmcx_valid(v));
490
491 regs->eflags = (eflags & ~X86_EFLAGS_ARITH_MASK) | X86_EFLAGS_ZF;
492 set_vvmcs(v, VM_INSTRUCTION_ERROR, errno);
493 }
494
static void vmfail_invalid(struct cpu_user_regs *regs)
496 {
497 struct vcpu *v = current;
498 unsigned int eflags = regs->eflags;
499
500 ASSERT(!vvmcx_valid(v));
501
502 regs->eflags = (eflags & ~X86_EFLAGS_ARITH_MASK) | X86_EFLAGS_CF;
503 }
504
static void vmfail(struct cpu_user_regs *regs, enum vmx_insn_errno errno)
506 {
507 if ( errno == VMX_INSN_SUCCEED )
508 return;
509
510 if ( vvmcx_valid(current) && errno != VMX_INSN_FAIL_INVALID )
511 vmfail_valid(regs, errno);
512 else
513 vmfail_invalid(regs);
514 }
515
bool_t nvmx_intercepts_exception(
517 struct vcpu *v, unsigned int vector, int error_code)
518 {
519 u32 exception_bitmap, pfec_match=0, pfec_mask=0;
520 int r;
521
522 ASSERT(vector < 32);
523
524 exception_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
525 r = exception_bitmap & (1 << vector) ? 1: 0;
526
527 if ( vector == TRAP_page_fault )
528 {
529 pfec_match = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH);
530 pfec_mask = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK);
531 if ( (error_code & pfec_mask) != pfec_match )
532 r = !r;
533 }
534 return r;
535 }
536
/*
 * Nested VMX uses a "strict" condition: exit from the L2 guest if either
 * the L1 VMM or the L0 VMM expects to exit.
 */
static inline u32 __shadow_control(struct vcpu *v,
542 unsigned int field,
543 u32 host_value)
544 {
545 return get_vvmcs(v, field) | host_value;
546 }
547
static void set_shadow_control(struct vcpu *v,
549 unsigned int field,
550 u32 host_value)
551 {
552 __vmwrite(field, __shadow_control(v, field, host_value));
553 }
554
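/*
 * Choose the pre-built shadow IO bitmap variant based on whether the L1
 * bitmap wants ports 0x80 and 0xED intercepted, the two ports for which
 * separate variants are kept (see nestedhvm_vcpu_iomap_get()).
 */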
unsigned long *_shadow_io_bitmap(struct vcpu *v)
556 {
557 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
558 int port80, portED;
559 u8 *bitmap;
560
561 bitmap = nvmx->iobitmap[0];
562 port80 = bitmap[0x80 >> 3] & (1 << (0x80 & 0x7)) ? 1 : 0;
563 portED = bitmap[0xed >> 3] & (1 << (0xed & 0x7)) ? 1 : 0;
564
565 return nestedhvm_vcpu_iomap_get(port80, portED);
566 }
567
static void update_msrbitmap(struct vcpu *v, uint32_t shadow_ctrl)
569 {
570 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
571 struct vmx_msr_bitmap *msr_bitmap = nvmx->msr_merged;
572
573 if ( !(shadow_ctrl & CPU_BASED_ACTIVATE_MSR_BITMAP) ||
574 !nvmx->msrbitmap )
575 return;
576
577 bitmap_or(msr_bitmap->read_low, nvmx->msrbitmap->read_low,
578 v->arch.hvm.vmx.msr_bitmap->read_low,
579 sizeof(msr_bitmap->read_low) * 8);
580 bitmap_or(msr_bitmap->read_high, nvmx->msrbitmap->read_high,
581 v->arch.hvm.vmx.msr_bitmap->read_high,
582 sizeof(msr_bitmap->read_high) * 8);
583 bitmap_or(msr_bitmap->write_low, nvmx->msrbitmap->write_low,
584 v->arch.hvm.vmx.msr_bitmap->write_low,
585 sizeof(msr_bitmap->write_low) * 8);
586 bitmap_or(msr_bitmap->write_high, nvmx->msrbitmap->write_high,
587 v->arch.hvm.vmx.msr_bitmap->write_high,
588 sizeof(msr_bitmap->write_high) * 8);
589
590 /*
591 * Nested VMX doesn't support any x2APIC hardware virtualization, so
592 * make sure all the x2APIC MSRs are trapped.
593 */
594 bitmap_set(msr_bitmap->read_low, MSR_X2APIC_FIRST, 0x100);
595 bitmap_set(msr_bitmap->write_low, MSR_X2APIC_FIRST, 0x100);
596
597 __vmwrite(MSR_BITMAP, virt_to_maddr(nvmx->msr_merged));
598 }
599
void nvmx_update_exec_control(struct vcpu *v, u32 host_cntrl)
601 {
602 u32 pio_cntrl = (CPU_BASED_ACTIVATE_IO_BITMAP
603 | CPU_BASED_UNCOND_IO_EXITING);
604 unsigned long *bitmap;
605 u32 shadow_cntrl;
606
607 shadow_cntrl = __n2_exec_control(v);
608 pio_cntrl &= shadow_cntrl;
609 /* Enforce the removed features */
610 shadow_cntrl &= ~(CPU_BASED_ACTIVATE_IO_BITMAP
611 | CPU_BASED_UNCOND_IO_EXITING);
612 /*
613 * Do NOT enforce the MSR bitmap currently used by L1, as certain hardware
614 * virtualization features require specific MSR bitmap settings, but
615 * without the guest also using these same features the bitmap could be
616 * leaking through unwanted MSR accesses.
617 */
618 shadow_cntrl |= host_cntrl & ~CPU_BASED_ACTIVATE_MSR_BITMAP;
619 if ( !(shadow_cntrl & host_cntrl & CPU_BASED_ACTIVATE_MSR_BITMAP) )
620 shadow_cntrl &= ~CPU_BASED_ACTIVATE_MSR_BITMAP;
621 if ( pio_cntrl == CPU_BASED_UNCOND_IO_EXITING ) {
622 /* L1 VMM intercepts all I/O instructions */
623 shadow_cntrl |= CPU_BASED_UNCOND_IO_EXITING;
624 shadow_cntrl &= ~CPU_BASED_ACTIVATE_IO_BITMAP;
625 }
626 else {
627 /* Use IO_BITMAP in shadow */
628 if ( pio_cntrl == 0 ) {
            /*
             * The L1 VMM doesn't intercept IO instructions.
             * Use the host configuration and reset the IO_BITMAP.
             */
633 bitmap = hvm_io_bitmap;
634 }
635 else {
636 /* use IO bitmap */
637 bitmap = _shadow_io_bitmap(v);
638 }
639 __vmwrite(IO_BITMAP_A, virt_to_maddr(bitmap));
640 __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
641 }
642
643 update_msrbitmap(v, shadow_cntrl);
644
645 /* TODO: change L0 intr window to MTF or NMI window */
646 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
647 }
648
void nvmx_update_secondary_exec_control(struct vcpu *v,
650 unsigned long host_cntrl)
651 {
652 u32 shadow_cntrl;
653 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
654 u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
655 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
656 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
657
658 host_cntrl &= ~apicv_bit;
659 host_cntrl &= ~SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
660 shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
661
662 /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
663 ASSERT(!(shadow_cntrl & apicv_bit));
664
665 nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT);
666 shadow_cntrl |= host_cntrl;
667 __vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl);
668 }
669
static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
671 {
672 u32 shadow_cntrl;
673
674 host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
675 shadow_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
676
677 /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
678 ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
679
680 shadow_cntrl |= host_cntrl;
681 __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl);
682 }
683
static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
685 {
686 u32 shadow_cntrl;
687
688 shadow_cntrl = get_vvmcs(v, VM_EXIT_CONTROLS);
689 shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS
690 | VM_EXIT_LOAD_HOST_PAT
691 | VM_EXIT_LOAD_HOST_EFER
692 | VM_EXIT_LOAD_PERF_GLOBAL_CTRL);
693 shadow_cntrl |= host_cntrl;
694 __vmwrite(VM_EXIT_CONTROLS, shadow_cntrl);
695 }
696
static void nvmx_update_entry_control(struct vcpu *v)
698 {
699 u32 shadow_cntrl;
700
701 shadow_cntrl = get_vvmcs(v, VM_ENTRY_CONTROLS);
702 shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT
703 | VM_ENTRY_LOAD_GUEST_EFER
704 | VM_ENTRY_LOAD_PERF_GLOBAL_CTRL);
705 __vmwrite(VM_ENTRY_CONTROLS, shadow_cntrl);
706 }
707
void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value)
709 {
710 set_shadow_control(v, EXCEPTION_BITMAP, value);
711 }
712
static void nvmx_update_apic_access_address(struct vcpu *v)
714 {
715 u32 ctrl;
716
717 ctrl = __n2_secondary_exec_control(v);
718 if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES )
719 {
720 p2m_type_t p2mt;
721 unsigned long apic_gpfn;
722 struct page_info *apic_pg;
723
724 apic_gpfn = get_vvmcs(v, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
725 apic_pg = get_page_from_gfn(v->domain, apic_gpfn, &p2mt, P2M_ALLOC);
726 ASSERT(apic_pg && !p2m_is_paging(p2mt));
727 __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(apic_pg));
728 put_page(apic_pg);
729 }
730 else
731 __vmwrite(APIC_ACCESS_ADDR, 0);
732 }
733
static void nvmx_update_virtual_apic_address(struct vcpu *v)
735 {
736 u32 ctrl;
737
738 ctrl = __n2_exec_control(v);
739 if ( ctrl & CPU_BASED_TPR_SHADOW )
740 {
741 p2m_type_t p2mt;
742 unsigned long vapic_gpfn;
743 struct page_info *vapic_pg;
744
745 vapic_gpfn = get_vvmcs(v, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
746 vapic_pg = get_page_from_gfn(v->domain, vapic_gpfn, &p2mt, P2M_ALLOC);
747 ASSERT(vapic_pg && !p2m_is_paging(p2mt));
748 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, page_to_maddr(vapic_pg));
749 put_page(vapic_pg);
750 }
751 else
752 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
753 }
754
static void nvmx_update_tpr_threshold(struct vcpu *v)
756 {
757 u32 ctrl = __n2_exec_control(v);
758
759 if ( ctrl & CPU_BASED_TPR_SHADOW )
760 __vmwrite(TPR_THRESHOLD, get_vvmcs(v, TPR_THRESHOLD));
761 else
762 __vmwrite(TPR_THRESHOLD, 0);
763 }
764
static void nvmx_update_pfec(struct vcpu *v)
766 {
767 __vmwrite(PAGE_FAULT_ERROR_CODE_MASK,
768 get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK));
769 __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH,
770 get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH));
771 }
772
static void __clear_current_vvmcs(struct vcpu *v)
774 {
775 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
776
777 if ( nvcpu->nv_n2vmcx_pa )
778 __vmpclear(nvcpu->nv_n2vmcx_pa);
779 }
780
static void unmap_msr_bitmap(struct vcpu *v)
782 {
783 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
784
785 if ( nvmx->msrbitmap )
786 {
787 hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
788 nvmx->msrbitmap = NULL;
789 }
790 }
791
792 /*
793 * Refreshes the MSR bitmap mapping for the current nested vcpu. Returns true
794 * for a successful mapping, and returns false for MSR_BITMAP parameter errors
795 * or gfn mapping errors.
796 */
static bool __must_check _map_msr_bitmap(struct vcpu *v)
798 {
799 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
800 uint64_t gpa;
801
802 unmap_msr_bitmap(v);
803 gpa = get_vvmcs(v, MSR_BITMAP);
804
805 if ( !IS_ALIGNED(gpa, PAGE_SIZE) )
806 return false;
807
808 nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
809
810 return nvmx->msrbitmap != NULL;
811 }
812
static void unmap_io_bitmap(struct vcpu *v, unsigned int idx)
814 {
815 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
816
817 if ( nvmx->iobitmap[idx] )
818 {
819 hvm_unmap_guest_frame(nvmx->iobitmap[idx], 1);
820 nvmx->iobitmap[idx] = NULL;
821 }
822 }
823
static bool_t __must_check _map_io_bitmap(struct vcpu *v, u64 vmcs_reg)
825 {
826 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
827 unsigned long gpa;
828 int index;
829
830 index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
831 unmap_io_bitmap(v, index);
832 gpa = get_vvmcs(v, vmcs_reg);
833 nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
834
835 return nvmx->iobitmap[index] != NULL;
836 }
837
static inline bool_t __must_check map_io_bitmap_all(struct vcpu *v)
839 {
840 return _map_io_bitmap(v, IO_BITMAP_A) &&
841 _map_io_bitmap(v, IO_BITMAP_B);
842 }
843
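/*
 * Tear down everything associated with the currently loaded virtual VMCS:
 * clear the L2 VMCS on hardware, unmap the guest's VMCS page and its
 * IO/MSR bitmaps, and mark nv_vvmcxaddr as invalid.
 */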
static void nvmx_purge_vvmcs(struct vcpu *v)
845 {
846 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
847 int i;
848
849 __clear_current_vvmcs(v);
850 if ( vvmcx_valid(v) )
851 hvm_unmap_guest_frame(nvcpu->nv_vvmcx, 1);
852 nvcpu->nv_vvmcx = NULL;
853 nvcpu->nv_vvmcxaddr = INVALID_PADDR;
854 v->arch.hvm.vmx.vmcs_shadow_maddr = 0;
855
856 for ( i = 0; i < 2; i++ )
857 unmap_io_bitmap(v, i);
858
859 unmap_msr_bitmap(v);
860 }
861
u64 nvmx_get_tsc_offset(struct vcpu *v)
863 {
864 u64 offset = 0;
865
866 if ( get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL) &
867 CPU_BASED_USE_TSC_OFFSETING )
868 offset = get_vvmcs(v, TSC_OFFSET);
869
870 return offset;
871 }
872
873 /*
874 * Context synchronized between shadow and virtual VMCS.
875 */
876 static const u16 vmcs_gstate_field[] = {
877 /* 16 BITS */
878 GUEST_ES_SELECTOR,
879 GUEST_CS_SELECTOR,
880 GUEST_SS_SELECTOR,
881 GUEST_DS_SELECTOR,
882 GUEST_FS_SELECTOR,
883 GUEST_GS_SELECTOR,
884 GUEST_LDTR_SELECTOR,
885 GUEST_TR_SELECTOR,
886 /* 64 BITS */
887 VMCS_LINK_POINTER,
888 GUEST_IA32_DEBUGCTL,
889 GUEST_PAT,
890 GUEST_EFER,
891 GUEST_PERF_GLOBAL_CTRL,
892 /* 32 BITS */
893 GUEST_ES_LIMIT,
894 GUEST_CS_LIMIT,
895 GUEST_SS_LIMIT,
896 GUEST_DS_LIMIT,
897 GUEST_FS_LIMIT,
898 GUEST_GS_LIMIT,
899 GUEST_LDTR_LIMIT,
900 GUEST_TR_LIMIT,
901 GUEST_GDTR_LIMIT,
902 GUEST_IDTR_LIMIT,
903 GUEST_ES_AR_BYTES,
904 GUEST_CS_AR_BYTES,
905 GUEST_SS_AR_BYTES,
906 GUEST_DS_AR_BYTES,
907 GUEST_FS_AR_BYTES,
908 GUEST_GS_AR_BYTES,
909 GUEST_LDTR_AR_BYTES,
910 GUEST_TR_AR_BYTES,
911 GUEST_INTERRUPTIBILITY_INFO,
912 GUEST_ACTIVITY_STATE,
913 GUEST_SYSENTER_CS,
914 GUEST_PREEMPTION_TIMER,
915 /* natural */
916 GUEST_ES_BASE,
917 GUEST_CS_BASE,
918 GUEST_SS_BASE,
919 GUEST_DS_BASE,
920 GUEST_FS_BASE,
921 GUEST_GS_BASE,
922 GUEST_LDTR_BASE,
923 GUEST_TR_BASE,
924 GUEST_GDTR_BASE,
925 GUEST_IDTR_BASE,
926 GUEST_DR7,
927 /*
928 * Following guest states are in local cache (cpu_user_regs)
929 GUEST_RSP,
930 GUEST_RIP,
931 */
932 GUEST_RFLAGS,
933 GUEST_PENDING_DBG_EXCEPTIONS,
934 GUEST_SYSENTER_ESP,
935 GUEST_SYSENTER_EIP,
936 };
937
938 static const u16 gpdpte_fields[] = {
939 GUEST_PDPTE(0),
940 GUEST_PDPTE(1),
941 GUEST_PDPTE(2),
942 GUEST_PDPTE(3),
943 };
944
945 /*
946 * Context: shadow -> virtual VMCS
947 */
948 static const u16 vmcs_ro_field[] = {
949 GUEST_PHYSICAL_ADDRESS,
950 VM_INSTRUCTION_ERROR,
951 VM_EXIT_REASON,
952 VM_EXIT_INTR_INFO,
953 VM_EXIT_INTR_ERROR_CODE,
954 IDT_VECTORING_INFO,
955 IDT_VECTORING_ERROR_CODE,
956 VM_EXIT_INSTRUCTION_LEN,
957 VMX_INSTRUCTION_INFO,
958 EXIT_QUALIFICATION,
959 GUEST_LINEAR_ADDRESS
960 };
961
962 static struct vmcs_host_to_guest {
963 u16 host_field;
964 u16 guest_field;
965 } const vmcs_h2g_field[] = {
966 {HOST_ES_SELECTOR, GUEST_ES_SELECTOR},
967 {HOST_CS_SELECTOR, GUEST_CS_SELECTOR},
968 {HOST_SS_SELECTOR, GUEST_SS_SELECTOR},
969 {HOST_DS_SELECTOR, GUEST_DS_SELECTOR},
970 {HOST_FS_SELECTOR, GUEST_FS_SELECTOR},
971 {HOST_GS_SELECTOR, GUEST_GS_SELECTOR},
972 {HOST_TR_SELECTOR, GUEST_TR_SELECTOR},
973 {HOST_SYSENTER_CS, GUEST_SYSENTER_CS},
974 {HOST_FS_BASE, GUEST_FS_BASE},
975 {HOST_GS_BASE, GUEST_GS_BASE},
976 {HOST_TR_BASE, GUEST_TR_BASE},
977 {HOST_GDTR_BASE, GUEST_GDTR_BASE},
978 {HOST_IDTR_BASE, GUEST_IDTR_BASE},
979 {HOST_SYSENTER_ESP, GUEST_SYSENTER_ESP},
980 {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
981 };
982
static void vvmcs_to_shadow(const struct vcpu *v, unsigned int field)
984 {
985 __vmwrite(field, get_vvmcs(v, field));
986 }
987
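/*
 * Bulk copy a table of fields from the virtual VMCS into the shadow VMCS.
 * With VMCS shadowing available the values are staged through the per-CPU
 * vvmcs_buf so that only one virtual_vmcs_enter()/exit() pair is needed;
 * otherwise each field is copied individually via vvmcs_to_shadow().
 */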
static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
989 const u16 *field)
990 {
991 u64 *value = this_cpu(vvmcs_buf);
992 unsigned int i;
993
994 if ( !cpu_has_vmx_vmcs_shadowing )
995 goto fallback;
996
997 if ( n > VMCS_BUF_SIZE )
998 {
999 if ( IS_ENABLED(CONFIG_DEBUG) )
1000 printk_once(XENLOG_ERR "%pv VMCS sync too many fields %u\n",
1001 v, n);
1002 goto fallback;
1003 }
1004
1005 virtual_vmcs_enter(v);
1006 for ( i = 0; i < n; i++ )
1007 __vmread(field[i], &value[i]);
1008 virtual_vmcs_exit(v);
1009
1010 for ( i = 0; i < n; i++ )
1011 __vmwrite(field[i], value[i]);
1012
1013 return;
1014
1015 fallback:
1016 for ( i = 0; i < n; i++ )
1017 vvmcs_to_shadow(v, field[i]);
1018 }
1019
static inline void shadow_to_vvmcs(const struct vcpu *v, unsigned int field)
1021 {
1022 unsigned long value;
1023
1024 if ( vmread_safe(field, &value) == 0 )
1025 set_vvmcs(v, field, value);
1026 }
1027
static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
1029 const u16 *field)
1030 {
1031 u64 *value = this_cpu(vvmcs_buf);
1032 unsigned int i;
1033
1034 if ( !cpu_has_vmx_vmcs_shadowing )
1035 goto fallback;
1036
1037 if ( n > VMCS_BUF_SIZE )
1038 {
1039 if ( IS_ENABLED(CONFIG_DEBUG) )
1040 printk_once(XENLOG_ERR "%pv VMCS sync too many fields %u\n",
1041 v, n);
1042 goto fallback;
1043 }
1044
1045 for ( i = 0; i < n; i++ )
1046 __vmread(field[i], &value[i]);
1047
1048 virtual_vmcs_enter(v);
1049 for ( i = 0; i < n; i++ )
1050 __vmwrite(field[i], value[i]);
1051 virtual_vmcs_exit(v);
1052
1053 return;
1054
1055 fallback:
1056 for ( i = 0; i < n; i++ )
1057 shadow_to_vvmcs(v, field[i]);
1058 }
1059
static void load_shadow_control(struct vcpu *v)
1061 {
    /*
     * Set the shadow controls: PIN_BASED, CPU_BASED, EXIT, ENTRY and
     * EXCEPTION, enforcing the removed features.
     */
1067 nvmx_update_pin_control(v, vmx_pin_based_exec_control);
1068 vmx_update_cpu_exec_control(v);
1069 vmx_update_secondary_exec_control(v);
1070 nvmx_update_exit_control(v, vmx_vmexit_control);
1071 nvmx_update_entry_control(v);
1072 vmx_update_exception_bitmap(v);
1073 nvmx_update_apic_access_address(v);
1074 nvmx_update_virtual_apic_address(v);
1075 nvmx_update_tpr_threshold(v);
1076 nvmx_update_pfec(v);
1077 }
1078
static void load_shadow_guest_state(struct vcpu *v)
1080 {
1081 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1082 u32 control;
1083 u64 cr_gh_mask, cr_read_shadow;
1084 int rc;
1085
1086 static const u16 vmentry_fields[] = {
1087 VM_ENTRY_INTR_INFO,
1088 VM_ENTRY_EXCEPTION_ERROR_CODE,
1089 VM_ENTRY_INSTRUCTION_LEN,
1090 };
1091
1092 /* vvmcs.gstate to shadow vmcs.gstate */
1093 vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
1094 vmcs_gstate_field);
1095
1096 nvcpu->guest_cr[0] = get_vvmcs(v, CR0_READ_SHADOW);
1097 nvcpu->guest_cr[4] = get_vvmcs(v, CR4_READ_SHADOW);
1098
1099 rc = hvm_set_cr4(get_vvmcs(v, GUEST_CR4), true);
1100 if ( rc == X86EMUL_EXCEPTION )
1101 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1102
1103 rc = hvm_set_cr0(get_vvmcs(v, GUEST_CR0), true);
1104 if ( rc == X86EMUL_EXCEPTION )
1105 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1106
1107 rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true);
1108 if ( rc == X86EMUL_EXCEPTION )
1109 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1110
1111 control = get_vvmcs(v, VM_ENTRY_CONTROLS);
1112 if ( control & VM_ENTRY_LOAD_GUEST_PAT )
1113 hvm_set_guest_pat(v, get_vvmcs(v, GUEST_PAT));
1114 if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
1115 {
1116 rc = hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
1117 get_vvmcs(v, GUEST_PERF_GLOBAL_CTRL), false);
1118 if ( rc == X86EMUL_EXCEPTION )
1119 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1120 }
1121
1122 hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0);
1123
1124 vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields);
1125
    /*
     * While emulating CR0 and CR4 for nested virtualization, set the CR0/CR4
     * guest/host mask to 0xffffffff in the shadow VMCS (following the host
     * L1 VMCS), then calculate the corresponding read shadow separately for
     * CR0 and CR4.
     */
1131 cr_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
1132 cr_read_shadow = (get_vvmcs(v, GUEST_CR0) & ~cr_gh_mask) |
1133 (get_vvmcs(v, CR0_READ_SHADOW) & cr_gh_mask);
1134 __vmwrite(CR0_READ_SHADOW, cr_read_shadow);
1135
1136 cr_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
1137 cr_read_shadow = (get_vvmcs(v, GUEST_CR4) & ~cr_gh_mask) |
1138 (get_vvmcs(v, CR4_READ_SHADOW) & cr_gh_mask);
1139 __vmwrite(CR4_READ_SHADOW, cr_read_shadow);
1140 /* Add the nested host mask to the one set by vmx_update_guest_cr. */
1141 v->arch.hvm.vmx.cr4_host_mask |= cr_gh_mask;
1142 __vmwrite(CR4_GUEST_HOST_MASK, v->arch.hvm.vmx.cr4_host_mask);
1143
1144 /* TODO: CR3 target control */
1145 }
1146
static uint64_t get_shadow_eptp(struct vcpu *v)
1148 {
1149 struct p2m_domain *p2m = p2m_get_nestedp2m(v);
1150 struct ept_data *ept = &p2m->ept;
1151
1152 ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1153 return ept->eptp;
1154 }
1155
static uint64_t get_host_eptp(struct vcpu *v)
1157 {
1158 return p2m_get_hostp2m(v->domain)->ept.eptp;
1159 }
1160
static bool_t nvmx_vpid_enabled(const struct vcpu *v)
1162 {
1163 uint32_t second_cntl;
1164
1165 second_cntl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
1166 if ( second_cntl & SECONDARY_EXEC_ENABLE_VPID )
1167 return 1;
1168 return 0;
1169 }
1170
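/*
 * Hook the guest's VMCS page up as a shadow VMCS: tag its revision id with
 * the shadow-VMCS type bit, enable VMCS shadowing in the secondary controls,
 * and point VMCS_LINK_POINTER plus the VMREAD/VMWRITE bitmaps at it, so that
 * most L1 VMREAD/VMWRITE accesses are handled by hardware without exiting.
 */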
static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
1172 {
1173 paddr_t vvmcs_maddr = v->arch.hvm.vmx.vmcs_shadow_maddr;
1174
1175 __vmpclear(vvmcs_maddr);
1176 vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
1177 v->arch.hvm.vmx.secondary_exec_control |=
1178 SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
1179 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1180 v->arch.hvm.vmx.secondary_exec_control);
1181 __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
1182 __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm.vmx.vmread_bitmap));
1183 __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm.vmx.vmwrite_bitmap));
1184 }
1185
static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
1187 {
1188 paddr_t vvmcs_maddr = v->arch.hvm.vmx.vmcs_shadow_maddr;
1189
1190 __vmpclear(vvmcs_maddr);
1191 vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
1192 v->arch.hvm.vmx.secondary_exec_control &=
1193 ~SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
1194 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1195 v->arch.hvm.vmx.secondary_exec_control);
1196 __vmwrite(VMCS_LINK_POINTER, ~0ul);
1197 __vmwrite(VMREAD_BITMAP, 0);
1198 __vmwrite(VMWRITE_BITMAP, 0);
1199 }
1200
static void virtual_vmentry(struct cpu_user_regs *regs)
1202 {
1203 struct vcpu *v = current;
1204 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1205 unsigned long lm_l1, lm_l2;
1206
1207 vmx_vmcs_switch(v->arch.hvm.vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
1208
1209 nestedhvm_vcpu_enter_guestmode(v);
1210 nvcpu->nv_vmentry_pending = 0;
1211 nvcpu->nv_vmswitch_in_progress = 1;
1212
    /*
     * EFER handling:
     * hvm_set_efer won't work if CR0.PG = 1, so we change the value
     * directly to make hvm_long_mode_active(v) work in L2.
     * An additional update_paging_modes is also needed if there is a
     * 32/64 switch.  v->arch.hvm.guest_efer doesn't need to be saved,
     * since its value on vmexit is determined by the L1 exit_controls.
     */
1222 lm_l1 = hvm_long_mode_active(v);
1223 lm_l2 = !!(get_vvmcs(v, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
1224
1225 if ( lm_l2 )
1226 v->arch.hvm.guest_efer |= EFER_LMA | EFER_LME;
1227 else
1228 v->arch.hvm.guest_efer &= ~(EFER_LMA | EFER_LME);
1229
1230 load_shadow_control(v);
1231 load_shadow_guest_state(v);
1232
1233 if ( lm_l1 != lm_l2 )
1234 paging_update_paging_modes(v);
1235
1236 if ( nvmx_ept_enabled(v) && hvm_pae_enabled(v) &&
1237 !(v->arch.hvm.guest_efer & EFER_LMA) )
1238 vvmcs_to_shadow_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
1239
1240 regs->rip = get_vvmcs(v, GUEST_RIP);
1241 regs->rsp = get_vvmcs(v, GUEST_RSP);
1242 regs->rflags = get_vvmcs(v, GUEST_RFLAGS);
1243
1244 /* updating host cr0 to sync TS bit */
1245 __vmwrite(HOST_CR0, v->arch.hvm.vmx.host_cr0);
1246
    /* Set up the virtual EPT for the L2 guest. */
    if ( nestedhvm_paging_mode_hap(v) )
        /* This will set up the initial np2m for the nested vCPU */
1250 __vmwrite(EPT_POINTER, get_shadow_eptp(v));
1251 else
1252 __vmwrite(EPT_POINTER, get_host_eptp(v));
1253
1254 /* nested VPID support! */
1255 if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(v) )
1256 {
1257 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1258 uint32_t new_vpid = get_vvmcs(v, VIRTUAL_PROCESSOR_ID);
1259
1260 if ( nvmx->guest_vpid != new_vpid )
1261 {
1262 hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
1263 nvmx->guest_vpid = new_vpid;
1264 }
1265 }
1266
1267 }
1268
static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
1270 {
1271 /* copy shadow vmcs.gstate back to vvmcs.gstate */
1272 shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
1273 vmcs_gstate_field);
1274 /* RIP, RSP are in user regs */
1275 set_vvmcs(v, GUEST_RIP, regs->rip);
1276 set_vvmcs(v, GUEST_RSP, regs->rsp);
1277
1278 /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
1279 if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
1280 shadow_to_vvmcs(v, GUEST_CR3);
1281
1282 if ( v->arch.hvm.vmx.cr4_host_mask != ~0UL )
1283 /* Only need to update nested GUEST_CR4 if not all bits are trapped. */
1284 set_vvmcs(v, GUEST_CR4, v->arch.hvm.guest_cr[4]);
1285 }
1286
static void sync_vvmcs_ro(struct vcpu *v)
1288 {
1289 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1290
1291 shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_ro_field), vmcs_ro_field);
1292
    /* Adjust exit_reason/exit_qualification for the EPT violation case. */
1294 if ( get_vvmcs(v, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
1295 {
1296 set_vvmcs(v, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
1297 set_vvmcs(v, VM_EXIT_REASON, nvmx->ept.exit_reason);
1298 }
1299 }
1300
static void load_vvmcs_host_state(struct vcpu *v)
1302 {
1303 int i, rc;
1304 u64 r;
1305 u32 control;
1306
1307 for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
1308 {
1309 r = get_vvmcs(v, vmcs_h2g_field[i].host_field);
1310 __vmwrite(vmcs_h2g_field[i].guest_field, r);
1311 }
1312
1313 rc = hvm_set_cr4(get_vvmcs(v, HOST_CR4), true);
1314 if ( rc == X86EMUL_EXCEPTION )
1315 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1316
1317 rc = hvm_set_cr0(get_vvmcs(v, HOST_CR0), true);
1318 if ( rc == X86EMUL_EXCEPTION )
1319 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1320
1321 rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true);
1322 if ( rc == X86EMUL_EXCEPTION )
1323 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1324
1325 control = get_vvmcs(v, VM_EXIT_CONTROLS);
1326 if ( control & VM_EXIT_LOAD_HOST_PAT )
1327 hvm_set_guest_pat(v, get_vvmcs(v, HOST_PAT));
1328 if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
1329 {
1330 rc = hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
1331 get_vvmcs(v, HOST_PERF_GLOBAL_CTRL), true);
1332 if ( rc == X86EMUL_EXCEPTION )
1333 hvm_inject_hw_exception(TRAP_gp_fault, 0);
1334 }
1335
1336 hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0);
1337
1338 set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
1339
1340 if ( v->arch.hvm.vmx.exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP )
1341 __vmwrite(MSR_BITMAP, virt_to_maddr(v->arch.hvm.vmx.msr_bitmap));
1342 }
1343
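/*
 * Translate the event that caused the L0 vmexit (held in nvmx->intr) into
 * the VM-exit information fields L1 expects: external interrupts become an
 * EXTERNAL_INTERRUPT exit (with the vector only passed along when L1 uses
 * "acknowledge interrupt on exit"), while exceptions, software interrupts
 * and NMIs are reflected with their interruption info and error code.
 */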
static void sync_exception_state(struct vcpu *v)
1345 {
1346 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1347 uint32_t exit_ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
1348
1349 if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
1350 return;
1351
1352 switch ( MASK_EXTR(nvmx->intr.intr_info, INTR_INFO_INTR_TYPE_MASK) )
1353 {
1354 case X86_EVENTTYPE_EXT_INTR:
1355 /* rename exit_reason to EXTERNAL_INTERRUPT */
1356 set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXTERNAL_INTERRUPT);
1357 set_vvmcs(v, EXIT_QUALIFICATION, 0);
1358 set_vvmcs(v, VM_EXIT_INTR_INFO,
1359 (exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT) ? nvmx->intr.intr_info
1360 : 0);
1361 break;
1362
1363 case X86_EVENTTYPE_HW_EXCEPTION:
1364 case X86_EVENTTYPE_SW_INTERRUPT:
1365 case X86_EVENTTYPE_SW_EXCEPTION:
1366 /* throw to L1 */
1367 set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
1368 set_vvmcs(v, VM_EXIT_INTR_ERROR_CODE, nvmx->intr.error_code);
1369 break;
1370 case X86_EVENTTYPE_NMI:
1371 set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXCEPTION_NMI);
1372 set_vvmcs(v, EXIT_QUALIFICATION, 0);
1373 set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
1374 break;
1375 default:
1376 gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
1377 nvmx->intr.intr_info);
1378 break;
1379 }
1380 }
1381
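/*
 * Resynchronise the virtual-interrupt-delivery state (GUEST_INTR_STATUS)
 * after an external-interrupt vmexit that originated from the vLAPIC: SVI
 * is updated to the vector just acknowledged (if any), and RVI to the
 * highest-priority interrupt still pending in the vLAPIC.
 */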
static void nvmx_update_apicv(struct vcpu *v)
1383 {
1384 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1385 unsigned long reason = get_vvmcs(v, VM_EXIT_REASON);
1386 unsigned long intr_info = get_vvmcs(v, VM_EXIT_INTR_INFO);
1387 unsigned long status;
1388 int rvi;
1389
1390 if ( reason != EXIT_REASON_EXTERNAL_INTERRUPT ||
1391 nvmx->intr.source != hvm_intsrc_lapic )
1392 return;
1393
1394 if ( intr_info & INTR_INFO_VALID_MASK )
1395 {
1396 uint32_t ppr;
1397 unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
1398 struct vlapic *vlapic = vcpu_vlapic(v);
1399
        /*
         * Update SVI to record the current in-service interrupt that's
         * signaled in EXIT_INTR_INFO.
         */
1404 vlapic_ack_pending_irq(v, vector, 1);
1405
1406 ppr = vlapic_set_ppr(vlapic);
1407 WARN_ON((ppr & 0xf0) != (vector & 0xf0));
1408
1409 status = vector << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
1410 }
1411 else
1412 /* Keep previous SVI if there's any. */
1413 __vmread(GUEST_INTR_STATUS, &status);
1414
1415 rvi = vlapic_has_pending_irq(v);
1416 if ( rvi != -1 )
1417 {
1418 status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
1419 status |= rvi & VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
1420 }
1421
1422 if ( status )
1423 {
1424 __vmwrite(GUEST_INTR_STATUS, status);
1425 vmx_sync_exit_bitmap(v);
1426 }
1427 }
1428
static void virtual_vmexit(struct cpu_user_regs *regs)
1430 {
1431 struct vcpu *v = current;
1432 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1433 unsigned long lm_l1, lm_l2;
1434
1435 sync_vvmcs_ro(v);
1436 sync_vvmcs_guest_state(v, regs);
1437 sync_exception_state(v);
1438
1439 if ( nvmx_ept_enabled(v) && hvm_pae_enabled(v) &&
1440 !(v->arch.hvm.guest_efer & EFER_LMA) )
1441 shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
1442
1443 /* This will clear current pCPU bit in p2m->dirty_cpumask */
1444 np2m_schedule(NP2M_SCHEDLE_OUT);
1445
1446 vmx_vmcs_switch(v->arch.hvm.vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
1447
1448 nestedhvm_vcpu_exit_guestmode(v);
1449 nvcpu->nv_vmexit_pending = 0;
1450 nvcpu->nv_vmswitch_in_progress = 1;
1451
1452 lm_l2 = hvm_long_mode_active(v);
1453 lm_l1 = !!(get_vvmcs(v, VM_EXIT_CONTROLS) & VM_EXIT_IA32E_MODE);
1454
1455 if ( lm_l1 )
1456 v->arch.hvm.guest_efer |= EFER_LMA | EFER_LME;
1457 else
1458 v->arch.hvm.guest_efer &= ~(EFER_LMA | EFER_LME);
1459
1460 vmx_update_cpu_exec_control(v);
1461 vmx_update_secondary_exec_control(v);
1462 vmx_update_exception_bitmap(v);
1463
1464 load_vvmcs_host_state(v);
1465
1466 if ( lm_l1 != lm_l2 )
1467 paging_update_paging_modes(v);
1468
1469 regs->rip = get_vvmcs(v, HOST_RIP);
1470 regs->rsp = get_vvmcs(v, HOST_RSP);
1471 /* VM exit clears all bits except bit 1 */
1472 regs->rflags = X86_EFLAGS_MBS;
1473
1474 /* updating host cr0 to sync TS bit */
1475 __vmwrite(HOST_CR0, v->arch.hvm.vmx.host_cr0);
1476
1477 if ( cpu_has_vmx_virtual_intr_delivery )
1478 nvmx_update_apicv(v);
1479
1480 nvcpu->nv_vmswitch_in_progress = 0;
1481 }
1482
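/*
 * If the nested p2m backing the shadow EPTP has been recycled while this
 * vCPU is running in L2 mode (stale_np2m), rewrite EPT_POINTER with a
 * fresh shadow EPTP before the next vmentry.
 */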
static void nvmx_eptp_update(void)
1484 {
1485 struct vcpu *curr = current;
1486
1487 if ( !nestedhvm_vcpu_in_guestmode(curr) ||
1488 vcpu_nestedhvm(curr).nv_vmexit_pending ||
1489 !vcpu_nestedhvm(curr).stale_np2m ||
1490 !nestedhvm_paging_mode_hap(curr) )
1491 return;
1492
    /*
     * Interrupts are enabled here, so we need to clear stale_np2m before we
     * do the vmwrite.  If we do it in the other order and an IPI comes in
     * changing the shadow eptp after the vmwrite, we'll complete the vmenter
     * with a stale eptp value.
     */
1499 vcpu_nestedhvm(curr).stale_np2m = false;
1500 __vmwrite(EPT_POINTER, get_shadow_eptp(curr));
1501 }
1502
void nvmx_switch_guest(void)
1504 {
1505 struct vcpu *v = current;
1506 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1507 struct cpu_user_regs *regs = guest_cpu_user_regs();
1508
1509 nvmx_eptp_update();
1510
    /*
     * A pending IO emulation may still be unfinished.  In this case, no
     * virtual vmswitch is allowed, or else the subsequent IO emulation would
     * be handled in the wrong VCPU context.  If there are no IO backends -
     * a PVH guest by itself or a PVH guest with an HVM guest running inside
     * - we don't want to continue, as this setup is neither implemented nor
     * supported as of right now.
     */
1519 if ( hvm_io_pending(v) )
1520 return;
    /*
     * A softirq may interrupt us between handling a virtual vmentry and the
     * true vmentry.  If, during this window, an L1 virtual interrupt caused
     * another virtual vmexit, VM_ENTRY_INTR_INFO would be lost, so we cannot
     * let that happen.
     */
1527 if ( unlikely(nvcpu->nv_vmswitch_in_progress) )
1528 return;
1529
1530 if ( nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmexit_pending )
1531 virtual_vmexit(regs);
1532 else if ( !nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmentry_pending )
1533 virtual_vmentry(regs);
1534 }
1535
1536 /*
1537 * VMX instructions handling
1538 */
1539
static int nvmx_handle_vmxon(struct cpu_user_regs *regs)
1541 {
1542 struct vcpu *v=current;
1543 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1544 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1545 struct vmx_inst_decoded decode;
1546 unsigned long gpa = 0;
1547 uint32_t nvmcs_revid;
1548 int rc;
1549
1550 rc = decode_vmx_inst(regs, &decode, &gpa);
1551 if ( rc != X86EMUL_OKAY )
1552 return rc;
1553
1554 if ( nvmx_vcpu_in_vmx(v) )
1555 {
1556 vmfail(regs, VMX_INSN_VMXON_IN_VMX_ROOT);
1557 return X86EMUL_OKAY;
1558 }
1559
1560 if ( (gpa & ~PAGE_MASK) || !gfn_valid(v->domain, _gfn(gpa >> PAGE_SHIFT)) )
1561 {
1562 vmfail_invalid(regs);
1563 return X86EMUL_OKAY;
1564 }
1565
1566 rc = hvm_copy_from_guest_phys(&nvmcs_revid, gpa, sizeof(nvmcs_revid));
1567 if ( rc != HVMTRANS_okay ||
1568 (nvmcs_revid & ~VMX_BASIC_REVISION_MASK) ||
1569 ((nvmcs_revid ^ vmx_basic_msr) & VMX_BASIC_REVISION_MASK) )
1570 {
1571 vmfail_invalid(regs);
1572 return X86EMUL_OKAY;
1573 }
1574
1575 nvmx->vmxon_region_pa = gpa;
1576
    /*
     * `fork' the host vmcs into the shadow vmcs.
     * vmcs_lock is not needed since we are operating on current.
     */
1581 nvcpu->nv_n1vmcx_pa = v->arch.hvm.vmx.vmcs_pa;
1582 __vmpclear(v->arch.hvm.vmx.vmcs_pa);
1583 copy_domain_page(_mfn(PFN_DOWN(nvcpu->nv_n2vmcx_pa)),
1584 _mfn(PFN_DOWN(v->arch.hvm.vmx.vmcs_pa)));
1585 __vmptrld(v->arch.hvm.vmx.vmcs_pa);
1586 v->arch.hvm.vmx.launched = 0;
1587 vmsucceed(regs);
1588
1589 return X86EMUL_OKAY;
1590 }
1591
static int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
1593 {
1594 struct vcpu *v=current;
1595 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1596
1597 nvmx_purge_vvmcs(v);
1598 nvmx->vmxon_region_pa = INVALID_PADDR;
1599
1600 vmsucceed(regs);
1601 return X86EMUL_OKAY;
1602 }
1603
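/*
 * The launch state of each virtual VMCS is tracked in a per-vCPU list keyed
 * by the VMCS frame's MFN, so that VMLAUNCH on an already-launched VMCS and
 * VMRESUME on a never-launched one can fail the way the SDM requires.
 */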
static bool_t vvmcs_launched(struct list_head *launched_list,
1605 unsigned long vvmcs_mfn)
1606 {
1607 struct vvmcs_list *vvmcs;
1608 struct list_head *pos;
1609 bool_t launched = 0;
1610
1611 list_for_each(pos, launched_list)
1612 {
1613 vvmcs = list_entry(pos, struct vvmcs_list, node);
1614 if ( vvmcs_mfn == vvmcs->vvmcs_mfn )
1615 {
1616 launched = 1;
1617 break;
1618 }
1619 }
1620
1621 return launched;
1622 }
1623
static int set_vvmcs_launched(struct list_head *launched_list,
1625 unsigned long vvmcs_mfn)
1626 {
1627 struct vvmcs_list *vvmcs;
1628
1629 if ( vvmcs_launched(launched_list, vvmcs_mfn) )
1630 return 0;
1631
1632 vvmcs = xzalloc(struct vvmcs_list);
1633 if ( !vvmcs )
1634 return -ENOMEM;
1635
1636 vvmcs->vvmcs_mfn = vvmcs_mfn;
1637 list_add(&vvmcs->node, launched_list);
1638
1639 return 0;
1640 }
1641
static void clear_vvmcs_launched(struct list_head *launched_list,
1643 paddr_t vvmcs_mfn)
1644 {
1645 struct vvmcs_list *vvmcs;
1646 struct list_head *pos;
1647
1648 list_for_each(pos, launched_list)
1649 {
1650 vvmcs = list_entry(pos, struct vvmcs_list, node);
1651 if ( vvmcs_mfn == vvmcs->vvmcs_mfn )
1652 {
1653 list_del(&vvmcs->node);
1654 xfree(vvmcs);
1655 break;
1656 }
1657 }
1658 }
1659
static enum vmx_insn_errno nvmx_vmresume(struct vcpu *v)
1661 {
1662 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1663 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1664 unsigned int exec_ctrl;
1665
1666 ASSERT(vvmcx_valid(v));
1667 exec_ctrl = __n2_exec_control(v);
1668
1669 if ( exec_ctrl & CPU_BASED_ACTIVATE_IO_BITMAP )
1670 {
1671 if ( (nvmx->iobitmap[0] == NULL || nvmx->iobitmap[1] == NULL) &&
1672 !map_io_bitmap_all(v) )
1673 goto invalid_control_state;
1674 }
1675
1676 if ( exec_ctrl & CPU_BASED_ACTIVATE_MSR_BITMAP )
1677 {
1678 if ( nvmx->msrbitmap == NULL && !_map_msr_bitmap(v) )
1679 goto invalid_control_state;
1680 }
1681
1682 nvcpu->nv_vmentry_pending = 1;
1683
1684 return VMX_INSN_SUCCEED;
1685
1686 invalid_control_state:
1687 return VMX_INSN_INVALID_CONTROL_STATE;
1688 }
1689
static int nvmx_handle_vmresume(struct cpu_user_regs *regs)
1691 {
1692 bool_t launched;
1693 struct vcpu *v = current;
1694 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1695 unsigned long intr_shadow;
1696 int rc;
1697
1698 if ( !vvmcx_valid(v) )
1699 {
1700 vmfail_invalid(regs);
1701 return X86EMUL_OKAY;
1702 }
1703
1704 __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
1705 if ( intr_shadow & VMX_INTR_SHADOW_MOV_SS )
1706 {
1707 vmfail_valid(regs, VMX_INSN_VMENTRY_BLOCKED_BY_MOV_SS);
1708 return X86EMUL_OKAY;
1709 }
1710
1711 launched = vvmcs_launched(&nvmx->launched_list,
1712 PFN_DOWN(v->arch.hvm.vmx.vmcs_shadow_maddr));
1713 if ( !launched )
1714 {
1715 vmfail_valid(regs, VMX_INSN_VMRESUME_NONLAUNCHED_VMCS);
1716 return X86EMUL_OKAY;
1717 }
1718
1719 rc = nvmx_vmresume(v);
1720 if ( rc )
1721 vmfail_valid(regs, rc);
1722
1723 return X86EMUL_OKAY;
1724 }
1725
static int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
1727 {
1728 bool_t launched;
1729 struct vcpu *v = current;
1730 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1731 unsigned long intr_shadow;
1732 int rc;
1733
1734 if ( !vvmcx_valid(v) )
1735 {
1736 vmfail_invalid(regs);
1737 return X86EMUL_OKAY;
1738 }
1739
1740 __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
1741 if ( intr_shadow & VMX_INTR_SHADOW_MOV_SS )
1742 {
1743 vmfail_valid(regs, VMX_INSN_VMENTRY_BLOCKED_BY_MOV_SS);
1744 return X86EMUL_OKAY;
1745 }
1746
1747 launched = vvmcs_launched(&nvmx->launched_list,
1748 PFN_DOWN(v->arch.hvm.vmx.vmcs_shadow_maddr));
1749 if ( launched )
1750 {
1751 vmfail_valid(regs, VMX_INSN_VMLAUNCH_NONCLEAR_VMCS);
1752 return X86EMUL_OKAY;
1753 }
1754 else {
1755 rc = nvmx_vmresume(v);
1756 if ( rc )
1757 vmfail_valid(regs, rc);
1758 else
1759 {
1760 if ( set_vvmcs_launched(&nvmx->launched_list,
1761 PFN_DOWN(v->arch.hvm.vmx.vmcs_shadow_maddr)) < 0 )
1762 return X86EMUL_UNHANDLEABLE;
1763 }
1764 rc = X86EMUL_OKAY;
1765 }
1766 return rc;
1767 }
1768
static int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
1770 {
1771 struct vcpu *v = current;
1772 struct vmx_inst_decoded decode;
1773 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1774 unsigned long gpa = 0;
1775 int rc;
1776
1777 rc = decode_vmx_inst(regs, &decode, &gpa);
1778 if ( rc != X86EMUL_OKAY )
1779 return rc;
1780
1781 if ( (gpa & ~PAGE_MASK) || !gfn_valid(v->domain, gaddr_to_gfn(gpa)) )
1782 {
1783 vmfail(regs, VMX_INSN_VMPTRLD_INVALID_PHYADDR);
1784 goto out;
1785 }
1786
1787 if ( gpa == vcpu_2_nvmx(v).vmxon_region_pa )
1788 {
1789 vmfail(regs, VMX_INSN_VMPTRLD_WITH_VMXON_PTR);
1790 goto out;
1791 }
1792
1793 if ( nvcpu->nv_vvmcxaddr != gpa )
1794 nvmx_purge_vvmcs(v);
1795
1796 if ( !vvmcx_valid(v) )
1797 {
1798 bool_t writable;
1799 void *vvmcx = hvm_map_guest_frame_rw(paddr_to_pfn(gpa), 1, &writable);
1800
1801 if ( vvmcx )
1802 {
1803 if ( writable )
1804 {
1805 struct vmcs_struct *vvmcs = vvmcx;
1806
1807 if ( ((vvmcs->vmcs_revision_id ^ vmx_basic_msr) &
1808 VMX_BASIC_REVISION_MASK) ||
1809 (!cpu_has_vmx_vmcs_shadowing &&
1810 (vvmcs->vmcs_revision_id & ~VMX_BASIC_REVISION_MASK)) )
1811 {
1812 hvm_unmap_guest_frame(vvmcx, 1);
1813 vmfail(regs, VMX_INSN_VMPTRLD_INCORRECT_VMCS_ID);
1814
1815 return X86EMUL_OKAY;
1816 }
1817 nvcpu->nv_vvmcx = vvmcx;
1818 nvcpu->nv_vvmcxaddr = gpa;
1819 v->arch.hvm.vmx.vmcs_shadow_maddr =
1820 mfn_to_maddr(domain_page_map_to_mfn(vvmcx));
1821 }
1822 else
1823 {
1824 hvm_unmap_guest_frame(vvmcx, 1);
1825 vvmcx = NULL;
1826 }
1827 }
1828 else
1829 {
1830 vmfail(regs, VMX_INSN_VMPTRLD_INVALID_PHYADDR);
1831 goto out;
1832 }
1833 }
1834
1835 if ( cpu_has_vmx_vmcs_shadowing )
1836 nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
1837
1838 vmsucceed(regs);
1839
1840 out:
1841 return X86EMUL_OKAY;
1842 }
1843
static int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
1845 {
1846 struct vcpu *v = current;
1847 struct vmx_inst_decoded decode;
1848 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1849 pagefault_info_t pfinfo;
1850 unsigned long gpa = 0;
1851 int rc;
1852
1853 rc = decode_vmx_inst(regs, &decode, &gpa);
1854 if ( rc != X86EMUL_OKAY )
1855 return rc;
1856
1857 gpa = nvcpu->nv_vvmcxaddr;
1858
1859 rc = hvm_copy_to_guest_linear(decode.mem, &gpa, decode.len, 0, &pfinfo);
1860 if ( rc == HVMTRANS_bad_linear_to_gfn )
1861 hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
1862 if ( rc != HVMTRANS_okay )
1863 return X86EMUL_EXCEPTION;
1864
1865 vmsucceed(regs);
1866 return X86EMUL_OKAY;
1867 }
1868
static int nvmx_handle_vmclear(struct cpu_user_regs *regs)
1870 {
1871 struct vcpu *v = current;
1872 struct vmx_inst_decoded decode;
1873 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1874 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1875 unsigned long gpa = 0;
1876 void *vvmcs;
1877 int rc;
1878
1879 rc = decode_vmx_inst(regs, &decode, &gpa);
1880 if ( rc != X86EMUL_OKAY )
1881 return rc;
1882
1883 if ( gpa == vcpu_2_nvmx(v).vmxon_region_pa )
1884 {
1885 vmfail(regs, VMX_INSN_VMCLEAR_WITH_VMXON_PTR);
1886 goto out;
1887 }
1888
1889 if ( (gpa & ~PAGE_MASK) || !gfn_valid(v->domain, gaddr_to_gfn(gpa)) )
1890 {
1891 vmfail(regs, VMX_INSN_VMCLEAR_INVALID_PHYADDR);
1892 goto out;
1893 }
1894
1895 if ( gpa == nvcpu->nv_vvmcxaddr )
1896 {
1897 if ( cpu_has_vmx_vmcs_shadowing )
1898 nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
1899 clear_vvmcs_launched(&nvmx->launched_list,
1900 PFN_DOWN(v->arch.hvm.vmx.vmcs_shadow_maddr));
1901 nvmx_purge_vvmcs(v);
1902 vmsucceed(regs);
1903 }
1904 else
1905 {
1906 /* Even if this VMCS isn't the current one, we must clear it. */
1907 bool_t writable;
1908
1909 vvmcs = hvm_map_guest_frame_rw(paddr_to_pfn(gpa), 0, &writable);
1910
1911 if ( !vvmcs )
1912 {
1913 vmfail(regs, VMX_INSN_VMCLEAR_INVALID_PHYADDR);
1914 goto out;
1915 }
1916
1917 if ( writable )
1918 {
1919 clear_vvmcs_launched(&nvmx->launched_list,
1920 mfn_x(domain_page_map_to_mfn(vvmcs)));
1921 vmsucceed(regs);
1922 }
1923 else
1924 vmfail(regs, VMX_INSN_VMCLEAR_INVALID_PHYADDR);
1925
1926 hvm_unmap_guest_frame(vvmcs, 0);
1927 }
1928
1929 out:
1930 return X86EMUL_OKAY;
1931 }
1932
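/*
 * Emulate VMREAD: fetch the field named by the register operand from the
 * current VVMCS and write it to the destination register or memory operand.
 */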
1933 static int nvmx_handle_vmread(struct cpu_user_regs *regs)
1934 {
1935 struct vcpu *v = current;
1936 struct vmx_inst_decoded decode;
1937 pagefault_info_t pfinfo;
1938 u64 value = 0;
1939 int rc;
1940
1941 rc = decode_vmx_inst(regs, &decode, NULL);
1942 if ( rc != X86EMUL_OKAY )
1943 return rc;
1944
1945 if ( !vvmcx_valid(v) )
1946 {
1947 vmfail_invalid(regs);
1948 return X86EMUL_OKAY;
1949 }
1950
1951 rc = get_vvmcs_safe(v, reg_read(regs, decode.reg2), &value);
1952 if ( rc != VMX_INSN_SUCCEED )
1953 {
1954 vmfail(regs, rc);
1955 return X86EMUL_OKAY;
1956 }
1957
1958 switch ( decode.type ) {
1959 case VMX_INST_MEMREG_TYPE_MEMORY:
1960 rc = hvm_copy_to_guest_linear(decode.mem, &value, decode.len, 0, &pfinfo);
1961 if ( rc == HVMTRANS_bad_linear_to_gfn )
1962 hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
1963 if ( rc != HVMTRANS_okay )
1964 return X86EMUL_EXCEPTION;
1965 break;
1966 case VMX_INST_MEMREG_TYPE_REG:
1967 reg_write(regs, decode.reg1, value);
1968 break;
1969 }
1970
1971 vmsucceed(regs);
1972 return X86EMUL_OKAY;
1973 }
1974
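/*
 * Emulate VMWRITE: store the operand into the field of the current VVMCS
 * named by the register operand.  Writes to the I/O or MSR bitmap fields
 * also invalidate the corresponding cached bitmap mappings.
 */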
1975 static int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
1976 {
1977 struct vcpu *v = current;
1978 struct vmx_inst_decoded decode;
1979 unsigned long operand;
1980 u64 vmcs_encoding;
1981 enum vmx_insn_errno err;
1982 int rc;
1983
1984 rc = decode_vmx_inst(regs, &decode, &operand);
1985 if ( rc != X86EMUL_OKAY )
1986 return rc;
1987
1988 if ( !vvmcx_valid(v) )
1989 {
1990 vmfail_invalid(regs);
1991 return X86EMUL_OKAY;
1992 }
1993
1994 vmcs_encoding = reg_read(regs, decode.reg2);
1995 err = set_vvmcs_safe(v, vmcs_encoding, operand);
1996 if ( err != VMX_INSN_SUCCEED )
1997 {
1998 vmfail(regs, err);
1999 return X86EMUL_OKAY;
2000 }
2001
2002 switch ( vmcs_encoding & ~VMCS_HIGH(0) )
2003 {
2004 case IO_BITMAP_A:
2005 unmap_io_bitmap(v, 0);
2006 break;
2007 case IO_BITMAP_B:
2008 unmap_io_bitmap(v, 1);
2009 break;
2010 case MSR_BITMAP:
2011 unmap_msr_bitmap(v);
2012 break;
2013 }
2014
2015 vmsucceed(regs);
2016
2017 return X86EMUL_OKAY;
2018 }
2019
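/*
 * Emulate INVEPT.  Single-context invalidation flushes the nested p2m
 * derived from the given EPTP; all-context invalidation flushes every
 * nested p2m of the domain and issues a real INVEPT.
 */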
2020 static int nvmx_handle_invept(struct cpu_user_regs *regs)
2021 {
2022 struct vmx_inst_decoded decode;
2023 unsigned long eptp;
2024 int ret;
2025
2026 if ( (ret = decode_vmx_inst(regs, &decode, &eptp)) != X86EMUL_OKAY )
2027 return ret;
2028
2029 switch ( reg_read(regs, decode.reg2) )
2030 {
2031 case INVEPT_SINGLE_CONTEXT:
2032 {
2033 np2m_flush_base(current, eptp);
2034 break;
2035 }
2036 case INVEPT_ALL_CONTEXT:
2037 p2m_flush_nestedp2m(current->domain);
2038 __invept(INVEPT_ALL_CONTEXT, 0);
2039 break;
2040 default:
2041 vmfail(regs, VMX_INSN_INVEPT_INVVPID_INVALID_OP);
2042 return X86EMUL_OKAY;
2043 }
2044 vmsucceed(regs);
2045 return X86EMUL_OKAY;
2046 }
2047
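/*
 * Emulate INVVPID.  Every supported invalidation type is over-approximated
 * by flushing the ASID used for L2, invalidating all TLB entries tagged
 * with it.
 */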
2048 static int nvmx_handle_invvpid(struct cpu_user_regs *regs)
2049 {
2050 struct vmx_inst_decoded decode;
2051 unsigned long vpid;
2052 int ret;
2053
2054 if ( (ret = decode_vmx_inst(regs, &decode, &vpid)) != X86EMUL_OKAY )
2055 return ret;
2056
2057 switch ( reg_read(regs, decode.reg2) )
2058 {
2059 /* Just invalidate all tlb entries for all types! */
2060 case INVVPID_INDIVIDUAL_ADDR:
2061 case INVVPID_SINGLE_CONTEXT:
2062 case INVVPID_ALL_CONTEXT:
2063 hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(current).nv_n2asid);
2064 break;
2065 default:
2066 vmfail(regs, VMX_INSN_INVEPT_INVVPID_INVALID_OP);
2067 return X86EMUL_OKAY;
2068 }
2069
2070 vmsucceed(regs);
2071 return X86EMUL_OKAY;
2072 }
2073
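/*
 * Common entry point for VMX instruction intercepts taken while L1 is
 * running: perform the architectural #UD and #GP(0) checks first, then
 * dispatch to the per-instruction emulation handlers above.
 */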
2074 int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason)
2075 {
2076 struct vcpu *curr = current;
2077 int ret;
2078
2079 if ( !(curr->arch.hvm.guest_cr[4] & X86_CR4_VMXE) ||
2080 !nestedhvm_enabled(curr->domain) ||
2081 (vmx_guest_x86_mode(curr) < (hvm_long_mode_active(curr) ? 8 : 2)) ||
2082 (exit_reason != EXIT_REASON_VMXON && !nvmx_vcpu_in_vmx(curr)) )
2083 {
2084 hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
2085 return X86EMUL_EXCEPTION;
2086 }
2087
2088 if ( vmx_get_cpl() > 0 )
2089 {
2090 hvm_inject_hw_exception(TRAP_gp_fault, 0);
2091 return X86EMUL_EXCEPTION;
2092 }
2093
2094 if ( nestedhvm_vcpu_in_guestmode(curr) )
2095 {
2096 /* Should have been handled by nvmx_n2_vmexit_handler()... */
2097 ASSERT_UNREACHABLE();
2098 domain_crash(curr->domain);
2099 return X86EMUL_UNHANDLEABLE;
2100 }
2101
2102 switch ( exit_reason )
2103 {
2104 case EXIT_REASON_VMXOFF:
2105 ret = nvmx_handle_vmxoff(regs);
2106 break;
2107
2108 case EXIT_REASON_VMXON:
2109 ret = nvmx_handle_vmxon(regs);
2110 break;
2111
2112 case EXIT_REASON_VMCLEAR:
2113 ret = nvmx_handle_vmclear(regs);
2114 break;
2115
2116 case EXIT_REASON_VMPTRLD:
2117 ret = nvmx_handle_vmptrld(regs);
2118 break;
2119
2120 case EXIT_REASON_VMPTRST:
2121 ret = nvmx_handle_vmptrst(regs);
2122 break;
2123
2124 case EXIT_REASON_VMREAD:
2125 ret = nvmx_handle_vmread(regs);
2126 break;
2127
2128 case EXIT_REASON_VMWRITE:
2129 ret = nvmx_handle_vmwrite(regs);
2130 break;
2131
2132 case EXIT_REASON_VMLAUNCH:
2133 ret = nvmx_handle_vmlaunch(regs);
2134 break;
2135
2136 case EXIT_REASON_VMRESUME:
2137 ret = nvmx_handle_vmresume(regs);
2138 break;
2139
2140 case EXIT_REASON_INVEPT:
2141 ret = nvmx_handle_invept(regs);
2142 break;
2143
2144 case EXIT_REASON_INVVPID:
2145 ret = nvmx_handle_invvpid(regs);
2146 break;
2147
2148 default:
2149 ASSERT_UNREACHABLE();
2150 domain_crash(curr->domain);
2151 ret = X86EMUL_UNHANDLEABLE;
2152 break;
2153 }
2154
2155 return ret;
2156 }
2157
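/*
 * VMX control capability MSRs hold the allowed 0-settings in bits 31:0 (a
 * bit set there is fixed to 1 in the control) and the allowed 1-settings in
 * bits 63:32 (a bit set there may be set in the control).  __emul_value()
 * builds the capability value Xen wants to emulate, and gen_vmx_msr()
 * combines it with the host MSR: a control is offered as settable only if
 * both Xen and the hardware allow it, and is reported as fixed to 1 if
 * either requires it.  For example, gen_vmx_msr(data,
 * VMX_PINBASED_CTLS_DEFAULT1, host_data) below only advertises the bits in
 * "data" that the host MSR also permits.
 */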
2158 #define __emul_value(enable1, default1) \
2159 ((enable1 | default1) << 32 | (default1))
2160
2161 #define gen_vmx_msr(enable1, default1, host_value) \
2162 (((__emul_value(enable1, default1) & host_value) & (~0ul << 32)) | \
2163 ((uint32_t)(__emul_value(enable1, default1) | host_value)))
2164
2165 /*
2166 * Capability reporting
2167 */
2168 int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
2169 {
2170 struct vcpu *v = current;
2171 struct domain *d = v->domain;
2172 u64 data = 0, host_data = 0;
2173 int r = 1;
2174
2175 /* VMX capability MSRs are available only when the guest supports VMX. */
2176 if ( !nestedhvm_enabled(d) || !d->arch.cpuid->basic.vmx )
2177 return 0;
2178
2179 /*
2180 * These MSRs are only available when flags in other MSRs are set.
2181 * These prerequisites are listed in the Intel 64 and IA-32
2182 * Architectures Software Developer’s Manual, Vol 3, Appendix A.
2183 */
2184 switch ( msr )
2185 {
2186 case MSR_IA32_VMX_PROCBASED_CTLS2:
2187 if ( !cpu_has_vmx_secondary_exec_control )
2188 return 0;
2189 break;
2190
2191 case MSR_IA32_VMX_EPT_VPID_CAP:
2192 if ( !(cpu_has_vmx_ept || cpu_has_vmx_vpid) )
2193 return 0;
2194 break;
2195
2196 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2197 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2198 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2199 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2200 if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) )
2201 return 0;
2202 break;
2203
2204 case MSR_IA32_VMX_VMFUNC:
2205 if ( !cpu_has_vmx_vmfunc )
2206 return 0;
2207 break;
2208 }
2209
2210 rdmsrl(msr, host_data);
2211
2212 /*
2213 * Remove unsupported features from the L1 guest's capability MSRs.
2214 */
2215 switch ( msr ) {
2216 case MSR_IA32_VMX_BASIC:
2217 {
2218 const struct vmcs_struct *vmcs =
2219 map_domain_page(_mfn(PFN_DOWN(v->arch.hvm.vmx.vmcs_pa)));
2220
2221 data = (host_data & (~0ul << 32)) |
2222 (vmcs->vmcs_revision_id & 0x7fffffff);
2223 unmap_domain_page(vmcs);
2224
2225 if ( !cpu_has_vmx_vmcs_shadowing )
2226 {
2227 /* Report vmcs_region_size (MSR bits 44:32) as 4096. */
2228 data &= ~VMX_BASIC_VMCS_SIZE_MASK;
2229 data |= 1ULL << 44;
2230 }
2231
2232 break;
2233 }
2234 case MSR_IA32_VMX_PINBASED_CTLS:
2235 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2236 /* 1-settings */
2237 data = PIN_BASED_EXT_INTR_MASK |
2238 PIN_BASED_NMI_EXITING |
2239 PIN_BASED_PREEMPT_TIMER;
2240 data = gen_vmx_msr(data, VMX_PINBASED_CTLS_DEFAULT1, host_data);
2241 break;
2242 case MSR_IA32_VMX_PROCBASED_CTLS:
2243 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2244 {
2245 u32 default1_bits = VMX_PROCBASED_CTLS_DEFAULT1;
2246 /* 1-settings */
2247 data = CPU_BASED_HLT_EXITING |
2248 CPU_BASED_VIRTUAL_INTR_PENDING |
2249 CPU_BASED_CR8_LOAD_EXITING |
2250 CPU_BASED_CR8_STORE_EXITING |
2251 CPU_BASED_INVLPG_EXITING |
2252 CPU_BASED_CR3_LOAD_EXITING |
2253 CPU_BASED_CR3_STORE_EXITING |
2254 CPU_BASED_MONITOR_EXITING |
2255 CPU_BASED_MWAIT_EXITING |
2256 CPU_BASED_MOV_DR_EXITING |
2257 CPU_BASED_ACTIVATE_IO_BITMAP |
2258 CPU_BASED_USE_TSC_OFFSETING |
2259 CPU_BASED_UNCOND_IO_EXITING |
2260 CPU_BASED_RDTSC_EXITING |
2261 CPU_BASED_MONITOR_TRAP_FLAG |
2262 CPU_BASED_VIRTUAL_NMI_PENDING |
2263 CPU_BASED_ACTIVATE_MSR_BITMAP |
2264 CPU_BASED_PAUSE_EXITING |
2265 CPU_BASED_RDPMC_EXITING |
2266 CPU_BASED_TPR_SHADOW |
2267 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2268
2269 if ( msr == MSR_IA32_VMX_TRUE_PROCBASED_CTLS )
2270 default1_bits &= ~(CPU_BASED_CR3_LOAD_EXITING |
2271 CPU_BASED_CR3_STORE_EXITING |
2272 CPU_BASED_INVLPG_EXITING);
2273
2274 data = gen_vmx_msr(data, default1_bits, host_data);
2275 break;
2276 }
2277 case MSR_IA32_VMX_PROCBASED_CTLS2:
2278 /* 1-settings */
2279 data = SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
2280 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2281 SECONDARY_EXEC_ENABLE_VPID |
2282 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2283 SECONDARY_EXEC_ENABLE_EPT;
2284 data = gen_vmx_msr(data, 0, host_data);
2285 break;
2286 case MSR_IA32_VMX_EXIT_CTLS:
2287 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2288 /* 1-settings */
2289 data = VM_EXIT_ACK_INTR_ON_EXIT |
2290 VM_EXIT_IA32E_MODE |
2291 VM_EXIT_SAVE_PREEMPT_TIMER |
2292 VM_EXIT_SAVE_GUEST_PAT |
2293 VM_EXIT_LOAD_HOST_PAT |
2294 VM_EXIT_SAVE_GUEST_EFER |
2295 VM_EXIT_LOAD_HOST_EFER |
2296 VM_EXIT_LOAD_PERF_GLOBAL_CTRL;
2297 data = gen_vmx_msr(data, VMX_EXIT_CTLS_DEFAULT1, host_data);
2298 break;
2299 case MSR_IA32_VMX_ENTRY_CTLS:
2300 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2301 /* 1-settings */
2302 data = VM_ENTRY_LOAD_GUEST_PAT |
2303 VM_ENTRY_LOAD_GUEST_EFER |
2304 VM_ENTRY_LOAD_PERF_GLOBAL_CTRL |
2305 VM_ENTRY_IA32E_MODE;
2306 data = gen_vmx_msr(data, VMX_ENTRY_CTLS_DEFAULT1, host_data);
2307 break;
2308
2309 case MSR_IA32_VMX_VMCS_ENUM:
2310 /* The highest index of any VVMCS field encoding (MSR bits 9:1) is 0x1f. */
2311 data = 0x1f << 1;
2312 break;
2313 case MSR_IA32_VMX_CR0_FIXED0:
2314 /* PG, PE bits must be 1 in VMX operation */
2315 data = X86_CR0_PE | X86_CR0_PG;
2316 break;
2317 case MSR_IA32_VMX_CR0_FIXED1:
2318 /* allow 0-settings for all bits */
2319 data = 0xffffffff;
2320 break;
2321 case MSR_IA32_VMX_CR4_FIXED0:
2322 /* VMXE bit must be 1 in VMX operation */
2323 data = X86_CR4_VMXE;
2324 break;
2325 case MSR_IA32_VMX_CR4_FIXED1:
2326 data = hvm_cr4_guest_valid_bits(d, false);
2327 break;
2328 case MSR_IA32_VMX_MISC:
2329 /* The CR3-target feature is not supported for now. */
2330 data = host_data & ~VMX_MISC_CR3_TARGET;
2331 break;
2332 case MSR_IA32_VMX_EPT_VPID_CAP:
2333 data = nept_get_ept_vpid_cap();
2334 break;
2335 default:
2336 r = 0;
2337 break;
2338 }
2339
2340 *msr_content = data;
2341 return r;
2342 }
2343
2344 /* This function uses L2_gpa to walk the P2M page table in L1.  If the
2345 * walk is successful, the translated address is returned in L1_gpa.  The
2346 * return value tells the caller what to do next.
2347 */
2348 int
2349 nvmx_hap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
2350 unsigned int *page_order, uint8_t *p2m_acc,
2351 bool_t access_r, bool_t access_w, bool_t access_x)
2352 {
2353 int rc;
2354 unsigned long gfn;
2355 uint64_t exit_qual;
2356 uint32_t exit_reason = EXIT_REASON_EPT_VIOLATION;
2357 uint32_t rwx_rights = (access_x << 2) | (access_w << 1) | access_r;
2358 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2359
2360 vmx_vmcs_enter(v);
2361
2362 __vmread(EXIT_QUALIFICATION, &exit_qual);
2363 rc = nept_translate_l2ga(v, L2_gpa, page_order, rwx_rights, &gfn, p2m_acc,
2364 &exit_qual, &exit_reason);
2365 switch ( rc )
2366 {
2367 case EPT_TRANSLATE_SUCCEED:
2368 *L1_gpa = (gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK);
2369 rc = NESTEDHVM_PAGEFAULT_DONE;
2370 break;
2371 case EPT_TRANSLATE_VIOLATION:
2372 case EPT_TRANSLATE_MISCONFIG:
2373 rc = NESTEDHVM_PAGEFAULT_INJECT;
2374 nvmx->ept.exit_reason = exit_reason;
2375 nvmx->ept.exit_qual = exit_qual;
2376 break;
2377 case EPT_TRANSLATE_RETRY:
2378 rc = NESTEDHVM_PAGEFAULT_RETRY;
2379 break;
2380 default:
2381 gdprintk(XENLOG_ERR, "GUEST EPT translation error: %d\n", rc);
2382 BUG();
2383 break;
2384 }
2385
2386 vmx_vmcs_exit(v);
2387
2388 return rc;
2389 }
2390
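/*
 * If a VM exit interrupted event delivery through the IDT, and L0 is going
 * to handle the exit itself rather than forwarding it to L1, re-inject the
 * original event on the next VM entry.
 */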
2391 void nvmx_idtv_handling(void)
2392 {
2393 struct vcpu *v = current;
2394 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2395 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
2396 unsigned long idtv_info, reason;
2397
2398 __vmread(IDT_VECTORING_INFO, &idtv_info);
2399 if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
2400 return;
2401
2402 /*
2403 * If L0 can resolve the fault that caused the IDT vectoring, the event
2404 * should be reinjected here; otherwise, pass it on to L1.
2405 */
2406 __vmread(VM_EXIT_REASON, &reason);
2407 if ( reason != EXIT_REASON_EPT_VIOLATION ?
2408 !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) :
2409 !nvcpu->nv_vmexit_pending )
2410 {
2411 __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
2412 if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
2413 {
2414 __vmread(IDT_VECTORING_ERROR_CODE, &reason);
2415 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, reason);
2416 }
2417 /*
2418 * SDM 23.2.4, if L1 tries to inject a software interrupt
2419 * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
2420 * the value of previous VM_ENTRY_INSTRUCTION_LEN.
2421 *
2422 * This means EXIT_INSTRUCTION_LEN is always valid here, for
2423 * software interrupts both injected by L1, and generated in L2.
2424 */
2425 __vmread(VM_EXIT_INSTRUCTION_LEN, &reason);
2426 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, reason);
2427 }
2428 }
2429
2430 /*
2431 * L2 VMExit handling
2432 * return 1: Done or skip the normal layer 0 hypervisor process.
2433 * Typically it requires layer 1 hypervisor processing
2434 * or it may be already processed here.
2435 * 0: Require the normal layer 0 process.
2436 */
2437 int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
2438 unsigned int exit_reason)
2439 {
2440 struct vcpu *v = current;
2441 struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
2442 struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2443 u32 ctrl;
2444
2445 nvcpu->nv_vmexit_pending = 0;
2446 nvmx->intr.intr_info = 0;
2447 nvmx->intr.error_code = 0;
2448
2449 switch ( exit_reason ) {
2450 case EXIT_REASON_EXCEPTION_NMI:
2451 {
2452 unsigned long intr_info;
2453 u32 valid_mask = MASK_INSR(X86_EVENTTYPE_HW_EXCEPTION,
2454 INTR_INFO_INTR_TYPE_MASK) |
2455 INTR_INFO_VALID_MASK;
2456 u64 exec_bitmap;
2457 int vector;
2458
2459 __vmread(VM_EXIT_INTR_INFO, &intr_info);
2460 vector = intr_info & INTR_INFO_VECTOR_MASK;
2461 /*
2462 * Decided by the L0 and L1 exception bitmaps: if the vector is set in
2463 * both, L0 has priority on #PF and #NM, while L1 has priority on others.
2464 */
2465 if ( vector == TRAP_page_fault )
2466 {
2467 if ( paging_mode_hap(v->domain) )
2468 nvcpu->nv_vmexit_pending = 1;
2469 }
2470 else if ( vector == TRAP_no_device )
2471 {
2472 if ( v->fpu_dirtied )
2473 nvcpu->nv_vmexit_pending = 1;
2474 }
2475 else if ( (intr_info & valid_mask) == valid_mask )
2476 {
2477 exec_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
2478
2479 if ( exec_bitmap & (1 << vector) )
2480 nvcpu->nv_vmexit_pending = 1;
2481 }
2482 break;
2483 }
2484 case EXIT_REASON_WBINVD:
2485 case EXIT_REASON_EPT_VIOLATION:
2486 case EXIT_REASON_EPT_MISCONFIG:
2487 case EXIT_REASON_EXTERNAL_INTERRUPT:
2488 /* pass to L0 handler */
2489 break;
2490 case VMX_EXIT_REASONS_FAILED_VMENTRY:
2491 case EXIT_REASON_TRIPLE_FAULT:
2492 case EXIT_REASON_TASK_SWITCH:
2493 case EXIT_REASON_CPUID:
2494 case EXIT_REASON_GETSEC:
2495 case EXIT_REASON_INVD:
2496 case EXIT_REASON_VMCALL:
2497 case EXIT_REASON_VMCLEAR:
2498 case EXIT_REASON_VMLAUNCH:
2499 case EXIT_REASON_VMPTRLD:
2500 case EXIT_REASON_VMPTRST:
2501 case EXIT_REASON_VMREAD:
2502 case EXIT_REASON_VMRESUME:
2503 case EXIT_REASON_VMWRITE:
2504 case EXIT_REASON_VMXOFF:
2505 case EXIT_REASON_VMXON:
2506 case EXIT_REASON_INVEPT:
2507 case EXIT_REASON_XSETBV:
2508 case EXIT_REASON_INVVPID:
2509 /* inject to L1 */
2510 nvcpu->nv_vmexit_pending = 1;
2511 break;
2512
2513 case EXIT_REASON_MSR_READ:
2514 case EXIT_REASON_MSR_WRITE:
2515 ctrl = __n2_exec_control(v);
2516
2517 /* Without ACTIVATE_MSR_BITMAP, all MSRs are intercepted. */
2518 if ( !(ctrl & CPU_BASED_ACTIVATE_MSR_BITMAP) )
2519 nvcpu->nv_vmexit_pending = 1;
2520 else if ( !nvmx->msrbitmap )
2521 /* ACTIVATE_MSR_BITMAP set, but L2 bitmap not mapped??? */
2522 domain_crash(v->domain);
2523 else
2524 nvcpu->nv_vmexit_pending =
2525 vmx_msr_is_intercepted(nvmx->msrbitmap, regs->ecx,
2526 exit_reason == EXIT_REASON_MSR_WRITE);
2527 break;
2528
2529 case EXIT_REASON_IO_INSTRUCTION:
2530 ctrl = __n2_exec_control(v);
2531 if ( ctrl & CPU_BASED_ACTIVATE_IO_BITMAP )
2532 {
2533 unsigned long qual;
2534 u16 port, size;
2535
2536 __vmread(EXIT_QUALIFICATION, &qual);
2537 port = qual >> 16;
2538 size = (qual & 7) + 1;
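/*
 * Check every byte of the access against L1's I/O bitmaps.  A port number
 * wrapping past 0xffff means the access straddles the end of the bitmap,
 * which must be reflected to L1 as well.
 */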
2539 do {
2540 const u8 *bitmap = nvmx->iobitmap[port >> 15];
2541
2542 if ( bitmap[(port & 0x7fff) >> 3] & (1 << (port & 7)) )
2543 nvcpu->nv_vmexit_pending = 1;
2544 if ( !--size )
2545 break;
2546 if ( !++port )
2547 nvcpu->nv_vmexit_pending = 1;
2548 } while ( !nvcpu->nv_vmexit_pending );
2549 if ( !nvcpu->nv_vmexit_pending )
2550 printk(XENLOG_G_WARNING "L0 PIO %04x\n", port);
2551 }
2552 else if ( ctrl & CPU_BASED_UNCOND_IO_EXITING )
2553 nvcpu->nv_vmexit_pending = 1;
2554 break;
2555
2556 case EXIT_REASON_PENDING_VIRT_INTR:
2557 ctrl = __n2_exec_control(v);
2558 if ( ctrl & CPU_BASED_VIRTUAL_INTR_PENDING )
2559 nvcpu->nv_vmexit_pending = 1;
2560 break;
2561 case EXIT_REASON_PENDING_VIRT_NMI:
2562 ctrl = __n2_exec_control(v);
2563 if ( ctrl & CPU_BASED_VIRTUAL_NMI_PENDING )
2564 nvcpu->nv_vmexit_pending = 1;
2565 break;
2566 case EXIT_REASON_MONITOR_TRAP_FLAG:
2567 ctrl = __n2_exec_control(v);
2568 if ( ctrl & CPU_BASED_MONITOR_TRAP_FLAG )
2569 nvcpu->nv_vmexit_pending = 1;
2570 break;
2571 case EXIT_REASON_ACCESS_GDTR_OR_IDTR:
2572 case EXIT_REASON_ACCESS_LDTR_OR_TR:
2573 ctrl = __n2_secondary_exec_control(v);
2574 if ( ctrl & SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING )
2575 nvcpu->nv_vmexit_pending = 1;
2576 break;
2577 case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
2578 ctrl = __n2_pin_exec_control(v);
2579 if ( ctrl & PIN_BASED_PREEMPT_TIMER )
2580 nvcpu->nv_vmexit_pending = 1;
2581 break;
2582 /* L1 has priority in handling several other types of exits. */
2583 case EXIT_REASON_HLT:
2584 ctrl = __n2_exec_control(v);
2585 if ( ctrl & CPU_BASED_HLT_EXITING )
2586 nvcpu->nv_vmexit_pending = 1;
2587 break;
2588 case EXIT_REASON_RDTSC:
2589 case EXIT_REASON_RDTSCP:
2590 ctrl = __n2_exec_control(v);
2591 if ( ctrl & CPU_BASED_RDTSC_EXITING )
2592 nvcpu->nv_vmexit_pending = 1;
2593 else
2594 {
2595 /*
2596 * Special handling is needed if L1 doesn't intercept RDTSC, to avoid
2597 * changing guest_tsc and messing up timekeeping in L1.
2598 */
2599 msr_split(regs, hvm_get_guest_tsc(v) + get_vvmcs(v, TSC_OFFSET));
2600 if ( exit_reason == EXIT_REASON_RDTSCP )
2601 regs->rcx = v->arch.msrs->tsc_aux;
2602 update_guest_eip();
2603
2604 return 1;
2605 }
2606 break;
2607 case EXIT_REASON_RDPMC:
2608 ctrl = __n2_exec_control(v);
2609 if ( ctrl & CPU_BASED_RDPMC_EXITING )
2610 nvcpu->nv_vmexit_pending = 1;
2611 break;
2612 case EXIT_REASON_MWAIT_INSTRUCTION:
2613 ctrl = __n2_exec_control(v);
2614 if ( ctrl & CPU_BASED_MWAIT_EXITING )
2615 nvcpu->nv_vmexit_pending = 1;
2616 break;
2617 case EXIT_REASON_PAUSE_INSTRUCTION:
2618 ctrl = __n2_exec_control(v);
2619 if ( ctrl & CPU_BASED_PAUSE_EXITING )
2620 nvcpu->nv_vmexit_pending = 1;
2621 break;
2622 case EXIT_REASON_MONITOR_INSTRUCTION:
2623 ctrl = __n2_exec_control(v);
2624 if ( ctrl & CPU_BASED_MONITOR_EXITING )
2625 nvcpu->nv_vmexit_pending = 1;
2626 break;
2627 case EXIT_REASON_DR_ACCESS:
2628 ctrl = __n2_exec_control(v);
2629 if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
2630 v->arch.hvm.flag_dr_dirty )
2631 nvcpu->nv_vmexit_pending = 1;
2632 break;
2633 case EXIT_REASON_INVLPG:
2634 ctrl = __n2_exec_control(v);
2635 if ( ctrl & CPU_BASED_INVLPG_EXITING )
2636 nvcpu->nv_vmexit_pending = 1;
2637 break;
2638 case EXIT_REASON_CR_ACCESS:
2639 {
2640 cr_access_qual_t qual;
2641 u32 mask = 0;
2642
2643 __vmread(EXIT_QUALIFICATION, &qual.raw);
2644 /* The decision below also depends on the L1 guest's exec controls. */
2645 ctrl = __n2_exec_control(v);
2646
2647 /* CLTS/LMSW strictly act on CR0 */
2648 if ( qual.access_type >= VMX_CR_ACCESS_TYPE_CLTS )
2649 ASSERT(qual.cr == 0);
2650
2651 if ( qual.cr == 3 )
2652 {
2653 mask = qual.access_type ? CPU_BASED_CR3_STORE_EXITING
2654 : CPU_BASED_CR3_LOAD_EXITING;
2655 if ( ctrl & mask )
2656 nvcpu->nv_vmexit_pending = 1;
2657 }
2658 else if ( qual.cr == 8 )
2659 {
2660 mask = qual.access_type ? CPU_BASED_CR8_STORE_EXITING
2661 : CPU_BASED_CR8_LOAD_EXITING;
2662 if ( ctrl & mask )
2663 nvcpu->nv_vmexit_pending = 1;
2664 }
2665 else /* CR0, CR4, CLTS, LMSW */
2666 {
2667 /*
2668 * On a VM exit for a CR0/CR4 access, check whether the L1 VMM owns the
2669 * changed bits.
2670 * If so, inject the VM exit into the L1 VMM.
2671 * Otherwise, L0 handles it and syncs the value into the L1 virtual VMCS.
2672 */
2673 unsigned long old_val, val, changed_bits;
2674
2675 switch ( qual.access_type )
2676 {
2677 case VMX_CR_ACCESS_TYPE_MOV_TO_CR:
2678 {
2679 val = *decode_gpr(guest_cpu_user_regs(), qual.gpr);
2680
2681 if ( qual.cr == 0 )
2682 {
2683 u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2684
2685 __vmread(CR0_READ_SHADOW, &old_val);
2686 changed_bits = old_val ^ val;
2687 if ( changed_bits & cr0_gh_mask )
2688 nvcpu->nv_vmexit_pending = 1;
2689 else
2690 {
2691 u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2692
2693 set_vvmcs(v, GUEST_CR0,
2694 (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
2695 }
2696 }
2697 else if ( qual.cr == 4 )
2698 {
2699 u64 cr4_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
2700
2701 __vmread(CR4_READ_SHADOW, &old_val);
2702 changed_bits = old_val ^ val;
2703 if ( changed_bits & cr4_gh_mask )
2704 nvcpu->nv_vmexit_pending = 1;
2705 else
2706 {
2707 u64 guest_cr4 = get_vvmcs(v, GUEST_CR4);
2708
2709 set_vvmcs(v, GUEST_CR4,
2710 (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
2711 }
2712 }
2713 else
2714 nvcpu->nv_vmexit_pending = 1;
2715 break;
2716 }
2717
2718 case VMX_CR_ACCESS_TYPE_CLTS:
2719 {
2720 u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2721
2722 if ( cr0_gh_mask & X86_CR0_TS )
2723 nvcpu->nv_vmexit_pending = 1;
2724 else
2725 {
2726 u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2727
2728 set_vvmcs(v, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
2729 }
2730 break;
2731 }
2732
2733 case VMX_CR_ACCESS_TYPE_LMSW:
2734 {
2735 u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2736
2737 __vmread(CR0_READ_SHADOW, &old_val);
2738 old_val &= X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS;
2739 val = qual.lmsw_data &
2740 (X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS);
2741 changed_bits = old_val ^ val;
2742 if ( changed_bits & cr0_gh_mask )
2743 nvcpu->nv_vmexit_pending = 1;
2744 else
2745 {
2746 u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2747
2748 set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
2749 }
2750 break;
2751 }
2752
2753 default:
2754 ASSERT_UNREACHABLE();
2755 break;
2756 }
2757 }
2758 break;
2759 }
2760 case EXIT_REASON_APIC_ACCESS:
2761 ctrl = __n2_secondary_exec_control(v);
2762 if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES )
2763 nvcpu->nv_vmexit_pending = 1;
2764 break;
2765 case EXIT_REASON_TPR_BELOW_THRESHOLD:
2766 ctrl = __n2_exec_control(v);
2767 if ( ctrl & CPU_BASED_TPR_SHADOW )
2768 nvcpu->nv_vmexit_pending = 1;
2769 break;
2770 default:
2771 gprintk(XENLOG_ERR, "Unhandled nested vmexit: reason %u\n",
2772 exit_reason);
2773 domain_crash(v->domain);
2774 }
2775
2776 return ( nvcpu->nv_vmexit_pending == 1 );
2777 }
2778
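/*
 * Recompute the CR0/CR4 read shadow seen by L2 after a CR access that L0
 * handled without forwarding the exit to L1 (see the CR access handling in
 * nvmx_n2_vmexit_handler() above).
 */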
2779 void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
2780 {
2781 unsigned long cr_field, read_shadow_field, mask_field;
2782
2783 switch ( cr )
2784 {
2785 case 0:
2786 cr_field = GUEST_CR0;
2787 read_shadow_field = CR0_READ_SHADOW;
2788 mask_field = CR0_GUEST_HOST_MASK;
2789 break;
2790 case 4:
2791 cr_field = GUEST_CR4;
2792 read_shadow_field = CR4_READ_SHADOW;
2793 mask_field = CR4_GUEST_HOST_MASK;
2794 break;
2795 default:
2796 gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr);
2797 return;
2798 }
2799
2800 if ( !nestedhvm_vmswitch_in_progress(v) )
2801 {
2802 unsigned long virtual_cr_mask =
2803 get_vvmcs(v, mask_field);
2804
2805 /*
2806 * We get here when L2 changed cr in a way that did not change
2807 * any of L1's shadowed bits (see nvmx_n2_vmexit_handler),
2808 * but did change L0 shadowed bits. So we first calculate the
2809 * effective cr value that L1 would like to write into the
2810 * hardware. It consists of the L2-owned bits from the new
2811 * value combined with the L1-owned bits from L1's guest cr.
2812 */
2813 v->arch.hvm.guest_cr[cr] &= ~virtual_cr_mask;
2814 v->arch.hvm.guest_cr[cr] |= virtual_cr_mask &
2815 get_vvmcs(v, cr_field);
2816 }
2817
2818 /* nvcpu.guest_cr holds the value L2 actually wrote to the CR. */
2819 __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
2820 }
2821
2822 /*
2823 * Local variables:
2824 * mode: C
2825 * c-file-style: "BSD"
2826 * c-basic-offset: 4
2827 * tab-width: 4
2828 * indent-tabs-mode: nil
2829 * End:
2830 */
2831