/******************************************************************************
 * arch/x86/mm/hap/hap.c
 *
 * hardware assisted paging
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2007 by XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/irq.h>
#include <xen/domain_page.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/event.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shared.h>
#include <asm/hap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/domain.h>
#include <xen/numa.h>
#include <asm/hvm/nestedhvm.h>

#include "private.h"

/************************************************/
/*          HAP VRAM TRACKING SUPPORT           */
/************************************************/

/*
 * hap_track_dirty_vram()
 * Create the domain's dv_dirty_vram struct on demand.
 * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
 * first encountered.
 * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
 * calling paging_log_dirty_range(), which interrogates each vram
 * page's p2m type looking for pages that have been made writable.
 */

int hap_track_dirty_vram(struct domain *d,
                         unsigned long begin_pfn,
                         unsigned long nr,
                         XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
{
    long rc = 0;
    struct sh_dirty_vram *dirty_vram;
    uint8_t *dirty_bitmap = NULL;

    if ( nr )
    {
        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;

        if ( !paging_mode_log_dirty(d) )
        {
            rc = paging_log_dirty_enable(d, false);
            if ( rc )
                goto out;
        }

        rc = -ENOMEM;
        dirty_bitmap = vzalloc(size);
        if ( !dirty_bitmap )
            goto out;

        paging_lock(d);

        dirty_vram = d->arch.hvm.dirty_vram;
        if ( !dirty_vram )
        {
            rc = -ENOMEM;
            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
            {
                paging_unlock(d);
                goto out;
            }

            d->arch.hvm.dirty_vram = dirty_vram;
        }

        if ( begin_pfn != dirty_vram->begin_pfn ||
             begin_pfn + nr != dirty_vram->end_pfn )
        {
            unsigned long ostart = dirty_vram->begin_pfn;
            unsigned long oend = dirty_vram->end_pfn;

            dirty_vram->begin_pfn = begin_pfn;
            dirty_vram->end_pfn = begin_pfn + nr;

            paging_unlock(d);

            if ( oend > ostart )
                p2m_change_type_range(d, ostart, oend,
                                      p2m_ram_logdirty, p2m_ram_rw);

            /*
             * Switch vram to log dirty mode, either by setting l1e entries of
             * P2M table to be read-only, or via hardware-assisted log-dirty.
             */
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_rw, p2m_ram_logdirty);

            guest_flush_tlb_mask(d, d->dirty_cpumask);

            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
        }
        else
        {
            paging_unlock(d);

            domain_pause(d);

            /* Flush dirty GFNs potentially cached by hardware. */
            p2m_flush_hardware_cached_dirty(d);

            /* get the bitmap */
            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);

            domain_unpause(d);
        }

        rc = -EFAULT;
        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
            rc = 0;
    }
    else
    {
        paging_lock(d);

        dirty_vram = d->arch.hvm.dirty_vram;
        if ( dirty_vram )
        {
            /*
             * If zero pages are specified while tracking dirty vram,
             * then stop tracking.
             */
            begin_pfn = dirty_vram->begin_pfn;
            nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
            xfree(dirty_vram);
            d->arch.hvm.dirty_vram = NULL;
        }

        paging_unlock(d);
        if ( nr )
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_logdirty, p2m_ram_rw);
    }
 out:
    vfree(dirty_bitmap);

    return rc;
}

/************************************************/
/*            HAP LOG DIRTY SUPPORT             */
/************************************************/

/*
 * HAP code to call when log_dirty is enabled.  Returns 0 if no problem found.
 *
 * NB: Domains that have a device assigned should not set log_global, because
 * there is no way to track memory updates made by the device.
 */
static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    /*
     * Refuse to turn on global log-dirty mode if
     * there are outstanding p2m_ioreq_server pages.
     */
    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
        return -EBUSY;

    /* turn on PG_log_dirty bit in paging mode */
    paging_lock(d);
    d->arch.paging.mode |= PG_log_dirty;
    paging_unlock(d);

    /* Enable hardware-assisted log-dirty if it is supported. */
    p2m_enable_hardware_log_dirty(d);

    if ( log_global )
    {
        /*
         * Switch to log dirty mode, either by setting l1e entries of P2M table
         * to be read-only, or via hardware-assisted log-dirty.
         */
        p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
        guest_flush_tlb_mask(d, d->dirty_cpumask);
    }
    return 0;
}

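/*
 * HAP code to call when log_dirty is disabled: clear PG_log_dirty, turn off
 * any hardware-assisted log-dirty, and convert p2m_ram_logdirty entries back
 * to p2m_ram_rw.
 */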
static int hap_disable_log_dirty(struct domain *d)
{
    paging_lock(d);
    d->arch.paging.mode &= ~PG_log_dirty;
    paging_unlock(d);

    /* Disable hardware-assisted log-dirty if it is supported. */
    p2m_disable_hardware_log_dirty(d);

    /*
     * Switch back to normal mode, either by setting l1e entries of the P2M
     * table to normal mode, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
    return 0;
}

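/*
 * Re-arm dirty tracking after the log-dirty bitmap has been harvested:
 * return all RAM to p2m_ram_logdirty and flush stale TLB entries.
 */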
static void hap_clean_dirty_bitmap(struct domain *d)
{
    /*
     * Switch to log-dirty mode, either by setting l1e entries of P2M table to
     * be read-only, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    guest_flush_tlb_mask(d, d->dirty_cpumask);
}

/************************************************/
/*             HAP SUPPORT FUNCTIONS            */
/************************************************/
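/*
 * Take a page from the domain's HAP pool free list and clear it.  The caller
 * must hold the paging lock.  Returns NULL if the pool is exhausted.
 */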
static struct page_info *hap_alloc(struct domain *d)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
    if ( unlikely(!pg) )
        return NULL;

    d->arch.paging.hap.free_pages--;

    clear_domain_page(page_to_mfn(pg));

    return pg;
}

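/*
 * Return a page to the domain's HAP pool free list.  The caller must hold
 * the paging lock.
 */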
static void hap_free(struct domain *d, mfn_t mfn)
{
    struct page_info *pg = mfn_to_page(mfn);

    ASSERT(paging_locked_by_me(d));

    d->arch.paging.hap.free_pages++;
    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}

static struct page_info *hap_alloc_p2m_page(struct domain *d)
{
    struct page_info *pg;

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);
    pg = hap_alloc(d);

    if ( likely(pg != NULL) )
    {
        d->arch.paging.hap.total_pages--;
        d->arch.paging.hap.p2m_pages++;
        ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
    }
    else if ( !d->arch.paging.p2m_alloc_failed )
    {
        d->arch.paging.p2m_alloc_failed = 1;
        dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
                d->domain_id);
    }

    paging_unlock(d);
    return pg;
}

static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
{
    struct domain *owner = page_get_owner(pg);

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);

    /* Should still have no owner and count zero. */
    if ( owner || (pg->count_info & PGC_count_mask) )
    {
        printk(XENLOG_WARNING
               "d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n",
               d->domain_id, mfn_x(page_to_mfn(pg)),
               owner ? owner->domain_id : DOMID_INVALID,
               pg->count_info, pg->u.inuse.type_info);
        WARN();
        pg->count_info &= ~PGC_count_mask;
        page_set_owner(pg, NULL);
    }
    d->arch.paging.hap.p2m_pages--;
    d->arch.paging.hap.total_pages++;
    hap_free(d, page_to_mfn(pg));

    paging_unlock(d);
}

/* Return the size of the pool, rounded up to the nearest MB */
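/*
 * For example, with 4KiB pages (PAGE_SHIFT == 12) the shift below is 8, so a
 * pool of 256 pages reports as 1MB and one of 257 pages rounds up to 2MB.
 */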
unsigned int hap_get_allocation(struct domain *d)
{
    unsigned int pg = d->arch.paging.hap.total_pages
        + d->arch.paging.hap.p2m_pages;

    return ((pg >> (20 - PAGE_SHIFT))
            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}

/* Set the pool of pages to the required number of pages.
 * Returns 0 for success, non-zero for failure. */
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    if ( pages < d->arch.paging.hap.p2m_pages )
        pages = 0;
    else
        pages -= d->arch.paging.hap.p2m_pages;

    for ( ; ; )
    {
        if ( d->arch.paging.hap.total_pages < pages )
        {
            /* Need to allocate more memory from domheap */
            pg = alloc_domheap_page(d, MEMF_no_owner);
            if ( pg == NULL )
            {
                HAP_PRINTK("failed to allocate hap pages.\n");
                return -ENOMEM;
            }
            d->arch.paging.hap.free_pages++;
            d->arch.paging.hap.total_pages++;
            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
        }
        else if ( d->arch.paging.hap.total_pages > pages )
        {
            /* Need to return memory to domheap */
            if ( page_list_empty(&d->arch.paging.hap.freelist) )
            {
                HAP_PRINTK("failed to free enough hap pages.\n");
                return -ENOMEM;
            }
            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
            ASSERT(pg);
            d->arch.paging.hap.free_pages--;
            d->arch.paging.hap.total_pages--;
            free_domheap_page(pg);
        }
        else
            break;

        /* Check to see if we need to yield and try again */
        if ( preempted && general_preempt_check() )
        {
            *preempted = true;
            return 0;
        }
    }

    return 0;
}

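/*
 * Allocate and initialise the monitor pagetable (the L4 Xen runs on while
 * this vcpu is in guest context) from the HAP pool.  Crashes the domain if
 * the pool is exhausted.
 */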
static mfn_t hap_make_monitor_table(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct page_info *pg;
    l4_pgentry_t *l4e;
    mfn_t m4mfn;

    ASSERT(pagetable_get_pfn(v->arch.hvm.monitor_table) == 0);

    if ( (pg = hap_alloc(d)) == NULL )
        goto oom;

    m4mfn = page_to_mfn(pg);
    l4e = map_domain_page(m4mfn);

    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
    unmap_domain_page(l4e);

    return m4mfn;

 oom:
    printk(XENLOG_G_ERR "out of memory building monitor pagetable\n");
    domain_crash(d);
    return INVALID_MFN;
}

static void hap_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
{
    struct domain *d = v->domain;

    /* Put the memory back in the pool */
    hap_free(d, mmfn);
}

/************************************************/
/*          HAP DOMAIN LEVEL FUNCTIONS          */
/************************************************/
void hap_domain_init(struct domain *d)
{
    static const struct log_dirty_ops hap_ops = {
        .enable  = hap_enable_log_dirty,
        .disable = hap_disable_log_dirty,
        .clean   = hap_clean_dirty_bitmap,
    };

    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);

    /* Use HAP logdirty mechanism. */
    paging_log_dirty_init(d, &hap_ops);
}

/* return 0 for success, -errno for failure */
int hap_enable(struct domain *d, u32 mode)
{
    unsigned int old_pages;
    unsigned int i;
    int rv = 0;

    if ( mode != (PG_external | PG_translate | PG_refcounts) )
        return -EINVAL;

    /* The function can only be called once per domain. */
    if ( d->arch.paging.mode != 0 )
        return -EEXIST;

    domain_pause(d);

    old_pages = d->arch.paging.hap.total_pages;
    if ( old_pages == 0 )
    {
        paging_lock(d);
        rv = hap_set_allocation(d, 256, NULL);
        if ( rv != 0 )
        {
            hap_set_allocation(d, 0, NULL);
            paging_unlock(d);
            goto out;
        }
        paging_unlock(d);
    }

    /* Allow p2m and log-dirty code to borrow our memory */
    d->arch.paging.alloc_page = hap_alloc_p2m_page;
    d->arch.paging.free_page = hap_free_p2m_page;

    /* allocate P2M table */
    rv = p2m_alloc_table(p2m_get_hostp2m(d));
    if ( rv != 0 )
        goto out;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
    {
        rv = p2m_alloc_table(d->arch.nested_p2m[i]);
        if ( rv != 0 )
            goto out;
    }

    if ( hvm_altp2m_supported() )
    {
        /* Init alternate p2m data */
        if ( (d->arch.altp2m_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        if ( (d->arch.altp2m_visible_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        for ( i = 0; i < MAX_EPTP; i++ )
        {
            d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
            d->arch.altp2m_visible_eptp[i] = mfn_x(INVALID_MFN);
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
        {
            rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
            if ( rv != 0 )
                goto out;
        }

        d->arch.altp2m_active = 0;
    }

    /* Now let other users see the new mode */
    d->arch.paging.mode = mode | PG_HAP_enable;

 out:
    domain_unpause(d);
    return rv;
}

void hap_final_teardown(struct domain *d)
{
    unsigned int i;

    if ( hvm_altp2m_supported() )
    {
        d->arch.altp2m_active = 0;

        if ( d->arch.altp2m_eptp )
        {
            free_xenheap_page(d->arch.altp2m_eptp);
            d->arch.altp2m_eptp = NULL;
        }

        if ( d->arch.altp2m_visible_eptp )
        {
            free_xenheap_page(d->arch.altp2m_visible_eptp);
            d->arch.altp2m_visible_eptp = NULL;
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
            p2m_teardown(d->arch.altp2m_p2m[i]);
    }

    /* Destroy nestedp2m's first */
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        p2m_teardown(d->arch.nested_p2m[i]);

    if ( d->arch.paging.hap.total_pages != 0 )
        hap_teardown(d, NULL);

    p2m_teardown(p2m_get_hostp2m(d));
    /* Free any memory that the p2m teardown released */
    paging_lock(d);
    hap_set_allocation(d, 0, NULL);
    ASSERT(d->arch.paging.hap.p2m_pages == 0);
    paging_unlock(d);
}

void hap_teardown(struct domain *d, bool *preempted)
{
    struct vcpu *v;
    mfn_t mfn;

    ASSERT(d->is_dying);
    ASSERT(d != current->domain);

    paging_lock(d); /* Keep various asserts happy */

    if ( paging_mode_enabled(d) )
    {
        /* release the monitor table held by each vcpu */
        for_each_vcpu ( d, v )
        {
            if ( paging_get_hostmode(v) && paging_mode_external(d) )
            {
                mfn = pagetable_get_mfn(v->arch.hvm.monitor_table);
                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
                    hap_destroy_monitor_table(v, mfn);
                v->arch.hvm.monitor_table = pagetable_null();
            }
        }
    }

    if ( d->arch.paging.hap.total_pages != 0 )
    {
        hap_set_allocation(d, 0, preempted);

        if ( preempted && *preempted )
            goto out;

        ASSERT(d->arch.paging.hap.total_pages == 0);
    }

    d->arch.paging.mode &= ~PG_log_dirty;

    XFREE(d->arch.hvm.dirty_vram);

 out:
    paging_unlock(d);
}

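/*
 * Handle the XEN_DOMCTL_SHADOW_OP_* subops relevant to HAP: resize the HAP
 * pool (setting up a continuation if preempted) or report its size in MB.
 */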
int hap_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
               XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    int rc;
    bool preempted = false;

    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
        paging_lock(d);
        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
        paging_unlock(d);
        if ( preempted )
            /* Not finished.  Set up to re-run the call. */
            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
                                               u_domctl);
        else
            /* Finished.  Return the new allocation */
            sc->mb = hap_get_allocation(d);
        return rc;
    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
        sc->mb = hap_get_allocation(d);
        /* Fall through... */
    case XEN_DOMCTL_SHADOW_OP_OFF:
        return 0;
    default:
        HAP_PRINTK("Bad hap domctl op %u\n", sc->op);
        return -EINVAL;
    }
}

static const struct paging_mode hap_paging_real_mode;
static const struct paging_mode hap_paging_protected_mode;
static const struct paging_mode hap_paging_pae_mode;
static const struct paging_mode hap_paging_long_mode;

void hap_vcpu_init(struct vcpu *v)
{
    v->arch.paging.mode = &hap_paging_real_mode;
    v->arch.paging.nestedmode = &hap_paging_real_mode;
}

/************************************************/
/*          HAP PAGING MODE FUNCTIONS           */
/************************************************/
/*
 * HAP guests can handle page faults (in the guest page tables) without
 * needing any action from Xen, so we should not be intercepting them.
 */
static int hap_page_fault(struct vcpu *v, unsigned long va,
                          struct cpu_user_regs *regs)
{
    struct domain *d = v->domain;

    printk(XENLOG_G_ERR "Intercepted #PF from %pv with HAP enabled\n", v);
    domain_crash(d);
    return 0;
}

/*
 * HAP guests can handle invlpg without needing any action from Xen, so
 * should not be intercepting it.  However, we need to correctly handle
 * getting here from instruction emulation.
 */
static bool_t hap_invlpg(struct vcpu *v, unsigned long linear)
{
    /*
     * Emulate INVLPGA:
     * Must perform the flush right now, otherwise another vcpu may use the
     * stale nested p2m when we do the next VMRUN emulation.
     */
    if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
        p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);

    return 1;
}

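/*
 * With HAP the guest's CR3 is consumed directly by hardware, so updating it
 * only requires mirroring guest_cr[3] into hw_cr[3] and asking
 * hvm_update_guest_cr3() to propagate it.
 */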
static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
{
    v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
    hvm_update_guest_cr3(v, noflush);
}

/*
 * Dummy function to use with on_selected_cpus in order to trigger a vmexit on
 * selected pCPUs.  When the VM resumes execution it will get a new ASID/VPID
 * and thus a clean TLB.
 */
static void dummy_flush(void *data)
{
}

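/*
 * Flush the guest TLBs of the vcpus selected by flush_vcpu(): tickle each
 * selected vcpu's ASID so it is refreshed on the next VM entry, and IPI the
 * pCPUs currently running such vcpus so they take a vmexit and pick up the
 * new ASID/VPID.
 */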
static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                      void *ctxt)
{
    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
    cpumask_t *mask = &this_cpu(flush_cpumask);
    struct domain *d = current->domain;
    unsigned int this_cpu = smp_processor_id();
    struct vcpu *v;

    cpumask_clear(mask);

    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
    for_each_vcpu ( d, v )
    {
        unsigned int cpu;

        if ( !flush_vcpu(ctxt, v) )
            continue;

        hvm_asid_flush_vcpu(v);

        cpu = read_atomic(&v->dirty_cpu);
        if ( cpu != this_cpu && is_vcpu_dirty_cpu(cpu) && v->is_running )
            __cpumask_set_cpu(cpu, mask);
    }

    /*
     * Trigger a vmexit on all pCPUs with dirty vCPU state in order to force an
     * ASID/VPID change and hence accomplish a guest TLB flush. Note that vCPUs
     * not currently running will already be flushed when scheduled because of
     * the ASID tickle done in the loop above.
     */
    on_selected_cpus(mask, dummy_flush, NULL, 0);

    return true;
}

const struct paging_mode *
hap_paging_get_mode(struct vcpu *v)
{
    return (!hvm_paging_enabled(v)  ? &hap_paging_real_mode :
            hvm_long_mode_active(v) ? &hap_paging_long_mode :
            hvm_pae_enabled(v)      ? &hap_paging_pae_mode  :
                                      &hap_paging_protected_mode);
}

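/*
 * Recalculate this vcpu's paging mode (real, 2-level, PAE or long) after a
 * guest control-register change, building the monitor table on first use.
 */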
static void hap_update_paging_modes(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long cr3_gfn = v->arch.hvm.guest_cr[3] >> PAGE_SHIFT;
    p2m_type_t t;

    /* We hold on to the cr3 gfn as it may be modified later, and
     * we need to respect lock ordering.  No need for checks here as
     * they are performed by vmx_load_pdptrs (the potential user of
     * the cr3). */
    (void)get_gfn(d, cr3_gfn, &t);
    paging_lock(d);

    v->arch.paging.mode = hap_paging_get_mode(v);

    if ( pagetable_is_null(v->arch.hvm.monitor_table) )
    {
        mfn_t mmfn = hap_make_monitor_table(v);
        v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
        make_cr3(v, mmfn);
        hvm_update_host_cr3(v);
    }

    /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
    hap_update_cr3(v, 0, false);

    paging_unlock(d);
    put_gfn(d, cr3_gfn);
}

static int
hap_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, l1_pgentry_t *p,
                    l1_pgentry_t new, unsigned int level)
{
    struct domain *d = p2m->domain;
    uint32_t old_flags;
    bool_t flush_nestedp2m = 0;
    int rc;

    /* We always use the host p2m here, regardless of whether the vcpu
     * is in host or guest mode.  The vcpu can be in guest mode by
     * a hypercall which passes a domain and chooses mostly the first
     * vcpu. */

    paging_lock(d);
    old_flags = l1e_get_flags(*p);

    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
        /* We are replacing a valid entry so we need to flush nested p2ms,
         * unless the only change is an increase in access rights. */
        mfn_t omfn = l1e_get_mfn(*p);
        mfn_t nmfn = l1e_get_mfn(new);

        flush_nestedp2m = !(mfn_eq(omfn, nmfn)
            && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
    }

    rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
                          p2m_flags_to_type(old_flags), l1e_get_mfn(new),
                          l1e_get_mfn(*p), level);
    if ( rc )
    {
        paging_unlock(d);
        return rc;
    }

    safe_write_pte(p, new);
    if ( old_flags & _PAGE_PRESENT )
        guest_flush_tlb_mask(d, d->dirty_cpumask);

    paging_unlock(d);

    if ( flush_nestedp2m )
        p2m_flush_nestedp2m(d);

    return 0;
}

static unsigned long hap_gva_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
{
    return ((paddr_t)gva >> PAGE_SHIFT);
}

static unsigned long hap_p2m_ga_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
    return (ga >> PAGE_SHIFT);
}

/* Entry points into this mode of the hap code. */
static const struct paging_mode hap_paging_real_mode = {
    .page_fault          = hap_page_fault,
    .invlpg              = hap_invlpg,
    .gva_to_gfn          = hap_gva_to_gfn_real_mode,
    .p2m_ga_to_gfn       = hap_p2m_ga_to_gfn_real_mode,
    .update_cr3          = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry     = hap_write_p2m_entry,
    .flush_tlb           = flush_tlb,
    .guest_levels        = 1
};

static const struct paging_mode hap_paging_protected_mode = {
    .page_fault          = hap_page_fault,
    .invlpg              = hap_invlpg,
    .gva_to_gfn          = hap_gva_to_gfn_2_levels,
    .p2m_ga_to_gfn       = hap_p2m_ga_to_gfn_2_levels,
    .update_cr3          = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry     = hap_write_p2m_entry,
    .flush_tlb           = flush_tlb,
    .guest_levels        = 2
};

static const struct paging_mode hap_paging_pae_mode = {
    .page_fault          = hap_page_fault,
    .invlpg              = hap_invlpg,
    .gva_to_gfn          = hap_gva_to_gfn_3_levels,
    .p2m_ga_to_gfn       = hap_p2m_ga_to_gfn_3_levels,
    .update_cr3          = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry     = hap_write_p2m_entry,
    .flush_tlb           = flush_tlb,
    .guest_levels        = 3
};

static const struct paging_mode hap_paging_long_mode = {
    .page_fault          = hap_page_fault,
    .invlpg              = hap_invlpg,
    .gva_to_gfn          = hap_gva_to_gfn_4_levels,
    .p2m_ga_to_gfn       = hap_p2m_ga_to_gfn_4_levels,
    .update_cr3          = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry     = hap_write_p2m_entry,
    .flush_tlb           = flush_tlb,
    .guest_levels        = 4
};

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */