/******************************************************************************
 * arch/x86/mm/hap/hap.c
 *
 * hardware assisted paging
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2007 by XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/irq.h>
#include <xen/domain_page.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/event.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shared.h>
#include <asm/hap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/domain.h>
#include <xen/numa.h>
#include <asm/hvm/nestedhvm.h>

#include "private.h"

/************************************************/
/*          HAP VRAM TRACKING SUPPORT           */
/************************************************/

/*
 * hap_track_dirty_vram()
 * Create the domain's dv_dirty_vram struct on demand.
 * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
 * first encountered.
 * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
 * calling paging_log_dirty_range(), which interrogates each vram
 * page's p2m type looking for pages that have been made writable.
 */

int hap_track_dirty_vram(struct domain *d,
                         unsigned long begin_pfn,
                         unsigned long nr,
                         XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
{
    long rc = 0;
    struct sh_dirty_vram *dirty_vram;
    uint8_t *dirty_bitmap = NULL;

    if ( nr )
    {
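        /* One bit per pfn: round nr up to a whole number of bytes. */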
        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;

        if ( !paging_mode_log_dirty(d) )
        {
            rc = paging_log_dirty_enable(d, false);
            if ( rc )
                goto out;
        }

        rc = -ENOMEM;
        dirty_bitmap = vzalloc(size);
        if ( !dirty_bitmap )
            goto out;

        paging_lock(d);

        dirty_vram = d->arch.hvm.dirty_vram;
        if ( !dirty_vram )
        {
            rc = -ENOMEM;
            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
            {
                paging_unlock(d);
                goto out;
            }

            d->arch.hvm.dirty_vram = dirty_vram;
        }

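        /*
         * The range is new or has changed: restore the old range to r/w,
         * switch the new range to log-dirty, and report all of its pages
         * as dirty.
         */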
        if ( begin_pfn != dirty_vram->begin_pfn ||
             begin_pfn + nr != dirty_vram->end_pfn )
        {
            unsigned long ostart = dirty_vram->begin_pfn;
            unsigned long oend = dirty_vram->end_pfn;

            dirty_vram->begin_pfn = begin_pfn;
            dirty_vram->end_pfn = begin_pfn + nr;

            paging_unlock(d);

            if ( oend > ostart )
                p2m_change_type_range(d, ostart, oend,
                                      p2m_ram_logdirty, p2m_ram_rw);

            /*
             * Switch vram to log dirty mode, either by setting l1e entries of
             * P2M table to be read-only, or via hardware-assisted log-dirty.
             */
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_rw, p2m_ram_logdirty);

            guest_flush_tlb_mask(d, d->dirty_cpumask);

            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
        }
        else
        {
            paging_unlock(d);

            domain_pause(d);

            /* Flush dirty GFNs potentially cached by hardware. */
            p2m_flush_hardware_cached_dirty(d);

            /* get the bitmap */
            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);

            domain_unpause(d);
        }

        rc = -EFAULT;
        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
            rc = 0;
    }
    else
    {
        paging_lock(d);

        dirty_vram = d->arch.hvm.dirty_vram;
        if ( dirty_vram )
        {
            /*
             * If zero pages are specified while tracking dirty vram,
             * then stop tracking.
             */
            begin_pfn = dirty_vram->begin_pfn;
            nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
            xfree(dirty_vram);
            d->arch.hvm.dirty_vram = NULL;
        }

        paging_unlock(d);
        if ( nr )
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_logdirty, p2m_ram_rw);
    }
out:
    vfree(dirty_bitmap);

    return rc;
}

/************************************************/
/*            HAP LOG DIRTY SUPPORT             */
/************************************************/

/*
 * hap code to call when log_dirty is enabled.  Returns 0 if no problem found.
 *
 * NB: Domains with a device assigned should not set log_global, because
 * there is no way to track memory updates performed by the device.
 */
static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    /*
     * Refuse to turn on global log-dirty mode if
     * there are outstanding p2m_ioreq_server pages.
     */
    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
        return -EBUSY;

    /* turn on PG_log_dirty bit in paging mode */
    paging_lock(d);
    d->arch.paging.mode |= PG_log_dirty;
    paging_unlock(d);

    /* Enable hardware-assisted log-dirty if it is supported. */
    p2m_enable_hardware_log_dirty(d);

    if ( log_global )
    {
        /*
         * Switch to log dirty mode, either by setting l1e entries of P2M table
         * to be read-only, or via hardware-assisted log-dirty.
         */
        p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
        guest_flush_tlb_mask(d, d->dirty_cpumask);
    }
    return 0;
}

static int hap_disable_log_dirty(struct domain *d)
{
    paging_lock(d);
    d->arch.paging.mode &= ~PG_log_dirty;
    paging_unlock(d);

    /* Disable hardware-assisted log-dirty if it is supported. */
    p2m_disable_hardware_log_dirty(d);

    /*
     * switch to normal mode, either by setting l1e entries of P2M table to
     * normal mode, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
    return 0;
}

static void hap_clean_dirty_bitmap(struct domain *d)
{
    /*
     * Switch to log-dirty mode, either by setting l1e entries of P2M table to
     * be read-only, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    guest_flush_tlb_mask(d, d->dirty_cpumask);
}

/************************************************/
/*             HAP SUPPORT FUNCTIONS            */
/************************************************/
static struct page_info *hap_alloc(struct domain *d)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
    if ( unlikely(!pg) )
        return NULL;

    d->arch.paging.hap.free_pages--;

    clear_domain_page(page_to_mfn(pg));

    return pg;
}

static void hap_free(struct domain *d, mfn_t mfn)
{
    struct page_info *pg = mfn_to_page(mfn);

    ASSERT(paging_locked_by_me(d));

    d->arch.paging.hap.free_pages++;
    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}

static struct page_info *hap_alloc_p2m_page(struct domain *d)
{
    struct page_info *pg;

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);
    pg = hap_alloc(d);

    if ( likely(pg != NULL) )
    {
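        /* Re-account the page: it leaves the pool total and counts as a p2m page. */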
        d->arch.paging.hap.total_pages--;
        d->arch.paging.hap.p2m_pages++;
        ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
    }
    else if ( !d->arch.paging.p2m_alloc_failed )
    {
        d->arch.paging.p2m_alloc_failed = 1;
        dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
                d->domain_id);
    }

    paging_unlock(d);
    return pg;
}

static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
{
    struct domain *owner = page_get_owner(pg);

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);

    /* Should still have no owner and count zero. */
    if ( owner || (pg->count_info & PGC_count_mask) )
    {
        printk(XENLOG_WARNING
               "d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n",
               d->domain_id, mfn_x(page_to_mfn(pg)),
               owner ? owner->domain_id : DOMID_INVALID,
               pg->count_info, pg->u.inuse.type_info);
        WARN();
        pg->count_info &= ~PGC_count_mask;
        page_set_owner(pg, NULL);
    }
    d->arch.paging.hap.p2m_pages--;
    d->arch.paging.hap.total_pages++;
    hap_free(d, page_to_mfn(pg));

    paging_unlock(d);
}

/* Return the size of the pool, rounded up to the nearest MB */
unsigned int hap_get_allocation(struct domain *d)
{
    unsigned int pg = d->arch.paging.hap.total_pages
        + d->arch.paging.hap.p2m_pages;

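    /* Shifting by (20 - PAGE_SHIFT) converts pages to MB; round up any remainder. */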
    return ((pg >> (20 - PAGE_SHIFT))
            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}

/* Set the pool of pages to the required number of pages.
 * Returns 0 for success, non-zero for failure. */
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    if ( pages < d->arch.paging.hap.p2m_pages )
        pages = 0;
    else
        pages -= d->arch.paging.hap.p2m_pages;

    for ( ; ; )
    {
        if ( d->arch.paging.hap.total_pages < pages )
        {
            /* Need to allocate more memory from domheap */
            pg = alloc_domheap_page(d, MEMF_no_owner);
            if ( pg == NULL )
            {
                HAP_PRINTK("failed to allocate hap pages.\n");
                return -ENOMEM;
            }
            d->arch.paging.hap.free_pages++;
            d->arch.paging.hap.total_pages++;
            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
        }
        else if ( d->arch.paging.hap.total_pages > pages )
        {
            /* Need to return memory to domheap */
            if ( page_list_empty(&d->arch.paging.hap.freelist) )
            {
                HAP_PRINTK("failed to free enough hap pages.\n");
                return -ENOMEM;
            }
            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
            ASSERT(pg);
            d->arch.paging.hap.free_pages--;
            d->arch.paging.hap.total_pages--;
            free_domheap_page(pg);
        }
        else
            break;

        /* Check to see if we need to yield and try again */
        if ( preempted && general_preempt_check() )
        {
            *preempted = true;
            return 0;
        }
    }

    return 0;
}

static mfn_t hap_make_monitor_table(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct page_info *pg;
    l4_pgentry_t *l4e;
    mfn_t m4mfn;

    ASSERT(pagetable_get_pfn(v->arch.hvm.monitor_table) == 0);

    if ( (pg = hap_alloc(d)) == NULL )
        goto oom;

    m4mfn = page_to_mfn(pg);
    l4e = map_domain_page(m4mfn);

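    /* Install the Xen mappings in the new monitor table. */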
    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
    unmap_domain_page(l4e);

    return m4mfn;

 oom:
    printk(XENLOG_G_ERR "out of memory building monitor pagetable\n");
    domain_crash(d);
    return INVALID_MFN;
}

static void hap_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
{
    struct domain *d = v->domain;

    /* Put the memory back in the pool */
    hap_free(d, mmfn);
}

/************************************************/
/*          HAP DOMAIN LEVEL FUNCTIONS          */
/************************************************/
void hap_domain_init(struct domain *d)
{
    static const struct log_dirty_ops hap_ops = {
        .enable  = hap_enable_log_dirty,
        .disable = hap_disable_log_dirty,
        .clean   = hap_clean_dirty_bitmap,
    };

    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);

    /* Use HAP logdirty mechanism. */
    paging_log_dirty_init(d, &hap_ops);
}

/* return 0 for success, -errno for failure */
int hap_enable(struct domain *d, u32 mode)
{
    unsigned int old_pages;
    unsigned int i;
    int rv = 0;

    if ( mode != (PG_external | PG_translate | PG_refcounts) )
        return -EINVAL;

    /* The function can only be called once per domain. */
    if ( d->arch.paging.mode != 0 )
        return -EEXIST;

    domain_pause(d);

    old_pages = d->arch.paging.hap.total_pages;
    if ( old_pages == 0 )
    {
        paging_lock(d);
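        /* Seed the pool with 256 pages (1MB worth of 4k pages). */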
        rv = hap_set_allocation(d, 256, NULL);
        if ( rv != 0 )
        {
            hap_set_allocation(d, 0, NULL);
            paging_unlock(d);
            goto out;
        }
        paging_unlock(d);
    }

    /* Allow p2m and log-dirty code to borrow our memory */
    d->arch.paging.alloc_page = hap_alloc_p2m_page;
    d->arch.paging.free_page = hap_free_p2m_page;

    /* allocate P2M table */
    rv = p2m_alloc_table(p2m_get_hostp2m(d));
    if ( rv != 0 )
        goto out;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
    {
        rv = p2m_alloc_table(d->arch.nested_p2m[i]);
        if ( rv != 0 )
            goto out;
    }

    if ( hvm_altp2m_supported() )
    {
        /* Init alternate p2m data */
        if ( (d->arch.altp2m_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        if ( (d->arch.altp2m_visible_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        for ( i = 0; i < MAX_EPTP; i++ )
        {
            d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
            d->arch.altp2m_visible_eptp[i] = mfn_x(INVALID_MFN);
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
        {
            rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
            if ( rv != 0 )
                goto out;
        }

        d->arch.altp2m_active = 0;
    }

    /* Now let other users see the new mode */
    d->arch.paging.mode = mode | PG_HAP_enable;

 out:
    domain_unpause(d);
    return rv;
}

void hap_final_teardown(struct domain *d)
{
    unsigned int i;

    if ( hvm_altp2m_supported() )
    {
        d->arch.altp2m_active = 0;

        if ( d->arch.altp2m_eptp )
        {
            free_xenheap_page(d->arch.altp2m_eptp);
            d->arch.altp2m_eptp = NULL;
        }

        if ( d->arch.altp2m_visible_eptp )
        {
            free_xenheap_page(d->arch.altp2m_visible_eptp);
            d->arch.altp2m_visible_eptp = NULL;
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
            p2m_teardown(d->arch.altp2m_p2m[i]);
    }

    /* Destroy nestedp2m's first */
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        p2m_teardown(d->arch.nested_p2m[i]);

    if ( d->arch.paging.hap.total_pages != 0 )
        hap_teardown(d, NULL);

    p2m_teardown(p2m_get_hostp2m(d));
    /* Free any memory that the p2m teardown released */
    paging_lock(d);
    hap_set_allocation(d, 0, NULL);
    ASSERT(d->arch.paging.hap.p2m_pages == 0);
    paging_unlock(d);
}

void hap_teardown(struct domain *d, bool *preempted)
{
    struct vcpu *v;
    mfn_t mfn;

    ASSERT(d->is_dying);
    ASSERT(d != current->domain);

    paging_lock(d); /* Keep various asserts happy */

    if ( paging_mode_enabled(d) )
    {
        /* release the monitor table held by each vcpu */
        for_each_vcpu ( d, v )
        {
            if ( paging_get_hostmode(v) && paging_mode_external(d) )
            {
                mfn = pagetable_get_mfn(v->arch.hvm.monitor_table);
                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
                    hap_destroy_monitor_table(v, mfn);
                v->arch.hvm.monitor_table = pagetable_null();
            }
        }
    }

    if ( d->arch.paging.hap.total_pages != 0 )
    {
        hap_set_allocation(d, 0, preempted);

        if ( preempted && *preempted )
            goto out;

        ASSERT(d->arch.paging.hap.total_pages == 0);
    }

    d->arch.paging.mode &= ~PG_log_dirty;

    XFREE(d->arch.hvm.dirty_vram);

out:
    paging_unlock(d);
}

int hap_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
               XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    int rc;
    bool preempted = false;

    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
        paging_lock(d);
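        /* sc->mb is in MB; convert it to a number of pages. */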
        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
        paging_unlock(d);
        if ( preempted )
            /* Not finished.  Set up to re-run the call. */
            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
                                               u_domctl);
        else
            /* Finished.  Return the new allocation */
            sc->mb = hap_get_allocation(d);
        return rc;
    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
        sc->mb = hap_get_allocation(d);
        /* Fall through... */
    case XEN_DOMCTL_SHADOW_OP_OFF:
        return 0;
    default:
        HAP_PRINTK("Bad hap domctl op %u\n", sc->op);
        return -EINVAL;
    }
}

static const struct paging_mode hap_paging_real_mode;
static const struct paging_mode hap_paging_protected_mode;
static const struct paging_mode hap_paging_pae_mode;
static const struct paging_mode hap_paging_long_mode;

void hap_vcpu_init(struct vcpu *v)
{
    v->arch.paging.mode = &hap_paging_real_mode;
    v->arch.paging.nestedmode = &hap_paging_real_mode;
}

/************************************************/
/*          HAP PAGING MODE FUNCTIONS           */
/************************************************/
/*
 * HAP guests can handle page faults (in the guest page tables) without
 * needing any action from Xen, so we should not be intercepting them.
 */
static int hap_page_fault(struct vcpu *v, unsigned long va,
                          struct cpu_user_regs *regs)
{
    struct domain *d = v->domain;

    printk(XENLOG_G_ERR "Intercepted #PF from %pv with HAP enabled\n", v);
    domain_crash(d);
    return 0;
}

/*
 * HAP guests can handle invlpg without needing any action from Xen, so
 * should not be intercepting it.  However, we need to correctly handle
 * getting here from instruction emulation.
 */
static bool_t hap_invlpg(struct vcpu *v, unsigned long linear)
{
    /*
     * Emulate INVLPGA:
     * We must perform the flush right now, otherwise another vcpu may
     * use the stale nested p2m when we emulate the next VMRUN.
     */
    if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
        p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);

    return 1;
}

static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
{
    v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
    hvm_update_guest_cr3(v, noflush);
}

/*
 * Dummy function to use with on_selected_cpus in order to trigger a vmexit on
 * selected pCPUs. When the VM resumes execution it will get a new ASID/VPID
 * and thus a clean TLB.
 */
static void dummy_flush(void *data)
{
}

static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                      void *ctxt)
{
    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
    cpumask_t *mask = &this_cpu(flush_cpumask);
    struct domain *d = current->domain;
    unsigned int this_cpu = smp_processor_id();
    struct vcpu *v;

    cpumask_clear(mask);

    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
    for_each_vcpu ( d, v )
    {
        unsigned int cpu;

        if ( !flush_vcpu(ctxt, v) )
            continue;

        hvm_asid_flush_vcpu(v);

        cpu = read_atomic(&v->dirty_cpu);
        if ( cpu != this_cpu && is_vcpu_dirty_cpu(cpu) && v->is_running )
            __cpumask_set_cpu(cpu, mask);
    }

    /*
     * Trigger a vmexit on all pCPUs with dirty vCPU state in order to force an
     * ASID/VPID change and hence accomplish a guest TLB flush. Note that vCPUs
     * not currently running will already be flushed when scheduled because of
     * the ASID tickle done in the loop above.
     */
    on_selected_cpus(mask, dummy_flush, NULL, 0);

    return true;
}

const struct paging_mode *
hap_paging_get_mode(struct vcpu *v)
{
    return (!hvm_paging_enabled(v)  ? &hap_paging_real_mode :
            hvm_long_mode_active(v) ? &hap_paging_long_mode :
            hvm_pae_enabled(v)      ? &hap_paging_pae_mode  :
                                      &hap_paging_protected_mode);
}

static void hap_update_paging_modes(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long cr3_gfn = v->arch.hvm.guest_cr[3] >> PAGE_SHIFT;
    p2m_type_t t;

    /* We hold onto the cr3 as it may be modified later, and
     * we need to respect lock ordering. No need for
     * checks here as they are performed by vmx_load_pdptrs
     * (the potential user of the cr3) */
    (void)get_gfn(d, cr3_gfn, &t);
    paging_lock(d);

    v->arch.paging.mode = hap_paging_get_mode(v);

    if ( pagetable_is_null(v->arch.hvm.monitor_table) )
    {
        mfn_t mmfn = hap_make_monitor_table(v);
        v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
        make_cr3(v, mmfn);
        hvm_update_host_cr3(v);
    }

    /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
    hap_update_cr3(v, 0, false);

    paging_unlock(d);
    put_gfn(d, cr3_gfn);
}

static int
hap_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, l1_pgentry_t *p,
                    l1_pgentry_t new, unsigned int level)
{
    struct domain *d = p2m->domain;
    uint32_t old_flags;
    bool_t flush_nestedp2m = 0;
    int rc;

    /* We always use the host p2m here, regardless of whether the vcpu
     * is in host or guest mode. The vcpu can be in guest mode when a
     * hypercall passes a domain and (mostly) picks the first vcpu. */

    paging_lock(d);
    old_flags = l1e_get_flags(*p);

    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
        /* We are replacing a valid entry so we need to flush nested p2ms,
         * unless the only change is an increase in access rights. */
        mfn_t omfn = l1e_get_mfn(*p);
        mfn_t nmfn = l1e_get_mfn(new);

        flush_nestedp2m = !(mfn_eq(omfn, nmfn)
            && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
    }

    rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
                          p2m_flags_to_type(old_flags), l1e_get_mfn(new),
                          l1e_get_mfn(*p), level);
    if ( rc )
    {
        paging_unlock(d);
        return rc;
    }

    safe_write_pte(p, new);
    if ( old_flags & _PAGE_PRESENT )
        guest_flush_tlb_mask(d, d->dirty_cpumask);

    paging_unlock(d);

    if ( flush_nestedp2m )
        p2m_flush_nestedp2m(d);

    return 0;
}

static unsigned long hap_gva_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
{
    return ((paddr_t)gva >> PAGE_SHIFT);
}

static unsigned long hap_p2m_ga_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
    return (ga >> PAGE_SHIFT);
}

/* Entry points into this mode of the hap code. */
static const struct paging_mode hap_paging_real_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_real_mode,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_real_mode,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .flush_tlb              = flush_tlb,
    .guest_levels           = 1
};

static const struct paging_mode hap_paging_protected_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_2_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_2_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .flush_tlb              = flush_tlb,
    .guest_levels           = 2
};

static const struct paging_mode hap_paging_pae_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_3_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_3_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .flush_tlb              = flush_tlb,
    .guest_levels           = 3
};

static const struct paging_mode hap_paging_long_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_4_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_4_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .flush_tlb              = flush_tlb,
    .guest_levels           = 4
};

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */