1 /******************************************************************************
2  * arch/x86/mm/p2m.c
3  *
4  * physical-to-machine mappings for automatically-translated domains.
5  *
6  * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
7  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
8  * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
9  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
10  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; If not, see <http://www.gnu.org/licenses/>.
24  */
25 
26 #include <xen/iommu.h>
27 #include <xen/mem_access.h>
28 #include <xen/vm_event.h>
29 #include <xen/event.h>
30 #include <xen/param.h>
31 #include <public/vm_event.h>
32 #include <asm/domain.h>
33 #include <asm/page.h>
34 #include <asm/paging.h>
35 #include <asm/p2m.h>
36 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
37 #include <asm/mem_sharing.h>
38 #include <asm/hvm/nestedhvm.h>
39 #include <asm/altp2m.h>
40 #include <asm/vm_event.h>
41 #include <xsm/xsm.h>
42 
43 #include "mm-locks.h"
44 
45 /* Turn on/off host superpage page table support for hap, default on. */
46 bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1;
47 boolean_param("hap_1gb", opt_hap_1gb);
48 boolean_param("hap_2mb", opt_hap_2mb);
49 
50 DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
51 
52 static void p2m_nestedp2m_init(struct p2m_domain *p2m)
53 {
54 #ifdef CONFIG_HVM
55     INIT_LIST_HEAD(&p2m->np2m_list);
56 
57     p2m->np2m_base = P2M_BASE_EADDR;
58     p2m->np2m_generation = 0;
59 #endif
60 }
61 
62 static int p2m_init_logdirty(struct p2m_domain *p2m)
63 {
64     if ( p2m->logdirty_ranges )
65         return 0;
66 
67     p2m->logdirty_ranges = rangeset_new(p2m->domain, "log-dirty",
68                                         RANGESETF_prettyprint_hex);
69     if ( !p2m->logdirty_ranges )
70         return -ENOMEM;
71 
72     return 0;
73 }
74 
75 static void p2m_free_logdirty(struct p2m_domain *p2m)
76 {
77     if ( !p2m->logdirty_ranges )
78         return;
79 
80     rangeset_destroy(p2m->logdirty_ranges);
81     p2m->logdirty_ranges = NULL;
82 }
83 
84 /* Init the data structures for later use by the p2m code */
85 static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
86 {
87     int ret = 0;
88 
89     mm_rwlock_init(&p2m->lock);
90     INIT_PAGE_LIST_HEAD(&p2m->pages);
91 
92     p2m->domain = d;
93     p2m->default_access = p2m_access_rwx;
94     p2m->p2m_class = p2m_host;
95 
96     p2m_pod_init(p2m);
97     p2m_nestedp2m_init(p2m);
98 
99     if ( hap_enabled(d) && cpu_has_vmx )
100         ret = ept_p2m_init(p2m);
101     else
102         p2m_pt_init(p2m);
103 
104     spin_lock_init(&p2m->ioreq.lock);
105 
106     return ret;
107 }
108 
109 static struct p2m_domain *p2m_init_one(struct domain *d)
110 {
111     struct p2m_domain *p2m = xzalloc(struct p2m_domain);
112 
113     if ( !p2m )
114         return NULL;
115 
116     if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
117         goto free_p2m;
118 
119     if ( p2m_initialise(d, p2m) )
120         goto free_cpumask;
121     return p2m;
122 
123 free_cpumask:
124     free_cpumask_var(p2m->dirty_cpumask);
125 free_p2m:
126     xfree(p2m);
127     return NULL;
128 }
129 
130 static void p2m_free_one(struct p2m_domain *p2m)
131 {
132     p2m_free_logdirty(p2m);
133     if ( hap_enabled(p2m->domain) && cpu_has_vmx )
134         ept_p2m_uninit(p2m);
135     free_cpumask_var(p2m->dirty_cpumask);
136     xfree(p2m);
137 }
138 
139 static int p2m_init_hostp2m(struct domain *d)
140 {
141     struct p2m_domain *p2m = p2m_init_one(d);
142     int rc;
143 
144     if ( !p2m )
145         return -ENOMEM;
146 
147     rc = p2m_init_logdirty(p2m);
148 
149     if ( !rc )
150         d->arch.p2m = p2m;
151     else
152         p2m_free_one(p2m);
153 
154     return rc;
155 }
156 
157 static void p2m_teardown_hostp2m(struct domain *d)
158 {
159     /* Iterate over all p2m tables per domain */
160     struct p2m_domain *p2m = p2m_get_hostp2m(d);
161 
162     if ( p2m )
163     {
164         p2m_free_one(p2m);
165         d->arch.p2m = NULL;
166     }
167 }
168 
169 #ifdef CONFIG_HVM
170 static void p2m_teardown_nestedp2m(struct domain *d)
171 {
172     unsigned int i;
173     struct p2m_domain *p2m;
174 
175     for ( i = 0; i < MAX_NESTEDP2M; i++ )
176     {
177         if ( !d->arch.nested_p2m[i] )
178             continue;
179         p2m = d->arch.nested_p2m[i];
180         list_del(&p2m->np2m_list);
181         p2m_free_one(p2m);
182         d->arch.nested_p2m[i] = NULL;
183     }
184 }
185 
186 static int p2m_init_nestedp2m(struct domain *d)
187 {
188     unsigned int i;
189     struct p2m_domain *p2m;
190 
191     mm_lock_init(&d->arch.nested_p2m_lock);
192     for ( i = 0; i < MAX_NESTEDP2M; i++ )
193     {
194         d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
195         if ( p2m == NULL )
196         {
197             p2m_teardown_nestedp2m(d);
198             return -ENOMEM;
199         }
200         p2m->p2m_class = p2m_nested;
201         p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
202         list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
203     }
204 
205     return 0;
206 }
207 
208 static void p2m_teardown_altp2m(struct domain *d)
209 {
210     unsigned int i;
211     struct p2m_domain *p2m;
212 
213     for ( i = 0; i < MAX_ALTP2M; i++ )
214     {
215         if ( !d->arch.altp2m_p2m[i] )
216             continue;
217         p2m = d->arch.altp2m_p2m[i];
218         d->arch.altp2m_p2m[i] = NULL;
219         p2m_free_one(p2m);
220     }
221 }
222 
223 static int p2m_init_altp2m(struct domain *d)
224 {
225     unsigned int i;
226     struct p2m_domain *p2m;
227     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
228 
229     mm_lock_init(&d->arch.altp2m_list_lock);
230     for ( i = 0; i < MAX_ALTP2M; i++ )
231     {
232         d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
233         if ( p2m == NULL )
234         {
235             p2m_teardown_altp2m(d);
236             return -ENOMEM;
237         }
238         p2m->p2m_class = p2m_alternate;
239         p2m->access_required = hostp2m->access_required;
240         _atomic_set(&p2m->active_vcpus, 0);
241     }
242 
243     return 0;
244 }
245 #endif
246 
247 int p2m_init(struct domain *d)
248 {
249     int rc;
250 
251     rc = p2m_init_hostp2m(d);
252     if ( rc )
253         return rc;
254 
255 #ifdef CONFIG_HVM
256     /* Must initialise nestedp2m unconditionally
257      * since nestedhvm_enabled(d) returns false here.
258      * (p2m_init runs too early for HVM_PARAM_* options) */
259     rc = p2m_init_nestedp2m(d);
260     if ( rc )
261     {
262         p2m_teardown_hostp2m(d);
263         return rc;
264     }
265 
266     rc = p2m_init_altp2m(d);
267     if ( rc )
268     {
269         p2m_teardown_hostp2m(d);
270         p2m_teardown_nestedp2m(d);
271     }
272 #endif
273 
274     return rc;
275 }
276 
277 int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
278                           unsigned long end)
279 {
280     if ( p2m->global_logdirty ||
281          rangeset_contains_range(p2m->logdirty_ranges, start, end) )
282         return 1;
283     if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
284         return -1;
285     return 0;
286 }
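
/*
 * Illustrative sketch (not a caller from this file): the tristate return
 * value above is meant to be consumed as "whole range dirty-tracked",
 * "not tracked at all" or "partial overlap", e.g.:
 *
 *     switch ( p2m_is_logdirty_range(p2m, begin_pfn, begin_pfn + nr - 1) )
 *     {
 *     case 1:  ...every write in the range will fault for dirty logging...
 *     case 0:  ...no part of the range is log-dirty...
 *     case -1: ...partial overlap, fall back to per-gfn queries...
 *     }
 */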
287 
288 static void change_entry_type_global(struct p2m_domain *p2m,
289                                      p2m_type_t ot, p2m_type_t nt)
290 {
291     p2m->change_entry_type_global(p2m, ot, nt);
292     /* Don't allow 'recalculate' operations to change the logdirty state. */
293     if ( ot != nt )
294         p2m->global_logdirty = (nt == p2m_ram_logdirty);
295 }
296 
297 /*
298  * May be called with ot = nt = p2m_ram_rw for its side effect of
299  * recalculating all PTEs in the p2m.
300  */
301 void p2m_change_entry_type_global(struct domain *d,
302                                   p2m_type_t ot, p2m_type_t nt)
303 {
304     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
305 
306     ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
307 
308     p2m_lock(hostp2m);
309 
310     change_entry_type_global(hostp2m, ot, nt);
311 
312 #ifdef CONFIG_HVM
313     if ( unlikely(altp2m_active(d)) )
314     {
315         unsigned int i;
316 
317         for ( i = 0; i < MAX_ALTP2M; i++ )
318             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
319             {
320                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
321 
322                 p2m_lock(altp2m);
323                 change_entry_type_global(altp2m, ot, nt);
324                 p2m_unlock(altp2m);
325             }
326     }
327 #endif
328 
329     p2m_unlock(hostp2m);
330 }
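
/*
 * Minimal usage sketch (assumed caller, not taken from this file): a
 * global PTE recalculation can be requested by passing identical types,
 * relying on the side effect documented above:
 *
 *     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
 *
 * while switching the whole domain into log-dirty mode would pass
 * p2m_ram_rw as the old type and p2m_ram_logdirty as the new one.
 */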
331 
332 #ifdef CONFIG_HVM
333 /* There's already a memory_type_changed() in asm/mtrr.h. */
334 static void _memory_type_changed(struct p2m_domain *p2m)
335 {
336     if ( p2m->memory_type_changed )
337         p2m->memory_type_changed(p2m);
338 }
339 
340 void p2m_memory_type_changed(struct domain *d)
341 {
342     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
343 
344     p2m_lock(hostp2m);
345 
346     _memory_type_changed(hostp2m);
347 
348     if ( unlikely(altp2m_active(d)) )
349     {
350         unsigned int i;
351 
352         for ( i = 0; i < MAX_ALTP2M; i++ )
353             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
354             {
355                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
356 
357                 p2m_lock(altp2m);
358                 _memory_type_changed(altp2m);
359                 p2m_unlock(altp2m);
360             }
361     }
362 
363     p2m_unlock(hostp2m);
364 }
365 #endif
366 
367 int p2m_set_ioreq_server(struct domain *d,
368                          unsigned int flags,
369                          struct hvm_ioreq_server *s)
370 {
371     struct p2m_domain *p2m = p2m_get_hostp2m(d);
372     int rc;
373 
374     /*
375      * Use lock to prevent concurrent setting attempts
376      * from multiple ioreq servers.
377      */
378     spin_lock(&p2m->ioreq.lock);
379 
380     /* Unmap the ioreq server from the p2m type by passing a flags value of 0. */
381     if ( flags == 0 )
382     {
383         rc = -EINVAL;
384         if ( p2m->ioreq.server != s )
385             goto out;
386 
387         p2m->ioreq.server = NULL;
388         p2m->ioreq.flags = 0;
389     }
390     else
391     {
392         rc = -EBUSY;
393         if ( p2m->ioreq.server != NULL )
394             goto out;
395 
396         /*
397          * It is possible that an ioreq server has just been unmapped and
398          * released the spin lock while some p2m_ioreq_server entries
399          * remain in the p2m table. Refuse another ioreq server mapping
400          * request in that case.
401          */
402         if ( read_atomic(&p2m->ioreq.entry_count) )
403             goto out;
404 
405         p2m->ioreq.server = s;
406         p2m->ioreq.flags = flags;
407     }
408 
409     rc = 0;
410 
411  out:
412     spin_unlock(&p2m->ioreq.lock);
413 
414     return rc;
415 }
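
/*
 * Usage sketch (hypothetical device-model-driven caller; the WRITE flag
 * shown is the one a device model would normally request, and any
 * non-zero flags value takes the same path):
 *
 *     rc = p2m_set_ioreq_server(d, XEN_DMOP_IOREQ_MEM_ACCESS_WRITE, s);
 *     ...
 *     rc = p2m_set_ioreq_server(d, 0, s);   // flags == 0 unmaps it again
 */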
416 
417 struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
418                                               unsigned int *flags)
419 {
420     struct p2m_domain *p2m = p2m_get_hostp2m(d);
421     struct hvm_ioreq_server *s;
422 
423     spin_lock(&p2m->ioreq.lock);
424 
425     s = p2m->ioreq.server;
426     *flags = p2m->ioreq.flags;
427 
428     spin_unlock(&p2m->ioreq.lock);
429     return s;
430 }
431 
432 void p2m_enable_hardware_log_dirty(struct domain *d)
433 {
434     struct p2m_domain *p2m = p2m_get_hostp2m(d);
435 
436     if ( p2m->enable_hardware_log_dirty )
437         p2m->enable_hardware_log_dirty(p2m);
438 }
439 
440 void p2m_disable_hardware_log_dirty(struct domain *d)
441 {
442     struct p2m_domain *p2m = p2m_get_hostp2m(d);
443 
444     if ( p2m->disable_hardware_log_dirty )
445         p2m->disable_hardware_log_dirty(p2m);
446 }
447 
448 void p2m_flush_hardware_cached_dirty(struct domain *d)
449 {
450     struct p2m_domain *p2m = p2m_get_hostp2m(d);
451 
452     if ( p2m->flush_hardware_cached_dirty )
453     {
454         p2m_lock(p2m);
455         p2m->flush_hardware_cached_dirty(p2m);
456         p2m_unlock(p2m);
457     }
458 }
459 
460 /*
461  * Force a synchronous P2M TLB flush if a deferred flush is pending.
462  *
463  * Must be called with the p2m lock held.
464  */
465 void p2m_tlb_flush_sync(struct p2m_domain *p2m)
466 {
467     if ( p2m->need_flush ) {
468         p2m->need_flush = 0;
469         p2m->tlb_flush(p2m);
470     }
471 }
472 
473 /*
474  * Unlock the p2m lock and do a P2M TLB flush if needed.
475  */
476 void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m)
477 {
478     if ( p2m->need_flush ) {
479         p2m->need_flush = 0;
480         mm_write_unlock(&p2m->lock);
481         p2m->tlb_flush(p2m);
482     } else
483         mm_write_unlock(&p2m->lock);
484 }
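
/*
 * Sketch of the intended lock/flush discipline (illustrative only):
 *
 *     p2m_lock(p2m);
 *     ... modify entries; the implementation may set p2m->need_flush ...
 *     p2m_unlock_and_tlb_flush(p2m);   // drop the lock, then flush
 *
 * p2m_tlb_flush_sync() is the variant for code that must keep holding
 * the lock but needs the flush to have completed before it continues.
 */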
485 
486 mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
487                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
488                     unsigned int *page_order, bool_t locked)
489 {
490     mfn_t mfn;
491     gfn_t gfn = _gfn(gfn_l);
492 
493     /* Unshare makes no sense without populate. */
494     if ( q & P2M_UNSHARE )
495         q |= P2M_ALLOC;
496 
497     if ( !p2m || !paging_mode_translate(p2m->domain) )
498     {
499         /* Not necessarily true, but for non-translated guests, we claim
500          * it's the most generic kind of memory */
501         *t = p2m_ram_rw;
502         return _mfn(gfn_l);
503     }
504 
505     if ( locked )
506         /* Grab the lock here, don't release until put_gfn */
507         gfn_lock(p2m, gfn, 0);
508 
509     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
510 
511     /* Check if we need to fork the page */
512     if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
513          !mem_sharing_fork_page(p2m->domain, gfn, q & P2M_UNSHARE) )
514         mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
515 
516     /* Check if we need to unshare the page */
517     if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
518     {
519         ASSERT(p2m_is_hostp2m(p2m));
520         /*
521          * Try to unshare. If we fail, communicate ENOMEM without
522          * sleeping.
523          */
524         if ( mem_sharing_unshare_page(p2m->domain, gfn_l) < 0 )
525             mem_sharing_notify_enomem(p2m->domain, gfn_l, false);
526         mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
527     }
528 
529     if (unlikely((p2m_is_broken(*t))))
530     {
531         /* Return INVALID_MFN to prevent the caller from accessing the page */
532         mfn = INVALID_MFN;
533         if ( q & P2M_ALLOC )
534             domain_crash(p2m->domain);
535     }
536 
537     return mfn;
538 }
539 
540 void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
541 {
542     if ( !p2m || !paging_mode_translate(p2m->domain) )
543         /* Nothing to do in this case */
544         return;
545 
546     ASSERT(gfn_locked_by_me(p2m, gfn));
547 
548     gfn_unlock(p2m, gfn, 0);
549 }
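
/*
 * Typical caller pattern (sketch; get_gfn()/put_gfn() are the wrappers
 * from asm/p2m.h that funnel into the two functions above):
 *
 *     p2m_type_t t;
 *     mfn_t mfn = get_gfn(d, gfn, &t);   // takes the per-gfn lock
 *
 *     if ( p2m_is_ram(t) && mfn_valid(mfn) )
 *         ... use mfn while the lock is held ...
 *     put_gfn(d, gfn);                   // releases the lock taken above
 */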
550 
551 /* Atomically look up a GFN and take a reference count on the backing page. */
552 struct page_info *p2m_get_page_from_gfn(
553     struct p2m_domain *p2m, gfn_t gfn,
554     p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
555 {
556     struct page_info *page = NULL;
557     p2m_access_t _a;
558     p2m_type_t _t;
559     mfn_t mfn;
560 
561     /* Allow t or a to be NULL */
562     t = t ?: &_t;
563     a = a ?: &_a;
564 
565     if ( likely(!p2m_locked_by_me(p2m)) )
566     {
567         /* Fast path: look up and get out */
568         p2m_read_lock(p2m);
569         mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
570         if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
571              && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
572         {
573             page = mfn_to_page(mfn);
574             if ( unlikely(p2m_is_foreign(*t)) )
575             {
576                 struct domain *fdom = page_get_owner_and_reference(page);
577 
578                 ASSERT(fdom != p2m->domain);
579                 if ( fdom == NULL )
580                     page = NULL;
581             }
582             else
583             {
584                 struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
585 
586                 if ( !get_page(page, d) )
587                     page = NULL;
588             }
589         }
590         p2m_read_unlock(p2m);
591 
592         if ( page )
593             return page;
594 
595         /* Error path: not a suitable GFN at all */
596         if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) &&
597              !mem_sharing_is_fork(p2m->domain) )
598             return NULL;
599     }
600 
601     /* Slow path: take the write lock and do fixups */
602     mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
603     if ( p2m_is_ram(*t) && mfn_valid(mfn) )
604     {
605         struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
606 
607         page = mfn_to_page(mfn);
608         if ( !get_page(page, d) )
609             page = NULL;
610     }
611     put_gfn(p2m->domain, gfn_x(gfn));
612 
613     return page;
614 }
615 
616 /* Returns: 0 for success, -errno for failure */
617 int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
618                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
619 {
620     struct domain *d = p2m->domain;
621     unsigned long todo = 1ul << page_order;
622     unsigned int order;
623     int set_rc, rc = 0;
624 
625     ASSERT(gfn_locked_by_me(p2m, gfn));
626 
627     while ( todo )
628     {
629         if ( hap_enabled(d) )
630         {
631             unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;
632 
633             fn_mask |= gfn_x(gfn) | todo;
634 
635             order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
636                      hap_has_1gb) ? PAGE_ORDER_1G :
637                     (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
638                      hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
639         }
640         else
641             order = 0;
642 
643         set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1);
644         if ( set_rc )
645             rc = set_rc;
646 
647         gfn = gfn_add(gfn, 1ul << order);
648         if ( !mfn_eq(mfn, INVALID_MFN) )
649             mfn = mfn_add(mfn, 1ul << order);
650         todo -= 1ul << order;
651     }
652 
653     return rc;
654 }
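
/*
 * Worked example of the order-clipping loop above (numbers chosen purely
 * for illustration): a request for gfn 0x200, mfn 0x1a00, page_order 9
 * with hap_has_2mb set is satisfied by a single PAGE_ORDER_2M entry,
 * since gfn, mfn and the remaining count are all 512-aligned.  The same
 * request at gfn 0x201 leaves low bits set in fn_mask, so the loop falls
 * back to 512 individual PAGE_ORDER_4K entries.
 */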
655 
656 mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level)
657 {
658     struct page_info *pg;
659 
660     ASSERT(p2m);
661     ASSERT(p2m->domain);
662     ASSERT(p2m->domain->arch.paging.alloc_page);
663     pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
664     if ( !pg )
665         return INVALID_MFN;
666 
667     page_list_add_tail(pg, &p2m->pages);
668     BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table);
669     BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table);
670     BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table);
671     pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated;
672 
673     return page_to_mfn(pg);
674 }
675 
676 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
677 {
678     ASSERT(pg);
679     ASSERT(p2m);
680     ASSERT(p2m->domain);
681     ASSERT(p2m->domain->arch.paging.free_page);
682 
683     page_list_del(pg, &p2m->pages);
684     p2m->domain->arch.paging.free_page(p2m->domain, pg);
685 
686     return;
687 }
688 
689 /*
690  * Allocate a new p2m table for a domain.
691  *
692  * The structure of the p2m table is that of a pagetable for xen (i.e. it is
693  * controlled by CONFIG_PAGING_LEVELS).
694  *
695  * Returns 0 for success, -errno for failure.
696  */
697 int p2m_alloc_table(struct p2m_domain *p2m)
698 {
699     mfn_t top_mfn;
700     struct domain *d = p2m->domain;
701 
702     p2m_lock(p2m);
703 
704     if ( p2m_is_hostp2m(p2m) && domain_tot_pages(d) )
705     {
706         P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
707         p2m_unlock(p2m);
708         return -EINVAL;
709     }
710 
711     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
712     {
713         P2M_ERROR("p2m already allocated for this domain\n");
714         p2m_unlock(p2m);
715         return -EINVAL;
716     }
717 
718     P2M_PRINTK("allocating p2m table\n");
719 
720     top_mfn = p2m_alloc_ptp(p2m, 4);
721     if ( mfn_eq(top_mfn, INVALID_MFN) )
722     {
723         p2m_unlock(p2m);
724         return -ENOMEM;
725     }
726 
727     p2m->phys_table = pagetable_from_mfn(top_mfn);
728 
729     if ( hap_enabled(d) )
730         iommu_share_p2m_table(d);
731 
732     p2m_unlock(p2m);
733     return 0;
734 }
735 
736 /*
737  * hvm fixme: when adding support for pvh non-hardware domains, this path must
738  * cleanup any foreign p2m types (release refcnts on them).
739  */
740 void p2m_teardown(struct p2m_domain *p2m)
741 /* Return all the p2m pages to Xen.
742  * We know we don't have any extra mappings to these pages */
743 {
744     struct page_info *pg;
745     struct domain *d;
746 
747     if (p2m == NULL)
748         return;
749 
750     d = p2m->domain;
751 
752     p2m_lock(p2m);
753     ASSERT(atomic_read(&d->shr_pages) == 0);
754     p2m->phys_table = pagetable_null();
755 
756     while ( (pg = page_list_remove_head(&p2m->pages)) )
757         d->arch.paging.free_page(d, pg);
758     p2m_unlock(p2m);
759 }
760 
761 void p2m_final_teardown(struct domain *d)
762 {
763 #ifdef CONFIG_HVM
764     /*
765      * We must teardown both of them unconditionally because
766      * we initialise them unconditionally.
767      */
768     p2m_teardown_altp2m(d);
769     p2m_teardown_nestedp2m(d);
770 #endif
771 
772     /* Iterate over all p2m tables per domain */
773     p2m_teardown_hostp2m(d);
774 }
775 
776 static int __must_check
777 p2m_remove_page(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
778                 unsigned int page_order)
779 {
780     unsigned long i;
781     p2m_type_t t;
782     p2m_access_t a;
783 
784     /* IOMMU for PV guests is handled in get_page_type() and put_page(). */
785     if ( !paging_mode_translate(p2m->domain) )
786         return 0;
787 
788     ASSERT(gfn_locked_by_me(p2m, gfn));
789     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
790 
791     for ( i = 0; i < (1UL << page_order); )
792     {
793         unsigned int cur_order;
794         mfn_t mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
795                                           &cur_order, NULL);
796 
797         if ( p2m_is_valid(t) &&
798              (!mfn_valid(mfn) || !mfn_eq(mfn_add(mfn, i), mfn_return)) )
799             return -EILSEQ;
800 
801         i += (1UL << cur_order) -
802              ((gfn_x(gfn) + i) & ((1UL << cur_order) - 1));
803     }
804 
805     if ( mfn_valid(mfn) )
806     {
807         for ( i = 0; i < (1UL << page_order); i++ )
808         {
809             p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL);
810             if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
811                 set_gpfn_from_mfn(mfn_x(mfn) + i, INVALID_M2P_ENTRY);
812         }
813     }
814 
815     return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
816                          p2m->default_access);
817 }
818 
819 int
820 guest_physmap_remove_page(struct domain *d, gfn_t gfn,
821                           mfn_t mfn, unsigned int page_order)
822 {
823     struct p2m_domain *p2m = p2m_get_hostp2m(d);
824     int rc;
825 
826     gfn_lock(p2m, gfn, page_order);
827     rc = p2m_remove_page(p2m, gfn, mfn, page_order);
828     gfn_unlock(p2m, gfn, page_order);
829 
830     return rc;
831 }
832 
833 int
834 guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
835                        unsigned int page_order)
836 {
837     /* IOMMU for PV guests is handled in get_page_type() and put_page(). */
838     if ( !paging_mode_translate(d) )
839     {
840         struct page_info *page = mfn_to_page(mfn);
841         unsigned long i;
842 
843         /*
844          * Our interface for PV guests wrt IOMMU entries hasn't been very
845          * clear; but historically, pages have started out with IOMMU mappings,
846          * and only lose them when changed to a different page type.
847          *
848          * Retain this property by grabbing a writable type ref and then
849          * dropping it immediately.  The result will be pages that have a
850          * writable type (and an IOMMU entry), but a count of 0 (such that
851          * any guest-requested type changes succeed and remove the IOMMU
852          * entry).
853          */
854         for ( i = 0; i < (1UL << page_order); ++i, ++page )
855         {
856             if ( !need_iommu_pt_sync(d) )
857                 /* nothing */;
858             else if ( get_page_and_type(page, d, PGT_writable_page) )
859                 put_page_and_type(page);
860             else
861                 return -EINVAL;
862 
863             set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_x(gfn) + i);
864         }
865 
866         return 0;
867     }
868 
869     return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
870 }
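
/*
 * Minimal usage sketch (hypothetical gfn/mfn values): for a translated
 * guest this ends up in guest_physmap_add_entry() with p2m_ram_rw, and
 * the mapping is undone by the matching removal call:
 *
 *     rc = guest_physmap_add_page(d, _gfn(gfn), mfn, PAGE_ORDER_4K);
 *     ...
 *     rc = guest_physmap_remove_page(d, _gfn(gfn), mfn, PAGE_ORDER_4K);
 */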
871 
872 #ifdef CONFIG_HVM
873 int
874 guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
875                         unsigned int page_order, p2m_type_t t)
876 {
877     struct p2m_domain *p2m = p2m_get_hostp2m(d);
878     unsigned long i;
879     gfn_t ogfn;
880     p2m_type_t ot;
881     p2m_access_t a;
882     mfn_t omfn;
883     int pod_count = 0;
884     int rc = 0;
885 
886     if ( !paging_mode_translate(d) )
887     {
888         ASSERT_UNREACHABLE();
889         return -EPERM;
890     }
891 
892     /* foreign pages are added thru p2m_add_foreign */
893     if ( p2m_is_foreign(t) )
894         return -EINVAL;
895 
896     if ( !mfn_valid(mfn) )
897     {
898         ASSERT_UNREACHABLE();
899         return -EINVAL;
900     }
901 
902     p2m_lock(p2m);
903 
904     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
905 
906     /* First, remove m->p mappings for existing p->m mappings */
907     for ( i = 0; i < (1UL << page_order); i++ )
908     {
909         omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot,
910                               &a, 0, NULL, NULL);
911         if ( p2m_is_shared(ot) )
912         {
913             /* Do an unshare to cleanly take care of all corner cases. */
914             rc = mem_sharing_unshare_page(d, gfn_x(gfn) + i);
915             if ( rc )
916             {
917                 p2m_unlock(p2m);
918                 /*
919                  * NOTE: Should a guest domain bring this upon itself,
920                  * there is not a whole lot we can do. We are buried
921                  * deep in locks from most code paths by now. So, fail
922                  * the call and don't try to sleep on a wait queue
923                  * while placing the mem event.
924                  *
925                  * However, all current (changeset 3432abcf9380) code
926                  * paths avoid this unsavoury situation. For now.
927                  *
928                  * Foreign domains are okay to place an event as they
929                  * won't go to sleep.
930                  */
931                 mem_sharing_notify_enomem(d, gfn_x(gfn) + i, false);
932                 return rc;
933             }
934             omfn = p2m->get_entry(p2m, gfn_add(gfn, i),
935                                   &ot, &a, 0, NULL, NULL);
936             ASSERT(!p2m_is_shared(ot));
937         }
938         if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
939         {
940             /* Really shouldn't be unmapping grant/foreign maps this way */
941             domain_crash(d);
942             p2m_unlock(p2m);
943 
944             return -EINVAL;
945         }
946         else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
947         {
948             ASSERT(mfn_valid(omfn));
949             set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
950         }
951         else if ( ot == p2m_populate_on_demand )
952         {
953             /* Count how many PoD entries we'll be replacing if successful */
954             pod_count++;
955         }
956         else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
957         {
958             /* We're plugging a hole in the physmap where a paged out page was */
959             atomic_dec(&d->paged_pages);
960         }
961     }
962 
963     /* Then, look for m->p mappings for this range and deal with them */
964     for ( i = 0; i < (1UL << page_order); i++ )
965     {
966         if ( dom_cow &&
967              page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow )
968         {
969             /* This is no way to add a shared page to your physmap! */
970             gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n",
971                      mfn_x(mfn_add(mfn, i)), d->domain_id);
972             p2m_unlock(p2m);
973             return -EINVAL;
974         }
975         if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d )
976             continue;
977         ogfn = mfn_to_gfn(d, mfn_add(mfn, i));
978         if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) &&
979              !gfn_eq(ogfn, gfn_add(gfn, i)) )
980         {
981             /* This machine frame is already mapped at another physical
982              * address */
983             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
984                       mfn_x(mfn_add(mfn, i)), gfn_x(ogfn),
985                       gfn_x(gfn_add(gfn, i)));
986             omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL);
987             if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
988             {
989                 ASSERT(mfn_valid(omfn));
990                 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
991                           gfn_x(ogfn) , mfn_x(omfn));
992                 if ( mfn_eq(omfn, mfn_add(mfn, i)) &&
993                      (rc = p2m_remove_page(p2m, ogfn, omfn, 0)) )
994                     goto out;
995             }
996         }
997     }
998 
999     /* Now, actually do the two-way mapping */
1000     rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, p2m->default_access);
1001     if ( rc == 0 )
1002     {
1003         pod_lock(p2m);
1004         p2m->pod.entry_count -= pod_count;
1005         BUG_ON(p2m->pod.entry_count < 0);
1006         pod_unlock(p2m);
1007 
1008         if ( !p2m_is_grant(t) )
1009         {
1010             for ( i = 0; i < (1UL << page_order); i++ )
1011                 set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)),
1012                                   gfn_x(gfn_add(gfn, i)));
1013         }
1014     }
1015 
1016 out:
1017     p2m_unlock(p2m);
1018 
1019     return rc;
1020 }
1021 #endif
1022 
1023 /*
1024  * Modify the p2m type of a single gfn from ot to nt.
1025  * Returns: 0 for success, -errno for failure.
1026  * Resets the access permissions.
1027  */
1028 int p2m_change_type_one(struct domain *d, unsigned long gfn_l,
1029                        p2m_type_t ot, p2m_type_t nt)
1030 {
1031     p2m_access_t a;
1032     p2m_type_t pt;
1033     gfn_t gfn = _gfn(gfn_l);
1034     mfn_t mfn;
1035     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1036     int rc;
1037 
1038     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
1039     BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
1040 
1041     gfn_lock(p2m, gfn, 0);
1042 
1043     mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL);
1044     rc = likely(pt == ot)
1045          ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
1046                          p2m->default_access)
1047          : -EBUSY;
1048 
1049     gfn_unlock(p2m, gfn, 0);
1050 
1051     return rc;
1052 }
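
/*
 * Usage sketch: the classic caller is the HVM write-fault path, which
 * flips a single log-dirty gfn back to plain RAM after recording the
 * dirty bit (variable names below are illustrative):
 *
 *     if ( npfec.write_access && p2mt == p2m_ram_logdirty )
 *     {
 *         paging_mark_pfn_dirty(d, _pfn(gfn));
 *         p2m_change_type_one(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
 *     }
 */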
1053 
1054 /* Modify the p2m type of [start, end_exclusive) from ot to nt. */
1055 static void change_type_range(struct p2m_domain *p2m,
1056                               unsigned long start, unsigned long end_exclusive,
1057                               p2m_type_t ot, p2m_type_t nt)
1058 {
1059     unsigned long invalidate_start, invalidate_end;
1060     struct domain *d = p2m->domain;
1061     const unsigned long host_max_pfn = p2m_get_hostp2m(d)->max_mapped_pfn;
1062     unsigned long end = end_exclusive - 1;
1063     const unsigned long max_pfn = p2m->max_mapped_pfn;
1064     int rc = 0;
1065 
1066     /*
1067      * If we have an altp2m, the logdirty rangeset range needs to
1068      * match that of the hostp2m, but for efficiency, we want to clip
1069      * down the invalidation range according to the mapped values
1070      * in the altp2m. Keep track of and clip the ranges separately.
1071      */
1072     invalidate_start = start;
1073     invalidate_end   = end;
1074 
1075     /*
1076      * Clip down to the host p2m. This is probably not the right behavior.
1077      * This should be revisited later, but for now post a warning.
1078      */
1079     if ( unlikely(end > host_max_pfn) )
1080     {
1081         printk(XENLOG_G_WARNING "Dom%d logdirty rangeset clipped to max_mapped_pfn\n",
1082                d->domain_id);
1083         end = invalidate_end = host_max_pfn;
1084     }
1085 
1086     /* If the requested range is out of scope, return without doing anything. */
1087     if ( start > end )
1088         return;
1089 
1090     if ( p2m_is_altp2m(p2m) )
1091         invalidate_end = min(invalidate_end, max_pfn);
1092 
1093     /*
1094      * If the p2m is empty, or the range is outside the currently
1095      * mapped range, no need to do the invalidation; just update the
1096      * rangeset.
1097      */
1098     if ( invalidate_start < invalidate_end )
1099     {
1100         /*
1101          * If all valid gfns are in the invalidation range, just do a
1102          * global type change. Otherwise, invalidate only the range
1103          * we need.
1104          *
1105          * NB that invalidate_end can't logically be >max_pfn at this
1106          * point. If this changes, the == will need to be changed to
1107          * >=.
1108          */
1109         ASSERT(invalidate_end <= max_pfn);
1110         if ( !invalidate_start && invalidate_end == max_pfn)
1111             p2m->change_entry_type_global(p2m, ot, nt);
1112         else
1113             rc = p2m->change_entry_type_range(p2m, ot, nt,
1114                                               invalidate_start, invalidate_end);
1115         if ( rc )
1116         {
1117             printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n",
1118                    rc, d->domain_id, invalidate_start, invalidate_end, ot, nt);
1119             domain_crash(d);
1120         }
1121     }
1122 
1123     switch ( nt )
1124     {
1125     case p2m_ram_rw:
1126         if ( ot == p2m_ram_logdirty )
1127             rc = rangeset_remove_range(p2m->logdirty_ranges, start, end);
1128         break;
1129     case p2m_ram_logdirty:
1130         if ( ot == p2m_ram_rw )
1131             rc = rangeset_add_range(p2m->logdirty_ranges, start, end);
1132         break;
1133     default:
1134         break;
1135     }
1136     if ( rc )
1137     {
1138         printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
1139                rc, d->domain_id);
1140         domain_crash(d);
1141     }
1142 }
1143 
1144 void p2m_change_type_range(struct domain *d,
1145                            unsigned long start, unsigned long end,
1146                            p2m_type_t ot, p2m_type_t nt)
1147 {
1148     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1149 
1150     ASSERT(ot != nt);
1151     ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
1152 
1153     p2m_lock(hostp2m);
1154     hostp2m->defer_nested_flush = 1;
1155 
1156     change_type_range(hostp2m, start, end, ot, nt);
1157 
1158 #ifdef CONFIG_HVM
1159     if ( unlikely(altp2m_active(d)) )
1160     {
1161         unsigned int i;
1162 
1163         for ( i = 0; i < MAX_ALTP2M; i++ )
1164             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
1165             {
1166                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
1167 
1168                 p2m_lock(altp2m);
1169                 change_type_range(altp2m, start, end, ot, nt);
1170                 p2m_unlock(altp2m);
1171             }
1172     }
1173 #endif
1174     hostp2m->defer_nested_flush = 0;
1175     if ( nestedhvm_enabled(d) )
1176         p2m_flush_nestedp2m(d);
1177 
1178     p2m_unlock(hostp2m);
1179 }
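
/*
 * Usage sketch (illustrative range): dirty-VRAM style tracking turns
 * write faulting on and off for a gfn range by flipping its changeable
 * type in both directions:
 *
 *     p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
 *                           p2m_ram_rw, p2m_ram_logdirty);  // start tracking
 *     ...
 *     p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
 *                           p2m_ram_logdirty, p2m_ram_rw);  // stop tracking
 */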
1180 
1181 /*
1182  * Finish p2m type change for gfns which are marked as need_recalc in a range.
1183  * Uses the current p2m's max_mapped_pfn to further clip the invalidation
1184  * range for alternate p2ms.
1185  * Returns: 0 for success, negative for failure
1186  */
1187 static int finish_type_change(struct p2m_domain *p2m,
1188                               gfn_t first_gfn, unsigned long max_nr)
1189 {
1190     unsigned long gfn = gfn_x(first_gfn);
1191     unsigned long last_gfn = gfn + max_nr - 1;
1192     int rc = 0;
1193 
1194     last_gfn = min(last_gfn, p2m->max_mapped_pfn);
1195     while ( gfn <= last_gfn )
1196     {
1197         rc = p2m->recalc(p2m, gfn);
1198         /*
1199          * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
1200          * 0/1/-ENOMEM/-ENOENT; -ENOENT isn't an error as we are looping
1201          * over gfns here. If rc is 1 we need to return 0 for success.
1202          */
1203         if ( rc == -ENOENT || rc > 0 )
1204             rc = 0;
1205         else if ( rc < 0 )
1206         {
1207             gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n",
1208                      p2m->domain->domain_id, gfn);
1209             break;
1210         }
1211 
1212         gfn++;
1213     }
1214 
1215     return rc;
1216 }
1217 
1218 int p2m_finish_type_change(struct domain *d,
1219                            gfn_t first_gfn, unsigned long max_nr)
1220 {
1221     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1222     int rc;
1223 
1224     p2m_lock(hostp2m);
1225 
1226     rc = finish_type_change(hostp2m, first_gfn, max_nr);
1227 
1228     if ( rc < 0 )
1229         goto out;
1230 
1231 #ifdef CONFIG_HVM
1232     if ( unlikely(altp2m_active(d)) )
1233     {
1234         unsigned int i;
1235 
1236         for ( i = 0; i < MAX_ALTP2M; i++ )
1237             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
1238             {
1239                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
1240 
1241                 p2m_lock(altp2m);
1242                 rc = finish_type_change(altp2m, first_gfn, max_nr);
1243                 p2m_unlock(altp2m);
1244 
1245                 if ( rc < 0 )
1246                     goto out;
1247             }
1248     }
1249 #endif
1250 
1251  out:
1252     p2m_unlock(hostp2m);
1253 
1254     return rc;
1255 }
1256 
1257 /*
1258  * Returns:
1259  *    0              for success
1260  *    -errno         for failure
1261  *    1 + new order  for caller to retry with smaller order (guaranteed
1262  *                   to be smaller than order passed in)
1263  */
1264 static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l,
1265                                mfn_t mfn, unsigned int order,
1266                                p2m_type_t gfn_p2mt, p2m_access_t access)
1267 {
1268     int rc = 0;
1269     p2m_access_t a;
1270     p2m_type_t ot;
1271     mfn_t omfn;
1272     gfn_t gfn = _gfn(gfn_l);
1273     unsigned int cur_order = 0;
1274     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1275 
1276     if ( !paging_mode_translate(d) )
1277         return -EIO;
1278 
1279     gfn_lock(p2m, gfn, order);
1280     omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
1281     if ( cur_order < order )
1282     {
1283         gfn_unlock(p2m, gfn, order);
1284         return cur_order + 1;
1285     }
1286     if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
1287     {
1288         gfn_unlock(p2m, gfn, order);
1289         domain_crash(d);
1290         return -ENOENT;
1291     }
1292     else if ( p2m_is_ram(ot) )
1293     {
1294         unsigned long i;
1295 
1296         for ( i = 0; i < (1UL << order); ++i )
1297         {
1298             ASSERT(mfn_valid(mfn_add(omfn, i)));
1299             set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
1300         }
1301     }
1302 
1303     P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
1304     rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
1305     if ( rc )
1306         gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
1307                  gfn_l, order, rc, mfn_x(mfn));
1308 #ifdef CONFIG_HVM
1309     else if ( p2m_is_pod(ot) )
1310     {
1311         pod_lock(p2m);
1312         p2m->pod.entry_count -= 1UL << order;
1313         BUG_ON(p2m->pod.entry_count < 0);
1314         pod_unlock(p2m);
1315     }
1316 #endif
1317     gfn_unlock(p2m, gfn, order);
1318 
1319     return rc;
1320 }
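
/*
 * Sketch of the retry convention documented above (hypothetical caller;
 * fitting_order() is an assumed helper, not part of this file):
 *
 *     unsigned int order = fitting_order(gfn, mfn, nr_left);
 *     int rc;
 *
 *     do {
 *         rc = set_mmio_p2m_entry(d, _gfn(gfn), _mfn(mfn), order);
 *         if ( rc > 0 )
 *             order = rc - 1;   // guaranteed smaller than what was passed
 *     } while ( rc > 0 );
 */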
1321 
1322 /* Set foreign mfn in the given guest's p2m table. */
1323 int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
1324 {
1325     return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
1326                                p2m_get_hostp2m(d)->default_access);
1327 }
1328 
1329 int set_mmio_p2m_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
1330                        unsigned int order)
1331 {
1332     if ( order > PAGE_ORDER_4K &&
1333          rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
1334                                  mfn_x(mfn) + (1UL << order) - 1) )
1335         return PAGE_ORDER_4K + 1;
1336 
1337     return set_typed_p2m_entry(d, gfn_x(gfn), mfn, order, p2m_mmio_direct,
1338                                p2m_get_hostp2m(d)->default_access);
1339 }
1340 
1341 int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l,
1342                            p2m_access_t p2ma, unsigned int flag)
1343 {
1344     p2m_type_t p2mt;
1345     p2m_access_t a;
1346     gfn_t gfn = _gfn(gfn_l);
1347     mfn_t mfn;
1348     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1349     int ret;
1350 
1351     if ( !paging_mode_translate(p2m->domain) )
1352     {
1353         if ( !is_iommu_enabled(d) )
1354             return 0;
1355         return iommu_legacy_map(d, _dfn(gfn_l), _mfn(gfn_l), PAGE_ORDER_4K,
1356                                 IOMMUF_readable | IOMMUF_writable);
1357     }
1358 
1359     gfn_lock(p2m, gfn, 0);
1360 
1361     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1362 
1363     if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
1364         ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K,
1365                             p2m_mmio_direct, p2ma);
1366     else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma )
1367         ret = 0;
1368     else
1369     {
1370         if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED )
1371             ret = 0;
1372         else
1373             ret = -EBUSY;
1374         printk(XENLOG_G_WARNING
1375                "Cannot setup identity map d%d:%lx,"
1376                " gfn already mapped to %lx.\n",
1377                d->domain_id, gfn_l, mfn_x(mfn));
1378     }
1379 
1380     gfn_unlock(p2m, gfn, 0);
1381     return ret;
1382 }
1383 
1384 /*
1385  * Returns:
1386  *    0        for success
1387  *    -errno   for failure
1388  *    order+1  for caller to retry with order (guaranteed smaller than
1389  *             the order value passed in)
1390  */
1391 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
1392                          unsigned int order)
1393 {
1394     int rc = -EINVAL;
1395     gfn_t gfn = _gfn(gfn_l);
1396     mfn_t actual_mfn;
1397     p2m_access_t a;
1398     p2m_type_t t;
1399     unsigned int cur_order = 0;
1400     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1401 
1402     if ( !paging_mode_translate(d) )
1403         return -EIO;
1404 
1405     gfn_lock(p2m, gfn, order);
1406     actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
1407     if ( cur_order < order )
1408     {
1409         rc = cur_order + 1;
1410         goto out;
1411     }
1412 
1413     /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
1414     if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) )
1415     {
1416         gdprintk(XENLOG_ERR,
1417                  "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t);
1418         goto out;
1419     }
1420     if ( !mfn_eq(mfn, actual_mfn) )
1421         gdprintk(XENLOG_WARNING,
1422                  "no mapping between mfn %08lx and gfn %08lx\n",
1423                  mfn_x(mfn), gfn_l);
1424     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
1425                        p2m->default_access);
1426 
1427  out:
1428     gfn_unlock(p2m, gfn, order);
1429 
1430     return rc;
1431 }
1432 
1433 int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l)
1434 {
1435     p2m_type_t p2mt;
1436     p2m_access_t a;
1437     gfn_t gfn = _gfn(gfn_l);
1438     mfn_t mfn;
1439     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1440     int ret;
1441 
1442     if ( !paging_mode_translate(d) )
1443     {
1444         if ( !is_iommu_enabled(d) )
1445             return 0;
1446         return iommu_legacy_unmap(d, _dfn(gfn_l), PAGE_ORDER_4K);
1447     }
1448 
1449     gfn_lock(p2m, gfn, 0);
1450 
1451     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1452     if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l )
1453     {
1454         ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1455                             p2m_invalid, p2m->default_access);
1456         gfn_unlock(p2m, gfn, 0);
1457     }
1458     else
1459     {
1460         gfn_unlock(p2m, gfn, 0);
1461         printk(XENLOG_G_WARNING
1462                "non-identity map d%d:%lx not cleared (mapped to %lx)\n",
1463                d->domain_id, gfn_l, mfn_x(mfn));
1464         ret = 0;
1465     }
1466 
1467     return ret;
1468 }
1469 
1470 /* Returns: 0 for success, -errno for failure */
1471 int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn)
1472 {
1473     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1474     int rc = 0;
1475     gfn_t gfn = _gfn(gfn_l);
1476     p2m_access_t a;
1477     p2m_type_t ot;
1478     mfn_t omfn;
1479     unsigned long pg_type;
1480 
1481     if ( !paging_mode_translate(p2m->domain) )
1482         return -EIO;
1483 
1484     gfn_lock(p2m, gfn, 0);
1485     omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
1486     /* At the moment we only allow p2m change if gfn has already been made
1487      * sharable first */
1488     ASSERT(p2m_is_shared(ot));
1489     ASSERT(mfn_valid(omfn));
1490     /* Set the m2p entry to invalid only if there are no further type
1491      * refs to this page as shared */
1492     pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
1493     if ( (pg_type & PGT_count_mask) == 0
1494          || (pg_type & PGT_type_mask) != PGT_shared_page )
1495         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
1496 
1497     P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn));
1498     rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared,
1499                        p2m->default_access);
1500     gfn_unlock(p2m, gfn, 0);
1501     if ( rc )
1502         gdprintk(XENLOG_ERR,
1503                  "p2m_set_entry failed! mfn=%08lx rc:%d\n",
1504                  mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc);
1505     return rc;
1506 }
1507 
1508 #ifdef CONFIG_HVM
1509 static struct p2m_domain *
1510 p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
1511 {
1512     struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list;
1513 
1514     ASSERT(!list_empty(lru_list));
1515 
1516     if ( p2m == NULL )
1517         p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list);
1518 
1519     list_move(&p2m->np2m_list, lru_list);
1520 
1521     return p2m;
1522 }
1523 
1524 static void
1525 p2m_flush_table_locked(struct p2m_domain *p2m)
1526 {
1527     struct page_info *top, *pg;
1528     struct domain *d = p2m->domain;
1529     mfn_t mfn;
1530 
1531     ASSERT(p2m_locked_by_me(p2m));
1532 
1533     /*
1534      * "Host" p2m tables can have shared entries &c that need a bit more care
1535      * when discarding them.
1536      */
1537     ASSERT(!p2m_is_hostp2m(p2m));
1538 #ifdef CONFIG_HVM
1539     /* Nested p2m's do not do pod, hence the asserts (and no pod lock) */
1540     ASSERT(page_list_empty(&p2m->pod.super));
1541     ASSERT(page_list_empty(&p2m->pod.single));
1542 #endif
1543 
1544     /* No need to flush if it's already empty */
1545     if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR )
1546         return;
1547 
1548     /* This is no longer a valid nested p2m for any address space */
1549     p2m->np2m_base = P2M_BASE_EADDR;
1550     p2m->np2m_generation++;
1551 
1552     /* Make sure nobody else is using this p2m table */
1553     if ( nestedhvm_enabled(d) )
1554         nestedhvm_vmcx_flushtlb(p2m);
1555 
1556     /* Zap the top level of the trie */
1557     mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
1558     clear_domain_page(mfn);
1559 
1560     /* Free the rest of the trie pages back to the paging pool */
1561     top = mfn_to_page(mfn);
1562     while ( (pg = page_list_remove_head(&p2m->pages)) )
1563     {
1564         if ( pg != top )
1565             d->arch.paging.free_page(d, pg);
1566     }
1567     page_list_add(top, &p2m->pages);
1568 }
1569 
1570 /* Reset this p2m table to be empty */
1571 static void
1572 p2m_flush_table(struct p2m_domain *p2m)
1573 {
1574     p2m_lock(p2m);
1575     p2m_flush_table_locked(p2m);
1576     p2m_unlock(p2m);
1577 }
1578 
1579 void
1580 p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
1581 {
1582     ASSERT(v->domain == p2m->domain);
1583     vcpu_nestedhvm(v).nv_p2m = NULL;
1584     p2m_flush_table(p2m);
1585     hvm_asid_flush_vcpu(v);
1586 }
1587 
1588 void
1589 p2m_flush_nestedp2m(struct domain *d)
1590 {
1591     int i;
1592     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1593         p2m_flush_table(d->arch.nested_p2m[i]);
1594 }
1595 
1596 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
1597 {
1598     struct domain *d = v->domain;
1599     struct p2m_domain *p2m;
1600     unsigned int i;
1601 
1602     np2m_base &= ~(0xfffull);
1603 
1604     nestedp2m_lock(d);
1605     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1606     {
1607         p2m = d->arch.nested_p2m[i];
1608         p2m_lock(p2m);
1609         if ( p2m->np2m_base == np2m_base )
1610         {
1611             p2m_flush_table_locked(p2m);
1612             p2m_unlock(p2m);
1613             break;
1614         }
1615         p2m_unlock(p2m);
1616     }
1617     nestedp2m_unlock(d);
1618 }
1619 
1620 static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
1621 {
1622     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1623     struct domain *d = v->domain;
1624 
1625     /* Bring this np2m to the top of the LRU list */
1626     p2m_getlru_nestedp2m(d, p2m);
1627 
1628     nv->nv_flushp2m = 0;
1629     nv->nv_p2m = p2m;
1630     nv->np2m_generation = p2m->np2m_generation;
1631     cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
1632 }
1633 
1634 static void nvcpu_flush(struct vcpu *v)
1635 {
1636     hvm_asid_flush_vcpu(v);
1637     vcpu_nestedhvm(v).stale_np2m = true;
1638 }
1639 
1640 struct p2m_domain *
1641 p2m_get_nestedp2m_locked(struct vcpu *v)
1642 {
1643     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1644     struct domain *d = v->domain;
1645     struct p2m_domain *p2m;
1646     uint64_t np2m_base = nhvm_vcpu_p2m_base(v);
1647     unsigned int i;
1648     bool needs_flush = true;
1649 
1650     /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */
1651     np2m_base &= ~(0xfffull);
1652 
1653     if (nv->nv_flushp2m && nv->nv_p2m) {
1654         nv->nv_p2m = NULL;
1655     }
1656 
1657     nestedp2m_lock(d);
1658     p2m = nv->nv_p2m;
1659     if ( p2m )
1660     {
1661         p2m_lock(p2m);
1662         if ( p2m->np2m_base == np2m_base )
1663         {
1664             /* Check if np2m was flushed just before the lock */
1665             if ( nv->np2m_generation == p2m->np2m_generation )
1666                 needs_flush = false;
1667             /* np2m is up-to-date */
1668             goto found;
1669         }
1670         else if ( p2m->np2m_base != P2M_BASE_EADDR )
1671         {
1672             /* vCPU is switching from some other valid np2m */
1673             cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
1674         }
1675         p2m_unlock(p2m);
1676     }
1677 
1678     /* Share a np2m if possible */
1679     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1680     {
1681         p2m = d->arch.nested_p2m[i];
1682         p2m_lock(p2m);
1683 
1684         if ( p2m->np2m_base == np2m_base )
1685             goto found;
1686 
1687         p2m_unlock(p2m);
1688     }
1689 
1690     /* All p2m's are or were in use. Take the least recently used one,
1691      * flush it and reuse. */
1692     p2m = p2m_getlru_nestedp2m(d, NULL);
1693     p2m_flush_table(p2m);
1694     p2m_lock(p2m);
1695 
1696  found:
1697     if ( needs_flush )
1698         nvcpu_flush(v);
1699     p2m->np2m_base = np2m_base;
1700     assign_np2m(v, p2m);
1701     nestedp2m_unlock(d);
1702 
1703     return p2m;
1704 }
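
/*
 * Editorial sketch (not part of the Xen sources): the generation-counter
 * scheme used by p2m_get_nestedp2m_locked() above.  Every flush of an np2m
 * bumps its generation, and each vCPU remembers the generation it last
 * observed; a mismatch therefore means the table was flushed behind the
 * vCPU's back and the vCPU itself needs re-flushing.  All identifiers below
 * are invented for illustration only.
 */
#include <stdbool.h>
#include <stdint.h>

struct example_np2m { uint64_t base, generation; };
struct example_vcpu { uint64_t seen_base, seen_generation; };

static bool example_np2m_is_stale(const struct example_vcpu *v,
                                  const struct example_np2m *np2m)
{
    /* Stale if the table now serves another L1 base, or was flushed
     * (generation bumped) since this vCPU last used it. */
    return np2m->base != v->seen_base ||
           np2m->generation != v->seen_generation;
}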
1705 
1706 struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v)
1707 {
1708     struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v);
1709     p2m_unlock(p2m);
1710 
1711     return p2m;
1712 }
1713 
1714 struct p2m_domain *
1715 p2m_get_p2m(struct vcpu *v)
1716 {
1717     if ( !nestedhvm_is_n2(v) )
1718         return p2m_get_hostp2m(v->domain);
1719 
1720     return p2m_get_nestedp2m(v);
1721 }
1722 
1723 void np2m_schedule(int dir)
1724 {
1725     struct vcpu *curr = current;
1726     struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
1727     struct p2m_domain *p2m;
1728 
1729     ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
1730 
1731     if ( !nestedhvm_enabled(curr->domain) ||
1732          !nestedhvm_vcpu_in_guestmode(curr) ||
1733          !nestedhvm_paging_mode_hap(curr) )
1734         return;
1735 
1736     p2m = nv->nv_p2m;
1737     if ( p2m )
1738     {
1739         bool np2m_valid;
1740 
1741         p2m_lock(p2m);
1742         np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
1743                      nv->np2m_generation == p2m->np2m_generation;
1744         if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
1745         {
1746             /*
1747              * The np2m is up to date but this vCPU will no longer use it,
1748              * which means there are no reasons to send a flush IPI.
1749              */
1750             cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
1751         }
1752         else if ( dir == NP2M_SCHEDLE_IN )
1753         {
1754             if ( !np2m_valid )
1755             {
1756                 /* This vCPU's np2m was flushed while it was not runnable */
1757                 hvm_asid_flush_core();
1758                 vcpu_nestedhvm(curr).nv_p2m = NULL;
1759             }
1760             else
1761                 cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
1762         }
1763         p2m_unlock(p2m);
1764     }
1765 }
1766 #endif
1767 
1768 unsigned long paging_gva_to_gfn(struct vcpu *v,
1769                                 unsigned long va,
1770                                 uint32_t *pfec)
1771 {
1772     struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
1773     const struct paging_mode *hostmode = paging_get_hostmode(v);
1774 
1775     if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
1776     {
1777         unsigned long l2_gfn, l1_gfn;
1778         struct p2m_domain *p2m;
1779         const struct paging_mode *mode;
1780         uint8_t l1_p2ma;
1781         unsigned int l1_page_order;
1782         int rv;
1783 
1784         /* translate l2 guest va into l2 guest gfn */
1785         p2m = p2m_get_nestedp2m(v);
1786         mode = paging_get_nestedmode(v);
1787         l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec);
1788 
1789         if ( l2_gfn == gfn_x(INVALID_GFN) )
1790             return gfn_x(INVALID_GFN);
1791 
1792         /* translate l2 guest gfn into l1 guest gfn */
1793         rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma,
1794                                    1,
1795                                    !!(*pfec & PFEC_write_access),
1796                                    !!(*pfec & PFEC_insn_fetch));
1797 
1798         if ( rv != NESTEDHVM_PAGEFAULT_DONE )
1799             return gfn_x(INVALID_GFN);
1800 
1801         /*
1802          * Sanity check that l1_gfn can be used properly as a 4K mapping, even
1803          * if it is mapped by a nested superpage.
1804          */
1805         ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) ==
1806                (l1_gfn & ((1ul << l1_page_order) - 1)));
1807 
1808         return l1_gfn;
1809     }
1810 
1811     return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
1812 }
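
/*
 * Editorial sketch (not part of the Xen sources): why the ASSERT in the
 * nested path above holds.  When the L1 p2m maps the L2 gfn with a
 * superpage of the reported order, only the bits above the superpage
 * boundary are translated, so the offset bits below (1 << order) must be
 * identical in l2_gfn and l1_gfn.  The helper assumes an order-aligned L1
 * superpage base; all names are invented for illustration.
 */
#include <assert.h>

static unsigned long example_superpage_translate(unsigned long l2_gfn,
                                                 unsigned long l1_sp_base,
                                                 unsigned int order)
{
    unsigned long mask = (1ul << order) - 1;
    unsigned long l1_gfn = l1_sp_base | (l2_gfn & mask);

    /* Mirrors the ASSERT in paging_gva_to_gfn(). */
    assert((l2_gfn & mask) == (l1_gfn & mask));
    return l1_gfn;
}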
1813 
1814 /*
1815  * If the map is non-NULL, we leave this function having acquired an extra ref
1816  * on mfn_to_page(*mfn).  In all cases, *pfec contains appropriate
1817  * synthetic/structure PFEC_* bits.
1818  */
1819 void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
1820                      p2m_query_t q, uint32_t *pfec)
1821 {
1822     p2m_type_t p2mt;
1823     struct page_info *page;
1824 
1825     if ( !gfn_valid(p2m->domain, gfn) )
1826     {
1827         *pfec = PFEC_reserved_bit | PFEC_page_present;
1828         return NULL;
1829     }
1830 
1831     /* Translate the gfn, unsharing if shared. */
1832     page = p2m_get_page_from_gfn(p2m, gfn, &p2mt, NULL, q);
1833     if ( p2m_is_paging(p2mt) )
1834     {
1835         ASSERT(p2m_is_hostp2m(p2m));
1836         if ( page )
1837             put_page(page);
1838         p2m_mem_paging_populate(p2m->domain, gfn);
1839         *pfec = PFEC_page_paged;
1840         return NULL;
1841     }
1842     if ( p2m_is_shared(p2mt) )
1843     {
1844         if ( page )
1845             put_page(page);
1846         *pfec = PFEC_page_shared;
1847         return NULL;
1848     }
1849     if ( !page )
1850     {
1851         *pfec = 0;
1852         return NULL;
1853     }
1854 
1855     *pfec = PFEC_page_present;
1856     *mfn = page_to_mfn(page);
1857     ASSERT(mfn_valid(*mfn));
1858 
1859     return map_domain_page(*mfn);
1860 }
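
/*
 * Editorial caller sketch (hypothetical, not compilable on its own): a
 * non-NULL return from map_domain_gfn() leaves the caller holding both a
 * mapping and a page reference, so both must be dropped, e.g.:
 *
 *     uint32_t pfec;
 *     mfn_t mfn;
 *     void *ptr = map_domain_gfn(p2m, gfn, &mfn, P2M_ALLOC, &pfec);
 *
 *     if ( ptr )
 *     {
 *         ... access the page through ptr ...
 *         unmap_domain_page(ptr);
 *         put_page(mfn_to_page(mfn));
 *     }
 *     else
 *         ... report the failure using the synthetic bits left in pfec ...
 */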
1861 
1862 static unsigned int mmio_order(const struct domain *d,
1863                                unsigned long start_fn, unsigned long nr)
1864 {
1865     /*
1866      * Note that the !hap_enabled() here has two effects:
1867      * - exclude shadow mode (which doesn't support large MMIO mappings),
1868      * - exclude PV guests, should execution reach this code for such.
1869      * So be careful when altering this.
1870      */
1871     if ( !hap_enabled(d) ||
1872          (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
1873         return PAGE_ORDER_4K;
1874 
1875     if ( 0 /*
1876             * Don't use 1Gb pages, to limit the iteration count in
1877             * set_typed_p2m_entry() when it needs to zap M2P entries
1878             * for a RAM range.
1879             */ &&
1880          !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
1881          hap_has_1gb )
1882         return PAGE_ORDER_1G;
1883 
1884     if ( hap_has_2mb )
1885         return PAGE_ORDER_2M;
1886 
1887     return PAGE_ORDER_4K;
1888 }
1889 
1890 #define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
1891 
1892 int map_mmio_regions(struct domain *d,
1893                      gfn_t start_gfn,
1894                      unsigned long nr,
1895                      mfn_t mfn)
1896 {
1897     int ret = 0;
1898     unsigned long i;
1899     unsigned int iter, order;
1900 
1901     if ( !paging_mode_translate(d) )
1902         return 0;
1903 
1904     for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
1905           i += 1UL << order, ++iter )
1906     {
1907         /* OR'ing gfn and mfn values will return an order suitable to both. */
1908         for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
1909               order = ret - 1 )
1910         {
1911             ret = set_mmio_p2m_entry(d, gfn_add(start_gfn, i),
1912                                      mfn_add(mfn, i), order);
1913             if ( ret <= 0 )
1914                 break;
1915             ASSERT(ret <= order);
1916         }
1917         if ( ret < 0 )
1918             break;
1919     }
1920 
1921     return i == nr ? 0 : i ?: ret;
1922 }
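
/*
 * Editorial sketch (not part of the Xen sources): the "OR the gfn and mfn"
 * alignment trick used in the loops above.  A superpage of a given order is
 * only usable when *both* frame numbers are aligned to it, and ORing them
 * lets one mask test cover both; the remaining length must also be able to
 * fill the superpage.  Note also that map_mmio_regions() returns 0 when the
 * whole range was handled, otherwise the number of frames done, so the
 * caller can continue.  Identifiers below are invented for illustration.
 */
#include <stdbool.h>

static bool example_order_usable(unsigned long gfn, unsigned long mfn,
                                 unsigned long remaining, unsigned int order)
{
    unsigned long mask = (1ul << order) - 1;

    return !((gfn | mfn) & mask) && (remaining >> order);
}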
1923 
1924 int unmap_mmio_regions(struct domain *d,
1925                        gfn_t start_gfn,
1926                        unsigned long nr,
1927                        mfn_t mfn)
1928 {
1929     int ret = 0;
1930     unsigned long i;
1931     unsigned int iter, order;
1932 
1933     if ( !paging_mode_translate(d) )
1934         return 0;
1935 
1936     for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
1937           i += 1UL << order, ++iter )
1938     {
1939         /* OR'ing gfn and mfn values will return an order suitable to both. */
1940         for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
1941               order = ret - 1 )
1942         {
1943             ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
1944                                        mfn_add(mfn, i), order);
1945             if ( ret <= 0 )
1946                 break;
1947             ASSERT(ret <= order);
1948         }
1949         if ( ret < 0 )
1950             break;
1951     }
1952 
1953     return i == nr ? 0 : i ?: ret;
1954 }
1955 
1956 #ifdef CONFIG_HVM
1957 
1958 int altp2m_get_effective_entry(struct p2m_domain *ap2m, gfn_t gfn, mfn_t *mfn,
1959                                p2m_type_t *t, p2m_access_t *a,
1960                                bool prepopulate)
1961 {
1962     *mfn = ap2m->get_entry(ap2m, gfn, t, a, 0, NULL, NULL);
1963 
1964     /* Check host p2m if no valid entry in alternate */
1965     if ( !mfn_valid(*mfn) && !p2m_is_hostp2m(ap2m) )
1966     {
1967         struct p2m_domain *hp2m = p2m_get_hostp2m(ap2m->domain);
1968         unsigned int page_order;
1969         int rc;
1970 
1971         *mfn = __get_gfn_type_access(hp2m, gfn_x(gfn), t, a,
1972                                      P2M_ALLOC | P2M_UNSHARE, &page_order, 0);
1973 
1974         rc = -ESRCH;
1975         if ( !mfn_valid(*mfn) || *t != p2m_ram_rw )
1976             return rc;
1977 
1978         /* If this is a superpage, copy that first */
1979         if ( prepopulate && page_order != PAGE_ORDER_4K )
1980         {
1981             unsigned long mask = ~((1UL << page_order) - 1);
1982             gfn_t gfn_aligned = _gfn(gfn_x(gfn) & mask);
1983             mfn_t mfn_aligned = _mfn(mfn_x(*mfn) & mask);
1984 
1985             rc = ap2m->set_entry(ap2m, gfn_aligned, mfn_aligned, page_order, *t, *a, 1);
1986             if ( rc )
1987                 return rc;
1988         }
1989     }
1990 
1991     return 0;
1992 }
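
/*
 * Editorial caller sketch (hypothetical, not compilable on its own):
 * AP2MGET_query merely reports what the host p2m would give for a gfn the
 * altp2m has no entry for, while AP2MGET_prepopulate additionally copies
 * the host mapping (at its original page order) into the altp2m before it
 * is modified, e.g.:
 *
 *     rc = altp2m_get_effective_entry(ap2m, gfn, &mfn, &t, &a,
 *                                     AP2MGET_prepopulate);
 *     if ( rc )        // -ESRCH: no usable host RAM entry either
 *         return rc;
 */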
1993 
1994 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
1995 {
1996     if ( altp2m_active(v->domain) )
1997         p2m_switch_vcpu_altp2m_by_id(v, idx);
1998 }
1999 
2000 bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
2001 {
2002     struct domain *d = v->domain;
2003     bool_t rc = 0;
2004 
2005     if ( idx >= MAX_ALTP2M )
2006         return rc;
2007 
2008     altp2m_list_lock(d);
2009 
2010     if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2011     {
2012         if ( idx != vcpu_altp2m(v).p2midx )
2013         {
2014             atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2015             vcpu_altp2m(v).p2midx = idx;
2016             atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2017             altp2m_vcpu_update_p2m(v);
2018         }
2019         rc = 1;
2020     }
2021 
2022     altp2m_list_unlock(d);
2023     return rc;
2024 }
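
/*
 * Editorial sketch (not part of the Xen sources): the active_vcpus
 * bookkeeping around the index switch above.  Each view counts the vCPUs
 * currently running on it, which is what later lets
 * p2m_destroy_altp2m_by_id() refuse to tear down a view that is still in
 * use.  Identifiers below are invented for illustration.
 */
#include <stdatomic.h>

struct example_view { atomic_int active_vcpus; };

static void example_switch_view(struct example_view *old_view,
                                struct example_view *new_view)
{
    /* The old view loses one user, the new view gains one. */
    atomic_fetch_sub(&old_view->active_vcpus, 1);
    atomic_fetch_add(&new_view->active_vcpus, 1);
}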
2025 
2026 /*
2027  * Read info about the gfn in an altp2m, locking the gfn.
2028  *
2029  * If the entry is valid, pass the results back to the caller.
2030  *
2031  * If the entry was invalid, and the host's entry is also invalid,
2032  * return to the caller without any changes.
2033  *
2034  * If the entry is invalid, and the host entry was valid, propagate
2035  * the host's entry to the altp2m (retaining page order), and indicate
2036  * that the caller should re-try the faulting instruction.
2037  */
2038 bool p2m_altp2m_get_or_propagate(struct p2m_domain *ap2m, unsigned long gfn_l,
2039                                  mfn_t *mfn, p2m_type_t *p2mt,
2040                                  p2m_access_t *p2ma, unsigned int page_order)
2041 {
2042     p2m_type_t ap2mt;
2043     p2m_access_t ap2ma;
2044     unsigned long mask;
2045     gfn_t gfn;
2046     mfn_t amfn;
2047     int rc;
2048 
2049     /*
2050      * NB we must get the full lock on the altp2m here, in addition to
2051      * the lock on the individual gfn, since we may change a range of
2052      * gfns below.
2053      */
2054     p2m_lock(ap2m);
2055 
2056     amfn = get_gfn_type_access(ap2m, gfn_l, &ap2mt, &ap2ma, 0, NULL);
2057 
2058     if ( !mfn_eq(amfn, INVALID_MFN) )
2059     {
2060         p2m_unlock(ap2m);
2061         *mfn  = amfn;
2062         *p2mt = ap2mt;
2063         *p2ma = ap2ma;
2064         return false;
2065     }
2066 
2067     /* Host entry is also invalid; don't bother setting the altp2m entry. */
2068     if ( mfn_eq(*mfn, INVALID_MFN) )
2069     {
2070         p2m_unlock(ap2m);
2071         return false;
2072     }
2073 
2074     /*
2075      * If this is a superpage mapping, round down both frame numbers
2076      * to the start of the superpage.  NB that we repurpose `amfn`
2077      * here.
2078      */
2079     mask = ~((1UL << page_order) - 1);
2080     amfn = _mfn(mfn_x(*mfn) & mask);
2081     gfn = _gfn(gfn_l & mask);
2082 
2083     rc = p2m_set_entry(ap2m, gfn, amfn, page_order, *p2mt, *p2ma);
2084     p2m_unlock(ap2m);
2085 
2086     if ( rc )
2087     {
2088         gprintk(XENLOG_ERR,
2089                 "failed to set entry for %"PRI_gfn" -> %"PRI_mfn" altp2m %u, rc %d\n",
2090                 gfn_l, mfn_x(amfn), vcpu_altp2m(current).p2midx, rc);
2091         domain_crash(ap2m->domain);
2092     }
2093 
2094     return true;
2095 }
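
/*
 * Editorial caller sketch (hypothetical, not compilable on its own): the
 * boolean result tells a fault handler whether the altp2m entry was just
 * propagated from the host (so the faulting access can simply be retried)
 * or whether the caller must act on the mfn/type/access handed back, e.g.:
 *
 *     if ( p2m_altp2m_get_or_propagate(ap2m, gfn_l, &mfn, &p2mt, &p2ma,
 *                                      page_order) )
 *         return true;   // entry copied from the host; re-execute
 *     // otherwise fall through and handle the violation with mfn/p2mt/p2ma
 */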
2096 
2097 enum altp2m_reset_type {
2098     ALTP2M_RESET,
2099     ALTP2M_DEACTIVATE
2100 };
2101 
2102 static void p2m_reset_altp2m(struct domain *d, unsigned int idx,
2103                              enum altp2m_reset_type reset_type)
2104 {
2105     struct p2m_domain *p2m;
2106 
2107     ASSERT(idx < MAX_ALTP2M);
2108     p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2109 
2110     p2m_lock(p2m);
2111 
2112     p2m_flush_table_locked(p2m);
2113 
2114     if ( reset_type == ALTP2M_DEACTIVATE )
2115         p2m_free_logdirty(p2m);
2116 
2117     /* Uninit and reinit ept to force TLB shootdown */
2118     ept_p2m_uninit(p2m);
2119     ept_p2m_init(p2m);
2120 
2121     p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2122     p2m->max_remapped_gfn = 0;
2123 
2124     p2m_unlock(p2m);
2125 }
2126 
2127 void p2m_flush_altp2m(struct domain *d)
2128 {
2129     unsigned int i;
2130 
2131     altp2m_list_lock(d);
2132 
2133     for ( i = 0; i < MAX_ALTP2M; i++ )
2134     {
2135         p2m_reset_altp2m(d, i, ALTP2M_DEACTIVATE);
2136         d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
2137         d->arch.altp2m_visible_eptp[i] = mfn_x(INVALID_MFN);
2138     }
2139 
2140     altp2m_list_unlock(d);
2141 }
2142 
2143 static int p2m_activate_altp2m(struct domain *d, unsigned int idx,
2144                                p2m_access_t hvmmem_default_access)
2145 {
2146     struct p2m_domain *hostp2m, *p2m;
2147     int rc;
2148 
2149     ASSERT(idx < MAX_ALTP2M);
2150 
2151     p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2152     hostp2m = p2m_get_hostp2m(d);
2153 
2154     p2m_lock(p2m);
2155 
2156     rc = p2m_init_logdirty(p2m);
2157 
2158     if ( rc )
2159         goto out;
2160 
2161     /* The following is really just a rangeset copy. */
2162     rc = rangeset_merge(p2m->logdirty_ranges, hostp2m->logdirty_ranges);
2163 
2164     if ( rc )
2165     {
2166         p2m_free_logdirty(p2m);
2167         goto out;
2168     }
2169 
2170     p2m->default_access = hvmmem_default_access;
2171     p2m->domain = hostp2m->domain;
2172     p2m->global_logdirty = hostp2m->global_logdirty;
2173     p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2174     p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
2175 
2176     p2m_init_altp2m_ept(d, idx);
2177 
2178  out:
2179     p2m_unlock(p2m);
2180 
2181     return rc;
2182 }
2183 
2184 int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
2185 {
2186     int rc = -EINVAL;
2187     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
2188 
2189     if ( idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
2190         return rc;
2191 
2192     altp2m_list_lock(d);
2193 
2194     if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
2195          mfn_x(INVALID_MFN) )
2196         rc = p2m_activate_altp2m(d, idx, hostp2m->default_access);
2197 
2198     altp2m_list_unlock(d);
2199     return rc;
2200 }
2201 
2202 int p2m_init_next_altp2m(struct domain *d, uint16_t *idx,
2203                          xenmem_access_t hvmmem_default_access)
2204 {
2205     int rc = -EINVAL;
2206     unsigned int i;
2207     p2m_access_t a;
2208     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
2209 
2210     if ( hvmmem_default_access > XENMEM_access_default ||
2211          !xenmem_access_to_p2m_access(hostp2m, hvmmem_default_access, &a) )
2212         return rc;
2213 
2214     altp2m_list_lock(d);
2215 
2216     for ( i = 0; i < MAX_ALTP2M; i++ )
2217     {
2218         if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
2219             continue;
2220 
2221         rc = p2m_activate_altp2m(d, i, a);
2222 
2223         if ( !rc )
2224             *idx = i;
2225 
2226         break;
2227     }
2228 
2229     altp2m_list_unlock(d);
2230     return rc;
2231 }
2232 
2233 int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
2234 {
2235     struct p2m_domain *p2m;
2236     int rc = -EBUSY;
2237 
2238     if ( !idx || idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
2239         return rc;
2240 
2241     rc = domain_pause_except_self(d);
2242     if ( rc )
2243         return rc;
2244 
2245     rc = -EBUSY;
2246     altp2m_list_lock(d);
2247 
2248     if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] !=
2249          mfn_x(INVALID_MFN) )
2250     {
2251         p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2252 
2253         if ( !_atomic_read(p2m->active_vcpus) )
2254         {
2255             p2m_reset_altp2m(d, idx, ALTP2M_DEACTIVATE);
2256             d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] =
2257                 mfn_x(INVALID_MFN);
2258             d->arch.altp2m_visible_eptp[array_index_nospec(idx, MAX_EPTP)] =
2259                 mfn_x(INVALID_MFN);
2260             rc = 0;
2261         }
2262     }
2263 
2264     altp2m_list_unlock(d);
2265 
2266     domain_unpause_except_self(d);
2267 
2268     return rc;
2269 }
2270 
2271 int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
2272 {
2273     struct vcpu *v;
2274     int rc = -EINVAL;
2275 
2276     if ( idx >= MAX_ALTP2M )
2277         return rc;
2278 
2279     rc = domain_pause_except_self(d);
2280     if ( rc )
2281         return rc;
2282 
2283     rc = -EINVAL;
2284     altp2m_list_lock(d);
2285 
2286     if ( d->arch.altp2m_visible_eptp[idx] != mfn_x(INVALID_MFN) )
2287     {
2288         for_each_vcpu( d, v )
2289             if ( idx != vcpu_altp2m(v).p2midx )
2290             {
2291                 atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2292                 vcpu_altp2m(v).p2midx = idx;
2293                 atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2294                 altp2m_vcpu_update_p2m(v);
2295             }
2296 
2297         rc = 0;
2298     }
2299 
2300     altp2m_list_unlock(d);
2301 
2302     domain_unpause_except_self(d);
2303 
2304     return rc;
2305 }
2306 
2307 int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
2308                           gfn_t old_gfn, gfn_t new_gfn)
2309 {
2310     struct p2m_domain *hp2m, *ap2m;
2311     p2m_access_t a;
2312     p2m_type_t t;
2313     mfn_t mfn;
2314     int rc = -EINVAL;
2315 
2316     if ( idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2317          d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
2318          mfn_x(INVALID_MFN) )
2319         return rc;
2320 
2321     hp2m = p2m_get_hostp2m(d);
2322     ap2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2323 
2324     p2m_lock(hp2m);
2325     p2m_lock(ap2m);
2326 
2327     if ( gfn_eq(new_gfn, INVALID_GFN) )
2328     {
2329         mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);
2330         rc = mfn_valid(mfn)
2331              ? p2m_remove_page(ap2m, old_gfn, mfn, PAGE_ORDER_4K)
2332              : 0;
2333         goto out;
2334     }
2335 
2336     rc = altp2m_get_effective_entry(ap2m, old_gfn, &mfn, &t, &a,
2337                                     AP2MGET_prepopulate);
2338     if ( rc )
2339         goto out;
2340 
2341     rc = altp2m_get_effective_entry(ap2m, new_gfn, &mfn, &t, &a,
2342                                     AP2MGET_query);
2343     if ( rc )
2344         goto out;
2345 
2346     if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a,
2347                           (current->domain != d)) )
2348     {
2349         rc = 0;
2350 
2351         if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn )
2352             ap2m->min_remapped_gfn = gfn_x(new_gfn);
2353         if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn )
2354             ap2m->max_remapped_gfn = gfn_x(new_gfn);
2355     }
2356 
2357  out:
2358     p2m_unlock(ap2m);
2359     p2m_unlock(hp2m);
2360     return rc;
2361 }
2362 
2363 int p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
2364                                 mfn_t mfn, unsigned int page_order,
2365                                 p2m_type_t p2mt, p2m_access_t p2ma)
2366 {
2367     struct p2m_domain *p2m;
2368     p2m_access_t a;
2369     p2m_type_t t;
2370     mfn_t m;
2371     unsigned int i;
2372     unsigned int reset_count = 0;
2373     unsigned int last_reset_idx = ~0;
2374     int ret = 0;
2375 
2376     if ( !altp2m_active(d) )
2377         return 0;
2378 
2379     altp2m_list_lock(d);
2380 
2381     for ( i = 0; i < MAX_ALTP2M; i++ )
2382     {
2383         if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2384             continue;
2385 
2386         p2m = d->arch.altp2m_p2m[i];
2387         m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL);
2388 
2389         /* Check for a dropped page that may impact this altp2m */
2390         if ( mfn_eq(mfn, INVALID_MFN) &&
2391              gfn_x(gfn) >= p2m->min_remapped_gfn &&
2392              gfn_x(gfn) <= p2m->max_remapped_gfn )
2393         {
2394             if ( !reset_count++ )
2395             {
2396                 p2m_reset_altp2m(d, i, ALTP2M_RESET);
2397                 last_reset_idx = i;
2398             }
2399             else
2400             {
2401                 /* At least 2 altp2m's impacted, so reset everything */
2402                 __put_gfn(p2m, gfn_x(gfn));
2403 
2404                 for ( i = 0; i < MAX_ALTP2M; i++ )
2405                 {
2406                     if ( i == last_reset_idx ||
2407                          d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2408                         continue;
2409 
2410                     p2m_reset_altp2m(d, i, ALTP2M_RESET);
2411                 }
2412 
2413                 ret = 0;
2414                 break;
2415             }
2416         }
2417         else if ( !mfn_eq(m, INVALID_MFN) )
2418         {
2419             int rc = p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
2420 
2421             /* Best effort: Don't bail on error. */
2422             if ( !ret )
2423                 ret = rc;
2424         }
2425 
2426         __put_gfn(p2m, gfn_x(gfn));
2427     }
2428 
2429     altp2m_list_unlock(d);
2430 
2431     return ret;
2432 }
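
/*
 * Editorial sketch (not part of the Xen sources): the "does this removal
 * force a reset?" test used in the loop above.  Dropping a page (mfn ==
 * INVALID_MFN) only endangers an altp2m if the gfn lies inside the range
 * that view has remapped, because a remapped alias could otherwise keep
 * pointing at the stale frame.  Identifiers are invented for illustration.
 */
#include <stdbool.h>

static bool example_removal_needs_reset(unsigned long gfn, bool removing,
                                        unsigned long min_remapped,
                                        unsigned long max_remapped)
{
    return removing && gfn >= min_remapped && gfn <= max_remapped;
}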
2433 #endif /* CONFIG_HVM */
2434 
2435 /*** Audit ***/
2436 
2437 #if P2M_AUDIT && defined(CONFIG_HVM)
2438 void audit_p2m(struct domain *d,
2439                uint64_t *orphans,
2440                uint64_t *m2p_bad,
2441                uint64_t *p2m_bad)
2442 {
2443     struct page_info *page;
2444     struct domain *od;
2445     unsigned long mfn, gfn;
2446     mfn_t p2mfn;
2447     unsigned long orphans_count = 0, mpbad = 0, pmbad = 0;
2448     p2m_access_t p2ma;
2449     p2m_type_t type;
2450     struct p2m_domain *p2m = p2m_get_hostp2m(d);
2451 
2452     if ( !paging_mode_translate(d) )
2453         goto out_p2m_audit;
2454 
2455     P2M_PRINTK("p2m audit starts\n");
2456 
2457     p2m_lock(p2m);
2458     pod_lock(p2m);
2459 
2460     if ( p2m->audit_p2m )
2461         pmbad = p2m->audit_p2m(p2m);
2462 
2463     /* Audit part two: walk the domain's page allocation list, checking
2464      * the m2p entries. */
2465     spin_lock(&d->page_alloc_lock);
2466     page_list_for_each ( page, &d->page_list )
2467     {
2468         mfn = mfn_x(page_to_mfn(page));
2469 
2470         P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
2471 
2472         od = page_get_owner(page);
2473 
2474         if ( od != d )
2475         {
2476             P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
2477                        mfn, od, (od?od->domain_id:-1), d, d->domain_id);
2478             continue;
2479         }
2480 
2481         gfn = get_gpfn_from_mfn(mfn);
2482         if ( gfn == INVALID_M2P_ENTRY )
2483         {
2484             orphans_count++;
2485             P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
2486                            mfn);
2487             continue;
2488         }
2489 
2490         if ( SHARED_M2P(gfn) )
2491         {
2492             P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
2493                     mfn);
2494             continue;
2495         }
2496 
2497         p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL);
2498         if ( mfn_x(p2mfn) != mfn )
2499         {
2500             mpbad++;
2501             P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
2502                        " (-> gfn %#lx)\n",
2503                        mfn, gfn, mfn_x(p2mfn),
2504                        (mfn_valid(p2mfn)
2505                         ? get_gpfn_from_mfn(mfn_x(p2mfn))
2506                         : -1u));
2507             /* This m2p entry is stale: the domain has another frame in
2508              * this physical slot.  No great disaster, but for neatness,
2509              * blow away the m2p entry. */
2510             set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
2511         }
2512         __put_gfn(p2m, gfn);
2513 
2514         P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n",
2515                        mfn, gfn, mfn_x(p2mfn));
2516     }
2517     spin_unlock(&d->page_alloc_lock);
2518 
2519     pod_unlock(p2m);
2520     p2m_unlock(p2m);
2521 
2522     P2M_PRINTK("p2m audit complete\n");
2523     if ( orphans_count | mpbad | pmbad )
2524         P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count);
2525     if ( mpbad | pmbad )
2526     {
2527         P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
2528                    pmbad, mpbad);
2529         WARN();
2530     }
2531 
2532 out_p2m_audit:
2533     *orphans = (uint64_t) orphans_count;
2534     *m2p_bad = (uint64_t) mpbad;
2535     *p2m_bad = (uint64_t) pmbad;
2536 }
2537 #endif /* P2M_AUDIT */
2538 
2539 /*
2540  * Add frame from foreign domain to target domain's physmap. Similar to
2541  * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
2542  * and is not removed from foreign domain.
2543  *
2544  * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
2545  *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
2546  *          be DOMID_XEN in such a case.
2547  *        etc..
2548  *
2549  * Side Effect: the mfn for fgfn will be refcounted in lower level routines
2550  *              so it is not lost while mapped here. The refcnt is released
2551  *              via the XENMEM_remove_from_physmap path.
2552  *
2553  * Returns: 0 ==> success
2554  */
2555 int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
2556                     unsigned long gpfn, domid_t foreigndom)
2557 {
2558     p2m_type_t p2mt, p2mt_prev;
2559     mfn_t prev_mfn, mfn;
2560     struct page_info *page;
2561     int rc;
2562     struct domain *fdom;
2563 
2564     ASSERT(tdom);
2565     if ( foreigndom == DOMID_SELF )
2566         return -EINVAL;
2567     /*
2568      * hvm fixme: until support is added to p2m teardown code to cleanup any
2569      * foreign entries, limit this to hardware domain only.
2570      */
2571     if ( !is_hardware_domain(tdom) )
2572         return -EPERM;
2573 
2574     if ( foreigndom == DOMID_XEN )
2575         fdom = rcu_lock_domain(dom_xen);
2576     else
2577         fdom = rcu_lock_domain_by_id(foreigndom);
2578     if ( fdom == NULL )
2579         return -ESRCH;
2580 
2581     rc = -EINVAL;
2582     if ( tdom == fdom )
2583         goto out;
2584 
2585     rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
2586     if ( rc )
2587         goto out;
2588 
2589     /*
2590      * Take a refcnt on the mfn. NB: following supported for foreign mapping:
2591      *     ram_rw | ram_logdirty | ram_ro | paging_out.
2592      */
2593     page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
2594     if ( !page ||
2595          !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
2596     {
2597         if ( page )
2598             put_page(page);
2599         rc = -EINVAL;
2600         goto out;
2601     }
2602     mfn = page_to_mfn(page);
2603 
2604     /* Remove previously mapped page if it is present. */
2605     prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev);
2606     if ( mfn_valid(prev_mfn) )
2607     {
2608         if ( is_special_page(mfn_to_page(prev_mfn)) )
2609             /* Special pages are simply unhooked from this phys slot */
2610             rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0);
2611         else
2612             /* Normal domain memory is freed, to avoid leaking memory. */
2613             rc = guest_remove_page(tdom, gpfn);
2614         if ( rc )
2615             goto put_both;
2616     }
2617     /*
2618      * Create the new mapping. Can't use guest_physmap_add_page() because it
2619      * will update the m2p table which will result in mfn -> gpfn of dom0
2620      * and not fgfn of domU.
2621      */
2622     rc = set_foreign_p2m_entry(tdom, gpfn, mfn);
2623     if ( rc )
2624         gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
2625                  "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
2626                  gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id);
2627 
2628  put_both:
2629     put_page(page);
2630 
2631     /*
2632      * This put_gfn for the above get_gfn for prev_mfn.  We must do this
2633      * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
2634      * before us.
2635      */
2636     put_gfn(tdom, gpfn);
2637 
2638 out:
2639     if ( fdom )
2640         rcu_unlock_domain(fdom);
2641     return rc;
2642 }
2643 
2644 #ifdef CONFIG_HVM
2645 /*
2646  * Set/clear the #VE suppress bit for a page.  Only available on VMX.
2647  */
2648 int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
2649                         unsigned int altp2m_idx)
2650 {
2651     int rc;
2652     struct xen_hvm_altp2m_suppress_ve_multi sve = {
2653         altp2m_idx, suppress_ve, 0, 0, gfn_x(gfn), gfn_x(gfn), 0
2654     };
2655 
2656     if ( !(rc = p2m_set_suppress_ve_multi(d, &sve)) )
2657         rc = sve.first_error;
2658 
2659     return rc;
2660 }
2661 
2662 /*
2663  * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
2664  */
2665 int p2m_set_suppress_ve_multi(struct domain *d,
2666                               struct xen_hvm_altp2m_suppress_ve_multi *sve)
2667 {
2668     struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
2669     struct p2m_domain *ap2m = NULL;
2670     struct p2m_domain *p2m = host_p2m;
2671     uint64_t start = sve->first_gfn;
2672     int rc = 0;
2673 
2674     if ( sve->view > 0 )
2675     {
2676         if ( sve->view >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2677              d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_EPTP)] ==
2678              mfn_x(INVALID_MFN) )
2679             return -EINVAL;
2680 
2681         p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, sve->view);
2682     }
2683 
2684     p2m_lock(host_p2m);
2685 
2686     if ( ap2m )
2687         p2m_lock(ap2m);
2688 
2689     while ( sve->last_gfn >= start )
2690     {
2691         p2m_access_t a;
2692         p2m_type_t t;
2693         mfn_t mfn;
2694         int err = 0;
2695 
2696         if ( (err = altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a,
2697                                                AP2MGET_query)) &&
2698              !sve->first_error )
2699         {
2700             sve->first_error_gfn = start; /* Save the gfn of the first error */
2701             sve->first_error = err; /* Save the first error code */
2702         }
2703 
2704         if ( !err && (err = p2m->set_entry(p2m, _gfn(start), mfn,
2705                                            PAGE_ORDER_4K, t, a,
2706                                            sve->suppress_ve)) &&
2707              !sve->first_error )
2708         {
2709             sve->first_error_gfn = start; /* Save the gfn of the first error */
2710             sve->first_error = err; /* Save the first error code */
2711         }
2712 
2713         /* Check for continuation if it's not the last iteration. */
2714         if ( sve->last_gfn >= ++start && hypercall_preempt_check() )
2715         {
2716             rc = -ERESTART;
2717             break;
2718         }
2719     }
2720 
2721     sve->first_gfn = start;
2722 
2723     if ( ap2m )
2724         p2m_unlock(ap2m);
2725 
2726     p2m_unlock(host_p2m);
2727 
2728     return rc;
2729 }
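
/*
 * Editorial continuation sketch (hypothetical, not compilable on its own):
 * on -ERESTART the structure's first_gfn has already been advanced past the
 * last processed gfn, so the toplevel handler can re-issue the call with
 * the same structure to resume, e.g.:
 *
 *     do {
 *         rc = p2m_set_suppress_ve_multi(d, &sve);
 *     } while ( rc == -ERESTART );   // real callers create a hypercall
 *                                    // continuation rather than spinning
 */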
2730 
2731 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
2732                         unsigned int altp2m_idx)
2733 {
2734     struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
2735     struct p2m_domain *ap2m = NULL;
2736     struct p2m_domain *p2m;
2737     mfn_t mfn;
2738     p2m_access_t a;
2739     p2m_type_t t;
2740     int rc = 0;
2741 
2742     if ( altp2m_idx > 0 )
2743     {
2744         if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2745              d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
2746              mfn_x(INVALID_MFN) )
2747             return -EINVAL;
2748 
2749         p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
2750     }
2751     else
2752         p2m = host_p2m;
2753 
2754     gfn_lock(host_p2m, gfn, 0);
2755 
2756     if ( ap2m )
2757         p2m_lock(ap2m);
2758 
2759     mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, suppress_ve);
2760     if ( !mfn_valid(mfn) )
2761         rc = -ESRCH;
2762 
2763     if ( ap2m )
2764         p2m_unlock(ap2m);
2765 
2766     gfn_unlock(host_p2m, gfn, 0);
2767 
2768     return rc;
2769 }
2770 
2771 int p2m_set_altp2m_view_visibility(struct domain *d, unsigned int altp2m_idx,
2772                                    uint8_t visible)
2773 {
2774     int rc = 0;
2775 
2776     altp2m_list_lock(d);
2777 
2778     /*
2779      * Eptp index is correlated with altp2m index and should not exceed
2780      * min(MAX_ALTP2M, MAX_EPTP).
2781      */
2782     if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2783          d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
2784          mfn_x(INVALID_MFN) )
2785         rc = -EINVAL;
2786     else if ( visible )
2787         d->arch.altp2m_visible_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] =
2788             d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)];
2789     else
2790         d->arch.altp2m_visible_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] =
2791             mfn_x(INVALID_MFN);
2792 
2793     altp2m_list_unlock(d);
2794 
2795     return rc;
2796 }
2797 #endif
2798 
2799 /*
2800  * Local variables:
2801  * mode: C
2802  * c-file-style: "BSD"
2803  * c-basic-offset: 4
2804  * indent-tabs-mode: nil
2805  * End:
2806  */
2807