1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
7 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
8 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
9 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
10 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; If not, see <http://www.gnu.org/licenses/>.
24 */
25
26 #include <xen/iommu.h>
27 #include <xen/mem_access.h>
28 #include <xen/vm_event.h>
29 #include <xen/event.h>
30 #include <xen/param.h>
31 #include <public/vm_event.h>
32 #include <asm/domain.h>
33 #include <asm/page.h>
34 #include <asm/paging.h>
35 #include <asm/p2m.h>
36 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
37 #include <asm/mem_sharing.h>
38 #include <asm/hvm/nestedhvm.h>
39 #include <asm/altp2m.h>
40 #include <asm/vm_event.h>
41 #include <xsm/xsm.h>
42
43 #include "mm-locks.h"
44
45 /* Turn on/off host superpage page table support for hap, default on. */
46 bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1;
47 boolean_param("hap_1gb", opt_hap_1gb);
48 boolean_param("hap_2mb", opt_hap_2mb);
49
50 DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
51
static void p2m_nestedp2m_init(struct p2m_domain *p2m)
53 {
54 #ifdef CONFIG_HVM
55 INIT_LIST_HEAD(&p2m->np2m_list);
56
57 p2m->np2m_base = P2M_BASE_EADDR;
58 p2m->np2m_generation = 0;
59 #endif
60 }
61
static int p2m_init_logdirty(struct p2m_domain *p2m)
63 {
64 if ( p2m->logdirty_ranges )
65 return 0;
66
67 p2m->logdirty_ranges = rangeset_new(p2m->domain, "log-dirty",
68 RANGESETF_prettyprint_hex);
69 if ( !p2m->logdirty_ranges )
70 return -ENOMEM;
71
72 return 0;
73 }
74
static void p2m_free_logdirty(struct p2m_domain *p2m)
76 {
77 if ( !p2m->logdirty_ranges )
78 return;
79
80 rangeset_destroy(p2m->logdirty_ranges);
81 p2m->logdirty_ranges = NULL;
82 }
83
/* Init the data structures for later use by the p2m code */
static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
86 {
87 int ret = 0;
88
89 mm_rwlock_init(&p2m->lock);
90 INIT_PAGE_LIST_HEAD(&p2m->pages);
91
92 p2m->domain = d;
93 p2m->default_access = p2m_access_rwx;
94 p2m->p2m_class = p2m_host;
95
96 p2m_pod_init(p2m);
97 p2m_nestedp2m_init(p2m);
98
99 if ( hap_enabled(d) && cpu_has_vmx )
100 ret = ept_p2m_init(p2m);
101 else
102 p2m_pt_init(p2m);
103
104 spin_lock_init(&p2m->ioreq.lock);
105
106 return ret;
107 }
108
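/* Allocate and initialise a single p2m structure for d; returns NULL on failure. */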
static struct p2m_domain *p2m_init_one(struct domain *d)
110 {
111 struct p2m_domain *p2m = xzalloc(struct p2m_domain);
112
113 if ( !p2m )
114 return NULL;
115
116 if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
117 goto free_p2m;
118
119 if ( p2m_initialise(d, p2m) )
120 goto free_cpumask;
121 return p2m;
122
123 free_cpumask:
124 free_cpumask_var(p2m->dirty_cpumask);
125 free_p2m:
126 xfree(p2m);
127 return NULL;
128 }
129
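/* Undo p2m_init_one(): release per-p2m resources and free the structure. */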
static void p2m_free_one(struct p2m_domain *p2m)
131 {
132 p2m_free_logdirty(p2m);
133 if ( hap_enabled(p2m->domain) && cpu_has_vmx )
134 ept_p2m_uninit(p2m);
135 free_cpumask_var(p2m->dirty_cpumask);
136 xfree(p2m);
137 }
138
static int p2m_init_hostp2m(struct domain *d)
140 {
141 struct p2m_domain *p2m = p2m_init_one(d);
142 int rc;
143
144 if ( !p2m )
145 return -ENOMEM;
146
147 rc = p2m_init_logdirty(p2m);
148
149 if ( !rc )
150 d->arch.p2m = p2m;
151 else
152 p2m_free_one(p2m);
153
154 return rc;
155 }
156
static void p2m_teardown_hostp2m(struct domain *d)
158 {
159 /* Iterate over all p2m tables per domain */
160 struct p2m_domain *p2m = p2m_get_hostp2m(d);
161
162 if ( p2m )
163 {
164 p2m_free_one(p2m);
165 d->arch.p2m = NULL;
166 }
167 }
168
169 #ifdef CONFIG_HVM
static void p2m_teardown_nestedp2m(struct domain *d)
171 {
172 unsigned int i;
173 struct p2m_domain *p2m;
174
175 for ( i = 0; i < MAX_NESTEDP2M; i++ )
176 {
177 if ( !d->arch.nested_p2m[i] )
178 continue;
179 p2m = d->arch.nested_p2m[i];
180 list_del(&p2m->np2m_list);
181 p2m_free_one(p2m);
182 d->arch.nested_p2m[i] = NULL;
183 }
184 }
185
static int p2m_init_nestedp2m(struct domain *d)
187 {
188 unsigned int i;
189 struct p2m_domain *p2m;
190
191 mm_lock_init(&d->arch.nested_p2m_lock);
192 for ( i = 0; i < MAX_NESTEDP2M; i++ )
193 {
194 d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
195 if ( p2m == NULL )
196 {
197 p2m_teardown_nestedp2m(d);
198 return -ENOMEM;
199 }
200 p2m->p2m_class = p2m_nested;
201 p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
202 list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
203 }
204
205 return 0;
206 }
207
static void p2m_teardown_altp2m(struct domain *d)
209 {
210 unsigned int i;
211 struct p2m_domain *p2m;
212
213 for ( i = 0; i < MAX_ALTP2M; i++ )
214 {
215 if ( !d->arch.altp2m_p2m[i] )
216 continue;
217 p2m = d->arch.altp2m_p2m[i];
218 d->arch.altp2m_p2m[i] = NULL;
219 p2m_free_one(p2m);
220 }
221 }
222
static int p2m_init_altp2m(struct domain *d)
224 {
225 unsigned int i;
226 struct p2m_domain *p2m;
227 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
228
229 mm_lock_init(&d->arch.altp2m_list_lock);
230 for ( i = 0; i < MAX_ALTP2M; i++ )
231 {
232 d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
233 if ( p2m == NULL )
234 {
235 p2m_teardown_altp2m(d);
236 return -ENOMEM;
237 }
238 p2m->p2m_class = p2m_alternate;
239 p2m->access_required = hostp2m->access_required;
240 _atomic_set(&p2m->active_vcpus, 0);
241 }
242
243 return 0;
244 }
245 #endif
246
int p2m_init(struct domain *d)
248 {
249 int rc;
250
251 rc = p2m_init_hostp2m(d);
252 if ( rc )
253 return rc;
254
255 #ifdef CONFIG_HVM
256 /* Must initialise nestedp2m unconditionally
257 * since nestedhvm_enabled(d) returns false here.
258 * (p2m_init runs too early for HVM_PARAM_* options) */
259 rc = p2m_init_nestedp2m(d);
260 if ( rc )
261 {
262 p2m_teardown_hostp2m(d);
263 return rc;
264 }
265
266 rc = p2m_init_altp2m(d);
267 if ( rc )
268 {
269 p2m_teardown_hostp2m(d);
270 p2m_teardown_nestedp2m(d);
271 }
272 #endif
273
274 return rc;
275 }
276
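/*
 * Returns 1 if [start, end] is entirely log-dirty (global log-dirty is on,
 * or the range is fully contained in the log-dirty rangeset), -1 if it only
 * partially overlaps the rangeset, and 0 if it is entirely clean.
 */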
int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
                          unsigned long end)
279 {
280 if ( p2m->global_logdirty ||
281 rangeset_contains_range(p2m->logdirty_ranges, start, end) )
282 return 1;
283 if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
284 return -1;
285 return 0;
286 }
287
static void change_entry_type_global(struct p2m_domain *p2m,
                                     p2m_type_t ot, p2m_type_t nt)
290 {
291 p2m->change_entry_type_global(p2m, ot, nt);
292 /* Don't allow 'recalculate' operations to change the logdirty state. */
293 if ( ot != nt )
294 p2m->global_logdirty = (nt == p2m_ram_logdirty);
295 }
296
297 /*
298 * May be called with ot = nt = p2m_ram_rw for its side effect of
299 * recalculating all PTEs in the p2m.
300 */
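/*
 * For example, a caller that merely wants every entry revisited, without
 * changing any types, can use:
 *
 *     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
 */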
void p2m_change_entry_type_global(struct domain *d,
                                  p2m_type_t ot, p2m_type_t nt)
303 {
304 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
305
306 ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
307
308 p2m_lock(hostp2m);
309
310 change_entry_type_global(hostp2m, ot, nt);
311
312 #ifdef CONFIG_HVM
313 if ( unlikely(altp2m_active(d)) )
314 {
315 unsigned int i;
316
317 for ( i = 0; i < MAX_ALTP2M; i++ )
318 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
319 {
320 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
321
322 p2m_lock(altp2m);
323 change_entry_type_global(altp2m, ot, nt);
324 p2m_unlock(altp2m);
325 }
326 }
327 #endif
328
329 p2m_unlock(hostp2m);
330 }
331
332 #ifdef CONFIG_HVM
333 /* There's already a memory_type_changed() in asm/mtrr.h. */
static void _memory_type_changed(struct p2m_domain *p2m)
335 {
336 if ( p2m->memory_type_changed )
337 p2m->memory_type_changed(p2m);
338 }
339
void p2m_memory_type_changed(struct domain *d)
341 {
342 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
343
344 p2m_lock(hostp2m);
345
346 _memory_type_changed(hostp2m);
347
348 if ( unlikely(altp2m_active(d)) )
349 {
350 unsigned int i;
351
352 for ( i = 0; i < MAX_ALTP2M; i++ )
353 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
354 {
355 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
356
357 p2m_lock(altp2m);
358 _memory_type_changed(altp2m);
359 p2m_unlock(altp2m);
360 }
361 }
362
363 p2m_unlock(hostp2m);
364 }
365 #endif
366
int p2m_set_ioreq_server(struct domain *d,
                         unsigned int flags,
                         struct hvm_ioreq_server *s)
370 {
371 struct p2m_domain *p2m = p2m_get_hostp2m(d);
372 int rc;
373
374 /*
375 * Use lock to prevent concurrent setting attempts
376 * from multiple ioreq servers.
377 */
378 spin_lock(&p2m->ioreq.lock);
379
    /* Unmap the ioreq server from the p2m type by passing flags == 0. */
381 if ( flags == 0 )
382 {
383 rc = -EINVAL;
384 if ( p2m->ioreq.server != s )
385 goto out;
386
387 p2m->ioreq.server = NULL;
388 p2m->ioreq.flags = 0;
389 }
390 else
391 {
392 rc = -EBUSY;
393 if ( p2m->ioreq.server != NULL )
394 goto out;
395
396 /*
         * It is possible that an ioreq server has just been unmapped and
         * released the spin lock while some p2m_ioreq_server entries remain
         * in the p2m table. We shall refuse another ioreq server mapping
         * request in such a case.
401 */
402 if ( read_atomic(&p2m->ioreq.entry_count) )
403 goto out;
404
405 p2m->ioreq.server = s;
406 p2m->ioreq.flags = flags;
407 }
408
409 rc = 0;
410
411 out:
412 spin_unlock(&p2m->ioreq.lock);
413
414 return rc;
415 }
416
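/*
 * Return the ioreq server (if any) currently claiming the p2m_ioreq_server
 * type, together with the flags it was mapped with.
 */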
struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
                                              unsigned int *flags)
419 {
420 struct p2m_domain *p2m = p2m_get_hostp2m(d);
421 struct hvm_ioreq_server *s;
422
423 spin_lock(&p2m->ioreq.lock);
424
425 s = p2m->ioreq.server;
426 *flags = p2m->ioreq.flags;
427
428 spin_unlock(&p2m->ioreq.lock);
429 return s;
430 }
431
void p2m_enable_hardware_log_dirty(struct domain *d)
433 {
434 struct p2m_domain *p2m = p2m_get_hostp2m(d);
435
436 if ( p2m->enable_hardware_log_dirty )
437 p2m->enable_hardware_log_dirty(p2m);
438 }
439
void p2m_disable_hardware_log_dirty(struct domain *d)
441 {
442 struct p2m_domain *p2m = p2m_get_hostp2m(d);
443
444 if ( p2m->disable_hardware_log_dirty )
445 p2m->disable_hardware_log_dirty(p2m);
446 }
447
void p2m_flush_hardware_cached_dirty(struct domain *d)
449 {
450 struct p2m_domain *p2m = p2m_get_hostp2m(d);
451
452 if ( p2m->flush_hardware_cached_dirty )
453 {
454 p2m_lock(p2m);
455 p2m->flush_hardware_cached_dirty(p2m);
456 p2m_unlock(p2m);
457 }
458 }
459
460 /*
461 * Force a synchronous P2M TLB flush if a deferred flush is pending.
462 *
463 * Must be called with the p2m lock held.
464 */
void p2m_tlb_flush_sync(struct p2m_domain *p2m)
466 {
467 if ( p2m->need_flush ) {
468 p2m->need_flush = 0;
469 p2m->tlb_flush(p2m);
470 }
471 }
472
473 /*
474 * Unlock the p2m lock and do a P2M TLB flush if needed.
475 */
void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m)
477 {
478 if ( p2m->need_flush ) {
479 p2m->need_flush = 0;
480 mm_write_unlock(&p2m->lock);
481 p2m->tlb_flush(p2m);
482 } else
483 mm_write_unlock(&p2m->lock);
484 }
485
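/*
 * Core gfn -> (mfn, type, access) lookup for a p2m. When 'locked' is set
 * the per-gfn lock is acquired and must later be dropped via __put_gfn().
 * P2M_ALLOC / P2M_UNSHARE queries may first populate a forked page or
 * unshare a shared one, respectively.
 */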
mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
                            p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                            unsigned int *page_order, bool_t locked)
489 {
490 mfn_t mfn;
491 gfn_t gfn = _gfn(gfn_l);
492
    /* Unshare makes no sense without populate. */
494 if ( q & P2M_UNSHARE )
495 q |= P2M_ALLOC;
496
497 if ( !p2m || !paging_mode_translate(p2m->domain) )
498 {
499 /* Not necessarily true, but for non-translated guests, we claim
500 * it's the most generic kind of memory */
501 *t = p2m_ram_rw;
502 return _mfn(gfn_l);
503 }
504
505 if ( locked )
506 /* Grab the lock here, don't release until put_gfn */
507 gfn_lock(p2m, gfn, 0);
508
509 mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
510
511 /* Check if we need to fork the page */
512 if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
513 !mem_sharing_fork_page(p2m->domain, gfn, q & P2M_UNSHARE) )
514 mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
515
516 /* Check if we need to unshare the page */
517 if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
518 {
519 ASSERT(p2m_is_hostp2m(p2m));
520 /*
521 * Try to unshare. If we fail, communicate ENOMEM without
522 * sleeping.
523 */
524 if ( mem_sharing_unshare_page(p2m->domain, gfn_l) < 0 )
525 mem_sharing_notify_enomem(p2m->domain, gfn_l, false);
526 mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
527 }
528
    if ( unlikely(p2m_is_broken(*t)) )
530 {
531 /* Return invalid_mfn to avoid caller's access */
532 mfn = INVALID_MFN;
533 if ( q & P2M_ALLOC )
534 domain_crash(p2m->domain);
535 }
536
537 return mfn;
538 }
539
void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
541 {
542 if ( !p2m || !paging_mode_translate(p2m->domain) )
543 /* Nothing to do in this case */
544 return;
545
546 ASSERT(gfn_locked_by_me(p2m, gfn));
547
548 gfn_unlock(p2m, gfn, 0);
549 }
550
551 /* Atomically look up a GFN and take a reference count on the backing page. */
struct page_info *p2m_get_page_from_gfn(
    struct p2m_domain *p2m, gfn_t gfn,
    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
555 {
556 struct page_info *page = NULL;
557 p2m_access_t _a;
558 p2m_type_t _t;
559 mfn_t mfn;
560
561 /* Allow t or a to be NULL */
562 t = t ?: &_t;
563 a = a ?: &_a;
564
565 if ( likely(!p2m_locked_by_me(p2m)) )
566 {
567 /* Fast path: look up and get out */
568 p2m_read_lock(p2m);
569 mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
570 if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
571 && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
572 {
573 page = mfn_to_page(mfn);
574 if ( unlikely(p2m_is_foreign(*t)) )
575 {
576 struct domain *fdom = page_get_owner_and_reference(page);
577
578 ASSERT(fdom != p2m->domain);
579 if ( fdom == NULL )
580 page = NULL;
581 }
582 else
583 {
584 struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
585
586 if ( !get_page(page, d) )
587 page = NULL;
588 }
589 }
590 p2m_read_unlock(p2m);
591
592 if ( page )
593 return page;
594
595 /* Error path: not a suitable GFN at all */
596 if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) &&
597 !mem_sharing_is_fork(p2m->domain) )
598 return NULL;
599 }
600
601 /* Slow path: take the write lock and do fixups */
602 mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
603 if ( p2m_is_ram(*t) && mfn_valid(mfn) )
604 {
605 struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
606
607 page = mfn_to_page(mfn);
608 if ( !get_page(page, d) )
609 page = NULL;
610 }
611 put_gfn(p2m->domain, gfn_x(gfn));
612
613 return page;
614 }
615
616 /* Returns: 0 for success, -errno for failure */
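/*
 * The [gfn, gfn + 2^page_order) range is carved into the largest naturally
 * aligned chunks the configuration allows (1G/2M/4K with HAP, 4K otherwise)
 * and each chunk is installed via p2m->set_entry().
 */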
int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
619 {
620 struct domain *d = p2m->domain;
621 unsigned long todo = 1ul << page_order;
622 unsigned int order;
623 int set_rc, rc = 0;
624
625 ASSERT(gfn_locked_by_me(p2m, gfn));
626
627 while ( todo )
628 {
629 if ( hap_enabled(d) )
630 {
631 unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;
632
633 fn_mask |= gfn_x(gfn) | todo;
634
635 order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
636 hap_has_1gb) ? PAGE_ORDER_1G :
637 (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
638 hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
639 }
640 else
641 order = 0;
642
643 set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1);
644 if ( set_rc )
645 rc = set_rc;
646
647 gfn = gfn_add(gfn, 1ul << order);
648 if ( !mfn_eq(mfn, INVALID_MFN) )
649 mfn = mfn_add(mfn, 1ul << order);
650 todo -= 1ul << order;
651 }
652
653 return rc;
654 }
655
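/*
 * Allocate a page-table page of the given level from the domain's paging
 * pool and track it on the p2m's page list; returns INVALID_MFN on failure.
 */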
mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level)
657 {
658 struct page_info *pg;
659
660 ASSERT(p2m);
661 ASSERT(p2m->domain);
662 ASSERT(p2m->domain->arch.paging.alloc_page);
663 pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
664 if ( !pg )
665 return INVALID_MFN;
666
667 page_list_add_tail(pg, &p2m->pages);
668 BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table);
669 BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table);
670 BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table);
671 pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated;
672
673 return page_to_mfn(pg);
674 }
675
void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
677 {
678 ASSERT(pg);
679 ASSERT(p2m);
680 ASSERT(p2m->domain);
681 ASSERT(p2m->domain->arch.paging.free_page);
682
683 page_list_del(pg, &p2m->pages);
684 p2m->domain->arch.paging.free_page(p2m->domain, pg);
685
686 return;
687 }
688
689 /*
690 * Allocate a new p2m table for a domain.
691 *
692 * The structure of the p2m table is that of a pagetable for xen (i.e. it is
693 * controlled by CONFIG_PAGING_LEVELS).
694 *
695 * Returns 0 for success, -errno for failure.
696 */
int p2m_alloc_table(struct p2m_domain *p2m)
698 {
699 mfn_t top_mfn;
700 struct domain *d = p2m->domain;
701
702 p2m_lock(p2m);
703
704 if ( p2m_is_hostp2m(p2m) && domain_tot_pages(d) )
705 {
706 P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
707 p2m_unlock(p2m);
708 return -EINVAL;
709 }
710
711 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
712 {
713 P2M_ERROR("p2m already allocated for this domain\n");
714 p2m_unlock(p2m);
715 return -EINVAL;
716 }
717
718 P2M_PRINTK("allocating p2m table\n");
719
720 top_mfn = p2m_alloc_ptp(p2m, 4);
721 if ( mfn_eq(top_mfn, INVALID_MFN) )
722 {
723 p2m_unlock(p2m);
724 return -ENOMEM;
725 }
726
727 p2m->phys_table = pagetable_from_mfn(top_mfn);
728
729 if ( hap_enabled(d) )
730 iommu_share_p2m_table(d);
731
732 p2m_unlock(p2m);
733 return 0;
734 }
735
736 /*
737 * hvm fixme: when adding support for pvh non-hardware domains, this path must
738 * cleanup any foreign p2m types (release refcnts on them).
739 */
void p2m_teardown(struct p2m_domain *p2m)
741 /* Return all the p2m pages to Xen.
742 * We know we don't have any extra mappings to these pages */
743 {
744 struct page_info *pg;
745 struct domain *d;
746
747 if (p2m == NULL)
748 return;
749
750 d = p2m->domain;
751
752 p2m_lock(p2m);
753 ASSERT(atomic_read(&d->shr_pages) == 0);
754 p2m->phys_table = pagetable_null();
755
756 while ( (pg = page_list_remove_head(&p2m->pages)) )
757 d->arch.paging.free_page(d, pg);
758 p2m_unlock(p2m);
759 }
760
void p2m_final_teardown(struct domain *d)
762 {
763 #ifdef CONFIG_HVM
764 /*
765 * We must teardown both of them unconditionally because
766 * we initialise them unconditionally.
767 */
768 p2m_teardown_altp2m(d);
769 p2m_teardown_nestedp2m(d);
770 #endif
771
772 /* Iterate over all p2m tables per domain */
773 p2m_teardown_hostp2m(d);
774 }
775
static int __must_check
p2m_remove_page(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                unsigned int page_order)
779 {
780 unsigned long i;
781 p2m_type_t t;
782 p2m_access_t a;
783
784 /* IOMMU for PV guests is handled in get_page_type() and put_page(). */
785 if ( !paging_mode_translate(p2m->domain) )
786 return 0;
787
788 ASSERT(gfn_locked_by_me(p2m, gfn));
789 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
790
791 for ( i = 0; i < (1UL << page_order); )
792 {
793 unsigned int cur_order;
794 mfn_t mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
795 &cur_order, NULL);
796
797 if ( p2m_is_valid(t) &&
798 (!mfn_valid(mfn) || !mfn_eq(mfn_add(mfn, i), mfn_return)) )
799 return -EILSEQ;
800
801 i += (1UL << cur_order) -
802 ((gfn_x(gfn) + i) & ((1UL << cur_order) - 1));
803 }
804
805 if ( mfn_valid(mfn) )
806 {
807 for ( i = 0; i < (1UL << page_order); i++ )
808 {
809 p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL);
810 if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
811 set_gpfn_from_mfn(mfn_x(mfn) + i, INVALID_M2P_ENTRY);
812 }
813 }
814
815 return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
816 p2m->default_access);
817 }
818
int
guest_physmap_remove_page(struct domain *d, gfn_t gfn,
                          mfn_t mfn, unsigned int page_order)
822 {
823 struct p2m_domain *p2m = p2m_get_hostp2m(d);
824 int rc;
825
826 gfn_lock(p2m, gfn, page_order);
827 rc = p2m_remove_page(p2m, gfn, mfn, page_order);
828 gfn_unlock(p2m, gfn, page_order);
829
830 return rc;
831 }
832
int
guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
                       unsigned int page_order)
836 {
837 /* IOMMU for PV guests is handled in get_page_type() and put_page(). */
838 if ( !paging_mode_translate(d) )
839 {
840 struct page_info *page = mfn_to_page(mfn);
841 unsigned long i;
842
843 /*
844 * Our interface for PV guests wrt IOMMU entries hasn't been very
845 * clear; but historically, pages have started out with IOMMU mappings,
846 * and only lose them when changed to a different page type.
847 *
848 * Retain this property by grabbing a writable type ref and then
849 * dropping it immediately. The result will be pages that have a
850 * writable type (and an IOMMU entry), but a count of 0 (such that
851 * any guest-requested type changes succeed and remove the IOMMU
852 * entry).
853 */
854 for ( i = 0; i < (1UL << page_order); ++i, ++page )
855 {
856 if ( !need_iommu_pt_sync(d) )
857 /* nothing */;
858 else if ( get_page_and_type(page, d, PGT_writable_page) )
859 put_page_and_type(page);
860 else
861 return -EINVAL;
862
863 set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_x(gfn) + i);
864 }
865
866 return 0;
867 }
868
869 return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
870 }
871
872 #ifdef CONFIG_HVM
int
guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
                        unsigned int page_order, p2m_type_t t)
876 {
877 struct p2m_domain *p2m = p2m_get_hostp2m(d);
878 unsigned long i;
879 gfn_t ogfn;
880 p2m_type_t ot;
881 p2m_access_t a;
882 mfn_t omfn;
883 int pod_count = 0;
884 int rc = 0;
885
886 if ( !paging_mode_translate(d) )
887 {
888 ASSERT_UNREACHABLE();
889 return -EPERM;
890 }
891
    /* Foreign pages are added through p2m_add_foreign(). */
893 if ( p2m_is_foreign(t) )
894 return -EINVAL;
895
896 if ( !mfn_valid(mfn) )
897 {
898 ASSERT_UNREACHABLE();
899 return -EINVAL;
900 }
901
902 p2m_lock(p2m);
903
904 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
905
906 /* First, remove m->p mappings for existing p->m mappings */
907 for ( i = 0; i < (1UL << page_order); i++ )
908 {
909 omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot,
910 &a, 0, NULL, NULL);
911 if ( p2m_is_shared(ot) )
912 {
913 /* Do an unshare to cleanly take care of all corner cases. */
914 rc = mem_sharing_unshare_page(d, gfn_x(gfn) + i);
915 if ( rc )
916 {
917 p2m_unlock(p2m);
918 /*
919 * NOTE: Should a guest domain bring this upon itself,
920 * there is not a whole lot we can do. We are buried
921 * deep in locks from most code paths by now. So, fail
922 * the call and don't try to sleep on a wait queue
923 * while placing the mem event.
924 *
925 * However, all current (changeset 3432abcf9380) code
926 * paths avoid this unsavoury situation. For now.
927 *
928 * Foreign domains are okay to place an event as they
929 * won't go to sleep.
930 */
931 mem_sharing_notify_enomem(d, gfn_x(gfn) + i, false);
932 return rc;
933 }
934 omfn = p2m->get_entry(p2m, gfn_add(gfn, i),
935 &ot, &a, 0, NULL, NULL);
936 ASSERT(!p2m_is_shared(ot));
937 }
938 if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
939 {
940 /* Really shouldn't be unmapping grant/foreign maps this way */
941 domain_crash(d);
942 p2m_unlock(p2m);
943
944 return -EINVAL;
945 }
946 else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
947 {
948 ASSERT(mfn_valid(omfn));
949 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
950 }
951 else if ( ot == p2m_populate_on_demand )
952 {
            /* Count how many PoD entries we'll be replacing if successful */
954 pod_count++;
955 }
956 else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
957 {
958 /* We're plugging a hole in the physmap where a paged out page was */
959 atomic_dec(&d->paged_pages);
960 }
961 }
962
963 /* Then, look for m->p mappings for this range and deal with them */
964 for ( i = 0; i < (1UL << page_order); i++ )
965 {
966 if ( dom_cow &&
967 page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow )
968 {
969 /* This is no way to add a shared page to your physmap! */
970 gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n",
971 mfn_x(mfn_add(mfn, i)), d->domain_id);
972 p2m_unlock(p2m);
973 return -EINVAL;
974 }
975 if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d )
976 continue;
977 ogfn = mfn_to_gfn(d, mfn_add(mfn, i));
978 if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) &&
979 !gfn_eq(ogfn, gfn_add(gfn, i)) )
980 {
981 /* This machine frame is already mapped at another physical
982 * address */
983 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
984 mfn_x(mfn_add(mfn, i)), gfn_x(ogfn),
985 gfn_x(gfn_add(gfn, i)));
986 omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL);
987 if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
988 {
989 ASSERT(mfn_valid(omfn));
990 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
991 gfn_x(ogfn) , mfn_x(omfn));
992 if ( mfn_eq(omfn, mfn_add(mfn, i)) &&
993 (rc = p2m_remove_page(p2m, ogfn, omfn, 0)) )
994 goto out;
995 }
996 }
997 }
998
999 /* Now, actually do the two-way mapping */
1000 rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, p2m->default_access);
1001 if ( rc == 0 )
1002 {
1003 pod_lock(p2m);
1004 p2m->pod.entry_count -= pod_count;
1005 BUG_ON(p2m->pod.entry_count < 0);
1006 pod_unlock(p2m);
1007
1008 if ( !p2m_is_grant(t) )
1009 {
1010 for ( i = 0; i < (1UL << page_order); i++ )
1011 set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)),
1012 gfn_x(gfn_add(gfn, i)));
1013 }
1014 }
1015
1016 out:
1017 p2m_unlock(p2m);
1018
1019 return rc;
1020 }
1021 #endif
1022
1023 /*
1024 * Modify the p2m type of a single gfn from ot to nt.
1025 * Returns: 0 for success, -errno for failure.
1026 * Resets the access permissions.
1027 */
int p2m_change_type_one(struct domain *d, unsigned long gfn_l,
                        p2m_type_t ot, p2m_type_t nt)
1030 {
1031 p2m_access_t a;
1032 p2m_type_t pt;
1033 gfn_t gfn = _gfn(gfn_l);
1034 mfn_t mfn;
1035 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1036 int rc;
1037
1038 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
1039 BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
1040
1041 gfn_lock(p2m, gfn, 0);
1042
1043 mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL);
1044 rc = likely(pt == ot)
1045 ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
1046 p2m->default_access)
1047 : -EBUSY;
1048
1049 gfn_unlock(p2m, gfn, 0);
1050
1051 return rc;
1052 }
1053
1054 /* Modify the p2m type of [start, end_exclusive) from ot to nt. */
static void change_type_range(struct p2m_domain *p2m,
                              unsigned long start, unsigned long end_exclusive,
                              p2m_type_t ot, p2m_type_t nt)
1058 {
1059 unsigned long invalidate_start, invalidate_end;
1060 struct domain *d = p2m->domain;
1061 const unsigned long host_max_pfn = p2m_get_hostp2m(d)->max_mapped_pfn;
1062 unsigned long end = end_exclusive - 1;
1063 const unsigned long max_pfn = p2m->max_mapped_pfn;
1064 int rc = 0;
1065
1066 /*
1067 * If we have an altp2m, the logdirty rangeset range needs to
1068 * match that of the hostp2m, but for efficiency, we want to clip
     * down the invalidation range according to the mapped values
1070 * in the altp2m. Keep track of and clip the ranges separately.
1071 */
1072 invalidate_start = start;
1073 invalidate_end = end;
1074
1075 /*
1076 * Clip down to the host p2m. This is probably not the right behavior.
1077 * This should be revisited later, but for now post a warning.
1078 */
1079 if ( unlikely(end > host_max_pfn) )
1080 {
1081 printk(XENLOG_G_WARNING "Dom%d logdirty rangeset clipped to max_mapped_pfn\n",
1082 d->domain_id);
1083 end = invalidate_end = host_max_pfn;
1084 }
1085
1086 /* If the requested range is out of scope, return doing nothing. */
1087 if ( start > end )
1088 return;
1089
1090 if ( p2m_is_altp2m(p2m) )
1091 invalidate_end = min(invalidate_end, max_pfn);
1092
1093 /*
1094 * If the p2m is empty, or the range is outside the currently
1095 * mapped range, no need to do the invalidation; just update the
1096 * rangeset.
1097 */
1098 if ( invalidate_start < invalidate_end )
1099 {
1100 /*
1101 * If all valid gfns are in the invalidation range, just do a
1102 * global type change. Otherwise, invalidate only the range
1103 * we need.
1104 *
1105 * NB that invalidate_end can't logically be >max_pfn at this
1106 * point. If this changes, the == will need to be changed to
1107 * >=.
1108 */
1109 ASSERT(invalidate_end <= max_pfn);
1110 if ( !invalidate_start && invalidate_end == max_pfn)
1111 p2m->change_entry_type_global(p2m, ot, nt);
1112 else
1113 rc = p2m->change_entry_type_range(p2m, ot, nt,
1114 invalidate_start, invalidate_end);
1115 if ( rc )
1116 {
1117 printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n",
1118 rc, d->domain_id, invalidate_start, invalidate_end, ot, nt);
1119 domain_crash(d);
1120 }
1121 }
1122
1123 switch ( nt )
1124 {
1125 case p2m_ram_rw:
1126 if ( ot == p2m_ram_logdirty )
1127 rc = rangeset_remove_range(p2m->logdirty_ranges, start, end);
1128 break;
1129 case p2m_ram_logdirty:
1130 if ( ot == p2m_ram_rw )
1131 rc = rangeset_add_range(p2m->logdirty_ranges, start, end);
1132 break;
1133 default:
1134 break;
1135 }
1136 if ( rc )
1137 {
1138 printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
1139 rc, d->domain_id);
1140 domain_crash(d);
1141 }
1142 }
1143
void p2m_change_type_range(struct domain *d,
                           unsigned long start, unsigned long end,
                           p2m_type_t ot, p2m_type_t nt)
1147 {
1148 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1149
1150 ASSERT(ot != nt);
1151 ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
1152
1153 p2m_lock(hostp2m);
1154 hostp2m->defer_nested_flush = 1;
1155
1156 change_type_range(hostp2m, start, end, ot, nt);
1157
1158 #ifdef CONFIG_HVM
1159 if ( unlikely(altp2m_active(d)) )
1160 {
1161 unsigned int i;
1162
1163 for ( i = 0; i < MAX_ALTP2M; i++ )
1164 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
1165 {
1166 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
1167
1168 p2m_lock(altp2m);
1169 change_type_range(altp2m, start, end, ot, nt);
1170 p2m_unlock(altp2m);
1171 }
1172 }
1173 #endif
1174 hostp2m->defer_nested_flush = 0;
1175 if ( nestedhvm_enabled(d) )
1176 p2m_flush_nestedp2m(d);
1177
1178 p2m_unlock(hostp2m);
1179 }
1180
1181 /*
1182 * Finish p2m type change for gfns which are marked as need_recalc in a range.
1183 * Uses the current p2m's max_mapped_pfn to further clip the invalidation
1184 * range for alternate p2ms.
1185 * Returns: 0 for success, negative for failure
1186 */
static int finish_type_change(struct p2m_domain *p2m,
                              gfn_t first_gfn, unsigned long max_nr)
1189 {
1190 unsigned long gfn = gfn_x(first_gfn);
1191 unsigned long last_gfn = gfn + max_nr - 1;
1192 int rc = 0;
1193
1194 last_gfn = min(last_gfn, p2m->max_mapped_pfn);
1195 while ( gfn <= last_gfn )
1196 {
1197 rc = p2m->recalc(p2m, gfn);
1198 /*
1199 * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
         * 0/1/-ENOMEM/-ENOENT; -ENOENT isn't an error as we are looping
         * over gfns here. If rc is 1 we need to turn it into 0 for success.
1202 */
1203 if ( rc == -ENOENT || rc > 0 )
1204 rc = 0;
1205 else if ( rc < 0 )
1206 {
1207 gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n",
1208 p2m->domain->domain_id, gfn);
1209 break;
1210 }
1211
1212 gfn++;
1213 }
1214
1215 return rc;
1216 }
1217
int p2m_finish_type_change(struct domain *d,
                           gfn_t first_gfn, unsigned long max_nr)
1220 {
1221 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1222 int rc;
1223
1224 p2m_lock(hostp2m);
1225
1226 rc = finish_type_change(hostp2m, first_gfn, max_nr);
1227
1228 if ( rc < 0 )
1229 goto out;
1230
1231 #ifdef CONFIG_HVM
1232 if ( unlikely(altp2m_active(d)) )
1233 {
1234 unsigned int i;
1235
1236 for ( i = 0; i < MAX_ALTP2M; i++ )
1237 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
1238 {
1239 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
1240
1241 p2m_lock(altp2m);
1242 rc = finish_type_change(altp2m, first_gfn, max_nr);
1243 p2m_unlock(altp2m);
1244
1245 if ( rc < 0 )
1246 goto out;
1247 }
1248 }
1249 #endif
1250
1251 out:
1252 p2m_unlock(hostp2m);
1253
1254 return rc;
1255 }
1256
1257 /*
1258 * Returns:
1259 * 0 for success
1260 * -errno for failure
1261 * 1 + new order for caller to retry with smaller order (guaranteed
1262 * to be smaller than order passed in)
1263 */
static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l,
                               mfn_t mfn, unsigned int order,
                               p2m_type_t gfn_p2mt, p2m_access_t access)
1267 {
1268 int rc = 0;
1269 p2m_access_t a;
1270 p2m_type_t ot;
1271 mfn_t omfn;
1272 gfn_t gfn = _gfn(gfn_l);
1273 unsigned int cur_order = 0;
1274 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1275
1276 if ( !paging_mode_translate(d) )
1277 return -EIO;
1278
1279 gfn_lock(p2m, gfn, order);
1280 omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
1281 if ( cur_order < order )
1282 {
1283 gfn_unlock(p2m, gfn, order);
1284 return cur_order + 1;
1285 }
1286 if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
1287 {
1288 gfn_unlock(p2m, gfn, order);
1289 domain_crash(d);
1290 return -ENOENT;
1291 }
1292 else if ( p2m_is_ram(ot) )
1293 {
1294 unsigned long i;
1295
1296 for ( i = 0; i < (1UL << order); ++i )
1297 {
1298 ASSERT(mfn_valid(mfn_add(omfn, i)));
1299 set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
1300 }
1301 }
1302
1303 P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
1304 rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
1305 if ( rc )
1306 gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
1307 gfn_l, order, rc, mfn_x(mfn));
1308 #ifdef CONFIG_HVM
1309 else if ( p2m_is_pod(ot) )
1310 {
1311 pod_lock(p2m);
1312 p2m->pod.entry_count -= 1UL << order;
1313 BUG_ON(p2m->pod.entry_count < 0);
1314 pod_unlock(p2m);
1315 }
1316 #endif
1317 gfn_unlock(p2m, gfn, order);
1318
1319 return rc;
1320 }
1321
1322 /* Set foreign mfn in the given guest's p2m table. */
int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
1324 {
1325 return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
1326 p2m_get_hostp2m(d)->default_access);
1327 }
1328
int set_mmio_p2m_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
                       unsigned int order)
1331 {
1332 if ( order > PAGE_ORDER_4K &&
1333 rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
1334 mfn_x(mfn) + (1UL << order) - 1) )
1335 return PAGE_ORDER_4K + 1;
1336
1337 return set_typed_p2m_entry(d, gfn_x(gfn), mfn, order, p2m_mmio_direct,
1338 p2m_get_hostp2m(d)->default_access);
1339 }
1340
int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l,
                           p2m_access_t p2ma, unsigned int flag)
1343 {
1344 p2m_type_t p2mt;
1345 p2m_access_t a;
1346 gfn_t gfn = _gfn(gfn_l);
1347 mfn_t mfn;
1348 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1349 int ret;
1350
1351 if ( !paging_mode_translate(p2m->domain) )
1352 {
1353 if ( !is_iommu_enabled(d) )
1354 return 0;
1355 return iommu_legacy_map(d, _dfn(gfn_l), _mfn(gfn_l), PAGE_ORDER_4K,
1356 IOMMUF_readable | IOMMUF_writable);
1357 }
1358
1359 gfn_lock(p2m, gfn, 0);
1360
1361 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1362
1363 if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
1364 ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K,
1365 p2m_mmio_direct, p2ma);
1366 else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma )
1367 ret = 0;
1368 else
1369 {
1370 if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED )
1371 ret = 0;
1372 else
1373 ret = -EBUSY;
1374 printk(XENLOG_G_WARNING
1375 "Cannot setup identity map d%d:%lx,"
1376 " gfn already mapped to %lx.\n",
1377 d->domain_id, gfn_l, mfn_x(mfn));
1378 }
1379
1380 gfn_unlock(p2m, gfn, 0);
1381 return ret;
1382 }
1383
1384 /*
1385 * Returns:
1386 * 0 for success
1387 * -errno for failure
1388 * order+1 for caller to retry with order (guaranteed smaller than
1389 * the order value passed in)
1390 */
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
                         unsigned int order)
1393 {
1394 int rc = -EINVAL;
1395 gfn_t gfn = _gfn(gfn_l);
1396 mfn_t actual_mfn;
1397 p2m_access_t a;
1398 p2m_type_t t;
1399 unsigned int cur_order = 0;
1400 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1401
1402 if ( !paging_mode_translate(d) )
1403 return -EIO;
1404
1405 gfn_lock(p2m, gfn, order);
1406 actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
1407 if ( cur_order < order )
1408 {
1409 rc = cur_order + 1;
1410 goto out;
1411 }
1412
1413 /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
1414 if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) )
1415 {
1416 gdprintk(XENLOG_ERR,
1417 "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t);
1418 goto out;
1419 }
1420 if ( !mfn_eq(mfn, actual_mfn) )
1421 gdprintk(XENLOG_WARNING,
1422 "no mapping between mfn %08lx and gfn %08lx\n",
1423 mfn_x(mfn), gfn_l);
1424 rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
1425 p2m->default_access);
1426
1427 out:
1428 gfn_unlock(p2m, gfn, order);
1429
1430 return rc;
1431 }
1432
int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l)
1434 {
1435 p2m_type_t p2mt;
1436 p2m_access_t a;
1437 gfn_t gfn = _gfn(gfn_l);
1438 mfn_t mfn;
1439 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1440 int ret;
1441
1442 if ( !paging_mode_translate(d) )
1443 {
1444 if ( !is_iommu_enabled(d) )
1445 return 0;
1446 return iommu_legacy_unmap(d, _dfn(gfn_l), PAGE_ORDER_4K);
1447 }
1448
1449 gfn_lock(p2m, gfn, 0);
1450
1451 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1452 if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l )
1453 {
1454 ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1455 p2m_invalid, p2m->default_access);
1456 gfn_unlock(p2m, gfn, 0);
1457 }
1458 else
1459 {
1460 gfn_unlock(p2m, gfn, 0);
1461 printk(XENLOG_G_WARNING
1462 "non-identity map d%d:%lx not cleared (mapped to %lx)\n",
1463 d->domain_id, gfn_l, mfn_x(mfn));
1464 ret = 0;
1465 }
1466
1467 return ret;
1468 }
1469
1470 /* Returns: 0 for success, -errno for failure */
int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn)
1472 {
1473 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1474 int rc = 0;
1475 gfn_t gfn = _gfn(gfn_l);
1476 p2m_access_t a;
1477 p2m_type_t ot;
1478 mfn_t omfn;
1479 unsigned long pg_type;
1480
1481 if ( !paging_mode_translate(p2m->domain) )
1482 return -EIO;
1483
1484 gfn_lock(p2m, gfn, 0);
1485 omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
1486 /* At the moment we only allow p2m change if gfn has already been made
1487 * sharable first */
1488 ASSERT(p2m_is_shared(ot));
1489 ASSERT(mfn_valid(omfn));
1490 /* Set the m2p entry to invalid only if there are no further type
1491 * refs to this page as shared */
1492 pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
1493 if ( (pg_type & PGT_count_mask) == 0
1494 || (pg_type & PGT_type_mask) != PGT_shared_page )
1495 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
1496
1497 P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn));
1498 rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared,
1499 p2m->default_access);
1500 gfn_unlock(p2m, gfn, 0);
1501 if ( rc )
1502 gdprintk(XENLOG_ERR,
1503 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
1504 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc);
1505 return rc;
1506 }
1507
1508 #ifdef CONFIG_HVM
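/*
 * Return p2m (or, if NULL, the least recently used nested p2m) and move it
 * to the front of the host p2m's np2m LRU list.
 */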
static struct p2m_domain *
p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
1511 {
1512 struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list;
1513
1514 ASSERT(!list_empty(lru_list));
1515
1516 if ( p2m == NULL )
1517 p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list);
1518
1519 list_move(&p2m->np2m_list, lru_list);
1520
1521 return p2m;
1522 }
1523
static void
p2m_flush_table_locked(struct p2m_domain *p2m)
1526 {
1527 struct page_info *top, *pg;
1528 struct domain *d = p2m->domain;
1529 mfn_t mfn;
1530
1531 ASSERT(p2m_locked_by_me(p2m));
1532
1533 /*
1534 * "Host" p2m tables can have shared entries &c that need a bit more care
1535 * when discarding them.
1536 */
1537 ASSERT(!p2m_is_hostp2m(p2m));
1538 #ifdef CONFIG_HVM
    /* Nested p2m's do not do pod, hence the asserts (and no pod lock). */
1540 ASSERT(page_list_empty(&p2m->pod.super));
1541 ASSERT(page_list_empty(&p2m->pod.single));
1542 #endif
1543
1544 /* No need to flush if it's already empty */
1545 if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR )
1546 return;
1547
1548 /* This is no longer a valid nested p2m for any address space */
1549 p2m->np2m_base = P2M_BASE_EADDR;
1550 p2m->np2m_generation++;
1551
1552 /* Make sure nobody else is using this p2m table */
1553 if ( nestedhvm_enabled(d) )
1554 nestedhvm_vmcx_flushtlb(p2m);
1555
1556 /* Zap the top level of the trie */
1557 mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
1558 clear_domain_page(mfn);
1559
1560 /* Free the rest of the trie pages back to the paging pool */
1561 top = mfn_to_page(mfn);
1562 while ( (pg = page_list_remove_head(&p2m->pages)) )
1563 {
1564 if ( pg != top )
1565 d->arch.paging.free_page(d, pg);
1566 }
1567 page_list_add(top, &p2m->pages);
1568 }
1569
1570 /* Reset this p2m table to be empty */
static void
p2m_flush_table(struct p2m_domain *p2m)
1573 {
1574 p2m_lock(p2m);
1575 p2m_flush_table_locked(p2m);
1576 p2m_unlock(p2m);
1577 }
1578
void
p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
1581 {
1582 ASSERT(v->domain == p2m->domain);
1583 vcpu_nestedhvm(v).nv_p2m = NULL;
1584 p2m_flush_table(p2m);
1585 hvm_asid_flush_vcpu(v);
1586 }
1587
void
p2m_flush_nestedp2m(struct domain *d)
1590 {
1591 int i;
1592 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1593 p2m_flush_table(d->arch.nested_p2m[i]);
1594 }
1595
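/* Flush any nested p2m currently serving np2m_base (low 12 bits ignored). */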
void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
1597 {
1598 struct domain *d = v->domain;
1599 struct p2m_domain *p2m;
1600 unsigned int i;
1601
1602 np2m_base &= ~(0xfffull);
1603
1604 nestedp2m_lock(d);
1605 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1606 {
1607 p2m = d->arch.nested_p2m[i];
1608 p2m_lock(p2m);
1609 if ( p2m->np2m_base == np2m_base )
1610 {
1611 p2m_flush_table_locked(p2m);
1612 p2m_unlock(p2m);
1613 break;
1614 }
1615 p2m_unlock(p2m);
1616 }
1617 nestedp2m_unlock(d);
1618 }
1619
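/*
 * Bind p2m to v as its current nested p2m: move it to the front of the LRU
 * list, record the p2m's generation and mark v's pCPU in its dirty mask.
 */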
static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
1621 {
1622 struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1623 struct domain *d = v->domain;
1624
1625 /* Bring this np2m to the top of the LRU list */
1626 p2m_getlru_nestedp2m(d, p2m);
1627
1628 nv->nv_flushp2m = 0;
1629 nv->nv_p2m = p2m;
1630 nv->np2m_generation = p2m->np2m_generation;
1631 cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
1632 }
1633
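/*
 * Invalidate v's ASID and flag its np2m as stale so that it is revalidated
 * before the vCPU is run again.
 */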
static void nvcpu_flush(struct vcpu *v)
1635 {
1636 hvm_asid_flush_vcpu(v);
1637 vcpu_nestedhvm(v).stale_np2m = true;
1638 }
1639
struct p2m_domain *
p2m_get_nestedp2m_locked(struct vcpu *v)
1642 {
1643 struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1644 struct domain *d = v->domain;
1645 struct p2m_domain *p2m;
1646 uint64_t np2m_base = nhvm_vcpu_p2m_base(v);
1647 unsigned int i;
1648 bool needs_flush = true;
1649
1650 /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */
1651 np2m_base &= ~(0xfffull);
1652
1653 if (nv->nv_flushp2m && nv->nv_p2m) {
1654 nv->nv_p2m = NULL;
1655 }
1656
1657 nestedp2m_lock(d);
1658 p2m = nv->nv_p2m;
1659 if ( p2m )
1660 {
1661 p2m_lock(p2m);
1662 if ( p2m->np2m_base == np2m_base )
1663 {
1664 /* Check if np2m was flushed just before the lock */
1665 if ( nv->np2m_generation == p2m->np2m_generation )
1666 needs_flush = false;
1667 /* np2m is up-to-date */
1668 goto found;
1669 }
1670 else if ( p2m->np2m_base != P2M_BASE_EADDR )
1671 {
1672 /* vCPU is switching from some other valid np2m */
1673 cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
1674 }
1675 p2m_unlock(p2m);
1676 }
1677
1678 /* Share a np2m if possible */
1679 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1680 {
1681 p2m = d->arch.nested_p2m[i];
1682 p2m_lock(p2m);
1683
1684 if ( p2m->np2m_base == np2m_base )
1685 goto found;
1686
1687 p2m_unlock(p2m);
1688 }
1689
    /* All p2m's are or were in use. Take the least recently used one,
     * flush it and reuse. */
1692 p2m = p2m_getlru_nestedp2m(d, NULL);
1693 p2m_flush_table(p2m);
1694 p2m_lock(p2m);
1695
1696 found:
1697 if ( needs_flush )
1698 nvcpu_flush(v);
1699 p2m->np2m_base = np2m_base;
1700 assign_np2m(v, p2m);
1701 nestedp2m_unlock(d);
1702
1703 return p2m;
1704 }
1705
struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v)
1707 {
1708 struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v);
1709 p2m_unlock(p2m);
1710
1711 return p2m;
1712 }
1713
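/*
 * Return the p2m used for translations in v's current context: the nested
 * p2m while running an L2 guest, the host p2m otherwise.
 */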
struct p2m_domain *
p2m_get_p2m(struct vcpu *v)
1716 {
1717 if (!nestedhvm_is_n2(v))
1718 return p2m_get_hostp2m(v->domain);
1719
1720 return p2m_get_nestedp2m(v);
1721 }
1722
void np2m_schedule(int dir)
1724 {
1725 struct vcpu *curr = current;
1726 struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
1727 struct p2m_domain *p2m;
1728
1729 ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
1730
1731 if ( !nestedhvm_enabled(curr->domain) ||
1732 !nestedhvm_vcpu_in_guestmode(curr) ||
1733 !nestedhvm_paging_mode_hap(curr) )
1734 return;
1735
1736 p2m = nv->nv_p2m;
1737 if ( p2m )
1738 {
1739 bool np2m_valid;
1740
1741 p2m_lock(p2m);
1742 np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
1743 nv->np2m_generation == p2m->np2m_generation;
1744 if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
1745 {
1746 /*
1747 * The np2m is up to date but this vCPU will no longer use it,
1748 * which means there are no reasons to send a flush IPI.
1749 */
1750 cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
1751 }
1752 else if ( dir == NP2M_SCHEDLE_IN )
1753 {
1754 if ( !np2m_valid )
1755 {
1756 /* This vCPU's np2m was flushed while it was not runnable */
1757 hvm_asid_flush_core();
1758 vcpu_nestedhvm(curr).nv_p2m = NULL;
1759 }
1760 else
1761 cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
1762 }
1763 p2m_unlock(p2m);
1764 }
1765 }
1766 #endif
1767
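/*
 * Translate a guest virtual address to a guest frame number, additionally
 * walking the nested p2m when the vCPU is running in L2 guest mode.
 */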
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec)
1771 {
1772 struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
1773 const struct paging_mode *hostmode = paging_get_hostmode(v);
1774
1775 if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
1776 {
1777 unsigned long l2_gfn, l1_gfn;
1778 struct p2m_domain *p2m;
1779 const struct paging_mode *mode;
1780 uint8_t l1_p2ma;
1781 unsigned int l1_page_order;
1782 int rv;
1783
1784 /* translate l2 guest va into l2 guest gfn */
1785 p2m = p2m_get_nestedp2m(v);
1786 mode = paging_get_nestedmode(v);
1787 l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec);
1788
1789 if ( l2_gfn == gfn_x(INVALID_GFN) )
1790 return gfn_x(INVALID_GFN);
1791
1792 /* translate l2 guest gfn into l1 guest gfn */
1793 rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma,
1794 1,
1795 !!(*pfec & PFEC_write_access),
1796 !!(*pfec & PFEC_insn_fetch));
1797
1798 if ( rv != NESTEDHVM_PAGEFAULT_DONE )
1799 return gfn_x(INVALID_GFN);
1800
1801 /*
1802 * Sanity check that l1_gfn can be used properly as a 4K mapping, even
         * if it is mapped by a nested superpage.
1804 */
1805 ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) ==
1806 (l1_gfn & ((1ul << l1_page_order) - 1)));
1807
1808 return l1_gfn;
1809 }
1810
1811 return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
1812 }
1813
1814 /*
1815 * If the map is non-NULL, we leave this function having acquired an extra ref
1816 * on mfn_to_page(*mfn). In all cases, *pfec contains appropriate
1817 * synthetic/structure PFEC_* bits.
1818 */
void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
                     p2m_query_t q, uint32_t *pfec)
1821 {
1822 p2m_type_t p2mt;
1823 struct page_info *page;
1824
1825 if ( !gfn_valid(p2m->domain, gfn) )
1826 {
1827 *pfec = PFEC_reserved_bit | PFEC_page_present;
1828 return NULL;
1829 }
1830
1831 /* Translate the gfn, unsharing if shared. */
1832 page = p2m_get_page_from_gfn(p2m, gfn, &p2mt, NULL, q);
1833 if ( p2m_is_paging(p2mt) )
1834 {
1835 ASSERT(p2m_is_hostp2m(p2m));
1836 if ( page )
1837 put_page(page);
1838 p2m_mem_paging_populate(p2m->domain, gfn);
1839 *pfec = PFEC_page_paged;
1840 return NULL;
1841 }
1842 if ( p2m_is_shared(p2mt) )
1843 {
1844 if ( page )
1845 put_page(page);
1846 *pfec = PFEC_page_shared;
1847 return NULL;
1848 }
1849 if ( !page )
1850 {
1851 *pfec = 0;
1852 return NULL;
1853 }
1854
1855 *pfec = PFEC_page_present;
1856 *mfn = page_to_mfn(page);
1857 ASSERT(mfn_valid(*mfn));
1858
1859 return map_domain_page(*mfn);
1860 }
1861
static unsigned int mmio_order(const struct domain *d,
                               unsigned long start_fn, unsigned long nr)
1864 {
1865 /*
1866 * Note that the !hap_enabled() here has two effects:
1867 * - exclude shadow mode (which doesn't support large MMIO mappings),
1868 * - exclude PV guests, should execution reach this code for such.
1869 * So be careful when altering this.
1870 */
1871 if ( !hap_enabled(d) ||
1872 (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
1873 return PAGE_ORDER_4K;
1874
1875 if ( 0 /*
1876 * Don't use 1Gb pages, to limit the iteration count in
1877 * set_typed_p2m_entry() when it needs to zap M2P entries
1878 * for a RAM range.
1879 */ &&
1880 !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
1881 hap_has_1gb )
1882 return PAGE_ORDER_1G;
1883
1884 if ( hap_has_2mb )
1885 return PAGE_ORDER_2M;
1886
1887 return PAGE_ORDER_4K;
1888 }
1889
1890 #define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
1891
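/*
 * Map nr MMIO frames starting at (start_gfn, mfn), using the largest page
 * order both frame numbers allow. At most MAP_MMIO_MAX_ITER insertions are
 * performed per call: returns 0 once the whole range is mapped, the number
 * of frames handled so far if the caller must retry with the remainder, or
 * -errno on error.
 */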
int map_mmio_regions(struct domain *d,
                     gfn_t start_gfn,
                     unsigned long nr,
                     mfn_t mfn)
1896 {
1897 int ret = 0;
1898 unsigned long i;
1899 unsigned int iter, order;
1900
1901 if ( !paging_mode_translate(d) )
1902 return 0;
1903
1904 for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
1905 i += 1UL << order, ++iter )
1906 {
1907 /* OR'ing gfn and mfn values will return an order suitable to both. */
1908 for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
1909 order = ret - 1 )
1910 {
1911 ret = set_mmio_p2m_entry(d, gfn_add(start_gfn, i),
1912 mfn_add(mfn, i), order);
1913 if ( ret <= 0 )
1914 break;
1915 ASSERT(ret <= order);
1916 }
1917 if ( ret < 0 )
1918 break;
1919 }
1920
1921 return i == nr ? 0 : i ?: ret;
1922 }
1923
int unmap_mmio_regions(struct domain *d,
                       gfn_t start_gfn,
                       unsigned long nr,
                       mfn_t mfn)
1928 {
1929 int ret = 0;
1930 unsigned long i;
1931 unsigned int iter, order;
1932
1933 if ( !paging_mode_translate(d) )
1934 return 0;
1935
1936 for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
1937 i += 1UL << order, ++iter )
1938 {
1939 /* OR'ing gfn and mfn values will return an order suitable to both. */
1940 for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
1941 order = ret - 1 )
1942 {
1943 ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
1944 mfn_add(mfn, i), order);
1945 if ( ret <= 0 )
1946 break;
1947 ASSERT(ret <= order);
1948 }
1949 if ( ret < 0 )
1950 break;
1951 }
1952
1953 return i == nr ? 0 : i ?: ret;
1954 }
1955
1956 #ifdef CONFIG_HVM
1957
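/*
 * Look up gfn in the alternate p2m ap2m, falling back to the host p2m when
 * the altp2m holds no valid entry. With prepopulate set, a superpage-backed
 * host entry is first copied into the altp2m. Returns 0 on success, -ESRCH
 * if the host has no suitable mapping, or another -errno if copying into
 * the altp2m fails.
 */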
int altp2m_get_effective_entry(struct p2m_domain *ap2m, gfn_t gfn, mfn_t *mfn,
                               p2m_type_t *t, p2m_access_t *a,
                               bool prepopulate)
1961 {
1962 *mfn = ap2m->get_entry(ap2m, gfn, t, a, 0, NULL, NULL);
1963
1964 /* Check host p2m if no valid entry in alternate */
1965 if ( !mfn_valid(*mfn) && !p2m_is_hostp2m(ap2m) )
1966 {
1967 struct p2m_domain *hp2m = p2m_get_hostp2m(ap2m->domain);
1968 unsigned int page_order;
1969 int rc;
1970
1971 *mfn = __get_gfn_type_access(hp2m, gfn_x(gfn), t, a,
1972 P2M_ALLOC | P2M_UNSHARE, &page_order, 0);
1973
1974 rc = -ESRCH;
1975 if ( !mfn_valid(*mfn) || *t != p2m_ram_rw )
1976 return rc;
1977
1978 /* If this is a superpage, copy that first */
1979 if ( prepopulate && page_order != PAGE_ORDER_4K )
1980 {
1981 unsigned long mask = ~((1UL << page_order) - 1);
1982 gfn_t gfn_aligned = _gfn(gfn_x(gfn) & mask);
1983 mfn_t mfn_aligned = _mfn(mfn_x(*mfn) & mask);
1984
1985 rc = ap2m->set_entry(ap2m, gfn_aligned, mfn_aligned, page_order, *t, *a, 1);
1986 if ( rc )
1987 return rc;
1988 }
1989 }
1990
1991 return 0;
1992 }
1993
1994 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
1995 {
1996 if ( altp2m_active(v->domain) )
1997 p2m_switch_vcpu_altp2m_by_id(v, idx);
1998 }
1999
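/*
 * Switch v to altp2m view idx.  Returns 1 if the view exists (including the
 * case where v is already using it), 0 otherwise.
 */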
2000 bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
2001 {
2002 struct domain *d = v->domain;
2003 bool_t rc = 0;
2004
2005 if ( idx >= MAX_ALTP2M )
2006 return rc;
2007
2008 altp2m_list_lock(d);
2009
2010 if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2011 {
2012 if ( idx != vcpu_altp2m(v).p2midx )
2013 {
2014 atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2015 vcpu_altp2m(v).p2midx = idx;
2016 atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2017 altp2m_vcpu_update_p2m(v);
2018 }
2019 rc = 1;
2020 }
2021
2022 altp2m_list_unlock(d);
2023 return rc;
2024 }
2025
2026 /*
2027 * Read info about the gfn in an altp2m, locking the gfn.
2028 *
2029 * If the entry is valid, pass the results back to the caller.
2030 *
2031  * If the entry is invalid, and the host's entry is also invalid,
2032 * return to the caller without any changes.
2033 *
2034  * If the entry is invalid, and the host's entry is valid, propagate
2035 * the host's entry to the altp2m (retaining page order), and indicate
2036 * that the caller should re-try the faulting instruction.
2037 */
2038 bool p2m_altp2m_get_or_propagate(struct p2m_domain *ap2m, unsigned long gfn_l,
2039 mfn_t *mfn, p2m_type_t *p2mt,
2040 p2m_access_t *p2ma, unsigned int page_order)
2041 {
2042 p2m_type_t ap2mt;
2043 p2m_access_t ap2ma;
2044 unsigned long mask;
2045 gfn_t gfn;
2046 mfn_t amfn;
2047 int rc;
2048
2049 /*
2050 * NB we must get the full lock on the altp2m here, in addition to
2051 * the lock on the individual gfn, since we may change a range of
2052 * gfns below.
2053 */
2054 p2m_lock(ap2m);
2055
2056 amfn = get_gfn_type_access(ap2m, gfn_l, &ap2mt, &ap2ma, 0, NULL);
2057
2058 if ( !mfn_eq(amfn, INVALID_MFN) )
2059 {
2060 p2m_unlock(ap2m);
2061 *mfn = amfn;
2062 *p2mt = ap2mt;
2063 *p2ma = ap2ma;
2064 return false;
2065 }
2066
2067 /* Host entry is also invalid; don't bother setting the altp2m entry. */
2068 if ( mfn_eq(*mfn, INVALID_MFN) )
2069 {
2070 p2m_unlock(ap2m);
2071 return false;
2072 }
2073
2074 /*
2075 * If this is a superpage mapping, round down both frame numbers
2076      * to the start of the superpage. NB that we repurpose `amfn`
2077 * here.
2078 */
2079 mask = ~((1UL << page_order) - 1);
2080 amfn = _mfn(mfn_x(*mfn) & mask);
2081 gfn = _gfn(gfn_l & mask);
2082
2083 rc = p2m_set_entry(ap2m, gfn, amfn, page_order, *p2mt, *p2ma);
2084 p2m_unlock(ap2m);
2085
2086 if ( rc )
2087 {
2088 gprintk(XENLOG_ERR,
2089 "failed to set entry for %"PRI_gfn" -> %"PRI_mfn" altp2m %u, rc %d\n",
2090 gfn_l, mfn_x(amfn), vcpu_altp2m(current).p2midx, rc);
2091 domain_crash(ap2m->domain);
2092 }
2093
2094 return true;
2095 }
2096
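/*
 * ALTP2M_RESET flushes an altp2m's entries but leaves the view usable;
 * ALTP2M_DEACTIVATE additionally drops its log-dirty rangeset, for use when
 * the view is being torn down.
 */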
2097 enum altp2m_reset_type {
2098 ALTP2M_RESET,
2099 ALTP2M_DEACTIVATE
2100 };
2101
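/*
 * Drop all entries from altp2m idx and reinitialise its EPT structures so a
 * TLB shootdown is forced; also clears the record of remapped gfns.  See
 * enum altp2m_reset_type for the difference between the two reset modes.
 */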
2102 static void p2m_reset_altp2m(struct domain *d, unsigned int idx,
2103 enum altp2m_reset_type reset_type)
2104 {
2105 struct p2m_domain *p2m;
2106
2107 ASSERT(idx < MAX_ALTP2M);
2108 p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2109
2110 p2m_lock(p2m);
2111
2112 p2m_flush_table_locked(p2m);
2113
2114 if ( reset_type == ALTP2M_DEACTIVATE )
2115 p2m_free_logdirty(p2m);
2116
2117     /* Uninit and reinit EPT to force a TLB shootdown */
2118 ept_p2m_uninit(p2m);
2119 ept_p2m_init(p2m);
2120
2121 p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2122 p2m->max_remapped_gfn = 0;
2123
2124 p2m_unlock(p2m);
2125 }
2126
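/*
 * Deactivate and invalidate every altp2m view of d (e.g. when altp2m is
 * being disabled for the domain).
 */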
2127 void p2m_flush_altp2m(struct domain *d)
2128 {
2129 unsigned int i;
2130
2131 altp2m_list_lock(d);
2132
2133 for ( i = 0; i < MAX_ALTP2M; i++ )
2134 {
2135 p2m_reset_altp2m(d, i, ALTP2M_DEACTIVATE);
2136 d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
2137 d->arch.altp2m_visible_eptp[i] = mfn_x(INVALID_MFN);
2138 }
2139
2140 altp2m_list_unlock(d);
2141 }
2142
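/*
 * Bring altp2m idx into service: allocate its log-dirty rangeset, copy the
 * host p2m's log-dirty ranges and global log-dirty state, set the default
 * access, and hook the view's EPTP into the domain.  Expected to be called
 * with the altp2m list lock held.
 */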
2143 static int p2m_activate_altp2m(struct domain *d, unsigned int idx,
2144 p2m_access_t hvmmem_default_access)
2145 {
2146 struct p2m_domain *hostp2m, *p2m;
2147 int rc;
2148
2149 ASSERT(idx < MAX_ALTP2M);
2150
2151 p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2152 hostp2m = p2m_get_hostp2m(d);
2153
2154 p2m_lock(p2m);
2155
2156 rc = p2m_init_logdirty(p2m);
2157
2158 if ( rc )
2159 goto out;
2160
2161 /* The following is really just a rangeset copy. */
2162 rc = rangeset_merge(p2m->logdirty_ranges, hostp2m->logdirty_ranges);
2163
2164 if ( rc )
2165 {
2166 p2m_free_logdirty(p2m);
2167 goto out;
2168 }
2169
2170 p2m->default_access = hvmmem_default_access;
2171 p2m->domain = hostp2m->domain;
2172 p2m->global_logdirty = hostp2m->global_logdirty;
2173 p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2174 p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
2175
2176 p2m_init_altp2m_ept(d, idx);
2177
2178 out:
2179 p2m_unlock(p2m);
2180
2181 return rc;
2182 }
2183
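/* Activate altp2m view idx, provided it is not already in use (-EINVAL otherwise). */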
2184 int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
2185 {
2186 int rc = -EINVAL;
2187 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
2188
2189 if ( idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
2190 return rc;
2191
2192 altp2m_list_lock(d);
2193
2194 if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
2195 mfn_x(INVALID_MFN) )
2196 rc = p2m_activate_altp2m(d, idx, hostp2m->default_access);
2197
2198 altp2m_list_unlock(d);
2199 return rc;
2200 }
2201
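/*
 * Activate the first unused altp2m slot with the requested default access,
 * returning its index via *idx.  Fails with -EINVAL if no slot is free or
 * the access type cannot be translated.
 */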
2202 int p2m_init_next_altp2m(struct domain *d, uint16_t *idx,
2203 xenmem_access_t hvmmem_default_access)
2204 {
2205 int rc = -EINVAL;
2206 unsigned int i;
2207 p2m_access_t a;
2208 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
2209
2210 if ( hvmmem_default_access > XENMEM_access_default ||
2211 !xenmem_access_to_p2m_access(hostp2m, hvmmem_default_access, &a) )
2212 return rc;
2213
2214 altp2m_list_lock(d);
2215
2216 for ( i = 0; i < MAX_ALTP2M; i++ )
2217 {
2218 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
2219 continue;
2220
2221 rc = p2m_activate_altp2m(d, i, a);
2222
2223 if ( !rc )
2224 *idx = i;
2225
2226 break;
2227 }
2228
2229 altp2m_list_unlock(d);
2230 return rc;
2231 }
2232
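/*
 * Tear down altp2m view idx.  View 0 cannot be destroyed, and a view still
 * in use by any vcpu is left alone (-EBUSY).  The domain is paused (except
 * for the calling vcpu) while the view is dismantled.
 */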
2233 int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
2234 {
2235 struct p2m_domain *p2m;
2236 int rc = -EBUSY;
2237
2238 if ( !idx || idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
2239 return rc;
2240
2241 rc = domain_pause_except_self(d);
2242 if ( rc )
2243 return rc;
2244
2245 rc = -EBUSY;
2246 altp2m_list_lock(d);
2247
2248 if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] !=
2249 mfn_x(INVALID_MFN) )
2250 {
2251 p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2252
2253 if ( !_atomic_read(p2m->active_vcpus) )
2254 {
2255 p2m_reset_altp2m(d, idx, ALTP2M_DEACTIVATE);
2256 d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] =
2257 mfn_x(INVALID_MFN);
2258 d->arch.altp2m_visible_eptp[array_index_nospec(idx, MAX_EPTP)] =
2259 mfn_x(INVALID_MFN);
2260 rc = 0;
2261 }
2262 }
2263
2264 altp2m_list_unlock(d);
2265
2266 domain_unpause_except_self(d);
2267
2268 return rc;
2269 }
2270
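/*
 * Move every vcpu of d onto altp2m view idx.  The view must have been made
 * visible (see p2m_set_altp2m_view_visibility()); otherwise -EINVAL.
 */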
2271 int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
2272 {
2273 struct vcpu *v;
2274 int rc = -EINVAL;
2275
2276 if ( idx >= MAX_ALTP2M )
2277 return rc;
2278
2279 rc = domain_pause_except_self(d);
2280 if ( rc )
2281 return rc;
2282
2283 rc = -EINVAL;
2284 altp2m_list_lock(d);
2285
2286 if ( d->arch.altp2m_visible_eptp[idx] != mfn_x(INVALID_MFN) )
2287 {
2288 for_each_vcpu( d, v )
2289 if ( idx != vcpu_altp2m(v).p2midx )
2290 {
2291 atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2292 vcpu_altp2m(v).p2midx = idx;
2293 atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2294 altp2m_vcpu_update_p2m(v);
2295 }
2296
2297 rc = 0;
2298 }
2299
2300 altp2m_list_unlock(d);
2301
2302 domain_unpause_except_self(d);
2303
2304 return rc;
2305 }
2306
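/*
 * Remap old_gfn in altp2m view idx so it points at the machine frame
 * currently backing new_gfn.  Passing INVALID_GFN as new_gfn instead drops
 * the view's entry for old_gfn.  new_gfn is recorded in min/max_remapped_gfn
 * so that later host p2m changes in that range can invalidate the view.
 */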
2307 int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
2308 gfn_t old_gfn, gfn_t new_gfn)
2309 {
2310 struct p2m_domain *hp2m, *ap2m;
2311 p2m_access_t a;
2312 p2m_type_t t;
2313 mfn_t mfn;
2314 int rc = -EINVAL;
2315
2316 if ( idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2317 d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
2318 mfn_x(INVALID_MFN) )
2319 return rc;
2320
2321 hp2m = p2m_get_hostp2m(d);
2322 ap2m = array_access_nospec(d->arch.altp2m_p2m, idx);
2323
2324 p2m_lock(hp2m);
2325 p2m_lock(ap2m);
2326
2327 if ( gfn_eq(new_gfn, INVALID_GFN) )
2328 {
2329 mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);
2330 rc = mfn_valid(mfn)
2331 ? p2m_remove_page(ap2m, old_gfn, mfn, PAGE_ORDER_4K)
2332 : 0;
2333 goto out;
2334 }
2335
2336 rc = altp2m_get_effective_entry(ap2m, old_gfn, &mfn, &t, &a,
2337 AP2MGET_prepopulate);
2338 if ( rc )
2339 goto out;
2340
2341 rc = altp2m_get_effective_entry(ap2m, new_gfn, &mfn, &t, &a,
2342 AP2MGET_query);
2343 if ( rc )
2344 goto out;
2345
2346 if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a,
2347 (current->domain != d)) )
2348 {
2349 rc = 0;
2350
2351 if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn )
2352 ap2m->min_remapped_gfn = gfn_x(new_gfn);
2353 if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn )
2354 ap2m->max_remapped_gfn = gfn_x(new_gfn);
2355 }
2356
2357 out:
2358 p2m_unlock(ap2m);
2359 p2m_unlock(hp2m);
2360 return rc;
2361 }
2362
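/*
 * Propagate a host p2m change at gfn to every active altp2m.  Views that
 * already have a mapping for gfn get the new entry; if a page is being
 * dropped (mfn == INVALID_MFN) inside a view's recorded remap range, that
 * view is reset instead, and if at least two views are affected all of
 * them are reset.
 */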
2363 int p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
2364 mfn_t mfn, unsigned int page_order,
2365 p2m_type_t p2mt, p2m_access_t p2ma)
2366 {
2367 struct p2m_domain *p2m;
2368 p2m_access_t a;
2369 p2m_type_t t;
2370 mfn_t m;
2371 unsigned int i;
2372 unsigned int reset_count = 0;
2373 unsigned int last_reset_idx = ~0;
2374 int ret = 0;
2375
2376 if ( !altp2m_active(d) )
2377 return 0;
2378
2379 altp2m_list_lock(d);
2380
2381 for ( i = 0; i < MAX_ALTP2M; i++ )
2382 {
2383 if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2384 continue;
2385
2386 p2m = d->arch.altp2m_p2m[i];
2387 m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL);
2388
2389 /* Check for a dropped page that may impact this altp2m */
2390 if ( mfn_eq(mfn, INVALID_MFN) &&
2391 gfn_x(gfn) >= p2m->min_remapped_gfn &&
2392 gfn_x(gfn) <= p2m->max_remapped_gfn )
2393 {
2394 if ( !reset_count++ )
2395 {
2396 p2m_reset_altp2m(d, i, ALTP2M_RESET);
2397 last_reset_idx = i;
2398 }
2399 else
2400 {
2401                 /* At least 2 altp2ms are impacted, so reset everything */
2402 __put_gfn(p2m, gfn_x(gfn));
2403
2404 for ( i = 0; i < MAX_ALTP2M; i++ )
2405 {
2406 if ( i == last_reset_idx ||
2407 d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2408 continue;
2409
2410 p2m_reset_altp2m(d, i, ALTP2M_RESET);
2411 }
2412
2413 ret = 0;
2414 break;
2415 }
2416 }
2417 else if ( !mfn_eq(m, INVALID_MFN) )
2418 {
2419 int rc = p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
2420
2421 /* Best effort: Don't bail on error. */
2422 if ( !ret )
2423 ret = rc;
2424 }
2425
2426 __put_gfn(p2m, gfn_x(gfn));
2427 }
2428
2429 altp2m_list_unlock(d);
2430
2431 return ret;
2432 }
2433 #endif /* CONFIG_HVM */
2434
2435 /*** Audit ***/
2436
2437 #if P2M_AUDIT && defined(CONFIG_HVM)
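/*
 * Cross-check the p2m and m2p tables for d: run the p2m implementation's own
 * audit hook, then walk the domain's page list verifying that each page's
 * m2p entry round-trips through the p2m.  Counts of orphaned pages, bad m2p
 * entries and bad p2m entries are returned via the out parameters.
 */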
2438 void audit_p2m(struct domain *d,
2439 uint64_t *orphans,
2440 uint64_t *m2p_bad,
2441 uint64_t *p2m_bad)
2442 {
2443 struct page_info *page;
2444 struct domain *od;
2445 unsigned long mfn, gfn;
2446 mfn_t p2mfn;
2447 unsigned long orphans_count = 0, mpbad = 0, pmbad = 0;
2448 p2m_access_t p2ma;
2449 p2m_type_t type;
2450 struct p2m_domain *p2m = p2m_get_hostp2m(d);
2451
2452 if ( !paging_mode_translate(d) )
2453 goto out_p2m_audit;
2454
2455 P2M_PRINTK("p2m audit starts\n");
2456
2457 p2m_lock(p2m);
2458 pod_lock(p2m);
2459
2460     if ( p2m->audit_p2m )
2461 pmbad = p2m->audit_p2m(p2m);
2462
2463 /* Audit part two: walk the domain's page allocation list, checking
2464 * the m2p entries. */
2465 spin_lock(&d->page_alloc_lock);
2466 page_list_for_each ( page, &d->page_list )
2467 {
2468 mfn = mfn_x(page_to_mfn(page));
2469
2470 P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
2471
2472 od = page_get_owner(page);
2473
2474 if ( od != d )
2475 {
2476 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
2477 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
2478 continue;
2479 }
2480
2481 gfn = get_gpfn_from_mfn(mfn);
2482 if ( gfn == INVALID_M2P_ENTRY )
2483 {
2484 orphans_count++;
2485 P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
2486 mfn);
2487 continue;
2488 }
2489
2490 if ( SHARED_M2P(gfn) )
2491 {
2492 P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
2493 mfn);
2494 continue;
2495 }
2496
2497 p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL);
2498 if ( mfn_x(p2mfn) != mfn )
2499 {
2500 mpbad++;
2501 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
2502 " (-> gfn %#lx)\n",
2503 mfn, gfn, mfn_x(p2mfn),
2504 (mfn_valid(p2mfn)
2505 ? get_gpfn_from_mfn(mfn_x(p2mfn))
2506 : -1u));
2507 /* This m2p entry is stale: the domain has another frame in
2508 * this physical slot. No great disaster, but for neatness,
2509 * blow away the m2p entry. */
2510 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
2511 }
2512 __put_gfn(p2m, gfn);
2513
2514 P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n",
2515 mfn, gfn, mfn_x(p2mfn));
2516 }
2517 spin_unlock(&d->page_alloc_lock);
2518
2519 pod_unlock(p2m);
2520 p2m_unlock(p2m);
2521
2522 P2M_PRINTK("p2m audit complete\n");
2523 if ( orphans_count | mpbad | pmbad )
2524 P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count);
2525 if ( mpbad | pmbad )
2526 {
2527 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
2528 pmbad, mpbad);
2529 WARN();
2530 }
2531
2532 out_p2m_audit:
2533 *orphans = (uint64_t) orphans_count;
2534 *m2p_bad = (uint64_t) mpbad;
2535 *p2m_bad = (uint64_t) pmbad;
2536 }
2537 #endif /* P2M_AUDIT */
2538
2539 /*
2540  * Add a frame from a foreign domain to the target domain's physmap. Similar
2541  * to XENMAPSPACE_gmfn, but the frame is foreign, is mapped into the current
2542  * domain, and is not removed from the foreign domain.
2543 *
2544 * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
2545 * - xentrace running on dom0 mapping xenheap pages. foreigndom would
2546 * be DOMID_XEN in such a case.
2547  *        etc.
2548 *
2549 * Side Effect: the mfn for fgfn will be refcounted in lower level routines
2550 * so it is not lost while mapped here. The refcnt is released
2551 * via the XENMEM_remove_from_physmap path.
2552 *
2553 * Returns: 0 ==> success
2554 */
2555 int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
2556 unsigned long gpfn, domid_t foreigndom)
2557 {
2558 p2m_type_t p2mt, p2mt_prev;
2559 mfn_t prev_mfn, mfn;
2560 struct page_info *page;
2561 int rc;
2562 struct domain *fdom;
2563
2564 ASSERT(tdom);
2565 if ( foreigndom == DOMID_SELF )
2566 return -EINVAL;
2567 /*
2568      * hvm fixme: until support is added to the p2m teardown code to clean up
2569      * foreign entries, limit this to the hardware domain only.
2570 */
2571 if ( !is_hardware_domain(tdom) )
2572 return -EPERM;
2573
2574 if ( foreigndom == DOMID_XEN )
2575 fdom = rcu_lock_domain(dom_xen);
2576 else
2577 fdom = rcu_lock_domain_by_id(foreigndom);
2578 if ( fdom == NULL )
2579 return -ESRCH;
2580
2581 rc = -EINVAL;
2582 if ( tdom == fdom )
2583 goto out;
2584
2585 rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
2586 if ( rc )
2587 goto out;
2588
2589 /*
2590 * Take a refcnt on the mfn. NB: following supported for foreign mapping:
2591 * ram_rw | ram_logdirty | ram_ro | paging_out.
2592 */
2593 page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
2594 if ( !page ||
2595 !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
2596 {
2597 if ( page )
2598 put_page(page);
2599 rc = -EINVAL;
2600 goto out;
2601 }
2602 mfn = page_to_mfn(page);
2603
2604 /* Remove previously mapped page if it is present. */
2605 prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev);
2606 if ( mfn_valid(prev_mfn) )
2607 {
2608 if ( is_special_page(mfn_to_page(prev_mfn)) )
2609 /* Special pages are simply unhooked from this phys slot */
2610 rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0);
2611 else
2612 /* Normal domain memory is freed, to avoid leaking memory. */
2613 rc = guest_remove_page(tdom, gpfn);
2614 if ( rc )
2615 goto put_both;
2616 }
2617 /*
2618 * Create the new mapping. Can't use guest_physmap_add_page() because it
2619      * will update the m2p table, making the entry for mfn point at dom0's
2620      * gpfn rather than the domU's fgfn.
2621 */
2622 rc = set_foreign_p2m_entry(tdom, gpfn, mfn);
2623 if ( rc )
2624 gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
2625 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
2626 gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id);
2627
2628 put_both:
2629 put_page(page);
2630
2631 /*
2632      * This put_gfn() matches the above get_gfn() for prev_mfn. We must do this
2633 * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
2634 * before us.
2635 */
2636 put_gfn(tdom, gpfn);
2637
2638 out:
2639 if ( fdom )
2640 rcu_unlock_domain(fdom);
2641 return rc;
2642 }
2643
2644 #ifdef CONFIG_HVM
2645 /*
2646 * Set/clear the #VE suppress bit for a page. Only available on VMX.
2647 */
2648 int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
2649 unsigned int altp2m_idx)
2650 {
2651 int rc;
2652 struct xen_hvm_altp2m_suppress_ve_multi sve = {
2653 altp2m_idx, suppress_ve, 0, 0, gfn_x(gfn), gfn_x(gfn), 0
2654 };
2655
2656 if ( !(rc = p2m_set_suppress_ve_multi(d, &sve)) )
2657 rc = sve.first_error;
2658
2659 return rc;
2660 }
2661
2662 /*
2663 * Set/clear the #VE suppress bit for multiple pages. Only available on VMX.
2664 */
2665 int p2m_set_suppress_ve_multi(struct domain *d,
2666 struct xen_hvm_altp2m_suppress_ve_multi *sve)
2667 {
2668 struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
2669 struct p2m_domain *ap2m = NULL;
2670 struct p2m_domain *p2m = host_p2m;
2671 uint64_t start = sve->first_gfn;
2672 int rc = 0;
2673
2674 if ( sve->view > 0 )
2675 {
2676 if ( sve->view >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2677 d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_EPTP)] ==
2678 mfn_x(INVALID_MFN) )
2679 return -EINVAL;
2680
2681 p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, sve->view);
2682 }
2683
2684 p2m_lock(host_p2m);
2685
2686 if ( ap2m )
2687 p2m_lock(ap2m);
2688
2689 while ( sve->last_gfn >= start )
2690 {
2691 p2m_access_t a;
2692 p2m_type_t t;
2693 mfn_t mfn;
2694 int err = 0;
2695
2696 if ( (err = altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a,
2697 AP2MGET_query)) &&
2698 !sve->first_error )
2699 {
2700 sve->first_error_gfn = start; /* Save the gfn of the first error */
2701 sve->first_error = err; /* Save the first error code */
2702 }
2703
2704 if ( !err && (err = p2m->set_entry(p2m, _gfn(start), mfn,
2705 PAGE_ORDER_4K, t, a,
2706 sve->suppress_ve)) &&
2707 !sve->first_error )
2708 {
2709 sve->first_error_gfn = start; /* Save the gfn of the first error */
2710 sve->first_error = err; /* Save the first error code */
2711 }
2712
2713 /* Check for continuation if it's not the last iteration. */
2714 if ( sve->last_gfn >= ++start && hypercall_preempt_check() )
2715 {
2716 rc = -ERESTART;
2717 break;
2718 }
2719 }
2720
2721 sve->first_gfn = start;
2722
2723 if ( ap2m )
2724 p2m_unlock(ap2m);
2725
2726 p2m_unlock(host_p2m);
2727
2728 return rc;
2729 }
2730
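/*
 * Read back the #VE suppress bit for gfn in the given view (0 means the
 * host p2m).  Returns -ESRCH if the view has no valid mapping for gfn.
 */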
2731 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
2732 unsigned int altp2m_idx)
2733 {
2734 struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
2735 struct p2m_domain *ap2m = NULL;
2736 struct p2m_domain *p2m;
2737 mfn_t mfn;
2738 p2m_access_t a;
2739 p2m_type_t t;
2740 int rc = 0;
2741
2742 if ( altp2m_idx > 0 )
2743 {
2744 if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2745 d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
2746 mfn_x(INVALID_MFN) )
2747 return -EINVAL;
2748
2749 p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
2750 }
2751 else
2752 p2m = host_p2m;
2753
2754 gfn_lock(host_p2m, gfn, 0);
2755
2756 if ( ap2m )
2757 p2m_lock(ap2m);
2758
2759 mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, suppress_ve);
2760 if ( !mfn_valid(mfn) )
2761 rc = -ESRCH;
2762
2763 if ( ap2m )
2764 p2m_unlock(ap2m);
2765
2766 gfn_unlock(host_p2m, gfn, 0);
2767
2768 return rc;
2769 }
2770
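/*
 * Mark altp2m view altp2m_idx as visible or invisible to the guest by
 * copying (or invalidating) its entry in the visible-EPTP table, consulted
 * e.g. by p2m_switch_domain_altp2m_by_id().
 */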
2771 int p2m_set_altp2m_view_visibility(struct domain *d, unsigned int altp2m_idx,
2772 uint8_t visible)
2773 {
2774 int rc = 0;
2775
2776 altp2m_list_lock(d);
2777
2778 /*
2779      * The EPTP index corresponds to the altp2m index and must not exceed
2780      * min(MAX_ALTP2M, MAX_EPTP).
2781 */
2782 if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
2783 d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
2784 mfn_x(INVALID_MFN) )
2785 rc = -EINVAL;
2786 else if ( visible )
2787 d->arch.altp2m_visible_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] =
2788 d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)];
2789 else
2790 d->arch.altp2m_visible_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] =
2791 mfn_x(INVALID_MFN);
2792
2793 altp2m_list_unlock(d);
2794
2795 return rc;
2796 }
2797 #endif
2798
2799 /*
2800 * Local variables:
2801 * mode: C
2802 * c-file-style: "BSD"
2803 * c-basic-offset: 4
2804 * indent-tabs-mode: nil
2805 * End:
2806 */
2807