1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; If not, see <http://www.gnu.org/licenses/>.
15 *
16 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
17 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
18 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
19 */
20
21 #include <xen/irq.h>
22 #include <xen/sched.h>
23 #include <xen/xmalloc.h>
24 #include <xen/domain_page.h>
25 #include <xen/iocap.h>
26 #include <xen/iommu.h>
27 #include <xen/numa.h>
28 #include <xen/softirq.h>
29 #include <xen/time.h>
30 #include <xen/pci.h>
31 #include <xen/pci_regs.h>
32 #include <xen/keyhandler.h>
33 #include <asm/msi.h>
34 #include <asm/irq.h>
35 #include <asm/hvm/vmx/vmx.h>
36 #include <asm/p2m.h>
37 #include <mach_apic.h>
38 #include "iommu.h"
39 #include "dmar.h"
40 #include "extern.h"
41 #include "vtd.h"
42 #include "../ats.h"
43
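/*
 * Book-keeping for RMRR ranges that have been identity-mapped into a
 * domain's p2m.  The count records how many times the range has been
 * requested, so the mapping is only torn down once the last user is
 * gone (see rmrr_identity_mapping() below).
 */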
44 struct mapped_rmrr {
45 struct list_head list;
46 u64 base, end;
47 unsigned int count;
48 };
49
50 /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
51 bool __read_mostly untrusted_msi;
52
53 int nr_iommus;
54
55 static struct tasklet vtd_fault_tasklet;
56
57 static int setup_hwdom_device(u8 devfn, struct pci_dev *);
58 static void setup_hwdom_rmrr(struct domain *d);
59
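/*
 * Each IOMMU supports only cap_ndoms() hardware domain ids, so Xen keeps a
 * per-IOMMU translation table: domid_bitmap marks which slots are in use and
 * domid_map[] records the Xen domid each slot is assigned to.  This helper
 * returns the slot currently used by d on this IOMMU, or -1 if none.
 */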
60 static int domain_iommu_domid(struct domain *d,
61 struct iommu *iommu)
62 {
63 unsigned long nr_dom, i;
64
65 nr_dom = cap_ndoms(iommu->cap);
66 i = find_first_bit(iommu->domid_bitmap, nr_dom);
67 while ( i < nr_dom )
68 {
69 if ( iommu->domid_map[i] == d->domain_id )
70 return i;
71
72 i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
73 }
74
75 dprintk(XENLOG_ERR VTDPREFIX,
76 "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
77 d->domain_id, iommu->index);
78 return -1;
79 }
80
81 #define DID_FIELD_WIDTH 16
82 #define DID_HIGH_OFFSET 8
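/*
 * Assign (or look up) a hardware domain id for d on this IOMMU and write it
 * into the DID field of the context entry, i.e. bits DID_HIGH_OFFSET ...
 * DID_HIGH_OFFSET + DID_FIELD_WIDTH - 1 of context->hi.  The caller must
 * hold iommu->lock.
 */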
83 static int context_set_domain_id(struct context_entry *context,
84 struct domain *d,
85 struct iommu *iommu)
86 {
87 unsigned long nr_dom, i;
88 int found = 0;
89
90 ASSERT(spin_is_locked(&iommu->lock));
91
92 nr_dom = cap_ndoms(iommu->cap);
93 i = find_first_bit(iommu->domid_bitmap, nr_dom);
94 while ( i < nr_dom )
95 {
96 if ( iommu->domid_map[i] == d->domain_id )
97 {
98 found = 1;
99 break;
100 }
101 i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
102 }
103
104 if ( found == 0 )
105 {
106 i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
107 if ( i >= nr_dom )
108 {
109 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
110 return -EFAULT;
111 }
112 iommu->domid_map[i] = d->domain_id;
113 }
114
115 set_bit(i, iommu->domid_bitmap);
116 context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
117 return 0;
118 }
119
120 static int context_get_domain_id(struct context_entry *context,
121 struct iommu *iommu)
122 {
123 unsigned long dom_index, nr_dom;
124 int domid = -1;
125
126 if (iommu && context)
127 {
128 nr_dom = cap_ndoms(iommu->cap);
129
130 dom_index = context_domain_id(*context);
131
132 if ( dom_index < nr_dom && iommu->domid_map )
133 domid = iommu->domid_map[dom_index];
134 else
135 dprintk(XENLOG_DEBUG VTDPREFIX,
136 "dom_index %lu exceeds nr_dom %lu or iommu has no domid_map\n",
137 dom_index, nr_dom);
138 }
139 return domid;
140 }
141
142 static struct intel_iommu *__init alloc_intel_iommu(void)
143 {
144 struct intel_iommu *intel;
145
146 intel = xzalloc(struct intel_iommu);
147 if ( intel == NULL )
148 return NULL;
149
150 spin_lock_init(&intel->ir_ctrl.iremap_lock);
151
152 return intel;
153 }
154
155 static void __init free_intel_iommu(struct intel_iommu *intel)
156 {
157 xfree(intel);
158 }
159
160 static int iommus_incoherent;
161 static void __iommu_flush_cache(void *addr, unsigned int size)
162 {
163 int i;
164 static unsigned int clflush_size = 0;
165
166 if ( !iommus_incoherent )
167 return;
168
169 if ( clflush_size == 0 )
170 clflush_size = get_cache_line_size();
171
172 for ( i = 0; i < size; i += clflush_size )
173 cacheline_flush((char *)addr + i);
174 }
175
176 void iommu_flush_cache_entry(void *addr, unsigned int size)
177 {
178 __iommu_flush_cache(addr, size);
179 }
180
181 void iommu_flush_cache_page(void *addr, unsigned long npages)
182 {
183 __iommu_flush_cache(addr, PAGE_SIZE * npages);
184 }
185
186 /* Allocate page table, return its machine address */
187 u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
188 {
189 struct acpi_rhsa_unit *rhsa;
190 struct page_info *pg, *cur_pg;
191 u64 *vaddr;
192 nodeid_t node = NUMA_NO_NODE;
193 unsigned int i;
194
195 rhsa = drhd_to_rhsa(drhd);
196 if ( rhsa )
197 node = pxm_to_node(rhsa->proximity_domain);
198
199 pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
200 (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
201 if ( !pg )
202 return 0;
203
204 cur_pg = pg;
205 for ( i = 0; i < npages; i++ )
206 {
207 vaddr = __map_domain_page(cur_pg);
208 memset(vaddr, 0, PAGE_SIZE);
209
210 iommu_flush_cache_page(vaddr, 1);
211 unmap_domain_page(vaddr);
212 cur_pg++;
213 }
214
215 return page_to_maddr(pg);
216 }
217
218 void free_pgtable_maddr(u64 maddr)
219 {
220 if ( maddr != 0 )
221 free_domheap_page(maddr_to_page(maddr));
222 }
223
224 /* context entry handling */
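/*
 * The root table is indexed by PCI bus number; each present root entry
 * points to a context table which is indexed by devfn.  This returns the
 * machine address of the context table for the given bus, allocating (and
 * flushing) it on demand.  The caller must hold iommu->lock.
 */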
225 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
226 {
227 struct acpi_drhd_unit *drhd;
228 struct root_entry *root, *root_entries;
229 u64 maddr;
230
231 ASSERT(spin_is_locked(&iommu->lock));
232 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
233 root = &root_entries[bus];
234 if ( !root_present(*root) )
235 {
236 drhd = iommu_to_drhd(iommu);
237 maddr = alloc_pgtable_maddr(drhd, 1);
238 if ( maddr == 0 )
239 {
240 unmap_vtd_domain_page(root_entries);
241 return 0;
242 }
243 set_root_value(*root, maddr);
244 set_root_present(*root);
245 iommu_flush_cache_entry(root, sizeof(struct root_entry));
246 }
247 maddr = (u64) get_context_addr(*root);
248 unmap_vtd_domain_page(root_entries);
249 return maddr;
250 }
251
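/*
 * Walk the domain's VT-d page tables and return the machine address of the
 * leaf (level 1) page table covering addr, or 0 on failure.  With alloc set,
 * missing intermediate tables (and the top-level table itself) are allocated
 * along the way.  The caller must hold hd->arch.mapping_lock.
 */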
252 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
253 {
254 struct acpi_drhd_unit *drhd;
255 struct pci_dev *pdev;
256 struct domain_iommu *hd = dom_iommu(domain);
257 int addr_width = agaw_to_width(hd->arch.agaw);
258 struct dma_pte *parent, *pte = NULL;
259 int level = agaw_to_level(hd->arch.agaw);
260 int offset;
261 u64 pte_maddr = 0;
262
263 addr &= (((u64)1) << addr_width) - 1;
264 ASSERT(spin_is_locked(&hd->arch.mapping_lock));
265 if ( hd->arch.pgd_maddr == 0 )
266 {
267 /*
268 * Just get any passthrough device in the domain - assume the user
269 * assigns only devices from the same node to a given guest.
270 */
271 pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
272 drhd = acpi_find_matched_drhd_unit(pdev);
273 if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
274 goto out;
275 }
276
277 parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.pgd_maddr);
278 while ( level > 1 )
279 {
280 offset = address_level_offset(addr, level);
281 pte = &parent[offset];
282
283 pte_maddr = dma_pte_addr(*pte);
284 if ( !pte_maddr )
285 {
286 if ( !alloc )
287 break;
288
289 pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
290 drhd = acpi_find_matched_drhd_unit(pdev);
291 pte_maddr = alloc_pgtable_maddr(drhd, 1);
292 if ( !pte_maddr )
293 break;
294
295 dma_set_pte_addr(*pte, pte_maddr);
296
297 /*
298 * Higher-level entries always grant read/write; the leaf
299 * page table entries control the actual access rights.
300 */
301 dma_set_pte_readable(*pte);
302 dma_set_pte_writable(*pte);
303 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
304 }
305
306 if ( level == 2 )
307 break;
308
309 unmap_vtd_domain_page(parent);
310 parent = map_vtd_domain_page(pte_maddr);
311 level--;
312 }
313
314 unmap_vtd_domain_page(parent);
315 out:
316 return pte_maddr;
317 }
318
319 static void iommu_flush_write_buffer(struct iommu *iommu)
320 {
321 u32 val;
322 unsigned long flags;
323
324 if ( !rwbf_quirk && !cap_rwbf(iommu->cap) )
325 return;
326
327 spin_lock_irqsave(&iommu->register_lock, flags);
328 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
329 dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
330
331 /* Make sure the hardware completes it */
332 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
333 !(val & DMA_GSTS_WBFS), val);
334
335 spin_unlock_irqrestore(&iommu->register_lock, flags);
336 }
337
338 /* The return value determines whether a write-buffer flush is needed */
339 static int __must_check flush_context_reg(void *_iommu, u16 did, u16 source_id,
340 u8 function_mask, u64 type,
341 bool_t flush_non_present_entry)
342 {
343 struct iommu *iommu = (struct iommu *) _iommu;
344 u64 val = 0;
345 unsigned long flags;
346
347 /*
348 * For a non-present entry flush: if the hardware does not cache
349 * non-present entries there is nothing to do; if it does, flush the
350 * entries of domain 0 (that domain id is used to tag cached
351 * non-present entries)
352 */
353 if ( flush_non_present_entry )
354 {
355 if ( !cap_caching_mode(iommu->cap) )
356 return 1;
357 else
358 did = 0;
359 }
360
361 /* use register invalidation */
362 switch ( type )
363 {
364 case DMA_CCMD_GLOBAL_INVL:
365 val = DMA_CCMD_GLOBAL_INVL;
366 break;
367 case DMA_CCMD_DOMAIN_INVL:
368 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
369 break;
370 case DMA_CCMD_DEVICE_INVL:
371 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
372 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
373 break;
374 default:
375 BUG();
376 }
377 val |= DMA_CCMD_ICC;
378
379 spin_lock_irqsave(&iommu->register_lock, flags);
380 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
381
382 /* Make sure the hardware completes it */
383 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
384 !(val & DMA_CCMD_ICC), val);
385
386 spin_unlock_irqrestore(&iommu->register_lock, flags);
387 /* flush context entry will implicitly flush write buffer */
388 return 0;
389 }
390
391 static int __must_check iommu_flush_context_global(struct iommu *iommu,
392 bool_t flush_non_present_entry)
393 {
394 struct iommu_flush *flush = iommu_get_flush(iommu);
395 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
396 flush_non_present_entry);
397 }
398
399 static int __must_check iommu_flush_context_device(struct iommu *iommu,
400 u16 did, u16 source_id,
401 u8 function_mask,
402 bool_t flush_non_present_entry)
403 {
404 struct iommu_flush *flush = iommu_get_flush(iommu);
405 return flush->context(iommu, did, source_id, function_mask,
406 DMA_CCMD_DEVICE_INVL,
407 flush_non_present_entry);
408 }
409
410 /* The return value determines whether a write-buffer flush is needed */
411 static int __must_check flush_iotlb_reg(void *_iommu, u16 did, u64 addr,
412 unsigned int size_order, u64 type,
413 bool_t flush_non_present_entry,
414 bool_t flush_dev_iotlb)
415 {
416 struct iommu *iommu = (struct iommu *) _iommu;
417 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
418 u64 val = 0;
419 unsigned long flags;
420
421 /*
422 * For a non-present entry flush: if the hardware does not cache
423 * non-present entries there is nothing to do; if it does, flush the
424 * entries of domain 0 (that domain id is used to tag cached
425 * non-present entries)
426 */
427 if ( flush_non_present_entry )
428 {
429 if ( !cap_caching_mode(iommu->cap) )
430 return 1;
431 else
432 did = 0;
433 }
434
435 /* use register invalidation */
436 switch ( type )
437 {
438 case DMA_TLB_GLOBAL_FLUSH:
439 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
440 break;
441 case DMA_TLB_DSI_FLUSH:
442 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
443 break;
444 case DMA_TLB_PSI_FLUSH:
445 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
446 break;
447 default:
448 BUG();
449 }
450 /* Note: set drain read/write */
451 if ( cap_read_drain(iommu->cap) )
452 val |= DMA_TLB_READ_DRAIN;
453 if ( cap_write_drain(iommu->cap) )
454 val |= DMA_TLB_WRITE_DRAIN;
455
456 spin_lock_irqsave(&iommu->register_lock, flags);
457 /* Note: Only uses first TLB reg currently */
458 if ( type == DMA_TLB_PSI_FLUSH )
459 {
460 /* Note: always flush non-leaf currently. */
461 dmar_writeq(iommu->reg, tlb_offset, size_order | addr);
462 }
463 dmar_writeq(iommu->reg, tlb_offset + 8, val);
464
465 /* Make sure the hardware completes it */
466 IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
467 !(val & DMA_TLB_IVT), val);
468 spin_unlock_irqrestore(&iommu->register_lock, flags);
469
470 /* check IOTLB invalidation granularity */
471 if ( DMA_TLB_IAIG(val) == 0 )
472 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
473
474 /* flush iotlb entry will implicitly flush write buffer */
475 return 0;
476 }
477
478 static int __must_check iommu_flush_iotlb_global(struct iommu *iommu,
479 bool_t flush_non_present_entry,
480 bool_t flush_dev_iotlb)
481 {
482 struct iommu_flush *flush = iommu_get_flush(iommu);
483 int status;
484
485 /* apply platform specific errata workarounds */
486 vtd_ops_preamble_quirk(iommu);
487
488 status = flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
489 flush_non_present_entry, flush_dev_iotlb);
490
491 /* undo platform specific errata workarounds */
492 vtd_ops_postamble_quirk(iommu);
493
494 return status;
495 }
496
497 static int __must_check iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
498 bool_t flush_non_present_entry,
499 bool_t flush_dev_iotlb)
500 {
501 struct iommu_flush *flush = iommu_get_flush(iommu);
502 int status;
503
504 /* apply platform specific errata workarounds */
505 vtd_ops_preamble_quirk(iommu);
506
507 status = flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
508 flush_non_present_entry, flush_dev_iotlb);
509
510 /* undo platform specific errata workarounds */
511 vtd_ops_postamble_quirk(iommu);
512
513 return status;
514 }
515
516 static int __must_check iommu_flush_iotlb_psi(struct iommu *iommu, u16 did,
517 u64 addr, unsigned int order,
518 bool_t flush_non_present_entry,
519 bool_t flush_dev_iotlb)
520 {
521 struct iommu_flush *flush = iommu_get_flush(iommu);
522 int status;
523
524 ASSERT(!(addr & (~PAGE_MASK_4K)));
525
526 /* Fallback to domain selective flush if no PSI support */
527 if ( !cap_pgsel_inv(iommu->cap) )
528 return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb);
529
530 /* Fallback to domain selective flush if size is too big */
531 if ( order > cap_max_amask_val(iommu->cap) )
532 return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb);
533
534 addr >>= PAGE_SHIFT_4K + order;
535 addr <<= PAGE_SHIFT_4K + order;
536
537 /* apply platform specific errata workarounds */
538 vtd_ops_preamble_quirk(iommu);
539
540 status = flush->iotlb(iommu, did, addr, order, DMA_TLB_PSI_FLUSH,
541 flush_non_present_entry, flush_dev_iotlb);
542
543 /* undo platform specific errata workarounds */
544 vtd_ops_postamble_quirk(iommu);
545
546 return status;
547 }
548
549 static int __must_check iommu_flush_all(void)
550 {
551 struct acpi_drhd_unit *drhd;
552 struct iommu *iommu;
553 bool_t flush_dev_iotlb;
554 int rc = 0;
555
556 flush_all_cache();
557 for_each_drhd_unit ( drhd )
558 {
559 int context_rc, iotlb_rc;
560
561 iommu = drhd->iommu;
562 context_rc = iommu_flush_context_global(iommu, 0);
563 flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
564 iotlb_rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
565
566 /*
567 * Return-value convention:
568 * - positive: invoke iommu_flush_write_buffer to flush the write buffer.
569 * - zero: success.
570 * - negative: failure; continue flushing the IOMMU IOTLBs on a
571 * best-effort basis.
572 */
573 if ( context_rc > 0 || iotlb_rc > 0 )
574 iommu_flush_write_buffer(iommu);
575 if ( rc >= 0 )
576 rc = context_rc;
577 if ( rc >= 0 )
578 rc = iotlb_rc;
579 }
580
581 if ( rc > 0 )
582 rc = 0;
583
584 return rc;
585 }
586
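/*
 * Flush the IOTLB entries for d on every IOMMU the domain has devices
 * behind: a domain-selective flush for bulk updates or an unknown gfn,
 * otherwise a page-selective flush of the single page.  In the
 * page-selective case a non-present-entry flush is only requested when
 * the old PTE was not present.
 */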
587 static int __must_check iommu_flush_iotlb(struct domain *d,
588 unsigned long gfn,
589 bool_t dma_old_pte_present,
590 unsigned int page_count)
591 {
592 struct domain_iommu *hd = dom_iommu(d);
593 struct acpi_drhd_unit *drhd;
594 struct iommu *iommu;
595 bool_t flush_dev_iotlb;
596 int iommu_domid;
597 int rc = 0;
598
599 /*
600 * No need for the pcidevs_lock here because we flush
601 * when a device is assigned or deassigned.
602 */
603 for_each_drhd_unit ( drhd )
604 {
605 iommu = drhd->iommu;
606
607 if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) )
608 continue;
609
610 flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
611 iommu_domid= domain_iommu_domid(d, iommu);
612 if ( iommu_domid == -1 )
613 continue;
614
615 if ( page_count != 1 || gfn == gfn_x(INVALID_GFN) )
616 rc = iommu_flush_iotlb_dsi(iommu, iommu_domid,
617 0, flush_dev_iotlb);
618 else
619 rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
620 (paddr_t)gfn << PAGE_SHIFT_4K,
621 PAGE_ORDER_4K,
622 !dma_old_pte_present,
623 flush_dev_iotlb);
624
625 if ( rc > 0 )
626 {
627 iommu_flush_write_buffer(iommu);
628 rc = 0;
629 }
630 }
631
632 return rc;
633 }
634
635 static int __must_check iommu_flush_iotlb_pages(struct domain *d,
636 unsigned long gfn,
637 unsigned int page_count)
638 {
639 return iommu_flush_iotlb(d, gfn, 1, page_count);
640 }
641
642 static int __must_check iommu_flush_iotlb_all(struct domain *d)
643 {
644 return iommu_flush_iotlb(d, gfn_x(INVALID_GFN), 0, 0);
645 }
646
647 /* Clear the leaf PTE mapping one page. */
648 static int __must_check dma_pte_clear_one(struct domain *domain, u64 addr)
649 {
650 struct domain_iommu *hd = dom_iommu(domain);
651 struct dma_pte *page = NULL, *pte = NULL;
652 u64 pg_maddr;
653 int rc = 0;
654
655 spin_lock(&hd->arch.mapping_lock);
656 /* get last level pte */
657 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
658 if ( pg_maddr == 0 )
659 {
660 spin_unlock(&hd->arch.mapping_lock);
661 return 0;
662 }
663
664 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
665 pte = page + address_level_offset(addr, 1);
666
667 if ( !dma_pte_present(*pte) )
668 {
669 spin_unlock(&hd->arch.mapping_lock);
670 unmap_vtd_domain_page(page);
671 return 0;
672 }
673
674 dma_clear_pte(*pte);
675 spin_unlock(&hd->arch.mapping_lock);
676 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
677
678 if ( !this_cpu(iommu_dont_flush_iotlb) )
679 rc = iommu_flush_iotlb_pages(domain, addr >> PAGE_SHIFT_4K, 1);
680
681 unmap_vtd_domain_page(page);
682
683 return rc;
684 }
685
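/*
 * Page tables are not freed synchronously: iommu_free_pagetable() stashes
 * the table's level in the (otherwise unused) PFN_ORDER field and queues
 * the page on iommu_pt_cleanup_list; queued pages are later handed back to
 * iommu_free_page_table(), which recursively frees the whole subtree.
 */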
686 static void iommu_free_pagetable(u64 pt_maddr, int level)
687 {
688 struct page_info *pg = maddr_to_page(pt_maddr);
689
690 if ( pt_maddr == 0 )
691 return;
692
693 PFN_ORDER(pg) = level;
694 spin_lock(&iommu_pt_cleanup_lock);
695 page_list_add_tail(pg, &iommu_pt_cleanup_list);
696 spin_unlock(&iommu_pt_cleanup_lock);
697 }
698
699 static void iommu_free_page_table(struct page_info *pg)
700 {
701 unsigned int i, next_level = PFN_ORDER(pg) - 1;
702 u64 pt_maddr = page_to_maddr(pg);
703 struct dma_pte *pt_vaddr, *pte;
704
705 PFN_ORDER(pg) = 0;
706 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
707
708 for ( i = 0; i < PTE_NUM; i++ )
709 {
710 pte = &pt_vaddr[i];
711 if ( !dma_pte_present(*pte) )
712 continue;
713
714 if ( next_level >= 1 )
715 iommu_free_pagetable(dma_pte_addr(*pte), next_level);
716
717 dma_clear_pte(*pte);
718 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
719 }
720
721 unmap_vtd_domain_page(pt_vaddr);
722 free_pgtable_maddr(pt_maddr);
723 }
724
725 static int iommu_set_root_entry(struct iommu *iommu)
726 {
727 u32 sts;
728 unsigned long flags;
729
730 spin_lock_irqsave(&iommu->register_lock, flags);
731 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
732
733 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
734 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_SRTP);
735
736 /* Make sure the hardware completes it */
737 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
738 (sts & DMA_GSTS_RTPS), sts);
739 spin_unlock_irqrestore(&iommu->register_lock, flags);
740
741 return 0;
742 }
743
744 static void iommu_enable_translation(struct acpi_drhd_unit *drhd)
745 {
746 u32 sts;
747 unsigned long flags;
748 struct iommu *iommu = drhd->iommu;
749
750 if ( is_igd_drhd(drhd) )
751 {
752 if ( !iommu_igfx )
753 {
754 printk(XENLOG_INFO VTDPREFIX
755 "Passed iommu=no-igfx option. Disabling IGD VT-d engine.\n");
756 return;
757 }
758
759 if ( !is_igd_vt_enabled_quirk() )
760 {
761 if ( force_iommu )
762 panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose");
763
764 printk(XENLOG_WARNING VTDPREFIX
765 "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n");
766 return;
767 }
768 }
769
770 /* apply platform specific errata workarounds */
771 vtd_ops_preamble_quirk(iommu);
772
773 if ( iommu_verbose )
774 printk(VTDPREFIX "iommu_enable_translation: iommu->reg = %p\n",
775 iommu->reg);
776 spin_lock_irqsave(&iommu->register_lock, flags);
777 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
778 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_TE);
779
780 /* Make sure the hardware completes it */
781 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
782 (sts & DMA_GSTS_TES), sts);
783 spin_unlock_irqrestore(&iommu->register_lock, flags);
784
785 /* undo platform specific errata workarounds */
786 vtd_ops_postamble_quirk(iommu);
787
788 /* Disable PMRs when VT-d engine takes effect per spec definition */
789 disable_pmr(iommu);
790 }
791
792 static void iommu_disable_translation(struct iommu *iommu)
793 {
794 u32 sts;
795 unsigned long flags;
796
797 /* apply platform specific errata workarounds */
798 vtd_ops_preamble_quirk(iommu);
799
800 spin_lock_irqsave(&iommu->register_lock, flags);
801 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
802 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_TE));
803
804 /* Make sure the hardware completes it */
805 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
806 !(sts & DMA_GSTS_TES), sts);
807 spin_unlock_irqrestore(&iommu->register_lock, flags);
808
809 /* undo platform specific errata workarounds */
810 vtd_ops_postamble_quirk(iommu);
811 }
812
813 enum faulttype {
814 DMA_REMAP,
815 INTR_REMAP,
816 UNKNOWN,
817 };
818
819 static const char *dma_remap_fault_reasons[] =
820 {
821 "Software",
822 "Present bit in root entry is clear",
823 "Present bit in context entry is clear",
824 "Invalid context entry",
825 "Access beyond MGAW",
826 "PTE Write access is not set",
827 "PTE Read access is not set",
828 "Next page table ptr is invalid",
829 "Root table address invalid",
830 "Context table ptr is invalid",
831 "non-zero reserved fields in RTP",
832 "non-zero reserved fields in CTP",
833 "non-zero reserved fields in PTE",
834 "Blocked a DMA translation request",
835 };
836
837 static const char *intr_remap_fault_reasons[] =
838 {
839 "Detected reserved fields in the decoded interrupt-remapped request",
840 "Interrupt index exceeded the interrupt-remapping table size",
841 "Present field in the IRTE entry is clear",
842 "Error accessing interrupt-remapping table pointed by IRTA_REG",
843 "Detected reserved fields in the IRTE entry",
844 "Blocked a compatibility format interrupt request",
845 "Blocked an interrupt request due to source-id verification failure",
846 };
847
848 static const char *iommu_get_fault_reason(u8 fault_reason,
849 enum faulttype *fault_type)
850 {
851 if ( fault_reason >= 0x20 && ( fault_reason < 0x20 +
852 ARRAY_SIZE(intr_remap_fault_reasons)) )
853 {
854 *fault_type = INTR_REMAP;
855 return intr_remap_fault_reasons[fault_reason - 0x20];
856 }
857 else if ( fault_reason < ARRAY_SIZE(dma_remap_fault_reasons) )
858 {
859 *fault_type = DMA_REMAP;
860 return dma_remap_fault_reasons[fault_reason];
861 }
862 else
863 {
864 *fault_type = UNKNOWN;
865 return "Unknown";
866 }
867 }
868
869 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
870 u8 fault_reason, u16 source_id, u64 addr)
871 {
872 const char *reason, *kind;
873 enum faulttype fault_type;
874 u16 seg = iommu->intel->drhd->segment;
875
876 reason = iommu_get_fault_reason(fault_reason, &fault_type);
877 switch ( fault_type )
878 {
879 case DMA_REMAP:
880 printk(XENLOG_G_WARNING VTDPREFIX
881 "DMAR:[%s] Request device [%04x:%02x:%02x.%u] "
882 "fault addr %"PRIx64", iommu reg = %p\n",
883 (type ? "DMA Read" : "DMA Write"),
884 seg, PCI_BUS(source_id), PCI_SLOT(source_id),
885 PCI_FUNC(source_id), addr, iommu->reg);
886 kind = "DMAR";
887 break;
888 case INTR_REMAP:
889 printk(XENLOG_G_WARNING VTDPREFIX
890 "INTR-REMAP: Request device [%04x:%02x:%02x.%u] "
891 "fault index %"PRIx64", iommu reg = %p\n",
892 seg, PCI_BUS(source_id), PCI_SLOT(source_id),
893 PCI_FUNC(source_id), addr >> 48, iommu->reg);
894 kind = "INTR-REMAP";
895 break;
896 default:
897 printk(XENLOG_G_WARNING VTDPREFIX
898 "UNKNOWN: Request device [%04x:%02x:%02x.%u] "
899 "fault addr %"PRIx64", iommu reg = %p\n",
900 seg, PCI_BUS(source_id), PCI_SLOT(source_id),
901 PCI_FUNC(source_id), addr, iommu->reg);
902 kind = "UNKNOWN";
903 break;
904 }
905
906 printk(XENLOG_G_WARNING VTDPREFIX "%s: reason %02x - %s\n",
907 kind, fault_reason, reason);
908
909 if ( iommu_verbose && fault_type == DMA_REMAP )
910 print_vtd_entries(iommu, PCI_BUS(source_id), PCI_DEVFN2(source_id),
911 addr >> PAGE_SHIFT);
912
913 return 0;
914 }
915
916 static void iommu_fault_status(u32 fault_status)
917 {
918 if ( fault_status & DMA_FSTS_PFO )
919 INTEL_IOMMU_DEBUG("iommu_fault_status: Fault Overflow\n");
920 if ( fault_status & DMA_FSTS_PPF )
921 INTEL_IOMMU_DEBUG("iommu_fault_status: Primary Pending Fault\n");
922 if ( fault_status & DMA_FSTS_AFO )
923 INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Fault Overflow\n");
924 if ( fault_status & DMA_FSTS_APF )
925 INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Pending Fault\n");
926 if ( fault_status & DMA_FSTS_IQE )
927 INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Queue Error\n");
928 if ( fault_status & DMA_FSTS_ICE )
929 INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Completion Error\n");
930 if ( fault_status & DMA_FSTS_ITE )
931 INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Time-out Error\n");
932 }
933
934 #define PRIMARY_FAULT_REG_LEN (16)
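/*
 * Each fault recording register is a 16-byte record.  The handler below
 * reads the high dword (offset 12) for the Fault bit, reason and type,
 * the dword at offset 8 for the source-id, and the leading qword for the
 * faulting address, then clears the record by writing DMA_FRCD_F back.
 */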
935 static void __do_iommu_page_fault(struct iommu *iommu)
936 {
937 int reg, fault_index;
938 u32 fault_status;
939 unsigned long flags;
940
941 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
942
943 iommu_fault_status(fault_status);
944
945 /* FIXME: ignore advanced fault log */
946 if ( !(fault_status & DMA_FSTS_PPF) )
947 goto clear_overflow;
948
949 fault_index = dma_fsts_fault_record_index(fault_status);
950 reg = cap_fault_reg_offset(iommu->cap);
951 while (1)
952 {
953 u8 fault_reason;
954 u16 source_id;
955 u32 data;
956 u64 guest_addr;
957 int type;
958
959 /* highest 32 bits */
960 spin_lock_irqsave(&iommu->register_lock, flags);
961 data = dmar_readl(iommu->reg, reg +
962 fault_index * PRIMARY_FAULT_REG_LEN + 12);
963 if ( !(data & DMA_FRCD_F) )
964 {
965 spin_unlock_irqrestore(&iommu->register_lock, flags);
966 break;
967 }
968
969 fault_reason = dma_frcd_fault_reason(data);
970 type = dma_frcd_type(data);
971
972 data = dmar_readl(iommu->reg, reg +
973 fault_index * PRIMARY_FAULT_REG_LEN + 8);
974 source_id = dma_frcd_source_id(data);
975
976 guest_addr = dmar_readq(iommu->reg, reg +
977 fault_index * PRIMARY_FAULT_REG_LEN);
978 guest_addr = dma_frcd_page_addr(guest_addr);
979 /* clear the fault */
980 dmar_writel(iommu->reg, reg +
981 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
982 spin_unlock_irqrestore(&iommu->register_lock, flags);
983
984 iommu_page_fault_do_one(iommu, type, fault_reason,
985 source_id, guest_addr);
986
987 pci_check_disable_device(iommu->intel->drhd->segment,
988 PCI_BUS(source_id), PCI_DEVFN2(source_id));
989
990 fault_index++;
991 if ( fault_index > cap_num_fault_regs(iommu->cap) )
992 fault_index = 0;
993 }
994 clear_overflow:
995 /* clear primary fault overflow */
996 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
997 if ( fault_status & DMA_FSTS_PFO )
998 {
999 spin_lock_irqsave(&iommu->register_lock, flags);
1000 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
1001 spin_unlock_irqrestore(&iommu->register_lock, flags);
1002 }
1003 }
1004
1005 static void do_iommu_page_fault(unsigned long data)
1006 {
1007 struct acpi_drhd_unit *drhd;
1008
1009 if ( list_empty(&acpi_drhd_units) )
1010 {
1011 INTEL_IOMMU_DEBUG("no device found, something must be very wrong!\n");
1012 return;
1013 }
1014
1015 /*
1016 * No matter which IOMMU the interrupt came from, check all the
1017 * IOMMUs present in the system. This allows for having just one
1018 * tasklet (instead of one per IOMMU) and should be more than
1019 * fine, considering how rare faults should be.
1020 */
1021 for_each_drhd_unit ( drhd )
1022 __do_iommu_page_fault(drhd->iommu);
1023 }
1024
1025 static void iommu_page_fault(int irq, void *dev_id,
1026 struct cpu_user_regs *regs)
1027 {
1028 /*
1029 * Just flag the tasklet as runnable. This is fine, according to VT-d
1030 * specs since a new interrupt won't be generated until we clear all
1031 * the faults that caused this one to happen.
1032 */
1033 tasklet_schedule(&vtd_fault_tasklet);
1034 }
1035
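/*
 * The fault-event interrupt is a message-signalled interrupt programmed
 * directly through the IOMMU's FECTL/FEDATA/FEADDR registers rather than
 * through a PCI MSI capability, hence the custom hw_irq_controller
 * (dma_msi_type) built from the helpers below.
 */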
1036 static void dma_msi_unmask(struct irq_desc *desc)
1037 {
1038 struct iommu *iommu = desc->action->dev_id;
1039 unsigned long flags;
1040 u32 sts;
1041
1042 /* unmask it */
1043 spin_lock_irqsave(&iommu->register_lock, flags);
1044 sts = dmar_readl(iommu->reg, DMAR_FECTL_REG);
1045 sts &= ~DMA_FECTL_IM;
1046 dmar_writel(iommu->reg, DMAR_FECTL_REG, sts);
1047 spin_unlock_irqrestore(&iommu->register_lock, flags);
1048 iommu->msi.msi_attrib.host_masked = 0;
1049 }
1050
1051 static void dma_msi_mask(struct irq_desc *desc)
1052 {
1053 unsigned long flags;
1054 struct iommu *iommu = desc->action->dev_id;
1055 u32 sts;
1056
1057 /* mask it */
1058 spin_lock_irqsave(&iommu->register_lock, flags);
1059 sts = dmar_readl(iommu->reg, DMAR_FECTL_REG);
1060 sts |= DMA_FECTL_IM;
1061 dmar_writel(iommu->reg, DMAR_FECTL_REG, sts);
1062 spin_unlock_irqrestore(&iommu->register_lock, flags);
1063 iommu->msi.msi_attrib.host_masked = 1;
1064 }
1065
1066 static unsigned int dma_msi_startup(struct irq_desc *desc)
1067 {
1068 dma_msi_unmask(desc);
1069 return 0;
1070 }
1071
1072 static void dma_msi_ack(struct irq_desc *desc)
1073 {
1074 irq_complete_move(desc);
1075 dma_msi_mask(desc);
1076 move_masked_irq(desc);
1077 }
1078
1079 static void dma_msi_end(struct irq_desc *desc, u8 vector)
1080 {
1081 dma_msi_unmask(desc);
1082 ack_APIC_irq();
1083 }
1084
1085 static void dma_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
1086 {
1087 struct msi_msg msg;
1088 unsigned int dest;
1089 unsigned long flags;
1090 struct iommu *iommu = desc->action->dev_id;
1091
1092 dest = set_desc_affinity(desc, mask);
1093 if (dest == BAD_APICID){
1094 dprintk(XENLOG_ERR VTDPREFIX, "Set iommu interrupt affinity error!\n");
1095 return;
1096 }
1097
1098 msi_compose_msg(desc->arch.vector, NULL, &msg);
1099 msg.dest32 = dest;
1100 if (x2apic_enabled)
1101 msg.address_hi = dest & 0xFFFFFF00;
1102 ASSERT(!(msg.address_lo & MSI_ADDR_DEST_ID_MASK));
1103 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1104 iommu->msi.msg = msg;
1105
1106 spin_lock_irqsave(&iommu->register_lock, flags);
1107 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data);
1108 dmar_writeq(iommu->reg, DMAR_FEADDR_REG, msg.address);
1109 spin_unlock_irqrestore(&iommu->register_lock, flags);
1110 }
1111
1112 static hw_irq_controller dma_msi_type = {
1113 .typename = "DMA_MSI",
1114 .startup = dma_msi_startup,
1115 .shutdown = dma_msi_mask,
1116 .enable = dma_msi_unmask,
1117 .disable = dma_msi_mask,
1118 .ack = dma_msi_ack,
1119 .end = dma_msi_end,
1120 .set_affinity = dma_msi_set_affinity,
1121 };
1122
1123 static int __init iommu_set_interrupt(struct acpi_drhd_unit *drhd)
1124 {
1125 int irq, ret;
1126 struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
1127 struct iommu *iommu = drhd->iommu;
1128 struct irq_desc *desc;
1129
1130 irq = create_irq(rhsa ? pxm_to_node(rhsa->proximity_domain)
1131 : NUMA_NO_NODE);
1132 if ( irq <= 0 )
1133 {
1134 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no irq available!\n");
1135 return -EINVAL;
1136 }
1137
1138 desc = irq_to_desc(irq);
1139 desc->handler = &dma_msi_type;
1140 ret = request_irq(irq, 0, iommu_page_fault, "dmar", iommu);
1141 if ( ret )
1142 {
1143 desc->handler = &no_irq_type;
1144 destroy_irq(irq);
1145 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
1146 return ret;
1147 }
1148
1149 iommu->msi.irq = irq;
1150 iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU;
1151 iommu->msi.msi_attrib.maskbit = 1;
1152 iommu->msi.msi_attrib.is_64 = 1;
1153 desc->msi_desc = &iommu->msi;
1154
1155 return 0;
1156 }
1157
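/*
 * Per-unit setup: map the register block, read CAP/ECAP, derive the number
 * of page-table levels from the supported AGAW bits (SAGAW), and allocate
 * the domain-id bitmap/map used by domain_iommu_domid() above.
 */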
1158 int __init iommu_alloc(struct acpi_drhd_unit *drhd)
1159 {
1160 struct iommu *iommu;
1161 unsigned long sagaw, nr_dom;
1162 int agaw;
1163
1164 if ( nr_iommus > MAX_IOMMUS )
1165 {
1166 dprintk(XENLOG_ERR VTDPREFIX,
1167 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
1168 return -ENOMEM;
1169 }
1170
1171 iommu = xzalloc(struct iommu);
1172 if ( iommu == NULL )
1173 return -ENOMEM;
1174
1175 iommu->msi.irq = -1; /* No irq assigned yet. */
1176 INIT_LIST_HEAD(&iommu->ats_devices);
1177
1178 iommu->intel = alloc_intel_iommu();
1179 if ( iommu->intel == NULL )
1180 {
1181 xfree(iommu);
1182 return -ENOMEM;
1183 }
1184 iommu->intel->drhd = drhd;
1185 drhd->iommu = iommu;
1186
1187 if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
1188 return -ENOMEM;
1189
1190 iommu->reg = ioremap(drhd->address, PAGE_SIZE);
1191 if ( !iommu->reg )
1192 return -ENOMEM;
1193 iommu->index = nr_iommus++;
1194
1195 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1196 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1197
1198 if ( iommu_verbose )
1199 {
1200 printk(VTDPREFIX "drhd->address = %"PRIx64" iommu->reg = %p\n",
1201 drhd->address, iommu->reg);
1202 printk(VTDPREFIX "cap = %"PRIx64" ecap = %"PRIx64"\n",
1203 iommu->cap, iommu->ecap);
1204 }
1205 if ( !(iommu->cap + 1) || !(iommu->ecap + 1) )
1206 return -ENODEV;
1207
1208 if ( cap_fault_reg_offset(iommu->cap) +
1209 cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE ||
1210 ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE )
1211 {
1212 printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n");
1213 print_iommu_regs(drhd);
1214 return -ENODEV;
1215 }
1216
1217 /* Calculate number of pagetable levels: between 2 and 4. */
1218 sagaw = cap_sagaw(iommu->cap);
1219 for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
1220 if ( test_bit(agaw, &sagaw) )
1221 break;
1222 if ( agaw < 0 )
1223 {
1224 printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported sagaw %lx\n", sagaw);
1225 print_iommu_regs(drhd);
1226 return -ENODEV;
1227 }
1228 iommu->nr_pt_levels = agaw_to_level(agaw);
1229
1230 if ( !ecap_coherent(iommu->ecap) )
1231 iommus_incoherent = 1;
1232
1233 /* allocate domain id bitmap */
1234 nr_dom = cap_ndoms(iommu->cap);
1235 iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
1236 if ( !iommu->domid_bitmap )
1237 return -ENOMEM ;
1238
1239 /*
1240 * If Caching Mode is set, invalid translations are tagged with
1241 * domain id 0, hence reserve bit 0 for it.
1242 */
1243 if ( cap_caching_mode(iommu->cap) )
1244 set_bit(0, iommu->domid_bitmap);
1245
1246 iommu->domid_map = xzalloc_array(u16, nr_dom);
1247 if ( !iommu->domid_map )
1248 return -ENOMEM ;
1249
1250 spin_lock_init(&iommu->lock);
1251 spin_lock_init(&iommu->register_lock);
1252
1253 return 0;
1254 }
1255
1256 void __init iommu_free(struct acpi_drhd_unit *drhd)
1257 {
1258 struct iommu *iommu = drhd->iommu;
1259
1260 if ( iommu == NULL )
1261 return;
1262
1263 drhd->iommu = NULL;
1264
1265 if ( iommu->root_maddr != 0 )
1266 {
1267 free_pgtable_maddr(iommu->root_maddr);
1268 iommu->root_maddr = 0;
1269 }
1270
1271 if ( iommu->reg )
1272 iounmap(iommu->reg);
1273
1274 xfree(iommu->domid_bitmap);
1275 xfree(iommu->domid_map);
1276
1277 free_intel_iommu(iommu->intel);
1278 if ( iommu->msi.irq >= 0 )
1279 destroy_irq(iommu->msi.irq);
1280 xfree(iommu);
1281 }
1282
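/*
 * Round a guest address width up to a valid adjusted guest address width:
 * each page-table level resolves 9 bits on top of the 12-bit page offset,
 * so (agaw - 12) must be a multiple of 9.  For example 32 -> 39, 39 -> 39,
 * 48 -> 48; anything above 64 is clamped to 64.
 */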
1283 #define guestwidth_to_adjustwidth(gaw) ({ \
1284 int agaw, r = (gaw - 12) % 9; \
1285 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1286 if ( agaw > 64 ) \
1287 agaw = 64; \
1288 agaw; })
1289
1290 static int intel_iommu_domain_init(struct domain *d)
1291 {
1292 dom_iommu(d)->arch.agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
1293
1294 return 0;
1295 }
1296
1297 static void __hwdom_init intel_iommu_hwdom_init(struct domain *d)
1298 {
1299 struct acpi_drhd_unit *drhd;
1300
1301 if ( !iommu_passthrough && !need_iommu(d) )
1302 {
1303 /* Set up 1:1 page table for hardware domain. */
1304 vtd_set_hwdom_mapping(d);
1305 }
1306
1307 setup_hwdom_pci_devices(d, setup_hwdom_device);
1308 setup_hwdom_rmrr(d);
1309
1310 if ( iommu_flush_all() )
1311 printk(XENLOG_WARNING VTDPREFIX
1312 " IOMMU flush all failed for hardware domain\n");
1313
1314 for_each_drhd_unit ( drhd )
1315 {
1316 if ( iomem_deny_access(d, PFN_DOWN(drhd->address),
1317 PFN_DOWN(drhd->address)) )
1318 BUG();
1319 iommu_enable_translation(drhd);
1320 }
1321 }
1322
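/*
 * Install the context entry for (bus, devfn) on one IOMMU.  The hardware
 * domain gets pass-through translation when iommu_passthrough is set;
 * otherwise the entry points at the domain's page tables, skipping top
 * levels if the unit supports fewer than four.  A domain id is assigned,
 * and the context cache and IOTLB are then flushed.
 */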
1323 int domain_context_mapping_one(
1324 struct domain *domain,
1325 struct iommu *iommu,
1326 u8 bus, u8 devfn, const struct pci_dev *pdev)
1327 {
1328 struct domain_iommu *hd = dom_iommu(domain);
1329 struct context_entry *context, *context_entries;
1330 u64 maddr, pgd_maddr;
1331 u16 seg = iommu->intel->drhd->segment;
1332 int agaw, rc, ret;
1333 bool_t flush_dev_iotlb;
1334
1335 ASSERT(pcidevs_locked());
1336 spin_lock(&iommu->lock);
1337 maddr = bus_to_context_maddr(iommu, bus);
1338 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1339 context = &context_entries[devfn];
1340
1341 if ( context_present(*context) )
1342 {
1343 int res = 0;
1344
1345 /* Try to get domain ownership from device structure. If that's
1346 * not available, try to read it from the context itself. */
1347 if ( pdev )
1348 {
1349 if ( pdev->domain != domain )
1350 {
1351 printk(XENLOG_G_INFO VTDPREFIX
1352 "d%d: %04x:%02x:%02x.%u owned by d%d!",
1353 domain->domain_id,
1354 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1355 pdev->domain ? pdev->domain->domain_id : -1);
1356 res = -EINVAL;
1357 }
1358 }
1359 else
1360 {
1361 int cdomain;
1362 cdomain = context_get_domain_id(context, iommu);
1363
1364 if ( cdomain < 0 )
1365 {
1366 printk(XENLOG_G_WARNING VTDPREFIX
1367 "d%d: %04x:%02x:%02x.%u mapped, but can't find owner!\n",
1368 domain->domain_id,
1369 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1370 res = -EINVAL;
1371 }
1372 else if ( cdomain != domain->domain_id )
1373 {
1374 printk(XENLOG_G_INFO VTDPREFIX
1375 "d%d: %04x:%02x:%02x.%u already mapped to d%d!",
1376 domain->domain_id,
1377 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1378 cdomain);
1379 res = -EINVAL;
1380 }
1381 }
1382
1383 unmap_vtd_domain_page(context_entries);
1384 spin_unlock(&iommu->lock);
1385 return res;
1386 }
1387
1388 if ( iommu_passthrough && is_hardware_domain(domain) )
1389 {
1390 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1391 agaw = level_to_agaw(iommu->nr_pt_levels);
1392 }
1393 else
1394 {
1395 spin_lock(&hd->arch.mapping_lock);
1396
1397 /* Ensure we have pagetables allocated down to leaf PTE. */
1398 if ( hd->arch.pgd_maddr == 0 )
1399 {
1400 addr_to_dma_page_maddr(domain, 0, 1);
1401 if ( hd->arch.pgd_maddr == 0 )
1402 {
1403 nomem:
1404 spin_unlock(&hd->arch.mapping_lock);
1405 spin_unlock(&iommu->lock);
1406 unmap_vtd_domain_page(context_entries);
1407 return -ENOMEM;
1408 }
1409 }
1410
1411 /* Skip top levels of page tables for 2- and 3-level DRHDs. */
1412 pgd_maddr = hd->arch.pgd_maddr;
1413 for ( agaw = level_to_agaw(4);
1414 agaw != level_to_agaw(iommu->nr_pt_levels);
1415 agaw-- )
1416 {
1417 struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
1418 pgd_maddr = dma_pte_addr(*p);
1419 unmap_vtd_domain_page(p);
1420 if ( pgd_maddr == 0 )
1421 goto nomem;
1422 }
1423
1424 context_set_address_root(*context, pgd_maddr);
1425 if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
1426 context_set_translation_type(*context, CONTEXT_TT_DEV_IOTLB);
1427 else
1428 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1429
1430 spin_unlock(&hd->arch.mapping_lock);
1431 }
1432
1433 if ( context_set_domain_id(context, domain, iommu) )
1434 {
1435 spin_unlock(&iommu->lock);
1436 unmap_vtd_domain_page(context_entries);
1437 return -EFAULT;
1438 }
1439
1440 context_set_address_width(*context, agaw);
1441 context_set_fault_enable(*context);
1442 context_set_present(*context);
1443 iommu_flush_cache_entry(context, sizeof(struct context_entry));
1444 spin_unlock(&iommu->lock);
1445
1446 /* Context entry was previously non-present (with domid 0). */
1447 rc = iommu_flush_context_device(iommu, 0, PCI_BDF2(bus, devfn),
1448 DMA_CCMD_MASK_NOBIT, 1);
1449 flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
1450 ret = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb);
1451
1452 /*
1453 * Return-value convention:
1454 * - positive: invoke iommu_flush_write_buffer to flush the write buffer.
1455 * - zero: success.
1456 * - negative: failure; continue flushing the IOMMU IOTLBs on a
1457 * best-effort basis.
1458 */
1459 if ( rc > 0 || ret > 0 )
1460 iommu_flush_write_buffer(iommu);
1461 if ( rc >= 0 )
1462 rc = ret;
1463 if ( rc > 0 )
1464 rc = 0;
1465
1466 set_bit(iommu->index, &hd->arch.iommu_bitmap);
1467
1468 unmap_vtd_domain_page(context_entries);
1469
1470 if ( !seg && !rc )
1471 rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
1472
1473 return rc;
1474 }
1475
1476 static int domain_context_mapping(struct domain *domain, u8 devfn,
1477 struct pci_dev *pdev)
1478 {
1479 struct acpi_drhd_unit *drhd;
1480 int ret = 0;
1481 u8 seg = pdev->seg, bus = pdev->bus, secbus;
1482
1483 drhd = acpi_find_matched_drhd_unit(pdev);
1484 if ( !drhd )
1485 return -ENODEV;
1486
1487 ASSERT(pcidevs_locked());
1488
1489 switch ( pdev->type )
1490 {
1491 case DEV_TYPE_PCI_HOST_BRIDGE:
1492 if ( iommu_debug )
1493 printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u map\n",
1494 domain->domain_id, seg, bus,
1495 PCI_SLOT(devfn), PCI_FUNC(devfn));
1496 if ( !is_hardware_domain(domain) )
1497 return -EPERM;
1498 break;
1499
1500 case DEV_TYPE_PCIe_BRIDGE:
1501 case DEV_TYPE_PCIe2PCI_BRIDGE:
1502 case DEV_TYPE_LEGACY_PCI_BRIDGE:
1503 break;
1504
1505 case DEV_TYPE_PCIe_ENDPOINT:
1506 if ( iommu_debug )
1507 printk(VTDPREFIX "d%d:PCIe: map %04x:%02x:%02x.%u\n",
1508 domain->domain_id, seg, bus,
1509 PCI_SLOT(devfn), PCI_FUNC(devfn));
1510 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
1511 pdev);
1512 if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
1513 enable_ats_device(pdev, &drhd->iommu->ats_devices);
1514
1515 break;
1516
1517 case DEV_TYPE_PCI:
1518 if ( iommu_debug )
1519 printk(VTDPREFIX "d%d:PCI: map %04x:%02x:%02x.%u\n",
1520 domain->domain_id, seg, bus,
1521 PCI_SLOT(devfn), PCI_FUNC(devfn));
1522
1523 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
1524 pdev);
1525 if ( ret )
1526 break;
1527
1528 if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 )
1529 break;
1530
1531 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
1532 pci_get_pdev(seg, bus, devfn));
1533
1534 /*
1535 * Devices behind PCIe-to-PCI/PCIx bridge may generate different
1536 * requester-id. It may originate from devfn=0 on the secondary bus
1537 * behind the bridge. Map that id as well if we didn't already.
1538 */
1539 if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
1540 (secbus != pdev->bus || pdev->devfn != 0) )
1541 ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
1542 pci_get_pdev(seg, secbus, 0));
1543
1544 break;
1545
1546 default:
1547 dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
1548 domain->domain_id, pdev->type,
1549 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1550 ret = -EINVAL;
1551 break;
1552 }
1553
1554 if ( !ret && devfn == pdev->devfn )
1555 pci_vtd_quirk(pdev);
1556
1557 return ret;
1558 }
1559
1560 int domain_context_unmap_one(
1561 struct domain *domain,
1562 struct iommu *iommu,
1563 u8 bus, u8 devfn)
1564 {
1565 struct context_entry *context, *context_entries;
1566 u64 maddr;
1567 int iommu_domid, rc, ret;
1568 bool_t flush_dev_iotlb;
1569
1570 ASSERT(pcidevs_locked());
1571 spin_lock(&iommu->lock);
1572
1573 maddr = bus_to_context_maddr(iommu, bus);
1574 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1575 context = &context_entries[devfn];
1576
1577 if ( !context_present(*context) )
1578 {
1579 spin_unlock(&iommu->lock);
1580 unmap_vtd_domain_page(context_entries);
1581 return 0;
1582 }
1583
1584 context_clear_present(*context);
1585 context_clear_entry(*context);
1586 iommu_flush_cache_entry(context, sizeof(struct context_entry));
1587
1588 iommu_domid= domain_iommu_domid(domain, iommu);
1589 if ( iommu_domid == -1 )
1590 {
1591 spin_unlock(&iommu->lock);
1592 unmap_vtd_domain_page(context_entries);
1593 return -EINVAL;
1594 }
1595
1596 rc = iommu_flush_context_device(iommu, iommu_domid,
1597 PCI_BDF2(bus, devfn),
1598 DMA_CCMD_MASK_NOBIT, 0);
1599
1600 flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
1601 ret = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
1602
1603 /*
1604 * Return-value convention:
1605 * - positive: invoke iommu_flush_write_buffer to flush the write buffer.
1606 * - zero: success.
1607 * - negative: failure; continue flushing the IOMMU IOTLBs on a
1608 * best-effort basis.
1609 */
1610 if ( rc > 0 || ret > 0 )
1611 iommu_flush_write_buffer(iommu);
1612 if ( rc >= 0 )
1613 rc = ret;
1614 if ( rc > 0 )
1615 rc = 0;
1616
1617 spin_unlock(&iommu->lock);
1618 unmap_vtd_domain_page(context_entries);
1619
1620 if ( !iommu->intel->drhd->segment && !rc )
1621 rc = me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC);
1622
1623 return rc;
1624 }
1625
1626 static int domain_context_unmap(struct domain *domain, u8 devfn,
1627 struct pci_dev *pdev)
1628 {
1629 struct acpi_drhd_unit *drhd;
1630 struct iommu *iommu;
1631 int ret = 0;
1632 u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
1633 int found = 0;
1634
1635 drhd = acpi_find_matched_drhd_unit(pdev);
1636 if ( !drhd )
1637 return -ENODEV;
1638 iommu = drhd->iommu;
1639
1640 switch ( pdev->type )
1641 {
1642 case DEV_TYPE_PCI_HOST_BRIDGE:
1643 if ( iommu_debug )
1644 printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u unmap\n",
1645 domain->domain_id, seg, bus,
1646 PCI_SLOT(devfn), PCI_FUNC(devfn));
1647 if ( !is_hardware_domain(domain) )
1648 return -EPERM;
1649 goto out;
1650
1651 case DEV_TYPE_PCIe_BRIDGE:
1652 case DEV_TYPE_PCIe2PCI_BRIDGE:
1653 case DEV_TYPE_LEGACY_PCI_BRIDGE:
1654 goto out;
1655
1656 case DEV_TYPE_PCIe_ENDPOINT:
1657 if ( iommu_debug )
1658 printk(VTDPREFIX "d%d:PCIe: unmap %04x:%02x:%02x.%u\n",
1659 domain->domain_id, seg, bus,
1660 PCI_SLOT(devfn), PCI_FUNC(devfn));
1661 ret = domain_context_unmap_one(domain, iommu, bus, devfn);
1662 if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
1663 disable_ats_device(pdev);
1664
1665 break;
1666
1667 case DEV_TYPE_PCI:
1668 if ( iommu_debug )
1669 printk(VTDPREFIX "d%d:PCI: unmap %04x:%02x:%02x.%u\n",
1670 domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1671 ret = domain_context_unmap_one(domain, iommu, bus, devfn);
1672 if ( ret )
1673 break;
1674
1675 tmp_bus = bus;
1676 tmp_devfn = devfn;
1677 if ( find_upstream_bridge(seg, &tmp_bus, &tmp_devfn, &secbus) < 1 )
1678 break;
1679
1680 /* PCIe to PCI/PCIx bridge */
1681 if ( pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
1682 {
1683 ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn);
1684 if ( ret )
1685 return ret;
1686
1687 ret = domain_context_unmap_one(domain, iommu, secbus, 0);
1688 }
1689 else /* Legacy PCI bridge */
1690 ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn);
1691
1692 break;
1693
1694 default:
1695 dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
1696 domain->domain_id, pdev->type,
1697 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1698 ret = -EINVAL;
1699 goto out;
1700 }
1701
1702 /*
1703 * If this domain owns no other devices behind the same IOMMU, clear the
1704 * IOMMU from iommu_bitmap and clear the domain id from domid_bitmap.
1705 */
1706 for_each_pdev ( domain, pdev )
1707 {
1708 if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn )
1709 continue;
1710
1711 drhd = acpi_find_matched_drhd_unit(pdev);
1712 if ( drhd && drhd->iommu == iommu )
1713 {
1714 found = 1;
1715 break;
1716 }
1717 }
1718
1719 if ( found == 0 )
1720 {
1721 int iommu_domid;
1722
1723 clear_bit(iommu->index, &dom_iommu(domain)->arch.iommu_bitmap);
1724
1725 iommu_domid = domain_iommu_domid(domain, iommu);
1726 if ( iommu_domid == -1 )
1727 {
1728 ret = -EINVAL;
1729 goto out;
1730 }
1731
1732 clear_bit(iommu_domid, iommu->domid_bitmap);
1733 iommu->domid_map[iommu_domid] = 0;
1734 }
1735
1736 out:
1737 return ret;
1738 }
1739
1740 static void iommu_domain_teardown(struct domain *d)
1741 {
1742 struct domain_iommu *hd = dom_iommu(d);
1743 struct mapped_rmrr *mrmrr, *tmp;
1744
1745 if ( list_empty(&acpi_drhd_units) )
1746 return;
1747
1748 list_for_each_entry_safe ( mrmrr, tmp, &hd->arch.mapped_rmrrs, list )
1749 {
1750 list_del(&mrmrr->list);
1751 xfree(mrmrr);
1752 }
1753
1754 if ( iommu_use_hap_pt(d) )
1755 return;
1756
1757 spin_lock(&hd->arch.mapping_lock);
1758 iommu_free_pagetable(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw));
1759 hd->arch.pgd_maddr = 0;
1760 spin_unlock(&hd->arch.mapping_lock);
1761 }
1762
1763 static int __must_check intel_iommu_map_page(struct domain *d,
1764 unsigned long gfn,
1765 unsigned long mfn,
1766 unsigned int flags)
1767 {
1768 struct domain_iommu *hd = dom_iommu(d);
1769 struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 };
1770 u64 pg_maddr;
1771 int rc = 0;
1772
1773 /* Do nothing if VT-d shares EPT page table */
1774 if ( iommu_use_hap_pt(d) )
1775 return 0;
1776
1777 /* Do nothing if hardware domain and iommu supports pass thru. */
1778 if ( iommu_passthrough && is_hardware_domain(d) )
1779 return 0;
1780
1781 spin_lock(&hd->arch.mapping_lock);
1782
1783 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
1784 if ( pg_maddr == 0 )
1785 {
1786 spin_unlock(&hd->arch.mapping_lock);
1787 return -ENOMEM;
1788 }
1789 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1790 pte = page + (gfn & LEVEL_MASK);
1791 old = *pte;
1792 dma_set_pte_addr(new, (paddr_t)mfn << PAGE_SHIFT_4K);
1793 dma_set_pte_prot(new,
1794 ((flags & IOMMUF_readable) ? DMA_PTE_READ : 0) |
1795 ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
1796
1797 /* Set the SNP on leaf page table if Snoop Control available */
1798 if ( iommu_snoop )
1799 dma_set_pte_snp(new);
1800
1801 if ( old.val == new.val )
1802 {
1803 spin_unlock(&hd->arch.mapping_lock);
1804 unmap_vtd_domain_page(page);
1805 return 0;
1806 }
1807 *pte = new;
1808
1809 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
1810 spin_unlock(&hd->arch.mapping_lock);
1811 unmap_vtd_domain_page(page);
1812
1813 if ( !this_cpu(iommu_dont_flush_iotlb) )
1814 rc = iommu_flush_iotlb(d, gfn, dma_pte_present(old), 1);
1815
1816 return rc;
1817 }
1818
1819 static int __must_check intel_iommu_unmap_page(struct domain *d,
1820 unsigned long gfn)
1821 {
1822 /* Do nothing if hardware domain and iommu supports pass thru. */
1823 if ( iommu_passthrough && is_hardware_domain(d) )
1824 return 0;
1825
1826 return dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1827 }
1828
1829 int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
1830 int order, int present)
1831 {
1832 struct acpi_drhd_unit *drhd;
1833 struct iommu *iommu = NULL;
1834 struct domain_iommu *hd = dom_iommu(d);
1835 bool_t flush_dev_iotlb;
1836 int iommu_domid;
1837 int rc = 0;
1838
1839 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
1840
1841 for_each_drhd_unit ( drhd )
1842 {
1843 iommu = drhd->iommu;
1844 if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) )
1845 continue;
1846
1847 flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
1848 iommu_domid= domain_iommu_domid(d, iommu);
1849 if ( iommu_domid == -1 )
1850 continue;
1851
1852 rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
1853 (paddr_t)gfn << PAGE_SHIFT_4K,
1854 order, !present, flush_dev_iotlb);
1855 if ( rc > 0 )
1856 {
1857 iommu_flush_write_buffer(iommu);
1858 rc = 0;
1859 }
1860 }
1861
1862 if ( unlikely(rc) )
1863 {
1864 if ( !d->is_shutting_down && printk_ratelimit() )
1865 printk(XENLOG_ERR VTDPREFIX
1866 " d%d: IOMMU pages flush failed: %d\n",
1867 d->domain_id, rc);
1868
1869 if ( !is_hardware_domain(d) )
1870 domain_crash(d);
1871 }
1872
1873 return rc;
1874 }
1875
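/*
 * Sharing the EPT page tables with VT-d is only safe if both agree on the
 * superpage sizes in use: a 2M/1G EPT entry must also be a valid VT-d
 * entry, so the respective capabilities have to match.
 */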
1876 static int __init vtd_ept_page_compatible(struct iommu *iommu)
1877 {
1878 u64 ept_cap, vtd_cap = iommu->cap;
1879
1880 /* EPT is not initialised yet, so we must check the capability in
1881 * the MSR explicitly rather than use cpu_has_vmx_ept_*() */
1882 if ( rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, ept_cap) != 0 )
1883 return 0;
1884
1885 return (ept_has_2mb(ept_cap) && opt_hap_2mb) == cap_sps_2mb(vtd_cap) &&
1886 (ept_has_1gb(ept_cap) && opt_hap_1gb) == cap_sps_1gb(vtd_cap);
1887 }
1888
1889 /*
1890  * Set the VT-d page table root to the EPT table when sharing is allowed
1891 */
1892 static void iommu_set_pgd(struct domain *d)
1893 {
1894 mfn_t pgd_mfn;
1895
1896 pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
1897 dom_iommu(d)->arch.pgd_maddr =
1898 pagetable_get_paddr(pagetable_from_mfn(pgd_mfn));
1899 }
1900
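/*
 * Establish (map) or tear down (!map) an identity mapping of the given
 * RMRR in d's p2m.  Mappings are reference counted via
 * hd->arch.mapped_rmrrs, so a region shared by several devices is only
 * removed once its last user is gone.
 */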
1901 static int rmrr_identity_mapping(struct domain *d, bool_t map,
1902 const struct acpi_rmrr_unit *rmrr,
1903 u32 flag)
1904 {
1905 unsigned long base_pfn = rmrr->base_address >> PAGE_SHIFT_4K;
1906 unsigned long end_pfn = PAGE_ALIGN_4K(rmrr->end_address) >> PAGE_SHIFT_4K;
1907 struct mapped_rmrr *mrmrr;
1908 struct domain_iommu *hd = dom_iommu(d);
1909
1910 ASSERT(pcidevs_locked());
1911 ASSERT(rmrr->base_address < rmrr->end_address);
1912
1913 /*
1914 * No need to acquire hd->arch.mapping_lock: Both insertion and removal
1915 * get done while holding pcidevs_lock.
1916 */
1917 list_for_each_entry( mrmrr, &hd->arch.mapped_rmrrs, list )
1918 {
1919 if ( mrmrr->base == rmrr->base_address &&
1920 mrmrr->end == rmrr->end_address )
1921 {
1922 int ret = 0;
1923
1924 if ( map )
1925 {
1926 ++mrmrr->count;
1927 return 0;
1928 }
1929
1930 if ( --mrmrr->count )
1931 return 0;
1932
1933 while ( base_pfn < end_pfn )
1934 {
1935 if ( clear_identity_p2m_entry(d, base_pfn) )
1936 ret = -ENXIO;
1937 base_pfn++;
1938 }
1939
1940 list_del(&mrmrr->list);
1941 xfree(mrmrr);
1942 return ret;
1943 }
1944 }
1945
1946 if ( !map )
1947 return -ENOENT;
1948
1949 while ( base_pfn < end_pfn )
1950 {
1951 int err = set_identity_p2m_entry(d, base_pfn, p2m_access_rw, flag);
1952
1953 if ( err )
1954 return err;
1955 base_pfn++;
1956 }
1957
1958 mrmrr = xmalloc(struct mapped_rmrr);
1959 if ( !mrmrr )
1960 return -ENOMEM;
1961 mrmrr->base = rmrr->base_address;
1962 mrmrr->end = rmrr->end_address;
1963 mrmrr->count = 1;
1964 list_add_tail(&mrmrr->list, &hd->arch.mapped_rmrrs);
1965
1966 return 0;
1967 }
1968
1969 static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev)
1970 {
1971 struct acpi_rmrr_unit *rmrr;
1972 u16 bdf;
1973 int ret, i;
1974
1975 ASSERT(pcidevs_locked());
1976
1977 if ( !pdev->domain )
1978 return -EINVAL;
1979
1980 ret = domain_context_mapping(pdev->domain, devfn, pdev);
1981 if ( ret )
1982 {
1983 dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
1984 pdev->domain->domain_id);
1985 return ret;
1986 }
1987
1988 for_each_rmrr_device ( rmrr, bdf, i )
1989 {
1990 if ( rmrr->segment == pdev->seg &&
1991 PCI_BUS(bdf) == pdev->bus &&
1992 PCI_DEVFN2(bdf) == devfn )
1993 {
1994 /*
1995 * iommu_add_device() is only called for the hardware
1996 * domain (see xen/drivers/passthrough/pci.c:pci_add_device()).
1997 * Since RMRRs are always reserved in the e820 map for the hardware
1998 * domain, there shouldn't be a conflict.
1999 */
2000 ret = rmrr_identity_mapping(pdev->domain, 1, rmrr, 0);
2001 if ( ret )
2002 dprintk(XENLOG_ERR VTDPREFIX, "d%d: RMRR mapping failed\n",
2003 pdev->domain->domain_id);
2004 }
2005 }
2006
2007 return 0;
2008 }
2009
2010 static int intel_iommu_enable_device(struct pci_dev *pdev)
2011 {
2012 struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
2013 int ret = drhd ? ats_device(pdev, drhd) : -ENODEV;
2014
2015 pci_vtd_quirk(pdev);
2016
2017 if ( ret <= 0 )
2018 return ret;
2019
2020 ret = enable_ats_device(pdev, &drhd->iommu->ats_devices);
2021
2022 return ret >= 0 ? 0 : ret;
2023 }
2024
2025 static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
2026 {
2027 struct acpi_rmrr_unit *rmrr;
2028 u16 bdf;
2029 int i;
2030
2031 if ( !pdev->domain )
2032 return -EINVAL;
2033
2034 for_each_rmrr_device ( rmrr, bdf, i )
2035 {
2036 if ( rmrr->segment != pdev->seg ||
2037 PCI_BUS(bdf) != pdev->bus ||
2038 PCI_DEVFN2(bdf) != devfn )
2039 continue;
2040
2041 /*
2042          * The flag argument is ignored when clearing these mappings, so
2043          * passing 0 here is always safe and strict.
2044 */
2045 rmrr_identity_mapping(pdev->domain, 0, rmrr, 0);
2046 }
2047
2048 return domain_context_unmap(pdev->domain, devfn, pdev);
2049 }
2050
2051 static int __hwdom_init setup_hwdom_device(u8 devfn, struct pci_dev *pdev)
2052 {
2053 return domain_context_mapping(pdev->domain, devfn, pdev);
2054 }
2055
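/*
 * Acknowledge any outstanding faults on this IOMMU by clearing the
 * first fault recording register and all fault status bits.
 */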
2056 void clear_fault_bits(struct iommu *iommu)
2057 {
2058 u64 val;
2059 unsigned long flags;
2060
2061 spin_lock_irqsave(&iommu->register_lock, flags);
2062 val = dmar_readq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8);
2063 dmar_writeq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8, val);
2064 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
2065 spin_unlock_irqrestore(&iommu->register_lock, flags);
2066 }
2067
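/*
 * Steer the IOMMU's fault-event interrupt to CPUs on the NUMA node the
 * unit belongs to (per the ACPI RHSA entry), falling back to all
 * online CPUs when no usable node information is available.
 */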
2068 static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
2069 {
2070 const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
2071 unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
2072 : NUMA_NO_NODE;
2073 const cpumask_t *cpumask = &cpu_online_map;
2074
2075 if ( node < MAX_NUMNODES && node_online(node) &&
2076 cpumask_intersects(&node_to_cpumask(node), cpumask) )
2077 cpumask = &node_to_cpumask(node);
2078 dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
2079 }
2080
2081 int adjust_vtd_irq_affinities(void)
2082 {
2083 struct acpi_drhd_unit *drhd;
2084
2085 if ( !iommu_enabled )
2086 return 0;
2087
2088 for_each_drhd_unit ( drhd )
2089 adjust_irq_affinity(drhd);
2090
2091 return 0;
2092 }
2093 __initcall(adjust_vtd_irq_affinities);
2094
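/*
 * Programme every VT-d engine: bind and unmask the fault interrupt,
 * enable queued invalidation and interrupt remapping where possible,
 * install the root entries and flush all caches.  Used both at boot
 * (from intel_vtd_setup()) and on resume (from vtd_resume()).
 */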
2095 static int __must_check init_vtd_hw(void)
2096 {
2097 struct acpi_drhd_unit *drhd;
2098 struct iommu *iommu;
2099 struct iommu_flush *flush = NULL;
2100 int ret;
2101 unsigned long flags;
2102 u32 sts;
2103
2104 /*
2105 * Basic VT-d HW init: set VT-d interrupt, clear VT-d faults.
2106 */
2107 for_each_drhd_unit ( drhd )
2108 {
2109 adjust_irq_affinity(drhd);
2110
2111 iommu = drhd->iommu;
2112
2113 clear_fault_bits(iommu);
2114
2115 spin_lock_irqsave(&iommu->register_lock, flags);
2116 sts = dmar_readl(iommu->reg, DMAR_FECTL_REG);
2117 sts &= ~DMA_FECTL_IM;
2118 dmar_writel(iommu->reg, DMAR_FECTL_REG, sts);
2119 spin_unlock_irqrestore(&iommu->register_lock, flags);
2120 }
2121
2122 /*
2123      * Enable queued invalidation
2124 */
2125 for_each_drhd_unit ( drhd )
2126 {
2127 iommu = drhd->iommu;
2128 /*
2129          * If queued invalidation cannot be enabled, fall back to
2130          * register-based invalidation
2131 */
2132 if ( enable_qinval(iommu) != 0 )
2133 {
2134 flush = iommu_get_flush(iommu);
2135 flush->context = flush_context_reg;
2136 flush->iotlb = flush_iotlb_reg;
2137 }
2138 }
2139
2140 /*
2141 * Enable interrupt remapping
2142 */
2143 if ( iommu_intremap )
2144 {
2145 int apic;
2146 for ( apic = 0; apic < nr_ioapics; apic++ )
2147 {
2148 if ( ioapic_to_iommu(IO_APIC_ID(apic)) == NULL )
2149 {
2150 iommu_intremap = 0;
2151 dprintk(XENLOG_ERR VTDPREFIX,
2152 "ioapic_to_iommu: ioapic %#x (id: %#x) is NULL! "
2153 "Will not try to enable Interrupt Remapping.\n",
2154 apic, IO_APIC_ID(apic));
2155 break;
2156 }
2157 }
2158 }
2159 if ( iommu_intremap )
2160 {
2161 for_each_drhd_unit ( drhd )
2162 {
2163 iommu = drhd->iommu;
2164 if ( enable_intremap(iommu, 0) != 0 )
2165 {
2166 iommu_intremap = 0;
2167 dprintk(XENLOG_WARNING VTDPREFIX,
2168 "Interrupt Remapping not enabled\n");
2169
2170 break;
2171 }
2172 }
2173 if ( !iommu_intremap )
2174 for_each_drhd_unit ( drhd )
2175 disable_intremap(drhd->iommu);
2176 }
2177
2178 /*
2179      * Set the root entry for each VT-d engine.  After setting the root
2180      * entry, the context cache must be invalidated globally, followed
2181      * by a global IOTLB invalidation.
2182 */
2183 for_each_drhd_unit ( drhd )
2184 {
2185 iommu = drhd->iommu;
2186 ret = iommu_set_root_entry(iommu);
2187 if ( ret )
2188 {
2189 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
2190 return -EIO;
2191 }
2192 }
2193
2194 return iommu_flush_all();
2195 }
2196
2197 static void __hwdom_init setup_hwdom_rmrr(struct domain *d)
2198 {
2199 struct acpi_rmrr_unit *rmrr;
2200 u16 bdf;
2201 int ret, i;
2202
2203 pcidevs_lock();
2204 for_each_rmrr_device ( rmrr, bdf, i )
2205 {
2206 /*
2207          * We are adding a device to the hardware domain here.
2208          * Since RMRRs are always reserved in the e820 map for the hardware
2209          * domain, there shouldn't be a conflict, so passing a flag of 0 is
2210          * always safe and strict.
2211 */
2212 ret = rmrr_identity_mapping(d, 1, rmrr, 0);
2213 if ( ret )
2214 dprintk(XENLOG_ERR VTDPREFIX,
2215 "IOMMU: mapping reserved region failed\n");
2216 }
2217 pcidevs_unlock();
2218 }
2219
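/*
 * Boot-time VT-d setup: negotiate the optional features (snoop
 * control, passthrough, queued invalidation, interrupt remapping,
 * posted interrupts, shared EPT tables) across all DRHD units, then
 * initialise the hardware.  On any failure the IOMMU feature flags
 * are cleared again.
 */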
2220 int __init intel_vtd_setup(void)
2221 {
2222 struct acpi_drhd_unit *drhd;
2223 struct iommu *iommu;
2224 int ret;
2225
2226 if ( list_empty(&acpi_drhd_units) )
2227 {
2228 ret = -ENODEV;
2229 goto error;
2230 }
2231
2232 if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
2233 {
2234 ret = -EPERM;
2235 goto error;
2236 }
2237
2238 platform_quirks_init();
2239 if ( !iommu_enable )
2240 {
2241 ret = -ENODEV;
2242 goto error;
2243 }
2244
2245 /* We enable the following features only if they are supported by all VT-d
2246 * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt
2247 * Remapping, and Posted Interrupt
2248 */
2249 for_each_drhd_unit ( drhd )
2250 {
2251 iommu = drhd->iommu;
2252
2253 printk("Intel VT-d iommu %"PRIu32" supported page sizes: 4kB",
2254 iommu->index);
2255         if ( cap_sps_2mb(iommu->cap) )
2256 printk(", 2MB");
2257
2258         if ( cap_sps_1gb(iommu->cap) )
2259 printk(", 1GB");
2260
2261 printk(".\n");
2262
2263 if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
2264 iommu_snoop = 0;
2265
2266 if ( iommu_passthrough && !ecap_pass_thru(iommu->ecap) )
2267 iommu_passthrough = 0;
2268
2269 if ( iommu_qinval && !ecap_queued_inval(iommu->ecap) )
2270 iommu_qinval = 0;
2271
2272 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
2273 iommu_intremap = 0;
2274
2275 /*
2276 * We cannot use posted interrupt if X86_FEATURE_CX16 is
2277 * not supported, since we count on this feature to
2278 * atomically update 16-byte IRTE in posted format.
2279 */
2280 if ( !cap_intr_post(iommu->cap) || !cpu_has_cx16 )
2281 iommu_intpost = 0;
2282
2283 if ( !vtd_ept_page_compatible(iommu) )
2284 iommu_hap_pt_share = 0;
2285
2286 ret = iommu_set_interrupt(drhd);
2287 if ( ret )
2288 {
2289 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
2290 goto error;
2291 }
2292 }
2293
2294 softirq_tasklet_init(&vtd_fault_tasklet, do_iommu_page_fault, 0);
2295
2296 if ( !iommu_qinval && iommu_intremap )
2297 {
2298 iommu_intremap = 0;
2299 dprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled "
2300 "since Queued Invalidation isn't supported or enabled.\n");
2301 }
2302
2303 #define P(p,s) printk("Intel VT-d %s %senabled.\n", s, (p)? "" : "not ")
2304 P(iommu_snoop, "Snoop Control");
2305 P(iommu_passthrough, "Dom0 DMA Passthrough");
2306 P(iommu_qinval, "Queued Invalidation");
2307 P(iommu_intremap, "Interrupt Remapping");
2308 P(iommu_intpost, "Posted Interrupt");
2309 P(iommu_hap_pt_share, "Shared EPT tables");
2310 #undef P
2311
2312 ret = scan_pci_devices();
2313 if ( ret )
2314 goto error;
2315
2316 ret = init_vtd_hw();
2317 if ( ret )
2318 goto error;
2319
2320 register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1);
2321
2322 return 0;
2323
2324 error:
2325 iommu_enabled = 0;
2326 iommu_snoop = 0;
2327 iommu_passthrough = 0;
2328 iommu_qinval = 0;
2329 iommu_intremap = 0;
2330 iommu_intpost = 0;
2331 return ret;
2332 }
2333
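/*
 * Move a device from one domain's IOMMU context to another's: tear
 * down the source's RMRR mappings (unless it is the hardware domain),
 * remap the context entry, and register/unregister the VMX
 * posted-interrupt hooks as either domain gains or loses its first or
 * last assigned device.
 */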
2334 static int reassign_device_ownership(
2335 struct domain *source,
2336 struct domain *target,
2337 u8 devfn, struct pci_dev *pdev)
2338 {
2339 int ret;
2340
2341 /*
2342 * Devices assigned to untrusted domains (here assumed to be any domU)
2343 * can attempt to send arbitrary LAPIC/MSI messages. We are unprotected
2344 * by the root complex unless interrupt remapping is enabled.
2345 */
2346 if ( (target != hardware_domain) && !iommu_intremap )
2347 untrusted_msi = true;
2348
2349 /*
2350      * If the device belongs to the hardware domain and has an RMRR, don't
2351      * remove the mapping from the hardware domain, because the BIOS may
2352      * use the RMRR at boot time.
2353 */
2354 if ( !is_hardware_domain(source) )
2355 {
2356 const struct acpi_rmrr_unit *rmrr;
2357 u16 bdf;
2358 unsigned int i;
2359
2360 for_each_rmrr_device( rmrr, bdf, i )
2361 if ( rmrr->segment == pdev->seg &&
2362 PCI_BUS(bdf) == pdev->bus &&
2363 PCI_DEVFN2(bdf) == devfn )
2364 {
2365 /*
2366                  * The RMRR flag is always ignored when removing a device,
2367                  * but passing 0 is always safe and strict.
2368 */
2369 ret = rmrr_identity_mapping(source, 0, rmrr, 0);
2370 if ( ret != -ENOENT )
2371 return ret;
2372 }
2373 }
2374
2375 ret = domain_context_unmap(source, devfn, pdev);
2376 if ( ret )
2377 return ret;
2378
2379 if ( !has_arch_pdevs(target) )
2380 vmx_pi_hooks_assign(target);
2381
2382 ret = domain_context_mapping(target, devfn, pdev);
2383 if ( ret )
2384 {
2385 if ( !has_arch_pdevs(target) )
2386 vmx_pi_hooks_deassign(target);
2387
2388 return ret;
2389 }
2390
2391 if ( devfn == pdev->devfn )
2392 {
2393 list_move(&pdev->domain_list, &target->arch.pdev_list);
2394 pdev->domain = target;
2395 }
2396
2397 if ( !has_arch_pdevs(source) )
2398 vmx_pi_hooks_deassign(source);
2399
2400 return ret;
2401 }
2402
2403 static int intel_iommu_assign_device(
2404 struct domain *d, u8 devfn, struct pci_dev *pdev, u32 flag)
2405 {
2406 struct acpi_rmrr_unit *rmrr;
2407 int ret = 0, i;
2408 u16 bdf, seg;
2409 u8 bus;
2410
2411 if ( list_empty(&acpi_drhd_units) )
2412 return -ENODEV;
2413
2414 seg = pdev->seg;
2415 bus = pdev->bus;
2416 /*
2417      * In rare cases a given RMRR is shared by multiple devices, which
2418      * would obviously put the security of the system at risk.  We
2419      * therefore prevent this sort of device assignment by default, but
2420      * it can be permitted if the user sets
2421 * "pci = [ 'sbdf, rdm_policy=relaxed' ]"
2422 *
2423      * TODO: in the future we can introduce a group device assignment
2424      * interface to make sure devices sharing an RMRR are assigned to
2425      * the same domain together.
2426 */
2427 for_each_rmrr_device( rmrr, bdf, i )
2428 {
2429 if ( rmrr->segment == seg &&
2430 PCI_BUS(bdf) == bus &&
2431 PCI_DEVFN2(bdf) == devfn &&
2432 rmrr->scope.devices_cnt > 1 )
2433 {
2434 bool_t relaxed = !!(flag & XEN_DOMCTL_DEV_RDM_RELAXED);
2435
2436 printk(XENLOG_GUEST "%s" VTDPREFIX
2437 " It's %s to assign %04x:%02x:%02x.%u"
2438 " with shared RMRR at %"PRIx64" for Dom%d.\n",
2439 relaxed ? XENLOG_WARNING : XENLOG_ERR,
2440 relaxed ? "risky" : "disallowed",
2441 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
2442 rmrr->base_address, d->domain_id);
2443 if ( !relaxed )
2444 return -EPERM;
2445 }
2446 }
2447
2448 ret = reassign_device_ownership(hardware_domain, d, devfn, pdev);
2449 if ( ret )
2450 return ret;
2451
2452 /* Setup rmrr identity mapping */
2453 for_each_rmrr_device( rmrr, bdf, i )
2454 {
2455 if ( rmrr->segment == seg &&
2456 PCI_BUS(bdf) == bus &&
2457 PCI_DEVFN2(bdf) == devfn )
2458 {
2459 ret = rmrr_identity_mapping(d, 1, rmrr, flag);
2460 if ( ret )
2461 {
2462 reassign_device_ownership(d, hardware_domain, devfn, pdev);
2463 printk(XENLOG_G_ERR VTDPREFIX
2464 " cannot map reserved region (%"PRIx64",%"PRIx64"] for Dom%d (%d)\n",
2465 rmrr->base_address, rmrr->end_address,
2466 d->domain_id, ret);
2467 break;
2468 }
2469 }
2470 }
2471
2472 return ret;
2473 }
2474
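/*
 * Devices that cannot be isolated from one another share a group id:
 * normally the device's own BDF, or the BDF reported by
 * find_upstream_bridge() for devices sitting behind a PCIe-to-PCI
 * bridge, so that such devices can only be assigned together.
 */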
2475 static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
2476 {
2477 u8 secbus;
2478 if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 0 )
2479 return -1;
2480 else
2481 return PCI_BDF2(bus, devfn);
2482 }
2483
2484 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
2485
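/*
 * S3 preparation: save each IOMMU's fault-event interrupt registers
 * and, unless force_iommu is set, disable translation (and queued
 * invalidation where interrupt remapping is off).
 */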
2486 static int __must_check vtd_suspend(void)
2487 {
2488 struct acpi_drhd_unit *drhd;
2489 struct iommu *iommu;
2490 u32 i;
2491 int rc;
2492
2493 if ( !iommu_enabled )
2494 return 0;
2495
2496 rc = iommu_flush_all();
2497 if ( unlikely(rc) )
2498 {
2499 printk(XENLOG_WARNING VTDPREFIX
2500 " suspend: IOMMU flush all failed: %d\n", rc);
2501
2502 return rc;
2503 }
2504
2505 for_each_drhd_unit ( drhd )
2506 {
2507 iommu = drhd->iommu;
2508 i = iommu->index;
2509
2510 iommu_state[i][DMAR_FECTL_REG] =
2511 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2512 iommu_state[i][DMAR_FEDATA_REG] =
2513 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2514 iommu_state[i][DMAR_FEADDR_REG] =
2515 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2516 iommu_state[i][DMAR_FEUADDR_REG] =
2517 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2518
2519 /* don't disable VT-d engine when force_iommu is set. */
2520 if ( force_iommu )
2521 continue;
2522
2523 iommu_disable_translation(iommu);
2524
2525         /* If interrupt remapping is enabled, queued invalidation
2526          * will be disabled after interrupt remapping is disabled
2527          * during local APIC suspend
2528 */
2529 if ( !iommu_intremap && iommu_qinval )
2530 disable_qinval(iommu);
2531 }
2532
2533 return 0;
2534 }
2535
2536 static void vtd_crash_shutdown(void)
2537 {
2538 struct acpi_drhd_unit *drhd;
2539 struct iommu *iommu;
2540
2541 if ( !iommu_enabled )
2542 return;
2543
2544 if ( iommu_flush_all() )
2545 printk(XENLOG_WARNING VTDPREFIX
2546 " crash shutdown: IOMMU flush all failed\n");
2547
2548 for_each_drhd_unit ( drhd )
2549 {
2550 iommu = drhd->iommu;
2551 iommu_disable_translation(iommu);
2552 disable_intremap(drhd->iommu);
2553 disable_qinval(drhd->iommu);
2554 }
2555 }
2556
2557 static void vtd_resume(void)
2558 {
2559 struct acpi_drhd_unit *drhd;
2560 struct iommu *iommu;
2561 u32 i;
2562 unsigned long flags;
2563
2564 if ( !iommu_enabled )
2565 return;
2566
2567 if ( init_vtd_hw() != 0 && force_iommu )
2568 panic("IOMMU setup failed, crash Xen for security purpose");
2569
2570 for_each_drhd_unit ( drhd )
2571 {
2572 iommu = drhd->iommu;
2573 i = iommu->index;
2574
2575 spin_lock_irqsave(&iommu->register_lock, flags);
2576 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2577 (u32) iommu_state[i][DMAR_FECTL_REG]);
2578 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2579 (u32) iommu_state[i][DMAR_FEDATA_REG]);
2580 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2581 (u32) iommu_state[i][DMAR_FEADDR_REG]);
2582 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2583 (u32) iommu_state[i][DMAR_FEUADDR_REG]);
2584 spin_unlock_irqrestore(&iommu->register_lock, flags);
2585
2586 iommu_enable_translation(drhd);
2587 }
2588 }
2589
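/*
 * Recursively walk and print a VT-d page table for the 'V' debug key,
 * processing pending softirqs periodically as the walk can be long.
 */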
2590 static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa,
2591 int indent)
2592 {
2593 paddr_t address;
2594 int i;
2595 struct dma_pte *pt_vaddr, *pte;
2596 int next_level;
2597
2598 if ( level < 1 )
2599 return;
2600
2601 pt_vaddr = map_vtd_domain_page(pt_maddr);
2602 if ( pt_vaddr == NULL )
2603 {
2604 printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
2605 return;
2606 }
2607
2608 next_level = level - 1;
2609 for ( i = 0; i < PTE_NUM; i++ )
2610 {
2611 if ( !(i % 2) )
2612 process_pending_softirqs();
2613
2614 pte = &pt_vaddr[i];
2615 if ( !dma_pte_present(*pte) )
2616 continue;
2617
2618 address = gpa + offset_level_address(i, level);
2619 if ( next_level >= 1 )
2620 vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level,
2621 address, indent + 1);
2622 else
2623 printk("%*sgfn: %08lx mfn: %08lx\n",
2624 indent, "",
2625 (unsigned long)(address >> PAGE_SHIFT_4K),
2626 (unsigned long)(dma_pte_addr(*pte) >> PAGE_SHIFT_4K));
2627 }
2628
2629 unmap_vtd_domain_page(pt_vaddr);
2630 }
2631
2632 static void vtd_dump_p2m_table(struct domain *d)
2633 {
2634 const struct domain_iommu *hd;
2635
2636 if ( list_empty(&acpi_drhd_units) )
2637 return;
2638
2639 hd = dom_iommu(d);
2640 printk("p2m table has %d levels\n", agaw_to_level(hd->arch.agaw));
2641 vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0);
2642 }
2643
2644 const struct iommu_ops intel_iommu_ops = {
2645 .init = intel_iommu_domain_init,
2646 .hwdom_init = intel_iommu_hwdom_init,
2647 .add_device = intel_iommu_add_device,
2648 .enable_device = intel_iommu_enable_device,
2649 .remove_device = intel_iommu_remove_device,
2650 .assign_device = intel_iommu_assign_device,
2651 .teardown = iommu_domain_teardown,
2652 .map_page = intel_iommu_map_page,
2653 .unmap_page = intel_iommu_unmap_page,
2654 .free_page_table = iommu_free_page_table,
2655 .reassign_device = reassign_device_ownership,
2656 .get_device_group_id = intel_iommu_group_id,
2657 .update_ire_from_apic = io_apic_write_remap_rte,
2658 .update_ire_from_msi = msi_msg_write_remap_rte,
2659 .read_apic_from_ire = io_apic_read_remap_rte,
2660 .read_msi_from_ire = msi_msg_read_remap_rte,
2661 .setup_hpet_msi = intel_setup_hpet_msi,
2662 .suspend = vtd_suspend,
2663 .resume = vtd_resume,
2664 .share_p2m = iommu_set_pgd,
2665 .crash_shutdown = vtd_crash_shutdown,
2666 .iotlb_flush = iommu_flush_iotlb_pages,
2667 .iotlb_flush_all = iommu_flush_iotlb_all,
2668 .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
2669 .dump_p2m_table = vtd_dump_p2m_table,
2670 };
2671
2672 /*
2673 * Local variables:
2674 * mode: C
2675 * c-file-style: "BSD"
2676 * c-basic-offset: 4
2677 * tab-width: 4
2678 * indent-tabs-mode: nil
2679 * End:
2680 */
2681