/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/list.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"

#include <asm/apic.h>
#include <asm/io_apic.h>
#define nr_ioapic_entries(i)  nr_ioapic_entries[i]

/*
 * source validation type (SVT)
 */
#define SVT_NO_VERIFY       0x0  /* no verification is required */
#define SVT_VERIFY_SID_SQ   0x1  /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS      0x2  /* verify bus of request-id */

/*
 * source-id qualifier (SQ)
 */
#define SQ_ALL_16       0x0  /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1  0x1  /* verify most significant 13 bits, ignore
                              * the third least significant bit
                              */
#define SQ_13_IGNORE_2  0x2  /* verify most significant 13 bits, ignore
                              * the second and third least significant bits
                              */
#define SQ_13_IGNORE_3  0x3  /* verify most significant 13 bits, ignore
                              * the three least significant bits
                              */
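
/*
 * These SVT/SQ values, together with a source-id (SID), are written into each
 * IRTE by set_ire_sid() below; per the VT-d specification the IOMMU compares
 * the requester-id of a remappable interrupt request against these fields
 * before accepting the request.
 */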

/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */
static int **apic_pin_2_ir_idx;

static int init_apic_pin_2_ir_idx(void)
{
    int *_apic_pin_2_ir_idx;
    unsigned int nr_pins, i;

    /* Here we shouldn't need to re-init when resuming from S3. */
    if ( apic_pin_2_ir_idx != NULL )
        return 0;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
        nr_pins += nr_ioapic_entries(i);

    _apic_pin_2_ir_idx = xmalloc_array(int, nr_pins);
    apic_pin_2_ir_idx = xmalloc_array(int *, nr_ioapics);
    if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) )
    {
        xfree(_apic_pin_2_ir_idx);
        xfree(apic_pin_2_ir_idx);
        return -ENOMEM;
    }

    for ( i = 0; i < nr_pins; i++ )
        _apic_pin_2_ir_idx[i] = -1;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
    {
        apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins];
        nr_pins += nr_ioapic_entries(i);
    }

    return 0;
}

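/*
 * Look up the source-id (BDF) recorded for the IO-APIC with the given APIC ID
 * in the ACPI DRHD device scope.  Returns 0 (and logs an error) if no match
 * is found.
 */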
static u16 apicid_to_bdf(int apic_id)
{
    struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
    struct acpi_ioapic_unit *acpi_ioapic_unit;

    list_for_each_entry ( acpi_ioapic_unit, &drhd->ioapic_list, list )
        if ( acpi_ioapic_unit->apic_id == apic_id )
            return acpi_ioapic_unit->ioapic.info;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for the apic_id!\n");
    return 0;
}

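/*
 * Likewise, look up the source-id (BDF) recorded for the given HPET block in
 * the ACPI DRHD device scope.
 */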
static u16 hpetid_to_bdf(unsigned int hpet_id)
{
    struct acpi_drhd_unit *drhd = hpet_to_drhd(hpet_id);
    struct acpi_hpet_unit *acpi_hpet_unit;

    list_for_each_entry ( acpi_hpet_unit, &drhd->hpet_list, list )
        if ( acpi_hpet_unit->id == hpet_id )
            return acpi_hpet_unit->bdf;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for HPET %u!\n", hpet_id);
    return 0;
}

static void set_ire_sid(struct iremap_entry *ire,
                        unsigned int svt, unsigned int sq, unsigned int sid)
{
    ire->remap.svt = svt;
    ire->remap.sq = sq;
    ire->remap.sid = sid;
}

static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
{
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                apicid_to_bdf(apic_id));
}

static void set_hpet_source_id(unsigned int id, struct iremap_entry *ire)
{
    /*
     * Should really use SQ_ALL_16.  Some platforms are broken.
     * While we figure out the right quirks for these broken platforms, use
     * SQ_13_IGNORE_3 for now.
     */
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id));
}

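/*
 * Extended Interrupt Mode (x2APIC) can only be used if every IO-APIC is
 * covered by a DRHD unit and every IOMMU advertises queued invalidation,
 * interrupt remapping and EIM support.
 */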
bool __init intel_iommu_supports_eim(void)
{
    struct acpi_drhd_unit *drhd;
    unsigned int apic;

    if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) )
        return false;

    /* We MUST have a DRHD unit for each IOAPIC. */
    for ( apic = 0; apic < nr_ioapics; apic++ )
        if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "There is no DRHD for IOAPIC %#x (id: %#x)!\n",
                    apic, IO_APIC_ID(apic));
            return false;
        }

    for_each_drhd_unit ( drhd )
        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
             !ecap_intr_remap(drhd->iommu->ecap) ||
             !ecap_eim(drhd->iommu->ecap) )
            return false;

    return true;
}

/*
 * The caller must hold iremap_lock; this ensures no other software changes
 * the same IRTE behind us.  With that guarantee, if only the high qword or
 * only the low qword of the IRTE needs updating, this function can present
 * an atomic update to VT-d hardware even when the cmpxchg16b instruction is
 * not supported.
 */
static void update_irte(struct vtd_iommu *iommu, struct iremap_entry *entry,
                        const struct iremap_entry *new_ire, bool atomic)
{
    ASSERT(spin_is_locked(&iommu->intremap.lock));

    if ( cpu_has_cx16 )
    {
        __uint128_t ret;
        struct iremap_entry old_ire;

        old_ire = *entry;
        ret = cmpxchg16b(entry, &old_ire, new_ire);

        /*
         * In the above we use cmpxchg16b to atomically update the 128-bit
         * IRTE, and the hardware cannot update the IRTE behind us, so the
         * return value of cmpxchg16b should equal old_ire.  This ASSERT
         * validates that.
         */
        ASSERT(ret == old_ire.val);
    }
    else
    {
        /*
         * VT-d hardware doesn't update IRTEs behind us, nor does other
         * software, since we hold iremap_lock.  If the caller wants VT-d
         * hardware to always see a consistent entry but we can't provide
         * one, raise a bug.
         */
        if ( entry->lo == new_ire->lo )
            write_atomic(&entry->hi, new_ire->hi);
        else if ( entry->hi == new_ire->hi )
            write_atomic(&entry->lo, new_ire->lo);
        else if ( !atomic )
            *entry = *new_ire;
        else
            BUG();
    }
}

/* Mark specified intr remap entry as free */
static void free_remap_entry(struct vtd_iommu *iommu, int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
        return;

    ASSERT(spin_is_locked(&iommu->intremap.lock));

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    update_irte(iommu, iremap_entry, &new_ire, false);
    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    iommu->intremap.num--;
}

/*
 * Look for a free intr remap entry (or a contiguous set thereof).
 * The caller must hold iremap_lock, and must set up the returned entry
 * before releasing the lock.
 */
static unsigned int alloc_remap_entry(struct vtd_iommu *iommu, unsigned int nr)
{
    struct iremap_entry *iremap_entries = NULL;
    unsigned int i, found;

    ASSERT(spin_is_locked(&iommu->intremap.lock));

    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
    {
        struct iremap_entry *p;
        if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
        {
            /* This entry starts a new page of the table */
            if ( iremap_entries )
                unmap_vtd_domain_page(iremap_entries);

            GET_IREMAP_ENTRY(iommu->intremap.maddr, i,
                             iremap_entries, p);
        }
        else
            p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];

        if ( p->val ) /* not a free entry */
            found = 0;
        else if ( ++found == nr )
            break;
    }

    if ( iremap_entries )
        unmap_vtd_domain_page(iremap_entries);

    if ( i < IREMAP_ENTRY_NR )
        iommu->intremap.num += nr;

    return i;
}

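/*
 * Read back the IRTE at 'index' and convert it into IO-APIC RTE format in
 * *old_rte, so that reads of a remapped RTE return the effective settings.
 */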
static int remap_entry_to_ioapic_rte(
    struct vtd_iommu *iommu, int index, struct IO_xAPIC_route_entry *old_rte)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    unsigned long flags;

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    old_rte->vector = iremap_entry->remap.vector;
    old_rte->delivery_mode = iremap_entry->remap.dlm;
    old_rte->dest_mode = iremap_entry->remap.dm;
    old_rte->trigger = iremap_entry->remap.tm;
    old_rte->__reserved_2 = 0;
    if ( x2apic_enabled )
        old_rte->dest.dest32 = iremap_entry->remap.dst;
    else
    {
        old_rte->dest.logical.__reserved_1 = 0;
        old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
    }

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return 0;
}

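/*
 * Allocate (on first use) and fill in the IRTE backing the given IO-APIC
 * pin, then rewrite *old_rte into remap format so that it points at that
 * IRTE index.
 */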
static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
    int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
    unsigned int rte_upper, unsigned int value)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct iremap_entry new_ire;
    struct IO_APIC_route_remap_entry *remap_rte;
    struct IO_xAPIC_route_entry new_rte;
    int index;
    unsigned long flags;
    bool init = false;

    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
    spin_lock_irqsave(&iommu->intremap.lock, flags);

    index = apic_pin_2_ir_idx[apic][ioapic_pin];
    if ( index < 0 )
    {
        index = alloc_remap_entry(iommu, 1);
        if ( index < IREMAP_ENTRY_NR )
            apic_pin_2_ir_idx[apic][ioapic_pin] = index;
        init = true;
    }

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC intremap index (%d) larger than maximum index (%d)\n",
                index, IREMAP_ENTRY_NR - 1);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    new_ire = *iremap_entry;

    if ( rte_upper )
    {
        if ( x2apic_enabled )
            new_ire.remap.dst = value;
        else
            new_ire.remap.dst = (value >> 24) << 8;
    }
    else
    {
        *(((u32 *)&new_rte) + 0) = value;
        new_ire.remap.fpd = 0;
        new_ire.remap.dm = new_rte.dest_mode;
        new_ire.remap.tm = new_rte.trigger;
        new_ire.remap.dlm = new_rte.delivery_mode;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.avail = 0;
        new_ire.remap.res_1 = 0;
        new_ire.remap.vector = new_rte.vector;
        new_ire.remap.res_2 = 0;

        set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
        new_ire.remap.res_3 = 0;
        new_ire.remap.res_4 = 0;
        new_ire.remap.p = 1;     /* finally, set present bit */

        /* now construct the new IO-APIC RTE */
        remap_rte->vector = new_rte.vector;
        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
        remap_rte->index_15 = (index >> 15) & 0x1;
        remap_rte->index_0_14 = index & 0x7fff;

        remap_rte->delivery_status = new_rte.delivery_status;
        remap_rte->polarity = new_rte.polarity;
        remap_rte->irr = new_rte.irr;
        remap_rte->trigger = new_rte.trigger;
        remap_rte->mask = new_rte.mask;
        remap_rte->reserved = 0;
        remap_rte->format = 1;    /* indicate remap format */
    }

    update_irte(iommu, iremap_entry, &new_ire, !init);
    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);
    return 0;
}

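/*
 * Intercept IO-APIC RTE reads: if the pin already has an IRTE, return the
 * requested half of the RTE reconstructed from that IRTE; otherwise fall
 * back to a plain register read.
 */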
unsigned int io_apic_read_remap_rte(
    unsigned int apic, unsigned int reg)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    int index;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    int rte_upper = (reg & 1) ? 1 : 0;
    struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));

    if ( !iommu->intremap.num ||
         ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
        return __io_apic_read(apic, reg);

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    if ( remap_entry_to_ioapic_rte(iommu, index, &old_rte) )
        return __io_apic_read(apic, reg);

    if ( rte_upper )
        return (*(((u32 *)&old_rte) + 1));
    else
        return (*(((u32 *)&old_rte) + 0));
}

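/*
 * Intercept IO-APIC RTE writes: mask the pin, update (or create) its IRTE,
 * and write the RTE back in remap format.  If the IRTE update fails, fall
 * back to writing the raw value.
 */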
void io_apic_write_remap_rte(
    unsigned int apic, unsigned int reg, unsigned int value)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    struct IO_APIC_route_remap_entry *remap_rte;
    unsigned int rte_upper = (reg & 1) ? 1 : 0;
    struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    int saved_mask;

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;

    /* mask the interrupt while we change the intremap table */
    saved_mask = remap_rte->mask;
    remap_rte->mask = 1;
    __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    remap_rte->mask = saved_mask;

    if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
                                   &old_rte, rte_upper, value) )
    {
        __io_apic_write(apic, reg, value);

        /* Recover the original value of the 'mask' bit */
        if ( rte_upper )
            __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    }
    else
        __ioapic_write_entry(apic, ioapic_pin, 1, old_rte);
}

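/*
 * Choose the source-id validation for an MSI/MSI-X capable device: PCIe
 * devices are verified by SID/SQ (widened for phantom functions), while
 * conventional PCI devices are verified using their upstream bridge's ID
 * or, for PCIe-to-PCI bridges, by bus number.
 */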
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
    u16 seg;
    u8 bus, devfn, secbus;
    int ret;

    if ( !pdev || !ire )
        return;

    seg = pdev->seg;
    bus = pdev->bus;
    devfn = pdev->devfn;
    switch ( pdev->type )
    {
        unsigned int sq;

    case DEV_TYPE_PCIe_ENDPOINT:
    case DEV_TYPE_PCIe_BRIDGE:
    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_PCI_HOST_BRIDGE:
        switch ( pdev->phantom_stride )
        {
        case 1: sq = SQ_13_IGNORE_3; break;
        case 2: sq = SQ_13_IGNORE_2; break;
        case 4: sq = SQ_13_IGNORE_1; break;
        default: sq = SQ_ALL_16; break;
        }
        set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
        break;

    case DEV_TYPE_PCI:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
    case DEV_TYPE_PCI2PCIe_BRIDGE:
        ret = find_upstream_bridge(seg, &bus, &devfn, &secbus);
        if ( ret == 0 ) /* integrated PCI device */
        {
            set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                        PCI_BDF2(bus, devfn));
        }
        else if ( ret == 1 ) /* found an upstream bridge */
        {
            if ( pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
                set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
                            (bus << 8) | pdev->bus);
            else
                set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                            PCI_BDF2(bus, devfn));
        }
        else
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "d%d: no upstream bridge for %04x:%02x:%02x.%u\n",
                    pdev->domain->domain_id,
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;

    default:
        dprintk(XENLOG_WARNING VTDPREFIX,
                "d%d: unknown(%u): %04x:%02x:%02x.%u\n",
                pdev->domain->domain_id, pdev->type,
                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;
    }
}

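/*
 * Reconstruct the effective MSI message (address/data) from the IRTE that a
 * remap-format message refers to, for reads of a remapped MSI/MSI-X entry.
 */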
static int remap_entry_to_msi_msg(
    struct vtd_iommu *iommu, struct msi_msg *msg, unsigned int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct msi_msg_remap_entry *remap_rte;
    unsigned long flags;

    remap_rte = (struct msi_msg_remap_entry *) msg;
    index += (remap_rte->address_lo.index_15 << 15) |
             remap_rte->address_lo.index_0_14;

    if ( index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo =
        MSI_ADDR_BASE_LO |
        ((iremap_entry->remap.dm == 0) ?
         MSI_ADDR_DESTMODE_PHYS:
         MSI_ADDR_DESTMODE_LOGIC) |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
         MSI_ADDR_REDIRECTION_CPU:
         MSI_ADDR_REDIRECTION_LOWPRI);
    if ( x2apic_enabled )
        msg->dest32 = iremap_entry->remap.dst;
    else
        msg->dest32 = (iremap_entry->remap.dst >> 8) & 0xff;
    msg->address_lo |= MSI_ADDR_DEST_ID(msg->dest32);

    msg->data =
        MSI_DATA_TRIGGER_EDGE |
        MSI_DATA_LEVEL_ASSERT |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
         MSI_DATA_DELIVERY_FIXED:
         MSI_DATA_DELIVERY_LOWPRI) |
        iremap_entry->remap.vector;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);
    return 0;
}

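/*
 * Program the IRTE(s) for an MSI/MSI-X source.  A NULL msg frees the IRTEs;
 * otherwise the entry is filled in remapped format or, when a posted
 * interrupt descriptor is attached, in posted format, and the caller's msg
 * is rewritten to the remap-format address/data pointing at the IRTE.
 */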
static int msi_msg_to_remap_entry(
    struct vtd_iommu *iommu, struct pci_dev *pdev,
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct msi_msg_remap_entry *remap_rte;
    unsigned int index, i, nr = 1;
    unsigned long flags;
    const struct pi_desc *pi_desc = msi_desc->pi_desc;

    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        nr = msi_desc->msi.nvec;

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    if ( msg == NULL )
    {
        /* Free specified unused IRTEs */
        for ( i = 0; i < nr; ++i )
        {
            free_remap_entry(iommu, msi_desc->remap_index + i);
            msi_desc[i].irte_initialized = false;
        }
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return 0;
    }

    if ( msi_desc->remap_index < 0 )
    {
        index = alloc_remap_entry(iommu, nr);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = index + i;
    }
    else
        index = msi_desc->remap_index;

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI intremap index (%d) larger than maximum index (%d)!\n",
                index, IREMAP_ENTRY_NR - 1);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = -1;
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);

        return -EFAULT;
    }

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( !pi_desc )
    {
        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
                               MSI_DATA_VECTOR_MASK;
        if ( x2apic_enabled )
            new_ire.remap.dst = msg->dest32;
        else
            new_ire.remap.dst =
                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
        new_ire.remap.p = 1;
    }
    else
    {
        new_ire.post.im = 1;
        new_ire.post.vector = msi_desc->gvec;
        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
        new_ire.post.p = 1;
    }

    if ( pdev )
        set_msi_source_id(pdev, &new_ire);
    else
        set_hpet_source_id(msi_desc->hpet_id, &new_ire);

    /* now construct new MSI/MSI-X rte entry */
    remap_rte = (struct msi_msg_remap_entry *)msg;
    remap_rte->address_lo.dontcare = 0;
    i = index;
    if ( !nr )
        i -= msi_desc->msi_attrib.entry_nr;
    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
    remap_rte->address_lo.index_0_14 = i & 0x7fff;
    remap_rte->address_lo.SHV = 1;
    remap_rte->address_lo.format = 1;

    remap_rte->address_hi = 0;
    remap_rte->data = index - i;

    update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
    msi_desc->irte_initialized = true;

    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return 0;
}

void msi_msg_read_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    if ( drhd )
        remap_entry_to_msi_msg(drhd->iommu, msg,
                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
                               ? msi_desc->msi_attrib.entry_nr : 0);
}

int msi_msg_write_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
                : -EINVAL;
}

int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
{
    struct vtd_iommu *iommu = hpet_to_iommu(msi_desc->hpet_id);
    unsigned long flags;
    int rc = 0;

    if ( !iommu->intremap.maddr )
        return 0;

    spin_lock_irqsave(&iommu->intremap.lock, flags);
    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
    if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "HPET intremap index (%d) larger than maximum index (%d)!\n",
                msi_desc->remap_index, IREMAP_ENTRY_NR - 1);
        msi_desc->remap_index = -1;
        rc = -ENXIO;
    }
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return rc;
}

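/*
 * Enable interrupt remapping on one IOMMU: allocate the interrupt remapping
 * table if necessary, program IRTA (and EIM if requested), set SIRTP, flush
 * the interrupt entry cache and finally set GCMD.IRE.  Queued invalidation
 * must already be enabled.
 */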
int enable_intremap(struct vtd_iommu *iommu, int eim)
{
    u32 sts, gcmd;
    unsigned long flags;

    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);

    if ( !platform_supports_intremap() )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Platform firmware does not support interrupt remapping\n");
        return -EINVAL;
    }

    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);

    /* Return if already enabled by Xen */
    if ( (sts & DMA_GSTS_IRES) && iommu->intremap.maddr )
        return 0;

    if ( !(sts & DMA_GSTS_QIES) )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Queued invalidation is not enabled on IOMMU #%u:"
               " Should not enable interrupt remapping\n", iommu->index);
        return -EINVAL;
    }

    if ( !eim && (sts & DMA_GSTS_CFIS) )
        printk(XENLOG_WARNING VTDPREFIX
               " Compatibility Format Interrupts permitted on IOMMU #%u:"
               " Device pass-through will be insecure\n", iommu->index);

    if ( iommu->intremap.maddr == 0 )
    {
        iommu->intremap.maddr = alloc_pgtable_maddr(IREMAP_ARCH_PAGE_NR,
                                                    iommu->node);
        if ( iommu->intremap.maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
            return -ENOMEM;
        }

        iommu->intremap.num = 0;
    }

    spin_lock_irqsave(&iommu->register_lock, flags);

    /*
     * Set size of the interrupt remapping table and optionally Extended
     * Interrupt Mode.
     */
    dmar_writeq(iommu->reg, DMAR_IRTA_REG,
                iommu->intremap.maddr | IRTA_REG_TABLE_SIZE |
                (eim ? IRTA_EIME : 0));

    /* set SIRTP */
    gcmd = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    gcmd |= DMA_GCMD_SIRTP;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_SIRTPS), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    /* After setting SIRTP, we must globally invalidate the interrupt entry cache */
    iommu_flush_iec_global(iommu);

    spin_lock_irqsave(&iommu->register_lock, flags);
    /* enable interrupt remapping hardware */
    gcmd |= DMA_GCMD_IRE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_IRES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return init_apic_pin_2_ir_idx();
}

void disable_intremap(struct vtd_iommu *iommu)
{
    u32 sts;
    u64 irta;
    unsigned long flags;

    if ( !ecap_intr_remap(iommu->ecap) )
        return;

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( !(sts & DMA_GSTS_IRES) )
        goto out;

    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_IRES), sts);

    /*
     * If we are disabling Interrupt Remapping, make sure we don't stay in
     * Extended Interrupt Mode, as this is unaffected by the Interrupt
     * Remapping flag in each DMAR Global Control Register.
     * Specifically, local APICs in xAPIC mode do not like interrupts
     * delivered in x2APIC mode.  Any code turning interrupt remapping back
     * on will set EIME back correctly.
     */
    if ( !ecap_eim(iommu->ecap) )
        goto out;

    /* Can't read the register unless the extended capabilities say we can */
    irta = dmar_readl(iommu->reg, DMAR_IRTA_REG);
    if ( !(irta & IRTA_EIME) )
        goto out;

    dmar_writel(iommu->reg, DMAR_IRTA_REG, irta & ~IRTA_EIME);
    IOMMU_WAIT_OP(iommu, DMAR_IRTA_REG, dmar_readl,
                  !(irta & IRTA_EIME), irta);

 out:
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * This function is used to enable interrupt remapping when enabling x2APIC.
 */
int intel_iommu_enable_eim(void)
{
    struct acpi_drhd_unit *drhd;
    struct vtd_iommu *iommu;

    if ( system_state < SYS_STATE_active && !platform_supports_x2apic() )
        return -ENXIO;

    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;

        /* Clear previous faults */
        clear_fault_bits(iommu);

        /*
         * Disable interrupt remapping and queued invalidation if
         * already enabled by BIOS
         */
        disable_intremap(iommu);
        disable_qinval(iommu);
    }

    /* Enable queued invalidation */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_qinval(iommu) != 0 )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Queued Invalidation!\n");
            return -EIO;
        }
    }

    /* Enable interrupt remapping */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_intremap(iommu, 1) )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Interrupt Remapping!\n");
            return -EIO;
        }
    }

    return 0;
}

/*
 * This function is used to disable interrupt remapping when suspending the
 * local APIC.
 */
void intel_iommu_disable_eim(void)
{
    struct acpi_drhd_unit *drhd;

    for_each_drhd_unit ( drhd )
        disable_intremap(drhd->iommu);

    for_each_drhd_unit ( drhd )
        disable_qinval(drhd->iommu);
}

/*
 * This function is used to update the IRTE for posted-interrupt
 * when the guest changes MSI/MSI-X information.
 */
int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
                   const uint8_t gvec)
{
    struct irq_desc *desc;
    struct msi_desc *msi_desc;
    int rc;

    desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !desc )
        return -EINVAL;

    msi_desc = desc->msi_desc;
    if ( !msi_desc )
    {
        rc = -ENODEV;
        goto unlock_out;
    }
    msi_desc->pi_desc = pi_desc;
    msi_desc->gvec = gvec;

    spin_unlock_irq(&desc->lock);

    ASSERT(pcidevs_locked());

    return msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);

 unlock_out:
    spin_unlock_irq(&desc->lock);

    return rc;
}