/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/list.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"

#include <asm/apic.h>
#include <asm/io_apic.h>
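/*
 * nr_ioapic_entries is an array; this wrapper lets the code below use a
 * function-style accessor for the per-IO-APIC RTE count.
 */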
#define nr_ioapic_entries(i)  nr_ioapic_entries[i]

/*
 * source validation type (SVT)
 */
#define SVT_NO_VERIFY       0x0  /* no verification is required */
#define SVT_VERIFY_SID_SQ   0x1  /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS      0x2  /* verify bus of request-id */

/*
 * source-id qualifier (SQ)
 */
#define SQ_ALL_16           0x0  /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1      0x1  /* verify most significant 13 bits, ignore
                                  * the third least significant bit
                                  */
#define SQ_13_IGNORE_2      0x2  /* verify most significant 13 bits, ignore
                                  * the second and third least significant bits
                                  */
#define SQ_13_IGNORE_3      0x3  /* verify most significant 13 bits, ignore
                                  * the three least significant bits
                                  */

/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */
static int **apic_pin_2_ir_idx;

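/*
 * Build the lookup table above: one flat array with a slot for every
 * (IO-APIC, pin) pair, plus per-IO-APIC row pointers into it.  Every slot
 * starts out as -1, meaning "no IRTE allocated for this pin yet".
 */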
static int init_apic_pin_2_ir_idx(void)
{
    int *_apic_pin_2_ir_idx;
    unsigned int nr_pins, i;

    /* Here we shouldn't need to re-init when resuming from S3. */
    if ( apic_pin_2_ir_idx != NULL )
        return 0;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
        nr_pins += nr_ioapic_entries(i);

    _apic_pin_2_ir_idx = xmalloc_array(int, nr_pins);
    apic_pin_2_ir_idx = xmalloc_array(int *, nr_ioapics);
    if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) )
    {
        xfree(_apic_pin_2_ir_idx);
        xfree(apic_pin_2_ir_idx);
        apic_pin_2_ir_idx = NULL; /* don't leave a dangling pointer behind */
        return -ENOMEM;
    }

    for ( i = 0; i < nr_pins; i++ )
        _apic_pin_2_ir_idx[i] = -1;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
    {
        apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins];
        nr_pins += nr_ioapic_entries(i);
    }

    return 0;
}

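/*
 * Translate an IO-APIC id into the request-id (BDF) its interrupt messages
 * carry, as enumerated by the ACPI DMAR sub-tables of its DRHD unit.
 */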
static u16 apicid_to_bdf(int apic_id)
{
    struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
    struct acpi_ioapic_unit *acpi_ioapic_unit;

    list_for_each_entry ( acpi_ioapic_unit, &drhd->ioapic_list, list )
        if ( acpi_ioapic_unit->apic_id == apic_id )
            return acpi_ioapic_unit->ioapic.info;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for the apic_id!\n");
    return 0;
}

static u16 hpetid_to_bdf(unsigned int hpet_id)
{
    struct acpi_drhd_unit *drhd = hpet_to_drhd(hpet_id);
    struct acpi_hpet_unit *acpi_hpet_unit;

    list_for_each_entry ( acpi_hpet_unit, &drhd->hpet_list, list )
        if ( acpi_hpet_unit->id == hpet_id )
            return acpi_hpet_unit->bdf;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for HPET %u!\n", hpet_id);
    return 0;
}

static void set_ire_sid(struct iremap_entry *ire,
                        unsigned int svt, unsigned int sq, unsigned int sid)
{
    ire->remap.svt = svt;
    ire->remap.sq = sq;
    ire->remap.sid = sid;
}

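/*
 * The IO-APIC's request-id is fixed and firmware-reported, so all 16 bits
 * of the SID can be verified.
 */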
static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
{
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                apicid_to_bdf(apic_id));
}

static void set_hpet_source_id(unsigned int id, struct iremap_entry *ire)
{
    /*
     * Should really use SQ_ALL_16. Some platforms are broken.
     * While we figure out the right quirks for these broken platforms, use
     * SQ_13_IGNORE_3 for now.
     */
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id));
}
bool __init intel_iommu_supports_eim(void)
{
    struct acpi_drhd_unit *drhd;
    unsigned int apic;

    if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) )
        return false;

    /* We MUST have a DRHD unit for each IO-APIC. */
    for ( apic = 0; apic < nr_ioapics; apic++ )
        if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "There is no DRHD for IO-APIC %#x (id: %#x)!\n",
                    apic, IO_APIC_ID(apic));
            return false;
        }

    for_each_drhd_unit ( drhd )
        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
             !ecap_intr_remap(drhd->iommu->ecap) ||
             !ecap_eim(drhd->iommu->ecap) )
            return false;

    return true;
}

/*
 * The caller must hold iremap_lock, which ensures no other software updates
 * the same IRTE behind us.  With that guarantee, if only the high qword or
 * only the low qword of the IRTE needs updating, this function's atomic
 * variant can present an atomic update to VT-d hardware even when the
 * cmpxchg16b instruction is not supported.
 */
static void update_irte(struct vtd_iommu *iommu, struct iremap_entry *entry,
                        const struct iremap_entry *new_ire, bool atomic)
{
    ASSERT(spin_is_locked(&iommu->intremap.lock));

    if ( cpu_has_cx16 )
    {
        __uint128_t ret;
        struct iremap_entry old_ire;

        old_ire = *entry;
        ret = cmpxchg16b(entry, &old_ire, new_ire);

        /*
         * Above we use cmpxchg16b to atomically update the 128-bit IRTE,
         * and the hardware cannot update the IRTE behind us, so the value
         * returned by cmpxchg16b must equal old_ire.  This ASSERT validates
         * that.
         */
        ASSERT(ret == old_ire.val);
    }
    else
    {
        /*
         * VT-d hardware doesn't update IRTEs behind us, and neither does
         * software, since we hold iremap_lock.  If the caller requires VT-d
         * hardware to always see a consistent entry but we can't provide
         * one, raise a bug.
         */
        if ( entry->lo == new_ire->lo )
            write_atomic(&entry->hi, new_ire->hi);
        else if ( entry->hi == new_ire->hi )
            write_atomic(&entry->lo, new_ire->lo);
        else if ( !atomic )
            *entry = *new_ire;
        else
            BUG();
    }
}

/* Mark specified intr remap entry as free */
static void free_remap_entry(struct vtd_iommu *iommu, int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
        return;

    ASSERT(spin_is_locked(&iommu->intremap.lock));

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    update_irte(iommu, iremap_entry, &new_ire, false);
    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    iommu->intremap.num--;
}

/*
 * Look for a free intr remap entry (or a contiguous set thereof).
 * The caller must hold iremap_lock, and must set up the returned entry
 * before releasing the lock.
 */
static unsigned int alloc_remap_entry(struct vtd_iommu *iommu, unsigned int nr)
{
    struct iremap_entry *iremap_entries = NULL;
    unsigned int i, found;

    ASSERT(spin_is_locked(&iommu->intremap.lock));

    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
    {
        struct iremap_entry *p;
        if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
        {
            /* This entry crosses a page boundary. */
            if ( iremap_entries )
                unmap_vtd_domain_page(iremap_entries);

            GET_IREMAP_ENTRY(iommu->intremap.maddr, i,
                             iremap_entries, p);
        }
        else
            p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];

        if ( p->val ) /* not a free entry */
            found = 0;
        else if ( ++found == nr )
            break;
    }

    if ( iremap_entries )
        unmap_vtd_domain_page(iremap_entries);

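    /*
     * Falling out of the loop leaves i == IREMAP_ENTRY_NR; callers treat
     * any out-of-range index as an allocation failure.
     */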
    if ( i < IREMAP_ENTRY_NR )
        iommu->intremap.num += nr;

    return i;
}

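/*
 * Read an IRTE and reconstruct the IO-APIC RTE fields it represents, so a
 * caller can report the effective routing information back.
 */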
static int remap_entry_to_ioapic_rte(
    struct vtd_iommu *iommu, int index, struct IO_xAPIC_route_entry *old_rte)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    unsigned long flags;

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    old_rte->vector = iremap_entry->remap.vector;
    old_rte->delivery_mode = iremap_entry->remap.dlm;
    old_rte->dest_mode = iremap_entry->remap.dm;
    old_rte->trigger = iremap_entry->remap.tm;
    old_rte->__reserved_2 = 0;
    if ( x2apic_enabled )
        old_rte->dest.dest32 = iremap_entry->remap.dst;
    else
    {
        old_rte->dest.logical.__reserved_1 = 0;
        old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;
    }

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return 0;
}

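/*
 * Fold an IO-APIC RTE write into the corresponding IRTE, allocating one on
 * first use.  RTE writes arrive as separate 32-bit halves (selected by
 * rte_upper), so the existing IRTE is read, modified and written back.
 */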
static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
    int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
    unsigned int rte_upper, unsigned int value)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct iremap_entry new_ire;
    struct IO_APIC_route_remap_entry *remap_rte;
    struct IO_xAPIC_route_entry new_rte;
    int index;
    unsigned long flags;
    bool init = false;

    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
    spin_lock_irqsave(&iommu->intremap.lock, flags);

    index = apic_pin_2_ir_idx[apic][ioapic_pin];
    if ( index < 0 )
    {
        index = alloc_remap_entry(iommu, 1);
        if ( index < IREMAP_ENTRY_NR )
            apic_pin_2_ir_idx[apic][ioapic_pin] = index;
        init = true;
    }

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC intremap index (%d) larger than maximum index (%d)\n",
                index, IREMAP_ENTRY_NR - 1);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    new_ire = *iremap_entry;

    if ( rte_upper )
    {
        if ( x2apic_enabled )
            new_ire.remap.dst = value;
        else
            new_ire.remap.dst = (value >> 24) << 8;
    }
    else
    {
        *(((u32 *)&new_rte) + 0) = value;
        new_ire.remap.fpd = 0;
        new_ire.remap.dm = new_rte.dest_mode;
        new_ire.remap.tm = new_rte.trigger;
        new_ire.remap.dlm = new_rte.delivery_mode;
        /* Hardware requires RH = 1 for lowest priority delivery mode. */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.avail = 0;
        new_ire.remap.res_1 = 0;
        new_ire.remap.vector = new_rte.vector;
        new_ire.remap.res_2 = 0;

        set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
        new_ire.remap.res_3 = 0;
        new_ire.remap.res_4 = 0;
        new_ire.remap.p = 1;     /* finally, set present bit */

        /* Now construct the new IO-APIC RTE. */
        remap_rte->vector = new_rte.vector;
        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
        remap_rte->index_15 = (index >> 15) & 0x1;
        remap_rte->index_0_14 = index & 0x7fff;

        remap_rte->delivery_status = new_rte.delivery_status;
        remap_rte->polarity = new_rte.polarity;
        remap_rte->irr = new_rte.irr;
        remap_rte->trigger = new_rte.trigger;
        remap_rte->mask = new_rte.mask;
        remap_rte->reserved = 0;
        remap_rte->format = 1;    /* indicate remap format */
    }

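    /*
     * Only a freshly allocated (not yet present) IRTE may be updated
     * non-atomically: hardware may already be referencing any other entry.
     */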
    update_irte(iommu, iremap_entry, &new_ire, !init);
    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);
    return 0;
}

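/*
 * Intercept IO-APIC RTE reads: once a pin has an IRTE, report the remapped
 * contents rather than the raw remap-format RTE.
 */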
unsigned int io_apic_read_remap_rte(
    unsigned int apic, unsigned int reg)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    int index;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    int rte_upper = (reg & 1) ? 1 : 0;
    struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));

    if ( !iommu->intremap.num ||
        ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
        return __io_apic_read(apic, reg);

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    if ( remap_entry_to_ioapic_rte(iommu, index, &old_rte) )
        return __io_apic_read(apic, reg);

    if ( rte_upper )
        return (*(((u32 *)&old_rte) + 1));
    else
        return (*(((u32 *)&old_rte) + 0));
}

void io_apic_write_remap_rte(
    unsigned int apic, unsigned int reg, unsigned int value)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    struct IO_APIC_route_remap_entry *remap_rte;
    unsigned int rte_upper = (reg & 1) ? 1 : 0;
    struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    int saved_mask;

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;

    /* Mask the interrupt while we change the intremap table. */
    saved_mask = remap_rte->mask;
    remap_rte->mask = 1;
    __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    remap_rte->mask = saved_mask;

    if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
                                   &old_rte, rte_upper, value) )
    {
        __io_apic_write(apic, reg, value);

        /* Recover the original value of the 'mask' bit. */
        if ( rte_upper )
            __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    }
    else
        __ioapic_write_entry(apic, ioapic_pin, 1, old_rte);
}

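/*
 * Choose SVT/SQ/SID for an MSI source.  Devices using phantom functions
 * issue requests from several function numbers, so the matching low
 * request-id bits are ignored via SQ; requests forwarded by a PCIe-to-PCI
 * bridge can only have their bus number verified.
 */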
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
    u16 seg;
    u8 bus, devfn, secbus;
    int ret;

    if ( !pdev || !ire )
        return;

    seg = pdev->seg;
    bus = pdev->bus;
    devfn = pdev->devfn;
    switch ( pdev->type )
    {
        unsigned int sq;

    case DEV_TYPE_PCIe_ENDPOINT:
    case DEV_TYPE_PCIe_BRIDGE:
    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_PCI_HOST_BRIDGE:
        switch ( pdev->phantom_stride )
        {
        case 1: sq = SQ_13_IGNORE_3; break;
        case 2: sq = SQ_13_IGNORE_2; break;
        case 4: sq = SQ_13_IGNORE_1; break;
        default: sq = SQ_ALL_16; break;
        }
        set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
        break;

    case DEV_TYPE_PCI:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
    case DEV_TYPE_PCI2PCIe_BRIDGE:
        ret = find_upstream_bridge(seg, &bus, &devfn, &secbus);
        if ( ret == 0 ) /* integrated PCI device */
        {
            set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                        PCI_BDF2(bus, devfn));
        }
        else if ( ret == 1 ) /* found an upstream bridge */
        {
            if ( pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
                set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
                            (bus << 8) | pdev->bus);
            else
                set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                            PCI_BDF2(bus, devfn));
        }
        else
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "d%d: no upstream bridge for %04x:%02x:%02x.%u\n",
                    pdev->domain->domain_id,
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;

    default:
        dprintk(XENLOG_WARNING VTDPREFIX,
                "d%d: unknown(%u): %04x:%02x:%02x.%u\n",
                pdev->domain->domain_id, pdev->type,
                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;
    }
}

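/*
 * Reconstruct the MSI message (address/data pair) that an IRTE describes,
 * e.g. for reading back what was programmed.
 */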
static int remap_entry_to_msi_msg(
    struct vtd_iommu *iommu, struct msi_msg *msg, unsigned int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct msi_msg_remap_entry *remap_rte;
    unsigned long flags;

    remap_rte = (struct msi_msg_remap_entry *) msg;
    index += (remap_rte->address_lo.index_15 << 15) |
             remap_rte->address_lo.index_0_14;

    if ( index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return -EFAULT;
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo =
        MSI_ADDR_BASE_LO |
        ((iremap_entry->remap.dm == 0) ?
            MSI_ADDR_DESTMODE_PHYS:
            MSI_ADDR_DESTMODE_LOGIC) |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_ADDR_REDIRECTION_CPU:
            MSI_ADDR_REDIRECTION_LOWPRI);
    if ( x2apic_enabled )
        msg->dest32 = iremap_entry->remap.dst;
    else
        msg->dest32 = (iremap_entry->remap.dst >> 8) & 0xff;
    msg->address_lo |= MSI_ADDR_DEST_ID(msg->dest32);

    msg->data =
        MSI_DATA_TRIGGER_EDGE |
        MSI_DATA_LEVEL_ASSERT |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_DATA_DELIVERY_FIXED:
            MSI_DATA_DELIVERY_LOWPRI) |
        iremap_entry->remap.vector;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);
    return 0;
}

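/*
 * Program the IRTE(s) backing an MSI/MSI-X interrupt and rewrite the MSI
 * message into remap format (handle in the address, subhandle in the data).
 * A NULL msg frees the entries instead; a non-NULL pi_desc selects the
 * posted-interrupt IRTE format.
 */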
static int msi_msg_to_remap_entry(
    struct vtd_iommu *iommu, struct pci_dev *pdev,
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct msi_msg_remap_entry *remap_rte;
    unsigned int index, i, nr = 1;
    unsigned long flags;
    const struct pi_desc *pi_desc = msi_desc->pi_desc;

    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        nr = msi_desc->msi.nvec;

    spin_lock_irqsave(&iommu->intremap.lock, flags);

    if ( msg == NULL )
    {
        /* Free specified unused IRTEs */
        for ( i = 0; i < nr; ++i )
        {
            free_remap_entry(iommu, msi_desc->remap_index + i);
            msi_desc[i].irte_initialized = false;
        }
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);
        return 0;
    }

    if ( msi_desc->remap_index < 0 )
    {
        index = alloc_remap_entry(iommu, nr);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = index + i;
    }
    else
        index = msi_desc->remap_index;

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI intremap index (%d) larger than maximum index (%d)!\n",
                index, IREMAP_ENTRY_NR - 1);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = -1;
        spin_unlock_irqrestore(&iommu->intremap.lock, flags);

        return -EFAULT;
    }

    GET_IREMAP_ENTRY(iommu->intremap.maddr, index,
                     iremap_entries, iremap_entry);

    if ( !pi_desc )
    {
        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
                                MSI_DATA_VECTOR_MASK;
        if ( x2apic_enabled )
            new_ire.remap.dst = msg->dest32;
        else
            new_ire.remap.dst =
                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
        new_ire.remap.p = 1;
    }
    else
    {
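        /*
         * Posted format: rather than delivering the interrupt directly,
         * hardware records the guest vector in the posted-interrupt
         * descriptor addressed by pda_l/pda_h.
         */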
        new_ire.post.im = 1;
        new_ire.post.vector = msi_desc->gvec;
        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
        new_ire.post.p = 1;
    }

    if ( pdev )
        set_msi_source_id(pdev, &new_ire);
    else
        set_hpet_source_id(msi_desc->hpet_id, &new_ire);

    /* Now construct the new MSI/MSI-X RTE. */
    remap_rte = (struct msi_msg_remap_entry *)msg;
    remap_rte->address_lo.dontcare = 0;
    i = index;
    if ( !nr )
        i -= msi_desc->msi_attrib.entry_nr;
    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
    remap_rte->address_lo.index_0_14 = i & 0x7fff;
    remap_rte->address_lo.SHV = 1;
    remap_rte->address_lo.format = 1;

    remap_rte->address_hi = 0;
    remap_rte->data = index - i;

    update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
    msi_desc->irte_initialized = true;

    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return 0;
}

void msi_msg_read_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    if ( drhd )
        remap_entry_to_msi_msg(drhd->iommu, msg,
                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
                               ? msi_desc->msi_attrib.entry_nr : 0);
}

int msi_msg_write_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
                : -EINVAL;
}

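/*
 * Reserve an IRTE for the HPET's MSI at boot, so that later message writes
 * only need to update the pre-allocated entry.
 */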
int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
{
    struct vtd_iommu *iommu = hpet_to_iommu(msi_desc->hpet_id);
    unsigned long flags;
    int rc = 0;

    if ( !iommu->intremap.maddr )
        return 0;

    spin_lock_irqsave(&iommu->intremap.lock, flags);
    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
    if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "HPET intremap index (%d) larger than maximum index (%d)!\n",
                msi_desc->remap_index, IREMAP_ENTRY_NR - 1);
        msi_desc->remap_index = -1;
        rc = -ENXIO;
    }
    spin_unlock_irqrestore(&iommu->intremap.lock, flags);

    return rc;
}

int enable_intremap(struct vtd_iommu *iommu, int eim)
{
    u32 sts, gcmd;
    unsigned long flags;

    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);

    if ( !platform_supports_intremap() )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Platform firmware does not support interrupt remapping\n");
        return -EINVAL;
    }

    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);

    /* Return if already enabled by Xen */
    if ( (sts & DMA_GSTS_IRES) && iommu->intremap.maddr )
        return 0;

    if ( !(sts & DMA_GSTS_QIES) )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Queued invalidation is not enabled on IOMMU #%u:"
               " Should not enable interrupt remapping\n", iommu->index);
        return -EINVAL;
    }

    if ( !eim && (sts & DMA_GSTS_CFIS) )
        printk(XENLOG_WARNING VTDPREFIX
               " Compatibility Format Interrupts permitted on IOMMU #%u:"
               " Device pass-through will be insecure\n", iommu->index);

    if ( iommu->intremap.maddr == 0 )
    {
        iommu->intremap.maddr = alloc_pgtable_maddr(IREMAP_ARCH_PAGE_NR,
                                                    iommu->node);
        if ( iommu->intremap.maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
            return -ENOMEM;
        }

        iommu->intremap.num = 0;
    }

    spin_lock_irqsave(&iommu->register_lock, flags);

    /*
     * Set the size of the interrupt remapping table and optionally Extended
     * Interrupt Mode.
     */
    dmar_writeq(iommu->reg, DMAR_IRTA_REG,
                iommu->intremap.maddr | IRTA_REG_TABLE_SIZE |
                (eim ? IRTA_EIME : 0));

    /* Set SIRTP. */
    gcmd = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    gcmd |= DMA_GCMD_SIRTP;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_SIRTPS), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    /*
     * After setting SIRTP, we must globally invalidate the interrupt entry
     * cache.
     */
    iommu_flush_iec_global(iommu);

    spin_lock_irqsave(&iommu->register_lock, flags);
    /* Enable interrupt remapping hardware. */
    gcmd |= DMA_GCMD_IRE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_IRES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return init_apic_pin_2_ir_idx();
}

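/*
 * Disable interrupt remapping on one IOMMU, also dropping out of Extended
 * Interrupt Mode if the hardware lets us see and clear EIME.
 */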
void disable_intremap(struct vtd_iommu *iommu)
{
    u32 sts;
    u64 irta;
    unsigned long flags;

    if ( !ecap_intr_remap(iommu->ecap) )
        return;

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( !(sts & DMA_GSTS_IRES) )
        goto out;

    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_IRES), sts);

    /*
     * If we are disabling Interrupt Remapping, make sure we don't stay in
     * Extended Interrupt Mode, as this is unaffected by the Interrupt
     * Remapping flag in each DMAR Global Control Register.
     * Specifically, local apics in xapic mode do not like interrupts delivered
     * in x2apic mode.  Any code turning interrupt remapping back on will set
     * EIME back correctly.
     */
    if ( !ecap_eim(iommu->ecap) )
        goto out;

    /* Can't read the register unless the ecap says we can. */
    irta = dmar_readl(iommu->reg, DMAR_IRTA_REG);
    if ( !(irta & IRTA_EIME) )
        goto out;

    dmar_writel(iommu->reg, DMAR_IRTA_REG, irta & ~IRTA_EIME);
    IOMMU_WAIT_OP(iommu, DMAR_IRTA_REG, dmar_readl,
                  !(irta & IRTA_EIME), irta);

out:
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * This function is used to enable interrupt remapping when
 * enabling x2apic mode.
 */
int intel_iommu_enable_eim(void)
{
    struct acpi_drhd_unit *drhd;
    struct vtd_iommu *iommu;

    if ( system_state < SYS_STATE_active && !platform_supports_x2apic() )
        return -ENXIO;

    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;

        /* Clear previous faults */
        clear_fault_bits(iommu);

        /*
         * Disable interrupt remapping and queued invalidation if
         * already enabled by BIOS
         */
        disable_intremap(iommu);
        disable_qinval(iommu);
    }

    /* Enable queued invalidation */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_qinval(iommu) != 0 )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Queued Invalidation!\n");
            return -EIO;
        }
    }

    /* Enable interrupt remapping */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_intremap(iommu, 1) )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Interrupt Remapping!\n");
            return -EIO;
        }
    }

    return 0;
}

/*
 * This function is used to disable interrupt remapping when
 * suspending the local APIC.
 */
void intel_iommu_disable_eim(void)
{
    struct acpi_drhd_unit *drhd;

    for_each_drhd_unit ( drhd )
        disable_intremap(drhd->iommu);

    for_each_drhd_unit ( drhd )
        disable_qinval(drhd->iommu);
}

/*
 * This function is used to update the IRTE for posted-interrupt
 * when the guest changes MSI/MSI-X information.
 */
int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
    const uint8_t gvec)
{
    struct irq_desc *desc;
    struct msi_desc *msi_desc;
    int rc;

    desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !desc )
        return -EINVAL;

    msi_desc = desc->msi_desc;
    if ( !msi_desc )
    {
        rc = -ENODEV;
        goto unlock_out;
    }
    msi_desc->pi_desc = pi_desc;
    msi_desc->gvec = gvec;

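    /*
     * desc->lock is dropped before rewriting the IRTE; the pcidevs lock
     * (asserted below) keeps the msi_desc from going away under us.
     */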
    spin_unlock_irq(&desc->lock);

    ASSERT(pcidevs_locked());

    return msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);

 unlock_out:
    spin_unlock_irq(&desc->lock);

    return rc;
}