/*
 * Copyright (C) 2008,  Netronome Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/pci_ids.h>
#include <xen/list.h>
#include <xen/prefetch.h>
#include <xen/iommu.h>
#include <xen/irq.h>
#include <xen/param.h>
#include <xen/vm_event.h>
#include <asm/hvm/irq.h>
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/paging.h>
#include <xen/radix-tree.h>
#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xen/vpci.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
#include "ats.h"

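/*
 * Per-segment bookkeeping: all devices on a segment hang off alldevs_list,
 * and bus2bridge records, for every bus behind a bridge, the (bus, devfn)
 * of that bridge so find_upstream_bridge() can walk towards the root.
 */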
struct pci_seg {
    struct list_head alldevs_list;
    u16 nr;
    unsigned long *ro_map;
    /* bus2bridge_lock protects bus2bridge array */
    spinlock_t bus2bridge_lock;
#define MAX_BUSES 256
    struct {
        u8 map;
        u8 bus;
        u8 devfn;
    } bus2bridge[MAX_BUSES];
};

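/*
 * Global lock protecting the PCI device lists and most struct pci_dev
 * state.  Taken recursively, so helpers can be used both with and without
 * the lock already held.
 */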
static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;

void pcidevs_lock(void)
{
    spin_lock_recursive(&_pcidevs_lock);
}

void pcidevs_unlock(void)
{
    spin_unlock_recursive(&_pcidevs_lock);
}

bool_t pcidevs_locked(void)
{
    return !!spin_is_locked(&_pcidevs_lock);
}

bool_t pcidevs_trylock(void)
{
    return !!spin_trylock_recursive(&_pcidevs_lock);
}

static struct radix_tree_root pci_segments;

static inline struct pci_seg *get_pseg(u16 seg)
{
    return radix_tree_lookup(&pci_segments, seg);
}

bool_t pci_known_segment(u16 seg)
{
    return get_pseg(seg) != NULL;
}

static struct pci_seg *alloc_pseg(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    if ( pseg )
        return pseg;

    pseg = xzalloc(struct pci_seg);
    if ( !pseg )
        return NULL;

    pseg->nr = seg;
    INIT_LIST_HEAD(&pseg->alldevs_list);
    spin_lock_init(&pseg->bus2bridge_lock);

    if ( radix_tree_insert(&pci_segments, seg, pseg) )
    {
        xfree(pseg);
        pseg = NULL;
    }

    return pseg;
}

static int pci_segments_iterate(
    int (*handler)(struct pci_seg *, void *), void *arg)
{
    u16 seg = 0;
    int rc = 0;

    do {
        struct pci_seg *pseg;

        if ( !radix_tree_gang_lookup(&pci_segments, (void **)&pseg, seg, 1) )
            break;
        rc = handler(pseg, arg);
        seg = pseg->nr + 1;
    } while ( !rc && seg );

    return rc;
}

void __init pci_segments_init(void)
{
    radix_tree_init(&pci_segments);
    if ( !alloc_pseg(0) )
        panic("Could not initialize PCI segment 0\n");
}

int __init pci_add_segment(u16 seg)
{
    return alloc_pseg(seg) ? 0 : -ENOMEM;
}

const unsigned long *pci_get_ro_map(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    return pseg ? pseg->ro_map : NULL;
}

static struct phantom_dev {
    u16 seg;
    u8 bus, slot, stride;
} phantom_devs[8];
static unsigned int nr_phantom_devs;

static int __init parse_phantom_dev(const char *str)
{
    const char *s;
    unsigned int seg, bus, slot;
    struct phantom_dev phantom;

    if ( !*str )
        return -EINVAL;
    if ( nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
        return -E2BIG;

    s = parse_pci(str, &seg, &bus, &slot, NULL);
    if ( !s || *s != ',' )
        return -EINVAL;

    phantom.seg = seg;
    phantom.bus = bus;
    phantom.slot = slot;

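    /*
     * Only strides of 1, 2, or 4 are meaningful.  Note the deliberate
     * fall-through: a valid stride followed by trailing characters drops
     * into the default label and is rejected along with invalid strides.
     */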
    switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
    {
    case 1: case 2: case 4:
        if ( *s )
    default:
            return -EINVAL;
    }

    phantom_devs[nr_phantom_devs++] = phantom;

    return 0;
}
custom_param("pci-phantom", parse_phantom_dev);
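/*
 * Example (hypothetical BDF): "pci-phantom=0000:3f:1d,2" would mark device
 * 0000:3f:1d as exposing phantom functions with a stride of 2; see the
 * pci-phantom entry in Xen's command line documentation for the exact
 * syntax.
 */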

static u16 __read_mostly command_mask;
static u16 __read_mostly bridge_ctl_mask;

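/*
 * "pci=serr"/"pci=no-serr" and "pci=perr"/"pci=no-perr": a negated option
 * accumulates the corresponding PCI command / bridge control bits into the
 * masks above, and check_pdev() then clears those bits on every device it
 * inspects.
 */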
static int __init parse_pci_param(const char *s)
{
    const char *ss;
    int rc = 0;

    do {
        int val;
        u16 cmd_mask = 0, brctl_mask = 0;

        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( (val = parse_boolean("serr", s, ss)) >= 0 )
        {
            cmd_mask = PCI_COMMAND_SERR;
            brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
        }
        else if ( (val = parse_boolean("perr", s, ss)) >= 0 )
        {
            cmd_mask = PCI_COMMAND_PARITY;
            brctl_mask = PCI_BRIDGE_CTL_PARITY;
        }
        else
            rc = -EINVAL;

        if ( val )
        {
            command_mask &= ~cmd_mask;
            bridge_ctl_mask &= ~brctl_mask;
        }
        else
        {
            command_mask |= cmd_mask;
            bridge_ctl_mask |= brctl_mask;
        }

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("pci", parse_pci_param);

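/*
 * Apply the command line overrides to a device: clear any masked bits in
 * the (bridge) command/control registers, and acknowledge latched error
 * bits in the (secondary) status register, which per the PCI spec are
 * write-1-to-clear.
 */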
static void check_pdev(const struct pci_dev *pdev)
{
#define PCI_STATUS_CHECK \
    (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
     PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
     PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 dev = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    u16 val;

    if ( command_mask )
    {
        val = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
        if ( val & command_mask )
            pci_conf_write16(pdev->sbdf, PCI_COMMAND, val & ~command_mask);
        val = pci_conf_read16(pdev->sbdf, PCI_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            pci_conf_write16(pdev->sbdf, PCI_STATUS, val & PCI_STATUS_CHECK);
        }
    }

    switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
    {
    case PCI_HEADER_TYPE_BRIDGE:
        if ( !bridge_ctl_mask )
            break;
        val = pci_conf_read16(pdev->sbdf, PCI_BRIDGE_CONTROL);
        if ( val & bridge_ctl_mask )
            pci_conf_write16(pdev->sbdf, PCI_BRIDGE_CONTROL,
                             val & ~bridge_ctl_mask);
        val = pci_conf_read16(pdev->sbdf, PCI_SEC_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO
                   "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            pci_conf_write16(pdev->sbdf, PCI_SEC_STATUS,
                             val & PCI_STATUS_CHECK);
        }
        break;

    case PCI_HEADER_TYPE_CARDBUS:
        /* TODO */
        break;
    }
#undef PCI_STATUS_CHECK
}

static void apply_quirks(struct pci_dev *pdev)
{
    uint16_t vendor = pci_conf_read16(pdev->sbdf, PCI_VENDOR_ID);
    uint16_t device = pci_conf_read16(pdev->sbdf, PCI_DEVICE_ID);
    static const struct {
        uint16_t vendor, device;
    } ignore_bars[] = {
        /*
         * Device [8086:2fc0]
         * Erratum HSE43
         * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset
         * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html
         */
        { PCI_VENDOR_ID_INTEL, 0x2fc0 },
        /*
         * Devices [8086:6f60,6fa0,6fc0]
         * Errata BDF2 / BDX2
         * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration
         * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
         */
        { PCI_VENDOR_ID_INTEL, 0x6f60 },
        { PCI_VENDOR_ID_INTEL, 0x6fa0 },
        { PCI_VENDOR_ID_INTEL, 0x6fc0 },
    };
    unsigned int i;

    for ( i = 0; i < ARRAY_SIZE(ignore_bars); i++ )
        if ( vendor == ignore_bars[i].vendor &&
             device == ignore_bars[i].device )
            /*
             * For these errata force ignoring the BARs, which prevents vPCI
             * from trying to size the BARs or add handlers to trap accesses.
             */
            pdev->ignore_bars = true;
}

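/*
 * Find an existing pci_dev on the segment, or allocate and initialise a
 * new one: record the MSI/MSI-X capabilities, hook the device into the
 * segment's list, and update bus2bridge if it is a bridge.
 */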
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    unsigned int pos;

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
            return pdev;

    pdev = xzalloc(struct pci_dev);
    if ( !pdev )
        return NULL;

    *(u16*) &pdev->seg = pseg->nr;
    *((u8*) &pdev->bus) = bus;
    *((u8*) &pdev->devfn) = devfn;
    pdev->domain = NULL;
    INIT_LIST_HEAD(&pdev->msi_list);

    pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                              PCI_CAP_ID_MSI);
    if ( pos )
    {
        uint16_t ctrl = pci_conf_read16(pdev->sbdf, msi_control_reg(pos));

        pdev->msi_maxvec = multi_msi_capable(ctrl);
    }

    pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                              PCI_CAP_ID_MSIX);
    if ( pos )
    {
        struct arch_msix *msix = xzalloc(struct arch_msix);
        uint16_t ctrl;

        if ( !msix )
        {
            xfree(pdev);
            return NULL;
        }
        spin_lock_init(&msix->table_lock);

        ctrl = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
        msix->nr_entries = msix_table_size(ctrl);

        pdev->msix = msix;
    }

    list_add(&pdev->alldevs_list, &pseg->alldevs_list);

    /* update bus2bridge */
    switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
    {
        u16 cap;
        u8 sec_bus, sub_bus;

        case DEV_TYPE_PCIe2PCI_BRIDGE:
        case DEV_TYPE_LEGACY_PCI_BRIDGE:
            sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS);
            sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS);

            spin_lock(&pseg->bus2bridge_lock);
            for ( ; sec_bus <= sub_bus; sec_bus++ )
            {
                pseg->bus2bridge[sec_bus].map = 1;
                pseg->bus2bridge[sec_bus].bus = bus;
                pseg->bus2bridge[sec_bus].devfn = devfn;
            }
            spin_unlock(&pseg->bus2bridge_lock);
            break;

        case DEV_TYPE_PCIe_ENDPOINT:
            pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
                                      PCI_FUNC(devfn), PCI_CAP_ID_EXP);
            BUG_ON(!pos);
            cap = pci_conf_read16(pdev->sbdf, pos + PCI_EXP_DEVCAP);
            if ( cap & PCI_EXP_DEVCAP_PHANTOM )
            {
                pdev->phantom_stride = 8 >> MASK_EXTR(cap,
                                                      PCI_EXP_DEVCAP_PHANTOM);
                if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
                    pdev->phantom_stride = 0;
            }
            else
            {
                unsigned int i;

                for ( i = 0; i < nr_phantom_devs; ++i )
                    if ( phantom_devs[i].seg == pseg->nr &&
                         phantom_devs[i].bus == bus &&
                         phantom_devs[i].slot == PCI_SLOT(devfn) &&
                         phantom_devs[i].stride > PCI_FUNC(devfn) )
                    {
                        pdev->phantom_stride = phantom_devs[i].stride;
                        break;
                    }
            }
            break;

        case DEV_TYPE_PCI:
        case DEV_TYPE_PCIe_BRIDGE:
        case DEV_TYPE_PCI_HOST_BRIDGE:
            break;

        default:
            printk(XENLOG_WARNING "%04x:%02x:%02x.%u: unknown type %d\n",
                   pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pdev->type);
            break;
    }

    check_pdev(pdev);
    apply_quirks(pdev);

    return pdev;
}

static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
    /* update bus2bridge */
    switch ( pdev->type )
    {
        uint8_t sec_bus, sub_bus;

        case DEV_TYPE_PCIe2PCI_BRIDGE:
        case DEV_TYPE_LEGACY_PCI_BRIDGE:
            sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS);
            sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS);

            spin_lock(&pseg->bus2bridge_lock);
            for ( ; sec_bus <= sub_bus; sec_bus++ )
                pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
            spin_unlock(&pseg->bus2bridge_lock);
            break;

        default:
            break;
    }

    list_del(&pdev->alldevs_list);
    xfree(pdev->msix);
    xfree(pdev);
}

static void _pci_hide_device(struct pci_dev *pdev)
{
    if ( pdev->domain )
        return;
    pdev->domain = dom_xen;
    list_add(&pdev->domain_list, &dom_xen->pdev_list);
}

int __init pci_hide_device(unsigned int seg, unsigned int bus,
                           unsigned int devfn)
{
    struct pci_dev *pdev;
    struct pci_seg *pseg;
    int rc = -ENOMEM;

    pcidevs_lock();
    pseg = alloc_pseg(seg);
    if ( pseg )
    {
        pdev = alloc_pdev(pseg, bus, devfn);
        if ( pdev )
        {
            _pci_hide_device(pdev);
            rc = 0;
        }
    }
    pcidevs_unlock();

    return rc;
}

int __init pci_ro_device(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = alloc_pseg(seg);
    struct pci_dev *pdev;

    if ( !pseg )
        return -ENOMEM;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        return -ENOMEM;

    if ( !pseg->ro_map )
    {
        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);

        pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
        if ( !pseg->ro_map )
            return -ENOMEM;
        memset(pseg->ro_map, 0, sz);
    }

    __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
    _pci_hide_device(pdev);

    return 0;
}

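/*
 * Look up a device.  seg/bus/devfn may each be -1 as a wildcard; the
 * ASSERTs below enforce that a wildcard in a more significant part forces
 * wildcards in the less significant ones, and the first matching device
 * is returned.
 */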
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(pcidevs_locked());
    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}

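/*
 * Map a (possibly phantom) devfn back to the device owning it: try the
 * exact devfn first, then successively wider function masks, accepting a
 * candidate only if its phantom stride matches the masked bits.
 */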
struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
{
    struct pci_dev *pdev;
    int stride;

    if ( seg < 0 || bus < 0 || devfn < 0 )
        return NULL;

    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
          !pdev && stride; stride >>= 1 )
    {
        if ( !(devfn & (8 - stride)) )
            continue;
        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
        if ( pdev && stride != pdev->phantom_stride )
            pdev = NULL;
    }

    return pdev;
}

struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg,
                                       int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) &&
                 (pdev->domain == d) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}

/**
 * pci_enable_acs - enable ACS if the hardware supports it
 * @pdev: the PCI device
 */
static void pci_enable_acs(struct pci_dev *pdev)
{
    int pos;
    u16 cap, ctrl, seg = pdev->seg;
    u8 bus = pdev->bus;

    if ( !is_iommu_enabled(pdev->domain) )
        return;

    pos = pci_find_ext_capability(seg, bus, pdev->devfn, PCI_EXT_CAP_ID_ACS);
    if ( !pos )
        return;

    cap = pci_conf_read16(pdev->sbdf, pos + PCI_ACS_CAP);
    ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_ACS_CTRL);

    /* Source Validation */
    ctrl |= (cap & PCI_ACS_SV);

    /* P2P Request Redirect */
    ctrl |= (cap & PCI_ACS_RR);

    /* P2P Completion Redirect */
    ctrl |= (cap & PCI_ACS_CR);

    /* Upstream Forwarding */
    ctrl |= (cap & PCI_ACS_UF);

    pci_conf_write16(pdev->sbdf, pos + PCI_ACS_CTRL, ctrl);
}

static int iommu_add_device(struct pci_dev *pdev);
static int iommu_enable_device(struct pci_dev *pdev);
static int iommu_remove_device(struct pci_dev *pdev);

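/*
 * Size a memory BAR using the standard PCI algorithm: write all ones,
 * read back the resulting mask, restore the original value, and negate
 * the mask to obtain the size.  E.g. a read-back of 0xfffff000 yields a
 * 4k BAR.  Returns the number of 32-bit BAR slots consumed (2 for a
 * 64-bit BAR), with the base address in *paddr (if requested) and the
 * size in *psize.
 */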
unsigned int pci_size_mem_bar(pci_sbdf_t sbdf, unsigned int pos,
                              uint64_t *paddr, uint64_t *psize,
                              unsigned int flags)
{
    uint32_t hi = 0, bar = pci_conf_read32(sbdf, pos);
    uint64_t size;
    bool is64bits = !(flags & PCI_BAR_ROM) &&
        (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64;
    uint32_t mask = (flags & PCI_BAR_ROM) ? (uint32_t)PCI_ROM_ADDRESS_MASK
                                          : (uint32_t)PCI_BASE_ADDRESS_MEM_MASK;

    ASSERT(!((flags & PCI_BAR_VF) && (flags & PCI_BAR_ROM)));
    ASSERT((flags & PCI_BAR_ROM) ||
           (bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY);
    pci_conf_write32(sbdf, pos, ~0);
    if ( is64bits )
    {
        if ( flags & PCI_BAR_LAST )
        {
            printk(XENLOG_WARNING
                   "%sdevice %04x:%02x:%02x.%u with 64-bit %sBAR in last slot\n",
                   (flags & PCI_BAR_VF) ? "SR-IOV " : "", sbdf.seg, sbdf.bus,
                   sbdf.dev, sbdf.fn, (flags & PCI_BAR_VF) ? "vf " : "");
            *psize = 0;
            return 1;
        }
        hi = pci_conf_read32(sbdf, pos + 4);
        pci_conf_write32(sbdf, pos + 4, ~0);
    }
    size = pci_conf_read32(sbdf, pos) & mask;
    if ( is64bits )
    {
        size |= (uint64_t)pci_conf_read32(sbdf, pos + 4) << 32;
        pci_conf_write32(sbdf, pos + 4, hi);
    }
    else if ( size )
        size |= (uint64_t)~0 << 32;
    pci_conf_write32(sbdf, pos, bar);
    size = -size;

    if ( paddr )
        *paddr = (bar & mask) | ((uint64_t)hi << 32);
    *psize = size;

    return is64bits ? 2 : 1;
}

int pci_add_device(u16 seg, u8 bus, u8 devfn,
                   const struct pci_dev_info *info, nodeid_t node)
{
    struct pci_seg *pseg;
    struct pci_dev *pdev;
    unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
    const char *pdev_type;
    int ret;
    bool pf_is_extfn = false;

    if ( !info )
        pdev_type = "device";
    else if ( info->is_virtfn )
    {
        pcidevs_lock();
        pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn);
        if ( pdev )
            pf_is_extfn = pdev->info.is_extfn;
        pcidevs_unlock();
        if ( !pdev )
            pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
                           NULL, node);
        pdev_type = "virtual function";
    }
    else if ( info->is_extfn )
        pdev_type = "extended function";
    else
        pdev_type = "device";

    ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENOMEM;

    pcidevs_lock();
    pseg = alloc_pseg(seg);
    if ( !pseg )
        goto out;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        goto out;

    pdev->node = node;

    if ( info )
    {
        pdev->info = *info;
        /*
         * VF's 'is_extfn' field is used to indicate whether its PF is an
         * extended function.
         */
        if ( pdev->info.is_virtfn )
            pdev->info.is_extfn = pf_is_extfn;
    }

    if ( !pdev->info.is_virtfn && !pdev->vf_rlen[0] )
    {
        unsigned int pos = pci_find_ext_capability(seg, bus, devfn,
                                                   PCI_EXT_CAP_ID_SRIOV);
        uint16_t ctrl = pos ? pci_conf_read16(pdev->sbdf,
                                              pos + PCI_SRIOV_CTRL) : 0;

        if ( !pos )
            /* Nothing */;
        else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
        {
            unsigned int i;

            BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
            for ( i = 0; i < PCI_SRIOV_NUM_BARS; )
            {
                unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
                uint32_t bar = pci_conf_read32(pdev->sbdf, idx);

                if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
                     PCI_BASE_ADDRESS_SPACE_IO )
                {
                    printk(XENLOG_WARNING
                           "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u"
                           " in IO space\n",
                           seg, bus, slot, func, i);
                    /* Skip the bogus BAR rather than looping on it forever. */
                    ++i;
                    continue;
                }
                ret = pci_size_mem_bar(pdev->sbdf, idx, NULL,
                                       &pdev->vf_rlen[i],
                                       PCI_BAR_VF |
                                       ((i == PCI_SRIOV_NUM_BARS - 1) ?
                                        PCI_BAR_LAST : 0));
                ASSERT(ret);
                i += ret;
            }
        }
        else
            printk(XENLOG_WARNING
                   "SR-IOV device %04x:%02x:%02x.%u has its virtual"
                   " functions already enabled (%04x)\n",
                   seg, bus, slot, func, ctrl);
    }

    check_pdev(pdev);

    ret = 0;
    if ( !pdev->domain )
    {
        pdev->domain = hardware_domain;
        ret = iommu_add_device(pdev);
        if ( ret )
        {
            pdev->domain = NULL;
            goto out;
        }

        list_add(&pdev->domain_list, &hardware_domain->pdev_list);
    }
    else
        iommu_enable_device(pdev);

    pci_enable_acs(pdev);

out:
    pcidevs_unlock();
    if ( !ret )
    {
        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
               seg, bus, slot, func);
        while ( pdev->phantom_stride )
        {
            func += pdev->phantom_stride;
            if ( PCI_SLOT(func) )
                break;
            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
                   seg, bus, slot, func);
        }
    }
    return ret;
}

int pci_remove_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev;
    int ret;

    ret = xsm_resource_unplug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENODEV;

    if ( !pseg )
        return -ENODEV;

    pcidevs_lock();
    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
        {
            pci_cleanup_msi(pdev);
            ret = iommu_remove_device(pdev);
            if ( pdev->domain )
                list_del(&pdev->domain_list);
            free_pdev(pseg, pdev);
            printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
            break;
        }

    pcidevs_unlock();
    return ret;
}

static int pci_clean_dpci_irq(struct domain *d,
                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
{
    struct dev_intx_gsi_link *digl, *tmp;

    pirq_guest_unbind(d, dpci_pirq(pirq_dpci));

    if ( pt_irq_need_timer(pirq_dpci->flags) )
        kill_timer(&pirq_dpci->timer);

    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
    {
        list_del(&digl->list);
        xfree(digl);
    }

    radix_tree_delete(&d->pirq_tree, dpci_pirq(pirq_dpci)->pirq);

    if ( !pt_pirq_softirq_active(pirq_dpci) )
        return 0;

    domain_get_irq_dpci(d)->pending_pirq_dpci = pirq_dpci;

    return -ERESTART;
}

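/*
 * Tear down all pass-through IRQ mappings of an HVM domain.  If a softirq
 * is still processing one of the pirqs, it is recorded in
 * pending_pirq_dpci and -ERESTART is propagated so the caller retries
 * (and resumes) later.
 */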
static int pci_clean_dpci_irqs(struct domain *d)
{
    struct hvm_irq_dpci *hvm_irq_dpci = NULL;

    if ( !is_iommu_enabled(d) )
        return 0;

    if ( !is_hvm_domain(d) )
        return 0;

    spin_lock(&d->event_lock);
    hvm_irq_dpci = domain_get_irq_dpci(d);
    if ( hvm_irq_dpci != NULL )
    {
        int ret = 0;

        if ( hvm_irq_dpci->pending_pirq_dpci )
        {
            if ( pt_pirq_softirq_active(hvm_irq_dpci->pending_pirq_dpci) )
                 ret = -ERESTART;
            else
                 hvm_irq_dpci->pending_pirq_dpci = NULL;
        }

        if ( !ret )
            ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
        if ( ret )
        {
            spin_unlock(&d->event_lock);
            return ret;
        }

        hvm_domain_irq(d)->dpci = NULL;
        free_hvm_irq_dpci(hvm_irq_dpci);
    }
    spin_unlock(&d->event_lock);
    return 0;
}

/* Caller should hold the pcidevs_lock */
static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
                           uint8_t devfn)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    struct domain *target;
    int ret = 0;

    if ( !is_iommu_enabled(d) )
        return -EINVAL;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
    if ( !pdev )
        return -ENODEV;

    /* De-assignment from dom_io should de-quarantine the device */
    target = ((pdev->quarantine || iommu_quarantine) &&
              pdev->domain != dom_io) ?
        dom_io : hardware_domain;

    while ( pdev->phantom_stride )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        ret = hd->platform_ops->reassign_device(d, target, devfn,
                                                pci_to_dev(pdev));
        if ( ret )
            goto out;
    }

    devfn = pdev->devfn;
    ret = hd->platform_ops->reassign_device(d, target, devfn,
                                            pci_to_dev(pdev));
    if ( ret )
        goto out;

    if ( pdev->domain == hardware_domain )
        pdev->quarantine = false;

    pdev->fault.count = 0;

 out:
    if ( ret )
        printk(XENLOG_G_ERR "%pd: deassign (%04x:%02x:%02x.%u) failed (%d)\n",
               d, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);

    return ret;
}

int pci_release_devices(struct domain *d)
{
    struct pci_dev *pdev;
    u8 bus, devfn;
    int ret;

    pcidevs_lock();
    ret = pci_clean_dpci_irqs(d);
    if ( ret )
    {
        pcidevs_unlock();
        return ret;
    }
    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
    {
        bus = pdev->bus;
        devfn = pdev->devfn;
        deassign_device(d, pdev->seg, bus, devfn);
    }
    pcidevs_unlock();

    return 0;
}

#define PCI_CLASS_BRIDGE_HOST    0x0600
#define PCI_CLASS_BRIDGE_PCI     0x0604

enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
    u16 class_device, creg;
    u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
    int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);

    class_device = pci_conf_read16(PCI_SBDF(seg, bus, d, f), PCI_CLASS_DEVICE);
    switch ( class_device )
    {
    case PCI_CLASS_BRIDGE_PCI:
        if ( !pos )
            return DEV_TYPE_LEGACY_PCI_BRIDGE;
        creg = pci_conf_read16(PCI_SBDF(seg, bus, d, f), pos + PCI_EXP_FLAGS);
        switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
        {
        case PCI_EXP_TYPE_PCI_BRIDGE:
            return DEV_TYPE_PCIe2PCI_BRIDGE;
        case PCI_EXP_TYPE_PCIE_BRIDGE:
            return DEV_TYPE_PCI2PCIe_BRIDGE;
        }
        return DEV_TYPE_PCIe_BRIDGE;
    case PCI_CLASS_BRIDGE_HOST:
        return DEV_TYPE_PCI_HOST_BRIDGE;

    case 0xffff:
        return DEV_TYPE_PCI_UNKNOWN;
    }

    /* NB: treat legacy pre PCI 2.0 devices (class_device == 0) as endpoints. */
    return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
}

/*
 * Find the upstream PCIe-to-PCI/PCI-X bridge or PCI legacy bridge.
 * Return 0: the device is an integrated PCI device or PCIe.
 * Return 1: found the upstream PCIe-to-PCI/PCI-X or PCI legacy bridge.
 * Return -1: failure.
 */
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus)
{
    struct pci_seg *pseg = get_pseg(seg);
    int ret = 0;
    int cnt = 0;

    if ( *bus == 0 )
        return 0;

    if ( !pseg )
        return -1;

    if ( !pseg->bus2bridge[*bus].map )
        return 0;

    ret = 1;
    spin_lock(&pseg->bus2bridge_lock);
    while ( pseg->bus2bridge[*bus].map )
    {
        *secbus = *bus;
        *devfn = pseg->bus2bridge[*bus].devfn;
        *bus = pseg->bus2bridge[*bus].bus;
        if ( cnt++ >= MAX_BUSES )
        {
            ret = -1;
            goto out;
        }
    }

out:
    spin_unlock(&pseg->bus2bridge_lock);
    return ret;
}

bool_t __init pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func)
{
    u32 vendor;

    vendor = pci_conf_read32(PCI_SBDF(seg, bus, dev, func), PCI_VENDOR_ID);
    /* some broken boards return 0 or ~0 if a slot is empty: */
    if ( (vendor == 0xffffffff) || (vendor == 0x00000000) ||
         (vendor == 0x0000ffff) || (vendor == 0xffff0000) )
        return 0;
    return 1;
}

void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    s_time_t now = NOW();
    u16 cword;

    pcidevs_lock();
    pdev = pci_get_real_pdev(seg, bus, devfn);
    if ( pdev )
    {
        if ( now < pdev->fault.time ||
             now - pdev->fault.time > MILLISECS(10) )
            pdev->fault.count >>= 1;
        pdev->fault.time = now;
        if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
            pdev = NULL;
    }
    pcidevs_unlock();

    if ( !pdev )
        return;

    /* Tell the device to stop DMAing; we can't rely on the guest to
     * control it for us. */
    cword = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
    pci_conf_write16(pdev->sbdf, PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
}

/*
 * Scan PCI devices, adding all existing PCI devices to alldevs_list
 * and setting up the PCI hierarchy in the bus2bridge array.
 */
static int __init _scan_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    int bus, dev, func;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( dev = 0; dev < 32; dev++ )
        {
            for ( func = 0; func < 8; func++ )
            {
                if ( !pci_device_detect(pseg->nr, bus, dev, func) )
                {
                    if ( !func )
                        break;
                    continue;
                }

                pdev = alloc_pdev(pseg, bus, PCI_DEVFN(dev, func));
                if ( !pdev )
                {
                    printk(XENLOG_WARNING "%04x:%02x:%02x.%u: alloc_pdev failed\n",
                           pseg->nr, bus, dev, func);
                    return -ENOMEM;
                }

                if ( !func && !(pci_conf_read8(PCI_SBDF(pseg->nr, bus, dev,
                                                        func),
                                               PCI_HEADER_TYPE) & 0x80) )
                    break;
            }
        }
    }

    return 0;
}

int __init scan_pci_devices(void)
{
    int ret;

    pcidevs_lock();
    ret = pci_segments_iterate(_scan_pci_devices, NULL);
    pcidevs_unlock();

    return ret;
}

struct setup_hwdom {
    struct domain *d;
    int (*handler)(u8 devfn, struct pci_dev *);
};

static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
                                                struct pci_dev *pdev)
{
    u8 devfn = pdev->devfn;
    int err;

    do {
        err = ctxt->handler(devfn, pdev);
        if ( err )
        {
            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   ctxt->d->domain_id, err);
            if ( devfn == pdev->devfn )
                return;
        }
        devfn += pdev->phantom_stride;
    } while ( devfn != pdev->devfn &&
              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );

    err = vpci_add_handlers(pdev);
    if ( err )
        printk(XENLOG_ERR "setup of vPCI for d%d failed: %d\n",
               ctxt->d->domain_id, err);
}

static int __hwdom_init _setup_hwdom_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct setup_hwdom *ctxt = arg;
    int bus, devfn;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( devfn = 0; devfn < 256; devfn++ )
        {
            struct pci_dev *pdev = pci_get_pdev(pseg->nr, bus, devfn);

            if ( !pdev )
                continue;

            if ( !pdev->domain )
            {
                pdev->domain = ctxt->d;
                list_add(&pdev->domain_list, &ctxt->d->pdev_list);
                setup_one_hwdom_device(ctxt, pdev);
            }
            else if ( pdev->domain == dom_xen )
            {
                pdev->domain = ctxt->d;
                setup_one_hwdom_device(ctxt, pdev);
                pdev->domain = dom_xen;
            }
            else if ( pdev->domain != ctxt->d )
                printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
                       pdev->domain->domain_id, pseg->nr, bus,
                       PCI_SLOT(devfn), PCI_FUNC(devfn));

            if ( iommu_verbose )
            {
                pcidevs_unlock();
                process_pending_softirqs();
                pcidevs_lock();
            }
        }

        if ( !iommu_verbose )
        {
            pcidevs_unlock();
            process_pending_softirqs();
            pcidevs_lock();
        }
    }

    return 0;
}

void __hwdom_init setup_hwdom_pci_devices(
    struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
    struct setup_hwdom ctxt = { .d = d, .handler = handler };

    pcidevs_lock();
    pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
    pcidevs_unlock();
}

#ifdef CONFIG_ACPI
#include <acpi/acpi.h>
#include <acpi/apei.h>

static int hest_match_pci(const struct acpi_hest_aer_common *p,
                          const struct pci_dev *pdev)
{
    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
           ACPI_HEST_BUS(p->bus)     == pdev->bus &&
           p->device                 == PCI_SLOT(pdev->devfn) &&
           p->function               == PCI_FUNC(pdev->devfn);
}

static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
                              const struct pci_dev *pdev)
{
    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
                                           PCI_SLOT(pdev->devfn),
                                           PCI_FUNC(pdev->devfn),
                                           PCI_CAP_ID_EXP);
    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->sbdf, pos + PCI_EXP_FLAGS),
                        PCI_EXP_FLAGS_TYPE);

    switch ( hest_hdr->type )
    {
    case ACPI_HEST_TYPE_AER_ROOT_PORT:
        return pcie == PCI_EXP_TYPE_ROOT_PORT;
    case ACPI_HEST_TYPE_AER_ENDPOINT:
        return pcie == PCI_EXP_TYPE_ENDPOINT;
    case ACPI_HEST_TYPE_AER_BRIDGE:
        return pci_conf_read16(pdev->sbdf, PCI_CLASS_DEVICE) ==
               PCI_CLASS_BRIDGE_PCI;
    }

    return 0;
}

struct aer_hest_parse_info {
    const struct pci_dev *pdev;
    bool_t firmware_first;
};

static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
{
    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
        return 1;
    return 0;
}

static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
{
    struct aer_hest_parse_info *info = data;
    const struct acpi_hest_aer_common *p;
    bool_t ff;

    if ( !hest_source_is_pcie_aer(hest_hdr) )
        return 0;

    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

    /*
     * If no specific device is supplied, determine whether
     * FIRMWARE_FIRST is set for *any* PCIe device.
     */
    if ( !info->pdev )
    {
        info->firmware_first |= ff;
        return 0;
    }

    /* Otherwise, check the specific device */
    if ( p->flags & ACPI_HEST_GLOBAL ?
         hest_match_type(hest_hdr, info->pdev) :
         hest_match_pci(p, info->pdev) )
    {
        info->firmware_first = ff;
        return 1;
    }

    return 0;
}

bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
{
    struct aer_hest_parse_info info = { .pdev = pdev };

    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
           apei_hest_parse(aer_hest_parse, &info) >= 0 &&
           info.firmware_first;
}
#endif

static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    struct msi_desc *msi;

    printk("==== segment %04x ====\n", pseg->nr);

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
    {
        printk("%04x:%02x:%02x.%u - %pd - node %-3d - MSIs < ",
               pseg->nr, pdev->bus,
               PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev->domain,
               (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
        list_for_each_entry ( msi, &pdev->msi_list, list )
               printk("%d ", msi->irq);
        printk(">\n");
    }

    return 0;
}

static void dump_pci_devices(unsigned char ch)
{
    printk("==== PCI devices ====\n");
    pcidevs_lock();
    pci_segments_iterate(_dump_pci_devices, NULL);
    pcidevs_unlock();
}

static int __init setup_dump_pcidevs(void)
{
    register_keyhandler('Q', dump_pci_devices, "dump PCI devices", 1);
    return 0;
}
__initcall(setup_dump_pcidevs);

int iommu_update_ire_from_msi(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    return iommu_intremap
           ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
}

void iommu_read_msi_from_ire(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    if ( iommu_intremap )
        iommu_vcall(&iommu_ops, read_msi_from_ire, msi_desc, msg);
}

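/*
 * Register a device (and any phantom functions it has) with the IOMMU of
 * its owning domain.  A failure on the real function is fatal; failures
 * on phantom functions are only logged.
 */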
static int iommu_add_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    int rc;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !is_iommu_enabled(pdev->domain) )
        return 0;

    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
    if ( rc || !pdev->phantom_stride )
        return rc;

    for ( devfn = pdev->devfn ; ; )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            return 0;
        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
        if ( rc )
            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
    }
}

static int iommu_enable_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !is_iommu_enabled(pdev->domain) ||
         !hd->platform_ops->enable_device )
        return 0;

    return hd->platform_ops->enable_device(pci_to_dev(pdev));
}

static int iommu_remove_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    hd = dom_iommu(pdev->domain);
    if ( !is_iommu_enabled(pdev->domain) )
        return 0;

    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
    {
        int rc;

        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
        if ( !rc )
            continue;

        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
        return rc;
    }

    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
}

static int device_assigned(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    int rc = 0;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(seg, bus, devfn);

    if ( !pdev )
        rc = -ENODEV;
    /*
     * If the device exists and it is not owned by either the hardware
     * domain or dom_io then it must be assigned to a guest, or be
     * hidden (owned by dom_xen).
     */
    else if ( pdev->domain != hardware_domain &&
              pdev->domain != dom_io )
        rc = -EBUSY;

    return rc;
}

/* Caller should hold the pcidevs_lock */
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int rc = 0;

    if ( !is_iommu_enabled(d) )
        return 0;

    /*
     * Prevent device assignment if mem paging or mem sharing has been
     * enabled for this domain.
     */
    if ( d != dom_io &&
         unlikely(mem_sharing_enabled(d) ||
                  vm_event_check_ring(d->vm_event_paging) ||
                  p2m_get_hostp2m(d)->global_logdirty) )
        return -EXDEV;

    /* device_assigned() should already have cleared the device for assignment */
    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(seg, bus, devfn);
    ASSERT(pdev && (pdev->domain == hardware_domain ||
                    pdev->domain == dom_io));

    if ( pdev->msix )
    {
        rc = pci_reset_msix_state(pdev);
        if ( rc )
            goto done;
        msixtbl_init(d);
    }

    pdev->fault.count = 0;

    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
        goto done;

    for ( ; pdev->phantom_stride; rc = 0 )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
    }

 done:
    if ( rc )
        printk(XENLOG_G_WARNING "%pd: assign (%04x:%02x:%02x.%u) failed (%d)\n",
               d, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
    /* The device is assigned to dom_io so mark it as quarantined */
    else if ( d == dom_io )
        pdev->quarantine = true;

    return rc;
}

static int iommu_get_device_group(
    struct domain *d, u16 seg, u8 bus, u8 devfn,
    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int group_id, sdev_id;
    u32 bdf;
    int i = 0;
    const struct iommu_ops *ops = hd->platform_ops;

    if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
        return 0;

    group_id = ops->get_device_group_id(seg, bus, devfn);

    pcidevs_lock();
    for_each_pdev( d, pdev )
    {
        if ( (pdev->seg != seg) ||
             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
            continue;

        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
            continue;

        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
        if ( (sdev_id == group_id) && (i < max_sdevs) )
        {
            bdf = 0;
            bdf |= (pdev->bus & 0xff) << 16;
            bdf |= (pdev->devfn & 0xff) << 8;

            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
            {
                pcidevs_unlock();
                return -1;
            }
            i++;
        }
    }

    pcidevs_unlock();

    return i;
}

void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
{
    pcidevs_lock();

    disable_ats_device(pdev);

    ASSERT(pdev->domain);
    if ( d != pdev->domain )
    {
        pcidevs_unlock();
        return;
    }

    list_del(&pdev->domain_list);
    pdev->domain = NULL;
    _pci_hide_device(pdev);

    if ( !d->is_shutting_down && printk_ratelimit() )
        printk(XENLOG_ERR
               "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
               d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
               PCI_FUNC(pdev->devfn));
    if ( !is_hardware_domain(d) )
        domain_crash(d);

    pcidevs_unlock();
}

int iommu_do_pci_domctl(
    struct xen_domctl *domctl, struct domain *d,
    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    u16 seg;
    u8 bus, devfn;
    int ret = 0;
    uint32_t machine_sbdf;

    switch ( domctl->cmd )
    {
        unsigned int flags;

    case XEN_DOMCTL_get_device_group:
    {
        u32 max_sdevs;
        XEN_GUEST_HANDLE_64(uint32) sdevs;

        ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
        if ( ret )
            break;

        seg = domctl->u.get_device_group.machine_sbdf >> 16;
        bus = PCI_BUS(domctl->u.get_device_group.machine_sbdf);
        devfn = PCI_DEVFN2(domctl->u.get_device_group.machine_sbdf);
        max_sdevs = domctl->u.get_device_group.max_sdevs;
        sdevs = domctl->u.get_device_group.sdev_array;

        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
        if ( ret < 0 )
        {
            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
            ret = -EFAULT;
            domctl->u.get_device_group.num_sdevs = 0;
        }
        else
        {
            domctl->u.get_device_group.num_sdevs = ret;
            ret = 0;
        }
        if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
            ret = -EFAULT;
    }
    break;

    case XEN_DOMCTL_assign_device:
        ASSERT(d);
        /* fall through */
    case XEN_DOMCTL_test_assign_device:
        /* Don't support self-assignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        flags = domctl->u.assign_device.flags;
        if ( domctl->cmd == XEN_DOMCTL_assign_device
             ? d->is_dying || (flags & ~XEN_DOMCTL_DEV_RDM_RELAXED)
             : flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        pcidevs_lock();
        ret = device_assigned(seg, bus, devfn);
        if ( domctl->cmd == XEN_DOMCTL_test_assign_device )
        {
            if ( ret )
            {
                printk(XENLOG_G_INFO
                       "%04x:%02x:%02x.%u already assigned, or non-existent\n",
                       seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
                ret = -EINVAL;
            }
        }
        else if ( !ret )
            ret = assign_device(d, seg, bus, devfn, flags);
        pcidevs_unlock();
        if ( ret == -ERESTART )
            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                "h", u_domctl);
        break;

    case XEN_DOMCTL_deassign_device:
        /* Don't support self-deassignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        if ( domctl->u.assign_device.flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_deassign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        pcidevs_lock();
        ret = deassign_device(d, seg, bus, devfn);
        pcidevs_unlock();
        break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */