1 /*
2 * Copyright (C) 2008, Netronome Systems, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include <xen/sched.h>
18 #include <xen/pci.h>
19 #include <xen/pci_regs.h>
20 #include <xen/pci_ids.h>
21 #include <xen/list.h>
22 #include <xen/prefetch.h>
23 #include <xen/iommu.h>
24 #include <xen/irq.h>
25 #include <xen/param.h>
26 #include <xen/vm_event.h>
27 #include <asm/hvm/irq.h>
28 #include <xen/delay.h>
29 #include <xen/keyhandler.h>
30 #include <xen/event.h>
31 #include <xen/guest_access.h>
32 #include <xen/paging.h>
33 #include <xen/radix-tree.h>
34 #include <xen/softirq.h>
35 #include <xen/tasklet.h>
36 #include <xen/vpci.h>
37 #include <xsm/xsm.h>
38 #include <asm/msi.h>
39 #include "ats.h"
40
41 struct pci_seg {
42 struct list_head alldevs_list;
43 u16 nr;
44 unsigned long *ro_map;
45 /* bus2bridge_lock protects bus2bridge array */
46 spinlock_t bus2bridge_lock;
47 #define MAX_BUSES 256
48 struct {
49 u8 map;
50 u8 bus;
51 u8 devfn;
52 } bus2bridge[MAX_BUSES];
53 };
54
55 static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
56
57 void pcidevs_lock(void)
58 {
59 spin_lock_recursive(&_pcidevs_lock);
60 }
61
62 void pcidevs_unlock(void)
63 {
64 spin_unlock_recursive(&_pcidevs_lock);
65 }
66
67 bool_t pcidevs_locked(void)
68 {
69 return !!spin_is_locked(&_pcidevs_lock);
70 }
71
72 bool_t pcidevs_trylock(void)
73 {
74 return !!spin_trylock_recursive(&_pcidevs_lock);
75 }
76
77 static struct radix_tree_root pci_segments;
78
79 static inline struct pci_seg *get_pseg(u16 seg)
80 {
81 return radix_tree_lookup(&pci_segments, seg);
82 }
83
84 bool_t pci_known_segment(u16 seg)
85 {
86 return get_pseg(seg) != NULL;
87 }
88
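/* Look up a segment, allocating and registering a new pci_seg if needed. */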
89 static struct pci_seg *alloc_pseg(u16 seg)
90 {
91 struct pci_seg *pseg = get_pseg(seg);
92
93 if ( pseg )
94 return pseg;
95
96 pseg = xzalloc(struct pci_seg);
97 if ( !pseg )
98 return NULL;
99
100 pseg->nr = seg;
101 INIT_LIST_HEAD(&pseg->alldevs_list);
102 spin_lock_init(&pseg->bus2bridge_lock);
103
104 if ( radix_tree_insert(&pci_segments, seg, pseg) )
105 {
106 xfree(pseg);
107 pseg = NULL;
108 }
109
110 return pseg;
111 }
112
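/*
 * Call the handler for each known PCI segment in ascending order, stopping
 * early if the handler returns a non-zero value.
 */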
113 static int pci_segments_iterate(
114 int (*handler)(struct pci_seg *, void *), void *arg)
115 {
116 u16 seg = 0;
117 int rc = 0;
118
119 do {
120 struct pci_seg *pseg;
121
122 if ( !radix_tree_gang_lookup(&pci_segments, (void **)&pseg, seg, 1) )
123 break;
124 rc = handler(pseg, arg);
125 seg = pseg->nr + 1;
126 } while (!rc && seg);
127
128 return rc;
129 }
130
131 void __init pci_segments_init(void)
132 {
133 radix_tree_init(&pci_segments);
134 if ( !alloc_pseg(0) )
135 panic("Could not initialize PCI segment 0\n");
136 }
137
138 int __init pci_add_segment(u16 seg)
139 {
140 return alloc_pseg(seg) ? 0 : -ENOMEM;
141 }
142
143 const unsigned long *pci_get_ro_map(u16 seg)
144 {
145 struct pci_seg *pseg = get_pseg(seg);
146
147 return pseg ? pseg->ro_map : NULL;
148 }
149
150 static struct phantom_dev {
151 u16 seg;
152 u8 bus, slot, stride;
153 } phantom_devs[8];
154 static unsigned int nr_phantom_devs;
155
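/*
 * Parse one "pci-phantom=" command line entry (segment/bus/device plus a
 * stride of 1, 2 or 4) and record it for use during device enumeration.
 */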
156 static int __init parse_phantom_dev(const char *str)
157 {
158 const char *s;
159 unsigned int seg, bus, slot;
160 struct phantom_dev phantom;
161
162 if ( !*str )
163 return -EINVAL;
164 if ( nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
165 return -E2BIG;
166
167 s = parse_pci(str, &seg, &bus, &slot, NULL);
168 if ( !s || *s != ',' )
169 return -EINVAL;
170
171 phantom.seg = seg;
172 phantom.bus = bus;
173 phantom.slot = slot;
174
175 switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
176 {
177 case 1: case 2: case 4:
178 if ( *s )
179 default:
180 return -EINVAL;
181 }
182
183 phantom_devs[nr_phantom_devs++] = phantom;
184
185 return 0;
186 }
187 custom_param("pci-phantom", parse_phantom_dev);
188
189 static u16 __read_mostly command_mask;
190 static u16 __read_mostly bridge_ctl_mask;
191
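/*
 * Parse the "pci=" command line option: "serr" and "perr" booleans select
 * which command / bridge control bits check_pdev() will clear.
 */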
192 static int __init parse_pci_param(const char *s)
193 {
194 const char *ss;
195 int rc = 0;
196
197 do {
198 int val;
199 u16 cmd_mask = 0, brctl_mask = 0;
200
201 ss = strchr(s, ',');
202 if ( !ss )
203 ss = strchr(s, '\0');
204
205 if ( (val = parse_boolean("serr", s, ss)) >= 0 )
206 {
207 cmd_mask = PCI_COMMAND_SERR;
208 brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
209 }
210 else if ( (val = parse_boolean("perr", s, ss)) >= 0 )
211 {
212 cmd_mask = PCI_COMMAND_PARITY;
213 brctl_mask = PCI_BRIDGE_CTL_PARITY;
214 }
215 else
216 rc = -EINVAL;
217
218 if ( val )
219 {
220 command_mask &= ~cmd_mask;
221 bridge_ctl_mask &= ~brctl_mask;
222 }
223 else
224 {
225 command_mask |= cmd_mask;
226 bridge_ctl_mask |= brctl_mask;
227 }
228
229 s = ss + 1;
230 } while ( *ss );
231
232 return rc;
233 }
234 custom_param("pci", parse_pci_param);
235
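/*
 * Clear unwanted PCI command / bridge control bits (per the "pci=" masks)
 * and acknowledge any pending error bits in the (secondary) status register.
 */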
236 static void check_pdev(const struct pci_dev *pdev)
237 {
238 #define PCI_STATUS_CHECK \
239 (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
240 PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
241 PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
242 u16 seg = pdev->seg;
243 u8 bus = pdev->bus;
244 u8 dev = PCI_SLOT(pdev->devfn);
245 u8 func = PCI_FUNC(pdev->devfn);
246 u16 val;
247
248 if ( command_mask )
249 {
250 val = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
251 if ( val & command_mask )
252 pci_conf_write16(pdev->sbdf, PCI_COMMAND, val & ~command_mask);
253 val = pci_conf_read16(pdev->sbdf, PCI_STATUS);
254 if ( val & PCI_STATUS_CHECK )
255 {
256 printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
257 seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
258 pci_conf_write16(pdev->sbdf, PCI_STATUS, val & PCI_STATUS_CHECK);
259 }
260 }
261
262 switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
263 {
264 case PCI_HEADER_TYPE_BRIDGE:
265 if ( !bridge_ctl_mask )
266 break;
267 val = pci_conf_read16(pdev->sbdf, PCI_BRIDGE_CONTROL);
268 if ( val & bridge_ctl_mask )
269 pci_conf_write16(pdev->sbdf, PCI_BRIDGE_CONTROL,
270 val & ~bridge_ctl_mask);
271 val = pci_conf_read16(pdev->sbdf, PCI_SEC_STATUS);
272 if ( val & PCI_STATUS_CHECK )
273 {
274 printk(XENLOG_INFO
275 "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
276 seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
277 pci_conf_write16(pdev->sbdf, PCI_SEC_STATUS,
278 val & PCI_STATUS_CHECK);
279 }
280 break;
281
282 case PCI_HEADER_TYPE_CARDBUS:
283 /* TODO */
284 break;
285 }
286 #undef PCI_STATUS_CHECK
287 }
288
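/* Mark devices affected by known BAR errata so that their BARs are ignored. */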
289 static void apply_quirks(struct pci_dev *pdev)
290 {
291 uint16_t vendor = pci_conf_read16(pdev->sbdf, PCI_VENDOR_ID);
292 uint16_t device = pci_conf_read16(pdev->sbdf, PCI_DEVICE_ID);
293 static const struct {
294 uint16_t vendor, device;
295 } ignore_bars[] = {
296 /*
297 * Device [8086:2fc0]
298 * Erratum HSE43
299 * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset
300 * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html
301 */
302 { PCI_VENDOR_ID_INTEL, 0x2fc0 },
303 /*
304 * Devices [8086:6f60,6fa0,6fc0]
305 * Errata BDF2 / BDX2
306 * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration
307 * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
308 */
309 { PCI_VENDOR_ID_INTEL, 0x6f60 },
310 { PCI_VENDOR_ID_INTEL, 0x6fa0 },
311 { PCI_VENDOR_ID_INTEL, 0x6fc0 },
312 };
313 unsigned int i;
314
315 for ( i = 0; i < ARRAY_SIZE(ignore_bars); i++)
316 if ( vendor == ignore_bars[i].vendor &&
317 device == ignore_bars[i].device )
318 /*
319 * For these errata force ignoring the BARs, which prevents vPCI
320 * from trying to size the BARs or add handlers to trap accesses.
321 */
322 pdev->ignore_bars = true;
323 }
324
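/*
 * Return the pci_dev for seg/bus/devfn, allocating and initialising a new
 * entry (MSI/MSI-X capability data, bus2bridge bookkeeping, phantom function
 * stride) if the device is not yet known.
 */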
325 static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
326 {
327 struct pci_dev *pdev;
328 unsigned int pos;
329
330 list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
331 if ( pdev->bus == bus && pdev->devfn == devfn )
332 return pdev;
333
334 pdev = xzalloc(struct pci_dev);
335 if ( !pdev )
336 return NULL;
337
338 *(u16*) &pdev->seg = pseg->nr;
339 *((u8*) &pdev->bus) = bus;
340 *((u8*) &pdev->devfn) = devfn;
341 pdev->domain = NULL;
342 INIT_LIST_HEAD(&pdev->msi_list);
343
344 pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
345 PCI_CAP_ID_MSI);
346 if ( pos )
347 {
348 uint16_t ctrl = pci_conf_read16(pdev->sbdf, msi_control_reg(pos));
349
350 pdev->msi_maxvec = multi_msi_capable(ctrl);
351 }
352
353 pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
354 PCI_CAP_ID_MSIX);
355 if ( pos )
356 {
357 struct arch_msix *msix = xzalloc(struct arch_msix);
358 uint16_t ctrl;
359
360 if ( !msix )
361 {
362 xfree(pdev);
363 return NULL;
364 }
365 spin_lock_init(&msix->table_lock);
366
367 ctrl = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
368 msix->nr_entries = msix_table_size(ctrl);
369
370 pdev->msix = msix;
371 }
372
373 list_add(&pdev->alldevs_list, &pseg->alldevs_list);
374
375 /* update bus2bridge */
376 switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
377 {
378 u16 cap;
379 u8 sec_bus, sub_bus;
380
381 case DEV_TYPE_PCIe2PCI_BRIDGE:
382 case DEV_TYPE_LEGACY_PCI_BRIDGE:
383 sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS);
384 sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS);
385
386 spin_lock(&pseg->bus2bridge_lock);
387 for ( ; sec_bus <= sub_bus; sec_bus++ )
388 {
389 pseg->bus2bridge[sec_bus].map = 1;
390 pseg->bus2bridge[sec_bus].bus = bus;
391 pseg->bus2bridge[sec_bus].devfn = devfn;
392 }
393 spin_unlock(&pseg->bus2bridge_lock);
394 break;
395
396 case DEV_TYPE_PCIe_ENDPOINT:
397 pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
398 PCI_FUNC(devfn), PCI_CAP_ID_EXP);
399 BUG_ON(!pos);
400 cap = pci_conf_read16(pdev->sbdf, pos + PCI_EXP_DEVCAP);
401 if ( cap & PCI_EXP_DEVCAP_PHANTOM )
402 {
403 pdev->phantom_stride = 8 >> MASK_EXTR(cap,
404 PCI_EXP_DEVCAP_PHANTOM);
405 if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
406 pdev->phantom_stride = 0;
407 }
408 else
409 {
410 unsigned int i;
411
412 for ( i = 0; i < nr_phantom_devs; ++i )
413 if ( phantom_devs[i].seg == pseg->nr &&
414 phantom_devs[i].bus == bus &&
415 phantom_devs[i].slot == PCI_SLOT(devfn) &&
416 phantom_devs[i].stride > PCI_FUNC(devfn) )
417 {
418 pdev->phantom_stride = phantom_devs[i].stride;
419 break;
420 }
421 }
422 break;
423
424 case DEV_TYPE_PCI:
425 case DEV_TYPE_PCIe_BRIDGE:
426 case DEV_TYPE_PCI_HOST_BRIDGE:
427 break;
428
429 default:
430 printk(XENLOG_WARNING "%04x:%02x:%02x.%u: unknown type %d\n",
431 pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pdev->type);
432 break;
433 }
434
435 check_pdev(pdev);
436 apply_quirks(pdev);
437
438 return pdev;
439 }
440
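/* Undo a bridge's bus2bridge bookkeeping and free the pci_dev. */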
441 static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
442 {
443 /* update bus2bridge */
444 switch ( pdev->type )
445 {
446 uint8_t sec_bus, sub_bus;
447
448 case DEV_TYPE_PCIe2PCI_BRIDGE:
449 case DEV_TYPE_LEGACY_PCI_BRIDGE:
450 sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS);
451 sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS);
452
453 spin_lock(&pseg->bus2bridge_lock);
454 for ( ; sec_bus <= sub_bus; sec_bus++ )
455 pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
456 spin_unlock(&pseg->bus2bridge_lock);
457 break;
458
459 default:
460 break;
461 }
462
463 list_del(&pdev->alldevs_list);
464 xfree(pdev->msix);
465 xfree(pdev);
466 }
467
468 static void _pci_hide_device(struct pci_dev *pdev)
469 {
470 if ( pdev->domain )
471 return;
472 pdev->domain = dom_xen;
473 list_add(&pdev->domain_list, &dom_xen->pdev_list);
474 }
475
476 int __init pci_hide_device(unsigned int seg, unsigned int bus,
477 unsigned int devfn)
478 {
479 struct pci_dev *pdev;
480 struct pci_seg *pseg;
481 int rc = -ENOMEM;
482
483 pcidevs_lock();
484 pseg = alloc_pseg(seg);
485 if ( pseg )
486 {
487 pdev = alloc_pdev(pseg, bus, devfn);
488 if ( pdev )
489 {
490 _pci_hide_device(pdev);
491 rc = 0;
492 }
493 }
494 pcidevs_unlock();
495
496 return rc;
497 }
498
499 int __init pci_ro_device(int seg, int bus, int devfn)
500 {
501 struct pci_seg *pseg = alloc_pseg(seg);
502 struct pci_dev *pdev;
503
504 if ( !pseg )
505 return -ENOMEM;
506 pdev = alloc_pdev(pseg, bus, devfn);
507 if ( !pdev )
508 return -ENOMEM;
509
510 if ( !pseg->ro_map )
511 {
512 size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);
513
514 pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
515 if ( !pseg->ro_map )
516 return -ENOMEM;
517 memset(pseg->ro_map, 0, sz);
518 }
519
520 __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
521 _pci_hide_device(pdev);
522
523 return 0;
524 }
525
526 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
527 {
528 struct pci_seg *pseg = get_pseg(seg);
529 struct pci_dev *pdev = NULL;
530
531 ASSERT(pcidevs_locked());
532 ASSERT(seg != -1 || bus == -1);
533 ASSERT(bus != -1 || devfn == -1);
534
535 if ( !pseg )
536 {
537 if ( seg == -1 )
538 radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
539 if ( !pseg )
540 return NULL;
541 }
542
543 do {
544 list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
545 if ( (pdev->bus == bus || bus == -1) &&
546 (pdev->devfn == devfn || devfn == -1) )
547 return pdev;
548 } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
549 pseg->nr + 1, 1) );
550
551 return NULL;
552 }
553
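/*
 * Map a (possibly phantom) function back to the pci_dev of the real device
 * owning it, taking the device's phantom stride into account.
 */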
554 struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
555 {
556 struct pci_dev *pdev;
557 int stride;
558
559 if ( seg < 0 || bus < 0 || devfn < 0 )
560 return NULL;
561
562 for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
563 !pdev && stride; stride >>= 1 )
564 {
565 if ( !(devfn & (8 - stride)) )
566 continue;
567 pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
568 if ( pdev && stride != pdev->phantom_stride )
569 pdev = NULL;
570 }
571
572 return pdev;
573 }
574
575 struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg,
576 int bus, int devfn)
577 {
578 struct pci_seg *pseg = get_pseg(seg);
579 struct pci_dev *pdev = NULL;
580
581 ASSERT(seg != -1 || bus == -1);
582 ASSERT(bus != -1 || devfn == -1);
583
584 if ( !pseg )
585 {
586 if ( seg == -1 )
587 radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
588 if ( !pseg )
589 return NULL;
590 }
591
592 do {
593 list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
594 if ( (pdev->bus == bus || bus == -1) &&
595 (pdev->devfn == devfn || devfn == -1) &&
596 (pdev->domain == d) )
597 return pdev;
598 } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
599 pseg->nr + 1, 1) );
600
601 return NULL;
602 }
603
604 /**
605 * pci_enable_acs - enable ACS if the hardware supports it
606 * @pdev: the PCI device
607 */
608 static void pci_enable_acs(struct pci_dev *pdev)
609 {
610 int pos;
611 u16 cap, ctrl, seg = pdev->seg;
612 u8 bus = pdev->bus;
613
614 if ( !is_iommu_enabled(pdev->domain) )
615 return;
616
617 pos = pci_find_ext_capability(seg, bus, pdev->devfn, PCI_EXT_CAP_ID_ACS);
618 if (!pos)
619 return;
620
621 cap = pci_conf_read16(pdev->sbdf, pos + PCI_ACS_CAP);
622 ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_ACS_CTRL);
623
624 /* Source Validation */
625 ctrl |= (cap & PCI_ACS_SV);
626
627 /* P2P Request Redirect */
628 ctrl |= (cap & PCI_ACS_RR);
629
630 /* P2P Completion Redirect */
631 ctrl |= (cap & PCI_ACS_CR);
632
633 /* Upstream Forwarding */
634 ctrl |= (cap & PCI_ACS_UF);
635
636 pci_conf_write16(pdev->sbdf, pos + PCI_ACS_CTRL, ctrl);
637 }
638
639 static int iommu_add_device(struct pci_dev *pdev);
640 static int iommu_enable_device(struct pci_dev *pdev);
641 static int iommu_remove_device(struct pci_dev *pdev);
642
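/*
 * Size a memory BAR (or ROM/VF BAR, depending on flags) by writing all ones
 * and reading back the mask.  Returns the number of 32-bit BAR slots consumed
 * (1 or 2) and optionally the decoded base address.
 */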
643 unsigned int pci_size_mem_bar(pci_sbdf_t sbdf, unsigned int pos,
644 uint64_t *paddr, uint64_t *psize,
645 unsigned int flags)
646 {
647 uint32_t hi = 0, bar = pci_conf_read32(sbdf, pos);
648 uint64_t size;
649 bool is64bits = !(flags & PCI_BAR_ROM) &&
650 (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64;
651 uint32_t mask = (flags & PCI_BAR_ROM) ? (uint32_t)PCI_ROM_ADDRESS_MASK
652 : (uint32_t)PCI_BASE_ADDRESS_MEM_MASK;
653
654 ASSERT(!((flags & PCI_BAR_VF) && (flags & PCI_BAR_ROM)));
655 ASSERT((flags & PCI_BAR_ROM) ||
656 (bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY);
657 pci_conf_write32(sbdf, pos, ~0);
658 if ( is64bits )
659 {
660 if ( flags & PCI_BAR_LAST )
661 {
662 printk(XENLOG_WARNING
663 "%sdevice %04x:%02x:%02x.%u with 64-bit %sBAR in last slot\n",
664 (flags & PCI_BAR_VF) ? "SR-IOV " : "", sbdf.seg, sbdf.bus,
665 sbdf.dev, sbdf.fn, (flags & PCI_BAR_VF) ? "vf " : "");
666 *psize = 0;
667 return 1;
668 }
669 hi = pci_conf_read32(sbdf, pos + 4);
670 pci_conf_write32(sbdf, pos + 4, ~0);
671 }
672 size = pci_conf_read32(sbdf, pos) & mask;
673 if ( is64bits )
674 {
675 size |= (uint64_t)pci_conf_read32(sbdf, pos + 4) << 32;
676 pci_conf_write32(sbdf, pos + 4, hi);
677 }
678 else if ( size )
679 size |= (uint64_t)~0 << 32;
680 pci_conf_write32(sbdf, pos, bar);
681 size = -size;
682
683 if ( paddr )
684 *paddr = (bar & mask) | ((uint64_t)hi << 32);
685 *psize = size;
686
687 return is64bits ? 2 : 1;
688 }
689
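/*
 * Register a reported device: allocate segment/device structures, record
 * SR-IOV VF BAR sizes for physical functions, give unowned devices to the
 * hardware domain, and notify the IOMMU.
 */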
690 int pci_add_device(u16 seg, u8 bus, u8 devfn,
691 const struct pci_dev_info *info, nodeid_t node)
692 {
693 struct pci_seg *pseg;
694 struct pci_dev *pdev;
695 unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
696 const char *pdev_type;
697 int ret;
698 bool pf_is_extfn = false;
699
700 if ( !info )
701 pdev_type = "device";
702 else if ( info->is_virtfn )
703 {
704 pcidevs_lock();
705 pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn);
706 if ( pdev )
707 pf_is_extfn = pdev->info.is_extfn;
708 pcidevs_unlock();
709 if ( !pdev )
710 pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
711 NULL, node);
712 pdev_type = "virtual function";
713 }
714 else if ( info->is_extfn )
715 pdev_type = "extended function";
716 else
717 pdev_type = "device";
718
719 ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
720 if ( ret )
721 return ret;
722
723 ret = -ENOMEM;
724
725 pcidevs_lock();
726 pseg = alloc_pseg(seg);
727 if ( !pseg )
728 goto out;
729 pdev = alloc_pdev(pseg, bus, devfn);
730 if ( !pdev )
731 goto out;
732
733 pdev->node = node;
734
735 if ( info )
736 {
737 pdev->info = *info;
738 /*
739 * VF's 'is_extfn' field is used to indicate whether its PF is an
740 * extended function.
741 */
742 if ( pdev->info.is_virtfn )
743 pdev->info.is_extfn = pf_is_extfn;
744 }
745
746 if ( !pdev->info.is_virtfn && !pdev->vf_rlen[0] )
747 {
748 unsigned int pos = pci_find_ext_capability(seg, bus, devfn,
749 PCI_EXT_CAP_ID_SRIOV);
750 uint16_t ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_CTRL);
751
752 if ( !pos )
753 /* Nothing */;
754 else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
755 {
756 unsigned int i;
757
758 BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
759 for ( i = 0; i < PCI_SRIOV_NUM_BARS; )
760 {
761 unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
762 uint32_t bar = pci_conf_read32(pdev->sbdf, idx);
763
764 if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
765 PCI_BASE_ADDRESS_SPACE_IO )
766 {
767 printk(XENLOG_WARNING
768 "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u"
769 " in IO space\n",
770 seg, bus, slot, func, i);
771 continue;
772 }
773 ret = pci_size_mem_bar(pdev->sbdf, idx, NULL,
774 &pdev->vf_rlen[i],
775 PCI_BAR_VF |
776 ((i == PCI_SRIOV_NUM_BARS - 1) ?
777 PCI_BAR_LAST : 0));
778 ASSERT(ret);
779 i += ret;
780 }
781 }
782 else
783 printk(XENLOG_WARNING
784 "SR-IOV device %04x:%02x:%02x.%u has its virtual"
785 " functions already enabled (%04x)\n",
786 seg, bus, slot, func, ctrl);
787 }
788
789 check_pdev(pdev);
790
791 ret = 0;
792 if ( !pdev->domain )
793 {
794 pdev->domain = hardware_domain;
795 ret = iommu_add_device(pdev);
796 if ( ret )
797 {
798 pdev->domain = NULL;
799 goto out;
800 }
801
802 list_add(&pdev->domain_list, &hardware_domain->pdev_list);
803 }
804 else
805 iommu_enable_device(pdev);
806
807 pci_enable_acs(pdev);
808
809 out:
810 pcidevs_unlock();
811 if ( !ret )
812 {
813 printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
814 seg, bus, slot, func);
815 while ( pdev->phantom_stride )
816 {
817 func += pdev->phantom_stride;
818 if ( PCI_SLOT(func) )
819 break;
820 printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
821 seg, bus, slot, func);
822 }
823 }
824 return ret;
825 }
826
827 int pci_remove_device(u16 seg, u8 bus, u8 devfn)
828 {
829 struct pci_seg *pseg = get_pseg(seg);
830 struct pci_dev *pdev;
831 int ret;
832
833 ret = xsm_resource_unplug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
834 if ( ret )
835 return ret;
836
837 ret = -ENODEV;
838
839 if ( !pseg )
840 return -ENODEV;
841
842 pcidevs_lock();
843 list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
844 if ( pdev->bus == bus && pdev->devfn == devfn )
845 {
846 pci_cleanup_msi(pdev);
847 ret = iommu_remove_device(pdev);
848 if ( pdev->domain )
849 list_del(&pdev->domain_list);
850 free_pdev(pseg, pdev);
851 printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n",
852 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
853 break;
854 }
855
856 pcidevs_unlock();
857 return ret;
858 }
859
860 static int pci_clean_dpci_irq(struct domain *d,
861 struct hvm_pirq_dpci *pirq_dpci, void *arg)
862 {
863 struct dev_intx_gsi_link *digl, *tmp;
864
865 pirq_guest_unbind(d, dpci_pirq(pirq_dpci));
866
867 if ( pt_irq_need_timer(pirq_dpci->flags) )
868 kill_timer(&pirq_dpci->timer);
869
870 list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
871 {
872 list_del(&digl->list);
873 xfree(digl);
874 }
875
876 radix_tree_delete(&d->pirq_tree, dpci_pirq(pirq_dpci)->pirq);
877
878 if ( !pt_pirq_softirq_active(pirq_dpci) )
879 return 0;
880
881 domain_get_irq_dpci(d)->pending_pirq_dpci = pirq_dpci;
882
883 return -ERESTART;
884 }
885
886 static int pci_clean_dpci_irqs(struct domain *d)
887 {
888 struct hvm_irq_dpci *hvm_irq_dpci = NULL;
889
890 if ( !is_iommu_enabled(d) )
891 return 0;
892
893 if ( !is_hvm_domain(d) )
894 return 0;
895
896 spin_lock(&d->event_lock);
897 hvm_irq_dpci = domain_get_irq_dpci(d);
898 if ( hvm_irq_dpci != NULL )
899 {
900 int ret = 0;
901
902 if ( hvm_irq_dpci->pending_pirq_dpci )
903 {
904 if ( pt_pirq_softirq_active(hvm_irq_dpci->pending_pirq_dpci) )
905 ret = -ERESTART;
906 else
907 hvm_irq_dpci->pending_pirq_dpci = NULL;
908 }
909
910 if ( !ret )
911 ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
912 if ( ret )
913 {
914 spin_unlock(&d->event_lock);
915 return ret;
916 }
917
918 hvm_domain_irq(d)->dpci = NULL;
919 free_hvm_irq_dpci(hvm_irq_dpci);
920 }
921 spin_unlock(&d->event_lock);
922 return 0;
923 }
924
925 /* Caller should hold the pcidevs_lock */
926 static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
927 uint8_t devfn)
928 {
929 const struct domain_iommu *hd = dom_iommu(d);
930 struct pci_dev *pdev;
931 struct domain *target;
932 int ret = 0;
933
934 if ( !is_iommu_enabled(d) )
935 return -EINVAL;
936
937 ASSERT(pcidevs_locked());
938 pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
939 if ( !pdev )
940 return -ENODEV;
941
942 /* De-assignment from dom_io should de-quarantine the device */
943 target = ((pdev->quarantine || iommu_quarantine) &&
944 pdev->domain != dom_io) ?
945 dom_io : hardware_domain;
946
947 while ( pdev->phantom_stride )
948 {
949 devfn += pdev->phantom_stride;
950 if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
951 break;
952 ret = hd->platform_ops->reassign_device(d, target, devfn,
953 pci_to_dev(pdev));
954 if ( ret )
955 goto out;
956 }
957
958 devfn = pdev->devfn;
959 ret = hd->platform_ops->reassign_device(d, target, devfn,
960 pci_to_dev(pdev));
961 if ( ret )
962 goto out;
963
964 if ( pdev->domain == hardware_domain )
965 pdev->quarantine = false;
966
967 pdev->fault.count = 0;
968
969 out:
970 if ( ret )
971 printk(XENLOG_G_ERR "%pd: deassign (%04x:%02x:%02x.%u) failed (%d)\n",
972 d, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
973
974 return ret;
975 }
976
977 int pci_release_devices(struct domain *d)
978 {
979 struct pci_dev *pdev;
980 u8 bus, devfn;
981 int ret;
982
983 pcidevs_lock();
984 ret = pci_clean_dpci_irqs(d);
985 if ( ret )
986 {
987 pcidevs_unlock();
988 return ret;
989 }
990 while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
991 {
992 bus = pdev->bus;
993 devfn = pdev->devfn;
994 deassign_device(d, pdev->seg, bus, devfn);
995 }
996 pcidevs_unlock();
997
998 return 0;
999 }
1000
1001 #define PCI_CLASS_BRIDGE_HOST 0x0600
1002 #define PCI_CLASS_BRIDGE_PCI 0x0604
1003
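/*
 * Classify a function as endpoint, bridge variant or host bridge based on
 * its class code and PCI Express capability.
 */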
1004 enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
1005 {
1006 u16 class_device, creg;
1007 u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
1008 int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);
1009
1010 class_device = pci_conf_read16(PCI_SBDF(seg, bus, d, f), PCI_CLASS_DEVICE);
1011 switch ( class_device )
1012 {
1013 case PCI_CLASS_BRIDGE_PCI:
1014 if ( !pos )
1015 return DEV_TYPE_LEGACY_PCI_BRIDGE;
1016 creg = pci_conf_read16(PCI_SBDF(seg, bus, d, f), pos + PCI_EXP_FLAGS);
1017 switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
1018 {
1019 case PCI_EXP_TYPE_PCI_BRIDGE:
1020 return DEV_TYPE_PCIe2PCI_BRIDGE;
1021 case PCI_EXP_TYPE_PCIE_BRIDGE:
1022 return DEV_TYPE_PCI2PCIe_BRIDGE;
1023 }
1024 return DEV_TYPE_PCIe_BRIDGE;
1025 case PCI_CLASS_BRIDGE_HOST:
1026 return DEV_TYPE_PCI_HOST_BRIDGE;
1027
1028 case 0xffff:
1029 return DEV_TYPE_PCI_UNKNOWN;
1030 }
1031
1032 /* NB: treat legacy pre PCI 2.0 devices (class_device == 0) as endpoints. */
1033 return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
1034 }
1035
1036 /*
1037 * Find the upstream PCIe-to-PCI/PCI-X bridge or legacy PCI bridge.
1038 * return 0: the device is an integrated PCI device or a PCIe device
1039 * return 1: found the PCIe-to-PCI/PCI-X bridge or legacy PCI bridge
1040 * return -1: failure
1041 */
1042 int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus)
1043 {
1044 struct pci_seg *pseg = get_pseg(seg);
1045 int ret = 0;
1046 int cnt = 0;
1047
1048 if ( *bus == 0 )
1049 return 0;
1050
1051 if ( !pseg )
1052 return -1;
1053
1054 if ( !pseg->bus2bridge[*bus].map )
1055 return 0;
1056
1057 ret = 1;
1058 spin_lock(&pseg->bus2bridge_lock);
1059 while ( pseg->bus2bridge[*bus].map )
1060 {
1061 *secbus = *bus;
1062 *devfn = pseg->bus2bridge[*bus].devfn;
1063 *bus = pseg->bus2bridge[*bus].bus;
1064 if ( cnt++ >= MAX_BUSES )
1065 {
1066 ret = -1;
1067 goto out;
1068 }
1069 }
1070
1071 out:
1072 spin_unlock(&pseg->bus2bridge_lock);
1073 return ret;
1074 }
1075
1076 bool_t __init pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func)
1077 {
1078 u32 vendor;
1079
1080 vendor = pci_conf_read32(PCI_SBDF(seg, bus, dev, func), PCI_VENDOR_ID);
1081 /* some broken boards return 0 or ~0 if a slot is empty: */
1082 if ( (vendor == 0xffffffff) || (vendor == 0x00000000) ||
1083 (vendor == 0x0000ffff) || (vendor == 0xffff0000) )
1084 return 0;
1085 return 1;
1086 }
1087
1088 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
1089 {
1090 struct pci_dev *pdev;
1091 s_time_t now = NOW();
1092 u16 cword;
1093
1094 pcidevs_lock();
1095 pdev = pci_get_real_pdev(seg, bus, devfn);
1096 if ( pdev )
1097 {
1098 if ( now < pdev->fault.time ||
1099 now - pdev->fault.time > MILLISECS(10) )
1100 pdev->fault.count >>= 1;
1101 pdev->fault.time = now;
1102 if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
1103 pdev = NULL;
1104 }
1105 pcidevs_unlock();
1106
1107 if ( !pdev )
1108 return;
1109
1110 /* Tell the device to stop DMAing; we can't rely on the guest to
1111 * control it for us. */
1112 cword = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
1113 pci_conf_write16(pdev->sbdf, PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
1114 }
1115
1116 /*
1117 * Scan PCI devices to add all existing PCI devices to alldevs_list,
1118 * and set up the PCI hierarchy in the bus2bridge array.
1119 */
1120 static int __init _scan_pci_devices(struct pci_seg *pseg, void *arg)
1121 {
1122 struct pci_dev *pdev;
1123 int bus, dev, func;
1124
1125 for ( bus = 0; bus < 256; bus++ )
1126 {
1127 for ( dev = 0; dev < 32; dev++ )
1128 {
1129 for ( func = 0; func < 8; func++ )
1130 {
1131 if ( !pci_device_detect(pseg->nr, bus, dev, func) )
1132 {
1133 if ( !func )
1134 break;
1135 continue;
1136 }
1137
1138 pdev = alloc_pdev(pseg, bus, PCI_DEVFN(dev, func));
1139 if ( !pdev )
1140 {
1141 printk(XENLOG_WARNING "%04x:%02x:%02x.%u: alloc_pdev failed\n",
1142 pseg->nr, bus, dev, func);
1143 return -ENOMEM;
1144 }
1145
1146 if ( !func && !(pci_conf_read8(PCI_SBDF(pseg->nr, bus, dev,
1147 func),
1148 PCI_HEADER_TYPE) & 0x80) )
1149 break;
1150 }
1151 }
1152 }
1153
1154 return 0;
1155 }
1156
1157 int __init scan_pci_devices(void)
1158 {
1159 int ret;
1160
1161 pcidevs_lock();
1162 ret = pci_segments_iterate(_scan_pci_devices, NULL);
1163 pcidevs_unlock();
1164
1165 return ret;
1166 }
1167
1168 struct setup_hwdom {
1169 struct domain *d;
1170 int (*handler)(u8 devfn, struct pci_dev *);
1171 };
1172
1173 static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
1174 struct pci_dev *pdev)
1175 {
1176 u8 devfn = pdev->devfn;
1177 int err;
1178
1179 do {
1180 err = ctxt->handler(devfn, pdev);
1181 if ( err )
1182 {
1183 printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
1184 pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1185 ctxt->d->domain_id, err);
1186 if ( devfn == pdev->devfn )
1187 return;
1188 }
1189 devfn += pdev->phantom_stride;
1190 } while ( devfn != pdev->devfn &&
1191 PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
1192
1193 err = vpci_add_handlers(pdev);
1194 if ( err )
1195 printk(XENLOG_ERR "setup of vPCI for d%d failed: %d\n",
1196 ctxt->d->domain_id, err);
1197 }
1198
1199 static int __hwdom_init _setup_hwdom_pci_devices(struct pci_seg *pseg, void *arg)
1200 {
1201 struct setup_hwdom *ctxt = arg;
1202 int bus, devfn;
1203
1204 for ( bus = 0; bus < 256; bus++ )
1205 {
1206 for ( devfn = 0; devfn < 256; devfn++ )
1207 {
1208 struct pci_dev *pdev = pci_get_pdev(pseg->nr, bus, devfn);
1209
1210 if ( !pdev )
1211 continue;
1212
1213 if ( !pdev->domain )
1214 {
1215 pdev->domain = ctxt->d;
1216 list_add(&pdev->domain_list, &ctxt->d->pdev_list);
1217 setup_one_hwdom_device(ctxt, pdev);
1218 }
1219 else if ( pdev->domain == dom_xen )
1220 {
1221 pdev->domain = ctxt->d;
1222 setup_one_hwdom_device(ctxt, pdev);
1223 pdev->domain = dom_xen;
1224 }
1225 else if ( pdev->domain != ctxt->d )
1226 printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
1227 pdev->domain->domain_id, pseg->nr, bus,
1228 PCI_SLOT(devfn), PCI_FUNC(devfn));
1229
1230 if ( iommu_verbose )
1231 {
1232 pcidevs_unlock();
1233 process_pending_softirqs();
1234 pcidevs_lock();
1235 }
1236 }
1237
1238 if ( !iommu_verbose )
1239 {
1240 pcidevs_unlock();
1241 process_pending_softirqs();
1242 pcidevs_lock();
1243 }
1244 }
1245
1246 return 0;
1247 }
1248
1249 void __hwdom_init setup_hwdom_pci_devices(
1250 struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
1251 {
1252 struct setup_hwdom ctxt = { .d = d, .handler = handler };
1253
1254 pcidevs_lock();
1255 pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
1256 pcidevs_unlock();
1257 }
1258
1259 #ifdef CONFIG_ACPI
1260 #include <acpi/acpi.h>
1261 #include <acpi/apei.h>
1262
1263 static int hest_match_pci(const struct acpi_hest_aer_common *p,
1264 const struct pci_dev *pdev)
1265 {
1266 return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
1267 ACPI_HEST_BUS(p->bus) == pdev->bus &&
1268 p->device == PCI_SLOT(pdev->devfn) &&
1269 p->function == PCI_FUNC(pdev->devfn);
1270 }
1271
1272 static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
1273 const struct pci_dev *pdev)
1274 {
1275 unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
1276 PCI_SLOT(pdev->devfn),
1277 PCI_FUNC(pdev->devfn),
1278 PCI_CAP_ID_EXP);
1279 u8 pcie = MASK_EXTR(pci_conf_read16(pdev->sbdf, pos + PCI_EXP_FLAGS),
1280 PCI_EXP_FLAGS_TYPE);
1281
1282 switch ( hest_hdr->type )
1283 {
1284 case ACPI_HEST_TYPE_AER_ROOT_PORT:
1285 return pcie == PCI_EXP_TYPE_ROOT_PORT;
1286 case ACPI_HEST_TYPE_AER_ENDPOINT:
1287 return pcie == PCI_EXP_TYPE_ENDPOINT;
1288 case ACPI_HEST_TYPE_AER_BRIDGE:
1289 return pci_conf_read16(pdev->sbdf, PCI_CLASS_DEVICE) ==
1290 PCI_CLASS_BRIDGE_PCI;
1291 }
1292
1293 return 0;
1294 }
1295
1296 struct aer_hest_parse_info {
1297 const struct pci_dev *pdev;
1298 bool_t firmware_first;
1299 };
1300
1301 static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
1302 {
1303 if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
1304 hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
1305 hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
1306 return 1;
1307 return 0;
1308 }
1309
1310 static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
1311 {
1312 struct aer_hest_parse_info *info = data;
1313 const struct acpi_hest_aer_common *p;
1314 bool_t ff;
1315
1316 if ( !hest_source_is_pcie_aer(hest_hdr) )
1317 return 0;
1318
1319 p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
1320 ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
1321
1322 /*
1323 * If no specific device is supplied, determine whether
1324 * FIRMWARE_FIRST is set for *any* PCIe device.
1325 */
1326 if ( !info->pdev )
1327 {
1328 info->firmware_first |= ff;
1329 return 0;
1330 }
1331
1332 /* Otherwise, check the specific device */
1333 if ( p->flags & ACPI_HEST_GLOBAL ?
1334 hest_match_type(hest_hdr, info->pdev) :
1335 hest_match_pci(p, info->pdev) )
1336 {
1337 info->firmware_first = ff;
1338 return 1;
1339 }
1340
1341 return 0;
1342 }
1343
1344 bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
1345 {
1346 struct aer_hest_parse_info info = { .pdev = pdev };
1347
1348 return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
1349 PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
1350 apei_hest_parse(aer_hest_parse, &info) >= 0 &&
1351 info.firmware_first;
1352 }
1353 #endif
1354
1355 static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
1356 {
1357 struct pci_dev *pdev;
1358 struct msi_desc *msi;
1359
1360 printk("==== segment %04x ====\n", pseg->nr);
1361
1362 list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
1363 {
1364 printk("%04x:%02x:%02x.%u - %pd - node %-3d - MSIs < ",
1365 pseg->nr, pdev->bus,
1366 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev->domain,
1367 (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
1368 list_for_each_entry ( msi, &pdev->msi_list, list )
1369 printk("%d ", msi->irq);
1370 printk(">\n");
1371 }
1372
1373 return 0;
1374 }
1375
1376 static void dump_pci_devices(unsigned char ch)
1377 {
1378 printk("==== PCI devices ====\n");
1379 pcidevs_lock();
1380 pci_segments_iterate(_dump_pci_devices, NULL);
1381 pcidevs_unlock();
1382 }
1383
1384 static int __init setup_dump_pcidevs(void)
1385 {
1386 register_keyhandler('Q', dump_pci_devices, "dump PCI devices", 1);
1387 return 0;
1388 }
1389 __initcall(setup_dump_pcidevs);
1390
1391 int iommu_update_ire_from_msi(
1392 struct msi_desc *msi_desc, struct msi_msg *msg)
1393 {
1394 return iommu_intremap
1395 ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
1396 }
1397
1398 void iommu_read_msi_from_ire(
1399 struct msi_desc *msi_desc, struct msi_msg *msg)
1400 {
1401 if ( iommu_intremap )
1402 iommu_vcall(&iommu_ops, read_msi_from_ire, msi_desc, msg);
1403 }
1404
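/*
 * Notify the IOMMU driver of a newly added device, including any phantom
 * functions implied by its phantom stride.
 */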
1405 static int iommu_add_device(struct pci_dev *pdev)
1406 {
1407 const struct domain_iommu *hd;
1408 int rc;
1409 u8 devfn;
1410
1411 if ( !pdev->domain )
1412 return -EINVAL;
1413
1414 ASSERT(pcidevs_locked());
1415
1416 hd = dom_iommu(pdev->domain);
1417 if ( !is_iommu_enabled(pdev->domain) )
1418 return 0;
1419
1420 rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
1421 if ( rc || !pdev->phantom_stride )
1422 return rc;
1423
1424 for ( devfn = pdev->devfn ; ; )
1425 {
1426 devfn += pdev->phantom_stride;
1427 if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
1428 return 0;
1429 rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
1430 if ( rc )
1431 printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
1432 pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
1433 }
1434 }
1435
1436 static int iommu_enable_device(struct pci_dev *pdev)
1437 {
1438 const struct domain_iommu *hd;
1439
1440 if ( !pdev->domain )
1441 return -EINVAL;
1442
1443 ASSERT(pcidevs_locked());
1444
1445 hd = dom_iommu(pdev->domain);
1446 if ( !is_iommu_enabled(pdev->domain) ||
1447 !hd->platform_ops->enable_device )
1448 return 0;
1449
1450 return hd->platform_ops->enable_device(pci_to_dev(pdev));
1451 }
1452
1453 static int iommu_remove_device(struct pci_dev *pdev)
1454 {
1455 const struct domain_iommu *hd;
1456 u8 devfn;
1457
1458 if ( !pdev->domain )
1459 return -EINVAL;
1460
1461 hd = dom_iommu(pdev->domain);
1462 if ( !is_iommu_enabled(pdev->domain) )
1463 return 0;
1464
1465 for ( devfn = pdev->devfn ; pdev->phantom_stride; )
1466 {
1467 int rc;
1468
1469 devfn += pdev->phantom_stride;
1470 if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
1471 break;
1472 rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
1473 if ( !rc )
1474 continue;
1475
1476 printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
1477 pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
1478 return rc;
1479 }
1480
1481 return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
1482 }
1483
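/*
 * Return 0 if the device exists and is owned by the hardware domain or
 * dom_io (i.e. it is available for assignment), -ENODEV or -EBUSY otherwise.
 */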
1484 static int device_assigned(u16 seg, u8 bus, u8 devfn)
1485 {
1486 struct pci_dev *pdev;
1487 int rc = 0;
1488
1489 ASSERT(pcidevs_locked());
1490 pdev = pci_get_pdev(seg, bus, devfn);
1491
1492 if ( !pdev )
1493 rc = -ENODEV;
1494 /*
1495 * If the device exists and it is not owned by either the hardware
1496 * domain or dom_io then it must be assigned to a guest, or be
1497 * hidden (owned by dom_xen).
1498 */
1499 else if ( pdev->domain != hardware_domain &&
1500 pdev->domain != dom_io )
1501 rc = -EBUSY;
1502
1503 return rc;
1504 }
1505
1506 /* Caller should hold the pcidevs_lock */
1507 static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
1508 {
1509 const struct domain_iommu *hd = dom_iommu(d);
1510 struct pci_dev *pdev;
1511 int rc = 0;
1512
1513 if ( !is_iommu_enabled(d) )
1514 return 0;
1515
1516 /* Prevent device assignment if memory paging or memory sharing has been
1517 * enabled for this domain */
1518 if ( d != dom_io &&
1519 unlikely(mem_sharing_enabled(d) ||
1520 vm_event_check_ring(d->vm_event_paging) ||
1521 p2m_get_hostp2m(d)->global_logdirty) )
1522 return -EXDEV;
1523
1524 /* device_assigned() should already have cleared the device for assignment */
1525 ASSERT(pcidevs_locked());
1526 pdev = pci_get_pdev(seg, bus, devfn);
1527 ASSERT(pdev && (pdev->domain == hardware_domain ||
1528 pdev->domain == dom_io));
1529
1530 if ( pdev->msix )
1531 {
1532 rc = pci_reset_msix_state(pdev);
1533 if ( rc )
1534 goto done;
1535 msixtbl_init(d);
1536 }
1537
1538 pdev->fault.count = 0;
1539
1540 if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
1541 goto done;
1542
1543 for ( ; pdev->phantom_stride; rc = 0 )
1544 {
1545 devfn += pdev->phantom_stride;
1546 if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
1547 break;
1548 rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
1549 }
1550
1551 done:
1552 if ( rc )
1553 printk(XENLOG_G_WARNING "%pd: assign (%04x:%02x:%02x.%u) failed (%d)\n",
1554 d, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
1555 /* The device is assigned to dom_io so mark it as quarantined */
1556 else if ( d == dom_io )
1557 pdev->quarantine = true;
1558
1559 return rc;
1560 }
1561
1562 static int iommu_get_device_group(
1563 struct domain *d, u16 seg, u8 bus, u8 devfn,
1564 XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
1565 {
1566 const struct domain_iommu *hd = dom_iommu(d);
1567 struct pci_dev *pdev;
1568 int group_id, sdev_id;
1569 u32 bdf;
1570 int i = 0;
1571 const struct iommu_ops *ops = hd->platform_ops;
1572
1573 if ( !is_iommu_enabled(d) || !ops->get_device_group_id )
1574 return 0;
1575
1576 group_id = ops->get_device_group_id(seg, bus, devfn);
1577
1578 pcidevs_lock();
1579 for_each_pdev( d, pdev )
1580 {
1581 if ( (pdev->seg != seg) ||
1582 ((pdev->bus == bus) && (pdev->devfn == devfn)) )
1583 continue;
1584
1585 if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
1586 continue;
1587
1588 sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
1589 if ( (sdev_id == group_id) && (i < max_sdevs) )
1590 {
1591 bdf = 0;
1592 bdf |= (pdev->bus & 0xff) << 16;
1593 bdf |= (pdev->devfn & 0xff) << 8;
1594
1595 if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
1596 {
1597 pcidevs_unlock();
1598 return -1;
1599 }
1600 i++;
1601 }
1602 }
1603
1604 pcidevs_unlock();
1605
1606 return i;
1607 }
1608
1609 void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
1610 {
1611 pcidevs_lock();
1612
1613 disable_ats_device(pdev);
1614
1615 ASSERT(pdev->domain);
1616 if ( d != pdev->domain )
1617 {
1618 pcidevs_unlock();
1619 return;
1620 }
1621
1622 list_del(&pdev->domain_list);
1623 pdev->domain = NULL;
1624 _pci_hide_device(pdev);
1625
1626 if ( !d->is_shutting_down && printk_ratelimit() )
1627 printk(XENLOG_ERR
1628 "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
1629 d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
1630 PCI_FUNC(pdev->devfn));
1631 if ( !is_hardware_domain(d) )
1632 domain_crash(d);
1633
1634 pcidevs_unlock();
1635 }
1636
1637 int iommu_do_pci_domctl(
1638 struct xen_domctl *domctl, struct domain *d,
1639 XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
1640 {
1641 u16 seg;
1642 u8 bus, devfn;
1643 int ret = 0;
1644 uint32_t machine_sbdf;
1645
1646 switch ( domctl->cmd )
1647 {
1648 unsigned int flags;
1649
1650 case XEN_DOMCTL_get_device_group:
1651 {
1652 u32 max_sdevs;
1653 XEN_GUEST_HANDLE_64(uint32) sdevs;
1654
1655 ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
1656 if ( ret )
1657 break;
1658
1659 seg = domctl->u.get_device_group.machine_sbdf >> 16;
1660 bus = PCI_BUS(domctl->u.get_device_group.machine_sbdf);
1661 devfn = PCI_DEVFN2(domctl->u.get_device_group.machine_sbdf);
1662 max_sdevs = domctl->u.get_device_group.max_sdevs;
1663 sdevs = domctl->u.get_device_group.sdev_array;
1664
1665 ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
1666 if ( ret < 0 )
1667 {
1668 dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
1669 ret = -EFAULT;
1670 domctl->u.get_device_group.num_sdevs = 0;
1671 }
1672 else
1673 {
1674 domctl->u.get_device_group.num_sdevs = ret;
1675 ret = 0;
1676 }
1677 if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
1678 ret = -EFAULT;
1679 }
1680 break;
1681
1682 case XEN_DOMCTL_assign_device:
1683 ASSERT(d);
1684 /* fall through */
1685 case XEN_DOMCTL_test_assign_device:
1686 /* Don't support self-assignment of devices. */
1687 if ( d == current->domain )
1688 {
1689 ret = -EINVAL;
1690 break;
1691 }
1692
1693 ret = -ENODEV;
1694 if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
1695 break;
1696
1697 ret = -EINVAL;
1698 flags = domctl->u.assign_device.flags;
1699 if ( domctl->cmd == XEN_DOMCTL_assign_device
1700 ? d->is_dying || (flags & ~XEN_DOMCTL_DEV_RDM_RELAXED)
1701 : flags )
1702 break;
1703
1704 machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
1705
1706 ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
1707 if ( ret )
1708 break;
1709
1710 seg = machine_sbdf >> 16;
1711 bus = PCI_BUS(machine_sbdf);
1712 devfn = PCI_DEVFN2(machine_sbdf);
1713
1714 pcidevs_lock();
1715 ret = device_assigned(seg, bus, devfn);
1716 if ( domctl->cmd == XEN_DOMCTL_test_assign_device )
1717 {
1718 if ( ret )
1719 {
1720 printk(XENLOG_G_INFO
1721 "%04x:%02x:%02x.%u already assigned, or non-existent\n",
1722 seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1723 ret = -EINVAL;
1724 }
1725 }
1726 else if ( !ret )
1727 ret = assign_device(d, seg, bus, devfn, flags);
1728 pcidevs_unlock();
1729 if ( ret == -ERESTART )
1730 ret = hypercall_create_continuation(__HYPERVISOR_domctl,
1731 "h", u_domctl);
1732 break;
1733
1734 case XEN_DOMCTL_deassign_device:
1735 /* Don't support self-deassignment of devices. */
1736 if ( d == current->domain )
1737 {
1738 ret = -EINVAL;
1739 break;
1740 }
1741
1742 ret = -ENODEV;
1743 if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
1744 break;
1745
1746 ret = -EINVAL;
1747 if ( domctl->u.assign_device.flags )
1748 break;
1749
1750 machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
1751
1752 ret = xsm_deassign_device(XSM_HOOK, d, machine_sbdf);
1753 if ( ret )
1754 break;
1755
1756 seg = machine_sbdf >> 16;
1757 bus = PCI_BUS(machine_sbdf);
1758 devfn = PCI_DEVFN2(machine_sbdf);
1759
1760 pcidevs_lock();
1761 ret = deassign_device(d, seg, bus, devfn);
1762 pcidevs_unlock();
1763 break;
1764
1765 default:
1766 ret = -ENOSYS;
1767 break;
1768 }
1769
1770 return ret;
1771 }
1772
1773 /*
1774 * Local variables:
1775 * mode: C
1776 * c-file-style: "BSD"
1777 * c-basic-offset: 4
1778 * indent-tabs-mode: nil
1779 * End:
1780 */
1781