/*
 * File: msi.c
 * Purpose: PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <xen/lib.h>
#include <xen/init.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/sched.h>
#include <xen/acpi.h>
#include <xen/cpu.h>
#include <xen/errno.h>
#include <xen/param.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/iocap.h>
#include <xen/keyhandler.h>
#include <xen/pfn.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/msi.h>
#include <asm/fixmap.h>
#include <asm/p2m.h>
#include <mach_apic.h>
#include <io_ports.h>
#include <irq_vectors.h>
#include <public/physdev.h>
#include <xen/iommu.h>
#include <xsm/xsm.h>
#include <xen/vpci.h>

static s8 __read_mostly use_msi = -1;
boolean_param("msi", use_msi);

static void __pci_disable_msix(struct msi_desc *);

/* Bitmap tracking which MSI-X fixmap pages are in use. */
static DEFINE_SPINLOCK(msix_fixmap_lock);
static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);

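/*
 * Allocate a free fixmap slot from the FIX_MSIX_IO_RESERV_* range under
 * msix_fixmap_lock, returning the fixmap index or -ENOMEM when all
 * FIX_MSIX_MAX_PAGES slots are in use.
 */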
static int msix_fixmap_alloc(void)
{
    int i, rc = -ENOMEM;

    spin_lock(&msix_fixmap_lock);
    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
        if ( !test_bit(i, &msix_fixmap_pages) )
            break;
    if ( i == FIX_MSIX_MAX_PAGES )
        goto out;
    rc = FIX_MSIX_IO_RESERV_BASE + i;
    set_bit(i, &msix_fixmap_pages);

 out:
    spin_unlock(&msix_fixmap_lock);
    return rc;
}

static void msix_fixmap_free(int idx)
{
    spin_lock(&msix_fixmap_lock);
    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
    spin_unlock(&msix_fixmap_lock);
}

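/*
 * Map (or re-use) the fixmap entry covering the MSI-X table page that
 * contains @entry_paddr. Mappings are reference counted per table page,
 * so multiple entries on the same page share one fixmap slot.
 */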
static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
                           u64 entry_paddr)
{
    long nr_page;
    int idx;

    nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);

    if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
        return -EINVAL;

    spin_lock(&msix->table_lock);
    if ( msix->table_refcnt[nr_page]++ == 0 )
    {
        idx = msix_fixmap_alloc();
        if ( idx < 0 )
        {
            msix->table_refcnt[nr_page]--;
            goto out;
        }
        set_fixmap_nocache(idx, entry_paddr);
        msix->table_idx[nr_page] = idx;
    }
    else
        idx = msix->table_idx[nr_page];

 out:
    spin_unlock(&msix->table_lock);
    return idx;
}

static void msix_put_fixmap(struct arch_msix *msix, int idx)
{
    int i;

    spin_lock(&msix->table_lock);
    for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
    {
        if ( msix->table_idx[i] == idx )
            break;
    }
    if ( i == MAX_MSIX_TABLE_PAGES )
        goto out;

    if ( --msix->table_refcnt[i] == 0 )
    {
        clear_fixmap(idx);
        msix_fixmap_free(idx);
        msix->table_idx[i] = 0;
    }

 out:
    spin_unlock(&msix->table_lock);
}

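/*
 * Check whether memory decoding is enabled in the device's command
 * register. For virtual functions the physical function's command
 * register is consulted instead, as VF BAR decoding is controlled
 * through the PF.
 */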
static bool memory_decoded(const struct pci_dev *dev)
{
    pci_sbdf_t sbdf = dev->sbdf;

    if ( dev->info.is_virtfn )
    {
        sbdf.bus = dev->info.physfn.bus;
        sbdf.devfn = dev->info.physfn.devfn;
    }

    return pci_conf_read16(sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY;
}

static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
{
    uint16_t control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));

    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
        return false;

    return memory_decoded(dev);
}

/*
 * MSI message composition
 */
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg)
{
    memset(msg, 0, sizeof(*msg));

    if ( vector < FIRST_DYNAMIC_VECTOR )
        return;

    if ( cpu_mask )
    {
        cpumask_t *mask = this_cpu(scratch_cpumask);

        if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
            return;

        cpumask_and(mask, cpu_mask, &cpu_online_map);
        msg->dest32 = cpu_mask_to_apicid(mask);
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo = MSI_ADDR_BASE_LO |
                      (INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC
                                     : MSI_ADDR_DESTMODE_PHYS) |
                      ((INT_DELIVERY_MODE != dest_LowestPrio)
                       ? MSI_ADDR_REDIRECTION_CPU
                       : MSI_ADDR_REDIRECTION_LOWPRI) |
                      MSI_ADDR_DEST_ID(msg->dest32);

    msg->data = MSI_DATA_TRIGGER_EDGE |
                MSI_DATA_LEVEL_ASSERT |
                ((INT_DELIVERY_MODE != dest_LowestPrio)
                 ? MSI_DATA_DELIVERY_FIXED
                 : MSI_DATA_DELIVERY_LOWPRI) |
                MSI_DATA_VECTOR(vector);
}

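/*
 * Write @msg to the device, recording it in @entry. With interrupt
 * remapping active the IOMMU's remap entry is updated first, which may
 * transform the message actually written to the hardware. For
 * multi-vector MSI only entry 0 is programmed; for MSI-X the table entry
 * must currently be reachable (function enabled, memory decoding on).
 */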
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
    entry->msg = *msg;

    if ( iommu_intremap )
    {
        int rc;

        ASSERT(msg != &entry->msg);
        rc = iommu_update_ire_from_msi(entry, msg);
        if ( rc )
            return rc;
    }

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
    {
        struct pci_dev *dev = entry->dev;
        int pos = entry->msi_attrib.pos;
        int nr = entry->msi_attrib.entry_nr;

        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
        if ( nr )
            return 0;

        pci_conf_write32(dev->sbdf, msi_lower_address_reg(pos),
                         msg->address_lo);
        if ( entry->msi_attrib.is_64 )
        {
            pci_conf_write32(dev->sbdf, msi_upper_address_reg(pos),
                             msg->address_hi);
            pci_conf_write16(dev->sbdf, msi_data_reg(pos, 1), msg->data);
        }
        else
            pci_conf_write16(dev->sbdf, msi_data_reg(pos, 0), msg->data);
        break;
    }
    case PCI_CAP_ID_MSIX:
    {
        void __iomem *base = entry->mask_base;

        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            return -ENXIO;
        writel(msg->address_lo,
               base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
        writel(msg->address_hi,
               base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
        writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
        break;
    }
    default:
        BUG();
    }

    return 0;
}

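/*
 * Retarget an MSI to the CPUs in @mask: update the descriptor affinity,
 * then rewrite the message with the new destination ID and vector while
 * holding desc->lock.
 */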
void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
{
    struct msi_msg msg;
    unsigned int dest;
    struct msi_desc *msi_desc = desc->msi_desc;

    dest = set_desc_affinity(desc, mask);
    if ( dest == BAD_APICID || !msi_desc )
        return;

    ASSERT(spin_is_locked(&desc->lock));

    msg = msi_desc->msg;
    msg.data &= ~MSI_DATA_VECTOR_MASK;
    msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
    msg.address_lo |= MSI_ADDR_DEST_ID(dest);
    msg.dest32 = dest;

    write_msi_msg(msi_desc, &msg);
}

void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable)
{
    uint16_t control = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                       pos + PCI_MSI_FLAGS);

    control &= ~PCI_MSI_FLAGS_ENABLE;
    if ( enable )
        control |= PCI_MSI_FLAGS_ENABLE;
    pci_conf_write16(PCI_SBDF(seg, bus, slot, func),
                     pos + PCI_MSI_FLAGS, control);
}

static void msi_set_enable(struct pci_dev *dev, int enable)
{
    int pos;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
    if ( pos )
        __msi_set_enable(seg, bus, slot, func, pos, enable);
}

static void msix_set_enable(struct pci_dev *dev, int enable)
{
    int pos;
    u16 control, seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
    if ( pos )
    {
        control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
        control &= ~PCI_MSIX_FLAGS_ENABLE;
        if ( enable )
            control |= PCI_MSIX_FLAGS_ENABLE;
        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
    }
}

int msi_maskable_irq(const struct msi_desc *entry)
{
    BUG_ON(!entry);
    return entry->msi_attrib.type != PCI_CAP_ID_MSI
           || entry->msi_attrib.maskbit;
}

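/*
 * Set the effective mask state of an interrupt to @host | @guest. For MSI
 * this toggles the per-vector bit in the capability's mask register (if
 * implemented); for MSI-X the per-entry vector control bit is used,
 * falling back to the function-wide MASKALL bit when the entry cannot be
 * reached (MSI-X disabled or memory decoding off). Returns whether the
 * requested state could be recorded.
 */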
static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
{
    struct msi_desc *entry = desc->msi_desc;
    struct pci_dev *pdev;
    u16 seg, control;
    u8 bus, slot, func;
    bool flag = host || guest, maskall;

    ASSERT(spin_is_locked(&desc->lock));
    BUG_ON(!entry || !entry->dev);
    pdev = entry->dev;
    seg = pdev->seg;
    bus = pdev->bus;
    slot = PCI_SLOT(pdev->devfn);
    func = PCI_FUNC(pdev->devfn);
    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        if ( entry->msi_attrib.maskbit )
        {
            u32 mask_bits;

            mask_bits = pci_conf_read32(pdev->sbdf, entry->msi.mpos);
            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
            pci_conf_write32(pdev->sbdf, entry->msi.mpos, mask_bits);
        }
        break;
    case PCI_CAP_ID_MSIX:
        maskall = pdev->msix->host_maskall;
        control = pci_conf_read16(pdev->sbdf,
                                  msix_control_reg(entry->msi_attrib.pos));
        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
        {
            pdev->msix->host_maskall = 1;
            pci_conf_write16(pdev->sbdf,
                             msix_control_reg(entry->msi_attrib.pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
        }
        if ( likely(memory_decoded(pdev)) )
        {
            writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
                break;

            entry->msi_attrib.host_masked = host;
            entry->msi_attrib.guest_masked = guest;

            flag = true;
        }
        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
        {
            domid_t domid = pdev->domain->domain_id;

            maskall = true;
            if ( pdev->msix->warned != domid )
            {
                pdev->msix->warned = domid;
                printk(XENLOG_G_WARNING
                       "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
                       desc->irq, domid, seg, bus, slot, func);
            }
        }
        pdev->msix->host_maskall = maskall;
        if ( maskall || pdev->msix->guest_maskall )
            control |= PCI_MSIX_FLAGS_MASKALL;
        pci_conf_write16(pdev->sbdf,
                         msix_control_reg(entry->msi_attrib.pos), control);
        return flag;
    default:
        return 0;
    }
    entry->msi_attrib.host_masked = host;
    entry->msi_attrib.guest_masked = guest;

    return 1;
}

static int msi_get_mask_bit(const struct msi_desc *entry)
{
    if ( !entry->dev )
        return -1;

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        if ( !entry->msi_attrib.maskbit )
            break;
        return (pci_conf_read32(entry->dev->sbdf, entry->msi.mpos) >>
                entry->msi_attrib.entry_nr) & 1;
    case PCI_CAP_ID_MSIX:
        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            break;
        return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
    }
    return -1;
}

void mask_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 1,
                                    desc->msi_desc->msi_attrib.guest_masked)) )
        BUG_ON(!(desc->status & IRQ_DISABLED));
}

void unmask_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 0,
                                    desc->msi_desc->msi_attrib.guest_masked)) )
        WARN();
}

void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
{
    msi_set_mask_bit(desc, desc->msi_desc->msi_attrib.host_masked, mask);
}

static unsigned int startup_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
        WARN();
    return 0;
}

static void shutdown_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
        BUG_ON(!(desc->status & IRQ_DISABLED));
}

void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
    irq_complete_move(desc);
    move_native_irq(desc);
}

static void ack_maskable_msi_irq(struct irq_desc *desc)
{
    ack_nonmaskable_msi_irq(desc);
    ack_APIC_irq(); /* ACKTYPE_NONE */
}

/*
 * IRQ chip for MSI PCI/PCI-X/PCI-Express devices,
 * which implement the MSI or MSI-X capability structure.
 */
static hw_irq_controller pci_msi_maskable = {
    .typename = "PCI-MSI/-X",
    .startup = startup_msi_irq,
    .shutdown = shutdown_msi_irq,
    .enable = unmask_msi_irq,
    .disable = mask_msi_irq,
    .ack = ack_maskable_msi_irq,
    .set_affinity = set_msi_affinity
};

/* As above, but without having masking capability. */
static hw_irq_controller pci_msi_nonmaskable = {
    .typename = "PCI-MSI",
    .startup = irq_startup_none,
    .shutdown = irq_shutdown_none,
    .enable = irq_enable_none,
    .disable = irq_disable_none,
    .ack = ack_nonmaskable_msi_irq,
    .end = end_nonmaskable_irq,
    .set_affinity = set_msi_affinity
};

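/*
 * Allocate and minimally initialize an array of @nr struct msi_desc.
 * Note that only the first entry's list head gets initialized.
 */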
static struct msi_desc *alloc_msi_entry(unsigned int nr)
{
    struct msi_desc *entry;

    entry = xmalloc_array(struct msi_desc, nr);
    if ( !entry )
        return NULL;

    INIT_LIST_HEAD(&entry->list);
    while ( nr-- )
    {
        entry[nr].dev = NULL;
        entry[nr].irq = -1;
        entry[nr].remap_index = -1;
        entry[nr].pi_desc = NULL;
        entry[nr].irte_initialized = false;
    }

    return entry;
}

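/*
 * Bind @msidesc to @desc and pick the matching IRQ chip. For MSI-X the
 * function may need to be transiently enabled (with all vectors masked)
 * so that the table entry write in __setup_msi_irq() can take effect.
 */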
int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
    const struct pci_dev *pdev = msidesc->dev;
    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
    u16 control = ~0;
    int rc;

    if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
    {
        control = pci_conf_read16(pdev->sbdf, cpos);
        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
            pci_conf_write16(pdev->sbdf, cpos,
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
    }

    rc = __setup_msi_irq(desc, msidesc,
                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
                                                   : &pci_msi_nonmaskable);

    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
        pci_conf_write16(pdev->sbdf, cpos, control);

    return rc;
}

int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
                    hw_irq_controller *handler)
{
    struct msi_msg msg;
    int ret;

    desc->msi_desc = msidesc;
    desc->handler = handler;
    msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg);
    ret = write_msi_msg(msidesc, &msg);
    if ( unlikely(ret) )
    {
        desc->handler = &no_irq_type;
        desc->msi_desc = NULL;
    }

    return ret;
}

int msi_free_irq(struct msi_desc *entry)
{
    unsigned int nr = entry->msi_attrib.type != PCI_CAP_ID_MSIX
                      ? entry->msi.nvec : 1;

    while ( nr-- )
    {
        if ( entry[nr].irq >= 0 )
            destroy_irq(entry[nr].irq);

        /* Free the unused IRTE if interrupt remapping is enabled. */
        if ( iommu_intremap )
            iommu_update_ire_from_msi(entry + nr, NULL);
    }

    if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        msix_put_fixmap(entry->dev->msix,
                        virt_to_fix((unsigned long)entry->mask_base));

    list_del(&entry->list);
    xfree(entry);
    return 0;
}

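/*
 * Look up the msi_desc of the given capability type on @dev, optionally
 * filtering by IRQ (@irq == -1 matches any entry).
 */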
static struct msi_desc *find_msi_entry(struct pci_dev *dev,
                                       int irq, int cap_id)
{
    struct msi_desc *entry;

    list_for_each_entry( entry, &dev->msi_list, list )
    {
        if ( entry->msi_attrib.type == cap_id &&
             (irq == -1 || entry->irq == irq) )
            return entry;
    }

    return NULL;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Set up the MSI capability structure of the device function with a single
 * MSI irq, regardless of whether the device function is capable of handling
 * multiple messages. A return of zero indicates the successful setup of
 * entry zero with the new MSI irq; non-zero indicates failure.
 **/
static int msi_capability_init(struct pci_dev *dev,
                               int irq,
                               struct msi_desc **desc,
                               unsigned int nvec)
{
    struct msi_desc *entry;
    int pos;
    unsigned int i, mpos;
    u16 control, seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    ASSERT(pcidevs_locked());
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
    if ( !pos )
        return -ENODEV;
    control = pci_conf_read16(dev->sbdf, msi_control_reg(pos));
    if ( nvec > dev->msi_maxvec )
        return dev->msi_maxvec;
    control &= ~PCI_MSI_FLAGS_QSIZE;
    multi_msi_enable(control, nvec);

    /* MSI Entry Initialization */
    msi_set_enable(dev, 0); /* Ensure MSI is disabled while we set it up. */

    entry = alloc_msi_entry(nvec);
    if ( !entry )
        return -ENOMEM;

    mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
    for ( i = 0; i < nvec; ++i )
    {
        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
        entry[i].msi_attrib.is_64 = is_64bit_address(control);
        entry[i].msi_attrib.entry_nr = i;
        entry[i].msi_attrib.host_masked =
        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
        entry[i].msi_attrib.guest_masked = 0;
        entry[i].msi_attrib.pos = pos;
        if ( entry[i].msi_attrib.maskbit )
            entry[i].msi.mpos = mpos;
        entry[i].msi.nvec = 0;
        entry[i].dev = dev;
    }
    entry->msi.nvec = nvec;
    entry->irq = irq;
    if ( entry->msi_attrib.maskbit )
    {
        u32 maskbits;

        /* All MSIs are unmasked by default; mask them all. */
        maskbits = pci_conf_read32(dev->sbdf, mpos);
        maskbits |= ~(uint32_t)0 >> (32 - dev->msi_maxvec);
        pci_conf_write32(dev->sbdf, mpos, maskbits);
    }
    list_add_tail(&entry->list, &dev->msi_list);

    *desc = entry;
    /* Restore the original MSI enabled bits */
    if ( !hardware_domain )
    {
        /*
         * ..., except for internal requests (before Dom0 starts), in which
         * case we rather need to behave "normally", i.e. not follow the split
         * brain model where Dom0 actually enables MSI (and disables INTx).
         */
        pci_intx(dev, false);
        control |= PCI_MSI_FLAGS_ENABLE;
    }
    pci_conf_write16(dev->sbdf, msi_control_reg(pos), control);

    return 0;
}

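/*
 * Return the physical address programmed into memory BAR @bir of the
 * given device, or 0 on any failure (I/O BAR, out of range BIR, ...).
 * For @vf >= 0 the SR-IOV VF BARs of the physical function are read
 * instead, with the displacement of the VF's slice added in.
 */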
static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
{
    u8 limit;
    u32 addr, base = PCI_BASE_ADDRESS_0;
    u64 disp = 0;

    if ( vf >= 0 )
    {
        struct pci_dev *pdev = pci_get_pdev(seg, bus, PCI_DEVFN(slot, func));
        unsigned int pos = pci_find_ext_capability(seg, bus,
                                                   PCI_DEVFN(slot, func),
                                                   PCI_EXT_CAP_ID_SRIOV);
        uint16_t ctrl = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                        pos + PCI_SRIOV_CTRL);
        uint16_t num_vf = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_NUM_VF);
        uint16_t offset = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_VF_OFFSET);
        uint16_t stride = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_VF_STRIDE);

        if ( !pdev || !pos ||
             !(ctrl & PCI_SRIOV_CTRL_VFE) ||
             !(ctrl & PCI_SRIOV_CTRL_MSE) ||
             !num_vf || !offset || (num_vf > 1 && !stride) ||
             bir >= PCI_SRIOV_NUM_BARS ||
             !pdev->vf_rlen[bir] )
            return 0;
        base = pos + PCI_SRIOV_BAR;
        vf -= PCI_BDF(bus, slot, func) + offset;
        if ( vf < 0 )
            return 0;
        if ( stride )
        {
            if ( vf % stride )
                return 0;
            vf /= stride;
        }
        if ( vf >= num_vf )
            return 0;
        BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
        disp = vf * pdev->vf_rlen[bir];
        limit = PCI_SRIOV_NUM_BARS;
    }
    else switch ( pci_conf_read8(PCI_SBDF(seg, bus, slot, func),
                                 PCI_HEADER_TYPE) & 0x7f )
    {
    case PCI_HEADER_TYPE_NORMAL:
        limit = 6;
        break;
    case PCI_HEADER_TYPE_BRIDGE:
        limit = 2;
        break;
    case PCI_HEADER_TYPE_CARDBUS:
        limit = 1;
        break;
    default:
        return 0;
    }

    if ( bir >= limit )
        return 0;
    addr = pci_conf_read32(PCI_SBDF(seg, bus, slot, func), base + bir * 4);
    if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
        return 0;
    if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
    {
        addr &= PCI_BASE_ADDRESS_MEM_MASK;
        if ( ++bir >= limit )
            return 0;
        return addr + disp +
               ((uint64_t)pci_conf_read32(PCI_SBDF(seg, bus, slot, func),
                                          base + bir * 4) << 32);
    }
    return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Set up the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs. A return of zero indicates the successful
 * setup of the requested MSI-X entries with allocated irqs; non-zero
 * indicates failure.
 **/
static int msix_capability_init(struct pci_dev *dev,
                                struct msi_info *msi,
                                struct msi_desc **desc)
{
    struct arch_msix *msix = dev->msix;
    struct msi_desc *entry = NULL;
    u16 control;
    u64 table_paddr;
    u32 table_offset;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    bool maskall = msix->host_maskall, zap_on_error = false;
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);

    if ( !pos )
        return -ENODEV;

    ASSERT(pcidevs_locked());

    control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
    /*
     * Ensure MSI-X interrupts are masked during setup. Some devices require
     * MSI-X to be enabled before we can touch the MSI-X registers. We need
     * to mask all the vectors to prevent interrupts coming in before they're
     * fully set up.
     */
    msix->host_maskall = 1;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                     control | (PCI_MSIX_FLAGS_ENABLE |
                                PCI_MSIX_FLAGS_MASKALL));

    if ( unlikely(!memory_decoded(dev)) )
    {
        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                         control & ~PCI_MSIX_FLAGS_ENABLE);
        return -ENXIO;
    }

    if ( desc )
    {
        entry = alloc_msi_entry(1);
        if ( !entry )
        {
            pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                             control & ~PCI_MSIX_FLAGS_ENABLE);
            return -ENOMEM;
        }
        ASSERT(msi);
    }

    /* Locate MSI-X table region */
    table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos));
    if ( !msix->used_entries &&
         (!msi ||
          (is_hardware_domain(current->domain) &&
           (dev->domain == current->domain || dev->domain == dom_io))) )
    {
        unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc;
        int vf;
        paddr_t pba_paddr;
        unsigned int pba_offset;

        if ( !dev->info.is_virtfn )
        {
            pbus = bus;
            pslot = slot;
            pfunc = func;
            vf = -1;
        }
        else
        {
            pbus = dev->info.physfn.bus;
            pslot = PCI_SLOT(dev->info.physfn.devfn);
            pfunc = PCI_FUNC(dev->info.physfn.devfn);
            vf = PCI_BDF2(dev->bus, dev->devfn);
        }

        table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
        WARN_ON(msi && msi->table_base != table_paddr);
        if ( !table_paddr )
        {
            if ( !msi || !msi->table_base )
            {
                pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                xfree(entry);
                return -ENXIO;
            }
            table_paddr = msi->table_base;
        }
        table_paddr += table_offset & ~PCI_MSIX_BIRMASK;

        msix->table.first = PFN_DOWN(table_paddr);
        msix->table.last = PFN_DOWN(table_paddr +
                                    msix->nr_entries * PCI_MSIX_ENTRY_SIZE - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->table.first,
                                        msix->table.last));

        pba_offset = pci_conf_read32(dev->sbdf, msix_pba_offset_reg(pos));
        bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
        pba_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
        WARN_ON(!pba_paddr);
        pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;

        msix->pba.first = PFN_DOWN(pba_paddr);
        msix->pba.last = PFN_DOWN(pba_paddr +
                                  BITS_TO_LONGS(msix->nr_entries) - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first,
                                        msix->pba.last));

        zap_on_error = true;
    }
    else if ( !msix->table.first )
    {
        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
        xfree(entry);
        return -ENODATA;
    }
    else
        table_paddr = (msix->table.first << PAGE_SHIFT) +
                      PAGE_OFFSET(table_offset & ~PCI_MSIX_BIRMASK);

    if ( entry )
    {
        /* Map MSI-X table region */
        u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
        void __iomem *base;

        if ( idx < 0 )
        {
            if ( zap_on_error )
            {
                msix->table.first = 0;
                msix->pba.first = 0;

                control &= ~PCI_MSIX_FLAGS_ENABLE;
            }

            pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
            xfree(entry);
            return idx;
        }
        base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));

        /* Mask interrupt here */
        writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

        entry->msi_attrib.type = PCI_CAP_ID_MSIX;
        entry->msi_attrib.is_64 = 1;
        entry->msi_attrib.entry_nr = msi->entry_nr;
        entry->msi_attrib.maskbit = 1;
        entry->msi_attrib.host_masked = 1;
        entry->msi_attrib.guest_masked = 1;
        entry->msi_attrib.pos = pos;
        entry->irq = msi->irq;
        entry->dev = dev;
        entry->mask_base = base;

        list_add_tail(&entry->list, &dev->msi_list);
        *desc = entry;
    }

    if ( !msix->used_entries )
    {
        maskall = false;
        if ( !msix->guest_maskall )
            control &= ~PCI_MSIX_FLAGS_MASKALL;
        else
            control |= PCI_MSIX_FLAGS_MASKALL;

        if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                msix->table.last) )
            WARN();
        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
                                msix->pba.last) )
            WARN();

        if ( desc )
        {
            struct domain *currd = current->domain;
            struct domain *d = dev->domain ?: currd;

            if ( !is_hardware_domain(currd) || d != currd )
                printk("%s use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
                       is_hardware_domain(currd)
                       ? XENLOG_WARNING "Potentially insecure"
                       : XENLOG_ERR "Insecure",
                       seg, bus, slot, func, d->domain_id);
            if ( !is_hardware_domain(d) &&
                 /* Assume a domain without memory has no mappings yet. */
                 (!is_hardware_domain(currd) || domain_tot_pages(d)) )
                domain_crash(d);
            /* XXX How to deal with existing mappings? */
        }
    }
    WARN_ON(msix->table.first != (table_paddr >> PAGE_SHIFT));
    ++msix->used_entries;

    /* Restore MSI-X enabled bits */
    if ( !hardware_domain )
    {
        /*
         * ..., except for internal requests (before Dom0 starts), in which
         * case we rather need to behave "normally", i.e. not follow the split
         * brain model where Dom0 actually enables MSI (and disables INTx).
         */
        pci_intx(dev, false);
        control |= PCI_MSIX_FLAGS_ENABLE;
        control &= ~PCI_MSIX_FLAGS_MASKALL;
        maskall = 0;
    }
    msix->host_maskall = maskall;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);

    return 0;
}

/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Set up the MSI capability structure of the device function with a single
 * MSI irq, upon its software driver's request to enable MSI mode on its
 * hardware device function. A return of zero indicates the successful setup
 * of entry zero with the new MSI irq; non-zero indicates failure.
 **/

static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
    struct pci_dev *pdev;
    struct msi_desc *old_desc;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
    if ( !pdev )
        return -ENODEV;

    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
    if ( old_desc )
    {
        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
               msi->irq, msi->seg, msi->bus,
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
        return -EEXIST;
    }

    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
    if ( old_desc )
    {
        printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
               msi->seg, msi->bus,
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
        __pci_disable_msix(old_desc);
    }

    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
}

static void __pci_disable_msi(struct msi_desc *entry)
{
    struct pci_dev *dev;

    dev = entry->dev;
    msi_set_enable(dev, 0);
    if ( entry->irq > 0 && !(irq_to_desc(entry->irq)->status & IRQ_GUEST) )
        pci_intx(dev, true);

    BUG_ON(list_empty(&dev->msi_list));
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Set up the MSI-X capability structure of the device function with the
 * number of requested irqs, upon its software driver's request to enable
 * MSI-X mode on its hardware device function. A return of zero indicates
 * the successful configuration of the MSI-X capability structure with
 * newly allocated MSI-X irqs. A return of < 0 indicates a failure, while
 * a return of > 0 indicates that the request exceeds the number of irqs
 * available; the driver should use the returned value to re-send its
 * request.
 **/
static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
{
    struct pci_dev *pdev;
    u8 slot = PCI_SLOT(msi->devfn);
    u8 func = PCI_FUNC(msi->devfn);
    struct msi_desc *old_desc;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
    if ( !pdev || !pdev->msix )
        return -ENODEV;

    if ( msi->entry_nr >= pdev->msix->nr_entries )
        return -EINVAL;

    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
    if ( old_desc )
    {
        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
               msi->irq, msi->seg, msi->bus, slot, func);
        return -EEXIST;
    }

    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( old_desc )
    {
        printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
               msi->seg, msi->bus, slot, func);
        __pci_disable_msi(old_desc);
    }

    return msix_capability_init(pdev, msi, desc);
}

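/*
 * Drop one reference to the MSI-X table; when the last user goes away,
 * remove the table and PBA ranges from mmio_ro_ranges again.
 */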
static void _pci_cleanup_msix(struct arch_msix *msix)
{
    if ( !--msix->used_entries )
    {
        if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
                                   msix->table.last) )
            WARN();
        msix->table.first = 0;
        msix->table.last = 0;

        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
                                   msix->pba.last) )
            WARN();
        msix->pba.first = 0;
        msix->pba.last = 0;
    }
}

static void __pci_disable_msix(struct msi_desc *entry)
{
    struct pci_dev *dev = entry->dev;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);
    u16 control = pci_conf_read16(dev->sbdf,
                                  msix_control_reg(entry->msi_attrib.pos));
    bool maskall = dev->msix->host_maskall;

    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
    {
        dev->msix->host_maskall = 1;
        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                         control | (PCI_MSIX_FLAGS_ENABLE |
                                    PCI_MSIX_FLAGS_MASKALL));
    }

    BUG_ON(list_empty(&dev->msi_list));

    if ( likely(memory_decoded(dev)) )
        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
    {
        printk(XENLOG_WARNING
               "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
               entry->irq, seg, bus, slot, func);
        maskall = true;
    }
    dev->msix->host_maskall = maskall;
    if ( maskall || dev->msix->guest_maskall )
        control |= PCI_MSIX_FLAGS_MASKALL;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);

    _pci_cleanup_msix(dev->msix);
}

int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
{
    int rc;
    struct pci_dev *pdev;

    if ( !use_msi )
        return 0;

    pcidevs_lock();
    pdev = pci_get_pdev(seg, bus, devfn);
    if ( !pdev )
        rc = -ENODEV;
    else if ( pdev->msix->used_entries != !!off )
        rc = -EBUSY;
    else if ( off )
    {
        _pci_cleanup_msix(pdev->msix);
        rc = 0;
    }
    else
        rc = msix_capability_init(pdev, NULL, NULL);
    pcidevs_unlock();

    return rc;
}

/*
 * Note: this only constructs the msi_desc; the irq_desc is not changed
 * here, and the interrupt remains masked.
 */
int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EPERM;

    return msi->table_base ? __pci_enable_msix(msi, desc) :
                             __pci_enable_msi(msi, desc);
}

/*
 * Device side only; the irq_desc is not touched.
 */
void pci_disable_msi(struct msi_desc *msi_desc)
{
    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        __pci_disable_msi(msi_desc);
    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
        __pci_disable_msix(msi_desc);
}

static void msi_free_irqs(struct pci_dev *dev)
{
    struct msi_desc *entry, *tmp;

    list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
    {
        pci_disable_msi(entry);
        msi_free_irq(entry);
    }
}

void pci_cleanup_msi(struct pci_dev *pdev)
{
    /* Disable MSI and/or MSI-X */
    msi_set_enable(pdev, 0);
    msix_set_enable(pdev, 0);
    msi_free_irqs(pdev);
}

int pci_reset_msix_state(struct pci_dev *pdev)
{
    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, pdev->sbdf.dev,
                                           pdev->sbdf.fn, PCI_CAP_ID_MSIX);

    ASSERT(pos);
    /*
     * Xen expects the device to be in its post-reset state, i.e.
     * host_maskall = guest_maskall = false and all entries with their mask
     * bit set. Check that the MASKALL bit is clear; finding it set may
     * signal that the device hasn't been reset properly.
     */
    if ( pci_conf_read16(pdev->sbdf, msix_control_reg(pos)) &
         PCI_MSIX_FLAGS_MASKALL )
        return -EBUSY;

    pdev->msix->host_maskall = false;
    pdev->msix->guest_maskall = false;

    return 0;
}

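/*
 * Intercept config space writes to MSI/MSI-X registers: guest updates of
 * the MSI-X MASKALL bit and of the MSI per-vector mask bits are recorded
 * as guest mask state, and Xen's own (host) mask bits are merged into the
 * value being written. Returns 1 when *data has been adjusted and the
 * write may proceed, 0 when the access isn't intercepted, and -EACCES for
 * disallowed accesses.
 */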
int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
                                 unsigned int size, uint32_t *data)
{
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 slot = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    struct msi_desc *entry;
    unsigned int pos;

    if ( pdev->msix )
    {
        entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
        pos = entry ? entry->msi_attrib.pos
                    : pci_find_cap_offset(seg, bus, slot, func,
                                          PCI_CAP_ID_MSIX);
        ASSERT(pos);

        if ( reg >= pos && reg < msix_pba_offset_reg(pos) + 4 )
        {
            if ( reg != msix_control_reg(pos) || size != 2 )
                return -EACCES;

            pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
            if ( pdev->msix->host_maskall )
                *data |= PCI_MSIX_FLAGS_MASKALL;

            return 1;
        }
    }

    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( entry && entry->msi_attrib.maskbit )
    {
        uint32_t unused;
        unsigned int nvec = entry->msi.nvec;

        pos = entry->msi_attrib.pos;
        if ( reg < pos || reg >= entry->msi.mpos + 8 )
            return 0;

        if ( reg == msi_control_reg(pos) )
            return size == 2 ? 1 : -EACCES;
        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
            return -EACCES;

        unused = ~(uint32_t)0 >> (32 - pdev->msi_maxvec);
        for ( pos = 0; pos < nvec; ++pos, ++entry )
        {
            entry->msi_attrib.guest_masked =
                *data >> entry->msi_attrib.entry_nr;
            if ( entry->msi_attrib.host_masked )
                *data |= 1 << pos;
            unused &= ~(1 << pos);
        }

        *data |= unused;

        return 1;
    }

    return 0;
}

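/*
 * Reprogram all MSI/MSI-X state of @pdev (messages, per-vector masks,
 * multi-message enable) from the recorded msi_desc entries, e.g. after
 * the device was reset or lost power.
 */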
int pci_restore_msi_state(struct pci_dev *pdev)
{
    unsigned long flags;
    int irq;
    int ret;
    struct msi_desc *entry, *tmp;
    struct irq_desc *desc;
    struct msi_msg msg;
    u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
    unsigned int type = 0, pos = 0;
    u16 control = 0;

    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EOPNOTSUPP;

    ret = xsm_resource_setup_pci(XSM_PRIV,
                                 (pdev->seg << 16) | (pdev->bus << 8) |
                                 pdev->devfn);
    if ( ret )
        return ret;

    list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
    {
        unsigned int i = 0, nr = 1;

        irq = entry->irq;
        desc = &irq_desc[irq];

        spin_lock_irqsave(&desc->lock, flags);

        ASSERT(desc->msi_desc == entry);

        if ( desc->msi_desc != entry )
        {
        bogus:
            dprintk(XENLOG_ERR,
                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                    pdev->seg, pdev->bus, slot, func, i);
            spin_unlock_irqrestore(&desc->lock, flags);
            if ( type == PCI_CAP_ID_MSIX )
                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
            return -EINVAL;
        }

        ASSERT(!type || type == entry->msi_attrib.type);
        pos = entry->msi_attrib.pos;
        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
        {
            msi_set_enable(pdev, 0);
            nr = entry->msi.nvec;
        }
        else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        {
            control = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
            pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
            if ( unlikely(!memory_decoded(pdev)) )
            {
                spin_unlock_irqrestore(&desc->lock, flags);
                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                return -ENXIO;
            }
        }
        type = entry->msi_attrib.type;

        msg = entry->msg;
        write_msi_msg(entry, &msg);

        for ( i = 0; ; )
        {
            if ( unlikely(!msi_set_mask_bit(desc,
                                            entry[i].msi_attrib.host_masked,
                                            entry[i].msi_attrib.guest_masked)) )
                BUG();

            if ( !--nr )
                break;

            spin_unlock_irqrestore(&desc->lock, flags);
            desc = &irq_desc[entry[++i].irq];
            spin_lock_irqsave(&desc->lock, flags);
            if ( desc->msi_desc != entry + i )
                goto bogus;
        }

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( type == PCI_CAP_ID_MSI )
        {
            unsigned int cpos = msi_control_reg(pos);

            control = pci_conf_read16(pdev->sbdf, cpos) & ~PCI_MSI_FLAGS_QSIZE;
            multi_msi_enable(control, entry->msi.nvec);
            pci_conf_write16(pdev->sbdf, cpos, control);

            msi_set_enable(pdev, 1);
        }
    }

    if ( type == PCI_CAP_ID_MSIX )
        pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                         control | PCI_MSIX_FLAGS_ENABLE);

    return 0;
}

void __init early_msi_init(void)
{
    if ( use_msi < 0 )
        use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
    if ( !use_msi )
        return;
}

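/*
 * 'M' debug-key handler: print one line of state for each MSI in use,
 * followed by the vPCI MSI state.
 */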
static void dump_msi(unsigned char key)
{
    unsigned int irq;

    printk("MSI information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        struct irq_desc *desc = irq_to_desc(irq);
        const struct msi_desc *entry;
        u32 addr, data, dest32;
        signed char mask;
        struct msi_attrib attr;
        unsigned long flags;
        const char *type = "???";

        if ( !irq_desc_initialized(desc) )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        entry = desc->msi_desc;
        if ( !entry )
        {
            spin_unlock_irqrestore(&desc->lock, flags);
            continue;
        }

        switch ( entry->msi_attrib.type )
        {
        case PCI_CAP_ID_MSI: type = "MSI"; break;
        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
        case 0:
            switch ( entry->msi_attrib.pos )
            {
            case MSI_TYPE_HPET: type = "HPET"; break;
            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
            }
            break;
        }

        data = entry->msg.data;
        addr = entry->msg.address_lo;
        dest32 = entry->msg.dest32;
        attr = entry->msi_attrib;
        if ( entry->msi_attrib.type )
            mask = msi_get_mask_bit(entry);
        else
            mask = -1;

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( mask >= 0 )
            mask += '0';
        else
            mask = '?';
        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
               " dest=%08x mask=%d/%c%c/%c\n",
               type, irq,
               (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
               data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
               data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
               data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
               addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
               addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
               dest32, attr.maskbit,
               attr.host_masked ? 'H' : ' ',
               attr.guest_masked ? 'G' : ' ',
               mask);
    }

    vpci_dump_msi();
}

static int __init msi_setup_keyhandler(void)
{
    register_keyhandler('M', dump_msi, "dump MSI state", 1);
    return 0;
}
__initcall(msi_setup_keyhandler);