/*
 * File:    msi.c
 * Purpose: PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <xen/lib.h>
#include <xen/init.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/sched.h>
#include <xen/acpi.h>
#include <xen/cpu.h>
#include <xen/errno.h>
#include <xen/param.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/iocap.h>
#include <xen/keyhandler.h>
#include <xen/pfn.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/msi.h>
#include <asm/fixmap.h>
#include <asm/p2m.h>
#include <mach_apic.h>
#include <io_ports.h>
#include <irq_vectors.h>
#include <public/physdev.h>
#include <xen/iommu.h>
#include <xsm/xsm.h>
#include <xen/vpci.h>

static s8 __read_mostly use_msi = -1;
boolean_param("msi", use_msi);
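
/*
 * Illustrative note (an addition, not from the original source): "msi" is
 * a Xen boolean command line option, so MSI support can be forced off
 * with e.g. "msi=no" on the hypervisor command line. When the option is
 * left unset (-1), early_msi_init() at the bottom of this file derives
 * the default from the ACPI FADT's ACPI_FADT_NO_MSI boot flag.
 */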

static void __pci_disable_msix(struct msi_desc *);

/* Bitmap indicating which fixmap pages are free. */
static DEFINE_SPINLOCK(msix_fixmap_lock);
static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);

static int msix_fixmap_alloc(void)
{
    int i, rc = -ENOMEM;

    spin_lock(&msix_fixmap_lock);
    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
        if ( !test_bit(i, &msix_fixmap_pages) )
            break;
    if ( i == FIX_MSIX_MAX_PAGES )
        goto out;
    rc = FIX_MSIX_IO_RESERV_BASE + i;
    set_bit(i, &msix_fixmap_pages);

 out:
    spin_unlock(&msix_fixmap_lock);
    return rc;
}

static void msix_fixmap_free(int idx)
{
    spin_lock(&msix_fixmap_lock);
    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
    spin_unlock(&msix_fixmap_lock);
}

static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
                           u64 entry_paddr)
{
    long nr_page;
    int idx;

    nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);

    if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
        return -EINVAL;

    spin_lock(&msix->table_lock);
    if ( msix->table_refcnt[nr_page]++ == 0 )
    {
        idx = msix_fixmap_alloc();
        if ( idx < 0 )
        {
            msix->table_refcnt[nr_page]--;
            goto out;
        }
        set_fixmap_nocache(idx, entry_paddr);
        msix->table_idx[nr_page] = idx;
    }
    else
        idx = msix->table_idx[nr_page];

 out:
    spin_unlock(&msix->table_lock);
    return idx;
}

static void msix_put_fixmap(struct arch_msix *msix, int idx)
{
    int i;

    spin_lock(&msix->table_lock);
    for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
    {
        if ( msix->table_idx[i] == idx )
            break;
    }
    if ( i == MAX_MSIX_TABLE_PAGES )
        goto out;

    if ( --msix->table_refcnt[i] == 0 )
    {
        clear_fixmap(idx);
        msix_fixmap_free(idx);
        msix->table_idx[i] = 0;
    }

 out:
    spin_unlock(&msix->table_lock);
}
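
/*
 * Hedged usage sketch (illustrative addition, not from the original
 * source): the fixmap helpers above are reference counted per MSI-X
 * table page, so a typical caller pairs them like this:
 *
 *     int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
 *
 *     if ( idx >= 0 )
 *     {
 *         void __iomem *base = fix_to_virt(idx) +
 *                              (entry_paddr & (PAGE_SIZE - 1));
 *
 *         ... access the MSI-X entry through 'base' ...
 *     }
 *
 * msix_capability_init() below takes the reference when an entry is set
 * up, and msi_free_irq() drops it again via msix_put_fixmap().
 */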

static bool memory_decoded(const struct pci_dev *dev)
{
    pci_sbdf_t sbdf = dev->sbdf;

    if ( dev->info.is_virtfn )
    {
        sbdf.bus = dev->info.physfn.bus;
        sbdf.devfn = dev->info.physfn.devfn;
    }

    return pci_conf_read16(sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY;
}

static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
{
    uint16_t control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));

    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
        return false;

    return memory_decoded(dev);
}
/*
 * MSI message composition
 */
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg)
{
    memset(msg, 0, sizeof(*msg));

    if ( vector < FIRST_DYNAMIC_VECTOR )
        return;

    if ( cpu_mask )
    {
        cpumask_t *mask = this_cpu(scratch_cpumask);

        if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
            return;

        cpumask_and(mask, cpu_mask, &cpu_online_map);
        msg->dest32 = cpu_mask_to_apicid(mask);
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo = MSI_ADDR_BASE_LO |
                      (INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC
                                     : MSI_ADDR_DESTMODE_PHYS) |
                      ((INT_DELIVERY_MODE != dest_LowestPrio)
                       ? MSI_ADDR_REDIRECTION_CPU
                       : MSI_ADDR_REDIRECTION_LOWPRI) |
                      MSI_ADDR_DEST_ID(msg->dest32);

    msg->data = MSI_DATA_TRIGGER_EDGE |
                MSI_DATA_LEVEL_ASSERT |
                ((INT_DELIVERY_MODE != dest_LowestPrio)
                 ? MSI_DATA_DELIVERY_FIXED
                 : MSI_DATA_DELIVERY_LOWPRI) |
                MSI_DATA_VECTOR(vector);
}
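
/*
 * Worked example (illustrative addition, using the standard x86 MSI
 * layout): for vector 0x30, physical destination mode and fixed delivery
 * to the CPU with APIC ID 2, msi_compose_msg() yields
 *
 *     msg->address_hi = 0x00000000;  MSI_ADDR_BASE_HI
 *     msg->address_lo = 0xfee02000;  0xfee00000 | (2 << 12)
 *     msg->data       = 0x00004030;  assert (bit 14) | vector 0x30
 *
 * i.e. the destination APIC ID lives in address bits 12-19 and the
 * vector in data bits 0-7; set_msi_affinity() below only ever rewrites
 * those two fields.
 */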

static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
    entry->msg = *msg;

    if ( iommu_intremap )
    {
        int rc;

        ASSERT(msg != &entry->msg);
        rc = iommu_update_ire_from_msi(entry, msg);
        if ( rc )
            return rc;
    }

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
    {
        struct pci_dev *dev = entry->dev;
        int pos = entry->msi_attrib.pos;
        int nr = entry->msi_attrib.entry_nr;

        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
        if ( nr )
            return 0;

        pci_conf_write32(dev->sbdf, msi_lower_address_reg(pos),
                         msg->address_lo);
        if ( entry->msi_attrib.is_64 )
        {
            pci_conf_write32(dev->sbdf, msi_upper_address_reg(pos),
                             msg->address_hi);
            pci_conf_write16(dev->sbdf, msi_data_reg(pos, 1), msg->data);
        }
        else
            pci_conf_write16(dev->sbdf, msi_data_reg(pos, 0), msg->data);
        break;
    }
    case PCI_CAP_ID_MSIX:
    {
        void __iomem *base = entry->mask_base;

        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            return -ENXIO;
        writel(msg->address_lo,
               base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
        writel(msg->address_hi,
               base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
        writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
        break;
    }
    default:
        BUG();
    }

    return 0;
}

void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
{
    struct msi_msg msg;
    unsigned int dest;
    struct msi_desc *msi_desc = desc->msi_desc;

    dest = set_desc_affinity(desc, mask);
    if ( dest == BAD_APICID || !msi_desc )
        return;

    ASSERT(spin_is_locked(&desc->lock));

    msg = msi_desc->msg;
    msg.data &= ~MSI_DATA_VECTOR_MASK;
    msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
    msg.address_lo |= MSI_ADDR_DEST_ID(dest);
    msg.dest32 = dest;

    write_msi_msg(msi_desc, &msg);
}

void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable)
{
    uint16_t control = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                       pos + PCI_MSI_FLAGS);

    control &= ~PCI_MSI_FLAGS_ENABLE;
    if ( enable )
        control |= PCI_MSI_FLAGS_ENABLE;
    pci_conf_write16(PCI_SBDF(seg, bus, slot, func),
                     pos + PCI_MSI_FLAGS, control);
}

static void msi_set_enable(struct pci_dev *dev, int enable)
{
    int pos;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
    if ( pos )
        __msi_set_enable(seg, bus, slot, func, pos, enable);
}

static void msix_set_enable(struct pci_dev *dev, int enable)
{
    int pos;
    u16 control, seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
    if ( pos )
    {
        control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
        control &= ~PCI_MSIX_FLAGS_ENABLE;
        if ( enable )
            control |= PCI_MSIX_FLAGS_ENABLE;
        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
    }
}

int msi_maskable_irq(const struct msi_desc *entry)
{
    BUG_ON(!entry);
    return entry->msi_attrib.type != PCI_CAP_ID_MSI
           || entry->msi_attrib.maskbit;
}

static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
{
    struct msi_desc *entry = desc->msi_desc;
    struct pci_dev *pdev;
    u16 seg, control;
    u8 bus, slot, func;
    bool flag = host || guest, maskall;

    ASSERT(spin_is_locked(&desc->lock));
    BUG_ON(!entry || !entry->dev);
    pdev = entry->dev;
    seg = pdev->seg;
    bus = pdev->bus;
    slot = PCI_SLOT(pdev->devfn);
    func = PCI_FUNC(pdev->devfn);
    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        if ( entry->msi_attrib.maskbit )
        {
            u32 mask_bits;

            mask_bits = pci_conf_read32(pdev->sbdf, entry->msi.mpos);
            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
            pci_conf_write32(pdev->sbdf, entry->msi.mpos, mask_bits);
        }
        break;
    case PCI_CAP_ID_MSIX:
        maskall = pdev->msix->host_maskall;
        control = pci_conf_read16(pdev->sbdf,
                                  msix_control_reg(entry->msi_attrib.pos));
        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
        {
            pdev->msix->host_maskall = 1;
            pci_conf_write16(pdev->sbdf,
                             msix_control_reg(entry->msi_attrib.pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
        }
        if ( likely(memory_decoded(pdev)) )
        {
            writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
                break;

            entry->msi_attrib.host_masked = host;
            entry->msi_attrib.guest_masked = guest;

            flag = true;
        }
        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
        {
            domid_t domid = pdev->domain->domain_id;

            maskall = true;
            if ( pdev->msix->warned != domid )
            {
                pdev->msix->warned = domid;
                printk(XENLOG_G_WARNING
                       "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
                       desc->irq, domid, seg, bus, slot, func);
            }
        }
        pdev->msix->host_maskall = maskall;
        if ( maskall || pdev->msix->guest_maskall )
            control |= PCI_MSIX_FLAGS_MASKALL;
        pci_conf_write16(pdev->sbdf,
                         msix_control_reg(entry->msi_attrib.pos), control);
        return flag;
    default:
        return 0;
    }
    entry->msi_attrib.host_masked = host;
    entry->msi_attrib.guest_masked = guest;

    return 1;
}

static int msi_get_mask_bit(const struct msi_desc *entry)
{
    if ( !entry->dev )
        return -1;

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        if ( !entry->msi_attrib.maskbit )
            break;
        return (pci_conf_read32(entry->dev->sbdf, entry->msi.mpos) >>
                entry->msi_attrib.entry_nr) & 1;
    case PCI_CAP_ID_MSIX:
        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            break;
        return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
    }
    return -1;
}

void mask_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 1,
                                    desc->msi_desc->msi_attrib.guest_masked)) )
        BUG_ON(!(desc->status & IRQ_DISABLED));
}

void unmask_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 0,
                                    desc->msi_desc->msi_attrib.guest_masked)) )
        WARN();
}

void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
{
    msi_set_mask_bit(desc, desc->msi_desc->msi_attrib.host_masked, mask);
}

static unsigned int startup_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
        WARN();
    return 0;
}

static void shutdown_msi_irq(struct irq_desc *desc)
{
    if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
        BUG_ON(!(desc->status & IRQ_DISABLED));
}

void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
    irq_complete_move(desc);
    move_native_irq(desc);
}

static void ack_maskable_msi_irq(struct irq_desc *desc)
{
    ack_nonmaskable_msi_irq(desc);
    ack_APIC_irq(); /* ACKTYPE_NONE */
}

/*
 * IRQ chip for MSI PCI/PCI-X/PCI-Express devices,
 * which implement the MSI or MSI-X capability structure.
 */
static hw_irq_controller pci_msi_maskable = {
    .typename     = "PCI-MSI/-X",
    .startup      = startup_msi_irq,
    .shutdown     = shutdown_msi_irq,
    .enable       = unmask_msi_irq,
    .disable      = mask_msi_irq,
    .ack          = ack_maskable_msi_irq,
    .set_affinity = set_msi_affinity
};

/* As above, but without masking capability. */
static hw_irq_controller pci_msi_nonmaskable = {
    .typename     = "PCI-MSI",
    .startup      = irq_startup_none,
    .shutdown     = irq_shutdown_none,
    .enable       = irq_enable_none,
    .disable      = irq_disable_none,
    .ack          = ack_nonmaskable_msi_irq,
    .end          = end_nonmaskable_irq,
    .set_affinity = set_msi_affinity
};

static struct msi_desc *alloc_msi_entry(unsigned int nr)
{
    struct msi_desc *entry;

    entry = xmalloc_array(struct msi_desc, nr);
    if ( !entry )
        return NULL;

    INIT_LIST_HEAD(&entry->list);
    while ( nr-- )
    {
        entry[nr].dev = NULL;
        entry[nr].irq = -1;
        entry[nr].remap_index = -1;
        entry[nr].pi_desc = NULL;
        entry[nr].irte_initialized = false;
    }

    return entry;
}

int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
    const struct pci_dev *pdev = msidesc->dev;
    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
    u16 control = ~0;
    int rc;

    if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
    {
        control = pci_conf_read16(pdev->sbdf, cpos);
        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
            pci_conf_write16(pdev->sbdf, cpos,
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
    }

    rc = __setup_msi_irq(desc, msidesc,
                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
                                                   : &pci_msi_nonmaskable);

    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
        pci_conf_write16(pdev->sbdf, cpos, control);

    return rc;
}

int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
                    hw_irq_controller *handler)
{
    struct msi_msg msg;
    int ret;

    desc->msi_desc = msidesc;
    desc->handler = handler;
    msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg);
    ret = write_msi_msg(msidesc, &msg);
    if ( unlikely(ret) )
    {
        desc->handler = &no_irq_type;
        desc->msi_desc = NULL;
    }

    return ret;
}

int msi_free_irq(struct msi_desc *entry)
{
    unsigned int nr = entry->msi_attrib.type != PCI_CAP_ID_MSIX
                      ? entry->msi.nvec : 1;

    while ( nr-- )
    {
        if ( entry[nr].irq >= 0 )
            destroy_irq(entry[nr].irq);

        /* Free the unused IRTE if interrupt remapping is enabled. */
        if ( iommu_intremap )
            iommu_update_ire_from_msi(entry + nr, NULL);
    }

    if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        msix_put_fixmap(entry->dev->msix,
                        virt_to_fix((unsigned long)entry->mask_base));

    list_del(&entry->list);
    xfree(entry);
    return 0;
}

static struct msi_desc *find_msi_entry(struct pci_dev *dev,
                                       int irq, int cap_id)
{
    struct msi_desc *entry;

    list_for_each_entry( entry, &dev->msi_list, list )
    {
        if ( entry->msi_attrib.type == cap_id &&
             (irq == -1 || entry->irq == irq) )
            return entry;
    }

    return NULL;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of the device function with a single
 * MSI irq, regardless of whether the device function is capable of
 * handling multiple messages. A return of zero indicates successful setup
 * of entry zero with the new MSI irq; a non-zero return indicates failure.
 **/
static int msi_capability_init(struct pci_dev *dev,
                               int irq,
                               struct msi_desc **desc,
                               unsigned int nvec)
{
    struct msi_desc *entry;
    int pos;
    unsigned int i, mpos;
    u16 control, seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    ASSERT(pcidevs_locked());
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
    if ( !pos )
        return -ENODEV;
    control = pci_conf_read16(dev->sbdf, msi_control_reg(pos));
    if ( nvec > dev->msi_maxvec )
        return dev->msi_maxvec;
    control &= ~PCI_MSI_FLAGS_QSIZE;
    multi_msi_enable(control, nvec);

    /* MSI Entry Initialization */
    msi_set_enable(dev, 0); /* Ensure MSI is disabled while we set it up. */

    entry = alloc_msi_entry(nvec);
    if ( !entry )
        return -ENOMEM;

    mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
    for ( i = 0; i < nvec; ++i )
    {
        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
        entry[i].msi_attrib.is_64 = is_64bit_address(control);
        entry[i].msi_attrib.entry_nr = i;
        entry[i].msi_attrib.host_masked =
        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
        entry[i].msi_attrib.guest_masked = 0;
        entry[i].msi_attrib.pos = pos;
        if ( entry[i].msi_attrib.maskbit )
            entry[i].msi.mpos = mpos;
        entry[i].msi.nvec = 0;
        entry[i].dev = dev;
    }
    entry->msi.nvec = nvec;
    entry->irq = irq;
    if ( entry->msi_attrib.maskbit )
    {
        u32 maskbits;

        /* All MSIs are unmasked by default; mask them all. */
        maskbits = pci_conf_read32(dev->sbdf, mpos);
        maskbits |= ~(uint32_t)0 >> (32 - dev->msi_maxvec);
        pci_conf_write32(dev->sbdf, mpos, maskbits);
    }
    list_add_tail(&entry->list, &dev->msi_list);

    *desc = entry;
    /* Restore the original MSI enabled bits */
    if ( !hardware_domain )
    {
        /*
         * ..., except for internal requests (before Dom0 starts), in which
         * case we rather need to behave "normally", i.e. not follow the split
         * brain model where Dom0 actually enables MSI (and disables INTx).
         */
        pci_intx(dev, false);
        control |= PCI_MSI_FLAGS_ENABLE;
    }
    pci_conf_write16(dev->sbdf, msi_control_reg(pos), control);

    return 0;
}
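
/*
 * Illustrative note on multi_msi_enable() as used above (standard PCI
 * MSI encoding): the Multiple Message Enable field, bits 6:4 of the
 * message control register (PCI_MSI_FLAGS_QSIZE), holds log2 of the
 * vector count, e.g. nvec = 1 is encoded as 000b, nvec = 4 as 010b and
 * nvec = 16 as 100b. nvec must hence be a power of two, and is rejected
 * above (by returning dev->msi_maxvec) when it exceeds the device's
 * advertised maximum.
 */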

static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
{
    u8 limit;
    u32 addr, base = PCI_BASE_ADDRESS_0;
    u64 disp = 0;

    if ( vf >= 0 )
    {
        struct pci_dev *pdev = pci_get_pdev(seg, bus, PCI_DEVFN(slot, func));
        unsigned int pos = pci_find_ext_capability(seg, bus,
                                                   PCI_DEVFN(slot, func),
                                                   PCI_EXT_CAP_ID_SRIOV);
        uint16_t ctrl = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                        pos + PCI_SRIOV_CTRL);
        uint16_t num_vf = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_NUM_VF);
        uint16_t offset = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_VF_OFFSET);
        uint16_t stride = pci_conf_read16(PCI_SBDF(seg, bus, slot, func),
                                          pos + PCI_SRIOV_VF_STRIDE);

        if ( !pdev || !pos ||
             !(ctrl & PCI_SRIOV_CTRL_VFE) ||
             !(ctrl & PCI_SRIOV_CTRL_MSE) ||
             !num_vf || !offset || (num_vf > 1 && !stride) ||
             bir >= PCI_SRIOV_NUM_BARS ||
             !pdev->vf_rlen[bir] )
            return 0;
        base = pos + PCI_SRIOV_BAR;
        vf -= PCI_BDF(bus, slot, func) + offset;
        if ( vf < 0 )
            return 0;
        if ( stride )
        {
            if ( vf % stride )
                return 0;
            vf /= stride;
        }
        if ( vf >= num_vf )
            return 0;
        BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
        disp = vf * pdev->vf_rlen[bir];
        limit = PCI_SRIOV_NUM_BARS;
    }
    else switch ( pci_conf_read8(PCI_SBDF(seg, bus, slot, func),
                                 PCI_HEADER_TYPE) & 0x7f )
    {
    case PCI_HEADER_TYPE_NORMAL:
        limit = 6;
        break;
    case PCI_HEADER_TYPE_BRIDGE:
        limit = 2;
        break;
    case PCI_HEADER_TYPE_CARDBUS:
        limit = 1;
        break;
    default:
        return 0;
    }

    if ( bir >= limit )
        return 0;
    addr = pci_conf_read32(PCI_SBDF(seg, bus, slot, func), base + bir * 4);
    if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
        return 0;
    if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
    {
        addr &= PCI_BASE_ADDRESS_MEM_MASK;
        if ( ++bir >= limit )
            return 0;
        return addr + disp +
               ((uint64_t)pci_conf_read32(PCI_SBDF(seg, bus, slot, func),
                                          base + bir * 4) << 32);
    }
    return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
}
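
/*
 * Worked example for the VF path above (illustrative numbers): take a PF
 * at 0000:06:00.0 (BDF 0x600) with PCI_SRIOV_VF_OFFSET = 0x10 and
 * PCI_SRIOV_VF_STRIDE = 2. The VF with BDF 0x614 then resolves to
 *
 *     vf   = 0x614 - (0x600 + 0x10) = 4;  vf /= 2  =>  VF index 2
 *     disp = 2 * pdev->vf_rlen[bir]
 *
 * and the returned address is the VF BAR<bir> base from the PF's SR-IOV
 * capability plus that displacement, since all VFs of a PF share equally
 * sized, contiguously allocated BAR space.
 */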

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @msi: the msi_info describing the requested entry (may be NULL)
 * @desc: where to store the newly allocated msi_desc (may be NULL)
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs. A return of zero indicates successful
 * setup of the requested MSI-X entries with allocated irqs; a non-zero
 * return indicates failure.
 **/
static int msix_capability_init(struct pci_dev *dev,
                                struct msi_info *msi,
                                struct msi_desc **desc)
{
    struct arch_msix *msix = dev->msix;
    struct msi_desc *entry = NULL;
    u16 control;
    u64 table_paddr;
    u32 table_offset;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    bool maskall = msix->host_maskall, zap_on_error = false;
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);

    if ( !pos )
        return -ENODEV;
    ASSERT(pcidevs_locked());

    control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
    /*
     * Ensure MSI-X interrupts are masked during setup. Some devices require
     * MSI-X to be enabled before we can touch the MSI-X registers. We need
     * to mask all the vectors to prevent interrupts coming in before they're
     * fully set up.
     */
    msix->host_maskall = 1;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                     control | (PCI_MSIX_FLAGS_ENABLE |
                                PCI_MSIX_FLAGS_MASKALL));

    if ( unlikely(!memory_decoded(dev)) )
    {
        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                         control & ~PCI_MSIX_FLAGS_ENABLE);
        return -ENXIO;
    }

    if ( desc )
    {
        entry = alloc_msi_entry(1);
        if ( !entry )
        {
            pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                             control & ~PCI_MSIX_FLAGS_ENABLE);
            return -ENOMEM;
        }
        ASSERT(msi);
    }

    /* Locate MSI-X table region */
    table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos));
    if ( !msix->used_entries &&
         (!msi ||
          (is_hardware_domain(current->domain) &&
           (dev->domain == current->domain || dev->domain == dom_io))) )
    {
        unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc;
        int vf;
        paddr_t pba_paddr;
        unsigned int pba_offset;

        if ( !dev->info.is_virtfn )
        {
            pbus = bus;
            pslot = slot;
            pfunc = func;
            vf = -1;
        }
        else
        {
            pbus = dev->info.physfn.bus;
            pslot = PCI_SLOT(dev->info.physfn.devfn);
            pfunc = PCI_FUNC(dev->info.physfn.devfn);
            vf = PCI_BDF2(dev->bus, dev->devfn);
        }

        table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
        WARN_ON(msi && msi->table_base != table_paddr);
        if ( !table_paddr )
        {
            if ( !msi || !msi->table_base )
            {
                pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                xfree(entry);
                return -ENXIO;
            }
            table_paddr = msi->table_base;
        }
        table_paddr += table_offset & ~PCI_MSIX_BIRMASK;

        msix->table.first = PFN_DOWN(table_paddr);
        msix->table.last = PFN_DOWN(table_paddr +
                                    msix->nr_entries * PCI_MSIX_ENTRY_SIZE - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->table.first,
                                        msix->table.last));

        pba_offset = pci_conf_read32(dev->sbdf, msix_pba_offset_reg(pos));
        bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
        pba_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
        WARN_ON(!pba_paddr);
        pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;

        msix->pba.first = PFN_DOWN(pba_paddr);
        msix->pba.last = PFN_DOWN(pba_paddr +
                                  BITS_TO_LONGS(msix->nr_entries) - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first,
                                        msix->pba.last));

        zap_on_error = true;
    }
    else if ( !msix->table.first )
    {
        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
        xfree(entry);
        return -ENODATA;
    }
    else
        table_paddr = (msix->table.first << PAGE_SHIFT) +
                      PAGE_OFFSET(table_offset & ~PCI_MSIX_BIRMASK);

    if ( entry )
    {
        /* Map MSI-X table region */
        u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
        void __iomem *base;

        if ( idx < 0 )
        {
            if ( zap_on_error )
            {
                msix->table.first = 0;
                msix->pba.first = 0;

                control &= ~PCI_MSIX_FLAGS_ENABLE;
            }

            pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
            xfree(entry);
            return idx;
        }
        base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));

        /* Mask interrupt here */
        writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

        entry->msi_attrib.type = PCI_CAP_ID_MSIX;
        entry->msi_attrib.is_64 = 1;
        entry->msi_attrib.entry_nr = msi->entry_nr;
        entry->msi_attrib.maskbit = 1;
        entry->msi_attrib.host_masked = 1;
        entry->msi_attrib.guest_masked = 1;
        entry->msi_attrib.pos = pos;
        entry->irq = msi->irq;
        entry->dev = dev;
        entry->mask_base = base;

        list_add_tail(&entry->list, &dev->msi_list);
        *desc = entry;
    }

    if ( !msix->used_entries )
    {
        maskall = false;
        if ( !msix->guest_maskall )
            control &= ~PCI_MSIX_FLAGS_MASKALL;
        else
            control |= PCI_MSIX_FLAGS_MASKALL;

        if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                msix->table.last) )
            WARN();
        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
                                msix->pba.last) )
            WARN();

        if ( desc )
        {
            struct domain *currd = current->domain;
            struct domain *d = dev->domain ?: currd;

            if ( !is_hardware_domain(currd) || d != currd )
                printk("%s use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
                       is_hardware_domain(currd)
                       ? XENLOG_WARNING "Potentially insecure"
                       : XENLOG_ERR "Insecure",
                       seg, bus, slot, func, d->domain_id);
            if ( !is_hardware_domain(d) &&
                 /* Assume a domain without memory has no mappings yet. */
                 (!is_hardware_domain(currd) || domain_tot_pages(d)) )
                domain_crash(d);
            /* XXX How to deal with existing mappings? */
        }
    }
    WARN_ON(msix->table.first != (table_paddr >> PAGE_SHIFT));
    ++msix->used_entries;

    /* Restore MSI-X enabled bits */
    if ( !hardware_domain )
    {
        /*
         * ..., except for internal requests (before Dom0 starts), in which
         * case we rather need to behave "normally", i.e. not follow the split
         * brain model where Dom0 actually enables MSI (and disables INTx).
         */
        pci_intx(dev, false);
        control |= PCI_MSIX_FLAGS_ENABLE;
        control &= ~PCI_MSIX_FLAGS_MASKALL;
        maskall = 0;
    }
    msix->host_maskall = maskall;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);

    return 0;
}
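
/*
 * Illustrative example of the table/PBA bookkeeping above: for a device
 * with 128 MSI-X entries and a page aligned table, the table occupies
 * 128 * PCI_MSIX_ENTRY_SIZE = 2048 bytes, so msix->table.first ==
 * msix->table.last and a single page is added to mmio_ro_ranges, which
 * keeps guests from writing to the live table directly.
 */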

/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of the device function with a single
 * MSI irq upon its software driver's call to request MSI mode enabled on
 * its hardware device function. A return of zero indicates successful
 * setup of entry zero with the new MSI irq; a non-zero return indicates
 * failure.
 **/

static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
    struct pci_dev *pdev;
    struct msi_desc *old_desc;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
    if ( !pdev )
        return -ENODEV;

    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
    if ( old_desc )
    {
        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
               msi->irq, msi->seg, msi->bus,
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
        return -EEXIST;
    }

    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
    if ( old_desc )
    {
        printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
               msi->seg, msi->bus,
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
        __pci_disable_msix(old_desc);
    }

    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
}

static void __pci_disable_msi(struct msi_desc *entry)
{
    struct pci_dev *dev;

    dev = entry->dev;
    msi_set_enable(dev, 0);
    if ( entry->irq > 0 && !(irq_to_desc(entry->irq)->status & IRQ_GUEST) )
        pci_intx(dev, true);

    BUG_ON(list_empty(&dev->msi_list));
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Setup the MSI-X capability structure of the device function with the
 * number of requested irqs upon its software driver's call to request
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates successful configuration of the MSI-X capability structure
 * with newly allocated MSI-X irqs. A return of < 0 indicates a failure,
 * while a return of > 0 indicates that the driver's request exceeds the
 * number of irqs available. The driver should use the returned value to
 * re-send its request.
 **/
static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
{
    struct pci_dev *pdev;
    u8 slot = PCI_SLOT(msi->devfn);
    u8 func = PCI_FUNC(msi->devfn);
    struct msi_desc *old_desc;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
    if ( !pdev || !pdev->msix )
        return -ENODEV;

    if ( msi->entry_nr >= pdev->msix->nr_entries )
        return -EINVAL;

    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
    if ( old_desc )
    {
        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
               msi->irq, msi->seg, msi->bus, slot, func);
        return -EEXIST;
    }

    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( old_desc )
    {
        printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
               msi->seg, msi->bus, slot, func);
        __pci_disable_msi(old_desc);
    }

    return msix_capability_init(pdev, msi, desc);
}

static void _pci_cleanup_msix(struct arch_msix *msix)
{
    if ( !--msix->used_entries )
    {
        if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
                                   msix->table.last) )
            WARN();
        msix->table.first = 0;
        msix->table.last = 0;

        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
                                   msix->pba.last) )
            WARN();
        msix->pba.first = 0;
        msix->pba.last = 0;
    }
}

static void __pci_disable_msix(struct msi_desc *entry)
{
    struct pci_dev *dev = entry->dev;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);
    u16 control = pci_conf_read16(dev->sbdf,
                                  msix_control_reg(entry->msi_attrib.pos));
    bool maskall = dev->msix->host_maskall;

    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
    {
        dev->msix->host_maskall = 1;
        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
                         control | (PCI_MSIX_FLAGS_ENABLE |
                                    PCI_MSIX_FLAGS_MASKALL));
    }

    BUG_ON(list_empty(&dev->msi_list));

    if ( likely(memory_decoded(dev)) )
        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
    {
        printk(XENLOG_WARNING
               "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
               entry->irq, seg, bus, slot, func);
        maskall = true;
    }
    dev->msix->host_maskall = maskall;
    if ( maskall || dev->msix->guest_maskall )
        control |= PCI_MSIX_FLAGS_MASKALL;
    pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);

    _pci_cleanup_msix(dev->msix);
}

int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
{
    int rc;
    struct pci_dev *pdev;

    if ( !use_msi )
        return 0;

    pcidevs_lock();
    pdev = pci_get_pdev(seg, bus, devfn);
    if ( !pdev )
        rc = -ENODEV;
    else if ( pdev->msix->used_entries != !!off )
        rc = -EBUSY;
    else if ( off )
    {
        _pci_cleanup_msix(pdev->msix);
        rc = 0;
    }
    else
        rc = msix_capability_init(pdev, NULL, NULL);
    pcidevs_unlock();

    return rc;
}

/*
 * Notice: this only constructs the msi_desc; the irq_desc is not changed
 * here, and the interrupt is left masked.
 */
int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EPERM;

    return msi->table_base ? __pci_enable_msix(msi, desc) :
                             __pci_enable_msi(msi, desc);
}
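
/*
 * Hedged usage sketch (illustrative addition; field names as used by the
 * functions above): a caller such as the PHYSDEVOP_map_pirq handling in
 * irq.c fills in the routing information and lets pci_enable_msi() pick
 * MSI vs. MSI-X based on table_base:
 *
 *     struct msi_info msi = {
 *         .seg = 0, .bus = 6, .devfn = PCI_DEVFN(0, 0),
 *         .irq = irq,
 *         .entry_nr = 0,
 *         .table_base = 0,    (0 => plain MSI, non-zero => MSI-X)
 *     };
 *     struct msi_desc *desc;
 *     int rc;
 *
 *     pcidevs_lock();
 *     rc = pci_enable_msi(&msi, &desc);
 *     pcidevs_unlock();
 *
 * On success the new msi_desc is constructed masked; binding it to an
 * irq_desc happens separately via setup_msi_irq().
 */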

/*
 * Device only, no irq_desc
 */
void pci_disable_msi(struct msi_desc *msi_desc)
{
    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        __pci_disable_msi(msi_desc);
    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
        __pci_disable_msix(msi_desc);
}

static void msi_free_irqs(struct pci_dev *dev)
{
    struct msi_desc *entry, *tmp;

    list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
    {
        pci_disable_msi(entry);
        msi_free_irq(entry);
    }
}

void pci_cleanup_msi(struct pci_dev *pdev)
{
    /* Disable MSI and/or MSI-X */
    msi_set_enable(pdev, 0);
    msix_set_enable(pdev, 0);
    msi_free_irqs(pdev);
}

int pci_reset_msix_state(struct pci_dev *pdev)
{
    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, pdev->sbdf.dev,
                                           pdev->sbdf.fn, PCI_CAP_ID_MSIX);

    ASSERT(pos);
    /*
     * Xen expects the device to be in its after-reset state, and hence
     * host_maskall = guest_maskall = false with all entries having their
     * mask bit set. Test that the maskall bit is not set; having it set
     * could signal that the device hasn't been reset properly.
     */
    if ( pci_conf_read16(pdev->sbdf, msix_control_reg(pos)) &
         PCI_MSIX_FLAGS_MASKALL )
        return -EBUSY;

    pdev->msix->host_maskall = false;
    pdev->msix->guest_maskall = false;

    return 0;
}

int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
                                 unsigned int size, uint32_t *data)
{
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 slot = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    struct msi_desc *entry;
    unsigned int pos;

    if ( pdev->msix )
    {
        entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
        pos = entry ? entry->msi_attrib.pos
                    : pci_find_cap_offset(seg, bus, slot, func,
                                          PCI_CAP_ID_MSIX);
        ASSERT(pos);

        if ( reg >= pos && reg < msix_pba_offset_reg(pos) + 4 )
        {
            if ( reg != msix_control_reg(pos) || size != 2 )
                return -EACCES;

            pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
            if ( pdev->msix->host_maskall )
                *data |= PCI_MSIX_FLAGS_MASKALL;

            return 1;
        }
    }

    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( entry && entry->msi_attrib.maskbit )
    {
        uint32_t unused;
        unsigned int nvec = entry->msi.nvec;

        pos = entry->msi_attrib.pos;
        if ( reg < pos || reg >= entry->msi.mpos + 8 )
            return 0;

        if ( reg == msi_control_reg(pos) )
            return size == 2 ? 1 : -EACCES;
        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
            return -EACCES;

        unused = ~(uint32_t)0 >> (32 - pdev->msi_maxvec);
        for ( pos = 0; pos < nvec; ++pos, ++entry )
        {
            entry->msi_attrib.guest_masked =
                *data >> entry->msi_attrib.entry_nr;
            if ( entry->msi_attrib.host_masked )
                *data |= 1 << pos;
            unused &= ~(1 << pos);
        }

        *data |= unused;

        return 1;
    }

    return 0;
}
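
/*
 * Worked example for the MSI mask interception above (illustrative): for
 * a 4-vector MSI block (nvec = msi_maxvec = 4) where Xen has host-masked
 * vector 1, a guest write of 0x0 to the mask register records
 * guest_masked = 0 for all four entries, but *data is adjusted to 0x2
 * before returning 1, so vector 1 stays masked no matter what the guest
 * wrote. Mask bits for vectors the device supports beyond the allocated
 * nvec are likewise forced to 1 via 'unused'.
 */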

int pci_restore_msi_state(struct pci_dev *pdev)
{
    unsigned long flags;
    int irq;
    int ret;
    struct msi_desc *entry, *tmp;
    struct irq_desc *desc;
    struct msi_msg msg;
    u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
    unsigned int type = 0, pos = 0;
    u16 control = 0;

    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EOPNOTSUPP;

    ret = xsm_resource_setup_pci(XSM_PRIV,
                                (pdev->seg << 16) | (pdev->bus << 8) |
                                pdev->devfn);
    if ( ret )
        return ret;

    list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
    {
        unsigned int i = 0, nr = 1;

        irq = entry->irq;
        desc = &irq_desc[irq];

        spin_lock_irqsave(&desc->lock, flags);

        ASSERT(desc->msi_desc == entry);

        if ( desc->msi_desc != entry )
        {
    bogus:
            dprintk(XENLOG_ERR,
                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                    pdev->seg, pdev->bus, slot, func, i);
            spin_unlock_irqrestore(&desc->lock, flags);
            if ( type == PCI_CAP_ID_MSIX )
                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
            return -EINVAL;
        }

        ASSERT(!type || type == entry->msi_attrib.type);
        pos = entry->msi_attrib.pos;
        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
        {
            msi_set_enable(pdev, 0);
            nr = entry->msi.nvec;
        }
        else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        {
            control = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
            pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
            if ( unlikely(!memory_decoded(pdev)) )
            {
                spin_unlock_irqrestore(&desc->lock, flags);
                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                return -ENXIO;
            }
        }
        type = entry->msi_attrib.type;

        msg = entry->msg;
        write_msi_msg(entry, &msg);

        for ( i = 0; ; )
        {
            if ( unlikely(!msi_set_mask_bit(desc,
                                            entry[i].msi_attrib.host_masked,
                                            entry[i].msi_attrib.guest_masked)) )
                BUG();

            if ( !--nr )
                break;

            spin_unlock_irqrestore(&desc->lock, flags);
            desc = &irq_desc[entry[++i].irq];
            spin_lock_irqsave(&desc->lock, flags);
            if ( desc->msi_desc != entry + i )
                goto bogus;
        }

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( type == PCI_CAP_ID_MSI )
        {
            unsigned int cpos = msi_control_reg(pos);

            control = pci_conf_read16(pdev->sbdf, cpos) & ~PCI_MSI_FLAGS_QSIZE;
            multi_msi_enable(control, entry->msi.nvec);
            pci_conf_write16(pdev->sbdf, cpos, control);

            msi_set_enable(pdev, 1);
        }
    }

    if ( type == PCI_CAP_ID_MSIX )
        pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                         control | PCI_MSIX_FLAGS_ENABLE);

    return 0;
}

void __init early_msi_init(void)
{
    if ( use_msi < 0 )
        use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
    if ( !use_msi )
        return;
}

static void dump_msi(unsigned char key)
{
    unsigned int irq;

    printk("MSI information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        struct irq_desc *desc = irq_to_desc(irq);
        const struct msi_desc *entry;
        u32 addr, data, dest32;
        signed char mask;
        struct msi_attrib attr;
        unsigned long flags;
        const char *type = "???";

        if ( !irq_desc_initialized(desc) )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        entry = desc->msi_desc;
        if ( !entry )
        {
            spin_unlock_irqrestore(&desc->lock, flags);
            continue;
        }

        switch ( entry->msi_attrib.type )
        {
        case PCI_CAP_ID_MSI: type = "MSI"; break;
        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
        case 0:
            switch ( entry->msi_attrib.pos )
            {
            case MSI_TYPE_HPET: type = "HPET"; break;
            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
            }
            break;
        }

        data = entry->msg.data;
        addr = entry->msg.address_lo;
        dest32 = entry->msg.dest32;
        attr = entry->msi_attrib;
        if ( entry->msi_attrib.type )
            mask = msi_get_mask_bit(entry);
        else
            mask = -1;

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( mask >= 0 )
            mask += '0';
        else
            mask = '?';
        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
               " dest=%08x mask=%d/%c%c/%c\n",
               type, irq,
               (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
               data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
               data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
               data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
               addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
               addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
               dest32, attr.maskbit,
               attr.host_masked ? 'H' : ' ',
               attr.guest_masked ? 'G' : ' ',
               mask);
    }

    vpci_dump_msi();
}

static int __init msi_setup_keyhandler(void)
{
    register_keyhandler('M', dump_msi, "dump MSI state", 1);
    return 0;
}
__initcall(msi_setup_keyhandler);