/*
 *  Copyright (C) 2001  MandrakeSoft S.A.
 *
 *    MandrakeSoft S.A.
 *    43, rue d'Aboukir
 *    75002 Paris - France
 *    http://www.linux-mandrake.com/
 *    http://www.mandrakesoft.com/
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; If not, see <http://www.gnu.org/licenses/>.
 *
 * Support for virtual MSI logic
 * Will be merged with the virtual IOAPIC logic, since most of it is the same
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/nospec.h>
#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/irq.h>
#include <xen/vpci.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/support.h>
#include <asm/current.h>
#include <asm/event.h>
#include <asm/io_apic.h>

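/*
 * Inject an MSI vector into the target vLAPIC.  Only Fixed and LowestPrio
 * delivery modes are expected here; anything else is a caller bug.
 */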
static void vmsi_inj_irq(
    struct vlapic *target,
    uint8_t vector,
    uint8_t trig_mode,
    uint8_t delivery_mode)
{
    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vmsi_inj_irq: vec %02x trig %d dm %d\n",
                vector, trig_mode, delivery_mode);

    switch ( delivery_mode )
    {
    case dest_Fixed:
    case dest_LowestPrio:
        vlapic_set_irq(target, vector, trig_mode);
        break;
    default:
        BUG();
    }
}

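/*
 * Deliver an MSI to the vLAPIC(s) matching dest/dest_mode.  Returns 0 on
 * success, -ESRCH when lowest-priority arbitration finds no target, and
 * -EINVAL for an unsupported delivery mode.
 */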
int vmsi_deliver(
    struct domain *d, int vector,
    uint8_t dest, uint8_t dest_mode,
    uint8_t delivery_mode, uint8_t trig_mode)
{
    struct vlapic *target;
    struct vcpu *v;

    switch ( delivery_mode )
    {
    case dest_LowestPrio:
        target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
        if ( target != NULL )
        {
            vmsi_inj_irq(target, vector, trig_mode, delivery_mode);
            break;
        }
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "null MSI round robin: vector=%02x\n",
                    vector);
        return -ESRCH;

    case dest_Fixed:
        for_each_vcpu ( d, v )
            if ( vlapic_match_dest(vcpu_vlapic(v), NULL,
                                   0, dest, dest_mode) )
                vmsi_inj_irq(vcpu_vlapic(v), vector,
                             trig_mode, delivery_mode);
        break;

    default:
        printk(XENLOG_G_WARNING
               "%pv: Unsupported MSI delivery mode %d for Dom%d\n",
               current, delivery_mode, d->domain_id);
        return -EINVAL;
    }

    return 0;
}

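/* Deliver the guest MSI programmed into a passed-through PIRQ (gmsi state). */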
void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
{
    uint32_t flags = pirq_dpci->gmsi.gflags;
    int vector = pirq_dpci->gmsi.gvec;
    uint8_t dest = (uint8_t)flags;
    bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
    uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
    bool trig_mode = flags & XEN_DOMCTL_VMSI_X86_TRIG_MASK;

    HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
                "msi: dest=%x dest_mode=%x delivery_mode=%x "
                "vector=%x trig_mode=%x\n",
                dest, dest_mode, delivery_mode, vector, trig_mode);

    ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI);

    vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
}

/*
 * Return value: -1 if the destination does not map onto exactly one vCPU,
 * otherwise the vcpu_id of the single matching vCPU.
 */
int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
{
    int dest_vcpu_id = -1, w = 0;
    struct vcpu *v;

    if ( d->max_vcpus == 1 )
        return 0;

    for_each_vcpu ( d, v )
    {
        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
        {
            w++;
            dest_vcpu_id = v->vcpu_id;
        }
    }
    if ( w > 1 )
        return -1;

    return dest_vcpu_id;
}

/* MSI-X mask bit hypervisor interception */
struct msixtbl_entry
{
    struct list_head list;
    atomic_t refcnt;    /* how many bind_pt_irq called for the device */

    /* TODO: resolve the potential race by destruction of pdev */
    struct pci_dev *pdev;
    unsigned long gtable;       /* gpa of msix table */
    /* Per-entry flag: address/data written; handled at the next unmask. */
    DECLARE_BITMAP(table_flags, MAX_MSIX_TABLE_ENTRIES);
#define MAX_MSIX_ACC_ENTRIES 3
    unsigned int table_len;
    struct {
        uint32_t msi_ad[3];     /* Shadow of address low, high and data */
    } gentries[MAX_MSIX_ACC_ENTRIES];
    /* One valid bit per shadowed 32-bit word in gentries[]. */
    DECLARE_BITMAP(acc_valid, 3 * MAX_MSIX_ACC_ENTRIES);
#define acc_bit(what, ent, slot, idx) \
        what##_bit((slot) * 3 + (idx), (ent)->acc_valid)
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(msixtbl_rcu_lock);

/*
 * MSI-X table infrastructure is dynamically initialised when an MSI-X capable
 * device is passed through to a domain, rather than unconditionally for all
 * domains.
 */
static bool msixtbl_initialised(const struct domain *d)
{
    return d->arch.hvm.msixtbl_list.next;
}

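/* Find the tracked entry whose guest MSI-X table range contains addr. */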
static struct msixtbl_entry *msixtbl_find_entry(
    struct vcpu *v, unsigned long addr)
{
    struct msixtbl_entry *entry;
    struct domain *d = v->domain;

    list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list )
        if ( addr >= entry->gtable &&
             addr < entry->gtable + entry->table_len )
            return entry;

    return NULL;
}

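/* Map a guest MSI-X table address to the corresponding host msi_desc. */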
static struct msi_desc *msixtbl_addr_to_desc(
    const struct msixtbl_entry *entry, unsigned long addr)
{
    unsigned int nr_entry;
    struct msi_desc *desc;

    if ( !entry || !entry->pdev )
        return NULL;

    nr_entry = (addr - entry->gtable) / PCI_MSIX_ENTRY_SIZE;

    list_for_each_entry( desc, &entry->pdev->msi_list, list )
        if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX &&
             desc->msi_attrib.entry_nr == nr_entry )
            return desc;

    return NULL;
}

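/*
 * Read handler: address/data words are served from the shadow copy in
 * gentries[], the vector control word's mask bit from the host msi_desc.
 */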
static int msixtbl_read(const struct hvm_io_handler *handler,
                        uint64_t address, uint32_t len, uint64_t *pval)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(current, address);
    if ( !entry )
        goto out;
    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);

    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
        index = offset / sizeof(uint32_t);
        if ( nr_entry >= ARRAY_SIZE(entry->gentries) )
            goto out;
        nr_entry = array_index_nospec(nr_entry, ARRAY_SIZE(entry->gentries));
        if ( !acc_bit(test, entry, nr_entry, index) )
            goto out;
        *pval = entry->gentries[nr_entry].msi_ad[index];
        if ( len == 8 )
        {
            if ( index )
                offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
            else if ( acc_bit(test, entry, nr_entry, 1) )
                *pval |= (u64)entry->gentries[nr_entry].msi_ad[1] << 32;
            else
                goto out;
        }
    }
    if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        const struct msi_desc *msi_desc = msixtbl_addr_to_desc(entry, address);

        if ( !msi_desc )
            goto out;
        if ( len == 4 )
            *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
                              PCI_MSIX_VECTOR_BITMASK);
        else
            *pval |= (u64)MASK_INSR(msi_desc->msi_attrib.guest_masked,
                                    PCI_MSIX_VECTOR_BITMASK) << 32;
    }

    r = X86EMUL_OKAY;
out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

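/*
 * Write handler: shadow address/data writes in gentries[], and handle the
 * vector control word by applying the guest mask bit to the physical IRQ.
 * Unmask writes following an address/data update are left unhandled (they
 * fall through to the device model) and completed later.
 */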
static int msixtbl_write(struct vcpu *v, unsigned long address,
                         unsigned int len, unsigned long val)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    const struct msi_desc *msi_desc;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;
    unsigned long flags;
    struct irq_desc *desc;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(v, address);
    if ( !entry )
        goto out;
    nr_entry = array_index_nospec(((address - entry->gtable) /
                                   PCI_MSIX_ENTRY_SIZE),
                                  MAX_MSIX_TABLE_ENTRIES);

    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        index = offset / sizeof(uint32_t);
        if ( nr_entry < ARRAY_SIZE(entry->gentries) )
        {
            nr_entry = array_index_nospec(nr_entry,
                                          ARRAY_SIZE(entry->gentries));
            entry->gentries[nr_entry].msi_ad[index] = val;
            acc_bit(set, entry, nr_entry, index);
            if ( len == 8 && !index )
            {
                entry->gentries[nr_entry].msi_ad[1] = val >> 32;
                acc_bit(set, entry, nr_entry, 1);
            }
        }
        set_bit(nr_entry, &entry->table_flags);
        if ( len != 8 || !index )
            goto out;
        val >>= 32;
        address += 4;
    }

    /* Exit to device model when unmasking and address/data got modified. */
    if ( !(val & PCI_MSIX_VECTOR_BITMASK) &&
         test_and_clear_bit(nr_entry, &entry->table_flags) )
    {
        v->arch.hvm.hvm_io.msix_unmask_address = address;
        goto out;
    }

    msi_desc = msixtbl_addr_to_desc(entry, address);
    if ( !msi_desc || msi_desc->irq < 0 )
        goto out;

    desc = irq_to_desc(msi_desc->irq);
    if ( !desc )
        goto out;

    spin_lock_irqsave(&desc->lock, flags);

    if ( !desc->msi_desc )
        goto unlock;

    ASSERT(msi_desc == desc->msi_desc);

    guest_mask_msi_irq(desc, !!(val & PCI_MSIX_VECTOR_BITMASK));

unlock:
    spin_unlock_irqrestore(&desc->lock, flags);
    if ( len == 4 )
        r = X86EMUL_OKAY;

out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

static int _msixtbl_write(const struct hvm_io_handler *handler,
                          uint64_t address, uint32_t len, uint64_t val)
{
    return msixtbl_write(current, address, len, val);
}

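/*
 * Accept callback: claim accesses that hit a registered MSI-X table entry.
 * For writes that are not claimed, snoop stores which clear the mask bit of
 * a vector control word, so a pending unmask can be replayed once the device
 * model has completed the access.
 */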
static bool_t msixtbl_range(const struct hvm_io_handler *handler,
                            const ioreq_t *r)
{
    struct vcpu *curr = current;
    unsigned long addr = r->addr;
    const struct msi_desc *desc;

    ASSERT(r->type == IOREQ_TYPE_COPY);

    rcu_read_lock(&msixtbl_rcu_lock);
    desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr);
    rcu_read_unlock(&msixtbl_rcu_lock);

    if ( desc )
        return 1;

    if ( r->state == STATE_IOREQ_READY && r->dir == IOREQ_WRITE )
    {
        unsigned int size = r->size;

        if ( !r->data_is_ptr )
        {
            uint64_t data = r->data;

            if ( size == 8 )
            {
                BUILD_BUG_ON(!(PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET & 4));
                data >>= 32;
                addr += size = 4;
            }
            if ( size == 4 &&
                 ((addr & (PCI_MSIX_ENTRY_SIZE - 1)) ==
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) &&
                 !(data & PCI_MSIX_VECTOR_BITMASK) )
            {
                curr->arch.hvm.hvm_io.msix_snoop_address = addr;
                curr->arch.hvm.hvm_io.msix_snoop_gpa = 0;
            }
        }
        else if ( (size == 4 || size == 8) &&
                  /* Only support forward REP MOVS for now. */
                  !r->df &&
                  /*
                   * Only fully support accesses to a single table entry for
                   * now (if multiple ones get written to in one go, only the
                   * final one gets dealt with).
                   */
                  r->count && r->count <= PCI_MSIX_ENTRY_SIZE / size &&
                  !((addr + (size * r->count)) & (PCI_MSIX_ENTRY_SIZE - 1)) )
        {
            BUILD_BUG_ON((PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET + 4) &
                         (PCI_MSIX_ENTRY_SIZE - 1));

            curr->arch.hvm.hvm_io.msix_snoop_address =
                addr + size * r->count - 4;
            curr->arch.hvm.hvm_io.msix_snoop_gpa =
                r->data + size * r->count - 4;
        }
    }

    return 0;
}

static const struct hvm_io_ops msixtbl_mmio_ops = {
    .accept = msixtbl_range,
    .read = msixtbl_read,
    .write = _msixtbl_write,
};

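/* Initialise a pre-allocated entry and publish it on the domain's RCU list. */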
static void add_msixtbl_entry(struct domain *d,
                              struct pci_dev *pdev,
                              uint64_t gtable,
                              struct msixtbl_entry *entry)
{
    INIT_LIST_HEAD(&entry->list);
    INIT_RCU_HEAD(&entry->rcu);
    atomic_set(&entry->refcnt, 0);

    entry->table_len = pdev->msix->nr_entries * PCI_MSIX_ENTRY_SIZE;
    entry->pdev = pdev;
    entry->gtable = (unsigned long) gtable;

    list_add_rcu(&entry->list, &d->arch.hvm.msixtbl_list);
}

static void free_msixtbl_entry(struct rcu_head *rcu)
{
    struct msixtbl_entry *entry;

    entry = container_of (rcu, struct msixtbl_entry, rcu);

    xfree(entry);
}

static void del_msixtbl_entry(struct msixtbl_entry *entry)
{
    list_del_rcu(&entry->list);
    call_rcu(&entry->rcu, free_msixtbl_entry);
}

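/*
 * Register interception of the MSI-X table for the device behind pirq:
 * record the guest-physical table address and take a reference on the
 * per-device entry, creating it on first use.
 */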
int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry, *new_entry;
    int r = -EINVAL;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return -ENODEV;

    /*
     * xmalloc() with irq_disabled causes the failure of check_lock()
     * for xenpool->lock. So we allocate an entry beforehand.
     */
    new_entry = xzalloc(struct msixtbl_entry);
    if ( !new_entry )
        return -ENOMEM;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
    {
        xfree(new_entry);
        return r;
    }

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

    entry = new_entry;
    new_entry = NULL;
    add_msixtbl_entry(d, pdev, gtable, entry);

found:
    atomic_inc(&entry->refcnt);
    r = 0;

out:
    spin_unlock_irq(&irq_desc->lock);
    xfree(new_entry);

    if ( !r )
    {
        struct vcpu *v;

        for_each_vcpu ( d, v )
        {
            if ( (v->pause_flags & VPF_blocked_in_xen) &&
                 !v->arch.hvm.hvm_io.msix_snoop_gpa &&
                 v->arch.hvm.hvm_io.msix_snoop_address ==
                 (gtable + msi_desc->msi_attrib.entry_nr *
                           PCI_MSIX_ENTRY_SIZE +
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) )
                v->arch.hvm.hvm_io.msix_unmask_address =
                    v->arch.hvm.hvm_io.msix_snoop_address;
        }
    }

    return r;
}

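/* Drop the reference taken by msixtbl_pt_register() for this pirq's device. */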
void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
        return;

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

out:
    spin_unlock_irq(&irq_desc->lock);
    return;

found:
    if ( !atomic_dec_and_test(&entry->refcnt) )
        del_msixtbl_entry(entry);

    spin_unlock_irq(&irq_desc->lock);
}

void msixtbl_init(struct domain *d)
{
    struct hvm_io_handler *handler;

    if ( !is_hvm_domain(d) || !has_vlapic(d) || msixtbl_initialised(d) )
        return;

    INIT_LIST_HEAD(&d->arch.hvm.msixtbl_list);

    handler = hvm_next_io_handler(d);
    if ( handler )
    {
        handler->type = IOREQ_TYPE_COPY;
        handler->ops = &msixtbl_mmio_ops;
    }
}

void msixtbl_pt_cleanup(struct domain *d)
{
    struct msixtbl_entry *entry, *temp;

    if ( !msixtbl_initialised(d) )
        return;

    spin_lock(&d->event_lock);

    list_for_each_entry_safe( entry, temp,
                              &d->arch.hvm.msixtbl_list, list )
        del_msixtbl_entry(entry);

    spin_unlock(&d->event_lock);
}

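/*
 * Called when a vCPU resumes after a device-model round trip: if a deferred
 * or snooped unmask of an MSI-X vector control word is pending, replay it so
 * the physical mask state matches what the guest wrote.
 */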
void msix_write_completion(struct vcpu *v)
{
    unsigned long ctrl_address = v->arch.hvm.hvm_io.msix_unmask_address;
    unsigned long snoop_addr = v->arch.hvm.hvm_io.msix_snoop_address;

    v->arch.hvm.hvm_io.msix_snoop_address = 0;

    if ( !ctrl_address && snoop_addr &&
         v->arch.hvm.hvm_io.msix_snoop_gpa )
    {
        unsigned int token = hvmemul_cache_disable(v);
        const struct msi_desc *desc;
        uint32_t data;

        rcu_read_lock(&msixtbl_rcu_lock);
        desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr),
                                    snoop_addr);
        rcu_read_unlock(&msixtbl_rcu_lock);

        if ( desc &&
             hvm_copy_from_guest_phys(&data,
                                      v->arch.hvm.hvm_io.msix_snoop_gpa,
                                      sizeof(data)) == HVMTRANS_okay &&
             !(data & PCI_MSIX_VECTOR_BITMASK) )
            ctrl_address = snoop_addr;

        hvmemul_cache_restore(v, token);
    }

    if ( !ctrl_address )
        return;

    v->arch.hvm.hvm_io.msix_unmask_address = 0;
    if ( msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
        gdprintk(XENLOG_WARNING, "MSI-X write completion failure\n");
}

#ifdef CONFIG_HAS_VPCI
static unsigned int msi_gflags(uint16_t data, uint64_t addr, bool masked)
{
    /*
     * We need to use the DOMCTL constants here because the output of this
     * function is used as input to pt_irq_create_bind, which also takes the
     * input from the DOMCTL itself.
     */
    return MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK),
                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
           MASK_INSR(MASK_EXTR(addr, MSI_ADDR_REDIRECTION_MASK),
                     XEN_DOMCTL_VMSI_X86_RH_MASK) |
           MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DESTMODE_MASK),
                     XEN_DOMCTL_VMSI_X86_DM_MASK) |
           MASK_INSR(MASK_EXTR(data, MSI_DATA_DELIVERY_MODE_MASK),
                     XEN_DOMCTL_VMSI_X86_DELIV_MASK) |
           MASK_INSR(MASK_EXTR(data, MSI_DATA_TRIGGER_MASK),
                     XEN_DOMCTL_VMSI_X86_TRIG_MASK) |
           /* NB: by default MSI vectors are bound masked. */
           (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
}

static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
{
    unsigned long flags;
    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);

    if ( !desc )
        return;
    guest_mask_msi_irq(desc, mask);
    spin_unlock_irqrestore(&desc->lock, flags);
}

void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
                        unsigned int entry, bool mask)
{
    vpci_mask_pirq(pdev->domain, msi->arch.pirq + entry, mask);
}

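/*
 * Bind each of the device's MSI vectors to the guest vector/flags derived
 * from the MSI data/address registers, undoing earlier bindings on failure.
 */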
static int vpci_msi_update(const struct pci_dev *pdev, uint32_t data,
                           uint64_t address, unsigned int vectors,
                           unsigned int pirq, uint32_t mask)
{
    unsigned int i;

    ASSERT(pcidevs_locked());

    for ( i = 0; i < vectors; i++ )
    {
        uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
        uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
        struct xen_domctl_bind_pt_irq bind = {
            .machine_irq = pirq + i,
            .irq_type = PT_IRQ_TYPE_MSI,
            .u.msi.gvec = (vector & ~vector_mask) |
                          ((vector + i) & vector_mask),
            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
        };
        int rc = pt_irq_create_bind(pdev->domain, &bind);

        if ( rc )
        {
            gdprintk(XENLOG_ERR,
                     "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                     PCI_FUNC(pdev->devfn), pirq + i, rc);
            while ( bind.machine_irq-- > pirq )
                pt_irq_destroy_bind(pdev->domain, &bind);
            return rc;
        }
    }

    return 0;
}

int vpci_msi_arch_update(struct vpci_msi *msi, const struct pci_dev *pdev)
{
    int rc;

    ASSERT(msi->arch.pirq != INVALID_PIRQ);

    pcidevs_lock();
    rc = vpci_msi_update(pdev, msi->data, msi->address, msi->vectors,
                         msi->arch.pirq, msi->mask);
    if ( rc )
    {
        spin_lock(&pdev->domain->event_lock);
        unmap_domain_pirq(pdev->domain, msi->arch.pirq);
        spin_unlock(&pdev->domain->event_lock);
        pcidevs_unlock();
        msi->arch.pirq = INVALID_PIRQ;
        return rc;
    }
    pcidevs_unlock();

    return 0;
}

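/*
 * Allocate and map a PIRQ for the vPCI MSI(-X) interrupt(s) and bind it to
 * the guest: multi-vector MSI when table_base is zero, a single MSI-X entry
 * otherwise.  Returns the base PIRQ on success or a negative error code.
 */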
static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
                           uint64_t address, unsigned int nr,
                           paddr_t table_base, uint32_t mask)
{
    struct msi_info msi_info = {
        .seg = pdev->seg,
        .bus = pdev->bus,
        .devfn = pdev->devfn,
        .table_base = table_base,
        .entry_nr = nr,
    };
    unsigned vectors = table_base ? 1 : nr;
    int rc, pirq = INVALID_PIRQ;

    /* Get a PIRQ. */
    rc = allocate_and_map_msi_pirq(pdev->domain, -1, &pirq,
                                   table_base ? MAP_PIRQ_TYPE_MSI
                                              : MAP_PIRQ_TYPE_MULTI_MSI,
                                   &msi_info);
    if ( rc )
    {
        gdprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
                 pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                 PCI_FUNC(pdev->devfn), rc);
        return rc;
    }

    pcidevs_lock();
    rc = vpci_msi_update(pdev, data, address, vectors, pirq, mask);
    if ( rc )
    {
        spin_lock(&pdev->domain->event_lock);
        unmap_domain_pirq(pdev->domain, pirq);
        spin_unlock(&pdev->domain->event_lock);
        pcidevs_unlock();
        return rc;
    }
    pcidevs_unlock();

    return pirq;
}

int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
                         unsigned int vectors)
{
    int rc;

    ASSERT(msi->arch.pirq == INVALID_PIRQ);
    rc = vpci_msi_enable(pdev, msi->data, msi->address, vectors, 0, msi->mask);
    if ( rc >= 0 )
    {
        msi->arch.pirq = rc;
        rc = 0;
    }

    return rc;
}

static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
                             unsigned int nr)
{
    unsigned int i;

    ASSERT(pirq != INVALID_PIRQ);

    pcidevs_lock();
    for ( i = 0; i < nr; i++ )
    {
        struct xen_domctl_bind_pt_irq bind = {
            .machine_irq = pirq + i,
            .irq_type = PT_IRQ_TYPE_MSI,
        };
        int rc;

        rc = pt_irq_destroy_bind(pdev->domain, &bind);
        ASSERT(!rc);
    }

    spin_lock(&pdev->domain->event_lock);
    unmap_domain_pirq(pdev->domain, pirq);
    spin_unlock(&pdev->domain->event_lock);
    pcidevs_unlock();
}

void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
{
    vpci_msi_disable(pdev, msi->arch.pirq, msi->vectors);
    msi->arch.pirq = INVALID_PIRQ;
}

void vpci_msi_arch_init(struct vpci_msi *msi)
{
    msi->arch.pirq = INVALID_PIRQ;
}

void vpci_msi_arch_print(const struct vpci_msi *msi)
{
    printk("vec=%#02x%7s%6s%3sassert%5s%7s dest_id=%lu pirq: %d\n",
           MASK_EXTR(msi->data, MSI_DATA_VECTOR_MASK),
           msi->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
           msi->data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
           msi->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
           msi->address & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
           msi->address & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
           MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK),
           msi->arch.pirq);
}

void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
                               const struct pci_dev *pdev, bool mask)
{
    ASSERT(entry->arch.pirq != INVALID_PIRQ);
    vpci_mask_pirq(pdev->domain, entry->arch.pirq, mask);
}

int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
                                const struct pci_dev *pdev, paddr_t table_base)
{
    int rc;

    ASSERT(entry->arch.pirq == INVALID_PIRQ);
    rc = vpci_msi_enable(pdev, entry->data, entry->addr,
                         vmsix_entry_nr(pdev->vpci->msix, entry),
                         table_base, entry->masked);
    if ( rc >= 0 )
    {
        entry->arch.pirq = rc;
        rc = 0;
    }

    return rc;
}

int vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
                                 const struct pci_dev *pdev)
{
    if ( entry->arch.pirq == INVALID_PIRQ )
        return -ENOENT;

    vpci_msi_disable(pdev, entry->arch.pirq, 1);
    entry->arch.pirq = INVALID_PIRQ;

    return 0;
}

void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
{
    entry->arch.pirq = INVALID_PIRQ;
}

int vpci_msix_arch_print(const struct vpci_msix *msix)
{
    unsigned int i;

    for ( i = 0; i < msix->max_entries; i++ )
    {
        const struct vpci_msix_entry *entry = &msix->entries[i];

        printk("%6u vec=%02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: %d\n",
               i, MASK_EXTR(entry->data, MSI_DATA_VECTOR_MASK),
               entry->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
               entry->data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
               entry->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
               entry->addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
               entry->addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
               MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK),
               entry->masked, entry->arch.pirq);
        if ( i && !(i % 64) )
        {
            struct pci_dev *pdev = msix->pdev;

            spin_unlock(&msix->pdev->vpci->lock);
            process_pending_softirqs();
            /* NB: we assume that pdev cannot go away for an alive domain. */
            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
                return -EBUSY;
            if ( pdev->vpci->msix != msix )
            {
                spin_unlock(&pdev->vpci->lock);
                return -EAGAIN;
            }
        }
    }

    return 0;
}
#endif /* CONFIG_HAS_VPCI */