1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) Allen Kay <allen.m.kay@intel.com>
17  * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
18  */
19 
20 #include <xen/event.h>
21 #include <xen/iommu.h>
22 #include <xen/cpu.h>
23 #include <xen/irq.h>
24 #include <asm/hvm/irq.h>
25 #include <asm/hvm/support.h>
26 #include <asm/io_apic.h>
27 
28 static DEFINE_PER_CPU(struct list_head, dpci_list);
29 
30 /*
31  * These two bit states help to safely schedule, deschedule, and wait until
32  * the softirq has finished.
33  *
34  * The semantics behind these two bits are as follows:
35  *  - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
36  *  - STATE_RUN - only the softirq is allowed to set and clear it. If it
37  *      has been set, hvm_dirq_assist will RUN with a saved value of the
38  *      'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
39  *
40  * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
41  * STATE_SCHED(unset) -> STATE_RUN(unset).
42  *
43  * However the states can also diverge such as: STATE_SCHED(set) ->
44  * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
45  * the 'hvm_dirq_assist' never ran and that the softirq did not do any
46  * ref-counting.
47  */
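
/*
 * For reference, the functions in this file that drive those transitions are:
 *  - raise_softirq_for()     - sets STATE_SCHED and takes a domain reference.
 *  - dpci_softirq()          - sets STATE_RUN, clears STATE_SCHED (dropping
 *                              the reference) and finally clears STATE_RUN.
 *  - pt_pirq_softirq_reset() - may clear STATE_SCHED before the softirq has
 *                              run, in which case it drops the reference
 *                              itself.
 */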
48 
49 enum {
50     STATE_SCHED,
51     STATE_RUN
52 };
53 
54 /*
55  * This can be called multiple times, but the softirq is only raised once.
56  * That is, until the STATE_SCHED state has been cleared. The state can be
57  * cleared by 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), or
58  * by 'pt_pirq_softirq_reset' (which will try to clear the state before
59  * the softirq has had a chance to run).
60  */
61 static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
62 {
63     unsigned long flags;
64 
65     if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
66         return;
67 
68     get_knownalive_domain(pirq_dpci->dom);
69 
70     local_irq_save(flags);
71     list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
72     local_irq_restore(flags);
73 
74     raise_softirq(HVM_DPCI_SOFTIRQ);
75 }
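
/*
 * The entries queued above are consumed by dpci_softirq() below, which
 * splices the whole per-CPU list with interrupts disabled and then
 * processes the entries with interrupts enabled again.
 */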
76 
77 /*
78  * If we are racing with dpci_softirq (STATE_SCHED) we return
79  * true. Otherwise we return false.
80  *
81  * If it is false, it is the caller's responsibility to make sure
82  * that the softirq (with the event_lock dropped) has run.
83  */
84 bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
85 {
86     if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
87         return true;
88 
89     /*
90      * If, in the future, we were to call 'raise_softirq_for' right after
91      * 'pt_pirq_softirq_active', we MUST reset the list (otherwise it
92      * might have stale data).
93      */
94     return false;
95 }
96 
97 /*
98  * Reset the pirq_dpci->dom parameter to NULL.
99  *
100  * This function checks the different states to make sure it can do so
101  * at the right time. If it unschedules 'hvm_dirq_assist' from running,
102  * it also drops the domain reference (as the softirq would have done).
103  */
104 static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
105 {
106     struct domain *d = pirq_dpci->dom;
107 
108     ASSERT(spin_is_locked(&d->event_lock));
109 
110     switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
111     {
112     case (1 << STATE_SCHED):
113         /*
114          * We are going to try to de-schedule the softirq before it goes
115          * into STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
116          */
117         put_domain(d);
118         /* fallthrough. */
119     case (1 << STATE_RUN):
120     case (1 << STATE_RUN) | (1 << STATE_SCHED):
121         /*
122          * The reason it is OK to reset 'dom' when the STATE_RUN bit is set is
123          * due to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
124          * in a local variable before it sets STATE_RUN - and therefore will not
125          * dereference '->dom', which would crash.
126          */
127         pirq_dpci->dom = NULL;
128         break;
129     }
130     /*
131      * Inhibit 'hvm_dirq_assist' from doing anything useful and, at worst,
132      * from calling 'set_timer', which would blow up (as we have called
133      * kill_timer or never initialized the timer). Note that we hold the lock that
134      * 'hvm_dirq_assist' could be spinning on.
135      */
136     pirq_dpci->masked = 0;
137 }
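
/*
 * To illustrate the cmpxchg/switch above, the possible prior values of
 * 'state' and the resulting actions are:
 *  - STATE_SCHED only             -> the softirq is de-scheduled here:
 *                                    drop the domain reference and clear 'dom'.
 *  - STATE_RUN (w/ or w/o SCHED)  -> the softirq already stashed 'dom' in a
 *                                    local variable: just clear 'dom'.
 *  - neither bit set              -> nothing scheduled or running: only
 *                                    'masked' is cleared (which happens
 *                                    unconditionally).
 */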
138 
139 bool pt_irq_need_timer(uint32_t flags)
140 {
141     return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE |
142                       HVM_IRQ_DPCI_NO_EOI));
143 }
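
/*
 * In other words, a timer is only armed for bindings that rely on a guest
 * EOI to deassert the line: level triggered INTx/GSI passthrough. MSI
 * bindings (HVM_IRQ_DPCI_GUEST_MSI), translated MSI (HVM_IRQ_DPCI_TRANSLATE)
 * and edge triggered identity mapped GSIs (HVM_IRQ_DPCI_NO_EOI) need no such
 * timeout handling.
 */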
144 
145 static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
146                             void *arg)
147 {
148     if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
149                               &pirq_dpci->flags) )
150     {
151         pirq_dpci->masked = 0;
152         pirq_dpci->pending = 0;
153         pirq_guest_eoi(dpci_pirq(pirq_dpci));
154     }
155 
156     return 0;
157 }
158 
159 static void pt_irq_time_out(void *data)
160 {
161     struct hvm_pirq_dpci *irq_map = data;
162     const struct hvm_irq_dpci *dpci;
163     const struct dev_intx_gsi_link *digl;
164 
165     spin_lock(&irq_map->dom->event_lock);
166 
167     if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
168     {
169         ASSERT(is_hardware_domain(irq_map->dom));
170         /*
171          * Identity mapped, no need to iterate over the guest GSI list to find
172          * other pirqs sharing the same guest GSI.
173          *
174          * In the identity mapped case the EOI can also be done now; this
175          * avoids iterating over the list of domain pirqs.
176          */
177         hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
178         irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
179         pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
180         spin_unlock(&irq_map->dom->event_lock);
181         return;
182     }
183 
184     dpci = domain_get_irq_dpci(irq_map->dom);
185     if ( unlikely(!dpci) )
186     {
187         ASSERT_UNREACHABLE();
188         spin_unlock(&irq_map->dom->event_lock);
189         return;
190     }
191     list_for_each_entry ( digl, &irq_map->digl_list, list )
192     {
193         unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
194         const struct hvm_girq_dpci_mapping *girq;
195 
196         list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
197         {
198             struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
199 
200             pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
201         }
202         hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
203     }
204 
205     pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
206 
207     spin_unlock(&irq_map->dom->event_lock);
208 }
209 
210 struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
211 {
212     if ( !d || !is_hvm_domain(d) )
213         return NULL;
214 
215     return hvm_domain_irq(d)->dpci;
216 }
217 
218 void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
219 {
220     xfree(dpci);
221 }
222 
223 /*
224  * This routine handles lowest-priority interrupts using the vector-hashing
225  * mechanism. As an example, modern Intel CPUs use this method to handle
226  * lowest-priority interrupts.
227  *
228  * Here are the details of the vector-hashing mechanism:
229  * 1. For lowest-priority interrupts, store all the possible destination
230  *    vCPUs in an array.
231  * 2. Use "gvec % max number of destination vCPUs" to find the right
232  *    destination vCPU in the array for the lowest-priority interrupt.
233  */
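/*
 * Worked example (illustrative numbers only): with a guest vector gvec of
 * 0x51 and four vCPUs matching the destination, 0x51 % 4 = 1, so the second
 * set bit in the destination bitmap (index 1) selects the target vCPU.
 */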
234 static struct vcpu *vector_hashing_dest(const struct domain *d,
235                                         uint32_t dest_id,
236                                         bool dest_mode,
237                                         uint8_t gvec)
238 
239 {
240     unsigned long *dest_vcpu_bitmap;
241     unsigned int dest_vcpus = 0;
242     struct vcpu *v, *dest = NULL;
243     unsigned int i;
244 
245     dest_vcpu_bitmap = xzalloc_array(unsigned long,
246                                      BITS_TO_LONGS(d->max_vcpus));
247     if ( !dest_vcpu_bitmap )
248         return NULL;
249 
250     for_each_vcpu ( d, v )
251     {
252         if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
253                                 dest_id, dest_mode) )
254             continue;
255 
256         __set_bit(v->vcpu_id, dest_vcpu_bitmap);
257         dest_vcpus++;
258     }
259 
260     if ( dest_vcpus != 0 )
261     {
262         unsigned int mod = gvec % dest_vcpus;
263         unsigned int idx = 0;
264 
265         for ( i = 0; i <= mod; i++ )
266         {
267             idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
268             BUG_ON(idx > d->max_vcpus);
269         }
270 
271         dest = d->vcpu[idx - 1];
272     }
273 
274     xfree(dest_vcpu_bitmap);
275 
276     return dest;
277 }
278 
279 int pt_irq_create_bind(
280     struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
281 {
282     struct hvm_irq_dpci *hvm_irq_dpci;
283     struct hvm_pirq_dpci *pirq_dpci;
284     struct pirq *info;
285     int rc, pirq = pt_irq_bind->machine_irq;
286 
287     if ( pirq < 0 || pirq >= d->nr_pirqs )
288         return -EINVAL;
289 
290  restart:
291     spin_lock(&d->event_lock);
292 
293     hvm_irq_dpci = domain_get_irq_dpci(d);
294     if ( !hvm_irq_dpci && !is_hardware_domain(d) )
295     {
296         unsigned int i;
297 
298         /*
299          * NB: the hardware domain doesn't use a hvm_irq_dpci struct because
300          * it's only allowed to identity map GSIs, and so the data contained in
301          * that struct (used to map guest GSIs into machine GSIs and perform
302          * interrupt routing) is completely useless to it.
303          */
304         hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
305         if ( hvm_irq_dpci == NULL )
306         {
307             spin_unlock(&d->event_lock);
308             return -ENOMEM;
309         }
310         for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ )
311             INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
312 
313         hvm_domain_irq(d)->dpci = hvm_irq_dpci;
314     }
315 
316     info = pirq_get_info(d, pirq);
317     if ( !info )
318     {
319         spin_unlock(&d->event_lock);
320         return -ENOMEM;
321     }
322     pirq_dpci = pirq_dpci(info);
323 
324     /*
325      * A crude 'while' loop with us dropping the spinlock and giving
326      * the dpci_softirq a chance to run.
327      * We MUST check for this condition as the softirq could have been
328      * scheduled and not run yet. Note that this code replaced tasklet_kill,
329      * which would have spun forever and would do the same thing (wait to
330      * flush out outstanding hvm_dirq_assist calls).
331      */
332     if ( pt_pirq_softirq_active(pirq_dpci) )
333     {
334         spin_unlock(&d->event_lock);
335         cpu_relax();
336         goto restart;
337     }
338 
339     switch ( pt_irq_bind->irq_type )
340     {
341     case PT_IRQ_TYPE_MSI:
342     {
343         uint8_t dest, delivery_mode;
344         bool dest_mode;
345         int dest_vcpu_id;
346         const struct vcpu *vcpu;
347         uint32_t gflags = pt_irq_bind->u.msi.gflags &
348                           ~XEN_DOMCTL_VMSI_X86_UNMASKED;
349 
350         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
351         {
352             pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
353                                HVM_IRQ_DPCI_GUEST_MSI;
354             pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
355             pirq_dpci->gmsi.gflags = gflags;
356             /*
357              * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
358              * The 'pirq_cleanup_check' that would free the structure is only
359              * called if the event channel for the PIRQ is active. However,
360              * OSes that use event channels usually bind PIRQs to event channels
361              * and unbind them before calling 'pt_irq_destroy_bind' - with the
362              * result that we re-use the 'dpci' structure. This can be
363              * reproduced by unloading and reloading the driver for a device.
364              *
365              * As such on every 'pt_irq_create_bind' call we MUST set it.
366              */
367             pirq_dpci->dom = d;
368             /* Bind after hvm_irq_dpci is set up to avoid racing with the IRQ handler. */
369             rc = pirq_guest_bind(d->vcpu[0], info, 0);
370             if ( rc == 0 && pt_irq_bind->u.msi.gtable )
371             {
372                 rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
373                 if ( unlikely(rc) )
374                 {
375                     pirq_guest_unbind(d, info);
376                     /*
377                      * Between 'pirq_guest_bind' and 'pirq_guest_unbind'
378                      * an interrupt can be scheduled. No more of them are going
379                      * to be scheduled, but we must deal with the one that may
380                      * still be in the queue.
381                      */
382                     pt_pirq_softirq_reset(pirq_dpci);
383                 }
384             }
385             if ( unlikely(rc) )
386             {
387                 pirq_dpci->gmsi.gflags = 0;
388                 pirq_dpci->gmsi.gvec = 0;
389                 pirq_dpci->dom = NULL;
390                 pirq_dpci->flags = 0;
391                 pirq_cleanup_check(info, d);
392                 spin_unlock(&d->event_lock);
393                 return rc;
394             }
395         }
396         else
397         {
398             uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
399 
400             if ( (pirq_dpci->flags & mask) != mask )
401             {
402                 spin_unlock(&d->event_lock);
403                 return -EBUSY;
404             }
405 
406             /* If pirq is already mapped as vmsi, update guest data/addr. */
407             if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
408                  pirq_dpci->gmsi.gflags != gflags )
409             {
410                 /* Directly clear pending EOIs before enabling new MSI info. */
411                 pirq_guest_eoi(info);
412 
413                 pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
414                 pirq_dpci->gmsi.gflags = gflags;
415             }
416         }
417         /* Calculate dest_vcpu_id for MSI-type pirq migration. */
418         dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
419                          XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
420         dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
421         delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
422                                   XEN_DOMCTL_VMSI_X86_DELIV_MASK);
423 
424         dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
425         pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
426         spin_unlock(&d->event_lock);
427 
428         pirq_dpci->gmsi.posted = false;
429         vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
430         if ( iommu_intpost )
431         {
432             if ( delivery_mode == dest_LowestPrio )
433                 vcpu = vector_hashing_dest(d, dest, dest_mode,
434                                            pirq_dpci->gmsi.gvec);
435             if ( vcpu )
436                 pirq_dpci->gmsi.posted = true;
437         }
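
        /*
         * Note: for lowest-priority delivery the vector-hashed vCPU replaces
         * the one derived from dest_vcpu_id above. If no suitable vCPU is
         * found, 'posted' stays false and pi_update_irte() below is handed a
         * NULL descriptor, i.e. (presumably) posting is not used for this
         * interrupt.
         */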
438         if ( vcpu && is_iommu_enabled(d) )
439             hvm_migrate_pirq(pirq_dpci, vcpu);
440 
441         /* Use interrupt posting if it is supported. */
442         if ( iommu_intpost )
443             pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
444                            info, pirq_dpci->gmsi.gvec);
445 
446         if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
447         {
448             unsigned long flags;
449             struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags);
450 
451             if ( !desc )
452             {
453                 pt_irq_destroy_bind(d, pt_irq_bind);
454                 return -EINVAL;
455             }
456 
457             guest_mask_msi_irq(desc, false);
458             spin_unlock_irqrestore(&desc->lock, flags);
459         }
460 
461         break;
462     }
463 
464     case PT_IRQ_TYPE_PCI:
465     case PT_IRQ_TYPE_MSI_TRANSLATE:
466     {
467         struct dev_intx_gsi_link *digl = NULL;
468         struct hvm_girq_dpci_mapping *girq = NULL;
469         unsigned int guest_gsi;
470 
471         /*
472          * Mapping GSIs for the hardware domain is different from doing it for
473          * an unprivileged guest: the hardware domain is only allowed to
474          * identity map GSIs, and as such all the data in the u.pci union is
475          * discarded.
476          */
477         if ( hvm_irq_dpci )
478         {
479             unsigned int link;
480 
481             digl = xmalloc(struct dev_intx_gsi_link);
482             girq = xmalloc(struct hvm_girq_dpci_mapping);
483 
484             if ( !digl || !girq )
485             {
486                 spin_unlock(&d->event_lock);
487                 xfree(girq);
488                 xfree(digl);
489                 return -ENOMEM;
490             }
491 
492             girq->bus = digl->bus = pt_irq_bind->u.pci.bus;
493             girq->device = digl->device = pt_irq_bind->u.pci.device;
494             girq->intx = digl->intx = pt_irq_bind->u.pci.intx;
495             list_add_tail(&digl->list, &pirq_dpci->digl_list);
496 
497             guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
498             link = hvm_pci_intx_link(digl->device, digl->intx);
499 
500             hvm_irq_dpci->link_cnt[link]++;
501 
502             girq->machine_gsi = pirq;
503             list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
504         }
505         else
506         {
507             ASSERT(is_hardware_domain(d));
508 
509             /* MSI_TRANSLATE is not supported for the hardware domain. */
510             if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
511                  pirq >= hvm_domain_irq(d)->nr_gsis )
512             {
513                 spin_unlock(&d->event_lock);
514 
515                 return -EINVAL;
516             }
517             guest_gsi = pirq;
518         }
519 
520         /* Bind the same mirq only once in the same domain. */
521         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
522         {
523             unsigned int share;
524 
525             /* MUST be set, as the pirq_dpci can be re-used. */
526             pirq_dpci->dom = d;
527             if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
528             {
529                 pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
530                                    HVM_IRQ_DPCI_MACH_MSI |
531                                    HVM_IRQ_DPCI_GUEST_PCI |
532                                    HVM_IRQ_DPCI_TRANSLATE;
533                 share = 0;
534             }
535             else    /* PT_IRQ_TYPE_PCI */
536             {
537                 pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
538                                    HVM_IRQ_DPCI_MACH_PCI |
539                                    HVM_IRQ_DPCI_GUEST_PCI;
540                 if ( !is_hardware_domain(d) )
541                     share = BIND_PIRQ__WILL_SHARE;
542                 else
543                 {
544                     int mask = vioapic_get_mask(d, guest_gsi);
545                     int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi);
546 
547                     if ( mask < 0 || trigger_mode < 0 )
548                     {
549                         spin_unlock(&d->event_lock);
550 
551                         ASSERT_UNREACHABLE();
552                         return -EINVAL;
553                     }
554                     pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI;
555                     /*
556                      * Check whether the corresponding vIO-APIC pin is configured
557                      * as level or edge triggered; level triggered interrupts will
558                      * be marked as shareable.
559                      */
560                     ASSERT(!mask);
561                     share = trigger_mode;
562                     if ( trigger_mode == VIOAPIC_EDGE_TRIG )
563                         /*
564                          * Edge IO-APIC interrupt, no EOI or unmask to perform
565                          * and hence no timer needed.
566                          */
567                         pirq_dpci->flags |= HVM_IRQ_DPCI_NO_EOI;
568                 }
569             }
570 
571             /* Init timer before binding */
572             if ( pt_irq_need_timer(pirq_dpci->flags) )
573                 init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
574             /* Deal with gsi for legacy devices */
575             rc = pirq_guest_bind(d->vcpu[0], info, share);
576             if ( unlikely(rc) )
577             {
578                 if ( pt_irq_need_timer(pirq_dpci->flags) )
579                     kill_timer(&pirq_dpci->timer);
580                 /*
581                  * There is no path for __do_IRQ to schedule the softirq as
582                  * IRQ_GUEST is not set. As such we can reset 'dom' directly.
583                  */
584                 pirq_dpci->dom = NULL;
585                 if ( hvm_irq_dpci )
586                 {
587                     unsigned int link;
588 
589                     ASSERT(girq && digl);
590                     list_del(&girq->list);
591                     list_del(&digl->list);
592                     link = hvm_pci_intx_link(digl->device, digl->intx);
593                     hvm_irq_dpci->link_cnt[link]--;
594                 }
595                 pirq_dpci->flags = 0;
596                 pirq_cleanup_check(info, d);
597                 spin_unlock(&d->event_lock);
598                 xfree(girq);
599                 xfree(digl);
600                 return rc;
601             }
602         }
603 
604         spin_unlock(&d->event_lock);
605 
606         if ( iommu_verbose )
607         {
608             char buf[24] = "";
609 
610             if ( digl )
611                 snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
612                          digl->bus, PCI_SLOT(digl->device),
613                          PCI_FUNC(digl->device), digl->intx);
614 
615             printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n",
616                    d->domain_id, pirq, guest_gsi, buf);
617         }
618         break;
619     }
620 
621     default:
622         spin_unlock(&d->event_lock);
623         return -EOPNOTSUPP;
624     }
625 
626     return 0;
627 }
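
/*
 * Illustrative caller sketch (not part of this file, field values made up):
 * a domctl handler binding a machine IRQ as a guest MSI would, based on the
 * fields consumed above, fill the structure roughly like this:
 *
 *     struct xen_domctl_bind_pt_irq bind = {
 *         .machine_irq  = pirq,            // host PIRQ number
 *         .irq_type     = PT_IRQ_TYPE_MSI,
 *         .u.msi.gvec   = 0x51,            // guest vector
 *         .u.msi.gflags = gflags,          // destination id/mode, delivery mode
 *         .u.msi.gtable = gtable,          // MSI-X table address, if applicable
 *     };
 *     rc = pt_irq_create_bind(d, &bind);
 */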
628 
629 int pt_irq_destroy_bind(
630     struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
631 {
632     struct hvm_irq_dpci *hvm_irq_dpci;
633     struct hvm_pirq_dpci *pirq_dpci;
634     unsigned int machine_gsi = pt_irq_bind->machine_irq;
635     struct pirq *pirq;
636     const char *what = NULL;
637 
638     switch ( pt_irq_bind->irq_type )
639     {
640     case PT_IRQ_TYPE_PCI:
641     case PT_IRQ_TYPE_MSI_TRANSLATE:
642         if ( iommu_verbose )
643         {
644             unsigned int device = pt_irq_bind->u.pci.device;
645             unsigned int intx = pt_irq_bind->u.pci.intx;
646 
647             printk(XENLOG_G_INFO
648                    "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n",
649                    d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx),
650                    pt_irq_bind->u.pci.bus,
651                    PCI_SLOT(device), PCI_FUNC(device), intx);
652         }
653         break;
654     case PT_IRQ_TYPE_MSI:
655     {
656         unsigned long flags;
657         struct irq_desc *desc = domain_spin_lock_irq_desc(d, machine_gsi,
658                                                           &flags);
659 
660         if ( !desc )
661             return -EINVAL;
662         /*
663          * Leave the MSI masked, so that the state when calling
664          * pt_irq_create_bind is consistent across bind/unbinds.
665          */
666         guest_mask_msi_irq(desc, true);
667         spin_unlock_irqrestore(&desc->lock, flags);
668         break;
669     }
670 
671     default:
672         return -EOPNOTSUPP;
673     }
674 
675     spin_lock(&d->event_lock);
676 
677     hvm_irq_dpci = domain_get_irq_dpci(d);
678 
679     if ( !hvm_irq_dpci && !is_hardware_domain(d) )
680     {
681         spin_unlock(&d->event_lock);
682         return -EINVAL;
683     }
684 
685     pirq = pirq_info(d, machine_gsi);
686     pirq_dpci = pirq_dpci(pirq);
687 
688     if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
689     {
690         unsigned int bus = pt_irq_bind->u.pci.bus;
691         unsigned int device = pt_irq_bind->u.pci.device;
692         unsigned int intx = pt_irq_bind->u.pci.intx;
693         unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx);
694         unsigned int link = hvm_pci_intx_link(device, intx);
695         struct hvm_girq_dpci_mapping *girq;
696         struct dev_intx_gsi_link *digl, *tmp;
697 
698         list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
699         {
700             if ( girq->bus         == bus &&
701                  girq->device      == device &&
702                  girq->intx        == intx &&
703                  girq->machine_gsi == machine_gsi )
704             {
705                 list_del(&girq->list);
706                 xfree(girq);
707                 girq = NULL;
708                 break;
709             }
710         }
711 
712         if ( girq )
713         {
714             spin_unlock(&d->event_lock);
715             return -EINVAL;
716         }
717 
718         hvm_irq_dpci->link_cnt[link]--;
719 
720         /* clear the mirq info */
721         if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
722         {
723             list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
724             {
725                 if ( digl->bus    == bus &&
726                      digl->device == device &&
727                      digl->intx   == intx )
728                 {
729                     list_del(&digl->list);
730                     xfree(digl);
731                 }
732             }
733             what = list_empty(&pirq_dpci->digl_list) ? "final" : "partial";
734         }
735         else
736             what = "bogus";
737     }
738     else if ( pirq_dpci && pirq_dpci->gmsi.posted )
739         pi_update_irte(NULL, pirq, 0);
740 
741     if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
742          list_empty(&pirq_dpci->digl_list) )
743     {
744         pirq_guest_unbind(d, pirq);
745         msixtbl_pt_unregister(d, pirq);
746         if ( pt_irq_need_timer(pirq_dpci->flags) )
747             kill_timer(&pirq_dpci->timer);
748         pirq_dpci->flags = 0;
749         /*
750          * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
751          * call to pt_pirq_softirq_reset.
752          */
753         pt_pirq_softirq_reset(pirq_dpci);
754 
755         pirq_cleanup_check(pirq, d);
756     }
757 
758     spin_unlock(&d->event_lock);
759 
760     if ( what && iommu_verbose )
761     {
762         unsigned int device = pt_irq_bind->u.pci.device;
763         char buf[24] = "";
764 
765         if ( hvm_irq_dpci )
766             snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
767                      pt_irq_bind->u.pci.bus, PCI_SLOT(device),
768                      PCI_FUNC(device), pt_irq_bind->u.pci.intx);
769 
770         printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n",
771                d->domain_id, what, machine_gsi, buf);
772     }
773 
774     return 0;
775 }
776 
777 void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
778 {
779     INIT_LIST_HEAD(&dpci->digl_list);
780     dpci->gmsi.dest_vcpu_id = -1;
781 }
782 
783 bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
784 {
785     if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
786     {
787         dpci->dom = NULL;
788         return true;
789     }
790     return false;
791 }
792 
793 int pt_pirq_iterate(struct domain *d,
794                     int (*cb)(struct domain *,
795                               struct hvm_pirq_dpci *, void *),
796                     void *arg)
797 {
798     int rc = 0;
799     unsigned int pirq = 0, n, i;
800     struct pirq *pirqs[8];
801 
802     ASSERT(spin_is_locked(&d->event_lock));
803 
804     do {
805         n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
806                                    ARRAY_SIZE(pirqs));
807         for ( i = 0; i < n; ++i )
808         {
809             struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
810 
811             pirq = pirqs[i]->pirq;
812             if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
813                 rc = cb(d, pirq_dpci, arg);
814         }
815     } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
816 
817     return rc;
818 }
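
/*
 * Usage note: pt_pirq_iterate() must be called with d->event_lock held (see
 * the ASSERT above). PIRQs are looked up from the radix tree in batches of
 * ARRAY_SIZE(pirqs) and 'cb' is invoked only for entries that are
 * HVM_IRQ_DPCI_MAPPED; a non-zero return value from 'cb' stops the iteration
 * and is propagated to the caller.
 */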
819 
820 int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
821 {
822     struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
823     struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);
824 
825     ASSERT(is_hvm_domain(d));
826 
827     if ( !is_iommu_enabled(d) || (!is_hardware_domain(d) && !dpci) ||
828          !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
829         return 0;
830 
831     pirq_dpci->masked = 1;
832     raise_softirq_for(pirq_dpci);
833     return 1;
834 }
835 
836 /* called with d->event_lock held */
837 static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
838 {
839     irq_desc_t *desc;
840 
841     if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
842          (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
843     {
844         struct pirq *pirq = dpci_pirq(pirq_dpci);
845 
846         BUG_ON(!local_irq_is_enabled());
847         desc = pirq_spin_lock_irq_desc(pirq, NULL);
848         if ( !desc )
849             return;
850         desc_guest_eoi(desc, pirq);
851     }
852 }
853 
854 static int _hvm_dpci_msi_eoi(struct domain *d,
855                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
856 {
857     int vector = (long)arg;
858 
859     if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
860          (pirq_dpci->gmsi.gvec == vector) )
861     {
862         unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
863                                       XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
864         bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
865 
866         if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
867                                dest_mode) )
868         {
869             __msi_pirq_eoi(pirq_dpci);
870             return 1;
871         }
872     }
873 
874     return 0;
875 }
876 
877 void hvm_dpci_msi_eoi(struct domain *d, int vector)
878 {
879     if ( !is_iommu_enabled(d) ||
880          (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) )
881        return;
882 
883     spin_lock(&d->event_lock);
884     pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
885     spin_unlock(&d->event_lock);
886 }
887 
888 static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
889 {
890     if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) )
891     {
892         ASSERT_UNREACHABLE();
893         return;
894     }
895 
896     spin_lock(&d->event_lock);
897     if ( test_and_clear_bool(pirq_dpci->masked) )
898     {
899         struct pirq *pirq = dpci_pirq(pirq_dpci);
900         const struct dev_intx_gsi_link *digl;
901 
902         if ( hvm_domain_use_pirq(d, pirq) )
903         {
904             send_guest_pirq(d, pirq);
905 
906             if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
907                 goto out;
908         }
909 
910         if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
911         {
912             vmsi_deliver_pirq(d, pirq_dpci);
913             goto out;
914         }
915 
916         list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
917         {
918             ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI));
919             hvm_pci_intx_assert(d, digl->device, digl->intx);
920             pirq_dpci->pending++;
921         }
922 
923         if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
924         {
925             hvm_gsi_assert(d, pirq->pirq);
926             if ( pirq_dpci->flags & HVM_IRQ_DPCI_NO_EOI )
927                 goto out;
928             pirq_dpci->pending++;
929         }
930 
931         if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
932         {
933             /* For MSI translated to an INTx interrupt, EOI as early as possible. */
934             __msi_pirq_eoi(pirq_dpci);
935             goto out;
936         }
937 
938         /*
939          * Set a timer to see if the guest can finish the interrupt or not. For
940          * example, the guest OS may unmask the PIC during boot, before the
941          * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
942          * guest will never deal with the IRQ, and so the physical interrupt
943          * line will never be deasserted.
944          */
945         ASSERT(pt_irq_need_timer(pirq_dpci->flags));
946         set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
947     }
948 
949  out:
950     spin_unlock(&d->event_lock);
951 }
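
/*
 * Delivery order implemented above: a PIRQ that the guest bound to an event
 * channel gets the event sent first (and that is all for a guest MSI); a
 * guest MSI is otherwise injected via vmsi_deliver_pirq(); anything else
 * asserts the guest PCI INTx line(s) and/or the identity mapped GSI, arming
 * the timeout timer for level triggered lines that need a guest EOI.
 */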
952 
953 static void hvm_pirq_eoi(struct pirq *pirq,
954                          const union vioapic_redir_entry *ent)
955 {
956     struct hvm_pirq_dpci *pirq_dpci;
957 
958     if ( !pirq )
959     {
960         ASSERT_UNREACHABLE();
961         return;
962     }
963 
964     pirq_dpci = pirq_dpci(pirq);
965 
966     /*
967      * No need to take the vector lock for the timer,
968      * since the interrupt has not been EOIed yet.
969      */
970     if ( --pirq_dpci->pending ||
971          (ent && ent->fields.mask) ||
972          !pt_irq_need_timer(pirq_dpci->flags) )
973         return;
974 
975     stop_timer(&pirq_dpci->timer);
976     pirq_guest_eoi(pirq);
977 }
978 
979 static void __hvm_dpci_eoi(struct domain *d,
980                            const struct hvm_girq_dpci_mapping *girq,
981                            const union vioapic_redir_entry *ent)
982 {
983     struct pirq *pirq = pirq_info(d, girq->machine_gsi);
984 
985     if ( !hvm_domain_use_pirq(d, pirq) )
986         hvm_pci_intx_deassert(d, girq->device, girq->intx);
987 
988     hvm_pirq_eoi(pirq, ent);
989 }
990 
991 static void hvm_gsi_eoi(struct domain *d, unsigned int gsi,
992                         const union vioapic_redir_entry *ent)
993 {
994     struct pirq *pirq = pirq_info(d, gsi);
995 
996     /* Check if GSI is actually mapped. */
997     if ( !pirq_dpci(pirq) )
998         return;
999 
1000     hvm_gsi_deassert(d, gsi);
1001     hvm_pirq_eoi(pirq, ent);
1002 }
1003 
1004 void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
1005                   const union vioapic_redir_entry *ent)
1006 {
1007     const struct hvm_irq_dpci *hvm_irq_dpci;
1008     const struct hvm_girq_dpci_mapping *girq;
1009 
1010     if ( !is_iommu_enabled(d) )
1011         return;
1012 
1013     if ( is_hardware_domain(d) )
1014     {
1015         spin_lock(&d->event_lock);
1016         hvm_gsi_eoi(d, guest_gsi, ent);
1017         goto unlock;
1018     }
1019 
1020     if ( guest_gsi < NR_ISAIRQS )
1021     {
1022         hvm_dpci_isairq_eoi(d, guest_gsi);
1023         return;
1024     }
1025 
1026     spin_lock(&d->event_lock);
1027     hvm_irq_dpci = domain_get_irq_dpci(d);
1028 
1029     if ( !hvm_irq_dpci )
1030         goto unlock;
1031 
1032     list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
1033         __hvm_dpci_eoi(d, girq, ent);
1034 
1035 unlock:
1036     spin_unlock(&d->event_lock);
1037 }
1038 
1039 /*
1040  * Note: 'pt_pirq_softirq_reset' can clear STATE_SCHED before we get to
1041  * doing it. If that is the case we let 'pt_pirq_softirq_reset' do the ref-counting.
1042  */
1043 static void dpci_softirq(void)
1044 {
1045     unsigned int cpu = smp_processor_id();
1046     LIST_HEAD(our_list);
1047 
1048     local_irq_disable();
1049     list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
1050     local_irq_enable();
1051 
1052     while ( !list_empty(&our_list) )
1053     {
1054         struct hvm_pirq_dpci *pirq_dpci;
1055         struct domain *d;
1056 
1057         pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
1058         list_del(&pirq_dpci->softirq_list);
1059 
1060         d = pirq_dpci->dom;
1061         smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
1062         if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
1063         {
1064             unsigned long flags;
1065 
1066             /* Put back on the list and retry. */
1067             local_irq_save(flags);
1068             list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
1069             local_irq_restore(flags);
1070 
1071             raise_softirq(HVM_DPCI_SOFTIRQ);
1072             continue;
1073         }
1074         /*
1075          * The one who clears STATE_SCHED MUST refcount the domain.
1076          */
1077         if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
1078         {
1079             hvm_dirq_assist(d, pirq_dpci);
1080             put_domain(d);
1081         }
1082         clear_bit(STATE_RUN, &pirq_dpci->state);
1083     }
1084 }
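
/*
 * The STATE_RUN retry above handles the case where a previous softirq
 * invocation for this pirq_dpci (possibly on another CPU) has not finished
 * yet: the entry is re-queued on the local list and the softirq re-raised,
 * rather than busy-waiting in the softirq handler.
 */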
1085 
1086 static int cpu_callback(
1087     struct notifier_block *nfb, unsigned long action, void *hcpu)
1088 {
1089     unsigned int cpu = (unsigned long)hcpu;
1090 
1091     switch ( action )
1092     {
1093     case CPU_UP_PREPARE:
1094         INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
1095         break;
1096     case CPU_UP_CANCELED:
1097     case CPU_DEAD:
1098         /*
1099          * On CPU_DYING this callback is called (on the CPU that is dying)
1100          * with a possible HVM_DPCI_SOFTIRQ pending - at which point we can
1101          * clear out any outstanding domains (by virtue of the idle loop
1102          * calling the softirq later). In the CPU_DEAD case the CPU is already
1103          * dead and there are no pending softirqs for us to handle, so we can chill.
1104          */
1105         ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
1106         break;
1107     }
1108 
1109     return NOTIFY_DONE;
1110 }
1111 
1112 static struct notifier_block cpu_nfb = {
1113     .notifier_call = cpu_callback,
1114 };
1115 
1116 static int __init setup_dpci_softirq(void)
1117 {
1118     unsigned int cpu;
1119 
1120     for_each_online_cpu(cpu)
1121         INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
1122 
1123     open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
1124     register_cpu_notifier(&cpu_nfb);
1125     return 0;
1126 }
1127 __initcall(setup_dpci_softirq);
1128