/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/event.h>
#include <xen/iommu.h>
#include <xen/cpu.h>
#include <xen/irq.h>
#include <asm/hvm/irq.h>
#include <asm/hvm/support.h>
#include <asm/io_apic.h>

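/*
 * Per-CPU list of 'hvm_pirq_dpci' entries whose interrupts have fired and
 * are waiting to be processed by the HVM_DPCI_SOFTIRQ handler below.
 */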
static DEFINE_PER_CPU(struct list_head, dpci_list);

/*
 * These two bit states help to safely schedule, deschedule, and wait until
 * the softirq has finished.
 *
 * The semantics behind these two bits are as follows:
 *  - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
 *  - STATE_RUN - only the softirq is allowed to set and clear it. If it has
 *    been set, hvm_dirq_assist will RUN with a saved value of the
 *    'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
 *
 * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
 * STATE_SCHED(unset) -> STATE_RUN(unset).
 *
 * However the states can also diverge, such as: STATE_SCHED(set) ->
 * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
 * 'hvm_dirq_assist' never ran and that the softirq did not do any
 * ref-counting.
 */

enum {
    STATE_SCHED,
    STATE_RUN
};

/*
 * This can be called multiple times, but the softirq is only raised once.
 * That is, until the STATE_SCHED state has been cleared. The state can be
 * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
 * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
 * the softirq has had a chance to run).
 */
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
{
    unsigned long flags;

    if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
        return;

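    /*
     * We just set STATE_SCHED above, so we must take a domain reference;
     * it is dropped by dpci_softirq() or pt_pirq_softirq_reset(), whichever
     * ends up clearing STATE_SCHED.
     */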
    get_knownalive_domain(pirq_dpci->dom);

    local_irq_save(flags);
    list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
    local_irq_restore(flags);

    raise_softirq(HVM_DPCI_SOFTIRQ);
}

/*
 * If we are racing with softirq_dpci (STATE_SCHED) we return
 * true. Otherwise we return false.
 *
 * If it is false, it is the caller's responsibility to make sure
 * that the softirq (with the event_lock dropped) has run.
 */
bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
{
    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
        return true;

    /*
     * If in the future we were to call 'raise_softirq_for' right after
     * 'pt_pirq_softirq_active' we would have to reset the list first
     * (otherwise it might contain stale data).
     */
    return false;
}

/*
 * Reset the pirq_dpci->dom parameter to NULL.
 *
 * This function checks the different states to make sure it can do it
 * at the right time. If it unschedules 'hvm_dirq_assist' from running
 * it also drops the domain reference (which is what the softirq would
 * have done).
 */
static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
{
    struct domain *d = pirq_dpci->dom;

    ASSERT(spin_is_locked(&d->event_lock));

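    /*
     * cmpxchg() returns the old state.  Only the plain STATE_SCHED -> 0
     * transition actually de-schedules the softirq; any state with
     * STATE_RUN set means dpci_softirq() is (or was) running with its own
     * copy of 'dom'.
     */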
    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
    {
    case (1 << STATE_SCHED):
        /*
         * We are going to try to de-schedule the softirq before it goes in
         * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
         */
        put_domain(d);
        /* fallthrough. */
    case (1 << STATE_RUN):
    case (1 << STATE_RUN) | (1 << STATE_SCHED):
        /*
         * The reason it is OK to reset 'dom' when the STATE_RUN bit is set
         * is due to a shortcut the 'dpci_softirq' implements. It stashes
         * 'dom' in a local variable before it sets STATE_RUN - and therefore
         * will not dereference '->dom', which would crash.
         */
        pirq_dpci->dom = NULL;
        break;
    }
    /*
     * Inhibit 'hvm_dirq_assist' from doing anything useful and, at worst,
     * from calling 'set_timer', which would blow up (as we have called
     * kill_timer or never initialized it). Note that we hold the lock that
     * 'hvm_dirq_assist' could be spinning on.
     */
    pirq_dpci->masked = 0;
}

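/*
 * A time-out timer is only needed for interrupts that rely on a guest EOI
 * to deassert the line: guest MSIs, translated MSIs and no-EOI (edge
 * triggered) GSIs never leave the machine line asserted, so no timer is
 * set up for them.
 */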
bool pt_irq_need_timer(uint32_t flags)
{
    return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE |
                      HVM_IRQ_DPCI_NO_EOI));
}

static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                            void *arg)
{
    if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
                              &pirq_dpci->flags) )
    {
        pirq_dpci->masked = 0;
        pirq_dpci->pending = 0;
        pirq_guest_eoi(dpci_pirq(pirq_dpci));
    }

    return 0;
}

static void pt_irq_time_out(void *data)
{
    struct hvm_pirq_dpci *irq_map = data;
    const struct hvm_irq_dpci *dpci;
    const struct dev_intx_gsi_link *digl;

    spin_lock(&irq_map->dom->event_lock);

    if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
    {
        ASSERT(is_hardware_domain(irq_map->dom));
        /*
         * Identity mapped, no need to iterate over the guest GSI list to find
         * other pirqs sharing the same guest GSI.
         *
         * In the identity mapped case the EOI can also be done now; this way
         * the iteration over the list of domain pirqs is avoided.
         */
        hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
        pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
        spin_unlock(&irq_map->dom->event_lock);
        return;
    }

    dpci = domain_get_irq_dpci(irq_map->dom);
    if ( unlikely(!dpci) )
    {
        ASSERT_UNREACHABLE();
        spin_unlock(&irq_map->dom->event_lock);
        return;
    }
    list_for_each_entry ( digl, &irq_map->digl_list, list )
    {
        unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
        const struct hvm_girq_dpci_mapping *girq;

        list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
        {
            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);

            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
        }
        hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
    }

    pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);

    spin_unlock(&irq_map->dom->event_lock);
}

struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
{
    if ( !d || !is_hvm_domain(d) )
        return NULL;

    return hvm_domain_irq(d)->dpci;
}

void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
{
    xfree(dpci);
}

/*
 * This routine handles lowest-priority interrupts using the vector-hashing
 * mechanism. As an example, modern Intel CPUs use this method to handle
 * lowest-priority interrupts.
 *
 * Here are the details of the vector-hashing mechanism:
 * 1. For lowest-priority interrupts, store all the possible destination
 *    vCPUs in an array.
 * 2. Use "gvec % max number of destination vCPUs" to find the right
 *    destination vCPU in the array for the lowest-priority interrupt.
 */
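/*
 * Worked example (illustrative): with gvec = 35 and four possible
 * destination vCPUs, 35 % 4 = 3 selects the fourth set bit in the
 * destination bitmap built below.
 */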
static struct vcpu *vector_hashing_dest(const struct domain *d,
                                        uint32_t dest_id,
                                        bool dest_mode,
                                        uint8_t gvec)

{
    unsigned long *dest_vcpu_bitmap;
    unsigned int dest_vcpus = 0;
    struct vcpu *v, *dest = NULL;
    unsigned int i;

    dest_vcpu_bitmap = xzalloc_array(unsigned long,
                                     BITS_TO_LONGS(d->max_vcpus));
    if ( !dest_vcpu_bitmap )
        return NULL;

    for_each_vcpu ( d, v )
    {
        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
                                dest_id, dest_mode) )
            continue;

        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
        dest_vcpus++;
    }

    if ( dest_vcpus != 0 )
    {
        unsigned int mod = gvec % dest_vcpus;
        unsigned int idx = 0;

        for ( i = 0; i <= mod; i++ )
        {
            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
            BUG_ON(idx > d->max_vcpus);
        }

        dest = d->vcpu[idx - 1];
    }

    xfree(dest_vcpu_bitmap);

    return dest;
}

int pt_irq_create_bind(
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
{
    struct hvm_irq_dpci *hvm_irq_dpci;
    struct hvm_pirq_dpci *pirq_dpci;
    struct pirq *info;
    int rc, pirq = pt_irq_bind->machine_irq;

    if ( pirq < 0 || pirq >= d->nr_pirqs )
        return -EINVAL;

 restart:
    spin_lock(&d->event_lock);

    hvm_irq_dpci = domain_get_irq_dpci(d);
    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
    {
        unsigned int i;

        /*
         * NB: the hardware domain doesn't use a hvm_irq_dpci struct because
         * it's only allowed to identity map GSIs, and so the data contained
         * in that struct (used to map guest GSIs into machine GSIs and
         * perform interrupt routing) is completely useless to it.
         */
        hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
        if ( hvm_irq_dpci == NULL )
        {
            spin_unlock(&d->event_lock);
            return -ENOMEM;
        }
        for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ )
            INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);

        hvm_domain_irq(d)->dpci = hvm_irq_dpci;
    }

    info = pirq_get_info(d, pirq);
    if ( !info )
    {
        spin_unlock(&d->event_lock);
        return -ENOMEM;
    }
    pirq_dpci = pirq_dpci(info);

    /*
     * A crude 'while' loop with us dropping the spinlock and giving
     * the softirq_dpci a chance to run.
     * We MUST check for this condition as the softirq could be scheduled
     * and hasn't run yet. Note that this code replaced tasklet_kill, which
     * would have spun forever and would do the same thing (wait to flush out
     * outstanding hvm_dirq_assist calls).
     */
    if ( pt_pirq_softirq_active(pirq_dpci) )
    {
        spin_unlock(&d->event_lock);
        cpu_relax();
        goto restart;
    }

    switch ( pt_irq_bind->irq_type )
    {
    case PT_IRQ_TYPE_MSI:
    {
        uint8_t dest, delivery_mode;
        bool dest_mode;
        int dest_vcpu_id;
        const struct vcpu *vcpu;
        uint32_t gflags = pt_irq_bind->u.msi.gflags &
                          ~XEN_DOMCTL_VMSI_X86_UNMASKED;
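        /*
         * Note that the UNMASKED request is deliberately stripped from the
         * stored gflags; it is acted upon separately once the binding has
         * been fully set up (see the end of this case).
         */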

        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
                               HVM_IRQ_DPCI_GUEST_MSI;
            pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
            pirq_dpci->gmsi.gflags = gflags;
            /*
             * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
             * The 'pirq_cleanup_check' which would free the structure is only
             * called if the event channel for the PIRQ is active. However
             * OSes that use event channels usually bind PIRQs to event
             * channels and unbind them before calling 'pt_irq_destroy_bind' -
             * with the result that we re-use the 'dpci' structure. This can
             * be reproduced by unloading and loading the driver for a device.
             *
             * As such on every 'pt_irq_create_bind' call we MUST set it.
             */
            pirq_dpci->dom = d;
            /*
             * Bind after hvm_irq_dpci is set up to avoid a race with the
             * IRQ handler.
             */
            rc = pirq_guest_bind(d->vcpu[0], info, 0);
            if ( rc == 0 && pt_irq_bind->u.msi.gtable )
            {
                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
                if ( unlikely(rc) )
                {
                    pirq_guest_unbind(d, info);
                    /*
                     * Between 'pirq_guest_bind' and 'pirq_guest_unbind' an
                     * interrupt can be scheduled. No more of them are going
                     * to be scheduled, but we must deal with the one that
                     * may be in the queue.
                     */
                    pt_pirq_softirq_reset(pirq_dpci);
                }
            }
            if ( unlikely(rc) )
            {
                pirq_dpci->gmsi.gflags = 0;
                pirq_dpci->gmsi.gvec = 0;
                pirq_dpci->dom = NULL;
                pirq_dpci->flags = 0;
                pirq_cleanup_check(info, d);
                spin_unlock(&d->event_lock);
                return rc;
            }
        }
        else
        {
            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;

            if ( (pirq_dpci->flags & mask) != mask )
            {
                spin_unlock(&d->event_lock);
                return -EBUSY;
            }

            /* If pirq is already mapped as vmsi, update guest data/addr. */
            if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
                 pirq_dpci->gmsi.gflags != gflags )
            {
                /* Directly clear pending EOIs before enabling new MSI info. */
                pirq_guest_eoi(info);

                pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
                pirq_dpci->gmsi.gflags = gflags;
            }
        }
        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
        dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
                         XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
        dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                  XEN_DOMCTL_VMSI_X86_DELIV_MASK);

        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
        spin_unlock(&d->event_lock);

        pirq_dpci->gmsi.posted = false;
        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
        if ( iommu_intpost )
        {
            if ( delivery_mode == dest_LowestPrio )
                vcpu = vector_hashing_dest(d, dest, dest_mode,
                                           pirq_dpci->gmsi.gvec);
            if ( vcpu )
                pirq_dpci->gmsi.posted = true;
        }
        if ( vcpu && is_iommu_enabled(d) )
            hvm_migrate_pirq(pirq_dpci, vcpu);

        /* Use interrupt posting if it is supported. */
        if ( iommu_intpost )
            pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
                           info, pirq_dpci->gmsi.gvec);

        if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
        {
            unsigned long flags;
            struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags);

            if ( !desc )
            {
                pt_irq_destroy_bind(d, pt_irq_bind);
                return -EINVAL;
            }

            guest_mask_msi_irq(desc, false);
            spin_unlock_irqrestore(&desc->lock, flags);
        }

        break;
    }

    case PT_IRQ_TYPE_PCI:
    case PT_IRQ_TYPE_MSI_TRANSLATE:
    {
        struct dev_intx_gsi_link *digl = NULL;
        struct hvm_girq_dpci_mapping *girq = NULL;
        unsigned int guest_gsi;

        /*
         * Mapping GSIs for the hardware domain is different from doing it
         * for an unprivileged guest: the hardware domain is only allowed to
         * identity map GSIs, and as such all the data in the u.pci union is
         * discarded.
         */
        if ( hvm_irq_dpci )
        {
            unsigned int link;

            digl = xmalloc(struct dev_intx_gsi_link);
            girq = xmalloc(struct hvm_girq_dpci_mapping);

            if ( !digl || !girq )
            {
                spin_unlock(&d->event_lock);
                xfree(girq);
                xfree(digl);
                return -ENOMEM;
            }

            girq->bus = digl->bus = pt_irq_bind->u.pci.bus;
            girq->device = digl->device = pt_irq_bind->u.pci.device;
            girq->intx = digl->intx = pt_irq_bind->u.pci.intx;
            list_add_tail(&digl->list, &pirq_dpci->digl_list);

            guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
            link = hvm_pci_intx_link(digl->device, digl->intx);

            hvm_irq_dpci->link_cnt[link]++;

            girq->machine_gsi = pirq;
            list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
        }
        else
        {
            ASSERT(is_hardware_domain(d));

            /* MSI_TRANSLATE is not supported for the hardware domain. */
            if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
                 pirq >= hvm_domain_irq(d)->nr_gsis )
            {
                spin_unlock(&d->event_lock);

                return -EINVAL;
            }
            guest_gsi = pirq;
        }

        /* Only bind the same machine IRQ once per domain. */
        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            unsigned int share;

            /* MUST be set, as the pirq_dpci can be re-used. */
            pirq_dpci->dom = d;
            if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
            {
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
                                   HVM_IRQ_DPCI_MACH_MSI |
                                   HVM_IRQ_DPCI_GUEST_PCI |
                                   HVM_IRQ_DPCI_TRANSLATE;
                share = 0;
            }
            else    /* PT_IRQ_TYPE_PCI */
            {
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
                                   HVM_IRQ_DPCI_MACH_PCI |
                                   HVM_IRQ_DPCI_GUEST_PCI;
                if ( !is_hardware_domain(d) )
                    share = BIND_PIRQ__WILL_SHARE;
                else
                {
                    int mask = vioapic_get_mask(d, guest_gsi);
                    int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi);

                    if ( mask < 0 || trigger_mode < 0 )
                    {
                        spin_unlock(&d->event_lock);

                        ASSERT_UNREACHABLE();
                        return -EINVAL;
                    }
                    pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI;
                    /*
                     * Check whether the corresponding vIO APIC pin is
                     * configured level or edge triggered; level triggered
                     * interrupts will be marked as shareable.
                     */
                    ASSERT(!mask);
                    share = trigger_mode;
                    if ( trigger_mode == VIOAPIC_EDGE_TRIG )
                        /*
                         * Edge IO-APIC interrupt, no EOI or unmask to perform
                         * and hence no timer needed.
                         */
                        pirq_dpci->flags |= HVM_IRQ_DPCI_NO_EOI;
                }
            }

            /* Init timer before binding */
            if ( pt_irq_need_timer(pirq_dpci->flags) )
                init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
            /* Deal with gsi for legacy devices */
            rc = pirq_guest_bind(d->vcpu[0], info, share);
            if ( unlikely(rc) )
            {
                if ( pt_irq_need_timer(pirq_dpci->flags) )
                    kill_timer(&pirq_dpci->timer);
                /*
                 * There is no path for __do_IRQ to schedule the softirq as
                 * IRQ_GUEST is not set. As such we can reset 'dom' directly.
                 */
                pirq_dpci->dom = NULL;
                if ( hvm_irq_dpci )
                {
                    unsigned int link;

                    ASSERT(girq && digl);
                    list_del(&girq->list);
                    list_del(&digl->list);
                    link = hvm_pci_intx_link(digl->device, digl->intx);
                    hvm_irq_dpci->link_cnt[link]--;
                }
                pirq_dpci->flags = 0;
                pirq_cleanup_check(info, d);
                spin_unlock(&d->event_lock);
                xfree(girq);
                xfree(digl);
                return rc;
            }
        }

        spin_unlock(&d->event_lock);

        if ( iommu_verbose )
        {
            char buf[24] = "";

            if ( digl )
                snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
                         digl->bus, PCI_SLOT(digl->device),
                         PCI_FUNC(digl->device), digl->intx);

            printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n",
                   d->domain_id, pirq, guest_gsi, buf);
        }
        break;
    }

    default:
        spin_unlock(&d->event_lock);
        return -EOPNOTSUPP;
    }

    return 0;
}

int pt_irq_destroy_bind(
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
{
    struct hvm_irq_dpci *hvm_irq_dpci;
    struct hvm_pirq_dpci *pirq_dpci;
    unsigned int machine_gsi = pt_irq_bind->machine_irq;
    struct pirq *pirq;
    const char *what = NULL;

    switch ( pt_irq_bind->irq_type )
    {
    case PT_IRQ_TYPE_PCI:
    case PT_IRQ_TYPE_MSI_TRANSLATE:
        if ( iommu_verbose )
        {
            unsigned int device = pt_irq_bind->u.pci.device;
            unsigned int intx = pt_irq_bind->u.pci.intx;

            printk(XENLOG_G_INFO
                   "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n",
                   d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx),
                   pt_irq_bind->u.pci.bus,
                   PCI_SLOT(device), PCI_FUNC(device), intx);
        }
        break;
    case PT_IRQ_TYPE_MSI:
    {
        unsigned long flags;
        struct irq_desc *desc = domain_spin_lock_irq_desc(d, machine_gsi,
                                                          &flags);

        if ( !desc )
            return -EINVAL;
        /*
         * Leave the MSI masked, so that the state when calling
         * pt_irq_create_bind is consistent across bind/unbinds.
         */
        guest_mask_msi_irq(desc, true);
        spin_unlock_irqrestore(&desc->lock, flags);
        break;
    }

    default:
        return -EOPNOTSUPP;
    }

    spin_lock(&d->event_lock);

    hvm_irq_dpci = domain_get_irq_dpci(d);

    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
    {
        spin_unlock(&d->event_lock);
        return -EINVAL;
    }

    pirq = pirq_info(d, machine_gsi);
    pirq_dpci = pirq_dpci(pirq);
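    /*
     * Either of these may be NULL if the pirq was never allocated or bound;
     * the NULL checks below cope with that.
     */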

    if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
    {
        unsigned int bus = pt_irq_bind->u.pci.bus;
        unsigned int device = pt_irq_bind->u.pci.device;
        unsigned int intx = pt_irq_bind->u.pci.intx;
        unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx);
        unsigned int link = hvm_pci_intx_link(device, intx);
        struct hvm_girq_dpci_mapping *girq;
        struct dev_intx_gsi_link *digl, *tmp;

        list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
        {
            if ( girq->bus == bus &&
                 girq->device == device &&
                 girq->intx == intx &&
                 girq->machine_gsi == machine_gsi )
            {
                list_del(&girq->list);
                xfree(girq);
                girq = NULL;
                break;
            }
        }

        if ( girq )
        {
            spin_unlock(&d->event_lock);
            return -EINVAL;
        }

        hvm_irq_dpci->link_cnt[link]--;

        /* clear the mirq info */
        if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
            {
                if ( digl->bus == bus &&
                     digl->device == device &&
                     digl->intx == intx )
                {
                    list_del(&digl->list);
                    xfree(digl);
                }
            }
            what = list_empty(&pirq_dpci->digl_list) ? "final" : "partial";
        }
        else
            what = "bogus";
    }
    else if ( pirq_dpci && pirq_dpci->gmsi.posted )
        pi_update_irte(NULL, pirq, 0);

    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
         list_empty(&pirq_dpci->digl_list) )
    {
        pirq_guest_unbind(d, pirq);
        msixtbl_pt_unregister(d, pirq);
        if ( pt_irq_need_timer(pirq_dpci->flags) )
            kill_timer(&pirq_dpci->timer);
        pirq_dpci->flags = 0;
        /*
         * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
         * call to pt_pirq_softirq_reset.
         */
        pt_pirq_softirq_reset(pirq_dpci);

        pirq_cleanup_check(pirq, d);
    }

    spin_unlock(&d->event_lock);

    if ( what && iommu_verbose )
    {
        unsigned int device = pt_irq_bind->u.pci.device;
        char buf[24] = "";

        if ( hvm_irq_dpci )
            snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
                     pt_irq_bind->u.pci.bus, PCI_SLOT(device),
                     PCI_FUNC(device), pt_irq_bind->u.pci.intx);

        printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n",
               d->domain_id, what, machine_gsi, buf);
    }

    return 0;
}

void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
{
    INIT_LIST_HEAD(&dpci->digl_list);
    dpci->gmsi.dest_vcpu_id = -1;
}

bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
{
    if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
    {
        dpci->dom = NULL;
        return true;
    }
    return false;
}

int pt_pirq_iterate(struct domain *d,
                    int (*cb)(struct domain *,
                              struct hvm_pirq_dpci *, void *),
                    void *arg)
{
    int rc = 0;
    unsigned int pirq = 0, n, i;
    struct pirq *pirqs[8];

    ASSERT(spin_is_locked(&d->event_lock));

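    /*
     * Walk the pirq radix tree in batches of ARRAY_SIZE(pirqs), resuming
     * each gang lookup after the last pirq seen, until the callback returns
     * non-zero or a partial batch indicates the end of the tree.
     */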
    do {
        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
                                   ARRAY_SIZE(pirqs));
        for ( i = 0; i < n; ++i )
        {
            struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);

            pirq = pirqs[i]->pirq;
            if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
                rc = cb(d, pirq_dpci, arg);
        }
    } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );

    return rc;
}

int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
{
    struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);

    ASSERT(is_hvm_domain(d));

    if ( !is_iommu_enabled(d) || (!is_hardware_domain(d) && !dpci) ||
         !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        return 0;

    pirq_dpci->masked = 1;
    raise_softirq_for(pirq_dpci);
    return 1;
}

/* called with d->event_lock held */
static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
{
    irq_desc_t *desc;

    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
    {
        struct pirq *pirq = dpci_pirq(pirq_dpci);

        BUG_ON(!local_irq_is_enabled());
        desc = pirq_spin_lock_irq_desc(pirq, NULL);
        if ( !desc )
            return;
        desc_guest_eoi(desc, pirq);
    }
}

static int _hvm_dpci_msi_eoi(struct domain *d,
                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
{
    int vector = (long)arg;

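    /*
     * Returning 1 makes pt_pirq_iterate() stop early once a pirq matching
     * the EOIed vector and destination has been handled.
     */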
    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
         (pirq_dpci->gmsi.gvec == vector) )
    {
        unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                      XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
        bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;

        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
                               dest_mode) )
        {
            __msi_pirq_eoi(pirq_dpci);
            return 1;
        }
    }

    return 0;
}

void hvm_dpci_msi_eoi(struct domain *d, int vector)
{
    if ( !is_iommu_enabled(d) ||
         (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) )
        return;

    spin_lock(&d->event_lock);
    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
    spin_unlock(&d->event_lock);
}

static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
{
    if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) )
    {
        ASSERT_UNREACHABLE();
        return;
    }

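    /*
     * 'masked' is set by hvm_do_IRQ_dpci() in hard-IRQ context.  If it has
     * already been cleared (e.g. by pt_pirq_softirq_reset()), there is
     * nothing left to inject.
     */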
    spin_lock(&d->event_lock);
    if ( test_and_clear_bool(pirq_dpci->masked) )
    {
        struct pirq *pirq = dpci_pirq(pirq_dpci);
        const struct dev_intx_gsi_link *digl;

        if ( hvm_domain_use_pirq(d, pirq) )
        {
            send_guest_pirq(d, pirq);

            if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
                goto out;
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
        {
            vmsi_deliver_pirq(d, pirq_dpci);
            goto out;
        }

        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
        {
            ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI));
            hvm_pci_intx_assert(d, digl->device, digl->intx);
            pirq_dpci->pending++;
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
        {
            hvm_gsi_assert(d, pirq->pirq);
            if ( pirq_dpci->flags & HVM_IRQ_DPCI_NO_EOI )
                goto out;
            pirq_dpci->pending++;
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
        {
            /*
             * For a translated MSI-to-INTx interrupt, EOI as early as
             * possible.
             */
            __msi_pirq_eoi(pirq_dpci);
            goto out;
        }

        /*
         * Set a timer to see if the guest can finish the interrupt or not.
         * For example, the guest OS may unmask the PIC during boot, before
         * the guest driver is loaded. hvm_pci_intx_assert() may succeed, but
         * the guest will never deal with the IRQ, and the physical interrupt
         * line will then never be deasserted.
         */
        ASSERT(pt_irq_need_timer(pirq_dpci->flags));
        set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
    }

 out:
    spin_unlock(&d->event_lock);
}

static void hvm_pirq_eoi(struct pirq *pirq,
                         const union vioapic_redir_entry *ent)
{
    struct hvm_pirq_dpci *pirq_dpci;

    if ( !pirq )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    pirq_dpci = pirq_dpci(pirq);

    /*
     * No need to take the vector lock for the timer,
     * since the interrupt has not been EOIed yet.
     */
    if ( --pirq_dpci->pending ||
         (ent && ent->fields.mask) ||
         !pt_irq_need_timer(pirq_dpci->flags) )
        return;

    stop_timer(&pirq_dpci->timer);
    pirq_guest_eoi(pirq);
}

static void __hvm_dpci_eoi(struct domain *d,
                           const struct hvm_girq_dpci_mapping *girq,
                           const union vioapic_redir_entry *ent)
{
    struct pirq *pirq = pirq_info(d, girq->machine_gsi);

    if ( !hvm_domain_use_pirq(d, pirq) )
        hvm_pci_intx_deassert(d, girq->device, girq->intx);

    hvm_pirq_eoi(pirq, ent);
}

static void hvm_gsi_eoi(struct domain *d, unsigned int gsi,
                        const union vioapic_redir_entry *ent)
{
    struct pirq *pirq = pirq_info(d, gsi);

    /* Check if GSI is actually mapped. */
    if ( !pirq_dpci(pirq) )
        return;

    hvm_gsi_deassert(d, gsi);
    hvm_pirq_eoi(pirq, ent);
}

void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
                  const union vioapic_redir_entry *ent)
{
    const struct hvm_irq_dpci *hvm_irq_dpci;
    const struct hvm_girq_dpci_mapping *girq;

    if ( !is_iommu_enabled(d) )
        return;

    if ( is_hardware_domain(d) )
    {
        spin_lock(&d->event_lock);
        hvm_gsi_eoi(d, guest_gsi, ent);
        goto unlock;
    }

    if ( guest_gsi < NR_ISAIRQS )
    {
        hvm_dpci_isairq_eoi(d, guest_gsi);
        return;
    }

    spin_lock(&d->event_lock);
    hvm_irq_dpci = domain_get_irq_dpci(d);

    if ( !hvm_irq_dpci )
        goto unlock;

    list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
        __hvm_dpci_eoi(d, girq, ent);

 unlock:
    spin_unlock(&d->event_lock);
}

/*
 * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
 * doing it. If that is the case we let 'pt_pirq_softirq_reset' do the
 * ref-counting.
 */
static void dpci_softirq(void)
{
    unsigned int cpu = smp_processor_id();
    LIST_HEAD(our_list);

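    /*
     * Splice the whole per-CPU list onto a local one with interrupts
     * disabled, so that raise_softirq_for() (which runs in hard-IRQ context)
     * cannot race with the traversal below.
     */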
    local_irq_disable();
    list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
    local_irq_enable();

    while ( !list_empty(&our_list) )
    {
        struct hvm_pirq_dpci *pirq_dpci;
        struct domain *d;

        pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
        list_del(&pirq_dpci->softirq_list);

        d = pirq_dpci->dom;
        smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
        if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
        {
            unsigned long flags;

            /* Put back on the list and retry. */
            local_irq_save(flags);
            list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
            local_irq_restore(flags);

            raise_softirq(HVM_DPCI_SOFTIRQ);
            continue;
        }
        /*
         * The one who clears STATE_SCHED MUST refcount the domain.
         */
        if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
        {
            hvm_dirq_assist(d, pirq_dpci);
            put_domain(d);
        }
        clear_bit(STATE_RUN, &pirq_dpci->state);
    }
}

static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;

    switch ( action )
    {
    case CPU_UP_PREPARE:
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
        break;
    case CPU_UP_CANCELED:
    case CPU_DEAD:
        /*
         * On CPU_DYING this callback is called (on the CPU that is dying)
         * with a possible HVM_DPCI_SOFTIRQ pending - at which point we can
         * clear out any outstanding domains (by virtue of the idle loop
         * calling the softirq later). In the CPU_DEAD case the CPU is deaf
         * and there are no pending softirqs left for us to handle, so we
         * can chill.
         */
        ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
};

static int __init setup_dpci_softirq(void)
{
    unsigned int cpu;

    for_each_online_cpu(cpu)
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));

    open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
    register_cpu_notifier(&cpu_nfb);
    return 0;
}
__initcall(setup_dpci_softirq);