/*
 * vlapic.c: virtualize LAPIC for HVM vcpus.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2006 Keir Fraser, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/event.h>
#include <xen/nospec.h>
#include <xen/trace.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/numa.h>
#include <asm/current.h>
#include <asm/page.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/vpmu.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/viridian.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>

#define VLAPIC_VERSION                  0x00050014
#define VLAPIC_LVT_NUM                  6
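
/*
 * LVR layout: bits 7:0 hold the version (0x14, an integrated P4-class
 * APIC) and bits 23:16 hold the highest LVT entry index (5), matching
 * the six entries enumerated by VLAPIC_LVT_NUM.
 */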

#define LVT_MASK \
    (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK   \
    (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY |\
    APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static const unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
{
     /* LVTT */
     LVT_MASK | APIC_TIMER_MODE_MASK,
     /* LVTTHMR */
     LVT_MASK | APIC_MODE_MASK,
     /* LVTPC */
     LVT_MASK | APIC_MODE_MASK,
     /* LVT0-1 */
     LINT_MASK, LINT_MASK,
     /* LVTERR */
     LVT_MASK
};

#define vlapic_lvtt_period(vlapic)                              \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_PERIODIC)

#define vlapic_lvtt_oneshot(vlapic)                             \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_ONESHOT)

#define vlapic_lvtt_tdt(vlapic)                                 \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_TSC_DEADLINE)

static void vlapic_do_init(struct vlapic *vlapic);

static int vlapic_find_highest_vector(const void *bitmap)
{
    const uint32_t *word = bitmap;
    unsigned int word_offset = X86_NR_VECTORS / 32;

    /* Work backwards through the bitmap (first 32-bit word in every four). */
    while ( (word_offset != 0) && (word[(--word_offset)*4] == 0) )
        continue;

    return (fls(word[word_offset*4]) - 1) + (word_offset * 32);
}

/*
 * IRR-specific bitmap update & search routines.
 */

static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
{
    return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
}

static void vlapic_clear_irr(int vector, struct vlapic *vlapic)
{
    vlapic_clear_vector(vector, &vlapic->regs->data[APIC_IRR]);
}

static int vlapic_find_highest_irr(struct vlapic *vlapic)
{
    vlapic_sync_pir_to_irr(vlapic_vcpu(vlapic));

    return vlapic_find_highest_vector(&vlapic->regs->data[APIC_IRR]);
}

static void vlapic_error(struct vlapic *vlapic, unsigned int errmask)
{
    unsigned long flags;
    uint32_t esr;

    spin_lock_irqsave(&vlapic->esr_lock, flags);
    esr = vlapic_get_reg(vlapic, APIC_ESR);
    if ( (esr & errmask) != errmask )
    {
        uint32_t lvterr = vlapic_get_reg(vlapic, APIC_LVTERR);

        vlapic_set_reg(vlapic, APIC_ESR, esr | errmask);
        if ( !(lvterr & APIC_LVT_MASKED) )
            vlapic_set_irq(vlapic, lvterr & APIC_VECTOR_MASK, 0);
    }
    spin_unlock_irqrestore(&vlapic->esr_lock, flags);
}

bool vlapic_test_irq(const struct vlapic *vlapic, uint8_t vec)
{
    if ( unlikely(vec < 16) )
        return false;

    if ( hvm_funcs.test_pir &&
         alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
        return true;

    return vlapic_test_vector(vec, &vlapic->regs->data[APIC_IRR]);
}

void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig)
{
    struct vcpu *target = vlapic_vcpu(vlapic);

    if ( unlikely(vec < 16) )
    {
        vlapic_error(vlapic, APIC_ESR_RECVILL);
        return;
    }

    if ( trig )
        vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);
    else
        vlapic_clear_vector(vec, &vlapic->regs->data[APIC_TMR]);

    if ( hvm_funcs.update_eoi_exit_bitmap )
        alternative_vcall(hvm_funcs.update_eoi_exit_bitmap, target, vec, trig);

    if ( hvm_funcs.deliver_posted_intr )
        alternative_vcall(hvm_funcs.deliver_posted_intr, target, vec);
    else if ( !vlapic_test_and_set_irr(vec, vlapic) )
        vcpu_kick(target);
}

static int vlapic_find_highest_isr(const struct vlapic *vlapic)
{
    return vlapic_find_highest_vector(&vlapic->regs->data[APIC_ISR]);
}

static uint32_t vlapic_get_ppr(const struct vlapic *vlapic)
{
    uint32_t tpr, isrv, ppr;
    int isr;

    tpr  = vlapic_get_reg(vlapic, APIC_TASKPRI);
    isr  = vlapic_find_highest_isr(vlapic);
    isrv = (isr != -1) ? isr : 0;

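    /*
     * Architecturally, PPR is the greater of TPR and the priority class
     * (bits 7:4) of the highest vector in service; see the Intel SDM,
     * "Task and Processor Priorities".
     */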
    if ( (tpr & 0xf0) >= (isrv & 0xf0) )
        ppr = tpr & 0xff;
    else
        ppr = isrv & 0xf0;

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT,
                "vlapic %p, ppr %#x, isr %#x, isrv %#x",
                vlapic, ppr, isr, isrv);

    return ppr;
}

uint32_t vlapic_set_ppr(struct vlapic *vlapic)
{
    uint32_t ppr = vlapic_get_ppr(vlapic);

    vlapic_set_reg(vlapic, APIC_PROCPRI, ppr);
    return ppr;
}

static bool_t vlapic_match_logical_addr(const struct vlapic *vlapic,
                                        uint32_t mda)
{
    bool_t result = 0;
    uint32_t logical_id = vlapic_get_reg(vlapic, APIC_LDR);

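    /*
     * In x2APIC mode LDR is read-only: bits 31:16 are the cluster ID and
     * bits 15:0 are a bitmask of logical IDs within the cluster.  A match
     * requires an equal cluster ID and a non-empty mask intersection.
     */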
    if ( vlapic_x2apic_mode(vlapic) )
        return ((logical_id >> 16) == (mda >> 16)) &&
               (uint16_t)(logical_id & mda);

    logical_id = GET_xAPIC_LOGICAL_ID(logical_id);
    mda = (uint8_t)mda;

    switch ( vlapic_get_reg(vlapic, APIC_DFR) )
    {
    case APIC_DFR_FLAT:
        if ( logical_id & mda )
            result = 1;
        break;
    case APIC_DFR_CLUSTER:
        if ( ((logical_id >> 4) == (mda >> 0x4)) && (logical_id & mda & 0xf) )
            result = 1;
        break;
    default:
        printk(XENLOG_G_WARNING "%pv: bad LAPIC DFR value %08x\n",
               const_vlapic_vcpu(vlapic),
               vlapic_get_reg(vlapic, APIC_DFR));
        break;
    }

    return result;
}

bool_t vlapic_match_dest(
    const struct vlapic *target, const struct vlapic *source,
    int short_hand, uint32_t dest, bool_t dest_mode)
{
    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "target %p, source %p, dest %#x, "
                "dest_mode %#x, short_hand %#x",
                target, source, dest, dest_mode, short_hand);

    switch ( short_hand )
    {
    case APIC_DEST_NOSHORT:
        if ( dest_mode )
            return vlapic_match_logical_addr(target, dest);
        return (dest == _VLAPIC_ID(target, 0xffffffff)) ||
               (dest == VLAPIC_ID(target));

    case APIC_DEST_SELF:
        return (target == source);

    case APIC_DEST_ALLINC:
        return 1;

    case APIC_DEST_ALLBUT:
        return (target != source);

    default:
        gdprintk(XENLOG_WARNING, "Bad dest shorthand value %x\n", short_hand);
        break;
    }

    return 0;
}

static void vlapic_init_sipi_one(struct vcpu *target, uint32_t icr)
{
    vcpu_pause(target);

    switch ( icr & APIC_MODE_MASK )
    {
    case APIC_DM_INIT: {
        bool_t fpu_initialised;
        int rc;

        /* No work on INIT de-assert for P4-type APIC. */
        if ( (icr & (APIC_INT_LEVELTRIG | APIC_INT_ASSERT)) ==
             APIC_INT_LEVELTRIG )
            break;
        /* Nothing to do if the VCPU is already reset. */
        if ( !target->is_initialised )
            break;
        hvm_vcpu_down(target);
        domain_lock(target->domain);
        /* Reset necessary VCPU state. This does not include FPU state. */
        fpu_initialised = target->fpu_initialised;
        rc = vcpu_reset(target);
        ASSERT(!rc);
        target->fpu_initialised = fpu_initialised;
        vlapic_do_init(vcpu_vlapic(target));
        domain_unlock(target->domain);
        break;
    }

    case APIC_DM_STARTUP: {
        uint16_t reset_cs = (icr & 0xffu) << 8;
        hvm_vcpu_reset_state(target, reset_cs, 0);
        break;
    }

    default:
        BUG();
    }

    hvmemul_cancel(target);

    vcpu_unpause(target);
}

static void vlapic_init_sipi_action(void *data)
{
    struct vcpu *origin = data;
    uint32_t icr = vcpu_vlapic(origin)->init_sipi.icr;
    uint32_t dest = vcpu_vlapic(origin)->init_sipi.dest;
    uint32_t short_hand = icr & APIC_SHORT_MASK;
    bool_t dest_mode = !!(icr & APIC_DEST_MASK);
    struct vcpu *v;

    if ( icr == 0 )
        return;

    for_each_vcpu ( origin->domain, v )
    {
        if ( vlapic_match_dest(vcpu_vlapic(v), vcpu_vlapic(origin),
                               short_hand, dest, dest_mode) )
            vlapic_init_sipi_one(v, icr);
    }

    vcpu_vlapic(origin)->init_sipi.icr = 0;
    vcpu_unpause(origin);
}

/* Add a pending IRQ into lapic. */
static void vlapic_accept_irq(struct vcpu *v, uint32_t icr_low)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint8_t vector = (uint8_t)icr_low;

    switch ( icr_low & APIC_MODE_MASK )
    {
    case APIC_DM_FIXED:
    case APIC_DM_LOWEST:
        if ( vlapic_enabled(vlapic) )
            vlapic_set_irq(vlapic, vector, 0);
        break;

    case APIC_DM_REMRD:
        gdprintk(XENLOG_WARNING, "Ignoring delivery mode 3\n");
        break;

    case APIC_DM_SMI:
        gdprintk(XENLOG_WARNING, "Ignoring guest SMI\n");
        break;

    case APIC_DM_NMI:
        if ( !test_and_set_bool(v->arch.nmi_pending) )
        {
            bool_t wake = 0;
            domain_lock(v->domain);
            if ( v->is_initialised )
                wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
            domain_unlock(v->domain);
            if ( wake )
                vcpu_wake(v);
            vcpu_kick(v);
        }
        break;

    case APIC_DM_INIT:
    case APIC_DM_STARTUP:
        BUG(); /* Handled in vlapic_ipi(). */

    default:
        gdprintk(XENLOG_ERR, "TODO: unsupported delivery mode in ICR %x\n",
                 icr_low);
        domain_crash(v->domain);
    }
}

struct vlapic *vlapic_lowest_prio(
    struct domain *d, const struct vlapic *source,
    int short_hand, uint32_t dest, bool_t dest_mode)
{
    int old = hvm_domain_irq(d)->round_robin_prev_vcpu;
    uint32_t ppr, target_ppr = UINT_MAX;
    struct vlapic *vlapic, *target = NULL;
    struct vcpu *v;

    if ( unlikely(!d->vcpu) || unlikely((v = d->vcpu[old]) == NULL) )
        return NULL;

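    /* Round-robin: resume the scan at the vCPU after the previous target. */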
    do {
        v = v->next_in_list ? : d->vcpu[0];
        vlapic = vcpu_vlapic(v);
        if ( vlapic_match_dest(vlapic, source, short_hand, dest, dest_mode) &&
             vlapic_enabled(vlapic) &&
             ((ppr = vlapic_get_ppr(vlapic)) < target_ppr) )
        {
            target = vlapic;
            target_ppr = ppr;
        }
    } while ( v->vcpu_id != old );

    if ( target != NULL )
        hvm_domain_irq(d)->round_robin_prev_vcpu =
            vlapic_vcpu(target)->vcpu_id;

    return target;
}

void vlapic_EOI_set(struct vlapic *vlapic)
{
    struct vcpu *v = vlapic_vcpu(vlapic);
    /*
     * If APIC assist was set then an EOI may have been avoided and
     * hence this EOI actually relates to a lower priority vector.
     * Thus it is necessary to first emulate the EOI for the higher
     * priority vector and then recurse to handle the lower priority
     * vector.
     */
    bool missed_eoi = viridian_apic_assist_completed(v);
    int vector;

 again:
    vector = vlapic_find_highest_isr(vlapic);

    /* Some EOI writes may not have a matching in-service interrupt. */
    if ( vector == -1 )
        return;

    /*
     * If APIC assist was set but the guest chose to EOI anyway then
     * we need to clean up state.
     * NOTE: It is harmless to call viridian_apic_assist_clear() on a
     *       recursion, even though it is not necessary.
     */
    if ( !missed_eoi )
        viridian_apic_assist_clear(v);

    vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]);

    if ( hvm_funcs.handle_eoi )
        alternative_vcall(hvm_funcs.handle_eoi, vector,
                          vlapic_find_highest_isr(vlapic));

    vlapic_handle_EOI(vlapic, vector);

    if ( missed_eoi )
    {
        missed_eoi = false;
        goto again;
    }
}

void vlapic_handle_EOI(struct vlapic *vlapic, u8 vector)
{
    struct vcpu *v = vlapic_vcpu(vlapic);
    struct domain *d = v->domain;

    /* All synic SINTx vectors are edge triggered. */

    if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]) )
        vioapic_update_EOI(d, vector);
    else if ( has_viridian_synic(d) )
        viridian_synic_ack_sint(v, vector);

    hvm_dpci_msi_eoi(d, vector);
}

static bool_t is_multicast_dest(struct vlapic *vlapic, unsigned int short_hand,
                                uint32_t dest, bool_t dest_mode)
{
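    /* Treat small domains as unicast: with at most two vCPUs there is
     * nothing worth batching for. */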
    if ( vlapic_domain(vlapic)->max_vcpus <= 2 )
        return 0;

    if ( short_hand )
        return short_hand != APIC_DEST_SELF;

    if ( vlapic_x2apic_mode(vlapic) )
        return dest_mode ? hweight16(dest) > 1 : dest == 0xffffffff;

    if ( dest_mode )
        return hweight8(dest &
                        GET_xAPIC_DEST_FIELD(vlapic_get_reg(vlapic,
                                                            APIC_DFR))) > 1;

    return dest == 0xff;
}

void vlapic_ipi(
    struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
    unsigned int dest;
    unsigned int short_hand = icr_low & APIC_SHORT_MASK;
    bool_t dest_mode = !!(icr_low & APIC_DEST_MASK);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr = 0x%08x:%08x", icr_high, icr_low);

    dest = _VLAPIC_ID(vlapic, icr_high);

    switch ( icr_low & APIC_MODE_MASK )
    {
    case APIC_DM_INIT:
    case APIC_DM_STARTUP:
        if ( vlapic->init_sipi.icr != 0 )
        {
            WARN(); /* should be impossible but don't BUG, just in case */
            break;
        }
        vcpu_pause_nosync(vlapic_vcpu(vlapic));
        vlapic->init_sipi.icr = icr_low;
        vlapic->init_sipi.dest = dest;
        tasklet_schedule(&vlapic->init_sipi.tasklet);
        break;

    case APIC_DM_LOWEST: {
        struct vlapic *target = vlapic_lowest_prio(
            vlapic_domain(vlapic), vlapic, short_hand, dest, dest_mode);

        if ( unlikely((icr_low & APIC_VECTOR_MASK) < 16) )
            vlapic_error(vlapic, APIC_ESR_SENDILL);
        else if ( target )
            vlapic_accept_irq(vlapic_vcpu(target), icr_low);
        break;
    }

    case APIC_DM_FIXED:
        if ( unlikely((icr_low & APIC_VECTOR_MASK) < 16) )
        {
            vlapic_error(vlapic, APIC_ESR_SENDILL);
            break;
        }
        /* fall through */
    default: {
        struct vcpu *v;
        bool_t batch = is_multicast_dest(vlapic, short_hand, dest, dest_mode);

        if ( batch )
            cpu_raise_softirq_batch_begin();
        for_each_vcpu ( vlapic_domain(vlapic), v )
        {
            if ( vlapic_match_dest(vcpu_vlapic(v), vlapic,
                                   short_hand, dest, dest_mode) )
                vlapic_accept_irq(v, icr_low);
        }
        if ( batch )
            cpu_raise_softirq_batch_finish();
        break;
    }
    }
}

static uint32_t vlapic_get_tmcct(const struct vlapic *vlapic)
{
    const struct vcpu *v = const_vlapic_vcpu(vlapic);
    uint32_t tmcct = 0, tmict = vlapic_get_reg(vlapic, APIC_TMICT);
    uint64_t counter_passed;

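    /* Bus clocks elapsed since the timer was last (re)started. */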
    counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update)
                      / (APIC_BUS_CYCLE_NS * vlapic->hw.timer_divisor));

    /* If timer_last_update is 0, then TMCCT should return 0 as well.  */
    if ( tmict && vlapic->timer_last_update )
    {
        if ( vlapic_lvtt_period(vlapic) )
            counter_passed %= tmict;
        if ( counter_passed < tmict )
            tmcct = tmict - counter_passed;
    }

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "timer initial count %d, timer current count %d, "
                "offset %"PRId64,
                tmict, tmcct, counter_passed);

    return tmcct;
}

static void vlapic_set_tdcr(struct vlapic *vlapic, unsigned int val)
{
    /* Only bits 0, 1 and 3 are settable; others are MBZ. */
    val &= 0xb;
    vlapic_set_reg(vlapic, APIC_TDCR, val);

    /* Update the demangled hw.timer_divisor. */
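    /*
     * Example: a raw TDCR of 0b1010 gives ((2 | 4) + 1) = 7, i.e. divide
     * by 1 << 7 = 128; the all-set encoding 0b1011 gives (7 + 1) & 7 = 0,
     * i.e. divide by 1.
     */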
    val = ((val & 3) | ((val & 8) >> 1)) + 1;
    vlapic->hw.timer_divisor = 1 << (val & 7);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "timer_divisor: %d", vlapic->hw.timer_divisor);
}

static uint32_t vlapic_read_aligned(const struct vlapic *vlapic,
                                    unsigned int offset)
{
    switch ( offset )
    {
    case APIC_PROCPRI:
        return vlapic_get_ppr(vlapic);

    case APIC_TMCCT: /* Timer CCR */
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;
        return vlapic_get_tmcct(vlapic);

    case APIC_TMICT: /* Timer ICR */
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;
        /* fall through */
    default:
        return vlapic_get_reg(vlapic, offset);
    }

    return 0;
}

static int vlapic_mmio_read(struct vcpu *v, unsigned long address,
                            unsigned int len, unsigned long *pval)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned int offset = address - vlapic_base_address(vlapic);
    unsigned int alignment = offset & 0xf, result = 0;

    /*
     * APIC registers are 32-bit values, aligned on 128-bit boundaries, and
     * should be accessed with 32-bit wide loads.
     *
     * Some processors support smaller accesses, so we allow any access which
     * fully fits within the 32-bit register.
     */
    if ( (alignment + len) <= 4 && offset <= (APIC_TDCR + 3) )
    {
        uint32_t reg = vlapic_read_aligned(vlapic, offset & ~0xf);

        switch ( len )
        {
        case 1: result = (uint8_t) (reg >> (alignment * 8)); break;
        case 2: result = (uint16_t)(reg >> (alignment * 8)); break;
        case 4: result = reg;                                break;
        }

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "offset %#x with length %#x, "
                    "and the result is %#x", offset, len, result);
    }

    *pval = result;
    return X86EMUL_OKAY;
}

int guest_rdmsr_x2apic(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    static const unsigned long readable[] = {
#define REG(x) (1UL << (APIC_ ## x >> 4))
        REG(ID)    | REG(LVR)  | REG(TASKPRI) | REG(PROCPRI) |
        REG(LDR)   | REG(SPIV) | REG(ESR)     | REG(ICR)     |
        REG(CMCI)  | REG(LVTT) | REG(LVTTHMR) | REG(LVTPC)   |
        REG(LVT0)  | REG(LVT1) | REG(LVTERR)  | REG(TMICT)   |
        REG(TMCCT) | REG(TDCR) |
#undef REG
#define REGBLOCK(x) (((1UL << (X86_NR_VECTORS / 32)) - 1) << (APIC_ ## x >> 4))
        REGBLOCK(ISR) | REGBLOCK(TMR) | REGBLOCK(IRR)
#undef REGBLOCK
    };
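    /*
     * Each x2APIC MSR (MSR_X2APIC_FIRST + n) aliases MMIO offset n << 4,
     * so n itself indexes the readable[] bitmap; the REGBLOCK() entries
     * cover the 256-bit ISR/TMR/IRR register arrays.
     */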
    const struct vlapic *vlapic = vcpu_vlapic(v);
    uint64_t high = 0;
    uint32_t reg = msr - MSR_X2APIC_FIRST, offset;

    /*
     * The read side looks as if it might be safe to use outside of current
     * context, but the write side is most certainly not.  As we don't need
     * any non-current access, enforce symmetry with the write side.
     */
    ASSERT(v == current);

    if ( !vlapic_x2apic_mode(vlapic) ||
         (reg >= sizeof(readable) * 8) )
        return X86EMUL_EXCEPTION;

    reg = array_index_nospec(reg, sizeof(readable) * 8);
    if ( !test_bit(reg, readable) )
        return X86EMUL_EXCEPTION;

    offset = reg << 4;
    if ( offset == APIC_ICR )
        high = (uint64_t)vlapic_read_aligned(vlapic, APIC_ICR2) << 32;

    *val = high | vlapic_read_aligned(vlapic, offset);

    return X86EMUL_OKAY;
}

static void vlapic_pt_cb(struct vcpu *v, void *data)
{
    TRACE_0D(TRC_HVM_EMUL_LAPIC_TIMER_CB);
    *(s_time_t *)data = hvm_get_guest_time(v);
}

static void vlapic_tdt_pt_cb(struct vcpu *v, void *data)
{
    *(s_time_t *)data = hvm_get_guest_time(v);
    vcpu_vlapic(v)->hw.tdt_msr = 0;
}

/*
 * This function is used when a register related to the APIC timer is updated.
 * It expects the new value of TMICT to be set *before* it is called, and the
 * previous value of the divisor (calculated from TDCR) to be passed as an
 * argument.
 * It expects the new value of LVTT to be set *after* it is called, with that
 * new value passed as a parameter (only the APIC_TIMER_MODE_MASK bits
 * matter).
 */
static void vlapic_update_timer(struct vlapic *vlapic, uint32_t lvtt,
                                bool tmict_updated, uint32_t old_divisor)
{
    uint64_t period, delta = 0;
    bool is_oneshot, is_periodic;

    is_periodic = (lvtt & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_PERIODIC;
    is_oneshot = (lvtt & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_ONESHOT;

    period = (uint64_t)vlapic_get_reg(vlapic, APIC_TMICT)
        * APIC_BUS_CYCLE_NS * old_divisor;

    /* Calculate the next time the timer should trigger an interrupt. */
    if ( tmict_updated )
        delta = period;
    else if ( period && vlapic->timer_last_update )
    {
        uint64_t time_passed = hvm_get_guest_time(current)
            - vlapic->timer_last_update;

        /* This depends on the previous mode, if a new mode is being set. */
        if ( vlapic_lvtt_period(vlapic) )
            time_passed %= period;
        if ( time_passed < period )
            delta = period - time_passed;
    }

    if ( delta && (is_oneshot || is_periodic) )
    {
        if ( vlapic->hw.timer_divisor != old_divisor )
        {
            period = (uint64_t)vlapic_get_reg(vlapic, APIC_TMICT)
                * APIC_BUS_CYCLE_NS * vlapic->hw.timer_divisor;
            delta = delta * vlapic->hw.timer_divisor / old_divisor;
        }

        TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(delta),
                        TRC_PAR_LONG(is_periodic ? period : 0),
                        vlapic->pt.irq);

        create_periodic_time(current, &vlapic->pt, delta,
                             is_periodic ? period : 0, vlapic->pt.irq,
                             is_periodic ? vlapic_pt_cb : NULL,
                             &vlapic->timer_last_update, false);

        vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
        if ( !tmict_updated )
            vlapic->timer_last_update -= period - delta;

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                    "bus cycle is %uns, "
                    "initial count %u, period %"PRIu64"ns",
                    APIC_BUS_CYCLE_NS,
                    vlapic_get_reg(vlapic, APIC_TMICT),
                    period);
    }
    else
    {
        TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
        destroy_periodic_time(&vlapic->pt);
        /*
         * From now on, TMCCT should return 0 until TMICT is set again,
         * either because the one-shot timer reached 0 or because the
         * timer is disabled.
         */
        vlapic->timer_last_update = 0;
    }
}

void vlapic_reg_write(struct vcpu *v, unsigned int reg, uint32_t val)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    memset(&vlapic->loaded, 0, sizeof(vlapic->loaded));

    switch ( reg )
    {
    case APIC_ID:
        vlapic_set_reg(vlapic, APIC_ID, val);
        break;

    case APIC_TASKPRI:
        vlapic_set_reg(vlapic, APIC_TASKPRI, val & 0xff);
        break;

    case APIC_EOI:
        vlapic_EOI_set(vlapic);
        break;

    case APIC_LDR:
        vlapic_set_reg(vlapic, APIC_LDR, val & APIC_LDR_MASK);
        break;

    case APIC_DFR:
        vlapic_set_reg(vlapic, APIC_DFR, val | 0x0FFFFFFF);
        break;

    case APIC_SPIV:
        vlapic_set_reg(vlapic, APIC_SPIV, val & 0x3ff);

        if ( !(val & APIC_SPIV_APIC_ENABLED) )
        {
            int i;
            uint32_t lvt_val;

            vlapic->hw.disabled |= VLAPIC_SW_DISABLED;

            for ( i = 0; i < VLAPIC_LVT_NUM; i++ )
            {
                lvt_val = vlapic_get_reg(vlapic, APIC_LVTT + 0x10 * i);
                vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i,
                               lvt_val | APIC_LVT_MASKED);
            }
        }
        else
        {
            vlapic->hw.disabled &= ~VLAPIC_SW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), &vlapic->pt);
        }
        break;

    case APIC_ICR:
        val &= ~(1 << 12); /* always clear the pending (Delivery Status) bit */
        vlapic_ipi(vlapic, val, vlapic_get_reg(vlapic, APIC_ICR2));
        vlapic_set_reg(vlapic, APIC_ICR, val);
        break;

    case APIC_ICR2:
        vlapic_set_reg(vlapic, APIC_ICR2, val & 0xff000000);
        break;

    case APIC_LVTT:         /* LVT Timer Reg */
        if ( vlapic_lvtt_tdt(vlapic) !=
             ((val & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_TSC_DEADLINE) )
        {
            vlapic_set_reg(vlapic, APIC_TMICT, 0);
            vlapic->hw.tdt_msr = 0;
        }
        vlapic->pt.irq = val & APIC_VECTOR_MASK;

        vlapic_update_timer(vlapic, val, false, vlapic->hw.timer_divisor);

        /* fall through */
    case APIC_LVTTHMR:      /* LVT Thermal Monitor */
    case APIC_LVTPC:        /* LVT Performance Counter */
    case APIC_LVT0:         /* LVT LINT0 Reg */
    case APIC_LVT1:         /* LVT LINT1 Reg */
    case APIC_LVTERR:       /* LVT Error Reg */
        if ( vlapic_sw_disabled(vlapic) )
            val |= APIC_LVT_MASKED;
        val &= array_access_nospec(vlapic_lvt_mask, (reg - APIC_LVTT) >> 4);
        vlapic_set_reg(vlapic, reg, val);
        if ( reg == APIC_LVT0 )
        {
            vlapic_adjust_i8259_target(v->domain);
            pt_may_unmask_irq(v->domain, NULL);
        }
        if ( (reg == APIC_LVTT) && !(val & APIC_LVT_MASKED) )
            pt_may_unmask_irq(NULL, &vlapic->pt);
        if ( reg == APIC_LVTPC )
            vpmu_lvtpc_update(val);
        break;

    case APIC_TMICT:
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;

        vlapic_set_reg(vlapic, APIC_TMICT, val);

        vlapic_update_timer(vlapic, vlapic_get_reg(vlapic, APIC_LVTT), true,
                            vlapic->hw.timer_divisor);
        break;

    case APIC_TDCR:
    {
        uint32_t current_divisor = vlapic->hw.timer_divisor;

        vlapic_set_tdcr(vlapic, val & 0xb);

        vlapic_update_timer(vlapic, vlapic_get_reg(vlapic, APIC_LVTT), false,
                            current_divisor);
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "timer divisor is %#x",
                    vlapic->hw.timer_divisor);
        break;
    }
    }
}

static int vlapic_mmio_write(struct vcpu *v, unsigned long address,
                             unsigned int len, unsigned long val)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned int offset = address - vlapic_base_address(vlapic);
    unsigned int alignment = offset & 0xf;

    offset &= ~0xf;

    if ( offset != APIC_EOI )
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                    "offset %#x with length %#x, and value is %#lx",
                    offset, len, val);

    /*
     * APIC registers are 32-bit values, aligned on 128-bit boundaries, and
     * should be accessed with 32-bit wide stores.
     *
     * Some processors support smaller accesses, so we allow any access which
     * fully fits within the 32-bit register.
     */
    if ( (alignment + len) <= 4 && offset <= APIC_TDCR )
    {
        if ( unlikely(len < 4) )
        {
            uint32_t reg = vlapic_read_aligned(vlapic, offset);

            alignment *= 8;

            switch ( len )
            {
            case 1:
                val = ((reg & ~(0xffU << alignment)) |
                       ((val &  0xff) << alignment));
                break;

            case 2:
                val = ((reg & ~(0xffffU << alignment)) |
                       ((val &  0xffff) << alignment));
                break;
            }
        }

        vlapic_reg_write(v, offset, val);
    }

    return X86EMUL_OKAY;
}

int vlapic_apicv_write(struct vcpu *v, unsigned int offset)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t val = vlapic_get_reg(vlapic, offset & ~0xf);

    if ( vlapic_x2apic_mode(vlapic) )
    {
        if ( offset != APIC_SELF_IPI )
            return X86EMUL_UNHANDLEABLE;

        offset = APIC_ICR;
        val = APIC_DEST_SELF | (val & APIC_VECTOR_MASK);
    }

    vlapic_reg_write(v, offset, val);

    return X86EMUL_OKAY;
}

int guest_wrmsr_x2apic(struct vcpu *v, uint32_t msr, uint64_t msr_content)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t offset = (msr - MSR_X2APIC_FIRST) << 4;

    /* The timer handling at least is unsafe outside of current context. */
    ASSERT(v == current);

    if ( !vlapic_x2apic_mode(vlapic) )
        return X86EMUL_EXCEPTION;

    switch ( offset )
    {
    case APIC_TASKPRI:
        if ( msr_content & ~APIC_TPRI_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_SPIV:
        if ( msr_content & ~(APIC_VECTOR_MASK | APIC_SPIV_APIC_ENABLED |
                             APIC_SPIV_FOCUS_DISABLED |
                             (VLAPIC_VERSION & APIC_LVR_DIRECTED_EOI
                              ? APIC_SPIV_DIRECTED_EOI : 0)) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTT:
        if ( msr_content & ~(LVT_MASK | APIC_TIMER_MODE_MASK) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTTHMR:
    case APIC_LVTPC:
    case APIC_CMCI:
        if ( msr_content & ~(LVT_MASK | APIC_MODE_MASK) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVT0:
    case APIC_LVT1:
        if ( msr_content & ~LINT_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTERR:
        if ( msr_content & ~LVT_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_TMICT:
        break;

    case APIC_TDCR:
        if ( msr_content & ~APIC_TDR_DIV_1 )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_ICR:
        if ( (uint32_t)msr_content & ~(APIC_VECTOR_MASK | APIC_MODE_MASK |
                                       APIC_DEST_MASK | APIC_INT_ASSERT |
                                       APIC_INT_LEVELTRIG | APIC_SHORT_MASK) )
            return X86EMUL_EXCEPTION;
        vlapic_set_reg(vlapic, APIC_ICR2, msr_content >> 32);
        break;

    case APIC_SELF_IPI:
        if ( msr_content & ~APIC_VECTOR_MASK )
            return X86EMUL_EXCEPTION;
        offset = APIC_ICR;
        msr_content = APIC_DEST_SELF | (msr_content & APIC_VECTOR_MASK);
        break;

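    /*
     * Note the nesting below: the default label sits inside the if()
     * body, so unhandled registers share the #GP path with non-zero
     * writes to EOI and ESR.
     */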
    case APIC_EOI:
    case APIC_ESR:
        if ( msr_content )
        {
    default:
            return X86EMUL_EXCEPTION;
        }
    }

    vlapic_reg_write(v, array_index_nospec(offset, PAGE_SIZE), msr_content);

    return X86EMUL_OKAY;
}

static int vlapic_range(struct vcpu *v, unsigned long addr)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned long offset  = addr - vlapic_base_address(vlapic);

    return !vlapic_hw_disabled(vlapic) &&
           !vlapic_x2apic_mode(vlapic) &&
           (offset < PAGE_SIZE);
}

static const struct hvm_mmio_ops vlapic_mmio_ops = {
    .check = vlapic_range,
    .read = vlapic_mmio_read,
    .write = vlapic_mmio_write,
};

static void set_x2apic_id(struct vlapic *vlapic)
{
    u32 id = vlapic_vcpu(vlapic)->vcpu_id;
    u32 ldr = ((id & ~0xf) << 12) | (1 << (id & 0xf));

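    /*
     * Xen exposes APIC ID 2 * vcpu_id.  The LDR is derived from vcpu_id
     * here: bits 31:16 hold the cluster (id >> 4, computed above as
     * (id & ~0xf) << 12) and bits 15:0 hold a one-bit-per-ID mask within
     * the cluster.
     */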
    vlapic_set_reg(vlapic, APIC_ID, id * 2);
    vlapic_set_reg(vlapic, APIC_LDR, ldr);
}

int guest_wrmsr_apic_base(struct vcpu *v, uint64_t value)
{
    const struct cpuid_policy *cp = v->domain->arch.cpuid;
    struct vlapic *vlapic = vcpu_vlapic(v);

    if ( !has_vlapic(v->domain) )
        return X86EMUL_EXCEPTION;

    /* Attempting to set reserved bits? */
    if ( value & ~(APIC_BASE_ADDR_MASK | APIC_BASE_ENABLE | APIC_BASE_BSP |
                   (cp->basic.x2apic ? APIC_BASE_EXTD : 0)) )
        return X86EMUL_EXCEPTION;

    /*
     * Architecturally speaking, we should allow a guest to move the xAPIC
     * MMIO window (within reason - not even hardware allows arbitrary
     * positions).  However, virtualising the behaviour for multi-vcpu guests
     * is problematic.
     *
     * The ability to move the MMIO window was introduced with the Pentium Pro
     * processor, to deconflict the window with other MMIO in the system.  The
     * need to move the MMIO window was obsoleted by the Netburst architecture
     * which reserved the space in physical address space for MSIs.
     *
     * As such, it appears to be a rarely used feature before the turn of the
     * millennium, and entirely unused after.
     *
     * Xen uses a per-domain P2M, but MSR_APIC_BASE is per-vcpu.  In
     * principle, we could emulate the MMIO windows being in different
     * locations by ensuring that all windows are unmapped in the P2M and trap
     * for emulation.  Xen has never had code to modify the P2M in response to
     * APIC_BASE updates, so guests which actually try this are likely to end
     * up without a working APIC.
     *
     * Things are more complicated with hardware APIC acceleration, where Xen
     * has to map a sink-page into the P2M for APIC accesses to be recognised
     * and accelerated by microcode.  Again, this could in principle be
     * emulated, but the visible result in the guest would be multiple working
     * APIC MMIO windows.  Moving the APIC window has never caused the
     * sink-page to move in the P2M, meaning that on all modern hardware, the
     * APIC definitely ceases working if the guest tries to move the window.
     *
     * As such, when the APIC is configured in xAPIC mode, require the MMIO
     * window to be in its default location.  We don't expect any guests which
     * currently run on Xen to be impacted by this restriction, and the #GP
     * fault will be far more obvious to debug than a malfunctioning MMIO
     * window.
     */
    if ( ((value & (APIC_BASE_EXTD | APIC_BASE_ENABLE)) == APIC_BASE_ENABLE) &&
         ((value & APIC_BASE_ADDR_MASK) != APIC_DEFAULT_PHYS_BASE) )
    {
        printk(XENLOG_G_INFO
               "%pv tried to move the APIC MMIO window: val 0x%08"PRIx64"\n",
               v, value);
        return X86EMUL_EXCEPTION;
    }

    if ( (vlapic->hw.apic_base_msr ^ value) & APIC_BASE_ENABLE )
    {
        if ( unlikely(value & APIC_BASE_EXTD) )
            return X86EMUL_EXCEPTION;

        if ( value & APIC_BASE_ENABLE )
        {
            vlapic_reset(vlapic);
            vlapic->hw.disabled &= ~VLAPIC_HW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), &vlapic->pt);
        }
        else
        {
            vlapic->hw.disabled |= VLAPIC_HW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), NULL);
        }
    }
    else if ( ((vlapic->hw.apic_base_msr ^ value) & APIC_BASE_EXTD) &&
              unlikely(!vlapic_xapic_mode(vlapic)) )
        return X86EMUL_EXCEPTION;

    vlapic->hw.apic_base_msr = value;
    memset(&vlapic->loaded, 0, sizeof(vlapic->loaded));

    if ( vlapic_x2apic_mode(vlapic) )
        set_x2apic_id(vlapic);

    vmx_vlapic_msr_changed(vlapic_vcpu(vlapic));

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                "apic base msr is 0x%016"PRIx64, vlapic->hw.apic_base_msr);

    return X86EMUL_OKAY;
}

uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic)
{
    if ( !vlapic_lvtt_tdt(vlapic) )
        return 0;

    return vlapic->hw.tdt_msr;
}

void vlapic_tdt_msr_set(struct vlapic *vlapic, uint64_t value)
{
    uint64_t guest_tsc;
    struct vcpu *v = vlapic_vcpu(vlapic);

    if ( vlapic_hw_disabled(vlapic) )
        return;

    if ( !vlapic_lvtt_tdt(vlapic) )
    {
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "ignore tsc deadline msr write");
        return;
    }

    /* Three cases: value == 0, 0 < value <= now, value > now. */
    guest_tsc = hvm_get_guest_tsc(v);
    if ( value > guest_tsc )
    {
        uint64_t delta = gtsc_to_gtime(v->domain, value - guest_tsc);
        delta = max_t(s64, delta, 0);

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "delta[0x%016"PRIx64"]", delta);

        vlapic->hw.tdt_msr = value;
        /* .... reprogram tdt timer */
        TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(delta),
                        TRC_PAR_LONG(0LL), vlapic->pt.irq);
        create_periodic_time(v, &vlapic->pt, delta, 0,
                             vlapic->pt.irq, vlapic_tdt_pt_cb,
                             &vlapic->timer_last_update, false);
        vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
    }
    else
    {
        vlapic->hw.tdt_msr = 0;

        /* trigger a timer event if needed */
        if ( value > 0 )
        {
            TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(0LL),
                            TRC_PAR_LONG(0LL), vlapic->pt.irq);
            create_periodic_time(v, &vlapic->pt, 0, 0,
                                 vlapic->pt.irq, vlapic_tdt_pt_cb,
                                 &vlapic->timer_last_update, false);
            vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
        }
        else
        {
            /* .... stop tdt timer */
            TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
            destroy_periodic_time(&vlapic->pt);
        }

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "value[0x%016"PRIx64"]", value);
    }

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "tdt_msr[0x%016"PRIx64"],"
                " gtsc[0x%016"PRIx64"]",
                vlapic->hw.tdt_msr, guest_tsc);
}

static int __vlapic_accept_pic_intr(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t lvt0 = vlapic_get_reg(vlapic, APIC_LVT0);
    union vioapic_redir_entry redir0;

    ASSERT(has_vpic(d));

    if ( !has_vioapic(d) )
        return 0;

    redir0 = domain_vioapic(d, 0)->redirtbl[0];

    /* We deliver 8259 interrupts to the appropriate CPU as follows. */
    return ((/* IOAPIC pin0 is unmasked and routing to this LAPIC? */
             ((redir0.fields.delivery_mode == dest_ExtINT) &&
              !redir0.fields.mask &&
              redir0.fields.dest_id == VLAPIC_ID(vlapic) &&
              !vlapic_disabled(vlapic)) ||
             /* LAPIC has LVT0 unmasked for ExtInts? */
             ((lvt0 & (APIC_MODE_MASK|APIC_LVT_MASKED)) == APIC_DM_EXTINT) ||
             /* LAPIC is fully disabled? */
             vlapic_hw_disabled(vlapic)));
}

int vlapic_accept_pic_intr(struct vcpu *v)
{
    if ( vlapic_hw_disabled(vcpu_vlapic(v)) || !has_vpic(v->domain) )
        return 0;

    TRACE_2D(TRC_HVM_EMUL_LAPIC_PIC_INTR,
             (v == v->domain->arch.hvm.i8259_target),
             v ? __vlapic_accept_pic_intr(v) : -1);

    return ((v == v->domain->arch.hvm.i8259_target) &&
            __vlapic_accept_pic_intr(v));
}

void vlapic_adjust_i8259_target(struct domain *d)
{
    struct vcpu *v;

    if ( !has_vpic(d) )
        return;

    for_each_vcpu ( d, v )
        if ( __vlapic_accept_pic_intr(v) )
            goto found;

    v = d->vcpu ? d->vcpu[0] : NULL;

 found:
    if ( d->arch.hvm.i8259_target == v )
        return;
    d->arch.hvm.i8259_target = v;
    pt_adjust_global_vcpu_target(v);
}

int vlapic_has_pending_irq(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    int irr, isr;

    if ( !vlapic_enabled(vlapic) )
        return -1;

    /*
     * Poll the viridian message queues before checking the IRR since
     * a synthetic interrupt may be asserted during the poll.
     */
    if ( has_viridian_synic(v->domain) )
        viridian_synic_poll(v);

    irr = vlapic_find_highest_irr(vlapic);
    if ( irr == -1 )
        return -1;

    if ( hvm_funcs.virtual_intr_delivery_enabled &&
         !nestedhvm_vcpu_in_guestmode(v) )
        return irr;

    /*
     * If APIC assist was set then an EOI may have been avoided.
     * If so, we need to emulate the EOI here before comparing ISR
     * with IRR.
     */
    if ( viridian_apic_assist_completed(v) )
        vlapic_EOI_set(vlapic);

    isr = vlapic_find_highest_isr(vlapic);

    /*
     * The specification says that if APIC assist is set and a
     * subsequent interrupt of lower priority occurs then APIC assist
     * needs to be cleared.
     */
    if ( isr >= 0 &&
         (irr & 0xf0) <= (isr & 0xf0) )
    {
        viridian_apic_assist_clear(v);
        return -1;
    }

    return irr;
}

int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    int isr;

    if ( !force_ack &&
         hvm_funcs.virtual_intr_delivery_enabled )
        return 1;

    /* If there's no chance of using APIC assist then bail now. */
    if ( !has_viridian_apic_assist(v->domain) ||
         vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]) )
        goto done;

    isr = vlapic_find_highest_isr(vlapic);
    if ( isr == -1 && vector > 0x10 )
    {
        /*
         * This vector is edge triggered, not in the legacy range, and no
         * lower priority vectors are pending in the ISR. Thus we can set
         * APIC assist to avoid exiting for EOI.
         */
        viridian_apic_assist_set(v);
    }

 done:
    if ( !has_viridian_synic(v->domain) ||
         !viridian_synic_is_auto_eoi_sint(v, vector) )
        vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);

    vlapic_clear_irr(vector, vlapic);

    return 1;
}

bool_t is_vlapic_lvtpc_enabled(struct vlapic *vlapic)
{
    return (vlapic_enabled(vlapic) &&
            !(vlapic_get_reg(vlapic, APIC_LVTPC) & APIC_LVT_MASKED));
}

/* Reset the VLAPIC back to its init state. */
static void vlapic_do_init(struct vlapic *vlapic)
{
    int i;

    if ( !has_vlapic(vlapic_vcpu(vlapic)->domain) )
        return;

    vlapic_set_reg(vlapic, APIC_LVR, VLAPIC_VERSION);

    for ( i = 0; i < 8; i++ )
    {
        vlapic_set_reg(vlapic, APIC_IRR + 0x10 * i, 0);
        vlapic_set_reg(vlapic, APIC_ISR + 0x10 * i, 0);
        vlapic_set_reg(vlapic, APIC_TMR + 0x10 * i, 0);
    }
    vlapic_set_reg(vlapic, APIC_ICR,     0);
    vlapic_set_reg(vlapic, APIC_ICR2,    0);
    /*
     * LDR is read-only in x2APIC mode. Preserve its value when handling
     * INIT signal in x2APIC mode.
     */
    if ( !vlapic_x2apic_mode(vlapic) )
        vlapic_set_reg(vlapic, APIC_LDR, 0);
    vlapic_set_reg(vlapic, APIC_TASKPRI, 0);
    vlapic_set_reg(vlapic, APIC_TMICT,   0);
    vlapic_set_reg(vlapic, APIC_TMCCT,   0);
    vlapic_set_tdcr(vlapic, 0);

    vlapic_set_reg(vlapic, APIC_DFR, 0xffffffffU);

    for ( i = 0; i < VLAPIC_LVT_NUM; i++ )
        vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);

    vlapic_set_reg(vlapic, APIC_SPIV, 0xff);
    vlapic->hw.disabled |= VLAPIC_SW_DISABLED;

    TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
    destroy_periodic_time(&vlapic->pt);
}

/* Reset the VLAPIC back to its power-on/reset state. */
void vlapic_reset(struct vlapic *vlapic)
{
    const struct vcpu *v = vlapic_vcpu(vlapic);

    if ( !has_vlapic(v->domain) )
        return;

    vlapic->hw.apic_base_msr = APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
    if ( v->vcpu_id == 0 )
        vlapic->hw.apic_base_msr |= APIC_BASE_BSP;

    vlapic_set_reg(vlapic, APIC_ID, (v->vcpu_id * 2) << 24);
    vlapic_do_init(vlapic);
}

/* Rearm the APIC timer, if needed, after an HVM restore. */
static void lapic_rearm(struct vlapic *s)
{
    unsigned long tmict;
    uint64_t period, tdt_msr;

    s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;

    if ( vlapic_lvtt_tdt(s) )
    {
        if ( (tdt_msr = vlapic_tdt_msr_get(s)) != 0 )
            vlapic_tdt_msr_set(s, tdt_msr);
        return;
    }

    if ( (tmict = vlapic_get_reg(s, APIC_TMICT)) == 0 )
        return;

    period = ((uint64_t)APIC_BUS_CYCLE_NS *
              (uint32_t)tmict * s->hw.timer_divisor);
    TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(period),
             TRC_PAR_LONG(vlapic_lvtt_period(s) ? period : 0LL), s->pt.irq);
    create_periodic_time(vlapic_vcpu(s), &s->pt, period,
                         vlapic_lvtt_period(s) ? period : 0,
                         s->pt.irq,
                         vlapic_lvtt_period(s) ? vlapic_pt_cb : NULL,
                         &s->timer_last_update, false);
    s->timer_last_update = s->pt.last_plt_gtime;
}

static int lapic_save_hidden(struct vcpu *v, hvm_domain_context_t *h)
{
    if ( !has_vlapic(v->domain) )
        return 0;

    return hvm_save_entry(LAPIC, v->vcpu_id, h, &vcpu_vlapic(v)->hw);
}

static int lapic_save_regs(struct vcpu *v, hvm_domain_context_t *h)
{
    if ( !has_vlapic(v->domain) )
        return 0;

    vlapic_sync_pir_to_irr(v);

    return hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, vcpu_vlapic(v)->regs);
}

/*
 * Following lapic_load_hidden()/lapic_load_regs() we may need to
 * correct ID and LDR when they come from an old, broken hypervisor.
 */
static void lapic_load_fixup(struct vlapic *vlapic)
{
    uint32_t id = vlapic->loaded.id;

    if ( vlapic_x2apic_mode(vlapic) && id && vlapic->loaded.ldr == 1 )
    {
        /*
         * This is optional: ID != 0 contradicts LDR == 1. It's being added
         * to aid in eventual debugging of issues arising from the fixup done
         * here, but can be dropped as soon as it is found to conflict with
         * other (future) changes.
         */
        if ( GET_xAPIC_ID(id) != vlapic_vcpu(vlapic)->vcpu_id * 2 ||
             id != SET_xAPIC_ID(GET_xAPIC_ID(id)) )
            printk(XENLOG_G_WARNING "%pv: bogus APIC ID %#x loaded\n",
                   vlapic_vcpu(vlapic), id);
        set_x2apic_id(vlapic);
    }
    else /* Undo an eventual earlier fixup. */
    {
        vlapic_set_reg(vlapic, APIC_ID, id);
        vlapic_set_reg(vlapic, APIC_LDR, vlapic->loaded.ldr);
    }
}

static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid = hvm_load_instance(h);
    struct vcpu *v;
    struct vlapic *s;

    if ( !has_vlapic(d) )
        return -ENODEV;

    /* Which vlapic to load? */
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no apic%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }
    s = vcpu_vlapic(v);

    if ( hvm_load_entry_zeroextend(LAPIC, h, &s->hw) != 0 )
        return -EINVAL;

    s->loaded.hw = 1;
    if ( s->loaded.regs )
        lapic_load_fixup(s);

    if ( !(s->hw.apic_base_msr & APIC_BASE_ENABLE) &&
         unlikely(vlapic_x2apic_mode(s)) )
        return -EINVAL;

    vmx_vlapic_msr_changed(v);

    return 0;
}

static int lapic_load_regs(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid = hvm_load_instance(h);
    struct vcpu *v;
    struct vlapic *s;

    if ( !has_vlapic(d) )
        return -ENODEV;

    /* Which vlapic to load? */
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no apic%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }
    s = vcpu_vlapic(v);

    if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
        return -EINVAL;

    s->loaded.id = vlapic_get_reg(s, APIC_ID);
    s->loaded.ldr = vlapic_get_reg(s, APIC_LDR);
    s->loaded.regs = 1;
    if ( s->loaded.hw )
        lapic_load_fixup(s);

    if ( hvm_funcs.process_isr )
        alternative_vcall(hvm_funcs.process_isr,
                          vlapic_find_highest_isr(s), v);

    vlapic_adjust_i8259_target(d);
    lapic_rearm(s);
    return 0;
}

HVM_REGISTER_SAVE_RESTORE(LAPIC, lapic_save_hidden,
                          lapic_load_hidden, 1, HVMSR_PER_VCPU);
HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs,
                          lapic_load_regs, 1, HVMSR_PER_VCPU);

int vlapic_init(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "%d", v->vcpu_id);

    if ( !has_vlapic(v->domain) )
    {
        vlapic->hw.disabled = VLAPIC_HW_DISABLED;
        return 0;
    }

    vlapic->pt.source = PTSRC_lapic;

    if ( vlapic->regs_page == NULL )
    {
        vlapic->regs_page = alloc_domheap_page(v->domain, MEMF_no_owner);
        if ( vlapic->regs_page == NULL )
        {
            dprintk(XENLOG_ERR, "alloc vlapic regs error: %d/%d\n",
                    v->domain->domain_id, v->vcpu_id);
            return -ENOMEM;
        }
    }
    if ( vlapic->regs == NULL )
    {
        vlapic->regs = __map_domain_page_global(vlapic->regs_page);
        if ( vlapic->regs == NULL )
        {
            free_domheap_page(vlapic->regs_page);
            dprintk(XENLOG_ERR, "map vlapic regs error: %d/%d\n",
                    v->domain->domain_id, v->vcpu_id);
            return -ENOMEM;
        }
    }
    clear_page(vlapic->regs);

    vlapic_reset(vlapic);

    spin_lock_init(&vlapic->esr_lock);

    tasklet_init(&vlapic->init_sipi.tasklet, vlapic_init_sipi_action, v);

    if ( v->vcpu_id == 0 )
        register_mmio_handler(v->domain, &vlapic_mmio_ops);

    return 0;
}

void vlapic_destroy(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    if ( !has_vlapic(v->domain) )
        return;

    tasklet_kill(&vlapic->init_sipi.tasklet);
    TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
    destroy_periodic_time(&vlapic->pt);
    unmap_domain_page_global(vlapic->regs);
    free_domheap_page(vlapic->regs_page);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */