/*
 * vlapic.c: virtualize LAPIC for HVM vcpus.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2006 Keir Fraser, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/event.h>
#include <xen/nospec.h>
#include <xen/trace.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/numa.h>
#include <asm/current.h>
#include <asm/page.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/vpmu.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/viridian.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>

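/*
 * Local APIC version register value: bits 0-7 hold the version (0x14, an
 * integrated APIC), bits 16-23 hold the highest LVT entry index (5, i.e.
 * the six VLAPIC_LVT_NUM entries below).
 */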
#define VLAPIC_VERSION                  0x00050014
#define VLAPIC_LVT_NUM                  6

#define LVT_MASK \
    (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK \
    (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY |\
     APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static const unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
{
    /* LVTT */
    LVT_MASK | APIC_TIMER_MODE_MASK,
    /* LVTTHMR */
    LVT_MASK | APIC_MODE_MASK,
    /* LVTPC */
    LVT_MASK | APIC_MODE_MASK,
    /* LVT0-1 */
    LINT_MASK, LINT_MASK,
    /* LVTERR */
    LVT_MASK
};

#define vlapic_lvtt_period(vlapic)                              \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_PERIODIC)

#define vlapic_lvtt_oneshot(vlapic)                             \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_ONESHOT)

#define vlapic_lvtt_tdt(vlapic)                                 \
    ((vlapic_get_reg(vlapic, APIC_LVTT) & APIC_TIMER_MODE_MASK) \
     == APIC_TIMER_MODE_TSC_DEADLINE)

static void vlapic_do_init(struct vlapic *vlapic);

static int vlapic_find_highest_vector(const void *bitmap)
{
    const uint32_t *word = bitmap;
    unsigned int word_offset = X86_NR_VECTORS / 32;

    /* Work backwards through the bitmap (first 32-bit word in every four). */
    while ( (word_offset != 0) && (word[(--word_offset)*4] == 0) )
        continue;

    return (fls(word[word_offset*4]) - 1) + (word_offset * 32);
}

/*
 * IRR-specific bitmap update & search routines.
 */

static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
{
    return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
}

static void vlapic_clear_irr(int vector, struct vlapic *vlapic)
{
    vlapic_clear_vector(vector, &vlapic->regs->data[APIC_IRR]);
}

static int vlapic_find_highest_irr(struct vlapic *vlapic)
{
    vlapic_sync_pir_to_irr(vlapic_vcpu(vlapic));

    return vlapic_find_highest_vector(&vlapic->regs->data[APIC_IRR]);
}

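/*
 * Accumulate error bits in the ESR and, unless LVTERR is masked, inject
 * the configured error vector.  Bits already set in the ESR don't
 * re-trigger the error interrupt.
 */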
static void vlapic_error(struct vlapic *vlapic, unsigned int errmask)
{
    unsigned long flags;
    uint32_t esr;

    spin_lock_irqsave(&vlapic->esr_lock, flags);
    esr = vlapic_get_reg(vlapic, APIC_ESR);
    if ( (esr & errmask) != errmask )
    {
        uint32_t lvterr = vlapic_get_reg(vlapic, APIC_LVTERR);

        vlapic_set_reg(vlapic, APIC_ESR, esr | errmask);
        if ( !(lvterr & APIC_LVT_MASKED) )
            vlapic_set_irq(vlapic, lvterr & APIC_VECTOR_MASK, 0);
    }
    spin_unlock_irqrestore(&vlapic->esr_lock, flags);
}

bool vlapic_test_irq(const struct vlapic *vlapic, uint8_t vec)
{
    if ( unlikely(vec < 16) )
        return false;

    if ( hvm_funcs.test_pir &&
         alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
        return true;

    return vlapic_test_vector(vec, &vlapic->regs->data[APIC_IRR]);
}

void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig)
{
    struct vcpu *target = vlapic_vcpu(vlapic);

    if ( unlikely(vec < 16) )
    {
        vlapic_error(vlapic, APIC_ESR_RECVILL);
        return;
    }

    if ( trig )
        vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);
    else
        vlapic_clear_vector(vec, &vlapic->regs->data[APIC_TMR]);

    if ( hvm_funcs.update_eoi_exit_bitmap )
        alternative_vcall(hvm_funcs.update_eoi_exit_bitmap, target, vec, trig);

    if ( hvm_funcs.deliver_posted_intr )
        alternative_vcall(hvm_funcs.deliver_posted_intr, target, vec);
    else if ( !vlapic_test_and_set_irr(vec, vlapic) )
        vcpu_kick(target);
}

static int vlapic_find_highest_isr(const struct vlapic *vlapic)
{
    return vlapic_find_highest_vector(&vlapic->regs->data[APIC_ISR]);
}

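/*
 * The Processor Priority Register is the higher of the TPR and the
 * priority class (bits 7:4) of the highest in-service vector.
 */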
static uint32_t vlapic_get_ppr(const struct vlapic *vlapic)
{
    uint32_t tpr, isrv, ppr;
    int isr;

    tpr  = vlapic_get_reg(vlapic, APIC_TASKPRI);
    isr  = vlapic_find_highest_isr(vlapic);
    isrv = (isr != -1) ? isr : 0;

    if ( (tpr & 0xf0) >= (isrv & 0xf0) )
        ppr = tpr & 0xff;
    else
        ppr = isrv & 0xf0;

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT,
                "vlapic %p, ppr %#x, isr %#x, isrv %#x",
                vlapic, ppr, isr, isrv);

    return ppr;
}

uint32_t vlapic_set_ppr(struct vlapic *vlapic)
{
    uint32_t ppr = vlapic_get_ppr(vlapic);

    vlapic_set_reg(vlapic, APIC_PROCPRI, ppr);
    return ppr;
}

static bool_t vlapic_match_logical_addr(const struct vlapic *vlapic,
                                        uint32_t mda)
{
    bool_t result = 0;
    uint32_t logical_id = vlapic_get_reg(vlapic, APIC_LDR);

    if ( vlapic_x2apic_mode(vlapic) )
        return ((logical_id >> 16) == (mda >> 16)) &&
               (uint16_t)(logical_id & mda);

    logical_id = GET_xAPIC_LOGICAL_ID(logical_id);
    mda = (uint8_t)mda;

    switch ( vlapic_get_reg(vlapic, APIC_DFR) )
    {
    case APIC_DFR_FLAT:
        if ( logical_id & mda )
            result = 1;
        break;
    case APIC_DFR_CLUSTER:
        if ( ((logical_id >> 4) == (mda >> 0x4)) && (logical_id & mda & 0xf) )
            result = 1;
        break;
    default:
        printk(XENLOG_G_WARNING "%pv: bad LAPIC DFR value %08x\n",
               const_vlapic_vcpu(vlapic),
               vlapic_get_reg(vlapic, APIC_DFR));
        break;
    }

    return result;
}

bool_t vlapic_match_dest(
    const struct vlapic *target, const struct vlapic *source,
    int short_hand, uint32_t dest, bool_t dest_mode)
{
    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "target %p, source %p, dest %#x, "
                "dest_mode %#x, short_hand %#x",
                target, source, dest, dest_mode, short_hand);

    switch ( short_hand )
    {
    case APIC_DEST_NOSHORT:
        if ( dest_mode )
            return vlapic_match_logical_addr(target, dest);
        return (dest == _VLAPIC_ID(target, 0xffffffff)) ||
               (dest == VLAPIC_ID(target));

    case APIC_DEST_SELF:
        return (target == source);

    case APIC_DEST_ALLINC:
        return 1;

    case APIC_DEST_ALLBUT:
        return (target != source);

    default:
        gdprintk(XENLOG_WARNING, "Bad dest shorthand value %x\n", short_hand);
        break;
    }

    return 0;
}

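/*
 * Handle INIT/SIPI for a single target vCPU.  This runs in tasklet
 * context, scheduled from vlapic_ipi(), with the target paused while its
 * state is reset.
 */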
static void vlapic_init_sipi_one(struct vcpu *target, uint32_t icr)
{
    vcpu_pause(target);

    switch ( icr & APIC_MODE_MASK )
    {
    case APIC_DM_INIT: {
        bool_t fpu_initialised;
        int rc;

        /* No work on INIT de-assert for P4-type APIC. */
        if ( (icr & (APIC_INT_LEVELTRIG | APIC_INT_ASSERT)) ==
             APIC_INT_LEVELTRIG )
            break;
        /* Nothing to do if the VCPU is already reset. */
        if ( !target->is_initialised )
            break;
        hvm_vcpu_down(target);
        domain_lock(target->domain);
        /* Reset necessary VCPU state. This does not include FPU state. */
        fpu_initialised = target->fpu_initialised;
        rc = vcpu_reset(target);
        ASSERT(!rc);
        target->fpu_initialised = fpu_initialised;
        vlapic_do_init(vcpu_vlapic(target));
        domain_unlock(target->domain);
        break;
    }

    case APIC_DM_STARTUP: {
        uint16_t reset_cs = (icr & 0xffu) << 8;
        hvm_vcpu_reset_state(target, reset_cs, 0);
        break;
    }

    default:
        BUG();
    }

    hvmemul_cancel(target);

    vcpu_unpause(target);
}

static void vlapic_init_sipi_action(void *data)
{
    struct vcpu *origin = data;
    uint32_t icr = vcpu_vlapic(origin)->init_sipi.icr;
    uint32_t dest = vcpu_vlapic(origin)->init_sipi.dest;
    uint32_t short_hand = icr & APIC_SHORT_MASK;
    bool_t dest_mode = !!(icr & APIC_DEST_MASK);
    struct vcpu *v;

    if ( icr == 0 )
        return;

    for_each_vcpu ( origin->domain, v )
    {
        if ( vlapic_match_dest(vcpu_vlapic(v), vcpu_vlapic(origin),
                               short_hand, dest, dest_mode) )
            vlapic_init_sipi_one(v, icr);
    }

    vcpu_vlapic(origin)->init_sipi.icr = 0;
    vcpu_unpause(origin);
}

/* Add a pending IRQ into lapic. */
static void vlapic_accept_irq(struct vcpu *v, uint32_t icr_low)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint8_t vector = (uint8_t)icr_low;

    switch ( icr_low & APIC_MODE_MASK )
    {
    case APIC_DM_FIXED:
    case APIC_DM_LOWEST:
        if ( vlapic_enabled(vlapic) )
            vlapic_set_irq(vlapic, vector, 0);
        break;

    case APIC_DM_REMRD:
        gdprintk(XENLOG_WARNING, "Ignoring delivery mode 3\n");
        break;

    case APIC_DM_SMI:
        gdprintk(XENLOG_WARNING, "Ignoring guest SMI\n");
        break;

    case APIC_DM_NMI:
        if ( !test_and_set_bool(v->arch.nmi_pending) )
        {
            bool_t wake = 0;
            domain_lock(v->domain);
            if ( v->is_initialised )
                wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
            domain_unlock(v->domain);
            if ( wake )
                vcpu_wake(v);
            vcpu_kick(v);
        }
        break;

    case APIC_DM_INIT:
    case APIC_DM_STARTUP:
        BUG(); /* Handled in vlapic_ipi(). */

    default:
        gdprintk(XENLOG_ERR, "TODO: unsupported delivery mode in ICR %x\n",
                 icr_low);
        domain_crash(v->domain);
    }
}

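/*
 * Pick the matching, enabled vLAPIC with the lowest processor priority,
 * searching round-robin from the vCPU after the previous winner so that
 * ties are spread across vCPUs.
 */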
struct vlapic *vlapic_lowest_prio(
    struct domain *d, const struct vlapic *source,
    int short_hand, uint32_t dest, bool_t dest_mode)
{
    int old = hvm_domain_irq(d)->round_robin_prev_vcpu;
    uint32_t ppr, target_ppr = UINT_MAX;
    struct vlapic *vlapic, *target = NULL;
    struct vcpu *v;

    if ( unlikely(!d->vcpu) || unlikely((v = d->vcpu[old]) == NULL) )
        return NULL;

    do {
        v = v->next_in_list ? : d->vcpu[0];
        vlapic = vcpu_vlapic(v);
        if ( vlapic_match_dest(vlapic, source, short_hand, dest, dest_mode) &&
             vlapic_enabled(vlapic) &&
             ((ppr = vlapic_get_ppr(vlapic)) < target_ppr) )
        {
            target = vlapic;
            target_ppr = ppr;
        }
    } while ( v->vcpu_id != old );

    if ( target != NULL )
        hvm_domain_irq(d)->round_robin_prev_vcpu =
            vlapic_vcpu(target)->vcpu_id;

    return target;
}

void vlapic_EOI_set(struct vlapic *vlapic)
{
    struct vcpu *v = vlapic_vcpu(vlapic);
    /*
     * If APIC assist was set then an EOI may have been avoided and
     * hence this EOI actually relates to a lower priority vector.
     * Thus it is necessary to first emulate the EOI for the higher
     * priority vector and then recurse to handle the lower priority
     * vector.
     */
    bool missed_eoi = viridian_apic_assist_completed(v);
    int vector;

 again:
    vector = vlapic_find_highest_isr(vlapic);

    /* Some EOI writes may not match an in-service interrupt. */
    if ( vector == -1 )
        return;

    /*
     * If APIC assist was set but the guest chose to EOI anyway then
     * we need to clean up state.
     * NOTE: It is harmless to call viridian_apic_assist_clear() on a
     * recursion, even though it is not necessary.
     */
    if ( !missed_eoi )
        viridian_apic_assist_clear(v);

    vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]);

    if ( hvm_funcs.handle_eoi )
        alternative_vcall(hvm_funcs.handle_eoi, vector,
                          vlapic_find_highest_isr(vlapic));

    vlapic_handle_EOI(vlapic, vector);

    if ( missed_eoi )
    {
        missed_eoi = false;
        goto again;
    }
}

void vlapic_handle_EOI(struct vlapic *vlapic, u8 vector)
{
    struct vcpu *v = vlapic_vcpu(vlapic);
    struct domain *d = v->domain;

    /* All synic SINTx vectors are edge triggered. */

    if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]) )
        vioapic_update_EOI(d, vector);
    else if ( has_viridian_synic(d) )
        viridian_synic_ack_sint(v, vector);

    hvm_dpci_msi_eoi(d, vector);
}

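/*
 * Rough test for whether an IPI might target more than one vCPU; used
 * only to decide whether softirq batching is worthwhile in vlapic_ipi().
 */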
static bool_t is_multicast_dest(struct vlapic *vlapic, unsigned int short_hand,
                                uint32_t dest, bool_t dest_mode)
{
    if ( vlapic_domain(vlapic)->max_vcpus <= 2 )
        return 0;

    if ( short_hand )
        return short_hand != APIC_DEST_SELF;

    if ( vlapic_x2apic_mode(vlapic) )
        return dest_mode ? hweight16(dest) > 1 : dest == 0xffffffff;

    if ( dest_mode )
        return hweight8(dest &
                        GET_xAPIC_DEST_FIELD(vlapic_get_reg(vlapic,
                                                            APIC_DFR))) > 1;

    return dest == 0xff;
}

void vlapic_ipi(
    struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
    unsigned int dest;
    unsigned int short_hand = icr_low & APIC_SHORT_MASK;
    bool_t dest_mode = !!(icr_low & APIC_DEST_MASK);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr = 0x%08x:%08x", icr_high, icr_low);

    dest = _VLAPIC_ID(vlapic, icr_high);

    switch ( icr_low & APIC_MODE_MASK )
    {
    case APIC_DM_INIT:
    case APIC_DM_STARTUP:
        if ( vlapic->init_sipi.icr != 0 )
        {
            WARN(); /* should be impossible but don't BUG, just in case */
            break;
        }
        vcpu_pause_nosync(vlapic_vcpu(vlapic));
        vlapic->init_sipi.icr = icr_low;
        vlapic->init_sipi.dest = dest;
        tasklet_schedule(&vlapic->init_sipi.tasklet);
        break;

    case APIC_DM_LOWEST: {
        struct vlapic *target = vlapic_lowest_prio(
            vlapic_domain(vlapic), vlapic, short_hand, dest, dest_mode);

        if ( unlikely((icr_low & APIC_VECTOR_MASK) < 16) )
            vlapic_error(vlapic, APIC_ESR_SENDILL);
        else if ( target )
            vlapic_accept_irq(vlapic_vcpu(target), icr_low);
        break;
    }

    case APIC_DM_FIXED:
        if ( unlikely((icr_low & APIC_VECTOR_MASK) < 16) )
        {
            vlapic_error(vlapic, APIC_ESR_SENDILL);
            break;
        }
        /* fall through */
    default: {
        struct vcpu *v;
        bool_t batch = is_multicast_dest(vlapic, short_hand, dest, dest_mode);

        if ( batch )
            cpu_raise_softirq_batch_begin();
        for_each_vcpu ( vlapic_domain(vlapic), v )
        {
            if ( vlapic_match_dest(vcpu_vlapic(v), vlapic,
                                   short_hand, dest, dest_mode) )
                vlapic_accept_irq(v, icr_low);
        }
        if ( batch )
            cpu_raise_softirq_batch_finish();
        break;
    }
    }
}

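/*
 * Current timer count: TMICT minus the (divided) bus cycles elapsed
 * since the timer was last armed, taken modulo TMICT in periodic mode.
 */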
static uint32_t vlapic_get_tmcct(const struct vlapic *vlapic)
{
    const struct vcpu *v = const_vlapic_vcpu(vlapic);
    uint32_t tmcct = 0, tmict = vlapic_get_reg(vlapic, APIC_TMICT);
    uint64_t counter_passed;

    counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update)
                      / (APIC_BUS_CYCLE_NS * vlapic->hw.timer_divisor));

    /* If timer_last_update is 0, then TMCCT should return 0 as well. */
    if ( tmict && vlapic->timer_last_update )
    {
        if ( vlapic_lvtt_period(vlapic) )
            counter_passed %= tmict;
        if ( counter_passed < tmict )
            tmcct = tmict - counter_passed;
    }

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "timer initial count %d, timer current count %d, "
                "offset %"PRId64,
                tmict, tmcct, counter_passed);

    return tmcct;
}

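/*
 * TDCR encodes the divider in bits 0, 1 and 3: ((bit 3 << 2) | bits 1:0)
 * + 1 selects a power-of-two divisor, with the all-ones pattern (0xb)
 * wrapping around to divide-by-1.
 */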
static void vlapic_set_tdcr(struct vlapic *vlapic, unsigned int val)
{
    /* Only bits 0, 1 and 3 are settable; others are MBZ. */
    val &= 0xb;
    vlapic_set_reg(vlapic, APIC_TDCR, val);

    /* Update the demangled hw.timer_divisor. */
    val = ((val & 3) | ((val & 8) >> 1)) + 1;
    vlapic->hw.timer_divisor = 1 << (val & 7);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "timer_divisor: %d", vlapic->hw.timer_divisor);
}

static uint32_t vlapic_read_aligned(const struct vlapic *vlapic,
                                    unsigned int offset)
{
    switch ( offset )
    {
    case APIC_PROCPRI:
        return vlapic_get_ppr(vlapic);

    case APIC_TMCCT: /* Timer CCR */
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;
        return vlapic_get_tmcct(vlapic);

    case APIC_TMICT: /* Timer ICR */
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;
        /* fall through */
    default:
        return vlapic_get_reg(vlapic, offset);
    }

    return 0;
}

static int vlapic_mmio_read(struct vcpu *v, unsigned long address,
                            unsigned int len, unsigned long *pval)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned int offset = address - vlapic_base_address(vlapic);
    unsigned int alignment = offset & 0xf, result = 0;

    /*
     * APIC registers are 32-bit values, aligned on 128-bit boundaries, and
     * should be accessed with 32-bit wide loads.
     *
     * Some processors support smaller accesses, so we allow any access which
     * fully fits within the 32-bit register.
     */
    if ( (alignment + len) <= 4 && offset <= (APIC_TDCR + 3) )
    {
        uint32_t reg = vlapic_read_aligned(vlapic, offset & ~0xf);

        switch ( len )
        {
        case 1: result = (uint8_t) (reg >> (alignment * 8)); break;
        case 2: result = (uint16_t)(reg >> (alignment * 8)); break;
        case 4: result = reg; break;
        }

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "offset %#x with length %#x, "
                    "and the result is %#x", offset, len, result);
    }

    *pval = result;
    return X86EMUL_OKAY;
}

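/*
 * x2APIC MSRs map linearly onto the xAPIC register layout: register
 * offset = (msr - MSR_X2APIC_FIRST) << 4.  The bitmap below marks which
 * of those registers may be read.
 */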
int guest_rdmsr_x2apic(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    static const unsigned long readable[] = {
#define REG(x) (1UL << (APIC_ ## x >> 4))
        REG(ID)    | REG(LVR)  | REG(TASKPRI) | REG(PROCPRI) |
        REG(LDR)   | REG(SPIV) | REG(ESR)     | REG(ICR)     |
        REG(CMCI)  | REG(LVTT) | REG(LVTTHMR) | REG(LVTPC)   |
        REG(LVT0)  | REG(LVT1) | REG(LVTERR)  | REG(TMICT)   |
        REG(TMCCT) | REG(TDCR) |
#undef REG
#define REGBLOCK(x) (((1UL << (X86_NR_VECTORS / 32)) - 1) << (APIC_ ## x >> 4))
        REGBLOCK(ISR) | REGBLOCK(TMR) | REGBLOCK(IRR)
#undef REGBLOCK
    };
    const struct vlapic *vlapic = vcpu_vlapic(v);
    uint64_t high = 0;
    uint32_t reg = msr - MSR_X2APIC_FIRST, offset;

    /*
     * The read side looks as if it might be safe to use outside of current
     * context, but the write side is most certainly not.  As we don't need
     * any non-current access, enforce symmetry with the write side.
     */
    ASSERT(v == current);

    if ( !vlapic_x2apic_mode(vlapic) ||
         (reg >= sizeof(readable) * 8) )
        return X86EMUL_EXCEPTION;

    reg = array_index_nospec(reg, sizeof(readable) * 8);
    if ( !test_bit(reg, readable) )
        return X86EMUL_EXCEPTION;

    offset = reg << 4;
    if ( offset == APIC_ICR )
        high = (uint64_t)vlapic_read_aligned(vlapic, APIC_ICR2) << 32;

    *val = high | vlapic_read_aligned(vlapic, offset);

    return X86EMUL_OKAY;
}

static void vlapic_pt_cb(struct vcpu *v, void *data)
{
    TRACE_0D(TRC_HVM_EMUL_LAPIC_TIMER_CB);
    *(s_time_t *)data = hvm_get_guest_time(v);
}

static void vlapic_tdt_pt_cb(struct vcpu *v, void *data)
{
    *(s_time_t *)data = hvm_get_guest_time(v);
    vcpu_vlapic(v)->hw.tdt_msr = 0;
}

/*
 * This function is used when a register related to the APIC timer is updated.
 * It expects the new value for the register TMICT to be set *before*
 * being called, and the previous value of the divisor (calculated from TDCR)
 * to be passed as argument.
 * It expects the new value of LVTT to be set *after* being called, with this
 * new value passed as parameter (only the APIC_TIMER_MODE_MASK bits matter).
 */
static void vlapic_update_timer(struct vlapic *vlapic, uint32_t lvtt,
                                bool tmict_updated, uint32_t old_divisor)
{
    uint64_t period, delta = 0;
    bool is_oneshot, is_periodic;

    is_periodic = (lvtt & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_PERIODIC;
    is_oneshot = (lvtt & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_ONESHOT;

    period = (uint64_t)vlapic_get_reg(vlapic, APIC_TMICT)
        * APIC_BUS_CYCLE_NS * old_divisor;

    /* Calculate the next time the timer should trigger an interrupt. */
    if ( tmict_updated )
        delta = period;
    else if ( period && vlapic->timer_last_update )
    {
        uint64_t time_passed = hvm_get_guest_time(current)
            - vlapic->timer_last_update;

        /* This depends on the previous mode, if a new mode is being set. */
        if ( vlapic_lvtt_period(vlapic) )
            time_passed %= period;
        if ( time_passed < period )
            delta = period - time_passed;
    }

    if ( delta && (is_oneshot || is_periodic) )
    {
        if ( vlapic->hw.timer_divisor != old_divisor )
        {
            period = (uint64_t)vlapic_get_reg(vlapic, APIC_TMICT)
                * APIC_BUS_CYCLE_NS * vlapic->hw.timer_divisor;
            delta = delta * vlapic->hw.timer_divisor / old_divisor;
        }

        TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(delta),
                        TRC_PAR_LONG(is_periodic ? period : 0),
                        vlapic->pt.irq);

        create_periodic_time(current, &vlapic->pt, delta,
                             is_periodic ? period : 0, vlapic->pt.irq,
                             is_periodic ? vlapic_pt_cb : NULL,
                             &vlapic->timer_last_update, false);

        vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
        if ( !tmict_updated )
            vlapic->timer_last_update -= period - delta;

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                    "bus cycle is %uns, "
                    "initial count %u, period %"PRIu64"ns",
                    APIC_BUS_CYCLE_NS,
                    vlapic_get_reg(vlapic, APIC_TMICT),
                    period);
    }
    else
    {
        TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
        destroy_periodic_time(&vlapic->pt);
        /*
         * From now on, TMCCT should return 0 until TMICT is set again,
         * either because the timer was in one-shot mode and the counter
         * reached 0, or simply because the timer is disabled.
         */
        vlapic->timer_last_update = 0;
    }
}

void vlapic_reg_write(struct vcpu *v, unsigned int reg, uint32_t val)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    memset(&vlapic->loaded, 0, sizeof(vlapic->loaded));

    switch ( reg )
    {
    case APIC_ID:
        vlapic_set_reg(vlapic, APIC_ID, val);
        break;

    case APIC_TASKPRI:
        vlapic_set_reg(vlapic, APIC_TASKPRI, val & 0xff);
        break;

    case APIC_EOI:
        vlapic_EOI_set(vlapic);
        break;

    case APIC_LDR:
        vlapic_set_reg(vlapic, APIC_LDR, val & APIC_LDR_MASK);
        break;

    case APIC_DFR:
        vlapic_set_reg(vlapic, APIC_DFR, val | 0x0FFFFFFF);
        break;

    case APIC_SPIV:
        vlapic_set_reg(vlapic, APIC_SPIV, val & 0x3ff);

        if ( !(val & APIC_SPIV_APIC_ENABLED) )
        {
            int i;
            uint32_t lvt_val;

            vlapic->hw.disabled |= VLAPIC_SW_DISABLED;

            for ( i = 0; i < VLAPIC_LVT_NUM; i++ )
            {
                lvt_val = vlapic_get_reg(vlapic, APIC_LVTT + 0x10 * i);
                vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i,
                               lvt_val | APIC_LVT_MASKED);
            }
        }
        else
        {
            vlapic->hw.disabled &= ~VLAPIC_SW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), &vlapic->pt);
        }
        break;

    case APIC_ICR:
        val &= ~(1 << 12); /* always clear the pending bit */
        vlapic_ipi(vlapic, val, vlapic_get_reg(vlapic, APIC_ICR2));
        vlapic_set_reg(vlapic, APIC_ICR, val);
        break;

    case APIC_ICR2:
        vlapic_set_reg(vlapic, APIC_ICR2, val & 0xff000000);
        break;

    case APIC_LVTT: /* LVT Timer Reg */
        if ( vlapic_lvtt_tdt(vlapic) !=
             ((val & APIC_TIMER_MODE_MASK) == APIC_TIMER_MODE_TSC_DEADLINE) )
        {
            vlapic_set_reg(vlapic, APIC_TMICT, 0);
            vlapic->hw.tdt_msr = 0;
        }
        vlapic->pt.irq = val & APIC_VECTOR_MASK;

        vlapic_update_timer(vlapic, val, false, vlapic->hw.timer_divisor);

        /* fallthrough */
    case APIC_LVTTHMR: /* LVT Thermal Monitor */
    case APIC_LVTPC:   /* LVT Performance Counter */
    case APIC_LVT0:    /* LVT LINT0 Reg */
    case APIC_LVT1:    /* LVT LINT1 Reg */
    case APIC_LVTERR:  /* LVT Error Reg */
        if ( vlapic_sw_disabled(vlapic) )
            val |= APIC_LVT_MASKED;
        val &= array_access_nospec(vlapic_lvt_mask, (reg - APIC_LVTT) >> 4);
        vlapic_set_reg(vlapic, reg, val);
        if ( reg == APIC_LVT0 )
        {
            vlapic_adjust_i8259_target(v->domain);
            pt_may_unmask_irq(v->domain, NULL);
        }
        if ( (reg == APIC_LVTT) && !(val & APIC_LVT_MASKED) )
            pt_may_unmask_irq(NULL, &vlapic->pt);
        if ( reg == APIC_LVTPC )
            vpmu_lvtpc_update(val);
        break;

    case APIC_TMICT:
        if ( !vlapic_lvtt_oneshot(vlapic) && !vlapic_lvtt_period(vlapic) )
            break;

        vlapic_set_reg(vlapic, APIC_TMICT, val);

        vlapic_update_timer(vlapic, vlapic_get_reg(vlapic, APIC_LVTT), true,
                            vlapic->hw.timer_divisor);
        break;

    case APIC_TDCR:
    {
        uint32_t current_divisor = vlapic->hw.timer_divisor;

        vlapic_set_tdcr(vlapic, val & 0xb);

        vlapic_update_timer(vlapic, vlapic_get_reg(vlapic, APIC_LVTT), false,
                            current_divisor);
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "timer divisor is %#x",
                    vlapic->hw.timer_divisor);
        break;
    }
    }
}

static int vlapic_mmio_write(struct vcpu *v, unsigned long address,
                             unsigned int len, unsigned long val)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned int offset = address - vlapic_base_address(vlapic);
    unsigned int alignment = offset & 0xf;

    offset &= ~0xf;

    if ( offset != APIC_EOI )
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                    "offset %#x with length %#x, and value is %#lx",
                    offset, len, val);

    /*
     * APIC registers are 32-bit values, aligned on 128-bit boundaries, and
     * should be accessed with 32-bit wide stores.
     *
     * Some processors support smaller accesses, so we allow any access which
     * fully fits within the 32-bit register.
     */
    if ( (alignment + len) <= 4 && offset <= APIC_TDCR )
    {
        if ( unlikely(len < 4) )
        {
            uint32_t reg = vlapic_read_aligned(vlapic, offset);

            alignment *= 8;

            switch ( len )
            {
            case 1:
                val = ((reg & ~(0xffU << alignment)) |
                       ((val & 0xff) << alignment));
                break;

            case 2:
                val = ((reg & ~(0xffffU << alignment)) |
                       ((val & 0xffff) << alignment));
                break;
            }
        }

        vlapic_reg_write(v, offset, val);
    }

    return X86EMUL_OKAY;
}

int vlapic_apicv_write(struct vcpu *v, unsigned int offset)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t val = vlapic_get_reg(vlapic, offset & ~0xf);

    if ( vlapic_x2apic_mode(vlapic) )
    {
        if ( offset != APIC_SELF_IPI )
            return X86EMUL_UNHANDLEABLE;

        offset = APIC_ICR;
        val = APIC_DEST_SELF | (val & APIC_VECTOR_MASK);
    }

    vlapic_reg_write(v, offset, val);

    return X86EMUL_OKAY;
}

int guest_wrmsr_x2apic(struct vcpu *v, uint32_t msr, uint64_t msr_content)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t offset = (msr - MSR_X2APIC_FIRST) << 4;

    /* The timer handling at least is unsafe outside of current context. */
    ASSERT(v == current);

    if ( !vlapic_x2apic_mode(vlapic) )
        return X86EMUL_EXCEPTION;

    switch ( offset )
    {
    case APIC_TASKPRI:
        if ( msr_content & ~APIC_TPRI_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_SPIV:
        if ( msr_content & ~(APIC_VECTOR_MASK | APIC_SPIV_APIC_ENABLED |
                             APIC_SPIV_FOCUS_DISABLED |
                             (VLAPIC_VERSION & APIC_LVR_DIRECTED_EOI
                              ? APIC_SPIV_DIRECTED_EOI : 0)) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTT:
        if ( msr_content & ~(LVT_MASK | APIC_TIMER_MODE_MASK) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTTHMR:
    case APIC_LVTPC:
    case APIC_CMCI:
        if ( msr_content & ~(LVT_MASK | APIC_MODE_MASK) )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVT0:
    case APIC_LVT1:
        if ( msr_content & ~LINT_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_LVTERR:
        if ( msr_content & ~LVT_MASK )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_TMICT:
        break;

    case APIC_TDCR:
        if ( msr_content & ~APIC_TDR_DIV_1 )
            return X86EMUL_EXCEPTION;
        break;

    case APIC_ICR:
        if ( (uint32_t)msr_content & ~(APIC_VECTOR_MASK | APIC_MODE_MASK |
                                       APIC_DEST_MASK | APIC_INT_ASSERT |
                                       APIC_INT_LEVELTRIG | APIC_SHORT_MASK) )
            return X86EMUL_EXCEPTION;
        vlapic_set_reg(vlapic, APIC_ICR2, msr_content >> 32);
        break;

    case APIC_SELF_IPI:
        if ( msr_content & ~APIC_VECTOR_MASK )
            return X86EMUL_EXCEPTION;
        offset = APIC_ICR;
        msr_content = APIC_DEST_SELF | (msr_content & APIC_VECTOR_MASK);
        break;

    case APIC_EOI:
    case APIC_ESR:
        if ( msr_content )
        {
    default:
            return X86EMUL_EXCEPTION;
        }
    }

    vlapic_reg_write(v, array_index_nospec(offset, PAGE_SIZE), msr_content);

    return X86EMUL_OKAY;
}

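/*
 * The xAPIC MMIO window is only decoded while the APIC is hardware
 * enabled and not in x2APIC mode.
 */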
static int vlapic_range(struct vcpu *v, unsigned long addr)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    unsigned long offset = addr - vlapic_base_address(vlapic);

    return !vlapic_hw_disabled(vlapic) &&
           !vlapic_x2apic_mode(vlapic) &&
           (offset < PAGE_SIZE);
}

static const struct hvm_mmio_ops vlapic_mmio_ops = {
    .check = vlapic_range,
    .read = vlapic_mmio_read,
    .write = vlapic_mmio_write,
};

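/*
 * Derive the x2APIC registers from the vCPU ID: the APIC ID is twice the
 * vCPU ID, while the LDR packs a cluster number (vCPU ID >> 4) into the
 * upper 16 bits and a one-hot position bit (vCPU ID & 0xf) into the
 * lower half.
 */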
static void set_x2apic_id(struct vlapic *vlapic)
{
    u32 id = vlapic_vcpu(vlapic)->vcpu_id;
    u32 ldr = ((id & ~0xf) << 12) | (1 << (id & 0xf));

    vlapic_set_reg(vlapic, APIC_ID, id * 2);
    vlapic_set_reg(vlapic, APIC_LDR, ldr);
}

int guest_wrmsr_apic_base(struct vcpu *v, uint64_t value)
{
    const struct cpuid_policy *cp = v->domain->arch.cpuid;
    struct vlapic *vlapic = vcpu_vlapic(v);

    if ( !has_vlapic(v->domain) )
        return X86EMUL_EXCEPTION;

    /* Attempting to set reserved bits? */
    if ( value & ~(APIC_BASE_ADDR_MASK | APIC_BASE_ENABLE | APIC_BASE_BSP |
                   (cp->basic.x2apic ? APIC_BASE_EXTD : 0)) )
        return X86EMUL_EXCEPTION;

    /*
     * Architecturally speaking, we should allow a guest to move the xAPIC
     * MMIO window (within reason - not even hardware allows arbitrary
     * positions).  However, virtualising the behaviour for multi-vcpu guests
     * is problematic.
     *
     * The ability to move the MMIO window was introduced with the Pentium Pro
     * processor, to deconflict the window with other MMIO in the system.  The
     * need to move the MMIO window was obsoleted by the Netburst architecture
     * which reserved the space in physical address space for MSIs.
     *
     * As such, it appears to be a rarely used feature before the turn of the
     * millennium, and entirely unused after.
     *
     * Xen uses a per-domain P2M, but MSR_APIC_BASE is per-vcpu.  In
     * principle, we could emulate the MMIO windows being in different
     * locations by ensuring that all windows are unmapped in the P2M and trap
     * for emulation.  Xen has never had code to modify the P2M in response to
     * APIC_BASE updates, so guests which actually try this are likely to end
     * up without a working APIC.
     *
     * Things are more complicated with hardware APIC acceleration, where Xen
     * has to map a sink-page into the P2M for APIC accesses to be recognised
     * and accelerated by microcode.  Again, this could in principle be
     * emulated, but the visible result in the guest would be multiple working
     * APIC MMIO windows.  Moving the APIC window has never caused the
     * sink-page to move in the P2M, meaning that on all modern hardware, the
     * APIC definitely ceases working if the guest tries to move the window.
     *
     * As such, when the APIC is configured in xAPIC mode, require the MMIO
     * window to be in its default location.  We don't expect any guests which
     * currently run on Xen to be impacted by this restriction, and the #GP
     * fault will be far more obvious to debug than a malfunctioning MMIO
     * window.
     */
    if ( ((value & (APIC_BASE_EXTD | APIC_BASE_ENABLE)) == APIC_BASE_ENABLE) &&
         ((value & APIC_BASE_ADDR_MASK) != APIC_DEFAULT_PHYS_BASE) )
    {
        printk(XENLOG_G_INFO
               "%pv tried to move the APIC MMIO window: val 0x%08"PRIx64"\n",
               v, value);
        return X86EMUL_EXCEPTION;
    }

    if ( (vlapic->hw.apic_base_msr ^ value) & APIC_BASE_ENABLE )
    {
        if ( unlikely(value & APIC_BASE_EXTD) )
            return X86EMUL_EXCEPTION;

        if ( value & APIC_BASE_ENABLE )
        {
            vlapic_reset(vlapic);
            vlapic->hw.disabled &= ~VLAPIC_HW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), &vlapic->pt);
        }
        else
        {
            vlapic->hw.disabled |= VLAPIC_HW_DISABLED;
            pt_may_unmask_irq(vlapic_domain(vlapic), NULL);
        }
    }
    else if ( ((vlapic->hw.apic_base_msr ^ value) & APIC_BASE_EXTD) &&
              unlikely(!vlapic_xapic_mode(vlapic)) )
        return X86EMUL_EXCEPTION;

    vlapic->hw.apic_base_msr = value;
    memset(&vlapic->loaded, 0, sizeof(vlapic->loaded));

    if ( vlapic_x2apic_mode(vlapic) )
        set_x2apic_id(vlapic);

    vmx_vlapic_msr_changed(vlapic_vcpu(vlapic));

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
                "apic base msr is 0x%016"PRIx64, vlapic->hw.apic_base_msr);

    return X86EMUL_OKAY;
}

uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic)
{
    if ( !vlapic_lvtt_tdt(vlapic) )
        return 0;

    return vlapic->hw.tdt_msr;
}

void vlapic_tdt_msr_set(struct vlapic *vlapic, uint64_t value)
{
    uint64_t guest_tsc;
    struct vcpu *v = vlapic_vcpu(vlapic);

    if ( vlapic_hw_disabled(vlapic) )
        return;

    if ( !vlapic_lvtt_tdt(vlapic) )
    {
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "ignore tsc deadline msr write");
        return;
    }

    /* new_value = 0, >0 && <= now, > now */
    guest_tsc = hvm_get_guest_tsc(v);
    if ( value > guest_tsc )
    {
        uint64_t delta = gtsc_to_gtime(v->domain, value - guest_tsc);
        delta = max_t(s64, delta, 0);

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "delta[0x%016"PRIx64"]", delta);

        vlapic->hw.tdt_msr = value;
        /* .... reprogram tdt timer */
        TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(delta),
                        TRC_PAR_LONG(0LL), vlapic->pt.irq);
        create_periodic_time(v, &vlapic->pt, delta, 0,
                             vlapic->pt.irq, vlapic_tdt_pt_cb,
                             &vlapic->timer_last_update, false);
        vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
    }
    else
    {
        vlapic->hw.tdt_msr = 0;

        /* trigger a timer event if needed */
        if ( value > 0 )
        {
            TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(0LL),
                            TRC_PAR_LONG(0LL), vlapic->pt.irq);
            create_periodic_time(v, &vlapic->pt, 0, 0,
                                 vlapic->pt.irq, vlapic_tdt_pt_cb,
                                 &vlapic->timer_last_update, false);
            vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
        }
        else
        {
            /* .... stop tdt timer */
            TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
            destroy_periodic_time(&vlapic->pt);
        }

        HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "value[0x%016"PRIx64"]", value);
    }

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
                "tdt_msr[0x%016"PRIx64"],"
                " gtsc[0x%016"PRIx64"]",
                vlapic->hw.tdt_msr, guest_tsc);
}

static int __vlapic_accept_pic_intr(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct vlapic *vlapic = vcpu_vlapic(v);
    uint32_t lvt0 = vlapic_get_reg(vlapic, APIC_LVT0);
    union vioapic_redir_entry redir0;

    ASSERT(has_vpic(d));

    if ( !has_vioapic(d) )
        return 0;

    redir0 = domain_vioapic(d, 0)->redirtbl[0];

    /* We deliver 8259 interrupts to the appropriate CPU as follows. */
    return ((/* IOAPIC pin0 is unmasked and routing to this LAPIC? */
             ((redir0.fields.delivery_mode == dest_ExtINT) &&
              !redir0.fields.mask &&
              redir0.fields.dest_id == VLAPIC_ID(vlapic) &&
              !vlapic_disabled(vlapic)) ||
             /* LAPIC has LVT0 unmasked for ExtInts? */
             ((lvt0 & (APIC_MODE_MASK|APIC_LVT_MASKED)) == APIC_DM_EXTINT) ||
             /* LAPIC is fully disabled? */
             vlapic_hw_disabled(vlapic)));
}

int vlapic_accept_pic_intr(struct vcpu *v)
{
    if ( vlapic_hw_disabled(vcpu_vlapic(v)) || !has_vpic(v->domain) )
        return 0;

    TRACE_2D(TRC_HVM_EMUL_LAPIC_PIC_INTR,
             (v == v->domain->arch.hvm.i8259_target),
             v ? __vlapic_accept_pic_intr(v) : -1);

    return ((v == v->domain->arch.hvm.i8259_target) &&
            __vlapic_accept_pic_intr(v));
}

void vlapic_adjust_i8259_target(struct domain *d)
{
    struct vcpu *v;

    if ( !has_vpic(d) )
        return;

    for_each_vcpu ( d, v )
        if ( __vlapic_accept_pic_intr(v) )
            goto found;

    v = d->vcpu ? d->vcpu[0] : NULL;

 found:
    if ( d->arch.hvm.i8259_target == v )
        return;
    d->arch.hvm.i8259_target = v;
    pt_adjust_global_vcpu_target(v);
}

int vlapic_has_pending_irq(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    int irr, isr;

    if ( !vlapic_enabled(vlapic) )
        return -1;

    /*
     * Poll the viridian message queues before checking the IRR since
     * a synthetic interrupt may be asserted during the poll.
     */
    if ( has_viridian_synic(v->domain) )
        viridian_synic_poll(v);

    irr = vlapic_find_highest_irr(vlapic);
    if ( irr == -1 )
        return -1;

    if ( hvm_funcs.virtual_intr_delivery_enabled &&
         !nestedhvm_vcpu_in_guestmode(v) )
        return irr;

    /*
     * If APIC assist was set then an EOI may have been avoided.
     * If so, we need to emulate the EOI here before comparing ISR
     * with IRR.
     */
    if ( viridian_apic_assist_completed(v) )
        vlapic_EOI_set(vlapic);

    isr = vlapic_find_highest_isr(vlapic);

    /*
     * The specification says that if APIC assist is set and a
     * subsequent interrupt of lower priority occurs then APIC assist
     * needs to be cleared.
     */
    if ( isr >= 0 &&
         (irr & 0xf0) <= (isr & 0xf0) )
    {
        viridian_apic_assist_clear(v);
        return -1;
    }

    return irr;
}

int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack)
{
    struct vlapic *vlapic = vcpu_vlapic(v);
    int isr;

    if ( !force_ack &&
         hvm_funcs.virtual_intr_delivery_enabled )
        return 1;

    /* If there's no chance of using APIC assist then bail now. */
    if ( !has_viridian_apic_assist(v->domain) ||
         vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]) )
        goto done;

    isr = vlapic_find_highest_isr(vlapic);
    if ( isr == -1 && vector > 0x10 )
    {
        /*
         * This vector is edge triggered, not in the legacy range, and no
         * lower priority vectors are pending in the ISR.  Thus we can set
         * APIC assist to avoid exiting for EOI.
         */
        viridian_apic_assist_set(v);
    }

 done:
    if ( !has_viridian_synic(v->domain) ||
         !viridian_synic_is_auto_eoi_sint(v, vector) )
        vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);

    vlapic_clear_irr(vector, vlapic);

    return 1;
}

bool_t is_vlapic_lvtpc_enabled(struct vlapic *vlapic)
{
    return (vlapic_enabled(vlapic) &&
            !(vlapic_get_reg(vlapic, APIC_LVTPC) & APIC_LVT_MASKED));
}

/* Reset the VLAPIC back to its init state. */
static void vlapic_do_init(struct vlapic *vlapic)
{
    int i;

    if ( !has_vlapic(vlapic_vcpu(vlapic)->domain) )
        return;

    vlapic_set_reg(vlapic, APIC_LVR, VLAPIC_VERSION);

    for ( i = 0; i < 8; i++ )
    {
        vlapic_set_reg(vlapic, APIC_IRR + 0x10 * i, 0);
        vlapic_set_reg(vlapic, APIC_ISR + 0x10 * i, 0);
        vlapic_set_reg(vlapic, APIC_TMR + 0x10 * i, 0);
    }
    vlapic_set_reg(vlapic, APIC_ICR, 0);
    vlapic_set_reg(vlapic, APIC_ICR2, 0);
    /*
     * LDR is read-only in x2APIC mode. Preserve its value when handling
     * INIT signal in x2APIC mode.
     */
    if ( !vlapic_x2apic_mode(vlapic) )
        vlapic_set_reg(vlapic, APIC_LDR, 0);
    vlapic_set_reg(vlapic, APIC_TASKPRI, 0);
    vlapic_set_reg(vlapic, APIC_TMICT, 0);
    vlapic_set_reg(vlapic, APIC_TMCCT, 0);
    vlapic_set_tdcr(vlapic, 0);

    vlapic_set_reg(vlapic, APIC_DFR, 0xffffffffU);

    for ( i = 0; i < VLAPIC_LVT_NUM; i++ )
        vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);

    vlapic_set_reg(vlapic, APIC_SPIV, 0xff);
    vlapic->hw.disabled |= VLAPIC_SW_DISABLED;

    TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
    destroy_periodic_time(&vlapic->pt);
}

/* Reset the VLAPIC back to its power-on/reset state. */
void vlapic_reset(struct vlapic *vlapic)
{
    const struct vcpu *v = vlapic_vcpu(vlapic);

    if ( !has_vlapic(v->domain) )
        return;

    vlapic->hw.apic_base_msr = APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
    if ( v->vcpu_id == 0 )
        vlapic->hw.apic_base_msr |= APIC_BASE_BSP;

    vlapic_set_reg(vlapic, APIC_ID, (v->vcpu_id * 2) << 24);
    vlapic_do_init(vlapic);
}

/* Rearm the APIC timer if needed, after an HVM restore. */
static void lapic_rearm(struct vlapic *s)
{
    unsigned long tmict;
    uint64_t period, tdt_msr;

    s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;

    if ( vlapic_lvtt_tdt(s) )
    {
        if ( (tdt_msr = vlapic_tdt_msr_get(s)) != 0 )
            vlapic_tdt_msr_set(s, tdt_msr);
        return;
    }

    if ( (tmict = vlapic_get_reg(s, APIC_TMICT)) == 0 )
        return;

    period = ((uint64_t)APIC_BUS_CYCLE_NS *
              (uint32_t)tmict * s->hw.timer_divisor);
    TRACE_2_LONG_3D(TRC_HVM_EMUL_LAPIC_START_TIMER, TRC_PAR_LONG(period),
                    TRC_PAR_LONG(vlapic_lvtt_period(s) ? period : 0LL),
                    s->pt.irq);
    create_periodic_time(vlapic_vcpu(s), &s->pt, period,
                         vlapic_lvtt_period(s) ? period : 0,
                         s->pt.irq,
                         vlapic_lvtt_period(s) ? vlapic_pt_cb : NULL,
                         &s->timer_last_update, false);
    s->timer_last_update = s->pt.last_plt_gtime;
}

static int lapic_save_hidden(struct vcpu *v, hvm_domain_context_t *h)
{
    if ( !has_vlapic(v->domain) )
        return 0;

    return hvm_save_entry(LAPIC, v->vcpu_id, h, &vcpu_vlapic(v)->hw);
}

static int lapic_save_regs(struct vcpu *v, hvm_domain_context_t *h)
{
    if ( !has_vlapic(v->domain) )
        return 0;

    vlapic_sync_pir_to_irr(v);

    return hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, vcpu_vlapic(v)->regs);
}

/*
 * Following lapic_load_hidden()/lapic_load_regs() we may need to
 * correct ID and LDR when they come from an old, broken hypervisor.
 */
static void lapic_load_fixup(struct vlapic *vlapic)
{
    uint32_t id = vlapic->loaded.id;

    if ( vlapic_x2apic_mode(vlapic) && id && vlapic->loaded.ldr == 1 )
    {
        /*
         * This is optional: ID != 0 contradicts LDR == 1. It's being added
         * to aid in eventual debugging of issues arising from the fixup done
         * here, but can be dropped as soon as it is found to conflict with
         * other (future) changes.
         */
        if ( GET_xAPIC_ID(id) != vlapic_vcpu(vlapic)->vcpu_id * 2 ||
             id != SET_xAPIC_ID(GET_xAPIC_ID(id)) )
            printk(XENLOG_G_WARNING "%pv: bogus APIC ID %#x loaded\n",
                   vlapic_vcpu(vlapic), id);
        set_x2apic_id(vlapic);
    }
    else /* Undo an eventual earlier fixup. */
    {
        vlapic_set_reg(vlapic, APIC_ID, id);
        vlapic_set_reg(vlapic, APIC_LDR, vlapic->loaded.ldr);
    }
}

static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid = hvm_load_instance(h);
    struct vcpu *v;
    struct vlapic *s;

    if ( !has_vlapic(d) )
        return -ENODEV;

    /* Which vlapic to load? */
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no apic%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }
    s = vcpu_vlapic(v);

    if ( hvm_load_entry_zeroextend(LAPIC, h, &s->hw) != 0 )
        return -EINVAL;

    s->loaded.hw = 1;
    if ( s->loaded.regs )
        lapic_load_fixup(s);

    if ( !(s->hw.apic_base_msr & APIC_BASE_ENABLE) &&
         unlikely(vlapic_x2apic_mode(s)) )
        return -EINVAL;

    vmx_vlapic_msr_changed(v);

    return 0;
}

static int lapic_load_regs(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid = hvm_load_instance(h);
    struct vcpu *v;
    struct vlapic *s;

    if ( !has_vlapic(d) )
        return -ENODEV;

    /* Which vlapic to load? */
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no apic%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }
    s = vcpu_vlapic(v);

    if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
        return -EINVAL;

    s->loaded.id = vlapic_get_reg(s, APIC_ID);
    s->loaded.ldr = vlapic_get_reg(s, APIC_LDR);
    s->loaded.regs = 1;
    if ( s->loaded.hw )
        lapic_load_fixup(s);

    if ( hvm_funcs.process_isr )
        alternative_vcall(hvm_funcs.process_isr,
                          vlapic_find_highest_isr(s), v);

    vlapic_adjust_i8259_target(d);
    lapic_rearm(s);
    return 0;
}

HVM_REGISTER_SAVE_RESTORE(LAPIC, lapic_save_hidden,
                          lapic_load_hidden, 1, HVMSR_PER_VCPU);
HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs,
                          lapic_load_regs, 1, HVMSR_PER_VCPU);

int vlapic_init(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "%d", v->vcpu_id);

    if ( !has_vlapic(v->domain) )
    {
        vlapic->hw.disabled = VLAPIC_HW_DISABLED;
        return 0;
    }

    vlapic->pt.source = PTSRC_lapic;

    if ( vlapic->regs_page == NULL )
    {
        vlapic->regs_page = alloc_domheap_page(v->domain, MEMF_no_owner);
        if ( vlapic->regs_page == NULL )
        {
            dprintk(XENLOG_ERR, "alloc vlapic regs error: %d/%d\n",
                    v->domain->domain_id, v->vcpu_id);
            return -ENOMEM;
        }
    }
    if ( vlapic->regs == NULL )
    {
        vlapic->regs = __map_domain_page_global(vlapic->regs_page);
        if ( vlapic->regs == NULL )
        {
            free_domheap_page(vlapic->regs_page);
            dprintk(XENLOG_ERR, "map vlapic regs error: %d/%d\n",
                    v->domain->domain_id, v->vcpu_id);
            return -ENOMEM;
        }
    }
    clear_page(vlapic->regs);

    vlapic_reset(vlapic);

    spin_lock_init(&vlapic->esr_lock);

    tasklet_init(&vlapic->init_sipi.tasklet, vlapic_init_sipi_action, v);

    if ( v->vcpu_id == 0 )
        register_mmio_handler(v->domain, &vlapic_mmio_ops);

    return 0;
}

void vlapic_destroy(struct vcpu *v)
{
    struct vlapic *vlapic = vcpu_vlapic(v);

    if ( !has_vlapic(v->domain) )
        return;

    tasklet_kill(&vlapic->init_sipi.tasklet);
    TRACE_0D(TRC_HVM_EMUL_LAPIC_STOP_TIMER);
    destroy_periodic_time(&vlapic->pt);
    unmap_domain_page_global(vlapic->regs);
    free_domheap_page(vlapic->regs_page);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */