1 /*
2 * based on linux-2.6.17.13/arch/i386/kernel/apic.c
3 *
4 * Local APIC handling, local APIC timers
5 *
6 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes
9 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
10 * thanks to Eric Gilmore
11 * and Rolf G. Tews
12 * for testing these extensively.
13 * Maciej W. Rozycki : Various updates and fixes.
14 * Mikael Pettersson : Power Management for UP-APIC.
15 * Pavel Machek and
16 * Mikael Pettersson : PM converted to driver model.
17 */
18
19 #include <xen/perfc.h>
20 #include <xen/errno.h>
21 #include <xen/init.h>
22 #include <xen/mm.h>
23 #include <xen/param.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/delay.h>
27 #include <xen/smp.h>
28 #include <xen/softirq.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/microcode.h>
31 #include <asm/msr.h>
32 #include <asm/atomic.h>
33 #include <asm/mpspec.h>
34 #include <asm/flushtlb.h>
35 #include <asm/hardirq.h>
36 #include <asm/apic.h>
37 #include <asm/io_apic.h>
38 #include <mach_apic.h>
39 #include <io_ports.h>
40 #include <irq_vectors.h>
41 #include <xen/kexec.h>
42 #include <asm/guest.h>
43 #include <asm/time.h>
44
/* Use the TSC deadline timer mode when available ("tdt" boolean option). */
static bool __read_mostly tdt_enabled;
static bool __initdata tdt_enable = true;
boolean_param("tdt", tdt_enable);

/* Set once the IOMMU's interrupt remapping was enabled for x2APIC use. */
static bool __read_mostly iommu_x2apic_enabled;
50
/*
 * Local APIC register state saved across suspend/resume: filled in by
 * lapic_suspend() and replayed by lapic_resume().
 */
static struct {
    int active; /* non-zero once apic_pm_activate() has run */
    /* r/w apic fields */
    unsigned int apic_id;
    unsigned int apic_taskpri;
    unsigned int apic_ldr;
    unsigned int apic_dfr;
    unsigned int apic_spiv;
    unsigned int apic_lvtt;
    unsigned int apic_lvtpc;
    unsigned int apic_lvtcmci;
    unsigned int apic_lvt0;
    unsigned int apic_lvt1;
    unsigned int apic_lvterr;
    unsigned int apic_tmict;
    unsigned int apic_tdcr;
    unsigned int apic_thmr;
} apic_pm_state;
69
70 /*
71 * Knob to control our willingness to enable the local APIC.
72 */
73 static s8 __initdata enable_local_apic; /* -1=force-disable, +1=force-enable */
74
75 /*
76 * Debug level
77 */
78 u8 __read_mostly apic_verbosity;
79
80 static bool __initdata opt_x2apic = true;
81 boolean_param("x2apic", opt_x2apic);
82
83 /*
84 * Bootstrap processor local APIC boot mode - so we can undo our changes
85 * to the APIC state.
86 */
87 static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;
88
89 bool __read_mostly x2apic_enabled;
90 bool __read_mostly directed_eoi_enabled;
91
modern_apic(void)92 static int modern_apic(void)
93 {
94 unsigned int lvr, version;
95 /* AMD systems use old APIC versions, so check the CPU */
96 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
97 boot_cpu_data.x86 >= 0xf)
98 return 1;
99
100 /* Hygon systems use modern APIC */
101 if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
102 return 1;
103
104 lvr = apic_read(APIC_LVR);
105 version = GET_APIC_VERSION(lvr);
106 return version >= 0x14;
107 }
108
109 /*
110 * 'what should we do if we get a hw irq event on an illegal vector'.
111 * each architecture has to answer this themselves.
112 */
ack_bad_irq(unsigned int irq)113 void ack_bad_irq(unsigned int irq)
114 {
115 printk("unexpected IRQ trap at irq %02x\n", irq);
116 /*
117 * Currently unexpected vectors happen only on SMP and APIC.
118 * We _must_ ack these because every local APIC has only N
119 * irq slots per priority level, and a 'hanging, unacked' IRQ
120 * holds up an irq slot - in excessive cases (when multiple
121 * unexpected vectors occur) that might lock up the APIC
122 * completely.
123 * But only ack when the APIC is enabled -AK
124 */
125 if (cpu_has_apic)
126 ack_APIC_irq();
127 }
128
apic_intr_init(void)129 void __init apic_intr_init(void)
130 {
131 smp_intr_init();
132
133 /* self generated IPI for local APIC timer */
134 set_direct_apic_vector(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
135
136 /* IPI vectors for APIC spurious and error interrupts */
137 set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
138 set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);
139
140 /* Performance Counters Interrupt */
141 set_direct_apic_vector(PMU_APIC_VECTOR, pmu_apic_interrupt);
142 }
143
/* Using APIC to generate smp_local_timer_interrupt? */
static bool __read_mostly using_apic_timer;

/* Set when we (not the firmware) enabled the APIC via MSR_APIC_BASE. */
static bool __read_mostly enabled_via_apicbase;
148
/*
 * Physical-mode broadcast destination: 8 bits of APIC ID on modern
 * APICs, only 4 bits on a discrete 82489DX.
 */
int get_physical_broadcast(void)
{
    return modern_apic() ? 0xff : 0xf;
}
156
get_maxlvt(void)157 int get_maxlvt(void)
158 {
159 unsigned int v = apic_read(APIC_LVR);
160
161 return GET_APIC_MAXLVT(v);
162 }
163
/*
 * Quiesce the local APIC: mask every LVT entry first (to deassert any
 * level-triggered sources), then zero them.  Safe to call on a live APIC;
 * the ordering of the register writes below is deliberate.
 */
void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
    apic_write(APIC_TMICT, 0);

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we have to set masks only first to deassert
     * any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
    /* The higher-numbered LVT entries only exist on newer APICs. */
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 5) {
        v = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 6) {
        v = apic_read(APIC_CMCI);
        apic_write(APIC_CMCI, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     */
    apic_write(APIC_LVTT, APIC_LVT_MASKED);
    apic_write(APIC_LVT0, APIC_LVT_MASKED);
    apic_write(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, APIC_LVT_MASKED);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
    if (maxlvt >= 6)
        apic_write(APIC_CMCI, APIC_LVT_MASKED);
    /* LDR is writable only in xAPIC mode (read-only under x2APIC). */
    if (!x2apic_enabled) {
        v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
        apic_write(APIC_LDR, v);
    }

    if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
        apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
}
228
/*
 * Route interrupts to the BSP's local APIC.  On boards in PIC mode this
 * means programming the IMCR (ports 0x22/0x23) to switch the INT and NMI
 * lines over from the 8259A to the APIC.
 */
void __init connect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        clear_local_APIC();
        /*
         * PIC mode, enable APIC mode in the IMCR, i.e.
         * connect BSP's local APIC to INT and NMI lines.
         */
        apic_printk(APIC_VERBOSE, "leaving PIC mode, "
                    "enabling APIC mode.\n");
        outb(0x70, 0x22); /* IMCR register select */
        outb(0x01, 0x23); /* route through APIC */
    }
    enable_apic_mode();
}
247
/*
 * Undo connect_bsp_APIC(): return the board to PIC mode, or restore the
 * virtual-wire LVT0/LVT1 routing.
 * @virt_wire_setup: non-zero if the LINT0 ExtINT virtual wire has already
 *                   been configured elsewhere, in which case LVT0 is left
 *                   untouched here.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards). Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point! The only exception are INIT IPIs.
         */
        apic_printk(APIC_VERBOSE, "disabling APIC mode, "
                    "entering PIC mode.\n");
        outb(0x70, 0x22); /* IMCR register select */
        outb(0x00, 0x23); /* route through PIC */
    }
    else {
        /* Go back to Virtual Wire compatibility mode */
        unsigned long value;

        clear_local_APIC();

        /* For the spurious interrupt use vector F, and enable it */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
        value |= 0xf;
        apic_write(APIC_SPIV, value);

        if (!virt_wire_setup) {
            /* For LVT0 make it edge triggered, active high, external and enabled */
            value = apic_read(APIC_LVT0);
            value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
            value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
            value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
            apic_write(APIC_LVT0, value);
        }

        /* For LVT1 make it edge triggered, active high, nmi and enabled */
        value = apic_read(APIC_LVT1);
        value &= ~(
            APIC_MODE_MASK | APIC_SEND_PENDING |
            APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
            APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
        apic_write(APIC_LVT1, value);
    }
}
297
/*
 * Fully disable the local APIC.  When kexecing, additionally return the
 * APIC to the mode it was in at boot (disabled/xAPIC/x2APIC) so the next
 * kernel finds the hardware in the state firmware left it.
 */
void disable_local_APIC(void)
{
    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    apic_write(APIC_SPIV, apic_read(APIC_SPIV) & ~APIC_SPIV_APIC_ENABLED);

    if (enabled_via_apicbase) {
        uint64_t msr_content;
        rdmsrl(MSR_APIC_BASE, msr_content);
        wrmsrl(MSR_APIC_BASE, msr_content &
               ~(APIC_BASE_ENABLE | APIC_BASE_EXTD));
    }

    if ( kexecing && (current_local_apic_mode() != apic_boot_mode) )
    {
        uint64_t msr_content;
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~(APIC_BASE_ENABLE | APIC_BASE_EXTD);
        wrmsrl(MSR_APIC_BASE, msr_content);

        switch ( apic_boot_mode )
        {
        case APIC_MODE_DISABLED:
            break; /* Nothing to do - we did this above */
        case APIC_MODE_XAPIC:
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        case APIC_MODE_X2APIC:
            /* x2APIC must be entered via two writes: ENABLE, then EXTD. */
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            msr_content |= APIC_BASE_EXTD;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        default:
            printk("Default case when reverting #%d lapic to boot state\n",
                   smp_processor_id());
            break;
        }
    }

}
344
345 /*
346 * This is to verify that we're looking at a real local APIC.
347 * Check these against your board if the CPUs aren't getting
348 * started for no apparent reason.
349 */
/*
 * Verify we are looking at a real local APIC: the version register must be
 * read-only, look sane, and report at least two LVT entries.  Also detects
 * (and, unless overridden, enables) directed-EOI support on the BSP.
 * Returns 1 on success, 0 when this does not look like a genuine APIC.
 */
int __init verify_local_APIC(void)
{
    unsigned int reg0, reg1;

    /*
     * The version register is read-only in a real APIC.
     */
    reg0 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);

    /* We don't try writing LVR in x2APIC mode since that incurs #GP. */
    if ( !x2apic_enabled )
        apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
    reg1 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);

    /*
     * The two version reads above should print the same
     * numbers. If the second one is different, then we
     * poke at a non-APIC.
     */
    if (reg1 != reg0)
        return 0;

    /*
     * Check if the version looks reasonably.
     */
    reg1 = GET_APIC_VERSION(reg0);
    if (reg1 == 0x00 || reg1 == 0xff)
        return 0;
    reg1 = get_maxlvt();
    if (reg1 < 0x02 || reg1 == 0xff)
        return 0;

    /*
     * Detecting directed EOI on BSP:
     * If having directed EOI support in lapic, force to use ioapic_ack_old,
     * and enable the directed EOI for intr handling.
     */
    if ( reg0 & APIC_LVR_DIRECTED_EOI )
    {
        if ( ioapic_ack_new && ioapic_ack_forced )
            printk("Not enabling directed EOI because ioapic_ack_new has been "
                   "forced on the command line\n");
        else
        {
            ioapic_ack_new = false;
            directed_eoi_enabled = true;
            printk("Enabled directed EOI with ioapic_ack_old on!\n");
        }
    }

    /*
     * The ID register is read/write in a real APIC.
     */
    reg0 = apic_read(APIC_ID);
    apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);

    /*
     * The next two are just to see if we have sane values.
     * They're only really relevant if we're in Virtual Wire
     * compatibility mode, but most boxes aren't any more.
     */
    reg0 = apic_read(APIC_LVT0);
    apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
    reg1 = apic_read(APIC_LVT1);
    apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);

    return 1;
}
420
/*
 * Synchronise APIC bus arbitration IDs by broadcasting a level-triggered
 * INIT to all-including-self.  Only relevant for pre-integrated (82489DX
 * era) APICs; a no-op on modern parts.
 */
void __init sync_Arb_IDs(void)
{
    /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1
       And not needed on AMD */
    if (modern_apic())
        return;
    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
    apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
435
436 /*
437 * An initial setup of the virtual wire mode.
438 */
/*
 * Enable the BSP's local APIC in virtual wire mode: LINT0 delivers ExtINT
 * from the 8259A, LINT1 delivers NMI.  Skipped entirely on SMP-capable
 * firmware, where the I/O-APIC virtual wire may already be active.
 */
void __init init_bsp_APIC(void)
{
    unsigned long value;

    /*
     * Don't do the setup now if we have a SMP BIOS as the
     * through-I/O-APIC virtual wire mode might be active.
     */
    if (smp_found_config || !cpu_has_apic)
        return;

    /*
     * Do not trust the local APIC being empty at bootup.
     */
    clear_local_APIC();

    /*
     * Enable APIC.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    value |= APIC_SPIV_APIC_ENABLED;

    /* This bit is reserved on P4/Xeon and should be cleared */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
        value &= ~APIC_SPIV_FOCUS_DISABLED;
    else
        value |= APIC_SPIV_FOCUS_DISABLED;
    value |= SPURIOUS_APIC_VECTOR;
    apic_write(APIC_SPIV, value);

    /*
     * Set up the virtual wire mode.
     */
    apic_write(APIC_LVT0, APIC_DM_EXTINT);
    apic_write(APIC_LVT1, APIC_DM_NMI);
}
476
/* Mark the LAPIC as live so lapic_suspend()/lapic_resume() will act. */
static void apic_pm_activate(void)
{
    apic_pm_state.active = 1;
}
481
/* Put this CPU's APIC into x2APIC mode, if it isn't there already. */
static void __enable_x2apic(void)
{
    uint64_t msr_content;

    rdmsrl(MSR_APIC_BASE, msr_content);
    if ( !(msr_content & APIC_BASE_EXTD) )
    {
        msr_content |= APIC_BASE_ENABLE | APIC_BASE_EXTD;
        /* The uint32_t cast deliberately zaps the upper address bits. */
        msr_content = (uint32_t)msr_content;
        wrmsrl(MSR_APIC_BASE, msr_content);
    }
}
494
/*
 * Re-enter x2APIC mode after resume, restoring the IOMMU's interrupt
 * remapping first if it had been enabled for x2APIC.
 */
static void resume_x2apic(void)
{
    if ( iommu_x2apic_enabled )
        iommu_enable_x2apic();
    __enable_x2apic();
}
501
/*
 * Program this CPU's local APIC for use by Xen: LDR/TPR, stale-ISR
 * cleanup, spurious vector, LINT0/LINT1 wiring and the error LVT.
 * @bsp: true when running on the boot processor (affects ExtINT/NMI
 *       routing and verbosity).
 */
void setup_local_APIC(bool bsp)
{
    unsigned long oldvalue, value, maxlvt;
    int i, j;

    /* Pound the ESR really hard over the head with a big hammer - mbligh */
    if (esr_disable) {
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
    }

    BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116). So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to reject any interrupts below FIRST_IRQ_VECTOR.
     */
    apic_write(APIC_TASKPRI, (FIRST_IRQ_VECTOR & 0xF0) - 0x10);

    /*
     * After a crash, we no longer service the interrupts and a pending
     * interrupt from previous kernel might still have ISR bit set.
     *
     * Most probably by now CPU has serviced that pending interrupt and
     * it might not have done the ack_APIC_irq() because it thought,
     * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
     * does not clear the ISR bit and cpu thinks it has already serviced
     * the interrupt. Hence a vector might get locked. It was noticed
     * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
     */
    for (i = APIC_ISR_NR - 1; i >= 0; i--) {
        value = apic_read(APIC_ISR + i*0x10);
        for (j = 31; j >= 0; j--) {
            if (value & (1u << j))
                ack_APIC_irq();
        }
    }

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distribution model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq(). --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;

    /*
     * Enable directed EOI
     */
    if ( directed_eoi_enabled )
    {
        value |= APIC_SPIV_DIRECTED_EOI;
        if ( bsp )
            apic_printk(APIC_VERBOSE, "Suppressing EOI broadcast\n");
    }

    apic_write(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessary in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (bsp && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
                    smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        if (bsp)
            apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
                        smp_processor_id());
    }
    apic_write(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (bsp)
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    apic_write(APIC_LVT1, value);

    if (!esr_disable) {
        maxlvt = get_maxlvt();
        if (maxlvt > 3)     /* Due to the Pentium erratum 3AP. */
            apic_write(APIC_ESR, 0);
        oldvalue = apic_read(APIC_ESR);

        value = ERROR_APIC_VECTOR;      // enables sending errors
        apic_write(APIC_LVTERR, value);
        /*
         * spec says clear errors after enabling vector.
         */
        if (maxlvt > 3)
            apic_write(APIC_ESR, 0);
        value = apic_read(APIC_ESR);
        if (value != oldvalue)
            apic_printk(APIC_VERBOSE, "ESR value before enabling "
                        "vector: %#lx  after: %#lx\n",
                        oldvalue, value);
    } else {
        /*
         * Something untraceable is creating bad interrupts on
         * secondary quads ... for the moment, just leave the
         * ESR disabled - we can't do anything useful with the
         * errors anyway - mbligh
         */
        printk("Leaving ESR disabled.\n");
    }

    if (nmi_watchdog == NMI_LOCAL_APIC && !bsp)
        setup_apic_nmi_watchdog();
    apic_pm_activate();
}
671
/*
 * Save the local APIC register state into apic_pm_state and disable the
 * APIC (and the IOMMU's x2APIC support), for host suspend.  Returns 0.
 */
int lapic_suspend(void)
{
    unsigned long flags;
    int maxlvt = get_maxlvt();
    if (!apic_pm_state.active)
        return 0;

    apic_pm_state.apic_id = apic_read(APIC_ID);
    apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
    apic_pm_state.apic_ldr = apic_read(APIC_LDR);
    apic_pm_state.apic_dfr = apic_read(APIC_DFR);
    apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
    apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
    /* The higher-numbered LVT registers exist only when maxlvt says so. */
    if (maxlvt >= 4)
        apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);

    if (maxlvt >= 6) {
        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
    }

    apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
    apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
    apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
    apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
    apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
    if (maxlvt >= 5)
        apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);

    local_irq_save(flags);
    disable_local_APIC();
    if ( iommu_x2apic_enabled )
        iommu_disable_x2apic();
    local_irq_restore(flags);
    return 0;
}
707
/*
 * Re-enable the local APIC after resume and replay the register state
 * saved by lapic_suspend().  LVTERR is masked before the replay so the
 * intermediate writes can't raise APIC errors.  Returns 0.
 */
int lapic_resume(void)
{
    uint64_t msr_content;
    unsigned long flags;
    int maxlvt;

    if (!apic_pm_state.active)
        return 0;

    local_irq_save(flags);

    /*
     * Make sure the APICBASE points to the right address
     *
     * FIXME! This will be wrong if we ever support suspend on
     * SMP! We'll need to do this as part of the CPU restore!
     */
    if ( !x2apic_enabled )
    {
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~APIC_BASE_ADDR_MASK;
        wrmsrl(MSR_APIC_BASE,
               msr_content | APIC_BASE_ENABLE | mp_lapic_addr);
    }
    else
        resume_x2apic();

    maxlvt = get_maxlvt();
    apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
    apic_write(APIC_ID, apic_pm_state.apic_id);
    apic_write(APIC_DFR, apic_pm_state.apic_dfr);
    apic_write(APIC_LDR, apic_pm_state.apic_ldr);
    apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
    apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
    apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
    apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);

    if (maxlvt >= 6) {
        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
    }

    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
    apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
    apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
    apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
    /* Clear any errors accumulated during the replay, then unmask LVTERR. */
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    local_irq_restore(flags);
    return 0;
}
764
765
766 /*
767 * Detect and enable local APICs on non-SMP boards.
768 * Original code written by Keir Fraser.
769 */
770
/* Handler for the "nolapic" option: force the local APIC off.  @str unused. */
static int __init lapic_disable(const char *str)
{
    enable_local_apic = -1;
    setup_clear_cpu_cap(X86_FEATURE_APIC);
    return 0;
}
777 custom_param("nolapic", lapic_disable);
778 boolean_param("lapic", enable_local_apic);
779
apic_set_verbosity(const char * str)780 static int __init apic_set_verbosity(const char *str)
781 {
782 if (strcmp("debug", str) == 0)
783 apic_verbosity = APIC_DEBUG;
784 else if (strcmp("verbose", str) == 0)
785 apic_verbosity = APIC_VERBOSE;
786 else
787 return -EINVAL;
788
789 return 0;
790 }
791 custom_param("apic_verbosity", apic_set_verbosity);
792
/*
 * Detect the local APIC on the BSP, re-enabling it via MSR_APIC_BASE if
 * the BIOS switched it off and "lapic" was given on the command line.
 * On success records mp_lapic_addr and returns 0; returns -1 when no
 * usable APIC exists (or it was disabled with "nolapic").
 */
static int __init detect_init_APIC (void)
{
    uint64_t msr_content;

    /* Disabled by kernel option? */
    if (enable_local_apic < 0)
        return -1;

    if ( rdmsr_safe(MSR_APIC_BASE, msr_content) )
    {
        printk("No local APIC present\n");
        return -1;
    }

    if (!cpu_has_apic) {
        /*
         * Over-ride BIOS and try to enable the local
         * APIC only if "lapic" specified.
         */
        if (enable_local_apic <= 0) {
            printk("Local APIC disabled by BIOS -- "
                   "you can enable it with \"lapic\"\n");
            return -1;
        }
        /*
         * Some BIOSes disable the local APIC in the
         * APIC_BASE MSR. This can only be done in
         * software for Intel P6 or later and AMD K7
         * (Model > 1) or later.
         */
        if ( !(msr_content & APIC_BASE_ENABLE) )
        {
            printk("Local APIC disabled by BIOS -- reenabling.\n");
            msr_content &= ~APIC_BASE_ADDR_MASK;
            msr_content |= APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            enabled_via_apicbase = true;
        }
    }
    /*
     * The APIC feature bit should now be enabled
     * in `cpuid'
     */
    if (!(cpuid_edx(1) & cpufeat_mask(X86_FEATURE_APIC))) {
        printk("Could not enable APIC!\n");
        return -1;
    }

    setup_force_cpu_cap(X86_FEATURE_APIC);
    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

    /* The BIOS may have set up the APIC at some other address */
    if ( msr_content & APIC_BASE_ENABLE )
        mp_lapic_addr = msr_content & APIC_BASE_ADDR_MASK;

    if (nmi_watchdog != NMI_NONE)
        nmi_watchdog = NMI_LOCAL_APIC;

    printk("Found and enabled local APIC!\n");

    apic_pm_activate();

    return 0;
}
857
/* AP entry point: follow the BSP into x2APIC mode, if it chose that mode. */
void x2apic_ap_setup(void)
{
    if ( x2apic_enabled )
        __enable_x2apic();
}
863
/*
 * Decide on and enable x2APIC mode on the BSP, coordinating with the
 * IOMMU (interrupt remapping must be in x2APIC-capable mode first) and
 * keeping the 8259 and IO-APIC entries masked across the switch.
 */
void __init x2apic_bsp_setup(void)
{
    struct IO_APIC_route_entry **ioapic_entries = NULL;
    const char *orig_name;

    if ( !cpu_has_x2apic )
        return;

    if ( !opt_x2apic )
    {
        if ( !x2apic_enabled )
        {
            printk("Not enabling x2APIC: disabled by cmdline.\n");
            return;
        }
        /* Firmware already entered x2APIC mode - can't go back. */
        printk("x2APIC: Already enabled by BIOS: Ignoring cmdline disable.\n");
    }

    if ( iommu_supports_x2apic() )
    {
        if ( (ioapic_entries = alloc_ioapic_entries()) == NULL )
        {
            printk("Allocate ioapic_entries failed\n");
            goto out;
        }

        if ( save_IO_APIC_setup(ioapic_entries) )
        {
            printk("Saving IO-APIC state failed\n");
            goto out;
        }

        mask_8259A();
        mask_IO_APIC_setup(ioapic_entries);

        switch ( iommu_enable_x2apic() )
        {
        case 0:
            iommu_x2apic_enabled = true;
            break;

        case -ENXIO: /* ACPI_DMAR_X2APIC_OPT_OUT set */
            if ( x2apic_enabled )
                panic("IOMMU requests xAPIC mode, but x2APIC already enabled by firmware\n");

            printk("Not enabling x2APIC (upon firmware request)\n");
            iommu_x2apic_enabled = false;
            goto restore_out;

        default:
            printk(XENLOG_ERR "Failed to enable Interrupt Remapping\n");
            iommu_x2apic_enabled = false;
            break;
        }

        if ( iommu_x2apic_enabled )
            force_iommu = 1;
    }

    if ( !x2apic_enabled )
    {
        x2apic_enabled = true;
        __enable_x2apic();
    }

    /* Switch to the matching (cluster/phys) x2APIC genapic driver. */
    orig_name = genapic.name;
    genapic = *apic_x2apic_probe();
    if ( genapic.name != orig_name )
        printk("Switched to APIC driver %s\n", genapic.name);

restore_out:
    /* iommu_x2apic_enabled cannot be used here in the error case. */
    if ( iommu_supports_x2apic() )
    {
        /*
         * NB: do not use raw mode when restoring entries if the iommu has
         * been enabled during the process, because the entries need to be
         * translated and added to the remapping table in that case.
         */
        restore_IO_APIC_setup(ioapic_entries, !iommu_x2apic_enabled);
        unmask_8259A();
    }

out:
    if ( ioapic_entries )
        free_ioapic_entries(ioapic_entries);
}
951
init_apic_mappings(void)952 void __init init_apic_mappings(void)
953 {
954 unsigned long apic_phys;
955
956 if ( x2apic_enabled )
957 goto __next;
958 /*
959 * If no local APIC can be found then set up a fake all
960 * zeroes page to simulate the local APIC and another
961 * one for the IO-APIC.
962 */
963 if (!smp_found_config && detect_init_APIC()) {
964 apic_phys = __pa(alloc_xenheap_page());
965 clear_page(__va(apic_phys));
966 } else
967 apic_phys = mp_lapic_addr;
968
969 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
970 apic_printk(APIC_VERBOSE, "mapped APIC to %08Lx (%08lx)\n", APIC_BASE,
971 apic_phys);
972
973 __next:
974 /*
975 * Fetch the APIC ID of the BSP in case we have a
976 * default configuration (or the MP table is broken).
977 */
978 if (boot_cpu_physical_apicid == -1U)
979 boot_cpu_physical_apicid = get_apic_id();
980 x86_cpu_to_apicid[0] = get_apic_id();
981
982 ioapic_init();
983 }
984
/*****************************************************************************
 * APIC calibration
 *
 * The APIC is programmed in bus cycles.
 * Timeout values should be specified in real time units.
 * The "cheapest" time source is the cyclecounter.
 *
 * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
 *
 * The calibration is currently a bit shoddy since it requires the external
 * timer chip to generate periodic timer interrupts.
 *****************************************************************************/
997
998 /* used for system time scaling */
999 static u32 __read_mostly bus_scale; /* scaling factor: ns -> bus cycles */
1000
1001 /*
1002 * The timer chip is already set up at HZ interrupts per second here,
1003 * but we do not accept timer interrupts yet. We only allow the BP
1004 * to calibrate.
1005 */
get_8254_timer_count(void)1006 static unsigned int __init get_8254_timer_count(void)
1007 {
1008 /*extern spinlock_t i8253_lock;*/
1009 /*unsigned long flags;*/
1010
1011 unsigned int count;
1012
1013 /*spin_lock_irqsave(&i8253_lock, flags);*/
1014
1015 outb_p(0x00, PIT_MODE);
1016 count = inb_p(PIT_CH0);
1017 count |= inb_p(PIT_CH0) << 8;
1018
1019 /*spin_unlock_irqrestore(&i8253_lock, flags);*/
1020
1021 return count;
1022 }
1023
1024 /* next tick in 8254 can be caught by catching timer wraparound */
wait_8254_wraparound(void)1025 static void __init wait_8254_wraparound(void)
1026 {
1027 unsigned int curr_count, prev_count;
1028
1029 curr_count = get_8254_timer_count();
1030 do {
1031 prev_count = curr_count;
1032 curr_count = get_8254_timer_count();
1033
1034 /* workaround for broken Mercury/Neptune */
1035 if (prev_count >= curr_count + 0x100)
1036 curr_count = get_8254_timer_count();
1037
1038 } while (prev_count >= curr_count);
1039 }
1040
1041 /*
1042 * This function sets up the local APIC timer, with a timeout of
1043 * 'clocks' APIC bus clock. During calibration we actually call
1044 * this function twice on the boot CPU, once with a bogus timeout
1045 * value, second time for real. The other (noncalibrating) CPUs
1046 * call this function only once, with the real, calibrated value.
1047 *
1048 * We do reads before writes even if unnecessary, to get around the
1049 * P5 APIC double write bug.
1050 */
1051
1052 #define APIC_DIVISOR 1
1053
/*
 * Program the local APIC timer LVT entry and arm it.
 * @clocks: initial count in APIC timer ticks (0 leaves the one-shot timer
 *          unarmed); ignored by the hardware timer in TSC-deadline mode.
 */
static void __setup_APIC_LVTT(unsigned int clocks)
{
    unsigned int lvtt_value, tmp_value;

    /* NB. Xen uses local APIC timer in one-shot mode. */
    lvtt_value = /*APIC_TIMER_MODE_PERIODIC |*/ LOCAL_TIMER_VECTOR;

    if ( tdt_enabled )
    {
        lvtt_value &= (~APIC_TIMER_MODE_MASK);
        lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE;
    }

    apic_write(APIC_LVTT, lvtt_value);

    /*
     * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
     * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
     * According to Intel, MFENCE can do the serialization here.
     */
    asm volatile( "mfence" : : : "memory" );

    tmp_value = apic_read(APIC_TDCR);
    apic_write(APIC_TDCR, tmp_value | APIC_TDR_DIV_1);

    apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
1081
/* Set up this CPU's APIC timer LVT (initially unarmed), with IRQs masked. */
static void setup_APIC_timer(void)
{
    unsigned long flags;
    local_irq_save(flags);
    __setup_APIC_LVTT(0);
    local_irq_restore(flags);
}
1089
/*
 * Match an Intel family 6 model advertising TSC_DEADLINE.  driver_data
 * carries either a minimum-good microcode revision, or (distinguished by
 * its MSB being set - see check_deadline_errata()) a function returning
 * one based on the stepping.
 */
#define DEADLINE_MODEL_MATCH(m, fr) \
    { .vendor = X86_VENDOR_INTEL, .family = 6, .model = (m), \
      .feature = X86_FEATURE_TSC_DEADLINE, \
      .driver_data = (void *)(unsigned long)(fr) }
1094
hsx_deadline_rev(void)1095 static unsigned int __init hsx_deadline_rev(void)
1096 {
1097 switch ( boot_cpu_data.x86_mask )
1098 {
1099 case 0x02: return 0x3a; /* EP */
1100 case 0x04: return 0x0f; /* EX */
1101 }
1102
1103 return ~0U;
1104 }
1105
bdx_deadline_rev(void)1106 static unsigned int __init bdx_deadline_rev(void)
1107 {
1108 switch ( boot_cpu_data.x86_mask )
1109 {
1110 case 0x02: return 0x00000011;
1111 case 0x03: return 0x0700000e;
1112 case 0x04: return 0x0f00000c;
1113 case 0x05: return 0x0e000003;
1114 }
1115
1116 return ~0U;
1117 }
1118
skx_deadline_rev(void)1119 static unsigned int __init skx_deadline_rev(void)
1120 {
1121 switch ( boot_cpu_data.x86_mask )
1122 {
1123 case 0x00 ... 0x02: return ~0U;
1124 case 0x03: return 0x01000136;
1125 case 0x04: return 0x02000014;
1126 }
1127
1128 return 0;
1129 }
1130
/*
 * Intel models whose TSC-deadline timer is unreliable below a given
 * microcode revision; consumed by check_deadline_errata().
 */
static const struct x86_cpu_id __initconstrel deadline_match[] = {
    DEADLINE_MODEL_MATCH(0x3c, 0x22),             /* Haswell */
    DEADLINE_MODEL_MATCH(0x3f, hsx_deadline_rev), /* Haswell EP/EX */
    DEADLINE_MODEL_MATCH(0x45, 0x20),             /* Haswell D */
    DEADLINE_MODEL_MATCH(0x46, 0x17),             /* Haswell H */

    DEADLINE_MODEL_MATCH(0x3d, 0x25),             /* Broadwell */
    DEADLINE_MODEL_MATCH(0x47, 0x17),             /* Broadwell H */
    DEADLINE_MODEL_MATCH(0x4f, 0x0b000020),       /* Broadwell EP/EX */
    DEADLINE_MODEL_MATCH(0x56, bdx_deadline_rev), /* Broadwell D */

    DEADLINE_MODEL_MATCH(0x4e, 0xb2),             /* Skylake M */
    DEADLINE_MODEL_MATCH(0x55, skx_deadline_rev), /* Skylake X */
    DEADLINE_MODEL_MATCH(0x5e, 0xb2),             /* Skylake D */

    DEADLINE_MODEL_MATCH(0x8e, 0x52),             /* Kabylake M */
    DEADLINE_MODEL_MATCH(0x9e, 0x52),             /* Kabylake D */

    {}
};
1151
/*
 * Disable X86_FEATURE_TSC_DEADLINE if this CPU's microcode is older than
 * the revision known to fix its TSC-deadline erratum (see deadline_match).
 * Skipped under a hypervisor, where the virtual APIC is not affected.
 */
static void __init check_deadline_errata(void)
{
    const struct x86_cpu_id *m;
    unsigned int rev;

    if ( cpu_has_hypervisor )
        return;

    m = x86_match_cpu(deadline_match);
    if ( !m )
        return;

    /*
     * Function pointers will have the MSB set due to address layout,
     * immediate revisions will not.
     */
    if ( (long)m->driver_data < 0 )
        rev = ((unsigned int (*)(void))(m->driver_data))();
    else
        rev = (unsigned long)m->driver_data;

    /* Microcode already at (or beyond) the fixed revision: nothing to do. */
    if ( this_cpu(cpu_sig).rev >= rev )
        return;

    setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE);
    printk(XENLOG_WARNING "TSC_DEADLINE disabled due to Errata; "
           "please update microcode to version %#x (or later)\n", rev);
}
1180
wait_tick_pvh(void)1181 static void wait_tick_pvh(void)
1182 {
1183 u64 lapse_ns = 1000000000ULL / HZ;
1184 s_time_t start, curr_time;
1185
1186 start = NOW();
1187
1188 /* Won't wrap around */
1189 do {
1190 cpu_relax();
1191 curr_time = NOW();
1192 } while ( curr_time - start < lapse_ns );
1193 }
1194
/*
 * In this function we calibrate APIC bus clocks to the external
 * timer. Unfortunately we cannot use jiffies and the timer irq
 * to calibrate, since some later bootup code depends on getting
 * the first irq? Ugh.
 *
 * We want to do the calibration only once since we
 * want to have local timer irqs syncron. CPUs connected
 * by the same APIC bus have the very same bus frequency.
 * And we want to have irqs off anyways, no accidental
 * APIC irq that way.
 */

static void __init calibrate_APIC_clock(void)
{
    unsigned long long t1, t2;   /* TSC samples bracketing the measurement */
    unsigned long tt1, tt2;      /* APIC current-count samples (counts down) */
    unsigned int i;
    unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
    unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
#define LOOPS_FRAC 10U            /* measure for one tenth of a second */

    apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");

    /*
     * Setup the APIC counter to maximum. There is no way the lapic
     * can underflow in the 100ms detection time frame.
     */
    __setup_APIC_LVTT(0xffffffff);

    if ( !xen_guest )
        /*
         * The timer chip counts down to zero. Let's wait
         * for a wraparound to start exact measurement:
         * (the current tick might have been already half done)
         */
        wait_8254_wraparound();
    else
        /* PVH guests have no 8254 PIT; tick off Xen system time instead. */
        wait_tick_pvh();

    /*
     * We wrapped around just now. Let's start:
     */
    t1 = rdtsc_ordered();
    tt1 = apic_read(APIC_TMCCT);

    /*
     * Let's wait HZ / LOOPS_FRAC ticks:
     */
    for (i = 0; i < HZ / LOOPS_FRAC; i++)
        if ( !xen_guest )
            wait_8254_wraparound();
        else
            wait_tick_pvh();

    tt2 = apic_read(APIC_TMCCT);
    t2 = rdtsc_ordered();

    /* APIC counter decrements, so tt1 - tt2 is counts per 1/LOOPS_FRAC s. */
    bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;

    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
                ((unsigned long)(t2 - t1) * LOOPS_FRAC) / 1000000,
                (((unsigned long)(t2 - t1) * LOOPS_FRAC) / 100) % 10000);

    apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
                bus_freq / 1000000, (bus_freq / 100) % 10000);

    /* set up multipliers for accurate timer code */
    bus_cycle = 1000000000000UL / bus_freq; /* in pico seconds */
    bus_cycle += (1000000000000UL % bus_freq) * 2 > bus_freq; /* round */
    bus_scale = (1000*262144)/bus_cycle;    /* ns -> bus clocks, << 18 */
    bus_scale += ((1000 * 262144) % bus_cycle) * 2 > bus_cycle; /* round */

    apic_printk(APIC_VERBOSE, "..... bus_scale = %#x\n", bus_scale);
    /* reset APIC to zero timeout value */
    __setup_APIC_LVTT(0);

#undef LOOPS_FRAC
}
1274
/*
 * One-time boot-CPU timer bring-up: calibrate the APIC timer, decide
 * whether to use TSC-deadline mode, and program this CPU's LVTT.
 * Must run before secondary CPUs call setup_secondary_APIC_clock().
 */
void __init setup_boot_APIC_clock(void)
{
    unsigned long flags;
    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
    using_apic_timer = true;

    /* May clear X86_FEATURE_TSC_DEADLINE on affected Intel parts. */
    check_deadline_errata();

    /* Calibration must not be disturbed by interrupts. */
    local_irq_save(flags);

    calibrate_APIC_clock();

    /* Honour the "tdt" command line knob as well as the CPUID feature. */
    if ( tdt_enable && boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
    {
        printk(KERN_DEBUG "TSC deadline timer enabled\n");
        tdt_enabled = true;
    }

    setup_APIC_timer();

    local_irq_restore(flags);
}
1297
/*
 * Per-AP timer bring-up: reuses the calibration and mode decisions made
 * by setup_boot_APIC_clock() on the boot CPU.
 */
void setup_secondary_APIC_clock(void)
{
    setup_APIC_timer();
}
1302
disable_APIC_timer(void)1303 void disable_APIC_timer(void)
1304 {
1305 if (using_apic_timer) {
1306 unsigned long v;
1307
1308 /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
1309 apic_write(APIC_TMICT, 0);
1310
1311 v = apic_read(APIC_LVTT);
1312 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1313 }
1314 }
1315
enable_APIC_timer(void)1316 void enable_APIC_timer(void)
1317 {
1318 if (using_apic_timer) {
1319 unsigned long v;
1320
1321 v = apic_read(APIC_LVTT);
1322 apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
1323 }
1324 }
1325
#undef APIC_DIVISOR

/*
 * reprogram_timer: Reprogram the APIC timer.
 * Timeout is a Xen system time (nanoseconds since boot); 0 disables the timer.
 * Returns 1 on success; 0 if the timeout is too soon or is in the past.
 */
int reprogram_timer(s_time_t timeout)
{
    s_time_t expire;
    u32 apic_tmict = 0;

    /* No local APIC: timer list is polled via the PIT interrupt. */
    if ( !cpu_has_apic )
        return 1;

    /* TSC-deadline mode: arm via MSR; writing 0 disarms the timer. */
    if ( tdt_enabled )
    {
        wrmsrl(MSR_IA32_TSC_DEADLINE, timeout ? stime2tsc(timeout) : 0);
        return 1;
    }

    /* Convert the relative expiry (ns) to bus clocks; bus_scale is <<18. */
    if ( timeout && ((expire = timeout - NOW()) > 0) )
        apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX);

    /* A zero initial count leaves the one-shot timer stopped. */
    apic_write(APIC_TMICT, (unsigned long)apic_tmict);

    return apic_tmict || !timeout;
}
1355
/*
 * Local APIC timer interrupt handler: ack the APIC first, then defer the
 * actual timer processing to TIMER_SOFTIRQ.
 */
void apic_timer_interrupt(struct cpu_user_regs * regs)
{
    ack_APIC_irq();
    perfc_incr(apic_timer);
    raise_softirq(TIMER_SOFTIRQ);
}
1362
/* Set when a remote CPU has asked us to dump state; consumed in
 * spurious_interrupt(). */
static DEFINE_PER_CPU(bool, state_dump_pending);

/*
 * Ask @cpu to dump its execution state. The request is flagged and the
 * spurious vector is reused as the notification IPI.
 */
void smp_send_state_dump(unsigned int cpu)
{
    /* We overload the spurious interrupt handler to handle the dump. */
    per_cpu(state_dump_pending, cpu) = true;
    send_IPI_mask(cpumask_of(cpu), SPURIOUS_APIC_VECTOR);
}
1371
1372 /*
1373 * Spurious interrupts should _never_ happen with our APIC/SMP architecture.
1374 */
spurious_interrupt(struct cpu_user_regs * regs)1375 void spurious_interrupt(struct cpu_user_regs *regs)
1376 {
1377 /*
1378 * Check if this is a vectored interrupt (most likely, as this is probably
1379 * a request to dump local CPU state). Vectored interrupts are ACKed;
1380 * spurious interrupts are not.
1381 */
1382 if (apic_isr_read(SPURIOUS_APIC_VECTOR)) {
1383 ack_APIC_irq();
1384 if (this_cpu(state_dump_pending)) {
1385 this_cpu(state_dump_pending) = false;
1386 dump_execstate(regs);
1387 return;
1388 }
1389 }
1390
1391 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1392 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should "
1393 "never happen.\n", smp_processor_id());
1394 }
1395
1396 /*
1397 * This interrupt should never happen with our APIC/SMP architecture
1398 */
1399
error_interrupt(struct cpu_user_regs * regs)1400 void error_interrupt(struct cpu_user_regs *regs)
1401 {
1402 static const char *const esr_fields[] = {
1403 "Send CS error",
1404 "Receive CS error",
1405 "Send accept error",
1406 "Receive accept error",
1407 "Redirectable IPI",
1408 "Send illegal vector",
1409 "Received illegal vector",
1410 "Illegal register address",
1411 };
1412 unsigned int v, v1;
1413 int i;
1414
1415 /* First tickle the hardware, only then report what went on. -- REW */
1416 v = apic_read(APIC_ESR);
1417 apic_write(APIC_ESR, 0);
1418 v1 = apic_read(APIC_ESR);
1419 ack_APIC_irq();
1420
1421 printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)",
1422 smp_processor_id(), v , v1);
1423 for ( i = 7; i >= 0; --i )
1424 if ( v1 & (1 << i) )
1425 printk(", %s", esr_fields[i]);
1426 printk("\n");
1427 }
1428
/*
 * This interrupt handles performance counters interrupt
 */

void pmu_apic_interrupt(struct cpu_user_regs *regs)
{
    /* Ack first, then hand off to the vPMU subsystem. */
    ack_APIC_irq();
    vpmu_do_interrupt(regs);
}
1438
/*
 * This initializes the IO-APIC and APIC hardware if this is
 * a UP kernel.
 * Returns 0 on success, -1 if no usable local APIC is present (in which
 * case IO-APIC setup is skipped as well).
 */
int __init APIC_init_uniprocessor (void)
{
    /* "apic=off" (or equivalent) on the command line: hide the feature. */
    if (enable_local_apic < 0)
        setup_clear_cpu_cap(X86_FEATURE_APIC);

    if (!smp_found_config && !cpu_has_apic) {
        skip_ioapic_setup = true;
        return -1;
    }

    /*
     * Complain if the BIOS pretends there is one.
     */
    if (!cpu_has_apic) {
        printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        skip_ioapic_setup = true;
        return -1;
    }

    verify_local_APIC();

    connect_bsp_APIC();

    /*
     * Hack: In case of kdump, after a crash, kernel might be booting
     * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
     * might be zero if read from MP tables. Get it from LAPIC.
     */
#ifdef CONFIG_CRASH_DUMP
    boot_cpu_physical_apicid = get_apic_id();
#endif
    /* Mark only the boot CPU as physically present. */
    physids_clear(phys_cpu_present_map);
    physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);

    setup_local_APIC(true);

    if (nmi_watchdog == NMI_LOCAL_APIC)
        check_nmi_watchdog();

    if (smp_found_config)
        if (!skip_ioapic_setup && nr_ioapics)
            setup_IO_APIC();

    setup_boot_APIC_clock();

    return 0;
}
1491
apic_mode_to_str(const enum apic_mode mode)1492 static const char * __init apic_mode_to_str(const enum apic_mode mode)
1493 {
1494 switch ( mode )
1495 {
1496 case APIC_MODE_INVALID:
1497 return "invalid";
1498 case APIC_MODE_DISABLED:
1499 return "disabled";
1500 case APIC_MODE_XAPIC:
1501 return "xapic";
1502 case APIC_MODE_X2APIC:
1503 return "x2apic";
1504 default:
1505 return "unrecognised";
1506 }
1507 }
1508
1509 /* Needs to be called during startup. It records the state the BIOS
1510 * leaves the local APIC so we can undo upon kexec.
1511 */
record_boot_APIC_mode(void)1512 void __init record_boot_APIC_mode(void)
1513 {
1514 /* Sanity check - we should only ever run once, but could possibly
1515 * be called several times */
1516 if ( APIC_MODE_INVALID != apic_boot_mode )
1517 return;
1518
1519 apic_boot_mode = current_local_apic_mode();
1520
1521 apic_printk(APIC_DEBUG, "APIC boot state is '%s'\n",
1522 apic_mode_to_str(apic_boot_mode));
1523 }
1524
1525 /* Look at the bits in MSR_APIC_BASE and work out which APIC mode we are in */
current_local_apic_mode(void)1526 enum apic_mode current_local_apic_mode(void)
1527 {
1528 u64 msr_contents;
1529
1530 rdmsrl(MSR_APIC_BASE, msr_contents);
1531
1532 /* Reading EXTD bit from the MSR is only valid if CPUID
1533 * says so, else reserved */
1534 if ( boot_cpu_has(X86_FEATURE_X2APIC) && (msr_contents & APIC_BASE_EXTD) )
1535 return APIC_MODE_X2APIC;
1536
1537 /* EN bit should always be valid as long as we can read the MSR
1538 */
1539 if ( msr_contents & APIC_BASE_ENABLE )
1540 return APIC_MODE_XAPIC;
1541
1542 return APIC_MODE_DISABLED;
1543 }
1544
1545
/* BUG() if @vector is currently pending in this CPU's APIC ISR. */
void check_for_unexpected_msi(unsigned int vector)
{
    BUG_ON(apic_isr_read(vector));
}
1550