1 /*
2  *      based on linux-2.6.17.13/arch/i386/kernel/apic.c
3  *
4  *  Local APIC handling, local APIC timers
5  *
6  *  (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
7  *
8  *  Fixes
9  *  Maciej W. Rozycki   :   Bits for genuine 82489DX APICs;
10  *                  thanks to Eric Gilmore
11  *                  and Rolf G. Tews
12  *                  for testing these extensively.
13  *    Maciej W. Rozycki :   Various updates and fixes.
14  *    Mikael Pettersson :   Power Management for UP-APIC.
15  *    Pavel Machek and
16  *    Mikael Pettersson    :    PM converted to driver model.
17  */
18 
19 #include <xen/perfc.h>
20 #include <xen/errno.h>
21 #include <xen/init.h>
22 #include <xen/mm.h>
23 #include <xen/param.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/delay.h>
27 #include <xen/smp.h>
28 #include <xen/softirq.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/microcode.h>
31 #include <asm/msr.h>
32 #include <asm/atomic.h>
33 #include <asm/mpspec.h>
34 #include <asm/flushtlb.h>
35 #include <asm/hardirq.h>
36 #include <asm/apic.h>
37 #include <asm/io_apic.h>
38 #include <mach_apic.h>
39 #include <io_ports.h>
40 #include <irq_vectors.h>
41 #include <xen/kexec.h>
42 #include <asm/guest.h>
43 #include <asm/time.h>
44 
/* TSC deadline timer: whether it is in active use as the APIC timer mode. */
static bool __read_mostly tdt_enabled;
/* Command line "tdt=" - allow use of the TSC deadline timer (default on). */
static bool __initdata tdt_enable = true;
boolean_param("tdt", tdt_enable);

/* Set once iommu_enable_x2apic() has succeeded (interrupt remapping on). */
static bool __read_mostly iommu_x2apic_enabled;

/*
 * Local APIC register snapshot saved by lapic_suspend() and written back
 * by lapic_resume().  Only valid while 'active' is non-zero.
 */
static struct {
    int active;
    /* r/w apic fields */
    unsigned int apic_id;
    unsigned int apic_taskpri;
    unsigned int apic_ldr;
    unsigned int apic_dfr;
    unsigned int apic_spiv;
    unsigned int apic_lvtt;
    unsigned int apic_lvtpc;
    unsigned int apic_lvtcmci;
    unsigned int apic_lvt0;
    unsigned int apic_lvt1;
    unsigned int apic_lvterr;
    unsigned int apic_tmict;
    unsigned int apic_tdcr;
    unsigned int apic_thmr;
} apic_pm_state;

/*
 * Knob to control our willingness to enable the local APIC.
 */
static s8 __initdata enable_local_apic; /* -1=force-disable, +1=force-enable */

/*
 * Debug level for apic_printk() (APIC_QUIET/APIC_VERBOSE/APIC_DEBUG).
 */
u8 __read_mostly apic_verbosity;

/* Command line "x2apic=" - allow switching into x2APIC mode (default on). */
static bool __initdata opt_x2apic = true;
boolean_param("x2apic", opt_x2apic);

/*
 * Bootstrap processor local APIC boot mode - so we can undo our changes
 * to the APIC state.
 */
static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;

/* Set once the BSP has switched the system into x2APIC mode. */
bool __read_mostly x2apic_enabled;
/* Set when the LAPIC supports and uses directed (non-broadcast) EOI. */
bool __read_mostly directed_eoi_enabled;
91 
modern_apic(void)92 static int modern_apic(void)
93 {
94     unsigned int lvr, version;
95     /* AMD systems use old APIC versions, so check the CPU */
96     if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
97         boot_cpu_data.x86 >= 0xf)
98         return 1;
99 
100     /* Hygon systems use modern APIC */
101     if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
102         return 1;
103 
104     lvr = apic_read(APIC_LVR);
105     version = GET_APIC_VERSION(lvr);
106     return version >= 0x14;
107 }
108 
109 /*
110  * 'what should we do if we get a hw irq event on an illegal vector'.
111  * each architecture has to answer this themselves.
112  */
ack_bad_irq(unsigned int irq)113 void ack_bad_irq(unsigned int irq)
114 {
115     printk("unexpected IRQ trap at irq %02x\n", irq);
116     /*
117      * Currently unexpected vectors happen only on SMP and APIC.
118      * We _must_ ack these because every local APIC has only N
119      * irq slots per priority level, and a 'hanging, unacked' IRQ
120      * holds up an irq slot - in excessive cases (when multiple
121      * unexpected vectors occur) that might lock up the APIC
122      * completely.
123      * But only ack when the APIC is enabled -AK
124      */
125     if (cpu_has_apic)
126         ack_APIC_irq();
127 }
128 
/*
 * Register the direct (non-guest) handlers for all locally generated
 * APIC vectors.  Called once on the BSP during interrupt setup.
 */
void __init apic_intr_init(void)
{
    /* IPI vectors used for smp_call_function() etc. */
    smp_intr_init();

    /* self generated IPI for local APIC timer */
    set_direct_apic_vector(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

    /* IPI vectors for APIC spurious and error interrupts */
    set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
    set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);

    /* Performance Counters Interrupt */
    set_direct_apic_vector(PMU_APIC_VECTOR, pmu_apic_interrupt);
}
143 
/* Using APIC to generate smp_local_timer_interrupt? */
static bool __read_mostly using_apic_timer;

/* Set when we had to enable the LAPIC ourselves via MSR_APIC_BASE. */
static bool __read_mostly enabled_via_apicbase;
148 
/*
 * Physical-mode broadcast destination ID: 0xff on integrated APICs,
 * 0xf on old discrete parts which only implement 4-bit APIC IDs.
 */
int get_physical_broadcast(void)
{
    return modern_apic() ? 0xff : 0xf;
}
156 
get_maxlvt(void)157 int get_maxlvt(void)
158 {
159     unsigned int v = apic_read(APIC_LVR);
160 
161     return GET_APIC_MAXLVT(v);
162 }
163 
/*
 * Mask all LVT entries and quiesce the local APIC, leaving it in a state
 * another OS (or a fresh Xen) can safely take over.  The two-pass scheme
 * (mask in place first, then write fully masked entries) and the ordering
 * of the writes below are deliberate - do not reorder.
 */
void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
    apic_write(APIC_TMICT, 0);

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we have to set masks only first to deassert
     * any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
    /* LVTPC, LVTTHMR and CMCI only exist if maxlvt says so. */
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 5) {
        v = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 6) {
        v = apic_read(APIC_CMCI);
        apic_write(APIC_CMCI, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     */
    apic_write(APIC_LVTT, APIC_LVT_MASKED);
    apic_write(APIC_LVT0, APIC_LVT_MASKED);
    apic_write(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, APIC_LVT_MASKED);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
    if (maxlvt >= 6)
        apic_write(APIC_CMCI, APIC_LVT_MASKED);
    /* LDR is read-only in x2APIC mode, so only clear it in xAPIC mode. */
    if (!x2apic_enabled) {
        v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
        apic_write(APIC_LDR, v);
    }

    if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
        apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
}
228 
/*
 * Route the BSP's interrupt inputs through the local APIC.  On old
 * PIC-mode boards this means programming the IMCR (ports 0x22/0x23,
 * see the Intel MultiProcessor Specification) to switch from direct
 * 8259A delivery to APIC delivery.
 */
void __init connect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        clear_local_APIC();
        /*
         * PIC mode, enable APIC mode in the IMCR, i.e.
         * connect BSP's local APIC to INT and NMI lines.
         */
        apic_printk(APIC_VERBOSE, "leaving PIC mode, "
                    "enabling APIC mode.\n");
        /* IMCR select (0x70), then value 0x01 = route via APIC. */
        outb(0x70, 0x22);
        outb(0x01, 0x23);
    }
    enable_apic_mode();
}
247 
/*
 * Undo connect_bsp_APIC(): return the platform to PIC mode or to
 * virtual-wire compatibility mode (e.g. for kexec/reboot paths).
 * @virt_wire_setup: non-zero if LVT0 virtual wire has already been set
 * up elsewhere, in which case only LVT1 (NMI) is reprogrammed here.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards).  Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point!  The only exception are INIT IPIs.
         */
        apic_printk(APIC_VERBOSE, "disabling APIC mode, "
                    "entering PIC mode.\n");
        /* IMCR select (0x70), then value 0x00 = route directly to BSP. */
        outb(0x70, 0x22);
        outb(0x00, 0x23);
    }
    else {
        /* Go back to Virtual Wire compatibility mode */
        unsigned long value;

        clear_local_APIC();

        /* For the spurious interrupt use vector F, and enable it */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
        value |= 0xf;
        apic_write(APIC_SPIV, value);

        if (!virt_wire_setup) {
            /* For LVT0 make it edge triggered, active high, external and enabled */
            value = apic_read(APIC_LVT0);
            value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
            value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
            value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
            apic_write(APIC_LVT0, value);
        }

        /* For LVT1 make it edge triggered, active high, nmi and enabled */
        value = apic_read(APIC_LVT1);
        value &= ~(
            APIC_MODE_MASK | APIC_SEND_PENDING |
            APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
            APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
        apic_write(APIC_LVT1, value);
    }
}
297 
/*
 * Quiesce and software-disable the local APIC, and - when kexecing -
 * put it back into whatever mode (disabled/xAPIC/x2APIC) it was in at
 * boot so the next kernel finds the state it expects.
 */
void disable_local_APIC(void)
{
    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    apic_write(APIC_SPIV, apic_read(APIC_SPIV) & ~APIC_SPIV_APIC_ENABLED);

    /* If we enabled the APIC ourselves at boot, undo that too. */
    if (enabled_via_apicbase) {
        uint64_t msr_content;
        rdmsrl(MSR_APIC_BASE, msr_content);
        wrmsrl(MSR_APIC_BASE, msr_content &
               ~(APIC_BASE_ENABLE | APIC_BASE_EXTD));
    }

    if ( kexecing && (current_local_apic_mode() != apic_boot_mode) )
    {
        uint64_t msr_content;
        /* First drop fully to disabled ... */
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~(APIC_BASE_ENABLE | APIC_BASE_EXTD);
        wrmsrl(MSR_APIC_BASE, msr_content);

        /*
         * ... then step back up to the boot mode.  x2APIC requires two
         * writes: ENABLE must be set before EXTD may be set.
         */
        switch ( apic_boot_mode )
        {
        case APIC_MODE_DISABLED:
            break; /* Nothing to do - we did this above */
        case APIC_MODE_XAPIC:
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        case APIC_MODE_X2APIC:
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            msr_content |= APIC_BASE_EXTD;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        default:
            printk("Default case when reverting #%d lapic to boot state\n",
                   smp_processor_id());
            break;
        }
    }

}
344 
345 /*
346  * This is to verify that we're looking at a real local APIC.
347  * Check these against your board if the CPUs aren't getting
348  * started for no apparent reason.
349  */
/*
 * This is to verify that we're looking at a real local APIC.
 * Check these against your board if the CPUs aren't getting
 * started for no apparent reason.
 *
 * Returns 1 if the APIC looks genuine, 0 otherwise.  As a side effect,
 * detects directed-EOI support and, unless overridden on the command
 * line, switches the IO-APIC to the old ack model to make use of it.
 */
int __init verify_local_APIC(void)
{
    unsigned int reg0, reg1;

    /*
     * The version register is read-only in a real APIC.
     */
    reg0 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);

    /* We don't try writing LVR in x2APIC mode since that incurs #GP. */
    if ( !x2apic_enabled )
        apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
    reg1 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);

    /*
     * The two version reads above should print the same
     * numbers.  If the second one is different, then we
     * poke at a non-APIC.
     */
    if (reg1 != reg0)
        return 0;

    /*
     * Check if the version looks reasonably.
     */
    reg1 = GET_APIC_VERSION(reg0);
    if (reg1 == 0x00 || reg1 == 0xff)
        return 0;
    reg1 = get_maxlvt();
    if (reg1 < 0x02 || reg1 == 0xff)
        return 0;

    /*
     * Detecting directed EOI on BSP:
     * If having directed EOI support in lapic, force to use ioapic_ack_old,
     * and enable the directed EOI for intr handling.
     */
    if ( reg0 & APIC_LVR_DIRECTED_EOI )
    {
        if ( ioapic_ack_new && ioapic_ack_forced )
            printk("Not enabling directed EOI because ioapic_ack_new has been "
                   "forced on the command line\n");
        else
        {
            ioapic_ack_new = false;
            directed_eoi_enabled = true;
            printk("Enabled directed EOI with ioapic_ack_old on!\n");
        }
    }

    /*
     * The ID register is read/write in a real APIC.
     */
    reg0 = apic_read(APIC_ID);
    apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);

    /*
     * The next two are just to see if we have sane values.
     * They're only really relevant if we're in Virtual Wire
     * compatibility mode, but most boxes are anymore.
     */
    reg0 = apic_read(APIC_LVT0);
    apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
    reg1 = apic_read(APIC_LVT1);
    apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);

    return 1;
}
420 
/*
 * Synchronize APIC bus arbitration IDs by broadcasting a level-triggered
 * INIT to all-including-self.  Only meaningful on pre-integrated (old)
 * APIC bus systems.
 */
void __init sync_Arb_IDs(void)
{
    /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1
       And not needed on AMD */
    if (modern_apic())
        return;
    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
    apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
435 
436 /*
437  * An initial setup of the virtual wire mode.
438  */
/*
 * An initial setup of the virtual wire mode on the BSP: enable the APIC
 * via SPIV and route LINT0 as ExtINT and LINT1 as NMI.  Skipped entirely
 * on SMP-capable firmware, where through-IO-APIC virtual wire may be in
 * effect instead.
 */
void __init init_bsp_APIC(void)
{
    unsigned long value;

    /*
     * Don't do the setup now if we have a SMP BIOS as the
     * through-I/O-APIC virtual wire mode might be active.
     */
    if (smp_found_config || !cpu_has_apic)
        return;

    /*
     * Do not trust the local APIC being empty at bootup.
     */
    clear_local_APIC();

    /*
     * Enable APIC.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    value |= APIC_SPIV_APIC_ENABLED;

    /* This bit is reserved on P4/Xeon and should be cleared */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
        value &= ~APIC_SPIV_FOCUS_DISABLED;
    else
        value |= APIC_SPIV_FOCUS_DISABLED;
    value |= SPURIOUS_APIC_VECTOR;
    apic_write(APIC_SPIV, value);

    /*
     * Set up the virtual wire mode.
     */
    apic_write(APIC_LVT0, APIC_DM_EXTINT);
    apic_write(APIC_LVT1, APIC_DM_NMI);
}
476 
/* Mark this APIC's state as eligible for lapic_suspend()/lapic_resume(). */
static void apic_pm_activate(void)
{
    apic_pm_state.active = 1;
}
481 
/* Switch the local APIC into x2APIC mode via MSR_APIC_BASE, if not there. */
static void __enable_x2apic(void)
{
    uint64_t msr_content;

    rdmsrl(MSR_APIC_BASE, msr_content);
    if ( !(msr_content & APIC_BASE_EXTD) )
    {
        msr_content |= APIC_BASE_ENABLE | APIC_BASE_EXTD;
        /*
         * NOTE(review): the cast deliberately truncates the MSR value to
         * its low 32 bits, discarding any base-address bits above 4GiB -
         * presumably to write back a canonical default base; confirm
         * intent before changing.
         */
        msr_content = (uint32_t)msr_content;
        wrmsrl(MSR_APIC_BASE, msr_content);
    }
}
494 
/*
 * Re-enter x2APIC mode after suspend.  IOMMU interrupt remapping must be
 * re-enabled before the APIC itself switches back to x2APIC mode.
 */
static void resume_x2apic(void)
{
    if ( iommu_x2apic_enabled )
        iommu_enable_x2apic();
    __enable_x2apic();
}
501 
/*
 * Fully program this CPU's local APIC: LDR/TPR, stale-ISR cleanup, SPIV
 * (enable + spurious vector + optional directed EOI), the LINT0/LINT1
 * virtual wires, and the error LVT.  @bsp selects BSP-only behaviour
 * (ExtINT/NMI unmasked, verbose logging).  Register write ordering below
 * follows Intel's recommendations - do not reorder.
 */
void setup_local_APIC(bool bsp)
{
    unsigned long oldvalue, value, maxlvt;
    int i, j;

    /* Pound the ESR really hard over the head with a big hammer - mbligh */
    if (esr_disable) {
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
    }

    /* The spurious vector's low nibble must be all ones (hardware req.). */
    BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to reject any interrupts below FIRST_IRQ_VECTOR.
     */
    apic_write(APIC_TASKPRI, (FIRST_IRQ_VECTOR & 0xF0) - 0x10);

    /*
     * After a crash, we no longer service the interrupts and a pending
     * interrupt from previous kernel might still have ISR bit set.
     *
     * Most probably by now CPU has serviced that pending interrupt and
     * it might not have done the ack_APIC_irq() because it thought,
     * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
     * does not clear the ISR bit and cpu thinks it has already serivced
     * the interrupt. Hence a vector might get locked. It was noticed
     * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
     */
    for (i = APIC_ISR_NR - 1; i >= 0; i--) {
        value = apic_read(APIC_ISR + i*0x10);
        for (j = 31; j >= 0; j--) {
            if (value & (1u << j))
                ack_APIC_irq();
        }
    }

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distributon model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq().  --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;

    /*
     * Enable directed EOI
     */
    if ( directed_eoi_enabled )
    {
        value |= APIC_SPIV_DIRECTED_EOI;
        if ( bsp )
            apic_printk(APIC_VERBOSE, "Suppressing EOI broadcast\n");
    }

    apic_write(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessery in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (bsp && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
                    smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        if (bsp)
            apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
                        smp_processor_id());
    }
    apic_write(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (bsp)
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    apic_write(APIC_LVT1, value);

    if (!esr_disable) {
        maxlvt = get_maxlvt();
        if (maxlvt > 3)     /* Due to the Pentium erratum 3AP. */
            apic_write(APIC_ESR, 0);
        oldvalue = apic_read(APIC_ESR);

        value = ERROR_APIC_VECTOR;      // enables sending errors
        apic_write(APIC_LVTERR, value);
        /*
         * spec says clear errors after enabling vector.
         */
        if (maxlvt > 3)
            apic_write(APIC_ESR, 0);
        value = apic_read(APIC_ESR);
        if (value != oldvalue)
            apic_printk(APIC_VERBOSE, "ESR value before enabling "
                        "vector: %#lx  after: %#lx\n",
                        oldvalue, value);
    } else {
        /*
         * Something untraceble is creating bad interrupts on
         * secondary quads ... for the moment, just leave the
         * ESR disabled - we can't do anything useful with the
         * errors anyway - mbligh
         */
        printk("Leaving ESR disabled.\n");
    }

    /* APs pick up the NMI watchdog here; the BSP does it elsewhere. */
    if (nmi_watchdog == NMI_LOCAL_APIC && !bsp)
        setup_apic_nmi_watchdog();
    apic_pm_activate();
}
671 
/*
 * Snapshot all r/w local APIC registers into apic_pm_state, then disable
 * the APIC (and IOMMU x2APIC support) ahead of a suspend.  Returns 0.
 */
int lapic_suspend(void)
{
    unsigned long flags;
    int maxlvt = get_maxlvt();
    /* Nothing to save if apic_pm_activate() never ran. */
    if (!apic_pm_state.active)
        return 0;

    apic_pm_state.apic_id = apic_read(APIC_ID);
    apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
    apic_pm_state.apic_ldr = apic_read(APIC_LDR);
    apic_pm_state.apic_dfr = apic_read(APIC_DFR);
    apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
    apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
    /* Optional LVT entries exist only when maxlvt says so. */
    if (maxlvt >= 4)
        apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);

    if (maxlvt >= 6) {
        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
    }

    apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
    apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
    apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
    apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
    apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
    if (maxlvt >= 5)
        apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);

    /* Tear down with interrupts off: APIC first, then IOMMU remapping. */
    local_irq_save(flags);
    disable_local_APIC();
    if ( iommu_x2apic_enabled )
        iommu_disable_x2apic();
    local_irq_restore(flags);
    return 0;
}
707 
/*
 * Restore the local APIC register state saved by lapic_suspend().
 * Re-establishes the APIC base (or x2APIC mode) first, then writes the
 * registers back in an order that keeps LVTERR masked until the end.
 * Returns 0.
 */
int lapic_resume(void)
{
    uint64_t msr_content;
    unsigned long flags;
    int maxlvt;

    if (!apic_pm_state.active)
        return 0;

    local_irq_save(flags);

    /*
     * Make sure the APICBASE points to the right address
     *
     * FIXME! This will be wrong if we ever support suspend on
     * SMP! We'll need to do this as part of the CPU restore!
     */
    if ( !x2apic_enabled )
    {
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~APIC_BASE_ADDR_MASK;
        wrmsrl(MSR_APIC_BASE,
               msr_content | APIC_BASE_ENABLE | mp_lapic_addr);
    }
    else
        resume_x2apic();

    maxlvt = get_maxlvt();
    /* Keep the error LVT masked until everything else is back in place. */
    apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
    apic_write(APIC_ID, apic_pm_state.apic_id);
    apic_write(APIC_DFR, apic_pm_state.apic_dfr);
    apic_write(APIC_LDR, apic_pm_state.apic_ldr);
    apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
    apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
    apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
    apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);

    if (maxlvt >= 6) {
        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
    }

    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
    apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
    apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
    apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
    /* Clear any errors accumulated during restore, then unmask LVTERR. */
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    local_irq_restore(flags);
    return 0;
}
764 
765 
766 /*
767  * Detect and enable local APICs on non-SMP boards.
768  * Original code written by Keir Fraser.
769  */
770 
/*
 * "nolapic" command line handler: force-disable the local APIC.
 * @str is unused (the option takes no value).  Returns 0 (success).
 */
static int __init lapic_disable(const char *str)
{
    enable_local_apic = -1;
    setup_clear_cpu_cap(X86_FEATURE_APIC);
    return 0;
}
custom_param("nolapic", lapic_disable);
/* "lapic" / "no-lapic": force-enable / force-disable via the tristate. */
boolean_param("lapic", enable_local_apic);
779 
apic_set_verbosity(const char * str)780 static int __init apic_set_verbosity(const char *str)
781 {
782     if (strcmp("debug", str) == 0)
783         apic_verbosity = APIC_DEBUG;
784     else if (strcmp("verbose", str) == 0)
785         apic_verbosity = APIC_VERBOSE;
786     else
787         return -EINVAL;
788 
789     return 0;
790 }
791 custom_param("apic_verbosity", apic_set_verbosity);
792 
/*
 * Detect the local APIC (re-enabling it via MSR_APIC_BASE if the BIOS
 * disabled it and "lapic" was given), record its physical base address,
 * and force the APIC CPU feature.  Returns 0 on success, -1 if no usable
 * APIC is present or it was disabled on the command line.
 */
static int __init detect_init_APIC (void)
{
    uint64_t msr_content;

    /* Disabled by kernel option? */
    if (enable_local_apic < 0)
        return -1;

    if ( rdmsr_safe(MSR_APIC_BASE, msr_content) )
    {
        printk("No local APIC present\n");
        return -1;
    }

    if (!cpu_has_apic) {
        /*
         * Over-ride BIOS and try to enable the local
         * APIC only if "lapic" specified.
         */
        if (enable_local_apic <= 0) {
            printk("Local APIC disabled by BIOS -- "
                   "you can enable it with \"lapic\"\n");
            return -1;
        }
        /*
         * Some BIOSes disable the local APIC in the
         * APIC_BASE MSR. This can only be done in
         * software for Intel P6 or later and AMD K7
         * (Model > 1) or later.
         */
        if ( !(msr_content & APIC_BASE_ENABLE) )
        {
            printk("Local APIC disabled by BIOS -- reenabling.\n");
            msr_content &= ~APIC_BASE_ADDR_MASK;
            msr_content |= APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            /* Remember so disable_local_APIC() can undo this. */
            enabled_via_apicbase = true;
        }
    }
    /*
     * The APIC feature bit should now be enabled
     * in `cpuid'
     */
    if (!(cpuid_edx(1) & cpufeat_mask(X86_FEATURE_APIC))) {
        printk("Could not enable APIC!\n");
        return -1;
    }

    setup_force_cpu_cap(X86_FEATURE_APIC);
    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

    /* The BIOS may have set up the APIC at some other address */
    if ( msr_content & APIC_BASE_ENABLE )
        mp_lapic_addr = msr_content & APIC_BASE_ADDR_MASK;

    if (nmi_watchdog != NMI_NONE)
        nmi_watchdog = NMI_LOCAL_APIC;

    printk("Found and enabled local APIC!\n");

    apic_pm_activate();

    return 0;
}
857 
/* Switch an AP into x2APIC mode iff the BSP already enabled it. */
void x2apic_ap_setup(void)
{
    if ( x2apic_enabled )
        __enable_x2apic();
}
863 
/*
 * BSP-side x2APIC bring-up: honour the "x2apic" command line option,
 * enable IOMMU interrupt remapping if available (saving, masking and
 * later restoring IO-APIC/8259A state around the switch), enable x2APIC
 * mode, and switch to the matching genapic driver.
 */
void __init x2apic_bsp_setup(void)
{
    struct IO_APIC_route_entry **ioapic_entries = NULL;
    const char *orig_name;

    if ( !cpu_has_x2apic )
        return;

    if ( !opt_x2apic )
    {
        if ( !x2apic_enabled )
        {
            printk("Not enabling x2APIC: disabled by cmdline.\n");
            return;
        }
        /* Firmware already switched us; we cannot go back. */
        printk("x2APIC: Already enabled by BIOS: Ignoring cmdline disable.\n");
    }

    if ( iommu_supports_x2apic() )
    {
        if ( (ioapic_entries = alloc_ioapic_entries()) == NULL )
        {
            printk("Allocate ioapic_entries failed\n");
            goto out;
        }

        if ( save_IO_APIC_setup(ioapic_entries) )
        {
            printk("Saving IO-APIC state failed\n");
            goto out;
        }

        /* Quiesce interrupt sources while remapping is reconfigured. */
        mask_8259A();
        mask_IO_APIC_setup(ioapic_entries);

        switch ( iommu_enable_x2apic() )
        {
        case 0:
            iommu_x2apic_enabled = true;
            break;

        case -ENXIO: /* ACPI_DMAR_X2APIC_OPT_OUT set */
            if ( x2apic_enabled )
                panic("IOMMU requests xAPIC mode, but x2APIC already enabled by firmware\n");

            printk("Not enabling x2APIC (upon firmware request)\n");
            iommu_x2apic_enabled = false;
            goto restore_out;

        default:
            printk(XENLOG_ERR "Failed to enable Interrupt Remapping\n");
            iommu_x2apic_enabled = false;
            break;
        }

        /* x2APIC requires remapping, so the IOMMU must stay enabled. */
        if ( iommu_x2apic_enabled )
            force_iommu = 1;
    }

    if ( !x2apic_enabled )
    {
        x2apic_enabled = true;
        __enable_x2apic();
    }

    orig_name = genapic.name;
    genapic = *apic_x2apic_probe();
    if ( genapic.name != orig_name )
        printk("Switched to APIC driver %s\n", genapic.name);

restore_out:
    /* iommu_x2apic_enabled cannot be used here in the error case. */
    if ( iommu_supports_x2apic() )
    {
        /*
         * NB: do not use raw mode when restoring entries if the iommu has
         * been enabled during the process, because the entries need to be
         * translated and added to the remapping table in that case.
         */
        restore_IO_APIC_setup(ioapic_entries, !iommu_x2apic_enabled);
        unmask_8259A();
    }

out:
    if ( ioapic_entries )
        free_ioapic_entries(ioapic_entries);
}
951 
init_apic_mappings(void)952 void __init init_apic_mappings(void)
953 {
954     unsigned long apic_phys;
955 
956     if ( x2apic_enabled )
957         goto __next;
958     /*
959      * If no local APIC can be found then set up a fake all
960      * zeroes page to simulate the local APIC and another
961      * one for the IO-APIC.
962      */
963     if (!smp_found_config && detect_init_APIC()) {
964         apic_phys = __pa(alloc_xenheap_page());
965         clear_page(__va(apic_phys));
966     } else
967         apic_phys = mp_lapic_addr;
968 
969     set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
970     apic_printk(APIC_VERBOSE, "mapped APIC to %08Lx (%08lx)\n", APIC_BASE,
971                 apic_phys);
972 
973 __next:
974     /*
975      * Fetch the APIC ID of the BSP in case we have a
976      * default configuration (or the MP table is broken).
977      */
978     if (boot_cpu_physical_apicid == -1U)
979         boot_cpu_physical_apicid = get_apic_id();
980     x86_cpu_to_apicid[0] = get_apic_id();
981 
982     ioapic_init();
983 }
984 
/*****************************************************************************
 * APIC calibration
 *
 * The APIC is programmed in bus cycles.
 * Timeout values should be specified in real time units.
 * The "cheapest" time source is the cyclecounter.
 *
 * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
 *
 * The calibration is currently a bit shoddy since it requires the external
 * timer chip to generate periodic timer interrupts.
 *****************************************************************************/
997 
998 /* used for system time scaling */
999 static u32 __read_mostly bus_scale; /* scaling factor: ns -> bus cycles */
1000 
1001 /*
1002  * The timer chip is already set up at HZ interrupts per second here,
1003  * but we do not accept timer interrupts yet. We only allow the BP
1004  * to calibrate.
1005  */
get_8254_timer_count(void)1006 static unsigned int __init get_8254_timer_count(void)
1007 {
1008     /*extern spinlock_t i8253_lock;*/
1009     /*unsigned long flags;*/
1010 
1011     unsigned int count;
1012 
1013     /*spin_lock_irqsave(&i8253_lock, flags);*/
1014 
1015     outb_p(0x00, PIT_MODE);
1016     count = inb_p(PIT_CH0);
1017     count |= inb_p(PIT_CH0) << 8;
1018 
1019     /*spin_unlock_irqrestore(&i8253_lock, flags);*/
1020 
1021     return count;
1022 }
1023 
1024 /* next tick in 8254 can be caught by catching timer wraparound */
static void __init wait_8254_wraparound(void)
{
    unsigned int curr_count, prev_count;

    /*
     * The PIT counts down; a wraparound shows up as the count jumping
     * back up (curr_count > prev_count).  Spin until that happens.
     */
    curr_count = get_8254_timer_count();
    do {
        prev_count = curr_count;
        curr_count = get_8254_timer_count();

        /*
         * Workaround for broken Mercury/Neptune chipsets: re-read when
         * the count appears to have dropped implausibly far (>= 0x100)
         * in a single iteration, so a bogus reading isn't mistaken for
         * a wraparound.
         */
        if (prev_count >= curr_count + 0x100)
            curr_count = get_8254_timer_count();

    } while (prev_count >= curr_count);
}
1040 
1041 /*
1042  * This function sets up the local APIC timer, with a timeout of
1043  * 'clocks' APIC bus clock. During calibration we actually call
1044  * this function twice on the boot CPU, once with a bogus timeout
1045  * value, second time for real. The other (noncalibrating) CPUs
1046  * call this function only once, with the real, calibrated value.
1047  *
1048  * We do reads before writes even if unnecessary, to get around the
1049  * P5 APIC double write bug.
1050  */
1051 
#define APIC_DIVISOR 1 /* matches the APIC_TDR_DIV_1 divider programmed in __setup_APIC_LVTT() */
1053 
/*
 * Program the local APIC timer: set the LVTT mode/vector, the divide
 * configuration, and an initial count of 'clocks' bus cycles.  In
 * TSC-deadline mode only the LVTT mode is set here; the deadline itself
 * is armed later via MSR_IA32_TSC_DEADLINE (see reprogram_timer()).
 */
static void __setup_APIC_LVTT(unsigned int clocks)
{
    unsigned int lvtt_value, tmp_value;

    /* NB. Xen uses local APIC timer in one-shot mode. */
    lvtt_value = /*APIC_TIMER_MODE_PERIODIC |*/ LOCAL_TIMER_VECTOR;

    if ( tdt_enabled )
    {
        lvtt_value &= (~APIC_TIMER_MODE_MASK);
        lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE;
    }

    apic_write(APIC_LVTT, lvtt_value);

    /*
     * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
     * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
     * According to Intel, MFENCE can do the serialization here.
     */
    asm volatile( "mfence" : : : "memory" );

    /* Read-before-write, to work around the P5 APIC double write bug. */
    tmp_value = apic_read(APIC_TDCR);
    apic_write(APIC_TDCR, tmp_value | APIC_TDR_DIV_1);

    apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
1081 
/* Arm the local APIC timer on this CPU, with interrupts disabled. */
static void setup_APIC_timer(void)
{
    unsigned long irqflags;

    local_irq_save(irqflags);
    __setup_APIC_LVTT(0);
    local_irq_restore(irqflags);
}
1089 
/*
 * Match an Intel family 6 CPU model advertising TSC_DEADLINE.
 * driver_data carries either the minimum good microcode revision, or
 * (distinguished by the MSB being set, i.e. a kernel address) a function
 * returning that revision for stepping-dependent cases.
 */
#define DEADLINE_MODEL_MATCH(m, fr) \
    { .vendor = X86_VENDOR_INTEL, .family = 6, .model = (m), \
      .feature = X86_FEATURE_TSC_DEADLINE, \
      .driver_data = (void *)(unsigned long)(fr) }
1094 
hsx_deadline_rev(void)1095 static unsigned int __init hsx_deadline_rev(void)
1096 {
1097     switch ( boot_cpu_data.x86_mask )
1098     {
1099     case 0x02: return 0x3a; /* EP */
1100     case 0x04: return 0x0f; /* EX */
1101     }
1102 
1103     return ~0U;
1104 }
1105 
bdx_deadline_rev(void)1106 static unsigned int __init bdx_deadline_rev(void)
1107 {
1108     switch ( boot_cpu_data.x86_mask )
1109     {
1110     case 0x02: return 0x00000011;
1111     case 0x03: return 0x0700000e;
1112     case 0x04: return 0x0f00000c;
1113     case 0x05: return 0x0e000003;
1114     }
1115 
1116     return ~0U;
1117 }
1118 
skx_deadline_rev(void)1119 static unsigned int __init skx_deadline_rev(void)
1120 {
1121     switch ( boot_cpu_data.x86_mask )
1122     {
1123     case 0x00 ... 0x02: return ~0U;
1124     case 0x03: return 0x01000136;
1125     case 0x04: return 0x02000014;
1126     }
1127 
1128     return 0;
1129 }
1130 
/*
 * Intel CPUs whose early microcode has broken TSC-deadline support,
 * mapped to the first fixed microcode revision (or a per-stepping helper).
 * Consumed by check_deadline_errata().
 */
static const struct x86_cpu_id __initconstrel deadline_match[] = {
    DEADLINE_MODEL_MATCH(0x3c, 0x22),             /* Haswell */
    DEADLINE_MODEL_MATCH(0x3f, hsx_deadline_rev), /* Haswell EP/EX */
    DEADLINE_MODEL_MATCH(0x45, 0x20),             /* Haswell D */
    DEADLINE_MODEL_MATCH(0x46, 0x17),             /* Haswell H */

    DEADLINE_MODEL_MATCH(0x3d, 0x25),             /* Broadwell */
    DEADLINE_MODEL_MATCH(0x47, 0x17),             /* Broadwell H */
    DEADLINE_MODEL_MATCH(0x4f, 0x0b000020),       /* Broadwell EP/EX */
    DEADLINE_MODEL_MATCH(0x56, bdx_deadline_rev), /* Broadwell D */

    DEADLINE_MODEL_MATCH(0x4e, 0xb2),             /* Skylake M */
    DEADLINE_MODEL_MATCH(0x55, skx_deadline_rev), /* Skylake X */
    DEADLINE_MODEL_MATCH(0x5e, 0xb2),             /* Skylake D */

    DEADLINE_MODEL_MATCH(0x8e, 0x52),             /* Kabylake M */
    DEADLINE_MODEL_MATCH(0x9e, 0x52),             /* Kabylake D */

    {}
};
1151 
/*
 * Disable X86_FEATURE_TSC_DEADLINE if the running microcode predates the
 * fix for this CPU model/stepping (see deadline_match above in the file).
 */
static void __init check_deadline_errata(void)
{
    const struct x86_cpu_id *m;
    unsigned int rev;

    /*
     * NOTE(review): skipped when virtualized — presumably because the
     * reported microcode revision is not meaningful under a hypervisor.
     */
    if ( cpu_has_hypervisor )
        return;

    m = x86_match_cpu(deadline_match);
    if ( !m )
        return;

    /*
     * Function pointers will have the MSB set due to address layout,
     * immediate revisions will not.
     */
    if ( (long)m->driver_data < 0 )
        rev = ((unsigned int (*)(void))(m->driver_data))();
    else
        rev = (unsigned long)m->driver_data;

    /* Microcode already at (or beyond) the fixed revision: nothing to do. */
    if ( this_cpu(cpu_sig).rev >= rev )
        return;

    setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE);
    printk(XENLOG_WARNING "TSC_DEADLINE disabled due to Errata; "
           "please update microcode to version %#x (or later)\n", rev);
}
1180 
wait_tick_pvh(void)1181 static void wait_tick_pvh(void)
1182 {
1183     u64 lapse_ns = 1000000000ULL / HZ;
1184     s_time_t start, curr_time;
1185 
1186     start = NOW();
1187 
1188     /* Won't wrap around */
1189     do {
1190         cpu_relax();
1191         curr_time = NOW();
1192     } while ( curr_time - start < lapse_ns );
1193 }
1194 
1195 /*
1196  * In this function we calibrate APIC bus clocks to the external
1197  * timer. Unfortunately we cannot use jiffies and the timer irq
1198  * to calibrate, since some later bootup code depends on getting
1199  * the first irq? Ugh.
1200  *
1201  * We want to do the calibration only once since we
 * want to have local timer irqs in sync. CPUs connected
1203  * by the same APIC bus have the very same bus frequency.
1204  * And we want to have irqs off anyways, no accidental
1205  * APIC irq that way.
1206  */
1207 
/*
 * Measure the APIC timer's bus clock frequency against a reference time
 * source (the 8254 PIT, or Xen system time when running as a PVH guest),
 * and derive bus_scale, the ns -> bus-cycle conversion factor used by
 * reprogram_timer().
 */
static void __init calibrate_APIC_clock(void)
{
    unsigned long long t1, t2;
    unsigned long tt1, tt2;
    unsigned int i;
    unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
    unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
#define LOOPS_FRAC 10U      /* measure for one tenth of a second */

    apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");

    /*
     * Setup the APIC counter to maximum. There is no way the lapic
     * can underflow in the 100ms detection time frame.
     */
    __setup_APIC_LVTT(0xffffffff);

    if ( !xen_guest )
        /*
         * The timer chip counts down to zero. Let's wait
         * for a wraparound to start exact measurement:
         * (the current tick might have been already half done)
         */
        wait_8254_wraparound();
    else
        /* As a Xen guest, use system time instead of the PIT. */
        wait_tick_pvh();

    /*
     * We wrapped around just now. Let's start: sample TSC and the APIC
     * current-count register at the interval's start...
     */
    t1 = rdtsc_ordered();
    tt1 = apic_read(APIC_TMCCT);

    /*
     * Let's wait HZ / LOOPS_FRAC ticks:
     */
    for (i = 0; i < HZ / LOOPS_FRAC; i++)
        if ( !xen_guest )
            wait_8254_wraparound();
        else
            wait_tick_pvh();

    /* ... and again at the interval's end. */
    tt2 = apic_read(APIC_TMCCT);
    t2 = rdtsc_ordered();

    /* The APIC counter counts down, so tt1 - tt2 is elapsed bus cycles. */
    bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;

    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
                ((unsigned long)(t2 - t1) * LOOPS_FRAC) / 1000000,
                (((unsigned long)(t2 - t1) * LOOPS_FRAC) / 100) % 10000);

    apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
                bus_freq / 1000000, (bus_freq / 100) % 10000);

    /* set up multipliers for accurate timer code; round to nearest. */
    bus_cycle  = 1000000000000UL / bus_freq; /* in pico seconds */
    bus_cycle += (1000000000000UL % bus_freq) * 2 > bus_freq;
    bus_scale  = (1000*262144)/bus_cycle;
    bus_scale += ((1000 * 262144) % bus_cycle) * 2 > bus_cycle;

    apic_printk(APIC_VERBOSE, "..... bus_scale = %#x\n", bus_scale);
    /* reset APIC to zero timeout value */
    __setup_APIC_LVTT(0);

#undef LOOPS_FRAC
}
1274 
setup_boot_APIC_clock(void)1275 void __init setup_boot_APIC_clock(void)
1276 {
1277     unsigned long flags;
1278     apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
1279     using_apic_timer = true;
1280 
1281     check_deadline_errata();
1282 
1283     local_irq_save(flags);
1284 
1285     calibrate_APIC_clock();
1286 
1287     if ( tdt_enable && boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
1288     {
1289         printk(KERN_DEBUG "TSC deadline timer enabled\n");
1290         tdt_enabled = true;
1291     }
1292 
1293     setup_APIC_timer();
1294 
1295     local_irq_restore(flags);
1296 }
1297 
/*
 * Secondary-CPU counterpart of setup_boot_APIC_clock(): arm the local
 * APIC timer using the calibration already done on the boot CPU.
 */
void setup_secondary_APIC_clock(void)
{
    setup_APIC_timer();
}
1302 
disable_APIC_timer(void)1303 void disable_APIC_timer(void)
1304 {
1305     if (using_apic_timer) {
1306         unsigned long v;
1307 
1308         /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
1309         apic_write(APIC_TMICT, 0);
1310 
1311         v = apic_read(APIC_LVTT);
1312         apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1313     }
1314 }
1315 
enable_APIC_timer(void)1316 void enable_APIC_timer(void)
1317 {
1318     if (using_apic_timer) {
1319         unsigned long v;
1320 
1321         v = apic_read(APIC_LVTT);
1322         apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
1323     }
1324 }
1325 
1326 #undef APIC_DIVISOR
1327 
1328 /*
1329  * reprogram_timer: Reprogram the APIC timer.
1330  * Timeout is a Xen system time (nanoseconds since boot); 0 disables the timer.
1331  * Returns 1 on success; 0 if the timeout is too soon or is in the past.
1332  */
/*
 * reprogram_timer: Reprogram the APIC timer.
 * Timeout is a Xen system time (nanoseconds since boot); 0 disables the timer.
 * Returns 1 on success; 0 if the timeout is too soon or is in the past.
 */
int reprogram_timer(s_time_t timeout)
{
    s_time_t delta;
    u32 ticks = 0;

    /* No local APIC: timer list is polled via the PIT interrupt. */
    if ( !cpu_has_apic )
        return 1;

    if ( tdt_enabled )
    {
        /* TSC-deadline mode: writing 0 disarms the timer. */
        wrmsrl(MSR_IA32_TSC_DEADLINE, timeout ? stime2tsc(timeout) : 0);
        return 1;
    }

    if ( timeout )
    {
        /* Convert the remaining time to bus cycles, clamped to 32 bits. */
        delta = timeout - NOW();
        if ( delta > 0 )
            ticks = min_t(u64, (bus_scale * delta) >> 18, UINT_MAX);
    }

    apic_write(APIC_TMICT, (unsigned long)ticks);

    return ticks || !timeout;
}
1355 
/*
 * Local APIC timer interrupt handler: ACK the interrupt and defer all
 * actual timer processing to TIMER_SOFTIRQ context.
 */
void apic_timer_interrupt(struct cpu_user_regs * regs)
{
    ack_APIC_irq();
    perfc_incr(apic_timer);
    raise_softirq(TIMER_SOFTIRQ);
}
1362 
/* Set when a remote CPU has requested that this CPU dump its state. */
static DEFINE_PER_CPU(bool, state_dump_pending);

/*
 * Ask 'cpu' to dump its execution state, delivered via the spurious
 * interrupt vector (see spurious_interrupt()).
 */
void smp_send_state_dump(unsigned int cpu)
{
    /* We overload the spurious interrupt handler to handle the dump. */
    per_cpu(state_dump_pending, cpu) = true;
    send_IPI_mask(cpumask_of(cpu), SPURIOUS_APIC_VECTOR);
}
1371 
1372 /*
1373  * Spurious interrupts should _never_ happen with our APIC/SMP architecture.
1374  */
spurious_interrupt(struct cpu_user_regs * regs)1375 void spurious_interrupt(struct cpu_user_regs *regs)
1376 {
1377     /*
1378      * Check if this is a vectored interrupt (most likely, as this is probably
1379      * a request to dump local CPU state). Vectored interrupts are ACKed;
1380      * spurious interrupts are not.
1381      */
1382     if (apic_isr_read(SPURIOUS_APIC_VECTOR)) {
1383         ack_APIC_irq();
1384         if (this_cpu(state_dump_pending)) {
1385             this_cpu(state_dump_pending) = false;
1386             dump_execstate(regs);
1387             return;
1388         }
1389     }
1390 
1391     /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1392     printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should "
1393            "never happen.\n", smp_processor_id());
1394 }
1395 
1396 /*
1397  * This interrupt should never happen with our APIC/SMP architecture
1398  */
1399 
error_interrupt(struct cpu_user_regs * regs)1400 void error_interrupt(struct cpu_user_regs *regs)
1401 {
1402     static const char *const esr_fields[] = {
1403         "Send CS error",
1404         "Receive CS error",
1405         "Send accept error",
1406         "Receive accept error",
1407         "Redirectable IPI",
1408         "Send illegal vector",
1409         "Received illegal vector",
1410         "Illegal register address",
1411     };
1412     unsigned int v, v1;
1413     int i;
1414 
1415     /* First tickle the hardware, only then report what went on. -- REW */
1416     v = apic_read(APIC_ESR);
1417     apic_write(APIC_ESR, 0);
1418     v1 = apic_read(APIC_ESR);
1419     ack_APIC_irq();
1420 
1421     printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)",
1422             smp_processor_id(), v , v1);
1423     for ( i = 7; i >= 0; --i )
1424         if ( v1 & (1 << i) )
1425             printk(", %s", esr_fields[i]);
1426     printk("\n");
1427 }
1428 
1429 /*
1430  * This interrupt handles performance counters interrupt
1431  */
1432 
pmu_apic_interrupt(struct cpu_user_regs * regs)1433 void pmu_apic_interrupt(struct cpu_user_regs *regs)
1434 {
1435     ack_APIC_irq();
1436     vpmu_do_interrupt(regs);
1437 }
1438 
1439 /*
1440  * This initializes the IO-APIC and APIC hardware if this is
1441  * a UP kernel.
1442  */
/*
 * Initialize the local APIC (and IO-APIC, if present) for a UP setup.
 * Returns 0 on success, -1 if no usable local APIC exists.
 */
int __init APIC_init_uniprocessor (void)
{
    /* Administratively disabled ("apic" knob): hide the feature entirely. */
    if (enable_local_apic < 0)
        setup_clear_cpu_cap(X86_FEATURE_APIC);

    /* Neither an MP table nor a local APIC: nothing we can drive. */
    if (!smp_found_config && !cpu_has_apic) {
        skip_ioapic_setup = true;
        return -1;
    }

    /*
     * Complain if the BIOS pretends there is one.
     */
    if (!cpu_has_apic) {
        printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        skip_ioapic_setup = true;
        return -1;
    }

    verify_local_APIC();

    connect_bsp_APIC();

    /*
     * Hack: In case of kdump, after a crash, kernel might be booting
     * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
     * might be zero if read from MP tables. Get it from LAPIC.
     */
#ifdef CONFIG_CRASH_DUMP
    boot_cpu_physical_apicid = get_apic_id();
#endif
    physids_clear(phys_cpu_present_map);
    physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);

    setup_local_APIC(true);

    if (nmi_watchdog == NMI_LOCAL_APIC)
        check_nmi_watchdog();

    /* Bring up the IO-APIC(s) if the MP table described any. */
    if (smp_found_config)
        if (!skip_ioapic_setup && nr_ioapics)
            setup_IO_APIC();

    setup_boot_APIC_clock();

    return 0;
}
1491 
apic_mode_to_str(const enum apic_mode mode)1492 static const char * __init apic_mode_to_str(const enum apic_mode mode)
1493 {
1494     switch ( mode )
1495     {
1496         case APIC_MODE_INVALID:
1497             return "invalid";
1498         case APIC_MODE_DISABLED:
1499             return "disabled";
1500         case APIC_MODE_XAPIC:
1501             return "xapic";
1502         case APIC_MODE_X2APIC:
1503             return "x2apic";
1504         default:
1505             return "unrecognised";
1506     }
1507 }
1508 
1509 /* Needs to be called during startup.  It records the state the BIOS
1510  * leaves the local APIC so we can undo upon kexec.
1511  */
record_boot_APIC_mode(void)1512 void __init record_boot_APIC_mode(void)
1513 {
1514     /* Sanity check - we should only ever run once, but could possibly
1515      * be called several times */
1516     if ( APIC_MODE_INVALID != apic_boot_mode )
1517         return;
1518 
1519     apic_boot_mode = current_local_apic_mode();
1520 
1521     apic_printk(APIC_DEBUG, "APIC boot state is '%s'\n",
1522                 apic_mode_to_str(apic_boot_mode));
1523 }
1524 
1525 /* Look at the bits in MSR_APIC_BASE and work out which APIC mode we are in */
current_local_apic_mode(void)1526 enum apic_mode current_local_apic_mode(void)
1527 {
1528     u64 msr_contents;
1529 
1530     rdmsrl(MSR_APIC_BASE, msr_contents);
1531 
1532     /* Reading EXTD bit from the MSR is only valid if CPUID
1533      * says so, else reserved */
1534     if ( boot_cpu_has(X86_FEATURE_X2APIC) && (msr_contents & APIC_BASE_EXTD) )
1535         return APIC_MODE_X2APIC;
1536 
1537     /* EN bit should always be valid as long as we can read the MSR
1538      */
1539     if ( msr_contents & APIC_BASE_ENABLE )
1540         return APIC_MODE_XAPIC;
1541 
1542     return APIC_MODE_DISABLED;
1543 }
1544 
1545 
/* BUG if 'vector' is currently set in the local APIC in-service register. */
void check_for_unexpected_msi(unsigned int vector)
{
    BUG_ON(apic_isr_read(vector));
}
1550