1 #ifndef _MCE_H
2 
3 #define _MCE_H
4 
5 #include <xen/init.h>
6 #include <xen/percpu.h>
7 #include <xen/sched.h>
8 #include <xen/smp.h>
9 
10 #include <asm/types.h>
11 #include <asm/traps.h>
12 #include <asm/atomic.h>
13 
14 #include "x86_mca.h"
15 #include "mctelem.h"
16 
/*
 * Verbosity levels for machine check related messages, compared against
 * mce_verbosity by mce_printk().
 */
#define MCE_QUIET       0
#define MCE_VERBOSE     1
/* Only for developer debugging, as printk is unsafe in MCE context. */
#define MCE_CRITICAL    2

extern int mce_verbosity;

/*
 * Print a machine check related message of level v.
 *
 * The message is passed to printk() only when v does not exceed the
 * current mce_verbosity.  When mce_verbosity=verbose is set, all MCE debug
 * information is printed; otherwise that information is not printed.
 *
 * v: level of this message (MCE_QUIET, MCE_VERBOSE or MCE_CRITICAL)
 * s: printk format string
 * a: optional arguments for the format string
 */
#define mce_printk(v, s, a...) do {     \
    if ( (v) <= mce_verbosity )         \
        printk(s, ##a);                 \
} while ( 0 )
33 
/*
 * Machine check implementation type, as returned by amd_mcheck_init() and
 * intel_mcheck_init().  mcheck_unset is -1; the remaining enumerators count
 * up from 0 in declaration order.
 */
enum mcheck_type {
    mcheck_unset = -1,
    mcheck_none,          /* 0 */
    mcheck_amd_famXX,     /* 1 */
    mcheck_amd_k8,        /* 2 */
    mcheck_intel,         /* 3 */
    mcheck_hygon          /* 4 */
};
42 
43 extern uint8_t cmci_apic_vector;
44 extern bool lmce_support;
45 
46 /* Init functions */
47 enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
48 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp);
49 
50 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
51 
52 extern unsigned int firstbank;
53 extern unsigned int ppin_msr;
54 
55 struct mcinfo_extended *intel_get_extended_msrs(
56     struct mcinfo_global *mig, struct mc_info *mi);
57 
58 bool mce_available(const struct cpuinfo_x86 *c);
59 unsigned int mce_firstbank(struct cpuinfo_x86 *c);
60 /* Helper functions used for collecting error telemetry */
61 void noreturn mc_panic(char *s);
62 void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
63                          uint32_t *, uint32_t *, uint32_t *, uint32_t *);
64 
/* Register a handler for machine check exceptions. */
typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs);
extern void x86_mce_vector_register(x86_mce_vector_t);

/*
 * Common generic MCE handler that implementations may nominate
 * via x86_mce_vector_register.
 */
extern void mcheck_cmn_handler(const struct cpu_user_regs *regs);

/* Register a handler for judging whether an MCE is recoverable. */
typedef bool (*mce_recoverable_t)(uint64_t status);
extern void mce_recoverable_register(mce_recoverable_t);

/*
 * Interposed MSR values.  mca_rdmsr() reads an MSR, checking for an
 * interposed value first via intpose_lookup(); mca_wrmsr() invalidates any
 * interposed value via intpose_inval().  Both callers pass the current
 * processor id as the first argument and the MSR index as the second.
 */
extern struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
    uint64_t *valp);
extern bool intpose_inval(unsigned int cpu_nr, uint64_t msr);
83 
/*
 * Read an MSR, checking for an interposed value first.
 *
 * intpose_lookup() is consulted for the current processor and msr.  Only
 * when it returns NULL is the MSR read with rdmsrl(); otherwise the function
 * relies on intpose_lookup() having stored the interposed value through the
 * pointer it was given.
 *
 * msr: index of the MSR to read
 *
 * Returns the interposed value if one was found, else the MSR contents.
 */
static inline uint64_t mca_rdmsr(unsigned int msr)
{
    uint64_t value;

    if ( !intpose_lookup(smp_processor_id(), msr, &value) )
        rdmsrl(msr, value);

    return value;
}
91 
/*
 * Write an MSR, invalidating any interposed value.
 *
 * intpose_inval() is called for the current processor and msr; wrmsrl() is
 * issued only when it returns false.  Note that msr is expanded twice on
 * that path and val is not evaluated at all otherwise, so neither argument
 * should have side effects.
 */
#define mca_wrmsr(msr, val) do {                    \
    if ( !intpose_inval(smp_processor_id(), msr) )  \
        wrmsrl(msr, val);                           \
} while ( 0 )
97 
98 
/*
 * Utility function to "logout" all architectural MCA telemetry from the MCA
 * banks of the current processor.  A cookie is returned which may be
 * used to reference the data so logged (the cookie can be NULL if
 * no logout structures were available).  The caller can also pass a pointer
 * to a structure which will be completed with some summary information
 * of the MCA data observed in the logout operation.
 */
107 
/*
 * Identifies who is requesting an MCA logout.  Passed as the first argument
 * of mcheck_mca_logout() and as the "who" argument of the
 * mce_need_clearbank_t callback.  Values count up from 0 in declaration
 * order.
 */
enum mca_source {
    MCA_POLLER,         /* 0 */
    MCA_CMCI_HANDLER,   /* 1 */
    MCA_RESET,          /* 2 */
    MCA_MCE_SCAN        /* 3 */
};
114 
/*
 * Summary information about the MCA data observed during a logout
 * operation; a pointer to this structure is taken by mcheck_mca_logout().
 */
struct mca_summary {
    /* Number of banks with valid errors. */
    uint32_t errcnt;
    /* RIPV; meaningful on #MC. */
    int ripv;
    /* EIPV; meaningful on #MC. */
    int eipv;
    /* UC flag. */
    bool uc;
    /* PCC flag. */
    bool pcc;
    /* LMCE flag (Intel only). */
    bool lmce;
    /* Software error recoverable flag. */
    bool recoverable;
};
124 
125 DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
126 DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
127 DECLARE_PER_CPU(struct mca_banks *, mce_clear_banks);
128 
129 extern bool cmci_support;
130 extern bool is_mc_panic;
131 extern bool mce_broadcast;
132 extern void mcheck_mca_clearbanks(struct mca_banks *);
133 
134 extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
135     struct mca_summary *, struct mca_banks *);
136 
137 /*
138  * Register callbacks to be made during bank telemetry logout.
139  * Those callbacks are only available to those machine check handlers
140  * that call to the common mcheck_cmn_handler or who use the common
141  * telemetry logout function mcheck_mca_logout in error polling.
142  */
143 
144 /* Register a handler for judging whether the bank need to be cleared */
145 typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status);
146 extern void mce_need_clearbank_register(mce_need_clearbank_t);
147 
148 /*
149  * Register a callback to collect additional information (typically non-
150  * architectural) provided by newer CPU families/models without the need
151  * to duplicate the whole handler resulting in various handlers each with
152  * its own tweaks and bugs. The callback receives an struct mc_info pointer
153  * which it can use with x86_mcinfo_reserve to add additional telemetry,
154  * the current MCA bank number we are reading telemetry from, and the
155  * MCi_STATUS value for that bank.
156  */
157 typedef struct mcinfo_extended *(*x86_mce_callback_t)
158     (struct mc_info *, uint16_t, uint64_t);
159 extern void x86_mce_callback_register(x86_mce_callback_t);
160 
161 void *x86_mcinfo_reserve(struct mc_info *mi,
162                          unsigned int size, unsigned int type);
163 void x86_mcinfo_dump(struct mc_info *mi);
164 
mce_vendor_bank_msr(const struct vcpu * v,uint32_t msr)165 static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
166 {
167     switch (boot_cpu_data.x86_vendor) {
168     case X86_VENDOR_INTEL:
169         if (msr >= MSR_IA32_MC0_CTL2 &&
170             msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) )
171             return 1;
172         break;
173 
174     case X86_VENDOR_AMD:
175         switch (msr) {
176         case MSR_F10_MC4_MISC1:
177         case MSR_F10_MC4_MISC2:
178         case MSR_F10_MC4_MISC3:
179             return 1;
180         }
181         break;
182     }
183     return 0;
184 }
185 
mce_bank_msr(const struct vcpu * v,uint32_t msr)186 static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
187 {
188     if ( (msr >= MSR_IA32_MC0_CTL &&
189          msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
190          mce_vendor_bank_msr(v, msr) )
191         return 1;
192     return 0;
193 }
194 
/* MC softirq */
void mce_handler_init(void);

/* Handler tables and their element counts. */
extern const struct mca_error_handler *mce_dhandlers;
extern const struct mca_error_handler *mce_uhandlers;
extern unsigned int mce_dhandler_num;
extern unsigned int mce_uhandler_num;
202 
/*
 * Machine check error record, as taken by apei_write_mce().
 * Fields are zero when not available.
 */
struct mce {
    uint64_t status;
    uint64_t misc;
    uint64_t addr;
    uint64_t mcgstatus;
    uint64_t ip;
    /* CPU time stamp counter. */
    uint64_t tsc;
    /* Wall time_t when the error was detected. */
    uint64_t time;
    /* CPU vendor as encoded in system.h. */
    uint8_t  cpuvendor;
    /* Software inject flags. */
    uint8_t  inject_flags;
    uint16_t pad;
    /* CPUID 1 EAX. */
    uint32_t cpuid;
    /* Code segment. */
    uint8_t  cs;
    /* Machine check bank. */
    uint8_t  bank;
    /* CPU number; obsolete; use extcpu now. */
    uint8_t  cpu;
    /* Entry is valid. */
    uint8_t  finished;
    /* Linux CPU number that detected the error. */
    uint32_t extcpu;
    /* CPU socket ID. */
    uint32_t socketid;
    /* CPU initial APIC ID. */
    uint32_t apicid;
    /* MCGCAP MSR: machine check capabilities of the CPU. */
    uint64_t mcgcap;
};
225 
/* Takes a pointer to the struct mce record to be written. */
extern int apei_write_mce(struct mce *m);
227 
228 #endif /* _MCE_H */
229