1 #ifndef _MCE_H
2
3 #define _MCE_H
4
5 #include <xen/init.h>
6 #include <xen/percpu.h>
7 #include <xen/sched.h>
8 #include <xen/smp.h>
9
10 #include <asm/types.h>
11 #include <asm/traps.h>
12 #include <asm/atomic.h>
13
14 #include "x86_mca.h"
15 #include "mctelem.h"
16
17 #define MCE_QUIET 0
18 #define MCE_VERBOSE 1
/* Developer debugging only: printk is unsafe in MCE context. */
20 #define MCE_CRITICAL 2
21
22 extern int mce_verbosity;
/*
 * Verbosity-gated printk for machine check related messages.
 *
 * A message is printed only when its level does not exceed mce_verbosity.
 * With mce_verbosity=verbose all MCE debug information is printed;
 * otherwise that information is suppressed.
 */
#define mce_printk(lvl, fmt, args...) do {  \
    if ( mce_verbosity >= (lvl) )           \
        printk(fmt, ##args);                \
} while ( 0 )
33
/*
 * Machine check flavour, as returned by the vendor init functions
 * (amd_mcheck_init() / intel_mcheck_init()).
 */
enum mcheck_type {
    mcheck_unset     = -1,
    mcheck_none      = 0,
    mcheck_amd_famXX = 1,
    mcheck_amd_k8    = 2,
    mcheck_intel     = 3,
    mcheck_hygon     = 4,
};
42
43 extern uint8_t cmci_apic_vector;
44 extern bool lmce_support;
45
46 /* Init functions */
47 enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
48 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp);
49
50 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
51
52 extern unsigned int firstbank;
53 extern unsigned int ppin_msr;
54
55 struct mcinfo_extended *intel_get_extended_msrs(
56 struct mcinfo_global *mig, struct mc_info *mi);
57
58 bool mce_available(const struct cpuinfo_x86 *c);
59 unsigned int mce_firstbank(struct cpuinfo_x86 *c);
60 /* Helper functions used for collecting error telemetry */
61 void noreturn mc_panic(char *s);
62 void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
63 uint32_t *, uint32_t *, uint32_t *, uint32_t *);
64
65 /* Register a handler for machine check exceptions. */
66 typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs);
67 extern void x86_mce_vector_register(x86_mce_vector_t);
68
69 /*
70 * Common generic MCE handler that implementations may nominate
71 * via x86_mce_vector_register.
72 */
73 extern void mcheck_cmn_handler(const struct cpu_user_regs *regs);
74
75 /* Register a handler for judging whether mce is recoverable. */
76 typedef bool (*mce_recoverable_t)(uint64_t status);
77 extern void mce_recoverable_register(mce_recoverable_t);
78
79 /* Read an MSR, checking for an interposed value first */
80 extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
81 uint64_t *);
82 extern bool intpose_inval(unsigned int, uint64_t);
83
/*
 * Return the value of @msr for the current processor.
 *
 * intpose_lookup() is consulted first; only when it returns NULL (no
 * interposed entry for this CPU/MSR pair) is the MSR read via rdmsrl().
 */
static inline uint64_t mca_rdmsr(unsigned int msr)
{
    uint64_t value;

    if ( !intpose_lookup(smp_processor_id(), msr, &value) )
        rdmsrl(msr, value);

    return value;
}
91
/*
 * Write an MSR, invalidating any interposed value.
 *
 * intpose_inval() is called for the current CPU and @msr; the wrmsrl() is
 * performed only when it returns false.
 *
 * @msr is evaluated exactly once (it is captured in a local), so arguments
 * with side effects are safe.  @val is evaluated only if the write is
 * actually performed.
 */
#define mca_wrmsr(msr, val) do {                                    \
    const uint64_t mca_wrmsr_msr_ = (msr);                          \
    if ( !intpose_inval(smp_processor_id(), mca_wrmsr_msr_) )       \
        wrmsrl(mca_wrmsr_msr_, (val));                              \
} while ( 0 )
97
98
/*
 * Utility function to "logout" all architectural MCA telemetry from the MCA
 * banks of the current processor.  A cookie is returned which may be
 * used to reference the data so logged (the cookie can be NULL if
 * no logout structures were available).  The caller can also pass a pointer
 * to a structure which will be completed with some summary information
 * about the MCA data observed in the logout operation.
 */
107
/*
 * Identifies who is requesting a logout; passed as the first argument of
 * mcheck_mca_logout() and to the need-clearbank callback.
 */
enum mca_source {
    MCA_POLLER       = 0,
    MCA_CMCI_HANDLER = 1,
    MCA_RESET        = 2,
    MCA_MCE_SCAN     = 3,
};
114
/*
 * Summary information about the MCA data observed during a logout
 * operation; a pointer to this structure is one of the arguments of
 * mcheck_mca_logout().
 */
struct mca_summary {
    uint32_t errcnt; /* number of banks with valid errors */
    int ripv; /* meaningful on #MC */
    int eipv; /* meaningful on #MC */
    bool uc; /* UC flag */
    bool pcc; /* PCC flag */
    bool lmce; /* LMCE flag (Intel only) */
    bool recoverable; /* software error recoverable flag */
};
124
125 DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
126 DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
127 DECLARE_PER_CPU(struct mca_banks *, mce_clear_banks);
128
129 extern bool cmci_support;
130 extern bool is_mc_panic;
131 extern bool mce_broadcast;
132 extern void mcheck_mca_clearbanks(struct mca_banks *);
133
134 extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
135 struct mca_summary *, struct mca_banks *);
136
/*
 * Register callbacks to be made during bank telemetry logout.
 * These callbacks are only available to machine check handlers
 * that call the common mcheck_cmn_handler or that use the common
 * telemetry logout function mcheck_mca_logout in error polling.
 */
143
/* Register a handler for judging whether the bank needs to be cleared */
145 typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status);
146 extern void mce_need_clearbank_register(mce_need_clearbank_t);
147
/*
 * Register a callback to collect additional information (typically non-
 * architectural) provided by newer CPU families/models without the need
 * to duplicate the whole handler resulting in various handlers each with
 * its own tweaks and bugs. The callback receives a struct mc_info pointer
 * which it can use with x86_mcinfo_reserve to add additional telemetry,
 * the current MCA bank number we are reading telemetry from, and the
 * MCi_STATUS value for that bank.
 */
157 typedef struct mcinfo_extended *(*x86_mce_callback_t)
158 (struct mc_info *, uint16_t, uint64_t);
159 extern void x86_mce_callback_register(x86_mce_callback_t);
160
161 void *x86_mcinfo_reserve(struct mc_info *mi,
162 unsigned int size, unsigned int type);
163 void x86_mcinfo_dump(struct mc_info *mi);
164
mce_vendor_bank_msr(const struct vcpu * v,uint32_t msr)165 static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
166 {
167 switch (boot_cpu_data.x86_vendor) {
168 case X86_VENDOR_INTEL:
169 if (msr >= MSR_IA32_MC0_CTL2 &&
170 msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) )
171 return 1;
172 break;
173
174 case X86_VENDOR_AMD:
175 switch (msr) {
176 case MSR_F10_MC4_MISC1:
177 case MSR_F10_MC4_MISC2:
178 case MSR_F10_MC4_MISC3:
179 return 1;
180 }
181 break;
182 }
183 return 0;
184 }
185
mce_bank_msr(const struct vcpu * v,uint32_t msr)186 static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
187 {
188 if ( (msr >= MSR_IA32_MC0_CTL &&
189 msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
190 mce_vendor_bank_msr(v, msr) )
191 return 1;
192 return 0;
193 }
194
195 /* MC softirq */
196 void mce_handler_init(void);
197
198 extern const struct mca_error_handler *mce_dhandlers;
199 extern const struct mca_error_handler *mce_uhandlers;
200 extern unsigned int mce_dhandler_num;
201 extern unsigned int mce_uhandler_num;
202
/*
 * Machine check record; apei_write_mce() takes a pointer to one of these.
 * Fields are zero when not available.
 *
 * NOTE(review): the field comments refer to Linux ("linux cpu number"), so
 * the layout presumably mirrors an external record format -- confirm before
 * reordering or resizing any field.
 */
struct mce {
    uint64_t status;
    uint64_t misc;
    uint64_t addr;
    uint64_t mcgstatus;
    uint64_t ip;
    uint64_t tsc; /* cpu time stamp counter */
    uint64_t time; /* wall time_t when error was detected */
    uint8_t cpuvendor; /* cpu vendor as encoded in system.h */
    uint8_t inject_flags; /* software inject flags */
    uint16_t pad;
    uint32_t cpuid; /* CPUID 1 EAX */
    uint8_t cs; /* code segment */
    uint8_t bank; /* machine check bank */
    uint8_t cpu; /* cpu number; obsolete; use extcpu now */
    uint8_t finished; /* entry is valid */
    uint32_t extcpu; /* linux cpu number that detected the error */
    uint32_t socketid; /* CPU socket ID */
    uint32_t apicid; /* CPU initial apic ID */
    uint64_t mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
};
225
226 extern int apei_write_mce(struct mce *m);
227
228 #endif /* _MCE_H */
229