1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2012,2013 - ARM Ltd
4 * Author: Marc Zyngier <marc.zyngier@arm.com>
5 *
6 * Derived from arch/arm/kvm/handle_exit.c:
7 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
8 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
9 */
10
11 #include <linux/kvm.h>
12 #include <linux/kvm_host.h>
13
14 #include <asm/esr.h>
15 #include <asm/exception.h>
16 #include <asm/kvm_asm.h>
17 #include <asm/kvm_emulate.h>
18 #include <asm/kvm_mmu.h>
19 #include <asm/kvm_nested.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/stacktrace/nvhe.h>
22 #include <asm/traps.h>
23
24 #include <kvm/arm_hypercalls.h>
25
26 #define CREATE_TRACE_POINTS
27 #include "trace_handle_exit.h"
28
/*
 * Signature shared by the per-exception-class handlers stored in
 * arm_exit_handlers[]. The value a handler returns is passed back unchanged
 * by handle_trap_exceptions() and handle_exit(): > 0 to return to the guest,
 * 0 (with exit_reason set) to exit to userspace, < 0 on error.
 */
typedef int (*exit_handle_fn)(struct kvm_vcpu *vcpu);
30
/*
 * React to an SError attributed to the guest.
 *
 * @vcpu: the vcpu pointer
 * @esr:  syndrome value describing the SError
 *
 * A virtual abort is injected unless @esr is classified as a RAS SError by
 * arm64_is_ras_serror() and arm64_is_fatal_ras_serror() does not report it
 * as fatal.
 */
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
	/* Nothing to inject for a RAS SError that is not reported as fatal. */
	if (arm64_is_ras_serror(esr) && !arm64_is_fatal_ras_serror(NULL, esr))
		return;

	kvm_inject_vabt(vcpu);
}
36
handle_hvc(struct kvm_vcpu * vcpu)37 static int handle_hvc(struct kvm_vcpu *vcpu)
38 {
39 int ret;
40
41 trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
42 kvm_vcpu_hvc_get_imm(vcpu));
43 vcpu->stat.hvc_exit_stat++;
44
45 /* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
46 if (vcpu_has_nv(vcpu)) {
47 if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
48 kvm_inject_undefined(vcpu);
49 else
50 kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
51
52 return 1;
53 }
54
55 ret = kvm_hvc_call_handler(vcpu);
56 if (ret < 0) {
57 vcpu_set_reg(vcpu, 0, ~0UL);
58 return 1;
59 }
60
61 return ret;
62 }
63
/*
 * Handle a trapped SMC instruction.
 *
 * @vcpu: the vcpu pointer
 *
 * Only SMCs issued from the virtual EL2 with an immediate of zero are passed
 * to kvm_hvc_call_handler(); any other SMC is skipped with ~0UL written to
 * register 0. The PC is always advanced past the instruction.
 *
 * Return: 1 to resume the guest, or the non-negative value returned by
 * kvm_hvc_call_handler(). A handler failure is reported to the guest through
 * register 0 and is not propagated as an error, matching handle_hvc().
 */
static int handle_smc(struct kvm_vcpu *vcpu)
{
	int ret;

	/*
	 * "If an SMC instruction executed at Non-secure EL1 is
	 * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
	 * Trap exception, not a Secure Monitor Call exception [...]"
	 *
	 * We need to advance the PC after the trap, as it would
	 * otherwise return to the same address...
	 *
	 * Only handle SMCs from the virtual EL2 with an immediate of zero and
	 * skip it otherwise.
	 */
	if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
		vcpu_set_reg(vcpu, 0, ~0UL);
		kvm_incr_pc(vcpu);
		return 1;
	}

	/*
	 * If imm is zero then it is likely an SMCCC call.
	 *
	 * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
	 * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
	 * being treated as UNDEFINED.
	 */
	ret = kvm_hvc_call_handler(vcpu);
	if (ret < 0) {
		/*
		 * The failure is handed to the guest in register 0, so resume
		 * the guest rather than returning an error from the exit
		 * path; handle_hvc() treats the same failure the same way.
		 */
		vcpu_set_reg(vcpu, 0, ~0UL);
		ret = 1;
	}

	kvm_incr_pc(vcpu);

	return ret;
}
100
/*
 * Handler for trapped guest FP/ASIMD register accesses. Such accesses are
 * routed here only when the system doesn't support FP/ASIMD, so the guest
 * is given an UNDEF.
 *
 * Return: 1, i.e. resume the guest.
 */
static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
110
111 /**
112 * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
113 * instruction executed by a guest
114 *
115 * @vcpu: the vcpu pointer
116 *
117 * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
118 * decides to.
119 * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
120 * world-switches and schedule other host processes until there is an
121 * incoming IRQ or FIQ to the VM.
122 * WFIT: Same as WFI, with a timed wakeup implemented as a background timer
123 *
124 * WF{I,E}T can immediately return if the deadline has already expired.
125 */
kvm_handle_wfx(struct kvm_vcpu * vcpu)126 static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
127 {
128 u64 esr = kvm_vcpu_get_esr(vcpu);
129
130 if (esr & ESR_ELx_WFx_ISS_WFE) {
131 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
132 vcpu->stat.wfe_exit_stat++;
133 } else {
134 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
135 vcpu->stat.wfi_exit_stat++;
136 }
137
138 if (esr & ESR_ELx_WFx_ISS_WFxT) {
139 if (esr & ESR_ELx_WFx_ISS_RV) {
140 u64 val, now;
141
142 now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
143 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
144
145 if (now >= val)
146 goto out;
147 } else {
148 /* Treat WFxT as WFx if RN is invalid */
149 esr &= ~ESR_ELx_WFx_ISS_WFxT;
150 }
151 }
152
153 if (esr & ESR_ELx_WFx_ISS_WFE) {
154 kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
155 } else {
156 if (esr & ESR_ELx_WFx_ISS_WFxT)
157 vcpu_set_flag(vcpu, IN_WFIT);
158
159 kvm_vcpu_wfi(vcpu);
160 }
161 out:
162 kvm_incr_pc(vcpu);
163
164 return 1;
165 }
166
167 /**
168 * kvm_handle_guest_debug - handle a debug exception instruction
169 *
170 * @vcpu: the vcpu pointer
171 *
172 * We route all debug exceptions through the same handler. If both the
173 * guest and host are using the same debug facilities it will be up to
174 * userspace to re-inject the correct exception for guest delivery.
175 *
176 * @return: 0 (while setting vcpu->run->exit_reason)
177 */
kvm_handle_guest_debug(struct kvm_vcpu * vcpu)178 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
179 {
180 struct kvm_run *run = vcpu->run;
181 u64 esr = kvm_vcpu_get_esr(vcpu);
182
183 run->exit_reason = KVM_EXIT_DEBUG;
184 run->debug.arch.hsr = lower_32_bits(esr);
185 run->debug.arch.hsr_high = upper_32_bits(esr);
186 run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
187
188 switch (ESR_ELx_EC(esr)) {
189 case ESR_ELx_EC_WATCHPT_LOW:
190 run->debug.arch.far = vcpu->arch.fault.far_el2;
191 break;
192 case ESR_ELx_EC_SOFTSTP_LOW:
193 vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
194 break;
195 }
196
197 return 0;
198 }
199
kvm_handle_unknown_ec(struct kvm_vcpu * vcpu)200 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
201 {
202 u64 esr = kvm_vcpu_get_esr(vcpu);
203
204 kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
205 esr, esr_get_class_string(esr));
206
207 kvm_inject_undefined(vcpu);
208 return 1;
209 }
210
/*
 * Handler for trapped guest SVE register accesses. Such accesses should be
 * routed here only when the system doesn't support SVE, so the guest is
 * given an UNDEF.
 *
 * Return: 1, i.e. resume the guest.
 */
static int handle_sve(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
220
/*
 * Handler for a guest ptrauth instruction that the guest EL1 did not turn
 * into a NOP. Getting here means ptrauth was not fixed up on exit, so the
 * only thing left to do is to give the guest an UNDEF.
 *
 * Return: 1, i.e. resume the guest.
 */
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
231
kvm_handle_eret(struct kvm_vcpu * vcpu)232 static int kvm_handle_eret(struct kvm_vcpu *vcpu)
233 {
234 if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
235 return kvm_handle_ptrauth(vcpu);
236
237 kvm_emulate_nested_eret(vcpu);
238 return 1;
239 }
240
241 static exit_handle_fn arm_exit_handlers[] = {
242 [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
243 [ESR_ELx_EC_WFx] = kvm_handle_wfx,
244 [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
245 [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
246 [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32,
247 [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store,
248 [ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id,
249 [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64,
250 [ESR_ELx_EC_HVC32] = handle_hvc,
251 [ESR_ELx_EC_SMC32] = handle_smc,
252 [ESR_ELx_EC_HVC64] = handle_hvc,
253 [ESR_ELx_EC_SMC64] = handle_smc,
254 [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
255 [ESR_ELx_EC_SVE] = handle_sve,
256 [ESR_ELx_EC_ERET] = kvm_handle_eret,
257 [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
258 [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
259 [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
260 [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
261 [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
262 [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
263 [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
264 [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd,
265 [ESR_ELx_EC_PAC] = kvm_handle_ptrauth,
266 };
267
kvm_get_exit_handler(struct kvm_vcpu * vcpu)268 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
269 {
270 u64 esr = kvm_vcpu_get_esr(vcpu);
271 u8 esr_ec = ESR_ELx_EC(esr);
272
273 return arm_exit_handlers[esr_ec];
274 }
275
276 /*
277 * We may be single-stepping an emulated instruction. If the emulation
278 * has been completed in the kernel, we can return to userspace with a
279 * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
280 * emulation first.
281 */
handle_trap_exceptions(struct kvm_vcpu * vcpu)282 static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
283 {
284 int handled;
285
286 /*
287 * See ARM ARM B1.14.1: "Hyp traps on instructions
288 * that fail their condition code check"
289 */
290 if (!kvm_condition_valid(vcpu)) {
291 kvm_incr_pc(vcpu);
292 handled = 1;
293 } else {
294 exit_handle_fn exit_handler;
295
296 exit_handler = kvm_get_exit_handler(vcpu);
297 handled = exit_handler(vcpu);
298 }
299
300 return handled;
301 }
302
303 /*
304 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
305 * proper exit to userspace.
306 */
handle_exit(struct kvm_vcpu * vcpu,int exception_index)307 int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
308 {
309 struct kvm_run *run = vcpu->run;
310
311 if (ARM_SERROR_PENDING(exception_index)) {
312 /*
313 * The SError is handled by handle_exit_early(). If the guest
314 * survives it will re-execute the original instruction.
315 */
316 return 1;
317 }
318
319 exception_index = ARM_EXCEPTION_CODE(exception_index);
320
321 switch (exception_index) {
322 case ARM_EXCEPTION_IRQ:
323 return 1;
324 case ARM_EXCEPTION_EL1_SERROR:
325 return 1;
326 case ARM_EXCEPTION_TRAP:
327 return handle_trap_exceptions(vcpu);
328 case ARM_EXCEPTION_HYP_GONE:
329 /*
330 * EL2 has been reset to the hyp-stub. This happens when a guest
331 * is pre-emptied by kvm_reboot()'s shutdown call.
332 */
333 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
334 return 0;
335 case ARM_EXCEPTION_IL:
336 /*
337 * We attempted an illegal exception return. Guest state must
338 * have been corrupted somehow. Give up.
339 */
340 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
341 return -EINVAL;
342 default:
343 kvm_pr_unimpl("Unsupported exception type: %d",
344 exception_index);
345 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
346 return 0;
347 }
348 }
349
350 /* For exit types that need handling before we can be preempted */
handle_exit_early(struct kvm_vcpu * vcpu,int exception_index)351 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
352 {
353 if (ARM_SERROR_PENDING(exception_index)) {
354 if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
355 u64 disr = kvm_vcpu_get_disr(vcpu);
356
357 kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
358 } else {
359 kvm_inject_vabt(vcpu);
360 }
361
362 return;
363 }
364
365 exception_index = ARM_EXCEPTION_CODE(exception_index);
366
367 if (exception_index == ARM_EXCEPTION_EL1_SERROR)
368 kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
369 }
370
nvhe_hyp_panic_handler(u64 esr,u64 spsr,u64 elr_virt,u64 elr_phys,u64 par,uintptr_t vcpu,u64 far,u64 hpfar)371 void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
372 u64 elr_virt, u64 elr_phys,
373 u64 par, uintptr_t vcpu,
374 u64 far, u64 hpfar) {
375 u64 elr_in_kimg = __phys_to_kimg(elr_phys);
376 u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
377 u64 mode = spsr & PSR_MODE_MASK;
378 u64 panic_addr = elr_virt + hyp_offset;
379
380 if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
381 kvm_err("Invalid host exception to nVHE hyp!\n");
382 } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
383 (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
384 const char *file = NULL;
385 unsigned int line = 0;
386
387 /* All hyp bugs, including warnings, are treated as fatal. */
388 if (!is_protected_kvm_enabled() ||
389 IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
390 struct bug_entry *bug = find_bug(elr_in_kimg);
391
392 if (bug)
393 bug_get_file_line(bug, &file, &line);
394 }
395
396 if (file)
397 kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
398 else
399 kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
400 (void *)(panic_addr + kaslr_offset()));
401 } else {
402 kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
403 (void *)(panic_addr + kaslr_offset()));
404 }
405
406 /* Dump the nVHE hypervisor backtrace */
407 kvm_nvhe_dump_backtrace(hyp_offset);
408
409 /*
410 * Hyp has panicked and we're going to handle that by panicking the
411 * kernel. The kernel offset will be revealed in the panic so we're
412 * also safe to reveal the hyp offset as a debugging aid for translating
413 * hyp VAs to vmlinux addresses.
414 */
415 kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
416
417 panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
418 spsr, elr_virt, esr, far, hpfar, par, vcpu);
419 }
420