1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2015 Linaro Ltd.
4 * Author: Shannon Zhao <shannon.zhao@linaro.org>
5 */
6
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/perf_event.h>
11 #include <linux/perf/arm_pmu.h>
12 #include <linux/uaccess.h>
13 #include <asm/kvm_emulate.h>
14 #include <kvm/arm_pmu.h>
15 #include <kvm/arm_vgic.h>
16
17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20
21 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
22
kvm_pmu_event_mask(struct kvm * kvm)23 static u32 kvm_pmu_event_mask(struct kvm *kvm)
24 {
25 switch (kvm->arch.pmuver) {
26 case ID_AA64DFR0_PMUVER_8_0:
27 return GENMASK(9, 0);
28 case ID_AA64DFR0_PMUVER_8_1:
29 case ID_AA64DFR0_PMUVER_8_4:
30 case ID_AA64DFR0_PMUVER_8_5:
31 return GENMASK(15, 0);
32 default: /* Shouldn't be here, just for sanity */
33 WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
34 return 0;
35 }
36 }
37
38 /**
39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
40 * @vcpu: The vcpu pointer
41 * @select_idx: The counter index
42 */
kvm_pmu_idx_is_64bit(struct kvm_vcpu * vcpu,u64 select_idx)43 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
44 {
45 return (select_idx == ARMV8_PMU_CYCLE_IDX &&
46 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
47 }
48
kvm_pmc_to_vcpu(struct kvm_pmc * pmc)49 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
50 {
51 struct kvm_pmu *pmu;
52 struct kvm_vcpu_arch *vcpu_arch;
53
54 pmc -= pmc->idx;
55 pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
56 vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
57 return container_of(vcpu_arch, struct kvm_vcpu, arch);
58 }
59
60 /**
61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
62 * @pmc: The PMU counter pointer
63 */
kvm_pmu_pmc_is_chained(struct kvm_pmc * pmc)64 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
65 {
66 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
67
68 return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
69 }
70
71 /**
72 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
73 * @select_idx: The counter index
74 */
kvm_pmu_idx_is_high_counter(u64 select_idx)75 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
76 {
77 return select_idx & 0x1;
78 }
79
80 /**
81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
82 * @pmc: The PMU counter pointer
83 *
84 * When a pair of PMCs are chained together we use the low counter (canonical)
85 * to hold the underlying perf event.
86 */
kvm_pmu_get_canonical_pmc(struct kvm_pmc * pmc)87 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
88 {
89 if (kvm_pmu_pmc_is_chained(pmc) &&
90 kvm_pmu_idx_is_high_counter(pmc->idx))
91 return pmc - 1;
92
93 return pmc;
94 }
kvm_pmu_get_alternate_pmc(struct kvm_pmc * pmc)95 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
96 {
97 if (kvm_pmu_idx_is_high_counter(pmc->idx))
98 return pmc - 1;
99 else
100 return pmc + 1;
101 }
102
103 /**
104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 * @vcpu: The vcpu pointer
106 * @select_idx: The counter index
107 */
kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu * vcpu,u64 select_idx)108 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
109 {
110 u64 eventsel, reg;
111
112 select_idx |= 0x1;
113
114 if (select_idx == ARMV8_PMU_CYCLE_IDX)
115 return false;
116
117 reg = PMEVTYPER0_EL0 + select_idx;
118 eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
119
120 return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
121 }
122
123 /**
124 * kvm_pmu_get_pair_counter_value - get PMU counter value
125 * @vcpu: The vcpu pointer
126 * @pmc: The PMU counter pointer
127 */
kvm_pmu_get_pair_counter_value(struct kvm_vcpu * vcpu,struct kvm_pmc * pmc)128 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
129 struct kvm_pmc *pmc)
130 {
131 u64 counter, counter_high, reg, enabled, running;
132
133 if (kvm_pmu_pmc_is_chained(pmc)) {
134 pmc = kvm_pmu_get_canonical_pmc(pmc);
135 reg = PMEVCNTR0_EL0 + pmc->idx;
136
137 counter = __vcpu_sys_reg(vcpu, reg);
138 counter_high = __vcpu_sys_reg(vcpu, reg + 1);
139
140 counter = lower_32_bits(counter) | (counter_high << 32);
141 } else {
142 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
143 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
144 counter = __vcpu_sys_reg(vcpu, reg);
145 }
146
147 /*
148 * The real counter value is equal to the value of counter register plus
149 * the value perf event counts.
150 */
151 if (pmc->perf_event)
152 counter += perf_event_read_value(pmc->perf_event, &enabled,
153 &running);
154
155 return counter;
156 }
157
158 /**
159 * kvm_pmu_get_counter_value - get PMU counter value
160 * @vcpu: The vcpu pointer
161 * @select_idx: The counter index
162 */
kvm_pmu_get_counter_value(struct kvm_vcpu * vcpu,u64 select_idx)163 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
164 {
165 u64 counter;
166 struct kvm_pmu *pmu = &vcpu->arch.pmu;
167 struct kvm_pmc *pmc = &pmu->pmc[select_idx];
168
169 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
170
171 if (kvm_pmu_pmc_is_chained(pmc) &&
172 kvm_pmu_idx_is_high_counter(select_idx))
173 counter = upper_32_bits(counter);
174 else if (select_idx != ARMV8_PMU_CYCLE_IDX)
175 counter = lower_32_bits(counter);
176
177 return counter;
178 }
179
180 /**
181 * kvm_pmu_set_counter_value - set PMU counter value
182 * @vcpu: The vcpu pointer
183 * @select_idx: The counter index
184 * @val: The counter value
185 */
kvm_pmu_set_counter_value(struct kvm_vcpu * vcpu,u64 select_idx,u64 val)186 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
187 {
188 u64 reg;
189
190 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
191 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
192 __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
193
194 /* Recreate the perf event to reflect the updated sample_period */
195 kvm_pmu_create_perf_event(vcpu, select_idx);
196 }
197
198 /**
199 * kvm_pmu_release_perf_event - remove the perf event
200 * @pmc: The PMU counter pointer
201 */
kvm_pmu_release_perf_event(struct kvm_pmc * pmc)202 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
203 {
204 pmc = kvm_pmu_get_canonical_pmc(pmc);
205 if (pmc->perf_event) {
206 perf_event_disable(pmc->perf_event);
207 perf_event_release_kernel(pmc->perf_event);
208 pmc->perf_event = NULL;
209 }
210 }
211
212 /**
213 * kvm_pmu_stop_counter - stop PMU counter
214 * @pmc: The PMU counter pointer
215 *
216 * If this counter has been configured to monitor some event, release it here.
217 */
kvm_pmu_stop_counter(struct kvm_vcpu * vcpu,struct kvm_pmc * pmc)218 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
219 {
220 u64 counter, reg, val;
221
222 pmc = kvm_pmu_get_canonical_pmc(pmc);
223 if (!pmc->perf_event)
224 return;
225
226 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
227
228 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
229 reg = PMCCNTR_EL0;
230 val = counter;
231 } else {
232 reg = PMEVCNTR0_EL0 + pmc->idx;
233 val = lower_32_bits(counter);
234 }
235
236 __vcpu_sys_reg(vcpu, reg) = val;
237
238 if (kvm_pmu_pmc_is_chained(pmc))
239 __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
240
241 kvm_pmu_release_perf_event(pmc);
242 }
243
244 /**
245 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
246 * @vcpu: The vcpu pointer
247 *
248 */
kvm_pmu_vcpu_init(struct kvm_vcpu * vcpu)249 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
250 {
251 int i;
252 struct kvm_pmu *pmu = &vcpu->arch.pmu;
253
254 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
255 pmu->pmc[i].idx = i;
256 }
257
258 /**
259 * kvm_pmu_vcpu_reset - reset pmu state for cpu
260 * @vcpu: The vcpu pointer
261 *
262 */
kvm_pmu_vcpu_reset(struct kvm_vcpu * vcpu)263 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
264 {
265 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266 struct kvm_pmu *pmu = &vcpu->arch.pmu;
267 int i;
268
269 for_each_set_bit(i, &mask, 32)
270 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
271
272 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
273 }
274
275 /**
276 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
277 * @vcpu: The vcpu pointer
278 *
279 */
kvm_pmu_vcpu_destroy(struct kvm_vcpu * vcpu)280 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
281 {
282 int i;
283 struct kvm_pmu *pmu = &vcpu->arch.pmu;
284
285 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
286 kvm_pmu_release_perf_event(&pmu->pmc[i]);
287 irq_work_sync(&vcpu->arch.pmu.overflow_work);
288 }
289
kvm_pmu_valid_counter_mask(struct kvm_vcpu * vcpu)290 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
291 {
292 u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
293
294 val &= ARMV8_PMU_PMCR_N_MASK;
295 if (val == 0)
296 return BIT(ARMV8_PMU_CYCLE_IDX);
297 else
298 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
299 }
300
301 /**
302 * kvm_pmu_enable_counter_mask - enable selected PMU counters
303 * @vcpu: The vcpu pointer
304 * @val: the value guest writes to PMCNTENSET register
305 *
306 * Call perf_event_enable to start counting the perf event
307 */
kvm_pmu_enable_counter_mask(struct kvm_vcpu * vcpu,u64 val)308 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
309 {
310 int i;
311 struct kvm_pmu *pmu = &vcpu->arch.pmu;
312 struct kvm_pmc *pmc;
313
314 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
315 return;
316
317 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
318 if (!(val & BIT(i)))
319 continue;
320
321 pmc = &pmu->pmc[i];
322
323 /* A change in the enable state may affect the chain state */
324 kvm_pmu_update_pmc_chained(vcpu, i);
325 kvm_pmu_create_perf_event(vcpu, i);
326
327 /* At this point, pmc must be the canonical */
328 if (pmc->perf_event) {
329 perf_event_enable(pmc->perf_event);
330 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
331 kvm_debug("fail to enable perf event\n");
332 }
333 }
334 }
335
336 /**
337 * kvm_pmu_disable_counter_mask - disable selected PMU counters
338 * @vcpu: The vcpu pointer
339 * @val: the value guest writes to PMCNTENCLR register
340 *
341 * Call perf_event_disable to stop counting the perf event
342 */
kvm_pmu_disable_counter_mask(struct kvm_vcpu * vcpu,u64 val)343 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
344 {
345 int i;
346 struct kvm_pmu *pmu = &vcpu->arch.pmu;
347 struct kvm_pmc *pmc;
348
349 if (!val)
350 return;
351
352 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
353 if (!(val & BIT(i)))
354 continue;
355
356 pmc = &pmu->pmc[i];
357
358 /* A change in the enable state may affect the chain state */
359 kvm_pmu_update_pmc_chained(vcpu, i);
360 kvm_pmu_create_perf_event(vcpu, i);
361
362 /* At this point, pmc must be the canonical */
363 if (pmc->perf_event)
364 perf_event_disable(pmc->perf_event);
365 }
366 }
367
kvm_pmu_overflow_status(struct kvm_vcpu * vcpu)368 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
369 {
370 u64 reg = 0;
371
372 if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
373 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
374 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
375 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
376 }
377
378 return reg;
379 }
380
kvm_pmu_update_state(struct kvm_vcpu * vcpu)381 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
382 {
383 struct kvm_pmu *pmu = &vcpu->arch.pmu;
384 bool overflow;
385
386 if (!kvm_vcpu_has_pmu(vcpu))
387 return;
388
389 overflow = !!kvm_pmu_overflow_status(vcpu);
390 if (pmu->irq_level == overflow)
391 return;
392
393 pmu->irq_level = overflow;
394
395 if (likely(irqchip_in_kernel(vcpu->kvm))) {
396 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
397 pmu->irq_num, overflow, pmu);
398 WARN_ON(ret);
399 }
400 }
401
kvm_pmu_should_notify_user(struct kvm_vcpu * vcpu)402 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
403 {
404 struct kvm_pmu *pmu = &vcpu->arch.pmu;
405 struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
406 bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
407
408 if (likely(irqchip_in_kernel(vcpu->kvm)))
409 return false;
410
411 return pmu->irq_level != run_level;
412 }
413
414 /*
415 * Reflect the PMU overflow interrupt output level into the kvm_run structure
416 */
kvm_pmu_update_run(struct kvm_vcpu * vcpu)417 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
418 {
419 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
420
421 /* Populate the timer bitmap for user space */
422 regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
423 if (vcpu->arch.pmu.irq_level)
424 regs->device_irq_level |= KVM_ARM_DEV_PMU;
425 }
426
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluates the PMU interrupt level so that an overflow that happened
 * while running in the host results in an interrupt being injected.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
438
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluates the PMU interrupt level so that an overflow that happened
 * while running in the guest results in an interrupt being injected.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
450
451 /**
452 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
453 * to the event.
454 * This is why we need a callback to do it once outside of the NMI context.
455 */
kvm_pmu_perf_overflow_notify_vcpu(struct irq_work * work)456 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
457 {
458 struct kvm_vcpu *vcpu;
459 struct kvm_pmu *pmu;
460
461 pmu = container_of(work, struct kvm_pmu, overflow_work);
462 vcpu = kvm_pmc_to_vcpu(pmu->pmc);
463
464 kvm_vcpu_kick(vcpu);
465 }
466
467 /**
468 * When the perf event overflows, set the overflow status and inform the vcpu.
469 */
kvm_pmu_perf_overflow(struct perf_event * perf_event,struct perf_sample_data * data,struct pt_regs * regs)470 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
471 struct perf_sample_data *data,
472 struct pt_regs *regs)
473 {
474 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
475 struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
476 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
477 int idx = pmc->idx;
478 u64 period;
479
480 cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
481
482 /*
483 * Reset the sample period to the architectural limit,
484 * i.e. the point where the counter overflows.
485 */
486 period = -(local64_read(&perf_event->count));
487
488 if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
489 period &= GENMASK(31, 0);
490
491 local64_set(&perf_event->hw.period_left, 0);
492 perf_event->attr.sample_period = period;
493 perf_event->hw.sample_period = period;
494
495 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
496
497 if (kvm_pmu_overflow_status(vcpu)) {
498 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
499
500 if (!in_nmi())
501 kvm_vcpu_kick(vcpu);
502 else
503 irq_work_queue(&vcpu->arch.pmu.overflow_work);
504 }
505
506 cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
507 }
508
509 /**
510 * kvm_pmu_software_increment - do software increment
511 * @vcpu: The vcpu pointer
512 * @val: the value guest writes to PMSWINC register
513 */
kvm_pmu_software_increment(struct kvm_vcpu * vcpu,u64 val)514 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
515 {
516 struct kvm_pmu *pmu = &vcpu->arch.pmu;
517 int i;
518
519 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
520 return;
521
522 /* Weed out disabled counters */
523 val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
524
525 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
526 u64 type, reg;
527
528 if (!(val & BIT(i)))
529 continue;
530
531 /* PMSWINC only applies to ... SW_INC! */
532 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
533 type &= kvm_pmu_event_mask(vcpu->kvm);
534 if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
535 continue;
536
537 /* increment this even SW_INC counter */
538 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
539 reg = lower_32_bits(reg);
540 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
541
542 if (reg) /* no overflow on the low part */
543 continue;
544
545 if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
546 /* increment the high counter */
547 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
548 reg = lower_32_bits(reg);
549 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
550 if (!reg) /* mark overflow on the high counter */
551 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
552 } else {
553 /* mark overflow on low counter */
554 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
555 }
556 }
557 }
558
559 /**
560 * kvm_pmu_handle_pmcr - handle PMCR register
561 * @vcpu: The vcpu pointer
562 * @val: the value guest writes to PMCR register
563 */
kvm_pmu_handle_pmcr(struct kvm_vcpu * vcpu,u64 val)564 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
565 {
566 int i;
567
568 if (val & ARMV8_PMU_PMCR_E) {
569 kvm_pmu_enable_counter_mask(vcpu,
570 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
571 } else {
572 kvm_pmu_disable_counter_mask(vcpu,
573 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
574 }
575
576 if (val & ARMV8_PMU_PMCR_C)
577 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
578
579 if (val & ARMV8_PMU_PMCR_P) {
580 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
581 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
582 for_each_set_bit(i, &mask, 32)
583 kvm_pmu_set_counter_value(vcpu, i, 0);
584 }
585 }
586
kvm_pmu_counter_is_enabled(struct kvm_vcpu * vcpu,u64 select_idx)587 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
588 {
589 return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
590 (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
591 }
592
593 /**
594 * kvm_pmu_create_perf_event - create a perf event for a counter
595 * @vcpu: The vcpu pointer
596 * @select_idx: The number of selected counter
597 */
kvm_pmu_create_perf_event(struct kvm_vcpu * vcpu,u64 select_idx)598 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
599 {
600 struct kvm_pmu *pmu = &vcpu->arch.pmu;
601 struct kvm_pmc *pmc;
602 struct perf_event *event;
603 struct perf_event_attr attr;
604 u64 eventsel, counter, reg, data;
605
606 /*
607 * For chained counters the event type and filtering attributes are
608 * obtained from the low/even counter. We also use this counter to
609 * determine if the event is enabled/disabled.
610 */
611 pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
612
613 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
614 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
615 data = __vcpu_sys_reg(vcpu, reg);
616
617 kvm_pmu_stop_counter(vcpu, pmc);
618 if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
619 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
620 else
621 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
622
623 /* Software increment event doesn't need to be backed by a perf event */
624 if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
625 return;
626
627 /*
628 * If we have a filter in place and that the event isn't allowed, do
629 * not install a perf event either.
630 */
631 if (vcpu->kvm->arch.pmu_filter &&
632 !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
633 return;
634
635 memset(&attr, 0, sizeof(struct perf_event_attr));
636 attr.type = PERF_TYPE_RAW;
637 attr.size = sizeof(attr);
638 attr.pinned = 1;
639 attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
640 attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
641 attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
642 attr.exclude_hv = 1; /* Don't count EL2 events */
643 attr.exclude_host = 1; /* Don't count host events */
644 attr.config = eventsel;
645
646 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
647
648 if (kvm_pmu_pmc_is_chained(pmc)) {
649 /**
650 * The initial sample period (overflow count) of an event. For
651 * chained counters we only support overflow interrupts on the
652 * high counter.
653 */
654 attr.sample_period = (-counter) & GENMASK(63, 0);
655 attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
656
657 event = perf_event_create_kernel_counter(&attr, -1, current,
658 kvm_pmu_perf_overflow,
659 pmc + 1);
660 } else {
661 /* The initial sample period (overflow count) of an event. */
662 if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
663 attr.sample_period = (-counter) & GENMASK(63, 0);
664 else
665 attr.sample_period = (-counter) & GENMASK(31, 0);
666
667 event = perf_event_create_kernel_counter(&attr, -1, current,
668 kvm_pmu_perf_overflow, pmc);
669 }
670
671 if (IS_ERR(event)) {
672 pr_err_once("kvm: pmu event creation failed %ld\n",
673 PTR_ERR(event));
674 return;
675 }
676
677 pmc->perf_event = event;
678 }
679
680 /**
681 * kvm_pmu_update_pmc_chained - update chained bitmap
682 * @vcpu: The vcpu pointer
683 * @select_idx: The number of selected counter
684 *
685 * Update the chained bitmap based on the event type written in the
686 * typer register and the enable state of the odd register.
687 */
kvm_pmu_update_pmc_chained(struct kvm_vcpu * vcpu,u64 select_idx)688 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
689 {
690 struct kvm_pmu *pmu = &vcpu->arch.pmu;
691 struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
692 bool new_state, old_state;
693
694 old_state = kvm_pmu_pmc_is_chained(pmc);
695 new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
696 kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
697
698 if (old_state == new_state)
699 return;
700
701 canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
702 kvm_pmu_stop_counter(vcpu, canonical_pmc);
703 if (new_state) {
704 /*
705 * During promotion from !chained to chained we must ensure
706 * the adjacent counter is stopped and its event destroyed
707 */
708 kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
709 set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
710 return;
711 }
712 clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
713 }
714
715 /**
716 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
717 * @vcpu: The vcpu pointer
718 * @data: The data guest writes to PMXEVTYPER_EL0
719 * @select_idx: The number of selected counter
720 *
721 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
722 * event with given hardware event number. Here we call perf_event API to
723 * emulate this action and create a kernel perf event for it.
724 */
kvm_pmu_set_counter_event_type(struct kvm_vcpu * vcpu,u64 data,u64 select_idx)725 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
726 u64 select_idx)
727 {
728 u64 reg, mask;
729
730 mask = ARMV8_PMU_EVTYPE_MASK;
731 mask &= ~ARMV8_PMU_EVTYPE_EVENT;
732 mask |= kvm_pmu_event_mask(vcpu->kvm);
733
734 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
735 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
736
737 __vcpu_sys_reg(vcpu, reg) = data & mask;
738
739 kvm_pmu_update_pmc_chained(vcpu, select_idx);
740 kvm_pmu_create_perf_event(vcpu, select_idx);
741 }
742
kvm_host_pmu_init(struct arm_pmu * pmu)743 void kvm_host_pmu_init(struct arm_pmu *pmu)
744 {
745 if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
746 !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
747 static_branch_enable(&kvm_arm_pmu_available);
748 }
749
kvm_pmu_probe_pmuver(void)750 static int kvm_pmu_probe_pmuver(void)
751 {
752 struct perf_event_attr attr = { };
753 struct perf_event *event;
754 struct arm_pmu *pmu;
755 int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
756
757 /*
758 * Create a dummy event that only counts user cycles. As we'll never
759 * leave this function with the event being live, it will never
760 * count anything. But it allows us to probe some of the PMU
761 * details. Yes, this is terrible.
762 */
763 attr.type = PERF_TYPE_RAW;
764 attr.size = sizeof(attr);
765 attr.pinned = 1;
766 attr.disabled = 0;
767 attr.exclude_user = 0;
768 attr.exclude_kernel = 1;
769 attr.exclude_hv = 1;
770 attr.exclude_host = 1;
771 attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
772 attr.sample_period = GENMASK(63, 0);
773
774 event = perf_event_create_kernel_counter(&attr, -1, current,
775 kvm_pmu_perf_overflow, &attr);
776
777 if (IS_ERR(event)) {
778 pr_err_once("kvm: pmu event creation failed %ld\n",
779 PTR_ERR(event));
780 return ID_AA64DFR0_PMUVER_IMP_DEF;
781 }
782
783 if (event->pmu) {
784 pmu = to_arm_pmu(event->pmu);
785 if (pmu->pmuver)
786 pmuver = pmu->pmuver;
787 }
788
789 perf_event_disable(event);
790 perf_event_release_kernel(event);
791
792 return pmuver;
793 }
794
kvm_pmu_get_pmceid(struct kvm_vcpu * vcpu,bool pmceid1)795 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
796 {
797 unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
798 u64 val, mask = 0;
799 int base, i, nr_events;
800
801 if (!pmceid1) {
802 val = read_sysreg(pmceid0_el0);
803 base = 0;
804 } else {
805 val = read_sysreg(pmceid1_el0);
806 /*
807 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
808 * as RAZ
809 */
810 if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
811 val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
812 base = 32;
813 }
814
815 if (!bmap)
816 return val;
817
818 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
819
820 for (i = 0; i < 32; i += 8) {
821 u64 byte;
822
823 byte = bitmap_get_value8(bmap, base + i);
824 mask |= byte << i;
825 if (nr_events >= (0x4000 + base + 32)) {
826 byte = bitmap_get_value8(bmap, 0x4000 + base + i);
827 mask |= byte << (32 + i);
828 }
829 }
830
831 return val & mask;
832 }
833
kvm_arm_pmu_v3_enable(struct kvm_vcpu * vcpu)834 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
835 {
836 if (!kvm_vcpu_has_pmu(vcpu))
837 return 0;
838
839 if (!vcpu->arch.pmu.created)
840 return -EINVAL;
841
842 /*
843 * A valid interrupt configuration for the PMU is either to have a
844 * properly configured interrupt number and using an in-kernel
845 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
846 */
847 if (irqchip_in_kernel(vcpu->kvm)) {
848 int irq = vcpu->arch.pmu.irq_num;
849 /*
850 * If we are using an in-kernel vgic, at this point we know
851 * the vgic will be initialized, so we can check the PMU irq
852 * number against the dimensions of the vgic and make sure
853 * it's valid.
854 */
855 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
856 return -EINVAL;
857 } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
858 return -EINVAL;
859 }
860
861 /* One-off reload of the PMU on first run */
862 kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
863
864 return 0;
865 }
866
kvm_arm_pmu_v3_init(struct kvm_vcpu * vcpu)867 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
868 {
869 if (irqchip_in_kernel(vcpu->kvm)) {
870 int ret;
871
872 /*
873 * If using the PMU with an in-kernel virtual GIC
874 * implementation, we require the GIC to be already
875 * initialized when initializing the PMU.
876 */
877 if (!vgic_initialized(vcpu->kvm))
878 return -ENODEV;
879
880 if (!kvm_arm_pmu_irq_initialized(vcpu))
881 return -ENXIO;
882
883 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
884 &vcpu->arch.pmu);
885 if (ret)
886 return ret;
887 }
888
889 init_irq_work(&vcpu->arch.pmu.overflow_work,
890 kvm_pmu_perf_overflow_notify_vcpu);
891
892 vcpu->arch.pmu.created = true;
893 return 0;
894 }
895
896 /*
897 * For one VM the interrupt type must be same for each vcpu.
898 * As a PPI, the interrupt number is the same for all vcpus,
899 * while as an SPI it must be a separate number per vcpu.
900 */
pmu_irq_is_valid(struct kvm * kvm,int irq)901 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
902 {
903 int i;
904 struct kvm_vcpu *vcpu;
905
906 kvm_for_each_vcpu(i, vcpu, kvm) {
907 if (!kvm_arm_pmu_irq_initialized(vcpu))
908 continue;
909
910 if (irq_is_ppi(irq)) {
911 if (vcpu->arch.pmu.irq_num != irq)
912 return false;
913 } else {
914 if (vcpu->arch.pmu.irq_num == irq)
915 return false;
916 }
917 }
918
919 return true;
920 }
921
kvm_arm_pmu_v3_set_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)922 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
923 {
924 if (!kvm_vcpu_has_pmu(vcpu))
925 return -ENODEV;
926
927 if (vcpu->arch.pmu.created)
928 return -EBUSY;
929
930 if (!vcpu->kvm->arch.pmuver)
931 vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
932
933 if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
934 return -ENODEV;
935
936 switch (attr->attr) {
937 case KVM_ARM_VCPU_PMU_V3_IRQ: {
938 int __user *uaddr = (int __user *)(long)attr->addr;
939 int irq;
940
941 if (!irqchip_in_kernel(vcpu->kvm))
942 return -EINVAL;
943
944 if (get_user(irq, uaddr))
945 return -EFAULT;
946
947 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
948 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
949 return -EINVAL;
950
951 if (!pmu_irq_is_valid(vcpu->kvm, irq))
952 return -EINVAL;
953
954 if (kvm_arm_pmu_irq_initialized(vcpu))
955 return -EBUSY;
956
957 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
958 vcpu->arch.pmu.irq_num = irq;
959 return 0;
960 }
961 case KVM_ARM_VCPU_PMU_V3_FILTER: {
962 struct kvm_pmu_event_filter __user *uaddr;
963 struct kvm_pmu_event_filter filter;
964 int nr_events;
965
966 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
967
968 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
969
970 if (copy_from_user(&filter, uaddr, sizeof(filter)))
971 return -EFAULT;
972
973 if (((u32)filter.base_event + filter.nevents) > nr_events ||
974 (filter.action != KVM_PMU_EVENT_ALLOW &&
975 filter.action != KVM_PMU_EVENT_DENY))
976 return -EINVAL;
977
978 mutex_lock(&vcpu->kvm->lock);
979
980 if (!vcpu->kvm->arch.pmu_filter) {
981 vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
982 if (!vcpu->kvm->arch.pmu_filter) {
983 mutex_unlock(&vcpu->kvm->lock);
984 return -ENOMEM;
985 }
986
987 /*
988 * The default depends on the first applied filter.
989 * If it allows events, the default is to deny.
990 * Conversely, if the first filter denies a set of
991 * events, the default is to allow.
992 */
993 if (filter.action == KVM_PMU_EVENT_ALLOW)
994 bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
995 else
996 bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
997 }
998
999 if (filter.action == KVM_PMU_EVENT_ALLOW)
1000 bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1001 else
1002 bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1003
1004 mutex_unlock(&vcpu->kvm->lock);
1005
1006 return 0;
1007 }
1008 case KVM_ARM_VCPU_PMU_V3_INIT:
1009 return kvm_arm_pmu_v3_init(vcpu);
1010 }
1011
1012 return -ENXIO;
1013 }
1014
kvm_arm_pmu_v3_get_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)1015 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1016 {
1017 switch (attr->attr) {
1018 case KVM_ARM_VCPU_PMU_V3_IRQ: {
1019 int __user *uaddr = (int __user *)(long)attr->addr;
1020 int irq;
1021
1022 if (!irqchip_in_kernel(vcpu->kvm))
1023 return -EINVAL;
1024
1025 if (!kvm_vcpu_has_pmu(vcpu))
1026 return -ENODEV;
1027
1028 if (!kvm_arm_pmu_irq_initialized(vcpu))
1029 return -ENXIO;
1030
1031 irq = vcpu->arch.pmu.irq_num;
1032 return put_user(irq, uaddr);
1033 }
1034 }
1035
1036 return -ENXIO;
1037 }
1038
kvm_arm_pmu_v3_has_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)1039 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1040 {
1041 switch (attr->attr) {
1042 case KVM_ARM_VCPU_PMU_V3_IRQ:
1043 case KVM_ARM_VCPU_PMU_V3_INIT:
1044 case KVM_ARM_VCPU_PMU_V3_FILTER:
1045 if (kvm_vcpu_has_pmu(vcpu))
1046 return 0;
1047 }
1048
1049 return -ENXIO;
1050 }
1051