/*
 * vpmu.c: PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */
#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/cpu.h>
#include <xen/param.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/p2m.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
#include <irq_vectors.h>
#include <public/pmu.h>
#include <xsm/xsm.h>

#include <compat/pmu.h>
CHECK_pmu_cntr_pair;
CHECK_pmu_data;
CHECK_pmu_params;

static unsigned int __read_mostly opt_vpmu_enabled;
unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
unsigned int __read_mostly vpmu_features = 0;
bool __read_mostly opt_rtm_abort;

static DEFINE_SPINLOCK(vpmu_lock);
static unsigned vpmu_count;

static DEFINE_PER_CPU(struct vcpu *, last_vcpu);

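/*
 * Parse the "vpmu" command line option: a plain boolean, and/or the
 * sub-options "bts", "ipc", "arch" and "rtm-abort".
 */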
static int __init parse_vpmu_params(const char *s)
{
    const char *ss;
    int rc = 0, val;

    do {
        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( (val = parse_bool(s, ss)) >= 0 )
        {
            opt_vpmu_enabled = val;
            if ( !val )
                vpmu_features = 0;
        }
        else if ( !cmdline_strcmp(s, "bts") )
            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
        else if ( !cmdline_strcmp(s, "ipc") )
            vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
        else if ( !cmdline_strcmp(s, "arch") )
            vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
        else if ( (val = parse_boolean("rtm-abort", s, ss)) >= 0 )
            opt_rtm_abort = val;
        else
            rc = -EINVAL;

        s = ss + 1;
    } while ( *ss );

    /* Selecting bts/ipc/arch implies vpmu=1. */
    if ( vpmu_features )
        opt_vpmu_enabled = true;

    if ( opt_vpmu_enabled )
        vpmu_mode = XENPMU_MODE_SELF;

    return rc;
}
custom_param("vpmu", parse_vpmu_params);

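/*
 * Track the guest's LVTPC mask bit: the value is merged with
 * PMU_APIC_VECTOR and, unless a PV(H) sample is still pending
 * (VPMU_CACHED), written straight to the local APIC.
 */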
void vpmu_lvtpc_update(uint32_t val)
{
    struct vpmu_struct *vpmu;
    struct vcpu *curr = current;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
        return;

    vpmu = vcpu_vpmu(curr);

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);

    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
    if ( has_vlapic(curr->domain) || !vpmu->xenpmu_data ||
         !vpmu_is_set(vpmu, VPMU_CACHED) )
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}

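/*
 * Handle a guest read or write of a PMU MSR by dispatching to the
 * vendor-specific do_rdmsr/do_wrmsr hook.  Accesses are silently dropped
 * (reads, except of MSR_IA32_MISC_ENABLE, return 0) when the VPMU is off,
 * when the hardware domain is profiling the whole system, or when no
 * handler is registered.
 */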
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
                uint64_t supported, bool_t is_write)
{
    struct vcpu *curr = current;
    struct vpmu_struct *vpmu;
    const struct arch_vpmu_ops *ops;
    int ret = 0;

    /*
     * Hide the PMU MSRs if vpmu is not configured, or the hardware domain is
     * profiling the whole system.
     */
    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) &&
          !is_hardware_domain(curr->domain)) )
        goto nop;

    vpmu = vcpu_vpmu(curr);
    ops = vpmu->arch_vpmu_ops;
    if ( !ops )
        goto nop;

    if ( is_write && ops->do_wrmsr )
        ret = ops->do_wrmsr(msr, *msr_content, supported);
    else if ( !is_write && ops->do_rdmsr )
        ret = ops->do_rdmsr(msr, msr_content);
    else
        goto nop;

    /*
     * We may have received a PMU interrupt while handling the MSR access,
     * and since do_wrmsr/do_rdmsr may load the VPMU context we should save
     * (and unload) it again.
     */
    if ( !has_vlapic(curr->domain) && vpmu->xenpmu_data &&
         vpmu_is_set(vpmu, VPMU_CACHED) )
    {
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        ops->arch_vpmu_save(curr, 0);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    }

    return ret;

 nop:
    if ( !is_write && (msr != MSR_IA32_MISC_ENABLE) )
        *msr_content = 0;

    return 0;
}

static inline struct vcpu *choose_hwdom_vcpu(void)
{
    unsigned idx;

    if ( hardware_domain->max_vcpus == 0 )
        return NULL;

    idx = smp_processor_id() % hardware_domain->max_vcpus;

    return hardware_domain->vcpu[idx];
}

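/*
 * PMU interrupt (LVTPC vector / NMI) handler.  Decide which vCPU receives
 * the sample: the interrupted vCPU itself, or a hardware domain vCPU in
 * XENPMU_MODE_ALL or when a special domain (e.g. the idle domain) was
 * interrupted.  For PV(H) targets the sampled register state is copied into
 * the shared xenpmu_data page and VIRQ_XENPMU is raised; for HVM guests the
 * interrupt is forwarded through the virtual local APIC (fixed vector or
 * NMI).
 */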
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
#ifdef CONFIG_HVM
    struct vlapic *vlapic;
    uint32_t vlapic_lvtpc;
#endif

    /*
     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
     * in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !has_vlapic(sampling->domain) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->rflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->rflags;

            if ( !is_hvm_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.dpl;
                if ( !(sampled->arch.hvm.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

#ifdef CONFIG_HVM
    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->arch.nmi_pending = true;
        break;
    }
#endif
}

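/*
 * Force-save a loaded VPMU context.  Runs either locally with interrupts
 * disabled or, via on_selected_cpus(), on the remote pCPU that last ran
 * the vCPU, and clears that pCPU's last_vcpu marker.
 */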
static void vpmu_save_force(void *arg)
{
    struct vcpu *v = arg;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return;

    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);

    if ( vpmu->arch_vpmu_ops )
        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);

    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);

    per_cpu(last_vcpu, smp_processor_id()) = NULL;
}

void vpmu_save(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
        return;

    vpmu->last_pcpu = pcpu;
    per_cpu(last_vcpu, pcpu) = v;

    if ( vpmu->arch_vpmu_ops )
        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
}

int vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return 0;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from the last pcpu that we ran on.  Note that if
         * another VCPU is running there it must have saved this VCPU's
         * context before starting to run (see below).
         * There should be no race since the remote pcpu will disable
         * interrupts before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Only load the PMU context immediately when the PMU is counting. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
         (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
          vpmu_is_set(vpmu, VPMU_CACHED)) )
        return 0;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        int ret;

        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
        if ( ret )
        {
            apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
            return ret;
        }
    }

    return 0;
}

static int vpmu_arch_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    uint8_t vendor = current_cpu_data.x86_vendor;
    int ret;

    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);

    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);

    if ( !vpmu_available(v) )
        return 0;

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
    case X86_VENDOR_HYGON:
        ret = svm_vpmu_initialise(v);
        break;

    case X86_VENDOR_INTEL:
        ret = vmx_vpmu_initialise(v);
        break;

    default:
        if ( vpmu_mode != XENPMU_MODE_OFF )
        {
            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
                   "Disabling VPMU\n", vendor);
            opt_vpmu_enabled = 0;
            vpmu_mode = XENPMU_MODE_OFF;
        }
        return -EINVAL;
    }

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;

    if ( ret )
        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);

    return ret;
}

static void get_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    /*
     * Keep count of VPMUs in the system so that we won't try to change
     * vpmu_mode while a guest might be using one.
     * vpmu_mode can be safely updated while dom0's VPMUs are active and
     * so we don't need to include it in the count.
     */
    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count++;
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);

    spin_unlock(&vpmu_lock);
}

static void put_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    if ( !vpmu_available(v) )
        goto out;

    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count--;
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);

 out:
    spin_unlock(&vpmu_lock);
}

void vpmu_initialise(struct vcpu *v)
{
    get_vpmu(v);

    /*
     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
     * from pvpmu_init().
     */
    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
        put_vpmu(v);
}

static void vpmu_clear_last(void *arg)
{
    if ( this_cpu(last_vcpu) == arg )
        this_cpu(last_vcpu) = NULL;
}

static void vpmu_arch_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( cpu_online(vpmu->last_pcpu) &&
         per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
    {
        /*
         * Unload the VPMU first if VPMU_CONTEXT_LOADED is set.
         * This will stop the counters.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
                             vpmu_save_force, v, 1);

        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
    }

    vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
}

static void vpmu_cleanup(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    void *xenpmu_data;

    spin_lock(&vpmu->vpmu_lock);

    vpmu_arch_destroy(v);
    xenpmu_data = vpmu->xenpmu_data;
    vpmu->xenpmu_data = NULL;

    spin_unlock(&vpmu->vpmu_lock);

    if ( xenpmu_data )
    {
        mfn_t mfn = domain_page_map_to_mfn(xenpmu_data);

        ASSERT(mfn_valid(mfn));
        unmap_domain_page_global(xenpmu_data);
        put_page_and_type(mfn_to_page(mfn));
    }
}

void vpmu_destroy(struct vcpu *v)
{
    vpmu_cleanup(v);

    put_vpmu(v);
}

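/*
 * XENPMU_init: map the guest page at GFN params->val as the shared
 * xen_pmu_data region for the given vCPU and perform the arch-specific
 * VPMU initialisation (deferred until here for PV guests).
 */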
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return -EINVAL;

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    if ( !vpmu_available(v) )
        return -ENOENT;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    spin_lock(&vpmu->vpmu_lock);

    if ( v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -EEXIST;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( !v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -ENOMEM;
    }

    if ( vpmu_arch_initialise(v) )
        put_vpmu(v);

    spin_unlock(&vpmu->vpmu_lock);

    return 0;
}

static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return;

    v = d->vcpu[params->vcpu];
    if ( v != current )
        vcpu_pause(v);

    vpmu_cleanup(v);

    if ( v != current )
        vcpu_unpause(v);
}

/* Dump some vpmu information to the console. Used in keyhandler dump_domains(). */
void vpmu_dump(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
}

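/*
 * XENPMU_* hypercall dispatcher.  Only PV(H) guests reach the individual
 * sub-ops: guests with a virtual local APIC get -EOPNOTSUPP, as does a
 * system booted without "vpmu" enabled, and every request must pass the
 * XSM pmu_op check.
 */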
long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
    int ret;
    struct vcpu *curr;
    struct xen_pmu_params pmu_params = {.val = 0};
    struct xen_pmu_data *xenpmu_data;
    struct vpmu_struct *vpmu;

    if ( !opt_vpmu_enabled || has_vlapic(current->domain) )
        return -EOPNOTSUPP;

    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
    if ( ret )
        return ret;

    /* Check major version when parameters are specified */
    switch ( op )
    {
    case XENPMU_mode_set:
    case XENPMU_feature_set:
    case XENPMU_init:
    case XENPMU_finish:
        if ( copy_from_guest(&pmu_params, arg, 1) )
            return -EFAULT;

        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
            return -EINVAL;
    }

    switch ( op )
    {
    case XENPMU_mode_set:
    {
        if ( (pmu_params.val &
              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
             (hweight64(pmu_params.val) > 1) )
            return -EINVAL;

        /* 32-bit dom0 can only sample itself. */
        if ( is_pv_32bit_vcpu(current) &&
             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        /*
         * We can always safely switch between XENPMU_MODE_SELF and
         * XENPMU_MODE_HV while other VPMUs are active.
         */
        if ( (vpmu_count == 0) ||
             ((vpmu_mode ^ pmu_params.val) ==
              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
            vpmu_mode = pmu_params.val;
        else if ( vpmu_mode != pmu_params.val )
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change mode while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;
    }

    case XENPMU_mode_get:
        memset(&pmu_params, 0, sizeof(pmu_params));
        pmu_params.val = vpmu_mode;

        pmu_params.version.maj = XENPMU_VER_MAJ;
        pmu_params.version.min = XENPMU_VER_MIN;

        if ( copy_to_guest(arg, &pmu_params, 1) )
            ret = -EFAULT;

        break;

    case XENPMU_feature_set:
        if ( pmu_params.val & ~(XENPMU_FEATURE_INTEL_BTS |
                                XENPMU_FEATURE_IPC_ONLY |
                                XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
            vpmu_features = pmu_params.val;
        else
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change features while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;

    case XENPMU_feature_get:
        pmu_params.val = vpmu_features;
        if ( copy_field_to_guest(arg, &pmu_params, val) )
            ret = -EFAULT;

        break;

    case XENPMU_init:
        ret = pvpmu_init(current->domain, &pmu_params);
        break;

    case XENPMU_finish:
        pvpmu_finish(current->domain, &pmu_params);
        break;

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data != NULL )
            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        else
            ret = -EINVAL;
        break;

    case XENPMU_flush:
        curr = current;
        vpmu = vcpu_vpmu(curr);
        xenpmu_data = curr->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
            return -EINVAL;
        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
        vpmu_reset(vpmu, VPMU_CACHED);
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        if ( vpmu_load(curr, 1) )
        {
            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
            vpmu_set(vpmu, VPMU_CACHED);
            ret = -EIO;
        }
        break;

    default:
        ret = -EINVAL;
    }

    return ret;
}

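/*
 * CPU hotplug notifier: when a pCPU is dying, force-save any VPMU context
 * still loaded there so the owning vCPU can be resumed elsewhere.
 */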
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct vcpu *vcpu = per_cpu(last_vcpu, cpu);
    struct vpmu_struct *vpmu;

    if ( !vcpu )
        return NOTIFY_DONE;

    vpmu = vcpu_vpmu(vcpu);
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return NOTIFY_DONE;

    if ( action == CPU_DYING )
    {
        vpmu_save_force(vcpu);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

static int __init vpmu_init(void)
{
    int vendor = current_cpu_data.x86_vendor;

    if ( !opt_vpmu_enabled )
        return 0;

    /* NMI watchdog uses LVTPC and HW counter */
    if ( opt_watchdog && opt_vpmu_enabled )
    {
        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
        opt_vpmu_enabled = 0;
        vpmu_mode = XENPMU_MODE_OFF;
        return 0;
    }

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        if ( amd_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    case X86_VENDOR_HYGON:
        if ( hygon_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    case X86_VENDOR_INTEL:
        if ( core2_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    default:
        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
               "Turning VPMU off.\n", vendor);
        vpmu_mode = XENPMU_MODE_OFF;
        break;
    }

    if ( vpmu_mode != XENPMU_MODE_OFF )
    {
        register_cpu_notifier(&cpu_nfb);
        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
               __stringify(XENPMU_VER_MIN) "\n");
    }
    else
        opt_vpmu_enabled = 0;

    return 0;
}
__initcall(vpmu_init);