/*
 * vpmu.c: PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */
#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/cpu.h>
#include <xen/param.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/p2m.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
#include <irq_vectors.h>
#include <public/pmu.h>
#include <xsm/xsm.h>

#include <compat/pmu.h>
CHECK_pmu_cntr_pair;
CHECK_pmu_data;
CHECK_pmu_params;

static unsigned int __read_mostly opt_vpmu_enabled;
unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
unsigned int __read_mostly vpmu_features = 0;
bool __read_mostly opt_rtm_abort;

static DEFINE_SPINLOCK(vpmu_lock);
static unsigned vpmu_count;

static DEFINE_PER_CPU(struct vcpu *, last_vcpu);

static int __init parse_vpmu_params(const char *s)
{
    const char *ss;
    int rc = 0, val;

    do {
        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( (val = parse_bool(s, ss)) >= 0 )
        {
            opt_vpmu_enabled = val;
            if ( !val )
                vpmu_features = 0;
        }
        else if ( !cmdline_strcmp(s, "bts") )
            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
        else if ( !cmdline_strcmp(s, "ipc") )
            vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
        else if ( !cmdline_strcmp(s, "arch") )
            vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
        else if ( (val = parse_boolean("rtm-abort", s, ss)) >= 0 )
            opt_rtm_abort = val;
        else
            rc = -EINVAL;

        s = ss + 1;
    } while ( *ss );

    /* Selecting bts/ipc/arch implies vpmu=1. */
    if ( vpmu_features )
        opt_vpmu_enabled = true;

    if ( opt_vpmu_enabled )
        vpmu_mode = XENPMU_MODE_SELF;

    return rc;
}
custom_param("vpmu", parse_vpmu_params);

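/*
 * Mirror a guest LVTPC update into the hardware LVTPC register,
 * preserving only the guest's mask bit on top of PMU_APIC_VECTOR.
 */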
void vpmu_lvtpc_update(uint32_t val)
{
    struct vpmu_struct *vpmu;
    struct vcpu *curr = current;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
        return;

    vpmu = vcpu_vpmu(curr);

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);

    /* Postpone APIC updates for PV(H) guests if a PMU interrupt is pending */
    if ( has_vlapic(curr->domain) || !vpmu->xenpmu_data ||
         !vpmu_is_set(vpmu, VPMU_CACHED) )
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}

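/*
 * Dispatch a PMU MSR read/write from the current vCPU to the
 * vendor-specific handler, hiding the MSRs when the VPMU is off or
 * the hardware domain is profiling the whole system.
 */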
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
                uint64_t supported, bool_t is_write)
{
    struct vcpu *curr = current;
    struct vpmu_struct *vpmu;
    const struct arch_vpmu_ops *ops;
    int ret = 0;

    /*
     * Hide the PMU MSRs if vpmu is not configured, or the hardware domain is
     * profiling the whole system.
     */
    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) &&
          !is_hardware_domain(curr->domain)) )
        goto nop;

    vpmu = vcpu_vpmu(curr);
    ops = vpmu->arch_vpmu_ops;
    if ( !ops )
        goto nop;

    if ( is_write && ops->do_wrmsr )
        ret = ops->do_wrmsr(msr, *msr_content, supported);
    else if ( !is_write && ops->do_rdmsr )
        ret = ops->do_rdmsr(msr, msr_content);
    else
        goto nop;

    /*
     * We may have received a PMU interrupt while handling the MSR access,
     * and since do_wrmsr/do_rdmsr may load the VPMU context we should save
     * (and unload) it again.
     */
    if ( !has_vlapic(curr->domain) && vpmu->xenpmu_data &&
         vpmu_is_set(vpmu, VPMU_CACHED) )
    {
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        ops->arch_vpmu_save(curr, 0);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    }

    return ret;

 nop:
    if ( !is_write && (msr != MSR_IA32_MISC_ENABLE) )
        *msr_content = 0;

    return 0;
}

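/* Pick the hardware domain vCPU that processes samples taken on this pCPU. */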
static inline struct vcpu *choose_hwdom_vcpu(void)
{
    unsigned idx;

    if ( hardware_domain->max_vcpus == 0 )
        return NULL;

    idx = smp_processor_id() % hardware_domain->max_vcpus;

    return hardware_domain->vcpu[idx];
}

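/*
 * Handle a PMU interrupt: record the sample for the vCPU being profiled
 * (or hand it to the hardware domain) and deliver the notification.
 */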
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
#ifdef CONFIG_HVM
    struct vlapic *vlapic;
    uint32_t vlapic_lvtpc;
#endif

    /*
     * dom0 will handle interrupts for special domains (e.g. the idle
     * domain) or, in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !has_vlapic(sampling->domain) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->rflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->rflags;

            if ( !is_hvm_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.dpl;
                if ( !(sampled->arch.hvm.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

#ifdef CONFIG_HVM
    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->arch.nmi_pending = true;
        break;
    }
#endif
}

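/* IPI callback: save and unload the VPMU context of the vCPU passed in arg. */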
static void vpmu_save_force(void *arg)
{
    struct vcpu *v = arg;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return;

    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);

    if ( vpmu->arch_vpmu_ops )
        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);

    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);

    per_cpu(last_vcpu, smp_processor_id()) = NULL;
}

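/* Save the VPMU context when v is descheduled from the current pCPU. */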
void vpmu_save(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
        return;

    vpmu->last_pcpu = pcpu;
    per_cpu(last_vcpu, pcpu) = v;

    if ( vpmu->arch_vpmu_ops )
        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
}

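/*
 * Load v's VPMU context onto the current pCPU, first forcing a save on
 * the pCPU where it was last loaded if necessary.
 */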
int vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return 0;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from the last pcpu that we ran on. Note that if
         * another VCPU is running there it must have saved this VCPU's
         * context before starting to run (see below).
         * There should be no race since the remote pcpu will disable
         * interrupts before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Only load the PMU context immediately when the PMU is counting. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
         (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
          vpmu_is_set(vpmu, VPMU_CACHED)) )
        return 0;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        int ret;

        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
        if ( ret )
        {
            apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
            return ret;
        }
    }

    return 0;
}

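/* Set up the vendor-specific VPMU state for v. */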
static int vpmu_arch_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    uint8_t vendor = current_cpu_data.x86_vendor;
    int ret;

    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);

    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);

    if ( !vpmu_available(v) )
        return 0;

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
    case X86_VENDOR_HYGON:
        ret = svm_vpmu_initialise(v);
        break;

    case X86_VENDOR_INTEL:
        ret = vmx_vpmu_initialise(v);
        break;

    default:
        if ( vpmu_mode != XENPMU_MODE_OFF )
        {
            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
                   "Disabling VPMU\n", vendor);
            opt_vpmu_enabled = 0;
            vpmu_mode = XENPMU_MODE_OFF;
        }
        return -EINVAL;
    }

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;

    if ( ret )
        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);

    return ret;
}

static void get_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    /*
     * Keep count of VPMUs in the system so that we won't try to change
     * vpmu_mode while a guest might be using one.
     * vpmu_mode can be safely updated while dom0's VPMUs are active and
     * so we don't need to include them in the count.
     */
    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count++;
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);

    spin_unlock(&vpmu_lock);
}

static void put_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    if ( !vpmu_available(v) )
        goto out;

    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count--;
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);

 out:
    spin_unlock(&vpmu_lock);
}

void vpmu_initialise(struct vcpu *v)
{
    get_vpmu(v);

    /*
     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
     * from pvpmu_init().
     */
    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
        put_vpmu(v);
}

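/* IPI callback: clear this CPU's last_vcpu if it still points at arg. */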
static void vpmu_clear_last(void *arg)
{
    if ( this_cpu(last_vcpu) == arg )
        this_cpu(last_vcpu) = NULL;
}

static void vpmu_arch_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( cpu_online(vpmu->last_pcpu) &&
         per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
    {
        /*
         * Unload the VPMU first if VPMU_CONTEXT_LOADED is set.
         * This will stop the counters.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
                             vpmu_save_force, v, 1);

        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
    }

    vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
}

static void vpmu_cleanup(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    void *xenpmu_data;

    spin_lock(&vpmu->vpmu_lock);

    vpmu_arch_destroy(v);
    xenpmu_data = vpmu->xenpmu_data;
    vpmu->xenpmu_data = NULL;

    spin_unlock(&vpmu->vpmu_lock);

    if ( xenpmu_data )
    {
        mfn_t mfn = domain_page_map_to_mfn(xenpmu_data);

        ASSERT(mfn_valid(mfn));
        unmap_domain_page_global(xenpmu_data);
        put_page_and_type(mfn_to_page(mfn));
    }
}

void vpmu_destroy(struct vcpu *v)
{
    vpmu_cleanup(v);

    put_vpmu(v);
}

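/*
 * XENPMU_init: map the guest page that will hold the shared PMU data and
 * perform the architectural part of the VPMU initialization.
 */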
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return -EINVAL;

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    if ( !vpmu_available(v) )
        return -ENOENT;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    spin_lock(&vpmu->vpmu_lock);

    if ( v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -EEXIST;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( !v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -ENOMEM;
    }

    if ( vpmu_arch_initialise(v) )
        put_vpmu(v);

    spin_unlock(&vpmu->vpmu_lock);

    return 0;
}

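/* XENPMU_finish: tear down the vCPU's VPMU and release the shared data page. */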
static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return;

    v = d->vcpu[params->vcpu];
    if ( v != current )
        vcpu_pause(v);

    vpmu_cleanup(v);

    if ( v != current )
        vcpu_unpause(v);
}

/* Dump some vpmu information to the console. Used in keyhandler dump_domains(). */
void vpmu_dump(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
}

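/*
 * XENPMU_* hypercall handler. Only guests without a virtual LAPIC
 * (i.e. PV) may invoke it.
 */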
long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
    int ret;
    struct vcpu *curr;
    struct xen_pmu_params pmu_params = {.val = 0};
    struct xen_pmu_data *xenpmu_data;
    struct vpmu_struct *vpmu;

    if ( !opt_vpmu_enabled || has_vlapic(current->domain) )
        return -EOPNOTSUPP;

    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
    if ( ret )
        return ret;

    /* Check major version when parameters are specified */
    switch ( op )
    {
    case XENPMU_mode_set:
    case XENPMU_feature_set:
    case XENPMU_init:
    case XENPMU_finish:
        if ( copy_from_guest(&pmu_params, arg, 1) )
            return -EFAULT;

        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
            return -EINVAL;
    }

    switch ( op )
    {
    case XENPMU_mode_set:
    {
        if ( (pmu_params.val &
              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
             (hweight64(pmu_params.val) > 1) )
            return -EINVAL;

        /* 32-bit dom0 can only sample itself. */
        if ( is_pv_32bit_vcpu(current) &&
             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        /*
         * We can always safely switch between XENPMU_MODE_SELF and
         * XENPMU_MODE_HV while other VPMUs are active.
         */
        if ( (vpmu_count == 0) ||
             ((vpmu_mode ^ pmu_params.val) ==
              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
            vpmu_mode = pmu_params.val;
        else if ( vpmu_mode != pmu_params.val )
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change mode while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;
    }

    case XENPMU_mode_get:
        memset(&pmu_params, 0, sizeof(pmu_params));
        pmu_params.val = vpmu_mode;

        pmu_params.version.maj = XENPMU_VER_MAJ;
        pmu_params.version.min = XENPMU_VER_MIN;

        if ( copy_to_guest(arg, &pmu_params, 1) )
            ret = -EFAULT;

        break;

    case XENPMU_feature_set:
        if ( pmu_params.val & ~(XENPMU_FEATURE_INTEL_BTS |
                                XENPMU_FEATURE_IPC_ONLY |
                                XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
            vpmu_features = pmu_params.val;
        else
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change features while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;

    case XENPMU_feature_get:
        pmu_params.val = vpmu_features;
        if ( copy_field_to_guest(arg, &pmu_params, val) )
            ret = -EFAULT;

        break;

    case XENPMU_init:
        ret = pvpmu_init(current->domain, &pmu_params);
        break;

    case XENPMU_finish:
        pvpmu_finish(current->domain, &pmu_params);
        break;

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data != NULL )
            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        else
            ret = -EINVAL;
        break;

    case XENPMU_flush:
        curr = current;
        vpmu = vcpu_vpmu(curr);
        xenpmu_data = curr->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
            return -EINVAL;
        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
        vpmu_reset(vpmu, VPMU_CACHED);
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        if ( vpmu_load(curr, 1) )
        {
            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
            vpmu_set(vpmu, VPMU_CACHED);
            ret = -EIO;
        }
        break;

    default:
        ret = -EINVAL;
    }

    return ret;
}

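/* CPU notifier: force-save the context of the vCPU that last ran on a dying CPU. */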
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct vcpu *vcpu = per_cpu(last_vcpu, cpu);
    struct vpmu_struct *vpmu;

    if ( !vcpu )
        return NOTIFY_DONE;

    vpmu = vcpu_vpmu(vcpu);
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return NOTIFY_DONE;

    if ( action == CPU_DYING )
    {
        vpmu_save_force(vcpu);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

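/* Boot-time setup: vendor-specific initialization and CPU notifier registration. */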
static int __init vpmu_init(void)
{
    int vendor = current_cpu_data.x86_vendor;

    if ( !opt_vpmu_enabled )
        return 0;

    /* NMI watchdog uses LVTPC and HW counter */
    if ( opt_watchdog && opt_vpmu_enabled )
    {
        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
        opt_vpmu_enabled = 0;
        vpmu_mode = XENPMU_MODE_OFF;
        return 0;
    }

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        if ( amd_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    case X86_VENDOR_HYGON:
        if ( hygon_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    case X86_VENDOR_INTEL:
        if ( core2_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;

    default:
        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
               "Turning VPMU off.\n", vendor);
        vpmu_mode = XENPMU_MODE_OFF;
        break;
    }

    if ( vpmu_mode != XENPMU_MODE_OFF )
    {
        register_cpu_notifier(&cpu_nfb);
        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
               __stringify(XENPMU_VER_MIN) "\n");
    }
    else
        opt_vpmu_enabled = 0;

    return 0;
}
__initcall(vpmu_init);