/*****************************************************************************
#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
#
#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; If not, see <http://www.gnu.org/licenses/>.
#
# The full GNU General Public License is included in this distribution in the
# file called LICENSE.
#
*****************************************************************************/

#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
#include <xen/iocap.h>
#include <xen/compat.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <public/xen.h>
#include <xen/cpumask.h>
#include <asm/processor.h>
#include <xen/percpu.h>
#include <xen/domain.h>
#include <xen/acpi.h>

#include <public/sysctl.h>
#include <acpi/cpufreq/cpufreq.h>
#include <xen/pmstat.h>

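/*
 * Per-CPU Px statistic data (struct pm_px), maintained by the cpufreq
 * statistic code and reported via PMSTAT_get_pxstat below.
 */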
DEFINE_PER_CPU_READ_MOSTLY(struct pm_px *, cpufreq_statistic_data);

/*
 * Get PM statistic info
 */
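/*
 * op->type & PMSTAT_CATEGORY_MASK selects the statistic category (Cx vs Px),
 * each gated on the corresponding xen_processor_pmbits capability; the full
 * op->type value then selects the individual sub-operation.
 */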
int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
{
    int ret = 0;
    const struct processor_pminfo *pmpt;

    if ( !op || (op->cpuid >= nr_cpu_ids) || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

    switch ( op->type & PMSTAT_CATEGORY_MASK )
    {
    case PMSTAT_CX:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
            return -ENODEV;
        break;
    case PMSTAT_PX:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
            return -ENODEV;
        if ( !cpufreq_driver.init )
            return -ENODEV;
        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
            return -EINVAL;
        break;
    default:
        return -ENODEV;
    }

    switch ( op->type )
    {
    case PMSTAT_get_max_px:
    {
        op->u.getpx.total = pmpt->perf.state_count;
        break;
    }

    case PMSTAT_get_pxstat:
    {
        uint32_t ct;
        struct pm_px *pxpt;
        spinlock_t *cpufreq_statistic_lock =
                   &per_cpu(cpufreq_statistic_lock, op->cpuid);

        spin_lock(cpufreq_statistic_lock);

        pxpt = per_cpu(cpufreq_statistic_data, op->cpuid);
        if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
        {
            spin_unlock(cpufreq_statistic_lock);
            return -ENODATA;
        }

        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;

        cpufreq_residency_update(op->cpuid, pxpt->u.cur);

        ct = pmpt->perf.state_count;
        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct * ct) )
        {
            spin_unlock(cpufreq_statistic_lock);
            ret = -EFAULT;
            break;
        }

        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
        {
            spin_unlock(cpufreq_statistic_lock);
            ret = -EFAULT;
            break;
        }

        op->u.getpx.total = pxpt->u.total;
        op->u.getpx.usable = pxpt->u.usable;
        op->u.getpx.last = pxpt->u.last;
        op->u.getpx.cur = pxpt->u.cur;

        spin_unlock(cpufreq_statistic_lock);

        break;
    }

    case PMSTAT_reset_pxstat:
    {
        cpufreq_statistic_reset(op->cpuid);
        break;
    }

    case PMSTAT_get_max_cx:
    {
        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
        ret = 0;
        break;
    }

    case PMSTAT_get_cxstat:
    {
        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
        break;
    }

    case PMSTAT_reset_cxstat:
    {
        ret = pmstat_reset_cx_stat(op->cpuid);
        break;
    }

    default:
        printk("undefined sub-hypercall in do_get_pm_info\n");
        ret = -ENOSYS;
        break;
    }

    return ret;
}

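/*
 * A sketch of the intended caller protocol (not mandated here): a caller
 * sizes its Px buffers before requesting statistics, e.g. it first issues
 * PMSTAT_get_max_px to learn the state count ct, allocates ct entries for
 * pt and ct * ct entries for trans_pt, then issues PMSTAT_get_pxstat.
 */
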
/*
 * 1. Get PM parameter
 * 2. Provide user PM control
 */
static int read_scaling_available_governors(char *scaling_available_governors,
                                            unsigned int size)
{
    unsigned int i = 0;
    struct cpufreq_governor *t;

    if ( !scaling_available_governors )
        return -EINVAL;

    list_for_each_entry(t, &cpufreq_governor_list, governor_list)
    {
        i += scnprintf(&scaling_available_governors[i],
                       CPUFREQ_NAME_LEN, "%s ", t->name);
        if ( i > size )
            return -EINVAL;
    }
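    /* Overwrite the trailing space emitted for the last governor with a NUL. */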
    scaling_available_governors[i-1] = '\0';

    return 0;
}

static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
{
    int ret = 0;
    const struct processor_pminfo *pmpt;
    struct cpufreq_policy *policy;
    uint32_t gov_num = 0;
    uint32_t *affected_cpus;
    uint32_t *scaling_available_frequencies;
    char     *scaling_available_governors;
    struct list_head *pos;
    uint32_t cpu, i, j = 0;

    pmpt = processor_pminfo[op->cpuid];
    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);

    if ( !pmpt || !pmpt->perf.states ||
         !policy || !policy->governor )
        return -EINVAL;

    list_for_each(pos, &cpufreq_governor_list)
        gov_num++;

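    /*
     * The caller must have sized its buffers to match the current CPU,
     * frequency, and governor counts.  If any of them disagree, report
     * the up-to-date counts and have the caller retry with -EAGAIN.
     */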
    if ( (op->u.get_para.cpu_num  != cpumask_weight(policy->cpus)) ||
         (op->u.get_para.freq_num != pmpt->perf.state_count)       ||
         (op->u.get_para.gov_num  != gov_num) )
    {
        op->u.get_para.cpu_num  = cpumask_weight(policy->cpus);
        op->u.get_para.freq_num = pmpt->perf.state_count;
        op->u.get_para.gov_num  = gov_num;
        return -EAGAIN;
    }

    if ( !(affected_cpus = xzalloc_array(uint32_t, op->u.get_para.cpu_num)) )
        return -ENOMEM;
    for_each_cpu(cpu, policy->cpus)
        affected_cpus[j++] = cpu;
    if ( copy_to_guest(op->u.get_para.affected_cpus,
                       affected_cpus, op->u.get_para.cpu_num) )
        ret = -EFAULT;
    xfree(affected_cpus);
    if ( ret )
        return ret;

    if ( !(scaling_available_frequencies =
           xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
        return -ENOMEM;
    for ( i = 0; i < op->u.get_para.freq_num; i++ )
        scaling_available_frequencies[i] =
                        pmpt->perf.states[i].core_frequency * 1000;
    if ( copy_to_guest(op->u.get_para.scaling_available_frequencies,
                       scaling_available_frequencies,
                       op->u.get_para.freq_num) )
        ret = -EFAULT;
    xfree(scaling_available_frequencies);
    if ( ret )
        return ret;

    if ( !(scaling_available_governors =
           xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
        return -ENOMEM;
    if ( (ret = read_scaling_available_governors(scaling_available_governors,
                                                 gov_num * CPUFREQ_NAME_LEN)) )
    {
        xfree(scaling_available_governors);
        return ret;
    }
    if ( copy_to_guest(op->u.get_para.scaling_available_governors,
                       scaling_available_governors,
                       gov_num * CPUFREQ_NAME_LEN) )
        ret = -EFAULT;
    xfree(scaling_available_governors);
    if ( ret )
        return ret;

    op->u.get_para.cpuinfo_cur_freq =
        cpufreq_driver.get ? cpufreq_driver.get(op->cpuid) : policy->cur;
    op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq;
    op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq;
    op->u.get_para.scaling_cur_freq = policy->cur;
    op->u.get_para.scaling_max_freq = policy->max;
    op->u.get_para.scaling_min_freq = policy->min;

    if ( cpufreq_driver.name[0] )
        strlcpy(op->u.get_para.scaling_driver,
                cpufreq_driver.name, CPUFREQ_NAME_LEN);
    else
        strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);

    if ( policy->governor->name[0] )
        strlcpy(op->u.get_para.scaling_governor,
                policy->governor->name, CPUFREQ_NAME_LEN);
    else
        strlcpy(op->u.get_para.scaling_governor, "Unknown", CPUFREQ_NAME_LEN);

    /* Governor-specific parameters. */
    if ( !strnicmp(op->u.get_para.scaling_governor,
                   "userspace", CPUFREQ_NAME_LEN) )
    {
        op->u.get_para.u.userspace.scaling_setspeed = policy->cur;
    }

    if ( !strnicmp(op->u.get_para.scaling_governor,
                   "ondemand", CPUFREQ_NAME_LEN) )
    {
        ret = get_cpufreq_ondemand_para(
            &op->u.get_para.u.ondemand.sampling_rate_max,
            &op->u.get_para.u.ondemand.sampling_rate_min,
            &op->u.get_para.u.ondemand.sampling_rate,
            &op->u.get_para.u.ondemand.up_threshold);
    }

    op->u.get_para.turbo_enabled = cpufreq_get_turbo_status(op->cpuid);

    return ret;
}

static int set_cpufreq_gov(struct xen_sysctl_pm_op *op)
{
    struct cpufreq_policy new_policy, *old_policy;

    old_policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
    if ( !old_policy )
        return -EINVAL;

    memcpy(&new_policy, old_policy, sizeof(struct cpufreq_policy));

    new_policy.governor = __find_governor(op->u.set_gov.scaling_governor);
    if ( new_policy.governor == NULL )
        return -EINVAL;

    return __cpufreq_set_policy(old_policy, &new_policy);
}
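/*
 * Per-control semantics: SCALING_{MAX,MIN}_FREQ adjust the policy limits;
 * SCALING_SETSPEED is honoured only under the userspace governor;
 * SAMPLING_RATE and UP_THRESHOLD only under ondemand.
 */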
static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
{
    int ret = 0;
    struct cpufreq_policy *policy;

    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);

    if ( !policy || !policy->governor )
        return -EINVAL;

    switch ( op->u.set_para.ctrl_type )
    {
    case SCALING_MAX_FREQ:
    {
        struct cpufreq_policy new_policy;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        new_policy.max = op->u.set_para.ctrl_value;
        ret = __cpufreq_set_policy(policy, &new_policy);

        break;
    }

    case SCALING_MIN_FREQ:
    {
        struct cpufreq_policy new_policy;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        new_policy.min = op->u.set_para.ctrl_value;
        ret = __cpufreq_set_policy(policy, &new_policy);

        break;
    }

    case SCALING_SETSPEED:
    {
        unsigned int freq = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "userspace", CPUFREQ_NAME_LEN) )
            ret = write_userspace_scaling_setspeed(op->cpuid, freq);
        else
            ret = -EINVAL;

        break;
    }

    case SAMPLING_RATE:
    {
        unsigned int sampling_rate = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "ondemand", CPUFREQ_NAME_LEN) )
            ret = write_ondemand_sampling_rate(sampling_rate);
        else
            ret = -EINVAL;

        break;
    }

    case UP_THRESHOLD:
    {
        unsigned int up_threshold = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "ondemand", CPUFREQ_NAME_LEN) )
            ret = write_ondemand_up_threshold(up_threshold);
        else
            ret = -EINVAL;

        break;
    }

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

int do_pm_op(struct xen_sysctl_pm_op *op)
{
    int ret = 0;
    const struct processor_pminfo *pmpt;

    switch ( op->cmd )
    {
    case XEN_SYSCTL_pm_op_set_sched_opt_smt:
    {
        uint32_t saved_value = sched_smt_power_savings;

        if ( op->cpuid != 0 )
            return -EINVAL;
        sched_smt_power_savings = !!op->u.set_sched_opt_smt;
        op->u.set_sched_opt_smt = saved_value;
        return 0;
    }

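    /*
     * For the {get,set}_max_cstate sub-ops, op->cpuid selects which limit
     * is addressed: 0 = C-state limit, 1 = C-substate limit.
     */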
    case XEN_SYSCTL_pm_op_get_max_cstate:
        BUILD_BUG_ON(XEN_SYSCTL_CX_UNLIMITED != UINT_MAX);
        if ( op->cpuid == 0 )
            op->u.get_max_cstate = acpi_get_cstate_limit();
        else if ( op->cpuid == 1 )
            op->u.get_max_cstate = acpi_get_csubstate_limit();
        else
            ret = -EINVAL;
        return ret;

    case XEN_SYSCTL_pm_op_set_max_cstate:
        if ( op->cpuid == 0 )
            acpi_set_cstate_limit(op->u.set_max_cstate);
        else if ( op->cpuid == 1 )
            acpi_set_csubstate_limit(op->u.set_max_cstate);
        else
            ret = -EINVAL;
        return ret;
    }

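    /* All remaining commands operate on one specific, online CPU. */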
    if ( op->cpuid >= nr_cpu_ids || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

    switch ( op->cmd & PM_PARA_CATEGORY_MASK )
    {
    case CPUFREQ_PARA:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
            return -ENODEV;
        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
            return -EINVAL;
        break;
    }

    switch ( op->cmd )
    {
    case GET_CPUFREQ_PARA:
    {
        ret = get_cpufreq_para(op);
        break;
    }

    case SET_CPUFREQ_GOV:
    {
        ret = set_cpufreq_gov(op);
        break;
    }

    case SET_CPUFREQ_PARA:
    {
        ret = set_cpufreq_para(op);
        break;
    }

    case GET_CPUFREQ_AVGFREQ:
    {
        op->u.get_avgfreq = cpufreq_driver_getavg(op->cpuid, USR_GETAVG);
        break;
    }

    case XEN_SYSCTL_pm_op_enable_turbo:
    {
        ret = cpufreq_update_turbo(op->cpuid, CPUFREQ_TURBO_ENABLED);
        break;
    }

    case XEN_SYSCTL_pm_op_disable_turbo:
    {
        ret = cpufreq_update_turbo(op->cpuid, CPUFREQ_TURBO_DISABLED);
        break;
    }

    default:
        printk("undefined sub-hypercall in do_pm_op\n");
        ret = -ENOSYS;
        break;
    }

    return ret;
}

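/*
 * _PDC buffer layout as passed in by the guest: bits[0] holds the revision,
 * bits[1] the dword count (which must be non-zero), and bits[2] the
 * capability bits.
 */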
int acpi_set_pdc_bits(uint32_t acpi_id, XEN_GUEST_HANDLE(uint32) pdc)
{
    u32 bits[3];
    int ret;

    if ( copy_from_guest(bits, pdc, 2) )
        ret = -EFAULT;
    else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
        ret = -EINVAL;
    else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
        ret = -EFAULT;
    else
    {
        u32 mask = 0;

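        /*
         * Collect the capability bits for the PM features Xen drives
         * itself; the guest-visible capabilities are then restricted to
         * the known bits minus those owned by Xen.
         */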
        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
            mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT;
        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
            mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT;
        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
            mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT;
        bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK |
                    ACPI_PDC_SMP_C1PT) & ~mask;
        ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask);
    }
    if ( !ret && __copy_to_guest_offset(pdc, 2, bits + 2, 1) )
        ret = -EFAULT;

    return ret;
}