/*****************************************************************************
# pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
#
# Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; If not, see <http://www.gnu.org/licenses/>.
#
# The full GNU General Public License is included in this distribution in the
# file called LICENSE.
#
*****************************************************************************/

#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
#include <xen/iocap.h>
#include <xen/compat.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <public/xen.h>
#include <xen/cpumask.h>
#include <asm/processor.h>
#include <xen/percpu.h>
#include <xen/domain.h>
#include <xen/acpi.h>

#include <public/sysctl.h>
#include <acpi/cpufreq/cpufreq.h>
#include <xen/pmstat.h>

DEFINE_PER_CPU_READ_MOSTLY(struct pm_px *, cpufreq_statistic_data);

/*
 * Get PM statistic info
 */
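/*
 * Handler for the XEN_SYSCTL_get_pmstat sysctl sub-op, dispatched from the
 * common sysctl code.  A toolstack would typically reach it through libxc
 * helpers such as xc_pm_get_max_px() / xc_pm_get_pxstat() (assuming the
 * usual tools layout).
 */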
int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
{
    int ret = 0;
    const struct processor_pminfo *pmpt;

    if ( !op || (op->cpuid >= nr_cpu_ids) || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

    switch ( op->type & PMSTAT_CATEGORY_MASK )
    {
    case PMSTAT_CX:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
            return -ENODEV;
        break;
    case PMSTAT_PX:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
            return -ENODEV;
        if ( !cpufreq_driver.init )
            return -ENODEV;
        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
            return -EINVAL;
        break;
    default:
        return -ENODEV;
    }

    switch ( op->type )
    {
    case PMSTAT_get_max_px:
    {
        op->u.getpx.total = pmpt->perf.state_count;
        break;
    }

    case PMSTAT_get_pxstat:
    {
        uint32_t ct;
        struct pm_px *pxpt;
        spinlock_t *cpufreq_statistic_lock =
            &per_cpu(cpufreq_statistic_lock, op->cpuid);

        spin_lock(cpufreq_statistic_lock);

        pxpt = per_cpu(cpufreq_statistic_data, op->cpuid);
        if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
        {
            spin_unlock(cpufreq_statistic_lock);
            return -ENODATA;
        }

        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;

        cpufreq_residency_update(op->cpuid, pxpt->u.cur);

        ct = pmpt->perf.state_count;
        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct * ct) )
        {
            spin_unlock(cpufreq_statistic_lock);
            ret = -EFAULT;
            break;
        }

        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
        {
            spin_unlock(cpufreq_statistic_lock);
            ret = -EFAULT;
            break;
        }

        op->u.getpx.total = pxpt->u.total;
        op->u.getpx.usable = pxpt->u.usable;
        op->u.getpx.last = pxpt->u.last;
        op->u.getpx.cur = pxpt->u.cur;

        spin_unlock(cpufreq_statistic_lock);

        break;
    }

    case PMSTAT_reset_pxstat:
    {
        cpufreq_statistic_reset(op->cpuid);
        break;
    }

    case PMSTAT_get_max_cx:
    {
        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
        ret = 0;
        break;
    }

    case PMSTAT_get_cxstat:
    {
        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
        break;
    }

    case PMSTAT_reset_cxstat:
    {
        ret = pmstat_reset_cx_stat(op->cpuid);
        break;
    }

    default:
        printk("not defined sub-hypercall @ do_get_pm_info\n");
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * 1. Get PM parameter
 * 2. Provide user PM control
 */
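/*
 * Build a space-separated list of all registered cpufreq governor names in
 * the caller-supplied buffer; fails with -EINVAL if the names do not fit
 * within @size bytes.
 */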
static int read_scaling_available_governors(char *scaling_available_governors,
                                            unsigned int size)
{
    unsigned int i = 0;
    struct cpufreq_governor *t;

    if ( !scaling_available_governors )
        return -EINVAL;

    list_for_each_entry(t, &cpufreq_governor_list, governor_list)
    {
        i += scnprintf(&scaling_available_governors[i],
                       CPUFREQ_NAME_LEN, "%s ", t->name);
        if ( i > size )
            return -EINVAL;
    }
    scaling_available_governors[i-1] = '\0';

    return 0;
}

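/*
 * Fill in the GET_CPUFREQ_PARA sysctl output.  The caller is expected to
 * pass buffer sizes (cpu_num/freq_num/gov_num) matching the current
 * configuration; on a mismatch the correct sizes are written back and
 * -EAGAIN is returned so the caller can reallocate and retry.
 */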
static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
{
    uint32_t ret = 0;
    const struct processor_pminfo *pmpt;
    struct cpufreq_policy *policy;
    uint32_t gov_num = 0;
    uint32_t *affected_cpus;
    uint32_t *scaling_available_frequencies;
    char *scaling_available_governors;
    struct list_head *pos;
    uint32_t cpu, i, j = 0;

    pmpt = processor_pminfo[op->cpuid];
    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);

    if ( !pmpt || !pmpt->perf.states ||
         !policy || !policy->governor )
        return -EINVAL;

    list_for_each(pos, &cpufreq_governor_list)
        gov_num++;

    if ( (op->u.get_para.cpu_num != cpumask_weight(policy->cpus)) ||
         (op->u.get_para.freq_num != pmpt->perf.state_count) ||
         (op->u.get_para.gov_num != gov_num) )
    {
        op->u.get_para.cpu_num = cpumask_weight(policy->cpus);
        op->u.get_para.freq_num = pmpt->perf.state_count;
        op->u.get_para.gov_num = gov_num;
        return -EAGAIN;
    }

    if ( !(affected_cpus = xzalloc_array(uint32_t, op->u.get_para.cpu_num)) )
        return -ENOMEM;
    for_each_cpu(cpu, policy->cpus)
        affected_cpus[j++] = cpu;
    ret = copy_to_guest(op->u.get_para.affected_cpus,
                        affected_cpus, op->u.get_para.cpu_num);
    xfree(affected_cpus);
    if ( ret )
        return ret;

    if ( !(scaling_available_frequencies =
           xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
        return -ENOMEM;
    for ( i = 0; i < op->u.get_para.freq_num; i++ )
        scaling_available_frequencies[i] =
            pmpt->perf.states[i].core_frequency * 1000;
    ret = copy_to_guest(op->u.get_para.scaling_available_frequencies,
                        scaling_available_frequencies,
                        op->u.get_para.freq_num);
    xfree(scaling_available_frequencies);
    if ( ret )
        return ret;

    if ( !(scaling_available_governors =
           xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
        return -ENOMEM;
    if ( (ret = read_scaling_available_governors(scaling_available_governors,
                    gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
    {
        xfree(scaling_available_governors);
        return ret;
    }
    ret = copy_to_guest(op->u.get_para.scaling_available_governors,
                        scaling_available_governors,
                        gov_num * CPUFREQ_NAME_LEN);
    xfree(scaling_available_governors);
    if ( ret )
        return ret;

    op->u.get_para.cpuinfo_cur_freq =
        cpufreq_driver.get ? cpufreq_driver.get(op->cpuid) : policy->cur;
    op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq;
    op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq;
    op->u.get_para.scaling_cur_freq = policy->cur;
    op->u.get_para.scaling_max_freq = policy->max;
    op->u.get_para.scaling_min_freq = policy->min;

    if ( cpufreq_driver.name[0] )
        strlcpy(op->u.get_para.scaling_driver,
                cpufreq_driver.name, CPUFREQ_NAME_LEN);
    else
        strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);

    if ( policy->governor->name[0] )
        strlcpy(op->u.get_para.scaling_governor,
                policy->governor->name, CPUFREQ_NAME_LEN);
    else
        strlcpy(op->u.get_para.scaling_governor, "Unknown", CPUFREQ_NAME_LEN);

    /* governor specific para */
    if ( !strnicmp(op->u.get_para.scaling_governor,
                   "userspace", CPUFREQ_NAME_LEN) )
    {
        op->u.get_para.u.userspace.scaling_setspeed = policy->cur;
    }

    if ( !strnicmp(op->u.get_para.scaling_governor,
                   "ondemand", CPUFREQ_NAME_LEN) )
    {
        ret = get_cpufreq_ondemand_para(
            &op->u.get_para.u.ondemand.sampling_rate_max,
            &op->u.get_para.u.ondemand.sampling_rate_min,
            &op->u.get_para.u.ondemand.sampling_rate,
            &op->u.get_para.u.ondemand.up_threshold);
    }
    op->u.get_para.turbo_enabled = cpufreq_get_turbo_status(op->cpuid);

    return ret;
}

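/*
 * Switch the scaling governor of the policy covering op->cpuid to the one
 * named in op->u.set_gov.scaling_governor, leaving all other policy fields
 * unchanged.
 */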
static int set_cpufreq_gov(struct xen_sysctl_pm_op *op)
{
    struct cpufreq_policy new_policy, *old_policy;

    old_policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
    if ( !old_policy )
        return -EINVAL;

    memcpy(&new_policy, old_policy, sizeof(struct cpufreq_policy));

    new_policy.governor = __find_governor(op->u.set_gov.scaling_governor);
    if ( new_policy.governor == NULL )
        return -EINVAL;

    return __cpufreq_set_policy(old_policy, &new_policy);
}

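/*
 * Apply a single cpufreq tunable (ctrl_type/ctrl_value pair).  The
 * SCALING_SETSPEED, SAMPLING_RATE and UP_THRESHOLD controls are only valid
 * while the matching governor (userspace resp. ondemand) is active.
 */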
static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
{
    int ret = 0;
    struct cpufreq_policy *policy;

    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);

    if ( !policy || !policy->governor )
        return -EINVAL;

    switch ( op->u.set_para.ctrl_type )
    {
    case SCALING_MAX_FREQ:
    {
        struct cpufreq_policy new_policy;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        new_policy.max = op->u.set_para.ctrl_value;
        ret = __cpufreq_set_policy(policy, &new_policy);

        break;
    }

    case SCALING_MIN_FREQ:
    {
        struct cpufreq_policy new_policy;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        new_policy.min = op->u.set_para.ctrl_value;
        ret = __cpufreq_set_policy(policy, &new_policy);

        break;
    }

    case SCALING_SETSPEED:
    {
        unsigned int freq = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "userspace", CPUFREQ_NAME_LEN) )
            ret = write_userspace_scaling_setspeed(op->cpuid, freq);
        else
            ret = -EINVAL;

        break;
    }

    case SAMPLING_RATE:
    {
        unsigned int sampling_rate = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "ondemand", CPUFREQ_NAME_LEN) )
            ret = write_ondemand_sampling_rate(sampling_rate);
        else
            ret = -EINVAL;

        break;
    }

    case UP_THRESHOLD:
    {
        unsigned int up_threshold = op->u.set_para.ctrl_value;

        if ( !strnicmp(policy->governor->name,
                       "ondemand", CPUFREQ_NAME_LEN) )
            ret = write_ondemand_up_threshold(up_threshold);
        else
            ret = -EINVAL;

        break;
    }

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

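/*
 * Handler for the XEN_SYSCTL_pm_op sysctl sub-op.  Scheduler and C-state
 * limit sub-commands are handled up front (they do not require an online
 * CPU); the remaining cpufreq sub-commands are only permitted once Px
 * control has been handed to Xen (XEN_PX_INIT).  A toolstack would
 * typically drive this through libxc helpers such as xc_get_cpufreq_para()
 * or xc_set_cpufreq_gov() (assuming the usual tools layout).
 */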
int do_pm_op(struct xen_sysctl_pm_op *op)
{
    int ret = 0;
    const struct processor_pminfo *pmpt;

    switch ( op->cmd )
    {
    case XEN_SYSCTL_pm_op_set_sched_opt_smt:
    {
        uint32_t saved_value = sched_smt_power_savings;

        if ( op->cpuid != 0 )
            return -EINVAL;
        sched_smt_power_savings = !!op->u.set_sched_opt_smt;
        op->u.set_sched_opt_smt = saved_value;
        return 0;
    }

    case XEN_SYSCTL_pm_op_get_max_cstate:
        BUILD_BUG_ON(XEN_SYSCTL_CX_UNLIMITED != UINT_MAX);
        if ( op->cpuid == 0 )
            op->u.get_max_cstate = acpi_get_cstate_limit();
        else if ( op->cpuid == 1 )
            op->u.get_max_cstate = acpi_get_csubstate_limit();
        else
            ret = -EINVAL;
        return ret;

    case XEN_SYSCTL_pm_op_set_max_cstate:
        if ( op->cpuid == 0 )
            acpi_set_cstate_limit(op->u.set_max_cstate);
        else if ( op->cpuid == 1 )
            acpi_set_csubstate_limit(op->u.set_max_cstate);
        else
            ret = -EINVAL;
        return ret;
    }

    if ( op->cpuid >= nr_cpu_ids || !cpu_online(op->cpuid) )
        return -EINVAL;
    pmpt = processor_pminfo[op->cpuid];

    switch ( op->cmd & PM_PARA_CATEGORY_MASK )
    {
    case CPUFREQ_PARA:
        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
            return -ENODEV;
        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
            return -EINVAL;
        break;
    }

    switch ( op->cmd )
    {
    case GET_CPUFREQ_PARA:
    {
        ret = get_cpufreq_para(op);
        break;
    }

    case SET_CPUFREQ_GOV:
    {
        ret = set_cpufreq_gov(op);
        break;
    }

    case SET_CPUFREQ_PARA:
    {
        ret = set_cpufreq_para(op);
        break;
    }

    case GET_CPUFREQ_AVGFREQ:
    {
        op->u.get_avgfreq = cpufreq_driver_getavg(op->cpuid, USR_GETAVG);
        break;
    }

    case XEN_SYSCTL_pm_op_enable_turbo:
    {
        ret = cpufreq_update_turbo(op->cpuid, CPUFREQ_TURBO_ENABLED);
        break;
    }

    case XEN_SYSCTL_pm_op_disable_turbo:
    {
        ret = cpufreq_update_turbo(op->cpuid, CPUFREQ_TURBO_DISABLED);
        break;
    }

    default:
        printk("not defined sub-hypercall @ do_pm_op\n");
        ret = -ENOSYS;
        break;
    }

    return ret;
}

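/*
 * Sanitise the ACPI _PDC capability bits offered by the guest for a given
 * processor.  The guest passes three 32-bit words (revision, count,
 * capabilities); the capabilities word is masked so that only the C/P/T
 * related bits Xen does not manage itself survive, the architecture-specific
 * handler then folds in Xen's own requirements, and the resulting word is
 * copied back to the guest.
 */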
int acpi_set_pdc_bits(uint32_t acpi_id, XEN_GUEST_HANDLE(uint32) pdc)
{
    u32 bits[3];
    int ret;

    if ( copy_from_guest(bits, pdc, 2) )
        ret = -EFAULT;
    else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
        ret = -EINVAL;
    else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
        ret = -EFAULT;
    else
    {
        u32 mask = 0;

        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
            mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT;
        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
            mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT;
        if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
            mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT;
        bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK |
                    ACPI_PDC_SMP_C1PT) & ~mask;
        ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask);
    }
    if ( !ret && __copy_to_guest_offset(pdc, 2, bits + 2, 1) )
        ret = -EFAULT;

    return ret;
}