/******************************************************************************
 * sysctl.c
 *
 * System management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K Fraser
 */

#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <xen/tmem.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/current.h>
#include <xen/hypercall.h>
#include <public/sysctl.h>
#include <asm/numa.h>
#include <xen/nodemask.h>
#include <xsm/xsm.h>
#include <xen/pmstat.h>
#include <xen/livepatch.h>
#include <xen/gcov.h>

long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
    long ret = 0;
    int copyback = -1;
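    /*
     * copyback semantics: -1 => copy op back to the guest only on success
     * (the default); 1 => copy back even on error; 0 => never copy back.
     */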
    struct xen_sysctl curop, *op = &curop;
    static DEFINE_SPINLOCK(sysctl_lock);

    if ( copy_from_guest(op, u_sysctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_SYSCTL_INTERFACE_VERSION )
        return -EACCES;

    ret = xsm_sysctl(XSM_PRIV, op->cmd);
    if ( ret )
        return ret;

    /*
     * Trylock here avoids deadlock with an existing sysctl critical section
     * which might (for some current or future reason) want to synchronise
     * with this vcpu.
     */
    while ( !spin_trylock(&sysctl_lock) )
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_sysctl, "h", u_sysctl);

    switch ( op->cmd )
    {
    case XEN_SYSCTL_readconsole:
        ret = xsm_readconsole(XSM_HOOK, op->u.readconsole.clear);
        if ( ret )
            break;

        ret = read_console_ring(&op->u.readconsole);
        break;

    case XEN_SYSCTL_tbuf_op:
        ret = tb_control(&op->u.tbuf_op);
        break;

    case XEN_SYSCTL_sched_id:
        op->u.sched_id.sched_id = sched_id();
        break;

    case XEN_SYSCTL_getdomaininfolist:
    {
        struct domain *d;
        struct xen_domctl_getdomaininfo info = { 0 };
        u32 num_domains = 0;

        rcu_read_lock(&domlist_read_lock);

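        /*
         * Walk every domain: skip those below first_domain and those the
         * XSM policy hides from the caller; stop once max_domains entries
         * have been copied out.
         */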
        for_each_domain ( d )
        {
            if ( d->domain_id < op->u.getdomaininfolist.first_domain )
                continue;
            if ( num_domains == op->u.getdomaininfolist.max_domains )
                break;

            ret = xsm_getdomaininfo(XSM_HOOK, d);
            if ( ret )
                continue;

            getdomaininfo(d, &info);

            if ( copy_to_guest_offset(op->u.getdomaininfolist.buffer,
                                      num_domains, &info, 1) )
            {
                ret = -EFAULT;
                break;
            }

            num_domains++;
        }

        rcu_read_unlock(&domlist_read_lock);

        if ( ret != 0 )
            break;

        op->u.getdomaininfolist.num_domains = num_domains;
    }
    break;

#ifdef CONFIG_PERF_COUNTERS
    case XEN_SYSCTL_perfc_op:
        ret = perfc_control(&op->u.perfc_op);
        break;
#endif

#ifdef CONFIG_LOCK_PROFILE
    case XEN_SYSCTL_lockprof_op:
        ret = spinlock_profile_control(&op->u.lockprof_op);
        break;
#endif
    case XEN_SYSCTL_debug_keys:
    {
        char c;
        uint32_t i;

        ret = -EFAULT;
        for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
        {
            if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
                goto out;
            handle_keypress(c, guest_cpu_user_regs());
        }
        ret = 0;
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_getcpuinfo:
    {
        uint32_t i, nr_cpus;
        struct xen_sysctl_cpuinfo cpuinfo = { 0 };

        nr_cpus = min(op->u.getcpuinfo.max_cpus, nr_cpu_ids);

        ret = -EFAULT;
        for ( i = 0; i < nr_cpus; i++ )
        {
            cpuinfo.idletime = get_cpu_idle_time(i);

            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
                goto out;
        }

        op->u.getcpuinfo.nr_cpus = i;
        ret = 0;
    }
    break;

    case XEN_SYSCTL_availheap:
        op->u.availheap.avail_bytes = avail_domheap_pages_region(
            op->u.availheap.node,
            op->u.availheap.min_bitwidth,
            op->u.availheap.max_bitwidth);
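        /* avail_domheap_pages_region() counts pages; report bytes to the caller. */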
        op->u.availheap.avail_bytes <<= PAGE_SHIFT;
        break;

#if defined (CONFIG_ACPI) && defined (CONFIG_HAS_CPUFREQ)
    case XEN_SYSCTL_get_pmstat:
        ret = do_get_pm_info(&op->u.get_pmstat);
        break;

    case XEN_SYSCTL_pm_op:
        ret = do_pm_op(&op->u.pm_op);
        if ( ret == -EAGAIN )
            copyback = 1;
        break;
#endif

    case XEN_SYSCTL_page_offline_op:
    {
        uint32_t *status, *ptr;
        unsigned long pfn;

        ret = xsm_page_offline(XSM_HOOK, op->u.page_offline.cmd);
        if ( ret )
            break;

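        /* One 32-bit status slot per frame in the inclusive [start, end] range. */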
        ptr = status = xmalloc_bytes(sizeof(uint32_t) *
                                     (op->u.page_offline.end -
                                      op->u.page_offline.start + 1));
        if ( !status )
        {
            dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
            ret = -ENOMEM;
            break;
        }

        memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
               (op->u.page_offline.end - op->u.page_offline.start + 1));

        for ( pfn = op->u.page_offline.start;
              pfn <= op->u.page_offline.end;
              pfn++ )
        {
            switch ( op->u.page_offline.cmd )
            {
            /* Shall we revert here on failure, or leave that to the caller? */
            case sysctl_page_offline:
                ret = offline_page(pfn, 0, ptr++);
                break;
            case sysctl_page_online:
                ret = online_page(pfn, ptr++);
                break;
            case sysctl_query_page_offline:
                ret = query_page_offline(pfn, ptr++);
                break;
            default:
                ret = -EINVAL;
                break;
            }

            if ( ret )
                break;
        }

        if ( copy_to_guest(
            op->u.page_offline.status, status,
            op->u.page_offline.end - op->u.page_offline.start + 1) )
            ret = -EFAULT;

        xfree(status);
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_cpupool_op:
        ret = cpupool_do_sysctl(&op->u.cpupool_op);
        break;

    case XEN_SYSCTL_scheduler_op:
        ret = sched_adjust_global(&op->u.scheduler_op);
        break;

    case XEN_SYSCTL_physinfo:
    {
        struct xen_sysctl_physinfo *pi = &op->u.physinfo;

        memset(pi, 0, sizeof(*pi));
        pi->threads_per_core =
            cpumask_weight(per_cpu(cpu_sibling_mask, 0));
        pi->cores_per_socket =
            cpumask_weight(per_cpu(cpu_core_mask, 0)) / pi->threads_per_core;
        pi->nr_cpus = num_online_cpus();
        pi->nr_nodes = num_online_nodes();
        pi->max_node_id = MAX_NUMNODES-1;
        pi->max_cpu_id = nr_cpu_ids - 1;
        pi->total_pages = total_pages;
        /* Both values are sampled together under the heap lock. */
        get_outstanding_claims(&pi->free_pages, &pi->outstanding_pages);
        pi->scrub_pages = 0;
        pi->cpu_khz = cpu_khz;
        pi->max_mfn = get_upper_mfn_bound();
        arch_do_physinfo(pi);

        if ( copy_to_guest(u_sysctl, op, 1) )
            ret = -EFAULT;
    }
    break;

    case XEN_SYSCTL_numainfo:
    {
        unsigned int i, j, num_nodes;
        struct xen_sysctl_numainfo *ni = &op->u.numainfo;
        bool_t do_meminfo = !guest_handle_is_null(ni->meminfo);
        bool_t do_distance = !guest_handle_is_null(ni->distance);

        num_nodes = last_node(node_online_map) + 1;

        if ( do_meminfo || do_distance )
        {
            struct xen_sysctl_meminfo meminfo = { };

            if ( num_nodes > ni->num_nodes )
                num_nodes = ni->num_nodes;
            for ( i = 0; i < num_nodes; ++i )
            {
                static uint32_t distance[MAX_NUMNODES];
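                /* A static buffer is safe here: sysctl_lock serialises all callers. */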

                if ( do_meminfo )
                {
                    if ( node_online(i) )
                    {
                        meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
                        meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
                    }
                    else
                        meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ;

                    if ( copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }

                if ( do_distance )
                {
                    for ( j = 0; j < num_nodes; j++ )
                    {
                        distance[j] = __node_distance(i, j);
                        if ( distance[j] == NUMA_NO_DISTANCE )
                            distance[j] = XEN_INVALID_NODE_DIST;
                    }

                    if ( copy_to_guest_offset(ni->distance, i * num_nodes,
                                              distance, num_nodes) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }
            }
        }
        else
            i = num_nodes;

        if ( !ret && (ni->num_nodes != i) )
        {
            ni->num_nodes = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.numainfo.num_nodes) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_cputopoinfo:
    {
        unsigned int i, num_cpus;
        struct xen_sysctl_cputopoinfo *ti = &op->u.cputopoinfo;

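        /* Report CPUs up to the highest online index, capped by the caller's buffer. */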
        num_cpus = cpumask_last(&cpu_online_map) + 1;
        if ( !guest_handle_is_null(ti->cputopo) )
        {
            struct xen_sysctl_cputopo cputopo = { };

            if ( num_cpus > ti->num_cpus )
                num_cpus = ti->num_cpus;
            for ( i = 0; i < num_cpus; ++i )
            {
                if ( cpu_present(i) )
                {
                    cputopo.core = cpu_to_core(i);
                    cputopo.socket = cpu_to_socket(i);
                    cputopo.node = cpu_to_node(i);
                    if ( cputopo.node == NUMA_NO_NODE )
                        cputopo.node = XEN_INVALID_NODE_ID;
                }
                else
                {
                    cputopo.core = XEN_INVALID_CORE_ID;
                    cputopo.socket = XEN_INVALID_SOCKET_ID;
                    cputopo.node = XEN_INVALID_NODE_ID;
                }

                if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) )
                {
                    ret = -EFAULT;
                    break;
                }
            }
        }
        else
            i = num_cpus;

        if ( !ret && (ti->num_cpus != i) )
        {
            ti->num_cpus = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.cputopoinfo.num_cpus) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

#ifdef CONFIG_GCOV
    case XEN_SYSCTL_gcov_op:
        ret = sysctl_gcov_op(&op->u.gcov_op);
        copyback = 1;
        break;
#endif

#ifdef CONFIG_HAS_PCI
    case XEN_SYSCTL_pcitopoinfo:
    {
        struct xen_sysctl_pcitopoinfo *ti = &op->u.pcitopoinfo;
        unsigned int i = 0;

        if ( guest_handle_is_null(ti->devs) ||
             guest_handle_is_null(ti->nodes) )
        {
            ret = -EINVAL;
            break;
        }

        while ( i < ti->num_devs )
        {
            physdev_pci_device_t dev;
            uint32_t node;
            const struct pci_dev *pdev;

            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )
            {
                ret = -EFAULT;
                break;
            }

            pcidevs_lock();
            pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);
            if ( !pdev )
                node = XEN_INVALID_DEV;
            else if ( pdev->node == NUMA_NO_NODE )
                node = XEN_INVALID_NODE_ID;
            else
                node = pdev->node;
            pcidevs_unlock();

            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )
            {
                ret = -EFAULT;
                break;
            }

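            /*
             * After the first 64 devices, stop early if preemption is due;
             * num_devs is updated below so the caller can tell how far we got.
             */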
            if ( (++i > 0x3f) && hypercall_preempt_check() )
                break;
        }

        if ( !ret && (ti->num_devs != i) )
        {
            ti->num_devs = i;
            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )
                ret = -EFAULT;
        }
        break;
    }
#endif

    case XEN_SYSCTL_tmem_op:
        ret = tmem_control(&op->u.tmem_op);
        break;

    case XEN_SYSCTL_livepatch_op:
        ret = livepatch_op(&op->u.livepatch);
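        /* Copy the op back even on failure, unless the operation is unimplemented. */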
        if ( ret != -ENOSYS && ret != -EOPNOTSUPP )
            copyback = 1;
        break;

    case XEN_SYSCTL_set_parameter:
    {
#define XEN_SET_PARAMETER_MAX_SIZE 1023
        char *params;

        if ( op->u.set_parameter.pad[0] || op->u.set_parameter.pad[1] ||
             op->u.set_parameter.pad[2] )
        {
            ret = -EINVAL;
            break;
        }
        if ( op->u.set_parameter.size > XEN_SET_PARAMETER_MAX_SIZE )
        {
            ret = -E2BIG;
            break;
        }
        params = xmalloc_bytes(op->u.set_parameter.size + 1);
        if ( !params )
        {
            ret = -ENOMEM;
            break;
        }
        if ( copy_from_guest(params, op->u.set_parameter.params,
                             op->u.set_parameter.size) )
            ret = -EFAULT;
        else
        {
            params[op->u.set_parameter.size] = 0;
            ret = runtime_parse(params);
        }

        xfree(params);

        break;
    }

    default:
        ret = arch_do_sysctl(op, u_sysctl);
        copyback = 0;
        break;
    }

 out:
    spin_unlock(&sysctl_lock);

    if ( copyback && (!ret || copyback > 0) &&
         __copy_to_guest(u_sysctl, op, 1) )
        ret = -EFAULT;

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */