1 /******************************************************************************
2  * sysctl.c
3  *
4  * System management operations. For use by node control stack.
5  *
6  * Copyright (c) 2002-2006, K Fraser
7  */
8 
9 #include <xen/types.h>
10 #include <xen/lib.h>
11 #include <xen/mm.h>
12 #include <xen/sched.h>
13 #include <xen/domain.h>
14 #include <xen/event.h>
15 #include <xen/domain_page.h>
16 #include <xen/tmem.h>
17 #include <xen/trace.h>
18 #include <xen/console.h>
19 #include <xen/iocap.h>
20 #include <xen/guest_access.h>
21 #include <xen/keyhandler.h>
22 #include <asm/current.h>
23 #include <xen/hypercall.h>
24 #include <public/sysctl.h>
25 #include <asm/numa.h>
26 #include <xen/nodemask.h>
27 #include <xsm/xsm.h>
28 #include <xen/pmstat.h>
29 #include <xen/livepatch.h>
30 #include <xen/gcov.h>
31 
/*
 * Top-level handler for the sysctl hypercall, for use by the privileged
 * node control stack only (enforced via xsm_sysctl(XSM_PRIV, ...)).
 *
 * The request is copied in from the guest, dispatched on op->cmd, and the
 * (possibly updated) request copied back according to 'copyback':
 *   -1 (default) - copy back only if the operation succeeded,
 *    0           - never copy back (op was not modified, or the sub-op
 *                  already copied out what it needed to),
 *    1           - copy back even on error (sub-op reports error detail).
 */
long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
    long ret = 0;
    int copyback = -1;
    struct xen_sysctl curop, *op = &curop;
    /* Serialises all sysctl operations system-wide. */
    static DEFINE_SPINLOCK(sysctl_lock);

    if ( copy_from_guest(op, u_sysctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_SYSCTL_INTERFACE_VERSION )
        return -EACCES;

    ret = xsm_sysctl(XSM_PRIV, op->cmd);
    if ( ret )
        return ret;

    /*
     * Trylock here avoids deadlock with an existing sysctl critical section
     * which might (for some current or future reason) want to synchronise
     * with this vcpu.
     */
    while ( !spin_trylock(&sysctl_lock) )
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_sysctl, "h", u_sysctl);

    switch ( op->cmd )
    {
    case XEN_SYSCTL_readconsole:
        ret = xsm_readconsole(XSM_HOOK, op->u.readconsole.clear);
        if ( ret )
            break;

        ret = read_console_ring(&op->u.readconsole);
        break;

    case XEN_SYSCTL_tbuf_op:
        ret = tb_control(&op->u.tbuf_op);
        break;

    case XEN_SYSCTL_sched_id:
        op->u.sched_id.sched_id = sched_id();
        break;

    case XEN_SYSCTL_getdomaininfolist:
    {
        struct domain *d;
        struct xen_domctl_getdomaininfo info = { 0 };
        u32 num_domains = 0;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id < op->u.getdomaininfolist.first_domain )
                continue;
            if ( num_domains == op->u.getdomaininfolist.max_domains )
                break;

            /* Domains the caller is not permitted to see are skipped. */
            ret = xsm_getdomaininfo(XSM_HOOK, d);
            if ( ret )
                continue;

            getdomaininfo(d, &info);

            if ( copy_to_guest_offset(op->u.getdomaininfolist.buffer,
                                      num_domains, &info, 1) )
            {
                ret = -EFAULT;
                break;
            }

            num_domains++;
        }

        rcu_read_unlock(&domlist_read_lock);

        if ( ret != 0 )
            break;

        op->u.getdomaininfolist.num_domains = num_domains;
    }
    break;

#ifdef CONFIG_PERF_COUNTERS
    case XEN_SYSCTL_perfc_op:
        ret = perfc_control(&op->u.perfc_op);
        break;
#endif

#ifdef CONFIG_LOCK_PROFILE
    case XEN_SYSCTL_lockprof_op:
        ret = spinlock_profile_control(&op->u.lockprof_op);
        break;
#endif
    case XEN_SYSCTL_debug_keys:
    {
        char c;
        uint32_t i;

        ret = -EFAULT;
        for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
        {
            if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
                goto out;
            handle_keypress(c, guest_cpu_user_regs());
        }
        ret = 0;
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_getcpuinfo:
    {
        uint32_t i, nr_cpus;
        struct xen_sysctl_cpuinfo cpuinfo = { 0 };

        nr_cpus = min(op->u.getcpuinfo.max_cpus, nr_cpu_ids);

        ret = -EFAULT;
        for ( i = 0; i < nr_cpus; i++ )
        {
            cpuinfo.idletime = get_cpu_idle_time(i);

            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
                goto out;
        }

        op->u.getcpuinfo.nr_cpus = i;
        ret = 0;
    }
    break;

    case XEN_SYSCTL_availheap:
        op->u.availheap.avail_bytes = avail_domheap_pages_region(
            op->u.availheap.node,
            op->u.availheap.min_bitwidth,
            op->u.availheap.max_bitwidth);
        op->u.availheap.avail_bytes <<= PAGE_SHIFT;
        break;

#if defined (CONFIG_ACPI) && defined (CONFIG_HAS_CPUFREQ)
    case XEN_SYSCTL_get_pmstat:
        ret = do_get_pm_info(&op->u.get_pmstat);
        break;

    case XEN_SYSCTL_pm_op:
        ret = do_pm_op(&op->u.pm_op);
        /* -EAGAIN: partial progress; caller needs the updated op to retry. */
        if ( ret == -EAGAIN )
            copyback = 1;
        break;
#endif

    case XEN_SYSCTL_page_offline_op:
    {
        uint32_t *status, *ptr;
        unsigned long pfn;

        ret = xsm_page_offline(XSM_HOOK, op->u.page_offline.cmd);
        if ( ret )
            break;

        /*
         * Sanity-check the requested range: with end < start the element
         * count (end - start + 1) would wrap, leading to a bogus (huge)
         * allocation size and an effectively unbounded pfn loop below.
         */
        if ( op->u.page_offline.end < op->u.page_offline.start )
        {
            ret = -EINVAL;
            break;
        }

        ptr = status = xmalloc_bytes( sizeof(uint32_t) *
                                (op->u.page_offline.end -
                                  op->u.page_offline.start + 1));
        if ( !status )
        {
            dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
            ret = -ENOMEM;
            break;
        }

        memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
                      (op->u.page_offline.end - op->u.page_offline.start + 1));

        for ( pfn = op->u.page_offline.start;
              pfn <= op->u.page_offline.end;
              pfn ++ )
        {
            switch ( op->u.page_offline.cmd )
            {
                /* Shall revert here if failed, or leave caller do it? */
                case sysctl_page_offline:
                    ret = offline_page(pfn, 0, ptr++);
                    break;
                case sysctl_page_online:
                    ret = online_page(pfn, ptr++);
                    break;
                case sysctl_query_page_offline:
                    ret = query_page_offline(pfn, ptr++);
                    break;
                default:
                    ret = -EINVAL;
                    break;
            }

            if ( ret )
                break;
        }

        /* Per-pfn status array is reported even on partial failure. */
        if ( copy_to_guest(
                 op->u.page_offline.status, status,
                 op->u.page_offline.end - op->u.page_offline.start + 1) )
            ret = -EFAULT;

        xfree(status);
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_cpupool_op:
        ret = cpupool_do_sysctl(&op->u.cpupool_op);
        break;

    case XEN_SYSCTL_scheduler_op:
        ret = sched_adjust_global(&op->u.scheduler_op);
        break;

    case XEN_SYSCTL_physinfo:
    {
        struct xen_sysctl_physinfo *pi = &op->u.physinfo;

        memset(pi, 0, sizeof(*pi));
        pi->threads_per_core =
            cpumask_weight(per_cpu(cpu_sibling_mask, 0));
        pi->cores_per_socket =
            cpumask_weight(per_cpu(cpu_core_mask, 0)) / pi->threads_per_core;
        pi->nr_cpus = num_online_cpus();
        pi->nr_nodes = num_online_nodes();
        pi->max_node_id = MAX_NUMNODES-1;
        pi->max_cpu_id = nr_cpu_ids - 1;
        pi->total_pages = total_pages;
        /* Protected by lock */
        get_outstanding_claims(&pi->free_pages, &pi->outstanding_pages);
        pi->scrub_pages = 0;
        pi->cpu_khz = cpu_khz;
        pi->max_mfn = get_upper_mfn_bound();
        arch_do_physinfo(pi);

        if ( copy_to_guest(u_sysctl, op, 1) )
            ret = -EFAULT;
    }
    break;

    case XEN_SYSCTL_numainfo:
    {
        unsigned int i, j, num_nodes;
        struct xen_sysctl_numainfo *ni = &op->u.numainfo;
        /* Null handles mean the caller only wants the node count. */
        bool_t do_meminfo = !guest_handle_is_null(ni->meminfo);
        bool_t do_distance = !guest_handle_is_null(ni->distance);

        num_nodes = last_node(node_online_map) + 1;

        if ( do_meminfo || do_distance )
        {
            struct xen_sysctl_meminfo meminfo = { };

            /* Never write more entries than the caller's buffers hold. */
            if ( num_nodes > ni->num_nodes )
                num_nodes = ni->num_nodes;
            for ( i = 0; i < num_nodes; ++i )
            {
                static uint32_t distance[MAX_NUMNODES];

                if ( do_meminfo )
                {
                    if ( node_online(i) )
                    {
                        meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
                        meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
                    }
                    else
                        meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ;

                    if ( copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }

                if ( do_distance )
                {
                    for ( j = 0; j < num_nodes; j++ )
                    {
                        distance[j] = __node_distance(i, j);
                        if ( distance[j] == NUMA_NO_DISTANCE )
                            distance[j] = XEN_INVALID_NODE_DIST;
                    }

                    if ( copy_to_guest_offset(ni->distance, i * num_nodes,
                                              distance, num_nodes) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }
            }
        }
        else
            i = num_nodes;

        /* Report how many entries were actually filled in. */
        if ( !ret && (ni->num_nodes != i) )
        {
            ni->num_nodes = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.numainfo.num_nodes) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_cputopoinfo:
    {
        unsigned int i, num_cpus;
        struct xen_sysctl_cputopoinfo *ti = &op->u.cputopoinfo;

        num_cpus = cpumask_last(&cpu_online_map) + 1;
        if ( !guest_handle_is_null(ti->cputopo) )
        {
            struct xen_sysctl_cputopo cputopo = { };

            /* Never write more entries than the caller's buffer holds. */
            if ( num_cpus > ti->num_cpus )
                num_cpus = ti->num_cpus;
            for ( i = 0; i < num_cpus; ++i )
            {
                if ( cpu_present(i) )
                {
                    cputopo.core = cpu_to_core(i);
                    cputopo.socket = cpu_to_socket(i);
                    cputopo.node = cpu_to_node(i);
                    if ( cputopo.node == NUMA_NO_NODE )
                        cputopo.node = XEN_INVALID_NODE_ID;
                }
                else
                {
                    cputopo.core = XEN_INVALID_CORE_ID;
                    cputopo.socket = XEN_INVALID_SOCKET_ID;
                    cputopo.node = XEN_INVALID_NODE_ID;
                }

                if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) )
                {
                    ret = -EFAULT;
                    break;
                }
            }
        }
        else
            /* Null handle: caller only wants the CPU count. */
            i = num_cpus;

        /* Report how many entries were actually filled in. */
        if ( !ret && (ti->num_cpus != i) )
        {
            ti->num_cpus = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.cputopoinfo.num_cpus) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

#ifdef CONFIG_GCOV
    case XEN_SYSCTL_gcov_op:
        ret = sysctl_gcov_op(&op->u.gcov_op);
        copyback = 1;
        break;
#endif

#ifdef CONFIG_HAS_PCI
    case XEN_SYSCTL_pcitopoinfo:
    {
        struct xen_sysctl_pcitopoinfo *ti = &op->u.pcitopoinfo;
        unsigned int i = 0;

        if ( guest_handle_is_null(ti->devs) ||
             guest_handle_is_null(ti->nodes) )
        {
            ret = -EINVAL;
            break;
        }

        while ( i < ti->num_devs )
        {
            physdev_pci_device_t dev;
            uint32_t node;
            const struct pci_dev *pdev;

            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )
            {
                ret = -EFAULT;
                break;
            }

            pcidevs_lock();
            pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);
            if ( !pdev )
                node = XEN_INVALID_DEV;
            else if ( pdev->node == NUMA_NO_NODE )
                node = XEN_INVALID_NODE_ID;
            else
                node = pdev->node;
            pcidevs_unlock();

            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )
            {
                ret = -EFAULT;
                break;
            }

            /*
             * Process at least 64 devices before checking for preemption;
             * on early exit num_devs below tells the caller how far we got.
             */
            if ( (++i > 0x3f) && hypercall_preempt_check() )
                break;
        }

        if ( !ret && (ti->num_devs != i) )
        {
            ti->num_devs = i;
            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )
                ret = -EFAULT;
        }
        break;
    }
#endif

    case XEN_SYSCTL_tmem_op:
        ret = tmem_control(&op->u.tmem_op);
        break;

    case XEN_SYSCTL_livepatch_op:
        ret = livepatch_op(&op->u.livepatch);
        /* Copy back error detail unless the sub-op is unimplemented. */
        if ( ret != -ENOSYS && ret != -EOPNOTSUPP )
            copyback = 1;
        break;

    case XEN_SYSCTL_set_parameter:
    {
#define XEN_SET_PARAMETER_MAX_SIZE 1023
        char *params;

        if ( op->u.set_parameter.pad[0] || op->u.set_parameter.pad[1] ||
             op->u.set_parameter.pad[2] )
        {
            ret = -EINVAL;
            break;
        }
        if ( op->u.set_parameter.size > XEN_SET_PARAMETER_MAX_SIZE )
        {
            ret = -E2BIG;
            break;
        }
        /* +1 for the NUL terminator added below. */
        params = xmalloc_bytes(op->u.set_parameter.size + 1);
        if ( !params )
        {
            ret = -ENOMEM;
            break;
        }
        if ( copy_from_guest(params, op->u.set_parameter.params,
                             op->u.set_parameter.size) )
            ret = -EFAULT;
        else
        {
            params[op->u.set_parameter.size] = 0;
            ret = runtime_parse(params);
        }

        xfree(params);

        break;
    }

    default:
        ret = arch_do_sysctl(op, u_sysctl);
        copyback = 0;
        break;
    }

 out:
    spin_unlock(&sysctl_lock);

    if ( copyback && (!ret || copyback > 0) &&
         __copy_to_guest(u_sysctl, op, 1) )
        ret = -EFAULT;

    return ret;
}
522 
523 /*
524  * Local variables:
525  * mode: C
526  * c-file-style: "BSD"
527  * c-basic-offset: 4
528  * tab-width: 4
529  * indent-tabs-mode: nil
530  * End:
531  */
532