1 /******************************************************************************
2 * tools/xenbaked.c
3 *
4 * Tool for collecting raw trace buffer data from Xen and
5 * performing some accumulation operations and other processing
6 * on it.
7 *
8 * Copyright (C) 2004 by Intel Research Cambridge
9 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
10 * Copyright (C) 2006 by Hewlett Packard Fort Collins
11 *
12 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
13 * Rob Gardner, rob.gardner@hp.com
 *          Lucy Cherkasova, lucy.cherkasova@hp.com
15 * Much code based on xentrace, authored by Mark Williamson,
16 * mark.a.williamson@intel.com
17 * Date: November, 2005
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; under version 2 of the License.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; If not, see <http://www.gnu.org/licenses/>.
30 */
31
32 #include <time.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <sys/mman.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <signal.h>
40 #include <xenevtchn.h>
41 #define XC_WANT_COMPAT_MAP_FOREIGN_API
42 #include <xenctrl.h>
43 #include <xen/xen.h>
44 #include <string.h>
45 #include <sys/select.h>
46 #include <getopt.h>
47
/* Print an error message to stderr, decoding the current errno.
 * errno is saved before the fprintf (which may itself modify errno)
 * and restored afterwards so the caller can still inspect it. */
#define PERROR(_m, _a...) \
do { \
    int __saved_errno = errno; \
    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \
            __saved_errno, strerror(__saved_errno)); \
    errno = __saved_errno; \
} while (0)
55
/* Minimal atomic_t definition as expected by xen/trace.h below;
 * _atomic_read is a plain, non-synchronized read of the counter. */
typedef struct { int counter; } atomic_t;
#define _atomic_read(v) ((v).counter)
58
59 #include <xen/trace.h>
60 #include "xenbaked.h"
61
62
63 /***** Compile time configuration of defaults ********************************/
64
65 /* when we've got more records than this waiting, we log it to the output */
66 #define NEW_DATA_THRESH 1
67
68 /* sleep for this long (milliseconds) between checking the trace buffers */
69 #define POLL_SLEEP_MILLIS 100
70
71 /* Size of time period represented by each sample */
72 #define MS_PER_SAMPLE 100
73
74 /* CPU Frequency */
75 #define MHZ
76 #define CPU_FREQ 2660 MHZ
77
78 /***** The code **************************************************************/
79
80 typedef struct settings_st {
81 struct timespec poll_sleep;
82 unsigned long new_data_thresh;
83 unsigned long ms_per_sample;
84 double cpu_freq;
85 } settings_t;
86
87 struct t_struct {
88 const struct t_info *t_info; /* Structure with information about individual buffers */
89 struct t_buf **meta; /* Pointers to trace buffer metadata */
90 unsigned char **data; /* Pointers to trace buffer data areas */
91 };
92
93 settings_t opts;
94
95 int interrupted = 0; /* gets set if we get a SIGHUP */
96 int rec_count = 0;
97 int wakeups = 0;
98 time_t start_time;
99 int dom0_flips = 0;
100
101 _new_qos_data *new_qos;
102 _new_qos_data **cpu_qos_data;
103
104 int global_cpu;
105 uint64_t global_now;
106
107 // array of currently running domains, indexed by cpu
108 int *running = NULL;
109
110 // number of cpu's on this platform
111 int NCPU = 0;
112
113
114 static void advance_next_datapoint(uint64_t);
115 static void alloc_qos_data(int ncpu);
116 static int process_record(int, struct t_rec *);
117 static void qos_kill_thread(int domid);
118
119
init_current(int ncpu)120 static void init_current(int ncpu)
121 {
122 running = calloc(ncpu, sizeof(int));
123 NCPU = ncpu;
124 printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
125 }
126
is_current(int domain,int cpu)127 static int is_current(int domain, int cpu)
128 {
129 // int i;
130
131 // for (i=0; i<NCPU; i++)
132 if (running[cpu] == domain)
133 return 1;
134 return 0;
135 }
136
137
138 #if 0 /* unused */
139 // return the domain that's currently running on the given cpu
140 static int current(int cpu)
141 {
142 return running[cpu];
143 }
144 #endif
145
/* Record that @domain is now the domain running on @cpu. */
static void set_current(int cpu, int domain)
{
    running[cpu] = domain;
}
150
151
152
/* Signal handler (SIGHUP/SIGTERM/SIGINT): request a clean shutdown.
 * Only sets a flag — the main loop notices it and unwinds.  NOTE(review):
 * the flag must be safe to write from signal context; volatile
 * sig_atomic_t is the portable choice for its declaration. */
static void close_handler(int signal)
{
    interrupted = 1;
}
157
158 #if 0
159 void dump_record(int cpu, struct t_rec *x)
160 {
161 printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
162 cpu, x->cycles, x->event, x->data[0]);
163 }
164 #endif
165
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 *
 * Returns a timespec representing the same interval.
 */
static struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* tv_nsec is in NANOseconds: 1 ms = 1,000,000 ns.  The previous code
     * multiplied by only 1000 (microseconds), making any sub-second part
     * of the requested poll sleep 1000x too short. */
    spec.tv_nsec = (millis % 1000) * 1000000;

    return spec;
}
179
180
/* Per-event counters: each entry maps a Xen trace event id to a printable
 * label; event_count accumulates how many such records were seen.  Entry 0
 * ("Other") collects events not matched by any later entry, and the table
 * is terminated by a NULL text pointer. */
typedef struct
{
    int event_count;  /* number of records seen with this event id */
    int event_id;     /* TRC_* event id from xen/trace.h */
    char *text;       /* human-readable label for dump_stats() */
} stat_map_t;

stat_map_t stat_map[] = {
    { 0, 0, "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE, "Wake" },
    { 0, TRC_SCHED_BLOCK, "Block" },
    { 0, TRC_SCHED_SWITCH, "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV, "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT, "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP, "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP, "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
204
205
/* Debug aid: would print, per cpu, the sum of ns_gotten across all domains
 * as a percentage of elapsed time.  The entire body is compiled out
 * (#if 0), so this is currently a no-op retained as documentation. */
static void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
227
228
229
dump_stats(void)230 static void dump_stats(void)
231 {
232 stat_map_t *smt = stat_map;
233 time_t end_time, run_time;
234
235 time(&end_time);
236
237 run_time = end_time - start_time;
238
239 printf("Event counts:\n");
240 while (smt->text != NULL) {
241 printf("%08d\t%s\n", smt->event_count, smt->text);
242 smt++;
243 }
244
245 printf("processed %d total records in %d seconds (%ld per second)\n",
246 rec_count, (int)run_time,
247 run_time ? (long)(rec_count/run_time) : 0L);
248
249 printf("woke up %d times in %d seconds (%ld per second)\n",
250 wakeups, (int) run_time,
251 run_time ? (long)(wakeups/run_time) : 0L);
252
253 check_gotten_sum();
254 }
255
log_event(int event_id)256 static void log_event(int event_id)
257 {
258 stat_map_t *smt = stat_map;
259
260 // printf("event_id = 0x%x\n", event_id);
261
262 while (smt->text != NULL) {
263 if (smt->event_id == event_id) {
264 smt->event_count++;
265 return;
266 }
267 smt++;
268 }
269 if (smt->text == NULL)
270 stat_map[0].event_count++; // other
271 }
272
int virq_port;                       /* local event channel port bound to VIRQ_TBUF */
xenevtchn_handle *xce_handle = NULL; /* evtchn handle; NULL => fall back to polling */
275
276 /* Returns the event channel handle. */
277 /* Stolen from xenstore code */
eventchn_init(void)278 static int eventchn_init(void)
279 {
280 int rc;
281
282 // to revert to old way:
283 if (0)
284 return -1;
285
286 xce_handle = xenevtchn_open(NULL, 0);
287
288 if (xce_handle == NULL)
289 perror("Failed to open evtchn device");
290
291 if ((rc = xenevtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
292 perror("Failed to bind to domain exception virq port");
293 virq_port = rc;
294
295 return xce_handle == NULL ? -1 : 0;
296 }
297
wait_for_event(void)298 static void wait_for_event(void)
299 {
300 int ret;
301 fd_set inset;
302 evtchn_port_t port;
303 struct timeval tv;
304 int evtchn_fd;
305
306 if (xce_handle == NULL) {
307 nanosleep(&opts.poll_sleep, NULL);
308 return;
309 }
310
311 evtchn_fd = xenevtchn_fd(xce_handle);
312
313 FD_ZERO(&inset);
314 FD_SET(evtchn_fd, &inset);
315 tv.tv_sec = 1;
316 tv.tv_usec = 0;
317 // tv = millis_to_timespec(&opts.poll_sleep);
318 ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
319
320 if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
321 if ((port = xenevtchn_pending(xce_handle)) == -1)
322 perror("Failed to read from event fd");
323
324 // if (port == virq_port)
325 // printf("got the event I was looking for\r\n");
326
327 if (xenevtchn_unmask(xce_handle, port) == -1)
328 perror("Failed to write to event fd");
329 }
330 }
331
get_tbufs(unsigned long * mfn,unsigned long * size)332 static void get_tbufs(unsigned long *mfn, unsigned long *size)
333 {
334 xc_interface *xc_handle = xc_interface_open(0,0,0);
335 int ret;
336
337 if ( !xc_handle )
338 {
339 exit(EXIT_FAILURE);
340 }
341
342 ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);
343
344 if ( ret != 0 )
345 {
346 perror("Couldn't enable trace buffers");
347 exit(1);
348 }
349
350 xc_interface_close(xc_handle);
351 }
352
disable_tracing(void)353 static void disable_tracing(void)
354 {
355 xc_interface *xc_handle = xc_interface_open(0,0,0);
356 xc_tbuf_disable(xc_handle);
357 xc_interface_close(xc_handle);
358 }
359
360 /**
361 * map_tbufs - memory map Xen trace buffers into user space
362 * @tbufs_mfn: mfn of the trace buffers
363 * @num: number of trace buffers to map
364 * @size: size of each trace buffer
365 *
 * Maps the Xen trace buffers into process address space.
367 */
static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                                  unsigned long tinfo_size)
{
    xc_interface *xc_handle;
    /* static: the caller keeps the returned pointer, so the struct must
     * outlive this call.  Only called once, so re-entry is not an issue. */
    static struct t_struct tbufs = { 0 };
    int i;

    xc_handle = xc_interface_open(0,0,0);
    if ( !xc_handle )
    {
        exit(EXIT_FAILURE);
    }

    /* Map t_info metadata structure */
    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
                                        PROT_READ, tbufs_mfn);

    if ( tbufs.t_info == 0 )
    {
        PERROR("Failed to mmap trace buffers");
        exit(EXIT_FAILURE);
    }

    if ( tbufs.t_info->tbuf_size == 0 )
    {
        fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
        exit(EXIT_FAILURE);
    }

    /* Map per-cpu buffers */
    tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
    tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
    if ( tbufs.meta == NULL || tbufs.data == NULL )
    {
        PERROR( "Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    for(i=0; i<num; i++)
    {
        /* t_info stores each cpu's mfn list as 32-bit values at
         * mfn_offset[i] (in units of uint32_t from the base). */
        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
                                   + tbufs.t_info->mfn_offset[i];
        int j;
        xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];

        /* Widen the 32-bit mfns to xen_pfn_t for the mapping call. */
        for ( j=0; j<tbufs.t_info->tbuf_size; j++)
            pfn_list[j] = (xen_pfn_t)mfn_list[j];

        tbufs.meta[i] = xc_map_foreign_pages(xc_handle, DOMID_XEN,
                                             PROT_READ | PROT_WRITE,
                                             pfn_list,
                                             tbufs.t_info->tbuf_size);
        if ( tbufs.meta[i] == NULL )
        {
            PERROR("Failed to map cpu buffer!");
            exit(EXIT_FAILURE);
        }
        /* Record data follow immediately after the t_buf header. */
        tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
    }

    xc_interface_close(xc_handle);

    return &tbufs;
}
433
434 /**
435 * get_num_cpus - get the number of logical CPUs
436 */
get_num_cpus(void)437 static unsigned int get_num_cpus(void)
438 {
439 xc_physinfo_t physinfo = { 0 };
440 xc_interface *xc_handle = xc_interface_open(0,0,0);
441 int ret;
442
443 ret = xc_physinfo(xc_handle, &physinfo);
444
445 if ( ret != 0 )
446 {
447 PERROR("Failure to get logical CPU count from Xen");
448 exit(EXIT_FAILURE);
449 }
450
451 xc_interface_close(xc_handle);
452 opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
453
454 return physinfo.nr_cpus;
455 }
456
/**
 * monitor_tbufs - monitor the contents of tbufs
 *
 * Main loop: maps the trace buffers, then repeatedly drains every cpu's
 * ring buffer through process_record() until a shutdown signal sets
 * `interrupted`.  Between sweeps it blocks in wait_for_event() (VIRQ or
 * timed poll).  Returns 0.
 */
static int monitor_tbufs(void)
{
    int i;

    struct t_struct *tbufs;      /* Pointer to hypervisor maps */
    struct t_buf **meta;         /* pointers to the trace buffer metadata */
    unsigned char **data;        /* pointers to the trace buffer data areas
                                  * where they are mapped into user space. */
    unsigned long tbufs_mfn;     /* mfn of the tbufs */
    unsigned int  num;           /* number of trace buffers / logical CPUS */
    unsigned long tinfo_size;    /* size of t_info metadata map */
    unsigned long size;          /* size of a single trace buffer */

    unsigned long data_size, rec_size;

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &tinfo_size);
    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);

    size = tbufs->t_info->tbuf_size * XC_PAGE_SIZE;

    data_size = size - sizeof(struct t_buf);

    meta = tbufs->meta;
    data = tbufs->data;

    if ( eventchn_init() < 0 )
        fprintf(stderr, "Failed to initialize event channel; "
                "Using POLL method\r\n");

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
            unsigned long start_offset, end_offset, cons, prod;

            /* cons/prod are free-running byte counters maintained by Xen;
             * their difference is the amount of unread data. */
            cons = meta[i]->cons;
            prod = meta[i]->prod;
            xen_rmb(); /* read prod, then read item. */

            if ( cons == prod )
                continue;

            start_offset = cons % data_size;
            end_offset   = prod % data_size;

            if ( start_offset >= end_offset )
            {
                /* unread region wraps: consume up to the end of the
                 * buffer first, then continue from offset 0 below. */
                while ( start_offset != data_size )
                {
                    rec_size = process_record(
                        i, (struct t_rec *)(data[i] + start_offset));
                    start_offset += rec_size;
                }
                start_offset = 0;
            }
            while ( start_offset != end_offset )
            {
                rec_size = process_record(
                    i, (struct t_rec *)(data[i] + start_offset));
                start_offset += rec_size;
            }
            xen_mb(); /* read item, then update cons. */
            meta[i]->cons = prod;
        }

        wait_for_event();
        wakeups++;
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}
546
547
548 /******************************************************************************
549 * Command line handling
550 *****************************************************************************/
551
const char *program_version = "xenbaked v1.4";
const char *program_bug_address = "<rob.gardner@hp.com>";

/* Two-level stringify so the numeric default macros (MS_PER_SAMPLE etc.)
 * can be embedded as text in the usage message. */
#define xstr(x) str(x)
#define str(x) #x
557
/* Print the command-line help text and terminate with failure status.
 * Called both for -?/--help and for any argument parsing error. */
static void usage(void)
{
#define USAGE_STR \
"Usage: xenbaked [OPTION...]\n" \
"Tool to capture and partially process Xen trace buffer data\n" \
"\n" \
"  -m, --ms_per_sample=MS     Specify the number of milliseconds per sample\n" \
"                             (default " xstr(MS_PER_SAMPLE) ").\n" \
"  -s, --poll-sleep=p         Set sleep time, p, in milliseconds between\n" \
"                             polling the trace buffer for new data\n" \
"                             (default " xstr(POLL_SLEEP_MILLIS) ").\n" \
"  -t, --log-thresh=l         Set number, l, of new records required to\n" \
"                             trigger a write to output (default " \
                              xstr(NEW_DATA_THRESH) ").\n" \
"  -?, --help                 Show this message\n" \
"  -V, --version              Print program version\n" \
"\n" \
"This tool is used to capture trace buffer data from Xen. The data is\n" \
"saved in a shared memory structure to be further processed by xenmon.\n"

    printf(USAGE_STR);
    printf("\nReport bugs to %s\n", program_bug_address);

    exit(EXIT_FAILURE);
}
583
/* Convert the argument string pointed to by arg to a long int representation.
 * Any parse error prints a diagnostic and calls usage(), which exits —
 * so a normal return implies the whole string parsed cleanly. */
static long argtol(const char *restrict arg, int base)
{
    char *endp;
    long val;
    int saved_errno;

    errno = 0;
    val = strtol(arg, &endp, base);
    /* Save errno immediately: the fprintf below may overwrite it, and the
     * original read strerror(errno) only after printing, risking a wrong
     * error message. */
    saved_errno = errno;

    if (saved_errno != 0) {
        fprintf(stderr, "Invalid option argument: %s\n", arg);
        fprintf(stderr, "Error: %s\n\n", strerror(saved_errno));
        usage();
    } else if (endp == arg || *endp != '\0') {
        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
        usage();
    }

    return val;
}
604
605 /* parse command line arguments */
parse_args(int argc,char ** argv)606 static void parse_args(int argc, char **argv)
607 {
608 int option;
609 static struct option long_options[] = {
610 { "log-thresh", required_argument, 0, 't' },
611 { "poll-sleep", required_argument, 0, 's' },
612 { "ms_per_sample", required_argument, 0, 'm' },
613 { "help", no_argument, 0, '?' },
614 { "version", no_argument, 0, 'V' },
615 { 0, 0, 0, 0 }
616 };
617
618 while ( (option = getopt_long(argc, argv, "m:s:t:?V",
619 long_options, NULL)) != -1)
620 {
621 switch ( option )
622 {
623 case 't': /* set new records threshold for logging */
624 opts.new_data_thresh = argtol(optarg, 0);
625 break;
626
627 case 's': /* set sleep time (given in milliseconds) */
628 opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
629 break;
630
631 case 'm': /* set ms_per_sample */
632 opts.ms_per_sample = argtol(optarg, 0);
633 break;
634
635 case 'V': /* print program version */
636 printf("%s\n", program_version);
637 exit(EXIT_SUCCESS);
638 break;
639
640 default:
641 usage();
642 }
643 }
644
645 /* all arguments should have been processed */
646 if (optind != argc) {
647 usage();
648 }
649 }
650
#define SHARED_MEM_FILE "/var/run/xenq-shm"
/* Create the shared-memory file consumed by xenmon and mmap one
 * _new_qos_data region per cpu out of it.  Each region is page-aligned:
 * the inner write loop grows the file in whole pages, and `i` ends up as
 * sizeof(_new_qos_data) rounded up to a page multiple — that rounded size
 * is used both as the next region's file offset and as structlen.
 * Leaves new_qos NULL; it is set per-record in process_record(). */
static void alloc_qos_data(int ncpu)
{
    int i, n, pgsize, off=0;
    char *dummy;
    int qos_fd;

    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));


    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
    if (qos_fd < 0) {
        PERROR(SHARED_MEM_FILE);
        exit(2);
    }
    pgsize = getpagesize();
    /* one zeroed page, written repeatedly to extend the file */
    dummy = malloc(pgsize);
    if (!dummy) {
        PERROR("malloc");
        exit(EXIT_FAILURE);
    }
    memset(dummy, 0, pgsize);

    for (n=0; n<ncpu; n++) {

        /* extend the file by whole pages until it covers the struct */
        for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
            if ((write(qos_fd, dummy, pgsize)) != pgsize) {
                PERROR(SHARED_MEM_FILE);
                exit(2);
            }

        new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE,
                                         MAP_SHARED, qos_fd, off);
        off += i;  /* i == page-rounded struct size after the loop above */
        if (new_qos == MAP_FAILED) {
            PERROR("mmap");
            exit(3);
        }
        //  printf("new_qos = %p\n", new_qos);
        memset(new_qos, 0, sizeof(_new_qos_data));
        new_qos->next_datapoint = 0;
        advance_next_datapoint(0);
        new_qos->structlen = i;
        new_qos->ncpu = ncpu;
        //      printf("structlen = 0x%x\n", i);
        cpu_qos_data[n] = new_qos;
    }
    free(dummy);
    close(qos_fd);
    new_qos = NULL;
}
702
703
main(int argc,char ** argv)704 int main(int argc, char **argv)
705 {
706 int ret;
707 struct sigaction act;
708
709 time(&start_time);
710 opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
711 opts.new_data_thresh = NEW_DATA_THRESH;
712 opts.ms_per_sample = MS_PER_SAMPLE;
713 opts.cpu_freq = CPU_FREQ;
714
715 parse_args(argc, argv);
716 fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);
717
718
719 /* ensure that if we get a signal, we'll do cleanup, then exit */
720 act.sa_handler = close_handler;
721 act.sa_flags = 0;
722 sigemptyset(&act.sa_mask);
723 sigaction(SIGHUP, &act, NULL);
724 sigaction(SIGTERM, &act, NULL);
725 sigaction(SIGINT, &act, NULL);
726
727 ret = monitor_tbufs();
728
729 dump_stats();
730 msync(new_qos, sizeof(_new_qos_data), MS_SYNC);
731 disable_tracing();
732
733 return ret;
734 }
735
/* (Re)initialize slot @idx of the CURRENT cpu's qos data (global new_qos)
 * for domain @domid: zero its domain_info, mark it in use, synthesize a
 * name, and clear its counters in every sample slot. */
static void qos_init_domain(int domid, int idx)
{
    int i;

    memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
    new_qos->domain_info[idx].last_update_time = global_now;
    //  runnable_start_time[idx] = 0;
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    new_qos->domain_info[idx].in_use = 1;
    new_qos->domain_info[idx].blocked_start_time = 0;
    new_qos->domain_info[idx].id = domid;
    /* The idle domain gets a per-cpu name since it "runs" on every cpu. */
    if (domid == IDLE_DOMAIN_ID)
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Idle Task%d", global_cpu);
    else
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Domain#%d", domid);

    /* wipe this domain's column in every sample of the ring */
    for (i=0; i<NSAMPLES; i++) {
        new_qos->qdata[i].ns_gotten[idx] = 0;
        new_qos->qdata[i].ns_allocated[idx] = 0;
        new_qos->qdata[i].ns_waiting[idx] = 0;
        new_qos->qdata[i].ns_blocked[idx] = 0;
        new_qos->qdata[i].switchin_count[idx] = 0;
        new_qos->qdata[i].io_count[idx] = 0;
    }
}
765
global_init_domain(int domid,int idx)766 static void global_init_domain(int domid, int idx)
767 {
768 int cpu;
769 _new_qos_data *saved_qos;
770
771 saved_qos = new_qos;
772
773 for (cpu=0; cpu<NCPU; cpu++) {
774 new_qos = cpu_qos_data[cpu];
775 qos_init_domain(domid, idx);
776 }
777 new_qos = saved_qos;
778 }
779
780 // give index of this domain in the qos data array
indexof(int domid)781 static int indexof(int domid)
782 {
783 int idx;
784 xc_dominfo_t dominfo[NDOMAINS];
785 xc_interface *xc_handle;
786 int ndomains;
787
788 if (domid < 0) { // shouldn't happen
789 printf("bad domain id: %d\r\n", domid);
790 return 0;
791 }
792
793 for (idx=0; idx<NDOMAINS; idx++)
794 if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
795 return idx;
796
797 // not found, make a new entry
798 for (idx=0; idx<NDOMAINS; idx++)
799 if (new_qos->domain_info[idx].in_use == 0) {
800 global_init_domain(domid, idx);
801 return idx;
802 }
803
804 // call domaininfo hypercall to try and garbage collect unused entries
805 xc_handle = xc_interface_open(0,0,0);
806 ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
807 xc_interface_close(xc_handle);
808
809 // for each domain in our data, look for it in the system dominfo structure
810 // and purge the domain's data from our state if it does not exist in the
811 // dominfo structure
812 for (idx=0; idx<NDOMAINS; idx++) {
813 int domid = new_qos->domain_info[idx].id;
814 int jdx;
815
816 for (jdx=0; jdx<ndomains; jdx++) {
817 if (dominfo[jdx].domid == domid)
818 break;
819 }
820 if (jdx == ndomains) // we didn't find domid in the dominfo struct
821 if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not
822 // contained in dominfo
823 qos_kill_thread(domid); // purge our stale data
824 }
825
826 // look again for a free slot
827 for (idx=0; idx<NDOMAINS; idx++)
828 if (new_qos->domain_info[idx].in_use == 0) {
829 global_init_domain(domid, idx);
830 return idx;
831 }
832
833 // still no space found, so bail
834 fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
835 exit(2);
836 }
837
/* Return the runnable flag for @domid in the current cpu's qos data.
 * Note: indexof() may allocate a slot as a side effect. */
static int domain_runnable(int domid)
{
    return new_qos->domain_info[indexof(domid)].runnable;
}
842
843
/* Account time spent blocked for @domid up to @now, then restart (or stop)
 * the blocked clock depending on whether the domain is now runnable. */
static void update_blocked_time(int domid, uint64_t now)
{
    uint64_t t_blocked;
    int id = indexof(domid);

    /* blocked_start_time == 0 means "not blocked / invalid" */
    if (new_qos->domain_info[id].blocked_start_time != 0) {
        if (now >= new_qos->domain_info[id].blocked_start_time)
            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
        else
            /* timestamp wrapped around 2^64 */
            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
    }

    if (domain_runnable(domid))
        new_qos->domain_info[id].blocked_start_time = 0;
    else
        new_qos->domain_info[id].blocked_start_time = now;
}
862
863
// advance to next datapoint for all domains
/* Move next_datapoint forward in the sample ring (QOS_INCR wraps it) and
 * zero every per-domain and global counter in the fresh slot, stamping it
 * with @now. */
static void advance_next_datapoint(uint64_t now)
{
    int new, old, didx;

    old = new_qos->next_datapoint;
    new = QOS_INCR(old);
    new_qos->next_datapoint = new;
    //  memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
    for (didx = 0; didx < NDOMAINS; didx++) {
        new_qos->qdata[new].ns_gotten[didx] = 0;
        new_qos->qdata[new].ns_allocated[didx] = 0;
        new_qos->qdata[new].ns_waiting[didx] = 0;
        new_qos->qdata[new].ns_blocked[didx] = 0;
        new_qos->qdata[new].switchin_count[didx] = 0;
        new_qos->qdata[new].io_count[didx] = 0;
    }
    new_qos->qdata[new].ns_passed = 0;
    new_qos->qdata[new].lost_records = 0;
    new_qos->qdata[new].flip_free_periods = 0;

    new_qos->qdata[new].timestamp = now;
}
887
888
889
/* Update accounting for one domain (@domid on @cpu) as of timestamp @now:
 * fold cpu time into ns_gotten if it is the running domain, refresh its
 * blocked-time clock, and advance the current sample's ns_passed.
 * Contains heuristics for timestamp wraparound and cross-cpu clock skew. */
static void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = indexof(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
        // what happened here? either a timestamp wraparound, or more likely,
        // a slight inconsistency among timestamps from various cpu's
        if (-time_since_update < billion) {
            // fairly small difference, let's just adjust 'now' to be a little
            // beyond last_update_time
            time_since_update = -time_since_update;
        }
        else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
            // difference is huge, must be a wraparound
            // last_update time should be "near" ~0ULL,
            // and now should be "near" 0
            time_since_update = now + (~0ULL - last_update_time);
            printf("time wraparound\n");
        }
        else {
            // none of the above, may be an out of order record
            // no good solution, just ignore and update again later
            return;
        }
    }

    new_qos->domain_info[id].last_update_time = now;

    /* only accrue cpu time if the domain was runnable at the last update
     * AND is the domain our bookkeeping says is on this cpu */
    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) {              // wrapped around
            run_time = now + (~0ULL - start);
            // this could happen if there is nothing going on within a cpu;
            // in this case the idle domain would run forever
            //        printf("warning: start > now\n");
        }
        else
            run_time = now - start;
        //      if (run_time < 0)       // should not happen
        //        printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;

        new_qos->qdata[n].ns_gotten[id] += run_time;
        //      if (domid == 0 && cpu == 1)
        //        printf("adding run time for dom0 on cpu1\r\n");

    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
        //    printf("why timewrap?\r\n");
    }
    new_qos->qdata[n].timestamp = now;
}
965
966
967 // called by dump routines to update all structures
qos_update_all(uint64_t now,int cpu)968 static void qos_update_all(uint64_t now, int cpu)
969 {
970 int i;
971
972 for (i=0; i<NDOMAINS; i++)
973 if (new_qos->domain_info[i].in_use)
974 qos_update_thread(cpu, new_qos->domain_info[i].id, now);
975 }
976
977
qos_update_thread_stats(int cpu,int domid,uint64_t now)978 static void qos_update_thread_stats(int cpu, int domid, uint64_t now)
979 {
980 if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
981 qos_update_all(now, cpu);
982 advance_next_datapoint(now);
983 return;
984 }
985 qos_update_thread(cpu, domid, now);
986 }
987
988
989
// called when a new thread gets the cpu
/* @ns_alloc:  ns of cpu time the scheduler allocated to the domain
 * @ns_waited: ns the domain spent waiting before being scheduled */
static void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
{
    int idx = indexof(domid);

    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);
    new_qos->domain_info[idx].blocked_start_time = 0; // invalidate
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //runnable_start_time[idx] = 0;

    new_qos->domain_info[idx].start_time = now;
    new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
    new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
    new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
    qos_update_thread_stats(cpu, domid, now);
    set_current(cpu, domid);

    /* reset the dom0 page-flip counter at switch-in; qos_switch_out()
     * inspects it to count "flip free" scheduling periods */
    if (domid == 0)
        dom0_flips = 0;
}
1012
// called when the current thread is taken off the cpu
/* @gotten: ns of cpu time the domain actually received, as reported by
 * the scheduler trace record */
static void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
    int idx = indexof(domid);
    int n;

    if (!is_current(domid, cpu)) {
        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
    }

    /* diagnostics only — suspiciously small 'gotten' values are logged
     * but still accounted below */
    if (gotten == 0) {
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
    }

    if (gotten < 100) {
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
    }


    n = new_qos->next_datapoint;
#if 0
    new_qos->qdata[n].ns_gotten[idx] += gotten;
    if (gotten > new_qos->qdata[n].ns_passed)
      printf("inconsistency #257, diff = %lld\n",
            gotten - new_qos->qdata[n].ns_passed );
#endif
    new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
    qos_update_thread_stats(cpu, domid, now);

    // process dom0 page flips
    /* if dom0 ran without performing a single page flip, record a
     * "flip free" period in the current sample */
    if (domid == 0)
        if (dom0_flips == 0)
            new_qos->qdata[n].flip_free_periods++;
}
1049
// called when domain is put to sleep, may also be called
// when thread is already asleep
static void qos_state_sleeping(int cpu, int domid, uint64_t now)
{
    int idx;

    if (!domain_runnable(domid))        // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 0;
    /* start the blocked-time clock; update_blocked_time() will accrue
     * from this timestamp on the next state change */
    new_qos->domain_info[idx].blocked_start_time = now;
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //  runnable_start_time[idx] = 0; // invalidate
    qos_update_thread_stats(cpu, domid, now);
}
1066
1067
1068
1069 // domain died, presume it's dead on all cpu's, not just mostly dead
qos_kill_thread(int domid)1070 static void qos_kill_thread(int domid)
1071 {
1072 int cpu;
1073
1074 for (cpu=0; cpu<NCPU; cpu++) {
1075 cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
1076 }
1077
1078 }
1079
1080
// called when thread becomes runnable, may also be called
// when thread is already runnable
static void qos_state_runnable(int cpu, int domid, uint64_t now)
{
    int idx;


    /* account elapsed time before flipping the runnable state below */
    qos_update_thread_stats(cpu, domid, now);

    if (domain_runnable(domid)) // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);

    new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
}
1101
1102
/* A page grant transfer happened for @domid: bump its io_count in every
 * cpu's data block where it is tracked, credit dom0 (which performs the
 * flip) in the current block, and note the flip for the dom0 flip-free
 * period accounting in qos_switch_out(). */
static void qos_count_packets(domid_t domid, uint64_t now)
{
    int i, idx = indexof(domid);
    _new_qos_data *cpu_data;

    for (i=0; i<NCPU; i++) {
        cpu_data = cpu_qos_data[i];
        if (cpu_data->domain_info[idx].in_use) {
            cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
        }
    }

    /* index 0 is dom0's slot — see indexof() allocation order;
     * NOTE(review): this assumes dom0 occupies slot 0 — confirm */
    new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
    dom0_flips++;
}
1118
1119
/* Decode one trace record from @cpu's buffer and dispatch it to the qos
 * accounting routines.  Returns the record's size in bytes so the caller
 * can advance its ring-buffer cursor: 4-byte header, optional 8-byte
 * cycle count, plus 4 bytes per extra data word. */
static int process_record(int cpu, struct t_rec *r)
{
    uint64_t now = 0;
    uint32_t *extra_u32 = r->u.nocycles.extra_u32;

    /* all qos_* helpers operate on the global new_qos for this cpu */
    new_qos = cpu_qos_data[cpu];

    rec_count++;

    if ( r->cycles_included )
    {
        now = ((uint64_t)r->u.cycles.cycles_hi << 32) | r->u.cycles.cycles_lo;
        /* convert TSC cycles to ns using the cpu frequency (MHz) probed
         * in get_num_cpus() */
        now = ((double)now) / (opts.cpu_freq / 1000.0);
        extra_u32 = r->u.cycles.extra_u32;
    }

    global_now = now;
    global_cpu = cpu;

    log_event(r->event);

    switch (r->event) {

    case TRC_SCHED_SWITCH_INFPREV:
        // domain data[0] just switched out and received data[1] ns of cpu time
        qos_switch_out(cpu, extra_u32[0], now, extra_u32[1]);
        //    printf("ns_gotten %ld\n", extra_u32[1]);
        break;

    case TRC_SCHED_SWITCH_INFNEXT:
        // domain data[0] just switched in and
        // waited data[1] ns, and was allocated data[2] ns of cpu time
        qos_switch_in(cpu, extra_u32[0], now, extra_u32[2], extra_u32[1]);
        break;

    case TRC_SCHED_DOM_ADD:
        (void) indexof(extra_u32[0]);  /* allocate a tracking slot */
        break;

    case TRC_SCHED_DOM_REM:
        qos_kill_thread(extra_u32[0]);
        break;

    case TRC_SCHED_SLEEP:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_WAKE:
        qos_state_runnable(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_BLOCK:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_MEM_PAGE_GRANT_TRANSFER:
        qos_count_packets(extra_u32[0], now);
        break;

    default:
        break;
    }

    /* reset so stale pointers are caught between records */
    new_qos = NULL;

    return 4 + (r->cycles_included ? 8 : 0) + (r->extra_u32 * 4);
}
1187
1188 /*
1189 * Local variables:
1190 * mode: C
1191 * c-file-style: "BSD"
1192 * c-basic-offset: 4
1193 * tab-width: 4
1194 * indent-tabs-mode: nil
1195 * End:
1196 */
1197