1 /******************************************************************************
2 * tools/xenbaked.c
3 *
4 * Tool for collecting raw trace buffer data from Xen and
5 * performing some accumulation operations and other processing
6 * on it.
7 *
8 * Copyright (C) 2004 by Intel Research Cambridge
9 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
10 * Copyright (C) 2006 by Hewlett Packard Fort Collins
11 *
12 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
13 * Rob Gardner, rob.gardner@hp.com
14 * Lucy Cherkasova, lucy.cherkasova.hp.com
15 * Much code based on xentrace, authored by Mark Williamson,
16 * mark.a.williamson@intel.com
17 * Date: November, 2005
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; under version 2 of the License.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; If not, see <http://www.gnu.org/licenses/>.
30 */
31
32 #include <time.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <sys/mman.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <signal.h>
40 #include <xenevtchn.h>
41 #define XC_WANT_COMPAT_MAP_FOREIGN_API
42 #include <xenctrl.h>
43 #include <xen/xen.h>
44 #include <string.h>
45 #include <sys/select.h>
46 #include <getopt.h>
47
/* Print an error message prefixed with "ERROR:", appending the current
 * errno value and its string form; errno is saved and restored so the
 * macro itself never clobbers it. */
#define PERROR(_m, _a...)                                       \
do {                                                            \
    int __saved_errno = errno;                                  \
    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,       \
            __saved_errno, strerror(__saved_errno));            \
    errno = __saved_errno;                                      \
} while (0)

/* Minimal counter type matching Xen's atomic_t layout; only plain
 * (non-atomic) reads are performed in this tool. */
typedef struct { int counter; } atomic_t;
#define _atomic_read(v)		((v).counter)
58
59 #include <xen/trace.h>
60 #include "xenbaked.h"
61
62
/***** Compile time configuration of defaults ********************************/

/* when we've got more records than this waiting, we log it to the output */
#define NEW_DATA_THRESH 1

/* sleep for this long (milliseconds) between checking the trace buffers */
#define POLL_SLEEP_MILLIS 100

/* Size of time period represented by each sample */
#define MS_PER_SAMPLE 100

/* CPU Frequency */
/* MHZ deliberately expands to nothing, so "2660 MHZ" reads like a unit
 * annotation but evaluates to plain 2660. This default is overwritten at
 * runtime by get_num_cpus() using xc_physinfo()'s cpu_khz. */
#define MHZ
#define CPU_FREQ 2660 MHZ
78 /***** The code **************************************************************/
79
/* Runtime-configurable settings, filled with compile-time defaults in
 * main() and overridden by command-line options in parse_args(). */
typedef struct settings_st {
    struct timespec poll_sleep;    /* sleep between trace-buffer polls */
    unsigned long new_data_thresh; /* records needed to trigger output */
    unsigned long ms_per_sample;   /* sample period in milliseconds */
    double cpu_freq;               /* CPU frequency in MHz (from Xen) */
} settings_t;

/* Handles onto the hypervisor trace buffers mapped into our address space. */
struct t_struct {
    const struct t_info *t_info;  /* Structure with information about individual buffers */
    struct t_buf **meta;          /* Pointers to trace buffer metadata */
    unsigned char **data;         /* Pointers to trace buffer data areas */
};
92
93 settings_t opts;
94
95 int interrupted = 0; /* gets set if we get a SIGHUP */
96 int rec_count = 0;
97 int wakeups = 0;
98 time_t start_time;
99 int dom0_flips = 0;
100
101 _new_qos_data *new_qos;
102 _new_qos_data **cpu_qos_data;
103
104 int global_cpu;
105 uint64_t global_now;
106
107 // array of currently running domains, indexed by cpu
108 int *running = NULL;
109
110 // number of cpu's on this platform
111 int NCPU = 0;
112
113
114 static void advance_next_datapoint(uint64_t);
115 static void alloc_qos_data(int ncpu);
116 static int process_record(int, struct t_rec *);
117 static void qos_kill_thread(int domid);
118
119
init_current(int ncpu)120 static void init_current(int ncpu)
121 {
122 running = calloc(ncpu, sizeof(int));
123 NCPU = ncpu;
124 printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
125 }
126
is_current(int domain,int cpu)127 static int is_current(int domain, int cpu)
128 {
129 // int i;
130
131 // for (i=0; i<NCPU; i++)
132 if (running[cpu] == domain)
133 return 1;
134 return 0;
135 }
136
137
138 #if 0 /* unused */
139 // return the domain that's currently running on the given cpu
140 static int current(int cpu)
141 {
142 return running[cpu];
143 }
144 #endif
145
set_current(int cpu,int domain)146 static void set_current(int cpu, int domain)
147 {
148 running[cpu] = domain;
149 }
150
151
152
/* Signal handler for SIGHUP/SIGTERM/SIGINT: request a clean shutdown.
 * Only sets a flag — everything else here is not async-signal-safe. */
static void close_handler(int signal)
{
    interrupted = 1;
}
157
158 #if 0
159 void dump_record(int cpu, struct t_rec *x)
160 {
161 printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
162 cpu, x->cycles, x->event, x->data[0]);
163 }
164 #endif
165
166 /**
167 * millis_to_timespec - convert a time in milliseconds to a struct timespec
168 * @millis: time interval in milliseconds
169 */
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 */
static struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* 1 ms = 1,000,000 ns. The previous code multiplied by 1000 only,
     * yielding microseconds in tv_nsec, so the sub-second part of every
     * poll sleep was ~1000x shorter than requested. */
    spec.tv_nsec = (millis % 1000) * 1000000;

    return spec;
}
179
180
/* Maps a trace event id to a human-readable name plus an occurrence
 * counter; used by log_event() and printed by dump_stats(). */
typedef struct
{
    int event_count;
    int event_id;
    char *text;
} stat_map_t;

/* Entry 0 ("Other") is the catch-all bucket for unrecognized events;
 * the all-zero entry at the end terminates the table. */
stat_map_t stat_map[] = {
    { 0, 0, 	    "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE, "Wake" },
    { 0, TRC_SCHED_BLOCK, "Block" },
    { 0, TRC_SCHED_SWITCH, "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV, "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT, "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP, "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP, "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
204
205
/* Debug-only sanity check of the per-domain ns_gotten totals; the whole
 * body is compiled out, so this is currently a no-op. */
static void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
227
228
229
dump_stats(void)230 static void dump_stats(void)
231 {
232 stat_map_t *smt = stat_map;
233 time_t end_time, run_time;
234
235 time(&end_time);
236
237 run_time = end_time - start_time;
238
239 printf("Event counts:\n");
240 while (smt->text != NULL) {
241 printf("%08d\t%s\n", smt->event_count, smt->text);
242 smt++;
243 }
244
245 printf("processed %d total records in %d seconds (%ld per second)\n",
246 rec_count, (int)run_time, (long)(rec_count/run_time));
247
248 printf("woke up %d times in %d seconds (%ld per second)\n", wakeups,
249 (int) run_time, (long)(wakeups/run_time));
250
251 check_gotten_sum();
252 }
253
log_event(int event_id)254 static void log_event(int event_id)
255 {
256 stat_map_t *smt = stat_map;
257
258 // printf("event_id = 0x%x\n", event_id);
259
260 while (smt->text != NULL) {
261 if (smt->event_id == event_id) {
262 smt->event_count++;
263 return;
264 }
265 smt++;
266 }
267 if (smt->text == NULL)
268 stat_map[0].event_count++; // other
269 }
270
271 int virq_port;
272 xenevtchn_handle *xce_handle = NULL;
273
274 /* Returns the event channel handle. */
275 /* Stolen from xenstore code */
eventchn_init(void)276 static int eventchn_init(void)
277 {
278 int rc;
279
280 // to revert to old way:
281 if (0)
282 return -1;
283
284 xce_handle = xenevtchn_open(NULL, 0);
285
286 if (xce_handle == NULL)
287 perror("Failed to open evtchn device");
288
289 if ((rc = xenevtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
290 perror("Failed to bind to domain exception virq port");
291 virq_port = rc;
292
293 return xce_handle == NULL ? -1 : 0;
294 }
295
/* Block until the trace-buffer VIRQ fires or up to 1 second elapses;
 * falls back to a plain nanosleep() of opts.poll_sleep when no event
 * channel handle is available. */
static void wait_for_event(void)
{
    int ret;
    fd_set inset;
    evtchn_port_t port;
    struct timeval tv;
    int evtchn_fd;

    if (xce_handle == NULL) {
        /* POLL method: no event channel, just sleep */
        nanosleep(&opts.poll_sleep, NULL);
        return;
    }

    evtchn_fd = xenevtchn_fd(xce_handle);

    FD_ZERO(&inset);
    FD_SET(evtchn_fd, &inset);
    tv.tv_sec = 1;   /* cap the wait so 'interrupted' is still noticed */
    tv.tv_usec = 0;
    // tv = millis_to_timespec(&opts.poll_sleep);
    ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);

    if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
        /* consume the pending event, then unmask to re-arm the port */
        if ((port = xenevtchn_pending(xce_handle)) == -1)
            perror("Failed to read from event fd");

        //    if (port == virq_port)
        //        printf("got the event I was looking for\r\n");

        if (xenevtchn_unmask(xce_handle, port) == -1)
            perror("Failed to write to event fd");
    }
}
329
get_tbufs(unsigned long * mfn,unsigned long * size)330 static void get_tbufs(unsigned long *mfn, unsigned long *size)
331 {
332 xc_interface *xc_handle = xc_interface_open(0,0,0);
333 int ret;
334
335 if ( !xc_handle )
336 {
337 exit(EXIT_FAILURE);
338 }
339
340 ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);
341
342 if ( ret != 0 )
343 {
344 perror("Couldn't enable trace buffers");
345 exit(1);
346 }
347
348 xc_interface_close(xc_handle);
349 }
350
disable_tracing(void)351 static void disable_tracing(void)
352 {
353 xc_interface *xc_handle = xc_interface_open(0,0,0);
354 xc_tbuf_disable(xc_handle);
355 xc_interface_close(xc_handle);
356 }
357
358 /**
359 * map_tbufs - memory map Xen trace buffers into user space
360 * @tbufs_mfn: mfn of the trace buffers
361 * @num: number of trace buffers to map
362 * @size: size of each trace buffer
363 *
364 * Maps the Xen trace buffers them into process address space.
365 */
/**
 * map_tbufs - memory map Xen trace buffers into user space
 * @tbufs_mfn: mfn of the trace buffers
 * @num: number of trace buffers to map
 * @size: size of each trace buffer
 *
 * Maps the Xen trace buffers them into process address space.
 * Returns a pointer to a static t_struct holding the t_info metadata map
 * and per-cpu meta/data pointers; exits on any failure.
 */
static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                                  unsigned long tinfo_size)
{
    xc_interface *xc_handle;
    static struct t_struct tbufs = { 0 };
    int i;

    xc_handle = xc_interface_open(0,0,0);
    if ( !xc_handle )
    {
        exit(EXIT_FAILURE);
    }

    /* Map t_info metadata structure */
    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
                                        PROT_READ, tbufs_mfn);

    if ( tbufs.t_info == 0 )
    {
        PERROR("Failed to mmap trace buffers");
        exit(EXIT_FAILURE);
    }

    if ( tbufs.t_info->tbuf_size == 0 )
    {
        fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
        exit(EXIT_FAILURE);
    }

    /* Map per-cpu buffers */
    tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
    tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
    if ( tbufs.meta == NULL || tbufs.data == NULL )
    {
        PERROR( "Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    for(i=0; i<num; i++)
    {
        /* cpu i's mfn list starts mfn_offset[i] 32-bit words into t_info */
        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
                                   + tbufs.t_info->mfn_offset[i];
        int j;
        /* widen the 32-bit mfns to xen_pfn_t for xc_map_foreign_pages() */
        xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];

        for ( j=0; j<tbufs.t_info->tbuf_size; j++)
            pfn_list[j] = (xen_pfn_t)mfn_list[j];

        tbufs.meta[i] = xc_map_foreign_pages(xc_handle, DOMID_XEN,
                                             PROT_READ | PROT_WRITE,
                                             pfn_list,
                                             tbufs.t_info->tbuf_size);
        if ( tbufs.meta[i] == NULL )
        {
            PERROR("Failed to map cpu buffer!");
            exit(EXIT_FAILURE);
        }
        /* record data follows immediately after the t_buf header */
        tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
    }

    xc_interface_close(xc_handle);

    return &tbufs;
}
431
432 /**
433 * get_num_cpus - get the number of logical CPUs
434 */
get_num_cpus(void)435 static unsigned int get_num_cpus(void)
436 {
437 xc_physinfo_t physinfo = { 0 };
438 xc_interface *xc_handle = xc_interface_open(0,0,0);
439 int ret;
440
441 ret = xc_physinfo(xc_handle, &physinfo);
442
443 if ( ret != 0 )
444 {
445 PERROR("Failure to get logical CPU count from Xen");
446 exit(EXIT_FAILURE);
447 }
448
449 xc_interface_close(xc_handle);
450 opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
451
452 return physinfo.nr_cpus;
453 }
454
455 /**
456 * monitor_tbufs - monitor the contents of tbufs
457 */
/**
 * monitor_tbufs - monitor the contents of tbufs
 *
 * Main collection loop: maps the trace buffers, then repeatedly drains
 * each cpu's ring through process_record() until a signal handler sets
 * 'interrupted'. Always returns 0.
 */
static int monitor_tbufs(void)
{
    int i;

    struct t_struct *tbufs;      /* Pointer to hypervisor maps */
    struct t_buf **meta;         /* pointers to the trace buffer metadata */
    unsigned char **data;        /* pointers to the trace buffer data areas
                                  * where they are mapped into user space. */
    unsigned long tbufs_mfn;     /* mfn of the tbufs */
    unsigned int  num;           /* number of trace buffers / logical CPUS */
    unsigned long tinfo_size;    /* size of t_info metadata map */
    unsigned long size;          /* size of a single trace buffer */

    unsigned long data_size, rec_size;

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &tinfo_size);
    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);

    size = tbufs->t_info->tbuf_size * XC_PAGE_SIZE;

    /* usable ring size excludes the t_buf header page prefix */
    data_size = size - sizeof(struct t_buf);

    meta = tbufs->meta;
    data = tbufs->data;

    if ( eventchn_init() < 0 )
        fprintf(stderr, "Failed to initialize event channel; "
                "Using POLL method\r\n");

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
            unsigned long start_offset, end_offset, cons, prod;

            cons = meta[i]->cons;
            prod = meta[i]->prod;
            xen_rmb(); /* read prod, then read item. */

            if ( cons == prod )
                continue;

            start_offset = cons % data_size;
            end_offset = prod % data_size;

            if ( start_offset >= end_offset )
            {
                /* producer has wrapped: consume to the end of the ring
                 * first, then fall through and resume from offset 0 */
                while ( start_offset != data_size )
                {
                    rec_size = process_record(
                        i, (struct t_rec *)(data[i] + start_offset));
                    start_offset += rec_size;
                }
                start_offset = 0;
            }
            while ( start_offset != end_offset )
            {
                rec_size = process_record(
                    i, (struct t_rec *)(data[i] + start_offset));
                start_offset += rec_size;
            }
            xen_mb(); /* read item, then update cons. */
            meta[i]->cons = prod;
        }

        wait_for_event();
        wakeups++;
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}
544
545
546 /******************************************************************************
547 * Command line handling
548 *****************************************************************************/
549
550 const char *program_version = "xenbaked v1.4";
551 const char *program_bug_address = "<rob.gardner@hp.com>";
552
553 #define xstr(x) str(x)
554 #define str(x) #x
555
/* Print the help text and exit with failure status. xstr() stringizes
 * the numeric default macros into the message at compile time. */
static void usage(void)
{
#define USAGE_STR \
"Usage: xenbaked [OPTION...]\n" \
"Tool to capture and partially process Xen trace buffer data\n" \
"\n" \
"  -m, --ms_per_sample=MS     Specify the number of milliseconds per sample\n" \
"                             (default " xstr(MS_PER_SAMPLE) ").\n" \
"  -s, --poll-sleep=p         Set sleep time, p, in milliseconds between\n" \
"                             polling the trace buffer for new data\n" \
"                             (default " xstr(POLL_SLEEP_MILLIS) ").\n" \
"  -t, --log-thresh=l         Set number, l, of new records required to\n" \
"                             trigger a write to output (default " \
                              xstr(NEW_DATA_THRESH) ").\n" \
"  -?, --help                 Show this message\n" \
"  -V, --version              Print program version\n" \
"\n" \
"This tool is used to capture trace buffer data from Xen. The data is\n" \
"saved in a shared memory structure to be further processed by xenmon.\n"

    printf(USAGE_STR);
    printf("\nReport bugs to %s\n", program_bug_address);

    exit(EXIT_FAILURE);
}
581
582 /* convert the argument string pointed to by arg to a long int representation */
/* Parse @arg as a long integer in the given @base; any parse error
 * prints a diagnostic and terminates the program via usage(). */
static long argtol(const char *restrict arg, int base)
{
    char *endp;
    long result;

    errno = 0;
    result = strtol(arg, &endp, base);

    if (errno != 0) {
        fprintf(stderr, "Invalid option argument: %s\n", arg);
        fprintf(stderr, "Error: %s\n\n", strerror(errno));
        usage();   /* does not return */
    }
    if (endp == arg || *endp != '\0') {
        /* nothing consumed, or trailing junk after the number */
        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
        usage();
    }

    return result;
}
602
603 /* parse command line arguments */
parse_args(int argc,char ** argv)604 static void parse_args(int argc, char **argv)
605 {
606 int option;
607 static struct option long_options[] = {
608 { "log-thresh", required_argument, 0, 't' },
609 { "poll-sleep", required_argument, 0, 's' },
610 { "ms_per_sample", required_argument, 0, 'm' },
611 { "help", no_argument, 0, '?' },
612 { "version", no_argument, 0, 'V' },
613 { 0, 0, 0, 0 }
614 };
615
616 while ( (option = getopt_long(argc, argv, "m:s:t:?V",
617 long_options, NULL)) != -1)
618 {
619 switch ( option )
620 {
621 case 't': /* set new records threshold for logging */
622 opts.new_data_thresh = argtol(optarg, 0);
623 break;
624
625 case 's': /* set sleep time (given in milliseconds) */
626 opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
627 break;
628
629 case 'm': /* set ms_per_sample */
630 opts.ms_per_sample = argtol(optarg, 0);
631 break;
632
633 case 'V': /* print program version */
634 printf("%s\n", program_version);
635 exit(EXIT_SUCCESS);
636 break;
637
638 default:
639 usage();
640 }
641 }
642
643 /* all arguments should have been processed */
644 if (optind != argc) {
645 usage();
646 }
647 }
648
649 #define SHARED_MEM_FILE "/var/run/xenq-shm"
/* Create the shared-memory file consumed by xenmon and map one
 * page-aligned _new_qos_data region per cpu into cpu_qos_data[].
 * Exits on any failure; leaves the global new_qos NULL on return. */
static void alloc_qos_data(int ncpu)
{
    int i, n, pgsize, off=0;
    char *dummy;
    int qos_fd;

    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));


    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
    if (qos_fd < 0) {
        PERROR(SHARED_MEM_FILE);
        exit(2);
    }
    pgsize = getpagesize();
    /* scratch page of zeroes used to grow the file to its final size */
    dummy = malloc(pgsize);
    if (!dummy) {
        PERROR("malloc");
        exit(EXIT_FAILURE);
    }
    memset(dummy, 0, pgsize);

    for (n=0; n<ncpu; n++) {

        /* extend the file by sizeof(_new_qos_data) rounded up to whole
         * pages; afterwards i holds that rounded size, which keeps the
         * mmap offset below page-aligned */
        for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
            if ((write(qos_fd, dummy, pgsize)) != pgsize) {
                PERROR(SHARED_MEM_FILE);
                exit(2);
            }

        new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE,
                                         MAP_SHARED, qos_fd, off);
        off += i;
        if (new_qos == MAP_FAILED) {
            PERROR("mmap");
            exit(3);
        }
        //  printf("new_qos = %p\n", new_qos);
        memset(new_qos, 0, sizeof(_new_qos_data));
        new_qos->next_datapoint = 0;
        advance_next_datapoint(0);
        new_qos->structlen = i;  /* page-rounded size of this region */
        new_qos->ncpu = ncpu;
        //  printf("structlen = 0x%x\n", i);
        cpu_qos_data[n] = new_qos;
    }
    free(dummy);
    close(qos_fd);
    new_qos = NULL;
}
700
701
main(int argc,char ** argv)702 int main(int argc, char **argv)
703 {
704 int ret;
705 struct sigaction act;
706
707 time(&start_time);
708 opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
709 opts.new_data_thresh = NEW_DATA_THRESH;
710 opts.ms_per_sample = MS_PER_SAMPLE;
711 opts.cpu_freq = CPU_FREQ;
712
713 parse_args(argc, argv);
714 fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);
715
716
717 /* ensure that if we get a signal, we'll do cleanup, then exit */
718 act.sa_handler = close_handler;
719 act.sa_flags = 0;
720 sigemptyset(&act.sa_mask);
721 sigaction(SIGHUP, &act, NULL);
722 sigaction(SIGTERM, &act, NULL);
723 sigaction(SIGINT, &act, NULL);
724
725 ret = monitor_tbufs();
726
727 dump_stats();
728 msync(new_qos, sizeof(_new_qos_data), MS_SYNC);
729 disable_tracing();
730
731 return ret;
732 }
733
qos_init_domain(int domid,int idx)734 static void qos_init_domain(int domid, int idx)
735 {
736 int i;
737
738 memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
739 new_qos->domain_info[idx].last_update_time = global_now;
740 // runnable_start_time[idx] = 0;
741 new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
742 new_qos->domain_info[idx].in_use = 1;
743 new_qos->domain_info[idx].blocked_start_time = 0;
744 new_qos->domain_info[idx].id = domid;
745 if (domid == IDLE_DOMAIN_ID)
746 snprintf(new_qos->domain_info[idx].name,
747 sizeof(new_qos->domain_info[idx].name),
748 "Idle Task%d", global_cpu);
749 else
750 snprintf(new_qos->domain_info[idx].name,
751 sizeof(new_qos->domain_info[idx].name),
752 "Domain#%d", domid);
753
754 for (i=0; i<NSAMPLES; i++) {
755 new_qos->qdata[i].ns_gotten[idx] = 0;
756 new_qos->qdata[i].ns_allocated[idx] = 0;
757 new_qos->qdata[i].ns_waiting[idx] = 0;
758 new_qos->qdata[i].ns_blocked[idx] = 0;
759 new_qos->qdata[i].switchin_count[idx] = 0;
760 new_qos->qdata[i].io_count[idx] = 0;
761 }
762 }
763
global_init_domain(int domid,int idx)764 static void global_init_domain(int domid, int idx)
765 {
766 int cpu;
767 _new_qos_data *saved_qos;
768
769 saved_qos = new_qos;
770
771 for (cpu=0; cpu<NCPU; cpu++) {
772 new_qos = cpu_qos_data[cpu];
773 qos_init_domain(domid, idx);
774 }
775 new_qos = saved_qos;
776 }
777
778 // give index of this domain in the qos data array
indexof(int domid)779 static int indexof(int domid)
780 {
781 int idx;
782 xc_dominfo_t dominfo[NDOMAINS];
783 xc_interface *xc_handle;
784 int ndomains;
785
786 if (domid < 0) { // shouldn't happen
787 printf("bad domain id: %d\r\n", domid);
788 return 0;
789 }
790
791 for (idx=0; idx<NDOMAINS; idx++)
792 if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
793 return idx;
794
795 // not found, make a new entry
796 for (idx=0; idx<NDOMAINS; idx++)
797 if (new_qos->domain_info[idx].in_use == 0) {
798 global_init_domain(domid, idx);
799 return idx;
800 }
801
802 // call domaininfo hypercall to try and garbage collect unused entries
803 xc_handle = xc_interface_open(0,0,0);
804 ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
805 xc_interface_close(xc_handle);
806
807 // for each domain in our data, look for it in the system dominfo structure
808 // and purge the domain's data from our state if it does not exist in the
809 // dominfo structure
810 for (idx=0; idx<NDOMAINS; idx++) {
811 int domid = new_qos->domain_info[idx].id;
812 int jdx;
813
814 for (jdx=0; jdx<ndomains; jdx++) {
815 if (dominfo[jdx].domid == domid)
816 break;
817 }
818 if (jdx == ndomains) // we didn't find domid in the dominfo struct
819 if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not
820 // contained in dominfo
821 qos_kill_thread(domid); // purge our stale data
822 }
823
824 // look again for a free slot
825 for (idx=0; idx<NDOMAINS; idx++)
826 if (new_qos->domain_info[idx].in_use == 0) {
827 global_init_domain(domid, idx);
828 return idx;
829 }
830
831 // still no space found, so bail
832 fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
833 exit(2);
834 }
835
domain_runnable(int domid)836 static int domain_runnable(int domid)
837 {
838 return new_qos->domain_info[indexof(domid)].runnable;
839 }
840
841
/* Accumulate time @domid has spent blocked into the current datapoint,
 * handling a 64-bit timestamp wraparound, then restart (or clear) the
 * blocked interval depending on whether the domain is now runnable. */
static void update_blocked_time(int domid, uint64_t now)
{
    uint64_t t_blocked;
    int id = indexof(domid);

    if (new_qos->domain_info[id].blocked_start_time != 0) {
        if (now >= new_qos->domain_info[id].blocked_start_time)
            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
        else
            /* timestamp wrapped around since blocking began */
            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
    }

    if (domain_runnable(domid))
        new_qos->domain_info[id].blocked_start_time = 0;   /* not blocked */
    else
        new_qos->domain_info[id].blocked_start_time = now; /* still blocked */
}
860
861
862 // advance to next datapoint for all domains
advance_next_datapoint(uint64_t now)863 static void advance_next_datapoint(uint64_t now)
864 {
865 int new, old, didx;
866
867 old = new_qos->next_datapoint;
868 new = QOS_INCR(old);
869 new_qos->next_datapoint = new;
870 // memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
871 for (didx = 0; didx < NDOMAINS; didx++) {
872 new_qos->qdata[new].ns_gotten[didx] = 0;
873 new_qos->qdata[new].ns_allocated[didx] = 0;
874 new_qos->qdata[new].ns_waiting[didx] = 0;
875 new_qos->qdata[new].ns_blocked[didx] = 0;
876 new_qos->qdata[new].switchin_count[didx] = 0;
877 new_qos->qdata[new].io_count[didx] = 0;
878 }
879 new_qos->qdata[new].ns_passed = 0;
880 new_qos->qdata[new].lost_records = 0;
881 new_qos->qdata[new].flip_free_periods = 0;
882
883 new_qos->qdata[new].timestamp = now;
884 }
885
886
887
/* Account time elapsed since @domid's last update on @cpu: credits cpu
 * time (ns_gotten) if the domain was running, refreshes blocked time,
 * and advances the current sample's ns_passed. Handles 64-bit timestamp
 * wraparound and small cross-cpu clock skew. */
static void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = indexof(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
        // what happened here? either a timestamp wraparound, or more likely,
        // a slight inconsistency among timestamps from various cpu's
        if (-time_since_update < billion) {
            // fairly small difference, let's just adjust 'now' to be a little
            // beyond last_update_time
            time_since_update = -time_since_update;
        }
        else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
            // difference is huge, must be a wraparound
            // last_update time should be "near" ~0ULL,
            // and now should be "near" 0
            time_since_update = now + (~0ULL - last_update_time);
            printf("time wraparound\n");
        }
        else {
            // none of the above, may be an out of order record
            // no good solution, just ignore and update again later
            return;
        }
    }

    new_qos->domain_info[id].last_update_time = now;

    /* only credit run time if the domain was runnable at the previous
     * update AND is still the one recorded as running on this cpu */
    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) {		// wrapped around
            run_time = now + (~0ULL - start);
            // this could happen if there is nothing going on within a cpu;
            // in this case the idle domain would run forever
            //        printf("warning: start > now\n");
        }
        else
            run_time = now - start;
        //	if (run_time < 0)	// should not happen
        //	  printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;

        new_qos->qdata[n].ns_gotten[id] += run_time;
        //      if (domid == 0 && cpu == 1)
        //	printf("adding run time for dom0 on cpu1\r\n");

    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
        //    printf("why timewrap?\r\n");
    }
    new_qos->qdata[n].timestamp = now;
}
963
964
965 // called by dump routines to update all structures
qos_update_all(uint64_t now,int cpu)966 static void qos_update_all(uint64_t now, int cpu)
967 {
968 int i;
969
970 for (i=0; i<NDOMAINS; i++)
971 if (new_qos->domain_info[i].in_use)
972 qos_update_thread(cpu, new_qos->domain_info[i].id, now);
973 }
974
975
qos_update_thread_stats(int cpu,int domid,uint64_t now)976 static void qos_update_thread_stats(int cpu, int domid, uint64_t now)
977 {
978 if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
979 qos_update_all(now, cpu);
980 advance_next_datapoint(now);
981 return;
982 }
983 qos_update_thread(cpu, domid, now);
984 }
985
986
987
988 // called when a new thread gets the cpu
// called when a new thread gets the cpu
/* Mark @domid runnable and current on @cpu, crediting the allocated and
 * waited nanoseconds reported by the scheduler trace record. */
static void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
{
    int idx = indexof(domid);

    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);
    new_qos->domain_info[idx].blocked_start_time = 0; // invalidate
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //runnable_start_time[idx] = 0;

    new_qos->domain_info[idx].start_time = now;
    new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
    new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
    new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
    qos_update_thread_stats(cpu, domid, now);
    set_current(cpu, domid);

    // count up page flips for dom0 execution
    /* reset at dom0 switch-in; qos_switch_out() checks whether any flips
     * happened during this execution period (flip_free_periods) */
    if (domid == 0)
        dom0_flips = 0;
}
1010
1011 // called when the current thread is taken off the cpu
// called when the current thread is taken off the cpu
/* Credit @gotten ns of cpu time to @domid, start its runnable (waiting)
 * interval, and note a flip-free period if dom0 ran without page flips. */
static void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
    int idx = indexof(domid);
    int n;

    /* diagnostic only; the empty body deliberately keeps the check cheap */
    if (!is_current(domid, cpu)) {
        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
    }

    if (gotten == 0) {
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
    }

    if (gotten < 100) {
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
    }


    n = new_qos->next_datapoint;
#if 0
    new_qos->qdata[n].ns_gotten[idx] += gotten;
    if (gotten > new_qos->qdata[n].ns_passed)
        printf("inconsistency #257, diff = %lld\n",
               gotten - new_qos->qdata[n].ns_passed );
#endif
    new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
    qos_update_thread_stats(cpu, domid, now);

    // process dom0 page flips
    if (domid == 0)
        if (dom0_flips == 0)
            new_qos->qdata[n].flip_free_periods++;
}
1047
1048 // called when domain is put to sleep, may also be called
1049 // when thread is already asleep
qos_state_sleeping(int cpu,int domid,uint64_t now)1050 static void qos_state_sleeping(int cpu, int domid, uint64_t now)
1051 {
1052 int idx;
1053
1054 if (!domain_runnable(domid)) // double call?
1055 return;
1056
1057 idx = indexof(domid);
1058 new_qos->domain_info[idx].runnable = 0;
1059 new_qos->domain_info[idx].blocked_start_time = now;
1060 new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
1061 // runnable_start_time[idx] = 0; // invalidate
1062 qos_update_thread_stats(cpu, domid, now);
1063 }
1064
1065
1066
1067 // domain died, presume it's dead on all cpu's, not just mostly dead
qos_kill_thread(int domid)1068 static void qos_kill_thread(int domid)
1069 {
1070 int cpu;
1071
1072 for (cpu=0; cpu<NCPU; cpu++) {
1073 cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
1074 }
1075
1076 }
1077
1078
1079 // called when thread becomes runnable, may also be called
1080 // when thread is already runnable
qos_state_runnable(int cpu,int domid,uint64_t now)1081 static void qos_state_runnable(int cpu, int domid, uint64_t now)
1082 {
1083 int idx;
1084
1085
1086 qos_update_thread_stats(cpu, domid, now);
1087
1088 if (domain_runnable(domid)) // double call?
1089 return;
1090
1091 idx = indexof(domid);
1092 new_qos->domain_info[idx].runnable = 1;
1093 update_blocked_time(domid, now);
1094
1095 new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
1096 new_qos->domain_info[idx].runnable_start_time = now;
1097 // runnable_start_time[id] = now;
1098 }
1099
1100
/* Count one grant-transfer (io) event for @domid on every cpu tracking
 * it, plus one against slot 0 on the current cpu, and bump the dom0
 * page-flip counter. @now is currently unused. */
static void qos_count_packets(domid_t domid, uint64_t now)
{
    int cpu;
    int idx = indexof(domid);

    for (cpu = 0; cpu < NCPU; cpu++) {
        _new_qos_data *qd = cpu_qos_data[cpu];

        if (qd->domain_info[idx].in_use)
            qd->qdata[qd->next_datapoint].io_count[idx]++;
    }

    new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
    dom0_flips++;
}
1116
1117
/* Decode one trace record from @cpu's ring and dispatch it to the qos
 * accounting routines. Returns the record's size in bytes so the caller
 * can advance its consumer offset. */
static int process_record(int cpu, struct t_rec *r)
{
    uint64_t now = 0;
    uint32_t *extra_u32 = r->u.nocycles.extra_u32;

    new_qos = cpu_qos_data[cpu];

    rec_count++;

    if ( r->cycles_included )
    {
        now = ((uint64_t)r->u.cycles.cycles_hi << 32) | r->u.cycles.cycles_lo;
        /* convert tsc cycles to nanoseconds using the reported frequency */
        now = ((double)now) / (opts.cpu_freq / 1000.0);
        extra_u32 = r->u.cycles.extra_u32;
    }

    global_now = now;
    global_cpu = cpu;

    log_event(r->event);

    switch (r->event) {

    case TRC_SCHED_SWITCH_INFPREV:
        // domain data[0] just switched out and received data[1] ns of cpu time
        qos_switch_out(cpu, extra_u32[0], now, extra_u32[1]);
        //    printf("ns_gotten %ld\n", extra_u32[1]);
        break;

    case TRC_SCHED_SWITCH_INFNEXT:
        // domain data[0] just switched in and
        // waited data[1] ns, and was allocated data[2] ns of cpu time
        qos_switch_in(cpu, extra_u32[0], now, extra_u32[2], extra_u32[1]);
        break;

    case TRC_SCHED_DOM_ADD:
        (void) indexof(extra_u32[0]);  /* allocate a slot for the new domain */
        break;

    case TRC_SCHED_DOM_REM:
        qos_kill_thread(extra_u32[0]);
        break;

    case TRC_SCHED_SLEEP:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_WAKE:
        qos_state_runnable(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_BLOCK:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_MEM_PAGE_GRANT_TRANSFER:
        qos_count_packets(extra_u32[0], now);
        break;

    default:
        break;
    }

    new_qos = NULL;

    /* 4-byte header + optional 8-byte timestamp + 4 bytes per extra word */
    return 4 + (r->cycles_included ? 8 : 0) + (r->extra_u32 * 4);
}
1185
1186 /*
1187 * Local variables:
1188 * mode: C
1189 * c-file-style: "BSD"
1190 * c-basic-offset: 4
1191 * tab-width: 4
1192 * indent-tabs-mode: nil
1193 * End:
1194 */
1195