1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11
/*
 * Per-CPU state machine of the analysis: each timerlat activation is an
 * IRQ sample followed by a thread sample, and noise events are accounted
 * differently depending on which of the two we are waiting for.
 * (note: the "timelat" spelling is kept; it is the existing identifier.)
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* no timerlat sample processed yet */
	TIMERLAT_WAITING_IRQ,		/* thread sample seen; waiting for the next IRQ */
	TIMERLAT_WAITING_THREAD,	/* IRQ sample seen; waiting for the thread */
};
17
/* Buffer size for task comm names (larger than the kernel's TASK_COMM_LEN). */
#define MAX_COMM 24
19
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: seqnum, latency and timestamp of the last IRQ sample */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: seqnum, latency and timestamp of the last thread sample */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq: start time/delay and handler duration */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sums (ns) accumulated between the timerlat IRQ
	 * and the timerlat thread activation.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: parsed, human-readable logs of
	 * the individual noise occurrences, ready to be printed.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread, as seen by the last sched_switch on this CPU.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? work struct address and function,
	 * fetched as raw values from workqueue_execute_start.
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
98
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	int nr_cpus;		/* number of entries in taa_data */
	int dump_tasks;		/* if set, also track/print per-CPU tasks */

	/* per CPU data, array of nr_cpus entries */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
115
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* Return the global auto-analysis context (NULL before timerlat_aa_init()). */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
131
132 /*
133 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
134 */
135 static struct timerlat_aa_data
timerlat_aa_get_data(struct timerlat_aa_context * taa_ctx,int cpu)136 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
137 {
138 return &taa_ctx->taa_data[cpu];
139 }
140
141 /*
142 * timerlat_aa_irq_latency - Handles timerlat IRQ event
143 */
timerlat_aa_irq_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)144 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
145 struct trace_seq *s, struct tep_record *record,
146 struct tep_event *event)
147 {
148 /*
149 * For interference, we start now looking for things that can delay
150 * the thread.
151 */
152 taa_data->curr_state = TIMERLAT_WAITING_THREAD;
153 taa_data->tlat_irq_timstamp = record->ts;
154
155 /*
156 * Zero values.
157 */
158 taa_data->thread_nmi_sum = 0;
159 taa_data->thread_irq_sum = 0;
160 taa_data->thread_softirq_sum = 0;
161 taa_data->thread_blocking_duration = 0;
162 taa_data->timer_irq_start_time = 0;
163 taa_data->timer_irq_duration = 0;
164 taa_data->timer_exit_from_idle = 0;
165
166 /*
167 * Zero interference tasks.
168 */
169 trace_seq_reset(taa_data->nmi_seq);
170 trace_seq_reset(taa_data->irqs_seq);
171 trace_seq_reset(taa_data->softirqs_seq);
172 trace_seq_reset(taa_data->threads_seq);
173
174 /* IRQ latency values */
175 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
176 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
177
178 /* The thread that can cause blocking */
179 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
180
181 /*
182 * Get exit from idle case.
183 *
184 * If it is not idle thread:
185 */
186 if (taa_data->run_thread_pid)
187 return 0;
188
189 /*
190 * if the latency is shorter than the known exit from idle:
191 */
192 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
193 return 0;
194
195 /*
196 * To be safe, ignore the cases in which an IRQ/NMI could have
197 * interfered with the timerlat IRQ.
198 */
199 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
200 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
201 return 0;
202
203 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
204
205 return 0;
206 }
207
208 /*
209 * timerlat_aa_thread_latency - Handles timerlat thread event
210 */
timerlat_aa_thread_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)211 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
212 struct trace_seq *s, struct tep_record *record,
213 struct tep_event *event)
214 {
215 /*
216 * For interference, we start now looking for things that can delay
217 * the IRQ of the next cycle.
218 */
219 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
220 taa_data->tlat_thread_timstamp = record->ts;
221
222 /* Thread latency values */
223 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
224 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
225
226 return 0;
227 }
228
229 /*
230 * timerlat_aa_handler - Handle timerlat events
231 *
232 * This function is called to handle timerlat events recording statistics.
233 *
234 * Returns 0 on success, -1 otherwise.
235 */
timerlat_aa_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)236 int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
237 struct tep_event *event, void *context)
238 {
239 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
240 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
241 unsigned long long thread;
242
243 if (!taa_data)
244 return -1;
245
246 tep_get_field_val(s, event, "context", record, &thread, 1);
247 if (!thread)
248 return timerlat_aa_irq_latency(taa_data, s, record, event);
249 else
250 return timerlat_aa_thread_latency(taa_data, s, record, event);
251 }
252
253 /*
254 * timerlat_aa_nmi_handler - Handles NMI noise
255 *
256 * It is used to collect information about interferences from NMI. It is
257 * hooked to the osnoise:nmi_noise event.
258 */
timerlat_aa_nmi_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)259 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
260 struct tep_event *event, void *context)
261 {
262 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
263 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
264 unsigned long long duration;
265 unsigned long long start;
266
267 tep_get_field_val(s, event, "duration", record, &duration, 1);
268 tep_get_field_val(s, event, "start", record, &start, 1);
269
270 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
271 taa_data->prev_irq_duration = duration;
272 taa_data->prev_irq_timstamp = start;
273
274 trace_seq_reset(taa_data->prev_irqs_seq);
275 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
276 "nmi", ns_to_usf(duration));
277 return 0;
278 }
279
280 taa_data->thread_nmi_sum += duration;
281 trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
282 "nmi", ns_to_usf(duration));
283
284 return 0;
285 }
286
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	/* desc may be NULL if the field is absent — assumed present; TODO confirm */
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: save it as the candidate "previous IRQ"
	 * that may have delayed the upcoming timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise seen after an
	 * IRQ sample is the timerlat timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		/* when the timer was expected to fire, on the trace clock */
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/* delay between the expected and the actual handler start */
		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;

		/*
		 * not exit from idle: a regular thread was running.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* no previous IRQ overlapped, so the delay is exit-from-idle cost */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: an IRQ between the timerlat IRQ and the thread.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
362
/* Softirq names, indexed by vector — mirrors the kernel's softirq order. */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
365
366
367 /*
368 * timerlat_aa_softirq_handler - Handles Softirq noise
369 *
370 * It is used to collect information about interferences from Softirq. It is
371 * hooked to the osnoise:softirq_noise event.
372 *
373 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
374 */
timerlat_aa_softirq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)375 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
376 struct tep_event *event, void *context)
377 {
378 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
379 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
380 unsigned long long duration;
381 unsigned long long vector;
382 unsigned long long start;
383
384 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
385 return 0;
386
387 tep_get_field_val(s, event, "duration", record, &duration, 1);
388 tep_get_field_val(s, event, "start", record, &start, 1);
389 tep_get_field_val(s, event, "vector", record, &vector, 1);
390
391 taa_data->thread_softirq_sum += duration;
392
393 trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
394 softirq_name[vector], vector, ns_to_usf(duration));
395 return 0;
396 }
397
398 /*
399 * timerlat_aa_softirq_handler - Handles thread noise
400 *
401 * It is used to collect information about interferences from threads. It is
402 * hooked to the osnoise:thread_noise event.
403 *
404 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
405 */
timerlat_aa_thread_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)406 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
407 struct tep_event *event, void *context)
408 {
409 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
410 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
411 unsigned long long duration;
412 unsigned long long start;
413 unsigned long long pid;
414 const char *comm;
415 int val;
416
417 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
418 return 0;
419
420 tep_get_field_val(s, event, "duration", record, &duration, 1);
421 tep_get_field_val(s, event, "start", record, &start, 1);
422
423 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
424 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
425
426 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
427 taa_data->thread_blocking_duration = duration;
428
429 if (comm)
430 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
431 else
432 sprintf(taa_data->run_thread_comm, "<...>");
433
434 } else {
435 taa_data->thread_thread_sum += duration;
436
437 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
438 comm, pid, ns_to_usf(duration));
439 }
440
441 return 0;
442 }
443
444 /*
445 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
446 *
447 * Saves and parse the stack trace generated by the timerlat IRQ.
448 */
timerlat_aa_stack_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)449 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
450 struct tep_event *event, void *context)
451 {
452 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
453 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
454 unsigned long *caller;
455 const char *function;
456 int val, i;
457
458 trace_seq_reset(taa_data->stack_seq);
459
460 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
461 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
462 if (caller) {
463 for (i = 0; ; i++) {
464 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
465 if (!function)
466 break;
467 trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
468 }
469 }
470 return 0;
471 }
472
473 /*
474 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
475 *
476 * Handles the sched:sched_switch event to trace the current thread running on the
477 * CPU. It is used to display the threads running on the other CPUs when the trace
478 * stops.
479 */
timerlat_aa_sched_switch_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)480 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
481 struct tep_event *event, void *context)
482 {
483 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
484 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
485 const char *comm;
486 int val;
487
488 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
489 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
490
491 strncpy(taa_data->current_comm, comm, MAX_COMM);
492
493 /*
494 * If this was a kworker, clean the last kworkers that ran.
495 */
496 taa_data->kworker = 0;
497 taa_data->kworker_func = 0;
498
499 return 0;
500 }
501
502 /*
503 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
504 *
505 * Handles workqueue:workqueue_execute_start event, keeping track of
506 * the job that a kworker could be doing in the CPU.
507 *
508 * We already catch problems of hardware related latencies caused by work queues
509 * running driver code that causes hardware stall. For example, with DRM drivers.
510 */
timerlat_aa_kworker_start_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)511 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
512 struct tep_event *event, void *context)
513 {
514 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
515 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
516
517 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
518 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
519 return 0;
520 }
521
522 /*
523 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
524 *
525 * This is the core of the analysis.
526 */
timerlat_thread_analysis(struct timerlat_aa_data * taa_data,int cpu,int irq_thresh,int thread_thresh)527 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
528 int irq_thresh, int thread_thresh)
529 {
530 unsigned long long exp_irq_ts;
531 int total;
532 int irq;
533
534 /*
535 * IRQ latency or Thread latency?
536 */
537 if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
538 irq = 1;
539 total = taa_data->tlat_irq_latency;
540 } else {
541 irq = 0;
542 total = taa_data->tlat_thread_latency;
543 }
544
545 /*
546 * Expected IRQ arrival time using the trace clock as the base.
547 */
548 exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
549
550 if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
551 printf(" Previous IRQ interference: \t up to %9.2f us",
552 ns_to_usf(taa_data->prev_irq_duration));
553
554 /*
555 * The delay that the IRQ suffered before starting.
556 */
557 printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n",
558 (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
559 ns_to_usf(taa_data->timer_irq_start_delay),
560 ns_to_per(total, taa_data->timer_irq_start_delay));
561
562 /*
563 * Timerlat IRQ.
564 */
565 printf(" IRQ latency: \t\t\t\t %9.2f us\n",
566 ns_to_usf(taa_data->tlat_irq_latency));
567
568 if (irq) {
569 /*
570 * If the trace stopped due to IRQ, the other events will not happen
571 * because... the trace stopped :-).
572 *
573 * That is all folks, the stack trace was printed before the stop,
574 * so it will be displayed, it is the key.
575 */
576 printf(" Blocking thread:\n");
577 printf(" %24s:%-9llu\n",
578 taa_data->run_thread_comm, taa_data->run_thread_pid);
579 } else {
580 /*
581 * The duration of the IRQ handler that handled the timerlat IRQ.
582 */
583 printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n",
584 ns_to_usf(taa_data->timer_irq_duration),
585 ns_to_per(total, taa_data->timer_irq_duration));
586
587 /*
588 * The amount of time that the current thread postponed the scheduler.
589 *
590 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
591 * is no need to compute values here.
592 */
593 printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n",
594 ns_to_usf(taa_data->thread_blocking_duration),
595 ns_to_per(total, taa_data->thread_blocking_duration));
596
597 printf(" %24s:%-9llu %9.2f us\n",
598 taa_data->run_thread_comm, taa_data->run_thread_pid,
599 ns_to_usf(taa_data->thread_blocking_duration));
600 }
601
602 /*
603 * Print the stack trace!
604 */
605 trace_seq_do_printf(taa_data->stack_seq);
606
607 /*
608 * NMIs can happen during the IRQ, so they are always possible.
609 */
610 if (taa_data->thread_nmi_sum)
611 printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n",
612 ns_to_usf(taa_data->thread_nmi_sum),
613 ns_to_per(total, taa_data->thread_nmi_sum));
614
615 /*
616 * If it is an IRQ latency, the other factors can be skipped.
617 */
618 if (irq)
619 goto print_total;
620
621 /*
622 * Prints the interference caused by IRQs to the thread latency.
623 */
624 if (taa_data->thread_irq_sum) {
625 printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n",
626 ns_to_usf(taa_data->thread_irq_sum),
627 ns_to_per(total, taa_data->thread_irq_sum));
628
629 trace_seq_do_printf(taa_data->irqs_seq);
630 }
631
632 /*
633 * Prints the interference caused by Softirqs to the thread latency.
634 */
635 if (taa_data->thread_softirq_sum) {
636 printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n",
637 ns_to_usf(taa_data->thread_softirq_sum),
638 ns_to_per(total, taa_data->thread_softirq_sum));
639
640 trace_seq_do_printf(taa_data->softirqs_seq);
641 }
642
643 /*
644 * Prints the interference caused by other threads to the thread latency.
645 *
646 * If this happens, your timerlat is not the highest prio. OK, migration
647 * thread can happen. But otherwise, you are not measuring the "scheduling
648 * latency" only, and here is the difference from scheduling latency and
649 * timer handling latency.
650 */
651 if (taa_data->thread_thread_sum) {
652 printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n",
653 ns_to_usf(taa_data->thread_thread_sum),
654 ns_to_per(total, taa_data->thread_thread_sum));
655
656 trace_seq_do_printf(taa_data->threads_seq);
657 }
658
659 /*
660 * Done.
661 */
662 print_total:
663 printf("------------------------------------------------------------------------\n");
664 printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
665 ns_to_usf(total));
666 }
667
/**
 * timerlat_auto_analysis - Analyze the collected data
 *
 * Walks all CPUs, prints the full analysis for any CPU whose IRQ or
 * thread latency crossed the stop-tracing threshold, reports the max
 * exit-from-idle latency, and optionally dumps the per-CPU tasks.
 */
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	unsigned long long max_exit_from_idle = 0;
	struct timerlat_aa_data *taa_data;
	/* only read when max_exit_from_idle != 0, which implies it was set */
	int max_exit_from_idle_cpu;
	struct tep_handle *tep;
	int cpu;

	/* bring stop tracing to the ns scale (thresholds arrive in us) */
	irq_thresh = irq_thresh * 1000;
	thread_thresh = thread_thresh * 1000;

	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);

		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		}

		/* track the worst exit-from-idle latency across CPUs */
		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
			max_exit_from_idle = taa_data->max_exit_idle_latency;
			max_exit_from_idle_cpu = cpu;
		}

	}

	if (max_exit_from_idle) {
		printf("\n");
		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
	}
	if (!taa_ctx->dump_tasks)
		return;

	printf("\n");
	printf("Printing CPU tasks:\n");
	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
		tep = taa_ctx->tool->trace.tep;

		printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);

		/* resolve the kworker's work/function addresses to symbol names */
		if (taa_data->kworker_func)
			printf(" kworker:%s:%s",
				tep_find_function(tep, taa_data->kworker) ? : "<...>",
				tep_find_function(tep, taa_data->kworker_func));
		printf("\n");
	}

}
726
727 /*
728 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
729 */
timerlat_aa_destroy_seqs(struct timerlat_aa_context * taa_ctx)730 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
731 {
732 struct timerlat_aa_data *taa_data;
733 int i;
734
735 if (!taa_ctx->taa_data)
736 return;
737
738 for (i = 0; i < taa_ctx->nr_cpus; i++) {
739 taa_data = timerlat_aa_get_data(taa_ctx, i);
740
741 if (taa_data->prev_irqs_seq) {
742 trace_seq_destroy(taa_data->prev_irqs_seq);
743 free(taa_data->prev_irqs_seq);
744 }
745
746 if (taa_data->nmi_seq) {
747 trace_seq_destroy(taa_data->nmi_seq);
748 free(taa_data->nmi_seq);
749 }
750
751 if (taa_data->irqs_seq) {
752 trace_seq_destroy(taa_data->irqs_seq);
753 free(taa_data->irqs_seq);
754 }
755
756 if (taa_data->softirqs_seq) {
757 trace_seq_destroy(taa_data->softirqs_seq);
758 free(taa_data->softirqs_seq);
759 }
760
761 if (taa_data->threads_seq) {
762 trace_seq_destroy(taa_data->threads_seq);
763 free(taa_data->threads_seq);
764 }
765
766 if (taa_data->stack_seq) {
767 trace_seq_destroy(taa_data->stack_seq);
768 free(taa_data->stack_seq);
769 }
770 }
771 }
772
773 /*
774 * timerlat_aa_init_seqs - Init seq files used to store parsed information
775 *
776 * Instead of keeping data structures to store raw data, use seq files to
777 * store parsed data.
778 *
779 * Allocates and initialize seq files.
780 *
781 * Returns 0 on success, -1 otherwise.
782 */
timerlat_aa_init_seqs(struct timerlat_aa_context * taa_ctx)783 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
784 {
785 struct timerlat_aa_data *taa_data;
786 int i;
787
788 for (i = 0; i < taa_ctx->nr_cpus; i++) {
789
790 taa_data = timerlat_aa_get_data(taa_ctx, i);
791
792 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
793 if (!taa_data->prev_irqs_seq)
794 goto out_err;
795
796 trace_seq_init(taa_data->prev_irqs_seq);
797
798 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
799 if (!taa_data->nmi_seq)
800 goto out_err;
801
802 trace_seq_init(taa_data->nmi_seq);
803
804 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
805 if (!taa_data->irqs_seq)
806 goto out_err;
807
808 trace_seq_init(taa_data->irqs_seq);
809
810 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
811 if (!taa_data->softirqs_seq)
812 goto out_err;
813
814 trace_seq_init(taa_data->softirqs_seq);
815
816 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
817 if (!taa_data->threads_seq)
818 goto out_err;
819
820 trace_seq_init(taa_data->threads_seq);
821
822 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
823 if (!taa_data->stack_seq)
824 goto out_err;
825
826 trace_seq_init(taa_data->stack_seq);
827 }
828
829 return 0;
830
831 out_err:
832 timerlat_aa_destroy_seqs(taa_ctx);
833 return -1;
834 }
835
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): disables the trace events and
 * removes the tep handlers. dump_tasks must match the value passed at
 * registration so the sched/workqueue handlers are also removed.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	/* task-dump events are only registered when dump_tasks is set */
	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
868
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the osnoise trace events and hooks the per-event handlers.
 * When dump_tasks is set, also enables sched_switch and
 * workqueue_execute_start to track the tasks running on each CPU.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	/* retval < 0 with errno unset means the event does not exist */
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* undo any partial registration */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
932
933 /**
934 * timerlat_aa_destroy - Destroy timerlat auto-analysis
935 */
timerlat_aa_destroy(void)936 void timerlat_aa_destroy(void)
937 {
938 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
939
940 if (!taa_ctx)
941 return;
942
943 if (!taa_ctx->taa_data)
944 goto out_ctx;
945
946 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
947 timerlat_aa_destroy_seqs(taa_ctx);
948 free(taa_ctx->taa_data);
949 out_ctx:
950 free(taa_ctx);
951 }
952
/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Allocates the global context and the per-CPU data array, initializes
 * the seq files and registers the trace event handlers. On any failure,
 * everything allocated so far is torn down via timerlat_aa_destroy().
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks)
{
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	/* publish the context early: destroy() relies on it for cleanup */
	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	timerlat_aa_destroy();
	return -1;
}
991