1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
3 */
/* Help text for this tool; main() hands it to usage(), which forwards it
 * to sample_usage() as the description string.
 */
static const char *__doc__ =
"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
"Valid specification for CPUMAP BPF program:\n"
" --mprog-name/-e pass (use built-in XDP_PASS program)\n"
" --mprog-name/-e drop (use built-in XDP_DROP program)\n"
" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
" Custom CPUMAP BPF program:\n"
" --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
" to configure DEVMAP in BPF object <filename>\n";
15
16 #include <errno.h>
17 #include <signal.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdbool.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <locale.h>
24 #include <sys/sysinfo.h>
25 #include <getopt.h>
26 #include <net/if.h>
27 #include <time.h>
28 #include <linux/limits.h>
29 #include <arpa/inet.h>
30 #include <linux/if_link.h>
31 #include <bpf/bpf.h>
32 #include <bpf/libbpf.h>
33 #include "bpf_util.h"
34 #include "xdp_sample_user.h"
35 #include "xdp_redirect_cpu.skel.h"
36
/* File descriptors of the BPF maps shared by create_cpu_entry() and
 * mark_cpus_unavailable(). main() assigns them once the skeleton has
 * been loaded.
 */
static int map_fd;	/* skel->maps.cpu_map */
static int avail_fd;	/* skel->maps.cpus_available */
static int count_fd;	/* skel->maps.cpus_count */

/* Sample counter flags handed to sample_init() and sample_usage().
 * main() ORs in further flags for the -s and -r options.
 */
static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
		  SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
		  SAMPLE_EXCEPTION_CNT;

/* NOTE(review): presumably generates the sample_init*() helpers for the
 * xdp_redirect_cpu skeleton - confirm against xdp_sample_user.h.
 */
DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
46
/* Long command-line options. Each entry maps to the short option letter
 * handled in main(); keep this table in sync with the optstring passed
 * to getopt_long() there. The empty entry terminates the table.
 */
static const struct option long_options[] = {
	{ "help", no_argument, NULL, 'h' },
	{ "dev", required_argument, NULL, 'd' },
	{ "skb-mode", no_argument, NULL, 'S' },
	{ "progname", required_argument, NULL, 'p' },
	{ "qsize", required_argument, NULL, 'q' },
	{ "cpu", required_argument, NULL, 'c' },
	{ "stress-mode", no_argument, NULL, 'x' },
	{ "force", no_argument, NULL, 'F' },
	{ "interval", required_argument, NULL, 'i' },
	{ "verbose", no_argument, NULL, 'v' },
	{ "stats", no_argument, NULL, 's' },
	{ "mprog-name", required_argument, NULL, 'e' },
	{ "mprog-filename", required_argument, NULL, 'f' },
	{ "redirect-device", required_argument, NULL, 'r' },
	{ "redirect-map", required_argument, NULL, 'm' },
	{}
};
65
print_avail_progs(struct bpf_object * obj)66 static void print_avail_progs(struct bpf_object *obj)
67 {
68 struct bpf_program *pos;
69
70 printf(" Programs to be used for -p/--progname:\n");
71 bpf_object__for_each_program(pos, obj) {
72 if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) {
73 if (!strncmp(bpf_program__name(pos), "xdp_prognum",
74 sizeof("xdp_prognum") - 1))
75 printf(" %s\n", bpf_program__name(pos));
76 }
77 }
78 }
79
/* Print the usage text followed by the list of selectable XDP programs.
 *
 * @argv, @long_options, @doc, @mask and @error are forwarded unchanged to
 * sample_usage(); @obj is the BPF object whose programs are listed by
 * print_avail_progs().
 */
static void usage(char *argv[], const struct option *long_options,
		  const char *doc, int mask, bool error,
		  struct bpf_object *obj)
{
	sample_usage(argv, long_options, doc, mask, error);
	print_avail_progs(obj);
}
86
/* Install @value for @cpu in the cpumap and publish the CPU through the
 * control maps.
 *
 * @cpu:       key written into the cpumap (map_fd)
 * @value:     cpumap entry to store (queue size and program fd)
 * @avail_idx: index in the map behind avail_fd that will hold @cpu
 * @new:       true when adding a CPU, in which case the counter stored in
 *             the map behind count_fd is incremented; false when an
 *             existing entry is being replaced
 *
 * Returns 0 on success, otherwise the negative value returned by the map
 * operation that failed.
 */
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
	const char *action = new ? "Add new" : "Replace";
	__u32 cpus_count = 0;
	__u32 count_key = 0;
	int err;

	/* Writing the cpumap element is what allocates the per-CPU entry
	 * in the kernel.
	 */
	err = bpf_map_update_elem(map_fd, &cpu, value, 0);
	if (err < 0) {
		fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
		return err;
	}

	/* Let the BPF programs know, via the control map, that this CPU
	 * can now be selected.
	 */
	err = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
	if (err < 0) {
		fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
		return err;
	}

	/* The current count is always read so it can be reported below. */
	err = bpf_map_lookup_elem(count_fd, &count_key, &cpus_count);
	if (err < 0) {
		fprintf(stderr, "Failed reading curr cpus_count: %s\n",
			strerror(errno));
		return err;
	}

	/* Only a newly added CPU bumps the count; a replacement keeps it. */
	if (new) {
		cpus_count += 1;
		err = bpf_map_update_elem(count_fd, &count_key, &cpus_count, 0);
		if (err < 0) {
			fprintf(stderr, "Failed write curr cpus_count: %s\n",
				strerror(errno));
			return err;
		}
	}

	printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
	       action, cpu, avail_idx,
	       value->qsize, value->bpf_prog.fd, cpus_count);

	return 0;
}
136
137 /* CPUs are zero-indexed. Thus, add a special sentinel default value
138 * in map cpus_available to mark CPU index'es not configured
139 */
mark_cpus_unavailable(void)140 static int mark_cpus_unavailable(void)
141 {
142 int ret, i, n_cpus = libbpf_num_possible_cpus();
143 __u32 invalid_cpu = n_cpus;
144
145 for (i = 0; i < n_cpus; i++) {
146 ret = bpf_map_update_elem(avail_fd, &i,
147 &invalid_cpu, 0);
148 if (ret < 0) {
149 fprintf(stderr, "Failed marking CPU unavailable: %s\n",
150 strerror(errno));
151 return ret;
152 }
153 }
154
155 return 0;
156 }
157
158 /* Stress cpumap management code by concurrently changing underlying cpumap */
stress_cpumap(void * ctx)159 static void stress_cpumap(void *ctx)
160 {
161 struct bpf_cpumap_val *value = ctx;
162
163 /* Changing qsize will cause kernel to free and alloc a new
164 * bpf_cpu_map_entry, with an associated/complicated tear-down
165 * procedure.
166 */
167 value->qsize = 1024;
168 create_cpu_entry(1, value, 0, false);
169 value->qsize = 8;
170 create_cpu_entry(1, value, 0, false);
171 value->qsize = 16000;
172 create_cpu_entry(1, value, 0, false);
173 }
174
set_cpumap_prog(struct xdp_redirect_cpu * skel,const char * redir_interface,const char * redir_map,const char * mprog_filename,const char * mprog_name)175 static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
176 const char *redir_interface, const char *redir_map,
177 const char *mprog_filename, const char *mprog_name)
178 {
179 if (mprog_filename) {
180 struct bpf_program *prog;
181 struct bpf_object *obj;
182 int ret;
183
184 if (!mprog_name) {
185 fprintf(stderr, "BPF program not specified for file %s\n",
186 mprog_filename);
187 goto end;
188 }
189 if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
190 fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
191 redir_interface ? "device" : "map", redir_interface ? "map" : "device");
192 goto end;
193 }
194
195 /* Custom BPF program */
196 obj = bpf_object__open_file(mprog_filename, NULL);
197 if (!obj) {
198 ret = -errno;
199 fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
200 strerror(errno));
201 return ret;
202 }
203
204 ret = bpf_object__load(obj);
205 if (ret < 0) {
206 ret = -errno;
207 fprintf(stderr, "Failed to bpf_object__load: %s\n",
208 strerror(errno));
209 return ret;
210 }
211
212 if (redir_map) {
213 int err, redir_map_fd, ifindex_out, key = 0;
214
215 redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
216 if (redir_map_fd < 0) {
217 fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
218 strerror(errno));
219 return redir_map_fd;
220 }
221
222 ifindex_out = if_nametoindex(redir_interface);
223 if (!ifindex_out)
224 ifindex_out = strtoul(redir_interface, NULL, 0);
225 if (!ifindex_out) {
226 fprintf(stderr, "Bad interface name or index\n");
227 return -EINVAL;
228 }
229
230 err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
231 if (err < 0)
232 return err;
233 }
234
235 prog = bpf_object__find_program_by_name(obj, mprog_name);
236 if (!prog) {
237 ret = -errno;
238 fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
239 strerror(errno));
240 return ret;
241 }
242
243 return bpf_program__fd(prog);
244 } else {
245 if (mprog_name) {
246 if (redir_interface || redir_map) {
247 fprintf(stderr, "Need to specify --mprog-filename/-f\n");
248 goto end;
249 }
250 if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
251 /* Use built-in pass/drop programs */
252 return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
253 : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
254 } else {
255 fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
256 mprog_name);
257 goto end;
258 }
259 } else {
260 if (redir_map) {
261 fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
262 " --redirect-device with --redirect-map\n");
263 goto end;
264 }
265 if (redir_interface) {
266 /* Use built-in devmap redirect */
267 struct bpf_devmap_val val = {};
268 int ifindex_out, err;
269 __u32 key = 0;
270
271 if (!redir_interface)
272 return 0;
273
274 ifindex_out = if_nametoindex(redir_interface);
275 if (!ifindex_out)
276 ifindex_out = strtoul(redir_interface, NULL, 0);
277 if (!ifindex_out) {
278 fprintf(stderr, "Bad interface name or index\n");
279 return -EINVAL;
280 }
281
282 if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
283 printf("Get interface %d mac failed\n", ifindex_out);
284 return -EINVAL;
285 }
286
287 val.ifindex = ifindex_out;
288 val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
289 err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
290 if (err < 0)
291 return -errno;
292
293 return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
294 }
295 }
296 }
297
298 /* Disabled */
299 return 0;
300 end:
301 fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
302 return -EINVAL;
303 }
304
main(int argc,char ** argv)305 int main(int argc, char **argv)
306 {
307 const char *redir_interface = NULL, *redir_map = NULL;
308 const char *mprog_filename = NULL, *mprog_name = NULL;
309 struct xdp_redirect_cpu *skel;
310 struct bpf_map_info info = {};
311 struct bpf_cpumap_val value;
312 __u32 infosz = sizeof(info);
313 int ret = EXIT_FAIL_OPTION;
314 unsigned long interval = 2;
315 bool stress_mode = false;
316 struct bpf_program *prog;
317 const char *prog_name;
318 bool generic = false;
319 bool force = false;
320 int added_cpus = 0;
321 bool error = true;
322 int longindex = 0;
323 int add_cpu = -1;
324 int ifindex = -1;
325 int *cpu, i, opt;
326 __u32 qsize;
327 int n_cpus;
328
329 n_cpus = libbpf_num_possible_cpus();
330
331 /* Notice: Choosing the queue size is very important when CPU is
332 * configured with power-saving states.
333 *
334 * If deepest state take 133 usec to wakeup from (133/10^6). When link
335 * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
336 * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
337 * 166250 bytes. With MTU size packets this is 110 packets, and with
338 * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
339 *
340 * Setting default cpumap queue to 2048 as worst-case (small packet)
341 * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
342 * worst-case is 2043 packets.
343 *
344 * Sysadm can configured system to avoid deep-sleep via:
345 * tuned-adm profile network-latency
346 */
347 qsize = 2048;
348
349 skel = xdp_redirect_cpu__open();
350 if (!skel) {
351 fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
352 strerror(errno));
353 ret = EXIT_FAIL_BPF;
354 goto end;
355 }
356
357 ret = sample_init_pre_load(skel);
358 if (ret < 0) {
359 fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
360 ret = EXIT_FAIL_BPF;
361 goto end_destroy;
362 }
363
364 if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
365 fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
366 strerror(errno));
367 ret = EXIT_FAIL_BPF;
368 goto end_destroy;
369 }
370
371 if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
372 fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
373 strerror(errno));
374 ret = EXIT_FAIL_BPF;
375 goto end_destroy;
376 }
377
378 cpu = calloc(n_cpus, sizeof(int));
379 if (!cpu) {
380 fprintf(stderr, "Failed to allocate cpu array\n");
381 goto end_destroy;
382 }
383
384 prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
385 while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
386 long_options, &longindex)) != -1) {
387 switch (opt) {
388 case 'd':
389 if (strlen(optarg) >= IF_NAMESIZE) {
390 fprintf(stderr, "-d/--dev name too long\n");
391 usage(argv, long_options, __doc__, mask, true, skel->obj);
392 goto end_cpu;
393 }
394 ifindex = if_nametoindex(optarg);
395 if (!ifindex)
396 ifindex = strtoul(optarg, NULL, 0);
397 if (!ifindex) {
398 fprintf(stderr, "Bad interface index or name (%d): %s\n",
399 errno, strerror(errno));
400 usage(argv, long_options, __doc__, mask, true, skel->obj);
401 goto end_cpu;
402 }
403 break;
404 case 's':
405 mask |= SAMPLE_REDIRECT_MAP_CNT;
406 break;
407 case 'i':
408 interval = strtoul(optarg, NULL, 0);
409 break;
410 case 'S':
411 generic = true;
412 break;
413 case 'x':
414 stress_mode = true;
415 break;
416 case 'p':
417 /* Selecting eBPF prog to load */
418 prog_name = optarg;
419 prog = bpf_object__find_program_by_name(skel->obj,
420 prog_name);
421 if (!prog) {
422 fprintf(stderr,
423 "Failed to find program %s specified by"
424 " option -p/--progname\n",
425 prog_name);
426 print_avail_progs(skel->obj);
427 goto end_cpu;
428 }
429 break;
430 case 'f':
431 mprog_filename = optarg;
432 break;
433 case 'e':
434 mprog_name = optarg;
435 break;
436 case 'r':
437 redir_interface = optarg;
438 mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
439 break;
440 case 'm':
441 redir_map = optarg;
442 break;
443 case 'c':
444 /* Add multiple CPUs */
445 add_cpu = strtoul(optarg, NULL, 0);
446 if (add_cpu >= n_cpus) {
447 fprintf(stderr,
448 "--cpu nr too large for cpumap err (%d):%s\n",
449 errno, strerror(errno));
450 usage(argv, long_options, __doc__, mask, true, skel->obj);
451 goto end_cpu;
452 }
453 cpu[added_cpus++] = add_cpu;
454 break;
455 case 'q':
456 qsize = strtoul(optarg, NULL, 0);
457 break;
458 case 'F':
459 force = true;
460 break;
461 case 'v':
462 sample_switch_mode();
463 break;
464 case 'h':
465 error = false;
466 default:
467 usage(argv, long_options, __doc__, mask, error, skel->obj);
468 goto end_cpu;
469 }
470 }
471
472 ret = EXIT_FAIL_OPTION;
473 if (ifindex == -1) {
474 fprintf(stderr, "Required option --dev missing\n");
475 usage(argv, long_options, __doc__, mask, true, skel->obj);
476 goto end_cpu;
477 }
478
479 if (add_cpu == -1) {
480 fprintf(stderr, "Required option --cpu missing\n"
481 "Specify multiple --cpu option to add more\n");
482 usage(argv, long_options, __doc__, mask, true, skel->obj);
483 goto end_cpu;
484 }
485
486 skel->rodata->from_match[0] = ifindex;
487 if (redir_interface)
488 skel->rodata->to_match[0] = if_nametoindex(redir_interface);
489
490 ret = xdp_redirect_cpu__load(skel);
491 if (ret < 0) {
492 fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
493 strerror(errno));
494 goto end_cpu;
495 }
496
497 ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
498 if (ret < 0) {
499 fprintf(stderr, "Failed bpf_map_get_info_by_fd for cpumap: %s\n",
500 strerror(errno));
501 goto end_cpu;
502 }
503
504 skel->bss->cpumap_map_id = info.id;
505
506 map_fd = bpf_map__fd(skel->maps.cpu_map);
507 avail_fd = bpf_map__fd(skel->maps.cpus_available);
508 count_fd = bpf_map__fd(skel->maps.cpus_count);
509
510 ret = mark_cpus_unavailable();
511 if (ret < 0) {
512 fprintf(stderr, "Unable to mark CPUs as unavailable\n");
513 goto end_cpu;
514 }
515
516 ret = sample_init(skel, mask);
517 if (ret < 0) {
518 fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
519 ret = EXIT_FAIL;
520 goto end_cpu;
521 }
522
523 value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
524 mprog_filename, mprog_name);
525 if (value.bpf_prog.fd < 0) {
526 fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
527 strerror(-value.bpf_prog.fd));
528 usage(argv, long_options, __doc__, mask, true, skel->obj);
529 ret = EXIT_FAIL_BPF;
530 goto end_cpu;
531 }
532 value.qsize = qsize;
533
534 for (i = 0; i < added_cpus; i++) {
535 if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
536 fprintf(stderr, "Cannot proceed, exiting\n");
537 usage(argv, long_options, __doc__, mask, true, skel->obj);
538 goto end_cpu;
539 }
540 }
541
542 ret = EXIT_FAIL_XDP;
543 if (sample_install_xdp(prog, ifindex, generic, force) < 0)
544 goto end_cpu;
545
546 ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
547 if (ret < 0) {
548 fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
549 ret = EXIT_FAIL;
550 goto end_cpu;
551 }
552 ret = EXIT_OK;
553 end_cpu:
554 free(cpu);
555 end_destroy:
556 xdp_redirect_cpu__destroy(skel);
557 end:
558 sample_exit(ret);
559 }
560