// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
static const char *__doc__ =
"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
"Valid specification for CPUMAP BPF program:\n"
"  --mprog-name/-e pass (use built-in XDP_PASS program)\n"
"  --mprog-name/-e drop (use built-in XDP_DROP program)\n"
"  --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
"  Custom CPUMAP BPF program:\n"
"    --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
"    Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
"    to configure DEVMAP in BPF object <filename>\n";

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
#include <sys/sysinfo.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>
#include <arpa/inet.h>
#include <linux/if_link.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
#include "xdp_sample_user.h"
#include "xdp_redirect_cpu.skel.h"

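/* File descriptors for the cpu_map, cpus_available and cpus_count maps,
 * filled in from the skeleton once it has been loaded in main().
 */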
static int map_fd;
static int avail_fd;
static int count_fd;

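/* Default set of statistics the sample reports; -s and -r extend this mask */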
static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
		  SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
		  SAMPLE_EXCEPTION_CNT;

DEFINE_SAMPLE_INIT(xdp_redirect_cpu);

static const struct option long_options[] = {
	{ "help", no_argument, NULL, 'h' },
	{ "dev", required_argument, NULL, 'd' },
	{ "skb-mode", no_argument, NULL, 'S' },
	{ "progname", required_argument, NULL, 'p' },
	{ "qsize", required_argument, NULL, 'q' },
	{ "cpu", required_argument, NULL, 'c' },
	{ "stress-mode", no_argument, NULL, 'x' },
	{ "force", no_argument, NULL, 'F' },
	{ "interval", required_argument, NULL, 'i' },
	{ "verbose", no_argument, NULL, 'v' },
	{ "stats", no_argument, NULL, 's' },
	{ "mprog-name", required_argument, NULL, 'e' },
	{ "mprog-filename", required_argument, NULL, 'f' },
	{ "redirect-device", required_argument, NULL, 'r' },
	{ "redirect-map", required_argument, NULL, 'm' },
	{}
};

static void print_avail_progs(struct bpf_object *obj)
{
	struct bpf_program *pos;

	printf(" Programs to be used for -p/--progname:\n");
	bpf_object__for_each_program(pos, obj) {
		if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) {
			if (!strncmp(bpf_program__name(pos), "xdp_prognum",
				     sizeof("xdp_prognum") - 1))
				printf(" %s\n", bpf_program__name(pos));
		}
	}
}

static void usage(char *argv[], const struct option *long_options,
		  const char *doc, int mask, bool error, struct bpf_object *obj)
{
	sample_usage(argv, long_options, doc, mask, error);
	print_avail_progs(obj);
}

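/* Add or replace a CPU entry in the cpumap, record the CPU in the
 * cpus_available map, and bump cpus_count for new entries.
 */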
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
	__u32 curr_cpus_count = 0;
	__u32 key = 0;
	int ret;

	/* Add a CPU entry to the cpumap, as this allocates a cpu entry in
	 * the kernel for the cpu.
	 */
	ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
	if (ret < 0) {
		fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
		return ret;
	}

	/* Inform the BPF programs that a new CPU is available to select
	 * from via the control maps.
	 */
	ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
	if (ret < 0) {
		fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
		return ret;
	}

	/* When not replacing/updating existing entry, bump the count */
	ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
	if (ret < 0) {
		fprintf(stderr, "Failed reading curr cpus_count: %s\n",
			strerror(errno));
		return ret;
	}
	if (new) {
		curr_cpus_count++;
		ret = bpf_map_update_elem(count_fd, &key,
					  &curr_cpus_count, 0);
		if (ret < 0) {
			fprintf(stderr, "Failed writing curr cpus_count: %s\n",
				strerror(errno));
			return ret;
		}
	}

	printf("%s CPU: %u as idx: %u qsize: %u cpumap_prog_fd: %d (cpus_count: %u)\n",
	       new ? "Add new" : "Replace", cpu, avail_idx,
	       value->qsize, value->bpf_prog.fd, curr_cpus_count);

	return 0;
}

/* CPUs are zero-indexed. Thus, add a special sentinel default value
 * in the cpus_available map to mark CPU indexes that are not configured.
 */
static int mark_cpus_unavailable(void)
{
	int ret, i, n_cpus = libbpf_num_possible_cpus();
	__u32 invalid_cpu = n_cpus;

	for (i = 0; i < n_cpus; i++) {
		ret = bpf_map_update_elem(avail_fd, &i,
					  &invalid_cpu, 0);
		if (ret < 0) {
			fprintf(stderr, "Failed marking CPU unavailable: %s\n",
				strerror(errno));
			return ret;
		}
	}

	return 0;
}

/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(void *ctx)
{
	struct bpf_cpumap_val *value = ctx;

	/* Changing qsize will cause the kernel to free and allocate a new
	 * bpf_cpu_map_entry, with an associated/complicated tear-down
	 * procedure.
	 */
	value->qsize = 1024;
	create_cpu_entry(1, value, 0, false);
	value->qsize = 8;
	create_cpu_entry(1, value, 0, false);
	value->qsize = 16000;
	create_cpu_entry(1, value, 0, false);
}

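/* Work out which BPF program, if any, should be attached to CPUMAP entries.
 * Returns a program fd, 0 when no CPUMAP program is configured, or a
 * negative errno on invalid option combinations or load failures.
 */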
static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
			   const char *redir_interface, const char *redir_map,
			   const char *mprog_filename, const char *mprog_name)
{
	if (mprog_filename) {
		struct bpf_program *prog;
		struct bpf_object *obj;
		int ret;

		if (!mprog_name) {
			fprintf(stderr, "BPF program not specified for file %s\n",
				mprog_filename);
			goto end;
		}
		if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
			fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
				redir_interface ? "device" : "map", redir_interface ? "map" : "device");
			goto end;
		}

		/* Custom BPF program */
		obj = bpf_object__open_file(mprog_filename, NULL);
		if (!obj) {
			ret = -errno;
			fprintf(stderr, "Failed to bpf_object__open_file: %s\n",
				strerror(errno));
			return ret;
		}

		ret = bpf_object__load(obj);
		if (ret < 0) {
			ret = -errno;
			fprintf(stderr, "Failed to bpf_object__load: %s\n",
				strerror(errno));
			return ret;
		}

		if (redir_map) {
			int err, redir_map_fd, ifindex_out, key = 0;

			redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
			if (redir_map_fd < 0) {
				fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
					strerror(errno));
				return redir_map_fd;
			}

			ifindex_out = if_nametoindex(redir_interface);
			if (!ifindex_out)
				ifindex_out = strtoul(redir_interface, NULL, 0);
			if (!ifindex_out) {
				fprintf(stderr, "Bad interface name or index\n");
				return -EINVAL;
			}

			err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
			if (err < 0)
				return err;
		}

		prog = bpf_object__find_program_by_name(obj, mprog_name);
		if (!prog) {
			ret = -errno;
			fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
				strerror(errno));
			return ret;
		}

		return bpf_program__fd(prog);
	} else {
		if (mprog_name) {
			if (redir_interface || redir_map) {
				fprintf(stderr, "Need to specify --mprog-filename/-f\n");
				goto end;
			}
			if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
				/* Use built-in pass/drop programs */
				return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
					: bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
			} else {
				fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
					mprog_name);
				goto end;
			}
		} else {
			if (redir_map) {
				fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
					" --redirect-device with --redirect-map\n");
				goto end;
			}
			if (redir_interface) {
				/* Use built-in devmap redirect */
				struct bpf_devmap_val val = {};
				int ifindex_out, err;
				__u32 key = 0;

				if (!redir_interface)
					return 0;

				ifindex_out = if_nametoindex(redir_interface);
				if (!ifindex_out)
					ifindex_out = strtoul(redir_interface, NULL, 0);
				if (!ifindex_out) {
					fprintf(stderr, "Bad interface name or index\n");
					return -EINVAL;
				}

				if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
					printf("Get interface %d mac failed\n", ifindex_out);
					return -EINVAL;
				}

				val.ifindex = ifindex_out;
				val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
				err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
				if (err < 0)
					return -errno;

				return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
			}
		}
	}

	/* Disabled */
	return 0;
end:
	fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
	return -EINVAL;
}

int main(int argc, char **argv)
{
	const char *redir_interface = NULL, *redir_map = NULL;
	const char *mprog_filename = NULL, *mprog_name = NULL;
	struct xdp_redirect_cpu *skel;
	struct bpf_map_info info = {};
	struct bpf_cpumap_val value;
	__u32 infosz = sizeof(info);
	int ret = EXIT_FAIL_OPTION;
	unsigned long interval = 2;
	bool stress_mode = false;
	struct bpf_program *prog;
	const char *prog_name;
	bool generic = false;
	bool force = false;
	int added_cpus = 0;
	bool error = true;
	int longindex = 0;
	int add_cpu = -1;
	int ifindex = -1;
	int *cpu, i, opt;
	__u32 qsize;
	int n_cpus;

	n_cpus = libbpf_num_possible_cpus();

	/* Notice: Choosing the queue size is very important when the CPU is
	 * configured with power-saving states.
	 *
	 * If the deepest state takes 133 usec to wake up from (133/10^6) and
	 * the link speed is 10Gbit/s ((10*10^9/8) in bytes/sec), how many
	 * bytes can arrive within 133 usec at this speed:
	 * (10*10^9/8)*(133/10^6) = 166250 bytes. With MTU-sized packets this
	 * is 110 packets, and with minimum-sized Ethernet frames of 84 bytes
	 * on the wire (incl. MAC preamble + interframe gap) it is 1979 packets.
	 *
	 * Set the default cpumap queue size to 2048: the small-packet worst
	 * case plus 64 packets due to the kthread wakeup call (due to
	 * xdp_do_flush) gives a worst case of 2043 packets, which still fits.
	 *
	 * A sysadmin can configure the system to avoid deep sleep via:
	 *   tuned-adm profile network-latency
	 */
	qsize = 2048;

	skel = xdp_redirect_cpu__open();
	if (!skel) {
		fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
			strerror(errno));
		ret = EXIT_FAIL_BPF;
		goto end;
	}

	ret = sample_init_pre_load(skel);
	if (ret < 0) {
		fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
		ret = EXIT_FAIL_BPF;
		goto end_destroy;
	}

	if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
		fprintf(stderr, "Failed to set max entries for cpu_map map: %s\n",
			strerror(errno));
		ret = EXIT_FAIL_BPF;
		goto end_destroy;
	}

	if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
		fprintf(stderr, "Failed to set max entries for cpus_available map: %s\n",
			strerror(errno));
		ret = EXIT_FAIL_BPF;
		goto end_destroy;
	}

	cpu = calloc(n_cpus, sizeof(int));
	if (!cpu) {
		fprintf(stderr, "Failed to allocate cpu array\n");
		ret = EXIT_FAIL;
		goto end_destroy;
	}

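	/* Default XDP program to attach; -p/--progname selects another one
	 * from the skeleton's XDP programs.
	 */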
	prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
	while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
				  long_options, &longindex)) != -1) {
		switch (opt) {
		case 'd':
			if (strlen(optarg) >= IF_NAMESIZE) {
				fprintf(stderr, "-d/--dev name too long\n");
				usage(argv, long_options, __doc__, mask, true, skel->obj);
				goto end_cpu;
			}
			ifindex = if_nametoindex(optarg);
			if (!ifindex)
				ifindex = strtoul(optarg, NULL, 0);
			if (!ifindex) {
				fprintf(stderr, "Bad interface index or name (%d): %s\n",
					errno, strerror(errno));
				usage(argv, long_options, __doc__, mask, true, skel->obj);
				goto end_cpu;
			}
			break;
		case 's':
			mask |= SAMPLE_REDIRECT_MAP_CNT;
			break;
		case 'i':
			interval = strtoul(optarg, NULL, 0);
			break;
		case 'S':
			generic = true;
			break;
		case 'x':
			stress_mode = true;
			break;
		case 'p':
			/* Selecting eBPF prog to load */
			prog_name = optarg;
			prog = bpf_object__find_program_by_name(skel->obj,
								prog_name);
			if (!prog) {
				fprintf(stderr,
					"Failed to find program %s specified by"
					" option -p/--progname\n",
					prog_name);
				print_avail_progs(skel->obj);
				goto end_cpu;
			}
			break;
		case 'f':
			mprog_filename = optarg;
			break;
		case 'e':
			mprog_name = optarg;
			break;
		case 'r':
			redir_interface = optarg;
			mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
			break;
		case 'm':
			redir_map = optarg;
			break;
		case 'c':
			/* Add multiple CPUs */
			add_cpu = strtoul(optarg, NULL, 0);
			if (add_cpu >= n_cpus) {
				fprintf(stderr,
					"--cpu nr too large for cpumap err (%d):%s\n",
					errno, strerror(errno));
				usage(argv, long_options, __doc__, mask, true, skel->obj);
				goto end_cpu;
			}
			cpu[added_cpus++] = add_cpu;
			break;
		case 'q':
			qsize = strtoul(optarg, NULL, 0);
			break;
		case 'F':
			force = true;
			break;
		case 'v':
			sample_switch_mode();
			break;
		case 'h':
			error = false;
		default:
			usage(argv, long_options, __doc__, mask, error, skel->obj);
			goto end_cpu;
		}
	}

	ret = EXIT_FAIL_OPTION;
	if (ifindex == -1) {
		fprintf(stderr, "Required option --dev missing\n");
		usage(argv, long_options, __doc__, mask, true, skel->obj);
		goto end_cpu;
	}

	if (add_cpu == -1) {
		fprintf(stderr, "Required option --cpu missing\n"
				"Specify multiple --cpu options to add more\n");
		usage(argv, long_options, __doc__, mask, true, skel->obj);
		goto end_cpu;
	}

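	/* Record the ingress device (and egress device, if redirecting) for
	 * the BPF side to match against.
	 */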
	skel->rodata->from_match[0] = ifindex;
	if (redir_interface)
		skel->rodata->to_match[0] = if_nametoindex(redir_interface);

	ret = xdp_redirect_cpu__load(skel);
	if (ret < 0) {
		fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
			strerror(errno));
		goto end_cpu;
	}

	ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
	if (ret < 0) {
		fprintf(stderr, "Failed bpf_map_get_info_by_fd for cpumap: %s\n",
			strerror(errno));
		goto end_cpu;
	}

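	/* Make the cpumap's map id visible to the BPF programs */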
	skel->bss->cpumap_map_id = info.id;

	map_fd = bpf_map__fd(skel->maps.cpu_map);
	avail_fd = bpf_map__fd(skel->maps.cpus_available);
	count_fd = bpf_map__fd(skel->maps.cpus_count);

	ret = mark_cpus_unavailable();
	if (ret < 0) {
		fprintf(stderr, "Unable to mark CPUs as unavailable\n");
		goto end_cpu;
	}

	ret = sample_init(skel, mask);
	if (ret < 0) {
		fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
		ret = EXIT_FAIL;
		goto end_cpu;
	}

	value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
					    mprog_filename, mprog_name);
	if (value.bpf_prog.fd < 0) {
		fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
			strerror(-value.bpf_prog.fd));
		usage(argv, long_options, __doc__, mask, true, skel->obj);
		ret = EXIT_FAIL_BPF;
		goto end_cpu;
	}
	value.qsize = qsize;

	for (i = 0; i < added_cpus; i++) {
		if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
			fprintf(stderr, "Cannot proceed, exiting\n");
			usage(argv, long_options, __doc__, mask, true, skel->obj);
			ret = EXIT_FAIL_BPF;
			goto end_cpu;
		}
	}

	ret = EXIT_FAIL_XDP;
	if (sample_install_xdp(prog, ifindex, generic, force) < 0)
		goto end_cpu;

	ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
	if (ret < 0) {
		fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
		ret = EXIT_FAIL;
		goto end_cpu;
	}
	ret = EXIT_OK;
end_cpu:
	free(cpu);
end_destroy:
	xdp_redirect_cpu__destroy(skel);
end:
	sample_exit(ret);
}