1  // SPDX-License-Identifier: GPL-2.0
2  #define _GNU_SOURCE
3  #include <sched.h>
4  #include <sys/mount.h>
5  #include <sys/stat.h>
6  #include <sys/types.h>
7  #include <linux/limits.h>
8  #include <stdio.h>
9  #include <stdlib.h>
10  #include <linux/sched.h>
11  #include <fcntl.h>
12  #include <unistd.h>
13  #include <ftw.h>
14  
15  #include "cgroup_helpers.h"
16  #include "bpf_util.h"
17  
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace, and cgroup namespace. The cgroupv2
 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
 * have cgroupv2 enabled at this point in time. It's easier to create our
 * own mount namespace and manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
30  
/* Descriptor limit handed to nftw() when the cleanup helpers walk a work dir */
#define WALK_FD_LIMIT			16

/* Mount point used for the cgroup v2 hierarchy */
#define CGROUP_MOUNT_PATH		"/mnt"
/* Base directory under which the cgroup v1 net_cls hierarchy is expected */
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
/* Mount point of the cgroup v1 net_cls controller */
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
/* Name of the test work directory, appended to one of the mount points */
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
37  
/*
 * Format "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><path>" into @buf.
 *
 * @buf must be an array, not a pointer: the write is bounded by sizeof(buf).
 * The expression evaluates to the snprintf() return value.
 */
#define format_cgroup_path_pid(buf, path, pid)			\
	snprintf(buf, sizeof(buf), "%s%s%d%s",			\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, pid, path)

/* Same as format_cgroup_path_pid(), using the PID of the calling process */
#define format_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getpid())

/* Same as format_cgroup_path_pid(), using the PID of the parent process */
#define format_parent_cgroup_path(buf, path)			\
	format_cgroup_path_pid(buf, path, getppid())

/*
 * Format "<NETCLS_MOUNT_PATH><CGROUP_WORK_DIR>" into @buf, which must be an
 * array. No PID is appended for the cgroup v1 work dir.
 */
#define format_classid_path(buf)				\
	snprintf(buf, sizeof(buf), "%s%s",			\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
51  
/*
 * __enable_controllers() - Enable controllers for the children of a cgroup
 * @cgroup_path: Path of the cgroup directory
 * @controllers: Space-separated controller names, or NULL to use the
 *		 contents of the cgroup's cgroup.controllers file
 *
 * Every name in the list is written to the cgroup's cgroup.subtree_control
 * file as "+<name>".
 *
 * Returns 0 on success, or when cgroup.controllers is empty. Returns 1 after
 * logging an error if a file cannot be opened, read or written.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char file[PATH_MAX + 1];
	char list[PATH_MAX + 1];
	char *name, *save;
	int ctl_fd;

	if (controllers) {
		bpf_strlcpy(list, controllers, sizeof(list));
	} else {
		/* If no controllers are passed, enable all available ones */
		ssize_t nread;
		int src_fd;

		snprintf(file, sizeof(file), "%s/cgroup.controllers",
			 cgroup_path);
		src_fd = open(file, O_RDONLY);
		if (src_fd < 0) {
			log_err("Opening cgroup.controllers: %s", file);
			return 1;
		}

		/* Leave room for the terminating NUL added below */
		nread = read(src_fd, list, sizeof(list) - 1);
		close(src_fd);
		if (nread < 0) {
			log_err("Reading cgroup.controllers: %s", file);
			return 1;
		}
		if (nread == 0)	/* No controllers to enable */
			return 0;
		list[nread] = 0;
	}

	snprintf(file, sizeof(file), "%s/cgroup.subtree_control", cgroup_path);
	ctl_fd = open(file, O_RDWR);
	if (ctl_fd < 0) {
		log_err("Opening cgroup.subtree_control: %s", file);
		return 1;
	}

	/* One write per controller, so a failure can name the culprit */
	name = strtok_r(list, " ", &save);
	while (name) {
		if (dprintf(ctl_fd, "+%s\n", name) <= 0) {
			log_err("Enabling controller %s: %s", name, file);
			close(ctl_fd);
			return 1;
		}
		name = strtok_r(NULL, " ", &save);
	}

	close(ctl_fd);
	return 0;
}
101  
/**
 * enable_controllers() - Enable cgroup v2 controllers
 * @relative_path: The cgroup path, relative to the workdir
 * @controllers: List of controllers to enable in cgroup.controllers format,
 *		 or NULL to enable all available controllers
 *
 * The path is resolved against the work dir of the calling process before
 * the controllers are enabled in that cgroup's cgroup.subtree_control.
 *
 * If successful, 0 is returned.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return __enable_controllers(abs_path, controllers);
}
120  
/*
 * __write_cgroup_file() - Write a string to a file inside a cgroup directory
 * @cgroup_path: Path of the cgroup directory
 * @file: Name of the file inside that directory
 * @buf: NUL-terminated string to write
 *
 * Returns 0 on success. Returns 1 after logging an error if the file cannot
 * be opened or if dprintf() reports that nothing was written.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int rc = 0;
	int fd;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	fd = open(target, O_RDWR);
	if (fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	if (dprintf(fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(fd);
	return rc;
}
142  
/**
 * write_cgroup_file() - Write to a cgroup file
 * @relative_path: The cgroup path, relative to the workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Write to a file in the given cgroup's directory, resolved against the
 * work dir of the calling process.
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	char abs_path[PATH_MAX - 24];

	format_cgroup_path(abs_path, relative_path);

	return __write_cgroup_file(abs_path, file, buf);
}
161  
/**
 * write_cgroup_file_parent() - Write to a cgroup file in the parent process
 *                              workdir
 * @relative_path: The cgroup path, relative to the parent process workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Same as write_cgroup_file(), except that the path is resolved against the
 * work dir named after the parent's PID (getppid()).
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	char abs_path[PATH_MAX - 24];

	format_parent_cgroup_path(abs_path, relative_path);

	return __write_cgroup_file(abs_path, file, buf);
}
182  
183  /**
184   * setup_cgroup_environment() - Setup the cgroup environment
185   *
186   * After calling this function, cleanup_cgroup_environment should be called
187   * once testing is complete.
188   *
189   * This function will print an error to stderr and return 1 if it is unable
190   * to setup the cgroup environment. If setup is successful, 0 is returned.
191   */
setup_cgroup_environment(void)192  int setup_cgroup_environment(void)
193  {
194  	char cgroup_workdir[PATH_MAX - 24];
195  
196  	format_cgroup_path(cgroup_workdir, "");
197  
198  	if (unshare(CLONE_NEWNS)) {
199  		log_err("unshare");
200  		return 1;
201  	}
202  
203  	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
204  		log_err("mount fakeroot");
205  		return 1;
206  	}
207  
208  	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
209  		log_err("mount cgroup2");
210  		return 1;
211  	}
212  
213  	/* Cleanup existing failed runs, now that the environment is setup */
214  	cleanup_cgroup_environment();
215  
216  	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
217  		log_err("mkdir cgroup work dir");
218  		return 1;
219  	}
220  
221  	/* Enable all available controllers to increase test coverage */
222  	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
223  	    __enable_controllers(cgroup_workdir, NULL))
224  		return 1;
225  
226  	return 0;
227  }
228  
/*
 * nftwfunc() - nftw() callback used by the cleanup helpers
 * @filename: Path of the entry being visited
 * @statptr: Unused
 * @fileflags: Type flag supplied by nftw()
 * @pfwt: Unused
 *
 * Tries to rmdir() entries whose type flag matches FTW_D and logs an error
 * if that fails. Always returns 0, so the walk continues past failures.
 *
 * NOTE(review): the flag is tested with '&' rather than '=='. The callers
 * walk with FTW_DEPTH, so this presumably relies on the flag reported for
 * directories sharing a bit with FTW_D - confirm against the libc in use.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	if (!(fileflags & FTW_D))
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);

	return 0;
}
236  
/*
 * join_cgroup_from_top() - Move the calling process into a cgroup
 * @cgroup_path: Path of the cgroup directory
 *
 * Writes the PID of the calling process to the cgroup's cgroup.procs file.
 *
 * Returns 0 on success. Returns 1 after logging an error if the file cannot
 * be opened or the write fails.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int fd, written;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	fd = open(procs_file, O_WRONLY);
	if (fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	written = dprintf(fd, "%d\n", getpid());
	if (written < 0)
		log_err("Joining Cgroup");

	close(fd);
	return written < 0 ? 1 : 0;
}
260  
/**
 * join_cgroup() - Join a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to join
 *
 * The cgroup must already exist below the work dir of the calling process.
 * The path is resolved against that work dir and the calling process is then
 * written to the cgroup's cgroup.procs file.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return join_cgroup_from_top(abs_path);
}
279  
/**
 * join_parent_cgroup() - Join a cgroup in the parent process workdir
 * @relative_path: The cgroup path, relative to parent process workdir, to join
 *
 * Same as join_cgroup(), except that the path is resolved against the work
 * dir named after the parent's PID (getppid()).
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_parent_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_parent_cgroup_path(abs_path, relative_path);

	return join_cgroup_from_top(abs_path);
}
295  
296  /**
297   * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
298   *
299   * This is an idempotent function to delete all temporary cgroups that
300   * have been created during the test, including the cgroup testing work
301   * directory.
302   *
303   * At call time, it moves the calling process to the root cgroup, and then
304   * runs the deletion process. It is idempotent, and should not fail, unless
305   * a process is lingering.
306   *
307   * On failure, it will print an error to stderr, and try to continue.
308   */
cleanup_cgroup_environment(void)309  void cleanup_cgroup_environment(void)
310  {
311  	char cgroup_workdir[PATH_MAX + 1];
312  
313  	format_cgroup_path(cgroup_workdir, "");
314  	join_cgroup_from_top(CGROUP_MOUNT_PATH);
315  	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
316  }
317  
318  /**
319   * get_root_cgroup() - Get the FD of the root cgroup
320   *
321   * On success, it returns the file descriptor. On failure, it returns -1.
322   * If there is a failure, it prints the error to stderr.
323   */
get_root_cgroup(void)324  int get_root_cgroup(void)
325  {
326  	int fd;
327  
328  	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
329  	if (fd < 0) {
330  		log_err("Opening root cgroup");
331  		return -1;
332  	}
333  	return fd;
334  }
335  
/**
 * remove_cgroup() - Remove a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to remove
 *
 * The cgroup is removed with a single rmdir(), so it is expected to exist
 * below the work dir and to have no children or live processes.
 *
 * On failure, it will print an error to stderr.
 */
void remove_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	if (rmdir(abs_path) == 0)
		return;

	log_err("rmdiring cgroup %s .. %s", relative_path, abs_path);
}
354  
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @relative_path: The cgroup path, relative to the workdir, to create
 *
 * Creates the cgroup directory under the work dir of the calling process and
 * opens it read-only. An already existing directory (EEXIST) is not treated
 * as an error.
 *
 * On success, it returns the file descriptor. On failure it returns -1.
 * If there is a failure, it prints the error to stderr.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int cg_fd;

	format_cgroup_path(abs_path, relative_path);

	if (mkdir(abs_path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", relative_path, abs_path);
		return -1;
	}

	cg_fd = open(abs_path, O_RDONLY);
	if (cg_fd >= 0)
		return cg_fd;

	log_err("Opening Cgroup");
	return -1;
}
384  
/**
 * get_cgroup_id() - Get cgroup id for a particular cgroup path
 * @relative_path: The cgroup path, relative to the workdir, to look up
 *
 * The id is taken from the first 8 bytes of the file handle that
 * name_to_handle_at() returns for the cgroup directory.
 *
 * On success, it returns the cgroup id. On failure it returns 0,
 * which is an invalid cgroup id.
 * If there is a failure, it prints the error to stderr.
 */
unsigned long long get_cgroup_id(const char *relative_path)
{
	char path[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id;

	format_cgroup_path(path, relative_path);

	/* Start with a zeroed, header-only handle (handle_bytes == 0) */
	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Sizing call: it is expected to fail and to leave handle_bytes set
	 * to 8. Success, or any other size, is treated as an error.
	 */
	if (name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0) >= 0 ||
	    handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* Grow the buffer so the handle payload fits behind the header */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto out;
	}
	handle = grown;

	if (name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0) < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* The 8 handle bytes are the cgroup id */
	memcpy(&cgid, handle->f_handle, 8);

out:
	free(handle);
	return cgid;
}
440  
/**
 * cgroup_setup_and_join() - Setup the cgroup environment, then create and
 *                           join a cgroup
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Calls setup_cgroup_environment(), create_and_get_cgroup() and
 * join_cgroup() in that order.
 *
 * On success, it returns the file descriptor of the created cgroup; the
 * caller is responsible for closing it. If the environment cannot be set up
 * or the cgroup cannot be joined, -EINVAL is returned. If the cgroup cannot
 * be created, the negative value from create_and_get_cgroup() is returned.
 * When creating or joining fails, the environment is cleaned up again and no
 * file descriptor is left open.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller only gets an error code, so drop the fd here */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}

	return cg_fd;
}
463  
464  /**
465   * setup_classid_environment() - Setup the cgroupv1 net_cls environment
466   *
467   * After calling this function, cleanup_classid_environment should be called
468   * once testing is complete.
469   *
470   * This function will print an error to stderr and return 1 if it is unable
471   * to setup the cgroup environment. If setup is successful, 0 is returned.
472   */
setup_classid_environment(void)473  int setup_classid_environment(void)
474  {
475  	char cgroup_workdir[PATH_MAX + 1];
476  
477  	format_classid_path(cgroup_workdir);
478  
479  	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
480  	    errno != EBUSY) {
481  		log_err("mount cgroup base");
482  		return 1;
483  	}
484  
485  	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
486  		log_err("mkdir cgroup net_cls");
487  		return 1;
488  	}
489  
490  	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
491  	    errno != EBUSY) {
492  		log_err("mount cgroup net_cls");
493  		return 1;
494  	}
495  
496  	cleanup_classid_environment();
497  
498  	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
499  		log_err("mkdir cgroup work dir");
500  		return 1;
501  	}
502  
503  	return 0;
504  }
505  
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 * @id: the numeric classid
 *
 * Writes the passed classid, as an unsigned decimal number, into the
 * net_cls.classid file of the cgroup work dir in order to later on trigger
 * socket tagging.
 *
 * On success, it returns 0, otherwise on failure it returns 1. If there
 * is a failure, it prints the error to stderr.
 */
int set_classid(unsigned int id)
{
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int status = 0;
	int fd;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	fd = open(classid_file, O_WRONLY);
	if (fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	if (dprintf(fd, "%u\n", id) < 0) {
		log_err("Setting cgroup classid");
		status = 1;
	}

	close(fd);
	return status;
}
540  
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into the net_cls cgroup work dir, which must
 * already exist. This causes the process sockets to be tagged with the
 * given net_cls classid.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return join_cgroup_from_top(workdir);
}
557  
558  /**
559   * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
560   *
561   * At call time, it moves the calling process to the root cgroup, and then
562   * runs the deletion process.
563   *
564   * On failure, it will print an error to stderr, and try to continue.
565   */
cleanup_classid_environment(void)566  void cleanup_classid_environment(void)
567  {
568  	char cgroup_workdir[PATH_MAX + 1];
569  
570  	format_classid_path(cgroup_workdir);
571  	join_cgroup_from_top(NETCLS_MOUNT_PATH);
572  	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
573  }
574