1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15 
16 #include "../kselftest_harness.h"
17 #include "../clone3/clone3_selftests.h"
18 
/*
 * Fallback syscall number for close_range(), used only when the headers
 * included above did not already provide __NR_close_range. Alpha, the three
 * MIPS ABIs and ia64 get their own numbers; everything else uses 436.
 */
#if !defined(__NR_close_range)
	#if defined(__alpha__)
		#define __NR_close_range 546
	#elif defined(_MIPS_SIM)
		/* o32 */
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_close_range (436 + 4000)
		#endif
		/* n32 */
		#if _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_close_range (436 + 6000)
		#endif
		/* n64 */
		#if _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_close_range (436 + 5000)
		#endif
	#elif defined(__ia64__)
		#define __NR_close_range (436 + 1024)
	#else
		#define __NR_close_range 436
	#endif
#endif
38 
/*
 * Fallback definitions of the close_range() flag bits for builds whose
 * headers do not define them.
 */
#if !defined(CLOSE_RANGE_UNSHARE)
#define CLOSE_RANGE_UNSHARE	(1U << 1)
#endif

#if !defined(CLOSE_RANGE_CLOEXEC)
#define CLOSE_RANGE_CLOEXEC	(1U << 2)
#endif
46 
/*
 * Raw close_range() system call.
 *
 * Passes @fd, @max_fd and @flags straight to syscall(__NR_close_range, ...)
 * and returns its result converted to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return rc;
}
52 
/* Number of elements in the array @x; only defined here if nobody else did. */
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#endif
56 
TEST(core_close_range)57 TEST(core_close_range)
58 {
59 	int i, ret;
60 	int open_fds[101];
61 
62 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
63 		int fd;
64 
65 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
66 		ASSERT_GE(fd, 0) {
67 			if (errno == ENOENT)
68 				SKIP(return, "Skipping test since /dev/null does not exist");
69 		}
70 
71 		open_fds[i] = fd;
72 	}
73 
74 	EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
75 		if (errno == ENOSYS)
76 			SKIP(return, "close_range() syscall not supported");
77 	}
78 
79 	EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
80 
81 	for (i = 0; i <= 50; i++)
82 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
83 
84 	for (i = 51; i <= 100; i++)
85 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
86 
87 	/* create a couple of gaps */
88 	close(57);
89 	close(78);
90 	close(81);
91 	close(82);
92 	close(84);
93 	close(90);
94 
95 	EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
96 
97 	for (i = 51; i <= 92; i++)
98 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
99 
100 	for (i = 93; i <= 100; i++)
101 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
102 
103 	/* test that the kernel caps and still closes all fds */
104 	EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
105 
106 	for (i = 93; i <= 99; i++)
107 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
108 
109 	EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
110 
111 	EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
112 
113 	EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
114 }
115 
TEST(close_range_unshare)116 TEST(close_range_unshare)
117 {
118 	int i, ret, status;
119 	pid_t pid;
120 	int open_fds[101];
121 	struct __clone_args args = {
122 		.flags = CLONE_FILES,
123 		.exit_signal = SIGCHLD,
124 	};
125 
126 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
127 		int fd;
128 
129 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
130 		ASSERT_GE(fd, 0) {
131 			if (errno == ENOENT)
132 				SKIP(return, "Skipping test since /dev/null does not exist");
133 		}
134 
135 		open_fds[i] = fd;
136 	}
137 
138 	pid = sys_clone3(&args, sizeof(args));
139 	ASSERT_GE(pid, 0);
140 
141 	if (pid == 0) {
142 		ret = sys_close_range(open_fds[0], open_fds[50],
143 				      CLOSE_RANGE_UNSHARE);
144 		if (ret)
145 			exit(EXIT_FAILURE);
146 
147 		for (i = 0; i <= 50; i++)
148 			if (fcntl(open_fds[i], F_GETFL) != -1)
149 				exit(EXIT_FAILURE);
150 
151 		for (i = 51; i <= 100; i++)
152 			if (fcntl(open_fds[i], F_GETFL) == -1)
153 				exit(EXIT_FAILURE);
154 
155 		/* create a couple of gaps */
156 		close(57);
157 		close(78);
158 		close(81);
159 		close(82);
160 		close(84);
161 		close(90);
162 
163 		ret = sys_close_range(open_fds[51], open_fds[92],
164 				      CLOSE_RANGE_UNSHARE);
165 		if (ret)
166 			exit(EXIT_FAILURE);
167 
168 		for (i = 51; i <= 92; i++)
169 			if (fcntl(open_fds[i], F_GETFL) != -1)
170 				exit(EXIT_FAILURE);
171 
172 		for (i = 93; i <= 100; i++)
173 			if (fcntl(open_fds[i], F_GETFL) == -1)
174 				exit(EXIT_FAILURE);
175 
176 		/* test that the kernel caps and still closes all fds */
177 		ret = sys_close_range(open_fds[93], open_fds[99],
178 				      CLOSE_RANGE_UNSHARE);
179 		if (ret)
180 			exit(EXIT_FAILURE);
181 
182 		for (i = 93; i <= 99; i++)
183 			if (fcntl(open_fds[i], F_GETFL) != -1)
184 				exit(EXIT_FAILURE);
185 
186 		if (fcntl(open_fds[100], F_GETFL) == -1)
187 			exit(EXIT_FAILURE);
188 
189 		ret = sys_close_range(open_fds[100], open_fds[100],
190 				      CLOSE_RANGE_UNSHARE);
191 		if (ret)
192 			exit(EXIT_FAILURE);
193 
194 		if (fcntl(open_fds[100], F_GETFL) != -1)
195 			exit(EXIT_FAILURE);
196 
197 		exit(EXIT_SUCCESS);
198 	}
199 
200 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
201 	EXPECT_EQ(true, WIFEXITED(status));
202 	EXPECT_EQ(0, WEXITSTATUS(status));
203 }
204 
TEST(close_range_unshare_capped)205 TEST(close_range_unshare_capped)
206 {
207 	int i, ret, status;
208 	pid_t pid;
209 	int open_fds[101];
210 	struct __clone_args args = {
211 		.flags = CLONE_FILES,
212 		.exit_signal = SIGCHLD,
213 	};
214 
215 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
216 		int fd;
217 
218 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
219 		ASSERT_GE(fd, 0) {
220 			if (errno == ENOENT)
221 				SKIP(return, "Skipping test since /dev/null does not exist");
222 		}
223 
224 		open_fds[i] = fd;
225 	}
226 
227 	pid = sys_clone3(&args, sizeof(args));
228 	ASSERT_GE(pid, 0);
229 
230 	if (pid == 0) {
231 		ret = sys_close_range(open_fds[0], UINT_MAX,
232 				      CLOSE_RANGE_UNSHARE);
233 		if (ret)
234 			exit(EXIT_FAILURE);
235 
236 		for (i = 0; i <= 100; i++)
237 			if (fcntl(open_fds[i], F_GETFL) != -1)
238 				exit(EXIT_FAILURE);
239 
240 		exit(EXIT_SUCCESS);
241 	}
242 
243 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
244 	EXPECT_EQ(true, WIFEXITED(status));
245 	EXPECT_EQ(0, WEXITSTATUS(status));
246 }
247 
TEST(close_range_cloexec)248 TEST(close_range_cloexec)
249 {
250 	int i, ret;
251 	int open_fds[101];
252 	struct rlimit rlimit;
253 
254 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
255 		int fd;
256 
257 		fd = open("/dev/null", O_RDONLY);
258 		ASSERT_GE(fd, 0) {
259 			if (errno == ENOENT)
260 				SKIP(return, "Skipping test since /dev/null does not exist");
261 		}
262 
263 		open_fds[i] = fd;
264 	}
265 
266 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
267 	if (ret < 0) {
268 		if (errno == ENOSYS)
269 			SKIP(return, "close_range() syscall not supported");
270 		if (errno == EINVAL)
271 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
272 	}
273 
274 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
275 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
276 	rlimit.rlim_cur = 25;
277 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
278 
279 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
280 	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
281 	ASSERT_EQ(0, ret);
282 	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
283 	ASSERT_EQ(0, ret);
284 
285 	for (i = 0; i <= 50; i++) {
286 		int flags = fcntl(open_fds[i], F_GETFD);
287 
288 		EXPECT_GT(flags, -1);
289 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
290 	}
291 
292 	for (i = 51; i <= 74; i++) {
293 		int flags = fcntl(open_fds[i], F_GETFD);
294 
295 		EXPECT_GT(flags, -1);
296 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
297 	}
298 
299 	for (i = 75; i <= 100; i++) {
300 		int flags = fcntl(open_fds[i], F_GETFD);
301 
302 		EXPECT_GT(flags, -1);
303 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
304 	}
305 
306 	/* Test a common pattern.  */
307 	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
308 	for (i = 0; i <= 100; i++) {
309 		int flags = fcntl(open_fds[i], F_GETFD);
310 
311 		EXPECT_GT(flags, -1);
312 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
313 	}
314 }
315 
TEST(close_range_cloexec_unshare)316 TEST(close_range_cloexec_unshare)
317 {
318 	int i, ret;
319 	int open_fds[101];
320 	struct rlimit rlimit;
321 
322 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
323 		int fd;
324 
325 		fd = open("/dev/null", O_RDONLY);
326 		ASSERT_GE(fd, 0) {
327 			if (errno == ENOENT)
328 				SKIP(return, "Skipping test since /dev/null does not exist");
329 		}
330 
331 		open_fds[i] = fd;
332 	}
333 
334 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
335 	if (ret < 0) {
336 		if (errno == ENOSYS)
337 			SKIP(return, "close_range() syscall not supported");
338 		if (errno == EINVAL)
339 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
340 	}
341 
342 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
343 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
344 	rlimit.rlim_cur = 25;
345 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
346 
347 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
348 	ret = sys_close_range(open_fds[0], open_fds[50],
349 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
350 	ASSERT_EQ(0, ret);
351 	ret = sys_close_range(open_fds[75], open_fds[100],
352 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
353 	ASSERT_EQ(0, ret);
354 
355 	for (i = 0; i <= 50; i++) {
356 		int flags = fcntl(open_fds[i], F_GETFD);
357 
358 		EXPECT_GT(flags, -1);
359 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
360 	}
361 
362 	for (i = 51; i <= 74; i++) {
363 		int flags = fcntl(open_fds[i], F_GETFD);
364 
365 		EXPECT_GT(flags, -1);
366 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
367 	}
368 
369 	for (i = 75; i <= 100; i++) {
370 		int flags = fcntl(open_fds[i], F_GETFD);
371 
372 		EXPECT_GT(flags, -1);
373 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
374 	}
375 
376 	/* Test a common pattern.  */
377 	ret = sys_close_range(3, UINT_MAX,
378 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
379 	for (i = 0; i <= 100; i++) {
380 		int flags = fcntl(open_fds[i], F_GETFD);
381 
382 		EXPECT_GT(flags, -1);
383 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
384 	}
385 }
386 
387 /*
388  * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
389  */
TEST(close_range_cloexec_syzbot)390 TEST(close_range_cloexec_syzbot)
391 {
392 	int fd1, fd2, fd3, flags, ret, status;
393 	pid_t pid;
394 	struct __clone_args args = {
395 		.flags = CLONE_FILES,
396 		.exit_signal = SIGCHLD,
397 	};
398 
399 	/* Create a huge gap in the fd table. */
400 	fd1 = open("/dev/null", O_RDWR);
401 	EXPECT_GT(fd1, 0);
402 
403 	fd2 = dup2(fd1, 1000);
404 	EXPECT_GT(fd2, 0);
405 
406 	pid = sys_clone3(&args, sizeof(args));
407 	ASSERT_GE(pid, 0);
408 
409 	if (pid == 0) {
410 		ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
411 		if (ret)
412 			exit(EXIT_FAILURE);
413 
414 		/*
415 			 * We now have a private file descriptor table and all
416 			 * our open fds should still be open but made
417 			 * close-on-exec.
418 			 */
419 		flags = fcntl(fd1, F_GETFD);
420 		EXPECT_GT(flags, -1);
421 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
422 
423 		flags = fcntl(fd2, F_GETFD);
424 		EXPECT_GT(flags, -1);
425 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
426 
427 		fd3 = dup2(fd1, 42);
428 		EXPECT_GT(fd3, 0);
429 
430 		/*
431 			 * Duplicating the file descriptor must remove the
432 			 * FD_CLOEXEC flag.
433 			 */
434 		flags = fcntl(fd3, F_GETFD);
435 		EXPECT_GT(flags, -1);
436 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
437 
438 		exit(EXIT_SUCCESS);
439 	}
440 
441 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
442 	EXPECT_EQ(true, WIFEXITED(status));
443 	EXPECT_EQ(0, WEXITSTATUS(status));
444 
445 	/*
446 	 * We had a shared file descriptor table before along with requesting
447 	 * close-on-exec so the original fds must not be close-on-exec.
448 	 */
449 	flags = fcntl(fd1, F_GETFD);
450 	EXPECT_GT(flags, -1);
451 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
452 
453 	flags = fcntl(fd2, F_GETFD);
454 	EXPECT_GT(flags, -1);
455 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
456 
457 	fd3 = dup2(fd1, 42);
458 	EXPECT_GT(fd3, 0);
459 
460 	flags = fcntl(fd3, F_GETFD);
461 	EXPECT_GT(flags, -1);
462 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
463 
464 	EXPECT_EQ(close(fd1), 0);
465 	EXPECT_EQ(close(fd2), 0);
466 	EXPECT_EQ(close(fd3), 0);
467 }
468 
469 /*
470  * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
471  */
TEST(close_range_cloexec_unshare_syzbot)472 TEST(close_range_cloexec_unshare_syzbot)
473 {
474 	int i, fd1, fd2, fd3, flags, ret, status;
475 	pid_t pid;
476 	struct __clone_args args = {
477 		.flags = CLONE_FILES,
478 		.exit_signal = SIGCHLD,
479 	};
480 
481 	/*
482 	 * Create a huge gap in the fd table. When we now call
483 	 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
484 	 * bound the kernel will only copy up to fd1 file descriptors into the
485 	 * new fd table. If the kernel is buggy and doesn't handle
486 	 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
487 	 * descriptors and we will oops!
488 	 *
489 	 * On a buggy kernel this should immediately oops. But let's loop just
490 	 * to be sure.
491 	 */
492 	fd1 = open("/dev/null", O_RDWR);
493 	EXPECT_GT(fd1, 0);
494 
495 	fd2 = dup2(fd1, 1000);
496 	EXPECT_GT(fd2, 0);
497 
498 	for (i = 0; i < 100; i++) {
499 
500 		pid = sys_clone3(&args, sizeof(args));
501 		ASSERT_GE(pid, 0);
502 
503 		if (pid == 0) {
504 			ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
505 						      CLOSE_RANGE_CLOEXEC);
506 			if (ret)
507 				exit(EXIT_FAILURE);
508 
509 			/*
510 			 * We now have a private file descriptor table and all
511 			 * our open fds should still be open but made
512 			 * close-on-exec.
513 			 */
514 			flags = fcntl(fd1, F_GETFD);
515 			EXPECT_GT(flags, -1);
516 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
517 
518 			flags = fcntl(fd2, F_GETFD);
519 			EXPECT_GT(flags, -1);
520 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
521 
522 			fd3 = dup2(fd1, 42);
523 			EXPECT_GT(fd3, 0);
524 
525 			/*
526 			 * Duplicating the file descriptor must remove the
527 			 * FD_CLOEXEC flag.
528 			 */
529 			flags = fcntl(fd3, F_GETFD);
530 			EXPECT_GT(flags, -1);
531 			EXPECT_EQ(flags & FD_CLOEXEC, 0);
532 
533 			EXPECT_EQ(close(fd1), 0);
534 			EXPECT_EQ(close(fd2), 0);
535 			EXPECT_EQ(close(fd3), 0);
536 
537 			exit(EXIT_SUCCESS);
538 		}
539 
540 		EXPECT_EQ(waitpid(pid, &status, 0), pid);
541 		EXPECT_EQ(true, WIFEXITED(status));
542 		EXPECT_EQ(0, WEXITSTATUS(status));
543 	}
544 
545 	/*
546 	 * We created a private file descriptor table before along with
547 	 * requesting close-on-exec so the original fds must not be
548 	 * close-on-exec.
549 	 */
550 	flags = fcntl(fd1, F_GETFD);
551 	EXPECT_GT(flags, -1);
552 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
553 
554 	flags = fcntl(fd2, F_GETFD);
555 	EXPECT_GT(flags, -1);
556 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
557 
558 	fd3 = dup2(fd1, 42);
559 	EXPECT_GT(fd3, 0);
560 
561 	flags = fcntl(fd3, F_GETFD);
562 	EXPECT_GT(flags, -1);
563 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
564 
565 	EXPECT_EQ(close(fd1), 0);
566 	EXPECT_EQ(close(fd2), 0);
567 	EXPECT_EQ(close(fd3), 0);
568 }
569 
570 TEST_HARNESS_MAIN
571