1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15
16 #include "../kselftest_harness.h"
17 #include "../clone3/clone3_selftests.h"
18
/*
 * Fallback syscall number for close_range(), used only when the included
 * headers do not already define __NR_close_range.
 *
 * The default number is 436. Alpha uses 546 instead, ia64 adds 1024 to 436,
 * and MIPS adds an ABI-specific base to 436 (4000 for o32, 6000 for n32,
 * 5000 for n64). On MIPS with a _MIPS_SIM value matching none of these
 * ABIs the macro is left undefined.
 */
#ifndef __NR_close_range
	#if defined __alpha__
		#define __NR_close_range 546
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_close_range (436 + 4000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_close_range (436 + 6000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_close_range (436 + 5000)
		#endif
	#elif defined __ia64__
		#define __NR_close_range (436 + 1024)
	#else
		#define __NR_close_range 436
	#endif
#endif
38
/*
 * Fallback definitions of the close_range() flag bits for build environments
 * whose headers do not provide them.
 *
 * As used by the tests in this file: CLOSE_RANGE_UNSHARE is passed by a
 * process that shares its fd table (clone3 with CLONE_FILES) so that the
 * operation acts on a private copy of the table, and CLOSE_RANGE_CLOEXEC is
 * expected to leave the descriptors open but mark them FD_CLOEXEC.
 */
#ifndef CLOSE_RANGE_UNSHARE
#define CLOSE_RANGE_UNSHARE	(1U << 1)
#endif

#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC	(1U << 2)
#endif
46
/*
 * Invoke the raw close_range() system call through syscall(), forwarding
 * @fd, @max_fd and @flags unchanged.
 *
 * Returns whatever syscall() returned, converted to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return rc;
}
52
/*
 * Number of elements in array @x, defined only if no earlier header already
 * provides ARRAY_SIZE. The argument must be a real array, not a pointer.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
56
TEST(core_close_range)57 TEST(core_close_range)
58 {
59 int i, ret;
60 int open_fds[101];
61
62 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
63 int fd;
64
65 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
66 ASSERT_GE(fd, 0) {
67 if (errno == ENOENT)
68 SKIP(return, "Skipping test since /dev/null does not exist");
69 }
70
71 open_fds[i] = fd;
72 }
73
74 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
75 if (errno == ENOSYS)
76 SKIP(return, "close_range() syscall not supported");
77 }
78
79 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
80
81 for (i = 0; i <= 50; i++)
82 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
83
84 for (i = 51; i <= 100; i++)
85 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
86
87 /* create a couple of gaps */
88 close(57);
89 close(78);
90 close(81);
91 close(82);
92 close(84);
93 close(90);
94
95 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
96
97 for (i = 51; i <= 92; i++)
98 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
99
100 for (i = 93; i <= 100; i++)
101 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
102
103 /* test that the kernel caps and still closes all fds */
104 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
105
106 for (i = 93; i <= 99; i++)
107 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
108
109 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
110
111 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
112
113 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
114 }
115
TEST(close_range_unshare)116 TEST(close_range_unshare)
117 {
118 int i, ret, status;
119 pid_t pid;
120 int open_fds[101];
121 struct __clone_args args = {
122 .flags = CLONE_FILES,
123 .exit_signal = SIGCHLD,
124 };
125
126 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
127 int fd;
128
129 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
130 ASSERT_GE(fd, 0) {
131 if (errno == ENOENT)
132 SKIP(return, "Skipping test since /dev/null does not exist");
133 }
134
135 open_fds[i] = fd;
136 }
137
138 pid = sys_clone3(&args, sizeof(args));
139 ASSERT_GE(pid, 0);
140
141 if (pid == 0) {
142 ret = sys_close_range(open_fds[0], open_fds[50],
143 CLOSE_RANGE_UNSHARE);
144 if (ret)
145 exit(EXIT_FAILURE);
146
147 for (i = 0; i <= 50; i++)
148 if (fcntl(open_fds[i], F_GETFL) != -1)
149 exit(EXIT_FAILURE);
150
151 for (i = 51; i <= 100; i++)
152 if (fcntl(open_fds[i], F_GETFL) == -1)
153 exit(EXIT_FAILURE);
154
155 /* create a couple of gaps */
156 close(57);
157 close(78);
158 close(81);
159 close(82);
160 close(84);
161 close(90);
162
163 ret = sys_close_range(open_fds[51], open_fds[92],
164 CLOSE_RANGE_UNSHARE);
165 if (ret)
166 exit(EXIT_FAILURE);
167
168 for (i = 51; i <= 92; i++)
169 if (fcntl(open_fds[i], F_GETFL) != -1)
170 exit(EXIT_FAILURE);
171
172 for (i = 93; i <= 100; i++)
173 if (fcntl(open_fds[i], F_GETFL) == -1)
174 exit(EXIT_FAILURE);
175
176 /* test that the kernel caps and still closes all fds */
177 ret = sys_close_range(open_fds[93], open_fds[99],
178 CLOSE_RANGE_UNSHARE);
179 if (ret)
180 exit(EXIT_FAILURE);
181
182 for (i = 93; i <= 99; i++)
183 if (fcntl(open_fds[i], F_GETFL) != -1)
184 exit(EXIT_FAILURE);
185
186 if (fcntl(open_fds[100], F_GETFL) == -1)
187 exit(EXIT_FAILURE);
188
189 ret = sys_close_range(open_fds[100], open_fds[100],
190 CLOSE_RANGE_UNSHARE);
191 if (ret)
192 exit(EXIT_FAILURE);
193
194 if (fcntl(open_fds[100], F_GETFL) != -1)
195 exit(EXIT_FAILURE);
196
197 exit(EXIT_SUCCESS);
198 }
199
200 EXPECT_EQ(waitpid(pid, &status, 0), pid);
201 EXPECT_EQ(true, WIFEXITED(status));
202 EXPECT_EQ(0, WEXITSTATUS(status));
203 }
204
TEST(close_range_unshare_capped)205 TEST(close_range_unshare_capped)
206 {
207 int i, ret, status;
208 pid_t pid;
209 int open_fds[101];
210 struct __clone_args args = {
211 .flags = CLONE_FILES,
212 .exit_signal = SIGCHLD,
213 };
214
215 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
216 int fd;
217
218 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
219 ASSERT_GE(fd, 0) {
220 if (errno == ENOENT)
221 SKIP(return, "Skipping test since /dev/null does not exist");
222 }
223
224 open_fds[i] = fd;
225 }
226
227 pid = sys_clone3(&args, sizeof(args));
228 ASSERT_GE(pid, 0);
229
230 if (pid == 0) {
231 ret = sys_close_range(open_fds[0], UINT_MAX,
232 CLOSE_RANGE_UNSHARE);
233 if (ret)
234 exit(EXIT_FAILURE);
235
236 for (i = 0; i <= 100; i++)
237 if (fcntl(open_fds[i], F_GETFL) != -1)
238 exit(EXIT_FAILURE);
239
240 exit(EXIT_SUCCESS);
241 }
242
243 EXPECT_EQ(waitpid(pid, &status, 0), pid);
244 EXPECT_EQ(true, WIFEXITED(status));
245 EXPECT_EQ(0, WEXITSTATUS(status));
246 }
247
TEST(close_range_cloexec)248 TEST(close_range_cloexec)
249 {
250 int i, ret;
251 int open_fds[101];
252 struct rlimit rlimit;
253
254 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
255 int fd;
256
257 fd = open("/dev/null", O_RDONLY);
258 ASSERT_GE(fd, 0) {
259 if (errno == ENOENT)
260 SKIP(return, "Skipping test since /dev/null does not exist");
261 }
262
263 open_fds[i] = fd;
264 }
265
266 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
267 if (ret < 0) {
268 if (errno == ENOSYS)
269 SKIP(return, "close_range() syscall not supported");
270 if (errno == EINVAL)
271 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
272 }
273
274 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
275 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
276 rlimit.rlim_cur = 25;
277 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
278
279 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
280 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
281 ASSERT_EQ(0, ret);
282 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
283 ASSERT_EQ(0, ret);
284
285 for (i = 0; i <= 50; i++) {
286 int flags = fcntl(open_fds[i], F_GETFD);
287
288 EXPECT_GT(flags, -1);
289 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
290 }
291
292 for (i = 51; i <= 74; i++) {
293 int flags = fcntl(open_fds[i], F_GETFD);
294
295 EXPECT_GT(flags, -1);
296 EXPECT_EQ(flags & FD_CLOEXEC, 0);
297 }
298
299 for (i = 75; i <= 100; i++) {
300 int flags = fcntl(open_fds[i], F_GETFD);
301
302 EXPECT_GT(flags, -1);
303 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
304 }
305
306 /* Test a common pattern. */
307 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
308 for (i = 0; i <= 100; i++) {
309 int flags = fcntl(open_fds[i], F_GETFD);
310
311 EXPECT_GT(flags, -1);
312 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
313 }
314 }
315
TEST(close_range_cloexec_unshare)316 TEST(close_range_cloexec_unshare)
317 {
318 int i, ret;
319 int open_fds[101];
320 struct rlimit rlimit;
321
322 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
323 int fd;
324
325 fd = open("/dev/null", O_RDONLY);
326 ASSERT_GE(fd, 0) {
327 if (errno == ENOENT)
328 SKIP(return, "Skipping test since /dev/null does not exist");
329 }
330
331 open_fds[i] = fd;
332 }
333
334 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
335 if (ret < 0) {
336 if (errno == ENOSYS)
337 SKIP(return, "close_range() syscall not supported");
338 if (errno == EINVAL)
339 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
340 }
341
342 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
343 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
344 rlimit.rlim_cur = 25;
345 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
346
347 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
348 ret = sys_close_range(open_fds[0], open_fds[50],
349 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
350 ASSERT_EQ(0, ret);
351 ret = sys_close_range(open_fds[75], open_fds[100],
352 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
353 ASSERT_EQ(0, ret);
354
355 for (i = 0; i <= 50; i++) {
356 int flags = fcntl(open_fds[i], F_GETFD);
357
358 EXPECT_GT(flags, -1);
359 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
360 }
361
362 for (i = 51; i <= 74; i++) {
363 int flags = fcntl(open_fds[i], F_GETFD);
364
365 EXPECT_GT(flags, -1);
366 EXPECT_EQ(flags & FD_CLOEXEC, 0);
367 }
368
369 for (i = 75; i <= 100; i++) {
370 int flags = fcntl(open_fds[i], F_GETFD);
371
372 EXPECT_GT(flags, -1);
373 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
374 }
375
376 /* Test a common pattern. */
377 ret = sys_close_range(3, UINT_MAX,
378 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
379 for (i = 0; i <= 100; i++) {
380 int flags = fcntl(open_fds[i], F_GETFD);
381
382 EXPECT_GT(flags, -1);
383 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
384 }
385 }
386
387 /*
388 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
389 */
TEST(close_range_cloexec_syzbot)390 TEST(close_range_cloexec_syzbot)
391 {
392 int fd1, fd2, fd3, flags, ret, status;
393 pid_t pid;
394 struct __clone_args args = {
395 .flags = CLONE_FILES,
396 .exit_signal = SIGCHLD,
397 };
398
399 /* Create a huge gap in the fd table. */
400 fd1 = open("/dev/null", O_RDWR);
401 EXPECT_GT(fd1, 0);
402
403 fd2 = dup2(fd1, 1000);
404 EXPECT_GT(fd2, 0);
405
406 pid = sys_clone3(&args, sizeof(args));
407 ASSERT_GE(pid, 0);
408
409 if (pid == 0) {
410 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
411 if (ret)
412 exit(EXIT_FAILURE);
413
414 /*
415 * We now have a private file descriptor table and all
416 * our open fds should still be open but made
417 * close-on-exec.
418 */
419 flags = fcntl(fd1, F_GETFD);
420 EXPECT_GT(flags, -1);
421 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
422
423 flags = fcntl(fd2, F_GETFD);
424 EXPECT_GT(flags, -1);
425 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
426
427 fd3 = dup2(fd1, 42);
428 EXPECT_GT(fd3, 0);
429
430 /*
431 * Duplicating the file descriptor must remove the
432 * FD_CLOEXEC flag.
433 */
434 flags = fcntl(fd3, F_GETFD);
435 EXPECT_GT(flags, -1);
436 EXPECT_EQ(flags & FD_CLOEXEC, 0);
437
438 exit(EXIT_SUCCESS);
439 }
440
441 EXPECT_EQ(waitpid(pid, &status, 0), pid);
442 EXPECT_EQ(true, WIFEXITED(status));
443 EXPECT_EQ(0, WEXITSTATUS(status));
444
445 /*
446 * We had a shared file descriptor table before along with requesting
447 * close-on-exec so the original fds must not be close-on-exec.
448 */
449 flags = fcntl(fd1, F_GETFD);
450 EXPECT_GT(flags, -1);
451 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
452
453 flags = fcntl(fd2, F_GETFD);
454 EXPECT_GT(flags, -1);
455 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
456
457 fd3 = dup2(fd1, 42);
458 EXPECT_GT(fd3, 0);
459
460 flags = fcntl(fd3, F_GETFD);
461 EXPECT_GT(flags, -1);
462 EXPECT_EQ(flags & FD_CLOEXEC, 0);
463
464 EXPECT_EQ(close(fd1), 0);
465 EXPECT_EQ(close(fd2), 0);
466 EXPECT_EQ(close(fd3), 0);
467 }
468
469 /*
470 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
471 */
TEST(close_range_cloexec_unshare_syzbot)472 TEST(close_range_cloexec_unshare_syzbot)
473 {
474 int i, fd1, fd2, fd3, flags, ret, status;
475 pid_t pid;
476 struct __clone_args args = {
477 .flags = CLONE_FILES,
478 .exit_signal = SIGCHLD,
479 };
480
481 /*
482 * Create a huge gap in the fd table. When we now call
483 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
484 * bound the kernel will only copy up to fd1 file descriptors into the
485 * new fd table. If the kernel is buggy and doesn't handle
486 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
487 * descriptors and we will oops!
488 *
489 * On a buggy kernel this should immediately oops. But let's loop just
490 * to be sure.
491 */
492 fd1 = open("/dev/null", O_RDWR);
493 EXPECT_GT(fd1, 0);
494
495 fd2 = dup2(fd1, 1000);
496 EXPECT_GT(fd2, 0);
497
498 for (i = 0; i < 100; i++) {
499
500 pid = sys_clone3(&args, sizeof(args));
501 ASSERT_GE(pid, 0);
502
503 if (pid == 0) {
504 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
505 CLOSE_RANGE_CLOEXEC);
506 if (ret)
507 exit(EXIT_FAILURE);
508
509 /*
510 * We now have a private file descriptor table and all
511 * our open fds should still be open but made
512 * close-on-exec.
513 */
514 flags = fcntl(fd1, F_GETFD);
515 EXPECT_GT(flags, -1);
516 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
517
518 flags = fcntl(fd2, F_GETFD);
519 EXPECT_GT(flags, -1);
520 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
521
522 fd3 = dup2(fd1, 42);
523 EXPECT_GT(fd3, 0);
524
525 /*
526 * Duplicating the file descriptor must remove the
527 * FD_CLOEXEC flag.
528 */
529 flags = fcntl(fd3, F_GETFD);
530 EXPECT_GT(flags, -1);
531 EXPECT_EQ(flags & FD_CLOEXEC, 0);
532
533 EXPECT_EQ(close(fd1), 0);
534 EXPECT_EQ(close(fd2), 0);
535 EXPECT_EQ(close(fd3), 0);
536
537 exit(EXIT_SUCCESS);
538 }
539
540 EXPECT_EQ(waitpid(pid, &status, 0), pid);
541 EXPECT_EQ(true, WIFEXITED(status));
542 EXPECT_EQ(0, WEXITSTATUS(status));
543 }
544
545 /*
546 * We created a private file descriptor table before along with
547 * requesting close-on-exec so the original fds must not be
548 * close-on-exec.
549 */
550 flags = fcntl(fd1, F_GETFD);
551 EXPECT_GT(flags, -1);
552 EXPECT_EQ(flags & FD_CLOEXEC, 0);
553
554 flags = fcntl(fd2, F_GETFD);
555 EXPECT_GT(flags, -1);
556 EXPECT_EQ(flags & FD_CLOEXEC, 0);
557
558 fd3 = dup2(fd1, 42);
559 EXPECT_GT(fd3, 0);
560
561 flags = fcntl(fd3, F_GETFD);
562 EXPECT_GT(flags, -1);
563 EXPECT_EQ(flags & FD_CLOEXEC, 0);
564
565 EXPECT_EQ(close(fd1), 0);
566 EXPECT_EQ(close(fd2), 0);
567 EXPECT_EQ(close(fd3), 0);
568 }
569
570 TEST_HARNESS_MAIN
571