// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"

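/*
 * Fallback definitions in case the system's kernel headers are too old to
 * provide these madvise() advice values; the numbers match the kernel UAPI.
 */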
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

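/* System parameters, detected once at startup in main(). */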
static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static void detect_thpsize(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
		      O_RDONLY);
	size_t size = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		size = strtoul(buf, NULL, 10);
		if (size < pagesize)
			size = 0;
		if (size > 0) {
			thpsize = size;
			ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
				       thpsize / 1024);
		}
	}

	close(fd);
}

static void detect_huge_zeropage(void)
{
	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
		      O_RDONLY);
	size_t enabled = 0;
	char buf[15];
	int ret;

	if (fd < 0)
		return;

	ret = pread(fd, buf, sizeof(buf), 0);
	if (ret > 0 && ret < sizeof(buf)) {
		buf[ret] = 0;

		enabled = strtoul(buf, NULL, 10);
		if (enabled == 1) {
			has_huge_zeropage = true;
			ksft_print_msg("[INFO] huge zeropage is enabled\n");
		}
	}

	close(fd);
}

static void detect_hugetlbsizes(void)
{
	DIR *dir = opendir("/sys/kernel/mm/hugepages/");

	if (!dir)
		return;

	while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
		struct dirent *entry = readdir(dir);
		size_t kb;

		if (!entry)
			break;
		if (entry->d_type != DT_DIR)
			continue;
		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
			continue;
		hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
		nr_hugetlbsizes++;
		ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
			       kb);
	}
	closedir(dir);
}

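/*
 * Check, via /proc/self/pagemap, whether every base page in the range was
 * actually swapped out.
 */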
static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

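/*
 * Minimal parent/child handshake: each side signals readiness by writing a
 * single byte to its pipe and blocks reading until the other side's byte
 * arrives.
 */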
struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0)
		return -errno;
	if (pipe(comm_pipes->parent_ready) < 0) {
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

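/*
 * vmsplice() the pages into a pipe, taking a R/O GUP reference, and unmap
 * them. If COW is handled incorrectly, modifications by the parent stay
 * visible through the stale pipe reference (CVE-2020-29374).
 */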
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}

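/*
 * Same idea, but the parent takes the vmsplice() R/O pin (either before or
 * after fork()) and unmaps the pages; a write by the child must not leak
 * into the pinned pages read back via the pipe.
 */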
static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_test_result_fail("pipe() failed\n");
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_test_result_fail("vmsplice() failed\n");
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_test_result_fail("munmap() failed\n");
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_test_result_fail("wait() failed\n");
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_test_result_fail("read() failed\n");
			goto close_pipe;
		}
	}

	ksft_test_result(!memcmp(old, new, transferred),
			 "No leak from child into parent\n");
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
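/*
 * io_uring fixed buffers take a longterm R/W pin (FOLL_PIN | FOLL_WRITE |
 * FOLL_LONGTERM) on the registered range; page content written via the page
 * table must remain visible through that pin.
 */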
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *tmp, buf;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

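/*
 * R/O longterm pin scenarios, exercised via the gup_test debugfs interface:
 * pin pages that are currently shared, that were previously shared, or that
 * are exclusive but mapped R/O. In all cases, taking the pin should trigger
 * unsharing as needed, so that modifications via the page table remain
 * visible through the pin.
 */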
enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it has
			 * quit. The pages should now be mapped R/O into our
			 * page tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

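/* Each test case below is run against a variety of backing-page types. */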
typedef void (*test_fn)(char *mem, size_t size);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if the kernel does not support MADV_NOHUGEPAGE. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}

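/*
 * THP variants to exercise: PMD-mapped, PTE-mapped (via a temporary
 * mprotect() of a single subpage), a single remaining PTE of a THP, a
 * partially mremap()'ed THP, and a partially shared THP; the first three
 * also come in swapped-out flavors.
 */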
enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* We need a THP-aligned memory area: round up to the next boundary. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if we get
	 * another sub-page populated automatically.
	 */
	mem[0] = 0;
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}
	memset(mem, 0, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O.
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_test_result_fail("MADV_DONTNEED failed\n");
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_test_result_fail("mmap() failed\n");
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_test_result_fail("mremap() failed\n");
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}

static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate a huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the
	 * parent. This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications
	 * by the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	if (thpsize) {
		run_with_thp(test_case->fn, test_case->desc);
		run_with_thp_swap(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
		run_with_partial_shared_thp(test_case->fn, test_case->desc);
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	if (thpsize)
		tests += 8;
	return tests;
}

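/*
 * MADV_COLLAPSE scenarios: collapse before any COW-sharing, or collapse
 * after COW-sharing the full THP, only its lower half, or only its upper
 * half with a child. Collapsing must preserve the per-page exclusivity
 * information, otherwise COW handling goes wrong.
 */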
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_test_result_fail("mprotect() failed\n");
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DONTFORK failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_test_result_fail("MADV_DOFORK failed\n");
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
					      strerror(errno));
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!thpsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		ksft_print_msg("[RUN] %s\n", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return thpsize ? 1 : 0;
}

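/*
 * Non-anonymous tests operate on two mappings of the same backing page:
 * "mem" is mapped writable and private, while "smem" maps the same page
 * read-only, so a write to "mem" must trigger COW instead of modifying the
 * page visible through "smem".
 */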
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/*
	 * Read from the page to populate the shared zeropage; the empty asm
	 * acts as a compiler barrier so the reads are not optimized away.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need THP-aligned memory areas. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd\n", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_test_result_fail("memfd_create() failed\n");
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);

	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_test_result_skip("fileno() failed\n");
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_test_result_fail("fallocate() failed\n");
		goto close;
	}

	/* Create a private mapping of the tmpfile. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_test_result_skip("memfd_create() failed\n");
		return;
	}

	/* File consists of a single huge page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (thpsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (thpsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	detect_thpsize();
	detect_hugetlbsizes();
	detect_huge_zeropage();

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}