/******************************************************************************
 *
 * Domain paging.
 * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#define _GNU_SOURCE

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <signal.h>
#include <unistd.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <xenstore.h>
#include <getopt.h>

#include "xc_bitops.h"
#include "file_ops.h"
#include "policy.h"
#include "xenpaging.h"
/* Xenstore node, relative to the domain path, with the memory target for the guest, in KiB */
#define WATCH_TARGETPAGES "memory/target-tot_pages"
static char *watch_target_tot_pages;
static char *dom_path;
static char watch_token[16];
static char *filename;
static int interrupted;

static void unlink_pagefile(void)
{
    if ( filename && filename[0] )
    {
        unlink(filename);
        filename[0] = '\0';
    }
}

static void close_handler(int sig)
{
    interrupted = sig;
    unlink_pagefile();
}

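/*
 * Ask the device model to flush its cache of guest page mappings.  qemu
 * keeps foreign mappings of guest pages, and a page with such an extra
 * reference cannot be paged out, so evict_victim() issues this command
 * when it runs out of evictable gfns.
 */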
static void xenpaging_mem_paging_flush_ioemu_cache(struct xenpaging *paging)
{
    struct xs_handle *xsh = paging->xs_handle;
    domid_t domain_id = paging->vm_event.domain_id;
    char path[80];

    sprintf(path, "/local/domain/0/device-model/%u/command", domain_id);

    xs_write(xsh, XBT_NULL, path, "flush-cache", strlen("flush-cache"));
}

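/*
 * Wait for either a vm_event channel notification or a xenstore watch
 * event.  Xenstore events (guest shutdown, a new memory target) are
 * handled here; a pending event channel notification is merely unmasked
 * so that the main loop can drain the request ring.  Returns < 0 on error.
 */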
static int xenpaging_wait_for_event_or_timeout(struct xenpaging *paging)
{
    xc_interface *xch = paging->xc_handle;
    xenevtchn_handle *xce = paging->vm_event.xce_handle;
    char **vec, *val;
    unsigned int num;
    struct pollfd fd[2];
    int port;
    int rc;
    int timeout;

    /* Wait for event channel and xenstore */
    fd[0].fd = xenevtchn_fd(xce);
    fd[0].events = POLLIN | POLLERR;
    fd[1].fd = xs_fileno(paging->xs_handle);
    fd[1].events = POLLIN | POLLERR;

    /* Do not block in poll() while page-out is still in progress */
    timeout = paging->use_poll_timeout ? 100 : 0;
    rc = poll(fd, 2, timeout);
    if ( rc < 0 )
    {
        if ( errno == EINTR )
            return 0;

        PERROR("Poll exited with an error");
        return -1;
    }

    /* First check for guest shutdown */
    if ( rc && fd[1].revents & POLLIN )
    {
        DPRINTF("Got event from xenstore\n");
        vec = xs_read_watch(paging->xs_handle, &num);
        if ( vec )
        {
            DPRINTF("path '%s' token '%s'\n", vec[XS_WATCH_PATH], vec[XS_WATCH_TOKEN]);
            if ( strcmp(vec[XS_WATCH_TOKEN], watch_token) == 0 )
            {
                /* If our guest disappeared, set interrupt flag and fall through */
                if ( xs_is_domain_introduced(paging->xs_handle, paging->vm_event.domain_id) == false )
                {
                    xs_unwatch(paging->xs_handle, "@releaseDomain", watch_token);
                    interrupted = SIGQUIT;
                    /* No further poll result processing */
                    rc = 0;
                }
            }
            else if ( strcmp(vec[XS_WATCH_PATH], watch_target_tot_pages) == 0 )
            {
                int ret, target_tot_pages;
                val = xs_read(paging->xs_handle, XBT_NULL, vec[XS_WATCH_PATH], NULL);
                if ( val )
                {
                    ret = sscanf(val, "%d", &target_tot_pages);
                    if ( ret > 0 )
                    {
                        /* KiB to pages */
                        target_tot_pages >>= 2;
                        if ( target_tot_pages < 0 || target_tot_pages > paging->max_pages )
                            target_tot_pages = paging->max_pages;
                        paging->target_tot_pages = target_tot_pages;
                        /* Disable poll() delay while new target is not yet reached */
                        paging->use_poll_timeout = 0;
                        DPRINTF("new target_tot_pages %d\n", target_tot_pages);
                    }
                    free(val);
                }
            }
            free(vec);
        }
    }

    if ( rc && fd[0].revents & POLLIN )
    {
        DPRINTF("Got event from evtchn\n");
        port = xenevtchn_pending(xce);
        if ( port == -1 )
        {
            PERROR("Failed to read port from event channel");
            rc = -1;
            goto err;
        }

        rc = xenevtchn_unmask(xce, port);
        if ( rc < 0 )
        {
            PERROR("Failed to unmask event channel port");
        }
    }
 err:
    return rc;
}

static int xenpaging_get_tot_pages(struct xenpaging *paging)
{
    xc_interface *xch = paging->xc_handle;
    xc_domaininfo_t domain_info;
    int rc;

    rc = xc_domain_getinfolist(xch, paging->vm_event.domain_id, 1, &domain_info);
    if ( rc != 1 )
    {
        PERROR("Error getting domain info");
        return -1;
    }
    return domain_info.tot_pages;
}

static void *init_page(void)
{
    void *buffer;

    /* Allocate page memory */
    errno = posix_memalign(&buffer, PAGE_SIZE, PAGE_SIZE);
    if ( errno != 0 )
        return NULL;

    /* Lock buffer in memory so it can't be paged out */
    if ( mlock(buffer, PAGE_SIZE) < 0 )
    {
        free(buffer);
        buffer = NULL;
    }

    return buffer;
}

static void usage(void)
{
    printf("usage:\n\n");

    printf(" xenpaging [options] -f <pagefile> -d <domain_id>\n\n");

    printf("options:\n");
    printf(" -d <domid>     --domain=<domid>         numerical domain_id of guest. This option is required.\n");
    printf(" -f <file>      --pagefile=<file>        pagefile to use. This option is required.\n");
    printf(" -m <max_memkb> --max_memkb=<max_memkb>  maximum amount of memory to handle.\n");
    printf(" -r <num>       --mru_size=<num>         number of paged-in pages to keep in memory.\n");
    printf(" -v             --verbose                enable debug output.\n");
    printf(" -h             --help                   this output.\n");
}

static int xenpaging_getopts(struct xenpaging *paging, int argc, char *argv[])
{
    int ch;
    static const char sopts[] = "hvd:f:m:r:";
    static const struct option lopts[] = {
        {"help", 0, NULL, 'h'},
        {"verbose", 0, NULL, 'v'},
        {"domain", 1, NULL, 'd'},
        {"pagefile", 1, NULL, 'f'},
        {"max_memkb", 1, NULL, 'm'},
        {"mru_size", 1, NULL, 'r'},
        { }
    };

    while ((ch = getopt_long(argc, argv, sopts, lopts, NULL)) != -1)
    {
        switch(ch) {
        case 'd':
            paging->vm_event.domain_id = atoi(optarg);
            break;
        case 'f':
            free(filename);
            filename = strdup(optarg);
            break;
        case 'm':
            /* KiB to pages */
            paging->max_pages = atoi(optarg) >> 2;
            break;
        case 'r':
            paging->policy_mru_size = atoi(optarg);
            break;
        case 'v':
            paging->debug = 1;
            break;
        case 'h':
        case '?':
            usage();
            return 1;
        }
    }

    argv += optind; argc -= optind;

    /* Path to pagefile is required */
    if ( !filename )
    {
        printf("Filename for pagefile missing!\n");
        usage();
        return 1;
    }

    /* Set domain id */
    if ( !paging->vm_event.domain_id )
    {
        printf("Numerical <domain_id> missing!\n");
        return 1;
    }

    return 0;
}

static struct xenpaging *xenpaging_init(int argc, char *argv[])
{
    struct xenpaging *paging;
    xc_domaininfo_t domain_info;
    xc_interface *xch = NULL;
    xentoollog_logger *dbg = NULL;
    char *p;
    int rc;
    unsigned long ring_pfn, mmap_pfn;

    /* Allocate memory */
    paging = calloc(1, sizeof(struct xenpaging));
    if ( !paging )
        goto err;

    /* Get cmdline options and domain_id */
    if ( xenpaging_getopts(paging, argc, argv) )
        goto err;

    /* Enable debug output */
    if ( paging->debug )
        dbg = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, XTL_DEBUG, 0);

    /* Open connection to xen */
    paging->xc_handle = xch = xc_interface_open(dbg, NULL, 0);
    if ( !xch )
        goto err;

    DPRINTF("xenpaging init\n");

    /* Open connection to xenstore */
    paging->xs_handle = xs_open(0);
    if ( paging->xs_handle == NULL )
    {
        PERROR("Error initialising xenstore connection");
        goto err;
    }

    /* Write the domain ID as watch token so we can ignore other domains' shutdowns */
    snprintf(watch_token, sizeof(watch_token), "%u", paging->vm_event.domain_id);
    if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
    {
        PERROR("Could not bind to shutdown watch\n");
        goto err;
    }

    /* Watch xenpaging's working target */
    dom_path = xs_get_domain_path(paging->xs_handle, paging->vm_event.domain_id);
    if ( !dom_path )
    {
        PERROR("Could not find domain path\n");
        goto err;
    }
    if ( asprintf(&watch_target_tot_pages, "%s/%s", dom_path, WATCH_TARGETPAGES) < 0 )
    {
        PERROR("Could not alloc watch path\n");
        goto err;
    }
    DPRINTF("watching '%s'\n", watch_target_tot_pages);
    if ( xs_watch(paging->xs_handle, watch_target_tot_pages, "") == false )
    {
        PERROR("Could not bind to xenpaging watch\n");
        goto err;
    }

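    /*
     * The vm_event ring lives in a guest frame named by
     * HVM_PARAM_PAGING_RING_PFN.  If the toolstack has not populated that
     * gfn yet, the first mapping attempt fails, so the gfn is populated
     * here and mapped again.  Once the ring is initialised it is removed
     * from the guest physmap below so the guest cannot tamper with it.
     */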
    /* Map the ring page */
    xc_get_hvm_param(xch, paging->vm_event.domain_id,
                     HVM_PARAM_PAGING_RING_PFN, &ring_pfn);
    mmap_pfn = ring_pfn;
    paging->vm_event.ring_page =
        xc_map_foreign_pages(xch, paging->vm_event.domain_id,
                             PROT_READ | PROT_WRITE, &mmap_pfn, 1);
    if ( !paging->vm_event.ring_page )
    {
        /* Map failed, populate ring page */
        rc = xc_domain_populate_physmap_exact(paging->xc_handle,
                                              paging->vm_event.domain_id,
                                              1, 0, 0, &ring_pfn);
        if ( rc != 0 )
        {
            PERROR("Failed to populate ring gfn\n");
            goto err;
        }

        paging->vm_event.ring_page =
            xc_map_foreign_pages(xch, paging->vm_event.domain_id,
                                 PROT_READ | PROT_WRITE,
                                 &mmap_pfn, 1);
        if ( !paging->vm_event.ring_page )
        {
            PERROR("Could not map the ring page\n");
            goto err;
        }
    }

    /* Initialise Xen */
    rc = xc_mem_paging_enable(xch, paging->vm_event.domain_id,
                              &paging->vm_event.evtchn_port);
    if ( rc != 0 )
    {
        switch ( errno ) {
        case EBUSY:
            ERROR("xenpaging is (or was) active on this domain");
            break;
        case ENODEV:
            ERROR("xenpaging requires Hardware Assisted Paging");
            break;
        case EMLINK:
            ERROR("xenpaging not supported while iommu passthrough is enabled");
            break;
        case EXDEV:
            ERROR("xenpaging not supported in a PoD guest");
            break;
        default:
            PERROR("Error initialising shared page");
            break;
        }
        goto err;
    }

    /* Open event channel */
    paging->vm_event.xce_handle = xenevtchn_open(NULL, 0);
    if ( paging->vm_event.xce_handle == NULL )
    {
        PERROR("Failed to open event channel");
        goto err;
    }

    /* Bind event notification */
    rc = xenevtchn_bind_interdomain(paging->vm_event.xce_handle,
                                    paging->vm_event.domain_id,
                                    paging->vm_event.evtchn_port);
    if ( rc < 0 )
    {
        PERROR("Failed to bind event channel");
        goto err;
    }

    paging->vm_event.port = rc;

    /* Initialise ring */
    SHARED_RING_INIT((vm_event_sring_t *)paging->vm_event.ring_page);
    BACK_RING_INIT(&paging->vm_event.back_ring,
                   (vm_event_sring_t *)paging->vm_event.ring_page,
                   PAGE_SIZE);

    /* Now that the ring is set, remove it from the guest's physmap */
    if ( xc_domain_decrease_reservation_exact(xch,
                    paging->vm_event.domain_id, 1, 0, &ring_pfn) )
        PERROR("Failed to remove ring from guest physmap");

    /* Get max_pages from guest if not provided via cmdline */
    if ( !paging->max_pages )
    {
        rc = xc_domain_getinfolist(xch, paging->vm_event.domain_id, 1,
                                   &domain_info);
        if ( rc != 1 )
        {
            PERROR("Error getting domain info");
            goto err;
        }

        /* Record number of max_pages */
        paging->max_pages = domain_info.max_pages;
    }

    /* Allocate bitmap for tracking pages that have been paged out */
    paging->bitmap = bitmap_alloc(paging->max_pages);
    if ( !paging->bitmap )
    {
        PERROR("Error allocating bitmap");
        goto err;
    }
    DPRINTF("max_pages = %d\n", paging->max_pages);

    /* Allocate indices for pagefile slots */
    paging->slot_to_gfn = calloc(paging->max_pages, sizeof(*paging->slot_to_gfn));
    paging->gfn_to_slot = calloc(paging->max_pages, sizeof(*paging->gfn_to_slot));
    if ( !paging->slot_to_gfn || !paging->gfn_to_slot )
        goto err;

    /* Allocate stack for known free slots in pagefile */
    paging->free_slot_stack = calloc(paging->max_pages, sizeof(*paging->free_slot_stack));
    if ( !paging->free_slot_stack )
        goto err;

    /* Initialise policy */
    rc = policy_init(paging);
    if ( rc != 0 )
    {
        PERROR("Error initialising policy");
        goto err;
    }

    paging->paging_buffer = init_page();
    if ( !paging->paging_buffer )
    {
        PERROR("Creating page aligned load buffer");
        goto err;
    }

    /* Open file */
    paging->fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
    if ( paging->fd < 0 )
    {
        PERROR("failed to open file");
        goto err;
    }

    return paging;

 err:
    if ( paging )
    {
        if ( paging->xs_handle )
            xs_close(paging->xs_handle);
        if ( xch )
            xc_interface_close(xch);
        if ( paging->paging_buffer )
        {
            munlock(paging->paging_buffer, PAGE_SIZE);
            free(paging->paging_buffer);
        }

        if ( paging->vm_event.ring_page )
        {
            munmap(paging->vm_event.ring_page, PAGE_SIZE);
        }

        free(dom_path);
        free(watch_target_tot_pages);
        free(paging->free_slot_stack);
        free(paging->slot_to_gfn);
        free(paging->gfn_to_slot);
        free(paging->bitmap);
        free(paging);
    }

    return NULL;
}

static void xenpaging_teardown(struct xenpaging *paging)
{
    int rc;
    xc_interface *xch = paging->xc_handle;

    xs_unwatch(paging->xs_handle, watch_target_tot_pages, "");
    xs_unwatch(paging->xs_handle, "@releaseDomain", watch_token);

    paging->xc_handle = NULL;
    /* Tear down domain paging in Xen */
    munmap(paging->vm_event.ring_page, PAGE_SIZE);
    rc = xc_mem_paging_disable(xch, paging->vm_event.domain_id);
    if ( rc != 0 )
    {
        PERROR("Error tearing down domain paging in xen");
    }

    /* Unbind event channel port */
    rc = xenevtchn_unbind(paging->vm_event.xce_handle, paging->vm_event.port);
    if ( rc != 0 )
    {
        PERROR("Error unbinding event port");
    }
    paging->vm_event.port = -1;

    /* Close event channel */
    rc = xenevtchn_close(paging->vm_event.xce_handle);
    if ( rc != 0 )
    {
        PERROR("Error closing event channel");
    }
    paging->vm_event.xce_handle = NULL;

    /* Close connection to xenstore */
    xs_close(paging->xs_handle);

    /* Close connection to Xen */
    xc_interface_close(xch);
}

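/*
 * Consumer side of the shared vm_event ring: copy the request at req_cons,
 * advance the consumer index and set req_event so that Xen sends another
 * notification once a new request is produced.
 */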
static void get_request(struct vm_event *vm_event, vm_event_request_t *req)
{
    vm_event_back_ring_t *back_ring;
    RING_IDX req_cons;

    back_ring = &vm_event->back_ring;
    req_cons = back_ring->req_cons;

    /* Copy request */
    memcpy(req, RING_GET_REQUEST(back_ring, req_cons), sizeof(*req));
    req_cons++;

    /* Update ring */
    back_ring->req_cons = req_cons;
    back_ring->sring->req_event = req_cons + 1;
}

static void put_response(struct vm_event *vm_event, vm_event_response_t *rsp)
{
    vm_event_back_ring_t *back_ring;
    RING_IDX rsp_prod;

    back_ring = &vm_event->back_ring;
    rsp_prod = back_ring->rsp_prod_pvt;

    /* Copy response */
    memcpy(RING_GET_RESPONSE(back_ring, rsp_prod), rsp, sizeof(*rsp));
    rsp_prod++;

    /* Update ring */
    back_ring->rsp_prod_pvt = rsp_prod;
    RING_PUSH_RESPONSES(back_ring);
}

/* Evict a given gfn
 * Returns < 0 on fatal error
 * Returns 0 on successful evict
 * Returns > 0 if gfn can not be evicted
 */
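/*
 * Eviction is a two-step handshake with Xen: nominate marks the gfn as a
 * paging candidate, the contents are then copied to the pagefile slot, and
 * evict only succeeds if the page has not been used again in the meantime.
 * Both steps report a still-busy gfn with EBUSY, which is turned into a
 * positive return value here.
 */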
static int xenpaging_evict_page(struct xenpaging *paging, unsigned long gfn, int slot)
{
    xc_interface *xch = paging->xc_handle;
    void *page;
    xen_pfn_t victim = gfn;
    int ret;

    DECLARE_DOMCTL;

    /* Nominate page */
    ret = xc_mem_paging_nominate(xch, paging->vm_event.domain_id, gfn);
    if ( ret < 0 )
    {
        /* unpageable gfn is indicated by EBUSY */
        if ( errno == EBUSY )
            ret = 1;
        else
            PERROR("Error nominating page %lx", gfn);
        goto out;
    }

    /* Map page */
    page = xc_map_foreign_pages(xch, paging->vm_event.domain_id, PROT_READ, &victim, 1);
    if ( page == NULL )
    {
        PERROR("Error mapping page %lx", gfn);
        ret = -1;
        goto out;
    }

    /* Copy page */
    ret = write_page(paging->fd, page, slot);
    if ( ret < 0 )
    {
        PERROR("Error copying page %lx", gfn);
        munmap(page, PAGE_SIZE);
        ret = -1;
        goto out;
    }

    /* Release page */
    munmap(page, PAGE_SIZE);

    /* Tell Xen to evict page */
    ret = xc_mem_paging_evict(xch, paging->vm_event.domain_id, gfn);
    if ( ret < 0 )
    {
        /* A gfn in use is indicated by EBUSY */
        if ( errno == EBUSY )
        {
            ret = 1;
            DPRINTF("Nominated page %lx busy", gfn);
        }
        else
            PERROR("Error evicting page %lx", gfn);
        goto out;
    }

    DPRINTF("evict_page > gfn %lx pageslot %d\n", gfn, slot);
    /* Notify policy of page being paged out */
    policy_notify_paged_out(gfn);

    /* Update index */
    paging->slot_to_gfn[slot] = gfn;
    paging->gfn_to_slot[gfn] = slot;

    /* Record number of evicted pages */
    paging->num_paged_out++;

    ret = 0;

 out:
    return ret;
}

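/*
 * Queue a response on the ring and kick the event channel to tell Xen that
 * the page is ready.  When notify_policy is set the gfn is also handed back
 * to the paging policy (subject to the MRU rule below) and the paged-out
 * counter is decremented.
 */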
static int xenpaging_resume_page(struct xenpaging *paging, vm_event_response_t *rsp, int notify_policy)
{
    /* Put the page info on the ring */
    put_response(&paging->vm_event, rsp);

    /* Notify policy of page being paged in */
    if ( notify_policy )
    {
        /*
         * Do not add gfn to mru list if the target is lower than mru size.
         * This allows page-out of these gfns if the target grows again.
         */
        if ( paging->num_paged_out > paging->policy_mru_size )
            policy_notify_paged_in(rsp->u.mem_paging.gfn);
        else
            policy_notify_paged_in_nomru(rsp->u.mem_paging.gfn);

        /* Record number of resumed pages */
        paging->num_paged_out--;
    }

    /* Tell Xen page is ready */
    return xenevtchn_notify(paging->vm_event.xce_handle, paging->vm_event.port);
}

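/*
 * Read the page contents back from pagefile slot i and hand them to Xen
 * with xc_mem_paging_load().  ENOMEM from Xen is treated as transient:
 * the call is retried once per second until it succeeds or the pager is
 * interrupted.
 */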
static int xenpaging_populate_page(struct xenpaging *paging, unsigned long gfn, int i)
{
    xc_interface *xch = paging->xc_handle;
    int ret;
    unsigned char oom = 0;

    DPRINTF("populate_page < gfn %lx pageslot %d\n", gfn, i);

    /* Read page */
    ret = read_page(paging->fd, paging->paging_buffer, i);
    if ( ret != 0 )
    {
        PERROR("Error reading page");
        goto out;
    }

    do
    {
        /* Tell Xen to allocate a page for the domain */
        ret = xc_mem_paging_load(xch, paging->vm_event.domain_id, gfn, paging->paging_buffer);
        if ( ret < 0 )
        {
            if ( errno == ENOMEM )
            {
                if ( oom++ == 0 )
                    DPRINTF("ENOMEM while preparing gfn %lx\n", gfn);
                sleep(1);
                continue;
            }
            PERROR("Error loading %lx during page-in", gfn);
            ret = -1;
            break;
        }
    }
    while ( ret && !interrupted );

 out:
    return ret;
}

/* Trigger a page-in for a batch of pages */
static void resume_pages(struct xenpaging *paging, int num_pages)
{
    xc_interface *xch = paging->xc_handle;
    int i, num = 0;

    for ( i = 0; i < paging->max_pages && num < num_pages; i++ )
    {
        if ( test_bit(i, paging->bitmap) )
        {
            paging->pagein_queue[num] = i;
            num++;
            if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
                break;
        }
    }
    /* num may be less than num_pages, caller has to try again */
    if ( num )
        page_in_trigger();
}

/* Evict one gfn and write it to the given slot
 * Returns < 0 on fatal error
 * Returns 0 on successful evict
 * Returns > 0 if no gfn can be evicted
 */
static int evict_victim(struct xenpaging *paging, int slot)
{
    xc_interface *xch = paging->xc_handle;
    unsigned long gfn;
    static int num_paged_out;
    int ret;

    do
    {
        gfn = policy_choose_victim(paging);
        if ( gfn == INVALID_MFN )
        {
            /* If the number did not change after the last flush command then
             * the command did not reach qemu yet, or qemu still processes
             * the command, or qemu has nothing to release.
             * Right now there is no need to issue the command again.
             */
            if ( num_paged_out != paging->num_paged_out )
            {
                DPRINTF("Flushing qemu cache\n");
                xenpaging_mem_paging_flush_ioemu_cache(paging);
                num_paged_out = paging->num_paged_out;
            }
            ret = ENOSPC;
            goto out;
        }

        if ( interrupted )
        {
            ret = EINTR;
            goto out;
        }

        ret = xenpaging_evict_page(paging, gfn, slot);
        if ( ret < 0 )
            goto out;
    }
    while ( ret );

    if ( test_and_set_bit(gfn, paging->bitmap) )
        ERROR("Page %lx has been evicted before", gfn);

    ret = 0;

 out:
    return ret;
}

/* Evict a batch of pages and write them to a free slot in the paging file
 * Returns < 0 on fatal error
 * Returns 0 if no gfn can be evicted
 * Returns > 0 on successful evict
 */
static int evict_pages(struct xenpaging *paging, int num_pages)
{
    xc_interface *xch = paging->xc_handle;
    int rc, slot, num = 0;

    /* Reuse known free slots */
    while ( paging->stack_count > 0 && num < num_pages )
    {
        slot = paging->free_slot_stack[--paging->stack_count];
        rc = evict_victim(paging, slot);
        if ( rc )
        {
            num = rc < 0 ? -1 : num;
            return num;
        }
        num++;
    }

    /* Scan all slots for the remainder */
    for ( slot = 0; slot < paging->max_pages && num < num_pages; slot++ )
    {
        /* Skip slots that are already in use */
        if ( paging->slot_to_gfn[slot] )
            continue;

        rc = evict_victim(paging, slot);
        if ( rc )
        {
            num = rc < 0 ? -1 : num;
            break;
        }

        num++;
    }
    return num;
}

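/*
 * Main loop: wait for events, service page-in requests from the vm_event
 * ring, then evict or resume pages until tot_pages matches the
 * target_tot_pages read from xenstore.  On SIGTERM/SIGINT all paged-out
 * pages are written back into the guest before the pager exits.
 */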
int main(int argc, char *argv[])
{
    struct sigaction act;
    struct xenpaging *paging;
    vm_event_request_t req;
    vm_event_response_t rsp;
    int num, prev_num = 0;
    int slot;
    int tot_pages;
    int rc;
    xc_interface *xch;

    /* Initialise domain paging */
    paging = xenpaging_init(argc, argv);
    if ( paging == NULL )
    {
        fprintf(stderr, "Error initialising paging\n");
        return 1;
    }
    xch = paging->xc_handle;

    DPRINTF("starting %s for domain_id %u with pagefile %s\n",
            argv[0], paging->vm_event.domain_id, filename);

    /* Ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);
    sigaction(SIGHUP, &act, NULL);
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT, &act, NULL);
    sigaction(SIGALRM, &act, NULL);

    /* Listen for page-in events to stop pager */
    create_page_in_thread(paging);

    /* Swap pages in and out */
    while ( 1 )
    {
        /* Wait for Xen to signal that a page needs to be paged in */
        rc = xenpaging_wait_for_event_or_timeout(paging);
        if ( rc < 0 )
        {
            ERROR("Error getting event");
            goto out;
        }
        else if ( rc != 0 )
        {
            DPRINTF("Got event from Xen\n");
        }

        while ( RING_HAS_UNCONSUMED_REQUESTS(&paging->vm_event.back_ring) )
        {
            /* Indicate possible error */
            rc = 1;

            get_request(&paging->vm_event, &req);

            if ( req.u.mem_paging.gfn >= paging->max_pages )
            {
                ERROR("Requested gfn %"PRIx64" higher than max_pages %x\n",
                      req.u.mem_paging.gfn, paging->max_pages);
                goto out;
            }

            /* Check if the page has already been paged in */
            if ( test_and_clear_bit(req.u.mem_paging.gfn, paging->bitmap) )
            {
                /* Find where in the paging file to read from */
                slot = paging->gfn_to_slot[req.u.mem_paging.gfn];

                /* Sanity check */
                if ( paging->slot_to_gfn[slot] != req.u.mem_paging.gfn )
                {
                    ERROR("Expected gfn %"PRIx64" in slot %d, but found gfn %lx\n",
                          req.u.mem_paging.gfn, slot, paging->slot_to_gfn[slot]);
                    goto out;
                }

                if ( req.u.mem_paging.flags & MEM_PAGING_DROP_PAGE )
                {
                    DPRINTF("drop_page ^ gfn %"PRIx64" pageslot %d\n",
                            req.u.mem_paging.gfn, slot);
                    /* Notify policy of page being dropped */
                    policy_notify_dropped(req.u.mem_paging.gfn);
                }
                else
                {
                    /* Populate the page */
                    if ( xenpaging_populate_page(paging, req.u.mem_paging.gfn, slot) < 0 )
                    {
                        ERROR("Error populating page %"PRIx64"", req.u.mem_paging.gfn);
                        goto out;
                    }
                }

                /* Prepare the response */
                rsp.u.mem_paging.gfn = req.u.mem_paging.gfn;
                rsp.vcpu_id = req.vcpu_id;
                rsp.flags = req.flags;

                if ( xenpaging_resume_page(paging, &rsp, 1) < 0 )
                {
                    PERROR("Error resuming page %"PRIx64"", req.u.mem_paging.gfn);
                    goto out;
                }

                /* Clear this pagefile slot */
                paging->slot_to_gfn[slot] = 0;

                /* Record this free slot */
                paging->free_slot_stack[paging->stack_count++] = slot;
            }
            else
            {
                DPRINTF("page %s populated (domain = %d; vcpu = %d;"
                        " gfn = %"PRIx64"; paused = %d; evict_fail = %d)\n",
                        req.u.mem_paging.flags & MEM_PAGING_EVICT_FAIL ? "not" : "already",
                        paging->vm_event.domain_id, req.vcpu_id, req.u.mem_paging.gfn,
                        !!(req.flags & VM_EVENT_FLAG_VCPU_PAUSED),
                        !!(req.u.mem_paging.flags & MEM_PAGING_EVICT_FAIL) );

                /* Tell Xen to resume the vcpu */
                if ( (req.flags & VM_EVENT_FLAG_VCPU_PAUSED) ||
                     (req.u.mem_paging.flags & MEM_PAGING_EVICT_FAIL) )
                {
                    /* Prepare the response */
                    rsp.u.mem_paging.gfn = req.u.mem_paging.gfn;
                    rsp.vcpu_id = req.vcpu_id;
                    rsp.flags = req.flags;

                    if ( xenpaging_resume_page(paging, &rsp, 0) < 0 )
                    {
                        PERROR("Error resuming page %"PRIx64"", req.u.mem_paging.gfn);
                        goto out;
                    }
                }
            }
        }

        /* If interrupted, write all pages back into the guest */
        if ( interrupted == SIGTERM || interrupted == SIGINT )
        {
            /* If no more pages to process, exit loop. */
            if ( !paging->num_paged_out )
                break;

            /* One more round if there are still pages to process. */
            resume_pages(paging, paging->num_paged_out);

            /* Resume main loop */
            continue;
        }

        /* Exit main loop on any other signal */
        if ( interrupted )
            break;

        /* Indicate possible error */
        rc = 1;

        /* Check if the target has been reached already */
        tot_pages = xenpaging_get_tot_pages(paging);
        if ( tot_pages < 0 )
            goto out;

        /* Resume all pages if paging is disabled or no target was set */
        if ( paging->target_tot_pages == 0 )
        {
            if ( paging->num_paged_out )
                resume_pages(paging, paging->num_paged_out);
        }
        /* Evict more pages if the guest is above the target */
        else if ( tot_pages > paging->target_tot_pages )
        {
            num = tot_pages - paging->target_tot_pages;
            if ( num != prev_num )
            {
                DPRINTF("Need to evict %d pages to reach %d target_tot_pages\n", num, paging->target_tot_pages);
                prev_num = num;
            }
            /* Limit the number of evicts to be able to process page-in requests */
            if ( num > 42 )
            {
                paging->use_poll_timeout = 0;
                num = 42;
            }
            if ( evict_pages(paging, num) < 0 )
                goto out;
        }
        /* Resume some pages if the guest is below the target */
        else if ( tot_pages < paging->target_tot_pages && paging->num_paged_out )
        {
            num = paging->target_tot_pages - tot_pages;
            if ( num != prev_num )
            {
                DPRINTF("Need to resume %d pages to reach %d target_tot_pages\n", num, paging->target_tot_pages);
                prev_num = num;
            }
            resume_pages(paging, num);
        }
        /* Now that the target has been reached, enable the poll() timeout */
        else
        {
            paging->use_poll_timeout = 1;
        }
    }

    /* No error */
    rc = 0;

    DPRINTF("xenpaging got signal %d\n", interrupted);

 out:
    close(paging->fd);
    unlink_pagefile();

    /* Tear down domain paging */
    xenpaging_teardown(paging);

    return rc ? 2 : 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */