1 #include <assert.h>
2 #include <arpa/inet.h>
3
4 #include "xc_sr_common.h"
5
6 /*
7 * Writes an Image header and Domain header into the stream.
8 */
9 static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
10 {
11 xc_interface *xch = ctx->xch;
12 int32_t xen_version = xc_version(xch, XENVER_version, NULL);
13 struct xc_sr_ihdr ihdr = {
14 .marker = IHDR_MARKER,
15 .id = htonl(IHDR_ID),
16 .version = htonl(3),
17 .options = htons(IHDR_OPT_LITTLE_ENDIAN),
18 };
19 struct xc_sr_dhdr dhdr = {
20 .type = guest_type,
21 .page_shift = XC_PAGE_SHIFT,
22 .xen_major = (xen_version >> 16) & 0xffff,
23 .xen_minor = (xen_version) & 0xffff,
24 };
25
26 if ( xen_version < 0 )
27 {
28 PERROR("Unable to obtain Xen Version");
29 return -1;
30 }
31
32 if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
33 {
34 PERROR("Unable to write Image Header to stream");
35 return -1;
36 }
37
38 if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
39 {
40 PERROR("Unable to write Domain Header to stream");
41 return -1;
42 }
43
44 return 0;
45 }
46
47 /*
48 * Writes an END record into the stream.
49 */
50 static int write_end_record(struct xc_sr_context *ctx)
51 {
52 struct xc_sr_record end = { .type = REC_TYPE_END };
53
54 return write_record(ctx, &end);
55 }
56
57 /*
58 * Writes a STATIC_DATA_END record into the stream.
59 */
60 static int write_static_data_end_record(struct xc_sr_context *ctx)
61 {
62 struct xc_sr_record end = { .type = REC_TYPE_STATIC_DATA_END };
63
64 return write_record(ctx, &end);
65 }
66
67 /*
68 * Writes a CHECKPOINT record into the stream.
69 */
70 static int write_checkpoint_record(struct xc_sr_context *ctx)
71 {
72 struct xc_sr_record checkpoint = { .type = REC_TYPE_CHECKPOINT };
73
74 return write_record(ctx, &checkpoint);
75 }
76
77 /*
78 * Writes a batch of memory as a PAGE_DATA record into the stream. The batch
79 * is constructed in ctx->save.batch_pfns.
80 *
81 * This function:
82 * - gets the types for each pfn in the batch.
83 * - for each pfn with real data:
84 * - maps and attempts to localise the pages.
85 * - constructs and writes a PAGE_DATA record into the stream.
86 */
87 static int write_batch(struct xc_sr_context *ctx)
88 {
89 xc_interface *xch = ctx->xch;
90 xen_pfn_t *mfns = NULL, *types = NULL;
91 void *guest_mapping = NULL;
92 void **guest_data = NULL;
93 void **local_pages = NULL;
94 int *errors = NULL, rc = -1;
95 unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
96 unsigned int nr_pfns = ctx->save.nr_batch_pfns;
97 void *page, *orig_page;
98 uint64_t *rec_pfns = NULL;
99 struct iovec *iov = NULL; int iovcnt = 0;
100 struct xc_sr_rec_page_data_header hdr = { 0 };
101 struct xc_sr_record rec = {
102 .type = REC_TYPE_PAGE_DATA,
103 };
104
105 assert(nr_pfns != 0);
106
107 /* Mfns of the batch pfns. */
108 mfns = malloc(nr_pfns * sizeof(*mfns));
109 /* Types of the batch pfns. */
110 types = malloc(nr_pfns * sizeof(*types));
111 /* Errors from attempting to map the gfns. */
112 errors = malloc(nr_pfns * sizeof(*errors));
113 /* Pointers to page data to send. Mapped gfns or local allocations. */
114 guest_data = calloc(nr_pfns, sizeof(*guest_data));
115 /* Pointers to locally allocated pages. Need freeing. */
116 local_pages = calloc(nr_pfns, sizeof(*local_pages));
117 /* iovec[] for writev(). */
118 iov = malloc((nr_pfns + 4) * sizeof(*iov));
119
120 if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
121 {
122 ERROR("Unable to allocate arrays for a batch of %u pages",
123 nr_pfns);
124 goto err;
125 }
126
127 for ( i = 0; i < nr_pfns; ++i )
128 {
129 types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
130 ctx->save.batch_pfns[i]);
131
132 /* Likely a ballooned page. */
133 if ( mfns[i] == INVALID_MFN )
134 {
135 set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
136 ++ctx->save.nr_deferred_pages;
137 }
138 }
139
140 rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
141 if ( rc )
142 {
143 PERROR("Failed to get types for pfn batch");
144 goto err;
145 }
146 rc = -1;
147
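/*
 * Compact mfns[]: skip pfns whose type carries no page data (broken,
 * alloc-only or invalid frames), so that only frames which actually need
 * mapping are handed to xenforeignmemory_map() below.
 */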
148 for ( i = 0; i < nr_pfns; ++i )
149 {
150 switch ( types[i] )
151 {
152 case XEN_DOMCTL_PFINFO_BROKEN:
153 case XEN_DOMCTL_PFINFO_XALLOC:
154 case XEN_DOMCTL_PFINFO_XTAB:
155 continue;
156 }
157
158 mfns[nr_pages++] = mfns[i];
159 }
160
161 if ( nr_pages > 0 )
162 {
163 guest_mapping = xenforeignmemory_map(
164 xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
165 if ( !guest_mapping )
166 {
167 PERROR("Failed to map guest pages");
168 goto err;
169 }
170 nr_pages_mapped = nr_pages;
171
172 for ( i = 0, p = 0; i < nr_pfns; ++i )
173 {
174 switch ( types[i] )
175 {
176 case XEN_DOMCTL_PFINFO_BROKEN:
177 case XEN_DOMCTL_PFINFO_XALLOC:
178 case XEN_DOMCTL_PFINFO_XTAB:
179 continue;
180 }
181
182 if ( errors[p] )
183 {
184 ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
185 ctx->save.batch_pfns[i], mfns[p], errors[p]);
186 goto err;
187 }
188
189 orig_page = page = guest_mapping + (p * PAGE_SIZE);
190 rc = ctx->save.ops.normalise_page(ctx, types[i], &page);
191
192 if ( orig_page != page )
193 local_pages[i] = page;
194
195 if ( rc )
196 {
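/*
 * EAGAIN from normalise_page() means the page cannot be sent in its
 * current state; defer it, drop it from this record, and let a later
 * pass retry it.
 */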
197 if ( rc == -1 && errno == EAGAIN )
198 {
199 set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
200 ++ctx->save.nr_deferred_pages;
201 types[i] = XEN_DOMCTL_PFINFO_XTAB;
202 --nr_pages;
203 }
204 else
205 goto err;
206 }
207 else
208 guest_data[i] = page;
209
210 rc = -1;
211 ++p;
212 }
213 }
214
215 rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
216 if ( !rec_pfns )
217 {
218 ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
219 nr_pfns * sizeof(*rec_pfns));
220 goto err;
221 }
222
223 hdr.count = nr_pfns;
224
225 rec.length = sizeof(hdr);
226 rec.length += nr_pfns * sizeof(*rec_pfns);
227 rec.length += nr_pages * PAGE_SIZE;
228
229 for ( i = 0; i < nr_pfns; ++i )
230 rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];
231
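/*
 * A PAGE_DATA record is emitted with a single writev(): record type and
 * length, the page data header, the pfn/type list, and finally the raw
 * contents of every page which has data to send.
 */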
232 iov[0].iov_base = &rec.type;
233 iov[0].iov_len = sizeof(rec.type);
234
235 iov[1].iov_base = &rec.length;
236 iov[1].iov_len = sizeof(rec.length);
237
238 iov[2].iov_base = &hdr;
239 iov[2].iov_len = sizeof(hdr);
240
241 iov[3].iov_base = rec_pfns;
242 iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);
243
244 iovcnt = 4;
245
246 if ( nr_pages )
247 {
248 for ( i = 0; i < nr_pfns; ++i )
249 {
250 if ( guest_data[i] )
251 {
252 iov[iovcnt].iov_base = guest_data[i];
253 iov[iovcnt].iov_len = PAGE_SIZE;
254 iovcnt++;
255 --nr_pages;
256 }
257 }
258 }
259
260 if ( writev_exact(ctx->fd, iov, iovcnt) )
261 {
262 PERROR("Failed to write page data to stream");
263 goto err;
264 }
265
266 /* Sanity check we have sent all the pages we expected to. */
267 assert(nr_pages == 0);
268 rc = ctx->save.nr_batch_pfns = 0;
269
270 err:
271 free(rec_pfns);
272 if ( guest_mapping )
273 xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
274 for ( i = 0; local_pages && i < nr_pfns; ++i )
275 free(local_pages[i]);
276 free(iov);
277 free(local_pages);
278 free(guest_data);
279 free(errors);
280 free(types);
281 free(mfns);
282
283 return rc;
284 }
285
286 /*
287 * Flush a batch of pfns into the stream.
288 */
289 static int flush_batch(struct xc_sr_context *ctx)
290 {
291 int rc = 0;
292
293 if ( ctx->save.nr_batch_pfns == 0 )
294 return rc;
295
296 rc = write_batch(ctx);
297
298 if ( !rc )
299 {
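/*
 * Mark the batch array undefined again so Valgrind flags any read of
 * stale pfns left over from the batch just written.
 */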
300 VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
301 MAX_BATCH_SIZE *
302 sizeof(*ctx->save.batch_pfns));
303 }
304
305 return rc;
306 }
307
308 /*
309 * Add a single pfn to the batch, flushing the batch if full.
310 */
311 static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
312 {
313 int rc = 0;
314
315 if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
316 rc = flush_batch(ctx);
317
318 if ( rc == 0 )
319 ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;
320
321 return rc;
322 }
323
324 /*
325 * Pause/suspend the domain, and refresh ctx->dominfo if required.
326 */
327 static int suspend_domain(struct xc_sr_context *ctx)
328 {
329 xc_interface *xch = ctx->xch;
330
331 /* TODO: Properly specify the return value from this callback. All
332 * implementations currently appear to return 1 for success, whereas
333 * the legacy code checks for != 0. */
334 int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);
335
336 if ( cb_rc == 0 )
337 {
338 ERROR("save callback suspend() failed: %d", cb_rc);
339 return -1;
340 }
341
342 /* Refresh domain information. */
343 if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
344 (ctx->dominfo.domid != ctx->domid) )
345 {
346 PERROR("Unable to refresh domain information");
347 return -1;
348 }
349
350 /* Confirm the domain has actually been paused. */
351 if ( !ctx->dominfo.shutdown ||
352 (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
353 {
354 ERROR("Domain has not been suspended: shutdown %d, reason %d",
355 ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
356 return -1;
357 }
358
359 xc_report_progress_single(xch, "Domain now suspended");
360
361 return 0;
362 }
363
364 /*
365 * Send a subset of pages in the guest's p2m, according to the dirty bitmap.
366 * Used for each subsequent iteration of the live migration loop.
367 *
368 * Bitmap is bounded by p2m_size.
369 */
370 static int send_dirty_pages(struct xc_sr_context *ctx,
371 unsigned long entries)
372 {
373 xc_interface *xch = ctx->xch;
374 xen_pfn_t p;
375 unsigned long written;
376 int rc;
377 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
378 &ctx->save.dirty_bitmap_hbuf);
379
380 for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
381 {
382 if ( !test_bit(p, dirty_bitmap) )
383 continue;
384
385 rc = add_to_batch(ctx, p);
386 if ( rc )
387 return rc;
388
389 /* Update progress every 4MB of memory sent (1U << (22 - 12) == 1024 pages of 4kB). */
390 if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
391 xc_report_progress_step(xch, written, entries);
392
393 ++written;
394 }
395
396 rc = flush_batch(ctx);
397 if ( rc )
398 return rc;
399
400 if ( written > entries )
401 DPRINTF("Bitmap contained more entries than expected...");
402
403 xc_report_progress_step(xch, entries, entries);
404
405 return ctx->save.ops.check_vm_state(ctx);
406 }
407
408 /*
409 * Send all pages in the guest's p2m. Used as the first iteration of the live
410 * migration loop, and for a non-live save.
411 */
412 static int send_all_pages(struct xc_sr_context *ctx)
413 {
414 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
415 &ctx->save.dirty_bitmap_hbuf);
416
417 bitmap_set(dirty_bitmap, ctx->save.p2m_size);
418
419 return send_dirty_pages(ctx, ctx->save.p2m_size);
420 }
421
422 static int enable_logdirty(struct xc_sr_context *ctx)
423 {
424 xc_interface *xch = ctx->xch;
425 int on1 = 0, off = 0, on2 = 0;
426 int rc;
427
428 /* This juggling is required if logdirty is enabled for VRAM tracking. */
429 rc = xc_shadow_control(xch, ctx->domid,
430 XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
431 NULL, 0, NULL, 0, NULL);
432 if ( rc < 0 )
433 {
434 on1 = errno;
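/*
 * Enabling may fail because log-dirty mode is already active (e.g. for
 * qemu's VRAM tracking); turn it off and try once more before giving up.
 */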
435 rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
436 NULL, 0, NULL, 0, NULL);
437 if ( rc < 0 )
438 off = errno;
439 else {
440 rc = xc_shadow_control(xch, ctx->domid,
441 XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
442 NULL, 0, NULL, 0, NULL);
443 if ( rc < 0 )
444 on2 = errno;
445 }
446 if ( rc < 0 )
447 {
448 PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
449 return rc;
450 }
451 }
452
453 return 0;
454 }
455
456 static int update_progress_string(struct xc_sr_context *ctx, char **str)
457 {
458 xc_interface *xch = ctx->xch;
459 char *new_str = NULL;
460 unsigned int iter = ctx->save.stats.iteration;
461
462 if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
463 {
464 PERROR("Unable to allocate new progress string");
465 return -1;
466 }
467
468 free(*str);
469 *str = new_str;
470
471 xc_set_progress_prefix(xch, *str);
472 return 0;
473 }
474
475 /*
476 * This is the live migration precopy policy - it's called periodically during
477 * the precopy phase of live migrations, and is responsible for deciding when
478 * the precopy phase should terminate and what should be done next.
479 *
480 * The policy implemented here behaves identically to the policy previously
481 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
482 * the live migration when there are either fewer than 50 dirty pages, or more
483 * than 5 precopy rounds have completed.
484 */
485 #define SPP_MAX_ITERATIONS 5
486 #define SPP_TARGET_DIRTY_COUNT 50
487
488 static int simple_precopy_policy(struct precopy_stats stats, void *user)
489 {
490 return ((stats.dirty_count >= 0 &&
491 stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
492 stats.iteration >= SPP_MAX_ITERATIONS)
493 ? XGS_POLICY_STOP_AND_COPY
494 : XGS_POLICY_CONTINUE_PRECOPY;
495 }
496
497 /*
498 * Send memory while guest is running.
499 */
500 static int send_memory_live(struct xc_sr_context *ctx)
501 {
502 xc_interface *xch = ctx->xch;
503 xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
504 char *progress_str = NULL;
505 unsigned int x = 0;
506 int rc;
507 int policy_decision;
508
509 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
510 &ctx->save.dirty_bitmap_hbuf);
511
512 precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
513 void *data = ctx->save.callbacks->data;
514
515 struct precopy_stats *policy_stats;
516
517 rc = update_progress_string(ctx, &progress_str);
518 if ( rc )
519 goto out;
520
521 ctx->save.stats = (struct precopy_stats){
522 .dirty_count = ctx->save.p2m_size,
523 };
524 policy_stats = &ctx->save.stats;
525
526 if ( precopy_policy == NULL )
527 precopy_policy = simple_precopy_policy;
528
529 bitmap_set(dirty_bitmap, ctx->save.p2m_size);
530
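/*
 * Precopy loop: consult the policy callback, send the pages currently
 * marked dirty, then (with dirty_count reset to -1) consult it again
 * before paying for a fresh logdirty bitmap via
 * XEN_DOMCTL_SHADOW_OP_CLEAN.
 */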
531 for ( ; ; )
532 {
533 policy_decision = precopy_policy(*policy_stats, data);
534 x++;
535
536 if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
537 {
538 rc = update_progress_string(ctx, &progress_str);
539 if ( rc )
540 goto out;
541
542 rc = send_dirty_pages(ctx, stats.dirty_count);
543 if ( rc )
544 goto out;
545 }
546
547 if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
548 break;
549
550 policy_stats->iteration = x;
551 policy_stats->total_written += policy_stats->dirty_count;
552 policy_stats->dirty_count = -1;
553
554 policy_decision = precopy_policy(*policy_stats, data);
555
556 if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
557 break;
558
559 if ( xc_shadow_control(
560 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
561 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
562 NULL, 0, &stats) != ctx->save.p2m_size )
563 {
564 PERROR("Failed to retrieve logdirty bitmap");
565 rc = -1;
566 goto out;
567 }
568
569 policy_stats->dirty_count = stats.dirty_count;
570
571 }
572
573 if ( policy_decision == XGS_POLICY_ABORT )
574 {
575 PERROR("Abort precopy loop");
576 rc = -1;
577 goto out;
578 }
579
580 out:
581 xc_set_progress_prefix(xch, NULL);
582 free(progress_str);
583 return rc;
584 }
585
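/*
 * Read the CHECKPOINT_DIRTY_PFN_LIST record sent back by the COLO
 * secondary and merge its pfns into the local dirty bitmap, so those
 * pages are retransmitted in the next cycle.
 */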
586 static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
587 {
588 xc_interface *xch = ctx->xch;
589 struct xc_sr_record rec;
590 uint64_t *pfns = NULL;
591 uint64_t pfn;
592 unsigned int count, i;
593 int rc;
594 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
595 &ctx->save.dirty_bitmap_hbuf);
596
597 rc = read_record(ctx, ctx->save.recv_fd, &rec);
598 if ( rc )
599 goto err;
600
601 if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
602 {
603 PERROR("Expect dirty bitmap record, but received %u", rec.type);
604 rc = -1;
605 goto err;
606 }
607
608 if ( rec.length % sizeof(*pfns) )
609 {
610 PERROR("Invalid dirty pfn list record length %u", rec.length);
611 rc = -1;
612 goto err;
613 }
614
615 count = rec.length / sizeof(*pfns);
616 pfns = rec.data;
617
618 for ( i = 0; i < count; i++ )
619 {
620 pfn = pfns[i];
621 if ( pfn > ctx->save.p2m_size )
622 {
623 PERROR("Invalid pfn 0x%" PRIx64, pfn);
624 rc = -1;
625 goto err;
626 }
627
628 set_bit(pfn, dirty_bitmap);
629 }
630
631 rc = 0;
632
633 err:
634 free(rec.data);
635 return rc;
636 }
637
638 /*
639 * Suspend the domain and send dirty memory.
640 * This is the last iteration of the live migration and the
641 * heart of the checkpointed stream.
642 */
643 static int suspend_and_send_dirty(struct xc_sr_context *ctx)
644 {
645 xc_interface *xch = ctx->xch;
646 xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
647 char *progress_str = NULL;
648 int rc;
649 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
650 &ctx->save.dirty_bitmap_hbuf);
651
652 rc = suspend_domain(ctx);
653 if ( rc )
654 goto out;
655
656 if ( xc_shadow_control(
657 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
658 HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
659 NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
660 ctx->save.p2m_size )
661 {
662 PERROR("Failed to retrieve logdirty bitmap");
663 rc = -1;
664 goto out;
665 }
666
667 if ( ctx->save.live )
668 {
669 rc = update_progress_string(ctx, &progress_str);
670 if ( rc )
671 goto out;
672 }
673 else
674 xc_set_progress_prefix(xch, "Checkpointed save");
675
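/*
 * Fold in pages which could not be sent earlier (ballooned out, or which
 * failed to normalise) so they are retried now the domain is paused.
 */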
676 bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);
677
678 if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
679 {
680 rc = colo_merge_secondary_dirty_bitmap(ctx);
681 if ( rc )
682 {
683 PERROR("Failed to get secondary vm's dirty pages");
684 goto out;
685 }
686 }
687
688 rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
689 if ( rc )
690 goto out;
691
692 bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
693 ctx->save.nr_deferred_pages = 0;
694
695 out:
696 xc_set_progress_prefix(xch, NULL);
697 free(progress_str);
698 return rc;
699 }
700
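/*
 * Debug aid: resend every frame after a VERIFY record.  The receiving side
 * is expected to compare the resent contents against what it already holds
 * rather than installing them, exposing pages missed by logdirty tracking.
 */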
701 static int verify_frames(struct xc_sr_context *ctx)
702 {
703 xc_interface *xch = ctx->xch;
704 xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
705 int rc;
706 struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };
707
708 DPRINTF("Enabling verify mode");
709
710 rc = write_record(ctx, &rec);
711 if ( rc )
712 goto out;
713
714 xc_set_progress_prefix(xch, "Frames verify");
715 rc = send_all_pages(ctx);
716 if ( rc )
717 goto out;
718
719 if ( xc_shadow_control(
720 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
721 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
722 NULL, 0, &stats) != ctx->save.p2m_size )
723 {
724 PERROR("Failed to retrieve logdirty bitmap");
725 rc = -1;
726 goto out;
727 }
728
729 DPRINTF(" Further stats: faults %u, dirty %u",
730 stats.fault_count, stats.dirty_count);
731
732 out:
733 return rc;
734 }
735
736 /*
737 * Send all domain memory. This is the heart of the live migration loop.
738 */
739 static int send_domain_memory_live(struct xc_sr_context *ctx)
740 {
741 int rc;
742
743 rc = enable_logdirty(ctx);
744 if ( rc )
745 goto out;
746
747 rc = send_memory_live(ctx);
748 if ( rc )
749 goto out;
750
751 rc = suspend_and_send_dirty(ctx);
752 if ( rc )
753 goto out;
754
755 if ( ctx->save.debug && ctx->stream_type != XC_STREAM_PLAIN )
756 {
757 rc = verify_frames(ctx);
758 if ( rc )
759 goto out;
760 }
761
762 out:
763 return rc;
764 }
765
766 /*
767 * Checkpointed save.
768 */
769 static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
770 {
771 return suspend_and_send_dirty(ctx);
772 }
773
774 /*
775 * Send all domain memory, pausing the domain first. Generally used for
776 * suspend-to-file.
777 */
778 static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
779 {
780 xc_interface *xch = ctx->xch;
781 int rc;
782
783 rc = suspend_domain(ctx);
784 if ( rc )
785 goto err;
786
787 xc_set_progress_prefix(xch, "Frames");
788
789 rc = send_all_pages(ctx);
790 if ( rc )
791 goto err;
792
793 err:
794 return rc;
795 }
796
797 static int setup(struct xc_sr_context *ctx)
798 {
799 xc_interface *xch = ctx->xch;
800 int rc;
801 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
802 &ctx->save.dirty_bitmap_hbuf);
803
804 rc = ctx->save.ops.setup(ctx);
805 if ( rc )
806 goto err;
807
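/*
 * The dirty bitmap lives in hypercall buffer memory as it is passed
 * directly to XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK}.
 */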
808 dirty_bitmap = xc_hypercall_buffer_alloc_pages(
809 xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
810 ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
811 sizeof(*ctx->save.batch_pfns));
812 ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
813
814 if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
815 {
816 ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
817 " deferred pages");
818 rc = -1;
819 errno = ENOMEM;
820 goto err;
821 }
822
823 rc = 0;
824
825 err:
826 return rc;
827 }
828
829 static void cleanup(struct xc_sr_context *ctx)
830 {
831 xc_interface *xch = ctx->xch;
832 DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
833 &ctx->save.dirty_bitmap_hbuf);
834
835
836 xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
837 NULL, 0, NULL, 0, NULL);
838
839 if ( ctx->save.ops.cleanup(ctx) )
840 PERROR("Failed to clean up");
841
842 xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
843 NRPAGES(bitmap_size(ctx->save.p2m_size)));
844 free(ctx->save.deferred_pages);
845 free(ctx->save.batch_pfns);
846 }
847
848 /*
849 * Save a domain.
850 */
851 static int save(struct xc_sr_context *ctx, uint16_t guest_type)
852 {
853 xc_interface *xch = ctx->xch;
854 int rc, saved_rc = 0, saved_errno = 0;
855
856 IPRINTF("Saving domain %d, type %s",
857 ctx->domid, dhdr_type_to_str(guest_type));
858
859 rc = setup(ctx);
860 if ( rc )
861 goto err;
862
863 xc_report_progress_single(xch, "Start of stream");
864
865 rc = write_headers(ctx, guest_type);
866 if ( rc )
867 goto err;
868
869 rc = ctx->save.ops.static_data(ctx);
870 if ( rc )
871 goto err;
872
873 rc = write_static_data_end_record(ctx);
874 if ( rc )
875 goto err;
876
877 rc = ctx->save.ops.start_of_stream(ctx);
878 if ( rc )
879 goto err;
880
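/*
 * Main stream loop.  For a plain stream this body runs exactly once; for
 * Remus/COLO it repeats, emitting a CHECKPOINT record and invoking the
 * checkpoint/postcopy callbacks between iterations.
 */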
881 do {
882 rc = ctx->save.ops.start_of_checkpoint(ctx);
883 if ( rc )
884 goto err;
885
886 rc = ctx->save.ops.check_vm_state(ctx);
887 if ( rc )
888 goto err;
889
890 if ( ctx->save.live )
891 rc = send_domain_memory_live(ctx);
892 else if ( ctx->stream_type != XC_STREAM_PLAIN )
893 rc = send_domain_memory_checkpointed(ctx);
894 else
895 rc = send_domain_memory_nonlive(ctx);
896
897 if ( rc )
898 goto err;
899
900 if ( !ctx->dominfo.shutdown ||
901 (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
902 {
903 ERROR("Domain has not been suspended");
904 rc = -1;
905 goto err;
906 }
907
908 rc = ctx->save.ops.end_of_checkpoint(ctx);
909 if ( rc )
910 goto err;
911
912 if ( ctx->stream_type != XC_STREAM_PLAIN )
913 {
914 /*
915 * We have now completed the initial live portion of the checkpoint
916 * process. Therefore switch into periodically sending synchronous
917 * batches of pages.
918 */
919 ctx->save.live = false;
920
921 rc = write_checkpoint_record(ctx);
922 if ( rc )
923 goto err;
924
925 if ( ctx->stream_type == XC_STREAM_COLO )
926 {
927 rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
928 if ( !rc )
929 {
930 rc = -1;
931 goto err;
932 }
933 }
934
935 rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
936 if ( rc <= 0 )
937 goto err;
938
939 if ( ctx->stream_type == XC_STREAM_COLO )
940 {
941 rc = ctx->save.callbacks->wait_checkpoint(
942 ctx->save.callbacks->data);
943 if ( rc <= 0 )
944 goto err;
945 }
946 else if ( ctx->stream_type == XC_STREAM_REMUS )
947 {
948 rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
949 if ( rc <= 0 )
950 goto err;
951 }
952 else
953 {
954 ERROR("Unknown checkpointed stream");
955 rc = -1;
956 goto err;
957 }
958 }
959 } while ( ctx->stream_type != XC_STREAM_PLAIN );
960
961 xc_report_progress_single(xch, "End of stream");
962
963 rc = write_end_record(ctx);
964 if ( rc )
965 goto err;
966
967 xc_report_progress_single(xch, "Complete");
968 goto done;
969
970 err:
971 saved_errno = errno;
972 saved_rc = rc;
973 PERROR("Save failed");
974
975 done:
976 cleanup(ctx);
977
978 if ( saved_rc )
979 {
980 rc = saved_rc;
981 errno = saved_errno;
982 }
983
984 return rc;
985 }
986
987 int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
988 uint32_t flags, struct save_callbacks *callbacks,
989 xc_stream_type_t stream_type, int recv_fd)
990 {
991 struct xc_sr_context ctx = {
992 .xch = xch,
993 .fd = io_fd,
994 .stream_type = stream_type,
995 };
996
997 /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
998 ctx.save.callbacks = callbacks;
999 ctx.save.live = !!(flags & XCFLAGS_LIVE);
1000 ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
1001 ctx.save.recv_fd = recv_fd;
1002
1003 if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
1004 {
1005 PERROR("Failed to get domain info");
1006 return -1;
1007 }
1008
1009 if ( ctx.dominfo.domid != dom )
1010 {
1011 ERROR("Domain %u does not exist", dom);
1012 return -1;
1013 }
1014
1015 /* Sanity check stream_type-related parameters */
1016 switch ( stream_type )
1017 {
1018 case XC_STREAM_COLO:
1019 assert(callbacks->wait_checkpoint);
1020 /* Fallthrough */
1021 case XC_STREAM_REMUS:
1022 assert(callbacks->checkpoint && callbacks->postcopy);
1023 /* Fallthrough */
1024 case XC_STREAM_PLAIN:
1025 if ( ctx.dominfo.hvm )
1026 assert(callbacks->switch_qemu_logdirty);
1027 break;
1028
1029 default:
1030 assert(!"Bad stream_type");
1031 break;
1032 }
1033
1034 DPRINTF("fd %d, dom %u, flags %u, hvm %d",
1035 io_fd, dom, flags, ctx.dominfo.hvm);
1036
1037 ctx.domid = dom;
1038
1039 if ( ctx.dominfo.hvm )
1040 {
1041 ctx.save.ops = save_ops_x86_hvm;
1042 return save(&ctx, DHDR_TYPE_X86_HVM);
1043 }
1044 else
1045 {
1046 ctx.save.ops = save_ops_x86_pv;
1047 return save(&ctx, DHDR_TYPE_X86_PV);
1048 }
1049 }
1050
1051 /*
1052 * Local variables:
1053 * mode: C
1054 * c-file-style: "BSD"
1055 * c-basic-offset: 4
1056 * tab-width: 4
1057 * indent-tabs-mode: nil
1058 * End:
1059 */
1060