#include <assert.h>
#include <arpa/inet.h>

#include "xc_sr_common.h"

/*
 * Writes an Image header and Domain header into the stream.
 */
static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
    struct xc_sr_ihdr ihdr =
    {
        .marker = IHDR_MARKER,
        .id = htonl(IHDR_ID),
        .version = htonl(IHDR_VERSION),
        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
    };
    struct xc_sr_dhdr dhdr =
    {
        .type = guest_type,
        .page_shift = XC_PAGE_SHIFT,
        .xen_major = (xen_version >> 16) & 0xffff,
        .xen_minor = (xen_version) & 0xffff,
    };

    if ( xen_version < 0 )
    {
        PERROR("Unable to obtain Xen Version");
        return -1;
    }

    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Unable to write Image Header to stream");
        return -1;
    }

    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Unable to write Domain Header to stream");
        return -1;
    }

    return 0;
}

/*
 * Writes an END record into the stream.
 */
static int write_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { REC_TYPE_END, 0, NULL };

    return write_record(ctx, &end);
}

/*
 * Writes a CHECKPOINT record into the stream.
 */
static int write_checkpoint_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record checkpoint = { REC_TYPE_CHECKPOINT, 0, NULL };

    return write_record(ctx, &checkpoint);
}

/*
 * Writes a batch of memory as a PAGE_DATA record into the stream. The batch
 * is constructed in ctx->save.batch_pfns.
 *
 * This function:
 * - gets the types for each pfn in the batch.
 * - for each pfn with real data:
 *   - maps and attempts to localise the pages.
 * - constructs and writes a PAGE_DATA record into the stream.
 */
static int write_batch(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = NULL, *types = NULL;
    void *guest_mapping = NULL;
    void **guest_data = NULL;
    void **local_pages = NULL;
    int *errors = NULL, rc = -1;
    unsigned i, p, nr_pages = 0, nr_pages_mapped = 0;
    unsigned nr_pfns = ctx->save.nr_batch_pfns;
    void *page, *orig_page;
    uint64_t *rec_pfns = NULL;
    struct iovec *iov = NULL; int iovcnt = 0;
    struct xc_sr_rec_page_data_header hdr = { 0 };
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_PAGE_DATA,
    };

    assert(nr_pfns != 0);

    /* Mfns of the batch pfns. */
    mfns = malloc(nr_pfns * sizeof(*mfns));
    /* Types of the batch pfns. */
    types = malloc(nr_pfns * sizeof(*types));
    /* Errors from attempting to map the gfns. */
    errors = malloc(nr_pfns * sizeof(*errors));
    /* Pointers to page data to send. Mapped gfns or local allocations. */
    guest_data = calloc(nr_pfns, sizeof(*guest_data));
    /* Pointers to locally allocated pages. Need freeing. */
    local_pages = calloc(nr_pfns, sizeof(*local_pages));
    /* iovec[] for writev(). */
    iov = malloc((nr_pfns + 4) * sizeof(*iov));

    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
    {
        ERROR("Unable to allocate arrays for a batch of %u pages",
              nr_pfns);
        goto err;
    }

    for ( i = 0; i < nr_pfns; ++i )
    {
        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                      ctx->save.batch_pfns[i]);

        /* Likely a ballooned page. */
        if ( mfns[i] == INVALID_MFN )
        {
            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
            ++ctx->save.nr_deferred_pages;
        }
    }

    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
    if ( rc )
    {
        PERROR("Failed to get types for pfn batch");
        goto err;
    }
    rc = -1;

    for ( i = 0; i < nr_pfns; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
        case XEN_DOMCTL_PFINFO_XTAB:
            continue;
        }

        mfns[nr_pages++] = mfns[i];
    }

    if ( nr_pages > 0 )
    {
        guest_mapping = xenforeignmemory_map(xch->fmem,
            ctx->domid, PROT_READ, nr_pages, mfns, errors);
        if ( !guest_mapping )
        {
            PERROR("Failed to map guest pages");
            goto err;
        }
        nr_pages_mapped = nr_pages;

        for ( i = 0, p = 0; i < nr_pfns; ++i )
        {
            switch ( types[i] )
            {
            case XEN_DOMCTL_PFINFO_BROKEN:
            case XEN_DOMCTL_PFINFO_XALLOC:
            case XEN_DOMCTL_PFINFO_XTAB:
                continue;
            }

            if ( errors[p] )
            {
                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
                goto err;
            }

            orig_page = page = guest_mapping + (p * PAGE_SIZE);
            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);

            if ( orig_page != page )
                local_pages[i] = page;

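            /*
             * A normalise_page() failure with EAGAIN means the page cannot be
             * sent in this batch; defer it and exclude it from the record by
             * treating it as XTAB below.
             */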
            if ( rc )
            {
                if ( rc == -1 && errno == EAGAIN )
                {
                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
                    ++ctx->save.nr_deferred_pages;
                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
                    --nr_pages;
                }
                else
                    goto err;
            }
            else
                guest_data[i] = page;

            rc = -1;
            ++p;
        }
    }

    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
    if ( !rec_pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
              nr_pfns * sizeof(*rec_pfns));
        goto err;
    }

    hdr.count = nr_pfns;

    rec.length = sizeof(hdr);
    rec.length += nr_pfns * sizeof(*rec_pfns);
    rec.length += nr_pages * PAGE_SIZE;

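    /*
     * Build the pfn list for the record: each entry combines the page type
     * returned by xc_get_pfn_type_batch() (shifted into the upper 32 bits)
     * with the pfn itself.
     */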
    for ( i = 0; i < nr_pfns; ++i )
        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];

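    /*
     * Assemble the record for writev(): record type and length, then the
     * PAGE_DATA header, the pfn/type list, and finally one iovec per page of
     * data being sent.
     */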
    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = &hdr;
    iov[2].iov_len = sizeof(hdr);

    iov[3].iov_base = rec_pfns;
    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);

    iovcnt = 4;

    if ( nr_pages )
    {
        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( guest_data[i] )
            {
                iov[iovcnt].iov_base = guest_data[i];
                iov[iovcnt].iov_len = PAGE_SIZE;
                iovcnt++;
                --nr_pages;
            }
        }
    }

    if ( writev_exact(ctx->fd, iov, iovcnt) )
    {
        PERROR("Failed to write page data to stream");
        goto err;
    }

    /* Sanity check we have sent all the pages we expected to. */
    assert(nr_pages == 0);
    rc = ctx->save.nr_batch_pfns = 0;

 err:
    free(rec_pfns);
    if ( guest_mapping )
        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
    for ( i = 0; local_pages && i < nr_pfns; ++i )
        free(local_pages[i]);
    free(iov);
    free(local_pages);
    free(guest_data);
    free(errors);
    free(types);
    free(mfns);

    return rc;
}

/*
 * Flush a batch of pfns into the stream.
 */
static int flush_batch(struct xc_sr_context *ctx)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == 0 )
        return rc;

    rc = write_batch(ctx);

    if ( !rc )
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
                                    MAX_BATCH_SIZE *
                                    sizeof(*ctx->save.batch_pfns));
    }

    return rc;
}

/*
 * Add a single pfn to the batch, flushing the batch if full.
 */
static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
        rc = flush_batch(ctx);

    if ( rc == 0 )
        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;

    return rc;
}

/*
 * Pause/suspend the domain, and refresh ctx->dominfo if required.
 */
static int suspend_domain(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;

    /* TODO: Properly specify the return value from this callback. All
     * implementations currently appear to return 1 for success, whereas
     * the legacy code checks for != 0. */
    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);

    if ( cb_rc == 0 )
    {
        ERROR("save callback suspend() failed: %d", cb_rc);
        return -1;
    }

    /* Refresh domain information. */
    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
         (ctx->dominfo.domid != ctx->domid) )
    {
        PERROR("Unable to refresh domain information");
        return -1;
    }

    /* Confirm the domain has actually been paused. */
    if ( !ctx->dominfo.shutdown ||
         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
    {
        ERROR("Domain has not been suspended: shutdown %d, reason %d",
              ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
        return -1;
    }

    xc_report_progress_single(xch, "Domain now suspended");

    return 0;
}

/*
 * Send a subset of pages in the guest's p2m, according to the dirty bitmap.
 * Used for each subsequent iteration of the live migration loop.
 *
 * Bitmap is bounded by p2m_size.
 */
static int send_dirty_pages(struct xc_sr_context *ctx,
                            unsigned long entries)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t p;
    unsigned long written;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
    {
        if ( !test_bit(p, dirty_bitmap) )
            continue;

        rc = add_to_batch(ctx, p);
        if ( rc )
            return rc;

        /* Update progress every 4MB worth of memory sent. */
        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
            xc_report_progress_step(xch, written, entries);

        ++written;
    }

    rc = flush_batch(ctx);
    if ( rc )
        return rc;

    if ( written > entries )
        DPRINTF("Bitmap contained more entries than expected...");

    xc_report_progress_step(xch, entries, entries);

    return ctx->save.ops.check_vm_state(ctx);
}

/*
 * Send all pages in the guest's p2m. Used as the first iteration of the live
 * migration loop, and for a non-live save.
 */
static int send_all_pages(struct xc_sr_context *ctx)
{
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    return send_dirty_pages(ctx, ctx->save.p2m_size);
}

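/*
 * Enable log-dirty mode for the domain. If the first attempt fails (e.g.
 * because log-dirty is already enabled for VRAM tracking), disable it and
 * try once more.
 */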
static int enable_logdirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int on1 = 0, off = 0, on2 = 0;
    int rc;

    /* This juggling is required if logdirty is enabled for VRAM tracking. */
    rc = xc_shadow_control(xch, ctx->domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0, NULL, 0, NULL);
    if ( rc < 0 )
    {
        on1 = errno;
        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                               NULL, 0, NULL, 0, NULL);
        if ( rc < 0 )
            off = errno;
        else
        {
            rc = xc_shadow_control(xch, ctx->domid,
                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                   NULL, 0, NULL, 0, NULL);
            if ( rc < 0 )
                on2 = errno;
        }
        if ( rc < 0 )
        {
            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
            return rc;
        }
    }

    return 0;
}

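/*
 * Allocate a fresh progress string for the current iteration, replacing (and
 * freeing) the previous one, and install it as the progress prefix.
 */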
static int update_progress_string(struct xc_sr_context *ctx, char **str)
{
    xc_interface *xch = ctx->xch;
    char *new_str = NULL;
    unsigned int iter = ctx->save.stats.iteration;

    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
    {
        PERROR("Unable to allocate new progress string");
        return -1;
    }

    free(*str);
    *str = new_str;

    xc_set_progress_prefix(xch, *str);
    return 0;
}

/*
 * This is the live migration precopy policy - it's called periodically during
 * the precopy phase of live migrations, and is responsible for deciding when
 * the precopy phase should terminate and what should be done next.
 *
 * The policy implemented here behaves identically to the policy previously
 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
 * the live migration when there are either fewer than 50 dirty pages, or more
 * than 5 precopy rounds have completed.
 */
#define SPP_MAX_ITERATIONS 5
#define SPP_TARGET_DIRTY_COUNT 50

static int simple_precopy_policy(struct precopy_stats stats, void *user)
{
    return ((stats.dirty_count >= 0 &&
             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
            stats.iteration >= SPP_MAX_ITERATIONS)
        ? XGS_POLICY_STOP_AND_COPY
        : XGS_POLICY_CONTINUE_PRECOPY;
}

/*
 * Send memory while guest is running.
 */
static int send_memory_live(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    unsigned int x = 0;
    int rc;
    int policy_decision;

    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
    void *data = ctx->save.callbacks->data;

    struct precopy_stats *policy_stats;

    rc = update_progress_string(ctx, &progress_str);
    if ( rc )
        goto out;

    ctx->save.stats = (struct precopy_stats)
        { .dirty_count = ctx->save.p2m_size };
    policy_stats = &ctx->save.stats;

    if ( precopy_policy == NULL )
        precopy_policy = simple_precopy_policy;

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    for ( ; ; )
    {
        policy_decision = precopy_policy(*policy_stats, data);
        x++;

        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
        {
            rc = update_progress_string(ctx, &progress_str);
            if ( rc )
                goto out;

            rc = send_dirty_pages(ctx, stats.dirty_count);
            if ( rc )
                goto out;
        }

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        policy_stats->iteration = x;
        policy_stats->total_written += policy_stats->dirty_count;
        policy_stats->dirty_count = -1;

        policy_decision = precopy_policy(*policy_stats, data);

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        if ( xc_shadow_control(
                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
                 NULL, 0, &stats) != ctx->save.p2m_size )
        {
            PERROR("Failed to retrieve logdirty bitmap");
            rc = -1;
            goto out;
        }

        policy_stats->dirty_count = stats.dirty_count;
    }

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

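/*
 * Read a CHECKPOINT_DIRTY_PFN_LIST record sent back by the secondary (COLO)
 * and merge the pfns it contains into the local dirty bitmap.
 */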
static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec = { 0, 0, NULL };
    uint64_t *pfns = NULL;
    uint64_t pfn;
    unsigned count, i;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = read_record(ctx, ctx->save.recv_fd, &rec);
    if ( rc )
        goto err;

    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
    {
        PERROR("Expected dirty bitmap record, but received %u", rec.type);
        rc = -1;
        goto err;
    }

    if ( rec.length % sizeof(*pfns) )
    {
        PERROR("Invalid dirty pfn list record length %u", rec.length);
        rc = -1;
        goto err;
    }

    count = rec.length / sizeof(*pfns);
    pfns = rec.data;

    for ( i = 0; i < count; i++ )
    {
        pfn = pfns[i];
        if ( pfn >= ctx->save.p2m_size )
        {
            PERROR("Invalid pfn 0x%" PRIx64, pfn);
            rc = -1;
            goto err;
        }

        set_bit(pfn, dirty_bitmap);
    }

    rc = 0;

 err:
    free(rec.data);
    return rc;
}

/*
 * Suspend the domain and send dirty memory.
 * This is the last iteration of the live migration and the
 * heart of the checkpointed stream.
 */
static int suspend_and_send_dirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = suspend_domain(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
             NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
         ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    if ( ctx->save.live )
    {
        rc = update_progress_string(ctx, &progress_str);
        if ( rc )
            goto out;
    }
    else
        xc_set_progress_prefix(xch, "Checkpointed save");

    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);

    if ( !ctx->save.live && ctx->save.checkpointed == XC_MIG_STREAM_COLO )
    {
        rc = colo_merge_secondary_dirty_bitmap(ctx);
        if ( rc )
        {
            PERROR("Failed to get secondary vm's dirty pages");
            goto out;
        }
    }

    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
    if ( rc )
        goto out;

    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
    ctx->save.nr_deferred_pages = 0;

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

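/*
 * Switch the stream into verify mode by writing a VERIFY record, resend all
 * frames so the receiving side can cross-check the page data, then report
 * the log-dirty statistics accumulated while doing so.
 */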
static int verify_frames(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    int rc;
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_VERIFY,
        .length = 0,
    };

    DPRINTF("Enabling verify mode");

    rc = write_record(ctx, &rec);
    if ( rc )
        goto out;

    xc_set_progress_prefix(xch, "Frames verify");
    rc = send_all_pages(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
             NULL, 0, &stats) != ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    DPRINTF(" Further stats: faults %u, dirty %u",
            stats.fault_count, stats.dirty_count);

 out:
    return rc;
}

/*
 * Send all domain memory. This is the heart of the live migration loop.
 */
static int send_domain_memory_live(struct xc_sr_context *ctx)
{
    int rc;

    rc = enable_logdirty(ctx);
    if ( rc )
        goto out;

    rc = send_memory_live(ctx);
    if ( rc )
        goto out;

    rc = suspend_and_send_dirty(ctx);
    if ( rc )
        goto out;

    if ( ctx->save.debug && ctx->save.checkpointed != XC_MIG_STREAM_NONE )
    {
        rc = verify_frames(ctx);
        if ( rc )
            goto out;
    }

 out:
    return rc;
}

/*
 * Checkpointed save.
 */
static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
{
    return suspend_and_send_dirty(ctx);
}

/*
 * Send all domain memory, pausing the domain first. Generally used for
 * suspend-to-file.
 */
static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = suspend_domain(ctx);
    if ( rc )
        goto err;

    xc_set_progress_prefix(xch, "Frames");

    rc = send_all_pages(ctx);
    if ( rc )
        goto err;

 err:
    return rc;
}

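/*
 * Allocate the resources the save operation needs: the log-dirty bitmap
 * hypercall buffer, the batch pfn array and the deferred pages bitmap, after
 * running the implementation-specific setup hook.
 */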
static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = ctx->save.ops.setup(ctx);
    if ( rc )
        goto err;

    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                  sizeof(*ctx->save.batch_pfns));
    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));

    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
    {
        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
              " deferred pages");
        rc = -1;
        errno = ENOMEM;
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

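/*
 * Undo setup(): disable log-dirty mode, run the implementation-specific
 * cleanup hook, and free the buffers allocated for the save operation.
 */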
static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                      NULL, 0, NULL, 0, NULL);

    if ( ctx->save.ops.cleanup(ctx) )
        PERROR("Failed to clean up");

    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
    free(ctx->save.deferred_pages);
    free(ctx->save.batch_pfns);
}

/*
 * Save a domain.
 */
static int save(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Saving domain %d, type %s",
            ctx->domid, dhdr_type_to_str(guest_type));

    rc = setup(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Start of stream");

    rc = write_headers(ctx, guest_type);
    if ( rc )
        goto err;

    rc = ctx->save.ops.start_of_stream(ctx);
    if ( rc )
        goto err;

    do {
        rc = ctx->save.ops.start_of_checkpoint(ctx);
        if ( rc )
            goto err;

        rc = ctx->save.ops.check_vm_state(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.live )
            rc = send_domain_memory_live(ctx);
        else if ( ctx->save.checkpointed != XC_MIG_STREAM_NONE )
            rc = send_domain_memory_checkpointed(ctx);
        else
            rc = send_domain_memory_nonlive(ctx);

        if ( rc )
            goto err;

        if ( !ctx->dominfo.shutdown ||
             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
        {
            ERROR("Domain has not been suspended");
            rc = -1;
            goto err;
        }

        rc = ctx->save.ops.end_of_checkpoint(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.checkpointed != XC_MIG_STREAM_NONE )
        {
            /*
             * We have now completed the initial live portion of the checkpoint
             * process. Therefore switch into periodically sending synchronous
             * batches of pages.
             */
            ctx->save.live = false;

            rc = write_checkpoint_record(ctx);
            if ( rc )
                goto err;

            if ( ctx->save.checkpointed == XC_MIG_STREAM_COLO )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( !rc )
                {
                    rc = -1;
                    goto err;
                }
            }

            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
            if ( rc <= 0 )
                goto err;

            if ( ctx->save.checkpointed == XC_MIG_STREAM_COLO )
            {
                rc = ctx->save.callbacks->wait_checkpoint(
                    ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else if ( ctx->save.checkpointed == XC_MIG_STREAM_REMUS )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else
            {
                ERROR("Unknown checkpointed stream");
                rc = -1;
                goto err;
            }
        }
    } while ( ctx->save.checkpointed != XC_MIG_STREAM_NONE );

    xc_report_progress_single(xch, "End of stream");

    rc = write_end_record(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Complete");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Save failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

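/*
 * Public entry point. Builds a save context from the supplied parameters,
 * selects the PV or HVM save ops based on the domain type, and hands off to
 * save().
 */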
int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
                   uint32_t flags, struct save_callbacks *callbacks,
                   int hvm, xc_migration_stream_t stream_type, int recv_fd)
{
    struct xc_sr_context ctx =
    {
        .xch = xch,
        .fd = io_fd,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.save.callbacks = callbacks;
    ctx.save.live = !!(flags & XCFLAGS_LIVE);
    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
    ctx.save.checkpointed = stream_type;
    ctx.save.recv_fd = recv_fd;

    /* If altering migration_stream update this assert too. */
    assert(stream_type == XC_MIG_STREAM_NONE ||
           stream_type == XC_MIG_STREAM_REMUS ||
           stream_type == XC_MIG_STREAM_COLO);

    /* Sanity checks for callbacks. */
    if ( hvm )
        assert(callbacks->switch_qemu_logdirty);
    if ( ctx.save.checkpointed )
        assert(callbacks->checkpoint && callbacks->postcopy);
    if ( ctx.save.checkpointed == XC_MIG_STREAM_COLO )
        assert(callbacks->wait_checkpoint);

    DPRINTF("fd %d, dom %u, flags %u, hvm %d", io_fd, dom, flags, hvm);

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    ctx.domid = dom;

    if ( ctx.dominfo.hvm )
    {
        ctx.save.ops = save_ops_x86_hvm;
        return save(&ctx, DHDR_TYPE_X86_HVM);
    }
    else
    {
        ctx.save.ops = save_ops_x86_pv;
        return save(&ctx, DHDR_TYPE_X86_PV);
    }
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */