#include <assert.h>
#include <arpa/inet.h>

#include "xc_sr_common.h"

/*
 * Writes an Image header and Domain header into the stream.
 */
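/*
 * Note that the image header fields are converted to network byte order
 * with htonl()/htons(), while the domain header is written in the host's
 * endianness.
 */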
static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
    struct xc_sr_ihdr ihdr = {
        .marker  = IHDR_MARKER,
        .id      = htonl(IHDR_ID),
        .version = htonl(3),
        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
    };
    struct xc_sr_dhdr dhdr = {
        .type       = guest_type,
        .page_shift = XC_PAGE_SHIFT,
        .xen_major  = (xen_version >> 16) & 0xffff,
        .xen_minor  = (xen_version)       & 0xffff,
    };

    if ( xen_version < 0 )
    {
        PERROR("Unable to obtain Xen Version");
        return -1;
    }

    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Unable to write Image Header to stream");
        return -1;
    }

    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Unable to write Domain Header to stream");
        return -1;
    }

    return 0;
}

/*
 * Writes an END record into the stream.
 */
static int write_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_END };

    return write_record(ctx, &end);
}

/*
 * Writes a STATIC_DATA_END record into the stream.
 */
static int write_static_data_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_STATIC_DATA_END };

    return write_record(ctx, &end);
}

/*
 * Writes a CHECKPOINT record into the stream.
 */
static int write_checkpoint_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record checkpoint = { .type = REC_TYPE_CHECKPOINT };

    return write_record(ctx, &checkpoint);
}

/*
 * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
 * is constructed in ctx->save.batch_pfns.
 *
 * This function:
 * - gets the types for each pfn in the batch.
 * - for each pfn with real data:
 *   - maps and attempts to localise the page.
 * - constructs and writes a PAGE_DATA record into the stream.
 */
static int write_batch(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = NULL, *types = NULL;
    void *guest_mapping = NULL;
    void **guest_data = NULL;
    void **local_pages = NULL;
    int *errors = NULL, rc = -1;
    unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
    unsigned int nr_pfns = ctx->save.nr_batch_pfns;
    void *page, *orig_page;
    uint64_t *rec_pfns = NULL;
    struct iovec *iov = NULL;
    int iovcnt = 0;
    struct xc_sr_rec_page_data_header hdr = { 0 };
    struct xc_sr_record rec = {
        .type = REC_TYPE_PAGE_DATA,
    };

    assert(nr_pfns != 0);

    /* Mfns of the batch pfns. */
    mfns = malloc(nr_pfns * sizeof(*mfns));
    /* Types of the batch pfns. */
    types = malloc(nr_pfns * sizeof(*types));
    /* Errors from attempting to map the gfns. */
    errors = malloc(nr_pfns * sizeof(*errors));
    /* Pointers to page data to send.  Mapped gfns or local allocations. */
    guest_data = calloc(nr_pfns, sizeof(*guest_data));
    /* Pointers to locally allocated pages.  Need freeing. */
    local_pages = calloc(nr_pfns, sizeof(*local_pages));
    /* iovec[] for writev(). */
    iov = malloc((nr_pfns + 4) * sizeof(*iov));

    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
    {
        ERROR("Unable to allocate arrays for a batch of %u pages",
              nr_pfns);
        goto err;
    }

    for ( i = 0; i < nr_pfns; ++i )
    {
        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                      ctx->save.batch_pfns[i]);

        /* Likely a ballooned page. */
        if ( mfns[i] == INVALID_MFN )
        {
            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
            ++ctx->save.nr_deferred_pages;
        }
    }

    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
    if ( rc )
    {
        PERROR("Failed to get types for pfn batch");
        goto err;
    }
    rc = -1;

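    /*
     * Pages marked BROKEN, XALLOC or XTAB have no data to map or send, so
     * compact the mfn array down to just the pages which do.
     */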
    for ( i = 0; i < nr_pfns; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
        case XEN_DOMCTL_PFINFO_XTAB:
            continue;
        }

        mfns[nr_pages++] = mfns[i];
    }

    if ( nr_pages > 0 )
    {
        guest_mapping = xenforeignmemory_map(
            xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
        if ( !guest_mapping )
        {
            PERROR("Failed to map guest pages");
            goto err;
        }
        nr_pages_mapped = nr_pages;

        for ( i = 0, p = 0; i < nr_pfns; ++i )
        {
            switch ( types[i] )
            {
            case XEN_DOMCTL_PFINFO_BROKEN:
            case XEN_DOMCTL_PFINFO_XALLOC:
            case XEN_DOMCTL_PFINFO_XTAB:
                continue;
            }

            if ( errors[p] )
            {
                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
                goto err;
            }

            orig_page = page = guest_mapping + (p * PAGE_SIZE);
            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);

            if ( orig_page != page )
                local_pages[i] = page;

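            /*
             * A normalise failure of -1/EAGAIN is transient: defer the pfn
             * to a later batch and mark it XTAB so no data is written for
             * it now.  Any other failure is fatal.
             */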
            if ( rc )
            {
                if ( rc == -1 && errno == EAGAIN )
                {
                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
                    ++ctx->save.nr_deferred_pages;
                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
                    --nr_pages;
                }
                else
                    goto err;
            }
            else
                guest_data[i] = page;

            rc = -1;
            ++p;
        }
    }

    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
    if ( !rec_pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
              nr_pfns * sizeof(*rec_pfns));
        goto err;
    }

    hdr.count = nr_pfns;

    rec.length = sizeof(hdr);
    rec.length += nr_pfns * sizeof(*rec_pfns);
    rec.length += nr_pages * PAGE_SIZE;

    for ( i = 0; i < nr_pfns; ++i )
        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];

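    /*
     * Assemble the PAGE_DATA record for writev(): the record header (type
     * and length), the page data header (count), one 64 bit word per pfn
     * with the type in the upper 32 bits, then the raw contents of each
     * page which has data.
     */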
    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = &hdr;
    iov[2].iov_len = sizeof(hdr);

    iov[3].iov_base = rec_pfns;
    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);

    iovcnt = 4;

    if ( nr_pages )
    {
        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( guest_data[i] )
            {
                iov[iovcnt].iov_base = guest_data[i];
                iov[iovcnt].iov_len = PAGE_SIZE;
                iovcnt++;
                --nr_pages;
            }
        }
    }

    if ( writev_exact(ctx->fd, iov, iovcnt) )
    {
        PERROR("Failed to write page data to stream");
        goto err;
    }

    /* Sanity check we have sent all the pages we expected to. */
    assert(nr_pages == 0);
    rc = ctx->save.nr_batch_pfns = 0;

 err:
    free(rec_pfns);
    if ( guest_mapping )
        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
    for ( i = 0; local_pages && i < nr_pfns; ++i )
        free(local_pages[i]);
    free(iov);
    free(local_pages);
    free(guest_data);
    free(errors);
    free(types);
    free(mfns);

    return rc;
}

/*
 * Flush a batch of pfns into the stream.
 */
static int flush_batch(struct xc_sr_context *ctx)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == 0 )
        return rc;

    rc = write_batch(ctx);

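    /*
     * Mark the flushed batch as undefined, so Valgrind can catch any use
     * of stale entries before they are rewritten.
     */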
    if ( !rc )
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
                                    MAX_BATCH_SIZE *
                                    sizeof(*ctx->save.batch_pfns));
    }

    return rc;
}

/*
 * Add a single pfn to the batch, flushing the batch if full.
 */
static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
        rc = flush_batch(ctx);

    if ( rc == 0 )
        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;

    return rc;
}

/*
 * Pause/suspend the domain, and refresh ctx->dominfo if required.
 */
static int suspend_domain(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;

    /* TODO: Properly specify the return value from this callback.  All
     * implementations currently appear to return 1 for success, whereas
     * the legacy code checks for != 0. */
    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);

    if ( cb_rc == 0 )
    {
        ERROR("save callback suspend() failed: %d", cb_rc);
        return -1;
    }

    /* Refresh domain information. */
    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
         (ctx->dominfo.domid != ctx->domid) )
    {
        PERROR("Unable to refresh domain information");
        return -1;
    }

    /* Confirm the domain has actually been paused. */
    if ( !ctx->dominfo.shutdown ||
         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
    {
        ERROR("Domain has not been suspended: shutdown %d, reason %d",
              ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
        return -1;
    }

    xc_report_progress_single(xch, "Domain now suspended");

    return 0;
}

/*
 * Send a subset of pages in the guest's p2m, according to the dirty bitmap.
 * Used for each subsequent iteration of the live migration loop.
 *
 * The bitmap is bounded by p2m_size.
 */
static int send_dirty_pages(struct xc_sr_context *ctx,
                            unsigned long entries)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t p;
    unsigned long written;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
    {
        if ( !test_bit(p, dirty_bitmap) )
            continue;

        rc = add_to_batch(ctx, p);
        if ( rc )
            return rc;

        /* Update progress every 4MB worth of memory sent, i.e. every
         * 1 << (22 - 12) pages. */
        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
            xc_report_progress_step(xch, written, entries);

        ++written;
    }

    rc = flush_batch(ctx);
    if ( rc )
        return rc;

    if ( written > entries )
        DPRINTF("Bitmap contained more entries than expected...");

    xc_report_progress_step(xch, entries, entries);

    return ctx->save.ops.check_vm_state(ctx);
}

/*
 * Send all pages in the guest's p2m.  Used as the first iteration of the
 * live migration loop, and for a non-live save.
 */
static int send_all_pages(struct xc_sr_context *ctx)
{
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    return send_dirty_pages(ctx, ctx->save.p2m_size);
}

static int enable_logdirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int on1 = 0, off = 0, on2 = 0;
    int rc;

    /* This juggling is required if logdirty is enabled for VRAM tracking. */
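    /*
     * If the first enable fails, the device model may already have logdirty
     * active for VRAM tracking: turn logdirty off and enable it again,
     * collecting each errno so a failure can report all three attempts.
     */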
    rc = xc_shadow_control(xch, ctx->domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0, NULL, 0, NULL);
    if ( rc < 0 )
    {
        on1 = errno;
        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                               NULL, 0, NULL, 0, NULL);
        if ( rc < 0 )
            off = errno;
        else
        {
            rc = xc_shadow_control(xch, ctx->domid,
                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                   NULL, 0, NULL, 0, NULL);
            if ( rc < 0 )
                on2 = errno;
        }
        if ( rc < 0 )
        {
            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
            return rc;
        }
    }

    return 0;
}

static int update_progress_string(struct xc_sr_context *ctx, char **str)
{
    xc_interface *xch = ctx->xch;
    char *new_str = NULL;
    unsigned int iter = ctx->save.stats.iteration;

    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
    {
        PERROR("Unable to allocate new progress string");
        return -1;
    }

    free(*str);
    *str = new_str;

    xc_set_progress_prefix(xch, *str);
    return 0;
}

/*
 * This is the live migration precopy policy - it's called periodically during
 * the precopy phase of live migrations, and is responsible for deciding when
 * the precopy phase should terminate and what should be done next.
 *
 * The policy implemented here behaves identically to the policy previously
 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase
 * of the live migration when there are either fewer than 50 dirty pages, or
 * more than 5 precopy rounds have completed.
 */
#define SPP_MAX_ITERATIONS      5
#define SPP_TARGET_DIRTY_COUNT 50

static int simple_precopy_policy(struct precopy_stats stats, void *user)
{
    return ((stats.dirty_count >= 0 &&
             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
            stats.iteration >= SPP_MAX_ITERATIONS)
        ? XGS_POLICY_STOP_AND_COPY
        : XGS_POLICY_CONTINUE_PRECOPY;
}

/*
 * Send memory while guest is running.
 */
static int send_memory_live(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    unsigned int x = 0;
    int rc;
    int policy_decision;

    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
    void *data = ctx->save.callbacks->data;

    struct precopy_stats *policy_stats;

    rc = update_progress_string(ctx, &progress_str);
    if ( rc )
        goto out;

    ctx->save.stats = (struct precopy_stats){
        .dirty_count = ctx->save.p2m_size,
    };
    policy_stats = &ctx->save.stats;

    if ( precopy_policy == NULL )
        precopy_policy = simple_precopy_policy;

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

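    /*
     * Every page starts off marked dirty, so the first pass through this
     * loop transmits the whole of guest memory.
     */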
    for ( ; ; )
    {
        policy_decision = precopy_policy(*policy_stats, data);
        x++;

        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
        {
            rc = update_progress_string(ctx, &progress_str);
            if ( rc )
                goto out;

            rc = send_dirty_pages(ctx, stats.dirty_count);
            if ( rc )
                goto out;
        }

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        policy_stats->iteration     = x;
        policy_stats->total_written += policy_stats->dirty_count;
        policy_stats->dirty_count   = -1;

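        /*
         * Consult the policy a second time with the updated iteration count
         * (and a dirty_count of -1, i.e. unknown) before paying for another
         * hypercall to fetch and clean the dirty bitmap.
         */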
        policy_decision = precopy_policy(*policy_stats, data);

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        if ( xc_shadow_control(
                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
                 NULL, 0, &stats) != ctx->save.p2m_size )
        {
            PERROR("Failed to retrieve logdirty bitmap");
            rc = -1;
            goto out;
        }

        policy_stats->dirty_count = stats.dirty_count;
    }

    if ( policy_decision == XGS_POLICY_ABORT )
    {
        PERROR("Abort precopy loop");
        rc = -1;
        goto out;
    }

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec = { 0 };
    uint64_t *pfns = NULL;
    uint64_t pfn;
    unsigned int count, i;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = read_record(ctx, ctx->save.recv_fd, &rec);
    if ( rc )
        goto err;

    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
    {
        PERROR("Expected a dirty bitmap record, but received %u", rec.type);
        rc = -1;
        goto err;
    }

    if ( rec.length % sizeof(*pfns) )
    {
        PERROR("Invalid dirty pfn list record length %u", rec.length);
        rc = -1;
        goto err;
    }

    count = rec.length / sizeof(*pfns);
    pfns = rec.data;

    for ( i = 0; i < count; i++ )
    {
        pfn = pfns[i];
        /* Valid pfns are 0 .. p2m_size - 1, so reject anything beyond. */
        if ( pfn >= ctx->save.p2m_size )
        {
            PERROR("Invalid pfn 0x%" PRIx64, pfn);
            rc = -1;
            goto err;
        }

        set_bit(pfn, dirty_bitmap);
    }

    rc = 0;

 err:
    free(rec.data);
    return rc;
}

/*
 * Suspend the domain and send dirty memory.  This is the last iteration of
 * the live migration and the heart of the checkpointed stream.
 */
static int suspend_and_send_dirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = suspend_domain(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
             NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
         ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    if ( ctx->save.live )
    {
        rc = update_progress_string(ctx, &progress_str);
        if ( rc )
            goto out;
    }
    else
        xc_set_progress_prefix(xch, "Checkpointed save");

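    /*
     * Pages which were deferred during the live phase (ballooned pages, or
     * pages which failed to normalise) must be retried now the domain is
     * paused, so fold them into the dirty bitmap.
     */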
    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);

    if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
    {
        rc = colo_merge_secondary_dirty_bitmap(ctx);
        if ( rc )
        {
            PERROR("Failed to get secondary vm's dirty pages");
            goto out;
        }
    }

    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
    if ( rc )
        goto out;

    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
    ctx->save.nr_deferred_pages = 0;

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

static int verify_frames(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    int rc;
    struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };

    DPRINTF("Enabling verify mode");

    rc = write_record(ctx, &rec);
    if ( rc )
        goto out;

    xc_set_progress_prefix(xch, "Frames verify");
    rc = send_all_pages(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
             NULL, 0, &stats) != ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    DPRINTF("  Further stats: faults %u, dirty %u",
            stats.fault_count, stats.dirty_count);

 out:
    return rc;
}

/*
 * Send all domain memory.  This is the heart of the live migration loop.
 */
static int send_domain_memory_live(struct xc_sr_context *ctx)
{
    int rc;

    rc = enable_logdirty(ctx);
    if ( rc )
        goto out;

    rc = send_memory_live(ctx);
    if ( rc )
        goto out;

    rc = suspend_and_send_dirty(ctx);
    if ( rc )
        goto out;

    if ( ctx->save.debug && ctx->stream_type != XC_STREAM_PLAIN )
    {
        rc = verify_frames(ctx);
        if ( rc )
            goto out;
    }

 out:
    return rc;
}

/*
 * Checkpointed save.
 */
static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
{
    return suspend_and_send_dirty(ctx);
}

/*
 * Send all domain memory, pausing the domain first.  Generally used for
 * suspend-to-file.
 */
static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = suspend_domain(ctx);
    if ( rc )
        goto err;

    xc_set_progress_prefix(xch, "Frames");

    rc = send_all_pages(ctx);
    if ( rc )
        goto err;

 err:
    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = ctx->save.ops.setup(ctx);
    if ( rc )
        goto err;

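    /*
     * The dirty bitmap must live in hypercall-safe memory, as it is handed
     * directly to Xen by the XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK} operations.
     */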
    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                  sizeof(*ctx->save.batch_pfns));
    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));

    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
    {
        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
              " deferred pages");
        rc = -1;
        errno = ENOMEM;
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                      NULL, 0, NULL, 0, NULL);

    if ( ctx->save.ops.cleanup(ctx) )
        PERROR("Failed to clean up");

    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
    free(ctx->save.deferred_pages);
    free(ctx->save.batch_pfns);
}

/*
 * Save a domain.
 */
static int save(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Saving domain %d, type %s",
            ctx->domid, dhdr_type_to_str(guest_type));

    rc = setup(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Start of stream");

    rc = write_headers(ctx, guest_type);
    if ( rc )
        goto err;

    rc = ctx->save.ops.static_data(ctx);
    if ( rc )
        goto err;

    rc = write_static_data_end_record(ctx);
    if ( rc )
        goto err;

    rc = ctx->save.ops.start_of_stream(ctx);
    if ( rc )
        goto err;

    do {
        rc = ctx->save.ops.start_of_checkpoint(ctx);
        if ( rc )
            goto err;

        rc = ctx->save.ops.check_vm_state(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.live )
            rc = send_domain_memory_live(ctx);
        else if ( ctx->stream_type != XC_STREAM_PLAIN )
            rc = send_domain_memory_checkpointed(ctx);
        else
            rc = send_domain_memory_nonlive(ctx);

        if ( rc )
            goto err;

        if ( !ctx->dominfo.shutdown ||
             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
        {
            ERROR("Domain has not been suspended");
            rc = -1;
            goto err;
        }

        rc = ctx->save.ops.end_of_checkpoint(ctx);
        if ( rc )
            goto err;

        if ( ctx->stream_type != XC_STREAM_PLAIN )
        {
            /*
             * We have now completed the initial live portion of the checkpoint
             * process. Therefore switch into periodically sending synchronous
             * batches of pages.
             */
            ctx->save.live = false;

            rc = write_checkpoint_record(ctx);
            if ( rc )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( !rc )
                {
                    rc = -1;
                    goto err;
                }
            }

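            /*
             * These callbacks follow the same convention as suspend(): a
             * positive return indicates success, while 0 (or a negative
             * value) is treated as failure.
             */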
            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
            if ( rc <= 0 )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->wait_checkpoint(
                    ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else if ( ctx->stream_type == XC_STREAM_REMUS )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else
            {
                ERROR("Unknown checkpointed stream");
                rc = -1;
                goto err;
            }
        }
    } while ( ctx->stream_type != XC_STREAM_PLAIN );

    xc_report_progress_single(xch, "End of stream");

    rc = write_end_record(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Complete");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Save failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
                   uint32_t flags, struct save_callbacks *callbacks,
                   xc_stream_type_t stream_type, int recv_fd)
{
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.save.callbacks = callbacks;
    ctx.save.live  = !!(flags & XCFLAGS_LIVE);
    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
    ctx.save.recv_fd = recv_fd;

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->wait_checkpoint);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint && callbacks->postcopy);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        if ( ctx.dominfo.hvm )
            assert(callbacks->switch_qemu_logdirty);
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    DPRINTF("fd %d, dom %u, flags %u, hvm %d",
            io_fd, dom, flags, ctx.dominfo.hvm);

    ctx.domid = dom;

    if ( ctx.dominfo.hvm )
    {
        ctx.save.ops = save_ops_x86_hvm;
        return save(&ctx, DHDR_TYPE_X86_HVM);
    }
    else
    {
        ctx.save.ops = save_ops_x86_pv;
        return save(&ctx, DHDR_TYPE_X86_PV);
    }
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */