#include <assert.h>
#include <arpa/inet.h>

#include "xc_sr_common.h"

/*
 * Writes an Image header and Domain header into the stream.
 */
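/*
 * Note: the image header fields below are converted to network byte order
 * before being written; its options field advertises a little-endian stream,
 * and the domain header (like every subsequent record) is written directly
 * in host byte order.
 */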
static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
    struct xc_sr_ihdr ihdr =
        {
            .marker  = IHDR_MARKER,
            .id      = htonl(IHDR_ID),
            .version = htonl(IHDR_VERSION),
            .options = htons(IHDR_OPT_LITTLE_ENDIAN),
        };
    struct xc_sr_dhdr dhdr =
        {
            .type       = guest_type,
            .page_shift = XC_PAGE_SHIFT,
            .xen_major  = (xen_version >> 16) & 0xffff,
            .xen_minor  = (xen_version)       & 0xffff,
        };

    if ( xen_version < 0 )
    {
        PERROR("Unable to obtain Xen Version");
        return -1;
    }

    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Unable to write Image Header to stream");
        return -1;
    }

    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Unable to write Domain Header to stream");
        return -1;
    }

    return 0;
}

/*
 * Writes an END record into the stream.
 */
static int write_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { REC_TYPE_END, 0, NULL };

    return write_record(ctx, &end);
}

/*
 * Writes a CHECKPOINT record into the stream.
 */
static int write_checkpoint_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record checkpoint = { REC_TYPE_CHECKPOINT, 0, NULL };

    return write_record(ctx, &checkpoint);
}

/*
 * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
 * is constructed in ctx->save.batch_pfns.
 *
 * This function:
 * - gets the types for each pfn in the batch.
 * - for each pfn with real data:
 *   - maps and attempts to localise the pages.
 * - constructs and writes a PAGE_DATA record into the stream.
 */
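/*
 * As assembled below via the iovec, the record on the wire consists of: the
 * record type and length (from struct xc_sr_record), the page data header
 * carrying the pfn count, one 64-bit entry of ((type << 32) | pfn) per
 * batched pfn, and finally PAGE_SIZE bytes of data for each pfn which has
 * backing data to send.
 */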
static int write_batch(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = NULL, *types = NULL;
    void *guest_mapping = NULL;
    void **guest_data = NULL;
    void **local_pages = NULL;
    int *errors = NULL, rc = -1;
    unsigned i, p, nr_pages = 0, nr_pages_mapped = 0;
    unsigned nr_pfns = ctx->save.nr_batch_pfns;
    void *page, *orig_page;
    uint64_t *rec_pfns = NULL;
    struct iovec *iov = NULL;
    int iovcnt = 0;
    struct xc_sr_rec_page_data_header hdr = { 0 };
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_PAGE_DATA,
    };

    assert(nr_pfns != 0);

    /* Mfns of the batch pfns. */
    mfns = malloc(nr_pfns * sizeof(*mfns));
    /* Types of the batch pfns. */
    types = malloc(nr_pfns * sizeof(*types));
    /* Errors from attempting to map the gfns. */
    errors = malloc(nr_pfns * sizeof(*errors));
    /* Pointers to page data to send.  Mapped gfns or local allocations. */
    guest_data = calloc(nr_pfns, sizeof(*guest_data));
    /* Pointers to locally allocated pages.  Need freeing. */
    local_pages = calloc(nr_pfns, sizeof(*local_pages));
    /* iovec[] for writev(): four fixed entries, plus one per page written. */
    iov = malloc((nr_pfns + 4) * sizeof(*iov));

    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
    {
        ERROR("Unable to allocate arrays for a batch of %u pages",
              nr_pfns);
        goto err;
    }

    for ( i = 0; i < nr_pfns; ++i )
    {
        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                      ctx->save.batch_pfns[i]);

        /* Likely a ballooned page. */
        if ( mfns[i] == INVALID_MFN )
        {
            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
            ++ctx->save.nr_deferred_pages;
        }
    }

    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
    if ( rc )
    {
        PERROR("Failed to get types for pfn batch");
        goto err;
    }
    rc = -1;

    /* Compact mfns[] to cover only the pfns which have data to map. */
    for ( i = 0; i < nr_pfns; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
        case XEN_DOMCTL_PFINFO_XTAB:
            continue;
        }

        mfns[nr_pages++] = mfns[i];
    }

    if ( nr_pages > 0 )
    {
        guest_mapping = xenforeignmemory_map(xch->fmem,
            ctx->domid, PROT_READ, nr_pages, mfns, errors);
        if ( !guest_mapping )
        {
            PERROR("Failed to map guest pages");
            goto err;
        }
        nr_pages_mapped = nr_pages;

        for ( i = 0, p = 0; i < nr_pfns; ++i )
        {
            switch ( types[i] )
            {
            case XEN_DOMCTL_PFINFO_BROKEN:
            case XEN_DOMCTL_PFINFO_XALLOC:
            case XEN_DOMCTL_PFINFO_XTAB:
                continue;
            }

            if ( errors[p] )
            {
                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
                goto err;
            }

            orig_page = page = guest_mapping + (p * PAGE_SIZE);
            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);

            if ( orig_page != page )
                local_pages[i] = page;

            if ( rc )
            {
                if ( rc == -1 && errno == EAGAIN )
                {
                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
                    ++ctx->save.nr_deferred_pages;
                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
                    --nr_pages;
                }
                else
                    goto err;
            }
            else
                guest_data[i] = page;

            rc = -1;
            ++p;
        }
    }

    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
    if ( !rec_pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
              nr_pfns * sizeof(*rec_pfns));
        goto err;
    }

    hdr.count = nr_pfns;

    rec.length = sizeof(hdr);
    rec.length += nr_pfns * sizeof(*rec_pfns);
    rec.length += nr_pages * PAGE_SIZE;

    for ( i = 0; i < nr_pfns; ++i )
        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];

    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = &hdr;
    iov[2].iov_len = sizeof(hdr);

    iov[3].iov_base = rec_pfns;
    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);

    iovcnt = 4;

    if ( nr_pages )
    {
        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( guest_data[i] )
            {
                iov[iovcnt].iov_base = guest_data[i];
                iov[iovcnt].iov_len = PAGE_SIZE;
                iovcnt++;
                --nr_pages;
            }
        }
    }

    if ( writev_exact(ctx->fd, iov, iovcnt) )
    {
        PERROR("Failed to write page data to stream");
        goto err;
    }

    /* Sanity check we have sent all the pages we expected to. */
    assert(nr_pages == 0);
    rc = ctx->save.nr_batch_pfns = 0;

 err:
    free(rec_pfns);
    if ( guest_mapping )
        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
    for ( i = 0; local_pages && i < nr_pfns; ++i )
        free(local_pages[i]);
    free(iov);
    free(local_pages);
    free(guest_data);
    free(errors);
    free(types);
    free(mfns);

    return rc;
}

/*
 * Flush a batch of pfns into the stream.
 */
static int flush_batch(struct xc_sr_context *ctx)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == 0 )
        return rc;

    rc = write_batch(ctx);

    if ( !rc )
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
                                    MAX_BATCH_SIZE *
                                    sizeof(*ctx->save.batch_pfns));
    }

    return rc;
}

/*
 * Add a single pfn to the batch, flushing the batch if full.
 */
static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
        rc = flush_batch(ctx);

    if ( rc == 0 )
        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;

    return rc;
}

/*
 * Pause/suspend the domain, and refresh ctx->dominfo if required.
 */
static int suspend_domain(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;

    /* TODO: Properly specify the return value from this callback.  All
     * implementations currently appear to return 1 for success, whereas
     * the legacy code checks for != 0. */
    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);

    if ( cb_rc == 0 )
    {
        ERROR("save callback suspend() failed: %d", cb_rc);
        return -1;
    }

    /* Refresh domain information. */
    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
         (ctx->dominfo.domid != ctx->domid) )
    {
        PERROR("Unable to refresh domain information");
        return -1;
    }

    /* Confirm the domain has actually been paused. */
    if ( !ctx->dominfo.shutdown ||
         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
    {
        ERROR("Domain has not been suspended: shutdown %d, reason %d",
              ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
        return -1;
    }

    xc_report_progress_single(xch, "Domain now suspended");

    return 0;
}

/*
 * Send a subset of pages in the guest's p2m, according to the dirty bitmap.
 * Used for each subsequent iteration of the live migration loop.
 *
 * Bitmap is bounded by p2m_size.
 */
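/*
 * Progress is reported every 2^(22 - 12) = 1024 pages, i.e. every 4MiB of
 * guest memory assuming 4KiB pages.
 */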
static int send_dirty_pages(struct xc_sr_context *ctx,
                            unsigned long entries)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t p;
    unsigned long written;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
    {
        if ( !test_bit(p, dirty_bitmap) )
            continue;

        rc = add_to_batch(ctx, p);
        if ( rc )
            return rc;

        /* Update progress every 4MB worth of memory sent. */
        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
            xc_report_progress_step(xch, written, entries);

        ++written;
    }

    rc = flush_batch(ctx);
    if ( rc )
        return rc;

    if ( written > entries )
        DPRINTF("Bitmap contained more entries than expected...");

    xc_report_progress_step(xch, entries, entries);

    return ctx->save.ops.check_vm_state(ctx);
}

/*
 * Send all pages in the guest's p2m.  Used as the first iteration of the live
 * migration loop, and for a non-live save.
 */
static int send_all_pages(struct xc_sr_context *ctx)
{
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    return send_dirty_pages(ctx, ctx->save.p2m_size);
}

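/*
 * Enable logdirty tracking for the domain.  If the first attempt fails
 * (typically because logdirty is already enabled for VRAM tracking),
 * disable it and try enabling it again.
 */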
static int enable_logdirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int on1 = 0, off = 0, on2 = 0;
    int rc;

    /* This juggling is required if logdirty is enabled for VRAM tracking. */
    rc = xc_shadow_control(xch, ctx->domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0, NULL, 0, NULL);
    if ( rc < 0 )
    {
        on1 = errno;
        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                               NULL, 0, NULL, 0, NULL);
        if ( rc < 0 )
            off = errno;
        else
        {
            rc = xc_shadow_control(xch, ctx->domid,
                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                   NULL, 0, NULL, 0, NULL);
            if ( rc < 0 )
                on2 = errno;
        }
        if ( rc < 0 )
        {
            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
            return rc;
        }
    }

    return 0;
}

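/*
 * Replace the current progress prefix with one naming the current precopy
 * iteration.  The previous string is freed and *str updated to point at the
 * new allocation.
 */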
static int update_progress_string(struct xc_sr_context *ctx, char **str)
{
    xc_interface *xch = ctx->xch;
    char *new_str = NULL;
    unsigned int iter = ctx->save.stats.iteration;

    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
    {
        PERROR("Unable to allocate new progress string");
        return -1;
    }

    free(*str);
    *str = new_str;

    xc_set_progress_prefix(xch, *str);
    return 0;
}

/*
 * This is the live migration precopy policy - it's called periodically during
 * the precopy phase of live migrations, and is responsible for deciding when
 * the precopy phase should terminate and what should be done next.
 *
 * The policy implemented here behaves identically to the policy previously
 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
 * the live migration when there are either fewer than 50 dirty pages, or more
 * than 5 precopy rounds have completed.
 */
#define SPP_MAX_ITERATIONS      5
#define SPP_TARGET_DIRTY_COUNT 50

static int simple_precopy_policy(struct precopy_stats stats, void *user)
{
    return ((stats.dirty_count >= 0 &&
            stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
            stats.iteration >= SPP_MAX_ITERATIONS)
        ? XGS_POLICY_STOP_AND_COPY
        : XGS_POLICY_CONTINUE_PRECOPY;
}
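
/*
 * Callers may install their own policy via ctx->save.callbacks->precopy_policy
 * (see send_memory_live() below); the default above is only used when none is
 * provided.  As a hypothetical sketch, a caller-supplied policy using a larger
 * dirty-page threshold might look like:
 *
 *     static int my_policy(struct precopy_stats stats, void *user)
 *     {
 *         return (stats.dirty_count >= 0 && stats.dirty_count < 200)
 *             ? XGS_POLICY_STOP_AND_COPY
 *             : XGS_POLICY_CONTINUE_PRECOPY;
 *     }
 *
 * A dirty_count of -1 indicates that the count for the current iteration is
 * not yet known.
 */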

/*
 * Send memory while the guest is running.
 */
static int send_memory_live(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    unsigned int x = 0;
    int rc;
    int policy_decision;

    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
    void *data = ctx->save.callbacks->data;

    struct precopy_stats *policy_stats;

    rc = update_progress_string(ctx, &progress_str);
    if ( rc )
        goto out;

    ctx->save.stats = (struct precopy_stats)
        { .dirty_count   = ctx->save.p2m_size };
    policy_stats = &ctx->save.stats;

    if ( precopy_policy == NULL )
        precopy_policy = simple_precopy_policy;

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    for ( ; ; )
    {
        policy_decision = precopy_policy(*policy_stats, data);
        x++;

        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
        {
            rc = update_progress_string(ctx, &progress_str);
            if ( rc )
                goto out;

            rc = send_dirty_pages(ctx, stats.dirty_count);
            if ( rc )
                goto out;
        }

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        policy_stats->iteration     = x;
        policy_stats->total_written += policy_stats->dirty_count;
        policy_stats->dirty_count   = -1;

        policy_decision = precopy_policy(*policy_stats, data);

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        if ( xc_shadow_control(
                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
                 NULL, 0, &stats) != ctx->save.p2m_size )
        {
            PERROR("Failed to retrieve logdirty bitmap");
            rc = -1;
            goto out;
        }

        policy_stats->dirty_count = stats.dirty_count;
    }

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

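/*
 * For COLO, read a CHECKPOINT_DIRTY_PFN_LIST record from the secondary over
 * recv_fd and merge the listed pfns into the local dirty bitmap so that they
 * are retransmitted.
 */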
static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec = { 0, 0, NULL };
    uint64_t *pfns = NULL;
    uint64_t pfn;
    unsigned count, i;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = read_record(ctx, ctx->save.recv_fd, &rec);
    if ( rc )
        goto err;

    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
    {
        PERROR("Expected a dirty bitmap record, but received %u", rec.type);
        rc = -1;
        goto err;
    }

    if ( rec.length % sizeof(*pfns) )
    {
        PERROR("Invalid dirty pfn list record length %u", rec.length);
        rc = -1;
        goto err;
    }

    count = rec.length / sizeof(*pfns);
    pfns = rec.data;

    for ( i = 0; i < count; i++ )
    {
        pfn = pfns[i];
        if ( pfn > ctx->save.p2m_size )
        {
            PERROR("Invalid pfn 0x%" PRIx64, pfn);
            rc = -1;
            goto err;
        }

        set_bit(pfn, dirty_bitmap);
    }

    rc = 0;

 err:
    free(rec.data);
    return rc;
}

/*
 * Suspend the domain and send dirty memory.
 * This is the last iteration of the live migration and the
 * heart of the checkpointed stream.
 */
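/*
 * Pages previously deferred in write_batch() (ballooned pfns, or pages whose
 * normalisation failed with EAGAIN) are OR-ed back into the dirty bitmap here
 * so they are retried in this final pass.
 */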
static int suspend_and_send_dirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = suspend_domain(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
             NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
         ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    if ( ctx->save.live )
    {
        rc = update_progress_string(ctx, &progress_str);
        if ( rc )
            goto out;
    }
    else
        xc_set_progress_prefix(xch, "Checkpointed save");

    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);

    if ( !ctx->save.live && ctx->save.checkpointed == XC_MIG_STREAM_COLO )
    {
        rc = colo_merge_secondary_dirty_bitmap(ctx);
        if ( rc )
        {
            PERROR("Failed to get secondary vm's dirty pages");
            goto out;
        }
    }

    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
    if ( rc )
        goto out;

    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
    ctx->save.nr_deferred_pages = 0;

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

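/*
 * Debug aid: write a VERIFY record, then resend every page so that the
 * restore side can check the contents against the copies it has already
 * received, and report the hypervisor's fault/dirty statistics afterwards.
 */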
static int verify_frames(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    int rc;
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_VERIFY,
        .length = 0,
    };

    DPRINTF("Enabling verify mode");

    rc = write_record(ctx, &rec);
    if ( rc )
        goto out;

    xc_set_progress_prefix(xch, "Frames verify");
    rc = send_all_pages(ctx);
    if ( rc )
        goto out;

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
             NULL, 0, &stats) != ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    DPRINTF("  Further stats: faults %u, dirty %u",
            stats.fault_count, stats.dirty_count);

 out:
    return rc;
}

/*
 * Send all domain memory.  This is the heart of the live migration loop.
 */
static int send_domain_memory_live(struct xc_sr_context *ctx)
{
    int rc;

    rc = enable_logdirty(ctx);
    if ( rc )
        goto out;

    rc = send_memory_live(ctx);
    if ( rc )
        goto out;

    rc = suspend_and_send_dirty(ctx);
    if ( rc )
        goto out;

    if ( ctx->save.debug && ctx->save.checkpointed != XC_MIG_STREAM_NONE )
    {
        rc = verify_frames(ctx);
        if ( rc )
            goto out;
    }

 out:
    return rc;
}

/*
 * Checkpointed save.
 */
static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
{
    return suspend_and_send_dirty(ctx);
}

/*
 * Send all domain memory, pausing the domain first.  Generally used for
 * suspend-to-file.
 */
static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = suspend_domain(ctx);
    if ( rc )
        goto err;

    xc_set_progress_prefix(xch, "Frames");

    rc = send_all_pages(ctx);
    if ( rc )
        goto err;

 err:
    return rc;
}

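/*
 * Perform arch-specific setup, then allocate the dirty bitmap (as a
 * hypercall buffer), the batch pfn array and the deferred-pages bitmap.
 */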
static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = ctx->save.ops.setup(ctx);
    if ( rc )
        goto err;

    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
                   xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                  sizeof(*ctx->save.batch_pfns));
    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));

    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
    {
        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
              " deferred pages");
        rc = -1;
        errno = ENOMEM;
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

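/*
 * Disable logdirty tracking, run arch-specific cleanup, and free the
 * resources allocated in setup().
 */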
static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                      NULL, 0, NULL, 0, NULL);

    if ( ctx->save.ops.cleanup(ctx) )
        PERROR("Failed to clean up");

    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
    free(ctx->save.deferred_pages);
    free(ctx->save.batch_pfns);
}

/*
 * Save a domain.
 */
static int save(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Saving domain %d, type %s",
            ctx->domid, dhdr_type_to_str(guest_type));

    rc = setup(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Start of stream");

    rc = write_headers(ctx, guest_type);
    if ( rc )
        goto err;

    rc = ctx->save.ops.start_of_stream(ctx);
    if ( rc )
        goto err;

    do {
        rc = ctx->save.ops.start_of_checkpoint(ctx);
        if ( rc )
            goto err;

        rc = ctx->save.ops.check_vm_state(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.live )
            rc = send_domain_memory_live(ctx);
        else if ( ctx->save.checkpointed != XC_MIG_STREAM_NONE )
            rc = send_domain_memory_checkpointed(ctx);
        else
            rc = send_domain_memory_nonlive(ctx);

        if ( rc )
            goto err;

        if ( !ctx->dominfo.shutdown ||
             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
        {
            ERROR("Domain has not been suspended");
            rc = -1;
            goto err;
        }

        rc = ctx->save.ops.end_of_checkpoint(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.checkpointed != XC_MIG_STREAM_NONE )
        {
            /*
             * We have now completed the initial live portion of the checkpoint
             * process. Therefore switch into periodically sending synchronous
             * batches of pages.
             */
            ctx->save.live = false;

            rc = write_checkpoint_record(ctx);
            if ( rc )
                goto err;

            if ( ctx->save.checkpointed == XC_MIG_STREAM_COLO )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( !rc )
                {
                    rc = -1;
                    goto err;
                }
            }

            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
            if ( rc <= 0 )
                goto err;

            if ( ctx->save.checkpointed == XC_MIG_STREAM_COLO )
            {
                rc = ctx->save.callbacks->wait_checkpoint(
                    ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else if ( ctx->save.checkpointed == XC_MIG_STREAM_REMUS )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else
            {
                ERROR("Unknown checkpointed stream");
                rc = -1;
                goto err;
            }
        }
    } while ( ctx->save.checkpointed != XC_MIG_STREAM_NONE );

    xc_report_progress_single(xch, "End of stream");

    rc = write_end_record(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Complete");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Save failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

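/*
 * Public entry point.  Establishes the save context from the caller's
 * parameters and dispatches to save() with the appropriate guest type and
 * ops for PV or HVM domains.
 */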
int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
                   uint32_t flags, struct save_callbacks *callbacks,
                   int hvm, xc_migration_stream_t stream_type, int recv_fd)
{
    struct xc_sr_context ctx =
        {
            .xch = xch,
            .fd = io_fd,
        };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.save.callbacks = callbacks;
    ctx.save.live  = !!(flags & XCFLAGS_LIVE);
    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
    ctx.save.checkpointed = stream_type;
    ctx.save.recv_fd = recv_fd;

    /* If altering migration_stream update this assert too. */
    assert(stream_type == XC_MIG_STREAM_NONE ||
           stream_type == XC_MIG_STREAM_REMUS ||
           stream_type == XC_MIG_STREAM_COLO);

    /* Sanity checks for callbacks. */
    if ( hvm )
        assert(callbacks->switch_qemu_logdirty);
    if ( ctx.save.checkpointed )
        assert(callbacks->checkpoint && callbacks->postcopy);
    if ( ctx.save.checkpointed == XC_MIG_STREAM_COLO )
        assert(callbacks->wait_checkpoint);

    DPRINTF("fd %d, dom %u, flags %u, hvm %d", io_fd, dom, flags, hvm);

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    ctx.domid = dom;

    if ( ctx.dominfo.hvm )
    {
        ctx.save.ops = save_ops_x86_hvm;
        return save(&ctx, DHDR_TYPE_X86_HVM);
    }
    else
    {
        ctx.save.ops = save_ops_x86_pv;
        return save(&ctx, DHDR_TYPE_X86_PV);
    }
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */