#include <arpa/inet.h>

#include <assert.h>

#include "xc_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

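    /* Header fields are sent in network byte order; convert to host order. */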
    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }
    else if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }
    else if ( ihdr.version != IHDR_VERSION )
    {
        ERROR("Invalid Version: Expected %d, Got %d",
              IHDR_VERSION, ihdr.version);
        return -1;
    }
    else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;
    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed. To
 * avoid realloc()ing too excessively, the size is increased to the nearest
 * power of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Smear the bits rightwards so new_max becomes one less than the
         * nearest power of two larger than pfn. */
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif
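        /* new_max is now an all-ones mask (2^k - 1) covering pfn, so the
         * bitmap below grows by powers of two. */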

        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset.  If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

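    /* Collect the subset of pfns which are not yet populated, skipping
     * unbacked (XTAB) and broken pages when type information is present. */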
    for ( i = 0; i < count; ++i )
    {
        if ( (!types ||
              (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
               types[i] != XEN_DOMCTL_PFINFO_BROKEN)) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;
            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %#"PRIpfn" (index %u)",
                      pfns[i], i);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned i,    /* i indexes the pfns from the record. */
        j,         /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

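    /* First pass: record every page's type, and collect the gfns of the
     * types which carry a page of data in the stream (NOTAB and the L1-L4
     * page-table types, pinned or not). */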
    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:

        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:

            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
            break;
        }
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(xch->fmem,
        ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

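    /* Second pass: copy (or verify) the stream data into each mapped page.
     * j advances only over the mapped subset; types carrying no data are
     * skipped entirely. */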
    for ( i = 0, j = 0; i < count; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_XTAB:
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
            /* No page data to deal with. */
            continue;
        }

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }
    else if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }
    else if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

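    /* Each 64-bit entry packs the pfn into its low bits and the type into
     * bits 32 and up; split the two and sanity check both halves. */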
    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
        {
            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }
        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned count, written;
    uint64_t i, *pfns = NULL;
    struct iovec *iov = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             NULL, 0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }

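    /* The count above sizes the buffer; a second pass below fills it. */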
    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written >= count )
        {
            ERROR("Dirty pfn list overflow");
            goto err;
        }

        pfns[written++] = i;
    }

    /* iovec[] for writev(). */
    iov = malloc(3 * sizeof(*iov));
    if ( !iov )
    {
        ERROR("Unable to allocate memory for sending dirty bitmap");
        goto err;
    }

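    /* A record on the wire is { type, length, body }; writev() sends the
     * header and the pfn payload together without an extra copy. */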
    rec.length = count * sizeof(*pfns);

    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = pfns;
    iov[2].iov_len = count * sizeof(*pfns);

    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;
 err:
    free(pfns);
    free(iov);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned i;

    if ( !ctx->restore.checkpointed )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
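        /* Callback return convention (encapsulated by the macro below):
         * 1 = success, 2 = broken channel (failover), anything else fatal. */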
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume guest */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume secondary vm */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint */
        ret = ctx->restore.callbacks->wait_checkpoint(
                                                ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Suspend secondary vm */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned new_alloc_num;
    struct xc_sr_record *p;

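    /* Grow the buffer in DEFAULT_BUF_RECORDS-sized steps rather than once
     * per record. */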
    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));

    return 0;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(xch, dirty_bitmap,
                                NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

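    /* Start by tracking the first 8k pfns; pfn_set_populated() grows the
     * bitmap on demand. */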
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
        xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->restore.p2m_size)));
    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);
    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

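    /* Main record loop.  Once a checkpoint has been seen, every record
     * except END and CHECKPOINT is buffered, so an incomplete checkpoint
     * can be discarded and the last consistent state restored instead. */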
    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, reaching here means something went wrong on the primary;
     * fail over from the last checkpoint state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      unsigned int hvm, unsigned int pae,
                      xc_migration_stream_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx =
        {
            .xch = xch,
            .fd = io_fd,
        };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.checkpointed = stream_type;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity checks for callbacks. */
    if ( stream_type )
        assert(callbacks->checkpoint);

    if ( ctx.restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        /* This is a COLO restore. */
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
    }

    DPRINTF("fd %d, dom %u, hvm %u, pae %u, stream_type %d",
            io_fd, dom, hvm, pae, stream_type);

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;

    if ( ctx.dominfo.hvm )
    {
        ctx.restore.ops = restore_ops_x86_hvm;
        if ( restore(&ctx) )
            return -1;
    }
    else
    {
        ctx.restore.ops = restore_ops_x86_pv;
        if ( restore(&ctx) )
            return -1;
    }

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */