#include <arpa/inet.h>

#include <assert.h>

#include "xc_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

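    /* id, version and options are transmitted in network byte order. */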
    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }
    else if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }
    else if ( ihdr.version != IHDR_VERSION )
    {
        ERROR("Invalid Version: Expected %d, Got %d",
              IHDR_VERSION, ihdr.version);
        return -1;
    }
    else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF(" Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;
    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed. To
 * avoid realloc()ing too excessively, the size is increased to the nearest
 * power of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
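        /* Propagating the high set bit downwards gives the smallest
         * 2^k - 1 >= pfn, e.g. pfn 0x1234 becomes new_max 0x1fff. */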
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif

        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset. If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

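    /*
     * Two parallel arrays are kept: the populate hypercall hands back the
     * allocated frames in mfns[], so pfns[] preserves the original pfns for
     * the p2m update afterwards.
     */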
    for ( i = 0; i < count; ++i )
    {
        if ( (!types || (types &&
                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;
            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %u", i);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned i, /* i indexes the pfns from the record. */
        j,      /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

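    /*
     * First pass: record each pfn's type and collect the gfns of the pages
     * which carry data, so they can be mapped in a single batch below.
     */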
    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:

        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:

            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
            break;
        }
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(xch->fmem,
        ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

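    /*
     * Second pass: copy (or verify) the stream data.  i walks every pfn in
     * the record; j only advances for pfns which were actually mapped.
     */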
    for ( i = 0, j = 0; i < count; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_XTAB:
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
            /* No page data to deal with. */
            continue;
        }

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }
    else if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }
    else if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

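    /*
     * Each 64-bit entry in the record encodes the pfn in its low bits and
     * the page type in its high bits; split them and sanity check both.
     */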
    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
        {
            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }
        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned count, written;
    uint64_t i, *pfns = NULL;
    struct iovec *iov = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             NULL, 0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

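    /* Count the dirty bits first so the pfn buffer can be sized exactly. */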
    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }

    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written > count )
        {
            ERROR("Dirty pfn list exceeded allocated space");
            goto err;
        }

        pfns[written++] = i;
    }

    /* iovec[] for writev(). */
    iov = malloc(3 * sizeof(*iov));
    if ( !iov )
    {
        ERROR("Unable to allocate memory for sending dirty bitmap");
        goto err;
    }

    rec.length = count * sizeof(*pfns);

    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = pfns;
    iov[2].iov_len = count * sizeof(*pfns);

    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;
 err:
    free(pfns);
    free(iov);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned i;

    if ( !ctx->restore.checkpointed )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

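    /*
     * The first CHECKPOINT record turns on record buffering; each later one
     * replays the records buffered since the previous checkpoint, which now
     * form a complete, consistent set.
     */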
    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume the guest. */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume the secondary vm. */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint. */
        ret = ctx->restore.callbacks->wait_checkpoint(
                                                ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Suspend the secondary vm. */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned new_alloc_num;
    struct xc_sr_record *p;

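    /* Grow the record buffer in DEFAULT_BUF_RECORDS-sized steps as needed. */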
    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));

    return 0;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(xch, dirty_bitmap,
                                NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

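    /*
     * Start with a bitmap covering the first 8192 pfns (32MB of 4k pages);
     * pfn_set_populated() grows it on demand.
     */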
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
        xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                NRPAGES(bitmap_size(ctx->restore.p2m_size)));
    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);
    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

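    /*
     * Main record loop: read records until END, buffering them when a
     * checkpointed stream requires it (see handle_checkpoint()).
     */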
    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:

    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, reaching here means something went wrong on the primary;
     * fail over from the last checkpointed state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      unsigned int hvm, unsigned int pae,
                      xc_migration_stream_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx =
        {
            .xch = xch,
            .fd = io_fd,
        };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.checkpointed = stream_type;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity checks for callbacks. */
    if ( stream_type )
        assert(callbacks->checkpoint);

    if ( ctx.restore.checkpointed == XC_MIG_STREAM_COLO )
    {
        /* this is COLO restore */
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
    }

    DPRINTF("fd %d, dom %u, hvm %u, pae %u, stream_type %d",
            io_fd, dom, hvm, pae, stream_type);

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;

    if ( ctx.dominfo.hvm )
    {
        ctx.restore.ops = restore_ops_x86_hvm;
        if ( restore(&ctx) )
            return -1;
    }
    else
    {
        ctx.restore.ops = restore_ops_x86_pv;
        if ( restore(&ctx) )
            return -1;
    }

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */