#include <assert.h>

#include "xc_sr_common_x86_pv.h"

static xen_pfn_t pfn_to_mfn(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    assert(pfn <= ctx->x86_pv.max_pfn);

    return xc_pfn_to_mfn(pfn, ctx->x86_pv.p2m, ctx->x86_pv.width);
}

/*
 * Expand our local tracking information for the p2m table and domain's
 * maximum size. Normally this will be called once to expand from 0 to
 * max_pfn, but is liable to expand multiple times if the domain grows on the
 * sending side after migration has started.
 */
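/*
 * Worked example of the frame arithmetic below (illustrative, assuming the
 * usual 4k PAGE_SIZE): a 64bit guest has width == 8, so one p2m frame holds
 * fpp == 4096 / 8 == 512 entries, and max_pfn == 0x1ffff needs
 * end_frame == (0x1ffff / 512) + 1 == 0x100 frames of p2m.
 */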
static int expand_p2m(struct xc_sr_context *ctx, unsigned long max_pfn)
{
    xc_interface *xch = ctx->xch;
    unsigned long old_max = ctx->x86_pv.max_pfn, i;
    unsigned int fpp = PAGE_SIZE / ctx->x86_pv.width;
    unsigned long end_frame = (max_pfn / fpp) + 1;
    unsigned long old_end_frame = (old_max / fpp) + 1;
    xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
    uint32_t *pfn_types = NULL;
    size_t p2msz, p2m_pfnsz, pfn_typesz;

    assert(max_pfn > old_max);

    p2msz = (max_pfn + 1) * ctx->x86_pv.width;
    p2m = realloc(ctx->x86_pv.p2m, p2msz);
    if ( !p2m )
    {
        ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
        return -1;
    }
    ctx->x86_pv.p2m = p2m;

    pfn_typesz = (max_pfn + 1) * sizeof(*pfn_types);
    pfn_types = realloc(ctx->x86_pv.restore.pfn_types, pfn_typesz);
    if ( !pfn_types )
    {
        ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
        return -1;
    }
    ctx->x86_pv.restore.pfn_types = pfn_types;

    p2m_pfnsz = (end_frame + 1) * sizeof(*p2m_pfns);
    p2m_pfns = realloc(ctx->x86_pv.p2m_pfns, p2m_pfnsz);
    if ( !p2m_pfns )
    {
        ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
        return -1;
    }
    ctx->x86_pv.p2m_frames = end_frame;
    ctx->x86_pv.p2m_pfns = p2m_pfns;

    ctx->x86_pv.max_pfn = max_pfn;
    for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
    {
        ctx->restore.ops.set_gfn(ctx, i, INVALID_MFN);
        ctx->restore.ops.set_page_type(ctx, i, 0);
    }

    for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
        ctx->x86_pv.p2m_pfns[i] = INVALID_MFN;

    DPRINTF("Changed max_pfn from %#lx to %#lx", old_max, max_pfn);
    return 0;
}

/*
 * Pin all of the pagetables.
 */
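/*
 * For reference, a single pin operation amounts to the following sketch;
 * MAX_PIN_BATCH (defined in libxc's private headers) merely amortises the
 * hypercall cost over many tables:
 *
 *     struct mmuext_op op = {
 *         .cmd = MMUEXT_PIN_L1_TABLE,      // or L2/L3/L4 to match the type
 *         .arg1.mfn = pfn_to_mfn(ctx, pfn),
 *     };
 *     xc_mmuext_op(xch, &op, 1, ctx->domid);
 */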
static int pin_pagetables(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned long i, nr_pins;
    struct mmuext_op pin[MAX_PIN_BATCH];

    for ( i = nr_pins = 0; i <= ctx->x86_pv.max_pfn; ++i )
    {
        if ( (ctx->x86_pv.restore.pfn_types[i] &
              XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
            continue;

        switch ( (ctx->x86_pv.restore.pfn_types[i] &
                  XEN_DOMCTL_PFINFO_LTABTYPE_MASK) )
        {
        case XEN_DOMCTL_PFINFO_L1TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L2TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L3TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L4TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
            break;
        default:
            continue;
        }

        pin[nr_pins].arg1.mfn = pfn_to_mfn(ctx, i);
        nr_pins++;

        if ( nr_pins == MAX_PIN_BATCH )
        {
            if ( xc_mmuext_op(xch, pin, nr_pins, ctx->domid) != 0 )
            {
                PERROR("Failed to pin batch of pagetables");
                return -1;
            }
            nr_pins = 0;
        }
    }

    if ( (nr_pins > 0) && (xc_mmuext_op(xch, pin, nr_pins, ctx->domid) < 0) )
    {
        PERROR("Failed to pin batch of pagetables");
        return -1;
    }

    return 0;
}

/*
 * Update details in a guest's start_info structure.
 */
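/*
 * GET_FIELD()/SET_FIELD() pick the 32bit or 64bit layout of the compat
 * unions based on the guest width. Roughly (a sketch of the macros, which
 * live in xg_save_restore.h):
 *
 *     #define GET_FIELD(_p, _f, _w) \
 *         (((_w) == 8) ? (_p)->x64._f : (_p)->x32._f)
 */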
static int process_start_info(struct xc_sr_context *ctx,
                              vcpu_guest_context_any_t *vcpu)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t pfn, mfn;
    start_info_any_t *guest_start_info = NULL;
    int rc = -1;

    pfn = GET_FIELD(vcpu, user_regs.edx, ctx->x86_pv.width);

    if ( pfn > ctx->x86_pv.max_pfn )
    {
        ERROR("Start Info pfn %#lx out of range", pfn);
        goto err;
    }
    else if ( ctx->x86_pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
    {
        ERROR("Start Info pfn %#lx has bad type %u", pfn,
              (ctx->x86_pv.restore.pfn_types[pfn] >>
               XEN_DOMCTL_PFINFO_LTAB_SHIFT));
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("Start Info has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    SET_FIELD(vcpu, user_regs.edx, mfn, ctx->x86_pv.width);
    guest_start_info = xc_map_foreign_range(
        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
    if ( !guest_start_info )
    {
        PERROR("Failed to map Start Info at mfn %#lx", mfn);
        goto err;
    }

    /* Deal with xenstore stuff */
    pfn = GET_FIELD(guest_start_info, store_mfn, ctx->x86_pv.width);
    if ( pfn > ctx->x86_pv.max_pfn )
    {
        ERROR("XenStore pfn %#lx out of range", pfn);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("XenStore pfn has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    ctx->restore.xenstore_gfn = mfn;
    SET_FIELD(guest_start_info, store_mfn, mfn, ctx->x86_pv.width);
    SET_FIELD(guest_start_info, store_evtchn,
              ctx->restore.xenstore_evtchn, ctx->x86_pv.width);

    /* Deal with console stuff */
    pfn = GET_FIELD(guest_start_info, console.domU.mfn, ctx->x86_pv.width);
    if ( pfn > ctx->x86_pv.max_pfn )
    {
        ERROR("Console pfn %#lx out of range", pfn);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("Console pfn has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    ctx->restore.console_gfn = mfn;
    SET_FIELD(guest_start_info, console.domU.mfn, mfn, ctx->x86_pv.width);
    SET_FIELD(guest_start_info, console.domU.evtchn,
              ctx->restore.console_evtchn, ctx->x86_pv.width);

    /* Set other information */
    SET_FIELD(guest_start_info, nr_pages,
              ctx->x86_pv.max_pfn + 1, ctx->x86_pv.width);
    SET_FIELD(guest_start_info, shared_info,
              ctx->dominfo.shared_info_frame << PAGE_SHIFT, ctx->x86_pv.width);
    SET_FIELD(guest_start_info, flags, 0, ctx->x86_pv.width);

    rc = 0;

 err:
    if ( guest_start_info )
        munmap(guest_start_info, PAGE_SIZE);

    return rc;
}

/*
 * Process one stashed vcpu worth of basic state and send to Xen.
 */
static int process_vcpu_basic(struct xc_sr_context *ctx,
                              unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    vcpu_guest_context_any_t vcpu;
    xen_pfn_t pfn, mfn;
    unsigned i, gdt_count;
    int rc = -1;

    memcpy(&vcpu, ctx->x86_pv.restore.vcpus[vcpuid].basic,
           ctx->x86_pv.restore.vcpus[vcpuid].basicsz);

    /* Vcpu 0 is special: Convert the suspend record to an mfn. */
    if ( vcpuid == 0 )
    {
        rc = process_start_info(ctx, &vcpu);
        if ( rc )
            return rc;
        rc = -1;
    }

    SET_FIELD(&vcpu, flags,
              GET_FIELD(&vcpu, flags, ctx->x86_pv.width) | VGCF_online,
              ctx->x86_pv.width);

    gdt_count = GET_FIELD(&vcpu, gdt_ents, ctx->x86_pv.width);
    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
    {
        ERROR("GDT entry count (%u) out of range (max %u)",
              gdt_count, FIRST_RESERVED_GDT_ENTRY);
        errno = ERANGE;
        goto err;
    }
    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */
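    /*
     * A 4k frame holds 512 eight-byte GDT descriptors, hence the rounding
     * division above: e.g. gdt_ents == 1025 becomes gdt_count == 3 frames.
     */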

    /* Convert GDT frames to mfns. */
    for ( i = 0; i < gdt_count; ++i )
    {
        pfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86_pv.width);
        if ( pfn > ctx->x86_pv.max_pfn )
        {
            ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
            goto err;
        }
        else if ( (ctx->x86_pv.restore.pfn_types[pfn] !=
                   XEN_DOMCTL_PFINFO_NOTAB) )
        {
            ERROR("GDT frame %u (pfn %#lx) has bad type %u", i, pfn,
                  (ctx->x86_pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("GDT frame %u has bad mfn", i);
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        SET_FIELD(&vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
    }

    /* Convert CR3 to an mfn. */
    pfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86_pv.width));
    if ( pfn > ctx->x86_pv.max_pfn )
    {
        ERROR("cr3 (pfn %#lx) out of range", pfn);
        goto err;
    }
    else if ( (ctx->x86_pv.restore.pfn_types[pfn] &
               XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
              (((xen_pfn_t)ctx->x86_pv.levels) <<
               XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
    {
        ERROR("cr3 (pfn %#lx) has bad type %u, expected %u", pfn,
              (ctx->x86_pv.restore.pfn_types[pfn] >>
               XEN_DOMCTL_PFINFO_LTAB_SHIFT),
              ctx->x86_pv.levels);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("cr3 has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);

    /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
    if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
    {
        pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;

        if ( pfn > ctx->x86_pv.max_pfn )
        {
            ERROR("cr1 (pfn %#lx) out of range", pfn);
            goto err;
        }
        else if ( (ctx->x86_pv.restore.pfn_types[pfn] &
                   XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
                  (((xen_pfn_t)ctx->x86_pv.levels) <<
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
        {
            ERROR("cr1 (pfn %#lx) has bad type %u, expected %u", pfn,
                  (ctx->x86_pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT),
                  ctx->x86_pv.levels);
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("cr1 has bad mfn");
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
    }

    if ( xc_vcpu_setcontext(xch, ctx->domid, vcpuid, &vcpu) )
    {
        PERROR("Failed to set vcpu%u's basic info", vcpuid);
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

/*
 * Process one stashed vcpu worth of extended state and send to Xen.
 */
static int process_vcpu_extended(struct xc_sr_context *ctx,
                                 unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86_pv.restore.vcpus[vcpuid];
    DECLARE_DOMCTL;

    domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
    domctl.domain = ctx->domid;
    memcpy(&domctl.u.ext_vcpucontext, vcpu->extd, vcpu->extdsz);

    if ( xc_domctl(xch, &domctl) != 0 )
    {
        PERROR("Failed to set vcpu%u's extended info", vcpuid);
        return -1;
    }

    return 0;
}

/*
 * Process one stashed vcpu worth of xsave state and send to Xen.
 */
static int process_vcpu_xsave(struct xc_sr_context *ctx,
                              unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86_pv.restore.vcpus[vcpuid];
    int rc;
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BUFFER(void, buffer);

    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->xsavesz);
    if ( !buffer )
    {
        ERROR("Unable to allocate %zu bytes for xsave hypercall buffer",
              vcpu->xsavesz);
        return -1;
    }

    domctl.cmd = XEN_DOMCTL_setvcpuextstate;
    domctl.domain = ctx->domid;
    domctl.u.vcpuextstate.vcpu = vcpuid;
    domctl.u.vcpuextstate.size = vcpu->xsavesz;
    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);

    memcpy(buffer, vcpu->xsave, vcpu->xsavesz);

    rc = xc_domctl(xch, &domctl);
    if ( rc )
        PERROR("Failed to set vcpu%u's xsave info", vcpuid);

    xc_hypercall_buffer_free(xch, buffer);

    return rc;
}

/*
 * Process one stashed vcpu worth of msr state and send to Xen.
 */
static int process_vcpu_msrs(struct xc_sr_context *ctx,
                             unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86_pv.restore.vcpus[vcpuid];
    int rc;
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BUFFER(void, buffer);

    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->msrsz);
    if ( !buffer )
    {
        ERROR("Unable to allocate %zu bytes for msr hypercall buffer",
              vcpu->msrsz);
        return -1;
    }

    domctl.cmd = XEN_DOMCTL_set_vcpu_msrs;
    domctl.domain = ctx->domid;
    domctl.u.vcpu_msrs.vcpu = vcpuid;
    domctl.u.vcpu_msrs.msr_count = vcpu->msrsz / sizeof(xen_domctl_vcpu_msr_t);
    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);

    memcpy(buffer, vcpu->msr, vcpu->msrsz);

    rc = xc_domctl(xch, &domctl);
    if ( rc )
        PERROR("Failed to set vcpu%u's msrs", vcpuid);

    xc_hypercall_buffer_free(xch, buffer);

    return rc;
}

/*
 * Process all stashed vcpu context and send to Xen.
 */
static int update_vcpu_context(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu;
    unsigned i;
    int rc = 0;

    for ( i = 0; i < ctx->x86_pv.restore.nr_vcpus; ++i )
    {
        vcpu = &ctx->x86_pv.restore.vcpus[i];

        if ( vcpu->basic )
        {
            rc = process_vcpu_basic(ctx, i);
            if ( rc )
                return rc;
        }
        else if ( i == 0 )
        {
            ERROR("Sender didn't send vcpu0's basic state");
            return -1;
        }

        if ( vcpu->extd )
        {
            rc = process_vcpu_extended(ctx, i);
            if ( rc )
                return rc;
        }

        if ( vcpu->xsave )
        {
            rc = process_vcpu_xsave(ctx, i);
            if ( rc )
                return rc;
        }

        if ( vcpu->msr )
        {
            rc = process_vcpu_msrs(ctx, i);
            if ( rc )
                return rc;
        }
    }

    return rc;
}

/*
 * Copy the p2m which has been constructed locally as memory has been
 * allocated, over the p2m in the guest, so the guest can find its memory
 * again on resume.
 */
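/*
 * Sizing sanity check (illustrative, assuming 4k pages): for a 64bit guest
 * with max_pfn == 0x1ffff, the copy below is 0x20000 * 8 bytes == 1MiB,
 * which exactly fills the 0x100 mapped p2m frames.
 */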
static int update_guest_p2m(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t mfn, pfn, *guest_p2m = NULL;
    unsigned i;
    int rc = -1;

    for ( i = 0; i < ctx->x86_pv.p2m_frames; ++i )
    {
        pfn = ctx->x86_pv.p2m_pfns[i];

        if ( pfn > ctx->x86_pv.max_pfn )
        {
            ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
                  pfn, i);
            goto err;
        }
        else if ( (ctx->x86_pv.restore.pfn_types[pfn] !=
                   XEN_DOMCTL_PFINFO_NOTAB) )
        {
            ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %u", pfn, i,
                  (ctx->x86_pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("p2m_frame_list[%u] has bad mfn", i);
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        ctx->x86_pv.p2m_pfns[i] = mfn;
    }

    guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
                                     ctx->x86_pv.p2m_pfns,
                                     ctx->x86_pv.p2m_frames);
    if ( !guest_p2m )
    {
        PERROR("Failed to map p2m frames");
        goto err;
    }

    memcpy(guest_p2m, ctx->x86_pv.p2m,
           (ctx->x86_pv.max_pfn + 1) * ctx->x86_pv.width);
    rc = 0;
 err:
    if ( guest_p2m )
        munmap(guest_p2m, ctx->x86_pv.p2m_frames * PAGE_SIZE);

    return rc;
}

/*
 * Process an X86_PV_INFO record.
 */
static int handle_x86_pv_info(struct xc_sr_context *ctx,
                              struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_info *info = rec->data;

    if ( ctx->x86_pv.restore.seen_pv_info )
    {
        ERROR("Already received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length < sizeof(*info) )
    {
        ERROR("X86_PV_INFO record truncated: length %u, expected %zu",
              rec->length, sizeof(*info));
        return -1;
    }
    else if ( info->guest_width != 4 &&
              info->guest_width != 8 )
    {
        ERROR("Unexpected guest width %u, expected 4 or 8",
              info->guest_width);
        return -1;
    }
    else if ( info->guest_width != ctx->x86_pv.width )
    {
        int rc;
        struct xen_domctl domctl;

        /* Try to set address size, domain is always created 64 bit. */
        memset(&domctl, 0, sizeof(domctl));
        domctl.domain = ctx->domid;
        domctl.cmd = XEN_DOMCTL_set_address_size;
        domctl.u.address_size.size = info->guest_width * 8;
        rc = do_domctl(xch, &domctl);
        if ( rc != 0 )
        {
            ERROR("Width of guest in stream (%u"
                  " bits) differs from existing domain (%u bits)",
                  info->guest_width * 8, ctx->x86_pv.width * 8);
            return -1;
        }

        /* Domain's information changed, better to refresh. */
        rc = x86_pv_domain_info(ctx);
        if ( rc != 0 )
        {
            ERROR("Unable to refresh guest information");
            return -1;
        }
    }
    else if ( info->pt_levels != 3 &&
              info->pt_levels != 4 )
    {
        ERROR("Unexpected guest levels %u, expected 3 or 4",
              info->pt_levels);
        return -1;
    }
    else if ( info->pt_levels != ctx->x86_pv.levels )
    {
        ERROR("Levels of guest in stream (%u"
              ") differs from existing domain (%u)",
              info->pt_levels, ctx->x86_pv.levels);
        return -1;
    }

    ctx->x86_pv.restore.seen_pv_info = true;
    return 0;
}

/*
 * Process an X86_PV_P2M_FRAMES record. Takes care of expanding the local p2m
 * state if needed.
 */
static int handle_x86_pv_p2m_frames(struct xc_sr_context *ctx,
                                    struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_p2m_frames *data = rec->data;
    unsigned start, end, x, fpp = PAGE_SIZE / ctx->x86_pv.width;
    int rc;

    if ( !ctx->x86_pv.restore.seen_pv_info )
    {
        ERROR("Not yet received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length < sizeof(*data) )
    {
        ERROR("X86_PV_P2M_FRAMES record truncated: length %u, min %zu",
              rec->length, sizeof(*data) + sizeof(uint64_t));
        return -1;
    }
    else if ( data->start_pfn > data->end_pfn )
    {
        ERROR("Start pfn in stream (%#x) exceeds End (%#x)",
              data->start_pfn, data->end_pfn);
        return -1;
    }

    start = data->start_pfn / fpp;
    end = data->end_pfn / fpp + 1;
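    /*
     * e.g. for a 64bit guest (fpp == 512), start_pfn == 0 and
     * end_pfn == 0x1ffff give start == 0 and end == 256, i.e. the record
     * must carry (end - start) == 256 uint64_t frame entries.
     */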

    if ( rec->length != sizeof(*data) + ((end - start) * sizeof(uint64_t)) )
    {
        ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#x"
              ", end_pfn %#x, length %u, expected %zu + (%u - %u) * %zu",
              data->start_pfn, data->end_pfn, rec->length,
              sizeof(*data), end, start, sizeof(uint64_t));
        return -1;
    }

    if ( data->end_pfn > ctx->x86_pv.max_pfn )
    {
        rc = expand_p2m(ctx, data->end_pfn);
        if ( rc )
            return rc;
    }

    for ( x = 0; x < (end - start); ++x )
        ctx->x86_pv.p2m_pfns[start + x] = data->p2m_pfns[x];

    return 0;
}

/*
 * Processes X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} records from the stream.
 * The blobs are all stashed to one side as they need to be deferred until the
 * very end of the stream, rather than being sent to Xen at the point they
 * arrive in the stream. It performs all pre-hypercall size validation.
 */
static int handle_x86_pv_vcpu_blob(struct xc_sr_context *ctx,
                                   struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
    struct xc_sr_x86_pv_restore_vcpu *vcpu;
    const char *rec_name;
    size_t blobsz;
    void *blob;
    int rc = -1;

    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_VCPU_BASIC:
        rec_name = "X86_PV_VCPU_BASIC";
        break;

    case REC_TYPE_X86_PV_VCPU_EXTENDED:
        rec_name = "X86_PV_VCPU_EXTENDED";
        break;

    case REC_TYPE_X86_PV_VCPU_XSAVE:
        rec_name = "X86_PV_VCPU_XSAVE";
        break;

    case REC_TYPE_X86_PV_VCPU_MSRS:
        rec_name = "X86_PV_VCPU_MSRS";
        break;

    default:
        ERROR("Unrecognised vcpu blob record %s (%u)",
              rec_type_to_str(rec->type), rec->type);
        goto out;
    }

    /* Confirm that there is a complete header. */
    if ( rec->length < sizeof(*vhdr) )
    {
        ERROR("%s record truncated: length %u, header size %zu",
              rec_name, rec->length, sizeof(*vhdr));
        goto out;
    }

    blobsz = rec->length - sizeof(*vhdr);

    /*
     * Tolerate empty records. Older sending sides used to accidentally
     * generate them.
     */
    if ( blobsz == 0 )
    {
        DBGPRINTF("Skipping empty %s record for vcpu %u\n",
                  rec_type_to_str(rec->type), vhdr->vcpu_id);
        goto out;
    }

    /* Check that the vcpu id is within range. */
    if ( vhdr->vcpu_id >= ctx->x86_pv.restore.nr_vcpus )
    {
        ERROR("%s record vcpu_id (%u) exceeds domain max (%u)",
              rec_name, vhdr->vcpu_id, ctx->x86_pv.restore.nr_vcpus - 1);
        goto out;
    }

    vcpu = &ctx->x86_pv.restore.vcpus[vhdr->vcpu_id];

    /* Further per-record checks, where possible. */
    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_VCPU_BASIC:
    {
        size_t vcpusz = ctx->x86_pv.width == 8 ?
            sizeof(vcpu_guest_context_x86_64_t) :
            sizeof(vcpu_guest_context_x86_32_t);

        if ( blobsz != vcpusz )
        {
            ERROR("%s record wrong size: expected %zu, got %u",
                  rec_name, sizeof(*vhdr) + vcpusz, rec->length);
            goto out;
        }
        break;
    }

    case REC_TYPE_X86_PV_VCPU_EXTENDED:
        if ( blobsz > 128 )
        {
            ERROR("%s record too long: max %zu, got %u",
                  rec_name, sizeof(*vhdr) + 128, rec->length);
            goto out;
        }
        break;

    case REC_TYPE_X86_PV_VCPU_MSRS:
        if ( blobsz % sizeof(xen_domctl_vcpu_msr_t) != 0 )
        {
            ERROR("%s record payload size %zu expected to be a multiple of %zu",
                  rec_name, blobsz, sizeof(xen_domctl_vcpu_msr_t));
            goto out;
        }
        break;
    }

    /* Allocate memory. */
    blob = malloc(blobsz);
    if ( !blob )
    {
        ERROR("Unable to allocate %zu bytes for vcpu%u %s blob",
              blobsz, vhdr->vcpu_id, rec_name);
        goto out;
    }

    memcpy(blob, &vhdr->context, blobsz);

    /* Stash sideways for later. */
    switch ( rec->type )
    {
#define RECSTORE(x, y) case REC_TYPE_X86_PV_ ## x: \
    free(y); (y) = blob; (y ## sz) = blobsz; break
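    /*
     * For clarity, RECSTORE(VCPU_BASIC, vcpu->basic) expands to:
     *
     *     case REC_TYPE_X86_PV_VCPU_BASIC:
     *         free(vcpu->basic);
     *         vcpu->basic = blob;
     *         vcpu->basicsz = blobsz;
     *         break;
     */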

    RECSTORE(VCPU_BASIC, vcpu->basic);
    RECSTORE(VCPU_EXTENDED, vcpu->extd);
    RECSTORE(VCPU_XSAVE, vcpu->xsave);
    RECSTORE(VCPU_MSRS, vcpu->msr);
#undef RECSTORE
    }

    rc = 0;

 out:
    return rc;
}

/*
 * Process a SHARED_INFO record from the stream.
 */
static int handle_shared_info(struct xc_sr_context *ctx,
                              struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned i;
    int rc = -1;
    shared_info_any_t *guest_shinfo = NULL;
    const shared_info_any_t *old_shinfo = rec->data;

    if ( !ctx->x86_pv.restore.seen_pv_info )
    {
        ERROR("Not yet received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length != PAGE_SIZE )
    {
        ERROR("X86_PV_SHARED_INFO record wrong size: length %u"
              ", expected 4096", rec->length);
        goto err;
    }

    guest_shinfo = xc_map_foreign_range(
        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
        ctx->dominfo.shared_info_frame);
    if ( !guest_shinfo )
    {
        PERROR("Failed to map Shared Info at mfn %#lx",
               ctx->dominfo.shared_info_frame);
        goto err;
    }

    MEMCPY_FIELD(guest_shinfo, old_shinfo, vcpu_info, ctx->x86_pv.width);
    MEMCPY_FIELD(guest_shinfo, old_shinfo, arch, ctx->x86_pv.width);

    SET_FIELD(guest_shinfo, arch.pfn_to_mfn_frame_list_list,
              0, ctx->x86_pv.width);

    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_pending, 0, ctx->x86_pv.width);
    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
        SET_FIELD(guest_shinfo, vcpu_info[i].evtchn_pending_sel,
                  0, ctx->x86_pv.width);

    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_mask, 0xff, ctx->x86_pv.width);

    rc = 0;
 err:

    if ( guest_shinfo )
        munmap(guest_shinfo, PAGE_SIZE);

    return rc;
}

/* restore_ops function. */
static bool x86_pv_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    return pfn <= ctx->x86_pv.max_pfn;
}

/* restore_ops function. */
static void x86_pv_set_page_type(struct xc_sr_context *ctx, xen_pfn_t pfn,
                                 unsigned long type)
{
    assert(pfn <= ctx->x86_pv.max_pfn);

    ctx->x86_pv.restore.pfn_types[pfn] = type;
}

/* restore_ops function. */
static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
                           xen_pfn_t mfn)
{
    assert(pfn <= ctx->x86_pv.max_pfn);

    if ( ctx->x86_pv.width == sizeof(uint64_t) )
        /* 64 bit guest. Need to expand INVALID_MFN for 32 bit toolstacks. */
        ((uint64_t *)ctx->x86_pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
    else
        /* 32 bit guest. Can truncate INVALID_MFN for 64 bit toolstacks. */
        ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
}
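/*
 * Illustration of the above (assuming xen_pfn_t is an unsigned long in the
 * public ABI): on a 32bit toolstack INVALID_MFN == 0xffffffff, which must be
 * widened to ~0ULL before landing in a 64bit guest's p2m, whereas a 32bit
 * guest's p2m entries can simply truncate a 64bit toolstack's ~0UL.
 */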

/*
 * restore_ops function. Convert pfns back to mfns in pagetables. Possibly
 * needs to populate new frames if a PTE is found referring to a frame which
 * hasn't yet been seen from PAGE_DATA records.
 */
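/*
 * For orientation (a sketch; pte_to_frame()/merge_pte() come from
 * xc_sr_common_x86_pv.h): a present 64bit PTE such as 0xdeadb067 carries its
 * frame number in bits 12 and up, so pte_to_frame(0xdeadb067) == 0xdeadb,
 * and merge_pte() re-inserts the translated mfn while leaving the low flag
 * bits (present, rw, etc.) intact.
 */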
static int x86_pv_localise_page(struct xc_sr_context *ctx,
                                uint32_t type, void *page)
{
    xc_interface *xch = ctx->xch;
    uint64_t *table = page;
    uint64_t pte;
    unsigned i, to_populate;
    xen_pfn_t pfns[(PAGE_SIZE / sizeof(uint64_t))];

    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;

    /* Only page tables need localisation. */
    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
        return 0;

    /* Check to see whether we need to populate any new frames. */
    for ( i = 0, to_populate = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
    {
        pte = table[i];

        if ( pte & _PAGE_PRESENT )
        {
            xen_pfn_t pfn = pte_to_frame(pte);

#ifdef __i386__
            if ( pfn == INVALID_MFN )
            {
                ERROR("PTE truncation detected. L%u[%u] = %016"PRIx64,
                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
                errno = E2BIG;
                return -1;
            }
#endif

            if ( pfn_to_mfn(ctx, pfn) == INVALID_MFN )
                pfns[to_populate++] = pfn;
        }
    }

    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
        return -1;

    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
    {
        pte = table[i];

        if ( pte & _PAGE_PRESENT )
        {
            xen_pfn_t mfn, pfn;

            pfn = pte_to_frame(pte);
            mfn = pfn_to_mfn(ctx, pfn);

            if ( !mfn_in_pseudophysmap(ctx, mfn) )
            {
                ERROR("Bad mfn for L%u[%u] - pte %"PRIx64,
                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
                dump_bad_pseudophysmap_entry(ctx, mfn);
                errno = ERANGE;
                return -1;
            }

            table[i] = merge_pte(pte, mfn);
        }
    }

    return 0;
}

/*
 * restore_ops function. Confirm that the incoming stream matches the type of
 * domain we are attempting to restore into.
 */
static int x86_pv_setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
    {
        ERROR("Unable to restore %s domain into an x86_pv domain",
              dhdr_type_to_str(ctx->restore.guest_type));
        return -1;
    }
    else if ( ctx->restore.guest_page_size != PAGE_SIZE )
    {
        ERROR("Invalid page size %d for x86_pv domains",
              ctx->restore.guest_page_size);
        return -1;
    }

    rc = x86_pv_domain_info(ctx);
    if ( rc )
        return rc;

    ctx->x86_pv.restore.nr_vcpus = ctx->dominfo.max_vcpu_id + 1;
    ctx->x86_pv.restore.vcpus =
        calloc(ctx->x86_pv.restore.nr_vcpus,
               sizeof(struct xc_sr_x86_pv_restore_vcpu));
    if ( !ctx->x86_pv.restore.vcpus )
    {
        errno = ENOMEM;
        return -1;
    }

    rc = x86_pv_map_m2p(ctx);
    if ( rc )
        return rc;

    return rc;
}

/*
 * restore_ops function.
 */
static int x86_pv_process_record(struct xc_sr_context *ctx,
                                 struct xc_sr_record *rec)
{
    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_INFO:
        return handle_x86_pv_info(ctx, rec);

    case REC_TYPE_X86_PV_P2M_FRAMES:
        return handle_x86_pv_p2m_frames(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_BASIC:
    case REC_TYPE_X86_PV_VCPU_EXTENDED:
    case REC_TYPE_X86_PV_VCPU_XSAVE:
    case REC_TYPE_X86_PV_VCPU_MSRS:
        return handle_x86_pv_vcpu_blob(ctx, rec);

    case REC_TYPE_SHARED_INFO:
        return handle_shared_info(ctx, rec);

    case REC_TYPE_TSC_INFO:
        return handle_tsc_info(ctx, rec);

    default:
        return RECORD_NOT_PROCESSED;
    }
}

/*
 * restore_ops function. Update the vcpu context in Xen, pin the pagetables,
 * rewrite the p2m and seed the grant table.
 */
static int x86_pv_stream_complete(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = update_vcpu_context(ctx);
    if ( rc )
        return rc;

    rc = pin_pagetables(ctx);
    if ( rc )
        return rc;

    rc = update_guest_p2m(ctx);
    if ( rc )
        return rc;

    rc = xc_dom_gnttab_seed(xch, ctx->domid,
                            ctx->restore.console_gfn,
                            ctx->restore.xenstore_gfn,
                            ctx->restore.console_domid,
                            ctx->restore.xenstore_domid);
    if ( rc )
    {
        PERROR("Failed to seed grant table");
        return rc;
    }

    return rc;
}

/*
 * restore_ops function.
 */
static int x86_pv_cleanup(struct xc_sr_context *ctx)
{
    free(ctx->x86_pv.p2m);
    free(ctx->x86_pv.p2m_pfns);

    if ( ctx->x86_pv.restore.vcpus )
    {
        unsigned i;

        for ( i = 0; i < ctx->x86_pv.restore.nr_vcpus; ++i )
        {
            struct xc_sr_x86_pv_restore_vcpu *vcpu =
                &ctx->x86_pv.restore.vcpus[i];

            free(vcpu->basic);
            free(vcpu->extd);
            free(vcpu->xsave);
            free(vcpu->msr);
        }

        free(ctx->x86_pv.restore.vcpus);
    }

    free(ctx->x86_pv.restore.pfn_types);

    if ( ctx->x86_pv.m2p )
        munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);

    return 0;
}

struct xc_sr_restore_ops restore_ops_x86_pv =
{
    .pfn_is_valid    = x86_pv_pfn_is_valid,
    .pfn_to_gfn      = pfn_to_mfn,
    .set_page_type   = x86_pv_set_page_type,
    .set_gfn         = x86_pv_set_gfn,
    .localise_page   = x86_pv_localise_page,
    .setup           = x86_pv_setup,
    .process_record  = x86_pv_process_record,
    .stream_complete = x86_pv_stream_complete,
    .cleanup         = x86_pv_cleanup,
};

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */