1 /*
2  * Copyright (C) 2016 FUJITSU LIMITED
3  * Author: Wen Congyang <wency@cn.fujitsu.com>
4  *         Yang Hongyang <hongyang.yang@easystack.cn>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; version 2.1 only. with the special
9  * exception on linking described in file LICENSE.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  */
16 
17 #include "libxl_osdeps.h" /* must come before any other headers */
18 
19 #include "libxl_internal.h"
20 #include "libxl_sr_stream_format.h"
21 
/*
 * State kept by the COLO restore side while checkpoints are processed.
 * It is allocated in libxl__colo_restore_setup() and linked to its
 * libxl__colo_restore_state in both directions (crs->crcs / crcs->crs).
 */
typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state;
struct libxl__colo_restore_checkpoint_state {
    /* suspend state of the secondary vm; prepared by init_dsps() */
    libxl__domain_suspend_state dsps;
    /* log-dirty switch; lds.callback receives the enable/disable result */
    libxl__logdirty_switch lds;
    /* back pointer to the owning restore state */
    libxl__colo_restore_state *crs;
    /* stream writer on crs->send_back_fd, started with back_channel set */
    libxl__stream_write_state sws;
    /* one of LIBXL_COLO_SETUPED, LIBXL_COLO_SUSPENDED, LIBXL_COLO_RESUMED */
    int status;
    /* set to true by colo_send_svm_ready_done() the first time it runs */
    bool preresume;
    /* used for teardown */
    /* set to 1 once init_device_subkind() has succeeded */
    int teardown_devices;
    /* result recorded by libxl__colo_restore_teardown() for later steps */
    int saved_rc;
    /* device model state file passed to libxl__qmp_restore() */
    char *state_file;

    /* continuation invoked with the result of the step in progress */
    void (*callback)(libxl__egc *,
                     libxl__colo_restore_checkpoint_state *,
                     int);
};
39 
/* Checkpoint device operations that are defined outside this file. */
extern const libxl__checkpoint_device_instance_ops colo_restore_device_nic;
extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;

/*
 * NULL-terminated table of the device operations above; it is installed
 * as cds->ops by colo_setup_checkpoint_devices().
 */
static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
    &colo_restore_device_nic,
    &colo_restore_device_qdisk,
    NULL,
};
48 
49 /* ===================== colo: common functions ===================== */
50 
/*
 * Enable log-dirty tracking for the secondary vm.
 *
 * The result is reported through crcs->lds.callback, which the caller
 * must set before calling:
 *  - ERROR_FAIL if xc_shadow_control() cannot enable log-dirty mode;
 *  - 0 for a non-HVM guest once log-dirty mode is enabled;
 *  - for an HVM guest the switch is handed on to
 *    libxl__domain_common_switch_qemu_logdirty(), which is given lds
 *    (presumably it reports through the same callback).
 */
static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc)
{
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;

    /* Convenience aliases */
    const uint32_t domid = crs->domid;
    libxl__logdirty_switch *const lds = &crcs->lds;

    EGC_GC;

    /* we need to know which pages are dirty to restore the guest */
    if (xc_shadow_control(CTX->xch, domid,
                          XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                          NULL, 0, NULL, 0, NULL) < 0) {
        LOGD(ERROR, domid, "cannot enable secondary vm's logdirty");
        lds->callback(egc, lds, ERROR_FAIL);
        return;
    }

    if (crs->hvm) {
        /* an HVM guest also needs its device model switched */
        libxl__domain_common_switch_qemu_logdirty(egc, domid, 1, lds);
        return;
    }

    lds->callback(egc, lds, 0);
}
78 
/*
 * Disable log-dirty tracking for the secondary vm.
 *
 * A failing xc_shadow_control() call is only logged as a warning and the
 * function carries on.  The result is reported through
 * crcs->lds.callback, which the caller must set before calling: 0 for a
 * non-HVM guest; for an HVM guest the switch is handed on to
 * libxl__domain_common_switch_qemu_logdirty(), which is given lds
 * (presumably it reports through the same callback).
 */
static void colo_disable_logdirty(libxl__colo_restore_state *crs,
                                  libxl__egc *egc)
{
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;

    /* Convenience aliases */
    const uint32_t domid = crs->domid;
    libxl__logdirty_switch *const lds = &crcs->lds;

    EGC_GC;

    /* switch shadow mode off; a failure here does not stop the caller */
    if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF,
                          NULL, 0, NULL, 0, NULL) < 0)
        LOGD(WARN, domid, "cannot disable secondary vm's logdirty");

    if (crs->hvm) {
        /* an HVM guest also needs its device model switched */
        libxl__domain_common_switch_qemu_logdirty(egc, domid, 0, lds);
        return;
    }

    lds->callback(egc, lds, 0);
}
103 
/*
 * Resume the secondary vm.
 *
 * While crs->saved_cb is still set, the work is handed to
 * libxl__xc_domain_restore_done().  Otherwise the device model state is
 * restored from crcs->state_file (only when restore_device_model is
 * non-zero), the domain is resumed, and the outcome is passed to
 * crcs->callback.
 */
static void colo_resume_vm(libxl__egc *egc,
                           libxl__colo_restore_checkpoint_state *crcs,
                           int restore_device_model)
{
    /* Convenience aliases */
    libxl__colo_restore_state *const crs = crcs->crs;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
    int rc;

    EGC_GC;

    if (crs->saved_cb) {
        libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
        return;
    }

    /* TODO: sync mmu for hvm? */
    if (restore_device_model) {
        rc = libxl__qmp_restore(gc, crs->domid, crcs->state_file);
        if (rc) {
            LOGD(ERROR, crs->domid,
                 "cannot restore device model for secondary vm");
            crcs->callback(egc, crcs, rc);
            return;
        }
    }

    rc = libxl__domain_resume_deprecated(gc, crs->domid, 0);
    if (rc)
        LOGD(ERROR, crs->domid, "cannot resume secondary vm");

    /* success and failure of the resume are both reported here */
    crcs->callback(egc, crcs, rc);
}
139 
/*
 * Initialise the device subkind-specific state (colo nic, then qdisk) in
 * the libxl ctx.  If the qdisk step fails, the nic step is undone again.
 * Returns 0 on success, otherwise the rc of the step that failed.
 */
static int init_device_subkind(libxl__checkpoint_devices_state *cds)
{
    int rc;
    STATE_AO_GC(cds->ao);

    rc = init_subkind_colo_nic(cds);
    if (rc)
        return rc;

    rc = init_subkind_qdisk(cds);
    if (rc)
        cleanup_subkind_colo_nic(cds);

    return rc;
}
159 
/*
 * Counterpart of init_device_subkind(): releases the colo nic and qdisk
 * subkind state for cds.
 */
static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
{
    /* cleanup device subkind-specific state in the libxl ctx */
    STATE_AO_GC(cds->ao);

    cleanup_subkind_colo_nic(cds);
    cleanup_subkind_qdisk(cds);
}
168 
169 /* ================ colo: setup restore environment ================ */
170 
171 static void libxl__colo_domain_create_cb(libxl__egc *egc,
172                                          libxl__domain_create_state *dcs,
173                                          int rc, uint32_t domid);
174 
/*
 * Prepare a domain suspend state for the secondary vm.
 *
 * dsps->ao and dsps->domid must already be set by the caller.  Returns 0
 * on success, or ERROR_FAIL when the domain type cannot be determined.
 */
static int init_dsps(libxl__domain_suspend_state *dsps)
{
    libxl_domain_type guest_type;

    STATE_AO_GC(dsps->ao);

    /* the event sources are initialised before the lookup that can fail */
    libxl__xswait_init(&dsps->pvcontrol);
    libxl__ev_evtchn_init(&dsps->guest_evtchn);
    libxl__ev_xswatch_init(&dsps->guest_watch);
    libxl__ev_time_init(&dsps->guest_timeout);

    guest_type = libxl__domain_type(gc, dsps->domid);
    if (guest_type == LIBXL_DOMAIN_TYPE_INVALID)
        return ERROR_FAIL;

    dsps->type = guest_type;

    /* Secondary vm is not created, so we cannot get evtchn port */
    dsps->guest_evtchn.port = -1;
    dsps->guest_evtchn_lockfd = -1;
    dsps->guest_responded = 0;
    dsps->dm_savefile = libxl__device_model_savefile(gc, dsps->domid);

    return 0;
}
205 
206 /*
207  * checkpoint callbacks are called in the following order:
208  * 1. resume
209  * 2. wait checkpoint
210  * 3. suspend
211  * 4. checkpoint
212  */
213 static void libxl__colo_restore_domain_resume_callback(void *data);
214 static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
215 static void libxl__colo_restore_domain_suspend_callback(void *data);
216 static void libxl__colo_restore_domain_checkpoint_callback(void *data);
217 
/*
 * Set up the COLO restore environment for crs.
 *
 * Allocates the checkpoint state, installs the restore callbacks,
 * replaces dcs->callback (the original is kept in crs->saved_cb) and
 * starts the back-channel stream writer.  crs->callback is always
 * invoked at the end: with 0 on success, with ERROR_FAIL if init_dsps()
 * fails.
 */
void libxl__colo_restore_setup(libxl__egc *egc,
                               libxl__colo_restore_state *crs)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
    libxl__colo_restore_checkpoint_state *crcs;
    int rc = ERROR_FAIL;

    /* Convenience aliases */
    libxl__srm_restore_autogen_callbacks *const cbs =
        &dcs->srs.shs.callbacks.restore.a;
    const int domid = crs->domid;

    STATE_AO_GC(crs->ao);

    GCNEW(crcs);
    crcs->crs = crs;
    crs->crcs = crcs;
    crs->qdisk_used = false;
    crs->qdisk_setuped = false;

    /* use the configured proxy script, or the one in the script dir */
    crs->colo_proxy_script = dcs->colo_proxy_script
        ? libxl__strdup(gc, dcs->colo_proxy_script)
        : GCSPRINTF("%s/colo-proxy-setup", libxl__xen_script_dir_path());

    /* setup dsps */
    crcs->dsps.domid = domid;
    crcs->dsps.ao = ao;
    if (init_dsps(&crcs->dsps))
        goto out;

    cbs->postcopy = libxl__colo_restore_domain_resume_callback;
    cbs->wait_checkpoint = libxl__colo_restore_domain_wait_checkpoint_callback;
    cbs->suspend = libxl__colo_restore_domain_suspend_callback;
    cbs->checkpoint = libxl__colo_restore_domain_checkpoint_callback;

    /*
     * Secondary vm is running in colo mode, so we need to call
     * libxl__xc_domain_restore_done() to create secondary vm.
     * But we will exit in domain_create_cb(). So replace the
     * callback here.
     */
    crs->saved_cb = dcs->callback;
    dcs->callback = libxl__colo_domain_create_cb;

    crcs->status = LIBXL_COLO_SETUPED;
    crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", domid);

    libxl__logdirty_init(&crcs->lds);
    crcs->lds.ao = ao;

    /* the writer sends data back over crs->send_back_fd */
    crcs->sws.ao = ao;
    crcs->sws.fd = crs->send_back_fd;
    crcs->sws.back_channel = true;

    dcs->cds.concrete_data = crs;

    libxl__stream_write_start(egc, &crcs->sws);

    rc = 0;

out:
    crs->callback(egc, crs, rc);
}
282 
/*
 * Domain-create callback installed by libxl__colo_restore_setup() in
 * place of the original one.  It forwards rc to the callback currently
 * set in the checkpoint state; domid is not used.
 */
static void libxl__colo_domain_create_cb(libxl__egc *egc,
                                         libxl__domain_create_state *dcs,
                                         int rc, uint32_t domid)
{
    libxl__colo_restore_checkpoint_state *const ckpt = dcs->crs.crcs;

    ckpt->callback(egc, ckpt, rc);
}
291 
292 /* ================ colo: teardown restore environment ================ */
293 
294 static void colo_restore_teardown_devices_done(libxl__egc *egc,
295     libxl__checkpoint_devices_state *cds, int rc);
296 static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs);
297 static void do_failover_done(libxl__egc *egc,
298                              libxl__colo_restore_checkpoint_state* crcs,
299                              int rc);
300 static void colo_disable_logdirty_done(libxl__egc *egc,
301                                        libxl__logdirty_switch *lds,
302                                        int rc);
303 static void libxl__colo_restore_teardown_done(libxl__egc *egc,
304                                               libxl__colo_restore_state *crs,
305                                               int rc);
306 
/*
 * Tear down the COLO restore environment.
 *
 * ret == 0 && retval == 0 is treated as "failover" (rc 0); anything else
 * as "colo fails" (rc 1).  The back-channel writer is aborted, the
 * restore is completed, qdisk replication is stopped if it was started,
 * and rc is stored in crcs->saved_rc.  Checkpoint devices are then torn
 * down if they were set up; either way the sequence continues in
 * colo_restore_teardown_devices_done().
 */
void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void,
                                  int ret, int retval, int errnoval)
{
    libxl__domain_create_state *dcs = dcs_void;

    /* convenience aliases */
    libxl__colo_restore_state *const crs = &dcs->crs;
    libxl__colo_restore_checkpoint_state *const crcs = crs->crcs;
    libxl__checkpoint_devices_state *const cds = &dcs->cds;
    const int rc = (ret == 0 && retval == 0) ? 0 : 1;

    EGC_GC;

    LOGD(INFO, crs->domid, "%s", rc ? "colo fails" : "failover");

    libxl__stream_write_abort(egc, &crcs->sws, 1);
    if (crs->saved_cb) {
        /* crcs->status is LIBXL_COLO_SETUPED */
        dcs->srs.completion_callback = NULL;
    }
    libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval);

    if (crs->qdisk_setuped) {
        libxl__qmp_stop_replication(gc, crs->domid, false);
        crs->qdisk_setuped = false;
    }

    crcs->saved_rc = rc;

    if (crcs->teardown_devices) {
        cds->callback = colo_restore_teardown_devices_done;
        libxl__checkpoint_devices_teardown(egc, cds);
        return;
    }

    /* no devices were set up, so go straight to the next step */
    colo_restore_teardown_devices_done(egc, cds, 0);
}
344 
/*
 * Continuation of libxl__colo_restore_teardown() after the checkpoint
 * devices have been torn down (or immediately, when none were set up).
 *
 * rc is the device teardown result; it is only logged.  The function
 * then continues with crcs->saved_rc: 0 starts failover, any other value
 * goes straight to libxl__colo_restore_teardown_done().
 */
static void colo_restore_teardown_devices_done(libxl__egc *egc,
    libxl__checkpoint_devices_state *cds, int rc)
{
    libxl__colo_restore_state *crs = cds->concrete_data;
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;

    EGC_GC;

    if (rc)
        LOGD(ERROR, cds->domid, "COLO: failed to teardown device for guest,"
            " rc %d", rc);

    /* subkind state only exists if device setup got that far */
    if (crcs->teardown_devices)
        cleanup_device_subkind(cds);

    colo_proxy_teardown(&crs->cps);

    rc = crcs->saved_rc;
    if (!rc) {
        crcs->callback = do_failover_done;
        do_failover(egc, crs);
        return;
    }

    libxl__colo_restore_teardown_done(egc, crs, rc);
}
372 
/*
 * Start failover according to crcs->status.
 *
 * In the SUSPENDED and RESUMED states log-dirty is disabled first and
 * the sequence continues in colo_disable_logdirty_done().  In any other
 * state failover is impossible and crcs->callback is invoked with
 * ERROR_FAIL.
 */
static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs)
{
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;

    /* Convenience aliases */
    const int status = crcs->status;
    libxl__logdirty_switch *const lds = &crcs->lds;

    EGC_GC;

    if (status == LIBXL_COLO_SUSPENDED || status == LIBXL_COLO_RESUMED) {
        /* disable logdirty first */
        lds->callback = colo_disable_logdirty_done;
        colo_disable_logdirty(crs, egc);
        return;
    }

    if (status == LIBXL_COLO_SETUPED) {
        /*
         * We will come here only when reading emulator xenstore data or
         * emulator context fails, and libxl__xc_domain_restore_done()
         * is not called. In this case, the migration is not finished,
         * so we cannot do failover.
         */
        LOGD(ERROR, crs->domid, "migration fails");
    } else {
        LOGD(ERROR, crs->domid, "invalid status: %d", status);
    }

    crcs->callback(egc, crcs, ERROR_FAIL);
}
405 
/*
 * Completion of do_failover(): logs a failed failover and hands rc on to
 * libxl__colo_restore_teardown_done().
 */
static void do_failover_done(libxl__egc *egc,
                             libxl__colo_restore_checkpoint_state* crcs,
                             int rc)
{
    /* Convenience aliases */
    libxl__colo_restore_state *const crs = crcs->crs;

    EGC_GC;

    if (rc)
        LOGD(ERROR, crs->domid, "cannot do failover");

    libxl__colo_restore_teardown_done(egc, crs, rc);
}
422 
/*
 * Log-dirty callback used during failover.
 *
 * A failure to disable log-dirty is only logged.  If the secondary vm is
 * in the SUSPENDED state it is resumed without restoring the device
 * model; otherwise crcs->callback is invoked with 0.
 */
static void colo_disable_logdirty_done(libxl__egc *egc,
                                       libxl__logdirty_switch *lds,
                                       int rc)
{
    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);

    EGC_GC;

    if (rc)
        LOGD(WARN, crcs->crs->domid, "cannot disable logdirty");

    if (crcs->status != LIBXL_COLO_SUSPENDED) {
        /* If we cannot disable logdirty, we still can do failover */
        crcs->callback(egc, crcs, 0);
        return;
    }

    /*
     * failover when reading state from master, so no need to
     * call libxl__qmp_restore().
     */
    colo_resume_vm(egc, crcs, 0);
}
446 
/*
 * Final step of the teardown sequence.
 *
 * rc == 0 means failover: the secondary vm is kept.  Any other rc means
 * COLO failed and the secondary vm is destroyed; a failing destroy is
 * logged as a warning and does not change the rc that is reported.
 * The original domain-create callback is put back if it is still saved
 * in crs->saved_cb, and dcs->callback is then invoked with rc and the
 * domain id.
 */
static void libxl__colo_restore_teardown_done(libxl__egc *egc,
                                              libxl__colo_restore_state *crs,
                                              int rc)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
    EGC_GC;

    /* convenience aliases */
    const uint32_t domid = crs->domid;

    /* failover (rc == 0): no need to destroy the secondary vm */
    if (rc && xc_domain_destroy(CTX->xch, domid) < 0)
        LOGD(WARN, domid, "cannot destroy secondary vm");

    if (crs->saved_cb) {
        dcs->callback = crs->saved_cb;
        crs->saved_cb = NULL;
    }

    dcs->callback(egc, dcs, rc, crs->domid);
}
473 
474 static void colo_common_write_stream_done(libxl__egc *egc,
475                                           libxl__stream_write_state *stream,
476                                           int rc);
477 static void colo_common_read_stream_done(libxl__egc *egc,
478                                          libxl__stream_read_state *stream,
479                                          int rc);
480 
481 /* ======================== colo: checkpoint ======================= */
482 
483 /*
484  * Do the following things when resuming secondary vm:
485  *  1. read emulator xenstore data
486  *  2. read emulator context
487  *  3. REC_TYPE_CHECKPOINT_END
488  */
libxl__colo_restore_domain_checkpoint_callback(void * data)489 static void libxl__colo_restore_domain_checkpoint_callback(void *data)
490 {
491     libxl__save_helper_state *shs = data;
492     libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
493     libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
494     libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
495 
496     crcs->callback = NULL;
497     dcs->srs.checkpoint_callback = colo_common_read_stream_done;
498     libxl__stream_read_start_checkpoint(shs->egc, &dcs->srs);
499 }
500 
501 /* ===================== colo: resume secondary vm ===================== */
502 
503 /*
504  * Do the following things when resuming secondary vm the first time:
505  *  1. resume secondary vm
506  *  2. enable log dirty
507  *  3. setup checkpoint devices
508  *  4. write CHECKPOINT_SVM_READY
509  *  5. unpause secondary vm
510  *  6. write CHECKPOINT_SVM_RESUMED
511  *
512  * Do the following things when resuming secondary vm:
513  *  1. write CHECKPOINT_SVM_READY
514  *  2. resume secondary vm
515  *  3. write CHECKPOINT_SVM_RESUMED
516  */
517 static void colo_send_svm_ready(libxl__egc *egc,
518                                 libxl__colo_restore_checkpoint_state *crcs);
519 static void colo_send_svm_ready_done(libxl__egc *egc,
520                                      libxl__colo_restore_checkpoint_state *crcs,
521                                      int rc);
522 static void colo_restore_preresume_cb(libxl__egc *egc,
523                                       libxl__checkpoint_devices_state *cds,
524                                       int rc);
525 static void colo_restore_resume_vm(libxl__egc *egc,
526                                    libxl__colo_restore_checkpoint_state *crcs);
527 static void colo_resume_vm_done(libxl__egc *egc,
528                                 libxl__colo_restore_checkpoint_state *crcs,
529                                 int rc);
530 static void colo_write_svm_resumed(libxl__egc *egc,
531                                    libxl__colo_restore_checkpoint_state *crcs);
532 static void colo_enable_logdirty_done(libxl__egc *egc,
533                                       libxl__logdirty_switch *lds,
534                                       int retval);
535 static void colo_reenable_logdirty(libxl__egc *egc,
536                                    libxl__logdirty_switch *lds,
537                                    int rc);
538 static void colo_reenable_logdirty_done(libxl__egc *egc,
539                                         libxl__logdirty_switch *lds,
540                                         int rc);
541 static void colo_setup_checkpoint_devices(libxl__egc *egc,
542                                           libxl__colo_restore_state *crs);
543 static void colo_restore_setup_cds_done(libxl__egc *egc,
544                                         libxl__checkpoint_devices_state *cds,
545                                         int rc);
546 static void colo_unpause_svm(libxl__egc *egc,
547                              libxl__colo_restore_checkpoint_state *crcs);
548 
libxl__colo_restore_domain_resume_callback(void * data)549 static void libxl__colo_restore_domain_resume_callback(void *data)
550 {
551     libxl__save_helper_state *shs = data;
552     libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
553     libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
554     libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
555 
556     if (crcs->teardown_devices)
557         colo_send_svm_ready(shs->egc, crcs);
558     else
559         colo_restore_resume_vm(shs->egc, crcs);
560 }
561 
/*
 * Write a CHECKPOINT_SVM_READY record on the back-channel stream.
 * colo_common_write_stream_done() is set as the writer's checkpoint
 * callback and colo_send_svm_ready_done() as crcs->callback.
 */
static void colo_send_svm_ready(libxl__egc *egc,
                               libxl__colo_restore_checkpoint_state *crcs)
{
    libxl_sr_checkpoint_state ready = { .id = CHECKPOINT_SVM_READY };

    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
    crcs->callback = colo_send_svm_ready_done;
    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &ready);
}
571 
/*
 * Continuation after CHECKPOINT_SVM_READY has been written.
 *
 * The first time (crcs->preresume still false) the flag is set and the
 * secondary vm is unpaused.  On later calls the checkpoint devices are
 * pre-resumed, with colo_restore_preresume_cb() as their callback.
 * rc is not inspected here.
 */
static void colo_send_svm_ready_done(libxl__egc *egc,
                                     libxl__colo_restore_checkpoint_state *crcs,
                                     int rc)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);

    if (crcs->preresume) {
        dcs->cds.callback = colo_restore_preresume_cb;
        libxl__checkpoint_devices_preresume(egc, &dcs->cds);
        return;
    }

    crcs->preresume = true;
    colo_unpause_svm(egc, crcs);
}
590 
/*
 * Callback for libxl__checkpoint_devices_preresume().
 *
 * On success a qdisk checkpoint is done if replication is set up, the
 * colo proxy is pre-resumed unless the userspace proxy is in use, and
 * the secondary vm is resumed.  On any failure the pending save/restore
 * helper callback is completed with 0.
 */
static void colo_restore_preresume_cb(libxl__egc *egc,
                                      libxl__checkpoint_devices_state *cds,
                                      int rc)
{
    libxl__colo_restore_state *crs = cds->concrete_data;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "preresume fails");
        goto fail;
    }

    if (crs->qdisk_setuped &&
        libxl__qmp_colo_do_checkpoint(gc, crs->domid)) {
        LOGD(ERROR, crs->domid, "doing checkpoint fails");
        goto fail;
    }

    if (!crs->cps.is_userspace_proxy)
        colo_proxy_preresume(&crs->cps);

    colo_restore_resume_vm(egc, crs->crcs);
    return;

fail:
    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
}
626 
/*
 * Resume the secondary vm with its device model state restored; the
 * result is delivered to colo_resume_vm_done().
 */
static void colo_restore_resume_vm(libxl__egc *egc,
                                   libxl__colo_restore_checkpoint_state *crcs)
{

    /* the callback must be in place before colo_resume_vm() can report */
    crcs->callback = colo_resume_vm_done;
    colo_resume_vm(egc, crcs, 1);
}
634 
/*
 * Completion of colo_restore_resume_vm().
 *
 * On failure the pending save/restore helper callback is completed with
 * 0.  On success the status becomes LIBXL_COLO_RESUMED and the colo
 * proxy is post-resumed.  If crs->saved_cb is still set, the original
 * domain-create callback is put back and log-dirty is enabled;
 * otherwise CHECKPOINT_SVM_RESUMED is written.
 */
static void colo_resume_vm_done(libxl__egc *egc,
                                libxl__colo_restore_checkpoint_state *crcs,
                                int rc)
{
    /* Convenience aliases */
    libxl__colo_restore_state *const crs = crcs->crs;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "cannot resume secondary vm");
        libxl__xc_domain_saverestore_async_callback_done(egc,
                                                         &dcs->srs.shs, 0);
        return;
    }

    crcs->status = LIBXL_COLO_RESUMED;

    colo_proxy_postresume(&crs->cps);

    if (!crs->saved_cb) {
        colo_write_svm_resumed(egc, crcs);
        return;
    }

    /* avoid calling stream->completion_callback() more than once */
    dcs->callback = crs->saved_cb;
    crs->saved_cb = NULL;

    dcs->srs.completion_callback = NULL;

    crcs->lds.callback = colo_enable_logdirty_done;
    colo_enable_logdirty(crs, egc);
}
675 
/*
 * Write a CHECKPOINT_SVM_RESUMED record on the back-channel stream.
 * crcs->callback is cleared, and colo_common_write_stream_done() is set
 * as the writer's checkpoint callback.
 */
static void colo_write_svm_resumed(libxl__egc *egc,
                                   libxl__colo_restore_checkpoint_state *crcs)
{
    libxl_sr_checkpoint_state resumed = { .id = CHECKPOINT_SVM_RESUMED };

    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
    crcs->callback = NULL;
    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &resumed);
}
685 
/*
 * Log-dirty callback for the first enable attempt.  On success the
 * checkpoint devices are set up.  On failure log-dirty is disabled so
 * that colo_reenable_logdirty() can try to enable it again.
 */
static void colo_enable_logdirty_done(libxl__egc *egc,
                                      libxl__logdirty_switch *lds,
                                      int rc)
{
    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);

    /* Convenience aliases */
    libxl__colo_restore_state *const crs = crcs->crs;

    EGC_GC;

    if (!rc) {
        colo_setup_checkpoint_devices(egc, crs);
        return;
    }

    /*
     * log-dirty already enabled? There's no test op,
     * so attempt to disable then reenable it
     */
    lds->callback = colo_reenable_logdirty;
    colo_disable_logdirty(crs, egc);
}
709 
/*
 * Log-dirty callback run after the disable step of the retry.  On
 * success log-dirty is enabled again, with
 * colo_reenable_logdirty_done() as the callback.  On failure the
 * pending save/restore helper callback is completed with 0.
 */
static void colo_reenable_logdirty(libxl__egc *egc,
                                   libxl__logdirty_switch *lds,
                                   int rc)
{
    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);

    /* Convenience aliases */
    libxl__colo_restore_state *const crs = crcs->crs;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "cannot enable logdirty");
        libxl__xc_domain_saverestore_async_callback_done(egc,
                                                         &dcs->srs.shs, 0);
        return;
    }

    lds->callback = colo_reenable_logdirty_done;
    colo_enable_logdirty(crs, egc);
}
736 
/*
 * Log-dirty callback for the second enable attempt.  On success the
 * checkpoint devices are set up; on failure the pending save/restore
 * helper callback is completed with 0.
 */
static void colo_reenable_logdirty_done(libxl__egc *egc,
                                        libxl__logdirty_switch *lds,
                                        int rc)
{
    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crcs->crs->domid, "cannot enable logdirty");
        libxl__xc_domain_saverestore_async_callback_done(egc,
                                                         &dcs->srs.shs, 0);
        return;
    }

    colo_setup_checkpoint_devices(egc, crcs->crs);
}
761 
762 /*
763  * We cannot setup checkpoint devices in libxl__colo_restore_setup(),
764  * because the guest is not ready.
765  */
/*
 * Configure and start checkpoint device setup for the secondary vm.
 *
 * VBD devices are always handled; VIF devices only when the userspace
 * proxy is not in use, in which case the colo proxy is set up here too.
 * On success crcs->teardown_devices is set and the sequence continues
 * in colo_restore_setup_cds_done().  On failure the pending
 * save/restore helper callback is completed with 0.
 */
static void colo_setup_checkpoint_devices(libxl__egc *egc,
                                          libxl__colo_restore_state *crs)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    /* Convenience aliases */
    libxl__checkpoint_devices_state *cds = &dcs->cds;

    STATE_AO_GC(crs->ao);

    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
    if (!crs->cps.is_userspace_proxy)
        cds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF);

    cds->callback = colo_restore_setup_cds_done;
    cds->ao = ao;
    cds->domid = crs->domid;
    cds->ops = colo_restore_ops;

    crs->cps.ao = ao;
    if (!crs->cps.is_userspace_proxy && colo_proxy_setup(&crs->cps)) {
        LOGD(ERROR, cds->domid, "COLO: failed to setup colo proxy for guest");
        goto fail;
    }

    if (init_device_subkind(cds))
        goto fail;

    /* from here on teardown has to clean the devices up */
    crs->crcs->teardown_devices = 1;

    libxl__checkpoint_devices_setup(egc, cds);
    return;

fail:
    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
}
808 
/*
 * Callback for libxl__checkpoint_devices_setup().
 *
 * On success qdisk replication is started if a qdisk is used and
 * replication is not running yet, then CHECKPOINT_SVM_READY is sent.
 * On any failure the pending save/restore helper callback is completed
 * with 0.
 */
static void colo_restore_setup_cds_done(libxl__egc *egc,
                                        libxl__checkpoint_devices_state *cds,
                                        int rc)
{
    libxl__colo_restore_state *crs = cds->concrete_data;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, cds->domid, "COLO: failed to setup device for guest");
        goto fail;
    }

    const bool start_replication = crs->qdisk_used && !crs->qdisk_setuped;

    if (start_replication) {
        if (libxl__qmp_start_replication(gc, crs->domid, false)) {
            LOGD(ERROR, cds->domid, "starting replication fails");
            goto fail;
        }
        crs->qdisk_setuped = true;
    }

    colo_send_svm_ready(egc, crs->crcs);
    return;

fail:
    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
}
842 
/*
 * Unpause the secondary vm and then write CHECKPOINT_SVM_RESUMED.  If
 * unpausing fails, the pending save/restore helper callback is
 * completed with 0.
 */
static void colo_unpause_svm(libxl__egc *egc,
                             libxl__colo_restore_checkpoint_state *crcs)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);

    /* Convenience aliases */
    const uint32_t domid = crcs->crs->domid;

    EGC_GC;

    /* We have enabled secondary vm's logdirty, so we can unpause it now */
    if (libxl__domain_unpause_deprecated(gc, domid)) {
        LOGD(ERROR, domid, "cannot unpause secondary vm");
        libxl__xc_domain_saverestore_async_callback_done(egc,
                                                         &dcs->srs.shs, 0);
        return;
    }

    colo_write_svm_resumed(egc, crcs);
}
869 
870 /* ===================== colo: wait new checkpoint ===================== */
871 
872 static void colo_restore_commit_cb(libxl__egc *egc,
873                                    libxl__checkpoint_devices_state *cds,
874                                    int rc);
875 static void colo_stream_read_done(libxl__egc *egc,
876                                   libxl__colo_restore_checkpoint_state *crcs,
877                                   int real_size);
878 
libxl__colo_restore_domain_wait_checkpoint_callback(void * data)879 static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data)
880 {
881     libxl__save_helper_state *shs = data;
882     libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
883     libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
884 
885     /* Convenience aliases */
886     libxl__checkpoint_devices_state *cds = &dcs->cds;
887 
888     cds->callback = colo_restore_commit_cb;
889     libxl__checkpoint_devices_commit(shs->egc, cds);
890 }
891 
/*
 * Completion callback for the checkpoint devices commit.
 *
 * egc: event generation context of the caller
 * cds: checkpoint devices state; cds->concrete_data is the colo restore state
 * rc:  non-zero if the commit failed
 *
 * On success a checkpoint state record is read from the stream and its id is
 * delivered to colo_stream_read_done() through colo_common_read_stream_done().
 * On failure the save helper's asynchronous callback is completed with 0.
 */
static void colo_restore_commit_cb(libxl__egc *egc,
                                   libxl__checkpoint_devices_state *cds,
                                   int rc)
{
    libxl__colo_restore_state *crs = cds->concrete_data;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "commit fails");
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
        return;
    }

    crcs->callback = colo_stream_read_done;
    dcs->srs.checkpoint_callback = colo_common_read_stream_done;
    libxl__stream_read_checkpoint_state(egc, &dcs->srs);
}
916 
/*
 * Called with the id of the checkpoint state record read from the stream.
 *
 * egc:  event generation context of the caller
 * crcs: colo restore checkpoint state
 * id:   id of the record that was read; only CHECKPOINT_NEW is accepted
 *
 * Completes the save helper's asynchronous callback with 1 if the id is
 * CHECKPOINT_NEW, and with 0 (after logging an error) otherwise.
 */
static void colo_stream_read_done(libxl__egc *egc,
                                  libxl__colo_restore_checkpoint_state *crcs,
                                  int id)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
    const int ok = (id == CHECKPOINT_NEW);

    EGC_GC;

    if (!ok) {
        LOGD(ERROR, crcs->crs->domid, "invalid section: %d", id);
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
}
936 
937 /* ===================== colo: suspend secondary vm ===================== */
938 
939 /*
 * Do the following things when suspending secondary vm:
941  *  1. suspend secondary vm
942  *  2. send CHECKPOINT_SVM_SUSPENDED
943  */
944 static void colo_suspend_vm_done(libxl__egc *egc,
945                                  libxl__domain_suspend_state *dsps,
946                                  int ok);
947 static void colo_restore_postsuspend_cb(libxl__egc *egc,
948                                         libxl__checkpoint_devices_state *cds,
949                                         int rc);
950 
libxl__colo_restore_domain_suspend_callback(void * data)951 static void libxl__colo_restore_domain_suspend_callback(void *data)
952 {
953     libxl__save_helper_state *shs = data;
954     libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
955     libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
956     libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
957 
958     STATE_AO_GC(dcs->ao);
959 
960     /* Convenience aliases */
961     libxl__domain_suspend_state *const dsps = &crcs->dsps;
962 
963     /* suspend secondary vm */
964     dsps->callback_common_done = colo_suspend_vm_done;
965 
966     libxl__domain_suspend(shs->egc, dsps);
967 }
968 
/*
 * Completion callback for suspending the secondary vm.
 *
 * egc:  event generation context of the caller
 * dsps: suspend state embedded in the colo restore checkpoint state
 * rc:   non-zero if the suspend failed
 *
 * On success the checkpoint status becomes LIBXL_COLO_SUSPENDED, the
 * replication status is queried through QMP, and the checkpoint devices'
 * postsuspend step is started with colo_restore_postsuspend_cb() as its
 * callback.  On any failure the save helper's asynchronous callback is
 * completed with 0.
 */
static void colo_suspend_vm_done(libxl__egc *egc,
                                 libxl__domain_suspend_state *dsps,
                                 int rc)
{
    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dsps, *crcs, dsps);
    libxl__colo_restore_state *crs = crcs->crs;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "cannot suspend secondary vm");
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
        return;
    }

    crcs->status = LIBXL_COLO_SUSPENDED;

    if (libxl__qmp_query_xen_replication_status(gc, crs->domid)) {
        LOGD(ERROR, crs->domid, "replication error occurs when secondary vm is running");
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
        return;
    }

    dcs->cds.callback = colo_restore_postsuspend_cb;
    libxl__checkpoint_devices_postsuspend(egc, &dcs->cds);
}
1002 
/*
 * Completion callback for the checkpoint devices' postsuspend step.
 *
 * egc: event generation context of the caller
 * cds: checkpoint devices state; cds->concrete_data is the colo restore state
 * rc:  non-zero if postsuspend failed
 *
 * On success a CHECKPOINT_SVM_SUSPENDED checkpoint state record is written
 * to the stream; crcs->callback is cleared first, so
 * colo_common_write_stream_done() finishes the helper callback itself.
 * On failure the save helper's asynchronous callback is completed with 0.
 */
static void colo_restore_postsuspend_cb(libxl__egc *egc,
                                        libxl__checkpoint_devices_state *cds,
                                        int rc)
{
    libxl__colo_restore_state *crs = cds->concrete_data;
    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
    libxl_sr_checkpoint_state record = { .id = CHECKPOINT_SVM_SUSPENDED };

    EGC_GC;

    if (rc) {
        LOGD(ERROR, crs->domid, "postsuspend fails");
        /* rc is non-zero on this path, so the reported value is 0. */
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
        return;
    }

    crcs->callback = NULL;
    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &record);
}
1028 
1029 /* ===================== colo: common callback ===================== */
1030 
/*
 * Shared completion callback for writing a checkpoint state record.
 *
 * egc:    event generation context of the caller
 * stream: write stream embedded in the colo restore checkpoint state
 * rc:     negative if the write failed
 *
 * - rc < 0: log the error and complete the save helper's asynchronous
 *   callback with 2.
 * - crcs->callback set: pass control to it with 0 as the last argument.
 * - otherwise: complete the save helper's asynchronous callback with 1.
 */
static void colo_common_write_stream_done(libxl__egc *egc,
                                          libxl__stream_write_state *stream,
                                          int rc)
{
    libxl__colo_restore_checkpoint_state *crcs =
        CONTAINER_OF(stream, *crcs, sws);
    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);

    EGC_GC;

    if (rc < 0) {
        /* TODO: this may be an internal error, but we cannot tell here */
        LOGD(ERROR, crcs->crs->domid, "sending data fails");
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 2);
        return;
    }

    if (crcs->callback) {
        crcs->callback(egc, crcs, 0);
        return;
    }

    /* No follow-up step registered: everything is OK. */
    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 1);
}
1062 
/*
 * Shared completion callback for reading a checkpoint state record.
 *
 * egc:    event generation context of the caller
 * stream: read stream embedded in the domain create state
 * rc:     negative if the read failed, otherwise the id that was read
 *
 * - rc < 0: log the error and complete the save helper's asynchronous
 *   callback with 2.
 * - crcs->callback set: pass control to it with rc (the id) as the last
 *   argument.
 * - otherwise: complete the save helper's asynchronous callback with 1.
 */
static void colo_common_read_stream_done(libxl__egc *egc,
                                         libxl__stream_read_state *stream,
                                         int rc)
{
    libxl__domain_create_state *dcs = CONTAINER_OF(stream, *dcs, srs);
    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;

    EGC_GC;

    if (rc < 0) {
        /* TODO: this may be an internal error, but we cannot tell here */
        LOGD(ERROR, crcs->crs->domid, "reading data fails");
        libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 2);
        return;
    }

    if (crcs->callback) {
        /* A non-negative rc carries the id of the record. */
        crcs->callback(egc, crcs, rc);
        return;
    }

    /* No follow-up step registered: everything is OK. */
    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 1);
}
1094