1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; version 2.1 only. with the special
9  * exception on linking described in file LICENSE.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  */
16 
17 #include "libxl_osdeps.h" /* must come before any other headers */
18 
19 #include "libxl_internal.h"
20 
21 #define PCI_BDF                "%04x:%02x:%02x.%01x"
22 #define PCI_BDF_SHORT          "%02x:%02x.%01x"
23 #define PCI_BDF_VDEVFN         "%04x:%02x:%02x.%01x@%02x"
24 #define PCI_OPTIONS            "msitranslate=%d,power_mgmt=%d"
25 #define PCI_BDF_XSPATH         "%04x-%02x-%02x-%01x"
26 #define PCI_PT_QDEV_ID         "pci-pt-%02x_%02x.%01x"
27 
pcidev_encode_bdf(libxl_device_pci * pcidev)28 static unsigned int pcidev_encode_bdf(libxl_device_pci *pcidev)
29 {
30     unsigned int value;
31 
32     value = pcidev->domain << 16;
33     value |= (pcidev->bus & 0xff) << 8;
34     value |= (pcidev->dev & 0x1f) << 3;
35     value |= (pcidev->func & 0x7);
36 
37     return value;
38 }
39 
pcidev_struct_fill(libxl_device_pci * pcidev,unsigned int domain,unsigned int bus,unsigned int dev,unsigned int func,unsigned int vdevfn)40 static void pcidev_struct_fill(libxl_device_pci *pcidev, unsigned int domain,
41                                unsigned int bus, unsigned int dev,
42                                unsigned int func, unsigned int vdevfn)
43 {
44     pcidev->domain = domain;
45     pcidev->bus = bus;
46     pcidev->dev = dev;
47     pcidev->func = func;
48     pcidev->vdevfn = vdevfn;
49 }
50 
libxl_create_pci_backend_device(libxl__gc * gc,flexarray_t * back,int num,const libxl_device_pci * pcidev)51 static void libxl_create_pci_backend_device(libxl__gc *gc,
52                                             flexarray_t *back,
53                                             int num,
54                                             const libxl_device_pci *pcidev)
55 {
56     flexarray_append(back, GCSPRINTF("key-%d", num));
57     flexarray_append(back, GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
58     flexarray_append(back, GCSPRINTF("dev-%d", num));
59     flexarray_append(back, GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
60     if (pcidev->vdevfn)
61         flexarray_append_pair(back, GCSPRINTF("vdevfn-%d", num), GCSPRINTF("%x", pcidev->vdevfn));
62     flexarray_append(back, GCSPRINTF("opts-%d", num));
63     flexarray_append(back,
64               GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d",
65                              pcidev->msitranslate, pcidev->power_mgmt,
66                              pcidev->permissive));
67     flexarray_append_pair(back, GCSPRINTF("state-%d", num), GCSPRINTF("%d", XenbusStateInitialising));
68 }
69 
libxl__device_from_pcidev(libxl__gc * gc,uint32_t domid,const libxl_device_pci * pcidev,libxl__device * device)70 static void libxl__device_from_pcidev(libxl__gc *gc, uint32_t domid,
71                                       const libxl_device_pci *pcidev,
72                                       libxl__device *device)
73 {
74     device->backend_devid = 0;
75     device->backend_domid = 0;
76     device->backend_kind = LIBXL__DEVICE_KIND_PCI;
77     device->devid = 0;
78     device->domid = domid;
79     device->kind = LIBXL__DEVICE_KIND_PCI;
80 }
81 
libxl__create_pci_backend(libxl__gc * gc,uint32_t domid,const libxl_device_pci * pcidev,int num)82 static int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
83                                      const libxl_device_pci *pcidev,
84                                      int num)
85 {
86     flexarray_t *front = NULL;
87     flexarray_t *back = NULL;
88     libxl__device device;
89     int i;
90 
91     front = flexarray_make(gc, 16, 1);
92     back = flexarray_make(gc, 16, 1);
93 
94     LOGD(DEBUG, domid, "Creating pci backend");
95 
96     /* add pci device */
97     libxl__device_from_pcidev(gc, domid, pcidev, &device);
98 
99     flexarray_append_pair(back, "frontend-id", GCSPRINTF("%d", domid));
100     flexarray_append_pair(back, "online", "1");
101     flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateInitialising));
102     flexarray_append_pair(back, "domain", libxl__domid_to_name(gc, domid));
103 
104     for (i = 0; i < num; i++, pcidev++)
105         libxl_create_pci_backend_device(gc, back, i, pcidev);
106 
107     flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num));
108     flexarray_append_pair(front, "backend-id", GCSPRINTF("%d", 0));
109     flexarray_append_pair(front, "state", GCSPRINTF("%d", XenbusStateInitialising));
110 
111     return libxl__device_generic_add(gc, XBT_NULL, &device,
112                                      libxl__xs_kvs_of_flexarray(gc, back),
113                                      libxl__xs_kvs_of_flexarray(gc, front),
114                                      NULL);
115 }
116 
libxl__device_pci_add_xenstore(libxl__gc * gc,uint32_t domid,const libxl_device_pci * pcidev,bool starting)117 static int libxl__device_pci_add_xenstore(libxl__gc *gc,
118                                           uint32_t domid,
119                                           const libxl_device_pci *pcidev,
120                                           bool starting)
121 {
122     flexarray_t *back;
123     char *num_devs, *be_path;
124     int num = 0;
125     xs_transaction_t t = XBT_NULL;
126     int rc;
127     libxl_domain_config d_config;
128     libxl__flock *lock = NULL;
129     bool is_stubdomain = libxl_is_stubdom(CTX, domid, NULL);
130 
131     /* Stubdomain doesn't have own config. */
132     if (!is_stubdomain)
133         libxl_domain_config_init(&d_config);
134 
135     be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
136                                                 LIBXL__DEVICE_KIND_PCI);
137     num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
138     if (!num_devs)
139         return libxl__create_pci_backend(gc, domid, pcidev, 1);
140 
141     libxl_domain_type domtype = libxl__domain_type(gc, domid);
142     if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
143         return ERROR_FAIL;
144 
145     if (!starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
146         if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0)
147             return ERROR_FAIL;
148     }
149 
150     back = flexarray_make(gc, 16, 1);
151 
152     LOGD(DEBUG, domid, "Adding new pci device to xenstore");
153     num = atoi(num_devs);
154     libxl_create_pci_backend_device(gc, back, num, pcidev);
155     flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
156     if (!starting)
157         flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateReconfiguring));
158 
159     /*
160      * Stubdomin config is derived from its target domain, it doesn't have
161      * its own file.
162      */
163     if (!is_stubdomain) {
164         lock = libxl__lock_domain_userdata(gc, domid);
165         if (!lock) {
166             rc = ERROR_LOCK_FAIL;
167             goto out;
168         }
169 
170         rc = libxl__get_domain_configuration(gc, domid, &d_config);
171         if (rc) goto out;
172 
173         device_add_domain_config(gc, &d_config, &libxl__pcidev_devtype,
174                                  pcidev);
175 
176         rc = libxl__dm_check_start(gc, &d_config, domid);
177         if (rc) goto out;
178     }
179 
180     for (;;) {
181         rc = libxl__xs_transaction_start(gc, &t);
182         if (rc) goto out;
183 
184         if (lock) {
185             rc = libxl__set_domain_configuration(gc, domid, &d_config);
186             if (rc) goto out;
187         }
188 
189         libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));
190 
191         rc = libxl__xs_transaction_commit(gc, &t);
192         if (!rc) break;
193         if (rc < 0) goto out;
194     }
195 
196 out:
197     libxl__xs_transaction_abort(gc, &t);
198     if (lock) libxl__unlock_file(lock);
199     if (!is_stubdomain)
200         libxl_domain_config_dispose(&d_config);
201     return rc;
202 }
203 
libxl__device_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pcidev)204 static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
205 {
206     libxl_ctx *ctx = libxl__gc_owner(gc);
207     char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
208     int num, i, j;
209     xs_transaction_t t;
210 
211     be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
212                                                 LIBXL__DEVICE_KIND_PCI);
213     num_devs_path = GCSPRINTF("%s/num_devs", be_path);
214     num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
215     if (!num_devs)
216         return ERROR_INVAL;
217     num = atoi(num_devs);
218 
219     libxl_domain_type domtype = libxl__domain_type(gc, domid);
220     if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
221         return ERROR_FAIL;
222 
223     if (domtype == LIBXL_DOMAIN_TYPE_PV) {
224         if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
225             LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
226             return ERROR_FAIL;
227         }
228     }
229 
230     for (i = 0; i < num; i++) {
231         unsigned int domain = 0, bus = 0, dev = 0, func = 0;
232         xsdev = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, i));
233         sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
234         if (domain == pcidev->domain && bus == pcidev->bus &&
235             pcidev->dev == dev && pcidev->func == func) {
236             break;
237         }
238     }
239     if (i == num) {
240         LOGD(ERROR, domid, "Couldn't find the device on xenstore");
241         return ERROR_INVAL;
242     }
243 
244 retry_transaction:
245     t = xs_transaction_start(ctx->xsh);
246     xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i), GCSPRINTF("%d", XenbusStateClosing), 1);
247     xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path), GCSPRINTF("%d", XenbusStateReconfiguring), 1);
248     if (!xs_transaction_end(ctx->xsh, t, 0))
249         if (errno == EAGAIN)
250             goto retry_transaction;
251 
252     if (domtype == LIBXL_DOMAIN_TYPE_PV) {
253         if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
254             LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
255             return ERROR_FAIL;
256         }
257     }
258 
259 retry_transaction2:
260     t = xs_transaction_start(ctx->xsh);
261     xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
262     xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
263     xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
264     xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
265     xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
266     xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
267     libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);
268     for (j = i + 1; j < num; j++) {
269         tmppath = GCSPRINTF("%s/state-%d", be_path, j);
270         tmp = libxl__xs_read(gc, t, tmppath);
271         xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
272         xs_rm(ctx->xsh, t, tmppath);
273         tmppath = GCSPRINTF("%s/dev-%d", be_path, j);
274         tmp = libxl__xs_read(gc, t, tmppath);
275         xs_write(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
276         xs_rm(ctx->xsh, t, tmppath);
277         tmppath = GCSPRINTF("%s/key-%d", be_path, j);
278         tmp = libxl__xs_read(gc, t, tmppath);
279         xs_write(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
280         xs_rm(ctx->xsh, t, tmppath);
281         tmppath = GCSPRINTF("%s/vdev-%d", be_path, j);
282         tmp = libxl__xs_read(gc, t, tmppath);
283         if (tmp) {
284             xs_write(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
285             xs_rm(ctx->xsh, t, tmppath);
286         }
287         tmppath = GCSPRINTF("%s/opts-%d", be_path, j);
288         tmp = libxl__xs_read(gc, t, tmppath);
289         if (tmp) {
290             xs_write(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
291             xs_rm(ctx->xsh, t, tmppath);
292         }
293         tmppath = GCSPRINTF("%s/vdevfn-%d", be_path, j);
294         tmp = libxl__xs_read(gc, t, tmppath);
295         if (tmp) {
296             xs_write(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
297             xs_rm(ctx->xsh, t, tmppath);
298         }
299     }
300     if (!xs_transaction_end(ctx->xsh, t, 0))
301         if (errno == EAGAIN)
302             goto retry_transaction2;
303 
304     if (num == 1) {
305         libxl__device dev;
306         if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
307             return ERROR_FAIL;
308 
309         dev.domid = domid;
310         dev.kind = LIBXL__DEVICE_KIND_PCI;
311         dev.devid = 0;
312 
313         libxl__device_destroy(gc, &dev);
314         return 0;
315     }
316 
317     return 0;
318 }
319 
get_all_assigned_devices(libxl__gc * gc,libxl_device_pci ** list,int * num)320 static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list, int *num)
321 {
322     char **domlist;
323     unsigned int nd = 0, i;
324 
325     *list = NULL;
326     *num = 0;
327 
328     domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
329     for(i = 0; i < nd; i++) {
330         char *path, *num_devs;
331 
332         path = GCSPRINTF("/local/domain/0/backend/%s/%s/0/num_devs",
333                          libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
334                          domlist[i]);
335         num_devs = libxl__xs_read(gc, XBT_NULL, path);
336         if ( num_devs ) {
337             int ndev = atoi(num_devs), j;
338             char *devpath, *bdf;
339 
340             for(j = 0; j < ndev; j++) {
341                 devpath = GCSPRINTF("/local/domain/0/backend/%s/%s/0/dev-%u",
342                                     libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
343                                     domlist[i], j);
344                 bdf = libxl__xs_read(gc, XBT_NULL, devpath);
345                 if ( bdf ) {
346                     unsigned dom, bus, dev, func;
347                     if ( sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
348                         continue;
349 
350                     *list = realloc(*list, sizeof(libxl_device_pci) * ((*num) + 1));
351                     if (*list == NULL)
352                         return ERROR_NOMEM;
353                     pcidev_struct_fill(*list + *num, dom, bus, dev, func, 0);
354                     (*num)++;
355                 }
356             }
357         }
358     }
359     libxl__ptr_add(gc, *list);
360 
361     return 0;
362 }
363 
is_pcidev_in_array(libxl_device_pci * assigned,int num_assigned,int dom,int bus,int dev,int func)364 static int is_pcidev_in_array(libxl_device_pci *assigned, int num_assigned,
365                        int dom, int bus, int dev, int func)
366 {
367     int i;
368 
369     for(i = 0; i < num_assigned; i++) {
370         if ( assigned[i].domain != dom )
371             continue;
372         if ( assigned[i].bus != bus )
373             continue;
374         if ( assigned[i].dev != dev )
375             continue;
376         if ( assigned[i].func != func )
377             continue;
378         return 1;
379     }
380 
381     return 0;
382 }
383 
384 /* Write the standard BDF into the sysfs path given by sysfs_path. */
sysfs_write_bdf(libxl__gc * gc,const char * sysfs_path,libxl_device_pci * pcidev)385 static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
386                            libxl_device_pci *pcidev)
387 {
388     int rc, fd;
389     char *buf;
390 
391     fd = open(sysfs_path, O_WRONLY);
392     if (fd < 0) {
393         LOGE(ERROR, "Couldn't open %s", sysfs_path);
394         return ERROR_FAIL;
395     }
396 
397     buf = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
398                     pcidev->dev, pcidev->func);
399     rc = write(fd, buf, strlen(buf));
400     /* Annoying to have two if's, but we need the errno */
401     if (rc < 0)
402         LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
403     close(fd);
404 
405     if (rc < 0)
406         return ERROR_FAIL;
407 
408     return 0;
409 }
410 
libxl_device_pci_assignable_list(libxl_ctx * ctx,int * num)411 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
412 {
413     GC_INIT(ctx);
414     libxl_device_pci *pcidevs = NULL, *new, *assigned;
415     struct dirent *de;
416     DIR *dir;
417     int r, num_assigned;
418 
419     *num = 0;
420 
421     r = get_all_assigned_devices(gc, &assigned, &num_assigned);
422     if (r) goto out;
423 
424     dir = opendir(SYSFS_PCIBACK_DRIVER);
425     if (NULL == dir) {
426         if (errno == ENOENT) {
427             LOG(ERROR, "Looks like pciback driver not loaded");
428         } else {
429             LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
430         }
431         goto out;
432     }
433 
434     while((de = readdir(dir))) {
435         unsigned dom, bus, dev, func;
436         if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
437             continue;
438 
439         if (is_pcidev_in_array(assigned, num_assigned, dom, bus, dev, func))
440             continue;
441 
442         new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
443         if (NULL == new)
444             continue;
445 
446         pcidevs = new;
447         new = pcidevs + *num;
448 
449         memset(new, 0, sizeof(*new));
450         pcidev_struct_fill(new, dom, bus, dev, func, 0);
451         (*num)++;
452     }
453 
454     closedir(dir);
455 out:
456     GC_FREE;
457     return pcidevs;
458 }
459 
460 /* Unbind device from its current driver, if any.  If driver_path is non-NULL,
461  * store the path to the original driver in it. */
sysfs_dev_unbind(libxl__gc * gc,libxl_device_pci * pcidev,char ** driver_path)462 static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pcidev,
463                             char **driver_path)
464 {
465     char * spath, *dp = NULL;
466     struct stat st;
467 
468     spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver",
469                            pcidev->domain,
470                            pcidev->bus,
471                            pcidev->dev,
472                            pcidev->func);
473     if ( !lstat(spath, &st) ) {
474         /* Find the canonical path to the driver. */
475         dp = libxl__zalloc(gc, PATH_MAX);
476         dp = realpath(spath, dp);
477         if ( !dp ) {
478             LOGE(ERROR, "realpath() failed");
479             return -1;
480         }
481 
482         LOG(DEBUG, "Driver re-plug path: %s", dp);
483 
484         /* Unbind from the old driver */
485         spath = GCSPRINTF("%s/unbind", dp);
486         if ( sysfs_write_bdf(gc, spath, pcidev) < 0 ) {
487             LOGE(ERROR, "Couldn't unbind device");
488             return -1;
489         }
490     }
491 
492     if ( driver_path )
493         *driver_path = dp;
494 
495     return 0;
496 }
497 
sysfs_dev_get_vendor(libxl__gc * gc,libxl_device_pci * pcidev)498 static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pcidev)
499 {
500     char *pci_device_vendor_path =
501             GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor",
502                       pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
503     uint16_t read_items;
504     uint16_t pci_device_vendor;
505 
506     FILE *f = fopen(pci_device_vendor_path, "r");
507     if (!f) {
508         LOGE(ERROR,
509              "pci device "PCI_BDF" does not have vendor attribute",
510              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
511         return 0xffff;
512     }
513     read_items = fscanf(f, "0x%hx\n", &pci_device_vendor);
514     fclose(f);
515     if (read_items != 1) {
516         LOGE(ERROR,
517              "cannot read vendor of pci device "PCI_BDF,
518              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
519         return 0xffff;
520     }
521 
522     return pci_device_vendor;
523 }
524 
sysfs_dev_get_device(libxl__gc * gc,libxl_device_pci * pcidev)525 static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pcidev)
526 {
527     char *pci_device_device_path =
528             GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device",
529                       pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
530     uint16_t read_items;
531     uint16_t pci_device_device;
532 
533     FILE *f = fopen(pci_device_device_path, "r");
534     if (!f) {
535         LOGE(ERROR,
536              "pci device "PCI_BDF" does not have device attribute",
537              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
538         return 0xffff;
539     }
540     read_items = fscanf(f, "0x%hx\n", &pci_device_device);
541     fclose(f);
542     if (read_items != 1) {
543         LOGE(ERROR,
544              "cannot read device of pci device "PCI_BDF,
545              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
546         return 0xffff;
547     }
548 
549     return pci_device_device;
550 }
551 
sysfs_dev_get_class(libxl__gc * gc,libxl_device_pci * pcidev,unsigned long * class)552 static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pcidev,
553                                unsigned long *class)
554 {
555     char *pci_device_class_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class",
556                      pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
557     int read_items, ret = 0;
558 
559     FILE *f = fopen(pci_device_class_path, "r");
560     if (!f) {
561         LOGE(ERROR,
562              "pci device "PCI_BDF" does not have class attribute",
563              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
564         ret = ERROR_FAIL;
565         goto out;
566     }
567     read_items = fscanf(f, "0x%lx\n", class);
568     fclose(f);
569     if (read_items != 1) {
570         LOGE(ERROR,
571              "cannot read class of pci device "PCI_BDF,
572              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
573         ret = ERROR_FAIL;
574     }
575 
576 out:
577     return ret;
578 }
579 
580 /*
581  * Some devices may need some ways to work well. Here like IGD,
582  * we have to pass a specific option to qemu.
583  */
libxl__is_igd_vga_passthru(libxl__gc * gc,const libxl_domain_config * d_config)584 bool libxl__is_igd_vga_passthru(libxl__gc *gc,
585                                 const libxl_domain_config *d_config)
586 {
587     unsigned int i;
588     uint16_t pt_vendor, pt_device;
589     unsigned long class;
590 
591     for (i = 0 ; i < d_config->num_pcidevs ; i++) {
592         libxl_device_pci *pcidev = &d_config->pcidevs[i];
593         pt_vendor = sysfs_dev_get_vendor(gc, pcidev);
594         pt_device = sysfs_dev_get_device(gc, pcidev);
595 
596         if (pt_vendor == 0xffff || pt_device == 0xffff ||
597             pt_vendor != 0x8086)
598             continue;
599 
600         if (sysfs_dev_get_class(gc, pcidev, &class))
601             continue;
602         if (class == 0x030000)
603             return true;
604     }
605 
606     return false;
607 }
608 
609 /*
610  * A brief comment about slots.  I don't know what slots are for; however,
611  * I have by experimentation determined:
612  * - Before a device can be bound to pciback, its BDF must first be listed
613  *   in pciback/slots
614  * - The way to get the BDF listed there is to write BDF to
615  *   pciback/new_slot
616  * - Writing the same BDF to pciback/new_slot is not idempotent; it results
617  *   in two entries of the BDF in pciback/slots
618  * It's not clear whether having two entries in pciback/slots is a problem
619  * or not.  Just to be safe, this code does the conservative thing, and
620  * first checks to see if there is a slot, adding one only if one does not
621  * already exist.
622  */
623 
624 /* Scan through /sys/.../pciback/slots looking for pcidev's BDF */
pciback_dev_has_slot(libxl__gc * gc,libxl_device_pci * pcidev)625 static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pcidev)
626 {
627     FILE *f;
628     int rc = 0;
629     unsigned dom, bus, dev, func;
630 
631     f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
632 
633     if (f == NULL) {
634         LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots");
635         return ERROR_FAIL;
636     }
637 
638     while(fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func)==4) {
639         if(dom == pcidev->domain
640            && bus == pcidev->bus
641            && dev == pcidev->dev
642            && func == pcidev->func) {
643             rc = 1;
644             goto out;
645         }
646     }
647 out:
648     fclose(f);
649     return rc;
650 }
651 
pciback_dev_is_assigned(libxl__gc * gc,libxl_device_pci * pcidev)652 static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pcidev)
653 {
654     char * spath;
655     int rc;
656     struct stat st;
657 
658     if ( access(SYSFS_PCIBACK_DRIVER, F_OK) < 0 ) {
659         if ( errno == ENOENT ) {
660             LOG(ERROR, "Looks like pciback driver is not loaded");
661         } else {
662             LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER);
663         }
664         return -1;
665     }
666 
667     spath = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
668                       pcidev->domain, pcidev->bus,
669                       pcidev->dev, pcidev->func);
670     rc = lstat(spath, &st);
671 
672     if( rc == 0 )
673         return 1;
674     if ( rc < 0 && errno == ENOENT )
675         return 0;
676     LOGE(ERROR, "Accessing %s", spath);
677     return -1;
678 }
679 
pciback_dev_assign(libxl__gc * gc,libxl_device_pci * pcidev)680 static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pcidev)
681 {
682     int rc;
683 
684     if ( (rc=pciback_dev_has_slot(gc, pcidev)) < 0 ) {
685         LOGE(ERROR, "Error checking for pciback slot");
686         return ERROR_FAIL;
687     } else if (rc == 0) {
688         if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot",
689                              pcidev) < 0 ) {
690             LOGE(ERROR, "Couldn't bind device to pciback!");
691             return ERROR_FAIL;
692         }
693     }
694 
695     if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pcidev) < 0 ) {
696         LOGE(ERROR, "Couldn't bind device to pciback!");
697         return ERROR_FAIL;
698     }
699     return 0;
700 }
701 
pciback_dev_unassign(libxl__gc * gc,libxl_device_pci * pcidev)702 static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pcidev)
703 {
704     /* Remove from pciback */
705     if ( sysfs_dev_unbind(gc, pcidev, NULL) < 0 ) {
706         LOG(ERROR, "Couldn't unbind device!");
707         return ERROR_FAIL;
708     }
709 
710     /* Remove slot if necessary */
711     if ( pciback_dev_has_slot(gc, pcidev) > 0 ) {
712         if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot",
713                              pcidev) < 0 ) {
714             LOGE(ERROR, "Couldn't remove pciback slot");
715             return ERROR_FAIL;
716         }
717     }
718     return 0;
719 }
720 
721 #define PCIBACK_INFO_PATH "/libxl/pciback"
722 
pci_assignable_driver_path_write(libxl__gc * gc,libxl_device_pci * pcidev,char * driver_path)723 static void pci_assignable_driver_path_write(libxl__gc *gc,
724                                             libxl_device_pci *pcidev,
725                                             char *driver_path)
726 {
727     char *path;
728 
729     path = GCSPRINTF(PCIBACK_INFO_PATH"/"PCI_BDF_XSPATH"/driver_path",
730                      pcidev->domain,
731                      pcidev->bus,
732                      pcidev->dev,
733                      pcidev->func);
734     if ( libxl__xs_printf(gc, XBT_NULL, path, "%s", driver_path) < 0 ) {
735         LOGE(WARN, "Write of %s to node %s failed.", driver_path, path);
736     }
737 }
738 
pci_assignable_driver_path_read(libxl__gc * gc,libxl_device_pci * pcidev)739 static char * pci_assignable_driver_path_read(libxl__gc *gc,
740                                               libxl_device_pci *pcidev)
741 {
742     return libxl__xs_read(gc, XBT_NULL,
743                           GCSPRINTF(
744                            PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH "/driver_path",
745                            pcidev->domain,
746                            pcidev->bus,
747                            pcidev->dev,
748                            pcidev->func));
749 }
750 
pci_assignable_driver_path_remove(libxl__gc * gc,libxl_device_pci * pcidev)751 static void pci_assignable_driver_path_remove(libxl__gc *gc,
752                                               libxl_device_pci *pcidev)
753 {
754     libxl_ctx *ctx = libxl__gc_owner(gc);
755 
756     /* Remove the xenstore entry */
757     xs_rm(ctx->xsh, XBT_NULL,
758           GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH,
759                     pcidev->domain,
760                     pcidev->bus,
761                     pcidev->dev,
762                     pcidev->func) );
763 }
764 
libxl__device_pci_assignable_add(libxl__gc * gc,libxl_device_pci * pcidev,int rebind)765 static int libxl__device_pci_assignable_add(libxl__gc *gc,
766                                             libxl_device_pci *pcidev,
767                                             int rebind)
768 {
769     libxl_ctx *ctx = libxl__gc_owner(gc);
770     unsigned dom, bus, dev, func;
771     char *spath, *driver_path = NULL;
772     int rc;
773     struct stat st;
774 
775     /* Local copy for convenience */
776     dom = pcidev->domain;
777     bus = pcidev->bus;
778     dev = pcidev->dev;
779     func = pcidev->func;
780 
781     /* See if the device exists */
782     spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
783     if ( lstat(spath, &st) ) {
784         LOGE(ERROR, "Couldn't lstat %s", spath);
785         return ERROR_FAIL;
786     }
787 
788     /* Check to see if it's already assigned to pciback */
789     rc = pciback_dev_is_assigned(gc, pcidev);
790     if ( rc < 0 ) {
791         return ERROR_FAIL;
792     }
793     if ( rc ) {
794         LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
795         goto quarantine;
796     }
797 
798     /* Check to see if there's already a driver that we need to unbind from */
799     if ( sysfs_dev_unbind(gc, pcidev, &driver_path ) ) {
800         LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver",
801             dom, bus, dev, func);
802         return ERROR_FAIL;
803     }
804 
805     /* Store driver_path for rebinding to dom0 */
806     if ( rebind ) {
807         if ( driver_path ) {
808             pci_assignable_driver_path_write(gc, pcidev, driver_path);
809         } else if ( (driver_path =
810                      pci_assignable_driver_path_read(gc, pcidev)) != NULL ) {
811             LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s",
812                 dom, bus, dev, func, driver_path);
813         } else {
814             LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.",
815                 dom, bus, dev, func);
816         }
817     } else {
818         pci_assignable_driver_path_remove(gc, pcidev);
819     }
820 
821     if ( pciback_dev_assign(gc, pcidev) ) {
822         LOG(ERROR, "Couldn't bind device to pciback!");
823         return ERROR_FAIL;
824     }
825 
826 quarantine:
827     /*
828      * DOMID_IO is just a sentinel domain, without any actual mappings,
829      * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being
830      * unnecessarily denied.
831      */
832     rc = xc_assign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev),
833                           XEN_DOMCTL_DEV_RDM_RELAXED);
834     if ( rc < 0 ) {
835         LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func);
836         return ERROR_FAIL;
837     }
838 
839     return 0;
840 }
841 
libxl__device_pci_assignable_remove(libxl__gc * gc,libxl_device_pci * pcidev,int rebind)842 static int libxl__device_pci_assignable_remove(libxl__gc *gc,
843                                                libxl_device_pci *pcidev,
844                                                int rebind)
845 {
846     libxl_ctx *ctx = libxl__gc_owner(gc);
847     int rc;
848     char *driver_path;
849 
850     /* De-quarantine */
851     rc = xc_deassign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev));
852     if ( rc < 0 ) {
853         LOG(ERROR, "failed to de-quarantine "PCI_BDF, pcidev->domain, pcidev->bus,
854             pcidev->dev, pcidev->func);
855         return ERROR_FAIL;
856     }
857 
858     /* Unbind from pciback */
859     if ( (rc=pciback_dev_is_assigned(gc, pcidev)) < 0 ) {
860         return ERROR_FAIL;
861     } else if ( rc ) {
862         pciback_dev_unassign(gc, pcidev);
863     } else {
864         LOG(WARN, "Not bound to pciback");
865     }
866 
867     /* Rebind if necessary */
868     driver_path = pci_assignable_driver_path_read(gc, pcidev);
869 
870     if ( driver_path ) {
871         if ( rebind ) {
872             LOG(INFO, "Rebinding to driver at %s", driver_path);
873 
874             if ( sysfs_write_bdf(gc,
875                                  GCSPRINTF("%s/bind", driver_path),
876                                  pcidev) < 0 ) {
877                 LOGE(ERROR, "Couldn't bind device to %s", driver_path);
878                 return -1;
879             }
880 
881             pci_assignable_driver_path_remove(gc, pcidev);
882         }
883     } else {
884         if ( rebind ) {
885             LOG(WARN,
886                 "Couldn't find path for original driver; not rebinding");
887         }
888     }
889 
890     return 0;
891 }
892 
libxl_device_pci_assignable_add(libxl_ctx * ctx,libxl_device_pci * pcidev,int rebind)893 int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev,
894                                     int rebind)
895 {
896     GC_INIT(ctx);
897     int rc;
898 
899     rc = libxl__device_pci_assignable_add(gc, pcidev, rebind);
900 
901     GC_FREE;
902     return rc;
903 }
904 
905 
libxl_device_pci_assignable_remove(libxl_ctx * ctx,libxl_device_pci * pcidev,int rebind)906 int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev,
907                                        int rebind)
908 {
909     GC_INIT(ctx);
910     int rc;
911 
912     rc = libxl__device_pci_assignable_remove(gc, pcidev, rebind);
913 
914     GC_FREE;
915     return rc;
916 }
917 
918 /*
919  * This function checks that all functions of a device are bound to pciback
920  * driver. It also initialises a bit-mask of which function numbers are present
921  * on that device.
922 */
pci_multifunction_check(libxl__gc * gc,libxl_device_pci * pcidev,unsigned int * func_mask)923 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask)
924 {
925     struct dirent *de;
926     DIR *dir;
927 
928     *func_mask = 0;
929 
930     dir = opendir(SYSFS_PCI_DEV);
931     if ( NULL == dir ) {
932         LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
933         return -1;
934     }
935 
936     while( (de = readdir(dir)) ) {
937         unsigned dom, bus, dev, func;
938         struct stat st;
939         char *path;
940 
941         if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
942             continue;
943         if ( pcidev->domain != dom )
944             continue;
945         if ( pcidev->bus != bus )
946             continue;
947         if ( pcidev->dev != dev )
948             continue;
949 
950         path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
951         if ( lstat(path, &st) ) {
952             if ( errno == ENOENT )
953                 LOG(ERROR, PCI_BDF " is not assigned to pciback driver",
954                     dom, bus, dev, func);
955             else
956                 LOGE(ERROR, "Couldn't lstat %s", path);
957             closedir(dir);
958             return -1;
959         }
960         (*func_mask) |= (1 << func);
961     }
962 
963     closedir(dir);
964     return 0;
965 }
966 
pci_ins_check(libxl__gc * gc,uint32_t domid,const char * state,void * priv)967 static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
968 {
969     char *orig_state = priv;
970 
971     if ( !strcmp(state, "pci-insert-failed") )
972         return -1;
973     if ( !strcmp(state, "pci-inserted") )
974         return 0;
975     if ( !strcmp(state, orig_state) )
976         return 1;
977 
978     return 1;
979 }
980 
qemu_pci_add_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pcidev)981 static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
982                                  libxl_device_pci *pcidev)
983 {
984     libxl_ctx *ctx = libxl__gc_owner(gc);
985     int rc = 0;
986     char *path;
987     char *state, *vdevfn;
988     uint32_t dm_domid;
989 
990     dm_domid = libxl_get_stubdom_id(CTX, domid);
991     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
992     state = libxl__xs_read(gc, XBT_NULL, path);
993     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
994     if (pcidev->vdevfn) {
995         libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS,
996                          pcidev->domain, pcidev->bus, pcidev->dev,
997                          pcidev->func, pcidev->vdevfn, pcidev->msitranslate,
998                          pcidev->power_mgmt);
999     } else {
1000         libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS,
1001                          pcidev->domain,  pcidev->bus, pcidev->dev,
1002                          pcidev->func, pcidev->msitranslate, pcidev->power_mgmt);
1003     }
1004 
1005     libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
1006     rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL,
1007                                       pci_ins_check, state);
1008     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
1009     vdevfn = libxl__xs_read(gc, XBT_NULL, path);
1010     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
1011     if ( rc < 0 )
1012         LOGD(ERROR, domid, "qemu refused to add device: %s", vdevfn);
1013     else if ( sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) {
1014         LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'", vdevfn);
1015         rc = -1;
1016     }
1017     xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
1018 
1019     return rc;
1020 }
1021 
check_qemu_running(libxl__gc * gc,libxl_domid domid,libxl__xswait_state * xswa,int rc,const char * state)1022 static int check_qemu_running(libxl__gc *gc,
1023                               libxl_domid domid,
1024                               libxl__xswait_state *xswa,
1025                               int rc,
1026                               const char *state)
1027 {
1028     if (rc) {
1029         if (rc == ERROR_TIMEDOUT) {
1030             LOGD(ERROR, domid, "%s not ready", xswa->what);
1031         }
1032         goto out;
1033     }
1034 
1035     if (!state || strcmp(state, "running"))
1036         return ERROR_NOT_READY;
1037 
1038 out:
1039     libxl__xswait_stop(gc, xswa);
1040     return rc;
1041 }
1042 
1043 typedef struct pci_add_state {
1044     /* filled by user of do_pci_add */
1045     libxl__ao_device *aodev;
1046     libxl_domid domid;
1047     bool starting;
1048     void (*callback)(libxl__egc *, struct pci_add_state *, int rc);
1049 
1050     /* private to device_pci_add_stubdom_wait */
1051     libxl__ev_devstate pciback_ds;
1052 
1053     /* private to do_pci_add */
1054     libxl__xswait_state xswait;
1055     libxl__ev_qmp qmp;
1056     libxl__ev_time timeout;
1057     libxl_device_pci *pcidev;
1058     int pci_domid;
1059 } pci_add_state;
1060 
1061 static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
1062     libxl__xswait_state *xswa, int rc, const char *state);
1063 static void pci_add_qmp_device_add(libxl__egc *, pci_add_state *);
1064 static void pci_add_qmp_device_add_cb(libxl__egc *,
1065     libxl__ev_qmp *, const libxl__json_object *, int rc);
1066 static void pci_add_qmp_query_pci_cb(libxl__egc *,
1067     libxl__ev_qmp *, const libxl__json_object *, int rc);
1068 static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
1069     const struct timeval *requested_abs, int rc);
1070 static void pci_add_dm_done(libxl__egc *,
1071     pci_add_state *, int rc);
1072 
do_pci_add(libxl__egc * egc,libxl_domid domid,libxl_device_pci * pcidev,pci_add_state * pas)1073 static void do_pci_add(libxl__egc *egc,
1074                        libxl_domid domid,
1075                        libxl_device_pci *pcidev,
1076                        pci_add_state *pas)
1077 {
1078     STATE_AO_GC(pas->aodev->ao);
1079     libxl_domain_type type = libxl__domain_type(gc, domid);
1080     int rc;
1081 
1082     /* init pci_add_state */
1083     libxl__xswait_init(&pas->xswait);
1084     libxl__ev_qmp_init(&pas->qmp);
1085     pas->pcidev = pcidev;
1086     pas->pci_domid = domid;
1087     libxl__ev_time_init(&pas->timeout);
1088 
1089     if (type == LIBXL_DOMAIN_TYPE_INVALID) {
1090         rc = ERROR_FAIL;
1091         goto out;
1092     }
1093 
1094     if (type == LIBXL_DOMAIN_TYPE_HVM) {
1095         switch (libxl__device_model_version_running(gc, domid)) {
1096             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
1097                 pas->xswait.ao = ao;
1098                 pas->xswait.what = "Device Model";
1099                 pas->xswait.path = DEVICE_MODEL_XS_PATH(gc,
1100                     libxl_get_stubdom_id(CTX, domid), domid, "/state");
1101                 pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
1102                 pas->xswait.callback = pci_add_qemu_trad_watch_state_cb;
1103                 rc = libxl__xswait_start(gc, &pas->xswait);
1104                 if (rc) goto out;
1105                 return;
1106             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
1107                 pci_add_qmp_device_add(egc, pas); /* must be last */
1108                 return;
1109             default:
1110                 rc = ERROR_INVAL;
1111                 break;
1112         }
1113     }
1114 
1115     rc = 0;
1116 
1117 out:
1118     pci_add_dm_done(egc, pas, rc); /* must be last */
1119 }
1120 
pci_add_qemu_trad_watch_state_cb(libxl__egc * egc,libxl__xswait_state * xswa,int rc,const char * state)1121 static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
1122                                              libxl__xswait_state *xswa,
1123                                              int rc,
1124                                              const char *state)
1125 {
1126     pci_add_state *pas = CONTAINER_OF(xswa, *pas, xswait);
1127     STATE_AO_GC(pas->aodev->ao);
1128 
1129     /* Convenience aliases */
1130     libxl_domid domid = pas->domid;
1131     libxl_device_pci *pcidev = pas->pcidev;
1132 
1133     rc = check_qemu_running(gc, domid, xswa, rc, state);
1134     if (rc == ERROR_NOT_READY)
1135         return;
1136     if (rc)
1137         goto out;
1138 
1139     rc = qemu_pci_add_xenstore(gc, domid, pcidev);
1140 out:
1141     pci_add_dm_done(egc, pas, rc); /* must be last */
1142 }
1143 
pci_add_qmp_device_add(libxl__egc * egc,pci_add_state * pas)1144 static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas)
1145 {
1146     STATE_AO_GC(pas->aodev->ao);
1147     libxl__json_object *args = NULL;
1148     int rc;
1149 
1150     /* Convenience aliases */
1151     libxl_domid domid = pas->domid;
1152     libxl_device_pci *pcidev = pas->pcidev;
1153     libxl__ev_qmp *const qmp = &pas->qmp;
1154 
1155     rc = libxl__ev_time_register_rel(ao, &pas->timeout,
1156                                      pci_add_timeout,
1157                                      LIBXL_QMP_CMD_TIMEOUT * 1000);
1158     if (rc) goto out;
1159 
1160     libxl__qmp_param_add_string(gc, &args, "driver",
1161                                 "xen-pci-passthrough");
1162     QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
1163                            pcidev->bus, pcidev->dev, pcidev->func);
1164     QMP_PARAMETERS_SPRINTF(&args, "hostaddr",
1165                            "%04x:%02x:%02x.%01x", pcidev->domain,
1166                            pcidev->bus, pcidev->dev, pcidev->func);
1167     if (pcidev->vdevfn) {
1168         QMP_PARAMETERS_SPRINTF(&args, "addr", "%x.%x",
1169                                PCI_SLOT(pcidev->vdevfn),
1170                                PCI_FUNC(pcidev->vdevfn));
1171     }
1172     /*
1173      * Version of QEMU prior to the XSA-131 fix did not support
1174      * this property and were effectively always in permissive
1175      * mode. The fix for XSA-131 switched the default to be
1176      * restricted by default and added the permissive property.
1177      *
1178      * Therefore in order to support both old and new QEMU we only
1179      * set the permissive flag if it is true. Users of older QEMU
1180      * have no reason to set the flag so this is ok.
1181      */
1182     if (pcidev->permissive)
1183         libxl__qmp_param_add_bool(gc, &args, "permissive", true);
1184 
1185     qmp->ao = pas->aodev->ao;
1186     qmp->domid = domid;
1187     qmp->payload_fd = -1;
1188     qmp->callback = pci_add_qmp_device_add_cb;
1189     rc = libxl__ev_qmp_send(egc, qmp, "device_add", args);
1190     if (rc) goto out;
1191     return;
1192 
1193 out:
1194     pci_add_dm_done(egc, pas, rc); /* must be last */
1195 }
1196 
pci_add_qmp_device_add_cb(libxl__egc * egc,libxl__ev_qmp * qmp,const libxl__json_object * response,int rc)1197 static void pci_add_qmp_device_add_cb(libxl__egc *egc,
1198                                       libxl__ev_qmp *qmp,
1199                                       const libxl__json_object *response,
1200                                       int rc)
1201 {
1202     EGC_GC;
1203     pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);
1204 
1205     if (rc) goto out;
1206 
1207     qmp->callback = pci_add_qmp_query_pci_cb;
1208     rc = libxl__ev_qmp_send(egc, qmp, "query-pci", NULL);
1209     if (rc) goto out;
1210     return;
1211 
1212 out:
1213     pci_add_dm_done(egc, pas, rc); /* must be last */
1214 }
1215 
pci_add_qmp_query_pci_cb(libxl__egc * egc,libxl__ev_qmp * qmp,const libxl__json_object * response,int rc)1216 static void pci_add_qmp_query_pci_cb(libxl__egc *egc,
1217                                      libxl__ev_qmp *qmp,
1218                                      const libxl__json_object *response,
1219                                      int rc)
1220 {
1221     EGC_GC;
1222     pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);
1223     const libxl__json_object *bus = NULL;
1224     char *asked_id;
1225     int i, j;
1226     const libxl__json_object *devices = NULL;
1227     const libxl__json_object *device = NULL;
1228     const libxl__json_object *o = NULL;
1229     const char *id = NULL;
1230     int dev_slot, dev_func;
1231 
1232     /* Convenience aliases */
1233     libxl_device_pci *pcidev = pas->pcidev;
1234 
1235     if (rc) goto out;
1236 
1237     /* `query-pci' returns:
1238      * [
1239      *   {'bus': 'int',
1240      *    'devices': [
1241      *       {'bus': 'int', 'slot': 'int', 'function': 'int',
1242      *        'class_info': 'PciDeviceClass', 'id': 'PciDeviceId',
1243      *        '*irq': 'int', 'qdev_id': 'str',
1244      *        '*pci_bridge': 'PciBridgeInfo',
1245      *        'regions': ['PciMemoryRegion']
1246      *       }
1247      *    ]
1248      *   }
1249      * ]
1250      * (See qemu.git/qapi/ for the struct that aren't detailed here)
1251      */
1252 
1253     asked_id = GCSPRINTF(PCI_PT_QDEV_ID,
1254                          pcidev->bus, pcidev->dev, pcidev->func);
1255 
1256     for (i = 0; (bus = libxl__json_array_get(response, i)); i++) {
1257         devices = libxl__json_map_get("devices", bus, JSON_ARRAY);
1258         if (!devices) {
1259             rc = ERROR_QEMU_API;
1260             goto out;
1261         }
1262 
1263         for (j = 0; (device = libxl__json_array_get(devices, j)); j++) {
1264              o = libxl__json_map_get("qdev_id", device, JSON_STRING);
1265              if (!o) {
1266                  rc = ERROR_QEMU_API;
1267                  goto out;
1268              }
1269              id = libxl__json_object_get_string(o);
1270              if (!id || strcmp(asked_id, id))
1271                  continue;
1272 
1273              o = libxl__json_map_get("slot", device, JSON_INTEGER);
1274              if (!o) {
1275                  rc = ERROR_QEMU_API;
1276                  goto out;
1277              }
1278              dev_slot = libxl__json_object_get_integer(o);
1279              o = libxl__json_map_get("function", device, JSON_INTEGER);
1280              if (!o) {
1281                  rc = ERROR_QEMU_API;
1282                  goto out;
1283              }
1284              dev_func = libxl__json_object_get_integer(o);
1285 
1286              pcidev->vdevfn = PCI_DEVFN(dev_slot, dev_func);
1287 
1288              rc = 0;
1289              goto out;
1290         }
1291     }
1292 
1293     rc = ERROR_FAIL;
1294     LOGD(ERROR, qmp->domid,
1295          "PCI device id '%s' wasn't found in QEMU's 'query-pci' response.",
1296          asked_id);
1297 
1298 out:
1299     if (rc == ERROR_QEMU_API) {
1300         LOGD(ERROR, qmp->domid,
1301              "Unexpected response to QMP cmd 'query-pci', received:\n%s",
1302              JSON(response));
1303     }
1304     pci_add_dm_done(egc, pas, rc); /* must be last */
1305 }
1306 
pci_add_timeout(libxl__egc * egc,libxl__ev_time * ev,const struct timeval * requested_abs,int rc)1307 static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
1308                             const struct timeval *requested_abs,
1309                             int rc)
1310 {
1311     pci_add_state *pas = CONTAINER_OF(ev, *pas, timeout);
1312 
1313     pci_add_dm_done(egc, pas, rc);
1314 }
1315 
pci_add_dm_done(libxl__egc * egc,pci_add_state * pas,int rc)1316 static void pci_add_dm_done(libxl__egc *egc,
1317                             pci_add_state *pas,
1318                             int rc)
1319 {
1320     STATE_AO_GC(pas->aodev->ao);
1321     libxl_ctx *ctx = libxl__gc_owner(gc);
1322     libxl_domid domid = pas->pci_domid;
1323     char *sysfs_path;
1324     FILE *f;
1325     unsigned long long start, end, flags, size;
1326     int irq, i;
1327     int r;
1328     uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
1329     uint32_t domainid = domid;
1330     bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);
1331 
1332     /* Convenience aliases */
1333     bool starting = pas->starting;
1334     libxl_device_pci *pcidev = pas->pcidev;
1335     bool hvm = libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM;
1336 
1337     libxl__ev_qmp_dispose(gc, &pas->qmp);
1338 
1339     if (rc) goto out;
1340 
1341     /* stubdomain is always running by now, even at create time */
1342     if (isstubdom)
1343         starting = false;
1344 
1345     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
1346                            pcidev->bus, pcidev->dev, pcidev->func);
1347     f = fopen(sysfs_path, "r");
1348     start = end = flags = size = 0;
1349     irq = 0;
1350 
1351     if (f == NULL) {
1352         LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
1353         rc = ERROR_FAIL;
1354         goto out;
1355     }
1356     for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
1357         if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
1358             continue;
1359         size = end - start + 1;
1360         if (start) {
1361             if (flags & PCI_BAR_IO) {
1362                 r = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
1363                 if (r < 0) {
1364                     LOGED(ERROR, domainid,
1365                           "xc_domain_ioport_permission 0x%llx/0x%llx (error %d)",
1366                           start, size, r);
1367                     fclose(f);
1368                     rc = ERROR_FAIL;
1369                     goto out;
1370                 }
1371             } else {
1372                 r = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
1373                                                 (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
1374                 if (r < 0) {
1375                     LOGED(ERROR, domainid,
1376                           "xc_domain_iomem_permission 0x%llx/0x%llx (error %d)",
1377                           start, size, r);
1378                     fclose(f);
1379                     rc = ERROR_FAIL;
1380                     goto out;
1381                 }
1382             }
1383         }
1384     }
1385     fclose(f);
1386     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
1387                                 pcidev->bus, pcidev->dev, pcidev->func);
1388     f = fopen(sysfs_path, "r");
1389     if (f == NULL) {
1390         LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
1391         goto out_no_irq;
1392     }
1393     if ((fscanf(f, "%u", &irq) == 1) && irq) {
1394         r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
1395         if (r < 0) {
1396             LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
1397                   irq, r);
1398             fclose(f);
1399             rc = ERROR_FAIL;
1400             goto out;
1401         }
1402         r = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
1403         if (r < 0) {
1404             LOGED(ERROR, domainid,
1405                   "xc_domain_irq_permission irq=%d (error=%d)", irq, r);
1406             fclose(f);
1407             rc = ERROR_FAIL;
1408             goto out;
1409         }
1410     }
1411     fclose(f);
1412 
1413     /* Don't restrict writes to the PCI config space from this VM */
1414     if (pcidev->permissive) {
1415         if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
1416                              pcidev) < 0 ) {
1417             LOGD(ERROR, domainid, "Setting permissive for device");
1418             rc = ERROR_FAIL;
1419             goto out;
1420         }
1421     }
1422 
1423 out_no_irq:
1424     if (!isstubdom) {
1425         if (pcidev->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
1426             flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED;
1427         } else if (pcidev->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) {
1428             LOGED(ERROR, domainid, "unknown rdm check flag.");
1429             rc = ERROR_FAIL;
1430             goto out;
1431         }
1432         r = xc_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev), flag);
1433         if (r < 0 && (hvm || errno != ENOSYS)) {
1434             LOGED(ERROR, domainid, "xc_assign_device failed");
1435             rc = ERROR_FAIL;
1436             goto out;
1437         }
1438     }
1439 
1440     if (!starting && !libxl_get_stubdom_id(CTX, domid))
1441         rc = libxl__device_pci_add_xenstore(gc, domid, pcidev, starting);
1442     else
1443         rc = 0;
1444 out:
1445     libxl__ev_time_deregister(gc, &pas->timeout);
1446     pas->callback(egc, pas, rc);
1447 }
1448 
libxl__device_pci_reset(libxl__gc * gc,unsigned int domain,unsigned int bus,unsigned int dev,unsigned int func)1449 static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
1450                                    unsigned int dev, unsigned int func)
1451 {
1452     char *reset;
1453     int fd, rc;
1454 
1455     reset = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER);
1456     fd = open(reset, O_WRONLY);
1457     if (fd >= 0) {
1458         char *buf = GCSPRINTF(PCI_BDF, domain, bus, dev, func);
1459         rc = write(fd, buf, strlen(buf));
1460         if (rc < 0)
1461             LOGD(ERROR, domain, "write to %s returned %d", reset, rc);
1462         close(fd);
1463         return rc < 0 ? rc : 0;
1464     }
1465     if (errno != ENOENT)
1466         LOGED(ERROR, domain, "Failed to access pciback path %s", reset);
1467     reset = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
1468     fd = open(reset, O_WRONLY);
1469     if (fd >= 0) {
1470         rc = write(fd, "1", 1);
1471         if (rc < 0)
1472             LOGED(ERROR, domain, "write to %s returned %d", reset, rc);
1473         close(fd);
1474         return rc < 0 ? rc : 0;
1475     }
1476     if (errno == ENOENT) {
1477         LOGD(ERROR, domain,
1478              "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF,
1479              domain, bus, dev, func);
1480     } else {
1481         LOGED(ERROR, domain, "Failed to access reset path %s", reset);
1482     }
1483     return -1;
1484 }
1485 
libxl__device_pci_setdefault(libxl__gc * gc,uint32_t domid,libxl_device_pci * pci,bool hotplug)1486 int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid,
1487                                  libxl_device_pci *pci, bool hotplug)
1488 {
1489     /* We'd like to force reserve rdm specific to a device by default.*/
1490     if (pci->rdm_policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
1491         pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
1492     return 0;
1493 }
1494 
libxl_device_pci_add(libxl_ctx * ctx,uint32_t domid,libxl_device_pci * pcidev,const libxl_asyncop_how * ao_how)1495 int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
1496                          libxl_device_pci *pcidev,
1497                          const libxl_asyncop_how *ao_how)
1498 {
1499     AO_CREATE(ctx, domid, ao_how);
1500     libxl__ao_device *aodev;
1501 
1502     GCNEW(aodev);
1503     libxl__prepare_ao_device(ao, aodev);
1504     aodev->action = LIBXL__DEVICE_ACTION_ADD;
1505     aodev->callback = device_addrm_aocomplete;
1506     aodev->update_json = true;
1507     libxl__device_pci_add(egc, domid, pcidev, false, aodev);
1508     return AO_INPROGRESS;
1509 }
1510 
libxl_pcidev_assignable(libxl_ctx * ctx,libxl_device_pci * pcidev)1511 static int libxl_pcidev_assignable(libxl_ctx *ctx, libxl_device_pci *pcidev)
1512 {
1513     libxl_device_pci *pcidevs;
1514     int num, i;
1515 
1516     pcidevs = libxl_device_pci_assignable_list(ctx, &num);
1517     for (i = 0; i < num; i++) {
1518         if (pcidevs[i].domain == pcidev->domain &&
1519             pcidevs[i].bus == pcidev->bus &&
1520             pcidevs[i].dev == pcidev->dev &&
1521             pcidevs[i].func == pcidev->func)
1522             break;
1523     }
1524     free(pcidevs);
1525     return i != num;
1526 }
1527 
1528 static void device_pci_add_stubdom_wait(libxl__egc *egc,
1529     pci_add_state *pas, int rc);
1530 static void device_pci_add_stubdom_ready(libxl__egc *egc,
1531     libxl__ev_devstate *ds, int rc);
1532 static void device_pci_add_stubdom_done(libxl__egc *egc,
1533     pci_add_state *, int rc);
1534 static void device_pci_add_done(libxl__egc *egc,
1535     pci_add_state *, int rc);
1536 
libxl__device_pci_add(libxl__egc * egc,uint32_t domid,libxl_device_pci * pcidev,bool starting,libxl__ao_device * aodev)1537 void libxl__device_pci_add(libxl__egc *egc, uint32_t domid,
1538                            libxl_device_pci *pcidev, bool starting,
1539                            libxl__ao_device *aodev)
1540 {
1541     STATE_AO_GC(aodev->ao);
1542     libxl_ctx *ctx = libxl__gc_owner(gc);
1543     libxl_device_pci *assigned;
1544     int num_assigned, rc;
1545     int stubdomid = 0;
1546     pci_add_state *pas;
1547 
1548     /* Store *pcidev to be used by callbacks */
1549     aodev->device_config = pcidev;
1550     aodev->device_type = &libxl__pcidev_devtype;
1551 
1552     GCNEW(pas);
1553     pas->aodev = aodev;
1554     pas->domid = domid;
1555     pas->starting = starting;
1556     pas->callback = device_pci_add_stubdom_done;
1557 
1558     if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
1559         rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
1560         if (rc) {
1561             LOGD(ERROR, domid,
1562                  "PCI device %04x:%02x:%02x.%u %s?",
1563                  pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
1564                  errno == EOPNOTSUPP ? "cannot be assigned - no IOMMU"
1565                  : "already assigned to a different guest");
1566             goto out;
1567         }
1568     }
1569 
1570     rc = libxl__device_pci_setdefault(gc, domid, pcidev, !starting);
1571     if (rc) goto out;
1572 
1573     if (pcidev->seize && !pciback_dev_is_assigned(gc, pcidev)) {
1574         rc = libxl__device_pci_assignable_add(gc, pcidev, 1);
1575         if ( rc )
1576             goto out;
1577     }
1578 
1579     if (!libxl_pcidev_assignable(ctx, pcidev)) {
1580         LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable",
1581              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
1582         rc = ERROR_FAIL;
1583         goto out;
1584     }
1585 
1586     rc = get_all_assigned_devices(gc, &assigned, &num_assigned);
1587     if ( rc ) {
1588         LOGD(ERROR, domid,
1589              "cannot determine if device is assigned, refusing to continue");
1590         goto out;
1591     }
1592     if ( is_pcidev_in_array(assigned, num_assigned, pcidev->domain,
1593                      pcidev->bus, pcidev->dev, pcidev->func) ) {
1594         LOGD(ERROR, domid, "PCI device already attached to a domain");
1595         rc = ERROR_FAIL;
1596         goto out;
1597     }
1598 
1599     libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
1600 
1601     stubdomid = libxl_get_stubdom_id(ctx, domid);
1602     if (stubdomid != 0) {
1603         libxl_device_pci *pcidev_s;
1604 
1605         GCNEW(pcidev_s);
1606         libxl_device_pci_init(pcidev_s);
1607         libxl_device_pci_copy(CTX, pcidev_s, pcidev);
1608         pas->callback = device_pci_add_stubdom_wait;
1609 
1610         do_pci_add(egc, stubdomid, pcidev_s, pas); /* must be last */
1611         return;
1612     }
1613 
1614     device_pci_add_stubdom_done(egc, pas, 0); /* must be last */
1615     return;
1616 
1617 out:
1618     device_pci_add_done(egc, pas, rc); /* must be last */
1619 }
1620 
device_pci_add_stubdom_wait(libxl__egc * egc,pci_add_state * pas,int rc)1621 static void device_pci_add_stubdom_wait(libxl__egc *egc,
1622                                         pci_add_state *pas,
1623                                         int rc)
1624 {
1625     libxl__ao_device *aodev = pas->aodev;
1626     STATE_AO_GC(aodev->ao);
1627     int stubdomid = libxl_get_stubdom_id(CTX, pas->domid);
1628     char *state_path;
1629 
1630     if (rc) goto out;
1631 
1632     /* Wait for the device actually being connected, otherwise device model
1633      * running there will fail to find the device. */
1634     state_path = GCSPRINTF("%s/state",
1635             libxl__domain_device_backend_path(gc, 0, stubdomid, 0,
1636                                               LIBXL__DEVICE_KIND_PCI));
1637     rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds,
1638             device_pci_add_stubdom_ready,
1639             state_path, XenbusStateConnected,
1640             LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
1641     if (rc) goto out;
1642     return;
1643 out:
1644     device_pci_add_done(egc, pas, rc); /* must be last */
1645 }
1646 
device_pci_add_stubdom_ready(libxl__egc * egc,libxl__ev_devstate * ds,int rc)1647 static void device_pci_add_stubdom_ready(libxl__egc *egc,
1648                                          libxl__ev_devstate *ds,
1649                                          int rc)
1650 {
1651     pci_add_state *pas = CONTAINER_OF(ds, *pas, pciback_ds);
1652 
1653     device_pci_add_stubdom_done(egc, pas, rc); /* must be last */
1654 }
1655 
device_pci_add_stubdom_done(libxl__egc * egc,pci_add_state * pas,int rc)1656 static void device_pci_add_stubdom_done(libxl__egc *egc,
1657                                         pci_add_state *pas,
1658                                         int rc)
1659 {
1660     STATE_AO_GC(pas->aodev->ao);
1661     unsigned int orig_vdev, pfunc_mask;
1662     int i;
1663 
1664     /* Convenience aliases */
1665     libxl__ao_device *aodev = pas->aodev;
1666     libxl_domid domid = pas->domid;
1667     libxl_device_pci *pcidev = aodev->device_config;
1668 
1669     if (rc) goto out;
1670 
1671     orig_vdev = pcidev->vdevfn & ~7U;
1672 
1673     if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
1674         if ( !(pcidev->vdevfn >> 3) ) {
1675             LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices");
1676             rc = ERROR_INVAL;
1677             goto out;
1678         }
1679         if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) {
1680             rc = ERROR_FAIL;
1681             goto out;
1682         }
1683         pcidev->vfunc_mask &= pfunc_mask;
1684         /* so now vfunc_mask == pfunc_mask */
1685     }else{
1686         pfunc_mask = (1 << pcidev->func);
1687     }
1688 
1689     for(rc = 0, i = 7; i >= 0; --i) {
1690         if ( (1 << i) & pfunc_mask ) {
1691             if ( pcidev->vfunc_mask == pfunc_mask ) {
1692                 pcidev->func = i;
1693                 pcidev->vdevfn = orig_vdev | i;
1694             }else{
1695                 /* if not passing through multiple devices in a block make
1696                  * sure that virtual function number 0 is always used otherwise
1697                  * guest won't see the device
1698                  */
1699                 pcidev->vdevfn = orig_vdev;
1700             }
1701             pas->callback = device_pci_add_done;
1702             do_pci_add(egc, domid, pcidev, pas); /* must be last */
1703             return;
1704         }
1705     }
1706 
1707 out:
1708     device_pci_add_done(egc, pas, rc);
1709 }
1710 
device_pci_add_done(libxl__egc * egc,pci_add_state * pas,int rc)1711 static void device_pci_add_done(libxl__egc *egc,
1712                                 pci_add_state *pas,
1713                                 int rc)
1714 {
1715     EGC_GC;
1716     libxl__ao_device *aodev = pas->aodev;
1717     libxl_domid domid = pas->domid;
1718     libxl_device_pci *pcidev = aodev->device_config;
1719 
1720     if (rc) {
1721         LOGD(ERROR, domid,
1722              "libxl__device_pci_add  failed for "
1723              "PCI device %x:%x:%x.%x (rc %d)",
1724              pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
1725              rc);
1726     }
1727     aodev->rc = rc;
1728     aodev->callback(egc, aodev);
1729 }
1730 
1731 typedef struct {
1732     libxl__multidev multidev;
1733     libxl__ao_device *outer_aodev;
1734     libxl_domain_config *d_config;
1735     libxl_domid domid;
1736 } add_pcidevs_state;
1737 
1738 static void add_pcidevs_done(libxl__egc *, libxl__multidev *, int rc);
1739 
libxl__add_pcidevs(libxl__egc * egc,libxl__ao * ao,uint32_t domid,libxl_domain_config * d_config,libxl__multidev * multidev)1740 static void libxl__add_pcidevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
1741                                libxl_domain_config *d_config,
1742                                libxl__multidev *multidev)
1743 {
1744     AO_GC;
1745     add_pcidevs_state *apds;
1746     int i;
1747 
1748     /* We need to start a new multidev in order to be able to execute
1749      * libxl__create_pci_backend only once. */
1750 
1751     GCNEW(apds);
1752     apds->outer_aodev = libxl__multidev_prepare(multidev);
1753     apds->d_config = d_config;
1754     apds->domid = domid;
1755     apds->multidev.callback = add_pcidevs_done;
1756     libxl__multidev_begin(ao, &apds->multidev);
1757 
1758     for (i = 0; i < d_config->num_pcidevs; i++) {
1759         libxl__ao_device *aodev = libxl__multidev_prepare(&apds->multidev);
1760         libxl__device_pci_add(egc, domid, &d_config->pcidevs[i],
1761                               true, aodev);
1762     }
1763 
1764     libxl__multidev_prepared(egc, &apds->multidev, 0);
1765 }
1766 
add_pcidevs_done(libxl__egc * egc,libxl__multidev * multidev,int rc)1767 static void add_pcidevs_done(libxl__egc *egc, libxl__multidev *multidev,
1768                              int rc)
1769 {
1770     EGC_GC;
1771     add_pcidevs_state *apds = CONTAINER_OF(multidev, *apds, multidev);
1772 
1773     /* Convenience aliases */
1774     libxl_domain_config *d_config = apds->d_config;
1775     libxl_domid domid = apds->domid;
1776     libxl__ao_device *aodev = apds->outer_aodev;
1777 
1778     if (rc) goto out;
1779 
1780     if (d_config->num_pcidevs > 0 && !libxl_get_stubdom_id(CTX, domid)) {
1781         rc = libxl__create_pci_backend(gc, domid, d_config->pcidevs,
1782             d_config->num_pcidevs);
1783         if (rc < 0) {
1784             LOGD(ERROR, domid, "libxl_create_pci_backend failed: %d", rc);
1785             goto out;
1786         }
1787     }
1788 
1789 out:
1790     aodev->rc = rc;
1791     aodev->callback(egc, aodev);
1792 }
1793 
qemu_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pcidev,int force)1794 static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
1795                                     libxl_device_pci *pcidev, int force)
1796 {
1797     libxl_ctx *ctx = libxl__gc_owner(gc);
1798     char *state;
1799     char *path;
1800     uint32_t dm_domid;
1801 
1802     dm_domid = libxl_get_stubdom_id(CTX, domid);
1803 
1804     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
1805     state = libxl__xs_read(gc, XBT_NULL, path);
1806     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
1807     libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
1808                      pcidev->bus, pcidev->dev, pcidev->func);
1809 
1810     /* Remove all functions at once atomically by only signalling
1811      * device-model for function 0 */
1812     if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
1813         libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
1814         if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed",
1815                                          NULL, NULL, NULL) < 0) {
1816             LOGD(ERROR, domid, "Device Model didn't respond in time");
1817             /* This depends on guest operating system acknowledging the
1818              * SCI, if it doesn't respond in time then we may wish to
1819              * force the removal.
1820              */
1821             return ERROR_FAIL;
1822         }
1823     }
1824     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
1825     xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
1826 
1827     return 0;
1828 }
1829 
1830 typedef struct pci_remove_state {
1831     libxl__ao_device *aodev;
1832     libxl_domid domid;
1833     libxl_device_pci *pcidev;
1834     bool force;
1835     bool hvm;
1836     unsigned int orig_vdev;
1837     unsigned int pfunc_mask;
1838     int next_func;
1839     libxl__ao_device stubdom_aodev;
1840     libxl__xswait_state xswait;
1841     libxl__ev_qmp qmp;
1842     libxl__ev_time timeout;
1843     libxl__ev_time retry_timer;
1844 } pci_remove_state;
1845 
1846 static void libxl__device_pci_remove_common(libxl__egc *egc,
1847     uint32_t domid, libxl_device_pci *pcidev, bool force,
1848     libxl__ao_device *aodev);
1849 static void device_pci_remove_common_next(libxl__egc *egc,
1850     pci_remove_state *prs, int rc);
1851 
1852 static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
1853     libxl__xswait_state *xswa, int rc, const char *state);
1854 static void pci_remove_qmp_device_del(libxl__egc *egc,
1855     pci_remove_state *prs);
1856 static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
1857     libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
1858 static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc,
1859     libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
1860 static void pci_remove_qmp_query_cb(libxl__egc *egc,
1861     libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
1862 static void pci_remove_timeout(libxl__egc *egc,
1863     libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
1864 static void pci_remove_detatched(libxl__egc *egc,
1865     pci_remove_state *prs, int rc);
1866 static void pci_remove_stubdom_done(libxl__egc *egc,
1867     libxl__ao_device *aodev);
1868 static void pci_remove_done(libxl__egc *egc,
1869     pci_remove_state *prs, int rc);
1870 
do_pci_remove(libxl__egc * egc,uint32_t domid,libxl_device_pci * pcidev,int force,pci_remove_state * prs)1871 static void do_pci_remove(libxl__egc *egc, uint32_t domid,
1872                           libxl_device_pci *pcidev, int force,
1873                           pci_remove_state *prs)
1874 {
1875     STATE_AO_GC(prs->aodev->ao);
1876     libxl_ctx *ctx = libxl__gc_owner(gc);
1877     libxl_device_pci *assigned;
1878     libxl_domain_type type = libxl__domain_type(gc, domid);
1879     int rc, num;
1880     uint32_t domainid = domid;
1881 
1882     assigned = libxl_device_pci_list(ctx, domid, &num);
1883     if (assigned == NULL) {
1884         rc = ERROR_FAIL;
1885         goto out_fail;
1886     }
1887     libxl__ptr_add(gc, assigned);
1888 
1889     rc = ERROR_INVAL;
1890     if ( !is_pcidev_in_array(assigned, num, pcidev->domain,
1891                       pcidev->bus, pcidev->dev, pcidev->func) ) {
1892         LOGD(ERROR, domainid, "PCI device not attached to this domain");
1893         goto out_fail;
1894     }
1895 
1896     rc = ERROR_FAIL;
1897     if (type == LIBXL_DOMAIN_TYPE_HVM) {
1898         prs->hvm = true;
1899         switch (libxl__device_model_version_running(gc, domid)) {
1900         case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
1901             prs->xswait.ao = ao;
1902             prs->xswait.what = "Device Model";
1903             prs->xswait.path = DEVICE_MODEL_XS_PATH(gc,
1904                 libxl_get_stubdom_id(CTX, domid), domid, "/state");
1905             prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
1906             prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb;
1907             rc = libxl__xswait_start(gc, &prs->xswait);
1908             if (rc) goto out_fail;
1909             return;
1910         case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
1911             pci_remove_qmp_device_del(egc, prs); /* must be last */
1912             return;
1913         default:
1914             rc = ERROR_INVAL;
1915             goto out_fail;
1916         }
1917     } else {
1918         assert(type == LIBXL_DOMAIN_TYPE_PV);
1919 
1920         char *sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
1921                                      pcidev->bus, pcidev->dev, pcidev->func);
1922         FILE *f = fopen(sysfs_path, "r");
1923         unsigned int start = 0, end = 0, flags = 0, size = 0;
1924         int irq = 0;
1925         int i;
1926 
1927         if (f == NULL) {
1928             LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
1929             goto skip1;
1930         }
1931         for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
1932             if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3)
1933                 continue;
1934             size = end - start + 1;
1935             if (start) {
1936                 if (flags & PCI_BAR_IO) {
1937                     rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0);
1938                     if (rc < 0)
1939                         LOGED(ERROR, domainid,
1940                               "xc_domain_ioport_permission error 0x%x/0x%x",
1941                               start,
1942                               size);
1943                 } else {
1944                     rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
1945                                                     (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0);
1946                     if (rc < 0)
1947                         LOGED(ERROR, domainid,
1948                               "xc_domain_iomem_permission error 0x%x/0x%x",
1949                               start,
1950                               size);
1951                 }
1952             }
1953         }
1954         fclose(f);
1955 skip1:
1956         sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
1957                                pcidev->bus, pcidev->dev, pcidev->func);
1958         f = fopen(sysfs_path, "r");
1959         if (f == NULL) {
1960             LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
1961             goto skip_irq;
1962         }
1963         if ((fscanf(f, "%u", &irq) == 1) && irq) {
1964             rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
1965             if (rc < 0) {
1966                 LOGED(ERROR, domainid, "xc_physdev_unmap_pirq irq=%d", irq);
1967             }
1968             rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
1969             if (rc < 0) {
1970                 LOGED(ERROR, domainid, "xc_domain_irq_permission irq=%d", irq);
1971             }
1972         }
1973         fclose(f);
1974     }
1975 skip_irq:
1976     rc = 0;
1977 out_fail:
1978     pci_remove_detatched(egc, prs, rc); /* must be last */
1979 }
1980 
pci_remove_qemu_trad_watch_state_cb(libxl__egc * egc,libxl__xswait_state * xswa,int rc,const char * state)1981 static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
1982                                                 libxl__xswait_state *xswa,
1983                                                 int rc,
1984                                                 const char *state)
1985 {
1986     pci_remove_state *prs = CONTAINER_OF(xswa, *prs, xswait);
1987     STATE_AO_GC(prs->aodev->ao);
1988 
1989     /* Convenience aliases */
1990     libxl_domid domid = prs->domid;
1991     libxl_device_pci *const pcidev = prs->pcidev;
1992 
1993     rc = check_qemu_running(gc, domid, xswa, rc, state);
1994     if (rc == ERROR_NOT_READY)
1995         return;
1996     if (rc)
1997         goto out;
1998 
1999     rc = qemu_pci_remove_xenstore(gc, domid, pcidev, prs->force);
2000 
2001 out:
2002     pci_remove_detatched(egc, prs, rc);
2003 }
2004 
pci_remove_qmp_device_del(libxl__egc * egc,pci_remove_state * prs)2005 static void pci_remove_qmp_device_del(libxl__egc *egc,
2006                                       pci_remove_state *prs)
2007 {
2008     STATE_AO_GC(prs->aodev->ao);
2009     libxl__json_object *args = NULL;
2010     int rc;
2011 
2012     /* Convenience aliases */
2013     libxl_device_pci *const pcidev = prs->pcidev;
2014 
2015     rc = libxl__ev_time_register_rel(ao, &prs->timeout,
2016                                      pci_remove_timeout,
2017                                      LIBXL_QMP_CMD_TIMEOUT * 1000);
2018     if (rc) goto out;
2019 
2020     QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
2021                            pcidev->bus, pcidev->dev, pcidev->func);
2022     prs->qmp.callback = pci_remove_qmp_device_del_cb;
2023     rc = libxl__ev_qmp_send(egc, &prs->qmp, "device_del", args);
2024     if (rc) goto out;
2025     return;
2026 
2027 out:
2028     pci_remove_detatched(egc, prs, rc);
2029 }
2030 
pci_remove_qmp_device_del_cb(libxl__egc * egc,libxl__ev_qmp * qmp,const libxl__json_object * response,int rc)2031 static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
2032                                          libxl__ev_qmp *qmp,
2033                                          const libxl__json_object *response,
2034                                          int rc)
2035 {
2036     EGC_GC;
2037     pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);
2038 
2039     if (rc) goto out;
2040 
2041     /* Now that the command is sent, we want to wait until QEMU has
2042      * confirmed that the device is removed. */
2043     /* TODO: Instead of using a poll loop { ev_timer ; query-pci }, it
2044      * could be possible to listen to events sent by QEMU via QMP in order
2045      * to wait for the passthrough pci-device to be removed from QEMU.  */
2046     pci_remove_qmp_retry_timer_cb(egc, &prs->retry_timer, NULL,
2047                                   ERROR_TIMEDOUT);
2048     return;
2049 
2050 out:
2051     pci_remove_detatched(egc, prs, rc);
2052 }
2053 
pci_remove_qmp_retry_timer_cb(libxl__egc * egc,libxl__ev_time * ev,const struct timeval * requested_abs,int rc)2054 static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev,
2055                                           const struct timeval *requested_abs,
2056                                           int rc)
2057 {
2058     EGC_GC;
2059     pci_remove_state *prs = CONTAINER_OF(ev, *prs, retry_timer);
2060 
2061     prs->qmp.callback = pci_remove_qmp_query_cb;
2062     rc = libxl__ev_qmp_send(egc, &prs->qmp, "query-pci", NULL);
2063     if (rc) goto out;
2064     return;
2065 
2066 out:
2067     pci_remove_detatched(egc, prs, rc);
2068 }
2069 
pci_remove_qmp_query_cb(libxl__egc * egc,libxl__ev_qmp * qmp,const libxl__json_object * response,int rc)2070 static void pci_remove_qmp_query_cb(libxl__egc *egc,
2071                                     libxl__ev_qmp *qmp,
2072                                     const libxl__json_object *response,
2073                                     int rc)
2074 {
2075     EGC_GC;
2076     pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);
2077     const libxl__json_object *bus = NULL;
2078     const char *asked_id;
2079     int i, j;
2080 
2081     /* Convenience aliases */
2082     libxl__ao *const ao = prs->aodev->ao;
2083     libxl_device_pci *const pcidev = prs->pcidev;
2084 
2085     if (rc) goto out;
2086 
2087     libxl__ev_qmp_dispose(gc, qmp);
2088 
2089     asked_id = GCSPRINTF(PCI_PT_QDEV_ID,
2090                          pcidev->bus, pcidev->dev, pcidev->func);
2091 
2092     /* query-pci response:
2093      * [{ 'devices': [ 'qdev_id': 'str', ...  ], ... }]
2094      * */
2095 
2096     for (i = 0; (bus = libxl__json_array_get(response, i)); i++) {
2097         const libxl__json_object *devices = NULL;
2098         const libxl__json_object *device = NULL;
2099         const libxl__json_object *o = NULL;
2100         const char *id = NULL;
2101 
2102         devices = libxl__json_map_get("devices", bus, JSON_ARRAY);
2103         if (!devices) {
2104             rc = ERROR_QEMU_API;
2105             goto out;
2106         }
2107 
2108         for (j = 0; (device = libxl__json_array_get(devices, j)); j++) {
2109              o = libxl__json_map_get("qdev_id", device, JSON_STRING);
2110              if (!o) {
2111                  rc = ERROR_QEMU_API;
2112                  goto out;
2113              }
2114              id = libxl__json_object_get_string(o);
2115 
2116              if (id && !strcmp(asked_id, id)) {
2117                  /* Device still in QEMU, need to wait longuer. */
2118                  rc = libxl__ev_time_register_rel(ao, &prs->retry_timer,
2119                      pci_remove_qmp_retry_timer_cb, 1000);
2120                  if (rc) goto out;
2121                  return;
2122              }
2123         }
2124     }
2125 
2126 out:
2127     pci_remove_detatched(egc, prs, rc); /* must be last */
2128 }
2129 
pci_remove_timeout(libxl__egc * egc,libxl__ev_time * ev,const struct timeval * requested_abs,int rc)2130 static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev,
2131                                const struct timeval *requested_abs,
2132                                int rc)
2133 {
2134     EGC_GC;
2135     pci_remove_state *prs = CONTAINER_OF(ev, *prs, timeout);
2136 
2137     /* Convenience aliases */
2138     libxl_device_pci *const pcidev = prs->pcidev;
2139 
2140     LOGD(WARN, prs->domid, "timed out waiting for DM to remove "
2141          PCI_PT_QDEV_ID, pcidev->bus, pcidev->dev, pcidev->func);
2142 
2143     /* If we timed out, we might still want to keep destroying the device
2144      * (when force==true), so let the next function decide what to do on
2145      * error */
2146     pci_remove_detatched(egc, prs, rc);
2147 }
2148 
pci_remove_detatched(libxl__egc * egc,pci_remove_state * prs,int rc)2149 static void pci_remove_detatched(libxl__egc *egc,
2150                                  pci_remove_state *prs,
2151                                  int rc)
2152 {
2153     STATE_AO_GC(prs->aodev->ao);
2154     int stubdomid = 0;
2155     uint32_t domainid = prs->domid;
2156     bool isstubdom;
2157 
2158     /* Convenience aliases */
2159     libxl_device_pci *const pcidev = prs->pcidev;
2160     libxl_domid domid = prs->domid;
2161 
2162     /* Cleaning QMP states ASAP */
2163     libxl__ev_qmp_dispose(gc, &prs->qmp);
2164     libxl__ev_time_deregister(gc, &prs->timeout);
2165     libxl__ev_time_deregister(gc, &prs->retry_timer);
2166 
2167     if (rc && !prs->force)
2168         goto out;
2169 
2170     isstubdom = libxl_is_stubdom(CTX, domid, &domainid);
2171 
2172     /* don't do multiple resets while some functions are still passed through */
2173     if ( (pcidev->vdevfn & 0x7) == 0 ) {
2174         libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
2175     }
2176 
2177     if (!isstubdom) {
2178         rc = xc_deassign_device(CTX->xch, domid, pcidev_encode_bdf(pcidev));
2179         if (rc < 0 && (prs->hvm || errno != ENOSYS))
2180             LOGED(ERROR, domainid, "xc_deassign_device failed");
2181     }
2182 
2183     stubdomid = libxl_get_stubdom_id(CTX, domid);
2184     if (stubdomid != 0) {
2185         libxl_device_pci *pcidev_s;
2186         libxl__ao_device *const stubdom_aodev = &prs->stubdom_aodev;
2187 
2188         GCNEW(pcidev_s);
2189         libxl_device_pci_init(pcidev_s);
2190         libxl_device_pci_copy(CTX, pcidev_s, pcidev);
2191 
2192         libxl__prepare_ao_device(ao, stubdom_aodev);
2193         stubdom_aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
2194         stubdom_aodev->callback = pci_remove_stubdom_done;
2195         stubdom_aodev->update_json = prs->aodev->update_json;
2196         libxl__device_pci_remove_common(egc, stubdomid, pcidev_s,
2197                                         prs->force, stubdom_aodev);
2198         return;
2199     }
2200 
2201     rc = 0;
2202 out:
2203     pci_remove_done(egc, prs, rc);
2204 }
2205 
pci_remove_stubdom_done(libxl__egc * egc,libxl__ao_device * aodev)2206 static void pci_remove_stubdom_done(libxl__egc *egc,
2207                                     libxl__ao_device *aodev)
2208 {
2209     pci_remove_state *prs = CONTAINER_OF(aodev, *prs, stubdom_aodev);
2210 
2211     pci_remove_done(egc, prs, 0);
2212 }
2213 
pci_remove_done(libxl__egc * egc,pci_remove_state * prs,int rc)2214 static void pci_remove_done(libxl__egc *egc,
2215                             pci_remove_state *prs,
2216                             int rc)
2217 {
2218     EGC_GC;
2219 
2220     if (rc) goto out;
2221 
2222     libxl__device_pci_remove_xenstore(gc, prs->domid, prs->pcidev);
2223 out:
2224     device_pci_remove_common_next(egc, prs, rc);
2225 }
2226 
libxl__device_pci_remove_common(libxl__egc * egc,uint32_t domid,libxl_device_pci * pcidev,bool force,libxl__ao_device * aodev)2227 static void libxl__device_pci_remove_common(libxl__egc *egc,
2228                                             uint32_t domid,
2229                                             libxl_device_pci *pcidev,
2230                                             bool force,
2231                                             libxl__ao_device *aodev)
2232 {
2233     STATE_AO_GC(aodev->ao);
2234     int rc;
2235     pci_remove_state *prs;
2236 
2237     GCNEW(prs);
2238     prs->aodev = aodev;
2239     prs->domid = domid;
2240     prs->pcidev = pcidev;
2241     prs->force = force;
2242     libxl__xswait_init(&prs->xswait);
2243     libxl__ev_qmp_init(&prs->qmp);
2244     prs->qmp.ao = prs->aodev->ao;
2245     prs->qmp.domid = prs->domid;
2246     prs->qmp.payload_fd = -1;
2247     libxl__ev_time_init(&prs->timeout);
2248     libxl__ev_time_init(&prs->retry_timer);
2249 
2250     prs->orig_vdev = pcidev->vdevfn & ~7U;
2251 
2252     if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
2253         if ( pci_multifunction_check(gc, pcidev, &prs->pfunc_mask) ) {
2254             rc = ERROR_FAIL;
2255             goto out;
2256         }
2257         pcidev->vfunc_mask &= prs->pfunc_mask;
2258     }else{
2259         prs->pfunc_mask = (1 << pcidev->func);
2260     }
2261 
2262     rc = 0;
2263     prs->next_func = 7;
2264 out:
2265     device_pci_remove_common_next(egc, prs, rc);
2266 }
2267 
device_pci_remove_common_next(libxl__egc * egc,pci_remove_state * prs,int rc)2268 static void device_pci_remove_common_next(libxl__egc *egc,
2269                                           pci_remove_state *prs,
2270                                           int rc)
2271 {
2272     EGC_GC;
2273 
2274     /* Convenience aliases */
2275     libxl_domid domid = prs->domid;
2276     libxl_device_pci *const pcidev = prs->pcidev;
2277     libxl__ao_device *const aodev = prs->aodev;
2278     const unsigned int pfunc_mask = prs->pfunc_mask;
2279     const unsigned int orig_vdev = prs->orig_vdev;
2280 
2281     if (rc) goto out;
2282 
2283     while (prs->next_func >= 0) {
2284         const int i = prs->next_func;
2285         prs->next_func--;
2286         if ( (1 << i) & pfunc_mask ) {
2287             if ( pcidev->vfunc_mask == pfunc_mask ) {
2288                 pcidev->func = i;
2289                 pcidev->vdevfn = orig_vdev | i;
2290             }else{
2291                 pcidev->vdevfn = orig_vdev;
2292             }
2293             do_pci_remove(egc, domid, pcidev, prs->force, prs);
2294             return;
2295         }
2296     }
2297 
2298     rc = 0;
2299 out:
2300     libxl__ev_qmp_dispose(gc, &prs->qmp);
2301     libxl__xswait_stop(gc, &prs->xswait);
2302     libxl__ev_time_deregister(gc, &prs->timeout);
2303     libxl__ev_time_deregister(gc, &prs->retry_timer);
2304     aodev->rc = rc;
2305     aodev->callback(egc, aodev);
2306 }
2307 
libxl_device_pci_remove(libxl_ctx * ctx,uint32_t domid,libxl_device_pci * pcidev,const libxl_asyncop_how * ao_how)2308 int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
2309                             libxl_device_pci *pcidev,
2310                             const libxl_asyncop_how *ao_how)
2311 
2312 {
2313     AO_CREATE(ctx, domid, ao_how);
2314     libxl__ao_device *aodev;
2315 
2316     GCNEW(aodev);
2317     libxl__prepare_ao_device(ao, aodev);
2318     aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
2319     aodev->callback = device_addrm_aocomplete;
2320     aodev->update_json = true;
2321     libxl__device_pci_remove_common(egc, domid, pcidev, false, aodev);
2322     return AO_INPROGRESS;
2323 }
2324 
libxl_device_pci_destroy(libxl_ctx * ctx,uint32_t domid,libxl_device_pci * pcidev,const libxl_asyncop_how * ao_how)2325 int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
2326                              libxl_device_pci *pcidev,
2327                              const libxl_asyncop_how *ao_how)
2328 {
2329     AO_CREATE(ctx, domid, ao_how);
2330     libxl__ao_device *aodev;
2331 
2332     GCNEW(aodev);
2333     libxl__prepare_ao_device(ao, aodev);
2334     aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
2335     aodev->callback = device_addrm_aocomplete;
2336     aodev->update_json = true;
2337     libxl__device_pci_remove_common(egc, domid, pcidev, true, aodev);
2338     return AO_INPROGRESS;
2339 }
2340 
libxl__device_pci_from_xs_be(libxl__gc * gc,const char * be_path,libxl_devid nr,void * data)2341 static int libxl__device_pci_from_xs_be(libxl__gc *gc,
2342                                         const char *be_path,
2343                                         libxl_devid nr, void *data)
2344 {
2345     char *s;
2346     unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0;
2347     libxl_device_pci *pci = data;
2348 
2349     s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr));
2350     sscanf(s, PCI_BDF, &domain, &bus, &dev, &func);
2351 
2352     s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr));
2353     if (s)
2354         vdevfn = strtol(s, (char **) NULL, 16);
2355 
2356     pcidev_struct_fill(pci, domain, bus, dev, func, vdevfn);
2357 
2358     s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr));
2359     if (s) {
2360         char *saveptr;
2361         char *p = strtok_r(s, ",=", &saveptr);
2362         do {
2363             while (*p == ' ')
2364                 p++;
2365             if (!strcmp(p, "msitranslate")) {
2366                 p = strtok_r(NULL, ",=", &saveptr);
2367                 pci->msitranslate = atoi(p);
2368             } else if (!strcmp(p, "power_mgmt")) {
2369                 p = strtok_r(NULL, ",=", &saveptr);
2370                 pci->power_mgmt = atoi(p);
2371             } else if (!strcmp(p, "permissive")) {
2372                 p = strtok_r(NULL, ",=", &saveptr);
2373                 pci->permissive = atoi(p);
2374             }
2375         } while ((p = strtok_r(NULL, ",=", &saveptr)) != NULL);
2376     }
2377 
2378     return 0;
2379 }
2380 
libxl__device_pci_get_num(libxl__gc * gc,const char * be_path,unsigned int * num)2381 static int libxl__device_pci_get_num(libxl__gc *gc, const char *be_path,
2382                                      unsigned int *num)
2383 {
2384     char *num_devs;
2385     int rc = 0;
2386 
2387     num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
2388     if (!num_devs)
2389         rc = ERROR_FAIL;
2390     else
2391         *num = atoi(num_devs);
2392 
2393     return rc;
2394 }
2395 
libxl_device_pci_list(libxl_ctx * ctx,uint32_t domid,int * num)2396 libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, uint32_t domid, int *num)
2397 {
2398     GC_INIT(ctx);
2399     char *be_path;
2400     unsigned int n, i;
2401     libxl_device_pci *pcidevs = NULL;
2402 
2403     *num = 0;
2404 
2405     be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
2406                                                 LIBXL__DEVICE_KIND_PCI);
2407     if (libxl__device_pci_get_num(gc, be_path, &n))
2408         goto out;
2409 
2410     pcidevs = calloc(n, sizeof(libxl_device_pci));
2411 
2412     for (i = 0; i < n; i++)
2413         libxl__device_pci_from_xs_be(gc, be_path, i, pcidevs + i);
2414 
2415     *num = n;
2416 out:
2417     GC_FREE;
2418     return pcidevs;
2419 }
2420 
libxl__device_pci_destroy_all(libxl__egc * egc,uint32_t domid,libxl__multidev * multidev)2421 void libxl__device_pci_destroy_all(libxl__egc *egc, uint32_t domid,
2422                                    libxl__multidev *multidev)
2423 {
2424     STATE_AO_GC(multidev->ao);
2425     libxl_device_pci *pcidevs;
2426     int num, i;
2427 
2428     pcidevs = libxl_device_pci_list(CTX, domid, &num);
2429     if ( pcidevs == NULL )
2430         return;
2431     libxl__ptr_add(gc, pcidevs);
2432 
2433     for (i = 0; i < num; i++) {
2434         /* Force remove on shutdown since, on HVM, qemu will not always
2435          * respond to SCI interrupt because the guest kernel has shut down the
2436          * devices by the time we even get here!
2437          */
2438         libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
2439         libxl__device_pci_remove_common(egc, domid, pcidevs + i, true,
2440                                         aodev);
2441     }
2442 }
2443 
libxl__grant_vga_iomem_permission(libxl__gc * gc,const uint32_t domid,libxl_domain_config * const d_config)2444 int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid,
2445                                       libxl_domain_config *const d_config)
2446 {
2447     int i, ret;
2448 
2449     if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru))
2450         return 0;
2451 
2452     for (i = 0 ; i < d_config->num_pcidevs ; i++) {
2453         uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT;
2454         uint32_t stubdom_domid;
2455         libxl_device_pci *pcidev = &d_config->pcidevs[i];
2456         unsigned long pci_device_class;
2457 
2458         if (sysfs_dev_get_class(gc, pcidev, &pci_device_class))
2459             continue;
2460         if (pci_device_class != 0x030000) /* VGA class */
2461             continue;
2462 
2463         stubdom_domid = libxl_get_stubdom_id(CTX, domid);
2464         ret = xc_domain_iomem_permission(CTX->xch, stubdom_domid,
2465                                          vga_iomem_start, 0x20, 1);
2466         if (ret < 0) {
2467             LOGED(ERROR, domid,
2468                   "failed to give stubdom%d access to iomem range "
2469                   "%"PRIx64"-%"PRIx64" for VGA passthru",
2470                   stubdom_domid,
2471                   vga_iomem_start, (vga_iomem_start + 0x20 - 1));
2472             return ret;
2473         }
2474         ret = xc_domain_iomem_permission(CTX->xch, domid,
2475                                          vga_iomem_start, 0x20, 1);
2476         if (ret < 0) {
2477             LOGED(ERROR, domid,
2478                   "failed to give dom%d access to iomem range "
2479                   "%"PRIx64"-%"PRIx64" for VGA passthru",
2480                   domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1));
2481             return ret;
2482         }
2483         break;
2484     }
2485 
2486     return 0;
2487 }
2488 
libxl_device_pci_compare(const libxl_device_pci * d1,const libxl_device_pci * d2)2489 static int libxl_device_pci_compare(const libxl_device_pci *d1,
2490                                     const libxl_device_pci *d2)
2491 {
2492     return COMPARE_PCI(d1, d2);
2493 }
2494 
2495 #define libxl__device_pci_update_devid NULL
2496 
2497 DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, PCI,
2498     .get_num = libxl__device_pci_get_num,
2499     .from_xenstore = libxl__device_pci_from_xs_be,
2500 );
2501 
2502 /*
2503  * Local variables:
2504  * mode: C
2505  * c-basic-offset: 4
2506  * indent-tabs-mode: nil
2507  * End:
2508  */
2509