1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; version 2.1 only. with the special
9 * exception on linking described in file LICENSE.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 */
16
17 #include "libxl_osdeps.h" /* must come before any other headers */
18
19 #include "libxl_internal.h"
20
21 #define PCI_BDF "%04x:%02x:%02x.%01x"
22 #define PCI_BDF_SHORT "%02x:%02x.%01x"
23 #define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x"
24 #define PCI_OPTIONS "msitranslate=%d,power_mgmt=%d"
25 #define PCI_BDF_XSPATH "%04x-%02x-%02x-%01x"
26 #define PCI_PT_QDEV_ID "pci-pt-%02x_%02x.%01x"
27
/*
 * Pack the PCI address of pcidev into a single integer: the domain is
 * shifted left by 16, the bus occupies bits 15-8, the device bits 7-3 and
 * the function bits 2-0.
 */
static unsigned int pcidev_encode_bdf(libxl_device_pci *pcidev)
{
    unsigned int encoded = pcidev->domain << 16;

    /* Bus, device and function are masked to 8, 5 and 3 bits respectively. */
    return encoded
           | ((pcidev->bus & 0xff) << 8)
           | ((pcidev->dev & 0x1f) << 3)
           | (pcidev->func & 0x7);
}
39
/*
 * Store the given domain/bus/dev/func address and virtual devfn in pcidev.
 * No other member of the structure is written.
 */
static void pcidev_struct_fill(libxl_device_pci *pcidev, unsigned int domain,
                               unsigned int bus, unsigned int dev,
                               unsigned int func, unsigned int vdevfn)
{
    pcidev->vdevfn = vdevfn;
    pcidev->func   = func;
    pcidev->dev    = dev;
    pcidev->bus    = bus;
    pcidev->domain = domain;
}
50
/*
 * Append the per-device backend entries for device number num to the
 * key/value flexarray back: key-N and dev-N (both the BDF), vdevfn-N (only
 * when a virtual devfn is set), opts-N and state-N (Initialising).
 */
static void libxl_create_pci_backend_device(libxl__gc *gc,
                                            flexarray_t *back,
                                            int num,
                                            const libxl_device_pci *pcidev)
{
    char *name, *value;

    name = GCSPRINTF("key-%d", num);
    value = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    flexarray_append_pair(back, name, value);

    name = GCSPRINTF("dev-%d", num);
    value = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    flexarray_append_pair(back, name, value);

    if (pcidev->vdevfn) {
        name = GCSPRINTF("vdevfn-%d", num);
        value = GCSPRINTF("%x", pcidev->vdevfn);
        flexarray_append_pair(back, name, value);
    }

    name = GCSPRINTF("opts-%d", num);
    value = GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d",
                      pcidev->msitranslate, pcidev->power_mgmt,
                      pcidev->permissive);
    flexarray_append_pair(back, name, value);

    name = GCSPRINTF("state-%d", num);
    value = GCSPRINTF("%d", XenbusStateInitialising);
    flexarray_append_pair(back, name, value);
}
69
/*
 * Fill in device for the PCI backend of domain domid. Both device ids and
 * the backend domain id are set to 0; gc and pcidev are not used.
 */
static void libxl__device_from_pcidev(libxl__gc *gc, uint32_t domid,
                                      const libxl_device_pci *pcidev,
                                      libxl__device *device)
{
    /* Frontend side. */
    device->domid = domid;
    device->devid = 0;
    device->kind = LIBXL__DEVICE_KIND_PCI;

    /* Backend side. */
    device->backend_domid = 0;
    device->backend_devid = 0;
    device->backend_kind = LIBXL__DEVICE_KIND_PCI;
}
81
/*
 * Create the xenstore PCI backend/frontend pair for domain domid, populated
 * with the first num entries of the pcidev array.
 *
 * Returns the result of libxl__device_generic_add().
 */
static int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
                                     const libxl_device_pci *pcidev,
                                     int num)
{
    flexarray_t *front = flexarray_make(gc, 16, 1);
    flexarray_t *back = flexarray_make(gc, 16, 1);
    libxl__device device;
    int idx;

    LOGD(DEBUG, domid, "Creating pci backend");

    /* Describe the (single) PCI backend device of this domain. */
    libxl__device_from_pcidev(gc, domid, pcidev, &device);

    /* Backend-wide nodes. */
    flexarray_append_pair(back, "frontend-id", GCSPRINTF("%d", domid));
    flexarray_append_pair(back, "online", "1");
    flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateInitialising));
    flexarray_append_pair(back, "domain", libxl__domid_to_name(gc, domid));

    /* One group of per-device nodes for each passed-through device. */
    for (idx = 0; idx < num; idx++)
        libxl_create_pci_backend_device(gc, back, idx, &pcidev[idx]);

    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num));

    /* Frontend nodes. */
    flexarray_append_pair(front, "backend-id", GCSPRINTF("%d", 0));
    flexarray_append_pair(front, "state", GCSPRINTF("%d", XenbusStateInitialising));

    return libxl__device_generic_add(gc, XBT_NULL, &device,
                                     libxl__xs_kvs_of_flexarray(gc, back),
                                     libxl__xs_kvs_of_flexarray(gc, front),
                                     NULL);
}
116
/*
 * Add the xenstore backend entries for pcidev to domain domid.
 *
 * If the backend directory has no "num_devs" node yet, the whole backend is
 * created through libxl__create_pci_backend() with this single device.
 * Otherwise the device is appended as entry number num_devs, "num_devs" is
 * incremented and, unless starting is set, the backend "state" node is set
 * to XenbusStateReconfiguring. For domains that are not stubdomains the
 * stored domain configuration is also updated, under the domain userdata
 * lock, inside the transaction retry loop.
 *
 * Returns 0 on success or a libxl error code.
 */
static int libxl__device_pci_add_xenstore(libxl__gc *gc,
                                          uint32_t domid,
                                          const libxl_device_pci *pcidev,
                                          bool starting)
{
    flexarray_t *back;
    char *num_devs, *be_path;
    int num = 0;
    xs_transaction_t t = XBT_NULL;
    int rc;
    libxl_domain_config d_config;
    libxl__flock *lock = NULL;
    bool is_stubdomain = libxl_is_stubdom(CTX, domid, NULL);

    /* Stubdomain doesn't have own config. */
    if (!is_stubdomain)
        libxl_domain_config_init(&d_config);

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
    /* No num_devs node: there is no backend yet, so create it from scratch. */
    if (!num_devs)
        return libxl__create_pci_backend(gc, domid, pcidev, 1);

    libxl_domain_type domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
        return ERROR_FAIL;

    /* For a running PV guest, wait until the backend reports Connected. */
    if (!starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
        if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0)
            return ERROR_FAIL;
    }

    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Adding new pci device to xenstore");
    /* The new device takes the next free index, i.e. the current count. */
    num = atoi(num_devs);
    libxl_create_pci_backend_device(gc, back, num, pcidev);
    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
    if (!starting)
        flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateReconfiguring));

    /*
     * Stubdomain config is derived from its target domain, it doesn't have
     * its own file.
     */
    if (!is_stubdomain) {
        lock = libxl__lock_domain_userdata(gc, domid);
        if (!lock) {
            rc = ERROR_LOCK_FAIL;
            goto out;
        }

        rc = libxl__get_domain_configuration(gc, domid, &d_config);
        if (rc) goto out;

        device_add_domain_config(gc, &d_config, &libxl__pcidev_devtype,
                                 pcidev);

        rc = libxl__dm_check_start(gc, &d_config, domid);
        if (rc) goto out;
    }

    /* Loop until the commit returns 0 (done) or a negative value (error). */
    for (;;) {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        /* lock is only non-NULL on the non-stubdomain path above. */
        if (lock) {
            rc = libxl__set_domain_configuration(gc, domid, &d_config);
            if (rc) goto out;
        }

        /* NOTE(review): the return value of this write is not checked. */
        libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

        rc = libxl__xs_transaction_commit(gc, &t);
        if (!rc) break;
        if (rc < 0) goto out;
    }

out:
    libxl__xs_transaction_abort(gc, &t);
    if (lock) libxl__unlock_file(lock);
    if (!is_stubdomain)
        libxl_domain_config_dispose(&d_config);
    return rc;
}
203
libxl__device_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pcidev)204 static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
205 {
206 libxl_ctx *ctx = libxl__gc_owner(gc);
207 char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
208 int num, i, j;
209 xs_transaction_t t;
210
211 be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
212 LIBXL__DEVICE_KIND_PCI);
213 num_devs_path = GCSPRINTF("%s/num_devs", be_path);
214 num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
215 if (!num_devs)
216 return ERROR_INVAL;
217 num = atoi(num_devs);
218
219 libxl_domain_type domtype = libxl__domain_type(gc, domid);
220 if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
221 return ERROR_FAIL;
222
223 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
224 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
225 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
226 return ERROR_FAIL;
227 }
228 }
229
230 for (i = 0; i < num; i++) {
231 unsigned int domain = 0, bus = 0, dev = 0, func = 0;
232 xsdev = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, i));
233 sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
234 if (domain == pcidev->domain && bus == pcidev->bus &&
235 pcidev->dev == dev && pcidev->func == func) {
236 break;
237 }
238 }
239 if (i == num) {
240 LOGD(ERROR, domid, "Couldn't find the device on xenstore");
241 return ERROR_INVAL;
242 }
243
244 retry_transaction:
245 t = xs_transaction_start(ctx->xsh);
246 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i), GCSPRINTF("%d", XenbusStateClosing), 1);
247 xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path), GCSPRINTF("%d", XenbusStateReconfiguring), 1);
248 if (!xs_transaction_end(ctx->xsh, t, 0))
249 if (errno == EAGAIN)
250 goto retry_transaction;
251
252 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
253 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
254 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
255 return ERROR_FAIL;
256 }
257 }
258
259 retry_transaction2:
260 t = xs_transaction_start(ctx->xsh);
261 xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
262 xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
263 xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
264 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
265 xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
266 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
267 libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);
268 for (j = i + 1; j < num; j++) {
269 tmppath = GCSPRINTF("%s/state-%d", be_path, j);
270 tmp = libxl__xs_read(gc, t, tmppath);
271 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
272 xs_rm(ctx->xsh, t, tmppath);
273 tmppath = GCSPRINTF("%s/dev-%d", be_path, j);
274 tmp = libxl__xs_read(gc, t, tmppath);
275 xs_write(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
276 xs_rm(ctx->xsh, t, tmppath);
277 tmppath = GCSPRINTF("%s/key-%d", be_path, j);
278 tmp = libxl__xs_read(gc, t, tmppath);
279 xs_write(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
280 xs_rm(ctx->xsh, t, tmppath);
281 tmppath = GCSPRINTF("%s/vdev-%d", be_path, j);
282 tmp = libxl__xs_read(gc, t, tmppath);
283 if (tmp) {
284 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
285 xs_rm(ctx->xsh, t, tmppath);
286 }
287 tmppath = GCSPRINTF("%s/opts-%d", be_path, j);
288 tmp = libxl__xs_read(gc, t, tmppath);
289 if (tmp) {
290 xs_write(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
291 xs_rm(ctx->xsh, t, tmppath);
292 }
293 tmppath = GCSPRINTF("%s/vdevfn-%d", be_path, j);
294 tmp = libxl__xs_read(gc, t, tmppath);
295 if (tmp) {
296 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
297 xs_rm(ctx->xsh, t, tmppath);
298 }
299 }
300 if (!xs_transaction_end(ctx->xsh, t, 0))
301 if (errno == EAGAIN)
302 goto retry_transaction2;
303
304 if (num == 1) {
305 libxl__device dev;
306 if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
307 return ERROR_FAIL;
308
309 dev.domid = domid;
310 dev.kind = LIBXL__DEVICE_KIND_PCI;
311 dev.devid = 0;
312
313 libxl__device_destroy(gc, &dev);
314 return 0;
315 }
316
317 return 0;
318 }
319
/*
 * Collect the BDFs listed in the dom0 PCI backend directories of all domains
 * found under /local/domain.
 *
 * On success returns 0, stores the array in *list (NULL when empty) and its
 * length in *num; the array is handed to gc via libxl__ptr_add(). Only the
 * domain/bus/dev/func/vdevfn members of each entry are filled in.
 * On allocation failure returns ERROR_NOMEM with *list == NULL and
 * *num == 0; anything gathered so far is freed.
 */
static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list, int *num)
{
    char **domlist;
    unsigned int nd = 0, i;

    *list = NULL;
    *num = 0;

    domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
    for(i = 0; i < nd; i++) {
        char *path, *num_devs;

        path = GCSPRINTF("/local/domain/0/backend/%s/%s/0/num_devs",
                         libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
                         domlist[i]);
        num_devs = libxl__xs_read(gc, XBT_NULL, path);
        if ( num_devs ) {
            int ndev = atoi(num_devs), j;
            char *devpath, *bdf;

            for(j = 0; j < ndev; j++) {
                devpath = GCSPRINTF("/local/domain/0/backend/%s/%s/0/dev-%u",
                                    libxl__device_kind_to_string(LIBXL__DEVICE_KIND_PCI),
                                    domlist[i], j);
                bdf = libxl__xs_read(gc, XBT_NULL, devpath);
                if ( bdf ) {
                    unsigned dom, bus, dev, func;
                    libxl_device_pci *grown;

                    if ( sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
                        continue;

                    /*
                     * Grow through a temporary: on failure realloc() leaves
                     * the old block allocated, so it must be freed here
                     * instead of being overwritten with NULL and leaked.
                     */
                    grown = realloc(*list, sizeof(**list) * ((*num) + 1));
                    if (grown == NULL) {
                        free(*list);
                        *list = NULL;
                        *num = 0;
                        return ERROR_NOMEM;
                    }
                    *list = grown;
                    pcidev_struct_fill(*list + *num, dom, bus, dev, func, 0);
                    (*num)++;
                }
            }
        }
    }
    libxl__ptr_add(gc, *list);

    return 0;
}
363
/*
 * Return 1 if one of the first num_assigned entries of assigned has the
 * given domain, bus, device and function; 0 otherwise.
 */
static int is_pcidev_in_array(libxl_device_pci *assigned, int num_assigned,
                              int dom, int bus, int dev, int func)
{
    int idx = 0;

    while (idx < num_assigned) {
        const libxl_device_pci *entry = &assigned[idx++];

        if ( entry->domain == dom && entry->bus == bus &&
             entry->dev == dev && entry->func == func )
            return 1;
    }

    return 0;
}
383
384 /* Write the standard BDF into the sysfs path given by sysfs_path. */
sysfs_write_bdf(libxl__gc * gc,const char * sysfs_path,libxl_device_pci * pcidev)385 static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
386 libxl_device_pci *pcidev)
387 {
388 int rc, fd;
389 char *buf;
390
391 fd = open(sysfs_path, O_WRONLY);
392 if (fd < 0) {
393 LOGE(ERROR, "Couldn't open %s", sysfs_path);
394 return ERROR_FAIL;
395 }
396
397 buf = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
398 pcidev->dev, pcidev->func);
399 rc = write(fd, buf, strlen(buf));
400 /* Annoying to have two if's, but we need the errno */
401 if (rc < 0)
402 LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
403 close(fd);
404
405 if (rc < 0)
406 return ERROR_FAIL;
407
408 return 0;
409 }
410
libxl_device_pci_assignable_list(libxl_ctx * ctx,int * num)411 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
412 {
413 GC_INIT(ctx);
414 libxl_device_pci *pcidevs = NULL, *new, *assigned;
415 struct dirent *de;
416 DIR *dir;
417 int r, num_assigned;
418
419 *num = 0;
420
421 r = get_all_assigned_devices(gc, &assigned, &num_assigned);
422 if (r) goto out;
423
424 dir = opendir(SYSFS_PCIBACK_DRIVER);
425 if (NULL == dir) {
426 if (errno == ENOENT) {
427 LOG(ERROR, "Looks like pciback driver not loaded");
428 } else {
429 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
430 }
431 goto out;
432 }
433
434 while((de = readdir(dir))) {
435 unsigned dom, bus, dev, func;
436 if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
437 continue;
438
439 if (is_pcidev_in_array(assigned, num_assigned, dom, bus, dev, func))
440 continue;
441
442 new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
443 if (NULL == new)
444 continue;
445
446 pcidevs = new;
447 new = pcidevs + *num;
448
449 memset(new, 0, sizeof(*new));
450 pcidev_struct_fill(new, dom, bus, dev, func, 0);
451 (*num)++;
452 }
453
454 closedir(dir);
455 out:
456 GC_FREE;
457 return pcidevs;
458 }
459
460 /* Unbind device from its current driver, if any. If driver_path is non-NULL,
461 * store the path to the original driver in it. */
sysfs_dev_unbind(libxl__gc * gc,libxl_device_pci * pcidev,char ** driver_path)462 static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pcidev,
463 char **driver_path)
464 {
465 char * spath, *dp = NULL;
466 struct stat st;
467
468 spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver",
469 pcidev->domain,
470 pcidev->bus,
471 pcidev->dev,
472 pcidev->func);
473 if ( !lstat(spath, &st) ) {
474 /* Find the canonical path to the driver. */
475 dp = libxl__zalloc(gc, PATH_MAX);
476 dp = realpath(spath, dp);
477 if ( !dp ) {
478 LOGE(ERROR, "realpath() failed");
479 return -1;
480 }
481
482 LOG(DEBUG, "Driver re-plug path: %s", dp);
483
484 /* Unbind from the old driver */
485 spath = GCSPRINTF("%s/unbind", dp);
486 if ( sysfs_write_bdf(gc, spath, pcidev) < 0 ) {
487 LOGE(ERROR, "Couldn't unbind device");
488 return -1;
489 }
490 }
491
492 if ( driver_path )
493 *driver_path = dp;
494
495 return 0;
496 }
497
/*
 * Read the vendor id of pcidev from its sysfs "vendor" attribute, which is
 * parsed as "0x<hex>".
 *
 * Returns the id, or 0xffff if the attribute cannot be opened or parsed.
 */
static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pcidev)
{
    char *pci_device_vendor_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor",
                      pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    /* fscanf() returns an int (possibly EOF); keep it in an int. */
    int read_items;
    uint16_t pci_device_vendor;

    FILE *f = fopen(pci_device_vendor_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have vendor attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_vendor);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read vendor of pci device "PCI_BDF,
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }

    return pci_device_vendor;
}
524
/*
 * Read the device id of pcidev from its sysfs "device" attribute, which is
 * parsed as "0x<hex>".
 *
 * Returns the id, or 0xffff if the attribute cannot be opened or parsed.
 */
static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pcidev)
{
    char *pci_device_device_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device",
                      pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    /* fscanf() returns an int (possibly EOF); keep it in an int. */
    int read_items;
    uint16_t pci_device_device;

    FILE *f = fopen(pci_device_device_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have device attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_device);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read device of pci device "PCI_BDF,
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }

    return pci_device_device;
}
551
/*
 * Read the sysfs "class" attribute of pcidev, parsed as "0x<hex>", into
 * *class.
 *
 * Returns 0 on success and ERROR_FAIL if the attribute cannot be opened or
 * parsed.
 */
static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pcidev,
                               unsigned long *class)
{
    int parsed;
    FILE *attr;
    char *attr_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class",
                     pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);

    attr = fopen(attr_path, "r");
    if (attr == NULL) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have class attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return ERROR_FAIL;
    }

    parsed = fscanf(attr, "0x%lx\n", class);
    fclose(attr);
    if (parsed == 1)
        return 0;

    LOGE(ERROR,
         "cannot read class of pci device "PCI_BDF,
         pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    return ERROR_FAIL;
}
579
580 /*
581 * Some devices may need some ways to work well. Here like IGD,
582 * we have to pass a specific option to qemu.
583 */
libxl__is_igd_vga_passthru(libxl__gc * gc,const libxl_domain_config * d_config)584 bool libxl__is_igd_vga_passthru(libxl__gc *gc,
585 const libxl_domain_config *d_config)
586 {
587 unsigned int i;
588 uint16_t pt_vendor, pt_device;
589 unsigned long class;
590
591 for (i = 0 ; i < d_config->num_pcidevs ; i++) {
592 libxl_device_pci *pcidev = &d_config->pcidevs[i];
593 pt_vendor = sysfs_dev_get_vendor(gc, pcidev);
594 pt_device = sysfs_dev_get_device(gc, pcidev);
595
596 if (pt_vendor == 0xffff || pt_device == 0xffff ||
597 pt_vendor != 0x8086)
598 continue;
599
600 if (sysfs_dev_get_class(gc, pcidev, &class))
601 continue;
602 if (class == 0x030000)
603 return true;
604 }
605
606 return false;
607 }
608
609 /*
610 * A brief comment about slots. I don't know what slots are for; however,
611 * I have by experimentation determined:
612 * - Before a device can be bound to pciback, its BDF must first be listed
613 * in pciback/slots
614 * - The way to get the BDF listed there is to write BDF to
615 * pciback/new_slot
616 * - Writing the same BDF to pciback/new_slot is not idempotent; it results
617 * in two entries of the BDF in pciback/slots
618 * It's not clear whether having two entries in pciback/slots is a problem
619 * or not. Just to be safe, this code does the conservative thing, and
620 * first checks to see if there is a slot, adding one only if one does not
621 * already exist.
622 */
623
624 /* Scan through /sys/.../pciback/slots looking for pcidev's BDF */
pciback_dev_has_slot(libxl__gc * gc,libxl_device_pci * pcidev)625 static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pcidev)
626 {
627 FILE *f;
628 int rc = 0;
629 unsigned dom, bus, dev, func;
630
631 f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
632
633 if (f == NULL) {
634 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots");
635 return ERROR_FAIL;
636 }
637
638 while(fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func)==4) {
639 if(dom == pcidev->domain
640 && bus == pcidev->bus
641 && dev == pcidev->dev
642 && func == pcidev->func) {
643 rc = 1;
644 goto out;
645 }
646 }
647 out:
648 fclose(f);
649 return rc;
650 }
651
/*
 * Check whether pcidev has an entry in the pciback sysfs driver directory.
 *
 * Returns 1 if the entry exists, 0 if it does not, and -1 if the driver
 * directory is not accessible or lstat() fails for another reason.
 */
static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pcidev)
{
    struct stat st;
    char *dev_node;
    int lstat_rc;

    if ( access(SYSFS_PCIBACK_DRIVER, F_OK) < 0 ) {
        if ( errno != ENOENT ) {
            LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER);
        } else {
            LOG(ERROR, "Looks like pciback driver is not loaded");
        }
        return -1;
    }

    dev_node = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
                         pcidev->domain, pcidev->bus,
                         pcidev->dev, pcidev->func);
    lstat_rc = lstat(dev_node, &st);

    if ( lstat_rc == 0 )
        return 1;

    if ( lstat_rc < 0 && errno == ENOENT )
        return 0;

    LOGE(ERROR, "Accessing %s", dev_node);
    return -1;
}
679
/*
 * Bind pcidev to pciback: add a slot for it through new_slot if it has none
 * yet, then write its BDF to the driver's bind file.
 *
 * Returns 0 on success, ERROR_FAIL if any step fails.
 */
static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pcidev)
{
    int has_slot = pciback_dev_has_slot(gc, pcidev);

    if ( has_slot < 0 ) {
        LOGE(ERROR, "Error checking for pciback slot");
        return ERROR_FAIL;
    }

    /* Only add a slot when none exists yet. */
    if ( has_slot == 0 &&
         sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot", pcidev) < 0 ) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

    if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pcidev) < 0 ) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

    return 0;
}
701
/*
 * Unbind pcidev from its current driver and remove its pciback slot if it
 * has one.
 *
 * Returns 0 on success, ERROR_FAIL if unbinding or slot removal fails.
 */
static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pcidev)
{
    /* Remove from pciback */
    if ( sysfs_dev_unbind(gc, pcidev, NULL) < 0 ) {
        LOG(ERROR, "Couldn't unbind device!");
        return ERROR_FAIL;
    }

    /* Remove slot if necessary */
    if ( pciback_dev_has_slot(gc, pcidev) > 0 &&
         sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot",
                         pcidev) < 0 ) {
        LOGE(ERROR, "Couldn't remove pciback slot");
        return ERROR_FAIL;
    }

    return 0;
}
720
721 #define PCIBACK_INFO_PATH "/libxl/pciback"
722
/*
 * Record driver_path in xenstore under
 * PCIBACK_INFO_PATH/<bdf>/driver_path. A failed write is only logged as a
 * warning.
 */
static void pci_assignable_driver_path_write(libxl__gc *gc,
                                            libxl_device_pci *pcidev,
                                            char *driver_path)
{
    char *node = GCSPRINTF(PCIBACK_INFO_PATH"/"PCI_BDF_XSPATH"/driver_path",
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);

    if ( libxl__xs_printf(gc, XBT_NULL, node, "%s", driver_path) >= 0 )
        return;

    LOGE(WARN, "Write of %s to node %s failed.", driver_path, node);
}
738
/*
 * Read the xenstore node PCIBACK_INFO_PATH/<bdf>/driver_path for pcidev and
 * return whatever libxl__xs_read() yields for it.
 */
static char * pci_assignable_driver_path_read(libxl__gc *gc,
                                              libxl_device_pci *pcidev)
{
    char *node = GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH "/driver_path",
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);

    return libxl__xs_read(gc, XBT_NULL, node);
}
750
/*
 * Remove the xenstore directory PCIBACK_INFO_PATH/<bdf> kept for pcidev.
 * The result of xs_rm() is not checked.
 */
static void pci_assignable_driver_path_remove(libxl__gc *gc,
                                              libxl_device_pci *pcidev)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *node = GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH,
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);

    /* Remove the xenstore entry */
    xs_rm(ctx->xsh, XBT_NULL, node);
}
764
/*
 * Make pcidev assignable: unless it is already bound to pciback, unbind it
 * from its current driver and bind it to pciback; then assign it to
 * DOMID_IO (quarantine).
 *
 * With rebind set, the original driver path is stored in xenstore (or an
 * earlier stored path is reported); otherwise any stored path is removed.
 *
 * Returns 0 on success, ERROR_FAIL on any failure.
 */
static int libxl__device_pci_assignable_add(libxl__gc *gc,
                                            libxl_device_pci *pcidev,
                                            int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    /* Local copies of the address, used for the path and the log messages. */
    const unsigned dom = pcidev->domain;
    const unsigned bus = pcidev->bus;
    const unsigned dev = pcidev->dev;
    const unsigned func = pcidev->func;
    struct stat st;
    char *sysfs_node;
    int assigned, rc;

    /* The device must exist in sysfs. */
    sysfs_node = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
    if ( lstat(sysfs_node, &st) != 0 ) {
        LOGE(ERROR, "Couldn't lstat %s", sysfs_node);
        return ERROR_FAIL;
    }

    assigned = pciback_dev_is_assigned(gc, pcidev);
    if ( assigned < 0 )
        return ERROR_FAIL;

    if ( assigned ) {
        /* Nothing to (un)bind; go straight to the quarantine step. */
        LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
    } else {
        char *driver_path = NULL;

        /* Detach from the current driver, remembering where it lives. */
        if ( sysfs_dev_unbind(gc, pcidev, &driver_path) != 0 ) {
            LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver",
                dom, bus, dev, func);
            return ERROR_FAIL;
        }

        /* Keep (or drop) the driver path used for rebinding to dom0. */
        if ( !rebind ) {
            pci_assignable_driver_path_remove(gc, pcidev);
        } else if ( driver_path ) {
            pci_assignable_driver_path_write(gc, pcidev, driver_path);
        } else {
            driver_path = pci_assignable_driver_path_read(gc, pcidev);
            if ( driver_path != NULL ) {
                LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s",
                    dom, bus, dev, func, driver_path);
            } else {
                LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.",
                    dom, bus, dev, func);
            }
        }

        if ( pciback_dev_assign(gc, pcidev) != 0 ) {
            LOG(ERROR, "Couldn't bind device to pciback!");
            return ERROR_FAIL;
        }
    }

    /*
     * DOMID_IO is just a sentinel domain, without any actual mappings,
     * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being
     * unnecessarily denied.
     */
    rc = xc_assign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev),
                          XEN_DOMCTL_DEV_RDM_RELAXED);
    if ( rc < 0 ) {
        LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func);
        return ERROR_FAIL;
    }

    return 0;
}
841
/*
 * Undo libxl__device_pci_assignable_add(): deassign pcidev from DOMID_IO,
 * unbind it from pciback if it is bound, and, when rebind is set and a
 * driver path was stored in xenstore, bind it back to that driver and
 * remove the stored path.
 *
 * Returns 0 on success, ERROR_FAIL if de-quarantining or the pciback check
 * fails, and -1 if rebinding to the original driver fails. The result of
 * pciback_dev_unassign() is not checked.
 */
static int libxl__device_pci_assignable_remove(libxl__gc *gc,
                                               libxl_device_pci *pcidev,
                                               int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *driver_path;
    int assigned;

    /* De-quarantine */
    if ( xc_deassign_device(ctx->xch, DOMID_IO,
                            pcidev_encode_bdf(pcidev)) < 0 ) {
        LOG(ERROR, "failed to de-quarantine "PCI_BDF, pcidev->domain, pcidev->bus,
            pcidev->dev, pcidev->func);
        return ERROR_FAIL;
    }

    /* Unbind from pciback */
    assigned = pciback_dev_is_assigned(gc, pcidev);
    if ( assigned < 0 )
        return ERROR_FAIL;

    if ( assigned ) {
        pciback_dev_unassign(gc, pcidev);
    } else {
        LOG(WARN, "Not bound to pciback");
    }

    /* Rebind if necessary */
    driver_path = pci_assignable_driver_path_read(gc, pcidev);

    if ( rebind ) {
        if ( !driver_path ) {
            LOG(WARN,
                "Couldn't find path for original driver; not rebinding");
        } else {
            LOG(INFO, "Rebinding to driver at %s", driver_path);

            if ( sysfs_write_bdf(gc,
                                 GCSPRINTF("%s/bind", driver_path),
                                 pcidev) < 0 ) {
                LOGE(ERROR, "Couldn't bind device to %s", driver_path);
                return -1;
            }

            /* The stored path has served its purpose. */
            pci_assignable_driver_path_remove(gc, pcidev);
        }
    }

    return 0;
}
892
/*
 * Public entry point: run libxl__device_pci_assignable_add() inside a
 * freshly initialised gc and return its result.
 */
int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev,
                                    int rebind)
{
    GC_INIT(ctx);
    const int result = libxl__device_pci_assignable_add(gc, pcidev, rebind);

    GC_FREE;
    return result;
}
904
905
/*
 * Public entry point: run libxl__device_pci_assignable_remove() inside a
 * freshly initialised gc and return its result.
 */
int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev,
                                       int rebind)
{
    GC_INIT(ctx);
    const int result = libxl__device_pci_assignable_remove(gc, pcidev, rebind);

    GC_FREE;
    return result;
}
917
918 /*
919 * This function checks that all functions of a device are bound to pciback
920 * driver. It also initialises a bit-mask of which function numbers are present
921 * on that device.
922 */
pci_multifunction_check(libxl__gc * gc,libxl_device_pci * pcidev,unsigned int * func_mask)923 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask)
924 {
925 struct dirent *de;
926 DIR *dir;
927
928 *func_mask = 0;
929
930 dir = opendir(SYSFS_PCI_DEV);
931 if ( NULL == dir ) {
932 LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
933 return -1;
934 }
935
936 while( (de = readdir(dir)) ) {
937 unsigned dom, bus, dev, func;
938 struct stat st;
939 char *path;
940
941 if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
942 continue;
943 if ( pcidev->domain != dom )
944 continue;
945 if ( pcidev->bus != bus )
946 continue;
947 if ( pcidev->dev != dev )
948 continue;
949
950 path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
951 if ( lstat(path, &st) ) {
952 if ( errno == ENOENT )
953 LOG(ERROR, PCI_BDF " is not assigned to pciback driver",
954 dom, bus, dev, func);
955 else
956 LOGE(ERROR, "Couldn't lstat %s", path);
957 closedir(dir);
958 return -1;
959 }
960 (*func_mask) |= (1 << func);
961 }
962
963 closedir(dir);
964 return 0;
965 }
966
/*
 * Classify a device-model state string while waiting for a "pci-ins"
 * command to complete.
 *
 * Returns -1 for "pci-insert-failed", 0 for "pci-inserted" and 1 for any
 * other state.
 *
 * priv is not consulted. Every state other than the two above, including
 * the original one passed in priv, gives the same result, so comparing
 * against it would only add a dereference of a pointer that is NULL when
 * the caller could not read the original state.
 */
static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
{
    (void)priv;

    if ( !strcmp(state, "pci-insert-failed") )
        return -1;
    if ( !strcmp(state, "pci-inserted") )
        return 0;

    return 1;
}
980
/*
 * Ask the device model of domain domid to insert pcidev through the
 * xenstore command interface.
 *
 * The device description is written to the device model's "parameter" node,
 * the "pci-ins" command is issued and the device model state is waited on
 * with pci_ins_check(). On success the virtual devfn is parsed from the
 * "parameter" node ("0x<hex>") into pcidev->vdevfn. Finally the state that
 * was read before the command is written back, if it could be read.
 *
 * Returns the (negative) wait result on failure, -1 if the vdevfn is
 * missing or malformed, and otherwise the non-negative wait result.
 */
static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pcidev)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc = 0;
    char *path;
    char *state, *vdevfn;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);
    /* Remember the current state so it can be restored afterwards. */
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    if (pcidev->vdevfn) {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS,
                         pcidev->domain, pcidev->bus, pcidev->dev,
                         pcidev->func, pcidev->vdevfn, pcidev->msitranslate,
                         pcidev->power_mgmt);
    } else {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS,
                         pcidev->domain,  pcidev->bus, pcidev->dev,
                         pcidev->func, pcidev->msitranslate, pcidev->power_mgmt);
    }

    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
    rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL,
                                                 pci_ins_check, state);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    vdevfn = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    if ( rc < 0 )
        LOGD(ERROR, domid, "qemu refused to add device: %s",
             vdevfn ? vdevfn : "(null)");
    else if ( !vdevfn || sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) {
        /* A failed read must not reach sscanf() as a NULL pointer. */
        LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'",
             vdevfn ? vdevfn : "(null)");
        rc = -1;
    }
    /* Only restore the state if it could be read in the first place. */
    if ( state )
        xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));

    return rc;
}
1021
/*
 * Shared helper for the xswait callbacks that watch the device model
 * "/state" node.
 *
 * rc and state are the values delivered to the xswait callback.
 *
 * Returns ERROR_NOT_READY (leaving the watch in place) while the state is
 * absent or not "running".  In every other case the watch is stopped and
 * the incoming rc is returned: 0 once the device model is running, or the
 * error that was delivered (a timeout is additionally logged).
 */
static int check_qemu_running(libxl__gc *gc,
                              libxl_domid domid,
                              libxl__xswait_state *xswa,
                              int rc,
                              const char *state)
{
    if (!rc) {
        bool running = state && !strcmp(state, "running");

        /* Keep waiting; the watch stays registered. */
        if (!running)
            return ERROR_NOT_READY;
    } else if (rc == ERROR_TIMEDOUT) {
        LOGD(ERROR, domid, "%s not ready", xswa->what);
    }

    libxl__xswait_stop(gc, xswa);
    return rc;
}
1042
/*
 * State carried through the asynchronous steps of adding one PCI device
 * (do_pci_add() and the callbacks declared below).
 */
typedef struct pci_add_state {
    /* filled by user of do_pci_add */
    libxl__ao_device *aodev;
    libxl_domid domid;
    bool starting;
    /* called by pci_add_dm_done() with the final result of do_pci_add() */
    void (*callback)(libxl__egc *, struct pci_add_state *, int rc);

    /* private to device_pci_add_stubdom_wait */
    libxl__ev_devstate pciback_ds;

    /* private to do_pci_add */
    libxl__xswait_state xswait;
    libxl__ev_qmp qmp;
    libxl__ev_time timeout;
    libxl_device_pci *pcidev;
    /* domid do_pci_add() was called with; can be a stubdomain's id and
     * therefore differ from 'domid' above */
    int pci_domid;
} pci_add_state;

/* xswait callback: device model "/state" watch (qemu-traditional path) */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
    libxl__xswait_state *xswa, int rc, const char *state);
/* QMP path: send "device_add", then "query-pci" to learn the vdevfn */
static void pci_add_qmp_device_add(libxl__egc *, pci_add_state *);
static void pci_add_qmp_device_add_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
static void pci_add_qmp_query_pci_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
/* fires when the timer registered by pci_add_qmp_device_add() expires */
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
                            const struct timeval *requested_abs, int rc);
/* common completion of all the paths above */
static void pci_add_dm_done(libxl__egc *,
                            pci_add_state *, int rc);
1072
/*
 * Start adding pcidev to domid.
 *
 * Initialises the private part of pas and, for HVM domains, first asks
 * the running device model to insert the device:
 *  - qemu-traditional: wait for the device model "/state" node, then
 *    continue in pci_add_qemu_trad_watch_state_cb();
 *  - qemu-xen: continue in pci_add_qmp_device_add().
 * Non-HVM domains go straight to pci_add_dm_done().
 *
 * Every path, successful or not, ends in pci_add_dm_done(), which in turn
 * invokes pas->callback with the result.
 */
static void do_pci_add(libxl__egc *egc,
                       libxl_domid domid,
                       libxl_device_pci *pcidev,
                       pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_domain_type type = libxl__domain_type(gc, domid);
    int rc;

    /* init pci_add_state */
    libxl__xswait_init(&pas->xswait);
    libxl__ev_qmp_init(&pas->qmp);
    pas->pcidev = pcidev;
    pas->pci_domid = domid;
    libxl__ev_time_init(&pas->timeout);

    if (type == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        switch (libxl__device_model_version_running(gc, domid)) {
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
                pas->xswait.ao = ao;
                pas->xswait.what = "Device Model";
                pas->xswait.path = DEVICE_MODEL_XS_PATH(gc,
                    libxl_get_stubdom_id(CTX, domid), domid, "/state");
                pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
                pas->xswait.callback = pci_add_qemu_trad_watch_state_cb;
                rc = libxl__xswait_start(gc, &pas->xswait);
                if (rc) goto out;
                return;
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
                pci_add_qmp_device_add(egc, pas); /* must be last */
                return;
            default:
                /*
                 * Unknown device model: report the error.  A plain
                 * "break" here would fall into "rc = 0" below and turn
                 * the failure into a success.
                 */
                rc = ERROR_INVAL;
                goto out;
        }
    }

    rc = 0;

out:
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1120
/*
 * xswait callback for the device model "/state" watch started by
 * do_pci_add() (qemu-traditional).
 *
 * Keeps waiting while check_qemu_running() reports ERROR_NOT_READY.
 * Once the device model is running the device is inserted through
 * qemu_pci_add_xenstore(); the result (or any earlier error) is passed on
 * to pci_add_dm_done().
 */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
                                             libxl__xswait_state *xswa,
                                             int rc,
                                             const char *state)
{
    pci_add_state *pas = CONTAINER_OF(xswa, *pas, xswait);
    STATE_AO_GC(pas->aodev->ao);

    rc = check_qemu_running(gc, pas->domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return;

    if (!rc)
        rc = qemu_pci_add_xenstore(gc, pas->domid, pas->pcidev);

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1143
/*
 * QMP path of do_pci_add(): arm the command timeout and send
 * "device_add" for pas->pcidev.  The reply is handled by
 * pci_add_qmp_device_add_cb(); any failure to get that far is reported
 * through pci_add_dm_done().
 */
static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_device_pci *pcidev = pas->pcidev;
    libxl__json_object *args = NULL;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &pas->timeout,
                                     pci_add_timeout,
                                     LIBXL_QMP_CMD_TIMEOUT * 1000);
    if (rc) {
        pci_add_dm_done(egc, pas, rc); /* must be last */
        return;
    }

    /* Build the arguments of the device_add command. */
    libxl__qmp_param_add_string(gc, &args, "driver",
                                "xen-pci-passthrough");
    QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
                           pcidev->bus, pcidev->dev, pcidev->func);
    QMP_PARAMETERS_SPRINTF(&args, "hostaddr", PCI_BDF,
                           pcidev->domain, pcidev->bus,
                           pcidev->dev, pcidev->func);
    /* Only request a specific virtual slot when one was given. */
    if (pcidev->vdevfn) {
        QMP_PARAMETERS_SPRINTF(&args, "addr", "%x.%x",
                               PCI_SLOT(pcidev->vdevfn),
                               PCI_FUNC(pcidev->vdevfn));
    }
    /*
     * QEMU versions from before the XSA-131 fix do not know the
     * "permissive" property and behave as if it were always set.  The
     * XSA-131 fix made restricted mode the default and introduced the
     * property.
     *
     * To work with both, the property is only sent when it is true;
     * users of an older QEMU have no reason to set it.
     */
    if (pcidev->permissive)
        libxl__qmp_param_add_bool(gc, &args, "permissive", true);

    pas->qmp.ao = pas->aodev->ao;
    pas->qmp.domid = pas->domid;
    pas->qmp.payload_fd = -1;
    pas->qmp.callback = pci_add_qmp_device_add_cb;
    rc = libxl__ev_qmp_send(egc, &pas->qmp, "device_add", args);
    if (!rc)
        return;

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1196
/*
 * Reply handler for the "device_add" QMP command.  On success it follows
 * up with "query-pci" (handled by pci_add_qmp_query_pci_cb()); on any
 * error it finishes through pci_add_dm_done().
 */
static void pci_add_qmp_device_add_cb(libxl__egc *egc,
                                      libxl__ev_qmp *qmp,
                                      const libxl__json_object *response,
                                      int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);

    if (!rc) {
        qmp->callback = pci_add_qmp_query_pci_cb;
        rc = libxl__ev_qmp_send(egc, qmp, "query-pci", NULL);
        if (!rc)
            return;
    }

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1215
/*
 * Reply handler for the "query-pci" QMP command sent after "device_add".
 *
 * Searches the reply for the device whose qdev_id matches the id used in
 * device_add and stores its slot/function in pcidev->vdevfn.
 *
 * Results passed to pci_add_dm_done():
 *   0               the device was found;
 *   ERROR_QEMU_API  the reply did not have the expected structure (the
 *                   reply is logged);
 *   ERROR_FAIL      the reply was well formed but the device is absent;
 *   rc              any error delivered to this callback.
 */
static void pci_add_qmp_query_pci_cb(libxl__egc *egc,
                                     libxl__ev_qmp *qmp,
                                     const libxl__json_object *response,
                                     int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);
    libxl_device_pci *pcidev = pas->pcidev;
    const libxl__json_object *bus_obj, *dev_list, *dev_obj, *field;
    const char *qdev_id;
    char *wanted_id;
    int bus_idx, dev_idx;
    int slot, function;

    if (rc) goto out;

    /* `query-pci' returns:
     * [
     *   {'bus': 'int',
     *    'devices': [
     *       {'bus': 'int', 'slot': 'int', 'function': 'int',
     *        'class_info': 'PciDeviceClass', 'id': 'PciDeviceId',
     *        '*irq': 'int', 'qdev_id': 'str',
     *        '*pci_bridge': 'PciBridgeInfo',
     *        'regions': ['PciMemoryRegion']
     *       }
     *    ]
     *   }
     * ]
     * (The structs not spelled out here are in qemu.git/qapi/)
     */

    wanted_id = GCSPRINTF(PCI_PT_QDEV_ID,
                          pcidev->bus, pcidev->dev, pcidev->func);

    for (bus_idx = 0; ; bus_idx++) {
        bus_obj = libxl__json_array_get(response, bus_idx);
        if (!bus_obj)
            break;

        dev_list = libxl__json_map_get("devices", bus_obj, JSON_ARRAY);
        if (!dev_list)
            goto bad_reply;

        for (dev_idx = 0; ; dev_idx++) {
            dev_obj = libxl__json_array_get(dev_list, dev_idx);
            if (!dev_obj)
                break;

            field = libxl__json_map_get("qdev_id", dev_obj, JSON_STRING);
            if (!field)
                goto bad_reply;
            qdev_id = libxl__json_object_get_string(field);
            /* Not the device we just added: look at the next one. */
            if (!qdev_id || strcmp(wanted_id, qdev_id))
                continue;

            field = libxl__json_map_get("slot", dev_obj, JSON_INTEGER);
            if (!field)
                goto bad_reply;
            slot = libxl__json_object_get_integer(field);

            field = libxl__json_map_get("function", dev_obj, JSON_INTEGER);
            if (!field)
                goto bad_reply;
            function = libxl__json_object_get_integer(field);

            pcidev->vdevfn = PCI_DEVFN(slot, function);

            rc = 0;
            goto out;
        }
    }

    /* Walked the whole reply without meeting our device. */
    rc = ERROR_FAIL;
    LOGD(ERROR, qmp->domid,
         "PCI device id '%s' wasn't found in QEMU's 'query-pci' response.",
         wanted_id);
    goto out;

bad_reply:
    rc = ERROR_QEMU_API;

out:
    /* Also taken when the callback itself was handed ERROR_QEMU_API. */
    if (rc == ERROR_QEMU_API) {
        LOGD(ERROR, qmp->domid,
             "Unexpected response to QMP cmd 'query-pci', received:\n%s",
             JSON(response));
    }
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1306
/*
 * Callback of the timer registered by pci_add_qmp_device_add(): forwards
 * the rc it is given to pci_add_dm_done(), ending the add operation.
 */
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
                            const struct timeval *requested_abs,
                            int rc)
{
    pci_add_state *owner = CONTAINER_OF(ev, *owner, timeout);

    pci_add_dm_done(egc, owner, rc);
}
1315
/*
 * Common completion of do_pci_add(), reached once the device model (if
 * any) has been dealt with; rc is the result of that part.
 *
 * When rc is 0 this performs the host-side work for pas->pcidev on domain
 * pas->pci_domid:
 *  - grants access to every I/O port / memory range listed in the
 *    device's sysfs "resource" file;
 *  - maps the device's irq (sysfs "irq" file) and grants access to it;
 *  - sets pciback "permissive" mode if requested;
 *  - unless the domain is a stubdomain, assigns the device with
 *    xc_assign_device(), honouring the rdm policy;
 *  - writes the xenstore entries, unless the domain is still starting or
 *    has a stubdomain.
 *
 * In every case the QMP connection state and the timeout are released
 * and pas->callback is invoked with the final rc.
 */
static void pci_add_dm_done(libxl__egc *egc,
                            pci_add_state *pas,
                            int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domid domid = pas->pci_domid;
    char *sysfs_path;
    FILE *f;
    unsigned long long start, end, flags, size;
    int irq, i;
    int r;
    uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
    uint32_t domainid = domid;
    bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);

    /* Convenience aliases */
    bool starting = pas->starting;
    libxl_device_pci *pcidev = pas->pcidev;
    bool hvm = libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM;

    libxl__ev_qmp_dispose(gc, &pas->qmp);

    if (rc) goto out;

    /* stubdomain is always running by now, even at create time */
    if (isstubdom)
        starting = false;

    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
                           pcidev->bus, pcidev->dev, pcidev->func);
    f = fopen(sysfs_path, "r");
    start = end = flags = size = 0;
    irq = 0;

    if (f == NULL) {
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        rc = ERROR_FAIL;
        goto out;
    }
    /* One "start end flags" line per resource; unparsable lines are
     * skipped and unused resources have start == 0. */
    for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
        if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
            continue;
        size = end - start + 1;
        if (start) {
            if (flags & PCI_BAR_IO) {
                r = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_ioport_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            } else {
                /* memory ranges are granted in whole pages */
                r = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_iomem_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            }
        }
    }
    fclose(f);
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
                                pcidev->bus, pcidev->dev, pcidev->func);
    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        /* NOTE(review): this also skips the "permissive" step below -
         * confirm that is intended. */
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        goto out_no_irq;
    }
    /* irq is an int, so it must be read with %d rather than %u */
    if ((fscanf(f, "%d", &irq) == 1) && irq) {
        r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
        if (r < 0) {
            LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
                  irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
        if (r < 0) {
            LOGED(ERROR, domainid,
                  "xc_domain_irq_permission irq=%d (error=%d)", irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
    }
    fclose(f);

    /* Don't restrict writes to the PCI config space from this VM */
    if (pcidev->permissive) {
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
                             pcidev) < 0 ) {
            LOGD(ERROR, domainid, "Setting permissive for device");
            rc = ERROR_FAIL;
            goto out;
        }
    }

out_no_irq:
    if (!isstubdom) {
        if (pcidev->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
            flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED;
        } else if (pcidev->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) {
            /* A bad configuration value, not a failed call: errno is
             * meaningless here, so log without it. */
            LOGD(ERROR, domainid, "unknown rdm check flag.");
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev), flag);
        /* For non-HVM domains ENOSYS from the hypervisor is tolerated. */
        if (r < 0 && (hvm || errno != ENOSYS)) {
            LOGED(ERROR, domainid, "xc_assign_device failed");
            rc = ERROR_FAIL;
            goto out;
        }
    }

    if (!starting && !libxl_get_stubdom_id(CTX, domid))
        rc = libxl__device_pci_add_xenstore(gc, domid, pcidev, starting);
    else
        rc = 0;
out:
    libxl__ev_time_deregister(gc, &pas->timeout);
    pas->callback(egc, pas, rc);
}
1448
/*
 * Reset the host PCI device domain:bus:dev.func.
 *
 * First tries pciback's "do_flr" node (writing the device's BDF to it);
 * if that node cannot be opened, falls back to writing "1" to the
 * device's own sysfs "reset" node.
 *
 * Returns 0 when one of the writes succeeded, a negative value otherwise.
 *
 * Note that 'domain' is the PCI domain (segment) of the device, not a
 * Xen domain id, so messages are logged without a domid.
 */
static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
                                   unsigned int dev, unsigned int func)
{
    char *reset;
    int fd, rc;

    reset = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER);
    fd = open(reset, O_WRONLY);
    if (fd >= 0) {
        char *buf = GCSPRINTF(PCI_BDF, domain, bus, dev, func);
        rc = write(fd, buf, strlen(buf));
        if (rc < 0)
            LOGE(ERROR, "write to %s returned %d", reset, rc);
        close(fd);
        return rc < 0 ? rc : 0;
    }
    /* A missing do_flr node is expected on some kernels: stay quiet. */
    if (errno != ENOENT)
        LOGE(ERROR, "Failed to access pciback path %s", reset);

    reset = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
    fd = open(reset, O_WRONLY);
    if (fd >= 0) {
        rc = write(fd, "1", 1);
        if (rc < 0)
            LOGE(ERROR, "write to %s returned %d", reset, rc);
        close(fd);
        return rc < 0 ? rc : 0;
    }
    if (errno == ENOENT) {
        LOG(ERROR,
            "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF,
            domain, bus, dev, func);
    } else {
        LOGE(ERROR, "Failed to access reset path %s", reset);
    }
    return -1;
}
1485
/*
 * Fill in defaults for a PCI device configuration.  Currently this only
 * replaces an unset (INVALID) rdm policy; always returns 0.
 */
int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pci, bool hotplug)
{
    if (pci->rdm_policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
        return 0;

    /* By default, insist on reserving the RDM belonging to a device. */
    pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
    return 0;
}
1494
/*
 * Public entry point: hot-plug pcidev into domain domid as an
 * asynchronous operation.  The work is done by libxl__device_pci_add()
 * with starting == false; completion is reported through
 * device_addrm_aocomplete.
 */
int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
                         libxl_device_pci *pcidev,
                         const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *aodev;

    GCNEW(aodev);
    libxl__prepare_ao_device(ao, aodev);

    /* Describe the operation before handing it over. */
    aodev->update_json = true;
    aodev->callback = device_addrm_aocomplete;
    aodev->action = LIBXL__DEVICE_ACTION_ADD;

    libxl__device_pci_add(egc, domid, pcidev, false, aodev);
    return AO_INPROGRESS;
}
1510
/*
 * Tell whether pcidev (compared by domain, bus, dev and func) appears in
 * the list returned by libxl_device_pci_assignable_list().
 *
 * Returns non-zero if it does, 0 otherwise.
 */
static int libxl_pcidev_assignable(libxl_ctx *ctx, libxl_device_pci *pcidev)
{
    libxl_device_pci *list;
    int count, idx;

    list = libxl_device_pci_assignable_list(ctx, &count);

    idx = 0;
    while (idx < count) {
        const libxl_device_pci *cand = &list[idx];

        if (cand->domain == pcidev->domain &&
            cand->bus == pcidev->bus &&
            cand->dev == pcidev->dev &&
            cand->func == pcidev->func)
            break;
        idx++;
    }

    free(list);
    /* Stopping short of the end means a match was found. */
    return idx != count;
}
1527
/*
 * Steps of libxl__device_pci_add(), in the order they run when the guest
 * has a stubdomain: the device is first added to the stubdomain, then
 * _wait waits for the stubdomain's PCI backend to connect, _ready is the
 * callback of that wait, _stubdom_done adds the device to the guest
 * itself and _done reports the result to the aodev callback.
 */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
    pci_add_state *pas, int rc);
static void device_pci_add_stubdom_ready(libxl__egc *egc,
    libxl__ev_devstate *ds, int rc);
static void device_pci_add_stubdom_done(libxl__egc *egc,
    pci_add_state *, int rc);
static void device_pci_add_done(libxl__egc *egc,
    pci_add_state *, int rc);
1536
/*
 * Add pcidev to domain domid; the result is delivered through
 * aodev->callback (via device_pci_add_done()).
 *
 * starting is true while the domain is being created and false for a
 * hot-plug.
 *
 * Before anything is changed the device is checked: for HVM guests the
 * hypervisor must accept a test assignment, the device must be
 * assignable (it is seized first if pcidev->seize asks for that), and it
 * must not already be attached to a domain.  The device is then reset
 * and added - first to the stubdomain, if the guest has one, and then to
 * the guest itself.
 */
void libxl__device_pci_add(libxl__egc *egc, uint32_t domid,
                           libxl_device_pci *pcidev, bool starting,
                           libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_device_pci *in_use;
    int in_use_count, rc;
    int stubdom = 0;
    pci_add_state *pas;

    /* Remember *pcidev so that the callbacks can reach it */
    aodev->device_config = pcidev;
    aodev->device_type = &libxl__pcidev_devtype;

    GCNEW(pas);
    pas->aodev = aodev;
    pas->domid = domid;
    pas->starting = starting;
    pas->callback = device_pci_add_stubdom_done;

    if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
        rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
        if (rc) {
            const char *why = errno == EOPNOTSUPP
                ? "cannot be assigned - no IOMMU"
                : "already assigned to a different guest";

            LOGD(ERROR, domid,
                 "PCI device %04x:%02x:%02x.%u %s?",
                 pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
                 why);
            goto out;
        }
    }

    rc = libxl__device_pci_setdefault(gc, domid, pcidev, !starting);
    if (rc) goto out;

    /* Take the device away from its host driver if asked to. */
    if (pcidev->seize && !pciback_dev_is_assigned(gc, pcidev)) {
        rc = libxl__device_pci_assignable_add(gc, pcidev, 1);
        if (rc) goto out;
    }

    if (!libxl_pcidev_assignable(ctx, pcidev)) {
        LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        rc = ERROR_FAIL;
        goto out;
    }

    rc = get_all_assigned_devices(gc, &in_use, &in_use_count);
    if (rc) {
        LOGD(ERROR, domid,
             "cannot determine if device is assigned, refusing to continue");
        goto out;
    }
    if (is_pcidev_in_array(in_use, in_use_count, pcidev->domain,
                           pcidev->bus, pcidev->dev, pcidev->func)) {
        LOGD(ERROR, domid, "PCI device already attached to a domain");
        rc = ERROR_FAIL;
        goto out;
    }

    /* The outcome of the reset is not checked. */
    libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);

    stubdom = libxl_get_stubdom_id(ctx, domid);
    if (stubdom == 0) {
        /* No stubdomain: go straight to the guest. */
        device_pci_add_stubdom_done(egc, pas, 0); /* must be last */
        return;
    }

    {
        /* The stubdomain gets its own copy of the device description. */
        libxl_device_pci *pcidev_s;

        GCNEW(pcidev_s);
        libxl_device_pci_init(pcidev_s);
        libxl_device_pci_copy(CTX, pcidev_s, pcidev);
        pas->callback = device_pci_add_stubdom_wait;

        do_pci_add(egc, stubdom, pcidev_s, pas); /* must be last */
        return;
    }

out:
    device_pci_add_done(egc, pas, rc); /* must be last */
}
1620
/*
 * Called when the device has been added to the stubdomain (rc is the
 * result).  On success, waits for the stubdomain's PCI backend state to
 * reach XenbusStateConnected, continuing in
 * device_pci_add_stubdom_ready(); on failure finishes the operation.
 */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    libxl__ao_device *aodev = pas->aodev;
    STATE_AO_GC(aodev->ao);
    int stubdomid = libxl_get_stubdom_id(CTX, pas->domid);

    if (!rc) {
        /* The device model running in the stubdomain cannot find the
         * device until it is actually connected, so wait for that. */
        char *backend = libxl__domain_device_backend_path(gc, 0, stubdomid, 0,
                                                   LIBXL__DEVICE_KIND_PCI);
        char *state_path = GCSPRINTF("%s/state", backend);

        rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds,
                                     device_pci_add_stubdom_ready,
                                     state_path, XenbusStateConnected,
                                     LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
        if (!rc)
            return;
    }

    device_pci_add_done(egc, pas, rc); /* must be last */
}
1646
/*
 * Callback of the devstate wait started by device_pci_add_stubdom_wait():
 * hands the result over to device_pci_add_stubdom_done().
 */
static void device_pci_add_stubdom_ready(libxl__egc *egc,
                                         libxl__ev_devstate *ds,
                                         int rc)
{
    pci_add_state *owner = CONTAINER_OF(ds, *owner, pciback_ds);

    device_pci_add_stubdom_done(egc, owner, rc); /* must be last */
}
1655
/*
 * Called once the stubdomain (if any) has the device; rc is the result of
 * that part.  Works out which physical function to pass through and with
 * which virtual devfn, then adds the device to the guest itself with
 * do_pci_add(), whose result goes to device_pci_add_done().
 *
 * With vfunc_mask == LIBXL_PCI_FUNC_ALL a virtual slot must have been
 * specified, and the set of physical functions is obtained from
 * pci_multifunction_check().
 */
static void device_pci_add_stubdom_done(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl__ao_device *aodev = pas->aodev;
    libxl_domid domid = pas->domid;
    libxl_device_pci *pcidev = aodev->device_config;
    unsigned int vslot_base, pfunc_mask;
    int fn;

    if (rc) goto out;

    /* virtual devfn with the function bits cleared */
    vslot_base = pcidev->vdevfn & ~7U;

    if ( pcidev->vfunc_mask != LIBXL_PCI_FUNC_ALL ) {
        pfunc_mask = (1 << pcidev->func);
    } else {
        if ( !(pcidev->vdevfn >> 3) ) {
            LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices");
            rc = ERROR_INVAL;
            goto out;
        }
        if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) {
            rc = ERROR_FAIL;
            goto out;
        }
        /* from here on vfunc_mask == pfunc_mask */
        pcidev->vfunc_mask &= pfunc_mask;
    }

    rc = 0;

    /* Pick the highest function present in the mask. */
    for (fn = 7; fn >= 0; --fn) {
        if ( (1 << fn) & pfunc_mask )
            break;
    }
    if (fn < 0)
        goto out;

    if ( pcidev->vfunc_mask == pfunc_mask ) {
        pcidev->func = fn;
        pcidev->vdevfn = vslot_base | fn;
    } else {
        /* When not passing a block of functions through, always use
         * virtual function 0, otherwise the guest won't see the
         * device. */
        pcidev->vdevfn = vslot_base;
    }
    pas->callback = device_pci_add_done;
    do_pci_add(egc, domid, pcidev, pas); /* must be last */
    return;

out:
    device_pci_add_done(egc, pas, rc);
}
1710
/*
 * Last step of libxl__device_pci_add(): logs a failure, records rc in
 * the aodev and invokes the aodev's callback.
 */
static void device_pci_add_done(libxl__egc *egc,
                                pci_add_state *pas,
                                int rc)
{
    EGC_GC;
    libxl__ao_device *aodev = pas->aodev;

    if (rc) {
        libxl_device_pci *pcidev = aodev->device_config;

        LOGD(ERROR, pas->domid,
             "libxl__device_pci_add failed for "
             "PCI device %x:%x:%x.%x (rc %d)",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
             rc);
    }

    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
1730
/* State of libxl__add_pcidevs(): adding all PCI devices of a domain
 * configuration. */
typedef struct {
    libxl__multidev multidev;      /* inner multidev, one aodev per device */
    libxl__ao_device *outer_aodev; /* aodev prepared on the caller's multidev;
                                    * completed by add_pcidevs_done() */
    libxl_domain_config *d_config;
    libxl_domid domid;
} add_pcidevs_state;

/* callback of the inner multidev */
static void add_pcidevs_done(libxl__egc *, libxl__multidev *, int rc);
1739
/*
 * Add every PCI device listed in d_config to domain domid (with
 * starting == true).  The whole batch is represented on the caller's
 * multidev by a single aodev, which add_pcidevs_done() completes once all
 * devices have been processed.
 */
static void libxl__add_pcidevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
                               libxl_domain_config *d_config,
                               libxl__multidev *multidev)
{
    AO_GC;
    add_pcidevs_state *apds;
    int idx = 0;

    /* A separate, inner multidev is used so that
     * libxl__create_pci_backend is executed just once, after all the
     * devices have been added. */

    GCNEW(apds);
    apds->outer_aodev = libxl__multidev_prepare(multidev);
    apds->domid = domid;
    apds->d_config = d_config;
    apds->multidev.callback = add_pcidevs_done;
    libxl__multidev_begin(ao, &apds->multidev);

    while (idx < d_config->num_pcidevs) {
        libxl__ao_device *aodev = libxl__multidev_prepare(&apds->multidev);

        libxl__device_pci_add(egc, domid, &d_config->pcidevs[idx],
                              true, aodev);
        idx++;
    }

    libxl__multidev_prepared(egc, &apds->multidev, 0);
}
1766
/*
 * Callback of the inner multidev of libxl__add_pcidevs().  If all devices
 * were added, there is at least one of them and the domain has no
 * stubdomain, the PCI backend is created.  The outcome is then reported
 * on the outer aodev.
 */
static void add_pcidevs_done(libxl__egc *egc, libxl__multidev *multidev,
                             int rc)
{
    EGC_GC;
    add_pcidevs_state *apds = CONTAINER_OF(multidev, *apds, multidev);
    libxl__ao_device *outer = apds->outer_aodev;
    libxl_domain_config *cfg = apds->d_config;
    libxl_domid domid = apds->domid;

    if (!rc && cfg->num_pcidevs > 0 && !libxl_get_stubdom_id(CTX, domid)) {
        rc = libxl__create_pci_backend(gc, domid, cfg->pcidevs,
                                       cfg->num_pcidevs);
        if (rc < 0)
            LOGD(ERROR, domid, "libxl_create_pci_backend failed: %d", rc);
    }

    outer->rc = rc;
    outer->callback(egc, outer);
}
1793
/*
 * Ask the device model of domid to remove pcidev, using the xenstore
 * based "pci-rem" command.
 *
 * The device's BDF is written to the device model's "/parameter" node.
 * Unless force is set, and only for virtual function 0, "pci-rem" is
 * issued and the function waits for the device model to report
 * "pci-removed".  Afterwards the device model "/state" node is written
 * back to the value it had on entry, if that value could be read.
 *
 * Returns 0 on success, ERROR_FAIL if the device model did not confirm
 * the removal in time (the state node is left untouched in that case).
 */
static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
                                    libxl_device_pci *pcidev, int force)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *state;
    char *path;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);

    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
                     pcidev->bus, pcidev->dev, pcidev->func);

    /* Remove all functions at once atomically by only signalling
     * device-model for function 0 */
    if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
        if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed",
                                                    NULL, NULL, NULL) < 0) {
            LOGD(ERROR, domid, "Device Model didn't respond in time");
            /* This depends on guest operating system acknowledging the
             * SCI, if it doesn't respond in time then we may wish to
             * force the removal.
             */
            return ERROR_FAIL;
        }
    }

    /* The read of the original state may have failed (NULL); in that
     * case there is nothing to restore. */
    if (state) {
        path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
        xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
    }

    return 0;
}
1829
/*
 * State carried through the asynchronous steps of removing one PCI
 * device (do_pci_remove() and the callbacks declared below).
 */
typedef struct pci_remove_state {
    libxl__ao_device *aodev;
    libxl_domid domid;
    libxl_device_pci *pcidev;
    bool force;
    bool hvm;                   /* set by do_pci_remove() for HVM domains */
    unsigned int orig_vdev;
    unsigned int pfunc_mask;
    int next_func;
    libxl__ao_device stubdom_aodev;
    libxl__xswait_state xswait; /* device model "/state" watch (qemu-trad) */
    libxl__ev_qmp qmp;          /* QMP connection (qemu-xen) */
    libxl__ev_time timeout;     /* armed by pci_remove_qmp_device_del() */
    libxl__ev_time retry_timer;
} pci_remove_state;

static void libxl__device_pci_remove_common(libxl__egc *egc,
    uint32_t domid, libxl_device_pci *pcidev, bool force,
    libxl__ao_device *aodev);
static void device_pci_remove_common_next(libxl__egc *egc,
    pci_remove_state *prs, int rc);

/* xswait callback: device model "/state" watch (qemu-traditional path) */
static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
    libxl__xswait_state *xswa, int rc, const char *state);
/* QMP path: send "device_del" */
static void pci_remove_qmp_device_del(libxl__egc *egc,
    pci_remove_state *prs);
static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
    libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc,
    libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
static void pci_remove_qmp_query_cb(libxl__egc *egc,
    libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
static void pci_remove_timeout(libxl__egc *egc,
    libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
/* every path of do_pci_remove() ends here */
static void pci_remove_detatched(libxl__egc *egc,
    pci_remove_state *prs, int rc);
static void pci_remove_stubdom_done(libxl__egc *egc,
    libxl__ao_device *aodev);
static void pci_remove_done(libxl__egc *egc,
    pci_remove_state *prs, int rc);
1870
/*
 * Start removing pcidev from domid.
 *
 * Fails with ERROR_INVAL if the device is not attached to the domain.
 *
 * For HVM domains the running device model is asked to unplug the
 * device:
 *  - qemu-traditional: wait for the device model "/state" node, then
 *    continue in pci_remove_qemu_trad_watch_state_cb();
 *  - qemu-xen: continue in pci_remove_qmp_device_del().
 *
 * For PV domains the access that was granted when the device was added
 * is revoked here: the I/O port / memory ranges listed in the device's
 * sysfs "resource" file, and its irq.  Failures while revoking are logged
 * but do not fail the operation.
 *
 * Every path that does not hand over to a callback ends in
 * pci_remove_detatched().
 */
static void do_pci_remove(libxl__egc *egc, uint32_t domid,
                          libxl_device_pci *pcidev, int force,
                          pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_device_pci *assigned;
    libxl_domain_type type = libxl__domain_type(gc, domid);
    int rc, num;
    uint32_t domainid = domid;

    assigned = libxl_device_pci_list(ctx, domid, &num);
    if (assigned == NULL) {
        rc = ERROR_FAIL;
        goto out_fail;
    }
    /* let the gc free the list */
    libxl__ptr_add(gc, assigned);

    rc = ERROR_INVAL;
    if ( !is_pcidev_in_array(assigned, num, pcidev->domain,
                      pcidev->bus, pcidev->dev, pcidev->func) ) {
        LOGD(ERROR, domainid, "PCI device not attached to this domain");
        goto out_fail;
    }

    rc = ERROR_FAIL;
    /* As in do_pci_add(): an undeterminable domain type is an error to
     * report, not a reason to trip the assertion below. */
    if (type == LIBXL_DOMAIN_TYPE_INVALID)
        goto out_fail;

    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        prs->hvm = true;
        switch (libxl__device_model_version_running(gc, domid)) {
        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
            prs->xswait.ao = ao;
            prs->xswait.what = "Device Model";
            prs->xswait.path = DEVICE_MODEL_XS_PATH(gc,
                libxl_get_stubdom_id(CTX, domid), domid, "/state");
            prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
            prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb;
            rc = libxl__xswait_start(gc, &prs->xswait);
            if (rc) goto out_fail;
            return;
        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
            pci_remove_qmp_device_del(egc, prs); /* must be last */
            return;
        default:
            rc = ERROR_INVAL;
            goto out_fail;
        }
    } else {
        assert(type == LIBXL_DOMAIN_TYPE_PV);

        char *sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
                                     pcidev->bus, pcidev->dev, pcidev->func);
        FILE *f = fopen(sysfs_path, "r");
        /* 64 bits wide, as in the add path: BARs may lie above 4GiB */
        unsigned long long start = 0, end = 0, flags = 0, size = 0;
        int irq = 0;
        int i;

        if (f == NULL) {
            LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
            goto skip1;
        }
        /* One "start end flags" line per resource; unparsable lines are
         * skipped and unused resources have start == 0. */
        for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
            if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
                continue;
            size = end - start + 1;
            if (start) {
                if (flags & PCI_BAR_IO) {
                    rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0);
                    if (rc < 0)
                        LOGED(ERROR, domainid,
                              "xc_domain_ioport_permission error 0x%llx/0x%llx",
                              start,
                              size);
                } else {
                    /* memory ranges are handled in whole pages */
                    rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                    (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0);
                    if (rc < 0)
                        LOGED(ERROR, domainid,
                              "xc_domain_iomem_permission error 0x%llx/0x%llx",
                              start,
                              size);
                }
            }
        }
        fclose(f);
skip1:
        sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
                               pcidev->bus, pcidev->dev, pcidev->func);
        f = fopen(sysfs_path, "r");
        if (f == NULL) {
            LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
            goto skip_irq;
        }
        /* irq is an int, so it must be read with %d rather than %u */
        if ((fscanf(f, "%d", &irq) == 1) && irq) {
            rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
            if (rc < 0) {
                LOGED(ERROR, domainid, "xc_physdev_unmap_pirq irq=%d", irq);
            }
            rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
            if (rc < 0) {
                LOGED(ERROR, domainid, "xc_domain_irq_permission irq=%d", irq);
            }
        }
        fclose(f);
    }
skip_irq:
    /* Problems while revoking access above were logged only. */
    rc = 0;
out_fail:
    pci_remove_detatched(egc, prs, rc); /* must be last */
}
1980
/*
 * xswait callback for the device model "/state" watch started by
 * do_pci_remove() (qemu-traditional).
 *
 * Keeps waiting while check_qemu_running() reports ERROR_NOT_READY.
 * Once the device model is running the device is unplugged through
 * qemu_pci_remove_xenstore(); the result (or any earlier error) is passed
 * on to pci_remove_detatched().
 */
static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
                                                libxl__xswait_state *xswa,
                                                int rc,
                                                const char *state)
{
    pci_remove_state *prs = CONTAINER_OF(xswa, *prs, xswait);
    STATE_AO_GC(prs->aodev->ao);

    rc = check_qemu_running(gc, prs->domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return;

    if (!rc)
        rc = qemu_pci_remove_xenstore(gc, prs->domid, prs->pcidev,
                                      prs->force);

    pci_remove_detatched(egc, prs, rc);
}
2004
/*
 * Start removal of a passthrough device through QMP.
 *
 * Arms prs->timeout (LIBXL_QMP_CMD_TIMEOUT * 1000, handled by
 * pci_remove_timeout) and then sends "device_del" for the qdev id built
 * from the device's bus/dev/func.  The reply is delivered to
 * pci_remove_qmp_device_del_cb().  If either step fails, the removal
 * continues in pci_remove_detatched() with the error code.
 */
static void pci_remove_qmp_device_del(libxl__egc *egc,
                                      pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl__json_object *args = NULL;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &prs->timeout,
                                     pci_remove_timeout,
                                     LIBXL_QMP_CMD_TIMEOUT * 1000);
    if (!rc) {
        QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
                               prs->pcidev->bus, prs->pcidev->dev,
                               prs->pcidev->func);
        prs->qmp.callback = pci_remove_qmp_device_del_cb;
        rc = libxl__ev_qmp_send(egc, &prs->qmp, "device_del", args);
        if (!rc)
            return; /* command sent; wait for the callback */
    }

    pci_remove_detatched(egc, prs, rc);
}
2030
/*
 * QMP callback for the "device_del" command.
 *
 * On error the removal proceeds straight to pci_remove_detatched().
 * Otherwise the first "is the device still there?" poll is started by
 * calling the retry-timer callback directly.
 */
static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
                                         libxl__ev_qmp *qmp,
                                         const libxl__json_object *response,
                                         int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);

    if (rc) {
        pci_remove_detatched(egc, prs, rc);
        return;
    }

    /* The command has been sent; now wait until QEMU confirms that the
     * device is gone. */
    /* TODO: Rather than polling with { ev_timer ; query-pci }, it might
     * be possible to listen for the events QEMU emits over QMP in order
     * to learn when the passthrough pci-device has been removed. */
    pci_remove_qmp_retry_timer_cb(egc, &prs->retry_timer, NULL,
                                  ERROR_TIMEDOUT);
}
2053
/*
 * Retry-timer callback: issue a "query-pci" QMP command whose reply is
 * handled by pci_remove_qmp_query_cb().  The incoming rc is not
 * inspected; it is overwritten by the result of the send.  If the send
 * fails, the removal continues in pci_remove_detatched().
 */
static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev,
                                          const struct timeval *requested_abs,
                                          int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, retry_timer);

    prs->qmp.callback = pci_remove_qmp_query_cb;
    rc = libxl__ev_qmp_send(egc, &prs->qmp, "query-pci", NULL);
    if (rc)
        pci_remove_detatched(egc, prs, rc);
}
2069
/*
 * QMP callback for "query-pci".
 *
 * Walks every bus in the response and every device on each bus looking
 * for the qdev id of the device being removed.  If it is still listed,
 * the retry timer is re-armed for 1000 (as passed to
 * libxl__ev_time_register_rel) and the function returns.  If the id is
 * not found, or on any error, pci_remove_detatched() is called with the
 * current rc.  A bus without a "devices" array or a device without a
 * "qdev_id" string yields ERROR_QEMU_API.
 */
static void pci_remove_qmp_query_cb(libxl__egc *egc,
                                    libxl__ev_qmp *qmp,
                                    const libxl__json_object *response,
                                    int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);
    libxl_device_pci *const pcidev = prs->pcidev;
    const libxl__json_object *bus_obj;
    const char *wanted;
    int bus_idx = 0;

    if (rc) goto done;

    libxl__ev_qmp_dispose(gc, qmp);

    wanted = GCSPRINTF(PCI_PT_QDEV_ID,
                       pcidev->bus, pcidev->dev, pcidev->func);

    /* query-pci response:
     * [{ 'devices': [ 'qdev_id': 'str', ... ], ... }]
     * */

    while ((bus_obj = libxl__json_array_get(response, bus_idx++)) != NULL) {
        const libxl__json_object *dev_list;
        const libxl__json_object *dev_obj;
        int dev_idx = 0;

        dev_list = libxl__json_map_get("devices", bus_obj, JSON_ARRAY);
        if (dev_list == NULL) {
            rc = ERROR_QEMU_API;
            goto done;
        }

        while ((dev_obj = libxl__json_array_get(dev_list, dev_idx++)) != NULL) {
            const libxl__json_object *id_obj;
            const char *id;

            id_obj = libxl__json_map_get("qdev_id", dev_obj, JSON_STRING);
            if (id_obj == NULL) {
                rc = ERROR_QEMU_API;
                goto done;
            }

            id = libxl__json_object_get_string(id_obj);
            if (id == NULL || strcmp(wanted, id) != 0)
                continue;

            /* Device still in QEMU, need to wait longer. */
            rc = libxl__ev_time_register_rel(prs->aodev->ao,
                                             &prs->retry_timer,
                                             pci_remove_qmp_retry_timer_cb,
                                             1000);
            if (rc) goto done;
            return;
        }
    }

done:
    pci_remove_detatched(egc, prs, rc); /* must be last */
}
2129
/*
 * Handler for prs->timeout: logs a warning naming the qdev id that the
 * device model did not remove in time, then forwards the rc it was
 * given to pci_remove_detatched().
 */
static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev,
                               const struct timeval *requested_abs,
                               int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, timeout);
    const libxl_device_pci *pci = prs->pcidev;

    LOGD(WARN, prs->domid, "timed out waiting for DM to remove "
         PCI_PT_QDEV_ID, pci->bus, pci->dev, pci->func);

    /* Even after a timeout we may still want to carry on destroying the
     * device (when force==true), so the next function decides how to
     * react to the error. */
    pci_remove_detatched(egc, prs, rc);
}
2148
/*
 * Continue a removal once the device model step has finished (or
 * failed).
 *
 * Order of operations:
 *   1. dispose the QMP state and deregister both timers;
 *   2. if rc is set and the removal is not forced, finish with rc;
 *   3. reset the device when the virtual function number is 0;
 *   4. deassign the device unless libxl_is_stubdom() reports that domid
 *      is a stubdom (a failure is logged but not propagated);
 *   5. if the domain has a stubdom, start a nested removal for it whose
 *      completion is reported to pci_remove_stubdom_done(); otherwise
 *      finish with rc 0 via pci_remove_done().
 */
static void pci_remove_detatched(libxl__egc *egc,
                                 pci_remove_state *prs,
                                 int rc)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_device_pci *const pcidev = prs->pcidev;
    libxl_domid domid = prs->domid;
    uint32_t domainid = prs->domid;
    libxl__ao_device *stubdom_aodev;
    libxl_device_pci *pcidev_s;
    bool isstubdom;
    int stubdomid;

    /* Cleaning QMP states ASAP */
    libxl__ev_qmp_dispose(gc, &prs->qmp);
    libxl__ev_time_deregister(gc, &prs->timeout);
    libxl__ev_time_deregister(gc, &prs->retry_timer);

    /* An earlier failure is fatal unless the caller asked to force. */
    if (rc && !prs->force) {
        pci_remove_done(egc, prs, rc);
        return;
    }

    isstubdom = libxl_is_stubdom(CTX, domid, &domainid);

    /* avoid repeated resets while other functions are still passed
     * through: only reset for virtual function 0 */
    if ((pcidev->vdevfn & 0x7) == 0)
        libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus,
                                pcidev->dev, pcidev->func);

    if (!isstubdom) {
        rc = xc_deassign_device(CTX->xch, domid, pcidev_encode_bdf(pcidev));
        if (rc < 0 && (prs->hvm || errno != ENOSYS))
            LOGED(ERROR, domainid, "xc_deassign_device failed");
    }

    stubdomid = libxl_get_stubdom_id(CTX, domid);
    if (stubdomid == 0) {
        /* No stubdom: nothing more to detach. */
        pci_remove_done(egc, prs, 0);
        return;
    }

    /* Remove a private copy of the device from the stubdom as well. */
    stubdom_aodev = &prs->stubdom_aodev;

    GCNEW(pcidev_s);
    libxl_device_pci_init(pcidev_s);
    libxl_device_pci_copy(CTX, pcidev_s, pcidev);

    libxl__prepare_ao_device(ao, stubdom_aodev);
    stubdom_aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
    stubdom_aodev->callback = pci_remove_stubdom_done;
    stubdom_aodev->update_json = prs->aodev->update_json;
    libxl__device_pci_remove_common(egc, stubdomid, pcidev_s,
                                    prs->force, stubdom_aodev);
}
2205
/*
 * Completion callback of the nested stubdom removal.  Recovers the
 * owning pci_remove_state from the embedded stubdom_aodev and finishes
 * the removal with rc 0; aodev->rc is not consulted here.
 */
static void pci_remove_stubdom_done(libxl__egc *egc,
                                    libxl__ao_device *aodev)
{
    pci_remove_state *prs;

    prs = CONTAINER_OF(aodev, *prs, stubdom_aodev);
    pci_remove_done(egc, prs, 0);
}
2213
/*
 * Final step for one device function: on success drop the device's
 * xenstore entries, then (success or failure) hand rc to
 * device_pci_remove_common_next().
 */
static void pci_remove_done(libxl__egc *egc,
                            pci_remove_state *prs,
                            int rc)
{
    EGC_GC;

    if (!rc)
        libxl__device_pci_remove_xenstore(gc, prs->domid, prs->pcidev);

    device_pci_remove_common_next(egc, prs, rc);
}
2226
/*
 * Common entry point for removing a PCI device from a domain.
 *
 * Allocates and initialises a pci_remove_state (gc-allocated via GCNEW),
 * works out which physical functions have to be removed and then starts
 * the per-function iteration in device_pci_remove_common_next().
 *
 * When pcidev->vfunc_mask is LIBXL_PCI_FUNC_ALL the set of functions is
 * obtained from pci_multifunction_check(); a failure there ends the
 * operation with ERROR_FAIL.  Otherwise only pcidev->func is removed.
 * Completion is reported through aodev.
 */
static void libxl__device_pci_remove_common(libxl__egc *egc,
                                            uint32_t domid,
                                            libxl_device_pci *pcidev,
                                            bool force,
                                            libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    pci_remove_state *prs;
    int rc = 0;

    GCNEW(prs);
    prs->aodev = aodev;
    prs->domid = domid;
    prs->pcidev = pcidev;
    prs->force = force;

    /* Put every event source into its idle state before first use. */
    libxl__xswait_init(&prs->xswait);
    libxl__ev_qmp_init(&prs->qmp);
    prs->qmp.ao = aodev->ao;
    prs->qmp.domid = domid;
    prs->qmp.payload_fd = -1;
    libxl__ev_time_init(&prs->timeout);
    libxl__ev_time_init(&prs->retry_timer);

    /* Virtual devfn with the function bits (low 3 bits) cleared. */
    prs->orig_vdev = pcidev->vdevfn & ~7U;

    if (pcidev->vfunc_mask != LIBXL_PCI_FUNC_ALL) {
        prs->pfunc_mask = (1 << pcidev->func);
    } else if (pci_multifunction_check(gc, pcidev, &prs->pfunc_mask)) {
        rc = ERROR_FAIL;
    } else {
        pcidev->vfunc_mask &= prs->pfunc_mask;
    }

    /* Functions are walked from 7 downwards. */
    if (!rc)
        prs->next_func = 7;

    device_pci_remove_common_next(egc, prs, rc);
}
2267
/*
 * Advance the per-function removal loop.
 *
 * With rc == 0, picks the next function (counting prs->next_func down
 * to 0) whose bit is set in prs->pfunc_mask, adjusts pcidev->func and
 * pcidev->vdevfn for it and starts do_pci_remove(); this function is
 * re-entered once that removal completes.  When no function is left,
 * or when rc is non-zero, all event sources are torn down and the
 * result is reported through aodev->callback.
 */
static void device_pci_remove_common_next(libxl__egc *egc,
                                          pci_remove_state *prs,
                                          int rc)
{
    EGC_GC;
    libxl_device_pci *const pcidev = prs->pcidev;
    libxl__ao_device *const aodev = prs->aodev;
    const unsigned int pfunc_mask = prs->pfunc_mask;

    if (!rc) {
        while (prs->next_func >= 0) {
            const int func = prs->next_func--;

            if (!(pfunc_mask & (1 << func)))
                continue;

            if (pcidev->vfunc_mask == pfunc_mask) {
                pcidev->func = func;
                pcidev->vdevfn = prs->orig_vdev | func;
            } else {
                pcidev->vdevfn = prs->orig_vdev;
            }

            do_pci_remove(egc, prs->domid, pcidev, prs->force, prs);
            return;
        }
        /* Every requested function has been handled; rc is still 0. */
    }

    libxl__ev_qmp_dispose(gc, &prs->qmp);
    libxl__xswait_stop(gc, &prs->xswait);
    libxl__ev_time_deregister(gc, &prs->timeout);
    libxl__ev_time_deregister(gc, &prs->retry_timer);
    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
2307
/*
 * Public API: asynchronously remove a PCI device from a domain without
 * forcing (force == false).  Completion is signalled through
 * device_addrm_aocomplete; the function itself returns AO_INPROGRESS.
 */
int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
                            libxl_device_pci *pcidev,
                            const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *aodev;
    const bool force = false;

    GCNEW(aodev);
    libxl__prepare_ao_device(ao, aodev);
    aodev->update_json = true;
    aodev->callback = device_addrm_aocomplete;
    aodev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pcidev, force, aodev);
    return AO_INPROGRESS;
}
2324
/*
 * Public API: asynchronously remove a PCI device from a domain with
 * force == true, so earlier errors do not stop the teardown.
 * Completion is signalled through device_addrm_aocomplete; the function
 * itself returns AO_INPROGRESS.
 */
int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
                             libxl_device_pci *pcidev,
                             const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *aodev;
    const bool force = true;

    GCNEW(aodev);
    libxl__prepare_ao_device(ao, aodev);
    aodev->update_json = true;
    aodev->callback = device_addrm_aocomplete;
    aodev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pcidev, force, aodev);
    return AO_INPROGRESS;
}
2340
/*
 * Fill a libxl_device_pci from the xenstore backend entries of device
 * number nr found under be_path.
 *
 *   be_path/dev-<nr>     physical address, parsed with PCI_BDF
 *   be_path/vdevfn-<nr>  virtual devfn in hex (optional)
 *   be_path/opts-<nr>    comma separated key=value options (optional);
 *                        msitranslate, power_mgmt and permissive are
 *                        recognised, anything else is skipped
 *
 * data must point to the libxl_device_pci to fill.  Always returns 0.
 *
 * Any entry that is absent is treated as best effort: a missing or
 * unparsable dev-<nr> leaves the address fields at 0, exactly like a
 * missing vdevfn-<nr> leaves vdevfn at 0.  The function never
 * dereferences a NULL xenstore result or a NULL token.
 */
static int libxl__device_pci_from_xs_be(libxl__gc *gc,
                                        const char *be_path,
                                        libxl_devid nr, void *data)
{
    char *s;
    unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0;
    libxl_device_pci *pci = data;

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr));
    if (s)  /* the node may be absent; do not hand NULL to sscanf() */
        sscanf(s, PCI_BDF, &domain, &bus, &dev, &func);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr));
    if (s)
        vdevfn = strtol(s, (char **) NULL, 16);

    pcidev_struct_fill(pci, domain, bus, dev, func, vdevfn);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr));
    if (s) {
        char *saveptr;
        char *key;

        /* strtok_r() yields NULL straight away for an empty or
         * delimiter-only string, so test before every dereference. */
        for (key = strtok_r(s, ",=", &saveptr);
             key != NULL;
             key = strtok_r(NULL, ",=", &saveptr)) {
            char *val;

            while (*key == ' ')
                key++;

            if (!strcmp(key, "msitranslate")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val)   /* key without a value: input exhausted */
                    break;
                pci->msitranslate = atoi(val);
            } else if (!strcmp(key, "power_mgmt")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val)
                    break;
                pci->power_mgmt = atoi(val);
            } else if (!strcmp(key, "permissive")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val)
                    break;
                pci->permissive = atoi(val);
            }
        }
    }

    return 0;
}
2380
/*
 * Read be_path/num_devs from xenstore and store its atoi() value in
 * *num.  Returns 0 on success, or ERROR_FAIL (leaving *num untouched)
 * when the node cannot be read.
 */
static int libxl__device_pci_get_num(libxl__gc *gc, const char *be_path,
                                     unsigned int *num)
{
    const char *count;

    count = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
    if (count == NULL)
        return ERROR_FAIL;

    *num = atoi(count);
    return 0;
}
2395
libxl_device_pci_list(libxl_ctx * ctx,uint32_t domid,int * num)2396 libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, uint32_t domid, int *num)
2397 {
2398 GC_INIT(ctx);
2399 char *be_path;
2400 unsigned int n, i;
2401 libxl_device_pci *pcidevs = NULL;
2402
2403 *num = 0;
2404
2405 be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
2406 LIBXL__DEVICE_KIND_PCI);
2407 if (libxl__device_pci_get_num(gc, be_path, &n))
2408 goto out;
2409
2410 pcidevs = calloc(n, sizeof(libxl_device_pci));
2411
2412 for (i = 0; i < n; i++)
2413 libxl__device_pci_from_xs_be(gc, be_path, i, pcidevs + i);
2414
2415 *num = n;
2416 out:
2417 GC_FREE;
2418 return pcidevs;
2419 }
2420
/*
 * Start a forced removal of every PCI device listed for domid, one
 * libxl__ao_device per device obtained from multidev.  Returns without
 * doing anything when the device list is NULL.  The list is handed to
 * the gc with libxl__ptr_add().
 */
void libxl__device_pci_destroy_all(libxl__egc *egc, uint32_t domid,
                                   libxl__multidev *multidev)
{
    STATE_AO_GC(multidev->ao);
    libxl_device_pci *list;
    int count, idx;

    list = libxl_device_pci_list(CTX, domid, &count);
    if (!list)
        return;
    libxl__ptr_add(gc, list);

    for (idx = 0; idx < count; idx++) {
        libxl__ao_device *aodev = libxl__multidev_prepare(multidev);

        /* Removal is forced on shutdown: on HVM, qemu will not always
         * respond to the SCI interrupt because the guest kernel has
         * already shut the devices down by the time we get here.
         */
        libxl__device_pci_remove_common(egc, domid, &list[idx], true,
                                        aodev);
    }
}
2443
/*
 * When gfx_passthru is enabled, grant access to the legacy VGA iomem
 * range (0x20 pages starting at 0xa0000 >> XC_PAGE_SHIFT) for the first
 * configured PCI device whose class is 0x030000 (VGA).
 *
 * Permission is given first to the domain id returned by
 * libxl_get_stubdom_id() and then to domid itself.  Devices whose class
 * cannot be read are skipped.
 *
 * Returns 0 when gfx_passthru is off, when no VGA device is found, or
 * on success; otherwise the negative value returned by
 * xc_domain_iomem_permission().
 */
int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid,
                                      libxl_domain_config *const d_config)
{
    const uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT;
    int i;

    if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru))
        return 0;

    for (i = 0; i < d_config->num_pcidevs; i++) {
        libxl_device_pci *pcidev = &d_config->pcidevs[i];
        unsigned long pci_device_class;
        uint32_t stubdom_domid;
        int ret;

        /* Skip anything that is unreadable or not VGA class. */
        if (sysfs_dev_get_class(gc, pcidev, &pci_device_class) ||
            pci_device_class != 0x030000)
            continue;

        stubdom_domid = libxl_get_stubdom_id(CTX, domid);
        ret = xc_domain_iomem_permission(CTX->xch, stubdom_domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give stubdom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  stubdom_domid,
                  vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        ret = xc_domain_iomem_permission(CTX->xch, domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give dom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        /* Only the first VGA device is handled. */
        return 0;
    }

    return 0;
}
2488
/*
 * Compare two PCI devices; the result is whatever COMPARE_PCI yields
 * for the pair.
 */
static int libxl_device_pci_compare(const libxl_device_pci *d1,
                                    const libxl_device_pci *d2)
{
    const int result = COMPARE_PCI(d1, d2);

    return result;
}
2494
/*
 * PCI devices provide no devid-update hook: the name is defined as NULL.
 * NOTE(review): presumably DEFINE_DEVICE_TYPE_STRUCT_X references this
 * name when building the descriptor below - confirm against the macro.
 */
#define libxl__device_pci_update_devid NULL

/*
 * Device-type descriptor for PCI devices.  In addition to whatever the
 * macro sets up from the (pcidev, pci, PCI) names, it installs the
 * PCI-specific device-count reader and xenstore parser.
 */
DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, PCI,
    .get_num = libxl__device_pci_get_num,
    .from_xenstore = libxl__device_pci_from_xs_be,
);
2501
2502 /*
2503 * Local variables:
2504 * mode: C
2505 * c-basic-offset: 4
2506 * indent-tabs-mode: nil
2507 * End:
2508 */
2509