1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VFIO core
4 *
5 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
6 * Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * Derived from original vfio:
9 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
10 * Author: Tom Lyon, pugs@cisco.com
11 */
12
13 #include <linux/cdev.h>
14 #include <linux/compat.h>
15 #include <linux/device.h>
16 #include <linux/file.h>
17 #include <linux/anon_inodes.h>
18 #include <linux/fs.h>
19 #include <linux/idr.h>
20 #include <linux/iommu.h>
21 #include <linux/list.h>
22 #include <linux/miscdevice.h>
23 #include <linux/module.h>
24 #include <linux/mutex.h>
25 #include <linux/pci.h>
26 #include <linux/rwsem.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/stat.h>
30 #include <linux/string.h>
31 #include <linux/uaccess.h>
32 #include <linux/vfio.h>
33 #include <linux/wait.h>
34 #include <linux/sched/signal.h>
35 #include "vfio.h"
36
37 #define DRIVER_VERSION "0.3"
38 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
39 #define DRIVER_DESC "VFIO - User Level meta-driver"
40
41 static struct vfio {
42 struct class *class;
43 struct list_head iommu_drivers_list;
44 struct mutex iommu_drivers_lock;
45 struct list_head group_list;
46 struct mutex group_lock; /* locks group_list */
47 struct ida group_ida;
48 dev_t group_devt;
49 } vfio;
50
51 struct vfio_iommu_driver {
52 const struct vfio_iommu_driver_ops *ops;
53 struct list_head vfio_next;
54 };
55
56 struct vfio_container {
57 struct kref kref;
58 struct list_head group_list;
59 struct rw_semaphore group_lock;
60 struct vfio_iommu_driver *iommu_driver;
61 void *iommu_data;
62 bool noiommu;
63 };
64
65 struct vfio_unbound_dev {
66 struct device *dev;
67 struct list_head unbound_next;
68 };
69
70 struct vfio_group {
71 struct device dev;
72 struct cdev cdev;
73 refcount_t users;
74 atomic_t container_users;
75 struct iommu_group *iommu_group;
76 struct vfio_container *container;
77 struct list_head device_list;
78 struct mutex device_lock;
79 struct notifier_block nb;
80 struct list_head vfio_next;
81 struct list_head container_next;
82 struct list_head unbound_list;
83 struct mutex unbound_lock;
84 atomic_t opened;
85 wait_queue_head_t container_q;
86 enum vfio_group_type type;
87 unsigned int dev_counter;
88 struct kvm *kvm;
89 struct blocking_notifier_head notifier;
90 };
91
92 #ifdef CONFIG_VFIO_NOIOMMU
93 static bool noiommu __read_mostly;
94 module_param_named(enable_unsafe_noiommu_mode,
95 noiommu, bool, S_IRUGO | S_IWUSR);
96 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
97 #endif
98
99 static DEFINE_XARRAY(vfio_device_set_xa);
100 static const struct file_operations vfio_group_fops;
101
102 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
103 {
104 unsigned long idx = (unsigned long)set_id;
105 struct vfio_device_set *new_dev_set;
106 struct vfio_device_set *dev_set;
107
108 if (WARN_ON(!set_id))
109 return -EINVAL;
110
111 /*
112 * Atomically acquire a singleton object in the xarray for this set_id
113 */
114 xa_lock(&vfio_device_set_xa);
115 dev_set = xa_load(&vfio_device_set_xa, idx);
116 if (dev_set)
117 goto found_get_ref;
118 xa_unlock(&vfio_device_set_xa);
119
120 new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
121 if (!new_dev_set)
122 return -ENOMEM;
123 mutex_init(&new_dev_set->lock);
124 INIT_LIST_HEAD(&new_dev_set->device_list);
125 new_dev_set->set_id = set_id;
126
127 xa_lock(&vfio_device_set_xa);
128 dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
129 GFP_KERNEL);
130 if (!dev_set) {
131 dev_set = new_dev_set;
132 goto found_get_ref;
133 }
134
135 kfree(new_dev_set);
136 if (xa_is_err(dev_set)) {
137 xa_unlock(&vfio_device_set_xa);
138 return xa_err(dev_set);
139 }
140
141 found_get_ref:
142 dev_set->device_count++;
143 xa_unlock(&vfio_device_set_xa);
144 mutex_lock(&dev_set->lock);
145 device->dev_set = dev_set;
146 list_add_tail(&device->dev_set_list, &dev_set->device_list);
147 mutex_unlock(&dev_set->lock);
148 return 0;
149 }
150 EXPORT_SYMBOL_GPL(vfio_assign_device_set);
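/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * multi-function driver that wants every function to share one reset
 * domain could pass the same set_id for each function, for instance the
 * physical function's struct device.  All names below are made up.
 *
 *	static int my_vfio_pci_probe(struct pci_dev *pdev,
 *				     const struct pci_device_id *id)
 *	{
 *		struct my_vfio_pci_device *mydev = my_alloc(pdev);
 *		int ret;
 *
 *		vfio_init_group_dev(&mydev->vdev, &pdev->dev, &my_vfio_ops);
 *		ret = vfio_assign_device_set(&mydev->vdev,
 *					     &pci_physfn(pdev)->dev);
 *		if (ret)
 *			return ret;
 *		return vfio_register_group_dev(&mydev->vdev);
 *	}
 */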
151
152 static void vfio_release_device_set(struct vfio_device *device)
153 {
154 struct vfio_device_set *dev_set = device->dev_set;
155
156 if (!dev_set)
157 return;
158
159 mutex_lock(&dev_set->lock);
160 list_del(&device->dev_set_list);
161 mutex_unlock(&dev_set->lock);
162
163 xa_lock(&vfio_device_set_xa);
164 if (!--dev_set->device_count) {
165 __xa_erase(&vfio_device_set_xa,
166 (unsigned long)dev_set->set_id);
167 mutex_destroy(&dev_set->lock);
168 kfree(dev_set);
169 }
170 xa_unlock(&vfio_device_set_xa);
171 }
172
173 #ifdef CONFIG_VFIO_NOIOMMU
174 static void *vfio_noiommu_open(unsigned long arg)
175 {
176 if (arg != VFIO_NOIOMMU_IOMMU)
177 return ERR_PTR(-EINVAL);
178 if (!capable(CAP_SYS_RAWIO))
179 return ERR_PTR(-EPERM);
180
181 return NULL;
182 }
183
184 static void vfio_noiommu_release(void *iommu_data)
185 {
186 }
187
188 static long vfio_noiommu_ioctl(void *iommu_data,
189 unsigned int cmd, unsigned long arg)
190 {
191 if (cmd == VFIO_CHECK_EXTENSION)
192 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
193
194 return -ENOTTY;
195 }
196
197 static int vfio_noiommu_attach_group(void *iommu_data,
198 struct iommu_group *iommu_group, enum vfio_group_type type)
199 {
200 return 0;
201 }
202
203 static void vfio_noiommu_detach_group(void *iommu_data,
204 struct iommu_group *iommu_group)
205 {
206 }
207
208 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
209 .name = "vfio-noiommu",
210 .owner = THIS_MODULE,
211 .open = vfio_noiommu_open,
212 .release = vfio_noiommu_release,
213 .ioctl = vfio_noiommu_ioctl,
214 .attach_group = vfio_noiommu_attach_group,
215 .detach_group = vfio_noiommu_detach_group,
216 };
217
218 /*
219 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
220 * use vfio-noiommu.
221 */
222 static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
223 const struct vfio_iommu_driver *driver)
224 {
225 return container->noiommu == (driver->ops == &vfio_noiommu_ops);
226 }
227 #else
228 static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
229 const struct vfio_iommu_driver *driver)
230 {
231 return true;
232 }
233 #endif /* CONFIG_VFIO_NOIOMMU */
234
235 /*
236 * IOMMU driver registration
237 */
238 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
239 {
240 struct vfio_iommu_driver *driver, *tmp;
241
242 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
243 if (!driver)
244 return -ENOMEM;
245
246 driver->ops = ops;
247
248 mutex_lock(&vfio.iommu_drivers_lock);
249
250 /* Check for duplicates */
251 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
252 if (tmp->ops == ops) {
253 mutex_unlock(&vfio.iommu_drivers_lock);
254 kfree(driver);
255 return -EINVAL;
256 }
257 }
258
259 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
260
261 mutex_unlock(&vfio.iommu_drivers_lock);
262
263 return 0;
264 }
265 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
266
267 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
268 {
269 struct vfio_iommu_driver *driver;
270
271 mutex_lock(&vfio.iommu_drivers_lock);
272 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
273 if (driver->ops == ops) {
274 list_del(&driver->vfio_next);
275 mutex_unlock(&vfio.iommu_drivers_lock);
276 kfree(driver);
277 return;
278 }
279 }
280 mutex_unlock(&vfio.iommu_drivers_lock);
281 }
282 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
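/*
 * Illustrative sketch: an IOMMU backend module (the in-tree
 * vfio_iommu_type1 driver follows this pattern) fills in a
 * vfio_iommu_driver_ops table and registers/unregisters it from its
 * module init/exit hooks.  The "my_iommu" names are hypothetical.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_driver_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_driver_ops);
 *	}
 *
 *	static void __exit my_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_driver_ops);
 *	}
 */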
283
284 static int vfio_iommu_group_notifier(struct notifier_block *nb,
285 unsigned long action, void *data);
286 static void vfio_group_get(struct vfio_group *group);
287
288 /*
289 * Container objects - containers are created when /dev/vfio/vfio is
290 * opened, but their lifecycle extends until the last user is done, so
291 * it's freed via kref. Must support container/group/device being
292 * closed in any order.
293 */
294 static void vfio_container_get(struct vfio_container *container)
295 {
296 kref_get(&container->kref);
297 }
298
299 static void vfio_container_release(struct kref *kref)
300 {
301 struct vfio_container *container;
302 container = container_of(kref, struct vfio_container, kref);
303
304 kfree(container);
305 }
306
307 static void vfio_container_put(struct vfio_container *container)
308 {
309 kref_put(&container->kref, vfio_container_release);
310 }
311
312 /*
313 * Group objects - create, release, get, put, search
314 */
315 static struct vfio_group *
316 __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
317 {
318 struct vfio_group *group;
319
320 list_for_each_entry(group, &vfio.group_list, vfio_next) {
321 if (group->iommu_group == iommu_group) {
322 vfio_group_get(group);
323 return group;
324 }
325 }
326 return NULL;
327 }
328
329 static struct vfio_group *
330 vfio_group_get_from_iommu(struct iommu_group *iommu_group)
331 {
332 struct vfio_group *group;
333
334 mutex_lock(&vfio.group_lock);
335 group = __vfio_group_get_from_iommu(iommu_group);
336 mutex_unlock(&vfio.group_lock);
337 return group;
338 }
339
340 static void vfio_group_release(struct device *dev)
341 {
342 struct vfio_group *group = container_of(dev, struct vfio_group, dev);
343 struct vfio_unbound_dev *unbound, *tmp;
344
345 list_for_each_entry_safe(unbound, tmp,
346 &group->unbound_list, unbound_next) {
347 list_del(&unbound->unbound_next);
348 kfree(unbound);
349 }
350
351 mutex_destroy(&group->device_lock);
352 mutex_destroy(&group->unbound_lock);
353 iommu_group_put(group->iommu_group);
354 ida_free(&vfio.group_ida, MINOR(group->dev.devt));
355 kfree(group);
356 }
357
358 static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
359 enum vfio_group_type type)
360 {
361 struct vfio_group *group;
362 int minor;
363
364 group = kzalloc(sizeof(*group), GFP_KERNEL);
365 if (!group)
366 return ERR_PTR(-ENOMEM);
367
368 minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
369 if (minor < 0) {
370 kfree(group);
371 return ERR_PTR(minor);
372 }
373
374 device_initialize(&group->dev);
375 group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
376 group->dev.class = vfio.class;
377 group->dev.release = vfio_group_release;
378 cdev_init(&group->cdev, &vfio_group_fops);
379 group->cdev.owner = THIS_MODULE;
380
381 refcount_set(&group->users, 1);
382 INIT_LIST_HEAD(&group->device_list);
383 mutex_init(&group->device_lock);
384 INIT_LIST_HEAD(&group->unbound_list);
385 mutex_init(&group->unbound_lock);
386 init_waitqueue_head(&group->container_q);
387 group->iommu_group = iommu_group;
388 /* put in vfio_group_release() */
389 iommu_group_ref_get(iommu_group);
390 group->type = type;
391 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
392
393 return group;
394 }
395
396 static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
397 enum vfio_group_type type)
398 {
399 struct vfio_group *group;
400 struct vfio_group *ret;
401 int err;
402
403 group = vfio_group_alloc(iommu_group, type);
404 if (IS_ERR(group))
405 return group;
406
407 err = dev_set_name(&group->dev, "%s%d",
408 group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
409 iommu_group_id(iommu_group));
410 if (err) {
411 ret = ERR_PTR(err);
412 goto err_put;
413 }
414
415 group->nb.notifier_call = vfio_iommu_group_notifier;
416 err = iommu_group_register_notifier(iommu_group, &group->nb);
417 if (err) {
418 ret = ERR_PTR(err);
419 goto err_put;
420 }
421
422 mutex_lock(&vfio.group_lock);
423
424 /* Did we race creating this group? */
425 ret = __vfio_group_get_from_iommu(iommu_group);
426 if (ret)
427 goto err_unlock;
428
429 err = cdev_device_add(&group->cdev, &group->dev);
430 if (err) {
431 ret = ERR_PTR(err);
432 goto err_unlock;
433 }
434
435 list_add(&group->vfio_next, &vfio.group_list);
436
437 mutex_unlock(&vfio.group_lock);
438 return group;
439
440 err_unlock:
441 mutex_unlock(&vfio.group_lock);
442 iommu_group_unregister_notifier(group->iommu_group, &group->nb);
443 err_put:
444 put_device(&group->dev);
445 return ret;
446 }
447
448 static void vfio_group_put(struct vfio_group *group)
449 {
450 if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
451 return;
452
453 /*
454 * These data structures all have paired operations that can only be
455 * undone when the caller holds a live reference on the group. Since all
456 * pairs must be undone these WARN_ON's indicate some caller did not
457 * properly hold the group reference.
458 */
459 WARN_ON(!list_empty(&group->device_list));
460 WARN_ON(atomic_read(&group->container_users));
461 WARN_ON(group->notifier.head);
462
463 list_del(&group->vfio_next);
464 cdev_device_del(&group->cdev, &group->dev);
465 mutex_unlock(&vfio.group_lock);
466
467 iommu_group_unregister_notifier(group->iommu_group, &group->nb);
468 put_device(&group->dev);
469 }
470
471 static void vfio_group_get(struct vfio_group *group)
472 {
473 refcount_inc(&group->users);
474 }
475
476 static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
477 {
478 struct iommu_group *iommu_group;
479 struct vfio_group *group;
480
481 iommu_group = iommu_group_get(dev);
482 if (!iommu_group)
483 return NULL;
484
485 group = vfio_group_get_from_iommu(iommu_group);
486 iommu_group_put(iommu_group);
487
488 return group;
489 }
490
491 /*
492 * Device objects - create, release, get, put, search
493 */
494 /* Device reference always implies a group reference */
495 void vfio_device_put(struct vfio_device *device)
496 {
497 if (refcount_dec_and_test(&device->refcount))
498 complete(&device->comp);
499 }
500 EXPORT_SYMBOL_GPL(vfio_device_put);
501
502 static bool vfio_device_try_get(struct vfio_device *device)
503 {
504 return refcount_inc_not_zero(&device->refcount);
505 }
506
507 static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
508 struct device *dev)
509 {
510 struct vfio_device *device;
511
512 mutex_lock(&group->device_lock);
513 list_for_each_entry(device, &group->device_list, group_next) {
514 if (device->dev == dev && vfio_device_try_get(device)) {
515 mutex_unlock(&group->device_lock);
516 return device;
517 }
518 }
519 mutex_unlock(&group->device_lock);
520 return NULL;
521 }
522
523 /*
524 * Some drivers, like pci-stub, are only used to prevent other drivers from
525 * claiming a device and are therefore perfectly legitimate for a user owned
526 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
527 * of the device, but it does prevent the user from having direct access to
528 * the device, which is useful in some circumstances.
529 *
530 * We also assume that we can include PCI interconnect devices, ie. bridges.
531 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
532 * then all of the downstream devices will be part of the same IOMMU group as
533 * the bridge. Thus, if placing the bridge into the user owned IOVA space
534 * breaks anything, it only does so for user owned devices downstream. Note
535 * that error notification via MSI can be affected for platforms that handle
536 * MSI within the same IOVA space as DMA.
537 */
538 static const char * const vfio_driver_allowed[] = { "pci-stub" };
539
540 static bool vfio_dev_driver_allowed(struct device *dev,
541 struct device_driver *drv)
542 {
543 if (dev_is_pci(dev)) {
544 struct pci_dev *pdev = to_pci_dev(dev);
545
546 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
547 return true;
548 }
549
550 return match_string(vfio_driver_allowed,
551 ARRAY_SIZE(vfio_driver_allowed),
552 drv->name) >= 0;
553 }
554
555 /*
556 * A vfio group is viable for use by userspace if all devices are in
557 * one of the following states:
558 * - driver-less
559 * - bound to a vfio driver
560 * - bound to an otherwise allowed driver
561 * - a PCI interconnect device
562 *
563 * We use two methods to determine whether a device is bound to a vfio
564 * driver. The first is to test whether the device exists in the vfio
565 * group. The second is to test if the device exists on the group
566 * unbound_list, indicating it's in the middle of transitioning from
567 * a vfio driver to driver-less.
568 */
569 static int vfio_dev_viable(struct device *dev, void *data)
570 {
571 struct vfio_group *group = data;
572 struct vfio_device *device;
573 struct device_driver *drv = READ_ONCE(dev->driver);
574 struct vfio_unbound_dev *unbound;
575 int ret = -EINVAL;
576
577 mutex_lock(&group->unbound_lock);
578 list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
579 if (dev == unbound->dev) {
580 ret = 0;
581 break;
582 }
583 }
584 mutex_unlock(&group->unbound_lock);
585
586 if (!ret || !drv || vfio_dev_driver_allowed(dev, drv))
587 return 0;
588
589 device = vfio_group_get_device(group, dev);
590 if (device) {
591 vfio_device_put(device);
592 return 0;
593 }
594
595 return ret;
596 }
597
598 /*
599 * Async device support
600 */
601 static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
602 {
603 struct vfio_device *device;
604
605 /* Do we already know about it? We shouldn't */
606 device = vfio_group_get_device(group, dev);
607 if (WARN_ON_ONCE(device)) {
608 vfio_device_put(device);
609 return 0;
610 }
611
612 /* Nothing to do for idle groups */
613 if (!atomic_read(&group->container_users))
614 return 0;
615
616 /* TODO Prevent device auto probing */
617 dev_WARN(dev, "Device added to live group %d!\n",
618 iommu_group_id(group->iommu_group));
619
620 return 0;
621 }
622
623 static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
624 {
625 /* We don't care what happens when the group isn't in use */
626 if (!atomic_read(&group->container_users))
627 return 0;
628
629 return vfio_dev_viable(dev, group);
630 }
631
632 static int vfio_iommu_group_notifier(struct notifier_block *nb,
633 unsigned long action, void *data)
634 {
635 struct vfio_group *group = container_of(nb, struct vfio_group, nb);
636 struct device *dev = data;
637 struct vfio_unbound_dev *unbound;
638
639 switch (action) {
640 case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
641 vfio_group_nb_add_dev(group, dev);
642 break;
643 case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
644 /*
645 * Nothing to do here. If the device is in use, then the
646 * vfio sub-driver should block the remove callback until
647 * it is unused. If the device is unused or attached to a
648 * stub driver, then it should be released and we don't
649 * care that it will be going away.
650 */
651 break;
652 case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
653 dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
654 iommu_group_id(group->iommu_group));
655 break;
656 case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
657 dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
658 iommu_group_id(group->iommu_group), dev->driver->name);
659 BUG_ON(vfio_group_nb_verify(group, dev));
660 break;
661 case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
662 dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
663 __func__, iommu_group_id(group->iommu_group),
664 dev->driver->name);
665 break;
666 case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
667 dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
668 iommu_group_id(group->iommu_group));
669 /*
670 * XXX An unbound device in a live group is ok, but we'd
671 * really like to avoid the above BUG_ON by preventing other
672 * drivers from binding to it. Once that occurs, we have to
673 * stop the system to maintain isolation. At a minimum, we'd
674 * want a toggle to disable driver auto probe for this device.
675 */
676
677 mutex_lock(&group->unbound_lock);
678 list_for_each_entry(unbound,
679 &group->unbound_list, unbound_next) {
680 if (dev == unbound->dev) {
681 list_del(&unbound->unbound_next);
682 kfree(unbound);
683 break;
684 }
685 }
686 mutex_unlock(&group->unbound_lock);
687 break;
688 }
689 return NOTIFY_OK;
690 }
691
692 /*
693 * VFIO driver API
694 */
695 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
696 const struct vfio_device_ops *ops)
697 {
698 init_completion(&device->comp);
699 device->dev = dev;
700 device->ops = ops;
701 }
702 EXPORT_SYMBOL_GPL(vfio_init_group_dev);
703
704 void vfio_uninit_group_dev(struct vfio_device *device)
705 {
706 vfio_release_device_set(device);
707 }
708 EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
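/*
 * Illustrative sketch: vfio_init_group_dev() and vfio_uninit_group_dev()
 * are expected to bracket the register/unregister calls in a driver's
 * probe and remove paths.  The embedding structure and names below are
 * hypothetical.
 *
 *	struct my_device {
 *		struct vfio_device vdev;	// embedded, not a pointer
 *		...
 *	};
 *
 *	probe:
 *		vfio_init_group_dev(&mydev->vdev, dev, &my_vfio_ops);
 *		err = vfio_register_group_dev(&mydev->vdev);
 *		if (err)
 *			vfio_uninit_group_dev(&mydev->vdev);
 *
 *	remove:
 *		vfio_unregister_group_dev(&mydev->vdev);
 *		vfio_uninit_group_dev(&mydev->vdev);
 */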
709
710 static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
711 enum vfio_group_type type)
712 {
713 struct iommu_group *iommu_group;
714 struct vfio_group *group;
715 int ret;
716
717 iommu_group = iommu_group_alloc();
718 if (IS_ERR(iommu_group))
719 return ERR_CAST(iommu_group);
720
721 iommu_group_set_name(iommu_group, "vfio-noiommu");
722 ret = iommu_group_add_device(iommu_group, dev);
723 if (ret)
724 goto out_put_group;
725
726 group = vfio_create_group(iommu_group, type);
727 if (IS_ERR(group)) {
728 ret = PTR_ERR(group);
729 goto out_remove_device;
730 }
731 iommu_group_put(iommu_group);
732 return group;
733
734 out_remove_device:
735 iommu_group_remove_device(dev);
736 out_put_group:
737 iommu_group_put(iommu_group);
738 return ERR_PTR(ret);
739 }
740
741 static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
742 {
743 struct iommu_group *iommu_group;
744 struct vfio_group *group;
745
746 iommu_group = iommu_group_get(dev);
747 #ifdef CONFIG_VFIO_NOIOMMU
748 if (!iommu_group && noiommu && !iommu_present(dev->bus)) {
749 /*
750 * With noiommu enabled, create an IOMMU group for devices that
751 * don't already have one and don't have an iommu_ops on their
752 * bus. Taint the kernel because we're about to give a DMA
753 * capable device to a user without IOMMU protection.
754 */
755 group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
756 if (!IS_ERR(group)) {
757 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
758 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
759 }
760 return group;
761 }
762 #endif
763 if (!iommu_group)
764 return ERR_PTR(-EINVAL);
765
766 group = vfio_group_get_from_iommu(iommu_group);
767 if (!group)
768 group = vfio_create_group(iommu_group, VFIO_IOMMU);
769
770 /* The vfio_group holds a reference to the iommu_group */
771 iommu_group_put(iommu_group);
772 return group;
773 }
774
775 static int __vfio_register_dev(struct vfio_device *device,
776 struct vfio_group *group)
777 {
778 struct vfio_device *existing_device;
779
780 if (IS_ERR(group))
781 return PTR_ERR(group);
782
783 /*
784 * If the driver doesn't specify a set then the device is added to a
785 * singleton set just for itself.
786 */
787 if (!device->dev_set)
788 vfio_assign_device_set(device, device);
789
790 existing_device = vfio_group_get_device(group, device->dev);
791 if (existing_device) {
792 dev_WARN(device->dev, "Device already exists on group %d\n",
793 iommu_group_id(group->iommu_group));
794 vfio_device_put(existing_device);
795 if (group->type == VFIO_NO_IOMMU ||
796 group->type == VFIO_EMULATED_IOMMU)
797 iommu_group_remove_device(device->dev);
798 vfio_group_put(group);
799 return -EBUSY;
800 }
801
802 /* Our reference on group is moved to the device */
803 device->group = group;
804
805 /* Refcounting can't start until the driver calls register */
806 refcount_set(&device->refcount, 1);
807
808 mutex_lock(&group->device_lock);
809 list_add(&device->group_next, &group->device_list);
810 group->dev_counter++;
811 mutex_unlock(&group->device_lock);
812
813 return 0;
814 }
815
816 int vfio_register_group_dev(struct vfio_device *device)
817 {
818 return __vfio_register_dev(device,
819 vfio_group_find_or_alloc(device->dev));
820 }
821 EXPORT_SYMBOL_GPL(vfio_register_group_dev);
822
823 /*
824 * Register a virtual device without IOMMU backing. The user of this
825 * device must not be able to directly trigger unmediated DMA.
826 */
827 int vfio_register_emulated_iommu_dev(struct vfio_device *device)
828 {
829 return __vfio_register_dev(device,
830 vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
831 }
832 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
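/*
 * Illustrative sketch: a mediated-device style driver, whose DMA is
 * mediated by the parent driver rather than translated by the IOMMU,
 * would use this registration path from its probe routine (names are
 * hypothetical):
 *
 *	vfio_init_group_dev(&mvdev->vdev, &mdev->dev, &my_mdev_ops);
 *	ret = vfio_register_emulated_iommu_dev(&mvdev->vdev);
 *	if (ret)
 *		vfio_uninit_group_dev(&mvdev->vdev);
 */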
833
834 /*
835 * Get a reference to the vfio_device for a device. Even if the
836 * caller thinks they own the device, they could be racing with a
837 * release call path, so we can't trust drvdata for the shortcut.
838 * Go the long way around, from the iommu_group to the vfio_group
839 * to the vfio_device.
840 */
841 struct vfio_device *vfio_device_get_from_dev(struct device *dev)
842 {
843 struct vfio_group *group;
844 struct vfio_device *device;
845
846 group = vfio_group_get_from_dev(dev);
847 if (!group)
848 return NULL;
849
850 device = vfio_group_get_device(group, dev);
851 vfio_group_put(group);
852
853 return device;
854 }
855 EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
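/*
 * Illustrative sketch: a caller holding only a struct device pointer
 * must balance this lookup with vfio_device_put() once it is done with
 * the vfio_device:
 *
 *	struct vfio_device *vdev = vfio_device_get_from_dev(dev);
 *
 *	if (vdev) {
 *		... use vdev ...
 *		vfio_device_put(vdev);
 *	}
 */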
856
857 static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
858 char *buf)
859 {
860 struct vfio_device *it, *device = ERR_PTR(-ENODEV);
861
862 mutex_lock(&group->device_lock);
863 list_for_each_entry(it, &group->device_list, group_next) {
864 int ret;
865
866 if (it->ops->match) {
867 ret = it->ops->match(it, buf);
868 if (ret < 0) {
869 device = ERR_PTR(ret);
870 break;
871 }
872 } else {
873 ret = !strcmp(dev_name(it->dev), buf);
874 }
875
876 if (ret && vfio_device_try_get(it)) {
877 device = it;
878 break;
879 }
880 }
881 mutex_unlock(&group->device_lock);
882
883 return device;
884 }
885
886 /*
887 * Decrement the device reference count and wait for the device to be
888 * removed. Open file descriptors for the device... */
889 void vfio_unregister_group_dev(struct vfio_device *device)
890 {
891 struct vfio_group *group = device->group;
892 struct vfio_unbound_dev *unbound;
893 unsigned int i = 0;
894 bool interrupted = false;
895 long rc;
896
897 /*
898 * When the device is removed from the group, the group suddenly
899 * becomes non-viable; the device has a driver (until the unbind
900 * completes), but it's not present in the group. This is bad news
901 * for any external users that need to re-acquire a group reference
902 * in order to match and release their existing reference. To
903 * solve this, we track such devices on the unbound_list to bridge
904 * the gap until they're fully unbound.
905 */
906 unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
907 if (unbound) {
908 unbound->dev = device->dev;
909 mutex_lock(&group->unbound_lock);
910 list_add(&unbound->unbound_next, &group->unbound_list);
911 mutex_unlock(&group->unbound_lock);
912 }
913 WARN_ON(!unbound);
914
915 vfio_device_put(device);
916 rc = try_wait_for_completion(&device->comp);
917 while (rc <= 0) {
918 if (device->ops->request)
919 device->ops->request(device, i++);
920
921 if (interrupted) {
922 rc = wait_for_completion_timeout(&device->comp,
923 HZ * 10);
924 } else {
925 rc = wait_for_completion_interruptible_timeout(
926 &device->comp, HZ * 10);
927 if (rc < 0) {
928 interrupted = true;
929 dev_warn(device->dev,
930 "Device is currently in use, task"
931 " \"%s\" (%d) "
932 "blocked until device is released",
933 current->comm, task_pid_nr(current));
934 }
935 }
936 }
937
938 mutex_lock(&group->device_lock);
939 list_del(&device->group_next);
940 group->dev_counter--;
941 mutex_unlock(&group->device_lock);
942
943 /*
944 * In order to support multiple devices per group, devices can be
945 * plucked from the group while other devices in the group are still
946 * in use. The container persists with this group and those remaining
947 * devices still attached. If the user creates an isolation violation
948 * by binding this device to another driver while the group is still in
949 * use, that's their fault. However, in the case of removing the last,
950 * or potentially the only, device in the group there can be no other
951 * in-use devices in the group. The user has done their due diligence
952 * and we should lay no claims to those devices. In order to do that,
953 * we need to make sure the group is detached from the container.
954 * Without this stall, we're potentially racing with a user process
955 * that may attempt to immediately bind this device to another driver.
956 */
957 if (list_empty(&group->device_list))
958 wait_event(group->container_q, !group->container);
959
960 if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
961 iommu_group_remove_device(device->dev);
962
963 /* Matches the get in vfio_register_group_dev() */
964 vfio_group_put(group);
965 }
966 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
967
968 /*
969 * VFIO base fd, /dev/vfio/vfio
970 */
971 static long vfio_ioctl_check_extension(struct vfio_container *container,
972 unsigned long arg)
973 {
974 struct vfio_iommu_driver *driver;
975 long ret = 0;
976
977 down_read(&container->group_lock);
978
979 driver = container->iommu_driver;
980
981 switch (arg) {
982 /* No base extensions yet */
983 default:
984 /*
985 * If no driver is set, poll all registered drivers for
986 * extensions and return the first positive result. If
987 * a driver is already set, further queries will be passed
988 * only to that driver.
989 */
990 if (!driver) {
991 mutex_lock(&vfio.iommu_drivers_lock);
992 list_for_each_entry(driver, &vfio.iommu_drivers_list,
993 vfio_next) {
994
995 if (!list_empty(&container->group_list) &&
996 !vfio_iommu_driver_allowed(container,
997 driver))
998 continue;
999 if (!try_module_get(driver->ops->owner))
1000 continue;
1001
1002 ret = driver->ops->ioctl(NULL,
1003 VFIO_CHECK_EXTENSION,
1004 arg);
1005 module_put(driver->ops->owner);
1006 if (ret > 0)
1007 break;
1008 }
1009 mutex_unlock(&vfio.iommu_drivers_lock);
1010 } else
1011 ret = driver->ops->ioctl(container->iommu_data,
1012 VFIO_CHECK_EXTENSION, arg);
1013 }
1014
1015 up_read(&container->group_lock);
1016
1017 return ret;
1018 }
1019
1020 /* hold write lock on container->group_lock */
1021 static int __vfio_container_attach_groups(struct vfio_container *container,
1022 struct vfio_iommu_driver *driver,
1023 void *data)
1024 {
1025 struct vfio_group *group;
1026 int ret = -ENODEV;
1027
1028 list_for_each_entry(group, &container->group_list, container_next) {
1029 ret = driver->ops->attach_group(data, group->iommu_group,
1030 group->type);
1031 if (ret)
1032 goto unwind;
1033 }
1034
1035 return ret;
1036
1037 unwind:
1038 list_for_each_entry_continue_reverse(group, &container->group_list,
1039 container_next) {
1040 driver->ops->detach_group(data, group->iommu_group);
1041 }
1042
1043 return ret;
1044 }
1045
1046 static long vfio_ioctl_set_iommu(struct vfio_container *container,
1047 unsigned long arg)
1048 {
1049 struct vfio_iommu_driver *driver;
1050 long ret = -ENODEV;
1051
1052 down_write(&container->group_lock);
1053
1054 /*
1055 * The container is designed to be an unprivileged interface while
1056 * the group can be assigned to specific users. Therefore, only by
1057 * adding a group to a container does the user get the privilege of
1058 * enabling the iommu, which may allocate finite resources. There
1059 * is no unset_iommu, but by removing all the groups from a container,
1060 * the container is deprivileged and returns to an unset state.
1061 */
1062 if (list_empty(&container->group_list) || container->iommu_driver) {
1063 up_write(&container->group_lock);
1064 return -EINVAL;
1065 }
1066
1067 mutex_lock(&vfio.iommu_drivers_lock);
1068 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
1069 void *data;
1070
1071 if (!vfio_iommu_driver_allowed(container, driver))
1072 continue;
1073 if (!try_module_get(driver->ops->owner))
1074 continue;
1075
1076 /*
1077 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
1078 * so test which iommu driver reported support for this
1079 * extension and call open on them. We also pass them the
1080 * magic, allowing a single driver to support multiple
1081 * interfaces if they'd like.
1082 */
1083 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
1084 module_put(driver->ops->owner);
1085 continue;
1086 }
1087
1088 data = driver->ops->open(arg);
1089 if (IS_ERR(data)) {
1090 ret = PTR_ERR(data);
1091 module_put(driver->ops->owner);
1092 continue;
1093 }
1094
1095 ret = __vfio_container_attach_groups(container, driver, data);
1096 if (ret) {
1097 driver->ops->release(data);
1098 module_put(driver->ops->owner);
1099 continue;
1100 }
1101
1102 container->iommu_driver = driver;
1103 container->iommu_data = data;
1104 break;
1105 }
1106
1107 mutex_unlock(&vfio.iommu_drivers_lock);
1108 up_write(&container->group_lock);
1109
1110 return ret;
1111 }
1112
1113 static long vfio_fops_unl_ioctl(struct file *filep,
1114 unsigned int cmd, unsigned long arg)
1115 {
1116 struct vfio_container *container = filep->private_data;
1117 struct vfio_iommu_driver *driver;
1118 void *data;
1119 long ret = -EINVAL;
1120
1121 if (!container)
1122 return ret;
1123
1124 switch (cmd) {
1125 case VFIO_GET_API_VERSION:
1126 ret = VFIO_API_VERSION;
1127 break;
1128 case VFIO_CHECK_EXTENSION:
1129 ret = vfio_ioctl_check_extension(container, arg);
1130 break;
1131 case VFIO_SET_IOMMU:
1132 ret = vfio_ioctl_set_iommu(container, arg);
1133 break;
1134 default:
1135 driver = container->iommu_driver;
1136 data = container->iommu_data;
1137
1138 if (driver) /* passthrough all unrecognized ioctls */
1139 ret = driver->ops->ioctl(data, cmd, arg);
1140 }
1141
1142 return ret;
1143 }
1144
1145 static int vfio_fops_open(struct inode *inode, struct file *filep)
1146 {
1147 struct vfio_container *container;
1148
1149 container = kzalloc(sizeof(*container), GFP_KERNEL);
1150 if (!container)
1151 return -ENOMEM;
1152
1153 INIT_LIST_HEAD(&container->group_list);
1154 init_rwsem(&container->group_lock);
1155 kref_init(&container->kref);
1156
1157 filep->private_data = container;
1158
1159 return 0;
1160 }
1161
1162 static int vfio_fops_release(struct inode *inode, struct file *filep)
1163 {
1164 struct vfio_container *container = filep->private_data;
1165 struct vfio_iommu_driver *driver = container->iommu_driver;
1166
1167 if (driver && driver->ops->notify)
1168 driver->ops->notify(container->iommu_data,
1169 VFIO_IOMMU_CONTAINER_CLOSE);
1170
1171 filep->private_data = NULL;
1172
1173 vfio_container_put(container);
1174
1175 return 0;
1176 }
1177
1178 static const struct file_operations vfio_fops = {
1179 .owner = THIS_MODULE,
1180 .open = vfio_fops_open,
1181 .release = vfio_fops_release,
1182 .unlocked_ioctl = vfio_fops_unl_ioctl,
1183 .compat_ioctl = compat_ptr_ioctl,
1184 };
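/*
 * Illustrative sketch of the userspace side of the container fd, loosely
 * following Documentation/driver-api/vfio.rst; error handling omitted:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;	// unknown API version
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		return -1;	// Type1 IOMMU backend not available
 */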
1185
1186 /*
1187 * VFIO Group fd, /dev/vfio/$GROUP
1188 */
1189 static void __vfio_group_unset_container(struct vfio_group *group)
1190 {
1191 struct vfio_container *container = group->container;
1192 struct vfio_iommu_driver *driver;
1193
1194 down_write(&container->group_lock);
1195
1196 driver = container->iommu_driver;
1197 if (driver)
1198 driver->ops->detach_group(container->iommu_data,
1199 group->iommu_group);
1200
1201 group->container = NULL;
1202 wake_up(&group->container_q);
1203 list_del(&group->container_next);
1204
1205 /* Detaching the last group deprivileges a container, remove iommu */
1206 if (driver && list_empty(&container->group_list)) {
1207 driver->ops->release(container->iommu_data);
1208 module_put(driver->ops->owner);
1209 container->iommu_driver = NULL;
1210 container->iommu_data = NULL;
1211 }
1212
1213 up_write(&container->group_lock);
1214
1215 vfio_container_put(container);
1216 }
1217
1218 /*
1219 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
1220 * if there was no container to unset. Since the ioctl is called on
1221 * the group, we know that still exists, therefore the only valid
1222 * transition here is 1->0.
1223 */
1224 static int vfio_group_unset_container(struct vfio_group *group)
1225 {
1226 int users = atomic_cmpxchg(&group->container_users, 1, 0);
1227
1228 if (!users)
1229 return -EINVAL;
1230 if (users != 1)
1231 return -EBUSY;
1232
1233 __vfio_group_unset_container(group);
1234
1235 return 0;
1236 }
1237
1238 /*
1239 * When removing container users, anything that removes the last user
1240 * implicitly removes the group from the container. That is, if the
1241 * group file descriptor is closed, as well as any device file descriptors,
1242 * the group is free.
1243 */
1244 static void vfio_group_try_dissolve_container(struct vfio_group *group)
1245 {
1246 if (0 == atomic_dec_if_positive(&group->container_users))
1247 __vfio_group_unset_container(group);
1248 }
1249
1250 static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1251 {
1252 struct fd f;
1253 struct vfio_container *container;
1254 struct vfio_iommu_driver *driver;
1255 int ret = 0;
1256
1257 if (atomic_read(&group->container_users))
1258 return -EINVAL;
1259
1260 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
1261 return -EPERM;
1262
1263 f = fdget(container_fd);
1264 if (!f.file)
1265 return -EBADF;
1266
1267 /* Sanity check, is this really our fd? */
1268 if (f.file->f_op != &vfio_fops) {
1269 fdput(f);
1270 return -EINVAL;
1271 }
1272
1273 container = f.file->private_data;
1274 WARN_ON(!container); /* fget ensures we don't race vfio_release */
1275
1276 down_write(&container->group_lock);
1277
1278 /* Real groups and fake groups cannot mix */
1279 if (!list_empty(&container->group_list) &&
1280 container->noiommu != (group->type == VFIO_NO_IOMMU)) {
1281 ret = -EPERM;
1282 goto unlock_out;
1283 }
1284
1285 driver = container->iommu_driver;
1286 if (driver) {
1287 ret = driver->ops->attach_group(container->iommu_data,
1288 group->iommu_group,
1289 group->type);
1290 if (ret)
1291 goto unlock_out;
1292 }
1293
1294 group->container = container;
1295 container->noiommu = (group->type == VFIO_NO_IOMMU);
1296 list_add(&group->container_next, &container->group_list);
1297
1298 /* Get a reference on the container and mark a user within the group */
1299 vfio_container_get(container);
1300 atomic_inc(&group->container_users);
1301
1302 unlock_out:
1303 up_write(&container->group_lock);
1304 fdput(f);
1305 return ret;
1306 }
1307
1308 static bool vfio_group_viable(struct vfio_group *group)
1309 {
1310 return (iommu_group_for_each_dev(group->iommu_group,
1311 group, vfio_dev_viable) == 0);
1312 }
1313
1314 static int vfio_group_add_container_user(struct vfio_group *group)
1315 {
1316 if (!atomic_inc_not_zero(&group->container_users))
1317 return -EINVAL;
1318
1319 if (group->type == VFIO_NO_IOMMU) {
1320 atomic_dec(&group->container_users);
1321 return -EPERM;
1322 }
1323 if (!group->container->iommu_driver || !vfio_group_viable(group)) {
1324 atomic_dec(&group->container_users);
1325 return -EINVAL;
1326 }
1327
1328 return 0;
1329 }
1330
1331 static const struct file_operations vfio_device_fops;
1332
1333 static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1334 {
1335 struct vfio_device *device;
1336 struct file *filep;
1337 int fdno;
1338 int ret = 0;
1339
1340 if (0 == atomic_read(&group->container_users) ||
1341 !group->container->iommu_driver || !vfio_group_viable(group))
1342 return -EINVAL;
1343
1344 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
1345 return -EPERM;
1346
1347 device = vfio_device_get_from_name(group, buf);
1348 if (IS_ERR(device))
1349 return PTR_ERR(device);
1350
1351 if (!try_module_get(device->dev->driver->owner)) {
1352 ret = -ENODEV;
1353 goto err_device_put;
1354 }
1355
1356 mutex_lock(&device->dev_set->lock);
1357 device->open_count++;
1358 if (device->open_count == 1 && device->ops->open_device) {
1359 ret = device->ops->open_device(device);
1360 if (ret)
1361 goto err_undo_count;
1362 }
1363 mutex_unlock(&device->dev_set->lock);
1364
1365 /*
1366 * We can't use anon_inode_getfd() because we need to modify
1367 * the f_mode flags directly to allow more than just ioctls
1368 */
1369 fdno = ret = get_unused_fd_flags(O_CLOEXEC);
1370 if (ret < 0)
1371 goto err_close_device;
1372
1373 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
1374 device, O_RDWR);
1375 if (IS_ERR(filep)) {
1376 ret = PTR_ERR(filep);
1377 goto err_fd;
1378 }
1379
1380 /*
1381 * TODO: add an anon_inode interface to do this.
1382 * Appears to be missing by lack of need rather than
1383 * explicitly prevented. Now there's need.
1384 */
1385 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1386
1387 atomic_inc(&group->container_users);
1388
1389 fd_install(fdno, filep);
1390
1391 if (group->type == VFIO_NO_IOMMU)
1392 dev_warn(device->dev, "vfio-noiommu device opened by user "
1393 "(%s:%d)\n", current->comm, task_pid_nr(current));
1394 return fdno;
1395
1396 err_fd:
1397 put_unused_fd(fdno);
1398 err_close_device:
1399 mutex_lock(&device->dev_set->lock);
1400 if (device->open_count == 1 && device->ops->close_device)
1401 device->ops->close_device(device);
1402 err_undo_count:
1403 device->open_count--;
1404 mutex_unlock(&device->dev_set->lock);
1405 module_put(device->dev->driver->owner);
1406 err_device_put:
1407 vfio_device_put(device);
1408 return ret;
1409 }
1410
1411 static long vfio_group_fops_unl_ioctl(struct file *filep,
1412 unsigned int cmd, unsigned long arg)
1413 {
1414 struct vfio_group *group = filep->private_data;
1415 long ret = -ENOTTY;
1416
1417 switch (cmd) {
1418 case VFIO_GROUP_GET_STATUS:
1419 {
1420 struct vfio_group_status status;
1421 unsigned long minsz;
1422
1423 minsz = offsetofend(struct vfio_group_status, flags);
1424
1425 if (copy_from_user(&status, (void __user *)arg, minsz))
1426 return -EFAULT;
1427
1428 if (status.argsz < minsz)
1429 return -EINVAL;
1430
1431 status.flags = 0;
1432
1433 if (vfio_group_viable(group))
1434 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1435
1436 if (group->container)
1437 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1438
1439 if (copy_to_user((void __user *)arg, &status, minsz))
1440 return -EFAULT;
1441
1442 ret = 0;
1443 break;
1444 }
1445 case VFIO_GROUP_SET_CONTAINER:
1446 {
1447 int fd;
1448
1449 if (get_user(fd, (int __user *)arg))
1450 return -EFAULT;
1451
1452 if (fd < 0)
1453 return -EINVAL;
1454
1455 ret = vfio_group_set_container(group, fd);
1456 break;
1457 }
1458 case VFIO_GROUP_UNSET_CONTAINER:
1459 ret = vfio_group_unset_container(group);
1460 break;
1461 case VFIO_GROUP_GET_DEVICE_FD:
1462 {
1463 char *buf;
1464
1465 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1466 if (IS_ERR(buf))
1467 return PTR_ERR(buf);
1468
1469 ret = vfio_group_get_device_fd(group, buf);
1470 kfree(buf);
1471 break;
1472 }
1473 }
1474
1475 return ret;
1476 }
1477
1478 static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1479 {
1480 struct vfio_group *group =
1481 container_of(inode->i_cdev, struct vfio_group, cdev);
1482 int opened;
1483
1484 /* users can be zero if this races with vfio_group_put() */
1485 if (!refcount_inc_not_zero(&group->users))
1486 return -ENODEV;
1487
1488 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
1489 vfio_group_put(group);
1490 return -EPERM;
1491 }
1492
1493 /* Do we need multiple instances of the group open? Seems not. */
1494 opened = atomic_cmpxchg(&group->opened, 0, 1);
1495 if (opened) {
1496 vfio_group_put(group);
1497 return -EBUSY;
1498 }
1499
1500 /* Is something still in use from a previous open? */
1501 if (group->container) {
1502 atomic_dec(&group->opened);
1503 vfio_group_put(group);
1504 return -EBUSY;
1505 }
1506
1507 /* Warn if previous user didn't cleanup and re-init to drop them */
1508 if (WARN_ON(group->notifier.head))
1509 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
1510
1511 filep->private_data = group;
1512
1513 return 0;
1514 }
1515
1516 static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1517 {
1518 struct vfio_group *group = filep->private_data;
1519
1520 filep->private_data = NULL;
1521
1522 vfio_group_try_dissolve_container(group);
1523
1524 atomic_dec(&group->opened);
1525
1526 vfio_group_put(group);
1527
1528 return 0;
1529 }
1530
1531 static const struct file_operations vfio_group_fops = {
1532 .owner = THIS_MODULE,
1533 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1534 .compat_ioctl = compat_ptr_ioctl,
1535 .open = vfio_group_fops_open,
1536 .release = vfio_group_fops_release,
1537 };
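/*
 * Illustrative sketch of the userspace side of the group fd, loosely
 * following Documentation/driver-api/vfio.rst; the group number and BDF
 * are examples and error handling is omitted:
 *
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	int device;
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return -1;	// not all group devices bound to vfio/allowed drivers
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */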
1538
1539 /*
1540 * VFIO Device fd
1541 */
1542 static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1543 {
1544 struct vfio_device *device = filep->private_data;
1545
1546 mutex_lock(&device->dev_set->lock);
1547 if (!--device->open_count && device->ops->close_device)
1548 device->ops->close_device(device);
1549 mutex_unlock(&device->dev_set->lock);
1550
1551 module_put(device->dev->driver->owner);
1552
1553 vfio_group_try_dissolve_container(device->group);
1554
1555 vfio_device_put(device);
1556
1557 return 0;
1558 }
1559
1560 static long vfio_device_fops_unl_ioctl(struct file *filep,
1561 unsigned int cmd, unsigned long arg)
1562 {
1563 struct vfio_device *device = filep->private_data;
1564
1565 if (unlikely(!device->ops->ioctl))
1566 return -EINVAL;
1567
1568 return device->ops->ioctl(device, cmd, arg);
1569 }
1570
1571 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1572 size_t count, loff_t *ppos)
1573 {
1574 struct vfio_device *device = filep->private_data;
1575
1576 if (unlikely(!device->ops->read))
1577 return -EINVAL;
1578
1579 return device->ops->read(device, buf, count, ppos);
1580 }
1581
1582 static ssize_t vfio_device_fops_write(struct file *filep,
1583 const char __user *buf,
1584 size_t count, loff_t *ppos)
1585 {
1586 struct vfio_device *device = filep->private_data;
1587
1588 if (unlikely(!device->ops->write))
1589 return -EINVAL;
1590
1591 return device->ops->write(device, buf, count, ppos);
1592 }
1593
1594 static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1595 {
1596 struct vfio_device *device = filep->private_data;
1597
1598 if (unlikely(!device->ops->mmap))
1599 return -EINVAL;
1600
1601 return device->ops->mmap(device, vma);
1602 }
1603
1604 static const struct file_operations vfio_device_fops = {
1605 .owner = THIS_MODULE,
1606 .release = vfio_device_fops_release,
1607 .read = vfio_device_fops_read,
1608 .write = vfio_device_fops_write,
1609 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1610 .compat_ioctl = compat_ptr_ioctl,
1611 .mmap = vfio_device_fops_mmap,
1612 };
1613
1614 /*
1615 * External user API, exported by symbols to be linked dynamically.
1616 *
1617 * The protocol includes:
1618 * 1. do normal VFIO init operation:
1619 * - opening a new container;
1620 * - attaching group(s) to it;
1621 * - setting an IOMMU driver for a container.
1622 * When IOMMU is set for a container, all groups in it are
1623 * considered ready to use by an external user.
1624 *
1625 * 2. User space passes a group fd to an external user.
1626 * The external user calls vfio_group_get_external_user()
1627 * to verify that:
1628 * - the group is initialized;
1629 * - IOMMU is set for it.
1630 * If both checks passed, vfio_group_get_external_user()
1631 * increments the container user counter to prevent
1632 * the VFIO group from disposal before KVM exits.
1633 *
1634 * 3. The external user calls vfio_external_user_iommu_id()
1635 * to know an IOMMU ID.
1636 *
1637 * 4. When the external KVM finishes, it calls
1638 * vfio_group_put_external_user() to release the VFIO group.
1639 * This call decrements the container user counter.
1640 */
1641 struct vfio_group *vfio_group_get_external_user(struct file *filep)
1642 {
1643 struct vfio_group *group = filep->private_data;
1644 int ret;
1645
1646 if (filep->f_op != &vfio_group_fops)
1647 return ERR_PTR(-EINVAL);
1648
1649 ret = vfio_group_add_container_user(group);
1650 if (ret)
1651 return ERR_PTR(ret);
1652
1653 /*
1654 * Since the caller holds the fget on the file, group->users must be >= 1
1655 */
1656 vfio_group_get(group);
1657
1658 return group;
1659 }
1660 EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
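/*
 * Illustrative sketch: an external user such as KVM resolves a group fd
 * passed in from userspace, queries the IOMMU group id, and drops its
 * reference when the association ends.  The surrounding code is
 * hypothetical:
 *
 *	struct fd f = fdget(group_fd);
 *	struct vfio_group *grp = vfio_group_get_external_user(f.file);
 *
 *	if (!IS_ERR(grp)) {
 *		int iommu_id = vfio_external_user_iommu_id(grp);
 *
 *		... track iommu_id while the group is in use ...
 *		vfio_group_put_external_user(grp);
 *	}
 *	fdput(f);
 */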
1661
1662 /*
1663 * External user API, exported by symbols to be linked dynamically.
1664 * The external user passes in a device pointer
1665 * to verify that:
1666 * - A VFIO group is associated with the device;
1667 * - IOMMU is set for the group.
1668 * If both checks passed, vfio_group_get_external_user_from_dev()
1669 * increments the container user counter to prevent the VFIO group
1670 * from disposal before external user exits and returns the pointer
1671 * to the VFIO group.
1672 *
1673 * When the external user finishes using the VFIO group, it calls
1674 * vfio_group_put_external_user() to release the VFIO group and
1675 * decrement the container user counter.
1676 *
1677 * @dev [in] : device
1678 * Return error PTR or pointer to VFIO group.
1679 */
1680
1681 struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
1682 {
1683 struct vfio_group *group;
1684 int ret;
1685
1686 group = vfio_group_get_from_dev(dev);
1687 if (!group)
1688 return ERR_PTR(-ENODEV);
1689
1690 ret = vfio_group_add_container_user(group);
1691 if (ret) {
1692 vfio_group_put(group);
1693 return ERR_PTR(ret);
1694 }
1695
1696 return group;
1697 }
1698 EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);
1699
1700 void vfio_group_put_external_user(struct vfio_group *group)
1701 {
1702 vfio_group_try_dissolve_container(group);
1703 vfio_group_put(group);
1704 }
1705 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
1706
1707 bool vfio_external_group_match_file(struct vfio_group *test_group,
1708 struct file *filep)
1709 {
1710 struct vfio_group *group = filep->private_data;
1711
1712 return (filep->f_op == &vfio_group_fops) && (group == test_group);
1713 }
1714 EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
1715
1716 int vfio_external_user_iommu_id(struct vfio_group *group)
1717 {
1718 return iommu_group_id(group->iommu_group);
1719 }
1720 EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
1721
1722 long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
1723 {
1724 return vfio_ioctl_check_extension(group->container, arg);
1725 }
1726 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
1727
1728 /*
1729 * Sub-module support
1730 */
1731 /*
1732 * Helper for managing a buffer of info chain capabilities, allocate or
1733 * reallocate a buffer with additional @size, filling in @id and @version
1734 * of the capability. A pointer to the new capability is returned.
1735 *
1736 * NB. The chain is based at the head of the buffer, so new entries are
1737 * added to the tail, vfio_info_cap_shift() should be called to fixup the
1738 * next offsets prior to copying to the user buffer.
1739 */
1740 struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
1741 size_t size, u16 id, u16 version)
1742 {
1743 void *buf;
1744 struct vfio_info_cap_header *header, *tmp;
1745
1746 buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
1747 if (!buf) {
1748 kfree(caps->buf);
1749 caps->size = 0;
1750 return ERR_PTR(-ENOMEM);
1751 }
1752
1753 caps->buf = buf;
1754 header = buf + caps->size;
1755
1756 /* Eventually copied to user buffer, zero */
1757 memset(header, 0, size);
1758
1759 header->id = id;
1760 header->version = version;
1761
1762 /* Add to the end of the capability chain */
1763 for (tmp = buf; tmp->next; tmp = buf + tmp->next)
1764 ; /* nothing */
1765
1766 tmp->next = caps->size;
1767 caps->size += size;
1768
1769 return header;
1770 }
1771 EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1772
1773 void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
1774 {
1775 struct vfio_info_cap_header *tmp;
1776 void *buf = (void *)caps->buf;
1777
1778 for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
1779 tmp->next += offset;
1780 }
1781 EXPORT_SYMBOL(vfio_info_cap_shift);
1782
1783 int vfio_info_add_capability(struct vfio_info_cap *caps,
1784 struct vfio_info_cap_header *cap, size_t size)
1785 {
1786 struct vfio_info_cap_header *header;
1787
1788 header = vfio_info_cap_add(caps, size, cap->id, cap->version);
1789 if (IS_ERR(header))
1790 return PTR_ERR(header);
1791
1792 memcpy(header + 1, cap + 1, size - sizeof(*header));
1793
1794 return 0;
1795 }
1796 EXPORT_SYMBOL(vfio_info_add_capability);
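/*
 * Illustrative sketch: a bus driver answering VFIO_DEVICE_GET_REGION_INFO
 * might build a capability chain and shift it to its final offset before
 * copying it out, roughly mirroring what vfio-pci does; "info", "arg",
 * "size" and the sparse mmap capability are assumed context:
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *	int ret;
 *
 *	ret = vfio_info_add_capability(&caps, &sparse->header, size);
 *	if (ret)
 *		return ret;
 *
 *	if (caps.size) {
 *		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 *		if (info.argsz >= sizeof(info) + caps.size) {
 *			info.cap_offset = sizeof(info);
 *			vfio_info_cap_shift(&caps, sizeof(info));
 *			if (copy_to_user((void __user *)arg + sizeof(info),
 *					 caps.buf, caps.size))
 *				ret = -EFAULT;
 *		}
 *		kfree(caps.buf);
 *	}
 */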
1797
1798 int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
1799 int max_irq_type, size_t *data_size)
1800 {
1801 unsigned long minsz;
1802 size_t size;
1803
1804 minsz = offsetofend(struct vfio_irq_set, count);
1805
1806 if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
1807 (hdr->count >= (U32_MAX - hdr->start)) ||
1808 (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
1809 VFIO_IRQ_SET_ACTION_TYPE_MASK)))
1810 return -EINVAL;
1811
1812 if (data_size)
1813 *data_size = 0;
1814
1815 if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
1816 return -EINVAL;
1817
1818 switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
1819 case VFIO_IRQ_SET_DATA_NONE:
1820 size = 0;
1821 break;
1822 case VFIO_IRQ_SET_DATA_BOOL:
1823 size = sizeof(uint8_t);
1824 break;
1825 case VFIO_IRQ_SET_DATA_EVENTFD:
1826 size = sizeof(int32_t);
1827 break;
1828 default:
1829 return -EINVAL;
1830 }
1831
1832 if (size) {
1833 if (hdr->argsz - minsz < hdr->count * size)
1834 return -EINVAL;
1835
1836 if (!data_size)
1837 return -EINVAL;
1838
1839 *data_size = hdr->count * size;
1840 }
1841
1842 return 0;
1843 }
1844 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
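
/*
 * Example (illustrative sketch only): a vfio-pci style driver handling
 * VFIO_DEVICE_SET_IRQS would typically validate the header and size the
 * trailing data with the helper above.  "arg", "max_irqs" and the ioctl
 * context are assumptions of the hypothetical caller.
 *
 *	struct vfio_irq_set hdr;
 *	unsigned long minsz;
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *	int ret;
 *
 *	minsz = offsetofend(struct vfio_irq_set, count);
 *	if (copy_from_user(&hdr, (void __user *)arg, minsz))
 *		return -EFAULT;
 *
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, max_irqs,
 *						 VFIO_PCI_NUM_IRQS, &data_size);
 *	if (ret)
 *		return ret;
 *
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + minsz), data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 */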
1845
1846 /*
1847 * Pin a set of guest PFNs and return their associated host PFNs for local
1848 * domain only.
1849 * @dev [in] : device
1850 * @user_pfn [in]: array of user/guest PFNs to be pinned.
1851 * @npage [in] : count of elements in user_pfn array. This count should not
1852 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1853 * @prot [in] : protection flags
1854 * @phys_pfn[out]: array of host PFNs
1855 * Return error or number of pages pinned.
1856 */
1857 int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
1858 int prot, unsigned long *phys_pfn)
1859 {
1860 struct vfio_container *container;
1861 struct vfio_group *group;
1862 struct vfio_iommu_driver *driver;
1863 int ret;
1864
1865 if (!dev || !user_pfn || !phys_pfn || !npage)
1866 return -EINVAL;
1867
1868 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1869 return -E2BIG;
1870
1871 group = vfio_group_get_from_dev(dev);
1872 if (!group)
1873 return -ENODEV;
1874
1875 if (group->dev_counter > 1) {
1876 ret = -EINVAL;
1877 goto err_pin_pages;
1878 }
1879
1880 ret = vfio_group_add_container_user(group);
1881 if (ret)
1882 goto err_pin_pages;
1883
1884 container = group->container;
1885 driver = container->iommu_driver;
1886 if (likely(driver && driver->ops->pin_pages))
1887 ret = driver->ops->pin_pages(container->iommu_data,
1888 group->iommu_group, user_pfn,
1889 npage, prot, phys_pfn);
1890 else
1891 ret = -ENOTTY;
1892
1893 vfio_group_try_dissolve_container(group);
1894
1895 err_pin_pages:
1896 vfio_group_put(group);
1897 return ret;
1898 }
1899 EXPORT_SYMBOL(vfio_pin_pages);
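
/*
 * Example (illustrative sketch only): a mediated device (mdev) driver could
 * pin a single guest page before programming it into device tables.  The
 * "mdev" handle, the mdev_dev() accessor and the "gfn" value are assumptions
 * of the hypothetical caller.
 *
 *	unsigned long user_pfn = gfn;
 *	unsigned long phys_pfn;
 *	int ret;
 *
 *	ret = vfio_pin_pages(mdev_dev(mdev), &user_pfn, 1,
 *			     IOMMU_READ | IOMMU_WRITE, &phys_pfn);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 */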
1900
1901 /*
1902 * Unpin set of host PFNs for local domain only.
1903 * @dev [in] : device
1904 * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
1905 * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1906 * @npage [in] : count of elements in user_pfn array. This count should not
1907 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1908 * Return error or number of pages unpinned.
1909 */
1910 int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
1911 {
1912 struct vfio_container *container;
1913 struct vfio_group *group;
1914 struct vfio_iommu_driver *driver;
1915 int ret;
1916
1917 if (!dev || !user_pfn || !npage)
1918 return -EINVAL;
1919
1920 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1921 return -E2BIG;
1922
1923 group = vfio_group_get_from_dev(dev);
1924 if (!group)
1925 return -ENODEV;
1926
1927 ret = vfio_group_add_container_user(group);
1928 if (ret)
1929 goto err_unpin_pages;
1930
1931 container = group->container;
1932 driver = container->iommu_driver;
1933 if (likely(driver && driver->ops->unpin_pages))
1934 ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
1935 npage);
1936 else
1937 ret = -ENOTTY;
1938
1939 vfio_group_try_dissolve_container(group);
1940
1941 err_unpin_pages:
1942 vfio_group_put(group);
1943 return ret;
1944 }
1945 EXPORT_SYMBOL(vfio_unpin_pages);
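
/*
 * Example (illustrative sketch only): the matching unpin for the
 * vfio_pin_pages() sketch above, once the device no longer references the
 * page.
 *
 *	vfio_unpin_pages(mdev_dev(mdev), &user_pfn, 1);
 */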
1946
1947 /*
1948 * Pin a set of guest IOVA PFNs and return their associated host PFNs for a
1949 * VFIO group.
1950 *
1951 * The caller needs to call vfio_group_get_external_user() or
1952 * vfio_group_get_external_user_from_dev() prior to calling this interface,
1953 * so as to prevent the VFIO group from being disposed of in the middle of
1954 * the call. The caller may keep that reference across several calls into
1955 * this interface.
1956 * When finished using the VFIO group, the caller must release it by
1957 * calling vfio_group_put_external_user().
1958 *
1959 * @group [in] : VFIO group
1960 * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be pinned.
1961 * @npage [in] : count of elements in user_iova_pfn array.
1962 * This count should not be greater than
1963 * VFIO_PIN_PAGES_MAX_ENTRIES.
1964 * @prot [in] : protection flags
1965 * @phys_pfn [out] : array of host PFNs
1966 * Return error or number of pages pinned.
1967 */
1968 int vfio_group_pin_pages(struct vfio_group *group,
1969 unsigned long *user_iova_pfn, int npage,
1970 int prot, unsigned long *phys_pfn)
1971 {
1972 struct vfio_container *container;
1973 struct vfio_iommu_driver *driver;
1974 int ret;
1975
1976 if (!group || !user_iova_pfn || !phys_pfn || !npage)
1977 return -EINVAL;
1978
1979 if (group->dev_counter > 1)
1980 return -EINVAL;
1981
1982 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1983 return -E2BIG;
1984
1985 container = group->container;
1986 driver = container->iommu_driver;
1987 if (likely(driver && driver->ops->pin_pages))
1988 ret = driver->ops->pin_pages(container->iommu_data,
1989 group->iommu_group, user_iova_pfn,
1990 npage, prot, phys_pfn);
1991 else
1992 ret = -ENOTTY;
1993
1994 return ret;
1995 }
1996 EXPORT_SYMBOL(vfio_group_pin_pages);
1997
1998 /*
1999 * Unpin a set of guest IOVA PFNs for a VFIO group.
2000 *
2001 * The caller needs to call vfio_group_get_external_user() or
2002 * vfio_group_get_external_user_from_dev() prior to calling this interface,
2003 * so as to prevent the VFIO group from being disposed of in the middle of
2004 * the call. The caller may keep that reference across several calls into
2005 * this interface.
2006 * When finished using the VFIO group, the caller must release it by
2007 * calling vfio_group_put_external_user().
2008 *
2009 * @group [in] : vfio group
2010 * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be unpinned.
2011 * @npage [in] : count of elements in user_iova_pfn array.
2012 * This count should not be greater than
2013 * VFIO_PIN_PAGES_MAX_ENTRIES.
2014 * Return error or number of pages unpinned.
2015 */
2016 int vfio_group_unpin_pages(struct vfio_group *group,
2017 unsigned long *user_iova_pfn, int npage)
2018 {
2019 struct vfio_container *container;
2020 struct vfio_iommu_driver *driver;
2021 int ret;
2022
2023 if (!group || !user_iova_pfn || !npage)
2024 return -EINVAL;
2025
2026 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
2027 return -E2BIG;
2028
2029 container = group->container;
2030 driver = container->iommu_driver;
2031 if (likely(driver && driver->ops->unpin_pages))
2032 ret = driver->ops->unpin_pages(container->iommu_data,
2033 user_iova_pfn, npage);
2034 else
2035 ret = -ENOTTY;
2036
2037 return ret;
2038 }
2039 EXPORT_SYMBOL(vfio_group_unpin_pages);
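
/*
 * Example (illustrative sketch only): the group-based variants require the
 * caller to hold an external-user reference across the calls, e.g.:
 *
 *	struct vfio_group *group;
 *	unsigned long iova_pfn = iova >> PAGE_SHIFT;
 *	unsigned long phys_pfn;
 *	int ret;
 *
 *	group = vfio_group_get_external_user_from_dev(mdev_dev(mdev));
 *	if (IS_ERR(group))
 *		return PTR_ERR(group);
 *
 *	ret = vfio_group_pin_pages(group, &iova_pfn, 1,
 *				   IOMMU_READ | IOMMU_WRITE, &phys_pfn);
 *	(use phys_pfn, then release the pin and the group reference)
 *	vfio_group_unpin_pages(group, &iova_pfn, 1);
 *	vfio_group_put_external_user(group);
 *
 * The "iova" and "mdev" values are assumptions of the hypothetical caller.
 */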
2040
2041
2042 /*
2043 * This interface allows the CPUs to perform a form of virtual DMA on
2044 * behalf of the device.
2045 *
2046 * The CPUs read from or write to a range of IOVAs pointing to user space
2047 * memory, copying the data out of or into a kernel buffer.
2048 *
2049 * As the access to user space memory is performed by the CPUs and is
2050 * not a real device DMA, it is not necessary to pin the user space memory.
2051 *
2052 * The caller needs to call vfio_group_get_external_user() or
2053 * vfio_group_get_external_user_from_dev() prior to calling this interface,
2054 * so as to prevent the VFIO group from being disposed of in the middle of
2055 * the call. The caller may keep that reference across several calls into
2056 * this interface.
2057 * When finished using the VFIO group, the caller must release it by
2058 * calling vfio_group_put_external_user().
2059 *
2060 * @group [in] : VFIO group
2061 * @user_iova [in] : base IOVA of a user space buffer
2062 * @data [in] : pointer to kernel buffer
2063 * @len [in] : kernel buffer length
2064 * @write : indicate read or write
2065 * Return error code on failure or 0 on success.
2066 */
2067 int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
2068 void *data, size_t len, bool write)
2069 {
2070 struct vfio_container *container;
2071 struct vfio_iommu_driver *driver;
2072 int ret = 0;
2073
2074 if (!group || !data || len <= 0)
2075 return -EINVAL;
2076
2077 container = group->container;
2078 driver = container->iommu_driver;
2079
2080 if (likely(driver && driver->ops->dma_rw))
2081 ret = driver->ops->dma_rw(container->iommu_data,
2082 user_iova, data, len, write);
2083 else
2084 ret = -ENOTTY;
2085
2086 return ret;
2087 }
2088 EXPORT_SYMBOL(vfio_dma_rw);
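
/*
 * Example (illustrative sketch only): with an external-user group reference
 * held (see above), a driver can read or write a small amount of guest
 * memory by IOVA without pinning it.  "group" and "iova" are assumptions of
 * the hypothetical caller.
 *
 *	u32 val;
 *	int ret;
 *
 *	(read 4 bytes at iova)
 *	ret = vfio_dma_rw(group, iova, &val, sizeof(val), false);
 *	if (ret)
 *		return ret;
 *
 *	(modify and write them back)
 *	val |= BIT(0);
 *	ret = vfio_dma_rw(group, iova, &val, sizeof(val), true);
 */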
2089
2090 static int vfio_register_iommu_notifier(struct vfio_group *group,
2091 unsigned long *events,
2092 struct notifier_block *nb)
2093 {
2094 struct vfio_container *container;
2095 struct vfio_iommu_driver *driver;
2096 int ret;
2097
2098 ret = vfio_group_add_container_user(group);
2099 if (ret)
2100 return -EINVAL;
2101
2102 container = group->container;
2103 driver = container->iommu_driver;
2104 if (likely(driver && driver->ops->register_notifier))
2105 ret = driver->ops->register_notifier(container->iommu_data,
2106 events, nb);
2107 else
2108 ret = -ENOTTY;
2109
2110 vfio_group_try_dissolve_container(group);
2111
2112 return ret;
2113 }
2114
2115 static int vfio_unregister_iommu_notifier(struct vfio_group *group,
2116 struct notifier_block *nb)
2117 {
2118 struct vfio_container *container;
2119 struct vfio_iommu_driver *driver;
2120 int ret;
2121
2122 ret = vfio_group_add_container_user(group);
2123 if (ret)
2124 return -EINVAL;
2125
2126 container = group->container;
2127 driver = container->iommu_driver;
2128 if (likely(driver && driver->ops->unregister_notifier))
2129 ret = driver->ops->unregister_notifier(container->iommu_data,
2130 nb);
2131 else
2132 ret = -ENOTTY;
2133
2134 vfio_group_try_dissolve_container(group);
2135
2136 return ret;
2137 }
2138
2139 void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
2140 {
2141 group->kvm = kvm;
2142 blocking_notifier_call_chain(&group->notifier,
2143 VFIO_GROUP_NOTIFY_SET_KVM, kvm);
2144 }
2145 EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
2146
2147 static int vfio_register_group_notifier(struct vfio_group *group,
2148 unsigned long *events,
2149 struct notifier_block *nb)
2150 {
2151 int ret;
2152 bool set_kvm = false;
2153
2154 if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
2155 set_kvm = true;
2156
2157 /* clear known events */
2158 *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
2159
2160 /* refuse to continue if any unknown events remain */
2161 if (*events)
2162 return -EINVAL;
2163
2164 ret = vfio_group_add_container_user(group);
2165 if (ret)
2166 return -EINVAL;
2167
2168 ret = blocking_notifier_chain_register(&group->notifier, nb);
2169
2170 /*
2171 * The attaching of the kvm to the vfio_group might have already
2172 * happened, so replay it once upon registration.
2173 */
2174 if (!ret && set_kvm && group->kvm)
2175 blocking_notifier_call_chain(&group->notifier,
2176 VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
2177
2178 vfio_group_try_dissolve_container(group);
2179
2180 return ret;
2181 }
2182
2183 static int vfio_unregister_group_notifier(struct vfio_group *group,
2184 struct notifier_block *nb)
2185 {
2186 int ret;
2187
2188 ret = vfio_group_add_container_user(group);
2189 if (ret)
2190 return -EINVAL;
2191
2192 ret = blocking_notifier_chain_unregister(&group->notifier, nb);
2193
2194 vfio_group_try_dissolve_container(group);
2195
2196 return ret;
2197 }
2198
2199 int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
2200 unsigned long *events, struct notifier_block *nb)
2201 {
2202 struct vfio_group *group;
2203 int ret;
2204
2205 if (!dev || !nb || !events || (*events == 0))
2206 return -EINVAL;
2207
2208 group = vfio_group_get_from_dev(dev);
2209 if (!group)
2210 return -ENODEV;
2211
2212 switch (type) {
2213 case VFIO_IOMMU_NOTIFY:
2214 ret = vfio_register_iommu_notifier(group, events, nb);
2215 break;
2216 case VFIO_GROUP_NOTIFY:
2217 ret = vfio_register_group_notifier(group, events, nb);
2218 break;
2219 default:
2220 ret = -EINVAL;
2221 }
2222
2223 vfio_group_put(group);
2224 return ret;
2225 }
2226 EXPORT_SYMBOL(vfio_register_notifier);
2227
2228 int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
2229 struct notifier_block *nb)
2230 {
2231 struct vfio_group *group;
2232 int ret;
2233
2234 if (!dev || !nb)
2235 return -EINVAL;
2236
2237 group = vfio_group_get_from_dev(dev);
2238 if (!group)
2239 return -ENODEV;
2240
2241 switch (type) {
2242 case VFIO_IOMMU_NOTIFY:
2243 ret = vfio_unregister_iommu_notifier(group, nb);
2244 break;
2245 case VFIO_GROUP_NOTIFY:
2246 ret = vfio_unregister_group_notifier(group, nb);
2247 break;
2248 default:
2249 ret = -EINVAL;
2250 }
2251
2252 vfio_group_put(group);
2253 return ret;
2254 }
2255 EXPORT_SYMBOL(vfio_unregister_notifier);
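
/*
 * Example (illustrative sketch only): a mediated device driver that pins
 * pages usually also registers for DMA unmap notifications so it can drop
 * its pins when the user unmaps the IOVA range.  The callback name and the
 * "mdev" handle are assumptions of the hypothetical caller.
 *
 *	static int my_dma_unmap_cb(struct notifier_block *nb,
 *				   unsigned long action, void *data)
 *	{
 *		if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 *			struct vfio_iommu_type1_dma_unmap *unmap = data;
 *			(unpin anything inside [unmap->iova, unmap->iova + unmap->size))
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 *	struct notifier_block nb = { .notifier_call = my_dma_unmap_cb };
 *
 *	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 *				     &events, &nb);
 *	(later, on teardown)
 *	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &nb);
 *
 * A VFIO_GROUP_NOTIFY registration with VFIO_GROUP_NOTIFY_SET_KVM works the
 * same way and is replayed immediately if a KVM is already set.
 */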
2256
2257 struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group)
2258 {
2259 struct vfio_container *container;
2260 struct vfio_iommu_driver *driver;
2261
2262 if (!group)
2263 return ERR_PTR(-EINVAL);
2264
2265 container = group->container;
2266 driver = container->iommu_driver;
2267 if (likely(driver && driver->ops->group_iommu_domain))
2268 return driver->ops->group_iommu_domain(container->iommu_data,
2269 group->iommu_group);
2270
2271 return ERR_PTR(-ENOTTY);
2272 }
2273 EXPORT_SYMBOL_GPL(vfio_group_iommu_domain);
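
/*
 * Example (illustrative sketch only): an external driver holding a group
 * reference can use the returned domain to translate an IOVA it previously
 * mapped.  "group" and "iova" are assumptions of the hypothetical caller.
 *
 *	struct iommu_domain *domain = vfio_group_iommu_domain(group);
 *	phys_addr_t pa;
 *
 *	if (IS_ERR(domain))
 *		return PTR_ERR(domain);
 *	pa = iommu_iova_to_phys(domain, iova);
 */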
2274
2275 /*
2276 * Module/class support
2277 */
2278 static char *vfio_devnode(struct device *dev, umode_t *mode)
2279 {
2280 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
2281 }
2282
2283 static struct miscdevice vfio_dev = {
2284 .minor = VFIO_MINOR,
2285 .name = "vfio",
2286 .fops = &vfio_fops,
2287 .nodename = "vfio/vfio",
2288 .mode = S_IRUGO | S_IWUGO,
2289 };
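
/*
 * Example (illustrative sketch only): the character devices registered here
 * are consumed from user space roughly as follows (see
 * Documentation/driver-api/vfio.rst for the full flow); "$GROUP" stands for
 * the IOMMU group number of the assigned device.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/$GROUP", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		(unknown API version, bail)
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		(no type1 IOMMU support, bail)
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 */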
2290
2291 static int __init vfio_init(void)
2292 {
2293 int ret;
2294
2295 ida_init(&vfio.group_ida);
2296 mutex_init(&vfio.group_lock);
2297 mutex_init(&vfio.iommu_drivers_lock);
2298 INIT_LIST_HEAD(&vfio.group_list);
2299 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
2300
2301 ret = misc_register(&vfio_dev);
2302 if (ret) {
2303 pr_err("vfio: misc device register failed\n");
2304 return ret;
2305 }
2306
2307 /* /dev/vfio/$GROUP */
2308 vfio.class = class_create(THIS_MODULE, "vfio");
2309 if (IS_ERR(vfio.class)) {
2310 ret = PTR_ERR(vfio.class);
2311 goto err_class;
2312 }
2313
2314 vfio.class->devnode = vfio_devnode;
2315
2316 ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
2317 if (ret)
2318 goto err_alloc_chrdev;
2319
2320 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
2321
2322 #ifdef CONFIG_VFIO_NOIOMMU
2323 vfio_register_iommu_driver(&vfio_noiommu_ops);
2324 #endif
2325 return 0;
2326
2327 err_alloc_chrdev:
2328 class_destroy(vfio.class);
2329 vfio.class = NULL;
2330 err_class:
2331 misc_deregister(&vfio_dev);
2332 return ret;
2333 }
2334
2335 static void __exit vfio_cleanup(void)
2336 {
2337 WARN_ON(!list_empty(&vfio.group_list));
2338
2339 #ifdef CONFIG_VFIO_NOIOMMU
2340 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
2341 #endif
2342 ida_destroy(&vfio.group_ida);
2343 unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
2344 class_destroy(vfio.class);
2345 vfio.class = NULL;
2346 misc_deregister(&vfio_dev);
2347 xa_destroy(&vfio_device_set_xa);
2348 }
2349
2350 module_init(vfio_init);
2351 module_exit(vfio_cleanup);
2352
2353 MODULE_VERSION(DRIVER_VERSION);
2354 MODULE_LICENSE("GPL v2");
2355 MODULE_AUTHOR(DRIVER_AUTHOR);
2356 MODULE_DESCRIPTION(DRIVER_DESC);
2357 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
2358 MODULE_ALIAS("devname:vfio/vfio");
2359 MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
2360