/*
 * Copyright (c) 2014-2015 Travis Geiselbrecht
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */
#include <dev/virtio/block.h>

#include <stdlib.h>
#include <lk/debug.h>
#include <assert.h>
#include <lk/trace.h>
#include <lk/compiler.h>
#include <lk/list.h>
#include <lk/err.h>
#include <kernel/thread.h>
#include <kernel/event.h>
#include <kernel/mutex.h>
#include <lib/bio.h>

#if WITH_KERNEL_VM
#include <kernel/vm.h>
#endif

#define LOCAL_TRACE 0

struct virtio_blk_config {
    uint64_t capacity;
    uint32_t size_max;
    uint32_t seg_max;
    struct virtio_blk_geometry {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
    } geometry;
    uint32_t blk_size;
};
STATIC_ASSERT(sizeof(struct virtio_blk_config) == 24);

struct virtio_blk_req {
    uint32_t type;
    uint32_t ioprio;
    uint64_t sector;
};
STATIC_ASSERT(sizeof(struct virtio_blk_req) == 16);
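
/*
 * Each I/O submitted to the device is a descriptor chain laid out as
 * [virtio_blk_req header][one or more data buffers][one status byte]:
 * the header is device-readable, the status byte is device-writable, and
 * the data buffers are device-writable only for reads (VIRTIO_BLK_T_IN).
 */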

#define VIRTIO_BLK_F_BARRIER  (1<<0)
#define VIRTIO_BLK_F_SIZE_MAX (1<<1)
#define VIRTIO_BLK_F_SEG_MAX  (1<<2)
#define VIRTIO_BLK_F_GEOMETRY (1<<4)
#define VIRTIO_BLK_F_RO       (1<<5)
#define VIRTIO_BLK_F_BLK_SIZE (1<<6)
#define VIRTIO_BLK_F_SCSI     (1<<7)
#define VIRTIO_BLK_F_FLUSH    (1<<9)
#define VIRTIO_BLK_F_TOPOLOGY (1<<10)
#define VIRTIO_BLK_F_CONFIG_WCE (1<<11)
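/* legacy (pre-1.0) virtio-blk feature bits; the driver does not negotiate
 * these yet, see the XXX in virtio_block_init below */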

#define VIRTIO_BLK_T_IN         0
#define VIRTIO_BLK_T_OUT        1
#define VIRTIO_BLK_T_FLUSH      4

#define VIRTIO_BLK_S_OK         0
#define VIRTIO_BLK_S_IOERR      1
#define VIRTIO_BLK_S_UNSUPP     2

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);

struct virtio_block_dev {
    struct virtio_device *dev;

    mutex_t lock;
    event_t io_event;

    /* bio block device */
    bdev_t bdev;

    /* one blk_req structure for io, not crossing a page boundary */
    struct virtio_blk_req *blk_req;
    paddr_t blk_req_phys;

    /* one uint8_t response word */
    uint8_t blk_response;
    paddr_t blk_response_phys;
};

status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
    LTRACEF("dev %p, host_features 0x%x\n", dev, host_features);

    /* allocate a new block device */
    struct virtio_block_dev *bdev = malloc(sizeof(struct virtio_block_dev));
    if (!bdev)
        return ERR_NO_MEMORY;

    mutex_init(&bdev->lock);
    event_init(&bdev->io_event, false, EVENT_FLAG_AUTOUNSIGNAL);

    bdev->dev = dev;
    dev->priv = bdev;

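    /*
     * the request header is 16 bytes allocated with 16 byte alignment, so it
     * can never straddle a page boundary and a single physical address/length
     * pair fully describes it to the device.
     */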
    bdev->blk_req = memalign(sizeof(struct virtio_blk_req), sizeof(struct virtio_blk_req));
#if WITH_KERNEL_VM
    bdev->blk_req_phys = vaddr_to_paddr(bdev->blk_req);
#else
    bdev->blk_req_phys = (uint64_t)(uintptr_t)bdev->blk_req;
#endif
    LTRACEF("blk_req structure at %p (0x%lx phys)\n", bdev->blk_req, bdev->blk_req_phys);

#if WITH_KERNEL_VM
    bdev->blk_response_phys = vaddr_to_paddr(&bdev->blk_response);
#else
    bdev->blk_response_phys = (uint64_t)(uintptr_t)&bdev->blk_response;
#endif

    /* make sure the device is reset */
    virtio_reset_device(dev);

    volatile struct virtio_blk_config *config = (struct virtio_blk_config *)dev->config_ptr;

    LTRACEF("capacity 0x%llx\n", config->capacity);
    LTRACEF("size_max 0x%x\n", config->size_max);
    LTRACEF("seg_max  0x%x\n", config->seg_max);
    LTRACEF("blk_size 0x%x\n", config->blk_size);

    /* ack and set the driver status bit */
    virtio_status_acknowledge_driver(dev);

    // XXX check features bits and ack/nak them

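    /* legacy virtio-blk devices expose a single request queue, queue 0 */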
    /* allocate a virtio ring */
    virtio_alloc_ring(dev, 0, 256);

    /* set our irq handler */
    dev->irq_driver_callback = &virtio_block_irq_driver_callback;

    /* set DRIVER_OK */
    virtio_status_driver_ok(dev);

    /* construct the block device */
    static uint8_t found_index = 0;
    char buf[16];
    snprintf(buf, sizeof(buf), "virtio%u", found_index++);
    bio_initialize_bdev(&bdev->bdev, buf,
                        config->blk_size, config->capacity,
                        0, NULL, BIO_FLAGS_NONE);

    /* override our block device hooks */
    bdev->bdev.read_block = &virtio_bdev_read_block;
    bdev->bdev.write_block = &virtio_bdev_write_block;

    bio_register_device(&bdev->bdev);

    printf("found virtio block device of size %lld\n", config->capacity * config->blk_size);

    return NO_ERROR;
}
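
/*
 * note: virtio_block_init is invoked from the generic virtio bus code when a
 * device advertising the block device ID (2 in the virtio spec) is discovered.
 */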

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        //virtio_dump_desc(desc);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (next < 0)
            break;
        i = next;
    }

    /* signal our event */
    event_signal(&bdev->io_event, false);

    /* a waiting thread was woken, so ask the kernel to reschedule on the way out */
    return INT_RESCHEDULE;
}

ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, off_t offset, size_t len, bool write) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    uint16_t i;
    struct vring_desc *desc;

    LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);

    mutex_acquire(&bdev->lock);

    /* set up the request */
    bdev->blk_req->type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
    bdev->blk_req->ioprio = 0;
    bdev->blk_req->sector = offset / 512; /* virtio-blk sectors are always 512 bytes, independent of blk_size */
    LTRACEF("blk_req type %u ioprio %u sector %llu\n",
            bdev->blk_req->type, bdev->blk_req->ioprio, bdev->blk_req->sector);

    /* put together a transfer */
    desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
    LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
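    // XXX no check that the 3 descriptor chain allocation succeeded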

    // XXX not cache safe.
    // At the moment only tested on arm qemu, which doesn't emulate cache.

    /* set up the descriptor pointing to the head */
    desc->addr = bdev->blk_req_phys;
    desc->len = sizeof(struct virtio_blk_req);
    desc->flags |= VRING_DESC_F_NEXT;

    /* set up the descriptor pointing to the buffer */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
#if WITH_KERNEL_VM
    /* translate the first buffer */
    vaddr_t va = (vaddr_t)buf;
    paddr_t pa = vaddr_to_paddr((void *)va);
    desc->addr = (uint64_t)pa;
    /* desc->len is filled in below */
#else
    desc->addr = (uint64_t)(uintptr_t)buf;
    desc->len = len;
#endif
    desc->flags |= write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if it's a block read */
    desc->flags |= VRING_DESC_F_NEXT;

#if WITH_KERNEL_VM
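    /*
     * the buffer is virtually contiguous but may be physically scattered:
     * walk it a page at a time, extending the current descriptor while the
     * pages translate to physically contiguous addresses and splicing a new
     * descriptor into the chain (ahead of the response descriptor) when they
     * don't.
     */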
    /* see if we need to add more descriptors due to scatter gather */
    paddr_t next_pa = PAGE_ALIGN(pa + 1);
    desc->len = MIN(next_pa - pa, len);
    LTRACEF("first descriptor va 0x%lx desc->addr 0x%llx desc->len %u\n", va, desc->addr, desc->len);
    len -= desc->len;
    while (len > 0) {
        /* amount of source buffer handled by this iteration of the loop */
        size_t len_tohandle = MIN(len, PAGE_SIZE);

        /* translate the next page in the buffer */
        va = PAGE_ALIGN(va + 1);
        pa = vaddr_to_paddr((void *)va);
        LTRACEF("va now 0x%lx, pa 0x%lx, next_pa 0x%lx, remaining len %zu\n", va, pa, next_pa, len);

        /* is the new translated physical address contiguous to the last one? */
        if (next_pa == pa) {
            LTRACEF("extending last one by %zu bytes\n", len_tohandle);
            desc->len += len_tohandle;
        } else {
            uint16_t next_i = virtio_alloc_desc(dev, 0);
            struct vring_desc *next_desc = virtio_desc_index_to_desc(dev, 0, next_i);
            DEBUG_ASSERT(next_desc);

            LTRACEF("doesn't extend, need new desc, allocated desc %i (%p)\n", next_i, next_desc);

            /* fill this descriptor in and put it after the last one but before the response descriptor */
            next_desc->addr = (uint64_t)pa;
            next_desc->len = len_tohandle;
            next_desc->flags = write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if it's a block read */
            next_desc->flags |= VRING_DESC_F_NEXT;
            next_desc->next = desc->next;
            desc->next = next_i;

            desc = next_desc;
        }
        len -= len_tohandle;
        next_pa += PAGE_SIZE;
    }
#endif

    /* set up the descriptor pointing to the response */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
    desc->addr = bdev->blk_response_phys;
    desc->len = 1;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(dev, 0, i);

    /* kick it off */
    virtio_kick(dev, 0);

    /* wait for the transfer to complete */
    event_wait(&bdev->io_event);

    LTRACEF("status 0x%hhx\n", bdev->blk_response);

    /* check the one byte status the device wrote back; return 0 on success,
     * which is what the read/write block hooks below test for */
    ssize_t result = (bdev->blk_response == VIRTIO_BLK_S_OK) ? 0 : ERR_IO;

    mutex_release(&bdev->lock);

    return result;
}

static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    if (virtio_block_read_write(dev->dev, buf, (off_t)block * dev->bdev.block_size,
                                count * dev->bdev.block_size, false) == 0) {
        return count * dev->bdev.block_size;
    } else {
        return ERR_IO;
    }
}

static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    if (virtio_block_read_write(dev->dev, (void *)buf, (off_t)block * dev->bdev.block_size,
                                count * dev->bdev.block_size, true) == 0) {
        return count * dev->bdev.block_size;
    } else {
        return ERR_IO;
    }
}
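
/*
 * Usage sketch (a minimal example, assuming lk's generic lib/bio API): once
 * registered, the device can be opened by name and driven through bio, e.g.
 *
 *   bdev_t *b = bio_open("virtio0");
 *   if (b) {
 *       uint8_t sector[512];
 *       bio_read_block(b, sector, 0, 1);   // read the first block
 *       bio_close(b);
 *   }
 */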