1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * A note on write transactions:
29 * Writes that require updating the BAT or bitmaps cannot be signaled
30 * as complete until all updates have reached disk. Transactions are
31 * used to ensure proper ordering in these cases. The two types of
32 * transactions are as follows:
33 * - Bitmap updates only: data writes that require updates to the same
34 * bitmap are grouped in a transaction. Only after all data writes
35 * in a transaction complete does the bitmap write commence. Only
36 * after the bitmap write finishes are the data writes signaled as
37 * complete.
38 * - BAT and bitmap updates: data writes are grouped in transactions
39 * as above, but a special extra write is included in the transaction,
40 * which zeros out the newly allocated bitmap on disk. When the data
41 * writes and the zero-bitmap write complete, the BAT and bitmap writes
42 * are started in parallel. The transaction is completed only after both
43 * the BAT and bitmap writes successfully return.
44 */
45
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <unistd.h>
51 #include <sys/stat.h>
52 #include <sys/ioctl.h>
53 #include <string.h> /* for memset. */
54 #include <libaio.h>
55 #include <sys/mman.h>
56
57 #include "libvhd.h"
58 #include "tapdisk.h"
59 #include "tapdisk-driver.h"
60 #include "tapdisk-interface.h"
61 #include "tapdisk-disktype.h"
62
63 unsigned int SPB;
64
65 #define DEBUGGING 2
66 #define ASSERTING 1
67 #define MICROSOFT_COMPAT
68
69 #define VHD_BATMAP_MAX_RETRIES 10
70
71 #define __TRACE(s) \
72 do { \
73 DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %" \
74 PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: " \
75 "%lu, BBLK: 0x%04x\n", \
76 s->vhd.file, s->queued, s->completed, s->returned, \
77 VHD_REQS_DATA - s->vreq_free_count, \
78 s->bat.pbw_blk); \
79 } while(0)
80
81 #define __ASSERT(_p) \
82 if (!(_p)) { \
83 DPRINTF("%s:%d: FAILED ASSERTION: '%s'\n", \
84 __FILE__, __LINE__, #_p); \
85 DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n", \
86 __FILE__, __LINE__, #_p); \
87 tlog_flush(); \
88 abort(); \
89 }
90
91 #if (DEBUGGING == 1)
92 #define DBG(level, _f, _a...) DPRINTF(_f, ##_a)
93 #define ERR(err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
94 #define TRACE(s) ((void)0)
95 #elif (DEBUGGING == 2)
96 #define DBG(level, _f, _a...) tlog_write(level, _f, ##_a)
97 #define ERR(err, _f, _a...) tlog_error(err, _f, ##_a)
98 #define TRACE(s) __TRACE(s)
99 #else
100 #define DBG(level, _f, _a...) ((void)0)
101 #define ERR(err, _f, _a...) ((void)0)
102 #define TRACE(s) ((void)0)
103 #endif
104
105 #if (ASSERTING == 1)
106 #define ASSERT(_p) __ASSERT(_p)
107 #else
108 #define ASSERT(_p) ((void)0)
109 #endif
110
111 /******VHD DEFINES******/
112 #define VHD_CACHE_SIZE 32
113
114 #define VHD_REQS_DATA TAPDISK_DATA_REQUESTS
115 #define VHD_REQS_META (VHD_CACHE_SIZE + 2)
116 #define VHD_REQS_TOTAL (VHD_REQS_DATA + VHD_REQS_META)
117
118 #define VHD_OP_BAT_WRITE 0
119 #define VHD_OP_DATA_READ 1
120 #define VHD_OP_DATA_WRITE 2
121 #define VHD_OP_BITMAP_READ 3
122 #define VHD_OP_BITMAP_WRITE 4
123 #define VHD_OP_ZERO_BM_WRITE 5
124
125 #define VHD_BM_BAT_LOCKED 0
126 #define VHD_BM_BAT_CLEAR 1
127 #define VHD_BM_BIT_CLEAR 2
128 #define VHD_BM_BIT_SET 3
129 #define VHD_BM_NOT_CACHED 4
130 #define VHD_BM_READ_PENDING 5
131
132 #define VHD_FLAG_OPEN_RDONLY 1
133 #define VHD_FLAG_OPEN_NO_CACHE 2
134 #define VHD_FLAG_OPEN_QUIET 4
135 #define VHD_FLAG_OPEN_STRICT 8
136 #define VHD_FLAG_OPEN_QUERY 16
137 #define VHD_FLAG_OPEN_PREALLOCATE 32
138
139 #define VHD_FLAG_BAT_LOCKED 1
140 #define VHD_FLAG_BAT_WRITE_STARTED 2
141
142 #define VHD_FLAG_BM_UPDATE_BAT 1
143 #define VHD_FLAG_BM_WRITE_PENDING 2
144 #define VHD_FLAG_BM_READ_PENDING 4
145 #define VHD_FLAG_BM_LOCKED 8
146
147 #define VHD_FLAG_REQ_UPDATE_BAT 1
148 #define VHD_FLAG_REQ_UPDATE_BITMAP 2
149 #define VHD_FLAG_REQ_QUEUED 4
150 #define VHD_FLAG_REQ_FINISHED 8
151
152 #define VHD_FLAG_TX_LIVE 1
153 #define VHD_FLAG_TX_UPDATE_BAT 2
154
155 typedef uint8_t vhd_flag_t;
156
157 struct vhd_state;
158 struct vhd_request;
159
160 struct vhd_req_list {
161 struct vhd_request *head;
162 struct vhd_request *tail;
163 };
164
165 struct vhd_transaction {
166 int error;
167 int closed;
168 int started;
169 int finished;
170 vhd_flag_t status;
171 struct vhd_req_list requests;
172 };
173
174 struct vhd_request {
175 int error;
176 uint8_t op;
177 vhd_flag_t flags;
178 td_request_t treq;
179 struct tiocb tiocb;
180 struct vhd_state *state;
181 struct vhd_request *next;
182 struct vhd_transaction *tx;
183 };
184
185 struct vhd_bat_state {
186 vhd_bat_t bat;
187 vhd_batmap_t batmap;
188 vhd_flag_t status;
189 uint32_t pbw_blk; /* blk num of pending write */
190 uint64_t pbw_offset; /* file offset of same */
191 struct vhd_request req; /* for writing bat table */
192 struct vhd_request zero_req; /* for initializing bitmaps */
193 char *bat_buf;
194 };
195
196 struct vhd_bitmap {
197 u32 blk;
198 u64 seqno; /* lru sequence number */
199 vhd_flag_t status;
200
201 char *map; /* map should only be modified
202 * in finish_bitmap_write */
203 char *shadow; /* in-memory bitmap changes are
204 * made to shadow and copied to
205 * map only after having been
206 * flushed to disk */
207 struct vhd_transaction tx; /* transaction data structure
208 * encapsulating data, bitmap,
209 * and bat writes */
210 struct vhd_req_list queue; /* data writes waiting for next
211 * transaction */
212 struct vhd_req_list waiting; /* pending requests that cannot
213 * be serviced until this bitmap
214 * is read from disk */
215 struct vhd_request req;
216 };
217
218 struct vhd_state {
219 vhd_flag_t flags;
220
221 /* VHD stuff */
222 vhd_context_t vhd;
223 u32 spp; /* sectors per page */
224 u32 spb; /* sectors per block */
225 u64 next_db; /* pointer to the next
226 * (unallocated) datablock */
227
228 struct vhd_bat_state bat;
229
230 u64 bm_lru; /* lru sequence number */
231 u32 bm_secs; /* size of bitmap, in sectors */
232 struct vhd_bitmap *bitmap[VHD_CACHE_SIZE];
233
234 int bm_free_count;
235 struct vhd_bitmap *bitmap_free[VHD_CACHE_SIZE];
236 struct vhd_bitmap bitmap_list[VHD_CACHE_SIZE];
237
238 int vreq_free_count;
239 struct vhd_request *vreq_free[VHD_REQS_DATA];
240 struct vhd_request vreq_list[VHD_REQS_DATA];
241
242 td_driver_t *driver;
243
244 uint64_t queued;
245 uint64_t completed;
246 uint64_t returned;
247 uint64_t reads;
248 uint64_t read_size;
249 uint64_t writes;
250 uint64_t write_size;
251 };
252
253 #define test_vhd_flag(word, flag) ((word) & (flag))
254 #define set_vhd_flag(word, flag) ((word) |= (flag))
255 #define clear_vhd_flag(word, flag) ((word) &= ~(flag))
256
257 #define bat_entry(s, blk) ((s)->bat.bat.bat[(blk)])
258
259 static void vhd_complete(void *, struct tiocb *, int);
260 static void finish_data_transaction(struct vhd_state *, struct vhd_bitmap *);
261
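/*
 * Shared, read-only zero buffer used for zeroing freshly allocated
 * bitmaps and (with VHD_FLAG_OPEN_PREALLOCATE) whole data blocks. It is
 * mapped by the first vhd_state to open and unmapped only when that
 * same state closes.
 */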
262 static struct vhd_state *_vhd_master;
263 static unsigned long _vhd_zsize;
264 static char *_vhd_zeros;
265
266 static int
267 vhd_initialize(struct vhd_state *s)
268 {
269 if (_vhd_zeros)
270 return 0;
271
272 _vhd_zsize = 2 * getpagesize();
273 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
274 _vhd_zsize += VHD_BLOCK_SIZE;
275
276 _vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
277 MAP_SHARED | MAP_ANON, -1, 0);
278 if (_vhd_zeros == MAP_FAILED) {
279 EPRINTF("vhd_initialize failed: %d\n", -errno);
280 _vhd_zeros = NULL;
281 _vhd_zsize = 0;
282 return -errno;
283 }
284
285 _vhd_master = s;
286 return 0;
287 }
288
289 static void
290 vhd_free(struct vhd_state *s)
291 {
292 if (_vhd_master != s || !_vhd_zeros)
293 return;
294
295 munmap(_vhd_zeros, _vhd_zsize);
296 _vhd_zsize = 0;
297 _vhd_zeros = NULL;
298 _vhd_master = NULL;
299 }
300
301 static char *
302 _get_vhd_zeros(const char *func, unsigned long size)
303 {
304 if (!_vhd_zeros || _vhd_zsize < size) {
305 EPRINTF("invalid zero request from %s: %lu, %lu, %p\n",
306 func, size, _vhd_zsize, _vhd_zeros);
307 ASSERT(0);
308 }
309
310 return _vhd_zeros;
311 }
312
313 #define vhd_zeros(size) _get_vhd_zeros(__func__, size)
314
315 static inline void
316 set_batmap(struct vhd_state *s, uint32_t blk)
317 {
318 if (s->bat.batmap.map) {
319 vhd_batmap_set(&s->vhd, &s->bat.batmap, blk);
320 DBG(TLOG_DBG, "block 0x%x completely full\n", blk);
321 }
322 }
323
324 static inline int
325 test_batmap(struct vhd_state *s, uint32_t blk)
326 {
327 if (!s->bat.batmap.map)
328 return 0;
329 return vhd_batmap_test(&s->vhd, &s->bat.batmap, blk);
330 }
331
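/*
 * Overwrite the footer at the end of the file with filler bytes. Done
 * on a strict, writable open so that an unclean shutdown leaves the
 * image detectably lacking a valid footer; _vhd_close rewrites it.
 */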
332 static int
333 vhd_kill_footer(struct vhd_state *s)
334 {
335 int err;
336 off_t end;
337 char *zeros;
338
339 if (s->vhd.footer.type == HD_TYPE_FIXED)
340 return 0;
341
342 err = posix_memalign((void **)&zeros, 512, 512);
343 if (err)
344 return -err;
345
346 err = 1;
347 memset(zeros, 0xc7, 512);
348
349 if ((end = lseek(s->vhd.fd, 0, SEEK_END)) == -1)
350 goto fail;
351
352 if (lseek(s->vhd.fd, (end - 512), SEEK_SET) == -1)
353 goto fail;
354
355 if (write(s->vhd.fd, zeros, 512) != 512)
356 goto fail;
357
358 err = 0;
359
360 fail:
361 free(zeros);
362 if (err)
363 return (errno ? -errno : -EIO);
364 return 0;
365 }
366
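/*
 * Compute s->next_db, the first sector beyond the metadata and all
 * currently allocated data blocks, by scanning the BAT for the highest
 * allocated entry.
 */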
367 static inline int
368 find_next_free_block(struct vhd_state *s)
369 {
370 int err;
371 off_t eom;
372 uint32_t i, entry;
373
374 err = vhd_end_of_headers(&s->vhd, &eom);
375 if (err)
376 return err;
377
378 s->next_db = secs_round_up(eom);
379
380 for (i = 0; i < s->bat.bat.entries; i++) {
381 entry = bat_entry(s, i);
382 if (entry != DD_BLK_UNUSED && entry >= s->next_db)
383 s->next_db = entry + s->spb + s->bm_secs;
384 }
385
386 return 0;
387 }
388
389 static void
390 vhd_free_bat(struct vhd_state *s)
391 {
392 free(s->bat.bat.bat);
393 free(s->bat.batmap.map);
394 free(s->bat.bat_buf);
395 memset(&s->bat, 0, sizeof(s->bat));
396 }
397
398 static int
399 vhd_initialize_bat(struct vhd_state *s)
400 {
401 int err, psize, batmap_required, i;
402
403 memset(&s->bat, 0, sizeof(s->bat));
404
405 psize = getpagesize();
406
407 err = vhd_read_bat(&s->vhd, &s->bat.bat);
408 if (err) {
409 EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
410 return err;
411 }
412
413 batmap_required = 1;
414 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY)) {
415 batmap_required = 0;
416 } else {
417 err = find_next_free_block(s);
418 if (err)
419 goto fail;
420 }
421
422 if (vhd_has_batmap(&s->vhd)) {
423 for (i = 0; i < VHD_BATMAP_MAX_RETRIES; i++) {
424 err = vhd_read_batmap(&s->vhd, &s->bat.batmap);
425 if (err) {
426 EPRINTF("%s: reading batmap: %d\n",
427 s->vhd.file, err);
428 if (batmap_required)
429 goto fail;
430 } else {
431 break;
432 }
433 }
434 if (err)
435 EPRINTF("%s: ignoring non-critical batmap error\n",
436 s->vhd.file);
437 }
438
439 err = posix_memalign((void **)&s->bat.bat_buf,
440 VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
441 if (err) {
442 s->bat.bat_buf = NULL;
443 goto fail;
444 }
445
446 return 0;
447
448 fail:
449 vhd_free_bat(s);
450 return err;
451 }
452
453 static void
454 vhd_free_bitmap_cache(struct vhd_state *s)
455 {
456 int i;
457 struct vhd_bitmap *bm;
458
459 for (i = 0; i < VHD_CACHE_SIZE; i++) {
460 bm = s->bitmap_list + i;
461 free(bm->map);
462 free(bm->shadow);
463 s->bitmap_free[i] = NULL;
464 }
465
466 memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
467 }
468
469 static int
470 vhd_initialize_bitmap_cache(struct vhd_state *s)
471 {
472 int i, err, map_size;
473 struct vhd_bitmap *bm;
474
475 memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
476
477 s->bm_lru = 0;
478 map_size = vhd_sectors_to_bytes(s->bm_secs);
479 s->bm_free_count = VHD_CACHE_SIZE;
480
481 for (i = 0; i < VHD_CACHE_SIZE; i++) {
482 bm = s->bitmap_list + i;
483
484 err = posix_memalign((void **)&bm->map, 512, map_size);
485 if (err) {
486 bm->map = NULL;
487 goto fail;
488 }
489
490 err = posix_memalign((void **)&bm->shadow, 512, map_size);
491 if (err) {
492 bm->shadow = NULL;
493 goto fail;
494 }
495
496 memset(bm->map, 0, map_size);
497 memset(bm->shadow, 0, map_size);
498 s->bitmap_free[i] = bm;
499 }
500
501 return 0;
502
503 fail:
504 vhd_free_bitmap_cache(s);
505 return err;
506 }
507
508 static int
509 vhd_initialize_dynamic_disk(struct vhd_state *s)
510 {
511 int err;
512
513 err = vhd_get_header(&s->vhd);
514 if (err) {
515 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
516 EPRINTF("Error reading VHD DD header.\n");
517 return err;
518 }
519
520 if (s->vhd.header.hdr_ver != 0x00010000) {
521 EPRINTF("unsupported header version! (0x%x)\n",
522 s->vhd.header.hdr_ver);
523 return -EINVAL;
524 }
525
526 s->spp = getpagesize() >> VHD_SECTOR_SHIFT;
527 s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
528 s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
529
530 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
531 return 0;
532
533 err = vhd_initialize_bat(s);
534 if (err)
535 return err;
536
537 err = vhd_initialize_bitmap_cache(s);
538 if (err) {
539 vhd_free_bat(s);
540 return err;
541 }
542
543 return 0;
544 }
545
546 static int
547 vhd_check_version(struct vhd_state *s)
548 {
549 if (strncmp(s->vhd.footer.crtr_app, "tap", 3))
550 return 0;
551
552 if (s->vhd.footer.crtr_ver > VHD_CURRENT_VERSION) {
553 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
554 EPRINTF("WARNING: %s vhd creator version 0x%08x, "
555 "but only versions up to 0x%08x are "
556 "supported for IO\n", s->vhd.file,
557 s->vhd.footer.crtr_ver, VHD_CURRENT_VERSION);
558
559 return -EINVAL;
560 }
561
562 return 0;
563 }
564
565 static void
566 vhd_log_open(struct vhd_state *s)
567 {
568 char buf[5];
569 uint32_t i, allocated, full;
570
571 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
572 return;
573
574 snprintf(buf, sizeof(buf), "%.4s", s->vhd.footer.crtr_app);
575 if (!vhd_type_dynamic(&s->vhd)) {
576 DPRINTF("%s version: %s 0x%08x\n",
577 s->vhd.file, buf, s->vhd.footer.crtr_ver);
578 return;
579 }
580
581 allocated = 0;
582 full = 0;
583
584 for (i = 0; i < s->bat.bat.entries; i++) {
585 if (bat_entry(s, i) != DD_BLK_UNUSED)
586 allocated++;
587 if (test_batmap(s, i))
588 full++;
589 }
590
591 DPRINTF("%s version: %s 0x%08x, b: %u, a: %u, f: %u, n: %"PRIu64"\n",
592 s->vhd.file, buf, s->vhd.footer.crtr_ver, s->bat.bat.entries,
593 allocated, full, s->next_db);
594 }
595
596 static int
597 __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags)
598 {
599 int i, o_flags, err;
600 struct vhd_state *s;
601
602 DBG(TLOG_INFO, "vhd_open: %s\n", name);
603 if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
604 libvhd_set_log_level(1);
605
606 s = (struct vhd_state *)driver->data;
607 memset(s, 0, sizeof(struct vhd_state));
608
609 s->flags = flags;
610 s->driver = driver;
611
612 err = vhd_initialize(s);
613 if (err)
614 return err;
615
616 o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ?
617 VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
618
619 err = vhd_open(&s->vhd, name, o_flags);
620 if (err) {
621 libvhd_set_log_level(1);
622 err = vhd_open(&s->vhd, name, o_flags);
623 if (err) {
624 EPRINTF("Unable to open [%s] (%d)!\n", name, err);
625 return err;
626 }
627 }
628
629 err = vhd_check_version(s);
630 if (err)
631 goto fail;
632
633 s->spb = s->spp = 1;
634
635 if (vhd_type_dynamic(&s->vhd)) {
636 err = vhd_initialize_dynamic_disk(s);
637 if (err)
638 goto fail;
639 }
640
641 vhd_log_open(s);
642
643 SPB = s->spb;
644
645 s->vreq_free_count = VHD_REQS_DATA;
646 for (i = 0; i < VHD_REQS_DATA; i++)
647 s->vreq_free[i] = s->vreq_list + i;
648
649 driver->info.size = s->vhd.footer.curr_size >> VHD_SECTOR_SHIFT;
650 driver->info.sector_size = VHD_SECTOR_SIZE;
651 driver->info.info = 0;
652
653 DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%"PRIu64
654 ", inf:%u)\n",
655 driver->info.size, driver->info.sector_size, driver->info.info);
656
657 if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) &&
658 !test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) {
659 err = vhd_kill_footer(s);
660 if (err) {
661 DPRINTF("ERROR killing footer: %d\n", err);
662 goto fail;
663 }
664 s->writes++;
665 }
666
667 return 0;
668
669 fail:
670 vhd_free_bat(s);
671 vhd_free_bitmap_cache(s);
672 vhd_close(&s->vhd);
673 vhd_free(s);
674 return err;
675 }
676
677 static int
678 _vhd_open(td_driver_t *driver, const char *name, td_flag_t flags)
679 {
680 vhd_flag_t vhd_flags = 0;
681
682 if (flags & TD_OPEN_RDONLY)
683 vhd_flags |= VHD_FLAG_OPEN_RDONLY;
684 if (flags & TD_OPEN_QUIET)
685 vhd_flags |= VHD_FLAG_OPEN_QUIET;
686 if (flags & TD_OPEN_STRICT)
687 vhd_flags |= VHD_FLAG_OPEN_STRICT;
688 if (flags & TD_OPEN_QUERY)
689 vhd_flags |= (VHD_FLAG_OPEN_QUERY |
690 VHD_FLAG_OPEN_QUIET |
691 VHD_FLAG_OPEN_RDONLY |
692 VHD_FLAG_OPEN_NO_CACHE);
693
694 /* pre-allocate for all but NFS and LVM storage */
695 if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
696 driver->storage != TAPDISK_STORAGE_TYPE_LVM)
697 vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
698
699 return __vhd_open(driver, name, vhd_flags);
700 }
701
702 static void
703 vhd_log_close(struct vhd_state *s)
704 {
705 uint32_t i, allocated, full;
706
707 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
708 return;
709
710 allocated = 0;
711 full = 0;
712
713 for (i = 0; i < s->bat.bat.entries; i++) {
714 if (bat_entry(s, i) != DD_BLK_UNUSED)
715 allocated++;
716 if (test_batmap(s, i))
717 full++;
718 }
719
720 DPRINTF("%s: b: %u, a: %u, f: %u, n: %"PRIu64"\n",
721 s->vhd.file, s->bat.bat.entries, allocated, full, s->next_db);
722 }
723
724 static int
725 _vhd_close(td_driver_t *driver)
726 {
727 int err;
728 struct vhd_state *s;
729 struct vhd_bitmap *bm;
730
731 DBG(TLOG_WARN, "vhd_close\n");
732 s = (struct vhd_state *)driver->data;
733
734 /* don't write footer if tapdisk is read-only */
735 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
736 goto free;
737
738 /*
739 * write footer if:
740 * - we killed it on open (opened with strict)
741 * - we've written data since opening
742 */
743 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_STRICT) || s->writes) {
744 memcpy(&s->vhd.bat, &s->bat.bat, sizeof(vhd_bat_t));
745 err = vhd_write_footer(&s->vhd, &s->vhd.footer);
746 memset(&s->vhd.bat, 0, sizeof(vhd_bat_t));
747
748 if (err)
749 EPRINTF("writing %s footer: %d\n", s->vhd.file, err);
750
751 if (!vhd_has_batmap(&s->vhd))
752 goto free;
753
754 err = vhd_write_batmap(&s->vhd, &s->bat.batmap);
755 if (err)
756 EPRINTF("writing %s batmap: %d\n", s->vhd.file, err);
757 }
758
759 free:
760 vhd_log_close(s);
761 vhd_free_bat(s);
762 vhd_free_bitmap_cache(s);
763 vhd_close(&s->vhd);
764 vhd_free(s);
765
766 memset(s, 0, sizeof(struct vhd_state));
767
768 return 0;
769 }
770
771 int
772 vhd_validate_parent(td_driver_t *child_driver,
773 td_driver_t *parent_driver, td_flag_t flags)
774 {
775 uint32_t status;
776 struct stat stats;
777 struct vhd_state *child = (struct vhd_state *)child_driver->data;
778 struct vhd_state *parent;
779
780 if (parent_driver->type != DISK_TYPE_VHD) {
781 if (child_driver->type != DISK_TYPE_VHD)
782 return -EINVAL;
783 if (child->vhd.footer.type != HD_TYPE_DIFF)
784 return -EINVAL;
785 if (!vhd_parent_raw(&child->vhd))
786 return -EINVAL;
787 return 0;
788 }
789
790 parent = (struct vhd_state *)parent_driver->data;
791
792 /*
793 * This check removed because of cases like:
794 * - parent VHD marked as 'hidden'
795 * - parent VHD modified during coalesce
796 */
797 /*
798 if (stat(parent->vhd.file, &stats)) {
799 DPRINTF("ERROR stating parent file %s\n", parent->vhd.file);
800 return -errno;
801 }
802
803 if (child->hdr.prt_ts != vhd_time(stats.st_mtime)) {
804 DPRINTF("ERROR: parent file has been modified since "
805 "snapshot. Child image no longer valid.\n");
806 return -EINVAL;
807 }
808 */
809
810 if (vhd_uuid_compare(&child->vhd.header.prt_uuid, &parent->vhd.footer.uuid)) {
811 DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
812 "snapshot. Child image no longer valid.\n",
813 __func__, child->vhd.file, parent->vhd.file);
814 return -EINVAL;
815 }
816
817 /* TODO: compare sizes */
818
819 return 0;
820 }
821
822 int
823 vhd_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
824 {
825 int err;
826 char *parent;
827 struct vhd_state *s;
828
829 DBG(TLOG_DBG, "\n");
830 memset(id, 0, sizeof(td_disk_id_t));
831
832 s = (struct vhd_state *)driver->data;
833
834 if (s->vhd.footer.type != HD_TYPE_DIFF)
835 return TD_NO_PARENT;
836
837 err = vhd_parent_locator_get(&s->vhd, &parent);
838 if (err)
839 return err;
840
841 id->name = parent;
842 id->drivertype = DISK_TYPE_VHD;
843 if (vhd_parent_raw(&s->vhd)) {
844 DPRINTF("VHD: parent is raw\n");
845 id->drivertype = DISK_TYPE_AIO;
846 }
847 return 0;
848 }
849
850 static inline void
851 clear_req_list(struct vhd_req_list *list)
852 {
853 list->head = list->tail = NULL;
854 }
855
856 static inline void
857 add_to_tail(struct vhd_req_list *list, struct vhd_request *e)
858 {
859 if (!list->head)
860 list->head = list->tail = e;
861 else
862 list->tail = list->tail->next = e;
863 }
864
865 static inline int
866 remove_from_req_list(struct vhd_req_list *list, struct vhd_request *e)
867 {
868 struct vhd_request *i = list->head;
869
870 if (list->head == e) {
871 if (list->tail == e)
872 clear_req_list(list);
873 else
874 list->head = list->head->next;
875 return 0;
876 }
877
878 while (i->next) {
879 if (i->next == e) {
880 if (list->tail == e) {
881 i->next = NULL;
882 list->tail = i;
883 } else
884 i->next = i->next->next;
885 return 0;
886 }
887 i = i->next;
888 }
889
890 return -EINVAL;
891 }
892
893 static inline void
894 init_vhd_request(struct vhd_state *s, struct vhd_request *req)
895 {
896 memset(req, 0, sizeof(struct vhd_request));
897 req->state = s;
898 }
899
900 static inline void
901 init_tx(struct vhd_transaction *tx)
902 {
903 memset(tx, 0, sizeof(struct vhd_transaction));
904 }
905
906 static inline void
907 add_to_transaction(struct vhd_transaction *tx, struct vhd_request *r)
908 {
909 ASSERT(!tx->closed);
910
911 r->tx = tx;
912 tx->started++;
913 add_to_tail(&tx->requests, r);
914 set_vhd_flag(tx->status, VHD_FLAG_TX_LIVE);
915
916 DBG(TLOG_DBG, "blk: 0x%04"PRIx64", lsec: 0x%08"PRIx64", tx: %p, "
917 "started: %d, finished: %d, status: %u\n",
918 r->treq.sec / SPB, r->treq.sec, tx,
919 tx->started, tx->finished, tx->status);
920 }
921
922 static inline int
923 transaction_completed(struct vhd_transaction *tx)
924 {
925 return (tx->started == tx->finished);
926 }
927
928 static inline void
929 init_bat(struct vhd_state *s)
930 {
931 s->bat.req.tx = NULL;
932 s->bat.req.next = NULL;
933 s->bat.req.error = 0;
934 s->bat.pbw_blk = 0;
935 s->bat.pbw_offset = 0;
936 s->bat.status = 0;
937 }
938
939 static inline void
940 lock_bat(struct vhd_state *s)
941 {
942 set_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
943 }
944
945 static inline void
946 unlock_bat(struct vhd_state *s)
947 {
948 clear_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
949 }
950
951 static inline int
952 bat_locked(struct vhd_state *s)
953 {
954 return test_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
955 }
956
957 static inline void
958 init_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
959 {
960 bm->blk = 0;
961 bm->seqno = 0;
962 bm->status = 0;
963 init_tx(&bm->tx);
964 clear_req_list(&bm->queue);
965 clear_req_list(&bm->waiting);
966 memset(bm->map, 0, vhd_sectors_to_bytes(s->bm_secs));
967 memset(bm->shadow, 0, vhd_sectors_to_bytes(s->bm_secs));
968 init_vhd_request(s, &bm->req);
969 }
970
971 static inline struct vhd_bitmap *
972 get_bitmap(struct vhd_state *s, uint32_t block)
973 {
974 int i;
975 struct vhd_bitmap *bm;
976
977 for (i = 0; i < VHD_CACHE_SIZE; i++) {
978 bm = s->bitmap[i];
979 if (bm && bm->blk == block)
980 return bm;
981 }
982
983 return NULL;
984 }
985
986 static inline void
987 lock_bitmap(struct vhd_bitmap *bm)
988 {
989 set_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
990 }
991
992 static inline void
993 unlock_bitmap(struct vhd_bitmap *bm)
994 {
995 clear_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
996 }
997
998 static inline int
999 bitmap_locked(struct vhd_bitmap *bm)
1000 {
1001 return test_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
1002 }
1003
1004 static inline int
1005 bitmap_valid(struct vhd_bitmap *bm)
1006 {
1007 return !test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
1008 }
1009
1010 static inline int
1011 bitmap_in_use(struct vhd_bitmap *bm)
1012 {
1013 return (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING) ||
1014 test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING) ||
1015 test_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT) ||
1016 bm->waiting.head || bm->tx.requests.head || bm->queue.head);
1017 }
1018
1019 static inline int
1020 bitmap_full(struct vhd_state *s, struct vhd_bitmap *bm)
1021 {
1022 int i, n;
1023
1024 n = s->spb >> 3;
1025 for (i = 0; i < n; i++)
1026 if (bm->map[i] != (char)0xFF)
1027 return 0;
1028
1029 DBG(TLOG_DBG, "bitmap 0x%04x full\n", bm->blk);
1030 return 1;
1031 }
1032
1033 static struct vhd_bitmap *
1034 remove_lru_bitmap(struct vhd_state *s)
1035 {
1036 int i, idx = 0;
1037 u64 seq = s->bm_lru;
1038 struct vhd_bitmap *bm, *lru = NULL;
1039
1040 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1041 bm = s->bitmap[i];
1042 if (bm && bm->seqno < seq && !bitmap_locked(bm)) {
1043 idx = i;
1044 lru = bm;
1045 seq = lru->seqno;
1046 }
1047 }
1048
1049 if (lru) {
1050 s->bitmap[idx] = NULL;
1051 ASSERT(!bitmap_in_use(lru));
1052 }
1053
1054 return lru;
1055 }
1056
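/*
 * Hand out a bitmap structure from the free list, or evict the least
 * recently used unlocked cache entry when the free list is empty.
 * Returns -EBUSY if every cached bitmap is locked.
 */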
1057 static int
1058 alloc_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap **bitmap, uint32_t blk)
1059 {
1060 struct vhd_bitmap *bm;
1061
1062 *bitmap = NULL;
1063
1064 if (s->bm_free_count > 0) {
1065 bm = s->bitmap_free[--s->bm_free_count];
1066 } else {
1067 bm = remove_lru_bitmap(s);
1068 if (!bm)
1069 return -EBUSY;
1070 }
1071
1072 init_vhd_bitmap(s, bm);
1073 bm->blk = blk;
1074 *bitmap = bm;
1075
1076 return 0;
1077 }
1078
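/*
 * Return the next LRU sequence number. When the counter saturates, all
 * cached sequence numbers are halved so that relative ordering is kept.
 */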
1079 static inline uint64_t
1080 __bitmap_lru_seqno(struct vhd_state *s)
1081 {
1082 int i;
1083 struct vhd_bitmap *bm;
1084
1085 if (s->bm_lru == 0xffffffff) {
1086 s->bm_lru = 0;
1087 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1088 bm = s->bitmap[i];
1089 if (bm) {
1090 bm->seqno >>= 1;
1091 if (bm->seqno > s->bm_lru)
1092 s->bm_lru = bm->seqno;
1093 }
1094 }
1095 }
1096
1097 return ++s->bm_lru;
1098 }
1099
1100 static inline void
1101 touch_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1102 {
1103 bm->seqno = __bitmap_lru_seqno(s);
1104 }
1105
1106 static inline void
1107 install_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1108 {
1109 int i;
1110 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1111 if (!s->bitmap[i]) {
1112 touch_bitmap(s, bm);
1113 s->bitmap[i] = bm;
1114 return;
1115 }
1116 }
1117
1118 ASSERT(0);
1119 }
1120
1121 static inline void
1122 free_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1123 {
1124 int i;
1125
1126 for (i = 0; i < VHD_CACHE_SIZE; i++)
1127 if (s->bitmap[i] == bm)
1128 break;
1129
1130 ASSERT(!bitmap_locked(bm));
1131 ASSERT(!bitmap_in_use(bm));
1132 ASSERT(i < VHD_CACHE_SIZE);
1133
1134 s->bitmap[i] = NULL;
1135 s->bitmap_free[s->bm_free_count++] = bm;
1136 }
1137
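/*
 * Classify a logical sector for the given operation using the BAT, the
 * batmap, and the cached block bitmap: returns one of the VHD_BM_*
 * values above, or -EINVAL if the sector lies beyond the BAT. Writers
 * see VHD_BM_BAT_LOCKED while a different block allocation is pending.
 */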
1138 static int
1139 read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
1140 {
1141 u32 blk, sec;
1142 struct vhd_bitmap *bm;
1143
1144 /* in fixed disks, every block is present */
1145 if (s->vhd.footer.type == HD_TYPE_FIXED)
1146 return VHD_BM_BIT_SET;
1147
1148 blk = sector / s->spb;
1149 sec = sector % s->spb;
1150
1151 if (blk >= s->vhd.header.max_bat_size) {
1152 DPRINTF("ERROR: sec %"PRIu64" out of range, op = %d\n",
1153 sector, op);
1154 return -EINVAL;
1155 }
1156
1157 if (bat_entry(s, blk) == DD_BLK_UNUSED) {
1158 if (op == VHD_OP_DATA_WRITE &&
1159 s->bat.pbw_blk != blk && bat_locked(s))
1160 return VHD_BM_BAT_LOCKED;
1161
1162 return VHD_BM_BAT_CLEAR;
1163 }
1164
1165 if (test_batmap(s, blk)) {
1166 DBG(TLOG_DBG, "batmap set for 0x%04x\n", blk);
1167 return VHD_BM_BIT_SET;
1168 }
1169
1170 bm = get_bitmap(s, blk);
1171 if (!bm)
1172 return VHD_BM_NOT_CACHED;
1173
1174 /* bump lru count */
1175 touch_bitmap(s, bm);
1176
1177 if (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING))
1178 return VHD_BM_READ_PENDING;
1179
1180 return ((vhd_bitmap_test(&s->vhd, bm->map, sec)) ?
1181 VHD_BM_BIT_SET : VHD_BM_BIT_CLEAR);
1182 }
1183
1184 static int
1185 read_bitmap_cache_span(struct vhd_state *s,
1186 uint64_t sector, int nr_secs, int value)
1187 {
1188 int ret;
1189 u32 blk, sec;
1190 struct vhd_bitmap *bm;
1191
1192 /* in fixed disks, every block is present */
1193 if (s->vhd.footer.type == HD_TYPE_FIXED)
1194 return nr_secs;
1195
1196 sec = sector % s->spb;
1197 blk = sector / s->spb;
1198
1199 if (test_batmap(s, blk))
1200 return MIN(nr_secs, s->spb - sec);
1201
1202 bm = get_bitmap(s, blk);
1203
1204 ASSERT(bm && bitmap_valid(bm));
1205
1206 for (ret = 0; sec < s->spb && ret < nr_secs; sec++, ret++)
1207 if (vhd_bitmap_test(&s->vhd, bm->map, sec) != value)
1208 break;
1209
1210 return ret;
1211 }
1212
1213 static inline struct vhd_request *
1214 alloc_vhd_request(struct vhd_state *s)
1215 {
1216 struct vhd_request *req = NULL;
1217
1218 if (s->vreq_free_count > 0) {
1219 req = s->vreq_free[--s->vreq_free_count];
1220 ASSERT(req->treq.secs == 0);
1221 init_vhd_request(s, req);
1222 return req;
1223 }
1224
1225 return NULL;
1226 }
1227
1228 static inline void
1229 free_vhd_request(struct vhd_state *s, struct vhd_request *req)
1230 {
1231 memset(req, 0, sizeof(struct vhd_request));
1232 s->vreq_free[s->vreq_free_count++] = req;
1233 }
1234
1235 static inline void
1236 aio_read(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
1237 {
1238 struct tiocb *tiocb = &req->tiocb;
1239
1240 td_prep_read(tiocb, s->vhd.fd, req->treq.buf,
1241 vhd_sectors_to_bytes(req->treq.secs),
1242 offset, vhd_complete, req);
1243 td_queue_tiocb(s->driver, tiocb);
1244
1245 s->queued++;
1246 s->reads++;
1247 s->read_size += req->treq.secs;
1248 TRACE(s);
1249 }
1250
1251 static inline void
1252 aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
1253 {
1254 struct tiocb *tiocb = &req->tiocb;
1255
1256 td_prep_write(tiocb, s->vhd.fd, req->treq.buf,
1257 vhd_sectors_to_bytes(req->treq.secs),
1258 offset, vhd_complete, req);
1259 td_queue_tiocb(s->driver, tiocb);
1260
1261 s->queued++;
1262 s->writes++;
1263 s->write_size += req->treq.secs;
1264 TRACE(s);
1265 }
1266
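/*
 * Record a pending block allocation for blk: pbw_offset is placed so
 * the block's data area starts on a page boundary. Returns the current
 * end of data, which is where the zeroed bitmap write will begin.
 */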
1267 static inline uint64_t
1268 reserve_new_block(struct vhd_state *s, uint32_t blk)
1269 {
1270 int gap = 0;
1271
1272 ASSERT(!test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED));
1273
1274 /* data region of segment should begin on page boundary */
1275 if ((s->next_db + s->bm_secs) % s->spp)
1276 gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp));
1277
1278 s->bat.pbw_blk = blk;
1279 s->bat.pbw_offset = s->next_db + gap;
1280
1281 return s->next_db;
1282 }
1283
1284 static int
1285 schedule_bat_write(struct vhd_state *s)
1286 {
1287 int i;
1288 u32 blk;
1289 char *buf;
1290 u64 offset;
1291 struct vhd_request *req;
1292
1293 ASSERT(bat_locked(s));
1294
1295 req = &s->bat.req;
1296 buf = s->bat.bat_buf;
1297 blk = s->bat.pbw_blk;
1298
1299 init_vhd_request(s, req);
1300 memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
1301
1302 ((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
1303
1304 for (i = 0; i < 128; i++)
1305 BE32_OUT(&((u32 *)buf)[i]);
1306
1307 offset = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
1308 req->treq.secs = 1;
1309 req->treq.buf = buf;
1310 req->op = VHD_OP_BAT_WRITE;
1311 req->next = NULL;
1312
1313 aio_write(s, req, offset);
1314 set_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED);
1315
1316 DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64", "
1317 "table_offset: 0x%08"PRIx64"\n", blk, s->bat.pbw_offset, offset);
1318
1319 return 0;
1320 }
1321
1322 static void
1323 schedule_zero_bm_write(struct vhd_state *s,
1324 struct vhd_bitmap *bm, uint64_t lb_end)
1325 {
1326 uint64_t offset;
1327 struct vhd_request *req = &s->bat.zero_req;
1328
1329 init_vhd_request(s, req);
1330
1331 offset = vhd_sectors_to_bytes(lb_end);
1332 req->op = VHD_OP_ZERO_BM_WRITE;
1333 req->treq.sec = s->bat.pbw_blk * s->spb;
1334 req->treq.secs = (s->bat.pbw_offset - lb_end) + s->bm_secs;
1335 req->treq.buf = vhd_zeros(vhd_sectors_to_bytes(req->treq.secs));
1336 req->next = NULL;
1337
1338 DBG(TLOG_DBG, "blk: 0x%04x, writing zero bitmap at 0x%08"PRIx64"\n",
1339 s->bat.pbw_blk, offset);
1340
1341 lock_bitmap(bm);
1342 add_to_transaction(&bm->tx, req);
1343 aio_write(s, req, offset);
1344 }
1345
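/*
 * Allocate a block without preallocation: reserve the block, schedule a
 * write that zeroes its new bitmap on disk, and flag the transaction so
 * the BAT write is issued once the zeroing completes (see
 * finish_zero_bm_write).
 */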
1346 static int
1347 update_bat(struct vhd_state *s, uint32_t blk)
1348 {
1349 int err;
1350 uint64_t lb_end;
1351 struct vhd_bitmap *bm;
1352
1353 ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
1354
1355 if (bat_locked(s)) {
1356 ASSERT(s->bat.pbw_blk == blk);
1357 return 0;
1358 }
1359
1360 /* empty bitmap could already be in
1361 * cache if earlier bat update failed */
1362 bm = get_bitmap(s, blk);
1363 if (!bm) {
1364 /* install empty bitmap in cache */
1365 err = alloc_vhd_bitmap(s, &bm, blk);
1366 if (err)
1367 return err;
1368
1369 install_bitmap(s, bm);
1370 }
1371
1372 lock_bat(s);
1373 lb_end = reserve_new_block(s, blk);
1374 schedule_zero_bm_write(s, bm, lb_end);
1375 set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT);
1376
1377 return 0;
1378 }
1379
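/*
 * Allocate a block with preallocation: synchronously extend the file
 * with zeros covering the new bitmap, data area and alignment gap, then
 * schedule the BAT write as part of the block's transaction.
 */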
1380 static int
1381 allocate_block(struct vhd_state *s, uint32_t blk)
1382 {
1383 char *zeros;
1384 int err, gap;
1385 uint64_t offset, size;
1386 struct vhd_bitmap *bm;
1387
1388 ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
1389
1390 if (bat_locked(s)) {
1391 ASSERT(s->bat.pbw_blk == blk);
1392 if (s->bat.req.error)
1393 return -EBUSY;
1394 return 0;
1395 }
1396
1397 gap = 0;
1398 s->bat.pbw_blk = blk;
1399 offset = vhd_sectors_to_bytes(s->next_db);
1400
1401 /* data region of segment should begin on page boundary */
1402 if ((s->next_db + s->bm_secs) % s->spp) {
1403 gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp));
1404 s->next_db += gap;
1405 }
1406
1407 s->bat.pbw_offset = s->next_db;
1408
1409 DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64"\n",
1410 blk, s->bat.pbw_offset);
1411
1412 if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
1413 ERR(errno, "lseek failed\n");
1414 return -errno;
1415 }
1416
1417 size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
1418 err = write(s->vhd.fd, vhd_zeros(size), size);
1419 if (err != size) {
1420 err = (err == -1 ? -errno : -EIO);
1421 ERR(err, "write failed");
1422 return err;
1423 }
1424
1425 /* empty bitmap could already be in
1426 * cache if earlier bat update failed */
1427 bm = get_bitmap(s, blk);
1428 if (!bm) {
1429 /* install empty bitmap in cache */
1430 err = alloc_vhd_bitmap(s, &bm, blk);
1431 if (err)
1432 return err;
1433
1434 install_bitmap(s, bm);
1435 }
1436
1437 lock_bat(s);
1438 lock_bitmap(bm);
1439 schedule_bat_write(s);
1440 add_to_transaction(&bm->tx, &s->bat.req);
1441
1442 return 0;
1443 }
1444
1445 static int
1446 schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
1447 {
1448 u64 offset;
1449 u32 blk = 0, sec = 0;
1450 struct vhd_bitmap *bm;
1451 struct vhd_request *req;
1452
1453 if (s->vhd.footer.type == HD_TYPE_FIXED) {
1454 offset = vhd_sectors_to_bytes(treq.sec);
1455 goto make_request;
1456 }
1457
1458 blk = treq.sec / s->spb;
1459 sec = treq.sec % s->spb;
1460 bm = get_bitmap(s, blk);
1461 offset = bat_entry(s, blk);
1462
1463 ASSERT(offset != DD_BLK_UNUSED);
1464 ASSERT(test_batmap(s, blk) || (bm && bitmap_valid(bm)));
1465
1466 offset += s->bm_secs + sec;
1467 offset = vhd_sectors_to_bytes(offset);
1468
1469 make_request:
1470 req = alloc_vhd_request(s);
1471 if (!req)
1472 return -EBUSY;
1473
1474 req->treq = treq;
1475 req->flags = flags;
1476 req->op = VHD_OP_DATA_READ;
1477 req->next = NULL;
1478
1479 aio_read(s, req, offset);
1480
1481 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
1482 "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x, buf: %p\n",
1483 s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags,
1484 treq.buf);
1485
1486 return 0;
1487 }
1488
1489 static int
1490 schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
1491 {
1492 int err;
1493 u64 offset;
1494 u32 blk = 0, sec = 0;
1495 struct vhd_bitmap *bm = NULL;
1496 struct vhd_request *req;
1497
1498 if (s->vhd.footer.type == HD_TYPE_FIXED) {
1499 offset = vhd_sectors_to_bytes(treq.sec);
1500 goto make_request;
1501 }
1502
1503 blk = treq.sec / s->spb;
1504 sec = treq.sec % s->spb;
1505 offset = bat_entry(s, blk);
1506
1507 if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BAT)) {
1508 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
1509 err = allocate_block(s, blk);
1510 else
1511 err = update_bat(s, blk);
1512
1513 if (err)
1514 return err;
1515
1516 offset = s->bat.pbw_offset;
1517 }
1518
1519 offset += s->bm_secs + sec;
1520 offset = vhd_sectors_to_bytes(offset);
1521
1522 make_request:
1523 req = alloc_vhd_request(s);
1524 if (!req)
1525 return -EBUSY;
1526
1527 req->treq = treq;
1528 req->flags = flags;
1529 req->op = VHD_OP_DATA_WRITE;
1530 req->next = NULL;
1531
1532 if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BITMAP)) {
1533 bm = get_bitmap(s, blk);
1534 ASSERT(bm && bitmap_valid(bm));
1535 lock_bitmap(bm);
1536
1537 if (bm->tx.closed) {
1538 add_to_tail(&bm->queue, req);
1539 set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
1540 } else
1541 add_to_transaction(&bm->tx, req);
1542 }
1543
1544 aio_write(s, req, offset);
1545
1546 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
1547 "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x\n",
1548 s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags);
1549
1550 return 0;
1551 }
1552
1553 static int
1554 schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
1555 {
1556 int err;
1557 u64 offset;
1558 struct vhd_bitmap *bm;
1559 struct vhd_request *req = NULL;
1560
1561 ASSERT(vhd_type_dynamic(&s->vhd));
1562
1563 offset = bat_entry(s, blk);
1564
1565 ASSERT(offset != DD_BLK_UNUSED);
1566 ASSERT(!get_bitmap(s, blk));
1567
1568 offset = vhd_sectors_to_bytes(offset);
1569
1570 err = alloc_vhd_bitmap(s, &bm, blk);
1571 if (err)
1572 return err;
1573
1574 req = &bm->req;
1575 init_vhd_request(s, req);
1576
1577 req->treq.sec = blk * s->spb;
1578 req->treq.secs = s->bm_secs;
1579 req->treq.buf = bm->map;
1580 req->treq.cb = NULL;
1581 req->op = VHD_OP_BITMAP_READ;
1582 req->next = NULL;
1583
1584 aio_read(s, req, offset);
1585 lock_bitmap(bm);
1586 install_bitmap(s, bm);
1587 set_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
1588
1589 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, nr_secs: 0x%04x, "
1590 "offset: 0x%08"PRIx64"\n", s->vhd.file, req->treq.sec, blk,
1591 req->treq.secs, offset);
1592
1593 return 0;
1594 }
1595
1596 static void
1597 schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
1598 {
1599 u64 offset;
1600 struct vhd_bitmap *bm;
1601 struct vhd_request *req;
1602
1603 bm = get_bitmap(s, blk);
1604 offset = bat_entry(s, blk);
1605
1606 ASSERT(vhd_type_dynamic(&s->vhd));
1607 ASSERT(bm && bitmap_valid(bm) &&
1608 !test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING));
1609
1610 if (offset == DD_BLK_UNUSED) {
1611 ASSERT(bat_locked(s) && s->bat.pbw_blk == blk);
1612 offset = s->bat.pbw_offset;
1613 }
1614
1615 offset = vhd_sectors_to_bytes(offset);
1616
1617 req = &bm->req;
1618 init_vhd_request(s, req);
1619
1620 req->treq.sec = blk * s->spb;
1621 req->treq.secs = s->bm_secs;
1622 req->treq.buf = bm->shadow;
1623 req->treq.cb = NULL;
1624 req->op = VHD_OP_BITMAP_WRITE;
1625 req->next = NULL;
1626
1627 aio_write(s, req, offset);
1628 lock_bitmap(bm);
1629 touch_bitmap(s, bm); /* bump lru count */
1630 set_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING);
1631
1632 DBG(TLOG_DBG, "%s: blk: 0x%04x, sec: 0x%08"PRIx64", nr_secs: 0x%04x, "
1633 "offset: 0x%"PRIx64"\n", s->vhd.file, blk, req->treq.sec,
1634 req->treq.secs, offset);
1635 }
1636
1637 /*
1638 * queued requests will be submitted once the bitmap
1639 * describing them is read and the requests are validated.
1640 */
1641 static int
1642 __vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
1643 {
1644 u32 blk;
1645 struct vhd_bitmap *bm;
1646 struct vhd_request *req;
1647
1648 ASSERT(vhd_type_dynamic(&s->vhd));
1649
1650 blk = treq.sec / s->spb;
1651 bm = get_bitmap(s, blk);
1652
1653 ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING));
1654
1655 req = alloc_vhd_request(s);
1656 if (!req)
1657 return -EBUSY;
1658
1659 req->treq = treq;
1660 req->op = op;
1661 req->next = NULL;
1662
1663 add_to_tail(&bm->waiting, req);
1664 lock_bitmap(bm);
1665
1666 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x nr_secs: 0x%04x, "
1667 "op: %u\n", s->vhd.file, treq.sec, blk, treq.secs, op);
1668
1669 TRACE(s);
1670 return 0;
1671 }
1672
1673 static void
1674 vhd_queue_read(td_driver_t *driver, td_request_t treq)
1675 {
1676 struct vhd_state *s = (struct vhd_state *)driver->data;
1677
1678 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x (seg: %d)\n",
1679 s->vhd.file, treq.sec, treq.secs, treq.sidx);
1680
1681 while (treq.secs) {
1682 int err;
1683 td_request_t clone;
1684
1685 err = 0;
1686 clone = treq;
1687
1688 switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_READ)) {
1689 case -EINVAL:
1690 err = -EINVAL;
1691 goto fail;
1692
1693 case VHD_BM_BAT_CLEAR:
1694 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1695 td_forward_request(clone);
1696 break;
1697
1698 case VHD_BM_BIT_CLEAR:
1699 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
1700 td_forward_request(clone);
1701 break;
1702
1703 case VHD_BM_BIT_SET:
1704 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
1705 err = schedule_data_read(s, clone, 0);
1706 if (err)
1707 goto fail;
1708 break;
1709
1710 case VHD_BM_NOT_CACHED:
1711 err = schedule_bitmap_read(s, clone.sec / s->spb);
1712 if (err)
1713 goto fail;
1714
1715 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1716 err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone);
1717 if (err)
1718 goto fail;
1719 break;
1720
1721 case VHD_BM_READ_PENDING:
1722 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1723 err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone);
1724 if (err)
1725 goto fail;
1726 break;
1727
1728 case VHD_BM_BAT_LOCKED:
1729 default:
1730 ASSERT(0);
1731 break;
1732 }
1733
1734 treq.sec += clone.secs;
1735 treq.secs -= clone.secs;
1736 treq.buf += vhd_sectors_to_bytes(clone.secs);
1737 continue;
1738
1739 fail:
1740 clone.secs = treq.secs;
1741 td_complete_request(clone, err);
1742 break;
1743 }
1744 }
1745
1746 static void
1747 vhd_queue_write(td_driver_t *driver, td_request_t treq)
1748 {
1749 struct vhd_state *s = (struct vhd_state *)driver->data;
1750
1751 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x, (seg: %d)\n",
1752 s->vhd.file, treq.sec, treq.secs, treq.sidx);
1753
1754 while (treq.secs) {
1755 int err;
1756 uint8_t flags;
1757 td_request_t clone;
1758
1759 err = 0;
1760 flags = 0;
1761 clone = treq;
1762
1763 switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_WRITE)) {
1764 case -EINVAL:
1765 err = -EINVAL;
1766 goto fail;
1767
1768 case VHD_BM_BAT_LOCKED:
1769 err = -EBUSY;
1770 clone.blocked = 1;
1771 goto fail;
1772
1773 case VHD_BM_BAT_CLEAR:
1774 flags = (VHD_FLAG_REQ_UPDATE_BAT |
1775 VHD_FLAG_REQ_UPDATE_BITMAP);
1776 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1777 err = schedule_data_write(s, clone, flags);
1778 if (err)
1779 goto fail;
1780 break;
1781
1782 case VHD_BM_BIT_CLEAR:
1783 flags = VHD_FLAG_REQ_UPDATE_BITMAP;
1784 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
1785 err = schedule_data_write(s, clone, flags);
1786 if (err)
1787 goto fail;
1788 break;
1789
1790 case VHD_BM_BIT_SET:
1791 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
1792 err = schedule_data_write(s, clone, 0);
1793 if (err)
1794 goto fail;
1795 break;
1796
1797 case VHD_BM_NOT_CACHED:
1798 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1799 err = schedule_bitmap_read(s, clone.sec / s->spb);
1800 if (err)
1801 goto fail;
1802
1803 err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone);
1804 if (err)
1805 goto fail;
1806 break;
1807
1808 case VHD_BM_READ_PENDING:
1809 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1810 err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone);
1811 if (err)
1812 goto fail;
1813 break;
1814
1815 default:
1816 ASSERT(0);
1817 break;
1818 }
1819
1820 treq.sec += clone.secs;
1821 treq.secs -= clone.secs;
1822 treq.buf += vhd_sectors_to_bytes(clone.secs);
1823 continue;
1824
1825 fail:
1826 clone.secs = treq.secs;
1827 td_complete_request(clone, err);
1828 break;
1829 }
1830 }
1831
1832 static inline void
1833 signal_completion(struct vhd_request *list, int error)
1834 {
1835 struct vhd_state *s;
1836 struct vhd_request *r, *next;
1837
1838 if (!list)
1839 return;
1840
1841 r = list;
1842 s = list->state;
1843
1844 while (r) {
1845 int err;
1846
1847 err = (error ? error : r->error);
1848 next = r->next;
1849 td_complete_request(r->treq, err);
1850 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64", "
1851 "err: %d\n", r->treq.sec, r->treq.sec / s->spb, err);
1852 free_vhd_request(s, r);
1853 r = next;
1854
1855 s->returned++;
1856 TRACE(s);
1857 }
1858 }
1859
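/*
 * Move writes that were queued while the previous transaction was
 * closed into a fresh transaction. Requests that already finished get
 * their bits set in the shadow bitmap; if every request in the new
 * transaction has finished, the bitmap write is started immediately.
 */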
1860 static void
1861 start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1862 {
1863 int i, error = 0;
1864 struct vhd_transaction *tx;
1865 struct vhd_request *r, *next;
1866
1867 if (!bm->queue.head)
1868 return;
1869
1870 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1871
1872 r = bm->queue.head;
1873 tx = &bm->tx;
1874 clear_req_list(&bm->queue);
1875
1876 if (r && bat_entry(s, bm->blk) == DD_BLK_UNUSED)
1877 tx->error = -EIO;
1878
1879 while (r) {
1880 next = r->next;
1881 r->next = NULL;
1882 clear_vhd_flag(r->flags, VHD_FLAG_REQ_QUEUED);
1883
1884 add_to_transaction(tx, r);
1885 if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
1886 tx->finished++;
1887 if (!r->error) {
1888 u32 sec = r->treq.sec % s->spb;
1889 for (i = 0; i < r->treq.secs; i++)
1890 vhd_bitmap_set(&s->vhd,
1891 bm->shadow, sec + i);
1892 }
1893 }
1894 r = next;
1895 }
1896
1897 /* perhaps all the queued writes already completed? */
1898 if (tx->started && transaction_completed(tx))
1899 finish_data_transaction(s, bm);
1900 }
1901
1902 static void
1903 finish_bat_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1904 {
1905 struct vhd_transaction *tx = &bm->tx;
1906
1907 if (!bat_locked(s))
1908 return;
1909
1910 if (s->bat.pbw_blk != bm->blk)
1911 return;
1912
1913 if (!s->bat.req.error)
1914 goto release;
1915
1916 if (!test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE))
1917 goto release;
1918
1919 tx->closed = 1;
1920 return;
1921
1922 release:
1923 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1924 unlock_bat(s);
1925 init_bat(s);
1926 }
1927
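/*
 * Complete a bitmap transaction. On error the shadow bitmap is rolled
 * back from the on-disk map; on success the in-memory map is updated
 * from the shadow (setting the batmap bit if the block became full).
 * Data writes in the transaction are then signaled complete and any
 * queued writes start a new transaction.
 */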
1928 static void
1929 finish_bitmap_transaction(struct vhd_state *s,
1930 struct vhd_bitmap *bm, int error)
1931 {
1932 int map_size;
1933 struct vhd_transaction *tx = &bm->tx;
1934
1935 DBG(TLOG_DBG, "blk: 0x%04x, err: %d\n", bm->blk, error);
1936 tx->error = (tx->error ? tx->error : error);
1937 map_size = vhd_sectors_to_bytes(s->bm_secs);
1938
1939 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
1940 if (test_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT)) {
1941 /* still waiting for bat write */
1942 ASSERT(bm->blk == s->bat.pbw_blk);
1943 ASSERT(test_vhd_flag(s->bat.status,
1944 VHD_FLAG_BAT_WRITE_STARTED));
1945 s->bat.req.tx = tx;
1946 return;
1947 }
1948 }
1949
1950 if (tx->error) {
1951 /* undo changes to shadow */
1952 memcpy(bm->shadow, bm->map, map_size);
1953 } else {
1954 /* complete atomic write */
1955 memcpy(bm->map, bm->shadow, map_size);
1956 if (!test_batmap(s, bm->blk) && bitmap_full(s, bm))
1957 set_batmap(s, bm->blk);
1958 }
1959
1960 /* transaction done; signal completions */
1961 signal_completion(tx->requests.head, tx->error);
1962 init_tx(tx);
1963 start_new_bitmap_transaction(s, bm);
1964
1965 if (!bitmap_in_use(bm))
1966 unlock_bitmap(bm);
1967
1968 finish_bat_transaction(s, bm);
1969 }
1970
1971 static void
1972 finish_data_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1973 {
1974 struct vhd_transaction *tx = &bm->tx;
1975
1976 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1977
1978 tx->closed = 1;
1979
1980 if (!tx->error)
1981 return schedule_bitmap_write(s, bm->blk);
1982
1983 return finish_bitmap_transaction(s, bm, 0);
1984 }
1985
1986 static void
1987 finish_bat_write(struct vhd_request *req)
1988 {
1989 struct vhd_bitmap *bm;
1990 struct vhd_transaction *tx;
1991 struct vhd_state *s = req->state;
1992
1993 s->returned++;
1994 TRACE(s);
1995
1996 bm = get_bitmap(s, s->bat.pbw_blk);
1997
1998 DBG(TLOG_DBG, "blk 0x%04x, pbwo: 0x%08"PRIx64", err %d\n",
1999 s->bat.pbw_blk, s->bat.pbw_offset, req->error);
2000 ASSERT(bm && bitmap_valid(bm));
2001 ASSERT(bat_locked(s) &&
2002 test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED));
2003
2004 tx = &bm->tx;
2005 ASSERT(test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE));
2006
2007 if (!req->error) {
2008 bat_entry(s, s->bat.pbw_blk) = s->bat.pbw_offset;
2009 s->next_db = s->bat.pbw_offset + s->spb + s->bm_secs;
2010 } else
2011 tx->error = req->error;
2012
2013 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
2014 tx->finished++;
2015 remove_from_req_list(&tx->requests, req);
2016 if (transaction_completed(tx))
2017 finish_data_transaction(s, bm);
2018 } else {
2019 clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT);
2020 if (s->bat.req.tx)
2021 finish_bitmap_transaction(s, bm, req->error);
2022 }
2023
2024 finish_bat_transaction(s, bm);
2025 }
2026
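/*
 * The write zeroing a new block's bitmap has completed. On success the
 * BAT write is now scheduled; on failure the pending allocation is
 * abandoned and the transaction is marked as failed.
 */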
2027 static void
2028 finish_zero_bm_write(struct vhd_request *req)
2029 {
2030 u32 blk;
2031 struct vhd_bitmap *bm;
2032 struct vhd_transaction *tx = req->tx;
2033 struct vhd_state *s = req->state;
2034
2035 s->returned++;
2036 TRACE(s);
2037
2038 blk = req->treq.sec / s->spb;
2039 bm = get_bitmap(s, blk);
2040
2041 DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
2042 ASSERT(bat_locked(s));
2043 ASSERT(s->bat.pbw_blk == blk);
2044 ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm));
2045
2046 tx->finished++;
2047 remove_from_req_list(&tx->requests, req);
2048
2049 if (req->error) {
2050 unlock_bat(s);
2051 init_bat(s);
2052 tx->error = req->error;
2053 clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT);
2054 } else
2055 schedule_bat_write(s);
2056
2057 if (transaction_completed(tx))
2058 finish_data_transaction(s, bm);
2059 }
2060
2061 static void
2062 finish_bitmap_read(struct vhd_request *req)
2063 {
2064 u32 blk;
2065 struct vhd_bitmap *bm;
2066 struct vhd_request *r, *next;
2067 struct vhd_state *s = req->state;
2068
2069 s->returned++;
2070 TRACE(s);
2071
2072 blk = req->treq.sec / s->spb;
2073 bm = get_bitmap(s, blk);
2074
2075 DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
2076 ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING));
2077
2078 r = bm->waiting.head;
2079 clear_req_list(&bm->waiting);
2080 clear_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
2081
2082 if (!req->error) {
2083 memcpy(bm->shadow, bm->map, vhd_sectors_to_bytes(s->bm_secs));
2084
2085 while (r) {
2086 struct vhd_request tmp;
2087
2088 tmp = *r;
2089 next = r->next;
2090 free_vhd_request(s, r);
2091
2092 ASSERT(tmp.op == VHD_OP_DATA_READ ||
2093 tmp.op == VHD_OP_DATA_WRITE);
2094
2095 if (tmp.op == VHD_OP_DATA_READ)
2096 vhd_queue_read(s->driver, tmp.treq);
2097 else if (tmp.op == VHD_OP_DATA_WRITE)
2098 vhd_queue_write(s->driver, tmp.treq);
2099
2100 r = next;
2101 }
2102 } else {
2103 int err = req->error;
2104 unlock_bitmap(bm);
2105 free_vhd_bitmap(s, bm);
2106 return signal_completion(r, err);
2107 }
2108
2109 if (!bitmap_in_use(bm))
2110 unlock_bitmap(bm);
2111 }
2112
2113 static void
2114 finish_bitmap_write(struct vhd_request *req)
2115 {
2116 u32 blk;
2117 struct vhd_bitmap *bm;
2118 struct vhd_transaction *tx;
2119 struct vhd_state *s = req->state;
2120
2121 s->returned++;
2122 TRACE(s);
2123
2124 blk = req->treq.sec / s->spb;
2125 bm = get_bitmap(s, blk);
2126 tx = &bm->tx;
2127
2128 DBG(TLOG_DBG, "blk: 0x%04x, started: %d, finished: %d\n",
2129 blk, tx->started, tx->finished);
2130 ASSERT(tx->closed);
2131 ASSERT(bm && bitmap_valid(bm));
2132 ASSERT(test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING));
2133
2134 clear_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING);
2135
2136 finish_bitmap_transaction(s, bm, req->error);
2137 }
2138
2139 static void
2140 finish_data_read(struct vhd_request *req)
2141 {
2142 struct vhd_state *s = req->state;
2143
2144 DBG(TLOG_DBG, "lsec 0x%08"PRIx64", blk: 0x%04"PRIx64"\n",
2145 req->treq.sec, req->treq.sec / s->spb);
2146 signal_completion(req, 0);
2147 }
2148
2149 static void
2150 finish_data_write(struct vhd_request *req)
2151 {
2152 int i;
2153 struct vhd_transaction *tx = req->tx;
2154 struct vhd_state *s = (struct vhd_state *)req->state;
2155
2156 set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
2157
2158 if (tx) {
2159 u32 blk, sec;
2160 struct vhd_bitmap *bm;
2161
2162 blk = req->treq.sec / s->spb;
2163 sec = req->treq.sec % s->spb;
2164 bm = get_bitmap(s, blk);
2165
2166 ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm));
2167
2168 tx->finished++;
2169
2170 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64", "
2171 "tx->started: %d, tx->finished: %d\n", req->treq.sec,
2172 req->treq.sec / s->spb, tx->started, tx->finished);
2173
2174 if (!req->error)
2175 for (i = 0; i < req->treq.secs; i++)
2176 vhd_bitmap_set(&s->vhd, bm->shadow, sec + i);
2177
2178 if (transaction_completed(tx))
2179 finish_data_transaction(s, bm);
2180
2181 } else if (!test_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED)) {
2182 ASSERT(!req->next);
2183 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64"\n",
2184 req->treq.sec, req->treq.sec / s->spb);
2185 signal_completion(req, 0);
2186 }
2187 }
2188
2189 void
2190 vhd_complete(void *arg, struct tiocb *tiocb, int err)
2191 {
2192 struct vhd_request *req = (struct vhd_request *)arg;
2193 struct vhd_state *s = req->state;
2194 struct iocb *io = &tiocb->iocb;
2195
2196 s->completed++;
2197 TRACE(s);
2198
2199 req->error = err;
2200
2201 if (req->error)
2202 ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
2203 "nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
2204 s->vhd.file, req->op, req->treq.sec, req->treq.secs,
2205 io->u.c.nbytes, req->treq.sec / s->spb,
2206 bat_entry(s, req->treq.sec / s->spb));
2207
2208 switch (req->op) {
2209 case VHD_OP_DATA_READ:
2210 finish_data_read(req);
2211 break;
2212
2213 case VHD_OP_DATA_WRITE:
2214 finish_data_write(req);
2215 break;
2216
2217 case VHD_OP_BITMAP_READ:
2218 finish_bitmap_read(req);
2219 break;
2220
2221 case VHD_OP_BITMAP_WRITE:
2222 finish_bitmap_write(req);
2223 break;
2224
2225 case VHD_OP_ZERO_BM_WRITE:
2226 finish_zero_bm_write(req);
2227 break;
2228
2229 case VHD_OP_BAT_WRITE:
2230 finish_bat_write(req);
2231 break;
2232
2233 default:
2234 ASSERT(0);
2235 break;
2236 }
2237 }
2238
2239 void
2240 vhd_debug(td_driver_t *driver)
2241 {
2242 int i;
2243 struct vhd_state *s = (struct vhd_state *)driver->data;
2244
2245 DBG(TLOG_WARN, "%s: QUEUED: 0x%08"PRIx64", COMPLETED: 0x%08"PRIx64", "
2246 "RETURNED: 0x%08"PRIx64"\n", s->vhd.file, s->queued, s->completed,
2247 s->returned);
2248 DBG(TLOG_WARN, "WRITES: 0x%08"PRIx64", AVG_WRITE_SIZE: %f\n",
2249 s->writes, (s->writes ? ((float)s->write_size / s->writes) : 0.0));
2250 DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
2251 s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
2252
2253 DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
2254 for (i = 0; i < VHD_REQS_DATA; i++) {
2255 struct vhd_request *r = &s->vreq_list[i];
2256 td_request_t *t = &r->treq;
2257 if (t->secs)
2258 DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
2259 " lsec: 0x%08"PRIx64", flags: %d, this: %p, "
2260 "next: %p, tx: %p\n", i, t->id, r->error, r->op,
2261 t->sec, r->flags, r, r->next, r->tx);
2262 }
2263
2264 DBG(TLOG_WARN, "BITMAP CACHE:\n");
2265 for (i = 0; i < VHD_CACHE_SIZE; i++) {
2266 int qnum = 0, wnum = 0, rnum = 0;
2267 struct vhd_bitmap *bm = s->bitmap[i];
2268 struct vhd_transaction *tx;
2269 struct vhd_request *r;
2270
2271 if (!bm)
2272 continue;
2273
2274 tx = &bm->tx;
2275 r = bm->queue.head;
2276 while (r) {
2277 qnum++;
2278 r = r->next;
2279 }
2280
2281 r = bm->waiting.head;
2282 while (r) {
2283 wnum++;
2284 r = r->next;
2285 }
2286
2287 r = tx->requests.head;
2288 while (r) {
2289 rnum++;
2290 r = r->next;
2291 }
2292
2293 DBG(TLOG_WARN, "%d: blk: 0x%04x, status: 0x%08x, q: %p, qnum: %d, w: %p, "
2294 "wnum: %d, locked: %d, in use: %d, tx: %p, tx_error: %d, "
2295 "started: %d, finished: %d, status: %u, reqs: %p, nreqs: %d\n",
2296 i, bm->blk, bm->status, bm->queue.head, qnum, bm->waiting.head,
2297 wnum, bitmap_locked(bm), bitmap_in_use(bm), tx, tx->error,
2298 tx->started, tx->finished, tx->status, tx->requests.head, rnum);
2299 }
2300
2301 DBG(TLOG_WARN, "BAT: status: 0x%08x, pbw_blk: 0x%04x, "
2302 "pbw_off: 0x%08"PRIx64", tx: %p\n", s->bat.status, s->bat.pbw_blk,
2303 s->bat.pbw_offset, s->bat.req.tx);
2304
2305 /*
2306 for (i = 0; i < s->hdr.max_bat_size; i++)
2307 DPRINTF("%d: %u\n", i, s->bat.bat[i]);
2308 */
2309 }
2310
2311 struct tap_disk tapdisk_vhd = {
2312 .disk_type = "tapdisk_vhd",
2313 .flags = 0,
2314 .private_data_size = sizeof(struct vhd_state),
2315 .td_open = _vhd_open,
2316 .td_close = _vhd_close,
2317 .td_queue_read = vhd_queue_read,
2318 .td_queue_write = vhd_queue_write,
2319 .td_get_parent_id = vhd_get_parent_id,
2320 .td_validate_parent = vhd_validate_parent,
2321 .td_debug = vhd_debug,
2322 };
2323