1 /*
2 * GRUB -- GRand Unified Bootloader
3 * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18 /*
19 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
20 * Use is subject to license terms.
21 */
22
23 /*
24 * The zfs plug-in routines for GRUB are:
25 *
26 * zfs_mount() - locates a valid uberblock of the root pool and reads
27 * in its MOS at the memory address MOS.
28 *
29 * zfs_open() - locates a plain file object by following the MOS
30 * and places its dnode at the memory address DNODE.
31 *
32 * zfs_read() - read in the data blocks pointed by the DNODE.
33 *
34 * ZFS_SCRATCH is used as a working area.
35 *
36 * (memory addr) MOS DNODE ZFS_SCRATCH
37 * | | |
38 * +-------V---------V----------V---------------+
39 * memory | | dnode | dnode | scratch |
40 * | | 512B | 512B | area |
41 * +--------------------------------------------+
42 */
43
44 #ifdef FSYS_ZFS
45
46 #include "shared.h"
47 #include "filesys.h"
48 #include "fsys_zfs.h"
49
50 /* cache for a file block of the currently zfs_open()-ed file */
51 static void *file_buf = NULL;
52 static uint64_t file_start = 0;
53 static uint64_t file_end = 0;
54
55 /* cache for a dnode block */
56 static dnode_phys_t *dnode_buf = NULL;
57 static dnode_phys_t *dnode_mdn = NULL;
58 static uint64_t dnode_start = 0;
59 static uint64_t dnode_end = 0;
60
61 static uint64_t pool_guid = 0;
62 static uberblock_t current_uberblock;
63 static char *stackbase;
64
65 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
66 {
67 {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */
68 {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */
69 {"off", 0}, /* ZIO_COMPRESS_OFF */
70 {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */
71 {"empty", 0} /* ZIO_COMPRESS_EMPTY */
72 };
73
74 static int zio_read_data(blkptr_t *bp, void *buf, char *stack);
75
/*
 * Local replacement for bcmp(): compare the first n bytes of s1 and s2.
 *
 * Return:
 *	0 - the regions are identical (also when n is 0 or s1 == s2)
 *	1 - the regions differ
 */
static int
zfs_bcmp(const void *s1, const void *s2, size_t n)
{
	const uint8_t *lhs = s1;
	const uint8_t *rhs = s2;
	size_t off;

	/* identical pointers trivially compare equal */
	if (s1 == s2)
		return (0);

	for (off = 0; off < n; off++) {
		if (lhs[off] != rhs[off])
			return (1);
	}

	return (0);
}
94
/*
 * Integer base-2 logarithm, rounded down (same thing as highbit()-1).
 * Both 0 and 1 yield 0.
 */
static int
zfs_log2(uint64_t num)
{
	int bits;

	/* count how many times num can be halved before reaching 1 (or 0) */
	for (bits = 0; num > 1; num >>= 1)
		bits++;

	return (bits);
}
110
111 /* Checksum Functions */
112 static void
zio_checksum_off(const void * buf,uint64_t size,zio_cksum_t * zcp)113 zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
114 {
115 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
116 }
117
118 /* Checksum Table and Values */
119 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
120 { { NULL, NULL }, 0, 0, "inherit" },
121 { { NULL, NULL }, 0, 0, "on" },
122 { { zio_checksum_off, zio_checksum_off }, 0, 0, "off" },
123 { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "label" },
124 { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "gang_header" },
125 { { NULL, NULL }, 0, 0, "zilog" },
126 { { fletcher_2_native, fletcher_2_byteswap }, 0, 0, "fletcher2" },
127 { { fletcher_4_native, fletcher_4_byteswap }, 1, 0, "fletcher4" },
128 { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 0, "SHA256" },
129 { { NULL, NULL }, 0, 0, "zilog2" }
130 };
131
132 /*
133 * zio_checksum_verify: Provides support for checksum verification.
134 *
135 * Fletcher2, Fletcher4, and SHA256 are supported.
136 *
137 * Return:
138 * -1 = Failure
139 * 0 = Success
140 */
141 static int
zio_checksum_verify(blkptr_t * bp,char * data,int size)142 zio_checksum_verify(blkptr_t *bp, char *data, int size)
143 {
144 zio_cksum_t zc = bp->blk_cksum;
145 uint32_t checksum = BP_GET_CHECKSUM(bp);
146 int byteswap = BP_SHOULD_BYTESWAP(bp);
147 zio_eck_t *zec = (zio_eck_t *)(data + size) - 1;
148 zio_checksum_info_t *ci = &zio_checksum_table[checksum];
149 zio_cksum_t actual_cksum, expected_cksum;
150
151 /* byteswap is not supported */
152 if (byteswap)
153 return (-1);
154
155 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
156 return (-1);
157
158 if (ci->ci_eck) {
159 expected_cksum = zec->zec_cksum;
160 zec->zec_cksum = zc;
161 ci->ci_func[0](data, size, &actual_cksum);
162 zec->zec_cksum = expected_cksum;
163 zc = expected_cksum;
164
165 } else {
166 ci->ci_func[byteswap](data, size, &actual_cksum);
167 }
168
169 if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
170 (actual_cksum.zc_word[1] - zc.zc_word[1]) |
171 (actual_cksum.zc_word[2] - zc.zc_word[2]) |
172 (actual_cksum.zc_word[3] - zc.zc_word[3]))
173 return (-1);
174
175 return (0);
176 }
177
178 /*
179 * vdev_label_start returns the physical disk offset (in bytes) of
180 * label "l".
181 */
182 static uint64_t
vdev_label_start(uint64_t psize,int l)183 vdev_label_start(uint64_t psize, int l)
184 {
185 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
186 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
187 }
188
189 /*
190 * vdev_uberblock_compare takes two uberblock structures and returns an integer
191 * indicating the more recent of the two.
192 * Return Value = 1 if ub2 is more recent
193 * Return Value = -1 if ub1 is more recent
194 * The most recent uberblock is determined using its transaction number and
195 * timestamp. The uberblock with the highest transaction number is
196 * considered "newer". If the transaction numbers of the two blocks match, the
197 * timestamps are compared to determine the "newer" of the two.
198 */
199 static int
vdev_uberblock_compare(uberblock_t * ub1,uberblock_t * ub2)200 vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
201 {
202 if (ub1->ub_txg < ub2->ub_txg)
203 return (-1);
204 if (ub1->ub_txg > ub2->ub_txg)
205 return (1);
206
207 if (ub1->ub_timestamp < ub2->ub_timestamp)
208 return (-1);
209 if (ub1->ub_timestamp > ub2->ub_timestamp)
210 return (1);
211
212 return (0);
213 }
214
215 /*
216 * Three pieces of information are needed to verify an uberblock: the magic
217 * number, the version number, and the checksum.
218 *
219 * Currently Implemented: version number, magic number
220 * Need to Implement: checksum
221 *
222 * Return:
223 * 0 - Success
224 * -1 - Failure
225 */
226 static int
uberblock_verify(uberblock_phys_t * ub,uint64_t offset)227 uberblock_verify(uberblock_phys_t *ub, uint64_t offset)
228 {
229
230 uberblock_t *uber = &ub->ubp_uberblock;
231 blkptr_t bp;
232
233 BP_ZERO(&bp);
234 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
235 BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER);
236 ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0);
237
238 if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0)
239 return (-1);
240
241 if (uber->ub_magic == UBERBLOCK_MAGIC &&
242 uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
243 return (0);
244
245 return (-1);
246 }
247
248 /*
249 * Find the best uberblock.
250 * Return:
251 * Success - Pointer to the best uberblock.
252 * Failure - NULL
253 */
254 static uberblock_phys_t *
find_bestub(uberblock_phys_t * ub_array,uint64_t sector)255 find_bestub(uberblock_phys_t *ub_array, uint64_t sector)
256 {
257 uberblock_phys_t *ubbest = NULL;
258 uint64_t offset;
259 int i;
260
261 for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
262 offset = (sector << SPA_MINBLOCKSHIFT) +
263 VDEV_UBERBLOCK_OFFSET(i);
264 if (uberblock_verify(&ub_array[i], offset) == 0) {
265 if (ubbest == NULL) {
266 ubbest = &ub_array[i];
267 } else if (vdev_uberblock_compare(
268 &(ub_array[i].ubp_uberblock),
269 &(ubbest->ubp_uberblock)) > 0) {
270 ubbest = &ub_array[i];
271 }
272 }
273 }
274
275 return (ubbest);
276 }
277
278 /*
279 * Read a block of data based on the gang block address dva,
280 * and put its data in buf.
281 *
282 * Return:
283 * 0 - success
284 * 1 - failure
285 */
286 static int
zio_read_gang(blkptr_t * bp,dva_t * dva,void * buf,char * stack)287 zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack)
288 {
289 zio_gbh_phys_t *zio_gb;
290 uint64_t offset, sector;
291 blkptr_t tmpbp;
292 int i;
293
294 zio_gb = (zio_gbh_phys_t *)stack;
295 stack += SPA_GANGBLOCKSIZE;
296 offset = DVA_GET_OFFSET(dva);
297 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
298
299 /* read in the gang block header */
300 if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
301 grub_printf("failed to read in a gang block header\n");
302 return (1);
303 }
304
305 /* self checksuming the gang block header */
306 BP_ZERO(&tmpbp);
307 BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER);
308 BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER);
309 ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva),
310 DVA_GET_OFFSET(dva), bp->blk_birth, 0);
311 if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) {
312 grub_printf("failed to checksum a gang block header\n");
313 return (1);
314 }
315
316 for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
317 if (zio_gb->zg_blkptr[i].blk_birth == 0)
318 continue;
319
320 if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack))
321 return (1);
322 buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]);
323 }
324
325 return (0);
326 }
327
328 /*
329 * Read in a block of raw data to buf.
330 *
331 * Return:
332 * 0 - success
333 * 1 - failure
334 */
335 static int
zio_read_data(blkptr_t * bp,void * buf,char * stack)336 zio_read_data(blkptr_t *bp, void *buf, char *stack)
337 {
338 int i, psize;
339
340 psize = BP_GET_PSIZE(bp);
341
342 /* pick a good dva from the block pointer */
343 for (i = 0; i < SPA_DVAS_PER_BP; i++) {
344 uint64_t offset, sector;
345
346 if (bp->blk_dva[i].dva_word[0] == 0 &&
347 bp->blk_dva[i].dva_word[1] == 0)
348 continue;
349
350 if (DVA_GET_GANG(&bp->blk_dva[i])) {
351 if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0)
352 return (0);
353 } else {
354 /* read in a data block */
355 offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
356 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
357 if (devread(sector, 0, psize, buf))
358 return (0);
359 }
360 }
361
362 return (1);
363 }
364
365 /*
366 * Read in a block of data, verify its checksum, decompress if needed,
367 * and put the uncompressed data in buf.
368 *
369 * Return:
370 * 0 - success
371 * errnum - failure
372 */
373 static int
zio_read(blkptr_t * bp,void * buf,char * stack)374 zio_read(blkptr_t *bp, void *buf, char *stack)
375 {
376 int lsize, psize, comp;
377 char *retbuf;
378
379 comp = BP_GET_COMPRESS(bp);
380 lsize = BP_GET_LSIZE(bp);
381 psize = BP_GET_PSIZE(bp);
382
383 if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
384 (comp != ZIO_COMPRESS_OFF &&
385 decomp_table[comp].decomp_func == NULL)) {
386 grub_printf("compression algorithm not supported\n");
387 return (ERR_FSYS_CORRUPT);
388 }
389
390 if ((char *)buf < stack && ((char *)buf) + lsize > stack) {
391 grub_printf("not enough memory allocated\n");
392 return (ERR_WONT_FIT);
393 }
394
395 retbuf = buf;
396 if (comp != ZIO_COMPRESS_OFF) {
397 buf = stack;
398 stack += psize;
399 }
400
401 if (zio_read_data(bp, buf, stack)) {
402 grub_printf("zio_read_data failed\n");
403 return (ERR_FSYS_CORRUPT);
404 }
405
406 if (zio_checksum_verify(bp, buf, psize) != 0) {
407 grub_printf("checksum verification failed\n");
408 return (ERR_FSYS_CORRUPT);
409 }
410
411 if (comp != ZIO_COMPRESS_OFF)
412 decomp_table[comp].decomp_func(buf, retbuf, psize, lsize);
413
414 return (0);
415 }
416
417 /*
418 * Get the block from a block id.
419 * push the block onto the stack.
420 *
421 * Return:
422 * 0 - success
423 * errnum - failure
424 */
425 static int
dmu_read(dnode_phys_t * dn,uint64_t blkid,void * buf,char * stack)426 dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack)
427 {
428 int idx, level;
429 blkptr_t *bp_array = dn->dn_blkptr;
430 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
431 blkptr_t *bp, *tmpbuf;
432
433 bp = (blkptr_t *)stack;
434 stack += sizeof (blkptr_t);
435
436 tmpbuf = (blkptr_t *)stack;
437 stack += 1<<dn->dn_indblkshift;
438
439 for (level = dn->dn_nlevels - 1; level >= 0; level--) {
440 idx = (blkid >> (epbs * level)) & ((1<<epbs)-1);
441 *bp = bp_array[idx];
442 if (level == 0)
443 tmpbuf = buf;
444 if (BP_IS_HOLE(bp)) {
445 grub_memset(buf, 0,
446 dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
447 break;
448 } else if ((errnum = zio_read(bp, tmpbuf, stack))) {
449 return (errnum);
450 }
451
452 bp_array = tmpbuf;
453 }
454
455 return (0);
456 }
457
458 /*
459 * mzap_lookup: Looks up property described by "name" and returns the value
460 * in "value".
461 *
462 * Return:
463 * 0 - success
464 * errnum - failure
465 */
466 static int
mzap_lookup(mzap_phys_t * zapobj,int objsize,char * name,uint64_t * value)467 mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
468 uint64_t *value)
469 {
470 int i, chunks;
471 mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
472
473 chunks = objsize/MZAP_ENT_LEN - 1;
474 for (i = 0; i < chunks; i++) {
475 if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
476 *value = mzap_ent[i].mze_value;
477 return (0);
478 }
479 }
480
481 return (ERR_FSYS_CORRUPT);
482 }
483
484 static uint64_t
zap_hash(uint64_t salt,const char * name)485 zap_hash(uint64_t salt, const char *name)
486 {
487 static uint64_t table[256];
488 const uint8_t *cp;
489 uint8_t c;
490 uint64_t crc = salt;
491
492 if (table[128] == 0) {
493 uint64_t *ct;
494 int i, j;
495 for (i = 0; i < 256; i++) {
496 for (ct = table + i, *ct = i, j = 8; j > 0; j--)
497 *ct = (*ct >> 1) ^ (-(*ct & 1) &
498 ZFS_CRC64_POLY);
499 }
500 }
501
502 if (crc == 0 || table[128] != ZFS_CRC64_POLY) {
503 errnum = ERR_FSYS_CORRUPT;
504 return (0);
505 }
506
507 for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
508 crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
509
510 /*
511 * Only use 28 bits, since we need 4 bits in the cookie for the
512 * collision differentiator. We MUST use the high bits, since
513 * those are the onces that we first pay attention to when
514 * chosing the bucket.
515 */
516 crc &= ~((1ULL << (64 - 28)) - 1);
517
518 return (crc);
519 }
520
521 /*
522 * Only to be used on 8-bit arrays.
523 * array_len is actual len in bytes (not encoded le_value_length).
524 * buf is null-terminated.
525 */
526 static int
zap_leaf_array_equal(zap_leaf_phys_t * l,int blksft,int chunk,int array_len,const char * buf)527 zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk,
528 int array_len, const char *buf)
529 {
530 int bseen = 0;
531
532 while (bseen < array_len) {
533 struct zap_leaf_array *la =
534 &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
535 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
536
537 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
538 return (0);
539
540 if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0)
541 break;
542 chunk = la->la_next;
543 bseen += toread;
544 }
545 return (bseen == array_len);
546 }
547
548 /*
549 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
550 * value for the property "name".
551 *
552 * Return:
553 * 0 - success
554 * errnum - failure
555 */
556 static int
zap_leaf_lookup(zap_leaf_phys_t * l,int blksft,uint64_t h,const char * name,uint64_t * value)557 zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h,
558 const char *name, uint64_t *value)
559 {
560 uint16_t chunk;
561 struct zap_leaf_entry *le;
562
563 /* Verify if this is a valid leaf block */
564 if (l->l_hdr.lh_block_type != ZBT_LEAF)
565 return (ERR_FSYS_CORRUPT);
566 if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC)
567 return (ERR_FSYS_CORRUPT);
568
569 for (chunk = l->l_hash[LEAF_HASH(blksft, h)];
570 chunk != CHAIN_END; chunk = le->le_next) {
571
572 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
573 return (ERR_FSYS_CORRUPT);
574
575 le = ZAP_LEAF_ENTRY(l, blksft, chunk);
576
577 /* Verify the chunk entry */
578 if (le->le_type != ZAP_CHUNK_ENTRY)
579 return (ERR_FSYS_CORRUPT);
580
581 if (le->le_hash != h)
582 continue;
583
584 if (zap_leaf_array_equal(l, blksft, le->le_name_chunk,
585 le->le_name_length, name)) {
586
587 struct zap_leaf_array *la;
588 uint8_t *ip;
589
590 if (le->le_int_size != 8 || le->le_value_length != 1)
591 return (ERR_FSYS_CORRUPT);
592
593 /* get the uint64_t property value */
594 la = &ZAP_LEAF_CHUNK(l, blksft,
595 le->le_value_chunk).l_array;
596 ip = la->la_array;
597
598 *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
599 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
600 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
601 (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
602
603 return (0);
604 }
605 }
606
607 return (ERR_FSYS_CORRUPT);
608 }
609
610 /*
611 * Fat ZAP lookup
612 *
613 * Return:
614 * 0 - success
615 * errnum - failure
616 */
617 static int
fzap_lookup(dnode_phys_t * zap_dnode,zap_phys_t * zap,char * name,uint64_t * value,char * stack)618 fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
619 char *name, uint64_t *value, char *stack)
620 {
621 zap_leaf_phys_t *l;
622 uint64_t hash, idx, blkid;
623 int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
624
625 /* Verify if this is a fat zap header block */
626 if (zap->zap_magic != (uint64_t)ZAP_MAGIC ||
627 zap->zap_flags != 0)
628 return (ERR_FSYS_CORRUPT);
629
630 hash = zap_hash(zap->zap_salt, name);
631 if (errnum)
632 return (errnum);
633
634 /* get block id from index */
635 if (zap->zap_ptrtbl.zt_numblks != 0) {
636 /* external pointer tables not supported */
637 return (ERR_FSYS_CORRUPT);
638 }
639 idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
640 blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))];
641
642 /* Get the leaf block */
643 l = (zap_leaf_phys_t *)stack;
644 stack += 1<<blksft;
645 if ((1<<blksft) < sizeof (zap_leaf_phys_t))
646 return (ERR_FSYS_CORRUPT);
647 if ((errnum = dmu_read(zap_dnode, blkid, l, stack)))
648 return (errnum);
649
650 return (zap_leaf_lookup(l, blksft, hash, name, value));
651 }
652
653 /*
654 * Read in the data of a zap object and find the value for a matching
655 * property name.
656 *
657 * Return:
658 * 0 - success
659 * errnum - failure
660 */
661 static int
zap_lookup(dnode_phys_t * zap_dnode,char * name,uint64_t * val,char * stack)662 zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack)
663 {
664 uint64_t block_type;
665 int size;
666 void *zapbuf;
667
668 /* Read in the first block of the zap object data. */
669 zapbuf = stack;
670 size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
671 stack += size;
672
673 if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)))
674 return (errnum);
675
676 block_type = *((uint64_t *)zapbuf);
677
678 if (block_type == ZBT_MICRO) {
679 return (mzap_lookup(zapbuf, size, name, val));
680 } else if (block_type == ZBT_HEADER) {
681 /* this is a fat zap */
682 return (fzap_lookup(zap_dnode, zapbuf, name,
683 val, stack));
684 }
685
686 return (ERR_FSYS_CORRUPT);
687 }
688
689 /*
690 * Get the dnode of an object number from the metadnode of an object set.
691 *
692 * Input
693 * mdn - metadnode to get the object dnode
694 * objnum - object number for the object dnode
695 * buf - data buffer that holds the returning dnode
696 * stack - scratch area
697 *
698 * Return:
699 * 0 - success
700 * errnum - failure
701 */
702 static int
dnode_get(dnode_phys_t * mdn,uint64_t objnum,uint8_t type,dnode_phys_t * buf,char * stack)703 dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf,
704 char *stack)
705 {
706 uint64_t blkid, blksz; /* the block id this object dnode is in */
707 int epbs; /* shift of number of dnodes in a block */
708 int idx; /* index within a block */
709 dnode_phys_t *dnbuf;
710
711 blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
712 epbs = zfs_log2(blksz) - DNODE_SHIFT;
713 blkid = objnum >> epbs;
714 idx = objnum & ((1<<epbs)-1);
715
716 if (dnode_buf != NULL && dnode_mdn == mdn &&
717 objnum >= dnode_start && objnum < dnode_end) {
718 grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE);
719 VERIFY_DN_TYPE(buf, type);
720 return (0);
721 }
722
723 if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) {
724 dnbuf = dnode_buf;
725 dnode_mdn = mdn;
726 dnode_start = blkid << epbs;
727 dnode_end = (blkid + 1) << epbs;
728 } else {
729 dnbuf = (dnode_phys_t *)stack;
730 stack += blksz;
731 }
732
733 if ((errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)))
734 return (errnum);
735
736 grub_memmove(buf, &dnbuf[idx], DNODE_SIZE);
737 VERIFY_DN_TYPE(buf, type);
738
739 return (0);
740 }
741
742 /*
743 * Check if this is a special file that resides at the top
744 * dataset of the pool. Currently this is the GRUB menu,
745 * boot signature and boot signature backup.
746 * str starts with '/'.
747 */
748 static int
is_top_dataset_file(char * str)749 is_top_dataset_file(char *str)
750 {
751 char *tptr;
752
753 if ((tptr = grub_strstr(str, "menu.lst")) &&
754 (tptr[8] == '\0' || tptr[8] == ' ') &&
755 *(tptr-1) == '/')
756 return (1);
757
758 if (grub_strncmp(str, BOOTSIGN_DIR"/",
759 grub_strlen(BOOTSIGN_DIR) + 1) == 0)
760 return (1);
761
762 if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0)
763 return (1);
764
765 return (0);
766 }
767
768 /*
769 * Get the file dnode for a given file name where mdn is the meta dnode
770 * for this ZFS object set. When found, place the file dnode in dn.
771 * The 'path' argument will be mangled.
772 *
773 * Return:
774 * 0 - success
775 * errnum - failure
776 */
777 static int
dnode_get_path(dnode_phys_t * mdn,char * path,dnode_phys_t * dn,char * stack)778 dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn,
779 char *stack)
780 {
781 uint64_t objnum, version;
782 char *cname, ch;
783
784 if ((errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
785 dn, stack)))
786 return (errnum);
787
788 if ((errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)))
789 return (errnum);
790 if (version > ZPL_VERSION)
791 return (-1);
792
793 if ((errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)))
794 return (errnum);
795
796 if ((errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS,
797 dn, stack)))
798 return (errnum);
799
800 /* skip leading slashes */
801 while (*path == '/')
802 path++;
803
804 while (*path && !isspace((uint8_t)*path)) {
805
806 /* get the next component name */
807 cname = path;
808 while (*path && !isspace((uint8_t)*path) && *path != '/')
809 path++;
810 ch = *path;
811 *path = 0; /* ensure null termination */
812
813 if ((errnum = zap_lookup(dn, cname, &objnum, stack)))
814 return (errnum);
815
816 objnum = ZFS_DIRENT_OBJ(objnum);
817 if ((errnum = dnode_get(mdn, objnum, 0, dn, stack)))
818 return (errnum);
819
820 *path = ch;
821 while (*path == '/')
822 path++;
823 }
824
825 /* We found the dnode for this file. Verify if it is a plain file. */
826 VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS);
827
828 return (0);
829 }
830
831 /*
832 * Get the default 'bootfs' property value from the rootpool.
833 *
834 * Return:
835 * 0 - success
836 * errnum -failure
837 */
838 static int
get_default_bootfsobj(dnode_phys_t * mosmdn,uint64_t * obj,char * stack)839 get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack)
840 {
841 uint64_t objnum = 0;
842 dnode_phys_t *dn = (dnode_phys_t *)stack;
843 stack += DNODE_SIZE;
844
845 if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
846 DMU_OT_OBJECT_DIRECTORY, dn, stack)))
847 return (errnum);
848
849 /*
850 * find the object number for 'pool_props', and get the dnode
851 * of the 'pool_props'.
852 */
853 if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack))
854 return (ERR_FILESYSTEM_NOT_FOUND);
855
856 if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)))
857 return (errnum);
858
859 if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack))
860 return (ERR_FILESYSTEM_NOT_FOUND);
861
862 if (!objnum)
863 return (ERR_FILESYSTEM_NOT_FOUND);
864
865 *obj = objnum;
866 return (0);
867 }
868
869 /*
870 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
871 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
872 * of pool/rootfs.
873 *
874 * If no fsname and no obj are given, return the DSL_DIR metadnode.
875 * If fsname is given, return its metadnode and its matching object number.
876 * If only obj is given, return the metadnode for this object number.
877 *
878 * Return:
879 * 0 - success
880 * errnum - failure
881 */
882 static int
get_objset_mdn(dnode_phys_t * mosmdn,char * fsname,uint64_t * obj,dnode_phys_t * mdn,char * stack)883 get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj,
884 dnode_phys_t *mdn, char *stack)
885 {
886 uint64_t objnum, headobj;
887 char *cname, ch;
888 blkptr_t *bp;
889 objset_phys_t *osp;
890 int issnapshot = 0;
891 char *snapname = NULL;
892
893 if (fsname == NULL && obj) {
894 headobj = *obj;
895 goto skip;
896 }
897
898 if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
899 DMU_OT_OBJECT_DIRECTORY, mdn, stack)))
900 return (errnum);
901
902 if ((errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum,
903 stack)))
904 return (errnum);
905
906 if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)))
907 return (errnum);
908
909 if (fsname == NULL) {
910 headobj =
911 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
912 goto skip;
913 }
914
915 /* take out the pool name */
916 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
917 fsname++;
918
919 while (*fsname && !isspace((uint8_t)*fsname)) {
920 uint64_t childobj;
921
922 while (*fsname == '/')
923 fsname++;
924
925 cname = fsname;
926 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
927 fsname++;
928 ch = *fsname;
929 *fsname = 0;
930
931 snapname = cname;
932 while (*snapname && !isspace((uint8_t)*snapname) && *snapname != '@')
933 snapname++;
934 if (*snapname == '@') {
935 issnapshot = 1;
936 *snapname = 0;
937 }
938 childobj =
939 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
940 if ((errnum = dnode_get(mosmdn, childobj,
941 DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)))
942 return (errnum);
943
944 if (zap_lookup(mdn, cname, &objnum, stack))
945 return (ERR_FILESYSTEM_NOT_FOUND);
946
947 if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR,
948 mdn, stack)))
949 return (errnum);
950
951 *fsname = ch;
952 if (issnapshot)
953 *snapname = '@';
954 }
955 headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
956 if (obj)
957 *obj = headobj;
958
959 skip:
960 if ((errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack)))
961 return (errnum);
962 if (issnapshot) {
963 uint64_t snapobj;
964
965 snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))->
966 ds_snapnames_zapobj;
967
968 if ((errnum = dnode_get(mosmdn, snapobj,
969 DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)))
970 return (errnum);
971 if (zap_lookup(mdn, snapname + 1, &headobj, stack))
972 return (ERR_FILESYSTEM_NOT_FOUND);
973 if ((errnum = dnode_get(mosmdn, headobj,
974 DMU_OT_DSL_DATASET, mdn, stack)))
975 return (errnum);
976 if (obj)
977 *obj = headobj;
978 }
979
980 bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
981 osp = (objset_phys_t *)stack;
982 stack += sizeof (objset_phys_t);
983 if ((errnum = zio_read(bp, osp, stack)))
984 return (errnum);
985
986 grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE);
987
988 return (0);
989 }
990
991 /*
992 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
993 *
994 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
995 *
996 * encoding method/host endian (4 bytes)
997 * nvl_version (4 bytes)
998 * nvl_nvflag (4 bytes)
999 * encoded nvpairs:
1000 * encoded size of the nvpair (4 bytes)
1001 * decoded size of the nvpair (4 bytes)
1002 * name string size (4 bytes)
1003 * name string data (sizeof(NV_ALIGN4(string))
1004 * data type (4 bytes)
1005 * # of elements in the nvpair (4 bytes)
1006 * data
1007 * 2 zero's for the last nvpair
1008 * (end of the entire list) (8 bytes)
1009 *
1010 * Return:
1011 * 0 - success
1012 * 1 - failure
1013 */
1014 static int
nvlist_unpack(char * nvlist,char ** out)1015 nvlist_unpack(char *nvlist, char **out)
1016 {
1017 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
1018 if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
1019 return (1);
1020
1021 nvlist += 4;
1022 *out = nvlist;
1023 return (0);
1024 }
1025
1026 static char *
nvlist_array(char * nvlist,int index)1027 nvlist_array(char *nvlist, int index)
1028 {
1029 int i, encode_size;
1030
1031 for (i = 0; i < index; i++) {
1032 /* skip the header, nvl_version, and nvl_nvflag */
1033 nvlist = nvlist + 4 * 2;
1034
1035 while ((encode_size = BSWAP_32(*(uint32_t *)nvlist)))
1036 nvlist += encode_size; /* goto the next nvpair */
1037
1038 nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */
1039 }
1040
1041 return (nvlist);
1042 }
1043
1044 static int
nvlist_lookup_value(char * nvlist,char * name,void * val,int valtype,int * nelmp)1045 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
1046 int *nelmp)
1047 {
1048 int name_len, type, slen, encode_size;
1049 char *nvpair, *nvp_name, *strval = val;
1050 uint64_t *intval = val;
1051
1052 /* skip the header, nvl_version, and nvl_nvflag */
1053 nvlist = nvlist + 4 * 2;
1054
1055 /*
1056 * Loop thru the nvpair list
1057 * The XDR representation of an integer is in big-endian byte order.
1058 */
1059 while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) {
1060
1061 nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
1062
1063 name_len = BSWAP_32(*(uint32_t *)nvpair);
1064 nvpair += 4;
1065
1066 nvp_name = nvpair;
1067 nvpair = nvpair + ((name_len + 3) & ~3); /* align */
1068
1069 type = BSWAP_32(*(uint32_t *)nvpair);
1070 nvpair += 4;
1071
1072 if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
1073 type == valtype) {
1074 int nelm;
1075
1076 if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)
1077 return (1);
1078 nvpair += 4;
1079
1080 switch (valtype) {
1081 case DATA_TYPE_STRING:
1082 slen = BSWAP_32(*(uint32_t *)nvpair);
1083 nvpair += 4;
1084 grub_memmove(strval, nvpair, slen);
1085 strval[slen] = '\0';
1086 return (0);
1087
1088 case DATA_TYPE_UINT64:
1089 *intval = BSWAP_64(*(uint64_t *)nvpair);
1090 return (0);
1091
1092 case DATA_TYPE_NVLIST:
1093 *(void **)val = (void *)nvpair;
1094 return (0);
1095
1096 case DATA_TYPE_NVLIST_ARRAY:
1097 *(void **)val = (void *)nvpair;
1098 if (nelmp)
1099 *nelmp = nelm;
1100 return (0);
1101 }
1102 }
1103
1104 nvlist += encode_size; /* goto the next nvpair */
1105 }
1106
1107 return (1);
1108 }
1109
1110 /*
1111 * Check if this vdev is online and is in a good state.
1112 */
1113 static int
vdev_validate(char * nv)1114 vdev_validate(char *nv)
1115 {
1116 uint64_t ival;
1117
1118 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival,
1119 DATA_TYPE_UINT64, NULL) == 0 ||
1120 nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
1121 DATA_TYPE_UINT64, NULL) == 0 ||
1122 nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
1123 DATA_TYPE_UINT64, NULL) == 0)
1124 return (ERR_DEV_VALUES);
1125
1126 return (0);
1127 }
1128
1129 /*
1130 * Get a valid vdev pathname/devid from the boot device.
1131 * The caller should already allocate MAXPATHLEN memory for bootpath and devid.
1132 */
1133 static int
vdev_get_bootpath(char * nv,uint64_t inguid,char * devid,char * bootpath,int is_spare)1134 vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath,
1135 int is_spare)
1136 {
1137 char type[16];
1138
1139 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
1140 NULL))
1141 return (ERR_FSYS_CORRUPT);
1142
1143 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1144 uint64_t guid;
1145
1146 if (vdev_validate(nv) != 0)
1147 return (ERR_NO_BOOTPATH);
1148
1149 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID,
1150 &guid, DATA_TYPE_UINT64, NULL) != 0)
1151 return (ERR_NO_BOOTPATH);
1152
1153 if (guid != inguid)
1154 return (ERR_NO_BOOTPATH);
1155
1156 /* for a spare vdev, pick the disk labeled with "is_spare" */
1157 if (is_spare) {
1158 uint64_t spare = 0;
1159 (void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE,
1160 &spare, DATA_TYPE_UINT64, NULL);
1161 if (!spare)
1162 return (ERR_NO_BOOTPATH);
1163 }
1164
1165 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
1166 bootpath, DATA_TYPE_STRING, NULL) != 0)
1167 bootpath[0] = '\0';
1168
1169 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
1170 devid, DATA_TYPE_STRING, NULL) != 0)
1171 devid[0] = '\0';
1172
1173 if (strlen(bootpath) >= MAXPATHLEN ||
1174 strlen(devid) >= MAXPATHLEN)
1175 return (ERR_WONT_FIT);
1176
1177 return (0);
1178
1179 } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
1180 strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
1181 (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
1182 int nelm, i;
1183 char *child;
1184
1185 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
1186 DATA_TYPE_NVLIST_ARRAY, &nelm))
1187 return (ERR_FSYS_CORRUPT);
1188
1189 for (i = 0; i < nelm; i++) {
1190 char *child_i;
1191
1192 child_i = nvlist_array(child, i);
1193 if (vdev_get_bootpath(child_i, inguid, devid,
1194 bootpath, is_spare) == 0)
1195 return (0);
1196 }
1197 }
1198
1199 return (ERR_NO_BOOTPATH);
1200 }
1201
1202 /*
1203 * Check the disk label information and retrieve needed vdev name-value pairs.
1204 *
1205 * Return:
1206 * 0 - success
1207 * ERR_* - failure
1208 */
1209 int
check_pool_label(uint64_t sector,char * stack,char * outdevid,char * outpath,uint64_t * outguid)1210 check_pool_label(uint64_t sector, char *stack, char *outdevid,
1211 char *outpath, uint64_t *outguid)
1212 {
1213 vdev_phys_t *vdev;
1214 uint64_t pool_state, txg = 0;
1215 char *nvlist, *nv;
1216 uint64_t diskguid;
1217 uint64_t version;
1218
1219 sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT);
1220
1221 /* Read in the vdev name-value pair list (112K). */
1222 if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0)
1223 return (ERR_READ);
1224
1225 vdev = (vdev_phys_t *)stack;
1226 stack += sizeof (vdev_phys_t);
1227
1228 if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
1229 return (ERR_FSYS_CORRUPT);
1230
1231 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state,
1232 DATA_TYPE_UINT64, NULL))
1233 return (ERR_FSYS_CORRUPT);
1234
1235 if (pool_state == POOL_STATE_DESTROYED)
1236 return (ERR_FILESYSTEM_NOT_FOUND);
1237
1238 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME,
1239 current_rootpool, DATA_TYPE_STRING, NULL))
1240 return (ERR_FSYS_CORRUPT);
1241
1242 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg,
1243 DATA_TYPE_UINT64, NULL))
1244 return (ERR_FSYS_CORRUPT);
1245
1246 /* not an active device */
1247 if (txg == 0)
1248 return (ERR_NO_BOOTPATH);
1249
1250 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
1251 DATA_TYPE_UINT64, NULL))
1252 return (ERR_FSYS_CORRUPT);
1253 if (version > SPA_VERSION)
1254 return (ERR_NEWER_VERSION);
1255 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
1256 DATA_TYPE_NVLIST, NULL))
1257 return (ERR_FSYS_CORRUPT);
1258 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
1259 DATA_TYPE_UINT64, NULL))
1260 return (ERR_FSYS_CORRUPT);
1261 if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0))
1262 return (ERR_NO_BOOTPATH);
1263 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid,
1264 DATA_TYPE_UINT64, NULL))
1265 return (ERR_FSYS_CORRUPT);
1266 return (0);
1267 }
1268
1269 /*
1270 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1271 * to the memory address MOS.
1272 *
1273 * Return:
1274 * 1 - success
1275 * 0 - failure
1276 */
1277 int
zfs_mount(void)1278 zfs_mount(void)
1279 {
1280 char *stack;
1281 int label = 0;
1282 uberblock_phys_t *ub_array, *ubbest;
1283 objset_phys_t *osp;
1284 char tmp_bootpath[MAXNAMELEN];
1285 char tmp_devid[MAXNAMELEN];
1286 uint64_t tmp_guid;
1287 uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT;
1288 int err = errnum; /* preserve previous errnum state */
1289
1290 /* if it's our first time here, zero the best uberblock out */
1291 if (best_drive == 0 && best_part == 0 && find_best_root) {
1292 grub_memset(¤t_uberblock, 0, sizeof (uberblock_t));
1293 pool_guid = 0;
1294 }
1295
1296 stackbase = ZFS_SCRATCH;
1297 stack = stackbase;
1298 ub_array = (uberblock_phys_t *)stack;
1299 stack += VDEV_UBERBLOCK_RING;
1300
1301 osp = (objset_phys_t *)stack;
1302 stack += sizeof (objset_phys_t);
1303 adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t));
1304
1305 for (label = 0; label < VDEV_LABELS; label++) {
1306
1307 uint64_t sector;
1308
1309 /*
1310 * some eltorito stacks don't give us a size and
1311 * we end up setting the size to MAXUINT, further
1312 * some of these devices stop working once a single
1313 * read past the end has been issued. Checking
1314 * for a maximum part_length and skipping the backup
1315 * labels at the end of the slice/partition/device
1316 * avoids breaking down on such devices.
1317 */
1318 if (part_length == MAXUINT && label == 2)
1319 break;
1320
1321 sector = vdev_label_start(adjpl,
1322 label) >> SPA_MINBLOCKSHIFT;
1323
1324 /* Read in the uberblock ring (128K). */
1325 if (devread(sector +
1326 ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >>
1327 SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING,
1328 (char *)ub_array) == 0)
1329 continue;
1330
1331 if ((ubbest = find_bestub(ub_array, sector)) != NULL &&
1332 zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack)
1333 == 0) {
1334
1335 VERIFY_OS_TYPE(osp, DMU_OST_META);
1336
1337 if (check_pool_label(sector, stack, tmp_devid,
1338 tmp_bootpath, &tmp_guid))
1339 continue;
1340 if (pool_guid == 0)
1341 pool_guid = tmp_guid;
1342
1343 if (find_best_root && ((pool_guid != tmp_guid) ||
1344 vdev_uberblock_compare(&ubbest->ubp_uberblock,
1345 &(current_uberblock)) <= 0))
1346 continue;
1347
1348 /* Got the MOS. Save it at the memory addr MOS. */
1349 grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
1350 grub_memmove(¤t_uberblock,
1351 &ubbest->ubp_uberblock, sizeof (uberblock_t));
1352 grub_memmove(current_bootpath, tmp_bootpath,
1353 MAXNAMELEN);
1354 grub_memmove(current_devid, tmp_devid,
1355 grub_strlen(tmp_devid));
1356 is_zfs_mount = 1;
1357 return (1);
1358 }
1359 }
1360
1361 /*
1362 * While some fs impls. (tftp) rely on setting and keeping
1363 * global errnums set, others won't reset it and will break
1364 * when issuing rawreads. The goal here is to simply not
1365 * have zfs mount attempts impact the previous state.
1366 */
1367 errnum = err;
1368 return (0);
1369 }
1370
1371 /*
1372 * zfs_open() locates a file in the rootpool by following the
1373 * MOS and places the dnode of the file in the memory address DNODE.
1374 *
1375 * Return:
1376 * 1 - success
1377 * 0 - failure
1378 */
1379 int
zfs_open(char * filename)1380 zfs_open(char *filename)
1381 {
1382 char *stack;
1383 dnode_phys_t *mdn;
1384
1385 file_buf = NULL;
1386 stackbase = ZFS_SCRATCH;
1387 stack = stackbase;
1388
1389 mdn = (dnode_phys_t *)stack;
1390 stack += sizeof (dnode_phys_t);
1391
1392 dnode_mdn = NULL;
1393 dnode_buf = (dnode_phys_t *)stack;
1394 stack += 1<<DNODE_BLOCK_SHIFT;
1395
1396 /*
1397 * menu.lst is placed at the root pool filesystem level,
1398 * do not goto 'current_bootfs'.
1399 */
1400 if (is_top_dataset_file(filename)) {
1401 if ((errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)))
1402 return (0);
1403
1404 current_bootfs_obj = 0;
1405 } else {
1406 if (current_bootfs[0] == '\0') {
1407 /* Get the default root filesystem object number */
1408 if ((errnum = get_default_bootfsobj(MOS,
1409 ¤t_bootfs_obj, stack)))
1410 return (0);
1411
1412 if ((errnum = get_objset_mdn(MOS, NULL,
1413 ¤t_bootfs_obj, mdn, stack)))
1414 return (0);
1415 } else {
1416 if ((errnum = get_objset_mdn(MOS, current_bootfs,
1417 ¤t_bootfs_obj, mdn, stack))) {
1418 grub_memset(current_bootfs, 0, MAXNAMELEN);
1419 return (0);
1420 }
1421 }
1422 }
1423
1424 if (dnode_get_path(mdn, filename, DNODE, stack)) {
1425 errnum = ERR_FILE_NOT_FOUND;
1426 return (0);
1427 }
1428
1429 /* get the file size and set the file position to 0 */
1430
1431 /*
1432 * For DMU_OT_SA we will need to locate the SIZE attribute
1433 * attribute, which could be either in the bonus buffer
1434 * or the "spill" block.
1435 */
1436 if (DNODE->dn_bonustype == DMU_OT_SA) {
1437 sa_hdr_phys_t *sahdrp;
1438 int hdrsize;
1439
1440 if (DNODE->dn_bonuslen != 0) {
1441 sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE);
1442 } else {
1443 if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
1444 blkptr_t *bp = &DNODE->dn_spill;
1445 void *buf;
1446
1447 buf = (void *)stack;
1448 stack += BP_GET_LSIZE(bp);
1449
1450 /* reset errnum to rawread() failure */
1451 errnum = 0;
1452 if (zio_read(bp, buf, stack) != 0) {
1453 return (0);
1454 }
1455 sahdrp = buf;
1456 } else {
1457 errnum = ERR_FSYS_CORRUPT;
1458 return (0);
1459 }
1460 }
1461 hdrsize = SA_HDR_SIZE(sahdrp);
1462 filemax = *(uint64_t *)((char *)sahdrp + hdrsize +
1463 SA_SIZE_OFFSET);
1464 } else {
1465 filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
1466 }
1467 filepos = 0;
1468
1469 dnode_buf = NULL;
1470 return (1);
1471 }
1472
1473 /*
1474 * zfs_read reads in the data blocks pointed by the DNODE.
1475 *
1476 * Return:
1477 * len - the length successfully read in to the buffer
1478 * 0 - failure
1479 */
1480 int
zfs_read(char * buf,int len)1481 zfs_read(char *buf, int len)
1482 {
1483 char *stack;
1484 int blksz, length, movesize;
1485
1486 if (file_buf == NULL) {
1487 file_buf = stackbase;
1488 stackbase += SPA_MAXBLOCKSIZE;
1489 file_start = file_end = 0;
1490 }
1491 stack = stackbase;
1492
1493 /*
1494 * If offset is in memory, move it into the buffer provided and return.
1495 */
1496 if (filepos >= file_start && filepos+len <= file_end) {
1497 grub_memmove(buf, file_buf + filepos - file_start, len);
1498 filepos += len;
1499 return (len);
1500 }
1501
1502 blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1503
1504 /*
1505 * Entire Dnode is too big to fit into the space available. We
1506 * will need to read it in chunks. This could be optimized to
1507 * read in as large a chunk as there is space available, but for
1508 * now, this only reads in one data block at a time.
1509 */
1510 length = len;
1511 while (length) {
1512 /*
1513 * Find requested blkid and the offset within that block.
1514 */
1515 uint64_t blkid = filepos / blksz;
1516
1517 if ((errnum = dmu_read(DNODE, blkid, file_buf, stack)))
1518 return (0);
1519
1520 file_start = blkid * blksz;
1521 file_end = file_start + blksz;
1522
1523 movesize = MIN(length, file_end - filepos);
1524
1525 grub_memmove(buf, file_buf + filepos - file_start,
1526 movesize);
1527 buf += movesize;
1528 length -= movesize;
1529 filepos += movesize;
1530 }
1531
1532 return (len);
1533 }
1534
/*
 * zfs_embed() is a no-op: both arguments are ignored and nothing is
 * written through start_sector.
 *
 * Return:
 *	1 - always
 */
int
zfs_embed(int *start_sector, int needed_sectors)
{
	(void) start_sector;
	(void) needed_sectors;

	return (1);
}
1543
1544 #endif /* FSYS_ZFS */
1545