#include <xen/init.h>
#include <xen/compile.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/irq.h>
#include <asm/regs.h>
#include <xen/errno.h>
#include <xen/device_tree.h>
#include <xen/libfdt/libfdt.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>
#include <xen/acpi.h>
#include <xen/warning.h>
#include <acpi/actables.h>
#include <asm/device.h>
#include <asm/setup.h>
#include <asm/platform.h>
#include <asm/psci.h>
#include <asm/cpufeature.h>

#include <asm/gic.h>
#include <xen/irq.h>
#include <xen/grant_table.h>
#include "kernel.h"

static unsigned int __initdata opt_dom0_max_vcpus;
integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);

int dom0_11_mapping = 1;

static u64 __initdata dom0_mem;

static int __init parse_dom0_mem(const char *s)
{
    dom0_mem = parse_size_and_unit(s, &s);

    return *s ? -EINVAL : 0;
}
custom_param("dom0_mem", parse_dom0_mem);

struct map_range_data
{
    struct domain *d;
    p2m_type_t p2mt;
};

/* Override macros from asm/page.h to make them work with mfn_t */
#undef virt_to_mfn
#define virt_to_mfn(va) _mfn(__virt_to_mfn(va))

//#define DEBUG_11_ALLOCATION
#ifdef DEBUG_11_ALLOCATION
# define D11PRINT(fmt, args...) printk(XENLOG_DEBUG fmt, ##args)
#else
# define D11PRINT(fmt, args...) do {} while ( 0 )
#endif

/*
 * Amount of extra space required for dom0's device tree. No new nodes
 * are added (yet) but one terminating reserve map entry (16 bytes) is
 * added.
 */
#define DOM0_FDT_EXTRA_SIZE (128 + sizeof(struct fdt_reserve_entry))

struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
{
    if ( opt_dom0_max_vcpus == 0 )
        opt_dom0_max_vcpus = num_online_cpus();
    if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
        opt_dom0_max_vcpus = MAX_VIRT_CPUS;

    dom0->vcpu = xzalloc_array(struct vcpu *, opt_dom0_max_vcpus);
    if ( !dom0->vcpu )
        return NULL;
    dom0->max_vcpus = opt_dom0_max_vcpus;

    return alloc_vcpu(dom0, 0, 0);
}

static unsigned int get_11_allocation_size(paddr_t size)
{
    /*
     * get_order_from_bytes returns the order greater than or equal to
     * the given size, but we need less than or equal. Adding one to
     * the size pushes an evenly aligned size into the next order, so
     * we can then unconditionally subtract 1 from the order which is
     * returned.
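     *
     * Illustrative example (not in the original comment; assumes 4KB
     * pages): a request of exactly 8MB gives
     * get_order_from_bytes(8MB + 1) == 12, so we return order 11
     * (2048 pages == 8MB); a request of 10MB also yields order 11,
     * the largest power-of-two region that fits within the request.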
     */
    return get_order_from_bytes(size + 1) - 1;
}

/*
 * Insert the given pages into a memory bank; banks are ordered by address.
 *
 * Returns false if the memory would be below bank 0 or we have run
 * out of banks. In this case it will free the pages.
 */
static bool insert_11_bank(struct domain *d,
                           struct kernel_info *kinfo,
                           struct page_info *pg,
                           unsigned int order)
{
    int res, i;
    paddr_t spfn;
    paddr_t start, size;

    spfn = page_to_mfn(pg);
    start = pfn_to_paddr(spfn);
    size = pfn_to_paddr(1UL << order);

    D11PRINT("Allocated %#"PRIpaddr"-%#"PRIpaddr" (%ldMB/%ldMB, order %d)\n",
             start, start + size,
             1UL << (order + PAGE_SHIFT - 20),
             /* Don't want to format this as PRIpaddr (16 digit hex) */
             (unsigned long)(kinfo->unassigned_mem >> 20),
             order);

    if ( kinfo->mem.nr_banks > 0 &&
         size < MB(128) &&
         start + size < kinfo->mem.bank[0].start )
    {
        D11PRINT("Allocation below bank 0 is too small, not using\n");
        goto fail;
    }

    res = guest_physmap_add_page(d, _gfn(spfn), _mfn(spfn), order);
    if ( res )
        panic("Failed to map pages to DOM0: %d", res);

    kinfo->unassigned_mem -= size;

    if ( kinfo->mem.nr_banks == 0 )
    {
        kinfo->mem.bank[0].start = start;
        kinfo->mem.bank[0].size = size;
        kinfo->mem.nr_banks = 1;
        return true;
    }

    for ( i = 0; i < kinfo->mem.nr_banks; i++ )
    {
        struct membank *bank = &kinfo->mem.bank[i];

        /* If possible merge new memory into the start of the bank */
        if ( bank->start == start + size )
        {
            bank->start = start;
            bank->size += size;
            return true;
        }

        /* If possible merge new memory onto the end of the bank */
        if ( start == bank->start + bank->size )
        {
            bank->size += size;
            return true;
        }

        /*
         * Otherwise, if it is below this bank, insert the new memory
         * in a new bank before this one. If there had been a lower
         * bank we could have inserted the memory into or before, we
         * would already have done so; so this must be the right place.
         */
        if ( start + size < bank->start && kinfo->mem.nr_banks < NR_MEM_BANKS )
        {
            memmove(bank + 1, bank, sizeof(*bank) * (kinfo->mem.nr_banks - i));
            kinfo->mem.nr_banks++;
            bank->start = start;
            bank->size = size;
            return true;
        }
    }

    if ( i == kinfo->mem.nr_banks && kinfo->mem.nr_banks < NR_MEM_BANKS )
    {
        struct membank *bank = &kinfo->mem.bank[kinfo->mem.nr_banks];

        bank->start = start;
        bank->size = size;
        kinfo->mem.nr_banks++;
        return true;
    }

    /* If we get here then there are no more banks to fill. */

fail:
    free_domheap_pages(pg, order);
    return false;
}

/*
 * This is all pretty horrible.
 *
 * Requirements:
 *
 * 1. The dom0 kernel should be loaded within the first 128MB of RAM. This
 *    is necessary at least for Linux zImage kernels, which are all we
 *    support today.
 * 2. We want to put the dom0 kernel, ramdisk and DTB in the same
 *    bank. Partly this is just easier for us to deal with, but also
 *    the ramdisk and DTB must be placed within a certain proximity of
 *    the kernel within RAM.
 * 3. For dom0 we want to place as much of the RAM as we reasonably can
 *    below 4GB, so that it can be used by non-LPAE enabled kernels (32-bit)
 *    or when a device assigned to dom0 can only do 32-bit DMA access.
 * 4. For 32-bit dom0 the kernel must be located below 4GB.
 * 5. We want to have a few larger banks rather than many smaller ones.
 *
 * For the first two requirements we need to make sure that the lowest
 * bank is sufficiently large.
 *
 * For convenience we also sort the banks by physical address.
 *
 * The memory allocator does not really give us the flexibility to
 * meet these requirements directly. So instead we proceed as follows:
 *
 * We first allocate the largest allocation we can as low as we
 * can. This then becomes the first bank. This bank must be at least
 * 128MB (or dom0_mem if that is smaller).
 *
 * Then we start allocating more memory, trying to allocate the
 * largest possible size and trying smaller sizes until we
 * successfully allocate something.
 *
 * We then try to insert this memory into the list of banks. If it
 * can be merged into an existing bank then this is trivial.
 *
 * If the new memory is before the first bank (and cannot be merged into it)
 * and is at least 128M then we allow it, otherwise we give up. Since the
 * allocator prefers to allocate high addresses first and the first bank has
 * already been allocated to be as low as possible this likely means we
 * wouldn't have been able to allocate much more memory anyway.
 *
 * Otherwise we insert a new bank. If we've reached NR_MEM_BANKS then
 * we give up.
 *
 * For a 32-bit domain we require that the initial allocation for the
 * first bank is under 4G. For a 64-bit domain, the first bank is preferred
 * to be allocated under 4G. Then for the subsequent allocations we
 * initially allocate memory only from below 4GB. Once that runs out
 * (as described above) we allow higher allocations and continue until
 * that runs out (or we have allocated sufficient dom0 memory).
 */
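
/*
 * Illustrative walk-through (hypothetical addresses, not from the
 * original source): with dom0_mem = 512MB the first pass might
 * allocate a 256MB bank 0 at 0x80000000. The second loop could then
 * allocate 128MB at 0xa0000000, which merges onto the end of bank 0,
 * and 128MB at 0x78000000, which merges onto its start, leaving a
 * single 512MB bank at 0x78000000.
 */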

static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
{
    const unsigned int min_low_order =
        get_order_from_bytes(min_t(paddr_t, dom0_mem, MB(128)));
    const unsigned int min_order = get_order_from_bytes(MB(4));
    struct page_info *pg;
    unsigned int order = get_11_allocation_size(kinfo->unassigned_mem);
    int i;

    bool lowmem = true;
    unsigned int bits;

    /*
     * TODO: Implement memory bank allocation when DOM0 is not direct
     * mapped
     */
    BUG_ON(!dom0_11_mapping);

    printk("Allocating 1:1 mappings totalling %ldMB for dom0:\n",
           /* Don't want to format this as PRIpaddr (16 digit hex) */
           (unsigned long)(kinfo->unassigned_mem >> 20));

    kinfo->mem.nr_banks = 0;

    /*
     * First try and allocate the largest thing we can as low as
     * possible to be bank 0.
     */
    while ( order >= min_low_order )
    {
        for ( bits = order; bits <= (lowmem ? 32 : PADDR_BITS); bits++ )
        {
            pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
            if ( pg != NULL )
            {
                if ( !insert_11_bank(d, kinfo, pg, order) )
                    BUG(); /* Cannot fail for first bank */

                goto got_bank0;
            }
        }
        order--;
    }

    /* Failed to allocate bank0 under 4GB */
    if ( is_32bit_domain(d) )
        panic("Unable to allocate first memory bank.");

    /* Try to allocate memory from above 4GB */
    printk(XENLOG_INFO "No bank has been allocated below 4GB.\n");
    lowmem = false;

got_bank0:

    /*
     * If we failed to allocate bank0 under 4GB, continue allocating
     * memory from above 4GB and fill in banks.
     */
    order = get_11_allocation_size(kinfo->unassigned_mem);
    while ( kinfo->unassigned_mem && kinfo->mem.nr_banks < NR_MEM_BANKS )
    {
        pg = alloc_domheap_pages(d, order, lowmem ? MEMF_bits(32) : 0);
        if ( !pg )
        {
            order--;

            if ( lowmem && order < min_low_order )
            {
                D11PRINT("Failed at min_low_order, allow high allocations\n");
                order = get_11_allocation_size(kinfo->unassigned_mem);
                lowmem = false;
                continue;
            }
            if ( order >= min_order )
                continue;

            /* Nothing more we can do */
            break;
        }

        if ( !insert_11_bank(d, kinfo, pg, order) )
        {
            if ( kinfo->mem.nr_banks == NR_MEM_BANKS )
                /* Nothing more we can do. */
                break;

            if ( lowmem )
            {
                D11PRINT("Allocation below bank 0, allow high allocations\n");
                order = get_11_allocation_size(kinfo->unassigned_mem);
                lowmem = false;
                continue;
            }
            else
            {
                D11PRINT("Allocation below bank 0\n");
                break;
            }
        }

        /*
         * Success, next time around try again to get the largest order
         * allocation possible.
         */
        order = get_11_allocation_size(kinfo->unassigned_mem);
    }

    if ( kinfo->unassigned_mem )
        printk("WARNING: Failed to allocate requested dom0 memory."
               /* Don't want to format this as PRIpaddr (16 digit hex) */
               " %ldMB unallocated\n",
               (unsigned long)kinfo->unassigned_mem >> 20);

    for ( i = 0; i < kinfo->mem.nr_banks; i++ )
    {
        printk("BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
               i,
               kinfo->mem.bank[i].start,
               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
               /* Don't want to format this as PRIpaddr (16 digit hex) */
               (unsigned long)(kinfo->mem.bank[i].size >> 20));
    }
}

static int write_properties(struct domain *d, struct kernel_info *kinfo,
                            const struct dt_device_node *node)
{
    const char *bootargs = NULL;
    const struct dt_property *prop, *status = NULL;
    int res = 0;
    int had_dom0_bootargs = 0;

    const struct bootmodule *kernel = kinfo->kernel_bootmodule;

    if ( kernel && kernel->cmdline[0] )
        bootargs = &kernel->cmdline[0];

    dt_for_each_property_node (node, prop)
    {
        const void *prop_data = prop->value;
        u32 prop_len = prop->length;

        /*
         * In chosen node:
         *
         * * remember xen,dom0-bootargs if we don't already have
         *   bootargs (from module #1, above).
         * * remove bootargs, xen,dom0-bootargs, xen,xen-bootargs,
         *   linux,initrd-start and linux,initrd-end.
         * * remove stdout-path.
         * * remove linux,uefi-system-table,
         *   linux,uefi-mmap-start, linux,uefi-mmap-size,
         *   linux,uefi-mmap-desc-size, and linux,uefi-mmap-desc-ver
         *   (since EFI boot is not currently supported in dom0).
         */
        if ( dt_node_path_is_equal(node, "/chosen") )
        {
            if ( dt_property_name_is_equal(prop, "xen,xen-bootargs") ||
                 dt_property_name_is_equal(prop, "linux,initrd-start") ||
                 dt_property_name_is_equal(prop, "linux,initrd-end") ||
                 dt_property_name_is_equal(prop, "stdout-path") ||
                 dt_property_name_is_equal(prop, "linux,uefi-system-table") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-start") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-size") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-desc-size") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-desc-ver") )
                continue;

            if ( dt_property_name_is_equal(prop, "xen,dom0-bootargs") )
            {
                had_dom0_bootargs = 1;
                bootargs = prop->value;
                continue;
            }
            if ( dt_property_name_is_equal(prop, "bootargs") )
            {
                if ( !bootargs && !had_dom0_bootargs )
                    bootargs = prop->value;
                continue;
            }
        }

        /* Don't expose the property "xen,passthrough" to the guest */
        if ( dt_property_name_is_equal(prop, "xen,passthrough") )
            continue;

        /* Remember and skip the status property as Xen may modify it later */
        if ( dt_property_name_is_equal(prop, "status") )
        {
            status = prop;
            continue;
        }

        res = fdt_property(kinfo->fdt, prop->name, prop_data, prop_len);

        if ( res )
            return res;
    }

    /*
     * Override the property "status" to disable the device when it's
     * marked for passthrough.
     */
    if ( dt_device_for_passthrough(node) )
        res = fdt_property_string(kinfo->fdt, "status", "disabled");
    else if ( status )
        res = fdt_property(kinfo->fdt, "status", status->value,
                           status->length);

    if ( res )
        return res;

    if ( dt_node_path_is_equal(node, "/chosen") )
    {
        const struct bootmodule *initrd = kinfo->initrd_bootmodule;

        if ( bootargs )
        {
            res = fdt_property(kinfo->fdt, "bootargs", bootargs,
                               strlen(bootargs) + 1);
            if ( res )
                return res;
        }

        /*
         * If the bootloader provides an initrd, we must create a placeholder
         * for the initrd properties. The values will be replaced later.
         */
        if ( initrd && initrd->size )
        {
            u64 a = 0;
            res = fdt_property(kinfo->fdt, "linux,initrd-start", &a, sizeof(a));
            if ( res )
                return res;

            res = fdt_property(kinfo->fdt, "linux,initrd-end", &a, sizeof(a));
            if ( res )
                return res;
        }
    }

    return 0;
}

/*
 * Helper to write an interrupt in the GIC format.
 * This code assumes the IRQ is a PPI.
 */

typedef __be32 gic_interrupt_t[3];

static void set_interrupt_ppi(gic_interrupt_t interrupt, unsigned int irq,
                              unsigned int cpumask, unsigned int level)
{
    __be32 *cells = interrupt;

    BUG_ON(irq < 16 && irq >= 32);

    /* See linux Documentation/devicetree/bindings/arm/gic.txt */
    dt_set_cell(&cells, 1, 1); /* is a PPI */
    dt_set_cell(&cells, 1, irq - 16); /* PPIs start at 16 */
    dt_set_cell(&cells, 1, (cpumask << 8) | level);
}
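
/*
 * Illustrative encoding (not from the original source): for the virtual
 * timer PPI 27 with cpumask 0xf and IRQ_TYPE_LEVEL_LOW (8 in the Linux
 * GIC binding), the three cells come out as <1 11 0xf08>: the PPI flag,
 * 27 - 16 = 11, and (0xf << 8) | 8.
 */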

/*
 * Helper to set the interrupts for a node in the flat device tree.
 * It needs two properties:
 *  "interrupts": contains the list of interrupts
 *  "interrupt-parent": link to the GIC
 */
static int fdt_property_interrupts(void *fdt, gic_interrupt_t *intr,
                                   unsigned int num_irq)
{
    int res;

    res = fdt_property(fdt, "interrupts", intr, sizeof(intr[0]) * num_irq);
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "interrupt-parent",
                            dt_interrupt_controller->phandle);

    return res;
}
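
/*
 * For illustration (hypothetical values): a single PPI encoded by
 * set_interrupt_ppi() above would appear in the generated tree as
 *
 *     interrupts = <1 11 0xf08>;
 *     interrupt-parent = <&gic>;
 */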

static int make_memory_node(const struct domain *d,
                            void *fdt,
                            const struct dt_device_node *parent,
                            const struct kernel_info *kinfo)
{
    int res, i;
    int reg_size = dt_child_n_addr_cells(parent) + dt_child_n_size_cells(parent);
    int nr_cells = reg_size * kinfo->mem.nr_banks;
    __be32 reg[nr_cells];
    __be32 *cells;

    dt_dprintk("Create memory node (reg size %d, nr cells %d)\n",
               reg_size, nr_cells);

    /* ePAPR 3.4 */
    res = fdt_begin_node(fdt, "memory");
    if ( res )
        return res;

    res = fdt_property_string(fdt, "device_type", "memory");
    if ( res )
        return res;

    cells = &reg[0];
    for ( i = 0; i < kinfo->mem.nr_banks; i++ )
    {
        u64 start = kinfo->mem.bank[i].start;
        u64 size = kinfo->mem.bank[i].size;

        dt_dprintk("  Bank %d: %#"PRIx64"->%#"PRIx64"\n",
                   i, start, start + size);

        dt_child_set_range(&cells, parent, start, size);
    }

    res = fdt_property(fdt, "reg", reg, sizeof(reg));
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
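
/*
 * Example of the node generated above (illustrative; assumes a parent
 * with 2 address cells and 2 size cells and one 512MB bank at
 * 0x80000000):
 *
 *     memory {
 *         device_type = "memory";
 *         reg = <0x0 0x80000000 0x0 0x20000000>;
 *     };
 */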

static int make_hypervisor_node(const struct kernel_info *kinfo,
                                const struct dt_device_node *parent)
{
    const char compat[] =
        "xen,xen-"__stringify(XEN_VERSION)"."__stringify(XEN_SUBVERSION)"\0"
        "xen,xen";
    __be32 reg[4];
    gic_interrupt_t intr;
    __be32 *cells;
    int res;
    /* Convenience alias */
    int addrcells = dt_child_n_addr_cells(parent);
    int sizecells = dt_child_n_size_cells(parent);
    void *fdt = kinfo->fdt;

    dt_dprintk("Create hypervisor node\n");

    /*
     * Sanity-check address sizes, since addresses and sizes which do
     * not take up exactly 4 or 8 bytes are not supported.
     */
    if ( (addrcells != 1 && addrcells != 2) ||
         (sizecells != 1 && sizecells != 2) )
        panic("Cannot cope with this size");

    /* See linux Documentation/devicetree/bindings/arm/xen.txt */
    res = fdt_begin_node(fdt, "hypervisor");
    if ( res )
        return res;

    /* Cannot use fdt_property_string due to embedded nulls */
    res = fdt_property(fdt, "compatible", compat, sizeof(compat));
    if ( res )
        return res;

    /* reg 0 is grant table space */
    cells = &reg[0];
    dt_child_set_range(&cells, parent, kinfo->gnttab_start, kinfo->gnttab_size);
    res = fdt_property(fdt, "reg", reg,
                       dt_cells_to_size(addrcells + sizecells));
    if ( res )
        return res;

    /*
     * Placeholder for the event channel interrupt. The values will be
     * replaced later.
     */
    set_interrupt_ppi(intr, ~0, 0xf, IRQ_TYPE_INVALID);
    res = fdt_property_interrupts(fdt, &intr, 1);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
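
/*
 * Sketch of the resulting node (illustrative; the compatible string is
 * built from XEN_VERSION/XEN_SUBVERSION and the reg values come from
 * the grant table region chosen for this boot):
 *
 *     hypervisor {
 *         compatible = "xen,xen-<major>.<minor>", "xen,xen";
 *         reg = <...gnttab_start... ...gnttab_size...>;
 *         interrupts = <...>;   // fixed up later by evtchn_fixup()
 *         interrupt-parent = <&gic>;
 *     };
 */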

static int make_psci_node(void *fdt, const struct dt_device_node *parent)
{
    int res;
    const char compat[] =
        "arm,psci-0.2""\0"
        "arm,psci";

    dt_dprintk("Create PSCI node\n");

    /* See linux Documentation/devicetree/bindings/arm/psci.txt */
    res = fdt_begin_node(fdt, "psci");
    if ( res )
        return res;

    res = fdt_property(fdt, "compatible", compat, sizeof(compat));
    if ( res )
        return res;

    res = fdt_property_string(fdt, "method", "hvc");
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "cpu_off", PSCI_cpu_off);
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "cpu_on", PSCI_cpu_on);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
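
/*
 * Sketch of the resulting node (function IDs depend on the PSCI_cpu_*
 * constants, shown here symbolically):
 *
 *     psci {
 *         compatible = "arm,psci-0.2", "arm,psci";
 *         method = "hvc";
 *         cpu_off = <PSCI_cpu_off>;
 *         cpu_on = <PSCI_cpu_on>;
 *     };
 */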

static int make_cpus_node(const struct domain *d, void *fdt,
                          const struct dt_device_node *parent)
{
    int res;
    const struct dt_device_node *cpus = dt_find_node_by_path("/cpus");
    const struct dt_device_node *npcpu;
    unsigned int cpu;
    const void *compatible = NULL;
    u32 len;
    /* Placeholder for cpu@ + a 32-bit number + \0 */
    char buf[15];
    u32 clock_frequency;
    bool clock_valid;
    uint64_t mpidr_aff;

    dt_dprintk("Create cpus node\n");

    if ( !cpus )
    {
        dprintk(XENLOG_ERR, "Missing /cpus node in the device tree?\n");
        return -ENOENT;
    }

    /*
     * Get the compatible property of CPUs from the device tree.
     * We are assuming that all CPUs are the same, so we just look
     * at the first one.
     * TODO: Handle compatible per VCPU
     */
    dt_for_each_child_node(cpus, npcpu)
    {
        if ( dt_device_type_is_equal(npcpu, "cpu") )
        {
            compatible = dt_get_property(npcpu, "compatible", &len);
            clock_valid = dt_property_read_u32(npcpu, "clock-frequency",
                                               &clock_frequency);
            break;
        }
    }

    if ( !compatible )
    {
        dprintk(XENLOG_ERR, "Can't find cpu in the device tree?\n");
        return -ENOENT;
    }

    /*
     * See Linux Documentation/devicetree/booting-without-of.txt
     * section III.5.b
     */
    res = fdt_begin_node(fdt, "cpus");
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "#address-cells", 1);
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "#size-cells", 0);
    if ( res )
        return res;

    for ( cpu = 0; cpu < d->max_vcpus; cpu++ )
    {
        /*
         * According to the ARM CPUs bindings, the reg field should match
         * the MPIDR's affinity bits. We will use AFF0 and AFF1 when
         * constructing the reg value of the guest at the moment, for it
         * is enough for the current max vcpu number.
         */
        mpidr_aff = vcpuid_to_vaffinity(cpu);
        dt_dprintk("Create cpu@%"PRIx64" (logical CPUID: %u) node\n",
                   mpidr_aff, cpu);

        snprintf(buf, sizeof(buf), "cpu@%"PRIx64, mpidr_aff);
        res = fdt_begin_node(fdt, buf);
        if ( res )
            return res;

        res = fdt_property(fdt, "compatible", compatible, len);
        if ( res )
            return res;

        res = fdt_property_string(fdt, "device_type", "cpu");
        if ( res )
            return res;

        res = fdt_property_cell(fdt, "reg", mpidr_aff);
        if ( res )
            return res;

        if ( clock_valid )
        {
            res = fdt_property_cell(fdt, "clock-frequency", clock_frequency);
            if ( res )
                return res;
        }

        if ( is_64bit_domain(d) )
        {
            res = fdt_property_string(fdt, "enable-method", "psci");
            if ( res )
                return res;
        }

        res = fdt_end_node(fdt);
        if ( res )
            return res;
    }

    res = fdt_end_node(fdt);

    return res;
}
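
/*
 * Illustrative per-vcpu node (assuming, hypothetically, Cortex-A53
 * host CPUs and that vcpu 0 maps to affinity 0):
 *
 *     cpu@0 {
 *         compatible = "arm,cortex-a53";
 *         device_type = "cpu";
 *         reg = <0x0>;
 *         enable-method = "psci";
 *     };
 */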

static int make_gic_node(const struct domain *d, void *fdt,
                         const struct dt_device_node *node)
{
    const struct dt_device_node *gic = dt_interrupt_controller;
    int res = 0;
    const void *addrcells, *sizecells;
    u32 addrcells_len, sizecells_len;

    /*
     * Xen currently supports only a single GIC. Discard any secondary
     * GIC entries.
     */
    if ( node != dt_interrupt_controller )
    {
        dt_dprintk("  Skipping (secondary GIC)\n");
        return 0;
    }

    dt_dprintk("Create gic node\n");

    res = fdt_begin_node(fdt, "interrupt-controller");
    if ( res )
        return res;

    /*
     * The value of the property "phandle" is referenced by the
     * "interrupt-parent" property of other nodes, so the guest knows
     * on which interrupt controller an interrupt is wired.
     */
    if ( gic->phandle )
    {
        dt_dprintk("  Set phandle = 0x%x\n", gic->phandle);
        res = fdt_property_cell(fdt, "phandle", gic->phandle);
        if ( res )
            return res;
    }

    addrcells = dt_get_property(gic, "#address-cells", &addrcells_len);
    if ( addrcells )
    {
        res = fdt_property(fdt, "#address-cells", addrcells, addrcells_len);
        if ( res )
            return res;
    }

    sizecells = dt_get_property(gic, "#size-cells", &sizecells_len);
    if ( sizecells )
    {
        res = fdt_property(fdt, "#size-cells", sizecells, sizecells_len);
        if ( res )
            return res;
    }

    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
    if ( res )
        return res;

    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
    if ( res )
        return res;

    res = gic_make_hwdom_dt_node(d, node, fdt);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}

static int make_timer_node(const struct domain *d, void *fdt,
                           const struct dt_device_node *node)
{
    static const struct dt_device_match timer_ids[] __initconst =
    {
        DT_MATCH_COMPATIBLE("arm,armv7-timer"),
        DT_MATCH_COMPATIBLE("arm,armv8-timer"),
        { /* sentinel */ },
    };
    struct dt_device_node *dev;
    u32 len;
    const void *compatible;
    int res;
    unsigned int irq;
    gic_interrupt_t intrs[3];
    u32 clock_frequency;
    bool clock_valid;

    dt_dprintk("Create timer node\n");

    dev = dt_find_matching_node(NULL, timer_ids);
    if ( !dev )
    {
        dprintk(XENLOG_ERR, "Missing timer node in the device tree?\n");
        return -FDT_ERR_XEN(ENOENT);
    }

    compatible = dt_get_property(dev, "compatible", &len);
    if ( !compatible )
    {
        dprintk(XENLOG_ERR, "Can't find compatible property for timer node\n");
        return -FDT_ERR_XEN(ENOENT);
    }

    res = fdt_begin_node(fdt, "timer");
    if ( res )
        return res;

    res = fdt_property(fdt, "compatible", compatible, len);
    if ( res )
        return res;

    /*
     * The timer IRQ is emulated by Xen. It always exposes an active-low
     * level-sensitive interrupt.
     */

    irq = timer_get_irq(TIMER_PHYS_SECURE_PPI);
    dt_dprintk("  Secure interrupt %u\n", irq);
    set_interrupt_ppi(intrs[0], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    irq = timer_get_irq(TIMER_PHYS_NONSECURE_PPI);
    dt_dprintk("  Non-secure interrupt %u\n", irq);
    set_interrupt_ppi(intrs[1], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    irq = timer_get_irq(TIMER_VIRT_PPI);
    dt_dprintk("  Virt interrupt %u\n", irq);
    set_interrupt_ppi(intrs[2], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    res = fdt_property_interrupts(fdt, intrs, 3);
    if ( res )
        return res;

    clock_valid = dt_property_read_u32(dev, "clock-frequency",
                                       &clock_frequency);
    if ( clock_valid )
    {
        res = fdt_property_cell(fdt, "clock-frequency", clock_frequency);
        if ( res )
            return res;
    }

    res = fdt_end_node(fdt);

    return res;
}
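
/*
 * With the standard arch timer PPIs (29, 30 and 27) the "interrupts"
 * property generated above would read, for illustration:
 *
 *     interrupts = <1 13 0xf08>,   // secure physical timer
 *                  <1 14 0xf08>,   // non-secure physical timer
 *                  <1 11 0xf08>;   // virtual timer
 */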

static int map_irq_to_domain(struct domain *d, unsigned int irq,
                             bool need_mapping, const char *devname)
{
    int res;

    res = irq_permit_access(d, irq);
    if ( res )
    {
        printk(XENLOG_ERR "Unable to permit dom%u access to IRQ %u\n",
               d->domain_id, irq);
        return res;
    }

    if ( need_mapping )
    {
        /*
         * Checking the return of vgic_reserve_virq is not
         * necessary. It should not fail except when we try to map
         * the IRQ twice. This can legitimately happen if the IRQ is
         * shared.
         */
        vgic_reserve_virq(d, irq);

        res = route_irq_to_guest(d, irq, irq, devname);
        if ( res < 0 )
        {
            printk(XENLOG_ERR "Unable to map IRQ %u to dom%d\n",
                   irq, d->domain_id);
            return res;
        }
    }

    dt_dprintk("  - IRQ: %u\n", irq);
    return 0;
}

static int map_dt_irq_to_domain(const struct dt_device_node *dev,
                                const struct dt_irq *dt_irq,
                                void *data)
{
    struct domain *d = data;
    unsigned int irq = dt_irq->irq;
    int res;
    bool need_mapping = !dt_device_for_passthrough(dev);

    if ( irq < NR_LOCAL_IRQS )
    {
        printk(XENLOG_ERR "%s: IRQ %u is not an SPI\n",
               dt_node_name(dev), irq);
        return -EINVAL;
    }

    /* Setup the IRQ type */
    res = irq_set_spi_type(irq, dt_irq->type);
    if ( res )
    {
        printk(XENLOG_ERR
               "%s: Unable to setup IRQ %u to dom%d\n",
               dt_node_name(dev), irq, d->domain_id);
        return res;
    }

    res = map_irq_to_domain(d, irq, need_mapping, dt_node_name(dev));

    return res;
}

static int map_range_to_domain(const struct dt_device_node *dev,
                               u64 addr, u64 len,
                               void *data)
{
    struct map_range_data *mr_data = data;
    struct domain *d = mr_data->d;
    bool need_mapping = !dt_device_for_passthrough(dev);
    int res;

    res = iomem_permit_access(d, paddr_to_pfn(addr),
                              paddr_to_pfn(PAGE_ALIGN(addr + len - 1)));
    if ( res )
    {
        printk(XENLOG_ERR "Unable to permit dom%d access to"
               " 0x%"PRIx64" - 0x%"PRIx64"\n",
               d->domain_id,
               addr & PAGE_MASK, PAGE_ALIGN(addr + len) - 1);
        return res;
    }

    if ( need_mapping )
    {
        res = map_regions_p2mt(d,
                               gaddr_to_gfn(addr),
                               PFN_UP(len),
                               maddr_to_mfn(addr),
                               mr_data->p2mt);

        if ( res < 0 )
        {
            printk(XENLOG_ERR "Unable to map 0x%"PRIx64
                   " - 0x%"PRIx64" in domain %d\n",
                   addr & PAGE_MASK, PAGE_ALIGN(addr + len) - 1,
                   d->domain_id);
            return res;
        }
    }

    dt_dprintk("  - MMIO: %010"PRIx64" - %010"PRIx64" P2MType=%x\n",
               addr, addr + len, mr_data->p2mt);

    return 0;
}

/*
 * For a node which describes a discoverable bus (such as a PCI bus),
 * we may need to perform additional mappings in order to make the
 * child resources available to domain 0.
 */
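/*
 * For example (illustrative): a PCI host bridge's "ranges" property
 * describes the MMIO windows its children use, and its "interrupt-map"
 * describes their legacy interrupts; both must be granted to dom0 even
 * though the individual PCI devices have no device tree nodes of
 * their own.
 */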
static int map_device_children(struct domain *d,
                               const struct dt_device_node *dev,
                               p2m_type_t p2mt)
{
    struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
    int ret;

    if ( dt_device_type_is_equal(dev, "pci") )
    {
        dt_dprintk("Mapping children of %s to guest\n",
                   dt_node_full_name(dev));

        ret = dt_for_each_irq_map(dev, &map_dt_irq_to_domain, d);
        if ( ret < 0 )
            return ret;

        ret = dt_for_each_range(dev, &map_range_to_domain, &mr_data);
        if ( ret < 0 )
            return ret;
    }

    return 0;
}

/*
 * For a given device node:
 *  - Give permission to the guest to manage IRQ and MMIO range
 *  - Retrieve the IRQ configuration (i.e. edge/level) from the device tree
 * When the device is not marked for guest passthrough:
 *  - Assign the device to the guest if it's protected by an IOMMU
 *  - Map the IRQs and iomem regions to DOM0
 */
static int handle_device(struct domain *d, struct dt_device_node *dev,
                         p2m_type_t p2mt)
{
    unsigned int nirq;
    unsigned int naddr;
    unsigned int i;
    int res;
    struct dt_raw_irq rirq;
    u64 addr, size;
    bool need_mapping = !dt_device_for_passthrough(dev);

    nirq = dt_number_of_irq(dev);
    naddr = dt_number_of_address(dev);

    dt_dprintk("%s passthrough = %d nirq = %u naddr = %u\n",
               dt_node_full_name(dev), need_mapping, nirq, naddr);

    if ( dt_device_is_protected(dev) && need_mapping )
    {
        dt_dprintk("%s setup iommu\n", dt_node_full_name(dev));
        res = iommu_assign_dt_device(d, dev);
        if ( res )
        {
            printk(XENLOG_ERR "Failed to setup the IOMMU for %s\n",
                   dt_node_full_name(dev));
            return res;
        }
    }

    /* Give permission and map IRQs */
    for ( i = 0; i < nirq; i++ )
    {
        res = dt_device_get_raw_irq(dev, i, &rirq);
        if ( res )
        {
            printk(XENLOG_ERR "Unable to retrieve irq %u for %s\n",
                   i, dt_node_full_name(dev));
            return res;
        }

        /*
         * Don't map IRQs that have no physical meaning,
         * i.e. IRQs whose controller is not the GIC.
         */
        if ( rirq.controller != dt_interrupt_controller )
        {
            dt_dprintk("irq %u not connected to primary controller. Connected to %s\n",
                       i, dt_node_full_name(rirq.controller));
            continue;
        }

        res = platform_get_irq(dev, i);
        if ( res < 0 )
        {
            printk(XENLOG_ERR "Unable to get irq %u for %s\n",
                   i, dt_node_full_name(dev));
            return res;
        }

        res = map_irq_to_domain(d, res, need_mapping, dt_node_name(dev));
        if ( res )
            return res;
    }

    /* Give permission and map MMIOs */
    for ( i = 0; i < naddr; i++ )
    {
        struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
        res = dt_device_get_address(dev, i, &addr, &size);
        if ( res )
        {
            printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
                   i, dt_node_full_name(dev));
            return res;
        }

        res = map_range_to_domain(dev, addr, size, &mr_data);
        if ( res )
            return res;
    }

    res = map_device_children(d, dev, p2mt);
    if ( res )
        return res;

    return 0;
}

static int handle_node(struct domain *d, struct kernel_info *kinfo,
                       struct dt_device_node *node,
                       p2m_type_t p2mt)
{
    static const struct dt_device_match skip_matches[] __initconst =
    {
        DT_MATCH_COMPATIBLE("xen,xen"),
        DT_MATCH_COMPATIBLE("xen,multiboot-module"),
        DT_MATCH_COMPATIBLE("multiboot,module"),
        DT_MATCH_COMPATIBLE("arm,psci"),
        DT_MATCH_COMPATIBLE("arm,psci-0.2"),
        DT_MATCH_COMPATIBLE("arm,psci-1.0"),
        DT_MATCH_COMPATIBLE("arm,cortex-a7-pmu"),
        DT_MATCH_COMPATIBLE("arm,cortex-a15-pmu"),
        DT_MATCH_COMPATIBLE("arm,cortex-a53-edac"),
        DT_MATCH_COMPATIBLE("arm,armv8-pmuv3"),
        DT_MATCH_PATH("/cpus"),
        DT_MATCH_TYPE("memory"),
        /* The memory mapped timer is not supported by Xen. */
        DT_MATCH_COMPATIBLE("arm,armv7-timer-mem"),
        { /* sentinel */ },
    };
    static const struct dt_device_match timer_matches[] __initconst =
    {
        DT_MATCH_TIMER,
        { /* sentinel */ },
    };
    static const struct dt_device_match reserved_matches[] __initconst =
    {
        DT_MATCH_PATH("/psci"),
        DT_MATCH_PATH("/memory"),
        DT_MATCH_PATH("/hypervisor"),
        { /* sentinel */ },
    };
    struct dt_device_node *child;
    int res;
    const char *name;
    const char *path;

    path = dt_node_full_name(node);

    dt_dprintk("handle %s\n", path);

    /* Skip these nodes and their sub-nodes */
    if ( dt_match_node(skip_matches, node) )
    {
        dt_dprintk("  Skip it (matched)\n");
        return 0;
    }
    if ( platform_device_is_blacklisted(node) )
    {
        dt_dprintk("  Skip it (blacklisted)\n");
        return 0;
    }

    /*
     * Replace these nodes with our own. Note that the original may be
     * used by DOMID_XEN, so this check comes first.
     */
    if ( device_get_class(node) == DEVICE_GIC )
        return make_gic_node(d, kinfo->fdt, node);
    if ( dt_match_node(timer_matches, node) )
        return make_timer_node(d, kinfo->fdt, node);

    /* Skip nodes used by Xen */
    if ( dt_device_used_by(node) == DOMID_XEN )
    {
        dt_dprintk("  Skip it (used by Xen)\n");
        return 0;
    }

    /*
     * Even if the IOMMU device is not used by Xen, it should not be
     * passed through to DOM0.
     */
    if ( device_get_class(node) == DEVICE_IOMMU )
    {
        dt_dprintk("  IOMMU, skip it\n");
        return 0;
    }

    /*
     * Xen uses some paths for its own purposes. Warn if a node
     * already exists with the same path.
     */
    if ( dt_match_node(reserved_matches, node) )
        printk(XENLOG_WARNING
               "WARNING: Path %s is reserved, skip the node as we may re-use the path.\n",
               path);

    res = handle_device(d, node, p2mt);
    if ( res )
        return res;

    /*
     * The property "name" is used to have a different name on older FDT
     * versions. We want to keep the name retrieved during the tree
     * structure creation, which is stored in the node path.
     */
    name = strrchr(path, '/');
    name = name ? name + 1 : path;

    res = fdt_begin_node(kinfo->fdt, name);
    if ( res )
        return res;

    res = write_properties(d, kinfo, node);
    if ( res )
        return res;

    for ( child = node->child; child != NULL; child = child->sibling )
    {
        res = handle_node(d, kinfo, child, p2mt);
        if ( res )
            return res;
    }

    if ( node == dt_host )
    {
        res = make_hypervisor_node(kinfo, node);
        if ( res )
            return res;

        res = make_psci_node(kinfo->fdt, node);
        if ( res )
            return res;

        res = make_cpus_node(d, kinfo->fdt, node);
        if ( res )
            return res;

        res = make_memory_node(d, kinfo->fdt, node, kinfo);
        if ( res )
            return res;
    }

    res = fdt_end_node(kinfo->fdt);

    return res;
}

static int prepare_dtb(struct domain *d, struct kernel_info *kinfo)
{
    const p2m_type_t default_p2mt = p2m_mmio_direct_c;
    const void *fdt;
    int new_size;
    int ret;

    ASSERT(dt_host && (dt_host->sibling == NULL));

    fdt = device_tree_flattened;

    new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
    kinfo->fdt = xmalloc_bytes(new_size);
    if ( kinfo->fdt == NULL )
        return -ENOMEM;

    ret = fdt_create(kinfo->fdt, new_size);
    if ( ret < 0 )
        goto err;

    fdt_finish_reservemap(kinfo->fdt);

    ret = handle_node(d, kinfo, dt_host, default_p2mt);
    if ( ret )
        goto err;

    ret = fdt_finish(kinfo->fdt);
    if ( ret < 0 )
        goto err;

    return 0;

err:
    printk("Device tree generation failed (%d).\n", ret);
    xfree(kinfo->fdt);
    return -EINVAL;
}

#ifdef CONFIG_ACPI
#define ACPI_DOM0_FDT_MIN_SIZE 4096

static int acpi_iomem_deny_access(struct domain *d)
{
    acpi_status status;
    struct acpi_table_spcr *spcr = NULL;
    unsigned long mfn;
    int rc;

    /* Firstly permit full MMIO capabilities. */
    rc = iomem_permit_access(d, 0UL, ~0UL);
    if ( rc )
        return rc;

    /* TODO: Deny MMIO access for SMMU, GIC ITS */
    status = acpi_get_table(ACPI_SIG_SPCR, 0,
                            (struct acpi_table_header **)&spcr);

    if ( ACPI_FAILURE(status) )
    {
        printk("Failed to get SPCR table\n");
        return -EINVAL;
    }

    mfn = spcr->serial_port.address >> PAGE_SHIFT;
    /* Deny MMIO access for UART */
    rc = iomem_deny_access(d, mfn, mfn + 1);
    if ( rc )
        return rc;

    /* Deny MMIO access for GIC regions */
    return gic_iomem_deny_access(d);
}

static int acpi_route_spis(struct domain *d)
{
    int i, res;
    struct irq_desc *desc;

    /*
     * Route the IRQs to the hardware domain and permit the access.
     * The interrupt type will be set by the hardware domain.
     */
    for ( i = NR_LOCAL_IRQS; i < vgic_num_irqs(d); i++ )
    {
        /*
         * TODO: Exclude the SPIs SMMU uses which should not be routed to
         * the hardware domain.
         */
        desc = irq_to_desc(i);
        if ( desc->action != NULL )
            continue;

        /* XXX: Shall we use a proper devname? */
        res = map_irq_to_domain(d, i, true, "ACPI");
        if ( res )
            return res;
    }

    return 0;
}

static int acpi_make_chosen_node(const struct kernel_info *kinfo)
{
    int res;
    const char *bootargs = NULL;
    const struct bootmodule *mod = kinfo->kernel_bootmodule;
    void *fdt = kinfo->fdt;

    dt_dprintk("Create chosen node\n");
    res = fdt_begin_node(fdt, "chosen");
    if ( res )
        return res;

    if ( mod && mod->cmdline[0] )
    {
        bootargs = &mod->cmdline[0];
        res = fdt_property(fdt, "bootargs", bootargs, strlen(bootargs) + 1);
        if ( res )
            return res;
    }

    /*
     * If the bootloader provides an initrd, we must create a placeholder
     * for the initrd properties. The values will be replaced later.
     */
    if ( mod && mod->size )
    {
        u64 a = 0;
        res = fdt_property(kinfo->fdt, "linux,initrd-start", &a, sizeof(a));
        if ( res )
            return res;

        res = fdt_property(kinfo->fdt, "linux,initrd-end", &a, sizeof(a));
        if ( res )
            return res;
    }

    res = fdt_end_node(fdt);

    return res;
}

static int acpi_make_hypervisor_node(const struct kernel_info *kinfo,
                                     struct membank tbl_add[])
{
    const char compat[] =
        "xen,xen-"__stringify(XEN_VERSION)"."__stringify(XEN_SUBVERSION)"\0"
        "xen,xen";
    int res;
    /* Convenience alias */
    void *fdt = kinfo->fdt;

    dt_dprintk("Create hypervisor node\n");

    /* See linux Documentation/devicetree/bindings/arm/xen.txt */
    res = fdt_begin_node(fdt, "hypervisor");
    if ( res )
        return res;

    /* Cannot use fdt_property_string due to embedded nulls */
    res = fdt_property(fdt, "compatible", compat, sizeof(compat));
    if ( res )
        return res;

    res = acpi_make_efi_nodes(fdt, tbl_add);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}

/*
 * Prepare a minimal DTB for Dom0 which contains bootargs, initrd and
 * memory information, plus the EFI tables.
 */
static int create_acpi_dtb(struct kernel_info *kinfo, struct membank tbl_add[])
{
    int new_size;
    int ret;

    dt_dprintk("Prepare a min DTB for DOM0\n");

    /* Allocate min size for DT */
    new_size = ACPI_DOM0_FDT_MIN_SIZE;
    kinfo->fdt = xmalloc_bytes(new_size);

    if ( kinfo->fdt == NULL )
        return -ENOMEM;

    /* Create a new empty DT for DOM0 */
    ret = fdt_create(kinfo->fdt, new_size);
    if ( ret < 0 )
        goto err;

    ret = fdt_finish_reservemap(kinfo->fdt);
    if ( ret < 0 )
        goto err;

    ret = fdt_begin_node(kinfo->fdt, "/");
    if ( ret < 0 )
        goto err;

    ret = fdt_property_cell(kinfo->fdt, "#address-cells", 2);
    if ( ret )
        goto err;

    ret = fdt_property_cell(kinfo->fdt, "#size-cells", 1);
    if ( ret )
        goto err;

    /* Create a chosen node for DOM0 */
    ret = acpi_make_chosen_node(kinfo);
    if ( ret )
        goto err;

    ret = acpi_make_hypervisor_node(kinfo, tbl_add);
    if ( ret )
        goto err;

    ret = fdt_end_node(kinfo->fdt);
    if ( ret < 0 )
        goto err;

    ret = fdt_finish(kinfo->fdt);
    if ( ret < 0 )
        goto err;

    return 0;

err:
    printk("Device tree generation failed (%d).\n", ret);
    xfree(kinfo->fdt);
    return -EINVAL;
}

static void acpi_map_other_tables(struct domain *d)
{
    int i;
    unsigned long res;
    u64 addr, size;

    /* Map all ACPI tables to Dom0 using 1:1 mappings. */
    for ( i = 0; i < acpi_gbl_root_table_list.count; i++ )
    {
        addr = acpi_gbl_root_table_list.tables[i].address;
        size = acpi_gbl_root_table_list.tables[i].length;
        res = map_regions_p2mt(d,
                               gaddr_to_gfn(addr),
                               PFN_UP(size),
                               maddr_to_mfn(addr),
                               p2m_mmio_direct_c);
        if ( res )
        {
            panic(XENLOG_ERR "Unable to map ACPI region 0x%"PRIx64
                  " - 0x%"PRIx64" in domain\n",
                  addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
        }
    }
}

static int acpi_create_rsdp(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_rsdp *rsdp = NULL;
    u64 addr;
    u64 table_size = sizeof(struct acpi_table_rsdp);
    u8 *base_ptr;
    u8 checksum;

    addr = acpi_os_get_root_pointer();
    if ( !addr )
    {
        printk("Unable to get acpi root pointer\n");
        return -EINVAL;
    }
    rsdp = acpi_os_map_memory(addr, table_size);
    base_ptr = d->arch.efi_acpi_table
               + acpi_get_table_offset(tbl_add, TBL_RSDP);
    memcpy(base_ptr, rsdp, table_size);
    acpi_os_unmap_memory(rsdp, table_size);

    rsdp = (struct acpi_table_rsdp *)base_ptr;
    /* Replace xsdt_physical_address */
    rsdp->xsdt_physical_address = tbl_add[TBL_XSDT].start;
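    /*
     * An ACPI table's bytes must sum to zero modulo 256.
     * acpi_tb_checksum() returns the byte sum of the patched table, so
     * subtracting it from the stored checksum field rebalances the
     * table to sum to zero again.
     */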
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, rsdp), table_size);
    rsdp->checksum = rsdp->checksum - checksum;

    tbl_add[TBL_RSDP].start = d->arch.efi_acpi_gpa
                              + acpi_get_table_offset(tbl_add, TBL_RSDP);
    tbl_add[TBL_RSDP].size = table_size;

    return 0;
}

static void acpi_xsdt_modify_entry(u64 entry[], unsigned long entry_count,
                                   char *signature, u64 addr)
{
    int i;
    struct acpi_table_header *table;
    u64 size = sizeof(struct acpi_table_header);

    for ( i = 0; i < entry_count; i++ )
    {
        table = acpi_os_map_memory(entry[i], size);
        if ( ACPI_COMPARE_NAME(table->signature, signature) )
        {
            entry[i] = addr;
            acpi_os_unmap_memory(table, size);
            break;
        }
        acpi_os_unmap_memory(table, size);
    }
}

static int acpi_create_xsdt(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_header *table = NULL;
    struct acpi_table_rsdp *rsdp_tbl;
    struct acpi_table_xsdt *xsdt = NULL;
    u64 table_size, addr;
    unsigned long entry_count;
    u8 *base_ptr;
    u8 checksum;

    addr = acpi_os_get_root_pointer();
    if ( !addr )
    {
        printk("Unable to get acpi root pointer\n");
        return -EINVAL;
    }
    rsdp_tbl = acpi_os_map_memory(addr, sizeof(struct acpi_table_rsdp));
    table = acpi_os_map_memory(rsdp_tbl->xsdt_physical_address,
                               sizeof(struct acpi_table_header));

    /* Add space for the STAO table in the XSDT table */
    table_size = table->length + sizeof(u64);
    entry_count = (table->length - sizeof(struct acpi_table_header))
                  / sizeof(u64);
    base_ptr = d->arch.efi_acpi_table
               + acpi_get_table_offset(tbl_add, TBL_XSDT);
    memcpy(base_ptr, table, table->length);
    acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));
    acpi_os_unmap_memory(rsdp_tbl, sizeof(struct acpi_table_rsdp));

    xsdt = (struct acpi_table_xsdt *)base_ptr;
    acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
                           ACPI_SIG_FADT, tbl_add[TBL_FADT].start);
    acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
                           ACPI_SIG_MADT, tbl_add[TBL_MADT].start);
    xsdt->table_offset_entry[entry_count] = tbl_add[TBL_STAO].start;
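
    /* Rebalance the checksum, as explained in acpi_create_rsdp(). */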
    xsdt->header.length = table_size;
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, xsdt), table_size);
    xsdt->header.checksum -= checksum;

    tbl_add[TBL_XSDT].start = d->arch.efi_acpi_gpa
                              + acpi_get_table_offset(tbl_add, TBL_XSDT);
    tbl_add[TBL_XSDT].size = table_size;

    return 0;
}

static int acpi_create_stao(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_header *table = NULL;
    struct acpi_table_stao *stao = NULL;
    u32 table_size = sizeof(struct acpi_table_stao);
    u32 offset = acpi_get_table_offset(tbl_add, TBL_STAO);
    acpi_status status;
    u8 *base_ptr, checksum;

    /* Copy OEM and ASL compiler fields from another table, use MADT */
    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);

    if ( ACPI_FAILURE(status) )
    {
        const char *msg = acpi_format_exception(status);

        printk("STAO: Failed to get MADT table, %s\n", msg);
        return -EINVAL;
    }

    base_ptr = d->arch.efi_acpi_table + offset;
    memcpy(base_ptr, table, sizeof(struct acpi_table_header));

    stao = (struct acpi_table_stao *)base_ptr;
    memcpy(stao->header.signature, ACPI_SIG_STAO, 4);
    stao->header.revision = 1;
    stao->header.length = table_size;
    stao->ignore_uart = 1;
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, stao), table_size);
    stao->header.checksum -= checksum;

    tbl_add[TBL_STAO].start = d->arch.efi_acpi_gpa + offset;
    tbl_add[TBL_STAO].size = table_size;

    return 0;
}

static int acpi_create_madt(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_header *table = NULL;
    struct acpi_table_madt *madt = NULL;
    struct acpi_subtable_header *header;
    struct acpi_madt_generic_distributor *gicd;
    u32 table_size = sizeof(struct acpi_table_madt);
    u32 offset = acpi_get_table_offset(tbl_add, TBL_MADT);
    int ret;
    acpi_status status;
    u8 *base_ptr, checksum;

    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);

    if ( ACPI_FAILURE(status) )
    {
        const char *msg = acpi_format_exception(status);

        printk("Failed to get MADT table, %s\n", msg);
        return -EINVAL;
    }

    base_ptr = d->arch.efi_acpi_table + offset;
    memcpy(base_ptr, table, table_size);

    /* Add Generic Distributor. */
    header = acpi_table_get_entry_madt(ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
    if ( !header )
    {
        printk("Can't get GICD entry\n");
        return -EINVAL;
    }
    gicd = container_of(header, struct acpi_madt_generic_distributor, header);
    memcpy(base_ptr + table_size, gicd,
           sizeof(struct acpi_madt_generic_distributor));
    table_size += sizeof(struct acpi_madt_generic_distributor);

    /* Add other subtables. */
    ret = gic_make_hwdom_madt(d, offset + table_size);
    if ( ret < 0 )
    {
        printk("Failed to get other subtables\n");
        return -EINVAL;
    }
    table_size += ret;

    madt = (struct acpi_table_madt *)base_ptr;
    madt->header.length = table_size;
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, madt), table_size);
    madt->header.checksum -= checksum;

    tbl_add[TBL_MADT].start = d->arch.efi_acpi_gpa + offset;
    tbl_add[TBL_MADT].size = table_size;

    return 0;
}

static int acpi_create_fadt(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_header *table = NULL;
    struct acpi_table_fadt *fadt = NULL;
    u64 table_size;
    acpi_status status;
    u8 *base_ptr;
    u8 checksum;

    status = acpi_get_table(ACPI_SIG_FADT, 0, &table);

    if ( ACPI_FAILURE(status) )
    {
        const char *msg = acpi_format_exception(status);

        printk("Failed to get FADT table, %s\n", msg);
        return -EINVAL;
    }

    table_size = table->length;
    base_ptr = d->arch.efi_acpi_table
               + acpi_get_table_offset(tbl_add, TBL_FADT);
    memcpy(base_ptr, table, table_size);
    fadt = (struct acpi_table_fadt *)base_ptr;

    /* Set PSCI_COMPLIANT and PSCI_USE_HVC */
    fadt->arm_boot_flags |= (ACPI_FADT_PSCI_COMPLIANT | ACPI_FADT_PSCI_USE_HVC);
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, fadt), table_size);
    fadt->header.checksum -= checksum;

    tbl_add[TBL_FADT].start = d->arch.efi_acpi_gpa
                              + acpi_get_table_offset(tbl_add, TBL_FADT);
    tbl_add[TBL_FADT].size = table_size;

    return 0;
}

static int estimate_acpi_efi_size(struct domain *d, struct kernel_info *kinfo)
{
    size_t efi_size, acpi_size, madt_size;
    u64 addr;
    struct acpi_table_rsdp *rsdp_tbl;
    struct acpi_table_header *table;

    efi_size = estimate_efi_size(kinfo->mem.nr_banks);

    acpi_size = ROUNDUP(sizeof(struct acpi_table_fadt), 8);
    acpi_size += ROUNDUP(sizeof(struct acpi_table_stao), 8);

    madt_size = gic_get_hwdom_madt_size(d);
    acpi_size += ROUNDUP(madt_size, 8);

    addr = acpi_os_get_root_pointer();
    if ( !addr )
    {
        printk("Unable to get acpi root pointer\n");
        return -EINVAL;
    }

    rsdp_tbl = acpi_os_map_memory(addr, sizeof(struct acpi_table_rsdp));
    if ( !rsdp_tbl )
    {
        printk("Unable to map RSDP table\n");
        return -EINVAL;
    }

    table = acpi_os_map_memory(rsdp_tbl->xsdt_physical_address,
                               sizeof(struct acpi_table_header));
    acpi_os_unmap_memory(rsdp_tbl, sizeof(struct acpi_table_rsdp));
    if ( !table )
    {
        printk("Unable to map XSDT table\n");
        return -EINVAL;
    }

    /* Add space for the STAO table in the XSDT table */
    acpi_size += ROUNDUP(table->length + sizeof(u64), 8);
    acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));

    acpi_size += ROUNDUP(sizeof(struct acpi_table_rsdp), 8);
    d->arch.efi_acpi_len = PAGE_ALIGN(ROUNDUP(efi_size, 8)
                                      + ROUNDUP(acpi_size, 8));

    return 0;
}
1848
prepare_acpi(struct domain * d,struct kernel_info * kinfo)1849 static int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
1850 {
1851 int rc = 0;
1852 int order;
1853 struct membank tbl_add[TBL_MMAX] = {};
1854
1855 rc = estimate_acpi_efi_size(d, kinfo);
1856 if ( rc != 0 )
1857 return rc;
1858
1859 order = get_order_from_bytes(d->arch.efi_acpi_len);
1860 d->arch.efi_acpi_table = alloc_xenheap_pages(order, 0);
1861 if ( d->arch.efi_acpi_table == NULL )
1862 {
1863 printk("unable to allocate memory!\n");
1864 return -ENOMEM;
1865 }
1866 memset(d->arch.efi_acpi_table, 0, d->arch.efi_acpi_len);
1867
1868 /*
1869 * For ACPI, Dom0 doesn't use kinfo->gnttab_start to get the grant table
1870 * region. So we use it as the ACPI table mapped address. Also it needs to
1871 * check if the size of grant table region is enough for those ACPI tables.
1872 */
1873 d->arch.efi_acpi_gpa = kinfo->gnttab_start;
1874 if ( kinfo->gnttab_size < d->arch.efi_acpi_len )
1875 {
1876 printk("The grant table region is not enough to fit the ACPI tables!\n");
1877 return -EINVAL;
1878 }
1879
1880 rc = acpi_create_fadt(d, tbl_add);
1881 if ( rc != 0 )
1882 return rc;
1883
1884 rc = acpi_create_madt(d, tbl_add);
1885 if ( rc != 0 )
1886 return rc;
1887
1888 rc = acpi_create_stao(d, tbl_add);
1889 if ( rc != 0 )
1890 return rc;
1891
1892 rc = acpi_create_xsdt(d, tbl_add);
1893 if ( rc != 0 )
1894 return rc;
1895
1896 rc = acpi_create_rsdp(d, tbl_add);
1897 if ( rc != 0 )
1898 return rc;
1899
1900 acpi_map_other_tables(d);
1901 acpi_create_efi_system_table(d, tbl_add);
1902 acpi_create_efi_mmap_table(d, &kinfo->mem, tbl_add);
1903
1904 /* Map the EFI and ACPI tables to Dom0 */
1905 rc = map_regions_p2mt(d,
1906 gaddr_to_gfn(d->arch.efi_acpi_gpa),
1907 PFN_UP(d->arch.efi_acpi_len),
1908 virt_to_mfn(d->arch.efi_acpi_table),
1909 p2m_mmio_direct_c);
1910 if ( rc != 0 )
1911 {
1912 printk(XENLOG_ERR "Unable to map EFI/ACPI table 0x%"PRIx64
1913 " - 0x%"PRIx64" in domain %d\n",
1914 d->arch.efi_acpi_gpa & PAGE_MASK,
1915 PAGE_ALIGN(d->arch.efi_acpi_gpa + d->arch.efi_acpi_len) - 1,
1916 d->domain_id);
1917 return rc;
1918 }
1919
1920 /*
1921 * Flush the cache for this region, otherwise DOM0 may read wrong data when
1922 * the cache is disabled.
1923 */
1924 clean_and_invalidate_dcache_va_range(d->arch.efi_acpi_table,
1925 d->arch.efi_acpi_len);
1926
1927 rc = create_acpi_dtb(kinfo, tbl_add);
1928 if ( rc != 0 )
1929 return rc;
1930
1931 rc = acpi_route_spis(d);
1932 if ( rc != 0 )
1933 return rc;
1934
1935 rc = acpi_iomem_deny_access(d);
1936 if ( rc != 0 )
1937 return rc;
1938
1939 return 0;
1940 }
#else
static int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
{
    /* Only booting with ACPI will hit here */
    BUG();
    return -EINVAL;
}
#endif
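
/*
 * Copy the flattened device tree generated for dom0 into guest memory at
 * the address chosen by kernel_load(), flushing the data cache so the
 * blob is visible before the guest enables its MMU and caches.
 */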
static void dtb_load(struct kernel_info *kinfo)
{
    void * __user dtb_virt = (void * __user)(register_t)kinfo->dtb_paddr;
    unsigned long left;

    printk("Loading dom0 DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
           kinfo->dtb_paddr, kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));

    left = raw_copy_to_guest_flush_dcache(dtb_virt, kinfo->fdt,
                                          fdt_totalsize(kinfo->fdt));
    if ( left != 0 )
        panic("Unable to copy the DTB to dom0 memory (left = %lu bytes)", left);
    xfree(kinfo->fdt);
}

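/*
 * Copy the initrd boot module into dom0 memory and patch the
 * "linux,initrd-start"/"linux,initrd-end" properties of /chosen so the
 * kernel can find it. Nothing to do if no initrd module was supplied.
 */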
static void initrd_load(struct kernel_info *kinfo)
{
    const struct bootmodule *mod = kinfo->initrd_bootmodule;
    paddr_t load_addr = kinfo->initrd_paddr;
    paddr_t paddr, len;
    unsigned long offs;
    int node;
    int res;
    __be32 val[2];
    __be32 *cellp;

    if ( !mod || !mod->size )
        return;

    paddr = mod->start;
    len = mod->size;

    printk("Loading dom0 initrd from 0x%"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
           paddr, load_addr, load_addr + len);

    /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
    node = fdt_path_offset(kinfo->fdt, "/chosen");
    if ( node < 0 )
        panic("Cannot find the /chosen node");

    cellp = (__be32 *)val;
    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
                              val, sizeof(val));
    if ( res )
        panic("Cannot fix up \"linux,initrd-start\" property");

    cellp = (__be32 *)val;
    dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
    res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
                              val, sizeof(val));
    if ( res )
        panic("Cannot fix up \"linux,initrd-end\" property");

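    /*
     * Copy the initrd page by page: dom0 memory is not mapped contiguously
     * in Xen, so each guest page is translated through the p2m and the
     * backing machine page is mapped just long enough to fill it.
     */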
    for ( offs = 0; offs < len; )
    {
        uint64_t par;
        paddr_t s, l, ma = 0;
        void *dst;

        s = offs & ~PAGE_MASK;
        /* Copy at most up to the end of the current page, or of the initrd */
        l = min(PAGE_SIZE - s, len - offs);

        par = gvirt_to_maddr(load_addr + offs, &ma, GV2M_WRITE);
        if ( par )
            panic("Unable to translate guest address");

        dst = map_domain_page(maddr_to_mfn(ma));

        copy_from_paddr(dst + s, paddr + offs, l);

        unmap_domain_page(dst);
        offs += l;
    }
}

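/*
 * Allocate the PPI used to inject event channel notifications into dom0,
 * advertise it via HVM_PARAM_CALLBACK_IRQ and, when booting with a device
 * tree, in the "interrupts" property of the /hypervisor node.
 */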
static void evtchn_fixup(struct domain *d, struct kernel_info *kinfo)
{
    int res, node;
    u64 val;
    gic_interrupt_t intr;

    /*
     * The allocation of the event channel IRQ has been deferred until
     * now. At this time, all PPIs used by DOM0 have been registered.
     */
    res = vgic_allocate_ppi(d);
    if ( res < 0 )
        panic("Unable to allocate a PPI for the event channel interrupt\n");

    d->arch.evtchn_irq = res;

    printk("Allocating PPI %u for event channel interrupt\n",
           d->arch.evtchn_irq);

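    /*
     * HVM_PARAM_CALLBACK_IRQ encodes the delivery method in the upper bits
     * and, for the PPI type used here, the trigger flags and the PPI number
     * in the lower bits (see public/hvm/params.h).
     */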
    /* Set the value of domain param HVM_PARAM_CALLBACK_IRQ */
    val = MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI,
                    HVM_PARAM_CALLBACK_IRQ_TYPE_MASK);
    /* Active-low level-sensitive */
    val |= MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL,
                     HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK);
    val |= d->arch.evtchn_irq;
    d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ] = val;

    /*
     * When booting Dom0 using ACPI, Dom0 can only get the event channel
     * interrupt via hypercall.
     */
    if ( !acpi_disabled )
        return;

    /* Fix up "interrupts" in /hypervisor node */
    node = fdt_path_offset(kinfo->fdt, "/hypervisor");
    if ( node < 0 )
        panic("Cannot find the /hypervisor node");

    /*
     * Interrupt event channel upcall:
     *  - Active-low level-sensitive
     *  - All CPUs
     *
     * TODO: Handle the cpumask properly.
     */
    set_interrupt_ppi(intr, d->arch.evtchn_irq, 0xf,
                      IRQ_TYPE_LEVEL_LOW);
    res = fdt_setprop_inplace(kinfo->fdt, node, "interrupts",
                              &intr, sizeof(intr));
    if ( res )
        panic("Cannot fix up \"interrupts\" property of the hypervisor node");
}

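/*
 * Choose the guest physical region dom0 will use for the grant table.
 * Note that on an ACPI boot prepare_acpi() repurposes this region for the
 * EFI/ACPI tables instead.
 */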
static void __init find_gnttab_region(struct domain *d,
                                      struct kernel_info *kinfo)
{
    /*
     * The RAM used by Xen will never be mapped into the DOM0 memory
     * layout, so it can be reused for the grant table.
     *
     * Only use the text section as it's always present and will contain
     * enough space for a large grant table.
     */
    kinfo->gnttab_start = __pa(_stext);
    kinfo->gnttab_size = gnttab_dom0_frames() << PAGE_SHIFT;

#ifdef CONFIG_ARM_32
    /*
     * The gnttab region must be below 4GB in order to work with DOM0
     * using short-descriptor page tables.
     * In practice this is always the case because Xen is always located
     * below 4GB, but be safe.
     */
    BUG_ON((kinfo->gnttab_start + kinfo->gnttab_size) > GB(4));
#endif

    printk("Grant table range: %#"PRIpaddr"-%#"PRIpaddr"\n",
           kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
}

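/*
 * Build domain 0: load the kernel, initrd and device tree (or ACPI
 * tables), set up the boot vcpu registers according to the kernel's boot
 * protocol and bring up the remaining vcpus.
 */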
int construct_dom0(struct domain *d)
{
    struct kernel_info kinfo = {};
    struct vcpu *saved_current;
    int rc, i, cpu;

    struct vcpu *v = d->vcpu[0];
    struct cpu_user_regs *regs = &v->arch.cpu_info->guest_cpu_user_regs;

    /* Sanity! */
    BUG_ON(d->domain_id != 0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(v->is_initialised);

    printk("*** LOADING DOMAIN 0 ***\n");
    if ( dom0_mem == 0 )
    {
        warning_add("PLEASE SPECIFY dom0_mem PARAMETER - USING 512M FOR NOW\n");
        dom0_mem = MB(512);
    }

    iommu_hwdom_init(d);

    d->max_pages = ~0U;

    kinfo.unassigned_mem = dom0_mem;

    rc = kernel_probe(&kinfo);
    if ( rc < 0 )
        return rc;

#ifdef CONFIG_ARM_64
    /* If AArch32 is not supported at EL1, do not allow a 32-bit domain. */
    if ( !cpu_has_el1_32 && kinfo.type == DOMAIN_32BIT )
    {
        printk("Platform does not support 32-bit domain\n");
        return -EINVAL;
    }
    d->arch.type = kinfo.type;

    if ( is_64bit_domain(d) )
        vcpu_switch_to_aarch64_mode(v);
#endif

    allocate_memory(d, &kinfo);
    find_gnttab_region(d, &kinfo);

    if ( acpi_disabled )
        rc = prepare_dtb(d, &kinfo);
    else
        rc = prepare_acpi(d, &kinfo);

    if ( rc < 0 )
        return rc;

    /* Map extra GIC MMIO regions, IRQs and other hardware to dom0. */
    rc = gic_map_hwdom_extra_mappings(d);
    if ( rc < 0 )
        return rc;

    rc = platform_specific_mapping(d);
    if ( rc < 0 )
        return rc;

    /*
     * The following loads use the domain's p2m and require current to be
     * a vcpu of the domain, so temporarily switch to it.
     */
    saved_current = current;
    p2m_restore_state(v);
    set_current(v);

    /*
     * kernel_load will determine the placement of the kernel as well
     * as the initrd & fdt in RAM, so call it first.
     */
    kernel_load(&kinfo);
    /* initrd_load will fix up the fdt, so call it before dtb_load */
    initrd_load(&kinfo);
    /* Allocate the event channel IRQ and fix up the device tree */
    evtchn_fixup(d, &kinfo);
    dtb_load(&kinfo);

    /* Now that we are done, restore the original p2m and current. */
    set_current(saved_current);
    p2m_restore_state(saved_current);

    discard_initial_modules();

    memset(regs, 0, sizeof(*regs));

    regs->pc = (register_t)kinfo.entry;

    if ( is_32bit_domain(d) )
    {
        regs->cpsr = PSR_GUEST32_INIT;

        /*
         * FROM LINUX head.S
         *
         * Kernel startup entry point.
         * ---------------------------
         *
         * This is normally called from the decompressor code. The requirements
         * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
         * r1 = machine nr, r2 = atags or dtb pointer.
         * ...
         */
        regs->r0 = 0; /* SBZ */
        regs->r1 = 0xffffffff; /* We use DTB therefore no machine id */
        regs->r2 = kinfo.dtb_paddr;
    }
#ifdef CONFIG_ARM_64
    else
    {
        regs->cpsr = PSR_GUEST64_INIT;
        /* From linux/Documentation/arm64/booting.txt */
        regs->x0 = kinfo.dtb_paddr;
        regs->x1 = 0; /* Reserved for future use */
        regs->x2 = 0; /* Reserved for future use */
        regs->x3 = 0; /* Reserved for future use */
    }
#endif

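    /*
     * Bring up the secondary vcpus, cycling through the online pcpus so
     * that dom0's vcpus are spread across the host processors.
     */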
    for ( i = 1, cpu = 0; i < d->max_vcpus; i++ )
    {
        cpu = cpumask_cycle(cpu, &cpu_online_map);
        if ( alloc_vcpu(d, i, cpu) == NULL )
        {
            printk("Failed to allocate dom0 vcpu %d on pcpu %d\n", i, cpu);
            break;
        }

        if ( is_64bit_domain(d) )
            vcpu_switch_to_aarch64_mode(d->vcpu[i]);
    }

    v->is_initialised = 1;
    clear_bit(_VPF_down, &v->pause_flags);

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */