1 #include <xen/init.h>
2 #include <xen/compile.h>
3 #include <xen/lib.h>
4 #include <xen/mm.h>
5 #include <xen/domain_page.h>
6 #include <xen/sched.h>
7 #include <asm/irq.h>
8 #include <asm/regs.h>
9 #include <xen/errno.h>
10 #include <xen/device_tree.h>
11 #include <xen/libfdt/libfdt.h>
12 #include <xen/guest_access.h>
13 #include <xen/iocap.h>
14 #include <xen/acpi.h>
15 #include <xen/warning.h>
16 #include <acpi/actables.h>
17 #include <asm/device.h>
18 #include <asm/setup.h>
19 #include <asm/platform.h>
20 #include <asm/psci.h>
21 #include <asm/setup.h>
22 #include <asm/cpufeature.h>
23 
24 #include <asm/gic.h>
25 #include <xen/irq.h>
26 #include <xen/grant_table.h>
27 #include "kernel.h"
28 
/* Maximum number of vCPUs for dom0 (0 = one vCPU per online pCPU). */
static unsigned int __initdata opt_dom0_max_vcpus;
integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);

/* Non-zero iff dom0 RAM is direct (1:1) mapped: guest frame == machine frame. */
int dom0_11_mapping = 1;

/* Amount of RAM requested for dom0, in bytes, from the "dom0_mem" option. */
static u64 __initdata dom0_mem;

/*
 * Parse the "dom0_mem" command line option.
 *
 * Returns 0 on success, -EINVAL if characters remain after the
 * size-and-unit prefix.
 */
static int __init parse_dom0_mem(const char *s)
{
    dom0_mem = parse_size_and_unit(s, &s);

    return *s ? -EINVAL : 0;
}
custom_param("dom0_mem", parse_dom0_mem);
43 
/*
 * Context for mapping a device's address ranges into a domain: the
 * target domain and the p2m type to use for the mappings.
 */
struct map_range_data
{
    struct domain *d;
    p2m_type_t p2mt;
};
49 
/* Override macros from asm/page.h to make them work with mfn_t */
#undef virt_to_mfn
#define virt_to_mfn(va) _mfn(__virt_to_mfn(va))

/* Define DEBUG_11_ALLOCATION to trace the 1:1 bank allocator below. */
//#define DEBUG_11_ALLOCATION
#ifdef DEBUG_11_ALLOCATION
# define D11PRINT(fmt, args...) printk(XENLOG_DEBUG fmt, ##args)
#else
# define D11PRINT(fmt, args...) do {} while ( 0 )
#endif

/*
 * Amount of extra space required in dom0's device tree.  No new nodes
 * are added (yet) but one terminating reserve map entry (16 bytes) is
 * added.
 */
#define DOM0_FDT_EXTRA_SIZE (128 + sizeof(struct fdt_reserve_entry))
67 
alloc_dom0_vcpu0(struct domain * dom0)68 struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
69 {
70     if ( opt_dom0_max_vcpus == 0 )
71         opt_dom0_max_vcpus = num_online_cpus();
72     if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
73         opt_dom0_max_vcpus = MAX_VIRT_CPUS;
74 
75     dom0->vcpu = xzalloc_array(struct vcpu *, opt_dom0_max_vcpus);
76     if ( !dom0->vcpu )
77         return NULL;
78     dom0->max_vcpus = opt_dom0_max_vcpus;
79 
80     return alloc_vcpu(dom0, 0, 0);
81 }
82 
/*
 * Return the largest page order whose extent fits entirely within
 * @size bytes (round down, rather than up, to a power-of-two order).
 */
static unsigned int get_11_allocation_size(paddr_t size)
{
    /*
     * get_order_from_bytes() rounds up: it returns the smallest order
     * covering at least the given size.  We want the opposite, so ask
     * for one byte more -- pushing an exact power-of-two size into the
     * next order -- and then subtract one from the result.
     */
    unsigned int order = get_order_from_bytes(size + 1);

    return order - 1;
}
94 
/*
 * Insert the given pages into a memory bank, banks are ordered by address.
 *
 * Returns false if the memory would be below bank 0 or we have run
 * out of banks. In this case it will free the pages.
 */
static bool insert_11_bank(struct domain *d,
                           struct kernel_info *kinfo,
                           struct page_info *pg,
                           unsigned int order)
{
    int res, i;
    paddr_t spfn;
    paddr_t start, size;

    /* 1:1 mapping: the guest frame number equals the machine frame number. */
    spfn = page_to_mfn(pg);
    start = pfn_to_paddr(spfn);
    size = pfn_to_paddr((1 << order));

    D11PRINT("Allocated %#"PRIpaddr"-%#"PRIpaddr" (%ldMB/%ldMB, order %d)\n",
             start, start + size,
             1UL << (order+PAGE_SHIFT-20),
             /* Don't want to format this as PRIpaddr (16 digit hex) */
             (unsigned long)(kinfo->unassigned_mem >> 20),
             order);

    /*
     * Refuse a small (< 128MB) allocation lying entirely below bank 0:
     * the kernel, ramdisk and DTB must fit in the lowest bank, so a
     * tiny new lowest bank would break the layout requirements (see
     * the big comment above allocate_memory()).
     */
    if ( kinfo->mem.nr_banks > 0 &&
         size < MB(128) &&
         start + size < kinfo->mem.bank[0].start )
    {
        D11PRINT("Allocation below bank 0 is too small, not using\n");
        goto fail;
    }

    res = guest_physmap_add_page(d, _gfn(spfn), _mfn(spfn), order);
    if ( res )
        panic("Failed map pages to DOM0: %d", res);

    kinfo->unassigned_mem -= size;

    /* First allocation trivially becomes bank 0. */
    if ( kinfo->mem.nr_banks == 0 )
    {
        kinfo->mem.bank[0].start = start;
        kinfo->mem.bank[0].size = size;
        kinfo->mem.nr_banks = 1;
        return true;
    }

    /* Banks are kept sorted by address; find where this range belongs. */
    for( i = 0; i < kinfo->mem.nr_banks; i++ )
    {
        struct membank *bank = &kinfo->mem.bank[i];

        /* If possible merge new memory into the start of the bank */
        if ( bank->start == start+size )
        {
            bank->start = start;
            bank->size += size;
            return true;
        }

        /* If possible merge new memory onto the end of the bank */
        if ( start == bank->start + bank->size )
        {
            bank->size += size;
            return true;
        }

        /*
         * Otherwise if it is below this bank insert new memory in a
         * new bank before this one. If there was a lower bank we
         * could have inserted the memory into/before we would already
         * have done so, so this must be the right place.
         */
        if ( start + size < bank->start && kinfo->mem.nr_banks < NR_MEM_BANKS )
        {
            memmove(bank + 1, bank, sizeof(*bank)*(kinfo->mem.nr_banks - i));
            kinfo->mem.nr_banks++;
            bank->start = start;
            bank->size = size;
            return true;
        }
    }

    /* Above all existing banks: append a new bank at the end. */
    if ( i == kinfo->mem.nr_banks && kinfo->mem.nr_banks < NR_MEM_BANKS )
    {
        struct membank *bank = &kinfo->mem.bank[kinfo->mem.nr_banks];

        bank->start = start;
        bank->size = size;
        kinfo->mem.nr_banks++;
        return true;
    }

    /*
     * If we get here then there are no more banks to fill.
     *
     * NOTE(review): on this fall-through path (banks exhausted) the
     * pages have already been added to the p2m by
     * guest_physmap_add_page() above, yet are freed below -- confirm
     * this cannot leave a stale guest mapping.
     */

fail:
    free_domheap_pages(pg, order);
    return false;
}
194 
/*
 * This is all pretty horrible.
 *
 * Requirements:
 *
 * 1. The dom0 kernel should be loaded within the first 128MB of RAM. This
 *    is necessary at least for Linux zImage kernels, which are all we
 *    support today.
 * 2. We want to put the dom0 kernel, ramdisk and DTB in the same
 *    bank. Partly this is just easier for us to deal with, but also
 *    the ramdisk and DTB must be placed within a certain proximity of
 *    the kernel within RAM.
 * 3. For dom0 we want to place as much of the RAM as we reasonably can
 *    below 4GB, so that it can be used by non-LPAE enabled kernels (32-bit)
 *    or when a device assigned to dom0 can only do 32-bit DMA access.
 * 4. For 32-bit dom0 the kernel must be located below 4GB.
 * 5. We want to have a few larger banks rather than many smaller ones.
 *
 * For the first two requirements we need to make sure that the lowest
 * bank is sufficiently large.
 *
 * For convenience we also sort the banks by physical address.
 *
 * The memory allocator does not really give us the flexibility to
 * meet these requirements directly. So instead we proceed as follows:
 *
 * We first allocate the largest allocation we can as low as we
 * can. This then becomes the first bank. This bank must be at least
 * 128MB (or dom0_mem if that is smaller).
 *
 * Then we start allocating more memory, trying to allocate the
 * largest possible size and trying smaller sizes until we
 * successfully allocate something.
 *
 * We then try and insert this memory in to the list of banks. If it
 * can be merged into an existing bank then this is trivial.
 *
 * If the new memory is before the first bank (and cannot be merged into it)
 * and is at least 128M then we allow it, otherwise we give up. Since the
 * allocator prefers to allocate high addresses first and the first bank has
 * already been allocated to be as low as possible this likely means we
 * wouldn't have been able to allocate much more memory anyway.
 *
 * Otherwise we insert a new bank. If we've reached NR_MEM_BANKS then
 * we give up.
 *
 * For 32-bit domain we require that the initial allocation for the
 * first bank is under 4G. For 64-bit domain, the first bank is preferred
 * to be allocated under 4G. Then for the subsequent allocations we
 * initially allocate memory only from below 4GB. Once that runs out
 * (as described above) we allow higher allocations and continue until
 * that runs out (or we have allocated sufficient dom0 memory).
 */
static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
{
    /* Minimum acceptable size for bank 0: 128MB, or dom0_mem if smaller. */
    const unsigned int min_low_order =
        get_order_from_bytes(min_t(paddr_t, dom0_mem, MB(128)));
    /* Smallest allocation worth retrying (4MB) before giving up. */
    const unsigned int min_order = get_order_from_bytes(MB(4));
    struct page_info *pg;
    unsigned int order = get_11_allocation_size(kinfo->unassigned_mem);
    int i;

    /* While true, restrict allocations to below 4GB (32-bit addresses). */
    bool lowmem = true;
    unsigned int bits;

    /*
     * TODO: Implement memory bank allocation when DOM0 is not direct
     * mapped
     */
    BUG_ON(!dom0_11_mapping);

    printk("Allocating 1:1 mappings totalling %ldMB for dom0:\n",
           /* Don't want to format this as PRIpaddr (16 digit hex) */
           (unsigned long)(kinfo->unassigned_mem >> 20));

    kinfo->mem.nr_banks = 0;

    /*
     * First try and allocate the largest thing we can as low as
     * possible to be bank 0.
     */
    while ( order >= min_low_order )
    {
        /* Widen the address limit gradually so we allocate as low as we can. */
        for ( bits = order ; bits <= (lowmem ? 32 : PADDR_BITS); bits++ )
        {
            pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
            if ( pg != NULL )
            {
                if ( !insert_11_bank(d, kinfo, pg, order) )
                    BUG(); /* Cannot fail for first bank */

                goto got_bank0;
            }
        }
        order--;
    }

    /* Failed to allocate bank0 under 4GB */
    if ( is_32bit_domain(d) )
        panic("Unable to allocate first memory bank.");

    /* Try to allocate memory from above 4GB */
    printk(XENLOG_INFO "No bank has been allocated below 4GB.\n");
    lowmem = false;

 got_bank0:

    /*
     * If we failed to allocate bank0 under 4GB, continue allocating
     * memory from above 4GB and fill in banks.
     */
    order = get_11_allocation_size(kinfo->unassigned_mem);
    while ( kinfo->unassigned_mem && kinfo->mem.nr_banks < NR_MEM_BANKS )
    {
        pg = alloc_domheap_pages(d, order, lowmem ? MEMF_bits(32) : 0);
        if ( !pg )
        {
            /* Retry with a smaller order until we hit min_order. */
            order --;

            if ( lowmem && order < min_low_order)
            {
                D11PRINT("Failed at min_low_order, allow high allocations\n");
                order = get_11_allocation_size(kinfo->unassigned_mem);
                lowmem = false;
                continue;
            }
            if ( order >= min_order )
                continue;

            /* No more we can do */
            break;
        }

        if ( !insert_11_bank(d, kinfo, pg, order) )
        {
            if ( kinfo->mem.nr_banks == NR_MEM_BANKS )
                /* Nothing more we can do. */
                break;

            /*
             * insert_11_bank() rejected a too-small allocation below
             * bank 0; lift the 4GB restriction and try again.
             */
            if ( lowmem )
            {
                D11PRINT("Allocation below bank 0, allow high allocations\n");
                order = get_11_allocation_size(kinfo->unassigned_mem);
                lowmem = false;
                continue;
            }
            else
            {
                D11PRINT("Allocation below bank 0\n");
                break;
            }
        }

        /*
         * Success, next time around try again to get the largest order
         * allocation possible.
         */
        order = get_11_allocation_size(kinfo->unassigned_mem);
    }

    if ( kinfo->unassigned_mem )
        printk("WARNING: Failed to allocate requested dom0 memory."
               /* Don't want to format this as PRIpaddr (16 digit hex) */
               " %ldMB unallocated\n",
               (unsigned long)kinfo->unassigned_mem >> 20);

    for( i = 0; i < kinfo->mem.nr_banks; i++ )
    {
        printk("BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
               i,
               kinfo->mem.bank[i].start,
               kinfo->mem.bank[i].start + kinfo->mem.bank[i].size,
               /* Don't want to format this as PRIpaddr (16 digit hex) */
               (unsigned long)(kinfo->mem.bank[i].size >> 20));
    }
}
371 
/*
 * Copy the properties of @node into dom0's FDT, filtering and
 * rewriting the ones Xen controls:
 *
 *  - in /chosen, drop boot/initrd/UEFI properties and pick the
 *    bootargs source (kernel module cmdline beats xen,dom0-bootargs,
 *    which beats the host's bootargs);
 *  - never expose "xen,passthrough";
 *  - override "status" to "disabled" for passthrough devices;
 *  - add initrd placeholder properties (patched later).
 *
 * Returns 0 on success or a libfdt error code.
 */
static int write_properties(struct domain *d, struct kernel_info *kinfo,
                            const struct dt_device_node *node)
{
    const char *bootargs = NULL;
    const struct dt_property *prop, *status = NULL;
    int res = 0;
    int had_dom0_bootargs = 0;

    const struct bootmodule *kernel = kinfo->kernel_bootmodule;

    /* The kernel boot module's cmdline has highest priority for bootargs. */
    if ( kernel && kernel->cmdline[0] )
        bootargs = &kernel->cmdline[0];

    dt_for_each_property_node (node, prop)
    {
        const void *prop_data = prop->value;
        u32 prop_len = prop->length;

        /*
         * In chosen node:
         *
         * * remember xen,dom0-bootargs if we don't already have
         *   bootargs (from module #1, above).
         * * remove bootargs,  xen,dom0-bootargs, xen,xen-bootargs,
         *   linux,initrd-start and linux,initrd-end.
         * * remove stdout-path.
         * * remove bootargs, linux,uefi-system-table,
         *   linux,uefi-mmap-start, linux,uefi-mmap-size,
         *   linux,uefi-mmap-desc-size, and linux,uefi-mmap-desc-ver
         *   (since EFI boot is not currently supported in dom0).
         */
        if ( dt_node_path_is_equal(node, "/chosen") )
        {
            if ( dt_property_name_is_equal(prop, "xen,xen-bootargs") ||
                 dt_property_name_is_equal(prop, "linux,initrd-start") ||
                 dt_property_name_is_equal(prop, "linux,initrd-end") ||
                 dt_property_name_is_equal(prop, "stdout-path") ||
                 dt_property_name_is_equal(prop, "linux,uefi-system-table") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-start") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-size") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-desc-size") ||
                 dt_property_name_is_equal(prop, "linux,uefi-mmap-desc-ver"))
                continue;

            if ( dt_property_name_is_equal(prop, "xen,dom0-bootargs") )
            {
                had_dom0_bootargs = 1;
                bootargs = prop->value;
                continue;
            }
            if ( dt_property_name_is_equal(prop, "bootargs") )
            {
                /* Host bootargs only used if nothing better was found. */
                if ( !bootargs  && !had_dom0_bootargs )
                    bootargs = prop->value;
                continue;
            }
        }

        /* Don't expose the property "xen,passthrough" to the guest */
        if ( dt_property_name_is_equal(prop, "xen,passthrough") )
            continue;

        /* Remember and skip the status property as Xen may modify it later */
        if ( dt_property_name_is_equal(prop, "status") )
        {
            status = prop;
            continue;
        }

        res = fdt_property(kinfo->fdt, prop->name, prop_data, prop_len);

        if ( res )
            return res;
    }

    /*
     * Override the property "status" to disable the device when it's
     * marked for passthrough.
     */
    if ( dt_device_for_passthrough(node) )
        res = fdt_property_string(kinfo->fdt, "status", "disabled");
    else if ( status )
        res = fdt_property(kinfo->fdt, "status", status->value,
                           status->length);

    if ( res )
        return res;

    if ( dt_node_path_is_equal(node, "/chosen") )
    {
        const struct bootmodule *initrd = kinfo->initrd_bootmodule;

        if ( bootargs )
        {
            res = fdt_property(kinfo->fdt, "bootargs", bootargs,
                               strlen(bootargs) + 1);
            if ( res )
                return res;
        }

        /*
         * If the bootloader provides an initrd, we must create a placeholder
         * for the initrd properties. The values will be replaced later.
         */
        if ( initrd && initrd->size )
        {
            u64 a = 0;
            res = fdt_property(kinfo->fdt, "linux,initrd-start", &a, sizeof(a));
            if ( res )
                return res;

            res = fdt_property(kinfo->fdt, "linux,initrd-end", &a, sizeof(a));
            if ( res )
                return res;
        }
    }

    return 0;
}
491 
492 /*
493  * Helper to write an interrupts with the GIC format
494  * This code is assuming the irq is an PPI.
495  */
496 
497 typedef __be32 gic_interrupt_t[3];
498 
set_interrupt_ppi(gic_interrupt_t interrupt,unsigned int irq,unsigned int cpumask,unsigned int level)499 static void set_interrupt_ppi(gic_interrupt_t interrupt, unsigned int irq,
500                               unsigned int cpumask, unsigned int level)
501 {
502     __be32 *cells = interrupt;
503 
504     BUG_ON(irq < 16 && irq >= 32);
505 
506     /* See linux Documentation/devictree/bindings/arm/gic.txt */
507     dt_set_cell(&cells, 1, 1); /* is a PPI */
508     dt_set_cell(&cells, 1, irq - 16); /* PPIs start at 16 */
509     dt_set_cell(&cells, 1, (cpumask << 8) | level);
510 }
511 
512 /*
513  * Helper to set interrupts for a node in the flat device tree.
514  * It needs 2 property:
515  *  "interrupts": contains the list of interrupts
516  *  "interrupt-parent": link to the GIC
517  */
fdt_property_interrupts(void * fdt,gic_interrupt_t * intr,unsigned num_irq)518 static int fdt_property_interrupts(void *fdt, gic_interrupt_t *intr,
519                                    unsigned num_irq)
520 {
521     int res;
522 
523     res = fdt_property(fdt, "interrupts", intr, sizeof (intr[0]) * num_irq);
524     if ( res )
525         return res;
526 
527     res = fdt_property_cell(fdt, "interrupt-parent",
528                             dt_interrupt_controller->phandle);
529 
530     return res;
531 }
532 
/*
 * Create the /memory node in dom0's FDT (ePAPR 3.4): a "memory"
 * device_type node whose "reg" property lists one (start, size) pair
 * per RAM bank assigned to the domain.
 *
 * Returns 0 on success or a libfdt error code.
 * NOTE(review): @d is currently unused here -- presumably kept for
 * signature symmetry with the other make_*_node helpers.
 */
static int make_memory_node(const struct domain *d,
                            void *fdt,
                            const struct dt_device_node *parent,
                            const struct kernel_info *kinfo)
{
    int res, i;
    /* Cells per "reg" entry: parent's #address-cells + #size-cells. */
    int reg_size = dt_child_n_addr_cells(parent) + dt_child_n_size_cells(parent);
    int nr_cells = reg_size*kinfo->mem.nr_banks;
    /*
     * VLA sized from the bank count; banks are bounded by NR_MEM_BANKS
     * so this is assumed small -- TODO confirm worst-case stack usage.
     */
    __be32 reg[nr_cells];
    __be32 *cells;

    dt_dprintk("Create memory node (reg size %d, nr cells %d)\n",
               reg_size, nr_cells);

    /* ePAPR 3.4 */
    res = fdt_begin_node(fdt, "memory");
    if ( res )
        return res;

    res = fdt_property_string(fdt, "device_type", "memory");
    if ( res )
        return res;

    /* Pack each bank's (start, size) into the reg cell array. */
    cells = &reg[0];
    for ( i = 0 ; i < kinfo->mem.nr_banks; i++ )
    {
        u64 start = kinfo->mem.bank[i].start;
        u64 size = kinfo->mem.bank[i].size;

        dt_dprintk("  Bank %d: %#"PRIx64"->%#"PRIx64"\n",
                   i, start, start + size);

        dt_child_set_range(&cells, parent, start, size);
    }

    res = fdt_property(fdt, "reg", reg, sizeof(reg));
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
576 
/*
 * Create the /hypervisor node advertising Xen to the guest (see Linux
 * Documentation/devicetree/bindings/arm/xen.txt): the versioned
 * "xen,xen-X.Y" compatible plus generic "xen,xen", the grant table
 * region in "reg", and a placeholder event channel interrupt.
 *
 * Returns 0 on success or a libfdt error code; panics if the parent's
 * cell sizes are unsupported.
 */
static int make_hypervisor_node(const struct kernel_info *kinfo,
                                const struct dt_device_node *parent)
{
    /* Two compatible strings separated by an embedded NUL. */
    const char compat[] =
        "xen,xen-"__stringify(XEN_VERSION)"."__stringify(XEN_SUBVERSION)"\0"
        "xen,xen";
    __be32 reg[4];
    gic_interrupt_t intr;
    __be32 *cells;
    int res;
    /* Convenience alias */
    int addrcells = dt_child_n_addr_cells(parent);
    int sizecells = dt_child_n_size_cells(parent);
    void *fdt = kinfo->fdt;

    dt_dprintk("Create hypervisor node\n");

    /*
     * Sanity-check address sizes, since addresses and sizes which do
     * not take up exactly 4 or 8 bytes are not supported.
     */
    if ((addrcells != 1 && addrcells != 2) ||
        (sizecells != 1 && sizecells != 2))
        panic("Cannot cope with this size");

    /* See linux Documentation/devicetree/bindings/arm/xen.txt */
    res = fdt_begin_node(fdt, "hypervisor");
    if ( res )
        return res;

    /* Cannot use fdt_property_string due to embedded nulls */
    res = fdt_property(fdt, "compatible", compat, sizeof(compat));
    if ( res )
        return res;

    /* reg 0 is grant table space */
    cells = &reg[0];
    dt_child_set_range(&cells, parent, kinfo->gnttab_start, kinfo->gnttab_size);
    res = fdt_property(fdt, "reg", reg,
                       dt_cells_to_size(addrcells + sizecells));
    if ( res )
        return res;

    /*
     * Placeholder for the event channel interrupt.  The values will be
     * replaced later.
     */
    set_interrupt_ppi(intr, ~0, 0xf, IRQ_TYPE_INVALID);
    res = fdt_property_interrupts(fdt, &intr, 1);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
633 
make_psci_node(void * fdt,const struct dt_device_node * parent)634 static int make_psci_node(void *fdt, const struct dt_device_node *parent)
635 {
636     int res;
637     const char compat[] =
638         "arm,psci-0.2""\0"
639         "arm,psci";
640 
641     dt_dprintk("Create PSCI node\n");
642 
643     /* See linux Documentation/devicetree/bindings/arm/psci.txt */
644     res = fdt_begin_node(fdt, "psci");
645     if ( res )
646         return res;
647 
648     res = fdt_property(fdt, "compatible", compat, sizeof(compat));
649     if ( res )
650         return res;
651 
652     res = fdt_property_string(fdt, "method", "hvc");
653     if ( res )
654         return res;
655 
656     res = fdt_property_cell(fdt, "cpu_off", PSCI_cpu_off);
657     if ( res )
658         return res;
659 
660     res = fdt_property_cell(fdt, "cpu_on", PSCI_cpu_on);
661     if ( res )
662         return res;
663 
664     res = fdt_end_node(fdt);
665 
666     return res;
667 }
668 
/*
 * Create the /cpus node with one cpu@<aff> subnode per vCPU of @d.
 *
 * The "compatible" and "clock-frequency" values are taken from the
 * first physical CPU node found in the host device tree and applied
 * to every vCPU.  The "reg" of each subnode is the vCPU's MPIDR
 * affinity value.  64-bit domains get "enable-method" = "psci".
 *
 * Returns 0 on success, -ENOENT if the host tree lacks usable CPU
 * nodes, or a libfdt error code.
 */
static int make_cpus_node(const struct domain *d, void *fdt,
                          const struct dt_device_node *parent)
{
    int res;
    const struct dt_device_node *cpus = dt_find_node_by_path("/cpus");
    const struct dt_device_node *npcpu;
    unsigned int cpu;
    const void *compatible = NULL;
    u32 len;
    /* Placeholder for cpu@ + a 32-bit number + \0 */
    char buf[15];
    u32 clock_frequency;
    /*
     * Only meaningful once a CPU node has been found; the early
     * -ENOENT return below guards against it being read unset.
     */
    bool clock_valid;
    uint64_t mpidr_aff;

    dt_dprintk("Create cpus node\n");

    if ( !cpus )
    {
        dprintk(XENLOG_ERR, "Missing /cpus node in the device tree?\n");
        return -ENOENT;
    }

    /*
     * Get the compatible property of CPUs from the device tree.
     * We are assuming that all CPUs are the same so we are just look
     * for the first one.
     * TODO: Handle compatible per VCPU
     */
    dt_for_each_child_node(cpus, npcpu)
    {
        if ( dt_device_type_is_equal(npcpu, "cpu") )
        {
            compatible = dt_get_property(npcpu, "compatible", &len);
            clock_valid = dt_property_read_u32(npcpu, "clock-frequency",
                                            &clock_frequency);
            break;
        }
    }

    if ( !compatible )
    {
        dprintk(XENLOG_ERR, "Can't find cpu in the device tree?\n");
        return -ENOENT;
    }

    /* See Linux Documentation/devicetree/booting-without-of.txt
     * section III.5.b
     */
    res = fdt_begin_node(fdt, "cpus");
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "#address-cells", 1);
    if ( res )
        return res;

    res = fdt_property_cell(fdt, "#size-cells", 0);
    if ( res )
        return res;

    for ( cpu = 0; cpu < d->max_vcpus; cpu++ )
    {
        /*
         * According to ARM CPUs bindings, the reg field should match
         * the MPIDR's affinity bits. We will use AFF0 and AFF1 when
         * constructing the reg value of the guest at the moment, for it
         * is enough for the current max vcpu number.
         */
        mpidr_aff = vcpuid_to_vaffinity(cpu);
        dt_dprintk("Create cpu@%"PRIx64" (logical CPUID: %d) node\n",
                   mpidr_aff, cpu);

        snprintf(buf, sizeof(buf), "cpu@%"PRIx64, mpidr_aff);
        res = fdt_begin_node(fdt, buf);
        if ( res )
            return res;

        res = fdt_property(fdt, "compatible", compatible, len);
        if ( res )
            return res;

        res = fdt_property_string(fdt, "device_type", "cpu");
        if ( res )
            return res;

        res = fdt_property_cell(fdt, "reg", mpidr_aff);
        if ( res )
            return res;

        if ( clock_valid )
        {
            res = fdt_property_cell(fdt, "clock-frequency", clock_frequency);
            if ( res )
                return res;
        }

        if ( is_64bit_domain(d) )
        {
            res = fdt_property_string(fdt, "enable-method", "psci");
            if ( res )
                return res;
        }

        res = fdt_end_node(fdt);
        if ( res )
            return res;
    }

    res = fdt_end_node(fdt);

    return res;
}
782 
/*
 * Create the interrupt-controller node for dom0's FDT, mirroring the
 * host's primary GIC: its phandle, #address-cells/#size-cells (when
 * present on the host node), a 3-cell interrupt specifier format, and
 * the GIC-driver-provided subtree via gic_make_hwdom_dt_node().
 *
 * Secondary GICs are silently skipped (Xen supports only one).
 * Returns 0 on success or a libfdt error code.
 */
static int make_gic_node(const struct domain *d, void *fdt,
                         const struct dt_device_node *node)
{
    const struct dt_device_node *gic = dt_interrupt_controller;
    int res = 0;
    const void *addrcells, *sizecells;
    u32 addrcells_len, sizecells_len;

    /*
     * Xen currently supports only a single GIC. Discard any secondary
     * GIC entries.
     */
    if ( node != dt_interrupt_controller )
    {
        dt_dprintk("  Skipping (secondary GIC)\n");
        return 0;
    }

    dt_dprintk("Create gic node\n");

    res = fdt_begin_node(fdt, "interrupt-controller");
    if ( res )
        return res;

    /*
     * The value of the property "phandle" in the property "interrupts"
     * to know on which interrupt controller the interrupt is wired.
     */
    if ( gic->phandle )
    {
        dt_dprintk("  Set phandle = 0x%x\n", gic->phandle);
        res = fdt_property_cell(fdt, "phandle", gic->phandle);
        if ( res )
            return res;
    }

    /* Propagate the host GIC's cell sizes, if it declares them. */
    addrcells = dt_get_property(gic, "#address-cells", &addrcells_len);
    if ( addrcells )
    {
        res = fdt_property(fdt, "#address-cells", addrcells, addrcells_len);
        if ( res )
            return res;
    }

    sizecells = dt_get_property(gic, "#size-cells", &sizecells_len);
    if ( sizecells )
    {
        res = fdt_property(fdt, "#size-cells", sizecells, sizecells_len);
        if ( res )
            return res;
    }

    /* 3 cells per interrupt: type, number, flags (GIC binding). */
    res = fdt_property_cell(fdt, "#interrupt-cells", 3);
    if ( res )
        return res;

    res = fdt_property(fdt, "interrupt-controller", NULL, 0);
    if ( res )
        return res;

    /* Let the GIC driver fill in its hardware-specific properties. */
    res = gic_make_hwdom_dt_node(d, node, fdt);
    if ( res )
        return res;

    res = fdt_end_node(fdt);

    return res;
}
851 
/*
 * Create the architected timer node for dom0's FDT.  The compatible
 * string and optional clock-frequency are copied from the host's
 * timer node; the three PPIs (secure, non-secure, virtual) are
 * emulated by Xen and exposed as active-low level-sensitive.
 *
 * Returns 0 on success, -FDT_ERR_XEN(ENOENT) if the host tree has no
 * usable timer node, or a libfdt error code.
 * NOTE(review): @d is unused here -- presumably kept for signature
 * symmetry with the other make_*_node helpers.
 */
static int make_timer_node(const struct domain *d, void *fdt,
                           const struct dt_device_node *node)
{
    static const struct dt_device_match timer_ids[] __initconst =
    {
        DT_MATCH_COMPATIBLE("arm,armv7-timer"),
        DT_MATCH_COMPATIBLE("arm,armv8-timer"),
        { /* sentinel */ },
    };
    struct dt_device_node *dev;
    u32 len;
    const void *compatible;
    int res;
    unsigned int irq;
    gic_interrupt_t intrs[3];
    u32 clock_frequency;
    bool clock_valid;

    dt_dprintk("Create timer node\n");

    dev = dt_find_matching_node(NULL, timer_ids);
    if ( !dev )
    {
        dprintk(XENLOG_ERR, "Missing timer node in the device tree?\n");
        return -FDT_ERR_XEN(ENOENT);
    }

    compatible = dt_get_property(dev, "compatible", &len);
    if ( !compatible )
    {
        dprintk(XENLOG_ERR, "Can't find compatible property for timer node\n");
        return -FDT_ERR_XEN(ENOENT);
    }

    res = fdt_begin_node(fdt, "timer");
    if ( res )
        return res;

    res = fdt_property(fdt, "compatible", compatible, len);
    if ( res )
        return res;

    /* The timer IRQ is emulated by Xen. It always exposes an active-low
     * level-sensitive interrupt */

    irq = timer_get_irq(TIMER_PHYS_SECURE_PPI);
    dt_dprintk("  Secure interrupt %u\n", irq);
    set_interrupt_ppi(intrs[0], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    irq = timer_get_irq(TIMER_PHYS_NONSECURE_PPI);
    dt_dprintk("  Non secure interrupt %u\n", irq);
    set_interrupt_ppi(intrs[1], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    irq = timer_get_irq(TIMER_VIRT_PPI);
    dt_dprintk("  Virt interrupt %u\n", irq);
    set_interrupt_ppi(intrs[2], irq, 0xf, IRQ_TYPE_LEVEL_LOW);

    res = fdt_property_interrupts(fdt, intrs, 3);
    if ( res )
        return res;

    /* Only emit clock-frequency if the host node declares one. */
    clock_valid = dt_property_read_u32(dev, "clock-frequency",
                                       &clock_frequency);
    if ( clock_valid )
    {
        res = fdt_property_cell(fdt, "clock-frequency", clock_frequency);
        if ( res )
            return res;
    }

    res = fdt_end_node(fdt);

    return res;
}
926 
/*
 * Grant @d permission to use @irq and, when @need_mapping is true,
 * reserve the corresponding virq and route the physical IRQ to the guest.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int map_irq_to_domain(struct domain *d, unsigned int irq,
                             bool need_mapping, const char *devname)

{
    int res;

    res = irq_permit_access(d, irq);
    if ( res )
    {
        printk(XENLOG_ERR "Unable to permit to dom%u access to IRQ %u\n",
               d->domain_id, irq);
        return res;
    }

    if ( need_mapping )
    {
        /*
         * Checking the return of vgic_reserve_virq is not
         * necessary. It should not fail except when we try to map
         * the IRQ twice. This can legitimately happen if the IRQ is shared
         */
        vgic_reserve_virq(d, irq);

        /* irq is unsigned: print with %u (PRId32 would be a mismatch). */
        res = route_irq_to_guest(d, irq, irq, devname);
        if ( res < 0 )
        {
            printk(XENLOG_ERR "Unable to map IRQ%u to dom%d\n",
                   irq, d->domain_id);
            return res;
        }
    }

    dt_dprintk("  - IRQ: %u\n", irq);
    return 0;
}
962 
map_dt_irq_to_domain(const struct dt_device_node * dev,const struct dt_irq * dt_irq,void * data)963 static int map_dt_irq_to_domain(const struct dt_device_node *dev,
964                                 const struct dt_irq *dt_irq,
965                                 void *data)
966 {
967     struct domain *d = data;
968     unsigned int irq = dt_irq->irq;
969     int res;
970     bool need_mapping = !dt_device_for_passthrough(dev);
971 
972     if ( irq < NR_LOCAL_IRQS )
973     {
974         printk(XENLOG_ERR "%s: IRQ%"PRId32" is not a SPI\n",
975                dt_node_name(dev), irq);
976         return -EINVAL;
977     }
978 
979     /* Setup the IRQ type */
980     res = irq_set_spi_type(irq, dt_irq->type);
981     if ( res )
982     {
983         printk(XENLOG_ERR
984                "%s: Unable to setup IRQ%"PRId32" to dom%d\n",
985                dt_node_name(dev), irq, d->domain_id);
986         return res;
987     }
988 
989     res = map_irq_to_domain(d, irq, need_mapping, dt_node_name(dev));
990 
991     return 0;
992 }
993 
/*
 * dt_for_each_range() callback: grant iomem access for [addr, addr + len)
 * to the domain carried in @data and, unless the device is marked for
 * passthrough, map the range 1:1 into its p2m.
 */
static int map_range_to_domain(const struct dt_device_node *dev,
                               u64 addr, u64 len,
                               void *data)
{
    struct map_range_data *mr = data;
    struct domain *d = mr->d;
    bool mapping_needed = !dt_device_for_passthrough(dev);
    int rc;

    /* Permission is always granted, even for passthrough devices. */
    rc = iomem_permit_access(d, paddr_to_pfn(addr),
                             paddr_to_pfn(PAGE_ALIGN(addr + len - 1)));
    if ( rc )
    {
        printk(XENLOG_ERR "Unable to permit to dom%d access to"
               " 0x%"PRIx64" - 0x%"PRIx64"\n",
               d->domain_id,
               addr & PAGE_MASK, PAGE_ALIGN(addr + len) - 1);
        return rc;
    }

    if ( mapping_needed )
    {
        rc = map_regions_p2mt(d,
                              gaddr_to_gfn(addr),
                              PFN_UP(len),
                              maddr_to_mfn(addr),
                              mr->p2mt);

        if ( rc < 0 )
        {
            printk(XENLOG_ERR "Unable to map 0x%"PRIx64
                   " - 0x%"PRIx64" in domain %d\n",
                   addr & PAGE_MASK, PAGE_ALIGN(addr + len) - 1,
                   d->domain_id);
            return rc;
        }
    }

    dt_dprintk("  - MMIO: %010"PRIx64" - %010"PRIx64" P2MType=%x\n",
               addr, addr + len, mr->p2mt);

    return 0;
}
1037 
1038 /*
1039  * For a node which describes a discoverable bus (such as a PCI bus)
1040  * then we may need to perform additional mappings in order to make
1041  * the child resources available to domain 0.
1042  */
static int map_device_children(struct domain *d,
                               const struct dt_device_node *dev,
                               p2m_type_t p2mt)
{
    struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
    int rc;

    /* Only discoverable buses (PCI) need their children mapped here. */
    if ( !dt_device_type_is_equal(dev, "pci") )
        return 0;

    dt_dprintk("Mapping children of %s to guest\n",
               dt_node_full_name(dev));

    /* Route every interrupt described by the bus interrupt-map... */
    rc = dt_for_each_irq_map(dev, &map_dt_irq_to_domain, d);
    if ( rc < 0 )
        return rc;

    /* ...and map every address range exposed through "ranges". */
    rc = dt_for_each_range(dev, &map_range_to_domain, &mr_data);
    if ( rc < 0 )
        return rc;

    return 0;
}
1066 
1067 /*
1068  * For a given device node:
1069  *  - Give permission to the guest to manage IRQ and MMIO range
1070  *  - Retrieve the IRQ configuration (i.e edge/level) from device tree
1071  * When the device is not marked for guest passthrough:
1072  *  - Assign the device to the guest if it's protected by an IOMMU
1073  *  - Map the IRQs and iomem regions to DOM0
1074  */
handle_device(struct domain * d,struct dt_device_node * dev,p2m_type_t p2mt)1075 static int handle_device(struct domain *d, struct dt_device_node *dev,
1076                          p2m_type_t p2mt)
1077 {
1078     unsigned int nirq;
1079     unsigned int naddr;
1080     unsigned int i;
1081     int res;
1082     struct dt_raw_irq rirq;
1083     u64 addr, size;
1084     bool need_mapping = !dt_device_for_passthrough(dev);
1085 
1086     nirq = dt_number_of_irq(dev);
1087     naddr = dt_number_of_address(dev);
1088 
1089     dt_dprintk("%s passthrough = %d nirq = %d naddr = %u\n",
1090                dt_node_full_name(dev), need_mapping, nirq, naddr);
1091 
1092     if ( dt_device_is_protected(dev) && need_mapping )
1093     {
1094         dt_dprintk("%s setup iommu\n", dt_node_full_name(dev));
1095         res = iommu_assign_dt_device(d, dev);
1096         if ( res )
1097         {
1098             printk(XENLOG_ERR "Failed to setup the IOMMU for %s\n",
1099                    dt_node_full_name(dev));
1100             return res;
1101         }
1102     }
1103 
1104     /* Give permission and map IRQs */
1105     for ( i = 0; i < nirq; i++ )
1106     {
1107         res = dt_device_get_raw_irq(dev, i, &rirq);
1108         if ( res )
1109         {
1110             printk(XENLOG_ERR "Unable to retrieve irq %u for %s\n",
1111                    i, dt_node_full_name(dev));
1112             return res;
1113         }
1114 
1115         /*
1116          * Don't map IRQ that have no physical meaning
1117          * ie: IRQ whose controller is not the GIC
1118          */
1119         if ( rirq.controller != dt_interrupt_controller )
1120         {
1121             dt_dprintk("irq %u not connected to primary controller. Connected to %s\n",
1122                       i, dt_node_full_name(rirq.controller));
1123             continue;
1124         }
1125 
1126         res = platform_get_irq(dev, i);
1127         if ( res < 0 )
1128         {
1129             printk(XENLOG_ERR "Unable to get irq %u for %s\n",
1130                    i, dt_node_full_name(dev));
1131             return res;
1132         }
1133 
1134         res = map_irq_to_domain(d, res, need_mapping, dt_node_name(dev));
1135         if ( res )
1136             return res;
1137     }
1138 
1139     /* Give permission and map MMIOs */
1140     for ( i = 0; i < naddr; i++ )
1141     {
1142         struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
1143         res = dt_device_get_address(dev, i, &addr, &size);
1144         if ( res )
1145         {
1146             printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
1147                    i, dt_node_full_name(dev));
1148             return res;
1149         }
1150 
1151         res = map_range_to_domain(dev, addr, size, &mr_data);
1152         if ( res )
1153             return res;
1154     }
1155 
1156     res = map_device_children(d, dev, p2mt);
1157     if ( res )
1158         return res;
1159 
1160     return 0;
1161 }
1162 
/*
 * Recursively copy the host device tree node @node (and its children) into
 * the dom0 FDT being built in kinfo->fdt, skipping or replacing nodes that
 * Xen handles itself (GIC, timer, PSCI, memory, ...).
 *
 * Returns 0 on success, a libfdt/Xen error code otherwise.
 */
static int handle_node(struct domain *d, struct kernel_info *kinfo,
                       struct dt_device_node *node,
                       p2m_type_t p2mt)
{
    static const struct dt_device_match skip_matches[] __initconst =
    {
        DT_MATCH_COMPATIBLE("xen,xen"),
        DT_MATCH_COMPATIBLE("xen,multiboot-module"),
        DT_MATCH_COMPATIBLE("multiboot,module"),
        DT_MATCH_COMPATIBLE("arm,psci"),
        DT_MATCH_COMPATIBLE("arm,psci-0.2"),
        DT_MATCH_COMPATIBLE("arm,psci-1.0"),
        DT_MATCH_COMPATIBLE("arm,cortex-a7-pmu"),
        DT_MATCH_COMPATIBLE("arm,cortex-a15-pmu"),
        DT_MATCH_COMPATIBLE("arm,cortex-a53-edac"),
        DT_MATCH_COMPATIBLE("arm,armv8-pmuv3"),
        DT_MATCH_PATH("/cpus"),
        DT_MATCH_TYPE("memory"),
        /* The memory mapped timer is not supported by Xen. */
        DT_MATCH_COMPATIBLE("arm,armv7-timer-mem"),
        { /* sentinel */ },
    };
    static const struct dt_device_match timer_matches[] __initconst =
    {
        DT_MATCH_TIMER,
        { /* sentinel */ },
    };
    static const struct dt_device_match reserved_matches[] __initconst =
    {
        DT_MATCH_PATH("/psci"),
        DT_MATCH_PATH("/memory"),
        DT_MATCH_PATH("/hypervisor"),
        { /* sentinel */ },
    };
    struct dt_device_node *child;
    int res;
    const char *name;
    const char *path;

    path = dt_node_full_name(node);

    dt_dprintk("handle %s\n", path);

    /* Skip these nodes and the sub-nodes */
    if ( dt_match_node(skip_matches, node) )
    {
        dt_dprintk("  Skip it (matched)\n");
        return 0;
    }
    if ( platform_device_is_blacklisted(node) )
    {
        dt_dprintk("  Skip it (blacklisted)\n");
        return 0;
    }

    /*
     * Replace these nodes with our own. Note that the original may be
     * used_by DOMID_XEN so this check comes first.
     */
    if ( device_get_class(node) == DEVICE_GIC )
        return make_gic_node(d, kinfo->fdt, node);
    if ( dt_match_node(timer_matches, node) )
        return make_timer_node(d, kinfo->fdt, node);

    /* Skip nodes used by Xen */
    if ( dt_device_used_by(node) == DOMID_XEN )
    {
        dt_dprintk("  Skip it (used by Xen)\n");
        return 0;
    }

    /*
     * Even if the IOMMU device is not used by Xen, it should not be
     * passthrough to DOM0
     */
    if ( device_get_class(node) == DEVICE_IOMMU )
    {
        dt_dprintk(" IOMMU, skip it\n");
        return 0;
    }

    /*
     * Xen is using some path for its own purpose. Warn if a node
     * already exists with the same path.
     */
    if ( dt_match_node(reserved_matches, node) )
        printk(XENLOG_WARNING
               "WARNING: Path %s is reserved, skip the node as we may re-use the path.\n",
               path);

    /* Grant/map the node's IRQs and MMIO regions to the domain. */
    res = handle_device(d, node, p2mt);
    if ( res)
        return res;

    /*
     * The property "name" is used to have a different name on older FDT
     * version. We want to keep the name retrieved during the tree
     * structure creation, that is store in the node path.
     */
    name = strrchr(path, '/');
    name = name ? name + 1 : path;

    res = fdt_begin_node(kinfo->fdt, name);
    if ( res )
        return res;

    res = write_properties(d, kinfo, node);
    if ( res )
        return res;

    /* Recurse over all children, in sibling order. */
    for ( child = node->child; child != NULL; child = child->sibling )
    {
        res = handle_node(d, kinfo, child, p2mt);
        if ( res )
            return res;
    }

    /*
     * At the root node, append the Xen-specific nodes (hypervisor, PSCI,
     * cpus and memory) before closing it.
     */
    if ( node == dt_host )
    {
        res = make_hypervisor_node(kinfo, node);
        if ( res )
            return res;

        res = make_psci_node(kinfo->fdt, node);
        if ( res )
            return res;

        res = make_cpus_node(d, kinfo->fdt, node);
        if ( res )
            return res;

        res = make_memory_node(d, kinfo->fdt, node, kinfo);
        if ( res )
            return res;

    }

    res = fdt_end_node(kinfo->fdt);

    return res;
}
1304 
prepare_dtb(struct domain * d,struct kernel_info * kinfo)1305 static int prepare_dtb(struct domain *d, struct kernel_info *kinfo)
1306 {
1307     const p2m_type_t default_p2mt = p2m_mmio_direct_c;
1308     const void *fdt;
1309     int new_size;
1310     int ret;
1311 
1312     ASSERT(dt_host && (dt_host->sibling == NULL));
1313 
1314     fdt = device_tree_flattened;
1315 
1316     new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
1317     kinfo->fdt = xmalloc_bytes(new_size);
1318     if ( kinfo->fdt == NULL )
1319         return -ENOMEM;
1320 
1321     ret = fdt_create(kinfo->fdt, new_size);
1322     if ( ret < 0 )
1323         goto err;
1324 
1325     fdt_finish_reservemap(kinfo->fdt);
1326 
1327     ret = handle_node(d, kinfo, dt_host, default_p2mt);
1328     if ( ret )
1329         goto err;
1330 
1331     ret = fdt_finish(kinfo->fdt);
1332     if ( ret < 0 )
1333         goto err;
1334 
1335     return 0;
1336 
1337   err:
1338     printk("Device tree generation failed (%d).\n", ret);
1339     xfree(kinfo->fdt);
1340     return -EINVAL;
1341 }
1342 
1343 #ifdef CONFIG_ACPI
1344 #define ACPI_DOM0_FDT_MIN_SIZE 4096
1345 
acpi_iomem_deny_access(struct domain * d)1346 static int acpi_iomem_deny_access(struct domain *d)
1347 {
1348     acpi_status status;
1349     struct acpi_table_spcr *spcr = NULL;
1350     unsigned long mfn;
1351     int rc;
1352 
1353     /* Firstly permit full MMIO capabilities. */
1354     rc = iomem_permit_access(d, 0UL, ~0UL);
1355     if ( rc )
1356         return rc;
1357 
1358     /* TODO: Deny MMIO access for SMMU, GIC ITS */
1359     status = acpi_get_table(ACPI_SIG_SPCR, 0,
1360                             (struct acpi_table_header **)&spcr);
1361 
1362     if ( ACPI_FAILURE(status) )
1363     {
1364         printk("Failed to get SPCR table\n");
1365         return -EINVAL;
1366     }
1367 
1368     mfn = spcr->serial_port.address >> PAGE_SHIFT;
1369     /* Deny MMIO access for UART */
1370     rc = iomem_deny_access(d, mfn, mfn + 1);
1371     if ( rc )
1372         return rc;
1373 
1374     /* Deny MMIO access for GIC regions */
1375     return gic_iomem_deny_access(d);
1376 }
1377 
/*
 * Permit and route every SPI (IRQ >= NR_LOCAL_IRQS) to the hardware
 * domain, except the ones Xen itself has already claimed.
 */
static int acpi_route_spis(struct domain *d)
{
    int i, res;
    struct irq_desc *desc;

    /*
     * Route the IRQ to hardware domain and permit the access.
     * The interrupt type will be set by the hardware domain.
     */
    for( i = NR_LOCAL_IRQS; i < vgic_num_irqs(d); i++ )
    {
        /*
         * TODO: Exclude the SPIs SMMU uses which should not be routed to
         * the hardware domain.
         */
        desc = irq_to_desc(i);
        /* An installed action means Xen owns this IRQ: skip it. */
        if ( desc->action != NULL)
            continue;

        /* XXX: Shall we use a proper devname? */
        res = map_irq_to_domain(d, i, true, "ACPI");
        if ( res )
            return res;
    }

    return 0;
}
1405 
acpi_make_chosen_node(const struct kernel_info * kinfo)1406 static int acpi_make_chosen_node(const struct kernel_info *kinfo)
1407 {
1408     int res;
1409     const char *bootargs = NULL;
1410     const struct bootmodule *mod = kinfo->kernel_bootmodule;
1411     void *fdt = kinfo->fdt;
1412 
1413     dt_dprintk("Create chosen node\n");
1414     res = fdt_begin_node(fdt, "chosen");
1415     if ( res )
1416         return res;
1417 
1418     if ( mod && mod->cmdline[0] )
1419     {
1420         bootargs = &mod->cmdline[0];
1421         res = fdt_property(fdt, "bootargs", bootargs, strlen(bootargs) + 1);
1422         if ( res )
1423            return res;
1424     }
1425 
1426     /*
1427      * If the bootloader provides an initrd, we must create a placeholder
1428      * for the initrd properties. The values will be replaced later.
1429      */
1430     if ( mod && mod->size )
1431     {
1432         u64 a = 0;
1433         res = fdt_property(kinfo->fdt, "linux,initrd-start", &a, sizeof(a));
1434         if ( res )
1435             return res;
1436 
1437         res = fdt_property(kinfo->fdt, "linux,initrd-end", &a, sizeof(a));
1438         if ( res )
1439             return res;
1440     }
1441 
1442     res = fdt_end_node(fdt);
1443 
1444     return res;
1445 }
1446 
acpi_make_hypervisor_node(const struct kernel_info * kinfo,struct membank tbl_add[])1447 static int acpi_make_hypervisor_node(const struct kernel_info *kinfo,
1448                                      struct membank tbl_add[])
1449 {
1450     const char compat[] =
1451         "xen,xen-"__stringify(XEN_VERSION)"."__stringify(XEN_SUBVERSION)"\0"
1452         "xen,xen";
1453     int res;
1454     /* Convenience alias */
1455     void *fdt = kinfo->fdt;
1456 
1457     dt_dprintk("Create hypervisor node\n");
1458 
1459     /* See linux Documentation/devicetree/bindings/arm/xen.txt */
1460     res = fdt_begin_node(fdt, "hypervisor");
1461     if ( res )
1462         return res;
1463 
1464     /* Cannot use fdt_property_string due to embedded nulls */
1465     res = fdt_property(fdt, "compatible", compat, sizeof(compat));
1466     if ( res )
1467         return res;
1468 
1469     res = acpi_make_efi_nodes(fdt, tbl_add);
1470     if ( res )
1471         return res;
1472 
1473     res = fdt_end_node(fdt);
1474 
1475     return res;
1476 }
1477 
1478 /*
1479  * Prepare a minimal DTB for Dom0 which contains bootargs, initrd, memory
1480  * information, EFI table.
1481  */
create_acpi_dtb(struct kernel_info * kinfo,struct membank tbl_add[])1482 static int create_acpi_dtb(struct kernel_info *kinfo, struct membank tbl_add[])
1483 {
1484     int new_size;
1485     int ret;
1486 
1487     dt_dprintk("Prepare a min DTB for DOM0\n");
1488 
1489     /* Allocate min size for DT */
1490     new_size = ACPI_DOM0_FDT_MIN_SIZE;
1491     kinfo->fdt = xmalloc_bytes(new_size);
1492 
1493     if ( kinfo->fdt == NULL )
1494         return -ENOMEM;
1495 
1496     /* Create a new empty DT for DOM0 */
1497     ret = fdt_create(kinfo->fdt, new_size);
1498     if ( ret < 0 )
1499         goto err;
1500 
1501     ret = fdt_finish_reservemap(kinfo->fdt);
1502     if ( ret < 0 )
1503         goto err;
1504 
1505     ret = fdt_begin_node(kinfo->fdt, "/");
1506     if ( ret < 0 )
1507         goto err;
1508 
1509     ret = fdt_property_cell(kinfo->fdt, "#address-cells", 2);
1510     if ( ret )
1511         return ret;
1512 
1513     ret = fdt_property_cell(kinfo->fdt, "#size-cells", 1);
1514     if ( ret )
1515         return ret;
1516 
1517     /* Create a chosen node for DOM0 */
1518     ret = acpi_make_chosen_node(kinfo);
1519     if ( ret )
1520         goto err;
1521 
1522     ret = acpi_make_hypervisor_node(kinfo, tbl_add);
1523     if ( ret )
1524         goto err;
1525 
1526     ret = fdt_end_node(kinfo->fdt);
1527     if ( ret < 0 )
1528         goto err;
1529 
1530     ret = fdt_finish(kinfo->fdt);
1531     if ( ret < 0 )
1532         goto err;
1533 
1534     return 0;
1535 
1536   err:
1537     printk("Device tree generation failed (%d).\n", ret);
1538     xfree(kinfo->fdt);
1539     return -EINVAL;
1540 }
1541 
acpi_map_other_tables(struct domain * d)1542 static void acpi_map_other_tables(struct domain *d)
1543 {
1544     int i;
1545     unsigned long res;
1546     u64 addr, size;
1547 
1548     /* Map all ACPI tables to Dom0 using 1:1 mappings. */
1549     for( i = 0; i < acpi_gbl_root_table_list.count; i++ )
1550     {
1551         addr = acpi_gbl_root_table_list.tables[i].address;
1552         size = acpi_gbl_root_table_list.tables[i].length;
1553         res = map_regions_p2mt(d,
1554                                gaddr_to_gfn(addr),
1555                                PFN_UP(size),
1556                                maddr_to_mfn(addr),
1557                                p2m_mmio_direct_c);
1558         if ( res )
1559         {
1560              panic(XENLOG_ERR "Unable to map ACPI region 0x%"PRIx64
1561                    " - 0x%"PRIx64" in domain \n",
1562                    addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
1563         }
1564     }
1565 }
1566 
acpi_create_rsdp(struct domain * d,struct membank tbl_add[])1567 static int acpi_create_rsdp(struct domain *d, struct membank tbl_add[])
1568 {
1569 
1570     struct acpi_table_rsdp *rsdp = NULL;
1571     u64 addr;
1572     u64 table_size = sizeof(struct acpi_table_rsdp);
1573     u8 *base_ptr;
1574     u8 checksum;
1575 
1576     addr = acpi_os_get_root_pointer();
1577     if ( !addr  )
1578     {
1579         printk("Unable to get acpi root pointer\n");
1580         return -EINVAL;
1581     }
1582     rsdp = acpi_os_map_memory(addr, table_size);
1583     base_ptr = d->arch.efi_acpi_table
1584                + acpi_get_table_offset(tbl_add, TBL_RSDP);
1585     memcpy(base_ptr, rsdp, table_size);
1586     acpi_os_unmap_memory(rsdp, table_size);
1587 
1588     rsdp = (struct acpi_table_rsdp *)base_ptr;
1589     /* Replace xsdt_physical_address */
1590     rsdp->xsdt_physical_address = tbl_add[TBL_XSDT].start;
1591     checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, rsdp), table_size);
1592     rsdp->checksum = rsdp->checksum - checksum;
1593 
1594     tbl_add[TBL_RSDP].start = d->arch.efi_acpi_gpa
1595                               + acpi_get_table_offset(tbl_add, TBL_RSDP);
1596     tbl_add[TBL_RSDP].size = table_size;
1597 
1598     return 0;
1599 }
1600 
/*
 * Scan the XSDT entry array for the table with @signature and replace its
 * physical address with @addr. Only the first match is rewritten.
 */
static void acpi_xsdt_modify_entry(u64 entry[], unsigned long entry_count,
                                   char *signature, u64 addr)
{
    /* Match entry_count's type; "int i" mixed signed/unsigned compare. */
    unsigned long i;
    struct acpi_table_header *table;
    u64 size = sizeof(struct acpi_table_header);

    for( i = 0; i < entry_count; i++ )
    {
        table = acpi_os_map_memory(entry[i], size);
        /* Skip entries we cannot map rather than dereferencing NULL. */
        if ( !table )
            continue;

        if ( ACPI_COMPARE_NAME(table->signature, signature) )
        {
            entry[i] = addr;
            acpi_os_unmap_memory(table, size);
            break;
        }
        acpi_os_unmap_memory(table, size);
    }
}
1620 
acpi_create_xsdt(struct domain * d,struct membank tbl_add[])1621 static int acpi_create_xsdt(struct domain *d, struct membank tbl_add[])
1622 {
1623     struct acpi_table_header *table = NULL;
1624     struct acpi_table_rsdp *rsdp_tbl;
1625     struct acpi_table_xsdt *xsdt = NULL;
1626     u64 table_size, addr;
1627     unsigned long entry_count;
1628     u8 *base_ptr;
1629     u8 checksum;
1630 
1631     addr = acpi_os_get_root_pointer();
1632     if ( !addr )
1633     {
1634         printk("Unable to get acpi root pointer\n");
1635         return -EINVAL;
1636     }
1637     rsdp_tbl = acpi_os_map_memory(addr, sizeof(struct acpi_table_rsdp));
1638     table = acpi_os_map_memory(rsdp_tbl->xsdt_physical_address,
1639                                sizeof(struct acpi_table_header));
1640 
1641     /* Add place for STAO table in XSDT table */
1642     table_size = table->length + sizeof(u64);
1643     entry_count = (table->length - sizeof(struct acpi_table_header))
1644                   / sizeof(u64);
1645     base_ptr = d->arch.efi_acpi_table
1646                + acpi_get_table_offset(tbl_add, TBL_XSDT);
1647     memcpy(base_ptr, table, table->length);
1648     acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));
1649     acpi_os_unmap_memory(rsdp_tbl, sizeof(struct acpi_table_rsdp));
1650 
1651     xsdt = (struct acpi_table_xsdt *)base_ptr;
1652     acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
1653                            ACPI_SIG_FADT, tbl_add[TBL_FADT].start);
1654     acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
1655                            ACPI_SIG_MADT, tbl_add[TBL_MADT].start);
1656     xsdt->table_offset_entry[entry_count] = tbl_add[TBL_STAO].start;
1657 
1658     xsdt->header.length = table_size;
1659     checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, xsdt), table_size);
1660     xsdt->header.checksum -= checksum;
1661 
1662     tbl_add[TBL_XSDT].start = d->arch.efi_acpi_gpa
1663                               + acpi_get_table_offset(tbl_add, TBL_XSDT);
1664     tbl_add[TBL_XSDT].size = table_size;
1665 
1666     return 0;
1667 }
1668 
acpi_create_stao(struct domain * d,struct membank tbl_add[])1669 static int acpi_create_stao(struct domain *d, struct membank tbl_add[])
1670 {
1671     struct acpi_table_header *table = NULL;
1672     struct acpi_table_stao *stao = NULL;
1673     u32 table_size = sizeof(struct acpi_table_stao);
1674     u32 offset = acpi_get_table_offset(tbl_add, TBL_STAO);
1675     acpi_status status;
1676     u8 *base_ptr, checksum;
1677 
1678     /* Copy OEM and ASL compiler fields from another table, use MADT */
1679     status = acpi_get_table(ACPI_SIG_MADT, 0, &table);
1680 
1681     if ( ACPI_FAILURE(status) )
1682     {
1683         const char *msg = acpi_format_exception(status);
1684 
1685         printk("STAO: Failed to get MADT table, %s\n", msg);
1686         return -EINVAL;
1687     }
1688 
1689     base_ptr = d->arch.efi_acpi_table + offset;
1690     memcpy(base_ptr, table, sizeof(struct acpi_table_header));
1691 
1692     stao = (struct acpi_table_stao *)base_ptr;
1693     memcpy(stao->header.signature, ACPI_SIG_STAO, 4);
1694     stao->header.revision = 1;
1695     stao->header.length = table_size;
1696     stao->ignore_uart = 1;
1697     checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, stao), table_size);
1698     stao->header.checksum -= checksum;
1699 
1700     tbl_add[TBL_STAO].start = d->arch.efi_acpi_gpa + offset;
1701     tbl_add[TBL_STAO].size = table_size;
1702 
1703     return 0;
1704 }
1705 
/*
 * Build dom0's MADT: copy the firmware MADT header, append a Generic
 * Distributor entry and the GIC-specific subtables, then fix length and
 * checksum. Records the guest placement in tbl_add[TBL_MADT].
 */
static int acpi_create_madt(struct domain *d, struct membank tbl_add[])
{
    struct acpi_table_header *table = NULL;
    struct acpi_table_madt *madt = NULL;
    struct acpi_subtable_header *header;
    struct acpi_madt_generic_distributor *gicd;
    /* Running total: grows as each subtable is appended below. */
    u32 table_size = sizeof(struct acpi_table_madt);
    u32 offset = acpi_get_table_offset(tbl_add, TBL_MADT);
    int ret;
    acpi_status status;
    u8 *base_ptr, checksum;

    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);

    if ( ACPI_FAILURE(status) )
    {
        const char *msg = acpi_format_exception(status);

        printk("Failed to get MADT table, %s\n", msg);
        return -EINVAL;
    }

    /* Copy only the fixed MADT header; subtables are rebuilt below. */
    base_ptr = d->arch.efi_acpi_table + offset;
    memcpy(base_ptr, table, table_size);

    /* Add Generic Distributor. */
    header = acpi_table_get_entry_madt(ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
    if ( !header )
    {
        printk("Can't get GICD entry\n");
        return -EINVAL;
    }
    gicd = container_of(header, struct acpi_madt_generic_distributor, header);
    memcpy(base_ptr + table_size, gicd,
                sizeof(struct acpi_madt_generic_distributor));
    table_size += sizeof(struct acpi_madt_generic_distributor);

    /* Add other subtables (GIC CPU interfaces etc., size returned). */
    ret = gic_make_hwdom_madt(d, offset + table_size);
    if ( ret < 0 )
    {
        printk("Failed to get other subtables\n");
        return -EINVAL;
    }
    table_size += ret;

    /* Finalise the header now the total length is known. */
    madt = (struct acpi_table_madt *)base_ptr;
    madt->header.length = table_size;
    checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, madt), table_size);
    madt->header.checksum -= checksum;

    tbl_add[TBL_MADT].start = d->arch.efi_acpi_gpa + offset;
    tbl_add[TBL_MADT].size = table_size;

    return 0;
}
1762 
acpi_create_fadt(struct domain * d,struct membank tbl_add[])1763 static int acpi_create_fadt(struct domain *d, struct membank tbl_add[])
1764 {
1765     struct acpi_table_header *table = NULL;
1766     struct acpi_table_fadt *fadt = NULL;
1767     u64 table_size;
1768     acpi_status status;
1769     u8 *base_ptr;
1770     u8 checksum;
1771 
1772     status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
1773 
1774     if ( ACPI_FAILURE(status) )
1775     {
1776         const char *msg = acpi_format_exception(status);
1777 
1778         printk("Failed to get FADT table, %s\n", msg);
1779         return -EINVAL;
1780     }
1781 
1782     table_size = table->length;
1783     base_ptr = d->arch.efi_acpi_table
1784                + acpi_get_table_offset(tbl_add, TBL_FADT);
1785     memcpy(base_ptr, table, table_size);
1786     fadt = (struct acpi_table_fadt *)base_ptr;
1787 
1788     /* Set PSCI_COMPLIANT and PSCI_USE_HVC */
1789     fadt->arm_boot_flags |= (ACPI_FADT_PSCI_COMPLIANT | ACPI_FADT_PSCI_USE_HVC);
1790     checksum = acpi_tb_checksum(ACPI_CAST_PTR(u8, fadt), table_size);
1791     fadt->header.checksum -= checksum;
1792 
1793     tbl_add[TBL_FADT].start = d->arch.efi_acpi_gpa
1794                               + acpi_get_table_offset(tbl_add, TBL_FADT);
1795     tbl_add[TBL_FADT].size = table_size;
1796 
1797     return 0;
1798 }
1799 
/*
 * Estimate the room needed for dom0's EFI and ACPI tables (FADT, STAO,
 * MADT, XSDT + one extra entry, RSDP) and store the page-aligned total
 * in d->arch.efi_acpi_len.
 */
static int estimate_acpi_efi_size(struct domain *d, struct kernel_info *kinfo)
{
    size_t efi_size, acpi_size, madt_size;
    u64 addr;
    struct acpi_table_rsdp *rsdp_tbl;
    struct acpi_table_header *table;

    efi_size = estimate_efi_size(kinfo->mem.nr_banks);

    /* Each table copy is kept 8-byte aligned, hence the ROUNDUPs. */
    acpi_size = ROUNDUP(sizeof(struct acpi_table_fadt), 8);
    acpi_size += ROUNDUP(sizeof(struct acpi_table_stao), 8);

    madt_size = gic_get_hwdom_madt_size(d);
    acpi_size += ROUNDUP(madt_size, 8);

    addr = acpi_os_get_root_pointer();
    if ( !addr )
    {
        printk("Unable to get acpi root pointer\n");
        return -EINVAL;
    }

    rsdp_tbl = acpi_os_map_memory(addr, sizeof(struct acpi_table_rsdp));
    if ( !rsdp_tbl )
    {
        printk("Unable to map RSDP table\n");
        return -EINVAL;
    }

    /* Map just the XSDT header: only its length field is needed. */
    table = acpi_os_map_memory(rsdp_tbl->xsdt_physical_address,
                               sizeof(struct acpi_table_header));
    acpi_os_unmap_memory(rsdp_tbl, sizeof(struct acpi_table_rsdp));
    if ( !table )
    {
        printk("Unable to map XSDT table\n");
        return -EINVAL;
    }

    /* Add place for STAO table in XSDT table */
    acpi_size += ROUNDUP(table->length + sizeof(u64), 8);
    acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));

    acpi_size += ROUNDUP(sizeof(struct acpi_table_rsdp), 8);
    d->arch.efi_acpi_len = PAGE_ALIGN(ROUNDUP(efi_size, 8)
                                      + ROUNDUP(acpi_size, 8));

    return 0;
}
1848 
prepare_acpi(struct domain * d,struct kernel_info * kinfo)1849 static int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
1850 {
1851     int rc = 0;
1852     int order;
1853     struct membank tbl_add[TBL_MMAX] = {};
1854 
1855     rc = estimate_acpi_efi_size(d, kinfo);
1856     if ( rc != 0 )
1857         return rc;
1858 
1859     order = get_order_from_bytes(d->arch.efi_acpi_len);
1860     d->arch.efi_acpi_table = alloc_xenheap_pages(order, 0);
1861     if ( d->arch.efi_acpi_table == NULL )
1862     {
1863         printk("unable to allocate memory!\n");
1864         return -ENOMEM;
1865     }
1866     memset(d->arch.efi_acpi_table, 0, d->arch.efi_acpi_len);
1867 
1868     /*
1869      * For ACPI, Dom0 doesn't use kinfo->gnttab_start to get the grant table
1870      * region. So we use it as the ACPI table mapped address. Also it needs to
1871      * check if the size of grant table region is enough for those ACPI tables.
1872      */
1873     d->arch.efi_acpi_gpa = kinfo->gnttab_start;
1874     if ( kinfo->gnttab_size < d->arch.efi_acpi_len )
1875     {
1876         printk("The grant table region is not enough to fit the ACPI tables!\n");
1877         return -EINVAL;
1878     }
1879 
1880     rc = acpi_create_fadt(d, tbl_add);
1881     if ( rc != 0 )
1882         return rc;
1883 
1884     rc = acpi_create_madt(d, tbl_add);
1885     if ( rc != 0 )
1886         return rc;
1887 
1888     rc = acpi_create_stao(d, tbl_add);
1889     if ( rc != 0 )
1890         return rc;
1891 
1892     rc = acpi_create_xsdt(d, tbl_add);
1893     if ( rc != 0 )
1894         return rc;
1895 
1896     rc = acpi_create_rsdp(d, tbl_add);
1897     if ( rc != 0 )
1898         return rc;
1899 
1900     acpi_map_other_tables(d);
1901     acpi_create_efi_system_table(d, tbl_add);
1902     acpi_create_efi_mmap_table(d, &kinfo->mem, tbl_add);
1903 
1904     /* Map the EFI and ACPI tables to Dom0 */
1905     rc = map_regions_p2mt(d,
1906                           gaddr_to_gfn(d->arch.efi_acpi_gpa),
1907                           PFN_UP(d->arch.efi_acpi_len),
1908                           virt_to_mfn(d->arch.efi_acpi_table),
1909                           p2m_mmio_direct_c);
1910     if ( rc != 0 )
1911     {
1912         printk(XENLOG_ERR "Unable to map EFI/ACPI table 0x%"PRIx64
1913                " - 0x%"PRIx64" in domain %d\n",
1914                d->arch.efi_acpi_gpa & PAGE_MASK,
1915                PAGE_ALIGN(d->arch.efi_acpi_gpa + d->arch.efi_acpi_len) - 1,
1916                d->domain_id);
1917         return rc;
1918     }
1919 
1920     /*
1921      * Flush the cache for this region, otherwise DOM0 may read wrong data when
1922      * the cache is disabled.
1923      */
1924     clean_and_invalidate_dcache_va_range(d->arch.efi_acpi_table,
1925                                          d->arch.efi_acpi_len);
1926 
1927     rc = create_acpi_dtb(kinfo, tbl_add);
1928     if ( rc != 0 )
1929         return rc;
1930 
1931     rc = acpi_route_spis(d);
1932     if ( rc != 0 )
1933         return rc;
1934 
1935     rc = acpi_iomem_deny_access(d);
1936     if ( rc != 0 )
1937         return rc;
1938 
1939     return 0;
1940 }
1941 #else
/*
 * Stub used when Xen is built without ACPI support. construct_dom0() only
 * calls this when booting with ACPI, which cannot happen in such a build,
 * so reaching it is a logic error.
 */
static int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
{
    /* Only booting with ACPI will hit here */
    BUG();
    return -EINVAL; /* Unreachable; keeps the non-void return type happy. */
}
1948 #endif
dtb_load(struct kernel_info * kinfo)1949 static void dtb_load(struct kernel_info *kinfo)
1950 {
1951     void * __user dtb_virt = (void * __user)(register_t)kinfo->dtb_paddr;
1952     unsigned long left;
1953 
1954     printk("Loading dom0 DTB to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
1955            kinfo->dtb_paddr, kinfo->dtb_paddr + fdt_totalsize(kinfo->fdt));
1956 
1957     left = raw_copy_to_guest_flush_dcache(dtb_virt, kinfo->fdt,
1958                                         fdt_totalsize(kinfo->fdt));
1959     if ( left != 0 )
1960         panic("Unable to copy the DTB to dom0 memory (left = %lu bytes)", left);
1961     xfree(kinfo->fdt);
1962 }
1963 
initrd_load(struct kernel_info * kinfo)1964 static void initrd_load(struct kernel_info *kinfo)
1965 {
1966     const struct bootmodule *mod = kinfo->initrd_bootmodule;
1967     paddr_t load_addr = kinfo->initrd_paddr;
1968     paddr_t paddr, len;
1969     unsigned long offs;
1970     int node;
1971     int res;
1972     __be32 val[2];
1973     __be32 *cellp;
1974 
1975     if ( !mod || !mod->size )
1976         return;
1977 
1978     paddr = mod->start;
1979     len = mod->size;
1980 
1981     printk("Loading dom0 initrd from %"PRIpaddr" to 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
1982            paddr, load_addr, load_addr + len);
1983 
1984     /* Fix up linux,initrd-start and linux,initrd-end in /chosen */
1985     node = fdt_path_offset(kinfo->fdt, "/chosen");
1986     if ( node < 0 )
1987         panic("Cannot find the /chosen node");
1988 
1989     cellp = (__be32 *)val;
1990     dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr);
1991     res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-start",
1992                               val, sizeof(val));
1993     if ( res )
1994         panic("Cannot fix up \"linux,initrd-start\" property");
1995 
1996     cellp = (__be32 *)val;
1997     dt_set_cell(&cellp, ARRAY_SIZE(val), load_addr + len);
1998     res = fdt_setprop_inplace(kinfo->fdt, node, "linux,initrd-end",
1999                               val, sizeof(val));
2000     if ( res )
2001         panic("Cannot fix up \"linux,initrd-end\" property");
2002 
2003     for ( offs = 0; offs < len; )
2004     {
2005         uint64_t par;
2006         paddr_t s, l, ma = 0;
2007         void *dst;
2008 
2009         s = offs & ~PAGE_MASK;
2010         l = min(PAGE_SIZE - s, len);
2011 
2012         par = gvirt_to_maddr(load_addr + offs, &ma, GV2M_WRITE);
2013         if ( par )
2014         {
2015             panic("Unable to translate guest address");
2016             return;
2017         }
2018 
2019         dst = map_domain_page(maddr_to_mfn(ma));
2020 
2021         copy_from_paddr(dst + s, paddr + offs, l);
2022 
2023         unmap_domain_page(dst);
2024         offs += l;
2025     }
2026 }
2027 
/*
 * Allocate the PPI used to deliver event channel upcalls to dom0,
 * advertise it through HVM_PARAM_CALLBACK_IRQ and, for device tree
 * boots, patch the "interrupts" property of the /hypervisor node so
 * the kernel can discover it. Must run after all other PPIs used by
 * dom0 have been registered (see comment below).
 */
static void evtchn_fixup(struct domain *d, struct kernel_info *kinfo)
{
    int res, node;
    u64 val;
    gic_interrupt_t intr;

    /*
     * The allocation of the event channel IRQ has been deferred until
     * now. At this time, all PPIs used by DOM0 have been registered.
     */
    res = vgic_allocate_ppi(d);
    if ( res < 0 )
        panic("Unable to allocate a PPI for the event channel interrupt\n");

    d->arch.evtchn_irq = res;

    printk("Allocating PPI %u for event channel interrupt\n",
           d->arch.evtchn_irq);

    /* Set the value of domain param HVM_PARAM_CALLBACK_IRQ */
    val = MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI,
                    HVM_PARAM_CALLBACK_IRQ_TYPE_MASK);
    /* Active-low level-sensitive  */
    val |= MASK_INSR(HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL,
                     HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK);
    /* The low bits carry the PPI number itself. */
    val |= d->arch.evtchn_irq;
    d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ] = val;

    /*
     * When booting Dom0 using ACPI, Dom0 can only get the event channel
     * interrupt via hypercall.
     */
    if ( !acpi_disabled )
        return;

    /* Fix up "interrupts" in /hypervisor node */
    node = fdt_path_offset(kinfo->fdt, "/hypervisor");
    if ( node < 0 )
        panic("Cannot find the /hypervisor node");

    /* Interrupt event channel upcall:
     *  - Active-low level-sensitive
     *  - All CPUs (0xf is the 4-bit CPU mask)
     *
     *  TODO: Handle properly the cpumask
     */
    set_interrupt_ppi(intr, d->arch.evtchn_irq, 0xf,
                      IRQ_TYPE_LEVEL_LOW);
    res = fdt_setprop_inplace(kinfo->fdt, node, "interrupts",
                              &intr, sizeof(intr));
    if ( res )
        panic("Cannot fix up \"interrupts\" property of the hypervisor node");
}
2081 
find_gnttab_region(struct domain * d,struct kernel_info * kinfo)2082 static void __init find_gnttab_region(struct domain *d,
2083                                       struct kernel_info *kinfo)
2084 {
2085     /*
2086      * The region used by Xen on the memory will never be mapped in DOM0
2087      * memory layout. Therefore it can be used for the grant table.
2088      *
2089      * Only use the text section as it's always present and will contain
2090      * enough space for a large grant table
2091      */
2092     kinfo->gnttab_start = __pa(_stext);
2093     kinfo->gnttab_size = gnttab_dom0_frames() << PAGE_SHIFT;
2094 
2095 #ifdef CONFIG_ARM_32
2096     /*
2097      * The gnttab region must be under 4GB in order to work with DOM0
2098      * using short page table.
2099      * In practice it's always the case because Xen is always located
2100      * below 4GB, but be safe.
2101      */
2102     BUG_ON((kinfo->gnttab_start + kinfo->gnttab_size) > GB(4));
2103 #endif
2104 
2105     printk("Grant table range: %#"PRIpaddr"-%#"PRIpaddr"\n",
2106            kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
2107 }
2108 
/*
 * Build and boot-strap domain 0: allocate its memory, prepare its
 * firmware description (DTB or ACPI), load the kernel/initrd/FDT into
 * its address space, bring up secondary vcpus and set vcpu0's initial
 * register state per the Linux ARM/arm64 boot protocols.
 *
 * Returns 0 on success or a negative errno value; the caller treats
 * failure as fatal.
 */
int construct_dom0(struct domain *d)
{
    struct kernel_info kinfo = {};
    struct vcpu *saved_current;
    int rc, i, cpu;

    struct vcpu *v = d->vcpu[0];
    struct cpu_user_regs *regs = &v->arch.cpu_info->guest_cpu_user_regs;

    /* Sanity! */
    BUG_ON(d->domain_id != 0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(v->is_initialised);

    printk("*** LOADING DOMAIN 0 ***\n");
    /* dom0_mem is a u64, so this is effectively a check for 0. */
    if ( dom0_mem <= 0 )
    {
        warning_add("PLEASE SPECIFY dom0_mem PARAMETER - USING 512M FOR NOW\n");
        dom0_mem = MB(512);
    }


    iommu_hwdom_init(d);

    /* No page limit for the hardware domain. */
    d->max_pages = ~0U;

    kinfo.unassigned_mem = dom0_mem;

    /* Locate the kernel image among the boot modules and identify it. */
    rc = kernel_probe(&kinfo);
    if ( rc < 0 )
        return rc;

#ifdef CONFIG_ARM_64
    /* if aarch32 mode is not supported at EL1 do not allow 32-bit domain */
    if ( !(cpu_has_el1_32) && kinfo.type == DOMAIN_32BIT )
    {
        printk("Platform does not support 32-bit domain\n");
        return -EINVAL;
    }
    d->arch.type = kinfo.type;

    if ( is_64bit_domain(d) )
        vcpu_switch_to_aarch64_mode(v);

#endif

    allocate_memory(d, &kinfo);
    find_gnttab_region(d, &kinfo);

    /* Firmware description: device tree when ACPI is off, ACPI otherwise. */
    if ( acpi_disabled )
        rc = prepare_dtb(d, &kinfo);
    else
        rc = prepare_acpi(d, &kinfo);

    if ( rc < 0 )
        return rc;

    /* Map extra GIC MMIO, irqs and other hw stuffs to dom0. */
    rc = gic_map_hwdom_extra_mappings(d);
    if ( rc < 0 )
        return rc;

    rc = platform_specific_mapping(d);
    if ( rc < 0 )
        return rc;

    /*
     * The following loads use the domain's p2m and require current to
     * be a vcpu of the domain, temporarily switch
     */
    saved_current = current;
    p2m_restore_state(v);
    set_current(v);

    /*
     * kernel_load will determine the placement of the kernel as well
     * as the initrd & fdt in RAM, so call it first.
     */
    kernel_load(&kinfo);
    /* initrd_load will fix up the fdt, so call it before dtb_load */
    initrd_load(&kinfo);
    /* Allocate the event channel IRQ and fix up the device tree */
    evtchn_fixup(d, &kinfo);
    dtb_load(&kinfo);

    /* Now that we are done restore the original p2m and current. */
    set_current(saved_current);
    p2m_restore_state(saved_current);

    discard_initial_modules();

    /* Initial vcpu0 register state: everything zero except as set below. */
    memset(regs, 0, sizeof(*regs));

    regs->pc = (register_t)kinfo.entry;

    if ( is_32bit_domain(d) )
    {
        regs->cpsr = PSR_GUEST32_INIT;

        /* FROM LINUX head.S
         *
         * Kernel startup entry point.
         * ---------------------------
         *
         * This is normally called from the decompressor code.  The requirements
         * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
         * r1 = machine nr, r2 = atags or dtb pointer.
         *...
         */
        regs->r0 = 0; /* SBZ */
        regs->r1 = 0xffffffff; /* We use DTB therefore no machine id */
        regs->r2 = kinfo.dtb_paddr;
    }
#ifdef CONFIG_ARM_64
    else
    {
        regs->cpsr = PSR_GUEST64_INIT;
        /* From linux/Documentation/arm64/booting.txt */
        regs->x0 = kinfo.dtb_paddr;
        regs->x1 = 0; /* Reserved for future use */
        regs->x2 = 0; /* Reserved for future use */
        regs->x3 = 0; /* Reserved for future use */
    }
#endif

    /* Create the secondary vcpus, spreading them over the online pcpus. */
    for ( i = 1, cpu = 0; i < d->max_vcpus; i++ )
    {
        cpu = cpumask_cycle(cpu, &cpu_online_map);
        if ( alloc_vcpu(d, i, cpu) == NULL )
        {
            printk("Failed to allocate dom0 vcpu %d on pcpu %d\n", i, cpu);
            break;
        }

        if ( is_64bit_domain(d) )
            vcpu_switch_to_aarch64_mode(d->vcpu[i]);
    }

    /* Mark vcpu0 runnable; it starts at regs->pc when unpaused. */
    v->is_initialised = 1;
    clear_bit(_VPF_down, &v->pause_flags);

    return 0;
}
2252 
2253 /*
2254  * Local variables:
2255  * mode: C
2256  * c-file-style: "BSD"
2257  * c-basic-offset: 4
2258  * indent-tabs-mode: nil
2259  * End:
2260  */
2261