1 /*
2  * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3  * Copyright 2015, 2016 Hesham Almatary <heshamelmatary@gmail.com>
4  * Copyright 2021, HENSOLDT Cyber
5  *
6  * SPDX-License-Identifier: GPL-2.0-only
7  */
8 
9 #include <assert.h>
10 #include <kernel/boot.h>
11 #include <machine/io.h>
12 #include <model/statedata.h>
13 #include <object/interrupt.h>
14 #include <arch/machine.h>
15 #include <arch/kernel/boot.h>
16 #include <arch/kernel/vspace.h>
17 #include <arch/benchmark.h>
18 #include <linker.h>
19 #include <plat/machine/hardware.h>
20 #include <machine.h>
21 
22 #ifdef ENABLE_SMP_SUPPORT
23 BOOT_BSS static volatile word_t node_boot_lock;
24 #endif
25 
26 BOOT_BSS static region_t res_reg[NUM_RESERVED_REGIONS];
27 
create_mapped_it_frame_cap(cap_t pd_cap,pptr_t pptr,vptr_t vptr,asid_t asid,bool_t use_large,bool_t executable)28 BOOT_CODE cap_t create_mapped_it_frame_cap(cap_t pd_cap, pptr_t pptr, vptr_t vptr, asid_t asid, bool_t
29                                            use_large, bool_t executable)
30 {
31     cap_t cap;
32     vm_page_size_t frame_size;
33 
34     if (use_large) {
35         frame_size = RISCV_Mega_Page;
36     } else {
37         frame_size = RISCV_4K_Page;
38     }
39 
40     cap = cap_frame_cap_new(
41               asid,                            /* capFMappedASID       */
42               pptr,                            /* capFBasePtr          */
43               frame_size,                      /* capFSize             */
44               wordFromVMRights(VMReadWrite),   /* capFVMRights         */
45               0,                               /* capFIsDevice         */
46               vptr                             /* capFMappedAddress    */
47           );
48 
49     map_it_frame_cap(pd_cap, cap);
50     return cap;
51 }
52 
arch_init_freemem(region_t ui_reg,p_region_t dtb_p_reg,v_region_t it_v_reg,word_t extra_bi_size_bits)53 BOOT_CODE static bool_t arch_init_freemem(region_t ui_reg,
54                                           p_region_t dtb_p_reg,
55                                           v_region_t it_v_reg,
56                                           word_t extra_bi_size_bits)
57 {
58     /* Reserve the kernel image region. This may look a bit awkward, as the
59      * symbols are a reference in the kernel image window, but all allocations
60      * are done in terms of the main kernel window, so we do some translation.
61      */
62     res_reg[0].start = (pptr_t)paddr_to_pptr(kpptr_to_paddr((void *)KERNEL_ELF_BASE));
63     res_reg[0].end = (pptr_t)paddr_to_pptr(kpptr_to_paddr((void *)ki_end));
64 
65     int index = 1;
66 
67     /* add the dtb region, if it is not empty */
68     if (dtb_p_reg.start) {
69         if (index >= ARRAY_SIZE(res_reg)) {
70             printf("ERROR: no slot to add DTB to reserved regions\n");
71             return false;
72         }
73         res_reg[index] = paddr_to_pptr_reg(dtb_p_reg);
74         index += 1;
75     }
76 
77     /* reserve the user image region */
78     if (index >= ARRAY_SIZE(res_reg)) {
79         printf("ERROR: no slot to add user image to reserved regions\n");
80         return false;
81     }
82     res_reg[index] = ui_reg;
83     index += 1;
84 
85     /* avail_p_regs comes from the auto-generated code */
86     return init_freemem(ARRAY_SIZE(avail_p_regs), avail_p_regs,
87                         index, res_reg,
88                         it_v_reg, extra_bi_size_bits);
89 }
90 
init_irqs(cap_t root_cnode_cap)91 BOOT_CODE static void init_irqs(cap_t root_cnode_cap)
92 {
93     irq_t i;
94 
95     for (i = 0; i <= maxIRQ; i++) {
96         if (i != irqInvalid) {
97             /* IRQ 0 is irqInvalid */
98             setIRQState(IRQInactive, i);
99         }
100     }
101     setIRQState(IRQTimer, KERNEL_TIMER_IRQ);
102 #ifdef ENABLE_SMP_SUPPORT
103     setIRQState(IRQIPI, irq_remote_call_ipi);
104     setIRQState(IRQIPI, irq_reschedule_ipi);
105 #endif
106     /* provide the IRQ control cap */
107     write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapIRQControl), cap_irq_control_cap_new());
108 }
109 
110 /* ASM symbol for the CPU initialisation trap. */
111 extern char trap_entry[1];
112 
113 /* This and only this function initialises the CPU. It does NOT initialise any kernel state. */
114 
115 #ifdef CONFIG_HAVE_FPU
/*
 * FPU part of the CPU initialisation; only built with CONFIG_HAVE_FPU and
 * called from init_cpu().
 *
 * The three steps run in a fixed order: set_fs_clean(), then write_fcsr(0),
 * then disableFpu().
 * NOTE(review): presumably the FS state has to be switched on before fcsr can
 * be written, and the FPU is left disabled afterwards — confirm against the
 * helper implementations.
 */
BOOT_CODE static void init_fpu(void)
{
    set_fs_clean();
    write_fcsr(0);
    disableFpu();
}
122 #endif
123 
/*
 * Per-CPU initialisation. Called by try_init_kernel() on the primary core
 * and by try_init_kernel_secondary_core() on every other core. It does not
 * set up any kernel state.
 *
 * The statement order is part of the contract and must not be changed.
 */
BOOT_CODE static void init_cpu(void)
{

    /* switch to the kernel's address space first */
    activate_kernel_vspace();
    /* Write trap entry address to stvec */
    write_stvec((word_t)trap_entry);
    initLocalIRQController();
#ifndef CONFIG_KERNEL_MCS
    /* the timer is only initialised here on non-MCS configurations */
    initTimer();
#endif

    /* disable FPU access */
    set_fs_off();

#ifdef CONFIG_HAVE_FPU
    /* with FPU support configured, run the FPU specific initialisation */
    init_fpu();
#endif
}
142 
143 /* This and only this function initialises the platform. It does NOT initialise any kernel state. */
144 
/*
 * Platform initialisation. Called once by try_init_kernel(); the only step
 * performed is initIRQController().
 */
BOOT_CODE static void init_plat(void)
{
    initIRQController();
}
149 
150 
151 #ifdef ENABLE_SMP_SUPPORT
/*
 * Boot path of a secondary core (SMP only).
 *
 * hart_id, core_id  not used by this function
 *
 * Always returns true.
 */
BOOT_CODE static bool_t try_init_kernel_secondary_core(word_t hart_id, word_t core_id)
{
    /* Spin until node_boot_lock becomes non-zero; release_secondary_cores()
     * sets it to 1. */
    while (!node_boot_lock);

    /* Memory fence between observing the lock and everything that follows. */
    fence_r_rw();

    init_cpu();
    /* Take the kernel lock before touching shared kernel state. */
    NODE_LOCK_SYS;

    /* Announce this core; release_secondary_cores() waits until ksNumCPUs
     * reaches CONFIG_MAX_NUM_NODES. */
    ksNumCPUs++;
    init_core_state(SchedulerAction_ResumeCurrentThread);
    ifence_local();
    return true;
}
166 
/*
 * Let the secondary cores proceed with their boot path and wait until all of
 * them have checked in (SMP only).
 */
BOOT_CODE static void release_secondary_cores(void)
{
    /* Secondary cores spin on this flag in try_init_kernel_secondary_core(). */
    node_boot_lock = 1;
    fence_w_r();

    /* Each secondary core increments ksNumCPUs once; wait until the count
     * matches the configured number of nodes. The signal fence is a compiler
     * level fence around each re-read of ksNumCPUs. */
    while (ksNumCPUs != CONFIG_MAX_NUM_NODES) {
        __atomic_signal_fence(__ATOMIC_ACQ_REL);
    }
}
176 
177 #endif
178 /* Main kernel initialisation function. */
179 
try_init_kernel(paddr_t ui_p_reg_start,paddr_t ui_p_reg_end,uint32_t pv_offset,vptr_t v_entry,paddr_t dtb_phys_addr,word_t dtb_size)180 static BOOT_CODE bool_t try_init_kernel(
181     paddr_t ui_p_reg_start,
182     paddr_t ui_p_reg_end,
183     uint32_t pv_offset,
184     vptr_t  v_entry,
185     paddr_t dtb_phys_addr,
186     word_t  dtb_size
187 )
188 {
189     cap_t root_cnode_cap;
190     cap_t it_pd_cap;
191     cap_t it_ap_cap;
192     cap_t ipcbuf_cap;
193     p_region_t boot_mem_reuse_p_reg = ((p_region_t) {
194         kpptr_to_paddr((void *)KERNEL_ELF_BASE), kpptr_to_paddr(ki_boot_end)
195     });
196     region_t boot_mem_reuse_reg = paddr_to_pptr_reg(boot_mem_reuse_p_reg);
197     region_t ui_reg = paddr_to_pptr_reg((p_region_t) {
198         ui_p_reg_start, ui_p_reg_end
199     });
200     word_t extra_bi_size = 0;
201     pptr_t extra_bi_offset = 0;
202     vptr_t extra_bi_frame_vptr;
203     vptr_t bi_frame_vptr;
204     vptr_t ipcbuf_vptr;
205     create_frames_of_region_ret_t create_frames_ret;
206     create_frames_of_region_ret_t extra_bi_ret;
207 
208     /* convert from physical addresses to userland vptrs */
209     v_region_t ui_v_reg = {
210         .start = ui_p_reg_start - pv_offset,
211         .end   = ui_p_reg_end   - pv_offset
212     };
213 
214     ipcbuf_vptr = ui_v_reg.end;
215     bi_frame_vptr = ipcbuf_vptr + BIT(PAGE_BITS);
216     extra_bi_frame_vptr = bi_frame_vptr + BIT(PAGE_BITS);
217 
218     map_kernel_window();
219 
220     /* initialise the CPU */
221     init_cpu();
222 
223     printf("Bootstrapping kernel\n");
224 
225     /* initialize the platform */
226     init_plat();
227 
228     /* If a DTB was provided, pass the data on as extra bootinfo */
229     p_region_t dtb_p_reg = P_REG_EMPTY;
230     if (dtb_size > 0) {
231         paddr_t dtb_phys_end = ROUND_UP(dtb_phys_addr + dtb_size, PAGE_BITS);
232         if (dtb_phys_end < dtb_phys_addr) {
233             /* An integer overflow happened in DTB end address calculation, the
234              * location or size passed seems invalid.
235              */
236             printf("ERROR: DTB location at %"SEL4_PRIx_word
237                    " len %"SEL4_PRIu_word" invalid\n",
238                    dtb_phys_addr, dtb_size);
239             return false;
240         }
241         /* If the DTB is located in physical memory that is not mapped in the
242          * kernel window we cannot access it.
243          */
244         if (dtb_phys_end >= PADDR_TOP) {
245             printf("ERROR: DTB at [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] "
246                    "exceeds PADDR_TOP (%"SEL4_PRIx_word")\n",
247                    dtb_phys_addr, dtb_phys_end, PADDR_TOP);
248             return false;
249         }
250         /* DTB seems valid and accessible, pass it on in bootinfo. */
251         extra_bi_size += sizeof(seL4_BootInfoHeader) + dtb_size;
252         /* Remember the page aligned memory region it uses. */
253         dtb_p_reg = (p_region_t) {
254             .start = ROUND_DOWN(dtb_phys_addr, PAGE_BITS),
255             .end   = dtb_phys_end
256         };
257     }
258 
259     /* The region of the initial thread is the user image + ipcbuf + boot info + extra */
260     word_t extra_bi_size_bits = calculate_extra_bi_size_bits(extra_bi_size);
261     v_region_t it_v_reg = {
262         .start = ui_v_reg.start,
263         .end   = extra_bi_frame_vptr + BIT(extra_bi_size_bits)
264     };
265     if (it_v_reg.end >= USER_TOP) {
266         /* Variable arguments for printf() require well defined integer types
267          * to work properly. Unfortunately, the definition of USER_TOP differs
268          * between platforms (int, long), so we have to cast here to play safe.
269          */
270         printf("ERROR: userland image virt [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"]"
271                "exceeds USER_TOP (%"SEL4_PRIx_word")\n",
272                it_v_reg.start, it_v_reg.end, (word_t)USER_TOP);
273         return false;
274     }
275 
276     /* make the free memory available to alloc_region() */
277     if (!arch_init_freemem(ui_reg, dtb_p_reg, it_v_reg, extra_bi_size_bits)) {
278         printf("ERROR: free memory management initialization failed\n");
279         return false;
280     }
281 
282     /* create the root cnode */
283     root_cnode_cap = create_root_cnode();
284     if (cap_get_capType(root_cnode_cap) == cap_null_cap) {
285         printf("ERROR: root c-node creation failed\n");
286         return false;
287     }
288 
289     /* create the cap for managing thread domains */
290     create_domain_cap(root_cnode_cap);
291 
292     /* initialise the IRQ states and provide the IRQ control cap */
293     init_irqs(root_cnode_cap);
294 
295     /* create the bootinfo frame */
296     populate_bi_frame(0, CONFIG_MAX_NUM_NODES, ipcbuf_vptr, extra_bi_size);
297 
298     /* put DTB in the bootinfo block, if present. */
299     seL4_BootInfoHeader header;
300     if (dtb_size > 0) {
301         header.id = SEL4_BOOTINFO_HEADER_FDT;
302         header.len = sizeof(header) + dtb_size;
303         *(seL4_BootInfoHeader *)(rootserver.extra_bi + extra_bi_offset) = header;
304         extra_bi_offset += sizeof(header);
305         memcpy((void *)(rootserver.extra_bi + extra_bi_offset),
306                paddr_to_pptr(dtb_phys_addr),
307                dtb_size);
308         extra_bi_offset += dtb_size;
309     }
310 
311     if (extra_bi_size > extra_bi_offset) {
312         /* provide a chunk for any leftover padding in the extended boot info */
313         header.id = SEL4_BOOTINFO_HEADER_PADDING;
314         header.len = (extra_bi_size - extra_bi_offset);
315         *(seL4_BootInfoHeader *)(rootserver.extra_bi + extra_bi_offset) = header;
316     }
317 
318     /* Construct an initial address space with enough virtual addresses
319      * to cover the user image + ipc buffer and bootinfo frames */
320     it_pd_cap = create_it_address_space(root_cnode_cap, it_v_reg);
321     if (cap_get_capType(it_pd_cap) == cap_null_cap) {
322         printf("ERROR: address space creation for initial thread failed\n");
323         return false;
324     }
325 
326     /* Create and map bootinfo frame cap */
327     create_bi_frame_cap(
328         root_cnode_cap,
329         it_pd_cap,
330         bi_frame_vptr
331     );
332 
333     /* create and map extra bootinfo region */
334     if (extra_bi_size > 0) {
335         region_t extra_bi_region = {
336             .start = rootserver.extra_bi,
337             .end = rootserver.extra_bi + extra_bi_size
338         };
339         extra_bi_ret =
340             create_frames_of_region(
341                 root_cnode_cap,
342                 it_pd_cap,
343                 extra_bi_region,
344                 true,
345                 pptr_to_paddr((void *)extra_bi_region.start) - extra_bi_frame_vptr
346             );
347         if (!extra_bi_ret.success) {
348             printf("ERROR: mapping extra boot info to initial thread failed\n");
349             return false;
350         }
351         ndks_boot.bi_frame->extraBIPages = extra_bi_ret.region;
352     }
353 
354 #ifdef CONFIG_KERNEL_MCS
355     init_sched_control(root_cnode_cap, CONFIG_MAX_NUM_NODES);
356 #endif
357 
358     /* create the initial thread's IPC buffer */
359     ipcbuf_cap = create_ipcbuf_frame_cap(root_cnode_cap, it_pd_cap, ipcbuf_vptr);
360     if (cap_get_capType(ipcbuf_cap) == cap_null_cap) {
361         printf("ERROR: could not create IPC buffer for initial thread\n");
362         return false;
363     }
364 
365     /* create all userland image frames */
366     create_frames_ret =
367         create_frames_of_region(
368             root_cnode_cap,
369             it_pd_cap,
370             ui_reg,
371             true,
372             pv_offset
373         );
374     if (!create_frames_ret.success) {
375         printf("ERROR: could not create all userland image frames\n");
376         return false;
377     }
378     ndks_boot.bi_frame->userImageFrames = create_frames_ret.region;
379 
380     /* create the initial thread's ASID pool */
381     it_ap_cap = create_it_asid_pool(root_cnode_cap);
382     if (cap_get_capType(it_ap_cap) == cap_null_cap) {
383         printf("ERROR: could not create ASID pool for initial thread\n");
384         return false;
385     }
386     write_it_asid_pool(it_ap_cap, it_pd_cap);
387 
388 #ifdef CONFIG_KERNEL_MCS
389     NODE_STATE(ksCurTime) = getCurrentTime();
390 #endif
391 
392     /* create the idle thread */
393     if (!create_idle_thread()) {
394         printf("ERROR: could not create idle thread\n");
395         return false;
396     }
397 
398     /* create the initial thread */
399     tcb_t *initial = create_initial_thread(
400                          root_cnode_cap,
401                          it_pd_cap,
402                          v_entry,
403                          bi_frame_vptr,
404                          ipcbuf_vptr,
405                          ipcbuf_cap
406                      );
407 
408     if (initial == NULL) {
409         printf("ERROR: could not create initial thread\n");
410         return false;
411     }
412 
413     init_core_state(initial);
414 
415     /* convert the remaining free memory into UT objects and provide the caps */
416     if (!create_untypeds(
417             root_cnode_cap,
418             boot_mem_reuse_reg)) {
419         printf("ERROR: could not create untypteds for kernel image boot memory\n");
420         return false;
421     }
422 
423     /* no shared-frame caps (RISC-V has no multikernel support) */
424     ndks_boot.bi_frame->sharedFrames = S_REG_EMPTY;
425 
426     /* finalise the bootinfo frame */
427     bi_finalise();
428 
429     ksNumCPUs = 1;
430 
431     SMP_COND_STATEMENT(clh_lock_init());
432     SMP_COND_STATEMENT(release_secondary_cores());
433 
434     /* All cores are up now, so there can be concurrency. The kernel booting is
435      * supposed to be finished before the secondary cores are released, all the
436      * primary has to do now is schedule the initial thread. Currently there is
437      * nothing that touches any global data structures, nevertheless we grab the
438      * BKL here to play safe. It is released when the kernel is left. */
439     NODE_LOCK_SYS;
440 
441     printf("Booting all finished, dropped to user space\n");
442     return true;
443 }
444 
init_kernel(paddr_t ui_p_reg_start,paddr_t ui_p_reg_end,sword_t pv_offset,vptr_t v_entry,paddr_t dtb_addr_p,uint32_t dtb_size,word_t hart_id,word_t core_id)445 BOOT_CODE VISIBLE void init_kernel(
446     paddr_t ui_p_reg_start,
447     paddr_t ui_p_reg_end,
448     sword_t pv_offset,
449     vptr_t  v_entry,
450     paddr_t dtb_addr_p,
451     uint32_t dtb_size
452 #ifdef ENABLE_SMP_SUPPORT
453     ,
454     word_t hart_id,
455     word_t core_id
456 #endif
457 )
458 {
459     bool_t result;
460 
461 #ifdef ENABLE_SMP_SUPPORT
462     add_hart_to_core_map(hart_id, core_id);
463     if (core_id == 0) {
464         result = try_init_kernel(ui_p_reg_start,
465                                  ui_p_reg_end,
466                                  pv_offset,
467                                  v_entry,
468                                  dtb_addr_p,
469                                  dtb_size);
470     } else {
471         result = try_init_kernel_secondary_core(hart_id, core_id);
472     }
473 #else
474     result = try_init_kernel(ui_p_reg_start,
475                              ui_p_reg_end,
476                              pv_offset,
477                              v_entry,
478                              dtb_addr_p,
479                              dtb_size);
480 #endif
481     if (!result) {
482         fail("ERROR: kernel init failed");
483         UNREACHABLE();
484     }
485 
486 #ifdef CONFIG_KERNEL_MCS
487     NODE_STATE(ksCurTime) = getCurrentTime();
488     NODE_STATE(ksConsumed) = 0;
489 #endif
490 
491     schedule();
492     activateThread();
493 }
494