/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include <assert.h>
#include <kernel/boot.h>
#include <kernel/thread.h>
#include <machine/io.h>
#include <machine/registerset.h>
#include <model/statedata.h>
#include <arch/machine.h>
#include <arch/kernel/boot.h>
#include <arch/kernel/vspace.h>
#include <linker.h>
#include <hardware.h>
#include <util.h>

/* (node-local) state accessed only during bootstrapping */
BOOT_BSS ndks_boot_t ndks_boot;

BOOT_BSS rootserver_mem_t rootserver;
BOOT_BSS static region_t rootserver_mem;

BOOT_CODE static void merge_regions(void)
{
    /* Walk through reserved regions and see if any can be merged */
    for (word_t i = 1; i < ndks_boot.resv_count;) {
        if (ndks_boot.reserved[i - 1].end == ndks_boot.reserved[i].start) {
            /* extend earlier region */
            ndks_boot.reserved[i - 1].end = ndks_boot.reserved[i].end;
            /* move everything else down */
            for (word_t j = i + 1; j < ndks_boot.resv_count; j++) {
                ndks_boot.reserved[j - 1] = ndks_boot.reserved[j];
            }

            ndks_boot.resv_count--;
            /* don't increment i in case there are multiple adjacent regions */
        } else {
            i++;
        }
    }
}
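/* For illustration only (hypothetical addresses): with
 * reserved = { [0x1000..0x2000), [0x2000..0x3000), [0x3000..0x4000) },
 * the first pass merges the first two entries into [0x1000..0x3000); because
 * i is not incremented after a merge, the next pass immediately absorbs
 * [0x3000..0x4000) as well, leaving the single region [0x1000..0x4000). */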

BOOT_CODE bool_t reserve_region(p_region_t reg)
{
    word_t i;
    assert(reg.start <= reg.end);
    if (reg.start == reg.end) {
        return true;
    }

    /* keep the regions in order */
    for (i = 0; i < ndks_boot.resv_count; i++) {
        /* Try and merge the region to an existing one, if possible */
        if (ndks_boot.reserved[i].start == reg.end) {
            ndks_boot.reserved[i].start = reg.start;
            merge_regions();
            return true;
        }
        if (ndks_boot.reserved[i].end == reg.start) {
            ndks_boot.reserved[i].end = reg.end;
            merge_regions();
            return true;
        }
        /* Otherwise figure out where it should go. */
        if (ndks_boot.reserved[i].start > reg.end) {
            /* move regions down, making sure there's enough room */
            if (ndks_boot.resv_count + 1 >= MAX_NUM_RESV_REG) {
                printf("Can't mark region 0x%"SEL4_PRIx_word"-0x%"SEL4_PRIx_word
                       " as reserved, try increasing MAX_NUM_RESV_REG (currently %d)\n",
                       reg.start, reg.end, (int)MAX_NUM_RESV_REG);
                return false;
            }
            for (word_t j = ndks_boot.resv_count; j > i; j--) {
                ndks_boot.reserved[j] = ndks_boot.reserved[j - 1];
            }
            /* insert the new region */
            ndks_boot.reserved[i] = reg;
            ndks_boot.resv_count++;
            return true;
        }
    }

    if (i + 1 == MAX_NUM_RESV_REG) {
        printf("Can't mark region 0x%"SEL4_PRIx_word"-0x%"SEL4_PRIx_word
               " as reserved, try increasing MAX_NUM_RESV_REG (currently %d)\n",
               reg.start, reg.end, (int)MAX_NUM_RESV_REG);
        return false;
    }

    ndks_boot.reserved[i] = reg;
    ndks_boot.resv_count++;

    return true;
}
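/* For illustration only (hypothetical addresses): with
 * reserved = { [0x1000..0x2000), [0x8000..0x9000) },
 * reserve_region([0x4000..0x5000)) shifts the second entry down and inserts
 * the new one in the middle, keeping the array sorted, while
 * reserve_region([0x2000..0x3000)) simply extends the first entry to
 * [0x1000..0x3000) and lets merge_regions() coalesce any new neighbours. */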

BOOT_CODE static bool_t insert_region(region_t reg)
{
    assert(reg.start <= reg.end);
    if (is_reg_empty(reg)) {
        return true;
    }

    for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
        if (is_reg_empty(ndks_boot.freemem[i])) {
            reserve_region(pptr_to_paddr_reg(reg));
            ndks_boot.freemem[i] = reg;
            return true;
        }
    }

    /* We don't know whether a platform or architecture picked
     * MAX_NUM_FREEMEM_REG arbitrarily or carefully calculated it to be big
     * enough. Running out of slots here is not really fatal; eventually memory
     * allocation may fail if there is not enough free memory. However, callers
     * should never blindly assume that allocations succeed, so some error
     * handling must always be in place, even if the environment has been
     * carefully crafted to support them. Thus, we don't stop the boot process
     * here, but return an error. The caller should decide how bad this is.
     */
    printf("no free memory slot left for [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"],"
           " consider increasing MAX_NUM_FREEMEM_REG (%u)\n",
           reg.start, reg.end, (unsigned int)MAX_NUM_FREEMEM_REG);

    /* For debug builds we consider this a fatal error. The rationale is that
     * the caller does not currently check the error code, but just ignores any
     * failures silently. */
    assert(0);

    return false;
}
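/* Note the pairing above: insert_region() records a free virtual-memory
 * region in ndks_boot.freemem for later untyped creation, and at the same
 * time marks its physical counterpart as reserved, so the same frames are
 * not also handed out as device untypeds by create_untypeds() below. */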

BOOT_CODE static pptr_t alloc_rootserver_obj(word_t size_bits, word_t n)
{
    pptr_t allocated = rootserver_mem.start;
    /* allocated memory must be aligned */
    assert(allocated % BIT(size_bits) == 0);
    rootserver_mem.start += (n * BIT(size_bits));
    /* we must not have run out of memory */
    assert(rootserver_mem.start <= rootserver_mem.end);
    memzero((void *) allocated, n * BIT(size_bits));
    return allocated;
}
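/* This is a simple bump allocator over the rootserver_mem window. For
 * illustration only (hypothetical address): if rootserver_mem.start is
 * 0xffffff8000200000 and seL4_PageBits is 12, then
 * alloc_rootserver_obj(seL4_PageBits, 2) returns that address, zeroes
 * 2 * 4 KiB, and bumps the start by 0x2000. The alignment assert only holds
 * because callers allocate objects in non-increasing size order from a
 * suitably aligned base (see create_rootserver_objects below). */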

BOOT_CODE static word_t rootserver_max_size_bits(word_t extra_bi_size_bits)
{
    word_t cnode_size_bits = CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits;
    word_t max = MAX(cnode_size_bits, seL4_VSpaceBits);
    return MAX(max, extra_bi_size_bits);
}
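/* For illustration only (example configuration values, not authoritative):
 * with CONFIG_ROOT_CNODE_SIZE_BITS = 12, seL4_SlotBits = 5,
 * seL4_VSpaceBits = 12 and no extra boot info, this returns
 * MAX(MAX(12 + 5, 12), 0) = 17, i.e. the root CNode is the largest object. */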

BOOT_CODE static word_t calculate_rootserver_size(v_region_t it_v_reg, word_t extra_bi_size_bits)
{
    /* work out how much memory we need for root server objects */
    word_t size = BIT(CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits);
    size += BIT(seL4_TCBBits); // root thread tcb
    size += BIT(seL4_PageBits); // ipc buf
    size += BIT(BI_FRAME_SIZE_BITS); // boot info
    size += BIT(seL4_ASIDPoolBits);
    size += extra_bi_size_bits > 0 ? BIT(extra_bi_size_bits) : 0;
    size += BIT(seL4_VSpaceBits); // root vspace
#ifdef CONFIG_KERNEL_MCS
    size += BIT(seL4_MinSchedContextBits); // root sched context
#endif
    /* for all archs, seL4_PageTableBits is the size of all non top-level paging structures */
    return size + arch_get_n_paging(it_v_reg) * BIT(seL4_PageTableBits);
}
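/* For illustration only (example configuration values, not authoritative):
 * with CONFIG_ROOT_CNODE_SIZE_BITS = 12, seL4_SlotBits = 5, seL4_TCBBits = 11,
 * seL4_PageBits = BI_FRAME_SIZE_BITS = seL4_ASIDPoolBits = seL4_VSpaceBits
 * = seL4_PageTableBits = 12, no extra boot info, no MCS, and
 * arch_get_n_paging(it_v_reg) = 3, this comes to
 * 2^17 + 2^11 + 4 * 2^12 + 3 * 2^12 = 161792 bytes (158 KiB). */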

BOOT_CODE static void maybe_alloc_extra_bi(word_t cmp_size_bits, word_t extra_bi_size_bits)
{
    if (extra_bi_size_bits >= cmp_size_bits && rootserver.extra_bi == 0) {
        rootserver.extra_bi = alloc_rootserver_obj(extra_bi_size_bits, 1);
    }
}

/* Create pptrs for all root server objects, starting at a given start address,
 * to cover the virtual memory region v_reg, and any extra boot info.
 */
BOOT_CODE static void create_rootserver_objects(pptr_t start, v_region_t it_v_reg,
                                                word_t extra_bi_size_bits)
{
    /* the largest object is the PD, the root cnode, or the extra boot info */
    word_t cnode_size_bits = CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits;
    word_t max = rootserver_max_size_bits(extra_bi_size_bits);

    word_t size = calculate_rootserver_size(it_v_reg, extra_bi_size_bits);
    rootserver_mem.start = start;
    rootserver_mem.end = start + size;

    maybe_alloc_extra_bi(max, extra_bi_size_bits);

    /* the root cnode is at least 4k, so it could be larger or smaller than a pd. */
#if (CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits) > seL4_VSpaceBits
    rootserver.cnode = alloc_rootserver_obj(cnode_size_bits, 1);
    maybe_alloc_extra_bi(seL4_VSpaceBits, extra_bi_size_bits);
    rootserver.vspace = alloc_rootserver_obj(seL4_VSpaceBits, 1);
#else
    rootserver.vspace = alloc_rootserver_obj(seL4_VSpaceBits, 1);
    maybe_alloc_extra_bi(cnode_size_bits, extra_bi_size_bits);
    rootserver.cnode = alloc_rootserver_obj(cnode_size_bits, 1);
#endif

    /* at this point we are up to creating 4k objects - which is the min size of
     * extra_bi so this is the last chance to allocate it */
    maybe_alloc_extra_bi(seL4_PageBits, extra_bi_size_bits);
    rootserver.asid_pool = alloc_rootserver_obj(seL4_ASIDPoolBits, 1);
    rootserver.ipc_buf = alloc_rootserver_obj(seL4_PageBits, 1);
    rootserver.boot_info = alloc_rootserver_obj(BI_FRAME_SIZE_BITS, 1);

    /* TCBs on aarch32 can be larger than page tables in certain configs */
#if seL4_TCBBits >= seL4_PageTableBits
    rootserver.tcb = alloc_rootserver_obj(seL4_TCBBits, 1);
#endif

    /* paging structures are 4k on every arch except aarch32 (1k) */
    word_t n = arch_get_n_paging(it_v_reg);
    rootserver.paging.start = alloc_rootserver_obj(seL4_PageTableBits, n);
    rootserver.paging.end = rootserver.paging.start + n * BIT(seL4_PageTableBits);

    /* for most archs, TCBs are smaller than page tables */
#if seL4_TCBBits < seL4_PageTableBits
    rootserver.tcb = alloc_rootserver_obj(seL4_TCBBits, 1);
#endif

#ifdef CONFIG_KERNEL_MCS
    rootserver.sc = alloc_rootserver_obj(seL4_MinSchedContextBits, 1);
#endif
    /* we should have allocated all our memory */
    assert(rootserver_mem.start == rootserver_mem.end);
}
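/* The allocation order above is deliberate: objects are carved out of
 * rootserver_mem in non-increasing size order, starting from a base that
 * init_freemem aligns to the largest object. For illustration only
 * (hypothetical values): with a base aligned to 2^17 and a 2^17 root CNode
 * allocated first, the cursor remains a multiple of 2^13 for a 2^13 vspace,
 * and then a multiple of 2^12 for the page-sized objects, so the alignment
 * assert in alloc_rootserver_obj holds for every allocation. */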

BOOT_CODE void write_slot(slot_ptr_t slot_ptr, cap_t cap)
{
    slot_ptr->cap = cap;

    slot_ptr->cteMDBNode = nullMDBNode;
    mdb_node_ptr_set_mdbRevocable(&slot_ptr->cteMDBNode, true);
    mdb_node_ptr_set_mdbFirstBadged(&slot_ptr->cteMDBNode, true);
}

/* Our root CNode needs to be able to fit all the initial caps, but must not
 * cover all of memory.
 */
compile_assert(root_cnode_size_valid,
               CONFIG_ROOT_CNODE_SIZE_BITS < 32 - seL4_SlotBits &&
               BIT(CONFIG_ROOT_CNODE_SIZE_BITS) >= seL4_NumInitialCaps &&
               BIT(CONFIG_ROOT_CNODE_SIZE_BITS) >= (seL4_PageBits - seL4_SlotBits))

BOOT_CODE cap_t
create_root_cnode(void)
{
    cap_t cap = cap_cnode_cap_new(
                    CONFIG_ROOT_CNODE_SIZE_BITS, /* radix */
                    wordBits - CONFIG_ROOT_CNODE_SIZE_BITS, /* guard size */
                    0, /* guard */
                    rootserver.cnode); /* pptr */

    /* write the root CNode cap into the root CNode */
    write_slot(SLOT_PTR(rootserver.cnode, seL4_CapInitThreadCNode), cap);

    return cap;
}

/* Check domain scheduler assumptions. */
compile_assert(num_domains_valid,
               CONFIG_NUM_DOMAINS >= 1 && CONFIG_NUM_DOMAINS <= 256)
compile_assert(num_priorities_valid,
               CONFIG_NUM_PRIORITIES >= 1 && CONFIG_NUM_PRIORITIES <= 256)

BOOT_CODE void
create_domain_cap(cap_t root_cnode_cap)
{
    /* Check domain scheduler assumptions. */
    assert(ksDomScheduleLength > 0);
    for (word_t i = 0; i < ksDomScheduleLength; i++) {
        assert(ksDomSchedule[i].domain < CONFIG_NUM_DOMAINS);
        assert(ksDomSchedule[i].length > 0);
    }

    cap_t cap = cap_domain_cap_new();
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapDomain), cap);
}

BOOT_CODE cap_t create_ipcbuf_frame_cap(cap_t root_cnode_cap, cap_t pd_cap, vptr_t vptr)
{
    clearMemory((void *)rootserver.ipc_buf, PAGE_BITS);

    /* create a cap of it and write it into the root CNode */
    cap_t cap = create_mapped_it_frame_cap(pd_cap, rootserver.ipc_buf, vptr, IT_ASID, false, false);
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer), cap);

    return cap;
}

BOOT_CODE void create_bi_frame_cap(cap_t root_cnode_cap, cap_t pd_cap, vptr_t vptr)
{
    /* create a cap of it and write it into the root CNode */
    cap_t cap = create_mapped_it_frame_cap(pd_cap, rootserver.boot_info, vptr, IT_ASID, false, false);
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapBootInfoFrame), cap);
}

BOOT_CODE word_t calculate_extra_bi_size_bits(word_t extra_size)
{
    if (extra_size == 0) {
        return 0;
    }

    word_t clzl_ret = clzl(ROUND_UP(extra_size, seL4_PageBits));
    word_t msb = seL4_WordBits - 1 - clzl_ret;
    /* If region is bigger than a page, make sure we overallocate rather than
     * underallocate
     */
    if (extra_size > BIT(msb)) {
        msb++;
    }
    return msb;
}
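/* For illustration only (64-bit build, seL4_PageBits = 12 assumed):
 * extra_size = 5000 rounds up to 8192 = 2^13, whose MSB index is 13; since
 * 5000 <= 2^13 the function returns 13 (one 8 KiB allocation).
 * extra_size = 12288 also has MSB index 13, but 12288 > 2^13, so the result
 * is bumped to 14 and 16 KiB is allocated rather than underallocating. */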

BOOT_CODE void populate_bi_frame(node_id_t node_id, word_t num_nodes,
                                 vptr_t ipcbuf_vptr, word_t extra_bi_size)
{
    /* clear boot info memory */
    clearMemory((void *)rootserver.boot_info, BI_FRAME_SIZE_BITS);
    if (extra_bi_size) {
        clearMemory((void *)rootserver.extra_bi,
                    calculate_extra_bi_size_bits(extra_bi_size));
    }

    /* initialise bootinfo-related global state */
    seL4_BootInfo *bi = BI_PTR(rootserver.boot_info);
    bi->nodeID = node_id;
    bi->numNodes = num_nodes;
    bi->numIOPTLevels = 0;
    bi->ipcBuffer = (seL4_IPCBuffer *)ipcbuf_vptr;
    bi->initThreadCNodeSizeBits = CONFIG_ROOT_CNODE_SIZE_BITS;
    bi->initThreadDomain = ksDomSchedule[ksDomScheduleIdx].domain;
    bi->extraLen = extra_bi_size;

    ndks_boot.bi_frame = bi;
    ndks_boot.slot_pos_cur = seL4_NumInitialCaps;
}

BOOT_CODE bool_t provide_cap(cap_t root_cnode_cap, cap_t cap)
{
    if (ndks_boot.slot_pos_cur >= BIT(CONFIG_ROOT_CNODE_SIZE_BITS)) {
        printf("ERROR: can't add another cap, all %"SEL4_PRIu_word
               " (=2^CONFIG_ROOT_CNODE_SIZE_BITS) slots used\n",
               BIT(CONFIG_ROOT_CNODE_SIZE_BITS));
        return false;
    }
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), ndks_boot.slot_pos_cur), cap);
    ndks_boot.slot_pos_cur++;
    return true;
}

BOOT_CODE create_frames_of_region_ret_t create_frames_of_region(
    cap_t    root_cnode_cap,
    cap_t    pd_cap,
    region_t reg,
    bool_t   do_map,
    sword_t  pv_offset
)
{
    pptr_t     f;
    cap_t      frame_cap;
    seL4_SlotPos slot_pos_before;
    seL4_SlotPos slot_pos_after;

    slot_pos_before = ndks_boot.slot_pos_cur;

    for (f = reg.start; f < reg.end; f += BIT(PAGE_BITS)) {
        if (do_map) {
            frame_cap = create_mapped_it_frame_cap(pd_cap, f, pptr_to_paddr((void *)(f - pv_offset)), IT_ASID, false, true);
        } else {
            frame_cap = create_unmapped_it_frame_cap(f, false);
        }
        if (!provide_cap(root_cnode_cap, frame_cap)) {
            return (create_frames_of_region_ret_t) {
                S_REG_EMPTY, false
            };
        }
    }

    slot_pos_after = ndks_boot.slot_pos_cur;

    return (create_frames_of_region_ret_t) {
        (seL4_SlotRegion) { slot_pos_before, slot_pos_after }, true
    };
}

BOOT_CODE cap_t create_it_asid_pool(cap_t root_cnode_cap)
{
    cap_t ap_cap = cap_asid_pool_cap_new(IT_ASID >> asidLowBits, rootserver.asid_pool);
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadASIDPool), ap_cap);

    /* create ASID control cap */
    write_slot(
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapASIDControl),
        cap_asid_control_cap_new()
    );

    return ap_cap;
}

#ifdef CONFIG_KERNEL_MCS
BOOT_CODE static bool_t configure_sched_context(tcb_t *tcb, sched_context_t *sc_pptr, ticks_t timeslice, word_t core)
{
    tcb->tcbSchedContext = sc_pptr;
    REFILL_NEW(tcb->tcbSchedContext, MIN_REFILLS, timeslice, 0, core);

    tcb->tcbSchedContext->scTcb = tcb;
    return true;
}

BOOT_CODE bool_t init_sched_control(cap_t root_cnode_cap, word_t num_nodes)
{
    seL4_SlotPos slot_pos_before = ndks_boot.slot_pos_cur;

    /* create a sched control cap for each core */
    for (unsigned int i = 0; i < num_nodes; i++) {
        if (!provide_cap(root_cnode_cap, cap_sched_control_cap_new(i))) {
            printf("can't init sched_control for node %u, provide_cap() failed\n", i);
            return false;
        }
    }

    /* update boot info with slot region for sched control caps */
    ndks_boot.bi_frame->schedcontrol = (seL4_SlotRegion) {
        .start = slot_pos_before,
        .end = ndks_boot.slot_pos_cur
    };

    return true;
}
#endif

BOOT_CODE bool_t create_idle_thread(void)
{
    pptr_t pptr;

#ifdef ENABLE_SMP_SUPPORT
    for (unsigned int i = 0; i < CONFIG_MAX_NUM_NODES; i++) {
#endif /* ENABLE_SMP_SUPPORT */
        pptr = (pptr_t) &ksIdleThreadTCB[SMP_TERNARY(i, 0)];
        NODE_STATE_ON_CORE(ksIdleThread, i) = TCB_PTR(pptr + TCB_OFFSET);
        configureIdleThread(NODE_STATE_ON_CORE(ksIdleThread, i));
#ifdef CONFIG_DEBUG_BUILD
        setThreadName(NODE_STATE_ON_CORE(ksIdleThread, i), "idle_thread");
#endif
        SMP_COND_STATEMENT(NODE_STATE_ON_CORE(ksIdleThread, i)->tcbAffinity = i);
#ifdef CONFIG_KERNEL_MCS
        bool_t result = configure_sched_context(NODE_STATE_ON_CORE(ksIdleThread, i), SC_PTR(&ksIdleThreadSC[SMP_TERNARY(i, 0)]),
                                                usToTicks(CONFIG_BOOT_THREAD_TIME_SLICE * US_IN_MS), SMP_TERNARY(i, 0));
        SMP_COND_STATEMENT(NODE_STATE_ON_CORE(ksIdleThread, i)->tcbSchedContext->scCore = i;)
        if (!result) {
            printf("Kernel init failed: Unable to allocate sc for idle thread\n");
            return false;
        }
#endif
#ifdef ENABLE_SMP_SUPPORT
    }
#endif /* ENABLE_SMP_SUPPORT */
    return true;
}

BOOT_CODE tcb_t *create_initial_thread(cap_t root_cnode_cap, cap_t it_pd_cap, vptr_t ui_v_entry, vptr_t bi_frame_vptr,
                                       vptr_t ipcbuf_vptr, cap_t ipcbuf_cap)
{
    tcb_t *tcb = TCB_PTR(rootserver.tcb + TCB_OFFSET);
#ifndef CONFIG_KERNEL_MCS
    tcb->tcbTimeSlice = CONFIG_TIME_SLICE;
#endif

    Arch_initContext(&tcb->tcbArch.tcbContext);

    /* derive a copy of the IPC buffer cap for inserting */
    deriveCap_ret_t dc_ret = deriveCap(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer), ipcbuf_cap);
    if (dc_ret.status != EXCEPTION_NONE) {
        printf("Failed to derive copy of IPC Buffer\n");
        return NULL;
    }

    /* initialise TCB (corresponds directly to abstract specification) */
    cteInsert(
        root_cnode_cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadCNode),
        SLOT_PTR(rootserver.tcb, tcbCTable)
    );
    cteInsert(
        it_pd_cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadVSpace),
        SLOT_PTR(rootserver.tcb, tcbVTable)
    );
    cteInsert(
        dc_ret.cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer),
        SLOT_PTR(rootserver.tcb, tcbBuffer)
    );
    tcb->tcbIPCBuffer = ipcbuf_vptr;

    setRegister(tcb, capRegister, bi_frame_vptr);
    setNextPC(tcb, ui_v_entry);

    /* initialise TCB */
#ifdef CONFIG_KERNEL_MCS
    if (!configure_sched_context(tcb, SC_PTR(rootserver.sc), usToTicks(CONFIG_BOOT_THREAD_TIME_SLICE * US_IN_MS), 0)) {
        return NULL;
    }
#endif

    tcb->tcbPriority = seL4_MaxPrio;
    tcb->tcbMCP = seL4_MaxPrio;
    tcb->tcbDomain = ksDomSchedule[ksDomScheduleIdx].domain;
#ifndef CONFIG_KERNEL_MCS
    setupReplyMaster(tcb);
#endif
    setThreadState(tcb, ThreadState_Running);

    ksCurDomain = ksDomSchedule[ksDomScheduleIdx].domain;
#ifdef CONFIG_KERNEL_MCS
    ksDomainTime = usToTicks(ksDomSchedule[ksDomScheduleIdx].length * US_IN_MS);
#else
    ksDomainTime = ksDomSchedule[ksDomScheduleIdx].length;
#endif
    assert(ksCurDomain < CONFIG_NUM_DOMAINS && ksDomainTime > 0);

#ifndef CONFIG_KERNEL_MCS
    SMP_COND_STATEMENT(tcb->tcbAffinity = 0);
#endif

    /* create initial thread's TCB cap */
    cap_t cap = cap_thread_cap_new(TCB_REF(tcb));
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadTCB), cap);

#ifdef CONFIG_KERNEL_MCS
    cap = cap_sched_context_cap_new(SC_REF(tcb->tcbSchedContext), seL4_MinSchedContextBits);
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadSC), cap);
#endif
#ifdef CONFIG_DEBUG_BUILD
    setThreadName(tcb, "rootserver");
#endif

    return tcb;
}

BOOT_CODE void init_core_state(tcb_t *scheduler_action)
{
#ifdef CONFIG_HAVE_FPU
    NODE_STATE(ksActiveFPUState) = NULL;
#endif
#ifdef CONFIG_DEBUG_BUILD
    /* add initial threads to the debug queue */
    NODE_STATE(ksDebugTCBs) = NULL;
    if (scheduler_action != SchedulerAction_ResumeCurrentThread &&
        scheduler_action != SchedulerAction_ChooseNewThread) {
        tcbDebugAppend(scheduler_action);
    }
    tcbDebugAppend(NODE_STATE(ksIdleThread));
#endif
    NODE_STATE(ksSchedulerAction) = scheduler_action;
    NODE_STATE(ksCurThread) = NODE_STATE(ksIdleThread);
#ifdef CONFIG_KERNEL_MCS
    NODE_STATE(ksCurSC) = NODE_STATE(ksCurThread->tcbSchedContext);
    NODE_STATE(ksConsumed) = 0;
    NODE_STATE(ksReprogram) = true;
    NODE_STATE(ksReleaseHead) = NULL;
    NODE_STATE(ksCurTime) = getCurrentTime();
#endif
}

BOOT_CODE static bool_t provide_untyped_cap(
    cap_t      root_cnode_cap,
    bool_t     device_memory,
    pptr_t     pptr,
    word_t     size_bits,
    seL4_SlotPos first_untyped_slot
)
{
    bool_t ret;
    cap_t ut_cap;
    word_t i = ndks_boot.slot_pos_cur - first_untyped_slot;
    if (i < CONFIG_MAX_NUM_BOOTINFO_UNTYPED_CAPS) {
        ndks_boot.bi_frame->untypedList[i] = (seL4_UntypedDesc) {
            pptr_to_paddr((void *)pptr), size_bits, device_memory, {0}
        };
        ut_cap = cap_untyped_cap_new(MAX_FREE_INDEX(size_bits),
                                     device_memory, size_bits, pptr);
        ret = provide_cap(root_cnode_cap, ut_cap);
    } else {
        printf("Kernel init: Too many untyped regions for boot info\n");
        ret = true;
    }
    return ret;
}

BOOT_CODE static bool_t create_untypeds_for_region(
    cap_t      root_cnode_cap,
    bool_t     device_memory,
    region_t   reg,
    seL4_SlotPos first_untyped_slot
)
{
    while (!is_reg_empty(reg)) {

        /* Calculate the bit size of the region. */
        unsigned int size_bits = seL4_WordBits - 1 - clzl(reg.end - reg.start);
        /* The size can't exceed the largest possible untyped size. */
        if (size_bits > seL4_MaxUntypedBits) {
            size_bits = seL4_MaxUntypedBits;
        }
        /* The start address 0 satisfies any alignment needs, otherwise ensure
         * the region's bit size does not exceed the alignment of the region.
         */
        if (0 != reg.start) {
            unsigned int align_bits = ctzl(reg.start);
            if (size_bits > align_bits) {
                size_bits = align_bits;
            }
        }
        /* Provide an untyped capability for the region only if it is large
         * enough to be retyped into objects later. Otherwise the region can't
         * be used anyway.
         */
        if (size_bits >= seL4_MinUntypedBits) {
            if (!provide_untyped_cap(root_cnode_cap, device_memory, reg.start, size_bits, first_untyped_slot)) {
                return false;
            }
        }
        reg.start += BIT(size_bits);
    }
    return true;
}
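/* This is a greedy power-of-two decomposition. For illustration only
 * (hypothetical addresses, 64-bit build): the region [0x1000..0x9000) is
 * covered as 4 KiB @ 0x1000 (alignment-limited), 8 KiB @ 0x2000,
 * 16 KiB @ 0x4000, and finally 4 KiB @ 0x8000 (size-limited), assuming
 * seL4_MinUntypedBits <= 12. */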

BOOT_CODE bool_t create_untypeds(cap_t root_cnode_cap,
                                 region_t boot_mem_reuse_reg)
{
    seL4_SlotPos first_untyped_slot = ndks_boot.slot_pos_cur;

    paddr_t start = 0;
    for (word_t i = 0; i < ndks_boot.resv_count; i++) {
        if (start < ndks_boot.reserved[i].start) {
            region_t reg = paddr_to_pptr_reg((p_region_t) {
                start, ndks_boot.reserved[i].start
            });
            if (!create_untypeds_for_region(root_cnode_cap, true, reg, first_untyped_slot)) {
                printf("ERROR: creation of untypeds for device region #%u at"
                       " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                       (unsigned int)i, reg.start, reg.end);
                return false;
            }
        }

        start = ndks_boot.reserved[i].end;
    }

    if (start < CONFIG_PADDR_USER_DEVICE_TOP) {
        region_t reg = paddr_to_pptr_reg((p_region_t) {
            start, CONFIG_PADDR_USER_DEVICE_TOP
        });

        if (!create_untypeds_for_region(root_cnode_cap, true, reg, first_untyped_slot)) {
            printf("ERROR: creation of untypeds for top device region"
                   " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                   reg.start, reg.end);
            return false;
        }
    }

    /* if boot_mem_reuse_reg is not empty, we can create UT objs from boot code/data frames */
    if (!create_untypeds_for_region(root_cnode_cap, false, boot_mem_reuse_reg, first_untyped_slot)) {
        printf("ERROR: creation of untypeds for recycled boot memory"
               " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
               boot_mem_reuse_reg.start, boot_mem_reuse_reg.end);
        return false;
    }

    /* convert remaining freemem into UT objects and provide the caps */
    for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
        region_t reg = ndks_boot.freemem[i];
        ndks_boot.freemem[i] = REG_EMPTY;
        if (!create_untypeds_for_region(root_cnode_cap, false, reg, first_untyped_slot)) {
            printf("ERROR: creation of untypeds for free memory region #%u at"
                   " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                   (unsigned int)i, reg.start, reg.end);
            return false;
        }
    }

    ndks_boot.bi_frame->untyped = (seL4_SlotRegion) {
        .start = first_untyped_slot,
        .end   = ndks_boot.slot_pos_cur
    };

    return true;
}

BOOT_CODE void bi_finalise(void)
{
    ndks_boot.bi_frame->empty = (seL4_SlotRegion) {
        .start = ndks_boot.slot_pos_cur,
        .end   = BIT(CONFIG_ROOT_CNODE_SIZE_BITS)
    };
}

BOOT_CODE static inline pptr_t ceiling_kernel_window(pptr_t p)
{
    /* Adjust address if it exceeds the kernel window
     * Note that we compare physical address in case of overflow.
     */
    if (pptr_to_paddr((void *)p) > PADDR_TOP) {
        p = PPTR_TOP;
    }
    return p;
}

BOOT_CODE static bool_t check_available_memory(word_t n_available,
                                               const p_region_t *available)
{
    /* The system configuration is broken if no region is available. */
    if (0 == n_available) {
        printf("ERROR: no memory regions available\n");
        return false;
    }

    printf("available phys memory regions: %"SEL4_PRIu_word"\n", n_available);
    /* Force ordering and exclusivity of available regions. */
    for (word_t i = 0; i < n_available; i++) {
        const p_region_t *r = &available[i];
        printf("  [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"]\n", r->start, r->end);

        /* Available regions must be sane */
        if (r->start > r->end) {
            printf("ERROR: memory region %"SEL4_PRIu_word" has start > end\n", i);
            return false;
        }

        /* Available regions can't be empty. */
        if (r->start == r->end) {
            printf("ERROR: memory region %"SEL4_PRIu_word" empty\n", i);
            return false;
        }

        /* Regions must be ordered and must not overlap. */
        if ((i > 0) && (r->start < available[i - 1].end)) {
            printf("ERROR: memory region %d in wrong order\n", (int)i);
            return false;
        }
    }

    return true;
}


BOOT_CODE static bool_t check_reserved_memory(word_t n_reserved,
                                              const region_t *reserved)
{
    printf("reserved virt address space regions: %"SEL4_PRIu_word"\n",
           n_reserved);
    /* Force ordering and exclusivity of reserved regions. */
    for (word_t i = 0; i < n_reserved; i++) {
        const region_t *r = &reserved[i];
        printf("  [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"]\n", r->start, r->end);

        /* Reserved regions must be sane, the size is allowed to be zero. */
        if (r->start > r->end) {
            printf("ERROR: reserved region %"SEL4_PRIu_word" has start > end\n", i);
            return false;
        }

        /* Regions must be ordered and must not overlap. */
        if ((i > 0) && (r->start < reserved[i - 1].end)) {
            printf("ERROR: reserved region %"SEL4_PRIu_word" in wrong order\n", i);
            return false;
        }
    }

    return true;
}

/* we can't declare arrays on the stack, so this is space for
 * the function below to use. */
BOOT_BSS static region_t avail_reg[MAX_NUM_FREEMEM_REG];
/**
 * Dynamically initialise the available memory on the platform: check the
 * available and reserved regions for sanity, subtract the reserved regions
 * from the available ones to populate ndks_boot.freemem, and then carve the
 * root server objects out of the topmost suitable free region.
 */
BOOT_CODE bool_t init_freemem(word_t n_available, const p_region_t *available,
                              word_t n_reserved, const region_t *reserved,
                              v_region_t it_v_reg, word_t extra_bi_size_bits)
{

    if (!check_available_memory(n_available, available)) {
        return false;
    }

    if (!check_reserved_memory(n_reserved, reserved)) {
        return false;
    }

    for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
        ndks_boot.freemem[i] = REG_EMPTY;
    }

    /* convert the available regions to pptrs */
    for (word_t i = 0; i < n_available; i++) {
        avail_reg[i] = paddr_to_pptr_reg(available[i]);
        avail_reg[i].end = ceiling_kernel_window(avail_reg[i].end);
        avail_reg[i].start = ceiling_kernel_window(avail_reg[i].start);
    }

    word_t a = 0;
    word_t r = 0;
    /* Now iterate through the available regions, removing any reserved regions. */
    while (a < n_available && r < n_reserved) {
        if (reserved[r].start == reserved[r].end) {
            /* reserved region is empty - skip it */
            r++;
        } else if (avail_reg[a].start >= avail_reg[a].end) {
            /* skip the entire region - it's empty now after trimming */
            a++;
        } else if (reserved[r].end <= avail_reg[a].start) {
            /* the reserved region is below the available region - skip it */
            reserve_region(pptr_to_paddr_reg(reserved[r]));
            r++;
        } else if (reserved[r].start >= avail_reg[a].end) {
            /* the reserved region is above the available region - take the whole thing */
            insert_region(avail_reg[a]);
            a++;
        } else {
            /* the reserved region overlaps with the available region */
            if (reserved[r].start <= avail_reg[a].start) {
                /* the region overlaps with the start of the available region.
                 * trim start of the available region */
                avail_reg[a].start = MIN(avail_reg[a].end, reserved[r].end);
                reserve_region(pptr_to_paddr_reg(reserved[r]));
                r++;
            } else {
                assert(reserved[r].start < avail_reg[a].end);
                /* take the first chunk of the available region and move
                 * the start to the end of the reserved region */
                region_t m = avail_reg[a];
                m.end = reserved[r].start;
                insert_region(m);
                if (avail_reg[a].end > reserved[r].end) {
                    avail_reg[a].start = reserved[r].end;
                    reserve_region(pptr_to_paddr_reg(reserved[r]));
                    r++;
                } else {
                    a++;
                }
            }
        }
    }

    for (; r < n_reserved; r++) {
        if (reserved[r].start < reserved[r].end) {
            reserve_region(pptr_to_paddr_reg(reserved[r]));
        }
    }

    /* no more reserved regions - add the rest */
    for (; a < n_available; a++) {
        if (avail_reg[a].start < avail_reg[a].end) {
            insert_region(avail_reg[a]);
        }
    }

    /* now try to fit the root server objects into a region */
    int i = ARRAY_SIZE(ndks_boot.freemem) - 1;
    if (!is_reg_empty(ndks_boot.freemem[i])) {
        printf("ERROR: insufficient MAX_NUM_FREEMEM_REG (%u)\n",
               (unsigned int)MAX_NUM_FREEMEM_REG);
        return false;
    }
    /* skip any empty regions */
    for (; i >= 0 && is_reg_empty(ndks_boot.freemem[i]); i--);

    /* try to grab the last available p region to create the root server objects
     * from. If possible, retain any left over memory as an extra p region */
    word_t size = calculate_rootserver_size(it_v_reg, extra_bi_size_bits);
    word_t max = rootserver_max_size_bits(extra_bi_size_bits);
    for (; i >= 0; i--) {
        /* Invariant: both i and (i + 1) are valid indices in ndks_boot.freemem. */
        assert(i < ARRAY_SIZE(ndks_boot.freemem) - 1);
        /* Invariant: the region at index i is the current candidate.
         * Invariant: regions 0 up to (i - 1), if any, are additional candidates.
         * Invariant: region (i + 1) is empty. */
        assert(is_reg_empty(ndks_boot.freemem[i + 1]));
        /* Invariant: regions above (i + 1), if any, are empty or too small to use.
         * Invariant: all non-empty regions are ordered, disjoint and unallocated. */

        /* We make a fresh variable to index the known-empty region, because the
         * SimplExportAndRefine verification test has poor support for array
         * indices that are sums of variables and small constants. */
        int empty_index = i + 1;

        /* Try to take the top-most suitably sized and aligned chunk. */
        pptr_t unaligned_start = ndks_boot.freemem[i].end - size;
        pptr_t start = ROUND_DOWN(unaligned_start, max);
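        /* For illustration only (hypothetical values): if freemem[i] is
         * [0x100000..0x200000), size = 0x6000 and max = 17, then
         * unaligned_start = 0x1FA000 and start = ROUND_DOWN(0x1FA000, 17)
         * = 0x1E0000, which still lies inside the region, leaving leftovers
         * [0x100000..0x1E0000) below and [0x1E6000..0x200000) above. */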
        /* if unaligned_start didn't underflow, and start fits in the region,
         * then we've found a region that fits the root server objects. */
        if (unaligned_start <= ndks_boot.freemem[i].end
            && start >= ndks_boot.freemem[i].start) {
            create_rootserver_objects(start, it_v_reg, extra_bi_size_bits);
            /* There may be leftovers before and after the memory we used. */
            /* Shuffle the after leftover up to the empty slot (i + 1). */
            ndks_boot.freemem[empty_index] = (region_t) {
                .start = start + size,
                .end = ndks_boot.freemem[i].end
            };
            /* Leave the before leftover in current slot i. */
            ndks_boot.freemem[i].end = start;
            /* Regions i and (i + 1) are now well defined, ordered, disjoint,
             * and unallocated, so we can return successfully. */
            return true;
        }
        /* Region i isn't big enough, so shuffle it up to slot (i + 1),
         * which we know is unused. */
        ndks_boot.freemem[empty_index] = ndks_boot.freemem[i];
        /* Now region i is unused, so make it empty to reestablish the invariant
         * for the next iteration (when it will be slot i + 1). */
        ndks_boot.freemem[i] = REG_EMPTY;
    }

    /* We didn't find a big enough region. */
    printf("ERROR: no free memory region is big enough for root server "
           "objects, need size/alignment of 2^%"SEL4_PRIu_word"\n", max);
    return false;
}