1 /*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 */
6
7 #include <assert.h>
8 #include <kernel/boot.h>
9 #include <kernel/thread.h>
10 #include <machine/io.h>
11 #include <machine/registerset.h>
12 #include <model/statedata.h>
13 #include <arch/machine.h>
14 #include <arch/kernel/boot.h>
15 #include <arch/kernel/vspace.h>
16 #include <linker.h>
17 #include <hardware.h>
18 #include <util.h>
19
/* (node-local) state accessed only during bootstrapping */
BOOT_BSS ndks_boot_t ndks_boot;

/* pptrs of the root server objects carved out during bootstrapping */
BOOT_BSS rootserver_mem_t rootserver;
/* memory region the root server objects are allocated from */
BOOT_BSS static region_t rootserver_mem;
25
merge_regions(void)26 BOOT_CODE static void merge_regions(void)
27 {
28 /* Walk through reserved regions and see if any can be merged */
29 for (word_t i = 1; i < ndks_boot.resv_count;) {
30 if (ndks_boot.reserved[i - 1].end == ndks_boot.reserved[i].start) {
31 /* extend earlier region */
32 ndks_boot.reserved[i - 1].end = ndks_boot.reserved[i].end;
33 /* move everything else down */
34 for (word_t j = i + 1; j < ndks_boot.resv_count; j++) {
35 ndks_boot.reserved[j - 1] = ndks_boot.reserved[j];
36 }
37
38 ndks_boot.resv_count--;
39 /* don't increment i in case there are multiple adjacent regions */
40 } else {
41 i++;
42 }
43 }
44 }
45
/* Insert physical region `reg` into the ordered list ndks_boot.reserved,
 * merging it with an existing neighbouring entry where possible.
 *
 * @param reg  physical region with reg.start <= reg.end; an empty region is
 *             accepted and silently ignored
 * @return true on success, false if the reserved-region array is full
 */
BOOT_CODE bool_t reserve_region(p_region_t reg)
{
    word_t i;
    assert(reg.start <= reg.end);
    if (reg.start == reg.end) {
        /* nothing to do for an empty region */
        return true;
    }

    /* keep the regions in order */
    for (i = 0; i < ndks_boot.resv_count; i++) {
        /* Try and merge the region to an existing one, if possible */
        if (ndks_boot.reserved[i].start == reg.end) {
            /* new region ends exactly where entry i starts: extend entry i downwards */
            ndks_boot.reserved[i].start = reg.start;
            merge_regions();
            return true;
        }
        if (ndks_boot.reserved[i].end == reg.start) {
            /* new region starts exactly where entry i ends: extend entry i upwards */
            ndks_boot.reserved[i].end = reg.end;
            merge_regions();
            return true;
        }
        /* Otherwise figure out where it should go. */
        if (ndks_boot.reserved[i].start > reg.end) {
            /* move regions down, making sure there's enough room */
            if (ndks_boot.resv_count + 1 >= MAX_NUM_RESV_REG) {
                printf("Can't mark region 0x%"SEL4_PRIx_word"-0x%"SEL4_PRIx_word
                       " as reserved, try increasing MAX_NUM_RESV_REG (currently %d)\n",
                       reg.start, reg.end, (int)MAX_NUM_RESV_REG);
                return false;
            }
            for (word_t j = ndks_boot.resv_count; j > i; j--) {
                ndks_boot.reserved[j] = ndks_boot.reserved[j - 1];
            }
            /* insert the new region */
            ndks_boot.reserved[i] = reg;
            ndks_boot.resv_count++;
            return true;
        }
    }

    /* reg sorts above all existing entries; append it if a slot is left */
    if (i + 1 == MAX_NUM_RESV_REG) {
        printf("Can't mark region 0x%"SEL4_PRIx_word"-0x%"SEL4_PRIx_word
               " as reserved, try increasing MAX_NUM_RESV_REG (currently %d)\n",
               reg.start, reg.end, (int)MAX_NUM_RESV_REG);
        return false;
    }

    ndks_boot.reserved[i] = reg;
    ndks_boot.resv_count++;

    return true;
}
98
insert_region(region_t reg)99 BOOT_CODE static bool_t insert_region(region_t reg)
100 {
101 assert(reg.start <= reg.end);
102 if (is_reg_empty(reg)) {
103 return true;
104 }
105
106 for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
107 if (is_reg_empty(ndks_boot.freemem[i])) {
108 reserve_region(pptr_to_paddr_reg(reg));
109 ndks_boot.freemem[i] = reg;
110 return true;
111 }
112 }
113
114 /* We don't know if a platform or architecture picked MAX_NUM_FREEMEM_REG
115 * arbitrarily or carefully calculated it to be big enough. Running out of
116 * slots here is not really fatal, eventually memory allocation may fail
117 * if there is not enough free memory. However, allocations should never
118 * blindly assume to work, some error handling must always be in place even
119 * if the environment has been crafted carefully to support them. Thus, we
120 * don't stop the boot process here, but return an error. The caller should
121 * decide how bad this is.
122 */
123 printf("no free memory slot left for [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"],"
124 " consider increasing MAX_NUM_FREEMEM_REG (%u)\n",
125 reg.start, reg.end, (unsigned int)MAX_NUM_FREEMEM_REG);
126
127 /* For debug builds we consider this a fatal error. Rationale is, that the
128 * caller does not check the error code at the moment, but just ignores any
129 * failures silently. */
130 assert(0);
131
132 return false;
133 }
134
alloc_rootserver_obj(word_t size_bits,word_t n)135 BOOT_CODE static pptr_t alloc_rootserver_obj(word_t size_bits, word_t n)
136 {
137 pptr_t allocated = rootserver_mem.start;
138 /* allocated memory must be aligned */
139 assert(allocated % BIT(size_bits) == 0);
140 rootserver_mem.start += (n * BIT(size_bits));
141 /* we must not have run out of memory */
142 assert(rootserver_mem.start <= rootserver_mem.end);
143 memzero((void *) allocated, n * BIT(size_bits));
144 return allocated;
145 }
146
rootserver_max_size_bits(word_t extra_bi_size_bits)147 BOOT_CODE static word_t rootserver_max_size_bits(word_t extra_bi_size_bits)
148 {
149 word_t cnode_size_bits = CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits;
150 word_t max = MAX(cnode_size_bits, seL4_VSpaceBits);
151 return MAX(max, extra_bi_size_bits);
152 }
153
/* Compute the total number of bytes required for all root server objects
 * needed to cover virtual region it_v_reg, including the extra boot info
 * frame (if any).
 *
 * @param it_v_reg            virtual region the initial thread must cover
 * @param extra_bi_size_bits  log2 size of the extra boot info, or 0 for none
 * @return size in bytes
 */
BOOT_CODE static word_t calculate_rootserver_size(v_region_t it_v_reg, word_t extra_bi_size_bits)
{
    /* work out how much memory we need for root server objects */
    word_t size = BIT(CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits);
    size += BIT(seL4_TCBBits); // root thread tcb
    size += BIT(seL4_PageBits); // ipc buf
    size += BIT(BI_FRAME_SIZE_BITS); // boot info
    size += BIT(seL4_ASIDPoolBits);
    size += extra_bi_size_bits > 0 ? BIT(extra_bi_size_bits) : 0;
    size += BIT(seL4_VSpaceBits); // root vspace
#ifdef CONFIG_KERNEL_MCS
    size += BIT(seL4_MinSchedContextBits); // root sched context
#endif
    /* for all archs, seL4_PageTable Bits is the size of all non top-level paging structures */
    return size + arch_get_n_paging(it_v_reg) * BIT(seL4_PageTableBits);
}
170
maybe_alloc_extra_bi(word_t cmp_size_bits,word_t extra_bi_size_bits)171 BOOT_CODE static void maybe_alloc_extra_bi(word_t cmp_size_bits, word_t extra_bi_size_bits)
172 {
173 if (extra_bi_size_bits >= cmp_size_bits && rootserver.extra_bi == 0) {
174 rootserver.extra_bi = alloc_rootserver_obj(extra_bi_size_bits, 1);
175 }
176 }
177
/* Create pptrs for all root server objects, starting at a given start address,
 * to cover the virtual memory region v_reg, and any extra boot info.
 *
 * Objects are bump-allocated in descending size order so that each allocation
 * is naturally aligned without padding; asserts in alloc_rootserver_obj()
 * check this. The extra boot info frame is slotted in wherever its size class
 * fits via maybe_alloc_extra_bi().
 */
BOOT_CODE static void create_rootserver_objects(pptr_t start, v_region_t it_v_reg,
                                                word_t extra_bi_size_bits)
{
    /* the largest object the PD, the root cnode, or the extra boot info */
    word_t cnode_size_bits = CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits;
    word_t max = rootserver_max_size_bits(extra_bi_size_bits);

    word_t size = calculate_rootserver_size(it_v_reg, extra_bi_size_bits);
    rootserver_mem.start = start;
    rootserver_mem.end = start + size;

    /* allocate extra boot info first if it is the largest object */
    maybe_alloc_extra_bi(max, extra_bi_size_bits);

    /* the root cnode is at least 4k, so it could be larger or smaller than a pd. */
#if (CONFIG_ROOT_CNODE_SIZE_BITS + seL4_SlotBits) > seL4_VSpaceBits
    rootserver.cnode = alloc_rootserver_obj(cnode_size_bits, 1);
    maybe_alloc_extra_bi(seL4_VSpaceBits, extra_bi_size_bits);
    rootserver.vspace = alloc_rootserver_obj(seL4_VSpaceBits, 1);
#else
    rootserver.vspace = alloc_rootserver_obj(seL4_VSpaceBits, 1);
    maybe_alloc_extra_bi(cnode_size_bits, extra_bi_size_bits);
    rootserver.cnode = alloc_rootserver_obj(cnode_size_bits, 1);
#endif

    /* at this point we are up to creating 4k objects - which is the min size of
     * extra_bi so this is the last chance to allocate it */
    maybe_alloc_extra_bi(seL4_PageBits, extra_bi_size_bits);
    rootserver.asid_pool = alloc_rootserver_obj(seL4_ASIDPoolBits, 1);
    rootserver.ipc_buf = alloc_rootserver_obj(seL4_PageBits, 1);
    rootserver.boot_info = alloc_rootserver_obj(BI_FRAME_SIZE_BITS, 1);

    /* TCBs on aarch32 can be larger than page tables in certain configs */
#if seL4_TCBBits >= seL4_PageTableBits
    rootserver.tcb = alloc_rootserver_obj(seL4_TCBBits, 1);
#endif

    /* paging structures are 4k on every arch except aarch32 (1k) */
    word_t n = arch_get_n_paging(it_v_reg);
    rootserver.paging.start = alloc_rootserver_obj(seL4_PageTableBits, n);
    rootserver.paging.end = rootserver.paging.start + n * BIT(seL4_PageTableBits);

    /* for most archs, TCBs are smaller than page tables */
#if seL4_TCBBits < seL4_PageTableBits
    rootserver.tcb = alloc_rootserver_obj(seL4_TCBBits, 1);
#endif

#ifdef CONFIG_KERNEL_MCS
    rootserver.sc = alloc_rootserver_obj(seL4_MinSchedContextBits, 1);
#endif
    /* we should have allocated all our memory */
    assert(rootserver_mem.start == rootserver_mem.end);
}
233
/* Write capability `cap` into CTE `slot_ptr`, initialising the slot's MDB
 * node as a revocable, first-badged entry with no predecessor or successor. */
BOOT_CODE void write_slot(slot_ptr_t slot_ptr, cap_t cap)
{
    slot_ptr->cap = cap;

    slot_ptr->cteMDBNode = nullMDBNode;
    mdb_node_ptr_set_mdbRevocable(&slot_ptr->cteMDBNode, true);
    mdb_node_ptr_set_mdbFirstBadged(&slot_ptr->cteMDBNode, true);
}
242
/* Our root CNode needs to be able to fit all the initial caps and not
 * cover all of memory.
 */
compile_assert(root_cnode_size_valid,
               CONFIG_ROOT_CNODE_SIZE_BITS < 32 - seL4_SlotBits &&
               BIT(CONFIG_ROOT_CNODE_SIZE_BITS) >= seL4_NumInitialCaps &&
               BIT(CONFIG_ROOT_CNODE_SIZE_BITS) >= (seL4_PageBits - seL4_SlotBits))
250
251 BOOT_CODE cap_t
create_root_cnode(void)252 create_root_cnode(void)
253 {
254 cap_t cap = cap_cnode_cap_new(
255 CONFIG_ROOT_CNODE_SIZE_BITS, /* radix */
256 wordBits - CONFIG_ROOT_CNODE_SIZE_BITS, /* guard size */
257 0, /* guard */
258 rootserver.cnode); /* pptr */
259
260 /* write the root CNode cap into the root CNode */
261 write_slot(SLOT_PTR(rootserver.cnode, seL4_CapInitThreadCNode), cap);
262
263 return cap;
264 }
265
/* Check domain scheduler assumptions: both counts must fit the data types
 * used by the scheduler. */
compile_assert(num_domains_valid,
               CONFIG_NUM_DOMAINS >= 1 && CONFIG_NUM_DOMAINS <= 256)
compile_assert(num_priorities_valid,
               CONFIG_NUM_PRIORITIES >= 1 && CONFIG_NUM_PRIORITIES <= 256)
271
272 BOOT_CODE void
create_domain_cap(cap_t root_cnode_cap)273 create_domain_cap(cap_t root_cnode_cap)
274 {
275 /* Check domain scheduler assumptions. */
276 assert(ksDomScheduleLength > 0);
277 for (word_t i = 0; i < ksDomScheduleLength; i++) {
278 assert(ksDomSchedule[i].domain < CONFIG_NUM_DOMAINS);
279 assert(ksDomSchedule[i].length > 0);
280 }
281
282 cap_t cap = cap_domain_cap_new();
283 write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapDomain), cap);
284 }
285
create_ipcbuf_frame_cap(cap_t root_cnode_cap,cap_t pd_cap,vptr_t vptr)286 BOOT_CODE cap_t create_ipcbuf_frame_cap(cap_t root_cnode_cap, cap_t pd_cap, vptr_t vptr)
287 {
288 clearMemory((void *)rootserver.ipc_buf, PAGE_BITS);
289
290 /* create a cap of it and write it into the root CNode */
291 cap_t cap = create_mapped_it_frame_cap(pd_cap, rootserver.ipc_buf, vptr, IT_ASID, false, false);
292 write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer), cap);
293
294 return cap;
295 }
296
create_bi_frame_cap(cap_t root_cnode_cap,cap_t pd_cap,vptr_t vptr)297 BOOT_CODE void create_bi_frame_cap(cap_t root_cnode_cap, cap_t pd_cap, vptr_t vptr)
298 {
299 /* create a cap of it and write it into the root CNode */
300 cap_t cap = create_mapped_it_frame_cap(pd_cap, rootserver.boot_info, vptr, IT_ASID, false, false);
301 write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapBootInfoFrame), cap);
302 }
303
/* Compute the log2 size of the (power-of-two sized) extra boot info frame
 * needed to hold `extra_size` bytes.
 *
 * @param extra_size  number of bytes of extra boot info required
 * @return 0 if extra_size is 0, otherwise the smallest b such that
 *         BIT(b) >= extra_size after rounding extra_size up to a page
 */
BOOT_CODE word_t calculate_extra_bi_size_bits(word_t extra_size)
{
    if (extra_size == 0) {
        return 0;
    }

    /* most significant bit of the page-rounded size */
    word_t clzl_ret = clzl(ROUND_UP(extra_size, seL4_PageBits));
    word_t msb = seL4_WordBits - 1 - clzl_ret;
    /* If region is bigger than a page, make sure we overallocate rather than
     * underallocate
     */
    if (extra_size > BIT(msb)) {
        msb++;
    }
    return msb;
}
320
populate_bi_frame(node_id_t node_id,word_t num_nodes,vptr_t ipcbuf_vptr,word_t extra_bi_size)321 BOOT_CODE void populate_bi_frame(node_id_t node_id, word_t num_nodes,
322 vptr_t ipcbuf_vptr, word_t extra_bi_size)
323 {
324 /* clear boot info memory */
325 clearMemory((void *)rootserver.boot_info, BI_FRAME_SIZE_BITS);
326 if (extra_bi_size) {
327 clearMemory((void *)rootserver.extra_bi,
328 calculate_extra_bi_size_bits(extra_bi_size));
329 }
330
331 /* initialise bootinfo-related global state */
332 seL4_BootInfo *bi = BI_PTR(rootserver.boot_info);
333 bi->nodeID = node_id;
334 bi->numNodes = num_nodes;
335 bi->numIOPTLevels = 0;
336 bi->ipcBuffer = (seL4_IPCBuffer *)ipcbuf_vptr;
337 bi->initThreadCNodeSizeBits = CONFIG_ROOT_CNODE_SIZE_BITS;
338 bi->initThreadDomain = ksDomSchedule[ksDomScheduleIdx].domain;
339 bi->extraLen = extra_bi_size;
340
341 ndks_boot.bi_frame = bi;
342 ndks_boot.slot_pos_cur = seL4_NumInitialCaps;
343 }
344
provide_cap(cap_t root_cnode_cap,cap_t cap)345 BOOT_CODE bool_t provide_cap(cap_t root_cnode_cap, cap_t cap)
346 {
347 if (ndks_boot.slot_pos_cur >= BIT(CONFIG_ROOT_CNODE_SIZE_BITS)) {
348 printf("ERROR: can't add another cap, all %"SEL4_PRIu_word
349 " (=2^CONFIG_ROOT_CNODE_SIZE_BITS) slots used\n",
350 BIT(CONFIG_ROOT_CNODE_SIZE_BITS));
351 return false;
352 }
353 write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), ndks_boot.slot_pos_cur), cap);
354 ndks_boot.slot_pos_cur++;
355 return true;
356 }
357
create_frames_of_region(cap_t root_cnode_cap,cap_t pd_cap,region_t reg,bool_t do_map,sword_t pv_offset)358 BOOT_CODE create_frames_of_region_ret_t create_frames_of_region(
359 cap_t root_cnode_cap,
360 cap_t pd_cap,
361 region_t reg,
362 bool_t do_map,
363 sword_t pv_offset
364 )
365 {
366 pptr_t f;
367 cap_t frame_cap;
368 seL4_SlotPos slot_pos_before;
369 seL4_SlotPos slot_pos_after;
370
371 slot_pos_before = ndks_boot.slot_pos_cur;
372
373 for (f = reg.start; f < reg.end; f += BIT(PAGE_BITS)) {
374 if (do_map) {
375 frame_cap = create_mapped_it_frame_cap(pd_cap, f, pptr_to_paddr((void *)(f - pv_offset)), IT_ASID, false, true);
376 } else {
377 frame_cap = create_unmapped_it_frame_cap(f, false);
378 }
379 if (!provide_cap(root_cnode_cap, frame_cap))
380 return (create_frames_of_region_ret_t) {
381 S_REG_EMPTY, false
382 };
383 }
384
385 slot_pos_after = ndks_boot.slot_pos_cur;
386
387 return (create_frames_of_region_ret_t) {
388 (seL4_SlotRegion) { slot_pos_before, slot_pos_after }, true
389 };
390 }
391
create_it_asid_pool(cap_t root_cnode_cap)392 BOOT_CODE cap_t create_it_asid_pool(cap_t root_cnode_cap)
393 {
394 cap_t ap_cap = cap_asid_pool_cap_new(IT_ASID >> asidLowBits, rootserver.asid_pool);
395 write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadASIDPool), ap_cap);
396
397 /* create ASID control cap */
398 write_slot(
399 SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapASIDControl),
400 cap_asid_control_cap_new()
401 );
402
403 return ap_cap;
404 }
405
406 #ifdef CONFIG_KERNEL_MCS
/* Bind the scheduling context at sc_pptr to tcb and initialise its refills
 * with `timeslice` on `core`. Always succeeds (returns true); the bool_t
 * return exists for the callers' error-propagation pattern. */
BOOT_CODE static bool_t configure_sched_context(tcb_t *tcb, sched_context_t *sc_pptr, ticks_t timeslice, word_t core)
{
    tcb->tcbSchedContext = sc_pptr;
    REFILL_NEW(tcb->tcbSchedContext, MIN_REFILLS, timeslice, 0, core);

    /* back-link so the SC knows which thread consumes it */
    tcb->tcbSchedContext->scTcb = tcb;
    return true;
}
415
init_sched_control(cap_t root_cnode_cap,word_t num_nodes)416 BOOT_CODE bool_t init_sched_control(cap_t root_cnode_cap, word_t num_nodes)
417 {
418 seL4_SlotPos slot_pos_before = ndks_boot.slot_pos_cur;
419
420 /* create a sched control cap for each core */
421 for (unsigned int i = 0; i < num_nodes; i++) {
422 if (!provide_cap(root_cnode_cap, cap_sched_control_cap_new(i))) {
423 printf("can't init sched_control for node %u, provide_cap() failed\n", i);
424 return false;
425 }
426 }
427
428 /* update boot info with slot region for sched control caps */
429 ndks_boot.bi_frame->schedcontrol = (seL4_SlotRegion) {
430 .start = slot_pos_before,
431 .end = ndks_boot.slot_pos_cur
432 };
433
434 return true;
435 }
436 #endif
437
/* Create and configure the idle thread for each node: one per core with SMP
 * support, a single one otherwise. Note the loop's opening and closing braces
 * are themselves inside the ENABLE_SMP_SUPPORT guards, so without SMP the
 * body executes exactly once with i effectively 0 (via SMP_TERNARY).
 *
 * @return true on success; false only if (MCS) configuring an idle thread's
 *         scheduling context fails
 */
BOOT_CODE bool_t create_idle_thread(void)
{
    pptr_t pptr;

#ifdef ENABLE_SMP_SUPPORT
    for (unsigned int i = 0; i < CONFIG_MAX_NUM_NODES; i++) {
#endif /* ENABLE_SMP_SUPPORT */
        /* the idle TCB lives at a fixed offset inside the static per-core buffer */
        pptr = (pptr_t) &ksIdleThreadTCB[SMP_TERNARY(i, 0)];
        NODE_STATE_ON_CORE(ksIdleThread, i) = TCB_PTR(pptr + TCB_OFFSET);
        configureIdleThread(NODE_STATE_ON_CORE(ksIdleThread, i));
#ifdef CONFIG_DEBUG_BUILD
        setThreadName(NODE_STATE_ON_CORE(ksIdleThread, i), "idle_thread");
#endif
        SMP_COND_STATEMENT(NODE_STATE_ON_CORE(ksIdleThread, i)->tcbAffinity = i);
#ifdef CONFIG_KERNEL_MCS
        /* give the idle thread a scheduling context on its own core */
        bool_t result = configure_sched_context(NODE_STATE_ON_CORE(ksIdleThread, i), SC_PTR(&ksIdleThreadSC[SMP_TERNARY(i, 0)]),
                                                usToTicks(CONFIG_BOOT_THREAD_TIME_SLICE * US_IN_MS), SMP_TERNARY(i, 0));
        SMP_COND_STATEMENT(NODE_STATE_ON_CORE(ksIdleThread, i)->tcbSchedContext->scCore = i;)
        if (!result) {
            printf("Kernel init failed: Unable to allocate sc for idle thread\n");
            return false;
        }
#endif
#ifdef ENABLE_SMP_SUPPORT
    }
#endif /* ENABLE_SMP_SUPPORT */
    return true;
}
466
/* Create and fully configure the initial thread (root server).
 *
 * Inserts the CNode, VSpace and derived IPC-buffer caps into the new TCB's
 * slots, passes the boot info frame address in capRegister, sets the entry
 * point, makes the thread runnable at maximum priority in the first domain,
 * initialises the current-domain globals, and publishes the TCB (and, with
 * MCS, its scheduling context) caps in the root CNode.
 *
 * @param root_cnode_cap  root CNode cap
 * @param it_pd_cap       initial thread's vspace cap
 * @param ui_v_entry      user image entry point (virtual address)
 * @param bi_frame_vptr   user virtual address of the boot info frame
 * @param ipcbuf_vptr     user virtual address of the IPC buffer
 * @param ipcbuf_cap      mapped IPC buffer frame cap to derive from
 * @return pointer to the initialised TCB, or NULL on failure
 */
BOOT_CODE tcb_t *create_initial_thread(cap_t root_cnode_cap, cap_t it_pd_cap, vptr_t ui_v_entry, vptr_t bi_frame_vptr,
                                       vptr_t ipcbuf_vptr, cap_t ipcbuf_cap)
{
    tcb_t *tcb = TCB_PTR(rootserver.tcb + TCB_OFFSET);
#ifndef CONFIG_KERNEL_MCS
    tcb->tcbTimeSlice = CONFIG_TIME_SLICE;
#endif

    Arch_initContext(&tcb->tcbArch.tcbContext);

    /* derive a copy of the IPC buffer cap for inserting */
    deriveCap_ret_t dc_ret = deriveCap(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer), ipcbuf_cap);
    if (dc_ret.status != EXCEPTION_NONE) {
        printf("Failed to derive copy of IPC Buffer\n");
        return NULL;
    }

    /* initialise TCB (corresponds directly to abstract specification) */
    cteInsert(
        root_cnode_cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadCNode),
        SLOT_PTR(rootserver.tcb, tcbCTable)
    );
    cteInsert(
        it_pd_cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadVSpace),
        SLOT_PTR(rootserver.tcb, tcbVTable)
    );
    cteInsert(
        dc_ret.cap,
        SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadIPCBuffer),
        SLOT_PTR(rootserver.tcb, tcbBuffer)
    );
    tcb->tcbIPCBuffer = ipcbuf_vptr;

    /* the root server finds the boot info frame address in capRegister */
    setRegister(tcb, capRegister, bi_frame_vptr);
    setNextPC(tcb, ui_v_entry);

    /* initialise TCB */
#ifdef CONFIG_KERNEL_MCS
    if (!configure_sched_context(tcb, SC_PTR(rootserver.sc), usToTicks(CONFIG_BOOT_THREAD_TIME_SLICE * US_IN_MS), 0)) {
        return NULL;
    }
#endif

    tcb->tcbPriority = seL4_MaxPrio;
    tcb->tcbMCP = seL4_MaxPrio;
    tcb->tcbDomain = ksDomSchedule[ksDomScheduleIdx].domain;
#ifndef CONFIG_KERNEL_MCS
    setupReplyMaster(tcb);
#endif
    setThreadState(tcb, ThreadState_Running);

    /* initialise current-domain state from the first schedule entry */
    ksCurDomain = ksDomSchedule[ksDomScheduleIdx].domain;
#ifdef CONFIG_KERNEL_MCS
    ksDomainTime = usToTicks(ksDomSchedule[ksDomScheduleIdx].length * US_IN_MS);
#else
    ksDomainTime = ksDomSchedule[ksDomScheduleIdx].length;
#endif
    assert(ksCurDomain < CONFIG_NUM_DOMAINS && ksDomainTime > 0);

#ifndef CONFIG_KERNEL_MCS
    SMP_COND_STATEMENT(tcb->tcbAffinity = 0);
#endif

    /* create initial thread's TCB cap */
    cap_t cap = cap_thread_cap_new(TCB_REF(tcb));
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadTCB), cap);

#ifdef CONFIG_KERNEL_MCS
    cap = cap_sched_context_cap_new(SC_REF(tcb->tcbSchedContext), seL4_MinSchedContextBits);
    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadSC), cap);
#endif
#ifdef CONFIG_DEBUG_BUILD
    setThreadName(tcb, "rootserver");
#endif

    return tcb;
}
546
/* Initialise this node's (core's) scheduler state.
 *
 * @param scheduler_action  TCB to switch to next, or one of the special
 *                          values SchedulerAction_ResumeCurrentThread /
 *                          SchedulerAction_ChooseNewThread
 */
BOOT_CODE void init_core_state(tcb_t *scheduler_action)
{
#ifdef CONFIG_HAVE_FPU
    NODE_STATE(ksActiveFPUState) = NULL;
#endif
#ifdef CONFIG_DEBUG_BUILD
    /* add initial threads to the debug queue */
    NODE_STATE(ksDebugTCBs) = NULL;
    if (scheduler_action != SchedulerAction_ResumeCurrentThread &&
        scheduler_action != SchedulerAction_ChooseNewThread) {
        tcbDebugAppend(scheduler_action);
    }
    tcbDebugAppend(NODE_STATE(ksIdleThread));
#endif
    NODE_STATE(ksSchedulerAction) = scheduler_action;
    /* the idle thread is current until the scheduler first runs */
    NODE_STATE(ksCurThread) = NODE_STATE(ksIdleThread);
#ifdef CONFIG_KERNEL_MCS
    NODE_STATE(ksCurSC) = NODE_STATE(ksCurThread->tcbSchedContext);
    NODE_STATE(ksConsumed) = 0;
    NODE_STATE(ksReprogram) = true;
    NODE_STATE(ksReleaseHead) = NULL;
    NODE_STATE(ksCurTime) = getCurrentTime();
#endif
}
571
/* Create an untyped cap for [pptr, pptr + 2^size_bits) in the next root CNode
 * slot and record its descriptor in the boot info frame.
 *
 * Note: exhausting the CONFIG_MAX_NUM_BOOTINFO_UNTYPED_CAPS descriptor slots
 * is deliberately not fatal — the cap is simply not created and true is
 * returned, so later regions are silently dropped from the boot info.
 *
 * @return false only if writing the cap into the root CNode fails
 */
BOOT_CODE static bool_t provide_untyped_cap(
    cap_t root_cnode_cap,
    bool_t device_memory,
    pptr_t pptr,
    word_t size_bits,
    seL4_SlotPos first_untyped_slot
)
{
    bool_t ret;
    cap_t ut_cap;
    /* index of this untyped's descriptor, relative to the first untyped slot */
    word_t i = ndks_boot.slot_pos_cur - first_untyped_slot;
    if (i < CONFIG_MAX_NUM_BOOTINFO_UNTYPED_CAPS) {
        ndks_boot.bi_frame->untypedList[i] = (seL4_UntypedDesc) {
            pptr_to_paddr((void *)pptr), size_bits, device_memory, {0}
        };
        ut_cap = cap_untyped_cap_new(MAX_FREE_INDEX(size_bits),
                                     device_memory, size_bits, pptr);
        ret = provide_cap(root_cnode_cap, ut_cap);
    } else {
        printf("Kernel init: Too many untyped regions for boot info\n");
        ret = true;
    }
    return ret;
}
596
/* Cover `reg` with untyped caps by repeatedly taking the largest
 * naturally-aligned power-of-two chunk from the region's start. Chunks
 * smaller than seL4_MinUntypedBits are skipped (they could never be retyped).
 *
 * @param root_cnode_cap      root CNode receiving the caps
 * @param device_memory       whether the region is device memory
 * @param reg                 kernel-virtual region to cover
 * @param first_untyped_slot  slot of the first untyped cap, for descriptor
 *                            indexing in provide_untyped_cap()
 * @return true on success, false if providing a cap failed
 */
BOOT_CODE static bool_t create_untypeds_for_region(
    cap_t root_cnode_cap,
    bool_t device_memory,
    region_t reg,
    seL4_SlotPos first_untyped_slot
)
{
    while (!is_reg_empty(reg)) {

        /* Calculate the bit size of the region. */
        unsigned int size_bits = seL4_WordBits - 1 - clzl(reg.end - reg.start);
        /* The size can't exceed the largest possible untyped size. */
        if (size_bits > seL4_MaxUntypedBits) {
            size_bits = seL4_MaxUntypedBits;
        }
        /* The start address 0 satisfies any alignment needs, otherwise ensure
         * the region's bit size does not exceed the alignment of the region.
         */
        if (0 != reg.start) {
            unsigned int align_bits = ctzl(reg.start);
            if (size_bits > align_bits) {
                size_bits = align_bits;
            }
        }
        /* Provide an untyped capability for the region only if it is large
         * enough to be retyped into objects later. Otherwise the region can't
         * be used anyway.
         */
        if (size_bits >= seL4_MinUntypedBits) {
            if (!provide_untyped_cap(root_cnode_cap, device_memory, reg.start, size_bits, first_untyped_slot)) {
                return false;
            }
        }
        reg.start += BIT(size_bits);
    }
    return true;
}
634
/* Create all untyped caps and publish their slot region in the boot info:
 *  1. device untypeds for every physical range not covered by a reserved
 *     region, up to CONFIG_PADDR_USER_DEVICE_TOP;
 *  2. normal untypeds for reusable boot code/data (boot_mem_reuse_reg);
 *  3. normal untypeds for all remaining free memory, consuming
 *     ndks_boot.freemem in the process.
 *
 * @return true on success, false if any region's untypeds failed
 */
BOOT_CODE bool_t create_untypeds(cap_t root_cnode_cap,
                                 region_t boot_mem_reuse_reg)
{
    seL4_SlotPos first_untyped_slot = ndks_boot.slot_pos_cur;

    /* device untypeds: the gaps between the (ordered) reserved regions */
    paddr_t start = 0;
    for (word_t i = 0; i < ndks_boot.resv_count; i++) {
        if (start < ndks_boot.reserved[i].start) {
            region_t reg = paddr_to_pptr_reg((p_region_t) {
                start, ndks_boot.reserved[i].start
            });
            if (!create_untypeds_for_region(root_cnode_cap, true, reg, first_untyped_slot)) {
                printf("ERROR: creation of untypeds for device region #%u at"
                       " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                       (unsigned int)i, reg.start, reg.end);
                return false;
            }
        }

        start = ndks_boot.reserved[i].end;
    }

    /* device untypeds above the last reserved region */
    if (start < CONFIG_PADDR_USER_DEVICE_TOP) {
        region_t reg = paddr_to_pptr_reg((p_region_t) {
            start, CONFIG_PADDR_USER_DEVICE_TOP
        });

        if (!create_untypeds_for_region(root_cnode_cap, true, reg, first_untyped_slot)) {
            printf("ERROR: creation of untypeds for top device region"
                   " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                   reg.start, reg.end);
            return false;
        }
    }

    /* if boot_mem_reuse_reg is not empty, we can create UT objs from boot code/data frames */
    if (!create_untypeds_for_region(root_cnode_cap, false, boot_mem_reuse_reg, first_untyped_slot)) {
        printf("ERROR: creation of untypeds for recycled boot memory"
               " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
               boot_mem_reuse_reg.start, boot_mem_reuse_reg.end);
        return false;
    }

    /* convert remaining freemem into UT objects and provide the caps */
    for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
        region_t reg = ndks_boot.freemem[i];
        ndks_boot.freemem[i] = REG_EMPTY;
        if (!create_untypeds_for_region(root_cnode_cap, false, reg, first_untyped_slot)) {
            printf("ERROR: creation of untypeds for free memory region #%u at"
                   " [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"] failed\n",
                   (unsigned int)i, reg.start, reg.end);
            return false;
        }
    }

    /* tell userland which slots hold the untyped caps */
    ndks_boot.bi_frame->untyped = (seL4_SlotRegion) {
        .start = first_untyped_slot,
        .end = ndks_boot.slot_pos_cur
    };

    return true;
}
697
bi_finalise(void)698 BOOT_CODE void bi_finalise(void)
699 {
700 ndks_boot.bi_frame->empty = (seL4_SlotRegion) {
701 .start = ndks_boot.slot_pos_cur,
702 .end = BIT(CONFIG_ROOT_CNODE_SIZE_BITS)
703 };
704 }
705
ceiling_kernel_window(pptr_t p)706 BOOT_CODE static inline pptr_t ceiling_kernel_window(pptr_t p)
707 {
708 /* Adjust address if it exceeds the kernel window
709 * Note that we compare physical address in case of overflow.
710 */
711 if (pptr_to_paddr((void *)p) > PADDR_TOP) {
712 p = PPTR_TOP;
713 }
714 return p;
715 }
716
check_available_memory(word_t n_available,const p_region_t * available)717 BOOT_CODE static bool_t check_available_memory(word_t n_available,
718 const p_region_t *available)
719 {
720 /* The system configuration is broken if no region is available. */
721 if (0 == n_available) {
722 printf("ERROR: no memory regions available\n");
723 return false;
724 }
725
726 printf("available phys memory regions: %"SEL4_PRIu_word"\n", n_available);
727 /* Force ordering and exclusivity of available regions. */
728 for (word_t i = 0; i < n_available; i++) {
729 const p_region_t *r = &available[i];
730 printf(" [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"]\n", r->start, r->end);
731
732 /* Available regions must be sane */
733 if (r->start > r->end) {
734 printf("ERROR: memory region %"SEL4_PRIu_word" has start > end\n", i);
735 return false;
736 }
737
738 /* Available regions can't be empty. */
739 if (r->start == r->end) {
740 printf("ERROR: memory region %"SEL4_PRIu_word" empty\n", i);
741 return false;
742 }
743
744 /* Regions must be ordered and must not overlap. */
745 if ((i > 0) && (r->start < available[i - 1].end)) {
746 printf("ERROR: memory region %d in wrong order\n", (int)i);
747 return false;
748 }
749 }
750
751 return true;
752 }
753
754
check_reserved_memory(word_t n_reserved,const region_t * reserved)755 BOOT_CODE static bool_t check_reserved_memory(word_t n_reserved,
756 const region_t *reserved)
757 {
758 printf("reserved virt address space regions: %"SEL4_PRIu_word"\n",
759 n_reserved);
760 /* Force ordering and exclusivity of reserved regions. */
761 for (word_t i = 0; i < n_reserved; i++) {
762 const region_t *r = &reserved[i];
763 printf(" [%"SEL4_PRIx_word"..%"SEL4_PRIx_word"]\n", r->start, r->end);
764
765 /* Reserved regions must be sane, the size is allowed to be zero. */
766 if (r->start > r->end) {
767 printf("ERROR: reserved region %"SEL4_PRIu_word" has start > end\n", i);
768 return false;
769 }
770
771 /* Regions must be ordered and must not overlap. */
772 if ((i > 0) && (r->start < reserved[i - 1].end)) {
773 printf("ERROR: reserved region %"SEL4_PRIu_word" in wrong order\n", i);
774 return false;
775 }
776 }
777
778 return true;
779 }
780
/* Scratch array for init_freemem(): we can't declare arrays on the stack,
 * so this is space for the function below to use. */
BOOT_BSS static region_t avail_reg[MAX_NUM_FREEMEM_REG];
784 /**
785 * Dynamically initialise the available memory on the platform.
786 * A region represents an area of memory.
787 */
init_freemem(word_t n_available,const p_region_t * available,word_t n_reserved,const region_t * reserved,v_region_t it_v_reg,word_t extra_bi_size_bits)788 BOOT_CODE bool_t init_freemem(word_t n_available, const p_region_t *available,
789 word_t n_reserved, const region_t *reserved,
790 v_region_t it_v_reg, word_t extra_bi_size_bits)
791 {
792
793 if (!check_available_memory(n_available, available)) {
794 return false;
795 }
796
797 if (!check_reserved_memory(n_reserved, reserved)) {
798 return false;
799 }
800
801 for (word_t i = 0; i < ARRAY_SIZE(ndks_boot.freemem); i++) {
802 ndks_boot.freemem[i] = REG_EMPTY;
803 }
804
805 /* convert the available regions to pptrs */
806 for (word_t i = 0; i < n_available; i++) {
807 avail_reg[i] = paddr_to_pptr_reg(available[i]);
808 avail_reg[i].end = ceiling_kernel_window(avail_reg[i].end);
809 avail_reg[i].start = ceiling_kernel_window(avail_reg[i].start);
810 }
811
812 word_t a = 0;
813 word_t r = 0;
814 /* Now iterate through the available regions, removing any reserved regions. */
815 while (a < n_available && r < n_reserved) {
816 if (reserved[r].start == reserved[r].end) {
817 /* reserved region is empty - skip it */
818 r++;
819 } else if (avail_reg[a].start >= avail_reg[a].end) {
820 /* skip the entire region - it's empty now after trimming */
821 a++;
822 } else if (reserved[r].end <= avail_reg[a].start) {
823 /* the reserved region is below the available region - skip it*/
824 reserve_region(pptr_to_paddr_reg(reserved[r]));
825 r++;
826 } else if (reserved[r].start >= avail_reg[a].end) {
827 /* the reserved region is above the available region - take the whole thing */
828 insert_region(avail_reg[a]);
829 a++;
830 } else {
831 /* the reserved region overlaps with the available region */
832 if (reserved[r].start <= avail_reg[a].start) {
833 /* the region overlaps with the start of the available region.
834 * trim start of the available region */
835 avail_reg[a].start = MIN(avail_reg[a].end, reserved[r].end);
836 reserve_region(pptr_to_paddr_reg(reserved[r]));
837 r++;
838 } else {
839 assert(reserved[r].start < avail_reg[a].end);
840 /* take the first chunk of the available region and move
841 * the start to the end of the reserved region */
842 region_t m = avail_reg[a];
843 m.end = reserved[r].start;
844 insert_region(m);
845 if (avail_reg[a].end > reserved[r].end) {
846 avail_reg[a].start = reserved[r].end;
847 reserve_region(pptr_to_paddr_reg(reserved[r]));
848 r++;
849 } else {
850 a++;
851 }
852 }
853 }
854 }
855
856 for (; r < n_reserved; r++) {
857 if (reserved[r].start < reserved[r].end) {
858 reserve_region(pptr_to_paddr_reg(reserved[r]));
859 }
860 }
861
862 /* no more reserved regions - add the rest */
863 for (; a < n_available; a++) {
864 if (avail_reg[a].start < avail_reg[a].end) {
865 insert_region(avail_reg[a]);
866 }
867 }
868
869 /* now try to fit the root server objects into a region */
870 int i = ARRAY_SIZE(ndks_boot.freemem) - 1;
871 if (!is_reg_empty(ndks_boot.freemem[i])) {
872 printf("ERROR: insufficient MAX_NUM_FREEMEM_REG (%u)\n",
873 (unsigned int)MAX_NUM_FREEMEM_REG);
874 return false;
875 }
876 /* skip any empty regions */
877 for (; is_reg_empty(ndks_boot.freemem[i]) && i >= 0; i--);
878
879 /* try to grab the last available p region to create the root server objects
880 * from. If possible, retain any left over memory as an extra p region */
881 word_t size = calculate_rootserver_size(it_v_reg, extra_bi_size_bits);
882 word_t max = rootserver_max_size_bits(extra_bi_size_bits);
883 for (; i >= 0; i--) {
884 /* Invariant: both i and (i + 1) are valid indices in ndks_boot.freemem. */
885 assert(i < ARRAY_SIZE(ndks_boot.freemem) - 1);
886 /* Invariant; the region at index i is the current candidate.
887 * Invariant: regions 0 up to (i - 1), if any, are additional candidates.
888 * Invariant: region (i + 1) is empty. */
889 assert(is_reg_empty(ndks_boot.freemem[i + 1]));
890 /* Invariant: regions above (i + 1), if any, are empty or too small to use.
891 * Invariant: all non-empty regions are ordered, disjoint and unallocated. */
892
893 /* We make a fresh variable to index the known-empty region, because the
894 * SimplExportAndRefine verification test has poor support for array
895 * indices that are sums of variables and small constants. */
896 int empty_index = i + 1;
897
898 /* Try to take the top-most suitably sized and aligned chunk. */
899 pptr_t unaligned_start = ndks_boot.freemem[i].end - size;
900 pptr_t start = ROUND_DOWN(unaligned_start, max);
901 /* if unaligned_start didn't underflow, and start fits in the region,
902 * then we've found a region that fits the root server objects. */
903 if (unaligned_start <= ndks_boot.freemem[i].end
904 && start >= ndks_boot.freemem[i].start) {
905 create_rootserver_objects(start, it_v_reg, extra_bi_size_bits);
906 /* There may be leftovers before and after the memory we used. */
907 /* Shuffle the after leftover up to the empty slot (i + 1). */
908 ndks_boot.freemem[empty_index] = (region_t) {
909 .start = start + size,
910 .end = ndks_boot.freemem[i].end
911 };
912 /* Leave the before leftover in current slot i. */
913 ndks_boot.freemem[i].end = start;
914 /* Regions i and (i + 1) are now well defined, ordered, disjoint,
915 * and unallocated, so we can return successfully. */
916 return true;
917 }
918 /* Region i isn't big enough, so shuffle it up to slot (i + 1),
919 * which we know is unused. */
920 ndks_boot.freemem[empty_index] = ndks_boot.freemem[i];
921 /* Now region i is unused, so make it empty to reestablish the invariant
922 * for the next iteration (when it will be slot i + 1). */
923 ndks_boot.freemem[i] = REG_EMPTY;
924 }
925
926 /* We didn't find a big enough region. */
927 printf("ERROR: no free memory region is big enough for root server "
928 "objects, need size/alignment of 2^%"SEL4_PRIu_word"\n", max);
929 return false;
930 }
931