/*
 * SPDX-License-Identifier: BSD-3-Clause
 * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
 */

#include <arch.h>
#include <arch_helpers.h>
#include <assert.h>
#include <attestation_token.h>
#include <buffer.h>
#include <cpuid.h>
#include <debug.h>
#include <errno.h>
#include <gic.h>
#include <granule.h>
#include <memory_alloc.h>
#include <sizes.h>
#include <slot_buf_arch.h>
#include <stdbool.h>
#include <stdint.h>
#include <table.h>
#include <xlat_contexts.h>
#include <xlat_tables.h>

/*
 * The VA space size for the high region, which maps the slot buffers,
 * needs to be a power of two, so round NR_CPU_SLOTS up to the closest
 * power of two.
 */
#define ROUNDED_NR_CPU_SLOTS	(1ULL << (64ULL - \
				 __builtin_clzll((NR_CPU_SLOTS) - 1)))

#define RMM_SLOT_BUF_VA_SIZE	((ROUNDED_NR_CPU_SLOTS) * (GRANULE_SIZE))

#define SLOT_VIRT		((ULL(0xffffffffffffffff) - \
				  RMM_SLOT_BUF_VA_SIZE + ULL(1)))
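
/*
 * Worked example for the macros above (the NR_CPU_SLOTS value used here is
 * only an assumption for illustration): with NR_CPU_SLOTS == 14,
 * __builtin_clzll(13) == 60, so ROUNDED_NR_CPU_SLOTS == (1ULL << 4) == 16.
 * With a 4KB granule, RMM_SLOT_BUF_VA_SIZE == 16 * 0x1000 == 0x10000 (64KB)
 * and SLOT_VIRT == 0xffffffffffff0000, i.e. the slot buffers occupy the
 * topmost 64KB of the high VA region.
 *
 * The sanity checks below are an illustrative addition (not required by the
 * rest of this file): the rounded value must be a power of two and must be
 * large enough to cover all the slots.
 */
COMPILER_ASSERT(((ROUNDED_NR_CPU_SLOTS) & ((ROUNDED_NR_CPU_SLOTS) - 1ULL)) == 0ULL);
COMPILER_ASSERT((ROUNDED_NR_CPU_SLOTS) >= (NR_CPU_SLOTS));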

/*
 * All the slot buffers for a given CPU must be mapped by a single translation
 * table, which means the max VA size should be <= 4KB * 512.
 */
COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) <= (GRANULE_SIZE * XLAT_TABLE_ENTRIES));

/*
 * For all translation stages, if FEAT_TTST is implemented, then while the PE
 * is executing in AArch64 state and is using 4KB translation granules, the
 * minimum address space size is 64KB.
 */
COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) >= (1 << 16U));

#define RMM_SLOT_BUF_MMAP	MAP_REGION_TRANSIENT(		\
					SLOT_VIRT,		\
					RMM_SLOT_BUF_VA_SIZE,	\
					PAGE_SIZE)

#define SLOT_BUF_MMAP_REGIONS	UL(1)

/*
 * Attributes for a buffer slot page descriptor.
 * Note that the AF bit on the descriptor is handled by the translation
 * library (it assumes that access faults are not handled) so it does not
 * need to be specified here.
 */
#define SLOT_DESC_ATTR \
	(MT_RW_DATA | MT_SHAREABILITY_ISH | MT_NG)

/*
 * The base tables for all the contexts are manually allocated as a contiguous
 * block of memory.
 */
static uint64_t transient_base_table[XLAT_TABLE_ENTRIES * MAX_CPUS]
				    __aligned(BASE_XLAT_TABLES_ALIGNMENT)
				    __section("slot_buffer_xlat_tbls");

/* Allocate per-cpu xlat_ctx_tbls */
static struct xlat_ctx_tbls slot_buf_tbls[MAX_CPUS];

/*
 * Allocate mmap regions and define the common xlat_ctx_cfg shared with
 * all the slot_buf_xlat_ctx contexts.
 */
XLAT_REGISTER_VA_SPACE(slot_buf, VA_HIGH_REGION,
		       SLOT_BUF_MMAP_REGIONS,
		       RMM_SLOT_BUF_VA_SIZE);

/* Per-CPU context definition */
static struct xlat_ctx slot_buf_xlat_ctx[MAX_CPUS];

/*
 * Allocate a cache to store the last level table entry where the slot buffers
 * are mapped to avoid needing to perform a table walk every time a buffer
 * slot operation is needed.
 */
static struct xlat_table_entry te_cache[MAX_CPUS];

static uintptr_t slot_to_va(enum buffer_slot slot)
{
	assert(slot < NR_CPU_SLOTS);

	return (uintptr_t)(SLOT_VIRT + (GRANULE_SIZE * slot));
}

static inline struct xlat_ctx *get_slot_buf_xlat_ctx(void)
{
	return &slot_buf_xlat_ctx[my_cpuid()];
}

static inline struct xlat_table_entry *get_cache_entry(void)
{
	return &te_cache[my_cpuid()];
}

__unused static uint64_t slot_to_descriptor(enum buffer_slot slot)
{
	uint64_t *entry = xlat_get_pte_from_table(get_cache_entry(),
						  slot_to_va(slot));

	return xlat_read_descriptor(entry);
}

/*
 * Set up the translation tables for the slot buffer mechanism for each PE.
 * This function must be called by every PE in the system.
 */
void slot_buf_setup_xlat(void)
{
	unsigned int cpuid = my_cpuid();
	int ret = xlat_ctx_create_dynamic(get_slot_buf_xlat_ctx(),
					  &slot_buf_xlat_ctx_cfg,
					  &slot_buf_tbls[cpuid],
					  &transient_base_table[
						XLAT_TABLE_ENTRIES * cpuid],
					  GET_NUM_BASE_LEVEL_ENTRIES(
						RMM_SLOT_BUF_VA_SIZE),
					  NULL,
					  0U);

	if (ret == -EINVAL) {
		/*
		 * If the context was already created, carry on with the
		 * initialization below. If it cannot be created at all,
		 * panic.
		 */
		ERROR("%s (%u): Failed to create the empty context for the slot buffers\n",
			__func__, __LINE__);
		panic();
	}

	if (xlat_ctx_cfg_initialized(get_slot_buf_xlat_ctx()) == false) {
		/* Add necessary mmap regions during cold boot */
		struct xlat_mmap_region slot_buf_regions[] = {
			RMM_SLOT_BUF_MMAP,
			{0}
		};

		if (xlat_mmap_add_ctx(get_slot_buf_xlat_ctx(),
				      slot_buf_regions, true) != 0) {
			ERROR("%s (%u): Failed to map slot buffer memory on high region\n",
				__func__, __LINE__);
			panic();
		}

	}

	if (xlat_ctx_tbls_initialized(get_slot_buf_xlat_ctx()) == false) {
		/*
		 * Initialize the translation tables for the current context.
		 * This is done on the first boot of each CPU.
		 */
		int err;

		err = xlat_init_tables_ctx(get_slot_buf_xlat_ctx());
		if (err != 0) {
			ERROR("%s (%u): xlat initialization failed with code %i\n",
				__func__, __LINE__, err);
			panic();
		}
	}

	/*
	 * Configure the MMU registers. This function assumes that all the
	 * contexts of a particular VA region (HIGH or LOW VA) use the same
	 * limits for the VA and PA spaces.
	 */
	if (xlat_arch_setup_mmu_cfg(get_slot_buf_xlat_ctx())) {
		ERROR("%s (%u): MMU registers failed to initialize\n",
			__func__, __LINE__);
		panic();
	}
}
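
/*
 * Minimal sketch of the expected per-PE call ordering for the slot buffer
 * initialization (the framing below is an assumption for illustration; only
 * the two calls and the MMU requirement come from the comments in this file):
 *
 *	slot_buf_setup_xlat();	Create the context and program the MMU registers.
 *	(the MMU for this PE is enabled elsewhere)
 *	slot_buf_init();	Must run with the MMU enabled.
 */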

/*
 * Finishes initializing the slot buffer mechanism.
 * This function must be called after the MMU is enabled.
 */
void slot_buf_init(void)
{
	if (is_mmu_enabled() == false) {
		ERROR("%s: MMU must be enabled\n", __func__);
		panic();
	}

	/*
	 * Initialize (if not done yet) the internal cache with the last level
	 * translation table that holds the MMU descriptors for the slot
	 * buffers, so we can access them faster when we need to map/unmap.
	 */
	if ((get_cache_entry())->table == NULL) {
		if (xlat_get_table_from_va(get_cache_entry(),
					   get_slot_buf_xlat_ctx(),
					   slot_to_va(SLOT_NS)) != 0) {
			ERROR("%s (%u): Failed to initialize table entry cache for CPU %u\n",
				__func__, __LINE__, my_cpuid());
			panic();

		}
	}
}

/*
 * Buffer slots are intended to be transient, and should not be live at
 * entry/exit of the RMM.
 */
void assert_cpu_slots_empty(void)
{
	unsigned int i;

	for (i = 0; i < NR_CPU_SLOTS; i++) {
		assert(slot_to_descriptor(i) == INVALID_DESC);
	}
}

static inline bool is_ns_slot(enum buffer_slot slot)
{
	return slot == SLOT_NS;
}

static inline bool is_realm_slot(enum buffer_slot slot)
{
	return (slot != SLOT_NS) && (slot < NR_CPU_SLOTS);
}

static void *ns_granule_map(enum buffer_slot slot, struct granule *granule)
{
	unsigned long addr = granule_addr(granule);

	assert(is_ns_slot(slot));
	return buffer_arch_map(slot, addr, true);
}

static void ns_buffer_unmap(enum buffer_slot slot)
{
	assert(is_ns_slot(slot));

	buffer_arch_unmap((void *)slot_to_va(slot));
}

/*
 * Maps a granule @g into the provided @slot, returning
 * the virtual address.
 *
 * The caller must either hold @g::lock or hold a reference.
 */
void *granule_map(struct granule *g, enum buffer_slot slot)
{
	unsigned long addr = granule_addr(g);

	assert(is_realm_slot(slot));

	return buffer_arch_map(slot, addr, false);
}

void buffer_unmap(void *buf)
{
	buffer_arch_unmap(buf);
}
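
/*
 * Illustrative (hypothetical) helper showing the intended pairing of
 * granule_map() and buffer_unmap(); it is not used by the rest of this
 * file and the NULL check is only defensive.
 */
__unused static void granule_map_usage_example(struct granule *g,
					       enum buffer_slot slot)
{
	/* 'slot' must be a realm slot, i.e. any slot other than SLOT_NS */
	void *va = granule_map(g, slot);

	if (va != NULL) {
		/* ... access the granule contents through 'va' ... */
		buffer_unmap(va);
	}
}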

/*
 * Helpers to copy data to/from NS memory. They return 'false' if not all
 * the data could be copied.
 */
bool memcpy_ns_read(void *dest, const void *ns_src, unsigned long size);
bool memcpy_ns_write(void *ns_dest, const void *src, unsigned long size);

/*
 * Map a Non-secure granule @ns_gr into the slot @slot and read data from
 * this granule into @dest. Unmap the granule once the read is done.
 *
 * It returns 'true' on success or 'false' if not all data are copied.
 * Only the least significant bits of @offset are considered, which allows the
 * full PA of a non-granule aligned buffer to be used for the @offset parameter.
 */
bool ns_buffer_read(enum buffer_slot slot,
		    struct granule *ns_gr,
		    unsigned int offset,
		    unsigned int size,
		    void *dest)
{
	uintptr_t src;
	bool retval;

	assert(is_ns_slot(slot));
	assert(ns_gr != NULL);

	/*
	 * To simplify the trapping mechanism around NS access,
	 * memcpy_ns_read uses a single 8-byte LDR instruction and
	 * all parameters must be aligned accordingly.
	 */
	assert(ALIGNED(size, 8));
	assert(ALIGNED(offset, 8));
	assert(ALIGNED(dest, 8));

	offset &= ~GRANULE_MASK;
	assert(offset + size <= GRANULE_SIZE);

	src = (uintptr_t)ns_granule_map(slot, ns_gr) + offset;
	retval = memcpy_ns_read(dest, (void *)src, size);
	ns_buffer_unmap(slot);

	return retval;
}

/*
 * Map a Non-secure granule @ns_gr into the slot @slot and write data from
 * @src into this granule. Unmap the granule once the write is done.
 *
 * It returns 'true' on success or 'false' if not all data are copied.
 * Only the least significant bits of @offset are considered, which allows the
 * full PA of a non-granule aligned buffer to be used for the @offset parameter.
 */
bool ns_buffer_write(enum buffer_slot slot,
		     struct granule *ns_gr,
		     unsigned int offset,
		     unsigned int size,
		     void *src)
{
	uintptr_t dest;
	bool retval;

	assert(is_ns_slot(slot));
	assert(ns_gr != NULL);

	/*
	 * To simplify the trapping mechanism around NS access,
	 * memcpy_ns_write uses a single 8-byte STR instruction and
	 * all parameters must be aligned accordingly.
	 */
	assert(ALIGNED(size, 8));
	assert(ALIGNED(offset, 8));
	assert(ALIGNED(src, 8));

	offset &= ~GRANULE_MASK;
	assert(offset + size <= GRANULE_SIZE);

	dest = (uintptr_t)ns_granule_map(slot, ns_gr) + offset;
	retval = memcpy_ns_write((void *)dest, src, size);
	ns_buffer_unmap(slot);

	return retval;
}
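
/*
 * Illustrative (hypothetical) helper showing how ns_buffer_read() is meant
 * to be used: @offset, @size and the destination pointer must all be 8-byte
 * aligned and the access must fit within a single granule. It is not used
 * by the rest of this file.
 */
__unused static bool ns_read_first_qword_example(struct granule *ns_gr,
						 uint64_t *dest)
{
	/* Read the first 8 bytes of the NS granule into 'dest' */
	return ns_buffer_read(SLOT_NS, ns_gr, 0U, sizeof(uint64_t), dest);
}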

/******************************************************************************
 * Internal helpers
 ******************************************************************************/

void *buffer_map_internal(enum buffer_slot slot, unsigned long addr, bool ns)
{
	uint64_t attr = SLOT_DESC_ATTR;
	uintptr_t va = slot_to_va(slot);
	struct xlat_table_entry *entry = get_cache_entry();

	assert(GRANULE_ALIGNED(addr));

	attr |= (ns == true ? MT_NS : MT_REALM);

	if (xlat_map_memory_page_with_attrs(entry, va,
					    (uintptr_t)addr, attr) != 0) {
		/* Error mapping the buffer */
		return NULL;
	}

	return (void *)va;
}

void buffer_unmap_internal(void *buf)
{
	/*
	 * Prevent the compiler from moving prior loads/stores to buf after the
	 * update to the translation table. Otherwise, those could fault.
	 */
	COMPILER_BARRIER();

	xlat_unmap_memory_page(get_cache_entry(), (uintptr_t)buf);
}