/*
 * SPDX-License-Identifier: BSD-3-Clause
 * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
 */

#include <arch.h>
#include <arch_helpers.h>
#include <assert.h>
#include <attestation_token.h>
#include <buffer.h>
#include <cpuid.h>
#include <debug.h>
#include <errno.h>
#include <gic.h>
#include <granule.h>
#include <memory_alloc.h>
#include <sizes.h>
#include <slot_buf_arch.h>
#include <stdbool.h>
#include <stdint.h>
#include <table.h>
#include <xlat_contexts.h>
#include <xlat_tables.h>

/*
 * The VA space size for the high region, which maps the slot buffers,
 * needs to be a power of two, so round NR_CPU_SLOTS up to the closest
 * power of two.
 */
#define ROUNDED_NR_CPU_SLOTS (1ULL << (64ULL - \
				       __builtin_clzll((NR_CPU_SLOTS) - 1)))
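
/*
 * Worked example (illustrative only; the actual value of NR_CPU_SLOTS is
 * defined elsewhere): if NR_CPU_SLOTS were 12, then __builtin_clzll(11) == 60
 * and ROUNDED_NR_CPU_SLOTS == 1ULL << (64 - 60) == 16, the next power of two.
 * If NR_CPU_SLOTS is already a power of two, the expression returns it
 * unchanged.
 */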

#define RMM_SLOT_BUF_VA_SIZE	((ROUNDED_NR_CPU_SLOTS) * (GRANULE_SIZE))

#define SLOT_VIRT		((ULL(0xffffffffffffffff) - \
				 RMM_SLOT_BUF_VA_SIZE + ULL(1)))

/*
 * All the slot buffers for a given CPU must be mapped by a single translation
 * table, which means the max VA size should be <= 4KB * 512
 */
COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) <= (GRANULE_SIZE * XLAT_TABLE_ENTRIES));

/*
 * For all translation stages, if FEAT_TTST is implemented, then while the PE
 * is executing in AArch64 state and using 4KB translation granules, the
 * minimum address space size is 64KB.
 */
COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) >= (1 << 16U));

#define RMM_SLOT_BUF_MMAP	MAP_REGION_TRANSIENT(			\
					SLOT_VIRT,			\
					RMM_SLOT_BUF_VA_SIZE,		\
					PAGE_SIZE)

#define SLOT_BUF_MMAP_REGIONS		UL(1)

/*
 * Attributes for a buffer slot page descriptor.
 * Note that the AF bit on the descriptor is handled by the translation
 * library (it assumes that access faults are not handled) so it does not
 * need to be specified here.
 */
#define SLOT_DESC_ATTR \
	(MT_RW_DATA | MT_SHAREABILITY_ISH | MT_NG)

/*
 * The base tables for all the contexts are manually allocated as a contiguous
 * block of memory.
 */
static uint64_t transient_base_table[XLAT_TABLE_ENTRIES * MAX_CPUS]
				    __aligned(BASE_XLAT_TABLES_ALIGNMENT)
				    __section("slot_buffer_xlat_tbls");

/* Allocate per-cpu xlat_ctx_tbls */
static struct xlat_ctx_tbls slot_buf_tbls[MAX_CPUS];

/*
 * Allocate mmap regions and define the common xlat_ctx_cfg shared with
 * all slot_buf_xlat_ctx contexts.
 */
XLAT_REGISTER_VA_SPACE(slot_buf, VA_HIGH_REGION,
		       SLOT_BUF_MMAP_REGIONS,
		       RMM_SLOT_BUF_VA_SIZE);

/* Per-CPU translation context definitions */
static struct xlat_ctx slot_buf_xlat_ctx[MAX_CPUS];

/*
 * Allocate a cache to store the last level table entry where the slot buffers
 * are mapped to avoid needing to perform a table walk every time a buffer
 * slot operation is needed.
 */
static struct xlat_table_entry te_cache[MAX_CPUS];

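/*
 * Return the virtual address assigned to @slot. The same VA range is used
 * on every CPU; each CPU installs its own mapping through its per-CPU
 * translation context.
 */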
static uintptr_t slot_to_va(enum buffer_slot slot)
{
	assert(slot < NR_CPU_SLOTS);

	return (uintptr_t)(SLOT_VIRT + (GRANULE_SIZE * slot));
}

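/* Return the slot buffer translation context for the calling CPU. */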
static inline struct xlat_ctx *get_slot_buf_xlat_ctx(void)
{
	return &slot_buf_xlat_ctx[my_cpuid()];
}

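/* Return the cached last level table entry for the calling CPU. */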
static inline struct xlat_table_entry *get_cache_entry(void)
{
	return &te_cache[my_cpuid()];
}

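/* Return the translation descriptor currently installed for @slot. */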
__unused static uint64_t slot_to_descriptor(enum buffer_slot slot)
{
	uint64_t *entry = xlat_get_pte_from_table(get_cache_entry(),
						  slot_to_va(slot));

	return xlat_read_descriptor(entry);
}

/*
 * Set up the translation tables for the slot buffer mechanism.
 * Must be called on every PE in the system.
 */
void slot_buf_setup_xlat(void)
{
	unsigned int cpuid = my_cpuid();
	int ret = xlat_ctx_create_dynamic(get_slot_buf_xlat_ctx(),
					  &slot_buf_xlat_ctx_cfg,
					  &slot_buf_tbls[cpuid],
					  &transient_base_table[
						XLAT_TABLE_ENTRIES * cpuid],
					  GET_NUM_BASE_LEVEL_ENTRIES(
							RMM_SLOT_BUF_VA_SIZE),
					  NULL,
					  0U);

	if (ret == -EINVAL) {
		/*
		 * The context may already have been created, in which case
		 * we carry on with the initialization below. Only if the
		 * context cannot be created at all do we panic here.
		 */
		ERROR("%s (%u): Failed to create the empty context for the slot buffers\n",
					__func__, __LINE__);
		panic();
	}

	if (xlat_ctx_cfg_initialized(get_slot_buf_xlat_ctx()) == false) {
		/* Add necessary mmap regions during cold boot */
		struct xlat_mmap_region slot_buf_regions[] = {
			RMM_SLOT_BUF_MMAP,
			{0}
		};

		if (xlat_mmap_add_ctx(get_slot_buf_xlat_ctx(),
				      slot_buf_regions, true) != 0) {
			ERROR("%s (%u): Failed to map slot buffer memory on high region\n",
				__func__, __LINE__);
			panic();
		}
	}

	if (xlat_ctx_tbls_initialized(get_slot_buf_xlat_ctx()) == false) {
		/*
		 * Initialize the translation tables for the current context.
		 * This is done on the first boot of each CPU.
		 */
		int err;

		err = xlat_init_tables_ctx(get_slot_buf_xlat_ctx());
		if (err != 0) {
			ERROR("%s (%u): xlat initialization failed with code %i\n",
			__func__, __LINE__, err);
			panic();
		}
	}

	/*
	 * Configure MMU registers. This function assumes that all the
	 * contexts of a particular VA region (HIGH or LOW VA) use the same
	 * limits for VA and PA spaces.
	 */
	if (xlat_arch_setup_mmu_cfg(get_slot_buf_xlat_ctx())) {
		ERROR("%s (%u): MMU registers failed to initialize\n",
					__func__, __LINE__);
		panic();
	}
}

/*
 * Finishes initializing the slot buffer mechanism.
 * This function must be called after the MMU is enabled.
 */
void slot_buf_init(void)
{
	if (is_mmu_enabled() == false) {
		ERROR("%s: MMU must be enabled\n", __func__);
		panic();
	}

	/*
	 * Initialize (if not done yet) the internal cache with the last level
	 * translation table that holds the MMU descriptors for the slot
	 * buffers, so we can access them faster when we need to map/unmap.
	 */
	if ((get_cache_entry())->table == NULL) {
		if (xlat_get_table_from_va(get_cache_entry(),
					   get_slot_buf_xlat_ctx(),
					   slot_to_va(SLOT_NS)) != 0) {
			ERROR("%s (%u): Failed to initialize table entry cache for CPU %u\n",
					__func__, __LINE__, my_cpuid());
			panic();
		}
	}
}
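
/*
 * Note: the expected per-PE boot sequence, as implied by the comments above,
 * is slot_buf_setup_xlat() first, then enabling the MMU, and finally
 * slot_buf_init() to populate the table entry cache.
 */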

/*
 * Buffer slots are intended to be transient, and should not be live at
 * entry/exit of the RMM.
 */
void assert_cpu_slots_empty(void)
{
	unsigned int i;

	for (i = 0; i < NR_CPU_SLOTS; i++) {
		assert(slot_to_descriptor(i) == INVALID_DESC);
	}
}

static inline bool is_ns_slot(enum buffer_slot slot)
{
	return slot == SLOT_NS;
}

static inline bool is_realm_slot(enum buffer_slot slot)
{
	return (slot != SLOT_NS) && (slot < NR_CPU_SLOTS);
}

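/* Map the NS granule @granule into the NS slot @slot and return its VA. */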
static void *ns_granule_map(enum buffer_slot slot, struct granule *granule)
{
	unsigned long addr = granule_addr(granule);

	assert(is_ns_slot(slot));
	return buffer_arch_map(slot, addr, true);
}

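/* Unmap the NS slot @slot previously mapped by ns_granule_map(). */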
static void ns_buffer_unmap(enum buffer_slot slot)
{
	assert(is_ns_slot(slot));

	buffer_arch_unmap((void *)slot_to_va(slot));
}

/*
 * Maps a granule @g into the provided @slot, returning
 * the virtual address.
 *
 * The caller must either hold @g::lock or hold a reference.
 */
void *granule_map(struct granule *g, enum buffer_slot slot)
{
	unsigned long addr = granule_addr(g);

	assert(is_realm_slot(slot));

	return buffer_arch_map(slot, addr, false);
}

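/*
 * Unmap a buffer previously mapped with granule_map().
 *
 * Illustrative usage sketch (SLOT_X stands for any Realm slot, i.e. any
 * slot other than SLOT_NS in enum buffer_slot):
 *
 *	void *va = granule_map(g, SLOT_X);
 *	... access the granule contents through 'va' ...
 *	buffer_unmap(va);
 */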
void buffer_unmap(void *buf)
{
	buffer_arch_unmap(buf);
}

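/*
 * Single-copy helpers used to access NS memory. They return 'true' on
 * success and 'false' if the copy could not be completed (see the comments
 * on ns_buffer_read() and ns_buffer_write() below).
 */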
bool memcpy_ns_read(void *dest, const void *ns_src, unsigned long size);
bool memcpy_ns_write(void *ns_dest, const void *src, unsigned long size);

/*
 * Map a Non-secure granule @ns_gr into the slot @slot and read data from
 * that granule into @dest. Unmap the granule once the read is done.
 *
 * Returns 'true' on success or 'false' if not all data could be copied.
 * Only the least significant bits of @offset are considered, which allows the
 * full PA of a non-granule aligned buffer to be used for the @offset parameter.
 */
bool ns_buffer_read(enum buffer_slot slot,
		    struct granule *ns_gr,
		    unsigned int offset,
		    unsigned int size,
		    void *dest)
{
	uintptr_t src;
	bool retval;

	assert(is_ns_slot(slot));
	assert(ns_gr != NULL);

	/*
	 * To simplify the trapping mechanism around NS access,
	 * memcpy_ns_read uses a single 8-byte LDR instruction and
	 * all parameters must be aligned accordingly.
	 */
	assert(ALIGNED(size, 8));
	assert(ALIGNED(offset, 8));
	assert(ALIGNED(dest, 8));

	offset &= ~GRANULE_MASK;
	assert(offset + size <= GRANULE_SIZE);

	src = (uintptr_t)ns_granule_map(slot, ns_gr) + offset;
	retval = memcpy_ns_read(dest, (void *)src, size);
	ns_buffer_unmap(slot);

	return retval;
}

/*
 * Map a Non-secure granule @ns_gr into the slot @slot and write data from
 * @src into that granule. Unmap the granule once the write is done.
 *
 * Returns 'true' on success or 'false' if not all data could be copied.
 * Only the least significant bits of @offset are considered, which allows the
 * full PA of a non-granule aligned buffer to be used for the @offset parameter.
 */
bool ns_buffer_write(enum buffer_slot slot,
		     struct granule *ns_gr,
		     unsigned int offset,
		     unsigned int size,
		     void *src)
{
	uintptr_t dest;
	bool retval;

	assert(is_ns_slot(slot));
	assert(ns_gr != NULL);

	/*
	 * To simplify the trapping mechanism around NS access,
	 * memcpy_ns_write uses a single 8-byte STR instruction and
	 * all parameters must be aligned accordingly.
	 */
	assert(ALIGNED(size, 8));
	assert(ALIGNED(offset, 8));
	assert(ALIGNED(src, 8));

	offset &= ~GRANULE_MASK;
	assert(offset + size <= GRANULE_SIZE);

	dest = (uintptr_t)ns_granule_map(slot, ns_gr) + offset;
	retval = memcpy_ns_write((void *)dest, src, size);
	ns_buffer_unmap(slot);

	return retval;
}

/******************************************************************************
 * Internal helpers
 ******************************************************************************/

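/*
 * Map the physical address @addr to the VA assigned to @slot, using NS or
 * Realm memory attributes depending on @ns. Returns the VA of the mapping
 * on success or NULL if the page could not be mapped.
 */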
void *buffer_map_internal(enum buffer_slot slot, unsigned long addr, bool ns)
{
	uint64_t attr = SLOT_DESC_ATTR;
	uintptr_t va = slot_to_va(slot);
	struct xlat_table_entry *entry = get_cache_entry();

	assert(GRANULE_ALIGNED(addr));

	attr |= (ns == true ? MT_NS : MT_REALM);

	if (xlat_map_memory_page_with_attrs(entry, va,
					    (uintptr_t)addr, attr) != 0) {
		/* Error mapping the buffer */
		return NULL;
	}

	return (void *)va;
}

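/* Remove the mapping installed by buffer_map_internal() for the buffer @buf. */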
void buffer_unmap_internal(void *buf)
{
	/*
	 * Prevent the compiler from moving prior loads/stores to buf after the
	 * update to the translation table. Otherwise, those could fault.
	 */
	COMPILER_BARRIER();

	xlat_unmap_memory_page(get_cache_entry(), (uintptr_t)buf);
}