/*
 * Copyright (c) 2013-2022, Arm Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	memcpy16
	.globl	gpt_tlbi_by_pa_ll

	.globl	disable_mmu_el1
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el1
	.globl	disable_mmu_icache_el3
	.globl	fixup_gdt_reloc
#if SUPPORT_VFP
	.globl	enable_vfp
#endif

func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate on
 *       normal memory. It is intended to be mainly used from C code when the
 *       MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Use the fallback path that does not use the DC ZVA instruction and
	 * therefore does not require the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem

/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 * -----------------------------------------------------------------------
 */
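/*
 * For readers more comfortable with C, the strategy implemented below is
 * roughly equivalent to the following sketch (illustrative only, not part of
 * the build; read_dczid_el0() and dc_zva() are hypothetical helpers standing
 * in for the MRS and DC ZVA instructions):
 *
 *	void zeromem_dczva_sketch(void *mem, unsigned int length)
 *	{
 *		uint8_t *cur = mem;
 *		uint8_t *end = cur + length;
 *		size_t block_size = 4u << (read_dczid_el0() & 0xfu);
 *
 *		while ((((uintptr_t)cur & 0xfu) != 0u) && (cur < end))
 *			*cur++ = 0u;			// head bytes
 *		while ((((uintptr_t)cur & (block_size - 1u)) != 0u) &&
 *		       ((cur + 16) <= end)) {
 *			memset(cur, 0, 16);		// head 16-byte chunks
 *			cur += 16;
 *		}
 *		while ((cur + block_size) <= end) {
 *			dc_zva(cur);			// whole zero blocks
 *			cur += block_size;
 *		}
 *		while ((cur + 16) <= end) {
 *			memset(cur, 0, 16);		// tail 16-byte chunks
 *			cur += 16;
 *		}
 *		while (cur < end)
 *			*cur++ = 0u;			// tail bytes
 *	}
 *
 * The real code additionally falls back to a non-DC-ZVA path for small or
 * overflowing regions, as described in the diagram further down.
 */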
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time or using the DC ZVA instruction to
	 * zero aligned blocks of bytes, whose size is assumed to be larger
	 * than 16 bytes. In the case where the DC ZVA instruction cannot be
	 * used or if the first 16-byte loop would overflow, there is a
	 * fallback path that does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function that
	 *       branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+       .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+       .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |   +-------+-------+      .Lzeromem_dczva_final_16bytes_aligned
	 *       |   | 16 bytes loop |      |
	 *       |   +-------+-------+      |
	 *       |           |              |
	 *       |           v              |
	 *       |    +------+------+       .Lzeromem_dczva_final_1byte_aligned
	 *       |    | 1 byte loop |       |
	 *       |    +-------------+       |
	 *       |           |              |
	 *       |           v              |
	 *       |       +---+--+           |
	 *       |       | exit |           |
	 *       |       +------+           |
	 *       |                          |
	 *       |           +--------------+    +------------------+ zeromem
	 *       |           |  +----------------| zeromem function |
	 *       |           |  |                +------------------+
	 *       |           v  v
	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |    | 1 byte loop |
	 *       |    +------+------+
	 *       |           |
	 *       +-----------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 because length is not used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for the M bit (MMU enabled) in the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && \
	(BL2_AT_EL3 || ENABLE_RME))
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Read DCZID_EL0: its 4 lowest bits hold log2(<block size in words>)
	 * (see the encoding of the dczid_el0 register).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>.
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size

#if ENABLE_ASSERTIONS
	/*
	 * Assume the block size is at least 16 bytes to avoid manual
	 * realignment of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
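	/*
	 * For illustration, the block-size computation above is roughly
	 * equivalent to the following C (a sketch, not part of the build;
	 * read_dczid_el0() is a hypothetical accessor):
	 *
	 *	block_size = 4u << (read_dczid_el0() & 0xfu);
	 *
	 * e.g. a BS field of 4 (i.e. 16 words) yields a 64-byte zero block.
	 */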
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block, and this protects against zeroing a whole block when the
	 * area to zero is smaller than that. Also, as it is assumed that the
	 * block size is at least 16 bytes, this also protects the initial
	 * aligning loops from trying to zero 16 bytes when length is less
	 * than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * The length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to the zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fall back to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16-byte loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fall back to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * The next aligned address cannot be after the stop address because
	 * the length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using the DC ZVA cache block
	 * zeroing instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area.
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte-aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fall back to byte-per-byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
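/*
 * For reference, a roughly equivalent C sketch of the routine below
 * (illustrative only, not part of the build; the 64-bit copies rely on the
 * 16-byte alignment asserted at the top of the function):
 *
 *	void memcpy16_sketch(void *dest, const void *src, unsigned int length)
 *	{
 *		uint64_t *d64 = dest;
 *		const uint64_t *s64 = src;
 *		uint8_t *d8;
 *		const uint8_t *s8;
 *
 *		while (length >= 16U) {		// m_loop16
 *			*d64++ = *s64++;
 *			*d64++ = *s64++;
 *			length -= 16U;
 *		}
 *		d8 = (uint8_t *)d64;
 *		s8 = (const uint8_t *)s64;
 *		while (length-- != 0U)		// m_loop1
 *			*d8++ = *s8++;
 *	}
 */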
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3

func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1

func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image):
 *   x0 - the start of the fixup region
 *   x1 - the limit of the fixup region
 * These addresses have to be 4KB page aligned.
 * ---------------------------------------------------------------------------
 */

/* Relocation codes */
#define	R_AARCH64_NONE		0
#define	R_AARCH64_RELATIVE	1027
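/*
 * The GOT pass of the function below behaves roughly like this C sketch
 * (illustrative only, not part of the build; got_start/got_end correspond to
 * __GOT_START__/__GOT_END__ and diff is Diff(S) as described above):
 *
 *	void fixup_got_sketch(uint64_t *got_start, uint64_t *got_end,
 *			      uint64_t lower, uint64_t upper, uint64_t diff)
 *	{
 *		for (uint64_t *entry = got_start; entry < got_end; entry++) {
 *			// Only entries pointing into the fixup region move.
 *			if ((*entry >= lower) && (*entry <= upper))
 *				*entry += diff;
 *		}
 *	}
 */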
func fixup_gdt_reloc
	mov	x6, x0
	mov	x7, x1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4KB aligned */
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on the return address in x30.
	 * Assume that this function is called within a page at the start of
	 * the fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE_MASK)
	subs	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */
	b.eq	3f		/* Diff(S) = 0. No relocation needed */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/*
	 * The GOT is an array of 64-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
1:	ldr	x3, [x1]

	/* Skip adding the offset if the address is < the lower limit */
	cmp	x3, x6
	b.lo	2f

	/* Skip adding the offset if the address is > the upper limit */
	cmp	x3, x7
	b.hi	2f
	add	x3, x3, x0
	str	x3, [x1]

2:	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b

	/* Start the dynamic relocations. Use adrp/add to get RELA_START and END */
3:	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__

	/*
	 * According to the ELF-64 specification, the RELA data structure is
	 * as follows:
	 *	typedef struct {
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is the address of the reference.
	 * r_info is the symbol index and the type of relocation (in this case
	 * code 1027, which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is the constant part of the expression.
	 *
	 * The size of the Elf64_Rela structure is 24 bytes.
	 */
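	/*
	 * The loop below therefore behaves roughly like this C sketch
	 * (illustrative only, not part of the build; rela_start/rela_end
	 * correspond to __RELA_START__/__RELA_END__, and lower/upper/diff are
	 * the same quantities as in the GOT pass above):
	 *
	 *	for (Elf64_Rela *r = rela_start; r < rela_end; r++) {
	 *		if (r->r_info == R_AARCH64_NONE)
	 *			continue;
	 *		assert(r->r_info == R_AARCH64_RELATIVE);
	 *		if (((uint64_t)r->r_addend >= lower) &&
	 *		    ((uint64_t)r->r_addend <= upper))
	 *			*(uint64_t *)(r->r_offset + diff) =
	 *				(uint64_t)r->r_addend + diff;
	 *	}
	 */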

	/* Skip R_AARCH64_NONE entries (code 0) */
1:	ldr	x3, [x1, #8]
	cbz	x3, 2f

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
	cmp	x3, #R_AARCH64_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding the offset if r_addend is < the lower limit */
	cmp	x4, x6
	b.lo	2f

	/* Skip adding the offset if r_addend is > the upper limit */
	cmp	x4, x7
	b.hi	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b
	ret
endfunc fixup_gdt_reloc

/*
 * TODO: Currently only a size of 4KB is supported. Add support for other
 * sizes as well.
 */
func gpt_tlbi_by_pa_ll
#if ENABLE_ASSERTIONS
	cmp	x1, #PAGE_SIZE_4KB
	ASM_ASSERT(eq)
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	lsr	x0, x0, #FOUR_KB_SHIFT	/* 4KB size encoding is zero */
	sys	#6, c8, c4, #7, x0	/* TLBI RPALOS, <Xt> */
	dsb	sy
	ret
endfunc gpt_tlbi_by_pa_ll
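
/*
 * Usage note: a C caller is expected to pass a 4KB-aligned physical address
 * in x0 and PAGE_SIZE_4KB in x1. For illustration only (the prototype below
 * is assumed here and must match the actual declaration used by the GPT
 * library):
 *
 *	void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size);
 *
 *	gpt_tlbi_by_pa_ll(pa, PAGE_SIZE_4KB);
 */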