/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include <config.h>

#ifdef CONFIG_IOMMU

#include <kernel/boot.h>
#include <machine.h>
#include <machine/io.h>
#include <arch/kernel/apic.h>
#include <arch/model/statedata.h>
#include <linker.h>
#include <plat/machine/acpi.h>
#include <plat/machine/intel-vtd.h>
#include <util.h>

#define RTADDR_REG 0x20
#define GCMD_REG 0x18
#define GSTS_REG 0x1C
#define CCMD_REG 0x28
#define ECAP_REG 0x10
#define IOTLB_REG 0x08
#define FSTS_REG 0x34
#define FECTL_REG 0x38
#define FEDATA_REG 0x3C
#define FEADDR_REG 0x40
#define FEUADDR_REG 0x44
#define CAP_REG 0x08

/* Bit Positions within Registers */
#define SRTP 30 /* Set Root Table Pointer */
#define RTPS 30 /* Root Table Pointer Status */
#define TE 31 /* Translation Enable */
#define TES 31 /* Translation Enable Status */

/* ICC is the 63rd bit of CCMD_REG, but since we access this register as
 * two 4-byte words, ICC becomes the 31st bit of the upper 32-bit word.
 */
#define ICC (31 + 32) /* Invalidate Context Cache */
#define CIRG (29 + 32) /* Context Invalidation Request Granularity */
#define CAIG 27 /* Context Actual Invalidation Granularity */
#define CAIG_MASK 0x3
#define IVO_MASK 0x3FF
#define IVT 31 /* Invalidate IOTLB */
#define IIRG 28 /* IOTLB Invalidation Request Granularity */
#define IAIG 25 /* IOTLB Actual Invalidation Granularity */
#define IAIG_MASK 0x7
#define IP 30 /* Interrupt Pending */
#define FRI 0x8 /* Fault Recording Index */
#define FRI_MASK 0xFF
#define FRO 24
#define FRO_MASK 0xFF
#define FI 12
#define SID_MASK 0xFFFF
#define SID_BUS(a) (MASK(8) & (a >> 8))
#define SID_DEV(a) (MASK(5) & (a >> 3))
#define SID_FUNC(a) (MASK(3) & a)
#define FR_MASK 0xFF
#define FAULT_TYPE 30
#define FAULT 31
#define NFR 8 /* high word of CAP_REG */
#define NFR_MASK 0xff
#define PPF 1
#define PPF_MASK 1
#define PRESENT 1
#define WBF 27
#define WBFS 27
#define DID 8
#define RW 0x3

#define SAGAW 8
#define SAGAW_2_LEVEL 0x01
#define SAGAW_3_LEVEL 0x02
#define SAGAW_4_LEVEL 0x04
#define SAGAW_5_LEVEL 0x08
#define SAGAW_6_LEVEL 0x10

#define CONTEXT_GLOBAL_INVALIDATE 0x1
#define IOTLB_GLOBAL_INVALIDATE 0x1

#define DMA_TLB_READ_DRAIN BIT(17)
#define DMA_TLB_WRITE_DRAIN BIT(16)

#define N_VTD_CONTEXTS 256

typedef uint32_t drhu_id_t;

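/* Each remapping hardware unit (DRHU) has its register set mapped into a
 * page-sized slot in the kernel window starting at PPTR_DRHU_START. These
 * helpers perform 32-bit and 64-bit MMIO accesses at the given register
 * offset within the unit selected by drhu_id.
 */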
static inline uint32_t vtd_read32(drhu_id_t drhu_id, uint32_t offset)
{
    return *(volatile uint32_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset);
}

static inline void vtd_write32(drhu_id_t drhu_id, uint32_t offset, uint32_t value)
{
    *(volatile uint32_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset) = value;
}

static inline uint64_t vtd_read64(drhu_id_t drhu_id, uint32_t offset)
{
    return *(volatile uint64_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset);
}

static inline void vtd_write64(drhu_id_t drhu_id, uint32_t offset, uint64_t value)
{
    *(volatile uint64_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset) = value;
}

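/* The IOTLB invalidate registers and the fault recording registers are not at
 * fixed offsets: get_ivo() reads their location from ECAP_REG and
 * get_fro_offset() from CAP_REG, both reported in units of 16 bytes from the
 * start of the unit's register page.
 */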
static inline uint32_t get_ivo(drhu_id_t drhu_id)
{
    return ((vtd_read32(drhu_id, ECAP_REG) >> 8) & IVO_MASK) * 16;
}

static uint32_t get_fro_offset(drhu_id_t drhu_id)
{
    uint32_t fro_offset;

    /* Get bits 31 to 24 from lower Capability Register */
    fro_offset = (vtd_read32(drhu_id, CAP_REG) >> FRO) & FRO_MASK;

    /* Get bits 33 to 32 from higher Capability Register */
    fro_offset |= (vtd_read32(drhu_id, CAP_REG + 4) & 0x3) << 8;

    return fro_offset << 4;
}

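/* Globally invalidate the context-cache of every remapping unit by setting the
 * ICC and CIRG bits in CCMD_REG and spinning until hardware clears ICC again.
 */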
void invalidate_context_cache(void)
{
    /* FIXME - bugzilla bug 172
     * 1. Instead of assuming global invalidation, this function should
     *    accept a parameter to control the granularity of invalidation
     *    request.
     * 2. Instead of doing invalidation for all the IOMMUs, it should
     *    only do it for the IOMMU responsible for the requesting PCI
     *    device.
     */

    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        /* Wait till ICC bit is clear */
        uint64_t ccmd = 0;
        while ((vtd_read64(i, CCMD_REG) >> ICC) & 1);

        /* Program CIRG for Global Invalidation by setting bit 61 which
         * will be bit 29 in upper 32 bits of CCMD_REG
         */
        ccmd = ((uint64_t)CONTEXT_GLOBAL_INVALIDATE << CIRG) | (1ull << ICC);

        /* Invalidate Context Cache */
        vtd_write64(i, CCMD_REG, ccmd);

        /* Wait for the invalidation to complete */
        while ((vtd_read64(i, CCMD_REG) >> ICC) & 1);
    }
}

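/* Globally invalidate the IOTLB of every remapping unit through its IOTLB
 * invalidate register (located via get_ivo()), requesting read and write
 * drain, and spin until hardware clears the IVT bit.
 */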
void invalidate_iotlb(void)
{
    /* FIXME - bugzilla bug 172
     * 1. Instead of assuming global invalidation, this function should
     *    accept a parameter to control the granularity of invalidation
     *    request.
     * 2. Instead of doing invalidation for all the IOMMUs, it should
     *    only do it for the IOMMU responsible for the requesting PCI
     *    device.
     */

    uint8_t invalidate_command = IOTLB_GLOBAL_INVALIDATE;
    uint32_t iotlb_reg_upper;
    uint32_t ivo_offset;
    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        ivo_offset = get_ivo(i);

        /* Wait till IVT bit is clear */
        while ((vtd_read32(i, ivo_offset + IOTLB_REG + 4) >> IVT) & 1);

        /* Program IIRG for Global Invalidation by setting bit 60 which
         * will be bit 28 in upper 32 bits of IOTLB_REG
         */
        iotlb_reg_upper = invalidate_command << IIRG;

        /* Invalidate IOTLB */
        iotlb_reg_upper |= BIT(IVT);
        iotlb_reg_upper |= DMA_TLB_READ_DRAIN | DMA_TLB_WRITE_DRAIN;

        vtd_write32(i, ivo_offset + IOTLB_REG, 0);
        vtd_write32(i, ivo_offset + IOTLB_REG + 4, iotlb_reg_upper);

        /* Wait for the invalidation to complete */
        while ((vtd_read32(i, ivo_offset + IOTLB_REG + 4) >> IVT) & 1);
    }
}

static void vtd_clear_fault(drhu_id_t i, word_t fr_reg)
{
    /* Clear the 'F' (Fault) bit to indicate that this fault is processed */
    vtd_write32(i, fr_reg + 12, BIT(FAULT));
}

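/* Walk the fault recording registers of one remapping unit, starting at the
 * index reported in FSTS_REG: print the source-id, address and reason of each
 * pending fault, clear it, and finally clear a primary fault overflow
 * condition if one is flagged.
 */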
static void vtd_process_faults(drhu_id_t i)
{
    /* Fault Recording register offset relative to the base register */
    uint32_t fro_offset;
    uint32_t source_id UNUSED;
    uint32_t fault_type UNUSED;
    uint32_t address[2] UNUSED;
    uint32_t reason UNUSED;
    uint32_t num_fault_regs;
    uint32_t fr_reg;
    uint32_t fault_status;
    uint32_t fault_record_index;

    /* Retrieves FRO by looking into Capability register bits 33 to 24 */
    fro_offset = get_fro_offset(i);
    fault_status = (vtd_read32(i, FSTS_REG) >> PPF) & PPF_MASK;

    if (fault_status) {
        num_fault_regs = ((vtd_read32(i, CAP_REG + 4) >> NFR) & NFR_MASK) + 1;
        fault_record_index = (vtd_read32(i, FSTS_REG) >> FRI) & FRI_MASK;
        fr_reg = fro_offset + 16 * fault_record_index;

        /* Traverse the fault register ring buffer */
        do {
            source_id = vtd_read32(i, fr_reg + 8) & SID_MASK;

            fault_type = (vtd_read32(i, fr_reg + 12) >> FAULT_TYPE) & 1;
            address[1] = vtd_read32(i, fr_reg + 4);
            address[0] = vtd_read32(i, fr_reg);
            reason = vtd_read32(i, fr_reg + 12) & FR_MASK;

            printf("IOMMU: DMA %s page fault ", fault_type ? "read" : "write");
            printf("from 0x%x (bus: 0x%lx/dev: 0x%lx/fun: 0x%lx) ", source_id,
                   SID_BUS(source_id), SID_DEV(source_id), SID_FUNC(source_id));
            printf("on address 0x%x:%x ", address[1], address[0]);
            printf("with reason code 0x%x\n", reason);

            vtd_clear_fault(i, fr_reg);

            fault_record_index = (fault_record_index + 1) % num_fault_regs;
            fr_reg = fro_offset + 16 * fault_record_index;
        } while ((vtd_read32(i, fr_reg + 12) >> FAULT) & 1);

        /* Check for Primary Fault Overflow */
        if (vtd_read32(i, FSTS_REG) & 1) {
            /* Clear PFO bit, so new faults will be generated again! */
            vtd_write32(i, FSTS_REG, 1);
        }
    }
}

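/* Fault event handler: process any pending faults on all remapping units. */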
void vtd_handle_fault(void)
{
    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        vtd_process_faults(i);
    }
}

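/* Compute how many boot-time paging structures IOMMU initialisation will
 * consume: the root table, one context table per root-table entry, and the IO
 * page tables needed to map every RMRR region (duplicate regions are filtered
 * out first so they are not counted twice).
 */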
BOOT_CODE word_t vtd_get_n_paging(acpi_rmrr_list_t *rmrr_list)
{
    if (x86KSnumDrhu == 0) {
        return 0;
    }
    assert(x86KSnumIOPTLevels > 0);

    word_t size = 1;        /* one for the root table */
    size += N_VTD_CONTEXTS; /* one for each context */
    size += rmrr_list->num; /* one for each device */

    if (rmrr_list->num == 0) {
        return size;
    }

    /* filter out identical regions by pci bus id */
    acpi_rmrr_list_t filtered;
    filtered.entries[0] = rmrr_list->entries[0];
    filtered.num = 1;

    for (word_t i = 1; i < rmrr_list->num; i++) {
        if (vtd_get_root_index(rmrr_list->entries[i].device) !=
            vtd_get_root_index(filtered.entries[filtered.num - 1].device) &&
            rmrr_list->entries[i].base != filtered.entries[filtered.num - 1].base &&
            rmrr_list->entries[i].limit != filtered.entries[filtered.num - 1].limit) {
            filtered.entries[filtered.num] = rmrr_list->entries[i];
            filtered.num++;
        }
    }

    for (word_t i = x86KSnumIOPTLevels - 1; i > 0; i--) {
        /* If we are still looking up bits beyond the 32 bits of physical
         * address space that we support then we select entry 0 in the
         * current PT */
        if ((VTD_PT_INDEX_BITS * i + seL4_PageBits) >= 32) {
            size++;
        } else {
            for (word_t j = 0; j < filtered.num; j++) {
                v_region_t region = (v_region_t) {
                    .start = filtered.entries[j].base,
                    .end = filtered.entries[j].limit
                };
                size += get_n_paging(region, 32 - (VTD_PT_INDEX_BITS * i + seL4_PageBits));
            }
        }
    }
    return size;
}

/* This function is a simplistic duplication of some of the logic
 * in iospace.c
 */
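/* Map a single frame of an RMRR region 1:1 into the IO address space selected
 * by context_index, allocating the context entry and any missing intermediate
 * IO page tables from the boot allocator as required.
 */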
BOOT_CODE static void vtd_map_reserved_page(vtd_cte_t *vtd_context_table, int context_index, paddr_t addr)
{
    int i;
    vtd_pte_t *iopt;
    vtd_pte_t *vtd_pte_slot;
    /* first check for the first page table */
    vtd_cte_t *vtd_context_slot = vtd_context_table + context_index;
    if (!vtd_cte_ptr_get_present(vtd_context_slot)) {
        iopt = (vtd_pte_t *) it_alloc_paging();
        flushCacheRange(iopt, seL4_IOPageTableBits);

        *vtd_context_slot = vtd_cte_new(
                                x86KSFirstValidIODomain, /* Domain ID */
                                true,                    /* RMRR Mapping */
                                x86KSnumIOPTLevels - 2,  /* Address Width */
                                pptr_to_paddr(iopt),     /* Address Space Root */
                                0,                       /* Translation Type */
                                true);                   /* Present */
        x86KSFirstValidIODomain++;
        flushCacheRange(vtd_context_slot, VTD_CTE_SIZE_BITS);
    } else {
        iopt = (vtd_pte_t *)paddr_to_pptr(vtd_cte_ptr_get_asr(vtd_context_slot));
    }
    /* now recursively find and map page tables */
    for (i = x86KSnumIOPTLevels - 1; i >= 0; i--) {
        uint32_t iopt_index;
        /* If we are still looking up bits beyond the 32 bits of physical
         * address space that we support then we select entry 0 in the
         * current PT */
        if (VTD_PT_INDEX_BITS * i + seL4_PageBits >= 32) {
            iopt_index = 0;
        } else {
            iopt_index = ((addr >> seL4_PageBits) >> (VTD_PT_INDEX_BITS * i)) & MASK(VTD_PT_INDEX_BITS);
        }
        vtd_pte_slot = iopt + iopt_index;
        if (i == 0) {
            /* Now put the mapping in */
            *vtd_pte_slot = vtd_pte_new(addr, 1, 1);
            flushCacheRange(vtd_pte_slot, VTD_PTE_SIZE_BITS);
        } else {
            if (!vtd_pte_ptr_get_write(vtd_pte_slot)) {
                iopt = (vtd_pte_t *) it_alloc_paging();
                flushCacheRange(iopt, seL4_IOPageTableBits);

                *vtd_pte_slot = vtd_pte_new(pptr_to_paddr(iopt), 1, 1);
                flushCacheRange(vtd_pte_slot, VTD_PTE_SIZE_BITS);
            } else {
                iopt = (vtd_pte_t *)paddr_to_pptr(vtd_pte_ptr_get_addr(vtd_pte_slot));
            }
        }
    }
}

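/* Allocate the context table for one PCI bus, publish it in the root table,
 * and identity-map any RMRR regions belonging to devices on that bus.
 */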
BOOT_CODE static void vtd_create_context_table(uint8_t bus, acpi_rmrr_list_t *rmrr_list)
{
    word_t i;
    vtd_cte_t *vtd_context_table = (vtd_cte_t *) it_alloc_paging();

    printf("IOMMU: Create VTD context table for PCI bus 0x%x (pptr=%p)\n", bus, vtd_context_table);
    flushCacheRange(vtd_context_table, VTD_CT_SIZE_BITS);

    x86KSvtdRootTable[bus] =
        vtd_rte_new(
            pptr_to_paddr(vtd_context_table), /* Context Table Pointer */
            true                              /* Present */
        );
    /* map in any RMRR regions */
    for (i = 0; i < rmrr_list->num; i++) {
        if (vtd_get_root_index(rmrr_list->entries[i].device) == bus) {
            uint32_t addr;
            for (addr = rmrr_list->entries[i].base; addr < rmrr_list->entries[i].limit; addr += BIT(seL4_PageBits)) {
                vtd_map_reserved_page(vtd_context_table, vtd_get_context_index(rmrr_list->entries[i].device), addr);
            }
        }
    }
}

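/* Program every remapping unit with the root table, invalidate the context
 * cache and IOTLB, direct fault events to the given CPU's local APIC, flush
 * the write buffer and finally set the translation enable bit. Fails if the
 * local APIC base address cannot be determined.
 */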
BOOT_CODE static bool_t vtd_enable(cpu_id_t cpu_id)
{
    drhu_id_t i;
    uint32_t status = 0;

    for (i = 0; i < x86KSnumDrhu; i++) {
        pptr_t pa = (pptr_t)pptr_to_paddr((void *)x86KSvtdRootTable);

        /* Set the Root Table Register */
        vtd_write64(i, RTADDR_REG, pa);
        status = vtd_read32(i, GSTS_REG);
        status |= BIT(SRTP);
        /* Set SRTP bit in GCMD_REG */
        vtd_write32(i, GCMD_REG, status);

        /* Wait for SRTP operation to complete by polling
         * RTPS bit from GSTS_REG
         */
        while (!((vtd_read32(i, GSTS_REG) >> RTPS) & 1));
    }

    /* Globally invalidate context cache of all IOMMUs */
    invalidate_context_cache();

    /* Globally invalidate IOTLB of all IOMMUs */
    invalidate_iotlb();

    for (i = 0; i < x86KSnumDrhu; i++) {
        uint32_t data, addr;

        data = int_iommu;
        addr = apic_get_base_paddr();
        if (!addr) {
            return false;
        }
        addr |= (cpu_id << 12);

        vtd_process_faults(i);
        vtd_write32(i, FECTL_REG, 0);
        vtd_write32(i, FEDATA_REG, data);
        vtd_write32(i, FEADDR_REG, addr);
        vtd_write32(i, FEUADDR_REG, 0);
        status = vtd_read32(i, GSTS_REG);
        status |= BIT(WBF);
        /* Flush IOMMU write buffer */
        vtd_write32(i, GCMD_REG, status);
        while (((vtd_read32(i, GSTS_REG) >> WBFS) & 1));

        printf("IOMMU 0x%x: enabling...", i);

        status = vtd_read32(i, GSTS_REG);
        status |= BIT(TE);
        /* Enable DMA translation by setting TE bit in GCMD_REG */
        vtd_write32(i, GCMD_REG, status);

        /* Wait for Translation Enable operation to complete by polling
         * TES bit from GSTS_REG
         */
        while (!((vtd_read32(i, GSTS_REG) >> TES) & 1));

        printf(" enabled\n");
    }
    return true;
}

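/* Record the number of remapping units and derive settings common to all of
 * them: the narrowest supported domain-ID width and a page-table depth
 * (SAGAW) that every unit advertises, preferring three levels when available.
 * Fails if the units do not share a supported depth.
 */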
BOOT_CODE bool_t vtd_init_num_iopts(uint32_t num_drhu)
{
    x86KSnumDrhu = num_drhu;
    x86KSFirstValidIODomain = 0;

    if (x86KSnumDrhu == 0) {
        return true;
    }

    uint32_t aw_bitmask = 0xffffffff;
    /* Start the number of domains at 16 bits */
    uint32_t num_domain_id_bits = 16;
    for (drhu_id_t i = 0; i < x86KSnumDrhu; i++) {
        uint32_t bits_supported = 4 + 2 * (vtd_read32(i, CAP_REG) & 7);
        aw_bitmask &= vtd_read32(i, CAP_REG) >> SAGAW;
        printf("IOMMU 0x%x: %d-bit domain IDs supported\n", i, bits_supported);
        if (bits_supported < num_domain_id_bits) {
            num_domain_id_bits = bits_supported;
        }
    }

    x86KSnumIODomainIDBits = num_domain_id_bits;
    UNUSED uint32_t max_num_iopt_levels;
    if (aw_bitmask & SAGAW_6_LEVEL) {
        max_num_iopt_levels = 6;
    } else if (aw_bitmask & SAGAW_5_LEVEL) {
        max_num_iopt_levels = 5;
    } else if (aw_bitmask & SAGAW_4_LEVEL) {
        max_num_iopt_levels = 4;
    } else if (aw_bitmask & SAGAW_3_LEVEL) {
        max_num_iopt_levels = 3;
    } else if (aw_bitmask & SAGAW_2_LEVEL) {
        max_num_iopt_levels = 2;
    } else {
        printf("IOMMU: mismatch of supported number of PT levels between IOMMUs\n");
        return false;
    }

    if (aw_bitmask & SAGAW_3_LEVEL) {
        x86KSnumIOPTLevels = 3;
    } else if (aw_bitmask & SAGAW_4_LEVEL) {
        x86KSnumIOPTLevels = 4;
    } else if (aw_bitmask & SAGAW_5_LEVEL) {
        x86KSnumIOPTLevels = 5;
    } else if (aw_bitmask & SAGAW_6_LEVEL) {
        x86KSnumIOPTLevels = 6;
    } else if (aw_bitmask & SAGAW_2_LEVEL) {
        x86KSnumIOPTLevels = 2;
    } else {
        printf("IOMMU: mismatch of supported number of PT levels between IOMMUs\n");
        return false;
    }

    printf("IOMMU: Using %d page-table levels (max. supported: %d)\n", x86KSnumIOPTLevels, max_num_iopt_levels);
    return true;
}

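/* Top-level IOMMU boot initialisation: allocate the root table, build a
 * context table for each of the N_VTD_CONTEXTS buses (mapping RMRR regions as
 * they are encountered), then enable translation on all remapping units.
 */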
BOOT_CODE bool_t vtd_init(cpu_id_t cpu_id, acpi_rmrr_list_t *rmrr_list)
{
    if (x86KSnumDrhu == 0) {
        return true;
    }

    x86KSvtdRootTable = (vtd_rte_t *) it_alloc_paging();
    for (uint32_t bus = 0; bus < N_VTD_CONTEXTS; bus++) {
        vtd_create_context_table(bus, rmrr_list);
    }

    flushCacheRange(x86KSvtdRootTable, VTD_RT_SIZE_BITS);

    if (!vtd_enable(cpu_id)) {
        return false;
    }
    return true;
}

#endif /* CONFIG_IOMMU */