/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include <config.h>

#ifdef CONFIG_IOMMU

#include <kernel/boot.h>
#include <machine.h>
#include <machine/io.h>
#include <arch/kernel/apic.h>
#include <arch/model/statedata.h>
#include <linker.h>
#include <plat/machine/acpi.h>
#include <plat/machine/intel-vtd.h>
#include <util.h>

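/* MMIO register offsets within each DRHU's 4KiB remapping-hardware register
 * page, per the Intel VT-d specification.
 */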
#define RTADDR_REG  0x20
#define GCMD_REG    0x18
#define GSTS_REG    0x1C
#define CCMD_REG    0x28
#define ECAP_REG    0x10
#define IOTLB_REG   0x08
#define FSTS_REG    0x34
#define FECTL_REG   0x38
#define FEDATA_REG  0x3C
#define FEADDR_REG  0x40
#define FEUADDR_REG 0x44
#define CAP_REG     0x08

/* Bit Positions within Registers */
#define SRTP        30  /* Set Root Table Pointer */
#define RTPS        30  /* Root Table Pointer Status */
#define TE          31  /* Translation Enable */
#define TES         31  /* Translation Enable Status */

/* ICC is bit 63 of CCMD_REG; when the register is accessed as two
 * 4-byte words, it becomes bit 31 of the upper 32-bit word.
 */
#define ICC         (31 + 32)  /* Invalidate Context Cache */
#define CIRG        (29 + 32)  /* Context Invalidation Request Granularity */
#define CAIG        27  /* Context Actual Invalidation Granularity */
#define CAIG_MASK   0x3
#define IVO_MASK    0x3FF
#define IVT         31  /* Invalidate IOTLB */
#define IIRG        28  /* IOTLB Invalidation Request Granularity */
#define IAIG        25  /* IOTLB Actual Invalidation Granularity */
#define IAIG_MASK   0x7
#define IP          30  /* Interrupt Pending */
#define FRI         0x8 /* Fault Recording Index */
#define FRI_MASK    0xFF
#define FRO         24
#define FRO_MASK    0xFF
#define FI          12
#define SID_MASK    0xFFFF
#define SID_BUS(a)  (MASK(8) & (a >> 8))
#define SID_DEV(a)  (MASK(5) & (a >> 3))
#define SID_FUNC(a) (MASK(3) & a)
#define FR_MASK     0xFF
#define FAULT_TYPE  30
#define FAULT       31
#define NFR         8   /* high word of CAP_REG */
#define NFR_MASK    0xff
#define PPF         1
#define PPF_MASK    1
#define PRESENT     1
#define WBF         27
#define WBFS        27
#define DID         8
#define RW          0x3

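/* SAGAW: Supported Adjusted Guest Address Widths field (CAP_REG bits 12:8,
 * per the VT-d spec); each bit set indicates support for a page-table walk
 * of the corresponding depth.
 */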
#define SAGAW         8
#define SAGAW_2_LEVEL 0x01
#define SAGAW_3_LEVEL 0x02
#define SAGAW_4_LEVEL 0x04
#define SAGAW_5_LEVEL 0x08
#define SAGAW_6_LEVEL 0x10

#define CONTEXT_GLOBAL_INVALIDATE 0x1
#define IOTLB_GLOBAL_INVALIDATE   0x1

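/* Request draining of pending read/write DMA as part of an IOTLB
 * invalidation.
 */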
#define DMA_TLB_READ_DRAIN  BIT(17)
#define DMA_TLB_WRITE_DRAIN BIT(16)

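/* The root table is indexed by 8-bit PCI bus number, and each context table
 * by 8-bit device:function, so both have 256 entries.
 */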
#define N_VTD_CONTEXTS 256

typedef uint32_t drhu_id_t;

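/* Each DRHU's register set is mapped into the kernel window as one 4KiB
 * page, starting at PPTR_DRHU_START and indexed by DRHU id.
 */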
static inline uint32_t vtd_read32(drhu_id_t drhu_id, uint32_t offset)
{
    return *(volatile uint32_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset);
}

static inline void vtd_write32(drhu_id_t drhu_id, uint32_t offset, uint32_t value)
{
    *(volatile uint32_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset) = value;
}


static inline uint64_t vtd_read64(drhu_id_t drhu_id, uint32_t offset)
{
    return *(volatile uint64_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset);
}

static inline void vtd_write64(drhu_id_t drhu_id, uint32_t offset, uint64_t value)
{
    *(volatile uint64_t *)(PPTR_DRHU_START + (drhu_id << PAGE_BITS) + offset) = value;
}

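/* IVO: byte offset of the IOTLB registers, taken from the IRO field
 * (bits 17:8) of the Extended Capability Register, which counts in
 * 16-byte units.
 */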
static inline uint32_t get_ivo(drhu_id_t drhu_id)
{
    return ((vtd_read32(drhu_id, ECAP_REG) >> 8) & IVO_MASK) * 16;
}

static uint32_t get_fro_offset(drhu_id_t drhu_id)
{
    uint32_t fro_offset;

    /* Get bits 31 to 24 from the lower word of the Capability Register */
    fro_offset = (vtd_read32(drhu_id, CAP_REG) >> FRO) & FRO_MASK;

    /* Get bits 33 to 32 from the upper word of the Capability Register */
    fro_offset |= (vtd_read32(drhu_id, CAP_REG + 4) & 0x3) << 8;

    return fro_offset << 4;
}

void invalidate_context_cache(void)
{
    /* FIXME - bugzilla bug 172
     * 1. Instead of assuming global invalidation, this function should
     *    accept a parameter to control the granularity of the invalidation
     *    request.
     * 2. Instead of doing the invalidation for all the IOMMUs, it should
     *    only do it for the IOMMU responsible for the requesting PCI
     *    device.
     */

    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        /* Wait until the ICC bit is clear */
        uint64_t ccmd = 0;
        while ((vtd_read64(i, CCMD_REG) >> ICC) & 1);

        /* Program CIRG for global invalidation by setting bit 61, which
         * is bit 29 of the upper 32-bit word of CCMD_REG
         */
        ccmd = ((uint64_t)CONTEXT_GLOBAL_INVALIDATE << CIRG) | (1ull << ICC);

        /* Invalidate the context cache */
        vtd_write64(i, CCMD_REG, ccmd);

        /* Wait for the invalidation to complete */
        while ((vtd_read64(i, CCMD_REG) >> ICC) & 1);
    }
}

void invalidate_iotlb(void)
{
    /* FIXME - bugzilla bug 172
     * 1. Instead of assuming global invalidation, this function should
     *    accept a parameter to control the granularity of the invalidation
     *    request.
     * 2. Instead of doing the invalidation for all the IOMMUs, it should
     *    only do it for the IOMMU responsible for the requesting PCI
     *    device.
     */

    uint8_t   invalidate_command = IOTLB_GLOBAL_INVALIDATE;
    uint32_t  iotlb_reg_upper;
    uint32_t  ivo_offset;
    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        ivo_offset = get_ivo(i);

        /* Wait until the IVT bit is clear */
        while ((vtd_read32(i, ivo_offset + IOTLB_REG + 4) >> IVT) & 1);

        /* Program IIRG for global invalidation by setting bit 60, which
         * is bit 28 of the upper 32-bit word of IOTLB_REG
         */
        iotlb_reg_upper = invalidate_command << IIRG;

        /* Invalidate the IOTLB */
        iotlb_reg_upper |= BIT(IVT);
        iotlb_reg_upper |= DMA_TLB_READ_DRAIN | DMA_TLB_WRITE_DRAIN;

        vtd_write32(i, ivo_offset + IOTLB_REG, 0);
        vtd_write32(i, ivo_offset + IOTLB_REG + 4, iotlb_reg_upper);

        /* Wait for the invalidation to complete */
        while ((vtd_read32(i, ivo_offset + IOTLB_REG + 4) >> IVT) & 1);
    }
}

static void vtd_clear_fault(drhu_id_t i, word_t fr_reg)
{
    /* Clear the 'F' (Fault) bit to indicate that this fault is processed */
    vtd_write32(i, fr_reg + 12, BIT(FAULT));
}

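/* Each fault recording register is 128 bits (16 bytes) wide, so record n
 * lives at fro_offset + 16 * n.
 */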
static void vtd_process_faults(drhu_id_t i)
{
    /* Fault Recording register offset, relative to the base register */
    uint32_t fro_offset;
    uint32_t source_id UNUSED;
    uint32_t fault_type UNUSED;
    uint32_t address[2] UNUSED;
    uint32_t reason UNUSED;
    uint32_t num_fault_regs;
    uint32_t fr_reg;
    uint32_t fault_status;
    uint32_t fault_record_index;

    /* Retrieve the FRO from bits 33 to 24 of the Capability Register */
    fro_offset = get_fro_offset(i);
    fault_status = (vtd_read32(i, FSTS_REG) >> PPF) & PPF_MASK;

    if (fault_status) {
        num_fault_regs = ((vtd_read32(i, CAP_REG + 4) >> NFR) & NFR_MASK) + 1;
        fault_record_index = (vtd_read32(i, FSTS_REG) >> FRI) & FRI_MASK;
        fr_reg = fro_offset + 16 * fault_record_index;

        /* Traverse the fault register ring buffer */
        do {
            source_id = vtd_read32(i, fr_reg + 8) & SID_MASK;

            fault_type = (vtd_read32(i, fr_reg + 12) >> FAULT_TYPE) & 1;
            address[1] = vtd_read32(i, fr_reg + 4);
            address[0] = vtd_read32(i, fr_reg);
            reason = vtd_read32(i, fr_reg + 12) & FR_MASK;

            printf("IOMMU: DMA %s page fault ", fault_type ? "read" : "write");
            printf("from 0x%x (bus: 0x%lx/dev: 0x%lx/fun: 0x%lx) ", source_id,
                   SID_BUS(source_id), SID_DEV(source_id), SID_FUNC(source_id));
            printf("on address 0x%x:%x ", address[1], address[0]);
            printf("with reason code 0x%x\n", reason);

            vtd_clear_fault(i, fr_reg);

            fault_record_index = (fault_record_index + 1) % num_fault_regs;
            fr_reg = fro_offset + 16 * fault_record_index;
        } while ((vtd_read32(i, fr_reg + 12) >> FAULT) & 1);

        /* Check for Primary Fault Overflow */
        if (vtd_read32(i, FSTS_REG) & 1) {
            /* Clear the PFO bit so that new faults can be recorded again */
            vtd_write32(i, FSTS_REG, 1);
        }
    }
}

void vtd_handle_fault(void)
{
    drhu_id_t i;

    for (i = 0; i < x86KSnumDrhu; i++) {
        vtd_process_faults(i);
    }
}

BOOT_CODE word_t vtd_get_n_paging(acpi_rmrr_list_t *rmrr_list)
{
    if (x86KSnumDrhu == 0) {
        return 0;
    }
    assert(x86KSnumIOPTLevels > 0);

    word_t size = 1; /* one for the root table */
    size += N_VTD_CONTEXTS; /* one context table for each bus */
    size += rmrr_list->num; /* one for each device */

    if (rmrr_list->num == 0) {
        return size;
    }

    /* filter out identical regions by PCI bus id */
    acpi_rmrr_list_t filtered;
    filtered.entries[0] = rmrr_list->entries[0];
    filtered.num = 1;

    for (word_t i = 1; i < rmrr_list->num; i++) {
        if (vtd_get_root_index(rmrr_list->entries[i].device) !=
            vtd_get_root_index(filtered.entries[filtered.num - 1].device) &&
            rmrr_list->entries[i].base != filtered.entries[filtered.num - 1].base &&
            rmrr_list->entries[i].limit != filtered.entries[filtered.num - 1].limit) {
            filtered.entries[filtered.num] = rmrr_list->entries[i];
            filtered.num++;
        }
    }

    for (word_t i = x86KSnumIOPTLevels - 1; i > 0; i--) {
        /* If we are still looking up bits beyond the 32 bits of physical
         * address that we support, we select entry 0 in the current PT */
        if ((VTD_PT_INDEX_BITS * i + seL4_PageBits) >= 32) {
            size++;
        } else {
            for (word_t j = 0; j < filtered.num; j++) {
                v_region_t region = (v_region_t) {
                    .start = filtered.entries[j].base,
                    .end = filtered.entries[j].limit
                };
                size += get_n_paging(region, 32 - (VTD_PT_INDEX_BITS * i + seL4_PageBits));
            }
        }
    }
    return size;
}

/* This function is a simplistic duplication of some of the logic
 * in iospace.c
 */
BOOT_CODE static void vtd_map_reserved_page(vtd_cte_t *vtd_context_table, int context_index, paddr_t addr)
{
    int i;
    vtd_pte_t *iopt;
    vtd_pte_t *vtd_pte_slot;
    /* first check for the first page table */
    vtd_cte_t *vtd_context_slot = vtd_context_table + context_index;
    if (!vtd_cte_ptr_get_present(vtd_context_slot)) {
        iopt = (vtd_pte_t *) it_alloc_paging();
        flushCacheRange(iopt, seL4_IOPageTableBits);

        *vtd_context_slot = vtd_cte_new(
                                x86KSFirstValidIODomain,  /* Domain ID          */
                                true,                     /* RMRR Mapping       */
                                x86KSnumIOPTLevels - 2,   /* Address Width      */
                                pptr_to_paddr(iopt),      /* Address Space Root */
                                0,                        /* Translation Type   */
                                true);                    /* Present            */
        x86KSFirstValidIODomain++;
        flushCacheRange(vtd_context_slot, VTD_CTE_SIZE_BITS);
    } else {
        iopt = (vtd_pte_t *)paddr_to_pptr(vtd_cte_ptr_get_asr(vtd_context_slot));
    }
    /* now walk down the page-table levels, mapping as we go */
    for (i = x86KSnumIOPTLevels - 1; i >= 0; i--) {
        uint32_t iopt_index;
        /* If we are still looking up bits beyond the 32 bits of physical
         * address that we support, we select entry 0 in the current PT */
        if (VTD_PT_INDEX_BITS * i + seL4_PageBits >= 32) {
            iopt_index = 0;
        } else {
            iopt_index = ((addr >> seL4_PageBits) >> (VTD_PT_INDEX_BITS * i)) & MASK(VTD_PT_INDEX_BITS);
        }
        vtd_pte_slot = iopt + iopt_index;
        if (i == 0) {
            /* Now put the mapping in */
            *vtd_pte_slot = vtd_pte_new(addr, 1, 1);
            flushCacheRange(vtd_pte_slot, VTD_PTE_SIZE_BITS);
        } else {
            if (!vtd_pte_ptr_get_write(vtd_pte_slot)) {
                iopt = (vtd_pte_t *) it_alloc_paging();
                flushCacheRange(iopt, seL4_IOPageTableBits);

                *vtd_pte_slot = vtd_pte_new(pptr_to_paddr(iopt), 1, 1);
                flushCacheRange(vtd_pte_slot, VTD_PTE_SIZE_BITS);
            } else {
                iopt = (vtd_pte_t *)paddr_to_pptr(vtd_pte_ptr_get_addr(vtd_pte_slot));
            }
        }
    }
}

BOOT_CODE static void vtd_create_context_table(uint8_t bus, acpi_rmrr_list_t *rmrr_list)
{
    word_t i;
    vtd_cte_t *vtd_context_table = (vtd_cte_t *) it_alloc_paging();

    printf("IOMMU: Create VTD context table for PCI bus 0x%x (pptr=%p)\n", bus, vtd_context_table);
    flushCacheRange(vtd_context_table, VTD_CT_SIZE_BITS);

    x86KSvtdRootTable[bus] =
        vtd_rte_new(
            pptr_to_paddr(vtd_context_table), /* Context Table Pointer */
            true                              /* Present               */
        );
    /* map in any RMRR regions */
    for (i = 0; i < rmrr_list->num; i++) {
        if (vtd_get_root_index(rmrr_list->entries[i].device) == bus) {
            uint32_t addr;
            for (addr = rmrr_list->entries[i].base; addr < rmrr_list->entries[i].limit; addr += BIT(seL4_PageBits)) {
                vtd_map_reserved_page(vtd_context_table, vtd_get_context_index(rmrr_list->entries[i].device), addr);
            }
        }
    }
}

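/* Enable DMA remapping on every DRHU: program the root table pointer,
 * invalidate the context caches and IOTLBs, set up fault event interrupt
 * delivery, flush the write buffer, and finally set the Translation
 * Enable bit.
 */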
BOOT_CODE static bool_t vtd_enable(cpu_id_t cpu_id)
{
    drhu_id_t i;
    uint32_t status = 0;

    for (i = 0; i < x86KSnumDrhu; i++) {
        pptr_t pa = (pptr_t)pptr_to_paddr((void *)x86KSvtdRootTable);

        /* Set the Root Table Register */
        vtd_write64(i, RTADDR_REG, pa);
        status = vtd_read32(i, GSTS_REG);
        status |= BIT(SRTP);
        /* Set the SRTP bit in GCMD_REG */
        vtd_write32(i, GCMD_REG, status);

        /* Wait for the SRTP operation to complete by polling the
         * RTPS bit of GSTS_REG
         */
        while (!((vtd_read32(i, GSTS_REG) >> RTPS) & 1));
    }

    /* Globally invalidate the context cache of all IOMMUs */
    invalidate_context_cache();

    /* Globally invalidate the IOTLB of all IOMMUs */
    invalidate_iotlb();

    for (i = 0; i < x86KSnumDrhu; i++) {
        uint32_t data, addr;

        data = int_iommu;
        addr = apic_get_base_paddr();
        if (!addr) {
            return false;
        }
        addr |= (cpu_id << 12);

        vtd_process_faults(i);
        vtd_write32(i, FECTL_REG, 0);
        vtd_write32(i, FEDATA_REG, data);
        vtd_write32(i, FEADDR_REG, addr);
        vtd_write32(i, FEUADDR_REG, 0);
        status = vtd_read32(i, GSTS_REG);
        status |= BIT(WBF);
        /* Flush the IOMMU write buffer */
        vtd_write32(i, GCMD_REG, status);
        while (((vtd_read32(i, GSTS_REG) >> WBFS) & 1));

        printf("IOMMU 0x%x: enabling...", i);

        status = vtd_read32(i, GSTS_REG);
        status |= BIT(TE);
        /* Enable DMA translation by setting the TE bit in GCMD_REG */
        vtd_write32(i, GCMD_REG, status);

        /* Wait for the Translation Enable operation to complete by
         * polling the TES bit of GSTS_REG
         */
        while (!((vtd_read32(i, GSTS_REG) >> TES) & 1));

        printf(" enabled\n");
    }
    return true;
}

BOOT_CODE bool_t vtd_init_num_iopts(uint32_t num_drhu)
{
    x86KSnumDrhu = num_drhu;
    x86KSFirstValidIODomain = 0;

    if (x86KSnumDrhu == 0) {
        return true;
    }

    uint32_t aw_bitmask = 0xffffffff;
    /* Start with 16-bit domain IDs and reduce to the smallest supported width */
    uint32_t num_domain_id_bits = 16;
    for (drhu_id_t i = 0; i < x86KSnumDrhu; i++) {
        uint32_t bits_supported = 4 + 2 * (vtd_read32(i, CAP_REG) & 7);
        aw_bitmask &= vtd_read32(i, CAP_REG) >> SAGAW;
        printf("IOMMU 0x%x: %d-bit domain IDs supported\n", i, bits_supported);
        if (bits_supported < num_domain_id_bits) {
            num_domain_id_bits = bits_supported;
        }
    }

    x86KSnumIODomainIDBits = num_domain_id_bits;
    UNUSED uint32_t max_num_iopt_levels;
    if (aw_bitmask & SAGAW_6_LEVEL) {
        max_num_iopt_levels = 6;
    } else if (aw_bitmask & SAGAW_5_LEVEL) {
        max_num_iopt_levels = 5;
    } else if (aw_bitmask & SAGAW_4_LEVEL) {
        max_num_iopt_levels = 4;
    } else if (aw_bitmask & SAGAW_3_LEVEL) {
        max_num_iopt_levels = 3;
    } else if (aw_bitmask & SAGAW_2_LEVEL) {
        max_num_iopt_levels = 2;
    } else {
        printf("IOMMU: mismatch of supported number of PT levels between IOMMUs\n");
        return false;
    }

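    /* Prefer 3-level tables, presumably because they are the shallowest
     * layout that still covers the 32-bit addresses mapped above; fall back
     * to deeper layouts (and lastly 2-level) only if 3-level is unsupported.
     */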
    if (aw_bitmask & SAGAW_3_LEVEL) {
        x86KSnumIOPTLevels = 3;
    } else if (aw_bitmask & SAGAW_4_LEVEL) {
        x86KSnumIOPTLevels = 4;
    } else if (aw_bitmask & SAGAW_5_LEVEL) {
        x86KSnumIOPTLevels = 5;
    } else if (aw_bitmask & SAGAW_6_LEVEL) {
        x86KSnumIOPTLevels = 6;
    } else if (aw_bitmask & SAGAW_2_LEVEL) {
        x86KSnumIOPTLevels = 2;
    } else {
        printf("IOMMU: mismatch of supported number of PT levels between IOMMUs\n");
        return false;
    }

    printf("IOMMU: Using %d page-table levels (max. supported: %d)\n", x86KSnumIOPTLevels, max_num_iopt_levels);
    return true;
}


BOOT_CODE bool_t vtd_init(cpu_id_t cpu_id, acpi_rmrr_list_t *rmrr_list)
{
    if (x86KSnumDrhu == 0) {
        return true;
    }

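    /* Allocate the root table, then create a context table for each of the
     * 256 possible PCI buses.
     */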
    x86KSvtdRootTable = (vtd_rte_t *) it_alloc_paging();
    for (uint32_t bus = 0; bus < N_VTD_CONTEXTS; bus++) {
        vtd_create_context_table(bus, rmrr_list);
    }

    flushCacheRange(x86KSvtdRootTable, VTD_RT_SIZE_BITS);

    if (!vtd_enable(cpu_id)) {
        return false;
    }
    return true;
}

#endif /* CONFIG_IOMMU */