/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */


#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"
#include "../ats.h"

#define VTD_QI_TIMEOUT	1

static int __must_check invalidate_sync(struct iommu *iommu);

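/* Dump the queued invalidation registers, used when a wait descriptor times out. */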
static void print_qi_regs(struct iommu *iommu)
{
    u64 val;

    val = dmar_readq(iommu->reg, DMAR_IQA_REG);
    printk("DMAR_IQA_REG = %"PRIx64"\n", val);

    val = dmar_readq(iommu->reg, DMAR_IQH_REG);
    printk("DMAR_IQH_REG = %"PRIx64"\n", val);

    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
    printk("DMAR_IQT_REG = %"PRIx64"\n", val);
}

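/*
 * Return the index of the next free queue slot, derived from the IQT (tail)
 * register.  Spins while the queue is full, i.e. until hardware has fetched
 * enough descriptors for the head to move on.
 */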
static unsigned int qinval_next_index(struct iommu *iommu)
{
    u64 tail;

    tail = dmar_readq(iommu->reg, DMAR_IQT_REG);
    tail >>= QINVAL_INDEX_SHIFT;

    /* (tail+1 == head) indicates a full queue, wait for HW */
    while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
            ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
        cpu_relax();

    return tail;
}

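/*
 * Advance the IQT (tail) register past the descriptor at @index so that
 * hardware starts fetching and processing it.
 */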
static void qinval_update_qtail(struct iommu *iommu, unsigned int index)
{
    u64 val;

    /* Need to hold the register lock when updating the tail */
    ASSERT( spin_is_locked(&iommu->register_lock) );
    val = (index + 1) % QINVAL_ENTRY_NR;
    dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
}

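/*
 * Queue a context-cache invalidation descriptor and wait for the queue to
 * drain before returning.
 */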
static int __must_check queue_invalidate_context_sync(struct iommu *iommu,
                                                      u16 did, u16 source_id,
                                                      u8 function_mask,
                                                      u8 granu)
{
    unsigned long flags;
    unsigned int index;
    u64 entry_base;
    struct qinval_entry *qinval_entry, *qinval_entries;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
    qinval_entries = map_vtd_domain_page(entry_base);
    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];

    qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
    qinval_entry->q.cc_inv_dsc.lo.granu = granu;
    qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.cc_inv_dsc.lo.did = did;
    qinval_entry->q.cc_inv_dsc.lo.sid = source_id;
    qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
    qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.cc_inv_dsc.hi.res = 0;

    qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    unmap_vtd_domain_page(qinval_entries);

    return invalidate_sync(iommu);
}

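/*
 * Queue an IOTLB invalidation descriptor for the given domain/address range
 * and wait for the queue to drain before returning.
 */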
static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
                                                    u8 granu, u8 dr, u8 dw,
                                                    u16 did, u8 am, u8 ih,
                                                    u64 addr)
{
    unsigned long flags;
    unsigned int index;
    u64 entry_base;
    struct qinval_entry *qinval_entry, *qinval_entries;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
    qinval_entries = map_vtd_domain_page(entry_base);
    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];

    qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
    qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
    qinval_entry->q.iotlb_inv_dsc.lo.dr = dr;
    qinval_entry->q.iotlb_inv_dsc.lo.dw = dw;
    qinval_entry->q.iotlb_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.iotlb_inv_dsc.lo.did = did;
    qinval_entry->q.iotlb_inv_dsc.lo.res_2 = 0;

    qinval_entry->q.iotlb_inv_dsc.hi.am = am;
    qinval_entry->q.iotlb_inv_dsc.hi.ih = ih;
    qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
    qinval_entry->q.iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;

    unmap_vtd_domain_page(qinval_entries);
    qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return invalidate_sync(iommu);
}

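/*
 * Queue an invalidation wait descriptor whose status write targets the local
 * poll_slot, then poll that slot until hardware writes QINVAL_STAT_DONE or
 * the timeout expires.  Only the status-write (sw) completion method is
 * implemented; the interrupt method returns -EOPNOTSUPP.
 */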
static int __must_check queue_invalidate_wait(struct iommu *iommu,
                                              u8 iflag, u8 sw, u8 fn,
                                              bool_t flush_dev_iotlb)
{
    volatile u32 poll_slot = QINVAL_STAT_INIT;
    unsigned int index;
    unsigned long flags;
    u64 entry_base;
    struct qinval_entry *qinval_entry, *qinval_entries;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
    qinval_entries = map_vtd_domain_page(entry_base);
    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];

    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
    qinval_entry->q.inv_wait_dsc.lo.sdata = QINVAL_STAT_DONE;
    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(&poll_slot) >> 2;

    unmap_vtd_domain_page(qinval_entries);
    qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    /* We don't support the interrupt method for now */
    if ( sw )
    {
        s_time_t timeout;

        /* In case all wait descriptors write the same data to the same addr */
        timeout = NOW() + MILLISECS(flush_dev_iotlb ?
                                    iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);

        while ( poll_slot != QINVAL_STAT_DONE )
        {
            if ( NOW() > timeout )
            {
                print_qi_regs(iommu);
                printk(XENLOG_WARNING VTDPREFIX
                       " Queue invalidate wait descriptor timed out\n");
                return -ETIMEDOUT;
            }
            cpu_relax();
        }
        return 0;
    }

    return -EOPNOTSUPP;
}

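/* Drain the invalidation queue: queue a wait descriptor and poll for it. */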
static int __must_check invalidate_sync(struct iommu *iommu)
{
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    ASSERT(qi_ctrl->qinval_maddr);

    return queue_invalidate_wait(iommu, 0, 1, 1, 0);
}

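/*
 * Variant of invalidate_sync() for flushes that involve a device TLB: use
 * the longer device IOTLB timeout and, on timeout, notify the owning domain
 * via iommu_dev_iotlb_flush_timeout().
 */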
static int __must_check dev_invalidate_sync(struct iommu *iommu,
                                            struct pci_dev *pdev, u16 did)
{
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
    int rc;

    ASSERT(qi_ctrl->qinval_maddr);
    rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
    if ( rc == -ETIMEDOUT )
    {
        struct domain *d = NULL;

        if ( test_bit(did, iommu->domid_bitmap) )
            d = rcu_lock_domain_by_id(iommu->domid_map[did]);

        /*
         * In case the domain has been freed or the IOMMU domid bitmap is
         * not valid, the device no longer belongs to this domain.
         */
        if ( d == NULL )
            return rc;

        iommu_dev_iotlb_flush_timeout(d, pdev);
        rcu_unlock_domain(d);
    }

    return rc;
}

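/*
 * Queue a device-TLB (ATS) invalidation descriptor for @pdev and wait for
 * it to complete.
 */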
int qinval_device_iotlb_sync(struct iommu *iommu, struct pci_dev *pdev,
                             u16 did, u16 size, u64 addr)
{
    unsigned long flags;
    unsigned int index;
    u64 entry_base;
    struct qinval_entry *qinval_entry, *qinval_entries;

    ASSERT(pdev);
    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
    qinval_entries = map_vtd_domain_page(entry_base);
    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];

    qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = pdev->ats.queue_depth;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, pdev->devfn);
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;

    qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
    qinval_entry->q.dev_iotlb_inv_dsc.hi.res_1 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;

    unmap_vtd_domain_page(qinval_entries);
    qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return dev_invalidate_sync(iommu, pdev, did);
}

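/*
 * Queue an interrupt entry cache (IEC) invalidation descriptor, wait for it
 * to complete, and then read a VT-d register to make sure the invalidation
 * has been drained.
 */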
static int __must_check queue_invalidate_iec_sync(struct iommu *iommu,
                                                  u8 granu, u8 im, u16 iidx)
{
    unsigned long flags;
    unsigned int index;
    u64 entry_base;
    struct qinval_entry *qinval_entry, *qinval_entries;
    int ret;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
    qinval_entries = map_vtd_domain_page(entry_base);
    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];

    qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
    qinval_entry->q.iec_inv_dsc.lo.granu = granu;
    qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.iec_inv_dsc.lo.im = im;
    qinval_entry->q.iec_inv_dsc.lo.iidx = iidx;
    qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.iec_inv_dsc.hi.res = 0;

    unmap_vtd_domain_page(qinval_entries);
    qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    ret = invalidate_sync(iommu);

    /*
     * Reading a VT-d architecture register ensures that draining
     * happens in an implementation-independent way.
     */
    (void)dmar_readq(iommu->reg, DMAR_CAP_REG);

    return ret;
}

int iommu_flush_iec_global(struct iommu *iommu)
{
    return queue_invalidate_iec_sync(iommu, IEC_GLOBAL_INVL, 0, 0);
}

int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
{
    return queue_invalidate_iec_sync(iommu, IEC_INDEX_INVL, im, iidx);
}

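/*
 * Queued-invalidation backend for context-cache flushes.  Returns 1 when a
 * non-present entry flush is requested but hardware does not cache
 * non-present entries, i.e. there is nothing to do.
 */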
static int __must_check flush_context_qi(void *_iommu, u16 did,
                                         u16 sid, u8 fm, u64 type,
                                         bool_t flush_non_present_entry)
{
    struct iommu *iommu = (struct iommu *)_iommu;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    ASSERT(qi_ctrl->qinval_maddr);

    /*
     * In the non-present entry flush case, if hardware doesn't cache
     * non-present entries we do nothing; if hardware does cache non-present
     * entries, we flush entries of domain 0 (domain id 0 is used to cache
     * any non-present entries).
     */
    if ( flush_non_present_entry )
    {
        if ( !cap_caching_mode(iommu->cap) )
            return 1;
        else
            did = 0;
    }

    return queue_invalidate_context_sync(iommu, did, sid, fm,
                                         type >> DMA_CCMD_INVL_GRANU_OFFSET);
}

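/*
 * Queued-invalidation backend for IOTLB flushes.  Also flushes the device
 * TLBs of ATS-capable devices when flush_dev_iotlb is set.
 */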
static int __must_check flush_iotlb_qi(void *_iommu, u16 did, u64 addr,
                                       unsigned int size_order, u64 type,
                                       bool_t flush_non_present_entry,
                                       bool_t flush_dev_iotlb)
{
    u8 dr = 0, dw = 0;
    int ret = 0, rc;
    struct iommu *iommu = (struct iommu *)_iommu;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    ASSERT(qi_ctrl->qinval_maddr);

    /*
     * In the non-present entry flush case, if hardware doesn't cache
     * non-present entries we do nothing; if hardware does cache non-present
     * entries, we flush entries of domain 0 (domain id 0 is used to cache
     * any non-present entries).
     */
    if ( flush_non_present_entry )
    {
        if ( !cap_caching_mode(iommu->cap) )
            return 1;
        else
            did = 0;
    }

    /* use queued invalidation */
    if ( cap_write_drain(iommu->cap) )
        dw = 1;
    if ( cap_read_drain(iommu->cap) )
        dr = 1;
    /* Need to consider the ih bit later */
    rc = queue_invalidate_iotlb_sync(iommu,
                                     type >> DMA_TLB_FLUSH_GRANU_OFFSET,
                                     dr, dw, did, size_order, 0, addr);
    if ( !ret )
        ret = rc;

    if ( flush_dev_iotlb )
    {
        rc = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
        if ( !ret )
            ret = rc;
    }
    return ret;
}

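/*
 * Allocate the invalidation queue if necessary, point the flush hooks at the
 * queued-invalidation implementations, program the IQA register with the
 * queue address/size, and enable queued invalidation via GCMD.QIE.
 */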
int enable_qinval(struct iommu *iommu)
{
    struct acpi_drhd_unit *drhd;
    struct qi_ctrl *qi_ctrl;
    struct iommu_flush *flush;
    u32 sts;
    unsigned long flags;

    if ( !ecap_queued_inval(iommu->ecap) || !iommu_qinval )
        return -ENOENT;

    qi_ctrl = iommu_qi_ctrl(iommu);
    flush = iommu_get_flush(iommu);

    /* Return if already enabled by Xen */
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( (sts & DMA_GSTS_QIES) && qi_ctrl->qinval_maddr )
        return 0;

    if ( qi_ctrl->qinval_maddr == 0 )
    {
        drhd = iommu_to_drhd(iommu);
        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
        if ( qi_ctrl->qinval_maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
            return -ENOMEM;
        }
    }

    flush->context = flush_context_qi;
    flush->iotlb = flush_iotlb_qi;

    /*
     * Set up the Invalidation Queue Address (IQA) register with the
     * address of the page we just allocated.  The QS field at bits [2:0]
     * indicates that the queue size is one 4KB page, i.e. 256 entries.
     * The Queue Head (IQH) and Queue Tail (IQT) registers are
     * automatically reset to 0 by a write to the IQA register.
     */
    qi_ctrl->qinval_maddr |= QINVAL_PAGE_ORDER;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);

    dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);

    /* enable queued invalidation hardware */
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_QIE);

    /* Make sure hardware completes it */
    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_QIES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return 0;
}

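/* Disable queued invalidation: clear GCMD.QIE and wait for QIES to clear. */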
void disable_qinval(struct iommu *iommu)
{
    u32 sts;
    unsigned long flags;

    if ( !ecap_queued_inval(iommu->ecap) )
        return;

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( !(sts & DMA_GSTS_QIES) )
        goto out;

    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE));

    /* Make sure hardware completes it */
    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_QIES), sts);
out:
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}