1 /******************************************************************************
2  * domain_page.h
3  *
4  * Allow temporary mapping of domain pages.
5  *
6  * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
7  */
8 
9 #include <xen/domain_page.h>
10 #include <xen/efi.h>
11 #include <xen/mm.h>
12 #include <xen/perfc.h>
13 #include <xen/pfn.h>
14 #include <xen/sched.h>
15 #include <xen/vmap.h>
16 #include <asm/current.h>
17 #include <asm/flushtlb.h>
18 #include <asm/hardirq.h>
19 #include <asm/setup.h>
20 
/*
 * Per-CPU vCPU override.  When non-NULL, mapcache_current_vcpu() uses this
 * vCPU instead of current; it is set via mapcache_override_current().
 */
static DEFINE_PER_CPU(struct vcpu *, override);
22 
mapcache_current_vcpu(void)23 static inline struct vcpu *mapcache_current_vcpu(void)
24 {
25     /* In the common case we use the mapcache of the running VCPU. */
26     struct vcpu *v = this_cpu(override) ?: current;
27 
28     /*
29      * When current isn't properly set up yet, this is equivalent to
30      * running in an idle vCPU (callers must check for NULL).
31      */
32     if ( !v )
33         return NULL;
34 
35     /*
36      * When using efi runtime page tables, we have the equivalent of the idle
37      * domain's page tables but current may point at another domain's VCPU.
38      * Return NULL as though current is not properly set up yet.
39      */
40     if ( efi_rs_using_pgtables() )
41         return NULL;
42 
43     /*
44      * If guest_table is NULL, and we are running a paravirtualised guest,
45      * then it means we are running on the idle domain's page table and must
46      * therefore use its mapcache.
47      */
48     if ( unlikely(pagetable_is_null(v->arch.guest_table)) && is_pv_vcpu(v) )
49     {
50         /* If we really are idling, perform lazy context switch now. */
51         if ( (v = idle_vcpu[smp_processor_id()]) == current )
52             sync_local_execstate();
53         /* We must now be running on the idle page table. */
54         ASSERT(cr3_pa(read_cr3()) == __pa(idle_pg_table));
55     }
56 
57     return v;
58 }
59 
/*
 * Set the calling CPU's mapcache vCPU override to @v.
 *
 * While the override is non-NULL, mapcache_current_vcpu() picks @v instead
 * of current; storing NULL makes it fall back to current again.  Marked
 * __init.
 */
void __init mapcache_override_current(struct vcpu *v)
{
    this_cpu(override) = v;
}
64 
/*
 * mapcache_l2_entry(e): mapcache entry index @e shifted right by
 * PAGETABLE_ORDER (presumably the index of the L1 table covering the
 * entry - confirm against the page-table layout).
 */
#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
/* One more than mapcache_l2_entry() of the last mapcache entry index. */
#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
/*
 * L1 page-table entry, reached through __linear_l1_table, for the virtual
 * address of mapcache slot @idx (MAPCACHE_VIRT_START plus @idx converted by
 * pfn_to_paddr()).
 */
#define MAPCACHE_L1ENT(idx) \
    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]
69 
/*
 * Map the frame @mfn and return a pointer through which it can be accessed.
 *
 * mfn_to_virt() of the frame is returned when the mapcache is not used:
 *  - in NDEBUG builds, for MFNs no higher than the one backing
 *    HYPERVISOR_VIRT_END - 1;
 *  - when mapcache_current_vcpu() yields NULL or a non-PV vCPU;
 *  - when the domain's mapcache has no inuse bitmap set up.
 *
 * Otherwise the result is an address inside the MAPCACHE_VIRT_START region:
 * either the slot already recorded for @mfn in the vCPU's maphash (its
 * refcnt is incremented), or a newly allocated slot whose L1 entry is
 * written to point at @mfn.
 *
 * The maphash lookup and the slot allocation run with IRQs disabled; the
 * allocation additionally holds dcache->lock.
 */
void *map_domain_page(mfn_t mfn)
{
    unsigned long flags;
    unsigned int idx, i;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    struct mapcache_vcpu *vcache;
    struct vcpu_maphash_entry *hashent;

#ifdef NDEBUG
    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn_x(mfn));
#endif

    v = mapcache_current_vcpu();
    if ( !v || !is_pv_vcpu(v) )
        return mfn_to_virt(mfn_x(mfn));

    dcache = &v->domain->arch.pv.mapcache;
    vcache = &v->arch.pv.mapcache;
    /* No inuse bitmap: the mapcache was not set up for this domain. */
    if ( !dcache->inuse )
        return mfn_to_virt(mfn_x(mfn));

    perfc_incr(map_domain_page_count);

    local_irq_save(flags);

    /*
     * Fast path: the per-vCPU maphash already holds a mapping of this MFN.
     * Take another reference and re-use its slot without touching the
     * domain-wide lock.
     */
    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
    if ( hashent->mfn == mfn_x(mfn) )
    {
        idx = hashent->idx;
        ASSERT(idx < dcache->entries);
        hashent->refcnt++;
        /* A zero refcnt right after the increment would mean it wrapped. */
        ASSERT(hashent->refcnt);
        ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
        goto out;
    }

    spin_lock(&dcache->lock);

    /* Has some other CPU caused a wrap? We must flush if so. */
    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
    {
        vcache->shadow_epoch = dcache->epoch;
        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
        {
            perfc_incr(domain_page_tlb_flush);
            flush_tlb_local();
        }
    }

    /* Look for a free slot, starting at the cursor left by the last allocation. */
    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
    if ( unlikely(idx >= dcache->entries) )
    {
        /*
         * After the loop below, accum holds the OR of ~inuse for every
         * bitmap word except the last, and prev holds ~inuse of the last
         * word.  A non-zero bit in either means a slot is free.
         */
        unsigned long accum = 0, prev = 0;

        /* /First/, clean the garbage map and update the inuse list. */
        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
        {
            accum |= prev;
            /* Atomically take and clear this word's garbage bits. */
            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
            prev = ~dcache->inuse[i];
        }

        /*
         * The last word is masked with BITMAP_LAST_WORD_MASK() (presumably
         * so that bits beyond dcache->entries are not taken for free slots).
         */
        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
        else
        {
            /* Replace a hash entry instead. */
            /*
             * Walk the maphash circularly, starting at this MFN's own
             * bucket, for an entry that holds a slot but has no references.
             */
            i = MAPHASH_HASHFN(mfn_x(mfn));
            do {
                hashent = &vcache->hash[i];
                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
                {
                    idx = hashent->idx;
                    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
                    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
                    hashent->idx = MAPHASHENT_NOTINUSE;
                    hashent->mfn = ~0UL;
                    break;
                }
                if ( ++i == MAPHASH_ENTRIES )
                    i = 0;
            } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
        }
        /* Neither a free slot nor a replaceable hash entry was found. */
        BUG_ON(idx >= dcache->entries);

        /* /Second/, flush TLBs. */
        perfc_incr(domain_page_tlb_flush);
        flush_tlb_local();
        /* Advance the domain epoch and record it as already seen by this vCPU. */
        vcache->shadow_epoch = ++dcache->epoch;
        dcache->tlbflush_timestamp = tlbflush_current_time();
    }

    /* Claim the slot and let the next search start just past it. */
    set_bit(idx, dcache->inuse);
    dcache->cursor = idx + 1;

    spin_unlock(&dcache->lock);

    /* The PTE is written after dropping the lock, with IRQs still disabled. */
    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));

 out:
    local_irq_restore(flags);
    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
}
175 
/*
 * Release the mapcache mapping that @ptr points into.
 *
 * NULL and addresses at or above DIRECTMAP_VIRT_START are ignored (no
 * mapcache slot is associated with them); any other address is asserted to
 * lie in [MAPCACHE_VIRT_START, MAPCACHE_VIRT_END).
 *
 * Depending on the state of the vCPU's maphash bucket for the mapped MFN:
 *  - the bucket holds this very slot: drop one reference, keep the PTE;
 *  - the bucket holds no referenced mapping: evict whatever slot it holds
 *    (zap its PTE and mark it garbage) and record this slot in the bucket,
 *    leaving this slot's PTE in place;
 *  - the bucket holds another, still referenced mapping: zap this slot's
 *    PTE and mark it garbage.
 *
 * Slots marked in the garbage bitmap are returned to the inuse bitmap by
 * map_domain_page() when it runs out of free slots.
 */
void unmap_domain_page(const void *ptr)
{
    unsigned int idx;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    unsigned long va = (unsigned long)ptr, mfn, flags;
    struct vcpu_maphash_entry *hashent;

    if ( !va || va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);

    v = mapcache_current_vcpu();
    ASSERT(v && is_pv_vcpu(v));

    dcache = &v->domain->arch.pv.mapcache;
    ASSERT(dcache->inuse);

    /* Slot index within the mapcache, and the MFN its PTE currently maps. */
    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
    hashent = &v->arch.pv.mapcache.hash[MAPHASH_HASHFN(mfn)];

    local_irq_save(flags);

    if ( hashent->idx == idx )
    {
        /* This slot is the hashed one: just drop a reference. */
        ASSERT(hashent->mfn == mfn);
        ASSERT(hashent->refcnt);
        hashent->refcnt--;
    }
    else if ( !hashent->refcnt )
    {
        /* Bucket is unreferenced: evict its current occupant, if any. */
        if ( hashent->idx != MAPHASHENT_NOTINUSE )
        {
            /* /First/, zap the PTE. */
            ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
                   hashent->mfn);
            l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
            /* /Second/, mark as garbage. */
            set_bit(hashent->idx, dcache->garbage);
        }

        /* Add newly-freed mapping to the maphash. */
        hashent->mfn = mfn;
        hashent->idx = idx;
    }
    else
    {
        /* Bucket is busy with another referenced mapping: free this slot. */
        /* /First/, zap the PTE. */
        l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
        /* /Second/, mark as garbage. */
        set_bit(idx, dcache->garbage);
    }

    local_irq_restore(flags);
}
233 
mapcache_domain_init(struct domain * d)234 int mapcache_domain_init(struct domain *d)
235 {
236     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
237     unsigned int bitmap_pages;
238 
239     ASSERT(is_pv_domain(d));
240 
241 #ifdef NDEBUG
242     if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
243         return 0;
244 #endif
245 
246     BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
247                  2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
248                  MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
249     bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
250     dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
251     dcache->garbage = dcache->inuse +
252                       (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
253 
254     spin_lock_init(&dcache->lock);
255 
256     return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
257                                     2 * bitmap_pages + 1,
258                                     NIL(l1_pgentry_t *), NULL);
259 }
260 
mapcache_vcpu_init(struct vcpu * v)261 int mapcache_vcpu_init(struct vcpu *v)
262 {
263     struct domain *d = v->domain;
264     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
265     unsigned long i;
266     unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
267     unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
268 
269     if ( !is_pv_vcpu(v) || !dcache->inuse )
270         return 0;
271 
272     if ( ents > dcache->entries )
273     {
274         /* Populate page tables. */
275         int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
276                                           NIL(l1_pgentry_t *), NULL);
277 
278         /* Populate bit maps. */
279         if ( !rc )
280             rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
281                                           nr, NULL, NIL(struct page_info *));
282         if ( !rc )
283             rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
284                                           nr, NULL, NIL(struct page_info *));
285 
286         if ( rc )
287             return rc;
288 
289         dcache->entries = ents;
290     }
291 
292     /* Mark all maphash entries as not in use. */
293     BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
294     for ( i = 0; i < MAPHASH_ENTRIES; i++ )
295     {
296         struct vcpu_maphash_entry *hashent = &v->arch.pv.mapcache.hash[i];
297 
298         hashent->mfn = ~0UL; /* never valid to map */
299         hashent->idx = MAPHASHENT_NOTINUSE;
300     }
301 
302     return 0;
303 }
304 
/*
 * Map the frame @mfn through vmap() and return the resulting pointer.
 *
 * Must not be called from IRQ context; outside of the boot phase
 * (SYS_STATE_boot up to, but excluding, SYS_STATE_active) interrupts have to
 * be enabled.  In NDEBUG builds, MFNs no higher than the one backing
 * HYPERVISOR_VIRT_END - 1 are returned via mfn_to_virt() without a new
 * mapping being created.
 */
void *map_domain_page_global(mfn_t mfn)
{
    ASSERT(!in_irq() &&
           ((system_state >= SYS_STATE_boot &&
             system_state < SYS_STATE_active) ||
            local_irq_is_enabled()));

#ifdef NDEBUG
    {
        const unsigned long frame = mfn_x(mfn);

        /* Release-build shortcut: no new mapping is needed for this frame. */
        if ( frame <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
            return mfn_to_virt(frame);
    }
#endif

    return vmap(&mfn, 1);
}
319 
unmap_domain_page_global(const void * ptr)320 void unmap_domain_page_global(const void *ptr)
321 {
322     unsigned long va = (unsigned long)ptr;
323 
324     if ( va >= DIRECTMAP_VIRT_START )
325         return;
326 
327     ASSERT(va >= VMAP_VIRT_START && va < VMAP_VIRT_END);
328 
329     vunmap(ptr);
330 }
331 
332 /* Translate a map-domain-page'd address to the underlying MFN */
domain_page_map_to_mfn(const void * ptr)333 mfn_t domain_page_map_to_mfn(const void *ptr)
334 {
335     unsigned long va = (unsigned long)ptr;
336     const l1_pgentry_t *pl1e;
337 
338     if ( va >= DIRECTMAP_VIRT_START )
339         return _mfn(virt_to_mfn(ptr));
340 
341     if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
342     {
343         pl1e = virt_to_xen_l1e(va);
344         BUG_ON(!pl1e);
345     }
346     else
347     {
348         ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
349         pl1e = &__linear_l1_table[l1_linear_offset(va)];
350     }
351 
352     return l1e_get_mfn(*pl1e);
353 }
354