/******************************************************************************
 * domain_page.c
 *
 * Allow temporary mapping of domain pages.
 *
 * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
 */

#include <xen/domain_page.h>
#include <xen/efi.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/vmap.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/setup.h>

static DEFINE_PER_CPU(struct vcpu *, override);

static inline struct vcpu *mapcache_current_vcpu(void)
{
    /* In the common case we use the mapcache of the running VCPU. */
    struct vcpu *v = this_cpu(override) ?: current;

    /*
     * When current isn't properly set up yet, this is equivalent to
     * running in an idle vCPU (callers must check for NULL).
     */
    if ( !v )
        return NULL;

    /*
     * When using efi runtime page tables, we have the equivalent of the idle
     * domain's page tables but current may point at another domain's VCPU.
     * Return NULL as though current is not properly set up yet.
     */
    if ( efi_rs_using_pgtables() )
        return NULL;

    /*
     * If guest_table is NULL, and we are running a paravirtualised guest,
     * then it means we are running on the idle domain's page table and must
     * therefore use its mapcache.
     */
    if ( unlikely(pagetable_is_null(v->arch.guest_table)) && is_pv_vcpu(v) )
    {
        /* If we really are idling, perform lazy context switch now. */
        if ( (v = idle_vcpu[smp_processor_id()]) == current )
            sync_local_execstate();
        /* We must now be running on the idle page table. */
        ASSERT(cr3_pa(read_cr3()) == __pa(idle_pg_table));
    }

    return v;
}

void __init mapcache_override_current(struct vcpu *v)
{
    this_cpu(override) = v;
}

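/*
 * Helpers to locate a mapcache slot's page-table entries: the slot's L2
 * entry number, the number of L2 entries covering the whole cache, and
 * the slot's L1 PTE reached through the linear page table.
 */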
#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
#define MAPCACHE_L1ENT(idx) \
    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]

void *map_domain_page(mfn_t mfn)
{
    unsigned long flags;
    unsigned int idx, i;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    struct mapcache_vcpu *vcache;
    struct vcpu_maphash_entry *hashent;

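    /*
     * Release builds short-circuit MFNs that are always mapped by the
     * direct map; debug builds fall through so the mapcache gets exercised.
     */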
#ifdef NDEBUG
    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn_x(mfn));
#endif

    v = mapcache_current_vcpu();
    if ( !v || !is_pv_vcpu(v) )
        return mfn_to_virt(mfn_x(mfn));

    dcache = &v->domain->arch.pv.mapcache;
    vcache = &v->arch.pv.mapcache;
    if ( !dcache->inuse )
        return mfn_to_virt(mfn_x(mfn));

    perfc_incr(map_domain_page_count);

    local_irq_save(flags);

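    /*
     * Fast path: a hit in the per-vCPU maphash avoids taking the
     * domain-wide mapcache lock.
     */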
    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
    if ( hashent->mfn == mfn_x(mfn) )
    {
        idx = hashent->idx;
        ASSERT(idx < dcache->entries);
        hashent->refcnt++;
        ASSERT(hashent->refcnt);
        ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
        goto out;
    }

    spin_lock(&dcache->lock);

    /* Has some other CPU caused a wrap? We must flush if so. */
    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
    {
        vcache->shadow_epoch = dcache->epoch;
        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
        {
            perfc_incr(domain_page_tlb_flush);
            flush_tlb_local();
        }
    }

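    /* Look for an unused slot, starting at the allocation cursor. */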
    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
    if ( unlikely(idx >= dcache->entries) )
    {
        unsigned long accum = 0, prev = 0;

        /* /First/, clean the garbage map and update the inuse list. */
        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
        {
            accum |= prev;
            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
            prev = ~dcache->inuse[i];
        }

        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
        else
        {
            /* Replace a hash entry instead. */
            i = MAPHASH_HASHFN(mfn_x(mfn));
            do {
                hashent = &vcache->hash[i];
                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
                {
                    idx = hashent->idx;
                    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
                    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
                    hashent->idx = MAPHASHENT_NOTINUSE;
                    hashent->mfn = ~0UL;
                    break;
                }
                if ( ++i == MAPHASH_ENTRIES )
                    i = 0;
            } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
        }
        BUG_ON(idx >= dcache->entries);

        /* /Second/, flush TLBs. */
        perfc_incr(domain_page_tlb_flush);
        flush_tlb_local();
        vcache->shadow_epoch = ++dcache->epoch;
        dcache->tlbflush_timestamp = tlbflush_current_time();
    }

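    /* Claim the slot and remember where to resume searching next time. */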
    set_bit(idx, dcache->inuse);
    dcache->cursor = idx + 1;

    spin_unlock(&dcache->lock);

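    /* The slot is now ours alone, so the PTE can be written unlocked. */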
    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));

 out:
    local_irq_restore(flags);
    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
}

void unmap_domain_page(const void *ptr)
{
    unsigned int idx;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    unsigned long va = (unsigned long)ptr, mfn, flags;
    struct vcpu_maphash_entry *hashent;

    if ( !va || va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);

    v = mapcache_current_vcpu();
    ASSERT(v && is_pv_vcpu(v));

    dcache = &v->domain->arch.pv.mapcache;
    ASSERT(dcache->inuse);

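    /* Recover the slot index from the VA and the MFN from its L1 entry. */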
    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
    hashent = &v->arch.pv.mapcache.hash[MAPHASH_HASHFN(mfn)];

    local_irq_save(flags);

    if ( hashent->idx == idx )
    {
        ASSERT(hashent->mfn == mfn);
        ASSERT(hashent->refcnt);
        hashent->refcnt--;
    }
    else if ( !hashent->refcnt )
    {
        if ( hashent->idx != MAPHASHENT_NOTINUSE )
        {
            /* /First/, zap the PTE. */
            ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
                   hashent->mfn);
            l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
            /* /Second/, mark as garbage. */
            set_bit(hashent->idx, dcache->garbage);
        }

        /* Add newly-freed mapping to the maphash. */
        hashent->mfn = mfn;
        hashent->idx = idx;
    }
    else
    {
        /* /First/, zap the PTE. */
        l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
        /* /Second/, mark as garbage. */
        set_bit(idx, dcache->garbage);
    }

    local_irq_restore(flags);
}

int mapcache_domain_init(struct domain *d)
{
    struct mapcache_domain *dcache = &d->arch.pv.mapcache;
    unsigned int bitmap_pages;

    ASSERT(is_pv_domain(d));

#ifdef NDEBUG
    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return 0;
#endif

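    /*
     * The inuse and garbage bitmaps live just above the mapcache VA range,
     * each preceded by an unmapped guard page; check that everything fits
     * within the per-domain slot.
     */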
    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
    dcache->garbage = dcache->inuse +
                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);

    spin_lock_init(&dcache->lock);

    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                    2 * bitmap_pages + 1,
                                    NIL(l1_pgentry_t *), NULL);
}

int mapcache_vcpu_init(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct mapcache_domain *dcache = &d->arch.pv.mapcache;
    unsigned long i;
    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
    unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));

    if ( !is_pv_vcpu(v) || !dcache->inuse )
        return 0;

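    /*
     * On first use, populate page tables and bitmaps for the whole
     * domain's worth of entries (MAPCACHE_VCPU_ENTRIES per vCPU).
     */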
    if ( ents > dcache->entries )
    {
        /* Populate page tables. */
        int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
                                          NIL(l1_pgentry_t *), NULL);

        /* Populate bit maps. */
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                          nr, NULL, NIL(struct page_info *));
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
                                          nr, NULL, NIL(struct page_info *));

        if ( rc )
            return rc;

        dcache->entries = ents;
    }

    /* Mark all maphash entries as not in use. */
    BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
    {
        struct vcpu_maphash_entry *hashent = &v->arch.pv.mapcache.hash[i];

        hashent->mfn = ~0UL; /* never valid to map */
        hashent->idx = MAPHASHENT_NOTINUSE;
    }

    return 0;
}

void *map_domain_page_global(mfn_t mfn)
{
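    /*
     * Global mappings are backed by vmap() and hence must not be created
     * from IRQ context; outside of early boot, IRQs must be enabled.
     */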
    ASSERT(!in_irq() &&
           ((system_state >= SYS_STATE_boot &&
             system_state < SYS_STATE_active) ||
            local_irq_is_enabled()));

#ifdef NDEBUG
    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn_x(mfn));
#endif

    return vmap(&mfn, 1);
}

void unmap_domain_page_global(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;

    if ( va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= VMAP_VIRT_START && va < VMAP_VIRT_END);

    vunmap(ptr);
}

/* Translate a map-domain-page'd address to the underlying MFN */
mfn_t domain_page_map_to_mfn(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;
    const l1_pgentry_t *pl1e;

    if ( va >= DIRECTMAP_VIRT_START )
        return _mfn(virt_to_mfn(ptr));

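    /* Global mappings live in the vmap area; walk Xen's page tables. */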
    if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
    {
        pl1e = virt_to_xen_l1e(va);
        BUG_ON(!pl1e);
    }
    else
    {
        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
        pl1e = &__linear_l1_table[l1_linear_offset(va)];
    }

    return l1e_get_mfn(*pl1e);
}