/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/trace.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and there is no blocking by
 * STI or MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if there is no blocking by MOV SS (and also,
 * depending on the implementation, no blocking by STI). If the pin-based
 * 'virtual NMIs' control is specified then the NMI-blocking interruptibility
 * flag is also checked. The 'virtual NMI pending' control (available only in
 * conjunction with 'virtual NMIs') causes a VM exit when all these checks
 * succeed. It will exit immediately after VM entry if the checks succeed
 * at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it is necessary to convert that to blocking-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes an
 * immediate VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

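/*
 * Arm an "exit as soon as injection becomes possible" window for the given
 * pending event: the interrupt-window exiting control for ordinary
 * interrupts, or the NMI-window control (with the STI-to-MOV-SS conversion
 * described above) when delivering an NMI and virtual NMIs are available.
 */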
static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
                    (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking) but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

    if ( !(v->arch.hvm_vmx.exec_control & ctl) )
    {
        v->arch.hvm_vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 * When injecting virtual interrupts (originating from L0), there are
 * two major possibilities: within L1 context and within L2 context.
 *  1. L1 context (in_nesting == 0)
 *     Everything is the same as in the non-nested case: check RFLAGS.IF
 *     to see whether the injection can be done, and use the VMCS to
 *     inject the interrupt.
 *
 *  2. L2 context (in_nesting == 1)
 *     Causes a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
 *     the irq is governed by intr_ack_on_exit. Injection normally isn't
 *     blocked, except for:
 *   a. context transition
 *      the interrupt needs to be blocked at virtual VMEntry time
 *   b. L2 idtv reinjection
 *      if the L2 idtv event is handled within L0 (e.g. an L0 shadow page
 *      fault), it needs to be reinjected without exiting to L1; interrupt
 *      injection should be blocked at this point as well.
 *
 * Unfortunately, interrupt blocking in L2 won't work with a simple
 * intr_window_open check (which depends on L2's IF). To solve this,
 * the following algorithm can be used:
 *  v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *  only the L0 control; the physical control may differ from it.
 *   - if in L1, it behaves normally, and the intr window is written
 *     to the physical control as it is
 *   - if in L2, replace it with MTF (or the NMI window) if possible
 *   - if MTF/NMI window is not used, the intr window can still be
 *     used, but may have a negative impact on interrupt performance.
 */

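/*
 * Check whether L0-originated event injection must be held off because a
 * nested transition is pending (virtual VMExit or VMEntry in progress) or
 * an event is already queued in VM_ENTRY_INTR_INFO. Returns
 * hvm_intblk_rflags_ie if injection is blocked, hvm_intblk_none otherwise.
 */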
enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

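/*
 * Handle a pending interrupt when nested HVM is in use. Returns 1 if nothing
 * more needs doing here (the event was routed towards L1, or an interrupt
 * window was armed instead), and 0 if the normal L0 injection path should
 * continue.
 */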
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* for now, duplicate the ack path in vmx_intr_assist */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

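/*
 * Called on the VM entry path: deliver the highest-priority pending event to
 * the guest, or arm an interrupt/NMI window (or the TPR threshold) so that
 * we get a VM exit as soon as delivery becomes possible.
 */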
void vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector = -1;

    /* Block event injection while single-stepping with MTF. */
    if ( unlikely(v->arch.hvm_vcpu.single_step) )
    {
        v->arch.hvm_vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Crank the handle on interrupt state. */
    if ( is_hvm_vcpu(v) )
        pt_vector = pt_update_irq(v);

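    /*
     * Find a deliverable pending event. Acking an interrupt can leave us
     * with nothing to inject (intack.source becomes hvm_intsrc_none), in
     * which case we go round the loop and re-evaluate.
     */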
    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

    if ( intack.source == hvm_intsrc_nmi )
    {
        vmx_inject_nmi();
    }
    else if ( intack.source == hvm_intsrc_mce )
    {
        hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
    }
    else if ( cpu_has_vmx_virtual_intr_delivery &&
              intack.source != hvm_intsrc_pic &&
              intack.source != hvm_intsrc_vector )
    {
        unsigned long status;
        unsigned int i, n;

        /*
         * intack.vector is the highest-priority pending vector. Set the EOI
         * exit bitmap for it, so that periodic timer interrupts still get a
         * chance to be posted once they become the highest-priority vector.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest-priority vector,
             * because only an interrupt from the vlapic can reach this point
             * and the highest vector is chosen in hvm_vcpu_has_pending_irq().
             * In practice the assertion has been seen to fail occasionally;
             * the suspicion is that the PIR is not synced into the vIRR,
             * leaving pt_vector behind in the PIR. To help verify this
             * suspicion, dump some state when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm_vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* We need to update the RVI field. */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
                  intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

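        /*
         * Flush any EOI exit bitmap words that have changed to the VMCS,
         * clearing their dirty bits as we go.
         */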
        n = ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap);
        while ( (i = find_first_bit(&v->arch.hvm_vmx.eoi_exitmap_changed,
                                    n)) < n )
        {
            clear_bit(i, &v->arch.hvm_vmx.eoi_exitmap_changed);
            __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm_vmx.eoi_exit_bitmap[i]);
        }

        pt_intr_post(v, intack);
    }
    else
    {
        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

 out:
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */