/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/trace.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and no blocking by
 * STI nor MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if no blocking by MOV SS (also, depending on
 * implementation, if no blocking by STI). If pin-based 'virtual NMIs'
 * control is specified then the NMI-blocking interruptibility flag is
 * also checked. The 'virtual NMI pending' control (available only in
 * conjunction with 'virtual NMIs') causes a VM exit when all these checks
 * succeed. It will exit immediately after VM entry if the checks succeed
 * at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it will be necessary to convert that to block-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes immediate
 * VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

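/*
 * Arrange for a VM exit as soon as the guest is able to accept 'intack':
 * arm the 'interrupt window' exiting control or, for an NMI on hardware
 * with virtual NMIs, the 'NMI window' control, converting STI-blocking to
 * MOV-SS-blocking as described above.
 */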
static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
                    (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking) but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

    if ( !(v->arch.hvm_vmx.exec_control & ctl) )
    {
        v->arch.hvm_vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 *  When injecting virtual interrupts (originating from L0), there are
 *  two major cases: within L1 context and within L2 context.
 *   1. L1 context (in_nesting == 0)
 *     Everything is the same as without nesting: check RFLAGS.IF to
 *     see if the injection can be done, and use the VMCS to inject the
 *     interrupt.
 *
 *   2. L2 context (in_nesting == 1)
 *     Causes a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
 *     the irq is determined by intr_ack_on_exit. Injection normally
 *     shouldn't be blocked, except for:
 *    a. context transition
 *     the interrupt needs to be blocked at virtual VMEntry time
 *    b. L2 idtv reinjection
 *     if an L2 idtv event is handled within L0 (e.g. an L0 shadow page
 *     fault), it needs to be reinjected without exiting to L1; interrupt
 *     injection should be blocked at this point as well.
 *
 *  Unfortunately, interrupt blocking in L2 won't work with a simple
 *  intr_window_open check (which depends on L2's IF). To solve this,
 *  the following algorithm can be used:
 *   v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *   only the L0 control; the physical control may differ from it.
 *       - if in L1, it behaves normally: the intr window is written
 *         to the physical control as it is
 *       - if in L2, replace it with MTF (or the NMI window) if possible
 *       - if MTF/NMI window is not used, the intr window can still be
 *         used, but may have a negative impact on interrupt performance.
 */

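/*
 * Report whether event injection must currently be deferred because of
 * nested VMX state: a pending virtual VMExit/VMEntry, an in-progress VM
 * switch, or an event already queued in VM_ENTRY_INTR_INFO while in L2.
 */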
enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

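/*
 * Give the nested virtualization code first refusal on a pending event.
 * Returns 1 if the event has been handled here (blocked, deferred via an
 * interrupt window, or injected on the nested path), 0 if the normal
 * injection path in vmx_intr_assist() should deal with it.
 */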
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* for now, duplicate the ack path in vmx_intr_assist */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

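/*
 * Pick the highest-priority pending event for this vCPU and either inject
 * it now or program the VMCS so that a VM exit occurs as soon as injection
 * becomes possible.
 */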
void vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector = -1;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm_vcpu.single_step) )
    {
        v->arch.hvm_vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Crank the handle on interrupt state. */
    if ( is_hvm_vcpu(v) )
        pt_vector = pt_update_irq(v);

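    /*
     * Find an injectable event: bail out if nothing is pending or if
     * injection must wait, and re-poll after acknowledging an interrupt at
     * its source, since the ack may leave nothing (or something else)
     * pending.
     */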
    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

303 
304     if ( intack.source == hvm_intsrc_nmi )
305     {
306         vmx_inject_nmi();
307     }
308     else if ( intack.source == hvm_intsrc_mce )
309     {
310         hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
311     }
312     else if ( cpu_has_vmx_virtual_intr_delivery &&
313               intack.source != hvm_intsrc_pic &&
314               intack.source != hvm_intsrc_vector )
315     {
316         unsigned long status;
317         unsigned int i, n;
318 
        /*
         * intack.vector is the highest-priority vector, so we set
         * eoi_exit_bitmap for intack.vector.  This gives the periodic timer
         * interrupt a chance to be posted once it becomes the highest one.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest-priority vector
             * because only an interrupt from the vlapic can reach this point
             * and the highest vector is chosen in hvm_vcpu_has_pending_irq().
             * In practice, however, the assertion has been seen to fail.  It
             * is suspected that the PIR is not synced to the vIRR, leaving
             * pt_vector behind in the PIR.  To verify this suspicion, dump
             * some information when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm_vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* We need to update the RVI field. */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
                    intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

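        /* Flush any pending EOI-exit bitmap updates into the VMCS. */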
        n = ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap);
        while ( (i = find_first_bit(&v->arch.hvm_vmx.eoi_exitmap_changed,
                                    n)) < n )
        {
            clear_bit(i, &v->arch.hvm_vmx.eoi_exitmap_changed);
            __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm_vmx.eoi_exit_bitmap[i]);
        }

        pt_intr_post(v, intack);
    }
    else
    {
        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

 out:
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */