1 #include "x86-emulate.h"
2 
3 #include <stdarg.h>
4 #include <stdio.h>
5 
/*
 * One test vector describing a single EVEX-encoded insn. Instances are
 * normally built through the INSN*() macros, which fill in every field.
 */
struct test {
    const char *mnemonic; /* insn name, stringified by INSNX() */
    unsigned int opc:8;   /* major opcode byte */
    unsigned int spc:2;   /* opcode space (enum spc) */
    unsigned int pfx:2;   /* prefix selector (enum pfx) */
    unsigned int vsz:3;   /* vector size class (enum vsz) */
    unsigned int esz:4;   /* element size class (enum esz) */
    unsigned int scale:1; /* Disp8 scaling basis (enum scale) */
    /*
     * 'e' argument of INSNX() (0 when built via INSN()).
     * NOTE(review): presumably the ModRM.reg opcode extension - the
     * consumer is not part of this chunk.
     */
    unsigned int ext:3;
};
16 
/* Opcode space selector; test_one() copies it into the EVEX opcx field. */
enum spc {
    SPC_invalid,
    SPC_0f,
    SPC_0f38,
    SPC_0f3a,
};
23 
/* Prefix selector; test_one() copies it into the EVEX pfx field. */
enum pfx {
    PFX_,   /* chosen when the prefix argument of INSN() is left empty */
    PFX_66,
    PFX_f3,
    PFX_f2
};
30 
/*
 * Vector length selector. test_one() stores it in the EVEX lr field and,
 * for VSZ_vl, computes the vector size in bytes as 16 << vl.
 */
enum vl {
    VL_128,
    VL_256,
    VL_512,
};
36 
/* Basis for Disp8 scaling; kept in the 1-bit 'scale' field of struct test. */
enum scale { /* scale by memory operand ... */
    SC_vl,   /* ... vector length */
    SC_el,   /* ... element length */
};
41 
/*
 * Vector size is determined either from EVEX.L'L (VL) or vector
 * element size (EL), often controlled by EVEX.W (see enum esz).
 *
 * The eight enumerators exactly fill the 3-bit 'vsz' field of struct test.
 */
enum vsz {
    VSZ_vl,
    VSZ_vl_2, /* VL / 2 */
    VSZ_vl_4, /* VL / 4 */
    VSZ_vl_8, /* VL / 8 */
    /* "no broadcast" implied from here on. */
    VSZ_el,
    VSZ_el_2, /* EL * 2 */
    VSZ_el_4, /* EL * 4 */
    VSZ_el_8, /* EL * 8 */
};
57 
/*
 * Vector element size is either an opcode attribute or often determined
 * by EVEX.W (in which case enumerators below name two sizes). Instructions
 * accessing GPRs often use EVEX.W to select between 32- and 64-bit GPR
 * width, but this distinction goes away outside of 64-bit mode (and EVEX.W
 * is ignored there).
 *
 * As handled by test_one(): b -> 1 byte; w -> 2 bytes with EVEX.W set;
 * d / d_nb -> 4 bytes; d_WIG -> 4 bytes with EVEX.W set; q / q_nb -> 8 bytes
 * with EVEX.W set. The two-size enumerators (dq, sd, bw) hit the
 * ASSERT_UNREACHABLE() default there, so they are presumably split into
 * single sizes before test_one() is reached - TODO confirm at the caller.
 *
 * With __i386__ there are ten enumerators, which fit the 4-bit 'esz' field
 * of struct test.
 */
enum esz {
    ESZ_d,
    ESZ_q,
    ESZ_dq,
    ESZ_sd,
    ESZ_d_nb,
    ESZ_q_nb,
    /* "no broadcast" implied from here on. */
#ifdef __i386__
    ESZ_d_WIG,
#endif
    ESZ_b,
    ESZ_w,
    ESZ_bw,
};
80 
/*
 * Element size class for insns where EVEX.W selects the GPR width. When
 * __i386__ is defined the W-ignored class ESZ_d_WIG is used instead of the
 * two-size ESZ_dq, as EVEX.W is ignored outside of 64-bit mode.
 */
#ifndef __i386__
# define ESZ_dq64 ESZ_dq
#else
# define ESZ_dq64 ESZ_d_WIG
#endif
86 
/*
 * Table entry constructors.
 *
 * INSNX(m, p, sp, o, e, vs, es, sc):
 *   m  - mnemonic, stringified into .mnemonic
 *   p  - prefix, pasted onto PFX_ (may be empty, yielding PFX_)
 *   sp - opcode space, pasted onto SPC_
 *   o  - opcode as hex digits without the 0x, stored in .opc
 *   e  - single digit pasted after a leading 0 (i.e. octal), stored in .ext
 *   vs / es / sc - pasted onto VSZ_ / ESZ_ / SC_ respectively
 * INSN():        INSNX() with e = 0.
 * INSN_PFP():    pair m##pd (prefix 66, q) and m##ps (no prefix, d), both
 *                with vl vector size and vl scaling.
 * INSN_PFP_NB(): as INSN_PFP(), but using the q_nb / d_nb element classes.
 * INSN_SFP():    pair m##sd (prefix f2, q) and m##ss (prefix f3, d), both
 *                with el vector size and el scaling.
 * INSN_FP():     INSN_PFP() followed by INSN_SFP(), i.e. four entries.
 */
#define INSNX(m, p, sp, o, e, vs, es, sc) { \
    .mnemonic = #m, .opc = 0x##o, .spc = SPC_##sp, .pfx = PFX_##p, \
    .vsz = VSZ_##vs, .esz = ESZ_##es, .scale = SC_##sc, .ext = 0##e \
}
#define INSN(m, p, sp, o, vs, es, sc) INSNX(m, p, sp, o, 0, vs, es, sc)
#define INSN_PFP(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q, vl), \
    INSN(m##ps,   , sp, o, vl, d, vl)
#define INSN_PFP_NB(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q_nb, vl), \
    INSN(m##ps,   , sp, o, vl, d_nb, vl)
#define INSN_SFP(m, sp, o) \
    INSN(m##sd, f2, sp, o, el, q, el), \
    INSN(m##ss, f3, sp, o, el, d, el)

#define INSN_FP(m, sp, o) \
    INSN_PFP(m, sp, o), \
    INSN_SFP(m, sp, o)
105 
/*
 * AVX512F test vectors. Mnemonics omit the leading "v"; entries using a
 * two-size element class (dq, sd) stand for both variants of the insn.
 * NOTE(review): the _all suffix presumably pairs this table with vl_all -
 * the pairing is not visible in this chunk. Commented-out rows are
 * presumably encodings without a memory operand, kept for reference.
 */
static const struct test avx512f_all[] = {
    INSN_FP(add,             0f, 58),
    INSN(align,        66, 0f3a, 03,    vl,     dq, vl),
    INSN(blendm,       66, 0f38, 65,    vl,     sd, vl),
    INSN(broadcastss,  66, 0f38, 18,    el,      d, el),
    INSN_FP(cmp,             0f, c2),
    INSN(comisd,       66,   0f, 2f,    el,      q, el),
    INSN(comiss,         ,   0f, 2f,    el,      d, el),
    INSN(compress,     66, 0f38, 8a,    vl,     sd, el),
    INSN(cvtdq2pd,     f3,   0f, e6,    vl_2,    d, vl),
    INSN(cvtdq2ps,       ,   0f, 5b,    vl,      d, vl),
    INSN(cvtpd2dq,     f2,   0f, e6,    vl,      q, vl),
    INSN(cvtpd2udq,      ,   0f, 79,    vl,      q, vl),
    INSN(cvtpd2ps,     66,   0f, 5a,    vl,      q, vl),
    INSN(cvtph2ps,     66, 0f38, 13,    vl_2, d_nb, vl),
    INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
    INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
    INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
    INSN(cvtps2udq,      ,   0f, 79,    vl,      d, vl),
    INSN(cvtsd2si,     f2,   0f, 2d,    el,      q, el),
    INSN(cvtsd2usi,    f2,   0f, 79,    el,      q, el),
    INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
    INSN(cvtsi2sd,     f2,   0f, 2a,    el,   dq64, el),
    INSN(cvtsi2ss,     f3,   0f, 2a,    el,   dq64, el),
    INSN(cvtss2sd,     f3,   0f, 5a,    el,      d, el),
    INSN(cvtss2si,     f3,   0f, 2d,    el,      d, el),
    INSN(cvtss2usi,    f3,   0f, 79,    el,      d, el),
    INSN(cvttpd2dq,    66,   0f, e6,    vl,      q, vl),
    INSN(cvttpd2udq,     ,   0f, 78,    vl,      q, vl),
    INSN(cvttps2dq,    f3,   0f, 5b,    vl,      d, vl),
    INSN(cvttps2udq,     ,   0f, 78,    vl,      d, vl),
    INSN(cvttsd2si,    f2,   0f, 2c,    el,      q, el),
    INSN(cvttsd2usi,   f2,   0f, 78,    el,      q, el),
    INSN(cvttss2si,    f3,   0f, 2c,    el,      d, el),
    INSN(cvttss2usi,   f3,   0f, 78,    el,      d, el),
    INSN(cvtudq2pd,    f3,   0f, 7a,    vl_2,    d, vl),
    INSN(cvtudq2ps,    f2,   0f, 7a,    vl,      d, vl),
    INSN(cvtusi2sd,    f2,   0f, 7b,    el,   dq64, el),
    INSN(cvtusi2ss,    f3,   0f, 7b,    el,   dq64, el),
    INSN_FP(div,             0f, 5e),
    INSN(expand,       66, 0f38, 88,    vl,     sd, el),
    INSN(fixupimm,     66, 0f3a, 54,    vl,     sd, vl),
    INSN(fixupimm,     66, 0f3a, 55,    el,     sd, el),
    INSN(fmadd132,     66, 0f38, 98,    vl,     sd, vl),
    INSN(fmadd132,     66, 0f38, 99,    el,     sd, el),
    INSN(fmadd213,     66, 0f38, a8,    vl,     sd, vl),
    INSN(fmadd213,     66, 0f38, a9,    el,     sd, el),
    INSN(fmadd231,     66, 0f38, b8,    vl,     sd, vl),
    INSN(fmadd231,     66, 0f38, b9,    el,     sd, el),
    INSN(fmaddsub132,  66, 0f38, 96,    vl,     sd, vl),
    INSN(fmaddsub213,  66, 0f38, a6,    vl,     sd, vl),
    INSN(fmaddsub231,  66, 0f38, b6,    vl,     sd, vl),
    INSN(fmsub132,     66, 0f38, 9a,    vl,     sd, vl),
    INSN(fmsub132,     66, 0f38, 9b,    el,     sd, el),
    INSN(fmsub213,     66, 0f38, aa,    vl,     sd, vl),
    INSN(fmsub213,     66, 0f38, ab,    el,     sd, el),
    INSN(fmsub231,     66, 0f38, ba,    vl,     sd, vl),
    INSN(fmsub231,     66, 0f38, bb,    el,     sd, el),
    INSN(fmsubadd132,  66, 0f38, 97,    vl,     sd, vl),
    INSN(fmsubadd213,  66, 0f38, a7,    vl,     sd, vl),
    INSN(fmsubadd231,  66, 0f38, b7,    vl,     sd, vl),
    INSN(fnmadd132,    66, 0f38, 9c,    vl,     sd, vl),
    INSN(fnmadd132,    66, 0f38, 9d,    el,     sd, el),
    INSN(fnmadd213,    66, 0f38, ac,    vl,     sd, vl),
    INSN(fnmadd213,    66, 0f38, ad,    el,     sd, el),
    INSN(fnmadd231,    66, 0f38, bc,    vl,     sd, vl),
    INSN(fnmadd231,    66, 0f38, bd,    el,     sd, el),
    INSN(fnmsub132,    66, 0f38, 9e,    vl,     sd, vl),
    INSN(fnmsub132,    66, 0f38, 9f,    el,     sd, el),
    INSN(fnmsub213,    66, 0f38, ae,    vl,     sd, vl),
    INSN(fnmsub213,    66, 0f38, af,    el,     sd, el),
    INSN(fnmsub231,    66, 0f38, be,    vl,     sd, vl),
    INSN(fnmsub231,    66, 0f38, bf,    el,     sd, el),
    INSN(gatherd,      66, 0f38, 92,    vl,     sd, el),
    INSN(gatherq,      66, 0f38, 93,    vl,     sd, el),
    INSN(getexp,       66, 0f38, 42,    vl,     sd, vl),
    INSN(getexp,       66, 0f38, 43,    el,     sd, el),
    INSN(getmant,      66, 0f3a, 26,    vl,     sd, vl),
    INSN(getmant,      66, 0f3a, 27,    el,     sd, el),
    INSN_FP(max,             0f, 5f),
    INSN_FP(min,             0f, 5d),
    INSN_SFP(mov,            0f, 10),
    INSN_SFP(mov,            0f, 11),
    INSN_PFP_NB(mova,        0f, 28),
    INSN_PFP_NB(mova,        0f, 29),
    INSN(movddup,      f2,   0f, 12,    vl,   q_nb, vl),
    INSN(movdqa32,     66,   0f, 6f,    vl,   d_nb, vl),
    INSN(movdqa32,     66,   0f, 7f,    vl,   d_nb, vl),
    INSN(movdqa64,     66,   0f, 6f,    vl,   q_nb, vl),
    INSN(movdqa64,     66,   0f, 7f,    vl,   q_nb, vl),
    INSN(movdqu32,     f3,   0f, 6f,    vl,   d_nb, vl),
    INSN(movdqu32,     f3,   0f, 7f,    vl,   d_nb, vl),
    INSN(movdqu64,     f3,   0f, 6f,    vl,   q_nb, vl),
    INSN(movdqu64,     f3,   0f, 7f,    vl,   q_nb, vl),
    INSN(movntdq,      66,   0f, e7,    vl,   d_nb, vl),
    INSN(movntdqa,     66, 0f38, 2a,    vl,   d_nb, vl),
    INSN_PFP_NB(movnt,       0f, 2b),
    INSN(movshdup,     f3,   0f, 16,    vl,   d_nb, vl),
    INSN(movsldup,     f3,   0f, 12,    vl,   d_nb, vl),
    INSN_PFP_NB(movu,        0f, 10),
    INSN_PFP_NB(movu,        0f, 11),
    INSN_FP(mul,             0f, 59),
    INSN(pabsd,        66, 0f38, 1e,    vl,      d, vl),
    INSN(pabsq,        66, 0f38, 1f,    vl,      q, vl),
    INSN(paddd,        66,   0f, fe,    vl,      d, vl),
    INSN(paddq,        66,   0f, d4,    vl,      q, vl),
    INSN(pand,         66,   0f, db,    vl,     dq, vl),
    INSN(pandn,        66,   0f, df,    vl,     dq, vl),
    INSN(pblendm,      66, 0f38, 64,    vl,     dq, vl),
//       pbroadcast,   66, 0f38, 7c,          dq64
    INSN(pbroadcastd,  66, 0f38, 58,    el,      d, el),
    INSN(pbroadcastq,  66, 0f38, 59,    el,      q, el),
    INSN(pcmp,         66, 0f3a, 1f,    vl,     dq, vl),
    INSN(pcmpeqd,      66,   0f, 76,    vl,      d, vl),
    INSN(pcmpeqq,      66, 0f38, 29,    vl,      q, vl),
    INSN(pcmpgtd,      66,   0f, 66,    vl,      d, vl),
    INSN(pcmpgtq,      66, 0f38, 37,    vl,      q, vl),
    INSN(pcmpu,        66, 0f3a, 1e,    vl,     dq, vl),
    INSN(pcompress,    66, 0f38, 8b,    vl,     dq, el),
    INSN(permi2,       66, 0f38, 76,    vl,     dq, vl),
    INSN(permi2,       66, 0f38, 77,    vl,     sd, vl),
    INSN(permilpd,     66, 0f38, 0d,    vl,      q, vl),
    INSN(permilpd,     66, 0f3a, 05,    vl,      q, vl),
    INSN(permilps,     66, 0f38, 0c,    vl,      d, vl),
    INSN(permilps,     66, 0f3a, 04,    vl,      d, vl),
    INSN(permt2,       66, 0f38, 7e,    vl,     dq, vl),
    INSN(permt2,       66, 0f38, 7f,    vl,     sd, vl),
    INSN(pexpand,      66, 0f38, 89,    vl,     dq, el),
    INSN(pgatherd,     66, 0f38, 90,    vl,     dq, el),
    INSN(pgatherq,     66, 0f38, 91,    vl,     dq, el),
    INSN(pmaxs,        66, 0f38, 3d,    vl,     dq, vl),
    INSN(pmaxu,        66, 0f38, 3f,    vl,     dq, vl),
    INSN(pmins,        66, 0f38, 39,    vl,     dq, vl),
    INSN(pminu,        66, 0f38, 3b,    vl,     dq, vl),
    INSN(pmovdb,       f3, 0f38, 31,    vl_4,    b, vl),
    INSN(pmovdw,       f3, 0f38, 33,    vl_2,    b, vl),
    INSN(pmovqb,       f3, 0f38, 32,    vl_8,    b, vl),
    INSN(pmovqd,       f3, 0f38, 35,    vl_2, d_nb, vl),
    INSN(pmovqw,       f3, 0f38, 34,    vl_4,    b, vl),
    INSN(pmovsdb,      f3, 0f38, 21,    vl_4,    b, vl),
    INSN(pmovsdw,      f3, 0f38, 23,    vl_2,    b, vl),
    INSN(pmovsqb,      f3, 0f38, 22,    vl_8,    b, vl),
    INSN(pmovsqd,      f3, 0f38, 25,    vl_2, d_nb, vl),
    INSN(pmovsqw,      f3, 0f38, 24,    vl_4,    b, vl),
    INSN(pmovsxbd,     66, 0f38, 21,    vl_4,    b, vl),
    INSN(pmovsxbq,     66, 0f38, 22,    vl_8,    b, vl),
    INSN(pmovsxwd,     66, 0f38, 23,    vl_2,    w, vl),
    INSN(pmovsxwq,     66, 0f38, 24,    vl_4,    w, vl),
    INSN(pmovsxdq,     66, 0f38, 25,    vl_2, d_nb, vl),
    INSN(pmovusdb,     f3, 0f38, 11,    vl_4,    b, vl),
    INSN(pmovusdw,     f3, 0f38, 13,    vl_2,    b, vl),
    INSN(pmovusqb,     f3, 0f38, 12,    vl_8,    b, vl),
    INSN(pmovusqd,     f3, 0f38, 15,    vl_2, d_nb, vl),
    INSN(pmovusqw,     f3, 0f38, 14,    vl_4,    b, vl),
    INSN(pmovzxbd,     66, 0f38, 31,    vl_4,    b, vl),
    INSN(pmovzxbq,     66, 0f38, 32,    vl_8,    b, vl),
    INSN(pmovzxwd,     66, 0f38, 33,    vl_2,    w, vl),
    INSN(pmovzxwq,     66, 0f38, 34,    vl_4,    w, vl),
    INSN(pmovzxdq,     66, 0f38, 35,    vl_2, d_nb, vl),
    INSN(pmuldq,       66, 0f38, 28,    vl,      q, vl),
    INSN(pmulld,       66, 0f38, 40,    vl,      d, vl),
    INSN(pmuludq,      66,   0f, f4,    vl,      q, vl),
    INSN(por,          66,   0f, eb,    vl,     dq, vl),
    INSNX(prol,        66,   0f, 72, 1, vl,     dq, vl),
    INSN(prolv,        66, 0f38, 15,    vl,     dq, vl),
    INSNX(pror,        66,   0f, 72, 0, vl,     dq, vl),
    INSN(prorv,        66, 0f38, 14,    vl,     dq, vl),
    INSN(pscatterd,    66, 0f38, a0,    vl,     dq, el),
    INSN(pscatterq,    66, 0f38, a1,    vl,     dq, el),
    INSN(pshufd,       66,   0f, 70,    vl,      d, vl),
    INSN(pslld,        66,   0f, f2,    el_4,    d, vl),
    INSNX(pslld,       66,   0f, 72, 6, vl,      d, vl),
    INSN(psllq,        66,   0f, f3,    el_2,    q, vl),
    INSNX(psllq,       66,   0f, 73, 6, vl,      q, vl),
    INSN(psllv,        66, 0f38, 47,    vl,     dq, vl),
    INSNX(psra,        66,   0f, 72, 4, vl,     dq, vl),
    INSN(psrad,        66,   0f, e2,    el_4,    d, vl),
    INSN(psraq,        66,   0f, e2,    el_2,    q, vl),
    INSN(psrav,        66, 0f38, 46,    vl,     dq, vl),
    INSN(psrld,        66,   0f, d2,    el_4,    d, vl),
    INSNX(psrld,       66,   0f, 72, 2, vl,      d, vl),
    INSN(psrlq,        66,   0f, d3,    el_2,    q, vl),
    INSNX(psrlq,       66,   0f, 73, 2, vl,      q, vl),
    INSN(psrlv,        66, 0f38, 45,    vl,     dq, vl),
    INSN(psubd,        66,   0f, fa,    vl,      d, vl),
    INSN(psubq,        66,   0f, fb,    vl,      q, vl),
    INSN(pternlog,     66, 0f3a, 25,    vl,     dq, vl),
    INSN(ptestm,       66, 0f38, 27,    vl,     dq, vl),
    INSN(ptestnm,      f3, 0f38, 27,    vl,     dq, vl),
    INSN(punpckhdq,    66,   0f, 6a,    vl,      d, vl),
    INSN(punpckhqdq,   66,   0f, 6d,    vl,      q, vl),
    INSN(punpckldq,    66,   0f, 62,    vl,      d, vl),
    INSN(punpcklqdq,   66,   0f, 6c,    vl,      q, vl),
    INSN(pxor,         66,   0f, ef,    vl,     dq, vl),
    INSN(rcp14,        66, 0f38, 4c,    vl,     sd, vl),
    INSN(rcp14,        66, 0f38, 4d,    el,     sd, el),
    INSN(rndscalepd,   66, 0f3a, 09,    vl,      q, vl),
    INSN(rndscaleps,   66, 0f3a, 08,    vl,      d, vl),
    INSN(rndscalesd,   66, 0f3a, 0b,    el,      q, el),
    INSN(rndscaless,   66, 0f3a, 0a,    el,      d, el),
    INSN(rsqrt14,      66, 0f38, 4e,    vl,     sd, vl),
    INSN(rsqrt14,      66, 0f38, 4f,    el,     sd, el),
    INSN(scalef,       66, 0f38, 2c,    vl,     sd, vl),
    INSN(scalef,       66, 0f38, 2d,    el,     sd, el),
    INSN(scatterd,     66, 0f38, a2,    vl,     sd, el),
    INSN(scatterq,     66, 0f38, a3,    vl,     sd, el),
    INSN_PFP(shuf,           0f, c6),
    INSN_FP(sqrt,            0f, 51),
    INSN_FP(sub,             0f, 5c),
    INSN(ucomisd,      66,   0f, 2e,    el,      q, el),
    INSN(ucomiss,        ,   0f, 2e,    el,      d, el),
    INSN_PFP(unpckh,         0f, 15),
    INSN_PFP(unpckl,         0f, 14),
};
320 
/*
 * AVX512F test vectors. NOTE(review): the _128 suffix presumably restricts
 * these to the vl_128 set - the pairing is not visible in this chunk.
 * Commented-out rows are presumably encodings without a memory operand.
 */
static const struct test avx512f_128[] = {
    INSN(extractps, 66, 0f3a, 17, el,    d, el),
    INSN(insertps,  66, 0f3a, 21, el,    d, el),
    INSN(mov,       66,   0f, 6e, el, dq64, el),
    INSN(mov,       66,   0f, 7e, el, dq64, el),
//       movhlps,     ,   0f, 12,        d
    INSN(movhpd,    66,   0f, 16, el,    q, vl),
    INSN(movhpd,    66,   0f, 17, el,    q, vl),
    INSN(movhps,      ,   0f, 16, el_2,  d, vl),
    INSN(movhps,      ,   0f, 17, el_2,  d, vl),
//       movlhps,     ,   0f, 16,        d
    INSN(movlpd,    66,   0f, 12, el,    q, vl),
    INSN(movlpd,    66,   0f, 13, el,    q, vl),
    INSN(movlps,      ,   0f, 12, el_2,  d, vl),
    INSN(movlps,      ,   0f, 13, el_2,  d, vl),
    INSN(movq,      f3,   0f, 7e, el,    q, el),
    INSN(movq,      66,   0f, d6, el,    q, el),
};
339 
/*
 * AVX512F test vectors. NOTE(review): the _no128 suffix presumably pairs
 * this table with vl_no128 - the pairing is not visible in this chunk.
 */
static const struct test avx512f_no128[] = {
    INSN(broadcastf32x4, 66, 0f38, 1a, el_4,  d, vl),
    INSN(broadcasti32x4, 66, 0f38, 5a, el_4,  d, vl),
    INSN(broadcastsd,    66, 0f38, 19, el,    q, el),
    INSN(extractf32x4,   66, 0f3a, 19, el_4,  d, vl),
    INSN(extracti32x4,   66, 0f3a, 39, el_4,  d, vl),
    INSN(insertf32x4,    66, 0f3a, 18, el_4,  d, vl),
    INSN(inserti32x4,    66, 0f3a, 38, el_4,  d, vl),
    INSN(perm,           66, 0f38, 36, vl,   dq, vl),
    INSN(perm,           66, 0f38, 16, vl,   sd, vl),
    INSN(permpd,         66, 0f3a, 01, vl,    q, vl),
    INSN(permq,          66, 0f3a, 00, vl,    q, vl),
    INSN(shuff32x4,      66, 0f3a, 23, vl,    d, vl),
    INSN(shuff64x2,      66, 0f3a, 23, vl,    q, vl),
    INSN(shufi32x4,      66, 0f3a, 43, vl,    d, vl),
    INSN(shufi64x2,      66, 0f3a, 43, vl,    q, vl),
};
357 
/*
 * AVX512F test vectors. NOTE(review): the _512 suffix presumably pairs this
 * table with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512f_512[] = {
    INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
    INSN(broadcasti64x4, 66, 0f38, 5b, el_4, q, vl),
    INSN(extractf64x4,   66, 0f3a, 1b, el_4, q, vl),
    INSN(extracti64x4,   66, 0f3a, 3b, el_4, q, vl),
    INSN(insertf64x4,    66, 0f3a, 1a, el_4, q, vl),
    INSN(inserti64x4,    66, 0f3a, 3a, el_4, q, vl),
};
366 
/*
 * AVX512BW test vectors. Mnemonics omit the leading "v"; entries using the
 * two-size bw element class stand for both variants of the insn.
 * NOTE(review): the _all suffix presumably pairs this table with vl_all -
 * the pairing is not visible in this chunk. Commented-out rows are
 * presumably encodings without a memory operand, kept for reference.
 */
static const struct test avx512bw_all[] = {
    INSN(dbpsadbw,    66, 0f3a, 42,    vl,    b, vl),
    INSN(movdqu8,     f2,   0f, 6f,    vl,    b, vl),
    INSN(movdqu8,     f2,   0f, 7f,    vl,    b, vl),
    INSN(movdqu16,    f2,   0f, 6f,    vl,    w, vl),
    INSN(movdqu16,    f2,   0f, 7f,    vl,    w, vl),
    INSN(pabsb,       66, 0f38, 1c,    vl,    b, vl),
    INSN(pabsw,       66, 0f38, 1d,    vl,    w, vl),
    INSN(packssdw,    66,   0f, 6b,    vl, d_nb, vl),
    INSN(packsswb,    66,   0f, 63,    vl,    w, vl),
    INSN(packusdw,    66, 0f38, 2b,    vl, d_nb, vl),
    INSN(packuswb,    66,   0f, 67,    vl,    w, vl),
    INSN(paddb,       66,   0f, fc,    vl,    b, vl),
    INSN(paddsb,      66,   0f, ec,    vl,    b, vl),
    INSN(paddsw,      66,   0f, ed,    vl,    w, vl),
    INSN(paddusb,     66,   0f, dc,    vl,    b, vl),
    INSN(paddusw,     66,   0f, dd,    vl,    w, vl),
    INSN(paddw,       66,   0f, fd,    vl,    w, vl),
    INSN(palignr,     66, 0f3a, 0f,    vl,    b, vl),
    INSN(pavgb,       66,   0f, e0,    vl,    b, vl),
    INSN(pavgw,       66,   0f, e3,    vl,    w, vl),
    INSN(pblendm,     66, 0f38, 66,    vl,   bw, vl),
    INSN(pbroadcastb, 66, 0f38, 78,    el,    b, el),
//       pbroadcastb, 66, 0f38, 7a,           b
    INSN(pbroadcastw, 66, 0f38, 79,    el_2,  b, vl),
//       pbroadcastw, 66, 0f38, 7b,           b
    INSN(pcmp,        66, 0f3a, 3f,    vl,   bw, vl),
    INSN(pcmpeqb,     66,   0f, 74,    vl,    b, vl),
    INSN(pcmpeqw,     66,   0f, 75,    vl,    w, vl),
    INSN(pcmpgtb,     66,   0f, 64,    vl,    b, vl),
    INSN(pcmpgtw,     66,   0f, 65,    vl,    w, vl),
    INSN(pcmpu,       66, 0f3a, 3e,    vl,   bw, vl),
    INSN(permw,       66, 0f38, 8d,    vl,    w, vl),
    INSN(permi2w,     66, 0f38, 75,    vl,    w, vl),
    INSN(permt2w,     66, 0f38, 7d,    vl,    w, vl),
    INSN(pmaddubsw,   66, 0f38, 04,    vl,    b, vl),
    INSN(pmaddwd,     66,   0f, f5,    vl,    w, vl),
    INSN(pmaxsb,      66, 0f38, 3c,    vl,    b, vl),
    INSN(pmaxsw,      66,   0f, ee,    vl,    w, vl),
    INSN(pmaxub,      66,   0f, de,    vl,    b, vl),
    INSN(pmaxuw,      66, 0f38, 3e,    vl,    w, vl),
    INSN(pminsb,      66, 0f38, 38,    vl,    b, vl),
    INSN(pminsw,      66,   0f, ea,    vl,    w, vl),
    INSN(pminub,      66,   0f, da,    vl,    b, vl),
    INSN(pminuw,      66, 0f38, 3a,    vl,    w, vl),
//       pmovb2m,     f3, 0f38, 29,           b
//       pmovm2,      f3, 0f38, 28,          bw
    INSN(pmovswb,     f3, 0f38, 20,    vl_2,  b, vl),
    INSN(pmovsxbw,    66, 0f38, 20,    vl_2,  b, vl),
    INSN(pmovuswb,    f3, 0f38, 10,    vl_2,  b, vl),
//       pmovw2m,     f3, 0f38, 29,           w
    INSN(pmovwb,      f3, 0f38, 30,    vl_2,  b, vl),
    INSN(pmovzxbw,    66, 0f38, 30,    vl_2,  b, vl),
    INSN(pmulhrsw,    66, 0f38, 0b,    vl,    w, vl),
    INSN(pmulhuw,     66,   0f, e4,    vl,    w, vl),
    INSN(pmulhw,      66,   0f, e5,    vl,    w, vl),
    INSN(pmullw,      66,   0f, d5,    vl,    w, vl),
    INSN(psadbw,      66,   0f, f6,    vl,    b, vl),
    INSN(pshufb,      66, 0f38, 00,    vl,    b, vl),
    INSN(pshufhw,     f3,   0f, 70,    vl,    w, vl),
    INSN(pshuflw,     f2,   0f, 70,    vl,    w, vl),
    INSNX(pslldq,     66,   0f, 73, 7, vl,    b, vl),
    INSN(psllvw,      66, 0f38, 12,    vl,    w, vl),
    INSN(psllw,       66,   0f, f1,    el_8,  w, vl),
    INSNX(psllw,      66,   0f, 71, 6, vl,    w, vl),
    INSN(psravw,      66, 0f38, 11,    vl,    w, vl),
    INSN(psraw,       66,   0f, e1,    el_8,  w, vl),
    INSNX(psraw,      66,   0f, 71, 4, vl,    w, vl),
    INSNX(psrldq,     66,   0f, 73, 3, vl,    b, vl),
    INSN(psrlvw,      66, 0f38, 10,    vl,    w, vl),
    INSN(psrlw,       66,   0f, d1,    el_8,  w, vl),
    INSNX(psrlw,      66,   0f, 71, 2, vl,    w, vl),
    INSN(psubb,       66,   0f, f8,    vl,    b, vl),
    INSN(psubsb,      66,   0f, e8,    vl,    b, vl),
    INSN(psubsw,      66,   0f, e9,    vl,    w, vl),
    INSN(psubusb,     66,   0f, d8,    vl,    b, vl),
    INSN(psubusw,     66,   0f, d9,    vl,    w, vl),
    INSN(psubw,       66,   0f, f9,    vl,    w, vl),
    INSN(ptestm,      66, 0f38, 26,    vl,   bw, vl),
    INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
    INSN(punpckhbw,   66,   0f, 68,    vl,    b, vl),
    INSN(punpckhwd,   66,   0f, 69,    vl,    w, vl),
    INSN(punpcklbw,   66,   0f, 60,    vl,    b, vl),
    INSN(punpcklwd,   66,   0f, 61,    vl,    w, vl),
};
452 
/*
 * AVX512BW test vectors. NOTE(review): the _128 suffix presumably restricts
 * these to the vl_128 set - the pairing is not visible in this chunk. The
 * commented-out row is presumably an encoding without a memory operand.
 */
static const struct test avx512bw_128[] = {
    INSN(pextrb, 66, 0f3a, 14, el, b, el),
//       pextrw, 66,   0f, c5,     w
    INSN(pextrw, 66, 0f3a, 15, el, w, el),
    INSN(pinsrb, 66, 0f3a, 20, el, b, el),
    INSN(pinsrw, 66,   0f, c4, el, w, el),
};
460 
/*
 * AVX512CD test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 * Commented-out rows are presumably encodings without a memory operand.
 */
static const struct test avx512cd_all[] = {
//       pbroadcastmb2q, f3, 0f38, 2a,      q
//       pbroadcastmw2d, f3, 0f38, 3a,      d
    INSN(pconflict,      66, 0f38, c4, vl, dq, vl),
    INSN(plzcnt,         66, 0f38, 44, vl, dq, vl),
};
467 
/*
 * AVX512DQ test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 * Commented-out rows are presumably encodings without a memory operand.
 */
static const struct test avx512dq_all[] = {
    INSN_PFP(and,              0f, 54),
    INSN_PFP(andn,             0f, 55),
    INSN(broadcasti32x2, 66, 0f38, 59, el_2,  d, vl),
    INSN(cvtpd2qq,       66,   0f, 7b,   vl,  q, vl),
    INSN(cvtpd2uqq,      66,   0f, 79,   vl,  q, vl),
    INSN(cvtps2qq,       66,   0f, 7b, vl_2,  d, vl),
    INSN(cvtps2uqq,      66,   0f, 79, vl_2,  d, vl),
    INSN(cvtqq2pd,       f3,   0f, e6,   vl,  q, vl),
    INSN(cvtqq2ps,         ,   0f, 5b,   vl,  q, vl),
    INSN(cvttpd2qq,      66,   0f, 7a,   vl,  q, vl),
    INSN(cvttpd2uqq,     66,   0f, 78,   vl,  q, vl),
    INSN(cvttps2qq,      66,   0f, 7a, vl_2,  d, vl),
    INSN(cvttps2uqq,     66,   0f, 78, vl_2,  d, vl),
    INSN(cvtuqq2pd,      f3,   0f, 7a,   vl,  q, vl),
    INSN(cvtuqq2ps,      f2,   0f, 7a,   vl,  q, vl),
    INSN(fpclass,        66, 0f3a, 66,   vl, sd, vl),
    INSN(fpclass,        66, 0f3a, 67,   el, sd, el),
    INSN_PFP(or,               0f, 56),
//       pmovd2m,        f3, 0f38, 39,        d
//       pmovm2,         f3, 0f38, 38,       dq
//       pmovq2m,        f3, 0f38, 39,        q
    INSN(pmullq,         66, 0f38, 40,   vl,  q, vl),
    INSN(range,          66, 0f3a, 50,   vl, sd, vl),
    INSN(range,          66, 0f3a, 51,   el, sd, el),
    INSN(reduce,         66, 0f3a, 56,   vl, sd, vl),
    INSN(reduce,         66, 0f3a, 57,   el, sd, el),
    INSN_PFP(xor,              0f, 57),
};
497 
/*
 * AVX512DQ test vectors using the GPR-width element class (ESZ_dq64).
 * NOTE(review): the _128 suffix presumably restricts these to the vl_128
 * set - the pairing is not visible in this chunk.
 */
static const struct test avx512dq_128[] = {
    INSN(pextr, 66, 0f3a, 16, el, dq64, el),
    INSN(pinsr, 66, 0f3a, 22, el, dq64, el),
};
502 
/*
 * AVX512DQ test vectors. NOTE(review): the _no128 suffix presumably pairs
 * this table with vl_no128 - the pairing is not visible in this chunk.
 */
static const struct test avx512dq_no128[] = {
    INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
    INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
    INSN(broadcasti64x2, 66, 0f38, 5a, el_2, q, vl),
    INSN(extractf64x2,   66, 0f3a, 19, el_2, q, vl),
    INSN(extracti64x2,   66, 0f3a, 39, el_2, q, vl),
    INSN(insertf64x2,    66, 0f3a, 18, el_2, q, vl),
    INSN(inserti64x2,    66, 0f3a, 38, el_2, q, vl),
};
512 
/*
 * AVX512DQ test vectors. NOTE(review): the _512 suffix presumably pairs
 * this table with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512dq_512[] = {
    INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
    INSN(broadcasti32x8, 66, 0f38, 5b, el_8, d, vl),
    INSN(extractf32x8,   66, 0f3a, 1b, el_8, d, vl),
    INSN(extracti32x8,   66, 0f3a, 3b, el_8, d, vl),
    INSN(insertf32x8,    66, 0f3a, 1a, el_8, d, vl),
    INSN(inserti32x8,    66, 0f3a, 3a, el_8, d, vl),
};
521 
/*
 * AVX512ER test vectors. NOTE(review): the _512 suffix presumably pairs
 * this table with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512er_512[] = {
    INSN(exp2,    66, 0f38, c8, vl, sd, vl),
    INSN(rcp28,   66, 0f38, ca, vl, sd, vl),
    INSN(rcp28,   66, 0f38, cb, el, sd, el),
    INSN(rsqrt28, 66, 0f38, cc, vl, sd, vl),
    INSN(rsqrt28, 66, 0f38, cd, el, sd, el),
};
529 
/*
 * AVX512PF test vectors. The two opcodes (c6, c7) are shared by several
 * insns, which are told apart by the .ext value given to INSNX(). As the
 * mnemonics contain "gather" / "scatter", test_one() sets its 'sg' flag for
 * all of them. NOTE(review): the _512 suffix presumably pairs this table
 * with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512pf_512[] = {
    INSNX(gatherpf0d,  66, 0f38, c6, 1, vl, sd, el),
    INSNX(gatherpf0q,  66, 0f38, c7, 1, vl, sd, el),
    INSNX(gatherpf1d,  66, 0f38, c6, 2, vl, sd, el),
    INSNX(gatherpf1q,  66, 0f38, c7, 2, vl, sd, el),
    INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el),
    INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el),
    INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el),
    INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
};
540 
/*
 * AVX512_4FMAPS test vectors; all use a memory operand of four dword
 * elements (el_4, d). NOTE(review): the _512 suffix presumably pairs this
 * table with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512_4fmaps_512[] = {
    INSN(4fmaddps,  f2, 0f38, 9a, el_4, d, vl),
    INSN(4fmaddss,  f2, 0f38, 9b, el_4, d, vl),
    INSN(4fnmaddps, f2, 0f38, aa, el_4, d, vl),
    INSN(4fnmaddss, f2, 0f38, ab, el_4, d, vl),
};
547 
/*
 * AVX512_4VNNIW test vectors; both use a memory operand of four dword
 * elements (el_4, d). NOTE(review): the _512 suffix presumably pairs this
 * table with vl_512 - the pairing is not visible in this chunk.
 */
static const struct test avx512_4vnniw_512[] = {
    INSN(p4dpwssd,  f2, 0f38, 52, el_4, d, vl),
    INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
};
552 
553 static const struct test avx512_bf16_all[] = {
554     INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
555     INSN(vcvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
556     INSN(vdpbf16ps,      f3, 0f38, 52, vl, d, vl),
557 };
558 
/*
 * AVX512_BITALG test vectors. NOTE(review): the _all suffix presumably
 * pairs this table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test avx512_bitalg_all[] = {
    INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
    INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
};
563 
/*
 * AVX512_IFMA test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test avx512_ifma_all[] = {
    INSN(pmadd52huq, 66, 0f38, b5, vl, q, vl),
    INSN(pmadd52luq, 66, 0f38, b4, vl, q, vl),
};
568 
/*
 * AVX512_VBMI test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test avx512_vbmi_all[] = {
    INSN(permb,         66, 0f38, 8d, vl, b, vl),
    INSN(permi2b,       66, 0f38, 75, vl, b, vl),
    INSN(permt2b,       66, 0f38, 7d, vl, b, vl),
    INSN(pmultishiftqb, 66, 0f38, 83, vl, q, vl),
};
575 
/*
 * AVX512_VBMI2 test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test avx512_vbmi2_all[] = {
    INSN(pcompress, 66, 0f38, 63, vl, bw, el),
    INSN(pexpand,   66, 0f38, 62, vl, bw, el),
    INSN(pshld,     66, 0f3a, 71, vl, dq, vl),
    INSN(pshldv,    66, 0f38, 71, vl, dq, vl),
    INSN(pshldvw,   66, 0f38, 70, vl,  w, vl),
    INSN(pshldw,    66, 0f3a, 70, vl,  w, vl),
    INSN(pshrd,     66, 0f3a, 73, vl, dq, vl),
    INSN(pshrdv,    66, 0f38, 73, vl, dq, vl),
    INSN(pshrdvw,   66, 0f38, 72, vl,  w, vl),
    INSN(pshrdw,    66, 0f3a, 72, vl,  w, vl),
};
588 
/*
 * AVX512_VNNI test vectors. NOTE(review): the _all suffix presumably pairs
 * this table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test avx512_vnni_all[] = {
    INSN(pdpbusd,  66, 0f38, 50, vl, d, vl),
    INSN(pdpbusds, 66, 0f38, 51, vl, d, vl),
    INSN(pdpwssd,  66, 0f38, 52, vl, d, vl),
    INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
};
595 
/*
 * AVX512_VPOPCNTDQ test vector (dword and qword forms via the dq class).
 * NOTE(review): the _all suffix presumably pairs this table with vl_all -
 * the pairing is not visible in this chunk.
 */
static const struct test avx512_vpopcntdq_all[] = {
    INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
};
599 
/*
 * GFNI test vectors. NOTE(review): the _all suffix presumably pairs this
 * table with vl_all - the pairing is not visible in this chunk.
 */
static const struct test gfni_all[] = {
    INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
    INSN(gf2p8affineqb,    66, 0f3a, ce, vl, q, vl),
    INSN(gf2p8mulb,        66, 0f38, cf, vl, b, vl),
};
605 
/*
 * VAES test vectors.
 *
 * The uses of b in this table are simply (one of) the shortest form(s) of
 * saying "no broadcast" without introducing a 128-bit granularity enumerator.
 * Due to all of the insns being WIG, w, d_nb, and q_nb would all also fit.
 */
static const struct test vaes_all[] = {
    INSN(aesdec,     66, 0f38, de, vl, b, vl),
    INSN(aesdeclast, 66, 0f38, df, vl, b, vl),
    INSN(aesenc,     66, 0f38, dc, vl, b, vl),
    INSN(aesenclast, 66, 0f38, dd, vl, b, vl),
};
617 
/*
 * VPCLMULQDQ test vector; uses the explicit "no broadcast" qword class.
 * NOTE(review): the _all suffix presumably pairs this table with vl_all -
 * the pairing is not visible in this chunk.
 */
static const struct test vpclmulqdq_all[] = {
    INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
621 
/*
 * Sets of vector lengths (enum vl values). NOTE(review): each set presumably
 * pairs with the test tables carrying the same suffix (_all, _128, _no128,
 * _512) - the code doing so is outside this chunk.
 */
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
static const unsigned char vl_no128[] = { VL_512, VL_256 };
static const unsigned char vl_512[] = { VL_512 };
626 
/*
 * This table, indicating the presence of an immediate (byte) for an opcode
 * space 0f major opcode, is indexed by high major opcode byte nibble, with
 * each table element then bit-indexed by low major opcode byte nibble.
 *
 * test_one() consults it as imm0f[opc >> 4] & (1 << (opc & 0xf)) for SPC_0f
 * insns only (SPC_0f3a insns are always treated as having an immediate
 * there). Rows not named below are zero, i.e. "no immediate".
 */
static const uint16_t imm0f[16] = {
    [0x7] = (1 << 0x0) /* vpshuf* */ |
            (1 << 0x1) /* vps{ll,ra,rl}w */ |
            (1 << 0x2) /* vps{l,r}ld, vp{rol,ror,sra}{d,q} */ |
            (1 << 0x3) /* vps{l,r}l{,d}q */,
    [0xc] = (1 << 0x2) /* vcmp{p,s}{d,s} */ |
            (1 << 0x4) /* vpinsrw */ |
            (1 << 0x5) /* vpextrw */ |
            (1 << 0x6) /* vshufp{d,s} */,
};
642 
/*
 * Emulator hook table. Zero-initialized (static storage); no hooks are
 * assigned within this chunk - presumably set up by code further down.
 */
static struct x86_emulate_ops emulops;
644 
/*
 * Access tracking (byte granular) is used on the first 3 * 64 bytes of
 * address space. Instructions get encoded with a raw Disp8 value of 1, which
 * then gets scaled accordingly. Hence accesses below the address <scaling
 * factor> as well as at or above 2 * <scaling factor> are indications of
 * bugs. To aid diagnosis / debugging, track all accesses below
 * 3 * <scaling factor>. With AVX512 the maximum scaling factor is 64.
 *
 * Each array element counts the accesses recorded for that byte address.
 */
static unsigned int accessed[3 * 64];
654 
record_access(enum x86_segment seg,unsigned long offset,unsigned int bytes)655 static bool record_access(enum x86_segment seg, unsigned long offset,
656                           unsigned int bytes)
657 {
658     while ( bytes-- )
659     {
660         if ( offset >= ARRAY_SIZE(accessed) )
661             return false;
662         ++accessed[offset++];
663     }
664 
665     return true;
666 }
667 
read(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)668 static int read(enum x86_segment seg, unsigned long offset, void *p_data,
669                 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
670 {
671     if ( !record_access(seg, offset, bytes + !bytes) )
672         return X86EMUL_UNHANDLEABLE;
673     memset(p_data, 0, bytes);
674     return X86EMUL_OKAY;
675 }
676 
write(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)677 static int write(enum x86_segment seg, unsigned long offset, void *p_data,
678                  unsigned int bytes, struct x86_emulate_ctxt *ctxt)
679 {
680     if ( !record_access(seg, offset, bytes + !bytes) )
681         return X86EMUL_UNHANDLEABLE;
682     return X86EMUL_OKAY;
683 }
684 
/*
 * Encode one EVEX instruction with a raw Disp8 of 1, run it through the
 * emulator, and verify via accessed[] that exactly the expected bytes were
 * touched. For eligible tests the embedded broadcast form is checked too.
 *
 * test:  descriptor of the instruction to encode
 * vl:    vector length, placed into the EVEX "lr" field
 * instr: buffer receiving the encoding (bytes 0...8 get written)
 * ctxt:  emulation context; eip and edx of its register block are overwritten
 *
 * Does not return on failure: a diagnostic is printed and exit(1) is called.
 */
static void test_one(const struct test *test, enum vl vl,
                     unsigned char *instr, struct x86_emulate_ctxt *ctxt)
{
    unsigned int vsz, esz, i, n;
    int rc;
    /* Scatter/gather insns are recognized by their mnemonic. */
    bool sg = strstr(test->mnemonic, "gather") ||
              strstr(test->mnemonic, "scatter");
    /*
     * An immediate byte is expected for every 0f3a opcode, and for those 0f
     * opcodes which have their bit set in imm0f[].
     */
    bool imm = test->spc == SPC_0f3a ||
               (test->spc == SPC_0f &&
                (imm0f[test->opc >> 4] & (1 << (test->opc & 0xf))));
    /*
     * The three bytes emitted right after the 0x62 escape byte.
     * NOTE(review): relies on the compiler allocating bitfields starting
     * from the least significant bit - confirm for the compilers in use.
     */
    union evex {
        uint8_t raw[3];
        struct {
            uint8_t opcx:2;
            uint8_t mbz:2;
            uint8_t R:1;
            uint8_t b:1;
            uint8_t x:1;
            uint8_t r:1;
            uint8_t pfx:2;
            uint8_t mbs:1;
            uint8_t reg:4;
            uint8_t w:1;
            uint8_t opmsk:3;
            uint8_t RX:1;
            uint8_t bcst:1;
            uint8_t lr:2;
            uint8_t z:1;
        };
    } evex = {
        .opcx = test->spc, .pfx = test->pfx, .lr = vl,
        .R = 1, .b = 1, .x = 1, .r = 1, .mbs = 1,
        /* S/G insns get opmask 1 selected; %k1 is set up further down. */
        .reg = 0xf, .RX = 1, .opmsk = sg,
    };

    /* Determine the element size in bytes, setting EVEX.W where needed. */
    switch ( test->esz )
    {
    case ESZ_b:
        esz = 1;
        break;

    case ESZ_w:
        esz = 2;
        evex.w = 1;
        break;

#ifdef __i386__
    case ESZ_d_WIG:
        evex.w = 1;
        /* fall through */
#endif
    case ESZ_d: case ESZ_d_nb:
        esz = 4;
        break;

    case ESZ_q: case ESZ_q_nb:
        esz = 8;
        evex.w = 1;
        break;

    default:
        /* Dual-size enumerators are expected to be split up by the caller. */
        ASSERT_UNREACHABLE();
    }

    /*
     * Determine the memory operand size in bytes, either as a fraction of
     * the vector length or as a multiple of the element size.
     */
    switch ( test->vsz )
    {
    case VSZ_vl:
        vsz = 16 << vl;
        break;

    case VSZ_vl_2:
        vsz = 8 << vl;
        break;

    case VSZ_vl_4:
        vsz = 4 << vl;
        break;

    case VSZ_vl_8:
        vsz = 2 << vl;
        break;

    case VSZ_el:
        vsz = esz;
        break;

    case VSZ_el_2:
        vsz = esz * 2;
        break;

    case VSZ_el_4:
        vsz = esz * 4;
        break;

    case VSZ_el_8:
        vsz = esz * 8;
        break;

    default:
        ASSERT_UNREACHABLE();
    }

    /*
     * Note: SIB addressing is used here, such that S/G insns can be handled
     * without extra conditionals.
     */
    instr[0] = 0x62;
    instr[1] = evex.raw[0];
    instr[2] = evex.raw[1];
    instr[3] = evex.raw[2];
    instr[4] = test->opc;
    instr[5] = 0x44 | (test->ext << 3); /* ModR/M */
    instr[6] = 0x22; /* SIB: base rDX, index none / xMM4 */
    instr[7] = 1; /* Disp8 */
    instr[8] = 0; /* immediate, if any */

    /*
     * Prepare the opmask and index vector registers named by the encoding
     * above (%k1 via .opmsk, xMM4 via the SIB byte); the index vector is
     * zeroed.
     */
    asm volatile ( "kxnorw %k1, %k1, %k1" );
    asm volatile ( "vxorps %xmm4, %xmm4, %xmm4" );

    /* Base register is zero, hence addresses solely reflect scaled Disp8. */
    ctxt->regs->eip = (unsigned long)&instr[0];
    ctxt->regs->edx = 0;
    memset(accessed, 0, sizeof(accessed));

    /* Emulation must succeed and consume exactly the encoded bytes. */
    rc = x86_emulate(ctxt, &emulops);
    if ( rc != X86EMUL_OKAY ||
         (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
        goto fail;

    /* Nothing may have been accessed below the scaled displacement. */
    for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i )
         if ( accessed[i] )
             goto fail;

    /*
     * Compute the end of the range expected to be accessed: a full memory
     * operand for ordinary insns, a single element for S/G ones, and just
     * one byte for insns with "pf" in their mnemonic.
     */
    n = test->scale == SC_vl ? vsz : esz;
    if ( !sg )
        n += vsz;
    else if ( !strstr(test->mnemonic, "pf") )
        n += esz;
    else
        ++n;

    /*
     * Ordinary insns are to touch each byte once. For S/G insns each byte
     * is expected to be hit once per element, with the count halved when
     * the opcode is odd and EVEX.W is clear.
     * NOTE(review): the halving presumably covers forms where indexes are
     * wider than the data elements - confirm.
     */
    for ( ; i < n; ++i )
         if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w)
                                 : 1) )
             goto fail;

    /* Nothing may have been accessed past the expected range either. */
    for ( ; i < ARRAY_SIZE(accessed); ++i )
         if ( accessed[i] )
             goto fail;

    /* Also check the broadcast case, if available. */
    if ( test->vsz >= VSZ_el || test->scale != SC_vl )
        return;

    switch ( test->esz )
    {
    case ESZ_d_nb: case ESZ_q_nb:
    case ESZ_b: case ESZ_w: case ESZ_bw:
        return;

    case ESZ_d: case ESZ_q:
        break;

    default:
        ASSERT_UNREACHABLE();
    }

    /* Re-run the very same encoding, now with the broadcast bit set. */
    evex.bcst = 1;
    instr[3] = evex.raw[2];

    ctxt->regs->eip = (unsigned long)&instr[0];
    memset(accessed, 0, sizeof(accessed));

    rc = x86_emulate(ctxt, &emulops);
    if ( rc != X86EMUL_OKAY ||
         (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
        goto fail;

    /*
     * With broadcast exactly one element is to be accessed, with the
     * displacement scaled by element size: bytes [esz, 2 * esz) once each,
     * everything else not at all.
     */
    for ( i = 0; i < esz; ++i )
         if ( accessed[i] )
             goto fail;
    for ( ; i < esz * 2; ++i )
         if ( accessed[i] != 1 )
             goto fail;
    for ( ; i < ARRAY_SIZE(accessed); ++i )
         if ( accessed[i] )
             goto fail;

    return;

 fail:
    /* evex.bcst tells which of the two runs above has failed. */
    printf("failed (v%s%s %u-bit)\n", test->mnemonic,
           evex.bcst ? "/bcst" : "", 128 << vl);
    exit(1);
}
879 
test_pair(const struct test * tmpl,enum vl vl,enum esz esz1,const char * suffix1,enum esz esz2,const char * suffix2,unsigned char * instr,struct x86_emulate_ctxt * ctxt)880 static void test_pair(const struct test *tmpl, enum vl vl,
881                       enum esz esz1, const char *suffix1,
882                       enum esz esz2, const char *suffix2,
883                       unsigned char *instr, struct x86_emulate_ctxt *ctxt)
884 {
885     struct test test = *tmpl;
886     char mnemonic[24];
887 
888     test.esz = esz1;
889     snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix1);
890     test.mnemonic = mnemonic;
891     test_one(&test, vl, instr, ctxt);
892 
893     test.esz = esz2;
894     snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix2);
895     test.mnemonic = mnemonic;
896     test_one(&test, vl, instr, ctxt);
897 }
898 
test_group(const struct test tests[],unsigned int nr_test,const unsigned char vl[],unsigned int nr_vl,void * instr,struct x86_emulate_ctxt * ctxt)899 static void test_group(const struct test tests[], unsigned int nr_test,
900                        const unsigned char vl[], unsigned int nr_vl,
901                        void *instr, struct x86_emulate_ctxt *ctxt)
902 {
903     unsigned int i, j;
904 
905     for ( i = 0; i < nr_test; ++i )
906     {
907         for ( j = 0; j < nr_vl; ++j )
908         {
909             if ( vl[0] == VL_512 && vl[j] != VL_512 && !cpu_has_avx512vl )
910                 continue;
911 
912             switch ( tests[i].esz )
913             {
914             case ESZ_q_nb:
915                 /* The 128-bit form of VMOVDDUP needs special casing. */
916                 if ( vl[j] == VL_128 && tests[i].spc == SPC_0f &&
917                      tests[i].opc == 0x12 && tests[i].pfx == PFX_f2 )
918                 {
919                     struct test test = tests[i];
920 
921                     test.vsz = VSZ_el;
922                     test.scale = SC_el;
923                     test_one(&test, vl[j], instr, ctxt);
924                     continue;
925                 }
926                 /* fall through */
927             default:
928                 test_one(&tests[i], vl[j], instr, ctxt);
929                 break;
930 
931             case ESZ_bw:
932                 test_pair(&tests[i], vl[j], ESZ_b, "b", ESZ_w, "w",
933                           instr, ctxt);
934                 break;
935 
936             case ESZ_dq:
937                 test_pair(&tests[i], vl[j], ESZ_d,
938                           strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
939                           ESZ_q, "q", instr, ctxt);
940                 break;
941 
942 #ifdef __i386__
943             case ESZ_d_WIG:
944                 test_pair(&tests[i], vl[j], ESZ_d, "/W0",
945                           ESZ_d_WIG, "/W1", instr, ctxt);
946                 break;
947 #endif
948 
949             case ESZ_sd:
950                 test_pair(&tests[i], vl[j],
951                           ESZ_d, tests[i].vsz < VSZ_el ? "ps" : "ss",
952                           ESZ_q, tests[i].vsz < VSZ_el ? "pd" : "sd",
953                           instr, ctxt);
954                 break;
955             }
956         }
957     }
958 }
959 
/*
 * Entry point: run all EVEX Disp8 scaling tests applicable to this system.
 *
 * instr: buffer the test instructions get encoded into
 * ctxt:  emulation context handed on to the individual tests
 * ops:   caller's emulation hooks; they are copied, with the copy's read
 *        and write hooks replaced by the access tracking ones of this file
 *
 * Progress is reported on stdout. A failing test terminates the process
 * from within test_one(), so returning from here means everything passed.
 */
void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
                     const struct x86_emulate_ops *ops)
{
    emulops = *ops;
    emulops.read = read;
    emulops.write = write;

/*
 * RUN(feat, vl): if cpu_has_<feat>, run table <feat>_<vl> at the vector
 * lengths listed in vl_<vl>, framed by a "Testing ..." / "okay" message
 * pair. Both arrays are looked up by token pasting and hence need to exist
 * under exactly these names.
 */
#define RUN(feat, vl) do { \
    if ( cpu_has_##feat ) \
    { \
        printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \
        test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \
                   vl_ ## vl, ARRAY_SIZE(vl_ ## vl), instr, ctxt); \
        printf("okay\n"); \
    } \
} while ( false )

    RUN(avx512f, all);
    RUN(avx512f, 128);
    RUN(avx512f, no128);
    RUN(avx512f, 512);
    RUN(avx512bw, all);
    RUN(avx512bw, 128);
    RUN(avx512cd, all);
    RUN(avx512dq, all);
    RUN(avx512dq, 128);
    RUN(avx512dq, no128);
    RUN(avx512dq, 512);
    RUN(avx512er, 512);
/*
 * The PF tests are gated on AVX512F alone.
 * NOTE(review): presumably no dedicated feature check is wanted because
 * the emulator doesn't need the host to execute prefetches - confirm.
 */
#define cpu_has_avx512pf cpu_has_avx512f
    RUN(avx512pf, 512);
    RUN(avx512_4fmaps, 512);
    RUN(avx512_4vnniw, 512);
    RUN(avx512_bf16, all);
    RUN(avx512_bitalg, all);
    RUN(avx512_ifma, all);
    RUN(avx512_vbmi, all);
    RUN(avx512_vbmi2, all);
    RUN(avx512_vnni, all);
    RUN(avx512_vpopcntdq, all);

    /*
     * These groups additionally depend on AVX512F.
     * NOTE(review): presumably because their feature flags alone don't
     * imply availability of the EVEX encoded forms - confirm.
     */
    if ( cpu_has_avx512f )
    {
        RUN(gfni, all);
        RUN(vaes, all);
        RUN(vpclmulqdq, all);
    }
}
1008