1 #include "x86-emulate.h"
2
3 #include <stdarg.h>
4 #include <stdio.h>
5
/*
 * Description of one instruction (form) to be tested. The bitfields hold
 * enumerators of the correspondingly named enums in this file and get
 * filled in by the INSNX() family of macros.
 */
struct test {
    const char *mnemonic;   /* name without leading 'v' (diagnostics, S/G detection) */
    unsigned int opc:8;     /* major opcode byte */
    unsigned int spc:2;     /* opcode space (enum spc) */
    unsigned int pfx:2;     /* SIMD prefix (enum pfx) */
    unsigned int vsz:3;     /* memory operand size rule (enum vsz) */
    unsigned int esz:4;     /* element size (enum esz) */
    unsigned int scale:1;   /* Disp8 scaling rule (enum scale) */
    unsigned int ext:3;     /* opcode extension, placed in bits 3-5 of ModR/M */
};
16
/* Opcode space of an insn; test_one() stores the value in the EVEX opcx field. */
enum spc {
    SPC_invalid,
    SPC_0f,
    SPC_0f38,
    SPC_0f3a,
};
23
/*
 * SIMD prefix of an insn; test_one() stores the value in the EVEX pfx field.
 * PFX_ (empty suffix) stands for "no prefix", which lets the INSN*() macros
 * be invoked with an empty prefix argument.
 */
enum pfx {
    PFX_,
    PFX_66,
    PFX_f3,
    PFX_f2
};
30
/*
 * Vector length. The value is used both as the EVEX lr field and as a
 * shift count in test_one() (16 << vl bytes, 128 << vl bits).
 */
enum vl {
    VL_128,
    VL_256,
    VL_512,
};
36
/*
 * Selects which quantity test_one() expects Disp8 to have been scaled by:
 * the memory operand size (vsz) for SC_vl, the element size (esz) for SC_el.
 */
enum scale { /* scale by memory operand ... */
    SC_vl, /* ... vector length */
    SC_el, /* ... element length */
};
41
42 /*
43 * Vector size is determined either from EVEX.L'L (VL) or vector
44 * element size (EL), often controlled by EVEX.W (see enum esz).
45 */
46 enum vsz {
47 VSZ_vl,
48 VSZ_vl_2, /* VL / 2 */
49 VSZ_vl_4, /* VL / 4 */
50 VSZ_vl_8, /* VL / 8 */
51 /* "no broadcast" implied from here on. */
52 VSZ_el,
53 VSZ_el_2, /* EL * 2 */
54 VSZ_el_4, /* EL * 4 */
55 VSZ_el_8, /* EL * 8 */
56 };
57
58 /*
59 * Vector element size is either an opcode attribute or often determined
60 * by EVEX.W (in which case enumerators below name two sizes). Instructions
61 * accessing GPRs often use EVEX.W to select between 32- and 64-bit GPR
62 * width, but this distinction goes away outside of 64-bit mode (and EVEX.W
63 * is ignored there).
64 */
65 enum esz {
66 ESZ_d,
67 ESZ_q,
68 ESZ_dq,
69 ESZ_sd,
70 ESZ_d_nb,
71 ESZ_q_nb,
72 /* "no broadcast" implied from here on. */
73 #ifdef __i386__
74 ESZ_d_WIG,
75 #endif
76 ESZ_b,
77 ESZ_w,
78 ESZ_bw,
79 };
80
/*
 * Element size for insns whose d/q distinction (via EVEX.W) only exists in
 * 64-bit builds; for __i386__ builds the W-ignored 32-bit variant is used.
 */
#ifndef __i386__
# define ESZ_dq64 ESZ_dq
#else
# define ESZ_dq64 ESZ_d_WIG
#endif
86
/*
 * Build one struct test initializer.
 *  m:  mnemonic (gets stringified)
 *  p:  SIMD prefix, pasted onto PFX_ (may be empty)
 *  sp: opcode space, pasted onto SPC_
 *  o:  major opcode as hex digits, pasted onto 0x
 *  e:  opcode extension digit, pasted onto a leading 0
 *  vs: memory operand size rule, pasted onto VSZ_
 *  es: element size, pasted onto ESZ_
 *  sc: Disp8 scaling rule, pasted onto SC_
 */
#define INSNX(m, p, sp, o, e, vs, es, sc) { \
    .mnemonic = #m, .opc = 0x##o, .spc = SPC_##sp, .pfx = PFX_##p, \
    .vsz = VSZ_##vs, .esz = ESZ_##es, .scale = SC_##sc, .ext = 0##e \
}
/* INSNX() without opcode extension (ext is 0). */
#define INSN(m, p, sp, o, vs, es, sc) INSNX(m, p, sp, o, 0, vs, es, sc)
/* Packed FP pair: <m>pd (66 prefix, q elements) and <m>ps (no prefix, d). */
#define INSN_PFP(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q, vl), \
    INSN(m##ps, , sp, o, vl, d, vl)
/* Same as INSN_PFP(), but using the "no broadcast" element sizes. */
#define INSN_PFP_NB(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q_nb, vl), \
    INSN(m##ps, , sp, o, vl, d_nb, vl)
/* Scalar FP pair: <m>sd (f2 prefix, q element) and <m>ss (f3 prefix, d). */
#define INSN_SFP(m, sp, o) \
    INSN(m##sd, f2, sp, o, el, q, el), \
    INSN(m##ss, f3, sp, o, el, d, el)

/* Packed plus scalar FP forms of <m>: four entries in total. */
#define INSN_FP(m, sp, o) \
    INSN_PFP(m, sp, o), \
    INSN_SFP(m, sp, o)
105
/*
 * AVX512F insns tested with every vector length listed in vl_all. Several
 * mnemonics appear more than once, with different opcodes or extensions.
 */
static const struct test avx512f_all[] = {
    INSN_FP(add, 0f, 58),
    INSN(align, 66, 0f3a, 03, vl, dq, vl),
    INSN(blendm, 66, 0f38, 65, vl, sd, vl),
    INSN(broadcastss, 66, 0f38, 18, el, d, el),
    INSN_FP(cmp, 0f, c2),
    INSN(comisd, 66, 0f, 2f, el, q, el),
    INSN(comiss, , 0f, 2f, el, d, el),
    INSN(compress, 66, 0f38, 8a, vl, sd, el),
    INSN(cvtdq2pd, f3, 0f, e6, vl_2, d, vl),
    INSN(cvtdq2ps, , 0f, 5b, vl, d, vl),
    INSN(cvtpd2dq, f2, 0f, e6, vl, q, vl),
    INSN(cvtpd2udq, , 0f, 79, vl, q, vl),
    INSN(cvtpd2ps, 66, 0f, 5a, vl, q, vl),
    INSN(cvtph2ps, 66, 0f38, 13, vl_2, d_nb, vl),
    INSN(cvtps2dq, 66, 0f, 5b, vl, d, vl),
    INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl),
    INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl),
    INSN(cvtps2udq, , 0f, 79, vl, d, vl),
    INSN(cvtsd2si, f2, 0f, 2d, el, q, el),
    INSN(cvtsd2usi, f2, 0f, 79, el, q, el),
    INSN(cvtsd2ss, f2, 0f, 5a, el, q, el),
    INSN(cvtsi2sd, f2, 0f, 2a, el, dq64, el),
    INSN(cvtsi2ss, f3, 0f, 2a, el, dq64, el),
    INSN(cvtss2sd, f3, 0f, 5a, el, d, el),
    INSN(cvtss2si, f3, 0f, 2d, el, d, el),
    INSN(cvtss2usi, f3, 0f, 79, el, d, el),
    INSN(cvttpd2dq, 66, 0f, e6, vl, q, vl),
    INSN(cvttpd2udq, , 0f, 78, vl, q, vl),
    INSN(cvttps2dq, f3, 0f, 5b, vl, d, vl),
    INSN(cvttps2udq, , 0f, 78, vl, d, vl),
    INSN(cvttsd2si, f2, 0f, 2c, el, q, el),
    INSN(cvttsd2usi, f2, 0f, 78, el, q, el),
    INSN(cvttss2si, f3, 0f, 2c, el, d, el),
    INSN(cvttss2usi, f3, 0f, 78, el, d, el),
    INSN(cvtudq2pd, f3, 0f, 7a, vl_2, d, vl),
    INSN(cvtudq2ps, f2, 0f, 7a, vl, d, vl),
    INSN(cvtusi2sd, f2, 0f, 7b, el, dq64, el),
    INSN(cvtusi2ss, f3, 0f, 7b, el, dq64, el),
    INSN_FP(div, 0f, 5e),
    INSN(expand, 66, 0f38, 88, vl, sd, el),
    INSN(fixupimm, 66, 0f3a, 54, vl, sd, vl),
    INSN(fixupimm, 66, 0f3a, 55, el, sd, el),
    INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
    INSN(fmadd132, 66, 0f38, 99, el, sd, el),
    INSN(fmadd213, 66, 0f38, a8, vl, sd, vl),
    INSN(fmadd213, 66, 0f38, a9, el, sd, el),
    INSN(fmadd231, 66, 0f38, b8, vl, sd, vl),
    INSN(fmadd231, 66, 0f38, b9, el, sd, el),
    INSN(fmaddsub132, 66, 0f38, 96, vl, sd, vl),
    INSN(fmaddsub213, 66, 0f38, a6, vl, sd, vl),
    INSN(fmaddsub231, 66, 0f38, b6, vl, sd, vl),
    INSN(fmsub132, 66, 0f38, 9a, vl, sd, vl),
    INSN(fmsub132, 66, 0f38, 9b, el, sd, el),
    INSN(fmsub213, 66, 0f38, aa, vl, sd, vl),
    INSN(fmsub213, 66, 0f38, ab, el, sd, el),
    INSN(fmsub231, 66, 0f38, ba, vl, sd, vl),
    INSN(fmsub231, 66, 0f38, bb, el, sd, el),
    INSN(fmsubadd132, 66, 0f38, 97, vl, sd, vl),
    INSN(fmsubadd213, 66, 0f38, a7, vl, sd, vl),
    INSN(fmsubadd231, 66, 0f38, b7, vl, sd, vl),
    INSN(fnmadd132, 66, 0f38, 9c, vl, sd, vl),
    INSN(fnmadd132, 66, 0f38, 9d, el, sd, el),
    INSN(fnmadd213, 66, 0f38, ac, vl, sd, vl),
    INSN(fnmadd213, 66, 0f38, ad, el, sd, el),
    INSN(fnmadd231, 66, 0f38, bc, vl, sd, vl),
    INSN(fnmadd231, 66, 0f38, bd, el, sd, el),
    INSN(fnmsub132, 66, 0f38, 9e, vl, sd, vl),
    INSN(fnmsub132, 66, 0f38, 9f, el, sd, el),
    INSN(fnmsub213, 66, 0f38, ae, vl, sd, vl),
    INSN(fnmsub213, 66, 0f38, af, el, sd, el),
    INSN(fnmsub231, 66, 0f38, be, vl, sd, vl),
    INSN(fnmsub231, 66, 0f38, bf, el, sd, el),
    INSN(gatherd, 66, 0f38, 92, vl, sd, el),
    INSN(gatherq, 66, 0f38, 93, vl, sd, el),
    INSN(getexp, 66, 0f38, 42, vl, sd, vl),
    INSN(getexp, 66, 0f38, 43, el, sd, el),
    INSN(getmant, 66, 0f3a, 26, vl, sd, vl),
    INSN(getmant, 66, 0f3a, 27, el, sd, el),
    INSN_FP(max, 0f, 5f),
    INSN_FP(min, 0f, 5d),
    INSN_SFP(mov, 0f, 10),
    INSN_SFP(mov, 0f, 11),
    INSN_PFP_NB(mova, 0f, 28),
    INSN_PFP_NB(mova, 0f, 29),
    INSN(movddup, f2, 0f, 12, vl, q_nb, vl),
    INSN(movdqa32, 66, 0f, 6f, vl, d_nb, vl),
    INSN(movdqa32, 66, 0f, 7f, vl, d_nb, vl),
    INSN(movdqa64, 66, 0f, 6f, vl, q_nb, vl),
    INSN(movdqa64, 66, 0f, 7f, vl, q_nb, vl),
    INSN(movdqu32, f3, 0f, 6f, vl, d_nb, vl),
    INSN(movdqu32, f3, 0f, 7f, vl, d_nb, vl),
    INSN(movdqu64, f3, 0f, 6f, vl, q_nb, vl),
    INSN(movdqu64, f3, 0f, 7f, vl, q_nb, vl),
    INSN(movntdq, 66, 0f, e7, vl, d_nb, vl),
    INSN(movntdqa, 66, 0f38, 2a, vl, d_nb, vl),
    INSN_PFP_NB(movnt, 0f, 2b),
    INSN(movshdup, f3, 0f, 16, vl, d_nb, vl),
    INSN(movsldup, f3, 0f, 12, vl, d_nb, vl),
    INSN_PFP_NB(movu, 0f, 10),
    INSN_PFP_NB(movu, 0f, 11),
    INSN_FP(mul, 0f, 59),
    INSN(pabsd, 66, 0f38, 1e, vl, d, vl),
    INSN(pabsq, 66, 0f38, 1f, vl, q, vl),
    INSN(paddd, 66, 0f, fe, vl, d, vl),
    INSN(paddq, 66, 0f, d4, vl, q, vl),
    INSN(pand, 66, 0f, db, vl, dq, vl),
    INSN(pandn, 66, 0f, df, vl, dq, vl),
    INSN(pblendm, 66, 0f38, 64, vl, dq, vl),
    // pbroadcast, 66, 0f38, 7c, dq64
    INSN(pbroadcastd, 66, 0f38, 58, el, d, el),
    INSN(pbroadcastq, 66, 0f38, 59, el, q, el),
    INSN(pcmp, 66, 0f3a, 1f, vl, dq, vl),
    INSN(pcmpeqd, 66, 0f, 76, vl, d, vl),
    INSN(pcmpeqq, 66, 0f38, 29, vl, q, vl),
    INSN(pcmpgtd, 66, 0f, 66, vl, d, vl),
    INSN(pcmpgtq, 66, 0f38, 37, vl, q, vl),
    INSN(pcmpu, 66, 0f3a, 1e, vl, dq, vl),
    INSN(pcompress, 66, 0f38, 8b, vl, dq, el),
    INSN(permi2, 66, 0f38, 76, vl, dq, vl),
    INSN(permi2, 66, 0f38, 77, vl, sd, vl),
    INSN(permilpd, 66, 0f38, 0d, vl, q, vl),
    INSN(permilpd, 66, 0f3a, 05, vl, q, vl),
    INSN(permilps, 66, 0f38, 0c, vl, d, vl),
    INSN(permilps, 66, 0f3a, 04, vl, d, vl),
    INSN(permt2, 66, 0f38, 7e, vl, dq, vl),
    INSN(permt2, 66, 0f38, 7f, vl, sd, vl),
    INSN(pexpand, 66, 0f38, 89, vl, dq, el),
    INSN(pgatherd, 66, 0f38, 90, vl, dq, el),
    INSN(pgatherq, 66, 0f38, 91, vl, dq, el),
    INSN(pmaxs, 66, 0f38, 3d, vl, dq, vl),
    INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl),
    INSN(pmins, 66, 0f38, 39, vl, dq, vl),
    INSN(pminu, 66, 0f38, 3b, vl, dq, vl),
    INSN(pmovdb, f3, 0f38, 31, vl_4, b, vl),
    INSN(pmovdw, f3, 0f38, 33, vl_2, b, vl),
    INSN(pmovqb, f3, 0f38, 32, vl_8, b, vl),
    INSN(pmovqd, f3, 0f38, 35, vl_2, d_nb, vl),
    INSN(pmovqw, f3, 0f38, 34, vl_4, b, vl),
    INSN(pmovsdb, f3, 0f38, 21, vl_4, b, vl),
    INSN(pmovsdw, f3, 0f38, 23, vl_2, b, vl),
    INSN(pmovsqb, f3, 0f38, 22, vl_8, b, vl),
    INSN(pmovsqd, f3, 0f38, 25, vl_2, d_nb, vl),
    INSN(pmovsqw, f3, 0f38, 24, vl_4, b, vl),
    INSN(pmovsxbd, 66, 0f38, 21, vl_4, b, vl),
    INSN(pmovsxbq, 66, 0f38, 22, vl_8, b, vl),
    INSN(pmovsxwd, 66, 0f38, 23, vl_2, w, vl),
    INSN(pmovsxwq, 66, 0f38, 24, vl_4, w, vl),
    INSN(pmovsxdq, 66, 0f38, 25, vl_2, d_nb, vl),
    INSN(pmovusdb, f3, 0f38, 11, vl_4, b, vl),
    INSN(pmovusdw, f3, 0f38, 13, vl_2, b, vl),
    INSN(pmovusqb, f3, 0f38, 12, vl_8, b, vl),
    INSN(pmovusqd, f3, 0f38, 15, vl_2, d_nb, vl),
    INSN(pmovusqw, f3, 0f38, 14, vl_4, b, vl),
    INSN(pmovzxbd, 66, 0f38, 31, vl_4, b, vl),
    INSN(pmovzxbq, 66, 0f38, 32, vl_8, b, vl),
    INSN(pmovzxwd, 66, 0f38, 33, vl_2, w, vl),
    INSN(pmovzxwq, 66, 0f38, 34, vl_4, w, vl),
    INSN(pmovzxdq, 66, 0f38, 35, vl_2, d_nb, vl),
    INSN(pmuldq, 66, 0f38, 28, vl, q, vl),
    INSN(pmulld, 66, 0f38, 40, vl, d, vl),
    INSN(pmuludq, 66, 0f, f4, vl, q, vl),
    INSN(por, 66, 0f, eb, vl, dq, vl),
    INSNX(prol, 66, 0f, 72, 1, vl, dq, vl),
    INSN(prolv, 66, 0f38, 15, vl, dq, vl),
    INSNX(pror, 66, 0f, 72, 0, vl, dq, vl),
    INSN(prorv, 66, 0f38, 14, vl, dq, vl),
    INSN(pscatterd, 66, 0f38, a0, vl, dq, el),
    INSN(pscatterq, 66, 0f38, a1, vl, dq, el),
    INSN(pshufd, 66, 0f, 70, vl, d, vl),
    INSN(pslld, 66, 0f, f2, el_4, d, vl),
    INSNX(pslld, 66, 0f, 72, 6, vl, d, vl),
    INSN(psllq, 66, 0f, f3, el_2, q, vl),
    INSNX(psllq, 66, 0f, 73, 6, vl, q, vl),
    INSN(psllv, 66, 0f38, 47, vl, dq, vl),
    INSNX(psra, 66, 0f, 72, 4, vl, dq, vl),
    INSN(psrad, 66, 0f, e2, el_4, d, vl),
    INSN(psraq, 66, 0f, e2, el_2, q, vl),
    INSN(psrav, 66, 0f38, 46, vl, dq, vl),
    INSN(psrld, 66, 0f, d2, el_4, d, vl),
    INSNX(psrld, 66, 0f, 72, 2, vl, d, vl),
    INSN(psrlq, 66, 0f, d3, el_2, q, vl),
    INSNX(psrlq, 66, 0f, 73, 2, vl, q, vl),
    INSN(psrlv, 66, 0f38, 45, vl, dq, vl),
    INSN(psubd, 66, 0f, fa, vl, d, vl),
    INSN(psubq, 66, 0f, fb, vl, q, vl),
    INSN(pternlog, 66, 0f3a, 25, vl, dq, vl),
    INSN(ptestm, 66, 0f38, 27, vl, dq, vl),
    INSN(ptestnm, f3, 0f38, 27, vl, dq, vl),
    INSN(punpckhdq, 66, 0f, 6a, vl, d, vl),
    INSN(punpckhqdq, 66, 0f, 6d, vl, q, vl),
    INSN(punpckldq, 66, 0f, 62, vl, d, vl),
    INSN(punpcklqdq, 66, 0f, 6c, vl, q, vl),
    INSN(pxor, 66, 0f, ef, vl, dq, vl),
    INSN(rcp14, 66, 0f38, 4c, vl, sd, vl),
    INSN(rcp14, 66, 0f38, 4d, el, sd, el),
    INSN(rndscalepd, 66, 0f3a, 09, vl, q, vl),
    INSN(rndscaleps, 66, 0f3a, 08, vl, d, vl),
    INSN(rndscalesd, 66, 0f3a, 0b, el, q, el),
    INSN(rndscaless, 66, 0f3a, 0a, el, d, el),
    INSN(rsqrt14, 66, 0f38, 4e, vl, sd, vl),
    INSN(rsqrt14, 66, 0f38, 4f, el, sd, el),
    INSN(scalef, 66, 0f38, 2c, vl, sd, vl),
    INSN(scalef, 66, 0f38, 2d, el, sd, el),
    INSN(scatterd, 66, 0f38, a2, vl, sd, el),
    INSN(scatterq, 66, 0f38, a3, vl, sd, el),
    INSN_PFP(shuf, 0f, c6),
    INSN_FP(sqrt, 0f, 51),
    INSN_FP(sub, 0f, 5c),
    INSN(ucomisd, 66, 0f, 2e, el, q, el),
    INSN(ucomiss, , 0f, 2e, el, d, el),
    INSN_PFP(unpckh, 0f, 15),
    INSN_PFP(unpckl, 0f, 14),
};
320
/*
 * AVX512F insns tested with 128-bit vector length only (vl_128). The
 * commented-out entries name insns for which no test entry exists here.
 */
static const struct test avx512f_128[] = {
    INSN(extractps, 66, 0f3a, 17, el, d, el),
    INSN(insertps, 66, 0f3a, 21, el, d, el),
    INSN(mov, 66, 0f, 6e, el, dq64, el),
    INSN(mov, 66, 0f, 7e, el, dq64, el),
    // movhlps, , 0f, 12, d
    INSN(movhpd, 66, 0f, 16, el, q, vl),
    INSN(movhpd, 66, 0f, 17, el, q, vl),
    INSN(movhps, , 0f, 16, el_2, d, vl),
    INSN(movhps, , 0f, 17, el_2, d, vl),
    // movlhps, , 0f, 16, d
    INSN(movlpd, 66, 0f, 12, el, q, vl),
    INSN(movlpd, 66, 0f, 13, el, q, vl),
    INSN(movlps, , 0f, 12, el_2, d, vl),
    INSN(movlps, , 0f, 13, el_2, d, vl),
    INSN(movq, f3, 0f, 7e, el, q, el),
    INSN(movq, 66, 0f, d6, el, q, el),
};
339
/* AVX512F insns tested with the vector lengths in vl_no128 (512 and 256 bits). */
static const struct test avx512f_no128[] = {
    INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl),
    INSN(broadcasti32x4, 66, 0f38, 5a, el_4, d, vl),
    INSN(broadcastsd, 66, 0f38, 19, el, q, el),
    INSN(extractf32x4, 66, 0f3a, 19, el_4, d, vl),
    INSN(extracti32x4, 66, 0f3a, 39, el_4, d, vl),
    INSN(insertf32x4, 66, 0f3a, 18, el_4, d, vl),
    INSN(inserti32x4, 66, 0f3a, 38, el_4, d, vl),
    INSN(perm, 66, 0f38, 36, vl, dq, vl),
    INSN(perm, 66, 0f38, 16, vl, sd, vl),
    INSN(permpd, 66, 0f3a, 01, vl, q, vl),
    INSN(permq, 66, 0f3a, 00, vl, q, vl),
    INSN(shuff32x4, 66, 0f3a, 23, vl, d, vl),
    INSN(shuff64x2, 66, 0f3a, 23, vl, q, vl),
    INSN(shufi32x4, 66, 0f3a, 43, vl, d, vl),
    INSN(shufi64x2, 66, 0f3a, 43, vl, q, vl),
};
357
/* AVX512F insns tested with 512-bit vector length only (vl_512). */
static const struct test avx512f_512[] = {
    INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
    INSN(broadcasti64x4, 66, 0f38, 5b, el_4, q, vl),
    INSN(extractf64x4, 66, 0f3a, 1b, el_4, q, vl),
    INSN(extracti64x4, 66, 0f3a, 3b, el_4, q, vl),
    INSN(insertf64x4, 66, 0f3a, 1a, el_4, q, vl),
    INSN(inserti64x4, 66, 0f3a, 3a, el_4, q, vl),
};
366
/*
 * AVX512BW insns tested with every vector length listed in vl_all. The
 * commented-out entries name insns for which no test entry exists here.
 */
static const struct test avx512bw_all[] = {
    INSN(dbpsadbw, 66, 0f3a, 42, vl, b, vl),
    INSN(movdqu8, f2, 0f, 6f, vl, b, vl),
    INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
    INSN(movdqu16, f2, 0f, 6f, vl, w, vl),
    INSN(movdqu16, f2, 0f, 7f, vl, w, vl),
    INSN(pabsb, 66, 0f38, 1c, vl, b, vl),
    INSN(pabsw, 66, 0f38, 1d, vl, w, vl),
    INSN(packssdw, 66, 0f, 6b, vl, d_nb, vl),
    INSN(packsswb, 66, 0f, 63, vl, w, vl),
    INSN(packusdw, 66, 0f38, 2b, vl, d_nb, vl),
    INSN(packuswb, 66, 0f, 67, vl, w, vl),
    INSN(paddb, 66, 0f, fc, vl, b, vl),
    INSN(paddsb, 66, 0f, ec, vl, b, vl),
    INSN(paddsw, 66, 0f, ed, vl, w, vl),
    INSN(paddusb, 66, 0f, dc, vl, b, vl),
    INSN(paddusw, 66, 0f, dd, vl, w, vl),
    INSN(paddw, 66, 0f, fd, vl, w, vl),
    INSN(palignr, 66, 0f3a, 0f, vl, b, vl),
    INSN(pavgb, 66, 0f, e0, vl, b, vl),
    INSN(pavgw, 66, 0f, e3, vl, w, vl),
    INSN(pblendm, 66, 0f38, 66, vl, bw, vl),
    INSN(pbroadcastb, 66, 0f38, 78, el, b, el),
    // pbroadcastb, 66, 0f38, 7a, b
    INSN(pbroadcastw, 66, 0f38, 79, el_2, b, vl),
    // pbroadcastw, 66, 0f38, 7b, b
    INSN(pcmp, 66, 0f3a, 3f, vl, bw, vl),
    INSN(pcmpeqb, 66, 0f, 74, vl, b, vl),
    INSN(pcmpeqw, 66, 0f, 75, vl, w, vl),
    INSN(pcmpgtb, 66, 0f, 64, vl, b, vl),
    INSN(pcmpgtw, 66, 0f, 65, vl, w, vl),
    INSN(pcmpu, 66, 0f3a, 3e, vl, bw, vl),
    INSN(permw, 66, 0f38, 8d, vl, w, vl),
    INSN(permi2w, 66, 0f38, 75, vl, w, vl),
    INSN(permt2w, 66, 0f38, 7d, vl, w, vl),
    INSN(pmaddubsw, 66, 0f38, 04, vl, b, vl),
    INSN(pmaddwd, 66, 0f, f5, vl, w, vl),
    INSN(pmaxsb, 66, 0f38, 3c, vl, b, vl),
    INSN(pmaxsw, 66, 0f, ee, vl, w, vl),
    INSN(pmaxub, 66, 0f, de, vl, b, vl),
    INSN(pmaxuw, 66, 0f38, 3e, vl, w, vl),
    INSN(pminsb, 66, 0f38, 38, vl, b, vl),
    INSN(pminsw, 66, 0f, ea, vl, w, vl),
    INSN(pminub, 66, 0f, da, vl, b, vl),
    INSN(pminuw, 66, 0f38, 3a, vl, w, vl),
    // pmovb2m, f3, 0f38, 29, b
    // pmovm2, f3, 0f38, 28, bw
    INSN(pmovswb, f3, 0f38, 20, vl_2, b, vl),
    INSN(pmovsxbw, 66, 0f38, 20, vl_2, b, vl),
    INSN(pmovuswb, f3, 0f38, 10, vl_2, b, vl),
    // pmovw2m, f3, 0f38, 29, w
    INSN(pmovwb, f3, 0f38, 30, vl_2, b, vl),
    INSN(pmovzxbw, 66, 0f38, 30, vl_2, b, vl),
    INSN(pmulhrsw, 66, 0f38, 0b, vl, w, vl),
    INSN(pmulhuw, 66, 0f, e4, vl, w, vl),
    INSN(pmulhw, 66, 0f, e5, vl, w, vl),
    INSN(pmullw, 66, 0f, d5, vl, w, vl),
    INSN(psadbw, 66, 0f, f6, vl, b, vl),
    INSN(pshufb, 66, 0f38, 00, vl, b, vl),
    INSN(pshufhw, f3, 0f, 70, vl, w, vl),
    INSN(pshuflw, f2, 0f, 70, vl, w, vl),
    INSNX(pslldq, 66, 0f, 73, 7, vl, b, vl),
    INSN(psllvw, 66, 0f38, 12, vl, w, vl),
    INSN(psllw, 66, 0f, f1, el_8, w, vl),
    INSNX(psllw, 66, 0f, 71, 6, vl, w, vl),
    INSN(psravw, 66, 0f38, 11, vl, w, vl),
    INSN(psraw, 66, 0f, e1, el_8, w, vl),
    INSNX(psraw, 66, 0f, 71, 4, vl, w, vl),
    INSNX(psrldq, 66, 0f, 73, 3, vl, b, vl),
    INSN(psrlvw, 66, 0f38, 10, vl, w, vl),
    INSN(psrlw, 66, 0f, d1, el_8, w, vl),
    INSNX(psrlw, 66, 0f, 71, 2, vl, w, vl),
    INSN(psubb, 66, 0f, f8, vl, b, vl),
    INSN(psubsb, 66, 0f, e8, vl, b, vl),
    INSN(psubsw, 66, 0f, e9, vl, w, vl),
    INSN(psubusb, 66, 0f, d8, vl, b, vl),
    INSN(psubusw, 66, 0f, d9, vl, w, vl),
    INSN(psubw, 66, 0f, f9, vl, w, vl),
    INSN(ptestm, 66, 0f38, 26, vl, bw, vl),
    INSN(ptestnm, f3, 0f38, 26, vl, bw, vl),
    INSN(punpckhbw, 66, 0f, 68, vl, b, vl),
    INSN(punpckhwd, 66, 0f, 69, vl, w, vl),
    INSN(punpcklbw, 66, 0f, 60, vl, b, vl),
    INSN(punpcklwd, 66, 0f, 61, vl, w, vl),
};
452
/* AVX512BW insns tested with 128-bit vector length only (vl_128). */
static const struct test avx512bw_128[] = {
    INSN(pextrb, 66, 0f3a, 14, el, b, el),
    // pextrw, 66, 0f, c5, w
    INSN(pextrw, 66, 0f3a, 15, el, w, el),
    INSN(pinsrb, 66, 0f3a, 20, el, b, el),
    INSN(pinsrw, 66, 0f, c4, el, w, el),
};
460
/* AVX512CD insns tested with every vector length listed in vl_all. */
static const struct test avx512cd_all[] = {
    // pbroadcastmb2q, f3, 0f38, 2a, q
    // pbroadcastmw2d, f3, 0f38, 3a, d
    INSN(pconflict, 66, 0f38, c4, vl, dq, vl),
    INSN(plzcnt, 66, 0f38, 44, vl, dq, vl),
};
467
/* AVX512DQ insns tested with every vector length listed in vl_all. */
static const struct test avx512dq_all[] = {
    INSN_PFP(and, 0f, 54),
    INSN_PFP(andn, 0f, 55),
    INSN(broadcasti32x2, 66, 0f38, 59, el_2, d, vl),
    INSN(cvtpd2qq, 66, 0f, 7b, vl, q, vl),
    INSN(cvtpd2uqq, 66, 0f, 79, vl, q, vl),
    INSN(cvtps2qq, 66, 0f, 7b, vl_2, d, vl),
    INSN(cvtps2uqq, 66, 0f, 79, vl_2, d, vl),
    INSN(cvtqq2pd, f3, 0f, e6, vl, q, vl),
    INSN(cvtqq2ps, , 0f, 5b, vl, q, vl),
    INSN(cvttpd2qq, 66, 0f, 7a, vl, q, vl),
    INSN(cvttpd2uqq, 66, 0f, 78, vl, q, vl),
    INSN(cvttps2qq, 66, 0f, 7a, vl_2, d, vl),
    INSN(cvttps2uqq, 66, 0f, 78, vl_2, d, vl),
    INSN(cvtuqq2pd, f3, 0f, 7a, vl, q, vl),
    INSN(cvtuqq2ps, f2, 0f, 7a, vl, q, vl),
    INSN(fpclass, 66, 0f3a, 66, vl, sd, vl),
    INSN(fpclass, 66, 0f3a, 67, el, sd, el),
    INSN_PFP(or, 0f, 56),
    // pmovd2m, f3, 0f38, 39, d
    // pmovm2, f3, 0f38, 38, dq
    // pmovq2m, f3, 0f38, 39, q
    INSN(pmullq, 66, 0f38, 40, vl, q, vl),
    INSN(range, 66, 0f3a, 50, vl, sd, vl),
    INSN(range, 66, 0f3a, 51, el, sd, el),
    INSN(reduce, 66, 0f3a, 56, vl, sd, vl),
    INSN(reduce, 66, 0f3a, 57, el, sd, el),
    INSN_PFP(xor, 0f, 57),
};
497
/* AVX512DQ insns tested with 128-bit vector length only (vl_128). */
static const struct test avx512dq_128[] = {
    INSN(pextr, 66, 0f3a, 16, el, dq64, el),
    INSN(pinsr, 66, 0f3a, 22, el, dq64, el),
};
502
/* AVX512DQ insns tested with the vector lengths in vl_no128 (512 and 256 bits). */
static const struct test avx512dq_no128[] = {
    INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
    INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
    INSN(broadcasti64x2, 66, 0f38, 5a, el_2, q, vl),
    INSN(extractf64x2, 66, 0f3a, 19, el_2, q, vl),
    INSN(extracti64x2, 66, 0f3a, 39, el_2, q, vl),
    INSN(insertf64x2, 66, 0f3a, 18, el_2, q, vl),
    INSN(inserti64x2, 66, 0f3a, 38, el_2, q, vl),
};
512
/* AVX512DQ insns tested with 512-bit vector length only (vl_512). */
static const struct test avx512dq_512[] = {
    INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
    INSN(broadcasti32x8, 66, 0f38, 5b, el_8, d, vl),
    INSN(extractf32x8, 66, 0f3a, 1b, el_8, d, vl),
    INSN(extracti32x8, 66, 0f3a, 3b, el_8, d, vl),
    INSN(insertf32x8, 66, 0f3a, 1a, el_8, d, vl),
    INSN(inserti32x8, 66, 0f3a, 3a, el_8, d, vl),
};
521
/* AVX512ER insns tested with 512-bit vector length only (vl_512). */
static const struct test avx512er_512[] = {
    INSN(exp2, 66, 0f38, c8, vl, sd, vl),
    INSN(rcp28, 66, 0f38, ca, vl, sd, vl),
    INSN(rcp28, 66, 0f38, cb, el, sd, el),
    INSN(rsqrt28, 66, 0f38, cc, vl, sd, vl),
    INSN(rsqrt28, 66, 0f38, cd, el, sd, el),
};
529
/*
 * AVX512PF gather/scatter prefetch insns, tested with 512-bit vector length
 * only (vl_512). The individual insns share opcodes c6 / c7 and are told
 * apart by the opcode extension. The "pf" in the mnemonic is what makes
 * test_one() expect just a single byte to be accessed.
 */
static const struct test avx512pf_512[] = {
    INSNX(gatherpf0d, 66, 0f38, c6, 1, vl, sd, el),
    INSNX(gatherpf0q, 66, 0f38, c7, 1, vl, sd, el),
    INSNX(gatherpf1d, 66, 0f38, c6, 2, vl, sd, el),
    INSNX(gatherpf1q, 66, 0f38, c7, 2, vl, sd, el),
    INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el),
    INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el),
    INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el),
    INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
};
540
/* AVX512_4FMAPS insns tested with 512-bit vector length only (vl_512). */
static const struct test avx512_4fmaps_512[] = {
    INSN(4fmaddps, f2, 0f38, 9a, el_4, d, vl),
    INSN(4fmaddss, f2, 0f38, 9b, el_4, d, vl),
    INSN(4fnmaddps, f2, 0f38, aa, el_4, d, vl),
    INSN(4fnmaddss, f2, 0f38, ab, el_4, d, vl),
};
547
/* AVX512_4VNNIW insns tested with 512-bit vector length only (vl_512). */
static const struct test avx512_4vnniw_512[] = {
    INSN(p4dpwssd, f2, 0f38, 52, el_4, d, vl),
    INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
};
552
/* AVX512_BF16 insns tested with every vector length listed in vl_all. */
static const struct test avx512_bf16_all[] = {
    INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
    INSN(vcvtneps2bf16, f3, 0f38, 72, vl, d, vl),
    INSN(vdpbf16ps, f3, 0f38, 52, vl, d, vl),
};
558
/* AVX512_BITALG insns tested with every vector length listed in vl_all. */
static const struct test avx512_bitalg_all[] = {
    INSN(popcnt, 66, 0f38, 54, vl, bw, vl),
    INSN(pshufbitqmb, 66, 0f38, 8f, vl, b, vl),
};
563
/* AVX512_IFMA insns tested with every vector length listed in vl_all. */
static const struct test avx512_ifma_all[] = {
    INSN(pmadd52huq, 66, 0f38, b5, vl, q, vl),
    INSN(pmadd52luq, 66, 0f38, b4, vl, q, vl),
};
568
/* AVX512_VBMI insns tested with every vector length listed in vl_all. */
static const struct test avx512_vbmi_all[] = {
    INSN(permb, 66, 0f38, 8d, vl, b, vl),
    INSN(permi2b, 66, 0f38, 75, vl, b, vl),
    INSN(permt2b, 66, 0f38, 7d, vl, b, vl),
    INSN(pmultishiftqb, 66, 0f38, 83, vl, q, vl),
};
575
/* AVX512_VBMI2 insns tested with every vector length listed in vl_all. */
static const struct test avx512_vbmi2_all[] = {
    INSN(pcompress, 66, 0f38, 63, vl, bw, el),
    INSN(pexpand, 66, 0f38, 62, vl, bw, el),
    INSN(pshld, 66, 0f3a, 71, vl, dq, vl),
    INSN(pshldv, 66, 0f38, 71, vl, dq, vl),
    INSN(pshldvw, 66, 0f38, 70, vl, w, vl),
    INSN(pshldw, 66, 0f3a, 70, vl, w, vl),
    INSN(pshrd, 66, 0f3a, 73, vl, dq, vl),
    INSN(pshrdv, 66, 0f38, 73, vl, dq, vl),
    INSN(pshrdvw, 66, 0f38, 72, vl, w, vl),
    INSN(pshrdw, 66, 0f3a, 72, vl, w, vl),
};
588
/* AVX512_VNNI insns tested with every vector length listed in vl_all. */
static const struct test avx512_vnni_all[] = {
    INSN(pdpbusd, 66, 0f38, 50, vl, d, vl),
    INSN(pdpbusds, 66, 0f38, 51, vl, d, vl),
    INSN(pdpwssd, 66, 0f38, 52, vl, d, vl),
    INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
};
595
/* AVX512_VPOPCNTDQ insn tested with every vector length listed in vl_all. */
static const struct test avx512_vpopcntdq_all[] = {
    INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
};
599
/*
 * GFNI insns (EVEX forms) tested with every vector length listed in vl_all;
 * only run when AVX512F is available as well (see evex_disp8_test()).
 */
static const struct test gfni_all[] = {
    INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
    INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl),
    INSN(gf2p8mulb, 66, 0f38, cf, vl, b, vl),
};
605
606 /*
607 * The uses of b in this table are simply (one of) the shortest form(s) of
608 * saying "no broadcast" without introducing a 128-bit granularity enumerator.
609 * Due to all of the insns being WIG, w, d_nb, and q_nb would all also fit.
610 */
611 static const struct test vaes_all[] = {
612 INSN(aesdec, 66, 0f38, de, vl, b, vl),
613 INSN(aesdeclast, 66, 0f38, df, vl, b, vl),
614 INSN(aesenc, 66, 0f38, dc, vl, b, vl),
615 INSN(aesenclast, 66, 0f38, dd, vl, b, vl),
616 };
617
/*
 * VPCLMULQDQ insn (EVEX form) tested with every vector length listed in
 * vl_all; only run when AVX512F is available as well (see evex_disp8_test()).
 */
static const struct test vpclmulqdq_all[] = {
    INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
621
/*
 * Vector length sets, selected by the second argument of RUN(). Note that
 * test_group() inspects element 0: when it is VL_512, all other lengths in
 * the set are skipped unless AVX512VL is available. Hence VL_512 needs to
 * stay first in vl_all and vl_no128.
 */
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
static const unsigned char vl_no128[] = { VL_512, VL_256 };
static const unsigned char vl_512[] = { VL_512 };
626
627 /*
628 * This table, indicating the presence of an immediate (byte) for an opcode
629 * space 0f major opcode, is indexed by high major opcode byte nibble, with
630 * each table element then bit-indexed by low major opcode byte nibble.
631 */
632 static const uint16_t imm0f[16] = {
633 [0x7] = (1 << 0x0) /* vpshuf* */ |
634 (1 << 0x1) /* vps{ll,ra,rl}w */ |
635 (1 << 0x2) /* vps{l,r}ld, vp{rol,ror,sra}{d,q} */ |
636 (1 << 0x3) /* vps{l,r}l{,d}q */,
637 [0xc] = (1 << 0x2) /* vcmp{p,s}{d,s} */ |
638 (1 << 0x4) /* vpinsrw */ |
639 (1 << 0x5) /* vpextrw */ |
640 (1 << 0x6) /* vshufp{d,s} */,
641 };
642
/*
 * Emulation hooks used by test_one(): a copy of the hooks handed to
 * evex_disp8_test(), with the read and write hooks replaced by the access
 * tracking ones defined in this file.
 */
static struct x86_emulate_ops emulops;
644
645 /*
646 * Access tracking (by granular) is used on the first 64 bytes of address
647 * space. Instructions get encode with a raw Disp8 value of 1, which then
648 * gets scaled accordingly. Hence accesses below the address <scaling factor>
649 * as well as at or above 2 * <scaling factor> are indications of bugs. To
650 * aid diagnosis / debugging, track all accesses below 3 * <scaling factor>.
651 * With AVX512 the maximum scaling factor is 64.
652 */
653 static unsigned int accessed[3 * 64];
654
record_access(enum x86_segment seg,unsigned long offset,unsigned int bytes)655 static bool record_access(enum x86_segment seg, unsigned long offset,
656 unsigned int bytes)
657 {
658 while ( bytes-- )
659 {
660 if ( offset >= ARRAY_SIZE(accessed) )
661 return false;
662 ++accessed[offset++];
663 }
664
665 return true;
666 }
667
read(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)668 static int read(enum x86_segment seg, unsigned long offset, void *p_data,
669 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
670 {
671 if ( !record_access(seg, offset, bytes + !bytes) )
672 return X86EMUL_UNHANDLEABLE;
673 memset(p_data, 0, bytes);
674 return X86EMUL_OKAY;
675 }
676
write(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)677 static int write(enum x86_segment seg, unsigned long offset, void *p_data,
678 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
679 {
680 if ( !record_access(seg, offset, bytes + !bytes) )
681 return X86EMUL_UNHANDLEABLE;
682 return X86EMUL_OKAY;
683 }
684
test_one(const struct test * test,enum vl vl,unsigned char * instr,struct x86_emulate_ctxt * ctxt)685 static void test_one(const struct test *test, enum vl vl,
686 unsigned char *instr, struct x86_emulate_ctxt *ctxt)
687 {
688 unsigned int vsz, esz, i, n;
689 int rc;
690 bool sg = strstr(test->mnemonic, "gather") ||
691 strstr(test->mnemonic, "scatter");
692 bool imm = test->spc == SPC_0f3a ||
693 (test->spc == SPC_0f &&
694 (imm0f[test->opc >> 4] & (1 << (test->opc & 0xf))));
695 union evex {
696 uint8_t raw[3];
697 struct {
698 uint8_t opcx:2;
699 uint8_t mbz:2;
700 uint8_t R:1;
701 uint8_t b:1;
702 uint8_t x:1;
703 uint8_t r:1;
704 uint8_t pfx:2;
705 uint8_t mbs:1;
706 uint8_t reg:4;
707 uint8_t w:1;
708 uint8_t opmsk:3;
709 uint8_t RX:1;
710 uint8_t bcst:1;
711 uint8_t lr:2;
712 uint8_t z:1;
713 };
714 } evex = {
715 .opcx = test->spc, .pfx = test->pfx, .lr = vl,
716 .R = 1, .b = 1, .x = 1, .r = 1, .mbs = 1,
717 .reg = 0xf, .RX = 1, .opmsk = sg,
718 };
719
720 switch ( test->esz )
721 {
722 case ESZ_b:
723 esz = 1;
724 break;
725
726 case ESZ_w:
727 esz = 2;
728 evex.w = 1;
729 break;
730
731 #ifdef __i386__
732 case ESZ_d_WIG:
733 evex.w = 1;
734 /* fall through */
735 #endif
736 case ESZ_d: case ESZ_d_nb:
737 esz = 4;
738 break;
739
740 case ESZ_q: case ESZ_q_nb:
741 esz = 8;
742 evex.w = 1;
743 break;
744
745 default:
746 ASSERT_UNREACHABLE();
747 }
748
749 switch ( test->vsz )
750 {
751 case VSZ_vl:
752 vsz = 16 << vl;
753 break;
754
755 case VSZ_vl_2:
756 vsz = 8 << vl;
757 break;
758
759 case VSZ_vl_4:
760 vsz = 4 << vl;
761 break;
762
763 case VSZ_vl_8:
764 vsz = 2 << vl;
765 break;
766
767 case VSZ_el:
768 vsz = esz;
769 break;
770
771 case VSZ_el_2:
772 vsz = esz * 2;
773 break;
774
775 case VSZ_el_4:
776 vsz = esz * 4;
777 break;
778
779 case VSZ_el_8:
780 vsz = esz * 8;
781 break;
782
783 default:
784 ASSERT_UNREACHABLE();
785 }
786
787 /*
788 * Note: SIB addressing is used here, such that S/G insns can be handled
789 * without extra conditionals.
790 */
791 instr[0] = 0x62;
792 instr[1] = evex.raw[0];
793 instr[2] = evex.raw[1];
794 instr[3] = evex.raw[2];
795 instr[4] = test->opc;
796 instr[5] = 0x44 | (test->ext << 3); /* ModR/M */
797 instr[6] = 0x22; /* SIB: base rDX, index none / xMM4 */
798 instr[7] = 1; /* Disp8 */
799 instr[8] = 0; /* immediate, if any */
800
801 asm volatile ( "kxnorw %k1, %k1, %k1" );
802 asm volatile ( "vxorps %xmm4, %xmm4, %xmm4" );
803
804 ctxt->regs->eip = (unsigned long)&instr[0];
805 ctxt->regs->edx = 0;
806 memset(accessed, 0, sizeof(accessed));
807
808 rc = x86_emulate(ctxt, &emulops);
809 if ( rc != X86EMUL_OKAY ||
810 (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
811 goto fail;
812
813 for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i )
814 if ( accessed[i] )
815 goto fail;
816
817 n = test->scale == SC_vl ? vsz : esz;
818 if ( !sg )
819 n += vsz;
820 else if ( !strstr(test->mnemonic, "pf") )
821 n += esz;
822 else
823 ++n;
824
825 for ( ; i < n; ++i )
826 if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w)
827 : 1) )
828 goto fail;
829
830 for ( ; i < ARRAY_SIZE(accessed); ++i )
831 if ( accessed[i] )
832 goto fail;
833
834 /* Also check the broadcast case, if available. */
835 if ( test->vsz >= VSZ_el || test->scale != SC_vl )
836 return;
837
838 switch ( test->esz )
839 {
840 case ESZ_d_nb: case ESZ_q_nb:
841 case ESZ_b: case ESZ_w: case ESZ_bw:
842 return;
843
844 case ESZ_d: case ESZ_q:
845 break;
846
847 default:
848 ASSERT_UNREACHABLE();
849 }
850
851 evex.bcst = 1;
852 instr[3] = evex.raw[2];
853
854 ctxt->regs->eip = (unsigned long)&instr[0];
855 memset(accessed, 0, sizeof(accessed));
856
857 rc = x86_emulate(ctxt, &emulops);
858 if ( rc != X86EMUL_OKAY ||
859 (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
860 goto fail;
861
862 for ( i = 0; i < esz; ++i )
863 if ( accessed[i] )
864 goto fail;
865 for ( ; i < esz * 2; ++i )
866 if ( accessed[i] != 1 )
867 goto fail;
868 for ( ; i < ARRAY_SIZE(accessed); ++i )
869 if ( accessed[i] )
870 goto fail;
871
872 return;
873
874 fail:
875 printf("failed (v%s%s %u-bit)\n", test->mnemonic,
876 evex.bcst ? "/bcst" : "", 128 << vl);
877 exit(1);
878 }
879
test_pair(const struct test * tmpl,enum vl vl,enum esz esz1,const char * suffix1,enum esz esz2,const char * suffix2,unsigned char * instr,struct x86_emulate_ctxt * ctxt)880 static void test_pair(const struct test *tmpl, enum vl vl,
881 enum esz esz1, const char *suffix1,
882 enum esz esz2, const char *suffix2,
883 unsigned char *instr, struct x86_emulate_ctxt *ctxt)
884 {
885 struct test test = *tmpl;
886 char mnemonic[24];
887
888 test.esz = esz1;
889 snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix1);
890 test.mnemonic = mnemonic;
891 test_one(&test, vl, instr, ctxt);
892
893 test.esz = esz2;
894 snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix2);
895 test.mnemonic = mnemonic;
896 test_one(&test, vl, instr, ctxt);
897 }
898
test_group(const struct test tests[],unsigned int nr_test,const unsigned char vl[],unsigned int nr_vl,void * instr,struct x86_emulate_ctxt * ctxt)899 static void test_group(const struct test tests[], unsigned int nr_test,
900 const unsigned char vl[], unsigned int nr_vl,
901 void *instr, struct x86_emulate_ctxt *ctxt)
902 {
903 unsigned int i, j;
904
905 for ( i = 0; i < nr_test; ++i )
906 {
907 for ( j = 0; j < nr_vl; ++j )
908 {
909 if ( vl[0] == VL_512 && vl[j] != VL_512 && !cpu_has_avx512vl )
910 continue;
911
912 switch ( tests[i].esz )
913 {
914 case ESZ_q_nb:
915 /* The 128-bit form of VMOVDDUP needs special casing. */
916 if ( vl[j] == VL_128 && tests[i].spc == SPC_0f &&
917 tests[i].opc == 0x12 && tests[i].pfx == PFX_f2 )
918 {
919 struct test test = tests[i];
920
921 test.vsz = VSZ_el;
922 test.scale = SC_el;
923 test_one(&test, vl[j], instr, ctxt);
924 continue;
925 }
926 /* fall through */
927 default:
928 test_one(&tests[i], vl[j], instr, ctxt);
929 break;
930
931 case ESZ_bw:
932 test_pair(&tests[i], vl[j], ESZ_b, "b", ESZ_w, "w",
933 instr, ctxt);
934 break;
935
936 case ESZ_dq:
937 test_pair(&tests[i], vl[j], ESZ_d,
938 strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
939 ESZ_q, "q", instr, ctxt);
940 break;
941
942 #ifdef __i386__
943 case ESZ_d_WIG:
944 test_pair(&tests[i], vl[j], ESZ_d, "/W0",
945 ESZ_d_WIG, "/W1", instr, ctxt);
946 break;
947 #endif
948
949 case ESZ_sd:
950 test_pair(&tests[i], vl[j],
951 ESZ_d, tests[i].vsz < VSZ_el ? "ps" : "ss",
952 ESZ_q, tests[i].vsz < VSZ_el ? "pd" : "sd",
953 instr, ctxt);
954 break;
955 }
956 }
957 }
958 }
959
evex_disp8_test(void * instr,struct x86_emulate_ctxt * ctxt,const struct x86_emulate_ops * ops)960 void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
961 const struct x86_emulate_ops *ops)
962 {
963 emulops = *ops;
964 emulops.read = read;
965 emulops.write = write;
966
967 #define RUN(feat, vl) do { \
968 if ( cpu_has_##feat ) \
969 { \
970 printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \
971 test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \
972 vl_ ## vl, ARRAY_SIZE(vl_ ## vl), instr, ctxt); \
973 printf("okay\n"); \
974 } \
975 } while ( false )
976
977 RUN(avx512f, all);
978 RUN(avx512f, 128);
979 RUN(avx512f, no128);
980 RUN(avx512f, 512);
981 RUN(avx512bw, all);
982 RUN(avx512bw, 128);
983 RUN(avx512cd, all);
984 RUN(avx512dq, all);
985 RUN(avx512dq, 128);
986 RUN(avx512dq, no128);
987 RUN(avx512dq, 512);
988 RUN(avx512er, 512);
989 #define cpu_has_avx512pf cpu_has_avx512f
990 RUN(avx512pf, 512);
991 RUN(avx512_4fmaps, 512);
992 RUN(avx512_4vnniw, 512);
993 RUN(avx512_bf16, all);
994 RUN(avx512_bitalg, all);
995 RUN(avx512_ifma, all);
996 RUN(avx512_vbmi, all);
997 RUN(avx512_vbmi2, all);
998 RUN(avx512_vnni, all);
999 RUN(avx512_vpopcntdq, all);
1000
1001 if ( cpu_has_avx512f )
1002 {
1003 RUN(gfni, all);
1004 RUN(vaes, all);
1005 RUN(vpclmulqdq, all);
1006 }
1007 }
1008