/src/xen/tools/fuzz/x86_instruction_emulator/x86_emulate/x86_emulate.c
Line | Count | Source |
1 | | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | | /****************************************************************************** |
3 | | * x86_emulate.c |
4 | | * |
5 | | * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. |
6 | | * |
7 | | * Copyright (c) 2005-2007 Keir Fraser |
8 | | * Copyright (c) 2005-2007 XenSource Inc. |
9 | | */ |
10 | | |
11 | | #include "private.h" |
12 | | |
13 | | /* |
14 | | * The next two tables are indexed by high opcode extension byte (the one |
15 | | * that's encoded like an immediate) nibble, with each table element then |
16 | | * bit-indexed by low opcode extension byte nibble. |
17 | | */ |
18 | | static const uint16_t _3dnow_table[16] = { |
19 | | [0x0] = (1 << 0xd) /* pi2fd */, |
20 | | [0x1] = (1 << 0xd) /* pf2id */, |
21 | | [0x9] = (1 << 0x0) /* pfcmpge */ | |
22 | | (1 << 0x4) /* pfmin */ | |
23 | | (1 << 0x6) /* pfrcp */ | |
24 | | (1 << 0x7) /* pfrsqrt */ | |
25 | | (1 << 0xa) /* pfsub */ | |
26 | | (1 << 0xe) /* pfadd */, |
27 | | [0xa] = (1 << 0x0) /* pfcmpgt */ | |
28 | | (1 << 0x4) /* pfmax */ | |
29 | | (1 << 0x6) /* pfrcpit1 */ | |
30 | | (1 << 0x7) /* pfrsqit1 */ | |
31 | | (1 << 0xa) /* pfsubr */ | |
32 | | (1 << 0xe) /* pfacc */, |
33 | | [0xb] = (1 << 0x0) /* pfcmpeq */ | |
34 | | (1 << 0x4) /* pfmul */ | |
35 | | (1 << 0x6) /* pfrcpit2 */ | |
36 | | (1 << 0x7) /* pmulhrw */ | |
37 | | (1 << 0xf) /* pavgusb */, |
38 | | }; |
39 | | |
40 | | static const uint16_t _3dnow_ext_table[16] = { |
41 | | [0x0] = (1 << 0xc) /* pi2fw */, |
42 | | [0x1] = (1 << 0xc) /* pf2iw */, |
43 | | [0x8] = (1 << 0xa) /* pfnacc */ | |
44 | | (1 << 0xe) /* pfpnacc */, |
45 | | [0xb] = (1 << 0xb) /* pswapd */, |
46 | | }; |
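/*
 * Minimal lookup sketch (not part of the original source; the helper name is
 * hypothetical): the 3DNow! opcode extension byte is split into nibbles, the
 * high nibble selecting a table element and the low nibble selecting a bit
 * within it.
 */
static unsigned int _3dnow_opcode_present(const uint16_t table[16],
                                          uint8_t ext_byte)
{
    return (table[ext_byte >> 4] >> (ext_byte & 0xf)) & 1;
}
/* Example: 0x9e encodes PFADD, so _3dnow_opcode_present(_3dnow_table, 0x9e)
 * yields 1, while an unassigned extension byte yields 0. */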
47 | | |
48 | | /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */ |
49 | | static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 }; |
50 | | |
51 | | static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 }; |
52 | | |
53 | | #ifdef __x86_64__ |
54 | 94.2k | # define PFX2 REX_PREFIX |
55 | | #else |
56 | | # define PFX2 0x3e |
57 | | #endif |
58 | 373k | #define PFX_BYTES 3 |
59 | 94.2k | #define init_prefixes(stub) ({ \ |
60 | 94.2k | uint8_t *buf_ = get_stub(stub); \ |
61 | 94.2k | buf_[0] = 0x3e; \ |
62 | 94.2k | buf_[1] = PFX2; \ |
63 | 94.2k | buf_[2] = 0x0f; \ |
64 | 94.2k | buf_ + 3; \ |
65 | 94.2k | }) |
66 | | |
67 | 37.7k | #define copy_VEX(ptr, vex) ({ \ |
68 | 37.7k | if ( !mode_64bit() ) \ |
69 | 37.7k | (vex).reg |= 8; \ |
70 | 37.7k | gcc11_wrap(ptr)[0 - PFX_BYTES] = ext < ext_8f08 ? 0xc4 : 0x8f; \ |
71 | 37.7k | (ptr)[1 - PFX_BYTES] = (vex).raw[0]; \ |
72 | 37.7k | (ptr)[2 - PFX_BYTES] = (vex).raw[1]; \ |
73 | 37.7k | container_of((ptr) + 1 - PFX_BYTES, typeof(vex), raw[0]); \ |
74 | 37.7k | }) |
75 | | |
76 | 90.9k | #define copy_REX_VEX(ptr, rex, vex) do { \ |
77 | 90.9k | if ( (vex).opcx != vex_none ) \ |
78 | 90.9k | copy_VEX(ptr, vex); \ |
79 | 90.9k | else \ |
80 | 90.9k | { \ |
81 | 56.3k | if ( (vex).pfx ) \ |
82 | 56.3k | (ptr)[0 - PFX_BYTES] = sse_prefix[(vex).pfx - 1]; \ |
83 | 56.3k | /* \ |
84 | 56.3k | * "rex" is always zero for other than 64-bit mode, so OR-ing it \ |
85 | 56.3k | * into any prefix (and not just REX_PREFIX) is safe on 32-bit \ |
86 | 56.3k | * (test harness) builds. \ |
87 | 56.3k | */ \ |
88 | 56.3k | (ptr)[1 - PFX_BYTES] |= rex; \ |
89 | 56.3k | } \ |
90 | 90.9k | } while (0) |
91 | | |
92 | 0 | #define EVEX_PFX_BYTES 4 |
93 | 0 | #define init_evex(stub) ({ \ |
94 | 0 | uint8_t *buf_ = get_stub(stub); \ |
95 | 0 | buf_[0] = 0x62; \ |
96 | 0 | buf_ + EVEX_PFX_BYTES; \ |
97 | 0 | }) |
98 | | |
99 | 0 | #define copy_EVEX(ptr, evex) ({ \ |
100 | 0 | if ( !mode_64bit() ) \ |
101 | 0 | (evex).reg |= 8; \ |
102 | 0 | (ptr)[1 - EVEX_PFX_BYTES] = (evex).raw[0]; \ |
103 | 0 | (ptr)[2 - EVEX_PFX_BYTES] = (evex).raw[1]; \ |
104 | 0 | (ptr)[3 - EVEX_PFX_BYTES] = (evex).raw[2]; \ |
105 | 0 | container_of((ptr) + 1 - EVEX_PFX_BYTES, typeof(evex), raw[0]); \ |
106 | 0 | }) |
107 | | |
108 | 67.1k | #define rep_prefix() (vex.pfx >= vex_f3) |
109 | 15.1k | #define repe_prefix() (vex.pfx == vex_f3) |
110 | 11.4k | #define repne_prefix() (vex.pfx == vex_f2) |
111 | | |
112 | | /* |
113 | | * While proper alignment gets specified in mmval_t, this doesn't get honored |
114 | | * by the compiler for automatic variables. Use this helper to instantiate a |
115 | | * suitably aligned variable, producing a pointer to access it. |
116 | | */ |
117 | | #define DECLARE_ALIGNED(type, var) \ |
118 | 613k | long __##var[(sizeof(type) + __alignof(type)) / __alignof(long) - 1]; \ |
119 | 613k | type *const var##p = \ |
120 | 613k | (void *)(((long)__##var + __alignof(type) - __alignof(__##var)) \ |
121 | 613k | & -__alignof(type)) |
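/*
 * Expansion sketch (hypothetical type and sizes, exposition only):
 * DECLARE_ALIGNED(mmval_t, mmval) over-allocates a long array by one
 * alignment unit and rounds its address up to the type's alignment to form
 * mmvalp.  For a 16-byte type requiring 16-byte alignment on x86-64 this
 * amounts to:
 *
 *     long __mmval[(16 + 16) / 8 - 1];            // 24 bytes of storage
 *     mmval_t *const mmvalp =
 *         (void *)(((long)__mmval + 16 - 8) & -16L);
 *
 * Whichever of the two possible 8-byte-aligned start addresses the array
 * gets, the rounded pointer plus 16 bytes still lies inside it.
 */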
122 | | |
123 | | /* MXCSR bit definitions. */ |
124 | 35.2k | #define MXCSR_MM (1U << 17) |
125 | | |
126 | | /* Segment selector error code bits. */ |
127 | | #define ECODE_EXT (1 << 0) |
128 | | #define ECODE_IDT (1 << 1) |
129 | | #define ECODE_TI (1 << 2) |
130 | | |
131 | | /* Raw emulation: instruction has two explicit operands. */ |
132 | | #define __emulate_2op_nobyte(_op, src, dst, sz, eflags, wsx,wsy,wdx,wdy, \ |
133 | 35.3k | lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy, extra...) \ |
134 | 35.3k | do{ unsigned long _tmp; \ |
135 | 35.3k | switch ( sz ) \ |
136 | 35.3k | { \ |
137 | 20.6k | case 2: \ |
138 | 20.6k | asm volatile ( \ |
139 | 20.6k | _PRE_EFLAGS("0","4","2") \ |
140 | 20.6k | _op"w %"wsx"3,%"wdx"1; " \ |
141 | 20.6k | _POST_EFLAGS("0","4","2") \ |
142 | 20.6k | : "+g" (eflags), "+" wdy (*(dst)), "=&r" (_tmp) \ |
143 | 20.6k | : wsy (src), "i" (EFLAGS_MASK), ## extra ); \ |
144 | 20.6k | break; \ |
145 | 9.67k | case 4: \ |
146 | 9.67k | asm volatile ( \ |
147 | 9.67k | _PRE_EFLAGS("0","4","2") \ |
148 | 9.67k | _op"l %"lsx"3,%"ldx"1; " \ |
149 | 9.67k | _POST_EFLAGS("0","4","2") \ |
150 | 9.67k | : "+g" (eflags), "+" ldy (*(dst)), "=&r" (_tmp) \ |
151 | 9.67k | : lsy (src), "i" (EFLAGS_MASK), ## extra ); \ |
152 | 9.67k | break; \ |
153 | 5.03k | case 8: \ |
154 | 5.03k | __emulate_2op_8byte(_op, src, dst, eflags, qsx, qsy, qdx, qdy, \ |
155 | 5.03k | ## extra); \ |
156 | 5.03k | break; \ |
157 | 35.3k | } \ |
158 | 35.3k | } while (0) |
159 | | #define __emulate_2op(_op, src, dst, sz, eflags, _bx, by, wx, wy, \ |
160 | 99.9k | lx, ly, qx, qy, extra...) \ |
161 | 99.9k | do{ unsigned long _tmp; \ |
162 | 99.9k | switch ( sz ) \ |
163 | 99.9k | { \ |
164 | 68.8k | case 1: \ |
165 | 68.8k | asm volatile ( \ |
166 | 68.8k | _PRE_EFLAGS("0","4","2") \ |
167 | 68.8k | _op"b %"_bx"3,%1; " \ |
168 | 68.8k | _POST_EFLAGS("0","4","2") \ |
169 | 68.8k | : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp) \ |
170 | 68.8k | : by (src), "i" (EFLAGS_MASK), ##extra ); \ |
171 | 68.8k | break; \ |
172 | 31.1k | default: \ |
173 | 31.1k | __emulate_2op_nobyte(_op, src, dst, sz, eflags, wx, wy, "", "m", \ |
174 | 31.1k | lx, ly, "", "m", qx, qy, "", "m", ##extra); \ |
175 | 31.1k | break; \ |
176 | 99.9k | } \ |
177 | 99.9k | } while (0) |
178 | | /* Source operand is byte-sized and may be restricted to just %cl. */ |
179 | | #define _emulate_2op_SrcB(op, src, dst, sz, eflags) \ |
180 | 5.68k | __emulate_2op(op, src, dst, sz, eflags, \ |
181 | 5.68k | "b", "c", "b", "c", "b", "c", "b", "c") |
182 | | #define emulate_2op_SrcB(op, src, dst, eflags) \ |
183 | 5.68k | _emulate_2op_SrcB(op, (src).val, &(dst).val, (dst).bytes, eflags) |
184 | | /* Source operand is byte, word, long or quad sized. */ |
185 | | #define _emulate_2op_SrcV(op, src, dst, sz, eflags, extra...) \ |
186 | 94.2k | __emulate_2op(op, src, dst, sz, eflags, \ |
187 | 94.2k | "b", "q", "w", "r", _LO32, "r", "", "r", ##extra) |
188 | | #define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ |
189 | 94.2k | _emulate_2op_SrcV(_op, (_src).val, &(_dst).val, (_dst).bytes, _eflags) |
190 | | /* Source operand is word, long or quad sized. */ |
191 | | #define _emulate_2op_SrcV_nobyte(op, src, dst, sz, eflags, extra...) \ |
192 | 3.71k | __emulate_2op_nobyte(op, src, dst, sz, eflags, "w", "r", "", "m", \ |
193 | 3.71k | _LO32, "r", "", "m", "", "r", "", "m", ##extra) |
194 | | #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ |
195 | 3.71k | _emulate_2op_SrcV_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes, \ |
196 | 3.71k | _eflags) |
197 | | /* Operands are word, long or quad sized and source may be in memory. */ |
198 | | #define emulate_2op_SrcV_srcmem(_op, _src, _dst, _eflags) \ |
199 | 558 | __emulate_2op_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes, \ |
200 | 558 | _eflags, "", "m", "w", "r", \ |
201 | 558 | "", "m", _LO32, "r", "", "m", "", "r") |
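/*
 * Usage note (exposition only): a caller such as
 * emulate_2op_SrcV("add", src, dst, _regs.eflags) runs a real host ADD of
 * the operand width on the shadow copies of the operands, with the guest's
 * arithmetic flags substituted before and harvested after the instruction
 * via _PRE_EFLAGS/_POST_EFLAGS, so the guest-visible EFLAGS update matches
 * what hardware would have produced.
 */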
202 | | |
203 | | /* Instruction has only one explicit operand (no source operand). */ |
204 | 110k | #define _emulate_1op(_op, dst, sz, eflags, extra...) \ |
205 | 110k | do{ unsigned long _tmp; \ |
206 | 110k | switch ( sz ) \ |
207 | 110k | { \ |
208 | 620 | case 1: \ |
209 | 620 | asm volatile ( \ |
210 | 620 | _PRE_EFLAGS("0","3","2") \ |
211 | 620 | _op"b %1; " \ |
212 | 620 | _POST_EFLAGS("0","3","2") \ |
213 | 620 | : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp) \ |
214 | 620 | : "i" (EFLAGS_MASK), ##extra ); \ |
215 | 620 | break; \ |
216 | 86.1k | case 2: \ |
217 | 86.1k | asm volatile ( \ |
218 | 86.1k | _PRE_EFLAGS("0","3","2") \ |
219 | 86.1k | _op"w %1; " \ |
220 | 86.1k | _POST_EFLAGS("0","3","2") \ |
221 | 86.1k | : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp) \ |
222 | 86.1k | : "i" (EFLAGS_MASK), ##extra ); \ |
223 | 86.1k | break; \ |
224 | 22.8k | case 4: \ |
225 | 22.8k | asm volatile ( \ |
226 | 22.8k | _PRE_EFLAGS("0","3","2") \ |
227 | 22.8k | _op"l %1; " \ |
228 | 22.8k | _POST_EFLAGS("0","3","2") \ |
229 | 22.8k | : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp) \ |
230 | 22.8k | : "i" (EFLAGS_MASK), ##extra ); \ |
231 | 22.8k | break; \ |
232 | 590 | case 8: \ |
233 | 590 | __emulate_1op_8byte(_op, dst, eflags, ##extra); \ |
234 | 590 | break; \ |
235 | 110k | } \ |
236 | 110k | } while (0) |
237 | | #define emulate_1op(op, dst, eflags) \ |
238 | 110k | _emulate_1op(op, &(dst).val, (dst).bytes, eflags) |
239 | | |
240 | | /* Emulate an instruction with quadword operands (x86/64 only). */ |
241 | | #if defined(__x86_64__) |
242 | | #define __emulate_2op_8byte(_op, src, dst, eflags, \ |
243 | 5.03k | qsx, qsy, qdx, qdy, extra...) \ |
244 | 5.03k | do{ asm volatile ( \ |
245 | 5.03k | _PRE_EFLAGS("0","4","2") \ |
246 | 5.03k | _op"q %"qsx"3,%"qdx"1; " \ |
247 | 5.03k | _POST_EFLAGS("0","4","2") \ |
248 | 5.03k | : "+g" (eflags), "+" qdy (*(dst)), "=&r" (_tmp) \ |
249 | 5.03k | : qsy (src), "i" (EFLAGS_MASK), ##extra ); \ |
250 | 5.03k | } while (0) |
251 | 590 | #define __emulate_1op_8byte(_op, dst, eflags, extra...) \ |
252 | 590 | do{ asm volatile ( \ |
253 | 590 | _PRE_EFLAGS("0","3","2") \ |
254 | 590 | _op"q %1; " \ |
255 | 590 | _POST_EFLAGS("0","3","2") \ |
256 | 590 | : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp) \ |
257 | 590 | : "i" (EFLAGS_MASK), ##extra ); \ |
258 | 590 | } while (0) |
259 | | #elif defined(__i386__) |
260 | | #define __emulate_2op_8byte(op, src, dst, eflags, qsx, qsy, qdx, qdy, extra...) |
261 | | #define __emulate_1op_8byte(op, dst, eflags, extra...) |
262 | | #endif /* __i386__ */ |
263 | | |
264 | 2.42k | #define emulate_stub(dst, src...) do { \ |
265 | 2.42k | unsigned long tmp; \ |
266 | 2.42k | invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"), \ |
267 | 2.42k | _POST_EFLAGS("[efl]", "[msk]", "[tmp]"), \ |
268 | 2.42k | dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs.eflags) \ |
269 | 2.42k | : [msk] "i" (EFLAGS_MASK), ## src); \ |
270 | 2.42k | } while (0) |
271 | | |
272 | | /* |
273 | | * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1, |
274 | | * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only. |
275 | | */ |
276 | | static bool even_parity(uint8_t v) |
277 | 22.6k | { |
278 | 22.6k | asm ( "test %1,%1" ASM_FLAG_OUT(, "; setp %0") |
279 | 22.6k | : ASM_FLAG_OUT("=@ccp", "=qm") (v) : "q" (v) ); |
280 | | |
281 | 22.6k | return v; |
282 | 22.6k | } |
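/*
 * Portable equivalent (illustrative only; the asm above instead lets the CPU
 * compute PF directly, and the helper name below is hypothetical): fold the
 * byte's bits together with XOR, leaving the parity of all eight bits in
 * bit 0.
 */
static bool even_parity_sw(uint8_t v)
{
    v ^= v >> 4;
    v ^= v >> 2;
    v ^= v >> 1;

    return !(v & 1);    /* even number of 1s -> true */
}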
283 | | |
284 | | /* Update address held in a register, based on addressing mode. */ |
285 | 99.2k | #define _register_address_increment(reg, inc, byte_width) \ |
286 | 99.2k | do { \ |
287 | 99.2k | int _inc = (inc); /* signed type ensures sign extension to long */ \ |
288 | 99.2k | unsigned int _width = (byte_width); \ |
289 | 99.2k | if ( _width == sizeof(unsigned long) ) \ |
290 | 99.2k | (reg) += _inc; \ |
291 | 99.2k | else if ( mode_64bit() ) \ |
292 | 65.7k | (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \ |
293 | 65.7k | else \ |
294 | 65.7k | (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \ |
295 | 63.4k | (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \ |
296 | 99.2k | } while (0) |
297 | | #define register_address_adjust(reg, adj) \ |
298 | 39.0k | _register_address_increment(reg, \ |
299 | 39.0k | _regs.eflags & X86_EFLAGS_DF ? \ |
300 | 39.0k | -(adj) : (adj), \ |
301 | 39.0k | ad_bytes) |
302 | | |
303 | 42.7k | #define sp_pre_dec(dec) ({ \ |
304 | 42.7k | _register_address_increment(_regs.r(sp), -(dec), ctxt->sp_size/8); \ |
305 | 42.7k | truncate_word(_regs.r(sp), ctxt->sp_size/8); \ |
306 | 42.7k | }) |
307 | 17.3k | #define sp_post_inc(inc) ({ \ |
308 | 17.3k | unsigned long sp = truncate_word(_regs.r(sp), ctxt->sp_size/8); \ |
309 | 17.3k | _register_address_increment(_regs.r(sp), (inc), ctxt->sp_size/8); \ |
310 | 17.3k | sp; \ |
311 | 17.3k | }) |
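/*
 * Worked example (hypothetical values): with a 2-byte address width outside
 * 64-bit mode, _register_address_increment(reg, 1, 2) turns reg ==
 * 0x1234ffff into 0x12340000 -- the low 16 bits wrap while the upper bits
 * are preserved.  In 64-bit mode with a 4-byte width the result is instead
 * zero-extended, matching hardware behaviour for 32-bit address-size
 * overrides.
 */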
312 | | |
313 | 9.25k | #define jmp_rel(rel) \ |
314 | 9.25k | do { \ |
315 | 9.25k | unsigned long ip = _regs.r(ip) + (int)(rel); \ |
316 | 9.25k | if ( op_bytes == 2 && (amd_like(ctxt) || !mode_64bit()) ) \ |
317 | 9.25k | ip = (uint16_t)ip; \ |
318 | 9.25k | else if ( !mode_64bit() ) \ |
319 | 4.22k | ip = (uint32_t)ip; \ |
320 | 9.25k | rc = ops->insn_fetch(ip, NULL, 0, ctxt); \ |
321 | 9.25k | if ( rc ) goto done; \ |
322 | 9.25k | _regs.r(ip) = ip; \ |
323 | 9.02k | singlestep = _regs.eflags & X86_EFLAGS_TF; \ |
324 | 9.02k | } while (0) |
325 | | |
326 | 1.72k | #define validate_far_branch(cs, ip) ({ \ |
327 | 1.72k | if ( sizeof(ip) <= 4 ) { \ |
328 | 348 | ASSERT(!ctxt->lma); \ |
329 | 348 | generate_exception_if((ip) > (cs)->limit, X86_EXC_GP, 0); \ |
330 | 348 | } else \ |
331 | 1.72k | generate_exception_if(ctxt->lma && (cs)->l \ |
332 | 1.72k | ? !is_canonical_address(ip) \ |
333 | 1.72k | : (ip) > (cs)->limit, X86_EXC_GP, 0); \ |
334 | 1.72k | }) |
335 | | |
336 | 1.20k | #define commit_far_branch(cs, newip) ( \ |
337 | 1.20k | ({ \ |
338 | 1.20k | validate_far_branch(cs, newip); \ |
339 | 1.20k | _regs.r(ip) = (newip); \ |
340 | 1.08k | singlestep = _regs.eflags & X86_EFLAGS_TF; \ |
341 | 1.08k | }), \ |
342 | 1.08k | ops->write_segment(x86_seg_cs, cs, ctxt) \ |
343 | 1.08k | ) |
344 | | |
345 | | int x86emul_get_fpu( |
346 | | enum x86_emulate_fpu_type type, |
347 | | struct x86_emulate_ctxt *ctxt, |
348 | | const struct x86_emulate_ops *ops) |
349 | 155k | { |
350 | 155k | uint64_t xcr0; |
351 | 155k | int rc; |
352 | | |
353 | 155k | fail_if(!ops->get_fpu); |
354 | 155k | ASSERT(type != X86EMUL_FPU_none); |
355 | | |
356 | 155k | if ( type < X86EMUL_FPU_ymm || !ops->read_xcr || |
357 | 155k | ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY ) |
358 | 119k | { |
359 | 119k | ASSERT(!ctxt->event_pending); |
360 | 119k | xcr0 = 0; |
361 | 119k | } |
362 | | |
363 | 155k | switch ( type ) |
364 | 155k | { |
365 | 0 | case X86EMUL_FPU_zmm: |
366 | 0 | if ( !(xcr0 & X86_XCR0_ZMM) || !(xcr0 & X86_XCR0_HI_ZMM) || |
367 | 0 | !(xcr0 & X86_XCR0_OPMASK) ) |
368 | 0 | return X86EMUL_UNHANDLEABLE; |
369 | | /* fall through */ |
370 | 35.9k | case X86EMUL_FPU_ymm: |
371 | 35.9k | if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_YMM) ) |
372 | 49 | return X86EMUL_UNHANDLEABLE; |
373 | 35.8k | break; |
374 | | |
375 | 35.8k | case X86EMUL_FPU_opmask: |
376 | 255 | if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_OPMASK) ) |
377 | 255 | return X86EMUL_UNHANDLEABLE; |
378 | 0 | break; |
379 | | |
380 | 119k | default: |
381 | 119k | break; |
382 | 155k | } |
383 | | |
384 | 154k | rc = (ops->get_fpu)(type, ctxt); |
385 | | |
386 | 154k | if ( rc == X86EMUL_OKAY ) |
387 | 154k | { |
388 | 154k | unsigned long cr0; |
389 | | |
390 | 154k | fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu); |
391 | | |
392 | 154k | fail_if(!ops->read_cr); |
393 | 154k | if ( type >= X86EMUL_FPU_xmm ) |
394 | 75.5k | { |
395 | 75.5k | unsigned long cr4; |
396 | | |
397 | 75.5k | rc = ops->read_cr(4, &cr4, ctxt); |
398 | 75.5k | if ( rc != X86EMUL_OKAY ) |
399 | 0 | return rc; |
400 | 75.5k | generate_exception_if(!(cr4 & ((type == X86EMUL_FPU_xmm) |
401 | 75.5k | ? X86_CR4_OSFXSR : X86_CR4_OSXSAVE)), |
402 | 75.5k | X86_EXC_UD); |
403 | 75.5k | } |
404 | | |
405 | 154k | rc = ops->read_cr(0, &cr0, ctxt); |
406 | 154k | if ( rc != X86EMUL_OKAY ) |
407 | 0 | return rc; |
408 | 154k | if ( type >= X86EMUL_FPU_ymm ) |
409 | 35.8k | { |
410 | | /* Should be unreachable if VEX decoding is working correctly. */ |
411 | 35.8k | ASSERT((cr0 & X86_CR0_PE) && !(ctxt->regs->eflags & X86_EFLAGS_VM)); |
412 | 35.8k | } |
413 | 154k | if ( cr0 & X86_CR0_EM ) |
414 | 5.70k | { |
415 | 5.70k | generate_exception_if(type == X86EMUL_FPU_fpu, X86_EXC_NM); |
416 | 5.70k | generate_exception_if(type == X86EMUL_FPU_mmx, X86_EXC_UD); |
417 | 5.69k | generate_exception_if(type == X86EMUL_FPU_xmm, X86_EXC_UD); |
418 | 5.69k | } |
419 | 154k | generate_exception_if((cr0 & X86_CR0_TS) && |
420 | 154k | (type != X86EMUL_FPU_wait || (cr0 & X86_CR0_MP)), |
421 | 154k | X86_EXC_NM); |
422 | 154k | } |
423 | | |
424 | 155k | done: |
425 | 155k | return rc; |
426 | 154k | } |
427 | | |
428 | | static void put_fpu( |
429 | | enum x86_emulate_fpu_type type, |
430 | | bool failed_late, |
431 | | const struct x86_emulate_state *state, |
432 | | struct x86_emulate_ctxt *ctxt, |
433 | | const struct x86_emulate_ops *ops) |
434 | 1.19M | { |
435 | 1.19M | if ( unlikely(failed_late) && type == X86EMUL_FPU_fpu ) |
436 | 59 | ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL); |
437 | 1.19M | else if ( unlikely(type == X86EMUL_FPU_fpu) && !state->fpu_ctrl ) |
438 | 60.3k | { |
439 | 60.3k | struct x86_emul_fpu_aux aux = { |
440 | 60.3k | .ip = ctxt->regs->r(ip), |
441 | 60.3k | .cs = ctxt->regs->cs, |
442 | 60.3k | .op = ((ctxt->opcode & 7) << 8) | state->modrm, |
443 | 60.3k | }; |
444 | 60.3k | struct segment_register sreg; |
445 | | |
446 | 60.3k | if ( ops->read_segment && |
447 | 60.3k | ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY ) |
448 | 51.0k | aux.cs = sreg.sel; |
449 | 60.3k | if ( state->ea.type == OP_MEM ) |
450 | 8.45k | { |
451 | 8.45k | aux.dp = state->ea.mem.off; |
452 | 8.45k | if ( state->ea.mem.seg == x86_seg_cs ) |
453 | 455 | aux.ds = aux.cs; |
454 | 7.99k | else if ( ops->read_segment && |
455 | 7.99k | ops->read_segment(state->ea.mem.seg, &sreg, |
456 | 6.06k | ctxt) == X86EMUL_OKAY ) |
457 | 6.06k | aux.ds = sreg.sel; |
458 | | #ifdef __XEN__ |
459 | | /* |
460 | | * While generally the expectation is that input structures are |
461 | | * fully populated, the selector fields under ctxt->regs normally |
462 | | * aren't set, with the exception of CS and SS for PV domains. |
463 | | * Read the real selector registers for PV, and assert that HVM |
464 | | * invocations always set a properly functioning ->read_segment() |
465 | | * hook. |
466 | | */ |
467 | | else if ( is_pv_vcpu(current) ) |
468 | | switch ( state->ea.mem.seg ) |
469 | | { |
470 | | case x86_seg_ds: aux.ds = read_sreg(ds); break; |
471 | | case x86_seg_es: aux.ds = read_sreg(es); break; |
472 | | case x86_seg_fs: aux.ds = read_sreg(fs); break; |
473 | | case x86_seg_gs: aux.ds = read_sreg(gs); break; |
474 | | case x86_seg_ss: aux.ds = ctxt->regs->ss; break; |
475 | | default: ASSERT_UNREACHABLE(); break; |
476 | | } |
477 | | else |
478 | | ASSERT_UNREACHABLE(); |
479 | | #else |
480 | 1.93k | else |
481 | 1.93k | switch ( state->ea.mem.seg ) |
482 | 1.93k | { |
483 | 955 | case x86_seg_ds: aux.ds = ctxt->regs->ds; break; |
484 | 200 | case x86_seg_es: aux.ds = ctxt->regs->es; break; |
485 | 194 | case x86_seg_fs: aux.ds = ctxt->regs->fs; break; |
486 | 282 | case x86_seg_gs: aux.ds = ctxt->regs->gs; break; |
487 | 307 | case x86_seg_ss: aux.ds = ctxt->regs->ss; break; |
488 | 0 | default: ASSERT_UNREACHABLE(); break; |
489 | 1.93k | } |
490 | 8.45k | #endif |
491 | 8.45k | aux.dval = true; |
492 | 8.45k | } |
493 | 60.3k | ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux); |
494 | 60.3k | } |
495 | 1.13M | else if ( type != X86EMUL_FPU_none && ops->put_fpu ) |
496 | 95.4k | ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL); |
497 | 1.19M | } |
498 | | |
499 | | static inline unsigned long get_loop_count( |
500 | | const struct cpu_user_regs *regs, |
501 | | int ad_bytes) |
502 | 37.1k | { |
503 | 37.1k | return (ad_bytes > 4) ? regs->r(cx) |
504 | 37.1k | : (ad_bytes < 4) ? regs->cx : regs->ecx; |
505 | 37.1k | } |
506 | | |
507 | | static inline void put_loop_count( |
508 | | struct cpu_user_regs *regs, |
509 | | int ad_bytes, |
510 | | unsigned long count) |
511 | 16.1k | { |
512 | 16.1k | if ( ad_bytes == 2 ) |
513 | 4.67k | regs->cx = count; |
514 | 11.5k | else |
515 | 11.5k | regs->r(cx) = ad_bytes == 4 ? (uint32_t)count : count; |
516 | 16.1k | } |
517 | | |
518 | 36.0k | #define get_rep_prefix(extend_si, extend_di) ({ \ |
519 | 36.0k | unsigned long max_reps = 1; \ |
520 | 36.0k | if ( rep_prefix() ) \ |
521 | 36.0k | max_reps = get_loop_count(&_regs, ad_bytes); \ |
522 | 36.0k | if ( max_reps == 0 ) \ |
523 | 36.0k | { \ |
524 | 5.04k | /* \ |
525 | 5.04k | * Skip the instruction if no repetitions are required, but \ |
526 | 5.04k | * zero extend relevant registers first when using 32-bit \ |
527 | 5.04k | * addressing in 64-bit mode. \ |
528 | 5.04k | */ \ |
529 | 5.04k | if ( !amd_like(ctxt) && mode_64bit() && ad_bytes == 4 ) \ |
530 | 5.04k | { \ |
531 | 0 | _regs.r(cx) = 0; \ |
532 | 0 | if ( extend_si ) _regs.r(si) = (uint32_t)_regs.r(si); \ |
533 | 0 | if ( extend_di ) _regs.r(di) = (uint32_t)_regs.r(di); \ |
534 | 0 | } \ |
535 | 5.04k | goto complete_insn; \ |
536 | 5.04k | } \ |
537 | 36.0k | if ( max_reps > 1 && (_regs.eflags & X86_EFLAGS_TF) && \ |
538 | 31.0k | !is_branch_step(ctxt, ops) ) \ |
539 | 31.0k | max_reps = 1; \ |
540 | 31.0k | max_reps; \ |
541 | 31.0k | }) |
542 | | |
543 | | static void __put_rep_prefix( |
544 | | struct cpu_user_regs *int_regs, |
545 | | struct cpu_user_regs *ext_regs, |
546 | | int ad_bytes, |
547 | | unsigned long reps_completed) |
548 | 14.0k | { |
549 | 14.0k | unsigned long ecx = get_loop_count(int_regs, ad_bytes); |
550 | | |
551 | | /* Reduce counter appropriately, and repeat instruction if non-zero. */ |
552 | 14.0k | ecx -= reps_completed; |
553 | 14.0k | if ( ecx != 0 ) |
554 | 12.3k | int_regs->r(ip) = ext_regs->r(ip); |
555 | | |
556 | 14.0k | put_loop_count(int_regs, ad_bytes, ecx); |
557 | 14.0k | } |
558 | | |
559 | 30.2k | #define put_rep_prefix(reps_completed) ({ \ |
560 | 30.2k | if ( rep_prefix() ) \ |
561 | 30.2k | { \ |
562 | 14.0k | __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \ |
563 | 14.0k | if ( unlikely(rc == X86EMUL_EXCEPTION) ) \ |
564 | 14.0k | goto complete_insn; \ |
565 | 14.0k | } \ |
566 | 30.2k | }) |
567 | | |
568 | | /* Clip maximum repetitions so that the index register at most just wraps. */ |
569 | 26.0k | #define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \ |
570 | 26.0k | unsigned long todo__, ea__ = truncate_ea(ea); \ |
571 | 26.0k | if ( !(_regs.eflags & X86_EFLAGS_DF) ) \ |
572 | 26.0k | todo__ = truncate_ea(-ea__) / (bytes_per_rep); \ |
573 | 26.0k | else if ( truncate_ea(ea__ + (bytes_per_rep) - 1) < ea__ ) \ |
574 | 11.9k | todo__ = 1; \ |
575 | 11.9k | else \ |
576 | 11.9k | todo__ = ea__ / (bytes_per_rep) + 1; \ |
577 | 26.0k | if ( !todo__ ) \ |
578 | 26.0k | (reps) = 1; \ |
579 | 26.0k | else if ( todo__ < (reps) ) \ |
580 | 24.9k | (reps) = todo__; \ |
581 | 26.0k | ea__; \ |
582 | 26.0k | }) |
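/*
 * Worked example (hypothetical values): with 16-bit addressing,
 * bytes_per_rep == 2, and EFLAGS.DF clear, an effective address of 0xfffc
 * leaves room for truncate_ea(-0xfffc) / 2 == 0x0004 / 2 == 2 further
 * elements, so any larger REP count is clipped to 2 and the index register
 * ends up exactly wrapped to zero.
 */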
583 | | |
584 | | /* |
585 | | * Unsigned multiplication with double-word result. |
586 | | * IN: Multiplicand=m[0], Multiplier=m[1] |
587 | | * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] |
588 | | */ |
589 | | static bool mul_dbl(unsigned long m[2]) |
590 | 533 | { |
591 | 533 | bool rc; |
592 | | |
593 | 533 | asm ( "mul %1" ASM_FLAG_OUT(, "; seto %2") |
594 | 533 | : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) ); |
595 | | |
596 | 533 | return rc; |
597 | 533 | } |
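/*
 * Narrow analogue (illustrative only; the function name is hypothetical):
 * the inline asm relies on one-operand MUL producing a double-width product
 * in rDX:rAX.  The same contract, written out at 32 bits:
 */
static bool mul_dbl_32(uint32_t m[2])
{
    uint64_t prod = (uint64_t)m[0] * m[1];

    m[0] = (uint32_t)prod;           /* low half, as rAX */
    m[1] = (uint32_t)(prod >> 32);   /* high half, as rDX */

    return m[1] != 0;                /* CF/OF set iff high half non-zero */
}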
598 | | |
599 | | /* |
600 | | * Signed multiplication with double-word result. |
601 | | * IN: Multiplicand=m[0], Multiplier=m[1] |
602 | | * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] |
603 | | */ |
604 | | static bool imul_dbl(unsigned long m[2]) |
605 | 446 | { |
606 | 446 | bool rc; |
607 | | |
608 | 446 | asm ( "imul %1" ASM_FLAG_OUT(, "; seto %2") |
609 | 446 | : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) ); |
610 | | |
611 | 446 | return rc; |
612 | 446 | } |
613 | | |
614 | | /* |
615 | | * Unsigned division of double-word dividend. |
616 | | * IN: Dividend=u[1]:u[0], Divisor=v |
617 | | * OUT: Return 1: #DE |
618 | | * Return 0: Quotient=u[0], Remainder=u[1] |
619 | | */ |
620 | | static bool div_dbl(unsigned long u[2], unsigned long v) |
621 | 2.85k | { |
622 | 2.85k | if ( (v == 0) || (u[1] >= v) ) |
623 | 144 | return 1; |
624 | 2.70k | asm ( "div"__OS" %2" : "+a" (u[0]), "+d" (u[1]) : "rm" (v) ); |
625 | 2.70k | return 0; |
626 | 2.85k | } |
627 | | |
628 | | /* |
629 | | * Signed division of double-word dividend. |
630 | | * IN: Dividend=u[1]:u[0], Divisor=v |
631 | | * OUT: Return 1: #DE |
632 | | * Return 0: Quotient=u[0], Remainder=u[1] |
633 | | * NB. We don't use idiv directly as it's moderately hard to work out |
634 | | * ahead of time whether it will #DE, which we cannot allow to happen. |
635 | | */ |
636 | | static bool idiv_dbl(unsigned long u[2], unsigned long v) |
637 | 1.79k | { |
638 | 1.79k | bool negu = (long)u[1] < 0, negv = (long)v < 0; |
639 | | |
640 | | /* u = abs(u) */ |
641 | 1.79k | if ( negu ) |
642 | 497 | { |
643 | 497 | u[1] = ~u[1]; |
644 | 497 | if ( (u[0] = -u[0]) == 0 ) |
645 | 69 | u[1]++; |
646 | 497 | } |
647 | | |
648 | | /* abs(u) / abs(v) */ |
649 | 1.79k | if ( div_dbl(u, negv ? -v : v) ) |
650 | 87 | return 1; |
651 | | |
652 | | /* Remainder has same sign as dividend. It cannot overflow. */ |
653 | 1.70k | if ( negu ) |
654 | 443 | u[1] = -u[1]; |
655 | | |
656 | | /* Quotient is overflowed if sign bit is set. */ |
657 | 1.70k | if ( negu ^ negv ) |
658 | 858 | { |
659 | 858 | if ( (long)u[0] >= 0 ) |
660 | 748 | u[0] = -u[0]; |
661 | 110 | else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */ |
662 | 76 | return 1; |
663 | 858 | } |
664 | 846 | else if ( (long)u[0] < 0 ) |
665 | 70 | return 1; |
666 | | |
667 | 1.55k | return 0; |
668 | 1.70k | } |
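/*
 * Worked example (hypothetical values): dividing -10 by 3 goes through
 * div_dbl() as 10 / 3 = 3 remainder 1; the quotient is then negated because
 * the operand signs differ and the remainder is negated to follow the
 * dividend's sign, giving -3 remainder -1, matching IDIV semantics without
 * ever risking a real #DE.
 */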
669 | | |
670 | | static bool |
671 | | test_cc( |
672 | | unsigned int condition, unsigned int flags) |
673 | 17.4k | { |
674 | 17.4k | int rc = 0; |
675 | | |
676 | 17.4k | switch ( (condition & 15) >> 1 ) |
677 | 17.4k | { |
678 | 2.84k | case 0: /* o */ |
679 | 2.84k | rc |= (flags & X86_EFLAGS_OF); |
680 | 2.84k | break; |
681 | 2.06k | case 1: /* b/c/nae */ |
682 | 2.06k | rc |= (flags & X86_EFLAGS_CF); |
683 | 2.06k | break; |
684 | 2.04k | case 2: /* z/e */ |
685 | 2.04k | rc |= (flags & X86_EFLAGS_ZF); |
686 | 2.04k | break; |
687 | 2.15k | case 3: /* be/na */ |
688 | 2.15k | rc |= (flags & (X86_EFLAGS_CF | X86_EFLAGS_ZF)); |
689 | 2.15k | break; |
690 | 2.42k | case 4: /* s */ |
691 | 2.42k | rc |= (flags & X86_EFLAGS_SF); |
692 | 2.42k | break; |
693 | 1.70k | case 5: /* p/pe */ |
694 | 1.70k | rc |= (flags & X86_EFLAGS_PF); |
695 | 1.70k | break; |
696 | 2.27k | case 7: /* le/ng */ |
697 | 2.27k | rc |= (flags & X86_EFLAGS_ZF); |
698 | | /* fall through */ |
699 | 4.17k | case 6: /* l/nge */ |
700 | 4.17k | rc |= (!(flags & X86_EFLAGS_SF) != !(flags & X86_EFLAGS_OF)); |
701 | 4.17k | break; |
702 | 17.4k | } |
703 | | |
704 | | /* Odd condition identifiers (lsb == 1) have inverted sense. */ |
705 | 17.4k | return (!!rc ^ (condition & 1)); |
706 | 17.4k | } |
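/*
 * Worked example (exposition only): condition 0x4 is "e/z" and its odd
 * sibling 0x5 is "ne/nz", so test_cc(0x4, flags) is true exactly when
 * EFLAGS.ZF is set and test_cc(0x5, flags) is its inverse -- this is the
 * predicate the Jcc/SETcc/CMOVcc emulation paths evaluate against the
 * shadowed flags.
 */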
707 | | |
708 | | int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt, |
709 | | const struct x86_emulate_ops *ops) |
710 | 621k | { |
711 | 621k | struct segment_register reg; |
712 | | |
713 | 621k | if ( ctxt->regs->eflags & X86_EFLAGS_VM ) |
714 | 82.9k | return 3; |
715 | | |
716 | 538k | if ( (ops->read_segment == NULL) || |
717 | 538k | ops->read_segment(x86_seg_ss, ®, ctxt) ) |
718 | 90.0k | return -1; |
719 | | |
720 | 448k | return reg.dpl; |
721 | 538k | } |
722 | | |
723 | | static int |
724 | | _mode_iopl( |
725 | | struct x86_emulate_ctxt *ctxt, |
726 | | const struct x86_emulate_ops *ops) |
727 | 19.8k | { |
728 | 19.8k | int cpl = x86emul_get_cpl(ctxt, ops); |
729 | 19.8k | if ( cpl == -1 ) |
730 | 15 | return -1; |
731 | 19.8k | return cpl <= MASK_EXTR(ctxt->regs->eflags, X86_EFLAGS_IOPL); |
732 | 19.8k | } |
733 | | |
734 | 19.8k | #define mode_iopl() ({ \ |
735 | 19.8k | int _iopl = _mode_iopl(ctxt, ops); \ |
736 | 19.8k | fail_if(_iopl < 0); \ |
737 | 19.8k | _iopl; \ |
738 | 19.8k | }) |
739 | | #define mode_vif() ({ \ |
740 | | cr4 = 0; \ |
741 | | if ( ops->read_cr && x86emul_get_cpl(ctxt, ops) == 3 ) \ |
742 | | { \ |
743 | | rc = ops->read_cr(4, &cr4, ctxt); \ |
744 | | if ( rc != X86EMUL_OKAY ) goto done; \ |
745 | | } \ |
746 | | !!(cr4 & (_regs.eflags & X86_EFLAGS_VM ? X86_CR4_VME : X86_CR4_PVI)); \ |
747 | | }) |
748 | | |
749 | | static int ioport_access_check( |
750 | | unsigned int first_port, |
751 | | unsigned int bytes, |
752 | | struct x86_emulate_ctxt *ctxt, |
753 | | const struct x86_emulate_ops *ops) |
754 | 15.1k | { |
755 | 15.1k | unsigned long iobmp; |
756 | 15.1k | struct segment_register tr; |
757 | 15.1k | int rc = X86EMUL_OKAY; |
758 | | |
759 | 15.1k | if ( !(ctxt->regs->eflags & X86_EFLAGS_VM) && mode_iopl() ) |
760 | 14.5k | return X86EMUL_OKAY; |
761 | | |
762 | 588 | fail_if(ops->read_segment == NULL); |
763 | | /* |
764 | | * X86EMUL_DONE coming back here may be used to defer the port |
765 | | * permission check to the respective ioport hook. |
766 | | */ |
767 | 586 | if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 ) |
768 | 0 | return rc == X86EMUL_DONE ? X86EMUL_OKAY : rc; |
769 | | |
770 | | /* Ensure the TSS has an io-bitmap-offset field. */ |
771 | 586 | generate_exception_if(tr.type != 0xb, X86_EXC_GP, 0); |
772 | | |
773 | 561 | switch ( rc = read_ulong(x86_seg_tr, 0x66, &iobmp, 2, ctxt, ops) ) |
774 | 561 | { |
775 | 538 | case X86EMUL_OKAY: |
776 | 538 | break; |
777 | | |
778 | 11 | case X86EMUL_EXCEPTION: |
779 | 11 | generate_exception_if(!ctxt->event_pending, X86_EXC_GP, 0); |
780 | | /* fallthrough */ |
781 | | |
782 | 2 | default: |
783 | 2 | return rc; |
784 | 561 | } |
785 | | |
786 | | /* Read two bytes including byte containing first port. */ |
787 | 538 | switch ( rc = read_ulong(x86_seg_tr, iobmp + first_port / 8, |
788 | 538 | &iobmp, 2, ctxt, ops) ) |
789 | 538 | { |
790 | 515 | case X86EMUL_OKAY: |
791 | 515 | break; |
792 | | |
793 | 22 | case X86EMUL_EXCEPTION: |
794 | 22 | generate_exception_if(!ctxt->event_pending, X86_EXC_GP, 0); |
795 | | /* fallthrough */ |
796 | | |
797 | 2 | default: |
798 | 2 | return rc; |
799 | 538 | } |
800 | | |
801 | 515 | generate_exception_if(iobmp & (((1 << bytes) - 1) << (first_port & 7)), |
802 | 515 | X86_EXC_GP, 0); |
803 | | |
804 | 584 | done: |
805 | 584 | return rc; |
806 | 515 | } |
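/*
 * Worked example (hypothetical values): a 2-byte access to port 0x3f9 reads
 * the two I/O-bitmap bytes covering the port, at bitmap offset
 * 0x3f9 / 8 == 0x7f, and then tests mask ((1 << 2) - 1) << (0x3f9 & 7) ==
 * 0x06 against them; if either bit is set, #GP(0) is raised.
 */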
807 | | |
808 | | static int |
809 | | realmode_load_seg( |
810 | | enum x86_segment seg, |
811 | | uint16_t sel, |
812 | | struct segment_register *sreg, |
813 | | struct x86_emulate_ctxt *ctxt, |
814 | | const struct x86_emulate_ops *ops) |
815 | 3.13k | { |
816 | 3.13k | int rc; |
817 | | |
818 | 3.13k | if ( !ops->read_segment ) |
819 | 1 | return X86EMUL_UNHANDLEABLE; |
820 | | |
821 | 3.12k | if ( (rc = ops->read_segment(seg, sreg, ctxt)) == X86EMUL_OKAY ) |
822 | 3.12k | { |
823 | 3.12k | sreg->sel = sel; |
824 | 3.12k | sreg->base = (uint32_t)sel << 4; |
825 | 3.12k | } |
826 | | |
827 | 3.12k | return rc; |
828 | 3.13k | } |
829 | | |
830 | | /* |
831 | | * Passing in x86_seg_none means |
832 | | * - suppress any exceptions other than #PF, |
833 | | * - don't commit any state. |
834 | | */ |
835 | | static int |
836 | | protmode_load_seg( |
837 | | enum x86_segment seg, |
838 | | uint16_t sel, bool is_ret, |
839 | | struct segment_register *sreg, |
840 | | struct x86_emulate_ctxt *ctxt, |
841 | | const struct x86_emulate_ops *ops) |
842 | 7.07k | { |
843 | 7.07k | const struct cpu_policy *cp = ctxt->cpu_policy; |
844 | 7.07k | enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr; |
845 | 7.07k | struct { uint32_t a, b; } desc, desc_hi = {}; |
846 | 7.07k | uint8_t dpl, rpl; |
847 | 7.07k | int cpl = x86emul_get_cpl(ctxt, ops); |
848 | 7.07k | uint32_t a_flag = 0x100; |
849 | 7.07k | int rc, fault_type = X86_EXC_GP; |
850 | | |
851 | 7.07k | if ( cpl < 0 ) |
852 | 5 | return X86EMUL_UNHANDLEABLE; |
853 | | |
854 | | /* NULL selector? */ |
855 | 7.07k | if ( (sel & 0xfffc) == 0 ) |
856 | 1.49k | { |
857 | 1.49k | switch ( seg ) |
858 | 1.49k | { |
859 | 68 | case x86_seg_ss: |
860 | 68 | if ( mode_64bit() && (cpl != 3) && (cpl == sel) ) |
861 | 1.46k | default: |
862 | 1.46k | break; |
863 | | /* fall through */ |
864 | 35 | case x86_seg_cs: |
865 | 36 | case x86_seg_tr: |
866 | 36 | goto raise_exn; |
867 | 1.49k | } |
868 | 1.46k | if ( seg == x86_seg_none || !_amd_like(cp) || vcpu_has_nscb() || |
869 | 1.46k | !ops->read_segment || |
870 | 1.46k | ops->read_segment(seg, sreg, ctxt) != X86EMUL_OKAY ) |
871 | 535 | memset(sreg, 0, sizeof(*sreg)); |
872 | 927 | else |
873 | 927 | sreg->attr = 0; |
874 | 1.46k | sreg->sel = sel; |
875 | | |
876 | | /* Since CPL == SS.DPL, we need to put back DPL. */ |
877 | 1.46k | if ( seg == x86_seg_ss ) |
878 | 64 | sreg->dpl = sel; |
879 | | |
880 | 1.46k | return X86EMUL_OKAY; |
881 | 1.49k | } |
882 | | |
883 | | /* System segment descriptors must reside in the GDT. */ |
884 | 5.57k | if ( is_x86_system_segment(seg) && (sel & 4) ) |
885 | 1 | goto raise_exn; |
886 | | |
887 | 5.57k | switch ( rc = ops->read(sel_seg, sel & 0xfff8, &desc, sizeof(desc), ctxt) ) |
888 | 5.57k | { |
889 | 5.49k | case X86EMUL_OKAY: |
890 | 5.49k | break; |
891 | | |
892 | 78 | case X86EMUL_EXCEPTION: |
893 | 78 | if ( !ctxt->event_pending ) |
894 | 64 | goto raise_exn; |
895 | | /* fallthrough */ |
896 | | |
897 | 16 | default: |
898 | 16 | return rc; |
899 | 5.57k | } |
900 | | |
901 | | /* System segments must have S flag == 0. */ |
902 | 5.49k | if ( is_x86_system_segment(seg) && (desc.b & (1u << 12)) ) |
903 | 1 | goto raise_exn; |
904 | | /* User segments must have S flag == 1. */ |
905 | 5.49k | if ( is_x86_user_segment(seg) && !(desc.b & (1u << 12)) ) |
906 | 28 | goto raise_exn; |
907 | | |
908 | 5.46k | dpl = (desc.b >> 13) & 3; |
909 | 5.46k | rpl = sel & 3; |
910 | | |
911 | 5.46k | switch ( seg ) |
912 | 5.46k | { |
913 | 484 | case x86_seg_cs: |
914 | | /* Code segment? */ |
915 | 484 | if ( !(desc.b & (1u<<11)) ) |
916 | 5 | goto raise_exn; |
917 | 479 | if ( is_ret |
918 | 479 | ? /* |
919 | | * Really rpl < cpl, but our sole caller doesn't handle |
920 | | * privilege level changes. |
921 | | */ |
922 | 223 | rpl != cpl || (desc.b & (1 << 10) ? dpl > rpl : dpl != rpl) |
923 | 479 | : desc.b & (1 << 10) |
924 | | /* Conforming segment: check DPL against CPL. */ |
925 | 256 | ? dpl > cpl |
926 | | /* Non-conforming segment: check RPL and DPL against CPL. */ |
927 | 256 | : rpl > cpl || dpl != cpl ) |
928 | 6 | goto raise_exn; |
929 | | /* |
930 | | * 64-bit code segments (L bit set) must have D bit clear. |
931 | | * Experimentally in long mode, the L and D bits are checked before |
932 | | * the Present bit. |
933 | | */ |
934 | 473 | if ( ctxt->lma && (desc.b & (1 << 21)) && (desc.b & (1 << 22)) ) |
935 | 14 | goto raise_exn; |
936 | 459 | sel = (sel ^ rpl) | cpl; |
937 | 459 | break; |
938 | 72 | case x86_seg_ss: |
939 | | /* Writable data segment? */ |
940 | 72 | if ( (desc.b & (5u<<9)) != (1u<<9) ) |
941 | 1 | goto raise_exn; |
942 | 71 | if ( (dpl != cpl) || (dpl != rpl) ) |
943 | 2 | goto raise_exn; |
944 | 69 | break; |
945 | 116 | case x86_seg_ldtr: |
946 | | /* LDT system segment? */ |
947 | 116 | if ( (desc.b & (15u<<8)) != (2u<<8) ) |
948 | 2 | goto raise_exn; |
949 | 114 | a_flag = 0; |
950 | 114 | break; |
951 | 55 | case x86_seg_tr: |
952 | | /* Available TSS system segment? */ |
953 | 55 | if ( (desc.b & (15u<<8)) != (9u<<8) ) |
954 | 4 | goto raise_exn; |
955 | 51 | a_flag = 0x200; /* busy flag */ |
956 | 51 | break; |
957 | 161 | default: |
958 | | /* Readable code or data segment? */ |
959 | 161 | if ( (desc.b & (5u<<9)) == (4u<<9) ) |
960 | 1 | goto raise_exn; |
961 | | /* Non-conforming segment: check DPL against RPL and CPL. */ |
962 | 160 | if ( ((desc.b & (6u<<9)) != (6u<<9)) && |
963 | 160 | ((dpl < cpl) || (dpl < rpl)) ) |
964 | 2 | goto raise_exn; |
965 | 158 | break; |
966 | 4.57k | case x86_seg_none: |
967 | | /* Non-conforming segment: check DPL against RPL and CPL. */ |
968 | 4.57k | if ( ((desc.b & (0x1c << 8)) != (0x1c << 8)) && |
969 | 4.57k | ((dpl < cpl) || (dpl < rpl)) ) |
970 | 770 | return X86EMUL_EXCEPTION; |
971 | 3.80k | a_flag = 0; |
972 | 3.80k | break; |
973 | 5.46k | } |
974 | | |
975 | | /* Segment present in memory? */ |
976 | 4.65k | if ( !(desc.b & (1 << 15)) && seg != x86_seg_none ) |
977 | 9 | { |
978 | 9 | fault_type = seg != x86_seg_ss ? X86_EXC_NP : X86_EXC_SS; |
979 | 9 | goto raise_exn; |
980 | 9 | } |
981 | | |
982 | 4.64k | if ( !is_x86_user_segment(seg) ) |
983 | 3.97k | { |
984 | | /* |
985 | | * Whether to use an 8- or 16-byte descriptor in long mode depends |
986 | | * on sub-mode, descriptor type, and vendor: |
987 | | * - non-system descriptors are always 8-byte ones, |
988 | | * - system descriptors are always 16-byte ones in 64-bit mode, |
989 | | * - (call) gates are always 16-byte ones, |
990 | | * - other system descriptors in compatibility mode have |
991 | | * - only their low 8-byte bytes read on Intel, |
992 | | * - all 16 bytes read with the high 8 bytes ignored on AMD. |
993 | | */ |
994 | 3.97k | bool wide = desc.b & 0x1000 |
995 | 3.97k | ? false : (desc.b & 0xf00) != 0xc00 && !_amd_like(cp) |
996 | 2.82k | ? mode_64bit() : ctxt->lma; |
997 | | |
998 | 3.97k | if ( wide ) |
999 | 617 | { |
1000 | 617 | switch ( rc = ops->read(sel_seg, (sel & 0xfff8) + 8, |
1001 | 617 | &desc_hi, sizeof(desc_hi), ctxt) ) |
1002 | 617 | { |
1003 | 602 | case X86EMUL_OKAY: |
1004 | 602 | break; |
1005 | | |
1006 | 14 | case X86EMUL_EXCEPTION: |
1007 | 14 | if ( !ctxt->event_pending ) |
1008 | 13 | goto raise_exn; |
1009 | | /* fall through */ |
1010 | 2 | default: |
1011 | 2 | return rc; |
1012 | 617 | } |
1013 | 602 | if ( !mode_64bit() && _amd_like(cp) && (desc.b & 0xf00) != 0xc00 ) |
1014 | 67 | desc_hi.b = desc_hi.a = 0; |
1015 | 602 | if ( (desc_hi.b & 0x00001f00) || |
1016 | 602 | (seg != x86_seg_none && |
1017 | 285 | !is_canonical_address((uint64_t)desc_hi.a << 32)) ) |
1018 | 337 | goto raise_exn; |
1019 | 602 | } |
1020 | 3.97k | } |
1021 | | |
1022 | | /* Ensure Accessed flag is set. */ |
1023 | 4.29k | if ( a_flag && !(desc.b & a_flag) ) |
1024 | 226 | { |
1025 | 226 | uint32_t new_desc_b = desc.b | a_flag; |
1026 | | |
1027 | 226 | fail_if(!ops->cmpxchg); |
1028 | 225 | switch ( (rc = ops->cmpxchg(sel_seg, (sel & 0xfff8) + 4, &desc.b, |
1029 | 225 | &new_desc_b, sizeof(desc.b), true, ctxt)) ) |
1030 | 225 | { |
1031 | 210 | case X86EMUL_OKAY: |
1032 | 210 | break; |
1033 | | |
1034 | 14 | case X86EMUL_EXCEPTION: |
1035 | 14 | if ( !ctxt->event_pending ) |
1036 | 0 | goto raise_exn; |
1037 | | /* fallthrough */ |
1038 | | |
1039 | 15 | default: |
1040 | 15 | return rc; |
1041 | | |
1042 | 0 | case X86EMUL_CMPXCHG_FAILED: |
1043 | 0 | return X86EMUL_RETRY; |
1044 | 225 | } |
1045 | | |
1046 | | /* Force the Accessed flag in our local copy. */ |
1047 | 210 | desc.b = new_desc_b; |
1048 | 210 | } |
1049 | | |
1050 | 4.28k | sreg->base = (((uint64_t)desc_hi.a << 32) | |
1051 | 4.28k | ((desc.b << 0) & 0xff000000u) | |
1052 | 4.28k | ((desc.b << 16) & 0x00ff0000u) | |
1053 | 4.28k | ((desc.a >> 16) & 0x0000ffffu)); |
1054 | 4.28k | sreg->attr = (((desc.b >> 8) & 0x00ffu) | |
1055 | 4.28k | ((desc.b >> 12) & 0x0f00u)); |
1056 | 4.28k | sreg->limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); |
1057 | 4.28k | if ( sreg->g ) |
1058 | 551 | sreg->limit = (sreg->limit << 12) | 0xfffu; |
1059 | 4.28k | sreg->sel = sel; |
1060 | 4.28k | return X86EMUL_OKAY; |
1061 | | |
1062 | 526 | raise_exn: |
1063 | 526 | generate_exception_if(seg != x86_seg_none, fault_type, sel & 0xfffc); |
1064 | 371 | rc = X86EMUL_EXCEPTION; |
1065 | 527 | done: |
1066 | 527 | return rc; |
1067 | 371 | } |
1068 | | |
1069 | | static int |
1070 | | load_seg( |
1071 | | enum x86_segment seg, |
1072 | | uint16_t sel, bool is_ret, |
1073 | | struct segment_register *sreg, |
1074 | | struct x86_emulate_ctxt *ctxt, |
1075 | | const struct x86_emulate_ops *ops) |
1076 | 5.05k | { |
1077 | 5.05k | struct segment_register reg; |
1078 | 5.05k | int rc; |
1079 | | |
1080 | 5.05k | if ( !ops->write_segment ) |
1081 | 5 | return X86EMUL_UNHANDLEABLE; |
1082 | | |
1083 | 5.04k | if ( !sreg ) |
1084 | 3.22k | sreg = ® |
1085 | | |
1086 | 5.04k | if ( in_protmode(ctxt, ops) ) |
1087 | 1.91k | rc = protmode_load_seg(seg, sel, is_ret, sreg, ctxt, ops); |
1088 | 3.13k | else |
1089 | 3.13k | rc = realmode_load_seg(seg, sel, sreg, ctxt, ops); |
1090 | | |
1091 | 5.04k | if ( !rc && sreg == ® ) |
1092 | 3.13k | rc = ops->write_segment(seg, sreg, ctxt); |
1093 | | |
1094 | 5.04k | return rc; |
1095 | 5.05k | } |
1096 | | |
1097 | | /* Map GPRs by ModRM encoding to their offset within struct cpu_user_regs. */ |
1098 | | const uint8_t cpu_user_regs_gpr_offsets[] = { |
1099 | | offsetof(struct cpu_user_regs, r(ax)), |
1100 | | offsetof(struct cpu_user_regs, r(cx)), |
1101 | | offsetof(struct cpu_user_regs, r(dx)), |
1102 | | offsetof(struct cpu_user_regs, r(bx)), |
1103 | | offsetof(struct cpu_user_regs, r(sp)), |
1104 | | offsetof(struct cpu_user_regs, r(bp)), |
1105 | | offsetof(struct cpu_user_regs, r(si)), |
1106 | | offsetof(struct cpu_user_regs, r(di)), |
1107 | | #ifdef __x86_64__ |
1108 | | offsetof(struct cpu_user_regs, r8), |
1109 | | offsetof(struct cpu_user_regs, r9), |
1110 | | offsetof(struct cpu_user_regs, r10), |
1111 | | offsetof(struct cpu_user_regs, r11), |
1112 | | offsetof(struct cpu_user_regs, r12), |
1113 | | offsetof(struct cpu_user_regs, r13), |
1114 | | offsetof(struct cpu_user_regs, r14), |
1115 | | offsetof(struct cpu_user_regs, r15), |
1116 | | #endif |
1117 | | }; |
1118 | | |
1119 | | static void *_decode_gpr( |
1120 | | struct cpu_user_regs *regs, unsigned int modrm_reg, bool legacy) |
1121 | 205k | { |
1122 | 205k | static const uint8_t byte_reg_offsets[] = { |
1123 | 205k | offsetof(struct cpu_user_regs, al), |
1124 | 205k | offsetof(struct cpu_user_regs, cl), |
1125 | 205k | offsetof(struct cpu_user_regs, dl), |
1126 | 205k | offsetof(struct cpu_user_regs, bl), |
1127 | 205k | offsetof(struct cpu_user_regs, ah), |
1128 | 205k | offsetof(struct cpu_user_regs, ch), |
1129 | 205k | offsetof(struct cpu_user_regs, dh), |
1130 | 205k | offsetof(struct cpu_user_regs, bh), |
1131 | 205k | }; |
1132 | | |
1133 | 205k | if ( !legacy ) |
1134 | 128k | return decode_gpr(regs, modrm_reg); |
1135 | | |
1136 | | /* Check that the array is a power of two. */ |
1137 | 77.5k | BUILD_BUG_ON(ARRAY_SIZE(byte_reg_offsets) & |
1138 | 77.5k | (ARRAY_SIZE(byte_reg_offsets) - 1)); |
1139 | | |
1140 | 77.5k | ASSERT(modrm_reg < ARRAY_SIZE(byte_reg_offsets)); |
1141 | | |
1142 | | /* Note that this also acts as array_access_nospec() stand-in. */ |
1143 | 77.5k | modrm_reg &= ARRAY_SIZE(byte_reg_offsets) - 1; |
1144 | | |
1145 | 77.5k | return (void *)regs + byte_reg_offsets[modrm_reg]; |
1146 | 77.5k | } |
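/*
 * Usage note (exposition only): with no REX prefix, byte-register encodings
 * 4-7 select AH/CH/DH/BH, hence the dedicated byte_reg_offsets[] table
 * above; with a REX prefix present the plain decode_gpr() path is used
 * instead, so encoding 4 then refers to the low byte of rSP (SPL).
 */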
1147 | | |
1148 | | static unsigned long *decode_vex_gpr( |
1149 | | unsigned int vex_reg, struct cpu_user_regs *regs, |
1150 | | const struct x86_emulate_ctxt *ctxt) |
1151 | 1.36k | { |
1152 | 1.36k | return decode_gpr(regs, ~vex_reg & (mode_64bit() ? 0xf : 7)); |
1153 | 1.36k | } |
1154 | | |
1155 | 9 | #define avx512_vlen_check(lig) do { \ |
1156 | 9 | switch ( evex.lr ) \ |
1157 | 9 | { \ |
1158 | 2 | default: \ |
1159 | 2 | generate_exception(X86_EXC_UD); \ |
1160 | 2 | case 2: \ |
1161 | 2 | break; \ |
1162 | 5 | case 0: case 1: \ |
1163 | 5 | if ( !(lig) ) \ |
1164 | 5 | host_and_vcpu_must_have(avx512vl); \ |
1165 | 5 | break; \ |
1166 | 9 | } \ |
1167 | 9 | } while ( false ) |
1168 | | |
1169 | | static bool is_branch_step(struct x86_emulate_ctxt *ctxt, |
1170 | | const struct x86_emulate_ops *ops) |
1171 | 164k | { |
1172 | 164k | uint64_t debugctl; |
1173 | 164k | int rc = X86EMUL_UNHANDLEABLE; |
1174 | | |
1175 | 164k | if ( !ops->read_msr || |
1176 | 164k | (rc = ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl, |
1177 | 107k | ctxt)) != X86EMUL_OKAY ) |
1178 | 56.8k | { |
1179 | 56.8k | if ( rc == X86EMUL_EXCEPTION ) |
1180 | 0 | x86_emul_reset_event(ctxt); |
1181 | 56.8k | debugctl = 0; |
1182 | 56.8k | } |
1183 | | |
1184 | 164k | return debugctl & IA32_DEBUGCTLMSR_BTF; |
1185 | 164k | } |
1186 | | |
1187 | | static void adjust_bnd(struct x86_emulate_ctxt *ctxt, |
1188 | | const struct x86_emulate_ops *ops, enum vex_pfx pfx) |
1189 | 14.6k | { |
1190 | 14.6k | uint64_t xcr0, bndcfg; |
1191 | 14.6k | int rc; |
1192 | | |
1193 | 14.6k | if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() ) |
1194 | 14.6k | return; |
1195 | | |
1196 | 0 | if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY || |
1197 | 0 | !(xcr0 & X86_XCR0_BNDREGS) || !(xcr0 & X86_XCR0_BNDCSR) ) |
1198 | 0 | { |
1199 | 0 | ASSERT(!ctxt->event_pending); |
1200 | 0 | return; |
1201 | 0 | } |
1202 | | |
1203 | 0 | if ( !mode_ring0() ) |
1204 | 0 | bndcfg = read_bndcfgu(); |
1205 | 0 | else if ( !ops->read_msr || |
1206 | 0 | (rc = ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg, |
1207 | 0 | ctxt)) != X86EMUL_OKAY ) |
1208 | 0 | { |
1209 | 0 | if ( rc == X86EMUL_EXCEPTION ) |
1210 | 0 | x86_emul_reset_event(ctxt); |
1211 | 0 | return; |
1212 | 0 | } |
1213 | 0 | if ( (bndcfg & IA32_BNDCFGS_ENABLE) && !(bndcfg & IA32_BNDCFGS_PRESERVE) ) |
1214 | 0 | { |
1215 | | /* |
1216 | | * Using BNDMK or any other MPX instruction here is pointless, as |
1217 | | * we run with MPX disabled ourselves, and hence they're all no-ops. |
1218 | | * Therefore we have two ways to clear BNDn: Enable MPX temporarily |
1219 | | * (in which case executing any suitable non-prefixed branch |
1220 | | * instruction would do), or use XRSTOR. |
1221 | | */ |
1222 | 0 | xstate_set_init(X86_XCR0_BNDREGS); |
1223 | 0 | } |
1224 | 0 | done:; |
1225 | 0 | } |
1226 | | |
1227 | | int cf_check x86emul_unhandleable_rw( |
1228 | | enum x86_segment seg, |
1229 | | unsigned long offset, |
1230 | | void *p_data, |
1231 | | unsigned int bytes, |
1232 | | struct x86_emulate_ctxt *ctxt) |
1233 | 0 | { |
1234 | 0 | return X86EMUL_UNHANDLEABLE; |
1235 | 0 | } |
1236 | | |
1237 | | /* Helper definitions. */ |
1238 | 711k | #define op_bytes (state->op_bytes) |
1239 | 39.2k | #define ad_bytes (state->ad_bytes) |
1240 | 67.6k | #define ext (state->ext) |
1241 | 90.4k | #define modrm (state->modrm) |
1242 | | #define modrm_mod (state->modrm_mod) |
1243 | 205k | #define modrm_reg (state->modrm_reg) |
1244 | 131k | #define modrm_rm (state->modrm_rm) |
1245 | 385k | #define rex_prefix (state->rex_prefix) |
1246 | 102k | #define lock_prefix (state->lock_prefix) |
1247 | 428k | #define vex (state->vex) |
1248 | 1.01M | #define evex (state->evex) |
1249 | 82.8k | #define evex_encoded() (evex.mbs) |
1250 | 1.95M | #define ea (state->ea) |
1251 | | |
1252 | | /* Undo DEBUG wrapper. */ |
1253 | | #undef x86_emulate |
1254 | | |
1255 | | int |
1256 | | x86_emulate( |
1257 | | struct x86_emulate_ctxt *ctxt, |
1258 | | const struct x86_emulate_ops *ops) |
1259 | 613k | { |
1260 | | /* Shadow copy of register state. Committed on successful emulation. */ |
1261 | 613k | struct cpu_user_regs _regs = *ctxt->regs; |
1262 | 613k | const struct cpu_policy *__maybe_unused cp = ctxt->cpu_policy; |
1263 | 613k | struct x86_emulate_state state; |
1264 | 613k | int rc; |
1265 | 613k | uint8_t b, d, *opc = NULL; |
1266 | 613k | unsigned int first_byte = 0, elem_bytes, insn_bytes = 0; |
1267 | 613k | uint64_t op_mask = ~0ULL; |
1268 | 613k | bool singlestep = (_regs.eflags & X86_EFLAGS_TF) && |
1269 | 613k | !is_branch_step(ctxt, ops); |
1270 | 613k | bool sfence = false, fault_suppression = false; |
1271 | 613k | struct operand src = { .reg = PTR_POISON }; |
1272 | 613k | struct operand dst = { .reg = PTR_POISON }; |
1273 | 613k | unsigned long cr4; |
1274 | 613k | enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none; |
1275 | 613k | struct x86_emulate_stub stub = {}; |
1276 | 613k | DECLARE_ALIGNED(mmval_t, mmval); |
1277 | 613k | struct stub_exn stub_exn = {}; |
1278 | | |
1279 | 613k | ASSERT(ops->read); |
1280 | | |
1281 | 613k | init_context(ctxt); |
1282 | | |
1283 | 613k | generate_exception_if((mode_vif() && |
1284 | 613k | (_regs.eflags & X86_EFLAGS_VIF) && |
1285 | 613k | (_regs.eflags & X86_EFLAGS_VIP)), |
1286 | 613k | X86_EXC_GP, 0); |
1287 | | |
1288 | 613k | rc = x86emul_decode(&state, ctxt, ops); |
1289 | 613k | if ( rc != X86EMUL_OKAY ) |
1290 | 10.3k | return rc; |
1291 | | |
1292 | | /* Sync rIP to post decode value. */ |
1293 | 603k | _regs.r(ip) = state.ip; |
1294 | | |
1295 | 603k | if ( ops->validate ) |
1296 | 0 | { |
1297 | 0 | #ifndef NDEBUG |
1298 | 0 | state.caller = __builtin_return_address(0); |
1299 | 0 | #endif |
1300 | 0 | rc = ops->validate(&state, ctxt); |
1301 | 0 | #ifndef NDEBUG |
1302 | 0 | state.caller = NULL; |
1303 | 0 | #endif |
1304 | 0 | if ( rc == X86EMUL_DONE ) |
1305 | 0 | goto complete_insn; |
1306 | 0 | if ( rc != X86EMUL_OKAY ) |
1307 | 0 | return rc; |
1308 | 0 | } |
1309 | | |
1310 | 603k | b = ctxt->opcode; |
1311 | 603k | d = state.desc; |
1312 | 8.20M | #define state (&state) |
1313 | 603k | elem_bytes = 2 << (!state->fp16 + evex.w); |
1314 | | |
1315 | 603k | generate_exception_if(state->not_64bit && mode_64bit(), X86_EXC_UD); |
1316 | | |
1317 | 603k | if ( ea.type == OP_REG ) |
1318 | 131k | ea.reg = _decode_gpr(&_regs, modrm_rm, (d & ByteOp) && !rex_prefix && !vex.opcx); |
1319 | | |
1320 | 603k | memset(mmvalp, 0xaa /* arbitrary */, sizeof(*mmvalp)); |
1321 | | |
1322 | | /* Decode and fetch the source operand: register, memory or immediate. */ |
1323 | 603k | switch ( d & SrcMask ) |
1324 | 603k | { |
1325 | 342k | case SrcNone: /* case SrcImplicit: */ |
1326 | 342k | src.type = OP_NONE; |
1327 | 342k | break; |
1328 | 69.7k | case SrcReg: |
1329 | 69.7k | src.type = OP_REG; |
1330 | 69.7k | if ( d & ByteOp ) |
1331 | 45.2k | { |
1332 | 45.2k | src.reg = _decode_gpr(&_regs, modrm_reg, !rex_prefix && !vex.opcx); |
1333 | 45.2k | src.val = *(uint8_t *)src.reg; |
1334 | 45.2k | src.bytes = 1; |
1335 | 45.2k | } |
1336 | 24.4k | else |
1337 | 24.4k | { |
1338 | 24.4k | src.reg = decode_gpr(&_regs, modrm_reg); |
1339 | 24.4k | switch ( (src.bytes = op_bytes) ) |
1340 | 24.4k | { |
1341 | 10.4k | case 2: src.val = *(uint16_t *)src.reg; break; |
1342 | 9.39k | case 4: src.val = *(uint32_t *)src.reg; break; |
1343 | 3.11k | case 8: src.val = *(uint64_t *)src.reg; break; |
1344 | 24.4k | } |
1345 | 24.4k | } |
1346 | 69.7k | break; |
1347 | 69.7k | case SrcMem16: |
1348 | 18.8k | ea.bytes = 2; |
1349 | 18.8k | goto srcmem_common; |
1350 | 110k | case SrcMem: |
1351 | 110k | if ( state->simd_size != simd_none ) |
1352 | 71.1k | break; |
1353 | 39.1k | ea.bytes = (d & ByteOp) ? 1 : op_bytes; |
1354 | 58.0k | srcmem_common: |
1355 | 58.0k | src = ea; |
1356 | 58.0k | if ( src.type == OP_REG ) |
1357 | 27.3k | { |
1358 | 27.3k | switch ( src.bytes ) |
1359 | 27.3k | { |
1360 | 1.82k | case 1: src.val = *(uint8_t *)src.reg; break; |
1361 | 17.3k | case 2: src.val = *(uint16_t *)src.reg; break; |
1362 | 4.93k | case 4: src.val = *(uint32_t *)src.reg; break; |
1363 | 3.27k | case 8: src.val = *(uint64_t *)src.reg; break; |
1364 | 27.3k | } |
1365 | 27.3k | } |
1366 | 30.6k | else if ( (rc = read_ulong(src.mem.seg, src.mem.off, |
1367 | 30.6k | &src.val, src.bytes, ctxt, ops)) ) |
1368 | 237 | goto done; |
1369 | 57.7k | break; |
1370 | 57.7k | case SrcImm: |
1371 | 34.3k | if ( !(d & ByteOp) ) |
1372 | 17.7k | src.bytes = op_bytes != 8 ? op_bytes : 4; |
1373 | 16.6k | else |
1374 | 16.6k | { |
1375 | 43.0k | case SrcImmByte: |
1376 | 43.0k | src.bytes = 1; |
1377 | 43.0k | } |
1378 | 60.7k | src.type = OP_IMM; |
1379 | 60.7k | src.val = imm1; |
1380 | 60.7k | break; |
1381 | 1.42k | case SrcImm16: |
1382 | 1.42k | src.type = OP_IMM; |
1383 | 1.42k | src.bytes = 2; |
1384 | 1.42k | src.val = imm1; |
1385 | 1.42k | break; |
1386 | 603k | } |
1387 | | |
1388 | 603k | #ifndef X86EMUL_NO_SIMD |
1389 | | /* With a memory operand, fetch the mask register in use (if any). */ |
1390 | 603k | if ( ea.type == OP_MEM && evex.opmsk && |
1391 | 603k | x86emul_get_fpu(fpu_type = X86EMUL_FPU_opmask, |
1392 | 607 | ctxt, ops) == X86EMUL_OKAY ) |
1393 | 0 | { |
1394 | 0 | uint8_t *stb = get_stub(stub); |
1395 | | |
1396 | | /* KMOV{W,Q} %k<n>, (%rax) */ |
1397 | 0 | stb[0] = 0xc4; |
1398 | 0 | stb[1] = 0xe1; |
1399 | 0 | stb[2] = cpu_has_avx512bw ? 0xf8 : 0x78; |
1400 | 0 | stb[3] = 0x91; |
1401 | 0 | stb[4] = evex.opmsk << 3; |
1402 | 0 | insn_bytes = 5; |
1403 | 0 | stb[5] = 0xc3; |
1404 | |
1405 | 0 | invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); |
1406 | |
1407 | 0 | insn_bytes = 0; |
1408 | 0 | put_stub(stub); |
1409 | |
|
1410 | 0 | fault_suppression = true; |
1411 | 0 | } |
1412 | | |
1413 | 603k | if ( fpu_type == X86EMUL_FPU_opmask ) |
1414 | 607 | { |
1415 | | /* Squash (side) effects of the x86emul_get_fpu() above. */ |
1416 | 607 | x86_emul_reset_event(ctxt); |
1417 | 607 | put_fpu(X86EMUL_FPU_opmask, false, state, ctxt, ops); |
1418 | 607 | fpu_type = X86EMUL_FPU_none; |
1419 | 607 | } |
1420 | 603k | #endif /* !X86EMUL_NO_SIMD */ |
1421 | | |
1422 | | /* Decode (but don't fetch) the destination operand: register or memory. */ |
1423 | 603k | switch ( d & DstMask ) |
1424 | 603k | { |
1425 | 420k | case DstNone: /* case DstImplicit: */ |
1426 | | /* |
1427 | | * The only implicit-operands instructions allowed a LOCK prefix are |
1428 | | * CMPXCHG{8,16}B (MOV CRn is being handled elsewhere). |
1429 | | */ |
1430 | 420k | generate_exception_if(lock_prefix && |
1431 | 420k | (vex.opcx || ext != ext_0f || b != 0xc7 || |
1432 | 420k | (modrm_reg & 7) != 1 || ea.type != OP_MEM), |
1433 | 420k | X86_EXC_UD); |
1434 | 420k | dst.type = OP_NONE; |
1435 | 420k | break; |
1436 | | |
1437 | 97.7k | case DstReg: |
1438 | 97.7k | generate_exception_if(lock_prefix, X86_EXC_UD); |
1439 | 97.7k | dst.type = OP_REG; |
1440 | 97.7k | if ( d & ByteOp ) |
1441 | 26.7k | { |
1442 | 26.7k | dst.reg = _decode_gpr(&_regs, modrm_reg, !rex_prefix && !vex.opcx); |
1443 | 26.7k | dst.val = *(uint8_t *)dst.reg; |
1444 | 26.7k | dst.bytes = 1; |
1445 | 26.7k | } |
1446 | 70.9k | else |
1447 | 70.9k | { |
1448 | 70.9k | dst.reg = decode_gpr(&_regs, modrm_reg); |
1449 | 70.9k | switch ( (dst.bytes = op_bytes) ) |
1450 | 70.9k | { |
1451 | 26.0k | case 2: dst.val = *(uint16_t *)dst.reg; break; |
1452 | 17.2k | case 4: dst.val = *(uint32_t *)dst.reg; break; |
1453 | 8.85k | case 8: dst.val = *(uint64_t *)dst.reg; break; |
1454 | 70.9k | } |
1455 | 70.9k | } |
1456 | 97.7k | break; |
1457 | 97.7k | case DstBitBase: |
1458 | 3.82k | if ( ea.type == OP_MEM ) |
1459 | 2.19k | { |
1460 | | /* |
1461 | | * Instructions such as bt can reference an arbitrary offset from |
1462 | | * their memory operand, but the instruction doing the actual |
1463 | | * emulation needs the appropriate op_bytes read from memory. |
1464 | | * Adjust both the source register and memory operand to make an |
1465 | | * equivalent instruction. |
1466 | | * |
1467 | | * EA += BitOffset DIV op_bytes*8 |
1468 | | * BitOffset = BitOffset MOD op_bytes*8 |
1469 | | * DIV truncates towards negative infinity. |
1470 | | * MOD always produces a positive result. |
1471 | | */ |
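/*
 * Worked example (hypothetical values): for a 32-bit "bt" with a
 * register-sourced bit offset of -1, the code below subtracts 4 from the
 * memory offset (bit -1 lives in the dword just before the operand) and the
 * subsequent masking reduces the bit index to 31, so the access becomes
 * "bit 31 of the previous dword" -- the same location hardware would touch.
 */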
1472 | 2.19k | if ( op_bytes == 2 ) |
1473 | 1.02k | src.val = (int16_t)src.val; |
1474 | 1.16k | else if ( op_bytes == 4 ) |
1475 | 786 | src.val = (int32_t)src.val; |
1476 | 2.19k | if ( (long)src.val < 0 ) |
1477 | 731 | ea.mem.off -= |
1478 | 731 | op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L)); |
1479 | 1.46k | else |
1480 | 1.46k | ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L); |
1481 | 2.19k | ea.mem.off = truncate_ea(ea.mem.off); |
1482 | 2.19k | } |
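
For illustration, a minimal stand-alone sketch of the EA/BitOffset normalisation described in the comment above (hypothetical helper values, not part of the emulator): a bit offset of -9 with a 4-byte operand becomes bit 23 of the dword starting 4 bytes below the original effective address.

    #include <stdio.h>

    int main(void)
    {
        long bitoff = -9;            /* signed bit offset from the source operand */
        unsigned long op_bytes = 4;  /* operand size in bytes */
        long ea_adjust;

        if ( bitoff < 0 )
            /* EA += BitOffset DIV op_bytes*8, DIV truncating towards -infinity. */
            ea_adjust = -(long)(op_bytes + (((-bitoff - 1) >> 3) & ~(op_bytes - 1)));
        else
            ea_adjust = (bitoff >> 3) & ~(op_bytes - 1);

        /* BitOffset MOD op_bytes*8, always non-negative. */
        bitoff &= (op_bytes << 3) - 1;

        printf("EA %+ld bytes, bit index %ld\n", ea_adjust, bitoff); /* -4 and 23 */
        return 0;
    }
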
1483 | | |
1484 | | /* Bit index always truncated to within range. */ |
1485 | 3.82k | src.val &= (op_bytes << 3) - 1; |
1486 | | |
1487 | 3.82k | d = (d & ~DstMask) | DstMem; |
1488 | | /* Becomes a normal DstMem operation from here on. */ |
1489 | 3.82k | fallthrough; |
1490 | 84.5k | case DstMem: |
1491 | 84.5k | generate_exception_if(ea.type == OP_MEM && evex.z, X86_EXC_UD); |
1492 | 84.5k | if ( state->simd_size != simd_none ) |
1493 | 10.1k | { |
1494 | 10.1k | generate_exception_if(lock_prefix, X86_EXC_UD); |
1495 | 10.1k | break; |
1496 | 10.1k | } |
1497 | 74.3k | ea.bytes = (d & ByteOp) ? 1 : op_bytes; |
1498 | 74.3k | dst = ea; |
1499 | 74.3k | if ( dst.type == OP_REG ) |
1500 | 14.6k | { |
1501 | 14.6k | generate_exception_if(lock_prefix, X86_EXC_UD); |
1502 | 14.6k | switch ( dst.bytes ) |
1503 | 14.6k | { |
1504 | 4.76k | case 1: dst.val = *(uint8_t *)dst.reg; break; |
1505 | 3.67k | case 2: dst.val = *(uint16_t *)dst.reg; break; |
1506 | 4.03k | case 4: dst.val = *(uint32_t *)dst.reg; break; |
1507 | 2.20k | case 8: dst.val = *(uint64_t *)dst.reg; break; |
1508 | 14.6k | } |
1509 | 14.6k | } |
1510 | 59.6k | else if ( d & Mov ) /* optimisation - avoid slow emulated read */ |
1511 | 7.43k | { |
1512 | | /* Lock prefix is allowed only on RMW instructions. */ |
1513 | 7.43k | generate_exception_if(lock_prefix, X86_EXC_UD); |
1514 | 7.43k | fail_if(!ops->write); |
1515 | 7.43k | } |
1516 | 52.2k | else if ( !ops->rmw ) |
1517 | 52.2k | { |
1518 | 52.2k | fail_if(lock_prefix ? !ops->cmpxchg : !ops->write); |
1519 | 52.1k | if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, |
1520 | 52.1k | &dst.val, dst.bytes, ctxt, ops)) ) |
1521 | 514 | goto done; |
1522 | 51.6k | dst.orig_val = dst.val; |
1523 | 51.6k | } |
1524 | 73.7k | break; |
1525 | 603k | } |
1526 | | |
1527 | 602k | switch ( ctxt->opcode ) |
1528 | 602k | { |
1529 | 0 | enum x86_segment seg; |
1530 | 0 | struct segment_register cs, sreg; |
1531 | 0 | struct cpuid_leaf leaf; |
1532 | 0 | uint64_t msr_val; |
1533 | 0 | unsigned int i, n; |
1534 | 0 | unsigned long dummy; |
1535 | | |
1536 | 40.6k | case 0x00: case 0x01: add: /* add reg,mem */ |
1537 | 40.6k | if ( ops->rmw && dst.type == OP_MEM ) |
1538 | 0 | state->rmw = rmw_add; |
1539 | 40.6k | else |
1540 | 40.6k | { |
1541 | 45.3k | case 0x02 ... 0x05: /* add */ |
1542 | 45.3k | emulate_2op_SrcV("add", src, dst, _regs.eflags); |
1543 | 45.3k | } |
1544 | 45.3k | break; |
1545 | | |
1546 | 45.3k | case 0x08: case 0x09: or: /* or reg,mem */ |
1547 | 1.26k | if ( ops->rmw && dst.type == OP_MEM ) |
1548 | 0 | state->rmw = rmw_or; |
1549 | 1.26k | else |
1550 | 1.26k | { |
1551 | 4.45k | case 0x0a ... 0x0d: /* or */ |
1552 | 4.45k | emulate_2op_SrcV("or", src, dst, _regs.eflags); |
1553 | 4.45k | } |
1554 | 4.45k | break; |
1555 | | |
1556 | 4.45k | case 0x10: case 0x11: adc: /* adc reg,mem */ |
1557 | 1.12k | if ( ops->rmw && dst.type == OP_MEM ) |
1558 | 0 | state->rmw = rmw_adc; |
1559 | 1.12k | else |
1560 | 1.12k | { |
1561 | 3.71k | case 0x12 ... 0x15: /* adc */ |
1562 | 3.71k | emulate_2op_SrcV("adc", src, dst, _regs.eflags); |
1563 | 3.71k | } |
1564 | 3.71k | break; |
1565 | | |
1566 | 3.71k | case 0x18: case 0x19: sbb: /* sbb reg,mem */ |
1567 | 1.06k | if ( ops->rmw && dst.type == OP_MEM ) |
1568 | 0 | state->rmw = rmw_sbb; |
1569 | 1.06k | else |
1570 | 1.06k | { |
1571 | 4.01k | case 0x1a ... 0x1d: /* sbb */ |
1572 | 4.01k | emulate_2op_SrcV("sbb", src, dst, _regs.eflags); |
1573 | 4.01k | } |
1574 | 4.01k | break; |
1575 | | |
1576 | 4.01k | case 0x20: case 0x21: and: /* and reg,mem */ |
1577 | 1.43k | if ( ops->rmw && dst.type == OP_MEM ) |
1578 | 0 | state->rmw = rmw_and; |
1579 | 1.43k | else |
1580 | 1.43k | { |
1581 | 5.70k | case 0x22 ... 0x25: /* and */ |
1582 | 5.70k | emulate_2op_SrcV("and", src, dst, _regs.eflags); |
1583 | 5.70k | } |
1584 | 5.70k | break; |
1585 | | |
1586 | 5.70k | case 0x28: case 0x29: sub: /* sub reg,mem */ |
1587 | 1.57k | if ( ops->rmw && dst.type == OP_MEM ) |
1588 | 0 | state->rmw = rmw_sub; |
1589 | 1.57k | else |
1590 | 1.57k | { |
1591 | 5.10k | case 0x2a ... 0x2d: /* sub */ |
1592 | 5.10k | emulate_2op_SrcV("sub", src, dst, _regs.eflags); |
1593 | 5.10k | } |
1594 | 5.10k | break; |
1595 | | |
1596 | 5.10k | case 0x30: case 0x31: xor: /* xor reg,mem */ |
1597 | 1.66k | if ( ops->rmw && dst.type == OP_MEM ) |
1598 | 0 | state->rmw = rmw_xor; |
1599 | 1.66k | else |
1600 | 1.66k | { |
1601 | 8.68k | case 0x32 ... 0x35: /* xor */ |
1602 | 8.68k | emulate_2op_SrcV("xor", src, dst, _regs.eflags); |
1603 | 8.68k | } |
1604 | 8.68k | break; |
1605 | | |
1606 | 8.68k | case 0x38: case 0x39: cmp: /* cmp reg,mem */ |
1607 | 2.23k | emulate_2op_SrcV("cmp", dst, src, _regs.eflags); |
1608 | 2.23k | dst.type = OP_NONE; |
1609 | 2.23k | break; |
1610 | | |
1611 | 5.30k | case 0x3a ... 0x3d: /* cmp */ |
1612 | 5.30k | emulate_2op_SrcV("cmp", src, dst, _regs.eflags); |
1613 | 5.30k | dst.type = OP_NONE; |
1614 | 5.30k | break; |
1615 | | |
1616 | 1.16k | case 0x06: /* push %%es */ |
1617 | 1.91k | case 0x0e: /* push %%cs */ |
1618 | 4.95k | case 0x16: /* push %%ss */ |
1619 | 5.37k | case 0x1e: /* push %%ds */ |
1620 | 5.57k | case X86EMUL_OPC(0x0f, 0xa0): /* push %%fs */ |
1621 | 5.76k | case X86EMUL_OPC(0x0f, 0xa8): /* push %%gs */ |
1622 | 5.76k | fail_if(ops->read_segment == NULL); |
1623 | 5.76k | if ( (rc = ops->read_segment((b >> 3) & 7, &sreg, |
1624 | 5.76k | ctxt)) != X86EMUL_OKAY ) |
1625 | 0 | goto done; |
1626 | 5.76k | src.val = sreg.sel; |
1627 | 5.76k | goto push; |
1628 | | |
1629 | 539 | case 0x07: /* pop %%es */ |
1630 | 924 | case 0x17: /* pop %%ss */ |
1631 | 1.24k | case 0x1f: /* pop %%ds */ |
1632 | 1.32k | case X86EMUL_OPC(0x0f, 0xa1): /* pop %%fs */ |
1633 | 1.74k | case X86EMUL_OPC(0x0f, 0xa9): /* pop %%gs */ |
1634 | 1.74k | fail_if(ops->write_segment == NULL); |
1635 | | /* 64-bit mode: POP defaults to a 64-bit operand. */ |
1636 | 1.73k | if ( mode_64bit() && (op_bytes == 4) ) |
1637 | 257 | op_bytes = 8; |
1638 | 1.73k | seg = (b >> 3) & 7; |
1639 | 1.73k | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &dst.val, |
1640 | 1.73k | op_bytes, ctxt, ops)) != X86EMUL_OKAY || |
1641 | 1.73k | (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY ) |
1642 | 88 | goto done; |
1643 | 1.64k | if ( seg == x86_seg_ss ) |
1644 | 366 | ctxt->retire.mov_ss = true; |
1645 | 1.64k | break; |
1646 | | |
1647 | 5.95k | case 0x27: /* daa */ |
1648 | 19.7k | case 0x2f: /* das */ { |
1649 | 19.7k | uint8_t al = _regs.al; |
1650 | 19.7k | unsigned int eflags = _regs.eflags; |
1651 | | |
1652 | 19.7k | _regs.eflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_AF | X86_EFLAGS_SF | |
1653 | 19.7k | X86_EFLAGS_ZF | X86_EFLAGS_PF); |
1654 | 19.7k | if ( ((al & 0x0f) > 9) || (eflags & X86_EFLAGS_AF) ) |
1655 | 8.61k | { |
1656 | 8.61k | _regs.eflags |= X86_EFLAGS_AF; |
1657 | 8.61k | if ( b == 0x2f && (al < 6 || (eflags & X86_EFLAGS_CF)) ) |
1658 | 3.64k | _regs.eflags |= X86_EFLAGS_CF; |
1659 | 8.61k | _regs.al += (b == 0x27) ? 6 : -6; |
1660 | 8.61k | } |
1661 | 19.7k | if ( (al > 0x99) || (eflags & X86_EFLAGS_CF) ) |
1662 | 8.49k | { |
1663 | 8.49k | _regs.al += (b == 0x27) ? 0x60 : -0x60; |
1664 | 8.49k | _regs.eflags |= X86_EFLAGS_CF; |
1665 | 8.49k | } |
1666 | 19.7k | _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0; |
1667 | 19.7k | _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0; |
1668 | 19.7k | _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0; |
1669 | 19.7k | break; |
1670 | 5.95k | } |
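
For illustration, a reduced stand-alone model of the DAA path above (hypothetical helper; only CF and AF are modelled): adding the packed-BCD values 0x38 and 0x45 yields the binary sum 0x7d, which the adjustment turns into 0x83, the BCD encoding of 83.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Adjust AL after a packed-BCD addition, reporting the resulting AF/CF. */
    static uint8_t daa_adjust(uint8_t al, bool af_in, bool cf_in,
                              bool *af_out, bool *cf_out)
    {
        uint8_t old_al = al;

        *af_out = false;
        *cf_out = false;
        if ( (al & 0x0f) > 9 || af_in )
        {
            al += 6;                 /* fix up the low BCD digit */
            *af_out = true;
        }
        if ( old_al > 0x99 || cf_in )
        {
            al += 0x60;              /* fix up the high BCD digit */
            *cf_out = true;
        }
        return al;
    }

    int main(void)
    {
        bool af, cf;

        assert(daa_adjust(0x7d, false, false, &af, &cf) == 0x83);
        assert(af && !cf);
        return 0;
    }
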
1671 | | |
1672 | 1.07k | case 0x37: /* aaa */ |
1673 | 3.25k | case 0x3f: /* aas */ |
1674 | 3.25k | _regs.eflags &= ~X86_EFLAGS_CF; |
1675 | 3.25k | if ( (_regs.al > 9) || (_regs.eflags & X86_EFLAGS_AF) ) |
1676 | 2.02k | { |
1677 | 2.02k | _regs.al += (b == 0x37) ? 6 : -6; |
1678 | 2.02k | _regs.ah += (b == 0x37) ? 1 : -1; |
1679 | 2.02k | _regs.eflags |= X86_EFLAGS_CF | X86_EFLAGS_AF; |
1680 | 2.02k | } |
1681 | 3.25k | _regs.al &= 0x0f; |
1682 | 3.25k | break; |
1683 | | |
1684 | 107k | case 0x40 ... 0x4f: /* inc/dec reg */ |
1685 | 107k | dst.type = OP_REG; |
1686 | 107k | dst.reg = decode_gpr(&_regs, b & 7); |
1687 | 107k | dst.bytes = op_bytes; |
1688 | 107k | dst.val = *dst.reg; |
1689 | 107k | if ( b & 8 ) |
1690 | 107k | emulate_1op("dec", dst, _regs.eflags); |
1691 | 32.7k | else |
1692 | 107k | emulate_1op("inc", dst, _regs.eflags); |
1693 | 107k | break; |
1694 | | |
1695 | 107k | case 0x50 ... 0x57: /* push reg */ |
1696 | 25.6k | src.val = *decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3)); |
1697 | 25.6k | goto push; |
1698 | | |
1699 | 5.26k | case 0x58 ... 0x5f: /* pop reg */ |
1700 | 5.26k | dst.type = OP_REG; |
1701 | 5.26k | dst.reg = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3)); |
1702 | 5.26k | dst.bytes = op_bytes; |
1703 | 5.26k | if ( mode_64bit() && (dst.bytes == 4) ) |
1704 | 706 | dst.bytes = 8; |
1705 | 5.26k | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes), |
1706 | 5.26k | &dst.val, dst.bytes, ctxt, ops)) != 0 ) |
1707 | 106 | goto done; |
1708 | 5.16k | break; |
1709 | | |
1710 | 5.16k | case 0x60: /* pusha */ |
1711 | 434 | fail_if(!ops->write); |
1712 | 433 | ea.val = _regs.esp; |
1713 | 3.72k | for ( i = 0; i < 8; i++ ) |
1714 | 3.32k | { |
1715 | 3.32k | void *reg = decode_gpr(&_regs, i); |
1716 | | |
1717 | 3.32k | if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), |
1718 | 3.32k | reg != &_regs.esp ? reg : &ea.val, |
1719 | 3.32k | op_bytes, ctxt)) != 0 ) |
1720 | 30 | goto done; |
1721 | 3.32k | } |
1722 | 403 | break; |
1723 | | |
1724 | 528 | case 0x61: /* popa */ |
1725 | 4.56k | for ( i = 0; i < 8; i++ ) |
1726 | 4.07k | { |
1727 | 4.07k | void *reg = decode_gpr(&_regs, 7 - i); |
1728 | | |
1729 | 4.07k | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
1730 | 4.07k | &dst.val, op_bytes, ctxt, ops)) != 0 ) |
1731 | 36 | goto done; |
1732 | 4.03k | if ( reg == &_regs.r(sp) ) |
1733 | 508 | continue; |
1734 | 3.52k | if ( op_bytes == 2 ) |
1735 | 3.33k | *(uint16_t *)reg = dst.val; |
1736 | 195 | else |
1737 | 195 | *(unsigned long *)reg = dst.val; |
1738 | 3.52k | } |
1739 | 492 | break; |
1740 | | |
1741 | 837 | case 0x62: /* bound */ { |
1742 | 837 | int lb, ub, idx; |
1743 | | |
1744 | 837 | generate_exception_if(src.type != OP_MEM, X86_EXC_UD); |
1745 | 835 | if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + op_bytes), |
1746 | 835 | &ea.val, op_bytes, ctxt, ops)) ) |
1747 | 3 | goto done; |
1748 | 832 | ub = (op_bytes == 2) ? (int16_t)ea.val : (int32_t)ea.val; |
1749 | 832 | lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val; |
1750 | 832 | idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val; |
1751 | 832 | generate_exception_if((idx < lb) || (idx > ub), X86_EXC_BR); |
1752 | 762 | dst.type = OP_NONE; |
1753 | 762 | break; |
1754 | 832 | } |
1755 | | |
1756 | 1.41k | case 0x63: /* movsxd (x86/64) / arpl (x86/32) */ |
1757 | 1.41k | if ( mode_64bit() ) |
1758 | 634 | { |
1759 | | /* movsxd */ |
1760 | 634 | if ( ea.type == OP_REG ) |
1761 | 263 | src.val = *ea.reg; |
1762 | 371 | else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, |
1763 | 371 | (op_bytes == 2 && !amd_like(ctxt) |
1764 | 371 | ? 2 : 4), |
1765 | 371 | ctxt, ops)) ) |
1766 | 16 | goto done; |
1767 | 618 | dst.val = (int32_t)src.val; |
1768 | 618 | } |
1769 | 780 | else |
1770 | 780 | { |
1771 | | /* arpl */ |
1772 | 780 | unsigned int src_rpl = dst.val & 3; |
1773 | | |
1774 | 780 | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD); |
1775 | | |
1776 | 778 | dst = ea; |
1777 | 778 | dst.bytes = 2; |
1778 | 778 | if ( dst.type == OP_REG ) |
1779 | 462 | dst.val = *dst.reg; |
1780 | 316 | else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, |
1781 | 316 | &dst.val, 2, ctxt, ops)) ) |
1782 | 16 | goto done; |
1783 | 762 | if ( src_rpl > (dst.val & 3) ) |
1784 | 232 | { |
1785 | 232 | _regs.eflags |= X86_EFLAGS_ZF; |
1786 | 232 | dst.val = (dst.val & ~3) | src_rpl; |
1787 | 232 | } |
1788 | 530 | else |
1789 | 530 | { |
1790 | 530 | _regs.eflags &= ~X86_EFLAGS_ZF; |
1791 | 530 | dst.type = OP_NONE; |
1792 | 530 | } |
1793 | 762 | } |
1794 | 1.38k | break; |
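
For illustration, a stand-alone sketch of the ARPL semantics implemented above (hypothetical helper; ZF is reported as the return value): the destination selector's RPL is raised to the source's RPL when it is lower, and ZF records whether an adjustment was made.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool arpl(uint16_t *dst_sel, uint16_t src_sel)
    {
        unsigned int src_rpl = src_sel & 3;

        if ( src_rpl > (*dst_sel & 3) )
        {
            *dst_sel = (*dst_sel & ~3) | src_rpl;
            return true;             /* ZF set: RPL was raised */
        }
        return false;                /* ZF clear: selector unchanged */
    }

    int main(void)
    {
        uint16_t sel = 0x0008;       /* RPL 0 */

        assert(arpl(&sel, 0x0013));  /* source RPL 3 */
        assert(sel == 0x000b);
        return 0;
    }
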
1795 | | |
1796 | 1.38k | case 0x68: /* push imm{16,32,64} */ |
1797 | 970 | case 0x6a: /* push imm8 */ |
1798 | 34.9k | push: |
1799 | 34.9k | ASSERT(d & Mov); /* writeback needed */ |
1800 | 34.9k | dst.type = OP_MEM; |
1801 | 34.9k | dst.bytes = mode_64bit() && (op_bytes == 4) ? 8 : op_bytes; |
1802 | 34.9k | dst.val = src.val; |
1803 | 34.9k | dst.mem.seg = x86_seg_ss; |
1804 | 34.9k | dst.mem.off = sp_pre_dec(dst.bytes); |
1805 | 34.9k | break; |
1806 | | |
1807 | 217 | case 0x69: /* imul imm16/32 */ |
1808 | 1.45k | case 0x6b: /* imul imm8 */ |
1809 | 1.45k | if ( ea.type == OP_REG ) |
1810 | 745 | dst.val = *ea.reg; |
1811 | 711 | else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, |
1812 | 711 | &dst.val, op_bytes, ctxt, ops)) ) |
1813 | 5 | goto done; |
1814 | 1.45k | goto imul; |
1815 | | |
1816 | 6.25k | case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ { |
1817 | 6.25k | unsigned long nr_reps; |
1818 | 6.25k | unsigned int port = _regs.dx; |
1819 | | |
1820 | 6.25k | dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; |
1821 | 6.25k | if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 ) |
1822 | 22 | goto done; |
1823 | 6.23k | nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */); |
1824 | 5.44k | dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes); |
1825 | 5.44k | dst.mem.seg = x86_seg_es; |
1826 | | /* Try the presumably most efficient approach first. */ |
1827 | 5.44k | if ( !ops->rep_ins ) |
1828 | 2.26k | nr_reps = 1; |
1829 | 5.44k | rc = X86EMUL_UNHANDLEABLE; |
1830 | 5.44k | if ( nr_reps == 1 && ops->read_io && ops->write ) |
1831 | 3.77k | { |
1832 | 3.77k | rc = ops->read_io(port, dst.bytes, &dst.val, ctxt); |
1833 | 3.77k | if ( rc != X86EMUL_UNHANDLEABLE ) |
1834 | 3.57k | nr_reps = 0; |
1835 | 3.77k | } |
1836 | 5.44k | if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_ins ) |
1837 | 1.65k | rc = ops->rep_ins(port, dst.mem.seg, dst.mem.off, dst.bytes, |
1838 | 1.65k | &nr_reps, ctxt); |
1839 | 5.44k | if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE ) |
1840 | 213 | { |
1841 | 213 | fail_if(!ops->read_io || !ops->write); |
1842 | 203 | if ( (rc = ops->read_io(port, dst.bytes, &dst.val, ctxt)) != 0 ) |
1843 | 1 | goto done; |
1844 | 202 | nr_reps = 0; |
1845 | 202 | } |
1846 | 5.43k | if ( !nr_reps && rc == X86EMUL_OKAY ) |
1847 | 3.59k | { |
1848 | 3.59k | dst.type = OP_MEM; |
1849 | 3.59k | nr_reps = 1; |
1850 | 3.59k | } |
1851 | 5.43k | register_address_adjust(_regs.r(di), nr_reps * dst.bytes); |
1852 | 5.43k | put_rep_prefix(nr_reps); |
1853 | 5.19k | if ( rc != X86EMUL_OKAY ) |
1854 | 255 | goto done; |
1855 | 4.93k | break; |
1856 | 5.19k | } |
1857 | | |
1858 | 7.11k | case 0x6e ... 0x6f: /* outs %esi,%dx */ { |
1859 | 7.11k | unsigned long nr_reps; |
1860 | 7.11k | unsigned int port = _regs.dx; |
1861 | | |
1862 | 7.11k | dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; |
1863 | 7.11k | if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 ) |
1864 | 37 | goto done; |
1865 | 7.07k | nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */); |
1866 | 6.25k | ea.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes); |
1867 | | /* Try the presumably most efficient approach first. */ |
1868 | 6.25k | if ( !ops->rep_outs ) |
1869 | 868 | nr_reps = 1; |
1870 | 6.25k | rc = X86EMUL_UNHANDLEABLE; |
1871 | 6.25k | if ( nr_reps == 1 && ops->write_io ) |
1872 | 2.73k | { |
1873 | 2.73k | rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val, dst.bytes, |
1874 | 2.73k | ctxt, ops); |
1875 | 2.73k | if ( rc != X86EMUL_UNHANDLEABLE ) |
1876 | 1.78k | nr_reps = 0; |
1877 | 2.73k | } |
1878 | 6.25k | if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_outs ) |
1879 | 4.14k | rc = ops->rep_outs(ea.mem.seg, ea.mem.off, port, dst.bytes, |
1880 | 4.14k | &nr_reps, ctxt); |
1881 | 6.25k | if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE ) |
1882 | 326 | { |
1883 | 326 | if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val, |
1884 | 326 | dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) |
1885 | 27 | goto done; |
1886 | 299 | fail_if(ops->write_io == NULL); |
1887 | 296 | nr_reps = 0; |
1888 | 296 | } |
1889 | 6.22k | if ( !nr_reps && rc == X86EMUL_OKAY ) |
1890 | 1.94k | { |
1891 | 1.94k | if ( (rc = ops->write_io(port, dst.bytes, dst.val, ctxt)) != 0 ) |
1892 | 22 | goto done; |
1893 | 1.91k | nr_reps = 1; |
1894 | 1.91k | } |
1895 | 6.20k | register_address_adjust(_regs.r(si), nr_reps * dst.bytes); |
1896 | 6.20k | put_rep_prefix(nr_reps); |
1897 | 5.96k | if ( rc != X86EMUL_OKAY ) |
1898 | 216 | goto done; |
1899 | 5.74k | break; |
1900 | 5.96k | } |
1901 | | |
1902 | 9.42k | case 0x70 ... 0x7f: /* jcc (short) */ |
1903 | 9.42k | if ( test_cc(b, _regs.eflags) ) |
1904 | 3.76k | jmp_rel((int32_t)src.val); |
1905 | 9.35k | adjust_bnd(ctxt, ops, vex.pfx); |
1906 | 9.35k | break; |
1907 | | |
1908 | 2.71k | case 0x80: case 0x81: case 0x82: case 0x83: /* Grp1 */ |
1909 | 2.71k | switch ( modrm_reg & 7 ) |
1910 | 2.71k | { |
1911 | 845 | case 0: goto add; |
1912 | 289 | case 1: goto or; |
1913 | 210 | case 2: goto adc; |
1914 | 278 | case 3: goto sbb; |
1915 | 87 | case 4: goto and; |
1916 | 243 | case 5: goto sub; |
1917 | 213 | case 6: goto xor; |
1918 | 548 | case 7: |
1919 | 548 | dst.val = imm1; |
1920 | 548 | goto cmp; |
1921 | 2.71k | } |
1922 | 0 | break; |
1923 | | |
1924 | 445 | case 0xa8 ... 0xa9: /* test imm,%%eax */ |
1925 | 1.86k | case 0x84 ... 0x85: test: /* test */ |
1926 | 1.86k | emulate_2op_SrcV("test", src, dst, _regs.eflags); |
1927 | 1.86k | dst.type = OP_NONE; |
1928 | 1.86k | break; |
1929 | | |
1930 | 3.34k | case 0x86 ... 0x87: xchg: /* xchg */ |
1931 | | /* |
1932 | | * The lock prefix is implied for this insn (and setting it for the |
1933 | | * register operands case here is benign to subsequent code). |
1934 | | */ |
1935 | 3.34k | lock_prefix = 1; |
1936 | 3.34k | if ( ops->rmw && dst.type == OP_MEM ) |
1937 | 0 | { |
1938 | 0 | state->rmw = rmw_xchg; |
1939 | 0 | break; |
1940 | 0 | } |
1941 | | /* Write back the register source. */ |
1942 | 3.34k | switch ( dst.bytes ) |
1943 | 3.34k | { |
1944 | 257 | case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break; |
1945 | 1.00k | case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break; |
1946 | 1.44k | case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */ |
1947 | 645 | case 8: *src.reg = dst.val; break; |
1948 | 3.34k | } |
1949 | | /* Arrange for write back of the memory destination. */ |
1950 | 3.34k | dst.val = src.val; |
1951 | 3.34k | break; |
1952 | | |
1953 | 223 | case 0xc6: /* Grp11: mov / xabort */ |
1954 | 266 | case 0xc7: /* Grp11: mov / xbegin */ |
1955 | 266 | if ( modrm == 0xf8 && vcpu_has_rtm() ) |
1956 | 0 | { |
1957 | | /* |
1958 | | * xbegin unconditionally aborts, xabort is unconditionally |
1959 | | * a nop. It also does not truncate the destination address to |
1960 | | * 16 bits when 16-bit operand size is in effect. |
1961 | | */ |
1962 | 0 | if ( b & 1 ) |
1963 | 0 | { |
1964 | 0 | op_bytes = 4; |
1965 | 0 | jmp_rel((int32_t)src.val); |
1966 | 0 | _regs.r(ax) = 0; |
1967 | 0 | } |
1968 | 0 | dst.type = OP_NONE; |
1969 | 0 | break; |
1970 | 0 | } |
1971 | 266 | generate_exception_if((modrm_reg & 7) != 0, X86_EXC_UD); |
1972 | 233 | fallthrough; |
1973 | 1.52k | case 0x88 ... 0x8b: /* mov */ |
1974 | 1.90k | case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */ |
1975 | 2.06k | case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */ |
1976 | 2.06k | dst.val = src.val; |
1977 | 2.06k | break; |
1978 | | |
1979 | 245 | case 0x8c: /* mov Sreg,r/m */ |
1980 | 245 | seg = modrm_reg & 7; /* REX.R is ignored. */ |
1981 | 245 | generate_exception_if(!is_x86_user_segment(seg), X86_EXC_UD); |
1982 | 1.46k | store_selector: |
1983 | 1.46k | fail_if(ops->read_segment == NULL); |
1984 | 1.46k | if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 ) |
1985 | 0 | goto done; |
1986 | 1.46k | dst.val = sreg.sel; |
1987 | 1.46k | if ( dst.type == OP_MEM ) |
1988 | 1.21k | dst.bytes = 2; |
1989 | 1.46k | break; |
1990 | | |
1991 | 240 | case 0x8d: /* lea */ |
1992 | 240 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
1993 | 239 | dst.val = ea.mem.off; |
1994 | 239 | break; |
1995 | | |
1996 | 335 | case 0x8e: /* mov r/m,Sreg */ |
1997 | 335 | seg = modrm_reg & 7; /* REX.R is ignored. */ |
1998 | 335 | generate_exception_if(!is_x86_user_segment(seg) || |
1999 | 335 | seg == x86_seg_cs, X86_EXC_UD); |
2000 | 332 | if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 ) |
2001 | 11 | goto done; |
2002 | 321 | if ( seg == x86_seg_ss ) |
2003 | 63 | ctxt->retire.mov_ss = true; |
2004 | 321 | dst.type = OP_NONE; |
2005 | 321 | break; |
2006 | | |
2007 | 877 | case 0x8f: /* pop (sole member of Grp1a) */ |
2008 | 877 | generate_exception_if((modrm_reg & 7) != 0, X86_EXC_UD); |
2009 | | /* 64-bit mode: POP defaults to a 64-bit operand. */ |
2010 | 874 | if ( mode_64bit() && (dst.bytes == 4) ) |
2011 | 215 | dst.bytes = 8; |
2012 | 874 | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes), |
2013 | 874 | &dst.val, dst.bytes, ctxt, ops)) != 0 ) |
2014 | 22 | goto done; |
2015 | 852 | break; |
2016 | | |
2017 | 852 | case 0x90: /* nop / xchg %%r8,%%rax */ |
2018 | 947 | case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */ |
2019 | 947 | if ( !(rex_prefix & REX_B) ) |
2020 | 590 | break; /* nop / pause */ |
2021 | | /* fall through */ |
2022 | | |
2023 | 2.75k | case 0x91 ... 0x97: /* xchg reg,%%rax */ |
2024 | 2.75k | dst.type = OP_REG; |
2025 | 2.75k | dst.bytes = op_bytes; |
2026 | 2.75k | dst.reg = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3)); |
2027 | 2.75k | dst.val = *dst.reg; |
2028 | 2.75k | goto xchg; |
2029 | | |
2030 | 698 | case 0x98: /* cbw/cwde/cdqe */ |
2031 | 698 | switch ( op_bytes ) |
2032 | 698 | { |
2033 | 187 | case 2: _regs.ax = (int8_t)_regs.ax; break; /* cbw */ |
2034 | 302 | case 4: _regs.r(ax) = (uint32_t)(int16_t)_regs.r(ax); break; /* cwde */ |
2035 | 209 | case 8: _regs.r(ax) = (int32_t)_regs.r(ax); break; /* cdqe */ |
2036 | 698 | } |
2037 | 698 | break; |
2038 | | |
2039 | 834 | case 0x99: /* cwd/cdq/cqo */ |
2040 | 834 | switch ( op_bytes ) |
2041 | 834 | { |
2042 | 295 | case 2: _regs.dx = -((int16_t)_regs.ax < 0); break; |
2043 | 292 | case 4: _regs.r(dx) = (uint32_t)-((int32_t)_regs.eax < 0); break; |
2044 | 0 | #ifdef __x86_64__ |
2045 | 247 | case 8: _regs.rdx = -((int64_t)_regs.rax < 0); break; |
2046 | 834 | #endif |
2047 | 834 | } |
2048 | 834 | break; |
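
The `-((int16_t)_regs.ax < 0)` form relies on the comparison evaluating to 0 or 1, so negating it yields either 0 or an all-ones value of the destination width. A minimal stand-alone check (hypothetical helper name):

    #include <assert.h>
    #include <stdint.h>

    /* Model of cwd: the value that ends up in DX for a given AX. */
    static uint16_t cwd_dx(uint16_t ax)
    {
        return -((int16_t)ax < 0);   /* 0x0000 or 0xffff */
    }

    int main(void)
    {
        assert(cwd_dx(0x1234) == 0x0000);
        assert(cwd_dx(0x8000) == 0xffff);
        return 0;
    }
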
2049 | | |
2050 | 834 | case 0x9a: /* call (far, absolute) */ |
2051 | 217 | ASSERT(!mode_64bit()); |
2052 | 563 | far_call: |
2053 | 563 | fail_if(!ops->read_segment || !ops->write); |
2054 | | |
2055 | 562 | if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) || |
2056 | 562 | (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) || |
2057 | 562 | (validate_far_branch(&cs, imm1), |
2058 | 492 | src.val = sreg.sel, |
2059 | 492 | rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), |
2060 | 492 | &src.val, op_bytes, ctxt)) || |
2061 | 562 | (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), |
2062 | 472 | &_regs.r(ip), op_bytes, ctxt)) || |
2063 | 562 | (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ) |
2064 | 85 | goto done; |
2065 | | |
2066 | 477 | _regs.r(ip) = imm1; |
2067 | 477 | singlestep = _regs.eflags & X86_EFLAGS_TF; |
2068 | 477 | break; |
2069 | | |
2070 | 0 | #ifndef X86EMUL_NO_FPU |
2071 | 5 | case 0x9b: /* wait/fwait */ |
2072 | 62.6k | case 0xd8 ... 0xdf: /* FPU */ |
2073 | 62.6k | state->stub_exn = &stub_exn; |
2074 | 62.6k | rc = x86emul_fpu(state, &_regs, &dst, &src, ctxt, ops, |
2075 | 62.6k | &insn_bytes, &fpu_type, mmvalp); |
2076 | 62.6k | goto dispatch_from_helper; |
2077 | 0 | #endif |
2078 | | |
2079 | 858 | case 0x9c: /* pushf */ |
2080 | 858 | if ( (_regs.eflags & X86_EFLAGS_VM) && |
2081 | 858 | MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 ) |
2082 | 414 | { |
2083 | 414 | cr4 = 0; |
2084 | 414 | if ( op_bytes == 2 && ops->read_cr ) |
2085 | 412 | { |
2086 | 412 | rc = ops->read_cr(4, &cr4, ctxt); |
2087 | 412 | if ( rc != X86EMUL_OKAY ) |
2088 | 0 | goto done; |
2089 | 412 | } |
2090 | 414 | generate_exception_if(!(cr4 & X86_CR4_VME), X86_EXC_GP, 0); |
2091 | 412 | src.val = (_regs.flags & ~X86_EFLAGS_IF) | X86_EFLAGS_IOPL; |
2092 | 412 | if ( _regs.eflags & X86_EFLAGS_VIF ) |
2093 | 194 | src.val |= X86_EFLAGS_IF; |
2094 | 412 | } |
2095 | 444 | else |
2096 | 444 | src.val = _regs.r(flags) & ~(X86_EFLAGS_VM | X86_EFLAGS_RF); |
2097 | 856 | goto push; |
2098 | | |
2099 | 1.81k | case 0x9d: /* popf */ { |
2100 | | /* |
2101 | | * Bits which may not be modified by this instruction. RF is handled |
2102 | | * uniformly during instruction retirement. |
2103 | | */ |
2104 | 1.81k | uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM; |
2105 | | |
2106 | 1.81k | cr4 = 0; |
2107 | 1.81k | if ( !mode_ring0() ) |
2108 | 1.54k | { |
2109 | 1.54k | if ( _regs.eflags & X86_EFLAGS_VM ) |
2110 | 867 | { |
2111 | 867 | if ( op_bytes == 2 && ops->read_cr ) |
2112 | 477 | { |
2113 | 477 | rc = ops->read_cr(4, &cr4, ctxt); |
2114 | 477 | if ( rc != X86EMUL_OKAY ) |
2115 | 0 | goto done; |
2116 | 477 | } |
2117 | | /* All IOPL != 3 POPFs fail, except in vm86 mode. */ |
2118 | 867 | generate_exception_if(!(cr4 & X86_CR4_VME) && |
2119 | 867 | MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3, |
2120 | 867 | X86_EXC_GP, 0); |
2121 | 867 | } |
2122 | | /* |
2123 | | * IOPL cannot be modified outside of CPL 0. IF cannot be |
2124 | | * modified if IOPL < CPL. |
2125 | | */ |
2126 | 1.54k | mask |= X86_EFLAGS_IOPL; |
2127 | 1.54k | if ( !mode_iopl() ) |
2128 | 577 | mask |= X86_EFLAGS_IF; |
2129 | 1.54k | } |
2130 | | /* 64-bit mode: POPF defaults to a 64-bit operand. */ |
2131 | 1.81k | if ( mode_64bit() && (op_bytes == 4) ) |
2132 | 112 | op_bytes = 8; |
2133 | 1.81k | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
2134 | 1.81k | &dst.val, op_bytes, ctxt, ops)) != 0 ) |
2135 | 36 | goto done; |
2136 | 1.77k | if ( op_bytes == 2 ) |
2137 | 1.13k | { |
2138 | | /* 16-bit POPF preserves the upper 16 bits of EFLAGS. */ |
2139 | 1.13k | dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u); |
2140 | | /* VME processing only applies at IOPL != 3. */ |
2141 | 1.13k | if ( (cr4 & X86_CR4_VME) && |
2142 | 1.13k | MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 ) |
2143 | 277 | { |
2144 | 277 | generate_exception_if(dst.val & X86_EFLAGS_TF, X86_EXC_GP, 0); |
2145 | 276 | if ( dst.val & X86_EFLAGS_IF ) |
2146 | 206 | { |
2147 | 206 | generate_exception_if(_regs.eflags & X86_EFLAGS_VIP, |
2148 | 206 | X86_EXC_GP, 0); |
2149 | 205 | dst.val |= X86_EFLAGS_VIF; |
2150 | 205 | } |
2151 | 70 | else |
2152 | 70 | dst.val &= ~X86_EFLAGS_VIF; |
2153 | 275 | mask &= ~X86_EFLAGS_VIF; |
2154 | 275 | } |
2155 | 1.13k | } |
2156 | 1.77k | dst.val &= EFLAGS_MODIFIABLE; |
2157 | 1.77k | _regs.eflags &= mask; |
2158 | 1.77k | _regs.eflags |= (dst.val & ~mask) | X86_EFLAGS_MBS; |
2159 | 1.77k | break; |
2160 | 1.77k | } |
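
For illustration, a reduced model of the final merge above (hypothetical helper; constants abbreviated): any flag bit included in `mask` keeps its previous value, which is how IF and IOPL are protected from an unprivileged POPF.

    #include <assert.h>
    #include <stdint.h>

    /* Merge a popped flags image, preserving the bits named in mask. */
    static uint32_t popf_merge(uint32_t old_flags, uint32_t popped,
                               uint32_t mask, uint32_t modifiable, uint32_t mbs)
    {
        popped &= modifiable;
        return (old_flags & mask) | (popped & ~mask) | mbs;
    }

    int main(void)
    {
        const uint32_t IF = 0x200, MBS = 0x2;

        /* With IF in the mask (IOPL < CPL), popping a zero IF leaves it set. */
        assert(popf_merge(IF | MBS, 0, IF, ~0u, MBS) & IF);
        return 0;
    }
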
2161 | | |
2162 | 409 | case 0x9e: /* sahf */ |
2163 | 409 | if ( mode_64bit() ) |
2164 | 409 | vcpu_must_have(lahf_lm); |
2165 | 409 | *(uint8_t *)&_regs.eflags = (_regs.ah & EFLAGS_MASK) | X86_EFLAGS_MBS; |
2166 | 409 | break; |
2167 | | |
2168 | 270 | case 0x9f: /* lahf */ |
2169 | 270 | if ( mode_64bit() ) |
2170 | 270 | vcpu_must_have(lahf_lm); |
2171 | 270 | _regs.ah = (_regs.eflags & EFLAGS_MASK) | X86_EFLAGS_MBS; |
2172 | 270 | break; |
2173 | | |
2174 | 5.63k | case 0xa4 ... 0xa5: /* movs */ { |
2175 | 5.63k | unsigned long nr_reps = get_rep_prefix(true, true); |
2176 | | |
2177 | 4.98k | dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
2178 | 4.98k | dst.mem.seg = x86_seg_es; |
2179 | 4.98k | dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes); |
2180 | 4.98k | src.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes); |
2181 | 4.98k | if ( (nr_reps == 1) || !ops->rep_movs || |
2182 | 4.98k | ((rc = ops->rep_movs(ea.mem.seg, src.mem.off, |
2183 | 1.30k | dst.mem.seg, dst.mem.off, dst.bytes, |
2184 | 1.30k | &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) ) |
2185 | 4.04k | { |
2186 | 4.04k | if ( (rc = read_ulong(ea.mem.seg, src.mem.off, |
2187 | 4.04k | &dst.val, dst.bytes, ctxt, ops)) != 0 ) |
2188 | 370 | goto done; |
2189 | 3.67k | dst.type = OP_MEM; |
2190 | 3.67k | nr_reps = 1; |
2191 | 3.67k | } |
2192 | 4.61k | register_address_adjust(_regs.r(si), nr_reps * dst.bytes); |
2193 | 4.61k | register_address_adjust(_regs.r(di), nr_reps * dst.bytes); |
2194 | 4.61k | put_rep_prefix(nr_reps); |
2195 | 4.37k | if ( rc != X86EMUL_OKAY ) |
2196 | 0 | goto done; |
2197 | 4.37k | break; |
2198 | 4.37k | } |
2199 | | |
2200 | 5.11k | case 0xa6 ... 0xa7: /* cmps */ { |
2201 | 5.11k | unsigned long next_eip = _regs.r(ip); |
2202 | | |
2203 | 5.11k | get_rep_prefix(false, false /* don't extend RSI/RDI */); |
2204 | 4.43k | src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
2205 | 4.43k | if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)), |
2206 | 4.43k | &dst.val, dst.bytes, ctxt, ops)) || |
2207 | 4.43k | (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)), |
2208 | 4.29k | &src.val, src.bytes, ctxt, ops)) ) |
2209 | 164 | goto done; |
2210 | 4.26k | register_address_adjust(_regs.r(si), dst.bytes); |
2211 | 4.26k | register_address_adjust(_regs.r(di), src.bytes); |
2212 | 4.26k | put_rep_prefix(1); |
2213 | | /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */ |
2214 | 4.26k | emulate_2op_SrcV("cmp", src, dst, _regs.eflags); |
2215 | 4.26k | if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) || |
2216 | 4.26k | (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) ) |
2217 | 1.25k | _regs.r(ip) = next_eip; |
2218 | 4.26k | break; |
2219 | 4.26k | } |
2220 | | |
2221 | 4.89k | case 0xaa ... 0xab: /* stos */ { |
2222 | 4.89k | unsigned long nr_reps = get_rep_prefix(false, true); |
2223 | | |
2224 | 0 | dst.bytes = src.bytes; |
2225 | 4.33k | dst.mem.seg = x86_seg_es; |
2226 | 4.33k | dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes); |
2227 | 4.33k | if ( (nr_reps == 1) || !ops->rep_stos || |
2228 | 4.33k | ((rc = ops->rep_stos(&src.val, |
2229 | 1.13k | dst.mem.seg, dst.mem.off, dst.bytes, |
2230 | 1.13k | &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) ) |
2231 | 3.26k | { |
2232 | 3.26k | dst.val = src.val; |
2233 | 3.26k | dst.type = OP_MEM; |
2234 | 3.26k | nr_reps = 1; |
2235 | 3.26k | rc = X86EMUL_OKAY; |
2236 | 3.26k | } |
2237 | 4.33k | register_address_adjust(_regs.r(di), nr_reps * dst.bytes); |
2238 | 4.33k | put_rep_prefix(nr_reps); |
2239 | 4.12k | if ( rc != X86EMUL_OKAY ) |
2240 | 0 | goto done; |
2241 | 4.12k | break; |
2242 | 4.12k | } |
2243 | | |
2244 | 4.12k | case 0xac ... 0xad: /* lods */ |
2245 | 3.23k | get_rep_prefix(false, false /* don't extend RSI/RDI */); |
2246 | 2.48k | if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)), |
2247 | 2.48k | &dst.val, dst.bytes, ctxt, ops)) != 0 ) |
2248 | 114 | goto done; |
2249 | 2.37k | register_address_adjust(_regs.r(si), dst.bytes); |
2250 | 2.37k | put_rep_prefix(1); |
2251 | 2.37k | break; |
2252 | | |
2253 | 3.87k | case 0xae ... 0xaf: /* scas */ { |
2254 | 3.87k | unsigned long next_eip = _regs.r(ip); |
2255 | | |
2256 | 3.87k | get_rep_prefix(false, false /* don't extend RSI/RDI */); |
2257 | 3.08k | if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)), |
2258 | 3.08k | &dst.val, src.bytes, ctxt, ops)) != 0 ) |
2259 | 99 | goto done; |
2260 | 2.98k | register_address_adjust(_regs.r(di), src.bytes); |
2261 | 2.98k | put_rep_prefix(1); |
2262 | | /* cmp: %%eax - *%%edi ==> src=%%eax,dst=*%%edi ==> src - dst */ |
2263 | 2.98k | dst.bytes = src.bytes; |
2264 | 2.98k | emulate_2op_SrcV("cmp", dst, src, _regs.eflags); |
2265 | 2.98k | if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) || |
2266 | 2.98k | (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) ) |
2267 | 787 | _regs.r(ip) = next_eip; |
2268 | 2.98k | break; |
2269 | 2.98k | } |
2270 | | |
2271 | 2.56k | case 0xb0 ... 0xb7: /* mov imm8,r8 */ |
2272 | 2.56k | dst.reg = _decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3), |
2273 | 2.56k | !rex_prefix); |
2274 | 2.56k | dst.val = src.val; |
2275 | 2.56k | break; |
2276 | | |
2277 | 1.98k | case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */ |
2278 | 1.98k | dst.reg = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3)); |
2279 | 1.98k | dst.val = src.val; |
2280 | 1.98k | break; |
2281 | | |
2282 | 5.68k | case 0xc0 ... 0xc1: grp2: /* Grp2 */ |
2283 | 5.68k | generate_exception_if(lock_prefix, X86_EXC_UD); |
2284 | | |
2285 | 5.68k | switch ( modrm_reg & 7 ) |
2286 | 5.68k | { |
2287 | 0 | #define GRP2(name, ext) \ |
2288 | 5.68k | case ext: \ |
2289 | 5.68k | if ( ops->rmw && dst.type == OP_MEM ) \ |
2290 | 5.68k | state->rmw = rmw_##name; \ |
2291 | 5.68k | else \ |
2292 | 5.68k | emulate_2op_SrcB(#name, src, dst, _regs.eflags); \ |
2293 | 5.68k | break |
2294 | | |
2295 | 903 | GRP2(rol, 0); |
2296 | 903 | GRP2(ror, 1); |
2297 | 896 | GRP2(rcl, 2); |
2298 | 1.08k | GRP2(rcr, 3); |
2299 | 1.08k | case 6: /* sal/shl alias */ |
2300 | 719 | GRP2(shl, 4); |
2301 | 805 | GRP2(shr, 5); |
2302 | 5.68k | GRP2(sar, 7); |
2303 | 5.68k | #undef GRP2 |
2304 | 5.68k | } |
2305 | 5.68k | break; |
2306 | | |
2307 | 5.68k | case 0xc2: /* ret imm16 (near) */ |
2308 | 924 | case 0xc3: /* ret (near) */ |
2309 | 924 | op_bytes = (op_bytes == 4 || !amd_like(ctxt)) && mode_64bit() |
2310 | 924 | ? 8 : op_bytes; |
2311 | 924 | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val), |
2312 | 924 | &dst.val, op_bytes, ctxt, ops)) != 0 || |
2313 | 924 | (rc = ops->insn_fetch(dst.val, NULL, 0, ctxt)) ) |
2314 | 35 | goto done; |
2315 | 889 | _regs.r(ip) = dst.val; |
2316 | 889 | adjust_bnd(ctxt, ops, vex.pfx); |
2317 | 889 | break; |
2318 | | |
2319 | 235 | case 0xc4: /* les */ |
2320 | 382 | case 0xc5: /* lds */ |
2321 | 382 | seg = (b & 1) * 3; /* es = 0, ds = 3 */ |
2322 | 856 | les: |
2323 | 856 | generate_exception_if(src.type != OP_MEM, X86_EXC_UD); |
2324 | 849 | if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + src.bytes), |
2325 | 849 | &dst.val, 2, ctxt, ops)) != X86EMUL_OKAY ) |
2326 | 23 | goto done; |
2327 | 826 | ASSERT(is_x86_user_segment(seg)); |
2328 | 826 | if ( (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY ) |
2329 | 9 | goto done; |
2330 | 817 | dst.val = src.val; |
2331 | 817 | break; |
2332 | | |
2333 | 1.11k | case 0xc8: /* enter imm16,imm8 */ |
2334 | 1.11k | dst.type = OP_REG; |
2335 | 1.11k | dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes; |
2336 | 1.11k | dst.reg = (unsigned long *)&_regs.r(bp); |
2337 | 1.11k | fail_if(!ops->write); |
2338 | 1.10k | if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), |
2339 | 1.10k | &_regs.r(bp), dst.bytes, ctxt)) ) |
2340 | 17 | goto done; |
2341 | 1.09k | dst.val = _regs.r(sp); |
2342 | | |
2343 | 1.09k | n = imm2 & 31; |
2344 | 1.09k | if ( n ) |
2345 | 768 | { |
2346 | 1.42k | for ( i = 1; i < n; i++ ) |
2347 | 690 | { |
2348 | 690 | unsigned long ebp, temp_data; |
2349 | 690 | ebp = truncate_word(_regs.r(bp) - i*dst.bytes, ctxt->sp_size/8); |
2350 | 690 | if ( (rc = read_ulong(x86_seg_ss, ebp, |
2351 | 690 | &temp_data, dst.bytes, ctxt, ops)) || |
2352 | 690 | (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), |
2353 | 667 | &temp_data, dst.bytes, ctxt)) ) |
2354 | 30 | goto done; |
2355 | 690 | } |
2356 | 738 | if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), |
2357 | 738 | &dst.val, dst.bytes, ctxt)) ) |
2358 | 8 | goto done; |
2359 | 738 | } |
2360 | | |
2361 | 1.05k | sp_pre_dec(src.val); |
2362 | 1.05k | break; |
2363 | | |
2364 | 859 | case 0xc9: /* leave */ |
2365 | | /* First writeback, to %%esp. */ |
2366 | 859 | dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes; |
2367 | 859 | if ( dst.bytes == 2 ) |
2368 | 214 | _regs.sp = _regs.bp; |
2369 | 645 | else |
2370 | 645 | _regs.r(sp) = dst.bytes == 4 ? _regs.ebp : _regs.r(bp); |
2371 | | |
2372 | | /* Second writeback, to %%ebp. */ |
2373 | 859 | dst.type = OP_REG; |
2374 | 859 | dst.reg = (unsigned long *)&_regs.r(bp); |
2375 | 859 | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes), |
2376 | 859 | &dst.val, dst.bytes, ctxt, ops)) ) |
2377 | 24 | goto done; |
2378 | 835 | break; |
2379 | | |
2380 | 835 | case 0xca: /* ret imm16 (far) */ |
2381 | 374 | case 0xcb: /* ret (far) */ |
2382 | 374 | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
2383 | 374 | &dst.val, op_bytes, ctxt, ops)) || |
2384 | 374 | (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val), |
2385 | 364 | &src.val, op_bytes, ctxt, ops)) || |
2386 | 374 | (rc = load_seg(x86_seg_cs, src.val, 1, &cs, ctxt, ops)) || |
2387 | 374 | (rc = commit_far_branch(&cs, dst.val)) ) |
2388 | 38 | goto done; |
2389 | 336 | break; |
2390 | | |
2391 | 393 | case 0xce: /* into */ |
2392 | 393 | if ( !(_regs.eflags & X86_EFLAGS_OF) ) |
2393 | 391 | break; |
2394 | | /* Fallthrough */ |
2395 | 6 | case 0xcc: /* int3 */ |
2396 | 9 | case 0xcd: /* int imm8 */ |
2397 | 10 | case 0xf1: /* int1 (icebp) */ |
2398 | 10 | ASSERT(!ctxt->event_pending); |
2399 | 10 | switch ( ctxt->opcode ) |
2400 | 10 | { |
2401 | 4 | case 0xcc: /* int3 */ |
2402 | 4 | ctxt->event.vector = X86_EXC_BP; |
2403 | 4 | ctxt->event.type = X86_ET_SW_EXC; |
2404 | 4 | break; |
2405 | 3 | case 0xcd: /* int imm8 */ |
2406 | 3 | ctxt->event.vector = imm1; |
2407 | 3 | ctxt->event.type = X86_ET_SW_INT; |
2408 | 3 | break; |
2409 | 2 | case 0xce: /* into */ |
2410 | 2 | ctxt->event.vector = X86_EXC_OF; |
2411 | 2 | ctxt->event.type = X86_ET_SW_EXC; |
2412 | 2 | break; |
2413 | 1 | case 0xf1: /* icebp */ |
2414 | 1 | ctxt->event.vector = X86_EXC_DB; |
2415 | 1 | ctxt->event.type = X86_ET_PRIV_SW_EXC; |
2416 | 1 | break; |
2417 | 10 | } |
2418 | 10 | ctxt->event.error_code = X86_EVENT_NO_EC; |
2419 | 10 | ctxt->event.insn_len = _regs.r(ip) - ctxt->regs->r(ip); |
2420 | 10 | ctxt->event_pending = true; |
2421 | 10 | rc = X86EMUL_EXCEPTION; |
2422 | 10 | goto done; |
2423 | | |
2424 | 362 | case 0xcf: /* iret */ { |
2425 | 362 | unsigned long sel, eip, eflags; |
2426 | 362 | uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM; |
2427 | | |
2428 | 362 | fail_if(!in_realmode(ctxt, ops)); |
2429 | 357 | ctxt->retire.unblock_nmi = true; |
2430 | 357 | if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
2431 | 357 | &eip, op_bytes, ctxt, ops)) || |
2432 | 357 | (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
2433 | 353 | &sel, op_bytes, ctxt, ops)) || |
2434 | 357 | (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), |
2435 | 352 | &eflags, op_bytes, ctxt, ops)) ) |
2436 | 7 | goto done; |
2437 | 350 | if ( op_bytes == 2 ) |
2438 | 282 | eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u); |
2439 | 350 | eflags &= EFLAGS_MODIFIABLE; |
2440 | 350 | _regs.eflags &= mask; |
2441 | 350 | _regs.eflags |= (eflags & ~mask) | X86_EFLAGS_MBS; |
2442 | 350 | if ( (rc = load_seg(x86_seg_cs, sel, 1, &cs, ctxt, ops)) || |
2443 | 350 | (rc = commit_far_branch(&cs, (uint32_t)eip)) ) |
2444 | 10 | goto done; |
2445 | 340 | break; |
2446 | 350 | } |
2447 | | |
2448 | 1.07k | case 0xd0 ... 0xd1: /* Grp2 */ |
2449 | 1.07k | src.val = 1; |
2450 | 1.07k | goto grp2; |
2451 | | |
2452 | 2.56k | case 0xd2 ... 0xd3: /* Grp2 */ |
2453 | 2.56k | src.val = _regs.cl; |
2454 | 2.56k | goto grp2; |
2455 | | |
2456 | 170 | case 0xd4: /* aam */ |
2457 | 1.06k | case 0xd5: /* aad */ |
2458 | 1.06k | n = (uint8_t)src.val; |
2459 | 1.06k | if ( b & 0x01 ) |
2460 | 896 | _regs.ax = (uint8_t)(_regs.al + (_regs.ah * n)); |
2461 | 170 | else |
2462 | 170 | { |
2463 | 170 | generate_exception_if(!n, X86_EXC_DE); |
2464 | 168 | _regs.ah = _regs.al / n; |
2465 | 168 | _regs.al = _regs.al % n; |
2466 | 168 | } |
2467 | 1.06k | _regs.eflags &= ~(X86_EFLAGS_SF | X86_EFLAGS_ZF | X86_EFLAGS_PF); |
2468 | 1.06k | _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0; |
2469 | 1.06k | _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0; |
2470 | 1.06k | _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0; |
2471 | 1.06k | break; |
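
AAM splits the original AL into a quotient (AH) and a remainder (AL), so the division has to be done before AL is reduced, as the listing does. A worked stand-alone example (hypothetical helper):

    #include <assert.h>
    #include <stdint.h>

    /* Model of AAM with an arbitrary base (0x0a for the usual encoding). */
    static void aam(uint8_t *al, uint8_t *ah, uint8_t base)
    {
        uint8_t old_al = *al;

        *ah = old_al / base;         /* quotient, from the unmodified AL */
        *al = old_al % base;         /* remainder */
    }

    int main(void)
    {
        uint8_t al = 0x25 /* 37 */, ah = 0;

        aam(&al, &ah, 10);
        assert(ah == 3 && al == 7);  /* 37 = 3 * 10 + 7 */
        return 0;
    }
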
2472 | | |
2473 | 453 | case 0xd6: /* salc */ |
2474 | 453 | _regs.al = (_regs.eflags & X86_EFLAGS_CF) ? 0xff : 0x00; |
2475 | 453 | break; |
2476 | | |
2477 | 626 | case 0xd7: /* xlat */ { |
2478 | 626 | unsigned long al; |
2479 | | |
2480 | 626 | if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(bx) + _regs.al), |
2481 | 626 | &al, 1, ctxt, ops)) != 0 ) |
2482 | 7 | goto done; |
2483 | 619 | _regs.al = al; |
2484 | 619 | break; |
2485 | 626 | } |
2486 | | |
2487 | 2.16k | case 0xe0 ... 0xe2: /* loop{,z,nz} */ { |
2488 | 2.16k | unsigned long count = get_loop_count(&_regs, ad_bytes); |
2489 | 2.16k | int do_jmp = !(_regs.eflags & X86_EFLAGS_ZF); /* loopnz */ |
2490 | | |
2491 | 2.16k | if ( b == 0xe1 ) |
2492 | 752 | do_jmp = !do_jmp; /* loopz */ |
2493 | 1.41k | else if ( b == 0xe2 ) |
2494 | 1.05k | do_jmp = 1; /* loop */ |
2495 | 2.16k | if ( count != 1 && do_jmp ) |
2496 | 1.19k | jmp_rel((int32_t)src.val); |
2497 | 2.09k | put_loop_count(&_regs, ad_bytes, count - 1); |
2498 | 2.09k | break; |
2499 | 2.16k | } |
2500 | | |
2501 | 1.40k | case 0xe3: /* jcxz/jecxz (short) */ |
2502 | 1.40k | if ( !get_loop_count(&_regs, ad_bytes) ) |
2503 | 806 | jmp_rel((int32_t)src.val); |
2504 | 1.38k | break; |
2505 | | |
2506 | 1.38k | case 0xe4: /* in imm8,%al */ |
2507 | 343 | case 0xe5: /* in imm8,%eax */ |
2508 | 421 | case 0xe6: /* out %al,imm8 */ |
2509 | 566 | case 0xe7: /* out %eax,imm8 */ |
2510 | 979 | case 0xec: /* in %dx,%al */ |
2511 | 1.19k | case 0xed: /* in %dx,%eax */ |
2512 | 1.27k | case 0xee: /* out %al,%dx */ |
2513 | 1.74k | case 0xef: /* out %eax,%dx */ { |
2514 | 1.74k | unsigned int port = ((b < 0xe8) ? (uint8_t)src.val : _regs.dx); |
2515 | | |
2516 | 1.74k | op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; |
2517 | 1.74k | if ( (rc = ioport_access_check(port, op_bytes, ctxt, ops)) != 0 ) |
2518 | 15 | goto done; |
2519 | 1.72k | if ( b & 2 ) |
2520 | 756 | { |
2521 | | /* out */ |
2522 | 756 | fail_if(ops->write_io == NULL); |
2523 | 755 | rc = ops->write_io(port, op_bytes, _regs.eax, ctxt); |
2524 | 755 | } |
2525 | 969 | else |
2526 | 969 | { |
2527 | | /* in */ |
2528 | 969 | dst.bytes = op_bytes; |
2529 | 969 | fail_if(ops->read_io == NULL); |
2530 | 966 | rc = ops->read_io(port, dst.bytes, &dst.val, ctxt); |
2531 | 966 | } |
2532 | 1.72k | if ( rc != 0 ) |
2533 | 40 | { |
2534 | 40 | if ( rc == X86EMUL_DONE ) |
2535 | 0 | goto complete_insn; |
2536 | 40 | goto done; |
2537 | 40 | } |
2538 | 1.68k | break; |
2539 | 1.72k | } |
2540 | | |
2541 | 1.68k | case 0xe8: /* call (near) */ { |
2542 | 778 | int32_t rel = src.val; |
2543 | | |
2544 | 778 | op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes; |
2545 | 778 | src.val = _regs.r(ip); |
2546 | 778 | jmp_rel(rel); |
2547 | 771 | adjust_bnd(ctxt, ops, vex.pfx); |
2548 | 771 | goto push; |
2549 | 778 | } |
2550 | | |
2551 | 482 | case 0xe9: /* jmp (near) */ |
2552 | 1.34k | case 0xeb: /* jmp (short) */ |
2553 | 1.34k | jmp_rel((int32_t)src.val); |
2554 | 1.31k | if ( !(b & 2) ) |
2555 | 478 | adjust_bnd(ctxt, ops, vex.pfx); |
2556 | 1.31k | break; |
2557 | | |
2558 | 169 | case 0xea: /* jmp (far, absolute) */ |
2559 | 169 | ASSERT(!mode_64bit()); |
2560 | 559 | far_jmp: |
2561 | 559 | if ( (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) || |
2562 | 559 | (rc = commit_far_branch(&cs, imm1)) ) |
2563 | 72 | goto done; |
2564 | 487 | break; |
2565 | | |
2566 | 487 | case 0xf4: /* hlt */ |
2567 | 223 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
2568 | 221 | ctxt->retire.hlt = true; |
2569 | 221 | break; |
2570 | | |
2571 | 432 | case 0xf5: /* cmc */ |
2572 | 432 | _regs.eflags ^= X86_EFLAGS_CF; |
2573 | 432 | break; |
2574 | | |
2575 | 8.21k | case 0xf6 ... 0xf7: /* Grp3 */ |
2576 | 8.21k | if ( (d & DstMask) == DstEax ) |
2577 | 6.11k | dst.reg = (unsigned long *)&_regs.r(ax); |
2578 | 8.21k | switch ( modrm_reg & 7 ) |
2579 | 8.21k | { |
2580 | 0 | unsigned long u[2], v; |
2581 | | |
2582 | 944 | case 0 ... 1: /* test */ |
2583 | 944 | dst.val = imm1; |
2584 | 944 | dst.bytes = src.bytes; |
2585 | 944 | goto test; |
2586 | 226 | case 2: /* not */ |
2587 | 226 | if ( ops->rmw && dst.type == OP_MEM ) |
2588 | 0 | state->rmw = rmw_not; |
2589 | 226 | else |
2590 | 226 | dst.val = ~dst.val; |
2591 | 226 | break; |
2592 | 931 | case 3: /* neg */ |
2593 | 931 | if ( ops->rmw && dst.type == OP_MEM ) |
2594 | 0 | state->rmw = rmw_neg; |
2595 | 931 | else |
2596 | 931 | emulate_1op("neg", dst, _regs.eflags); |
2597 | 931 | break; |
2598 | 1.86k | case 4: /* mul */ |
2599 | 1.86k | _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF); |
2600 | 1.86k | switch ( dst.bytes ) |
2601 | 1.86k | { |
2602 | 388 | case 1: |
2603 | 388 | dst.val = _regs.al; |
2604 | 388 | dst.val *= src.val; |
2605 | 388 | if ( (uint8_t)dst.val != (uint16_t)dst.val ) |
2606 | 199 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2607 | 388 | dst.bytes = 2; |
2608 | 388 | break; |
2609 | 413 | case 2: |
2610 | 413 | dst.val = _regs.ax; |
2611 | 413 | dst.val *= src.val; |
2612 | 413 | if ( (uint16_t)dst.val != (uint32_t)dst.val ) |
2613 | 218 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2614 | 413 | _regs.dx = dst.val >> 16; |
2615 | 413 | break; |
2616 | 0 | #ifdef __x86_64__ |
2617 | 532 | case 4: |
2618 | 532 | dst.val = _regs.eax; |
2619 | 532 | dst.val *= src.val; |
2620 | 532 | if ( (uint32_t)dst.val != dst.val ) |
2621 | 289 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2622 | 532 | _regs.rdx = dst.val >> 32; |
2623 | 532 | break; |
2624 | 0 | #endif |
2625 | 533 | default: |
2626 | 533 | u[0] = src.val; |
2627 | 533 | u[1] = _regs.r(ax); |
2628 | 533 | if ( mul_dbl(u) ) |
2629 | 343 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2630 | 533 | _regs.r(dx) = u[1]; |
2631 | 533 | dst.val = u[0]; |
2632 | 533 | break; |
2633 | 1.86k | } |
2634 | 1.86k | break; |
2635 | 1.86k | case 5: /* imul */ |
2636 | 1.39k | dst.val = _regs.r(ax); |
2637 | 2.84k | imul: |
2638 | 2.84k | _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF); |
2639 | 2.84k | switch ( dst.bytes ) |
2640 | 2.84k | { |
2641 | 283 | case 1: |
2642 | 283 | dst.val = (int8_t)src.val * (int8_t)dst.val; |
2643 | 283 | if ( (int8_t)dst.val != (int16_t)dst.val ) |
2644 | 224 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2645 | 283 | ASSERT(b > 0x6b); |
2646 | 283 | dst.bytes = 2; |
2647 | 283 | break; |
2648 | 1.17k | case 2: |
2649 | 1.17k | dst.val = ((uint32_t)(int16_t)src.val * |
2650 | 1.17k | (uint32_t)(int16_t)dst.val); |
2651 | 1.17k | if ( (int16_t)dst.val != (int32_t)dst.val ) |
2652 | 552 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2653 | 1.17k | if ( b > 0x6b ) |
2654 | 449 | _regs.dx = dst.val >> 16; |
2655 | 1.17k | break; |
2656 | 0 | #ifdef __x86_64__ |
2657 | 939 | case 4: |
2658 | 939 | dst.val = ((uint64_t)(int32_t)src.val * |
2659 | 939 | (uint64_t)(int32_t)dst.val); |
2660 | 939 | if ( (int32_t)dst.val != dst.val ) |
2661 | 593 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2662 | 939 | if ( b > 0x6b ) |
2663 | 540 | _regs.rdx = dst.val >> 32; |
2664 | 939 | break; |
2665 | 0 | #endif |
2666 | 446 | default: |
2667 | 446 | u[0] = src.val; |
2668 | 446 | u[1] = dst.val; |
2669 | 446 | if ( imul_dbl(u) ) |
2670 | 226 | _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF; |
2671 | 446 | if ( b > 0x6b ) |
2672 | 124 | _regs.r(dx) = u[1]; |
2673 | 446 | dst.val = u[0]; |
2674 | 446 | break; |
2675 | 2.84k | } |
2676 | 2.84k | break; |
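
The CF/OF updates in the mul/imul cases use a truncate-and-widen comparison: the product overflows the destination exactly when sign- (or zero-) extending its low half does not reproduce the full value. A stand-alone 16-bit version (hypothetical helper):

    #include <assert.h>
    #include <stdint.h>

    static int imul16_overflows(int16_t a, int16_t b)
    {
        int32_t prod = (int32_t)a * b;

        /* Fits in the 16-bit destination iff sign-extending the low half is lossless. */
        return (int16_t)prod != prod;
    }

    int main(void)
    {
        assert(!imul16_overflows(100, 300));  /* 30000 fits in int16_t */
        assert(imul16_overflows(200, 300));   /* 60000 does not */
        return 0;
    }
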
2677 | 2.84k | case 6: /* div */ |
2678 | 1.06k | switch ( src.bytes ) |
2679 | 1.06k | { |
2680 | 160 | case 1: |
2681 | 160 | u[0] = _regs.ax; |
2682 | 160 | u[1] = 0; |
2683 | 160 | v = (uint8_t)src.val; |
2684 | 160 | generate_exception_if( |
2685 | 160 | div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]), |
2686 | 160 | X86_EXC_DE); |
2687 | 152 | dst.val = (uint8_t)u[0]; |
2688 | 152 | _regs.ah = u[1]; |
2689 | 152 | break; |
2690 | 255 | case 2: |
2691 | 255 | u[0] = (_regs.edx << 16) | _regs.ax; |
2692 | 255 | u[1] = 0; |
2693 | 255 | v = (uint16_t)src.val; |
2694 | 255 | generate_exception_if( |
2695 | 255 | div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]), |
2696 | 255 | X86_EXC_DE); |
2697 | 234 | dst.val = (uint16_t)u[0]; |
2698 | 234 | _regs.dx = u[1]; |
2699 | 234 | break; |
2700 | 0 | #ifdef __x86_64__ |
2701 | 318 | case 4: |
2702 | 318 | u[0] = (_regs.rdx << 32) | _regs.eax; |
2703 | 318 | u[1] = 0; |
2704 | 318 | v = (uint32_t)src.val; |
2705 | 318 | generate_exception_if( |
2706 | 318 | div_dbl(u, v) || ((uint32_t)u[0] != u[0]), |
2707 | 318 | X86_EXC_DE); |
2708 | 262 | dst.val = (uint32_t)u[0]; |
2709 | 262 | _regs.rdx = (uint32_t)u[1]; |
2710 | 262 | break; |
2711 | 0 | #endif |
2712 | 329 | default: |
2713 | 329 | u[0] = _regs.r(ax); |
2714 | 329 | u[1] = _regs.r(dx); |
2715 | 329 | v = src.val; |
2716 | 329 | generate_exception_if(div_dbl(u, v), X86_EXC_DE); |
2717 | 275 | dst.val = u[0]; |
2718 | 275 | _regs.r(dx) = u[1]; |
2719 | 275 | break; |
2720 | 1.06k | } |
2721 | 923 | break; |
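
For the narrower DIV forms, #DE is raised either when div_dbl() itself fails (e.g. a zero divisor) or when the quotient does not fit the destination register, which is what the `(uint8_t)u[0] != (uint16_t)u[0]` style checks express. A stand-alone model of the 8-bit case (hypothetical helper):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool div8_faults(uint16_t ax, uint8_t divisor)
    {
        if ( divisor == 0 )
            return true;                  /* #DE: divide by zero */
        return (ax / divisor) > 0xff;     /* #DE: quotient does not fit in AL */
    }

    int main(void)
    {
        assert(!div8_faults(0x00ff, 2));  /* 255 / 2 = 127, fits */
        assert(div8_faults(0x1000, 2));   /* 4096 / 2 = 2048, does not */
        assert(div8_faults(0x0001, 0));   /* divide by zero */
        return 0;
    }
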
2722 | 1.79k | case 7: /* idiv */ |
2723 | 1.79k | switch ( src.bytes ) |
2724 | 1.79k | { |
2725 | 254 | case 1: |
2726 | 254 | u[0] = (int16_t)_regs.ax; |
2727 | 254 | u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; |
2728 | 254 | v = (int8_t)src.val; |
2729 | 254 | generate_exception_if( |
2730 | 254 | idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]), |
2731 | 254 | X86_EXC_DE); |
2732 | 246 | dst.val = (int8_t)u[0]; |
2733 | 246 | _regs.ah = u[1]; |
2734 | 246 | break; |
2735 | 473 | case 2: |
2736 | 473 | u[0] = (int32_t)((_regs.edx << 16) | _regs.ax); |
2737 | 473 | u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; |
2738 | 473 | v = (int16_t)src.val; |
2739 | 473 | generate_exception_if( |
2740 | 473 | idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]), |
2741 | 473 | X86_EXC_DE); |
2742 | 433 | dst.val = (int16_t)u[0]; |
2743 | 433 | _regs.dx = u[1]; |
2744 | 433 | break; |
2745 | 0 | #ifdef __x86_64__ |
2746 | 455 | case 4: |
2747 | 455 | u[0] = (_regs.rdx << 32) | _regs.eax; |
2748 | 455 | u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; |
2749 | 455 | v = (int32_t)src.val; |
2750 | 455 | generate_exception_if( |
2751 | 455 | idiv_dbl(u, v) || ((int32_t)u[0] != u[0]), |
2752 | 455 | X86_EXC_DE); |
2753 | 310 | dst.val = (int32_t)u[0]; |
2754 | 310 | _regs.rdx = (uint32_t)u[1]; |
2755 | 310 | break; |
2756 | 0 | #endif |
2757 | 609 | default: |
2758 | 609 | u[0] = _regs.r(ax); |
2759 | 609 | u[1] = _regs.r(dx); |
2760 | 609 | v = src.val; |
2761 | 609 | generate_exception_if(idiv_dbl(u, v), X86_EXC_DE); |
2762 | 382 | dst.val = u[0]; |
2763 | 382 | _regs.r(dx) = u[1]; |
2764 | 382 | break; |
2765 | 1.79k | } |
2766 | 1.37k | break; |
2767 | 8.21k | } |
2768 | 8.16k | break; |
2769 | | |
2770 | 8.16k | case 0xf8: /* clc */ |
2771 | 991 | _regs.eflags &= ~X86_EFLAGS_CF; |
2772 | 991 | break; |
2773 | | |
2774 | 3.08k | case 0xf9: /* stc */ |
2775 | 3.08k | _regs.eflags |= X86_EFLAGS_CF; |
2776 | 3.08k | break; |
2777 | | |
2778 | 1.60k | case 0xfa: /* cli */ |
2779 | 1.60k | if ( mode_iopl() ) |
2780 | 872 | _regs.eflags &= ~X86_EFLAGS_IF; |
2781 | 737 | else |
2782 | 737 | { |
2783 | 737 | generate_exception_if(!mode_vif(), X86_EXC_GP, 0); |
2784 | 733 | _regs.eflags &= ~X86_EFLAGS_VIF; |
2785 | 733 | } |
2786 | 1.60k | break; |
2787 | | |
2788 | 1.91k | case 0xfb: /* sti */ |
2789 | 1.91k | if ( mode_iopl() ) |
2790 | 1.03k | { |
2791 | 1.03k | if ( !(_regs.eflags & X86_EFLAGS_IF) ) |
2792 | 555 | ctxt->retire.sti = true; |
2793 | 1.03k | _regs.eflags |= X86_EFLAGS_IF; |
2794 | 1.03k | } |
2795 | 882 | else |
2796 | 882 | { |
2797 | 882 | generate_exception_if((_regs.eflags & X86_EFLAGS_VIP) || |
2798 | 882 | !mode_vif(), |
2799 | 882 | X86_EXC_GP, 0); |
2800 | 878 | if ( !(_regs.eflags & X86_EFLAGS_VIF) ) |
2801 | 530 | ctxt->retire.sti = true; |
2802 | 878 | _regs.eflags |= X86_EFLAGS_VIF; |
2803 | 878 | } |
2804 | 1.91k | break; |
2805 | | |
2806 | 1.91k | case 0xfc: /* cld */ |
2807 | 427 | _regs.eflags &= ~X86_EFLAGS_DF; |
2808 | 427 | break; |
2809 | | |
2810 | 1.02k | case 0xfd: /* std */ |
2811 | 1.02k | _regs.eflags |= X86_EFLAGS_DF; |
2812 | 1.02k | break; |
2813 | | |
2814 | 353 | case 0xfe: /* Grp4 */ |
2815 | 353 | generate_exception_if((modrm_reg & 7) >= 2, X86_EXC_UD); |
2816 | | /* Fallthrough. */ |
2817 | 4.16k | case 0xff: /* Grp5 */ |
2818 | 4.16k | switch ( modrm_reg & 7 ) |
2819 | 4.16k | { |
2820 | 702 | case 0: /* inc */ |
2821 | 702 | if ( ops->rmw && dst.type == OP_MEM ) |
2822 | 0 | state->rmw = rmw_inc; |
2823 | 702 | else |
2824 | 702 | emulate_1op("inc", dst, _regs.eflags); |
2825 | 702 | break; |
2826 | 854 | case 1: /* dec */ |
2827 | 854 | if ( ops->rmw && dst.type == OP_MEM ) |
2828 | 0 | state->rmw = rmw_dec; |
2829 | 854 | else |
2830 | 854 | emulate_1op("dec", dst, _regs.eflags); |
2831 | 854 | break; |
2832 | 854 | case 2: /* call (near) */ |
2833 | 76 | dst.val = _regs.r(ip); |
2834 | 76 | if ( (rc = ops->insn_fetch(src.val, NULL, 0, ctxt)) ) |
2835 | 1 | goto done; |
2836 | 75 | _regs.r(ip) = src.val; |
2837 | 75 | src.val = dst.val; |
2838 | 75 | adjust_bnd(ctxt, ops, vex.pfx); |
2839 | 75 | goto push; |
2840 | 854 | case 4: /* jmp (near) */ |
2841 | 854 | if ( (rc = ops->insn_fetch(src.val, NULL, 0, ctxt)) ) |
2842 | 7 | goto done; |
2843 | 847 | _regs.r(ip) = src.val; |
2844 | 847 | dst.type = OP_NONE; |
2845 | 847 | adjust_bnd(ctxt, ops, vex.pfx); |
2846 | 847 | break; |
2847 | 380 | case 3: /* call (far, absolute indirect) */ |
2848 | 806 | case 5: /* jmp (far, absolute indirect) */ |
2849 | 806 | generate_exception_if(src.type != OP_MEM, X86_EXC_UD); |
2850 | | |
2851 | 803 | if ( (rc = read_ulong(src.mem.seg, |
2852 | 803 | truncate_ea(src.mem.off + op_bytes), |
2853 | 803 | &imm2, 2, ctxt, ops)) ) |
2854 | 67 | goto done; |
2855 | 803 | imm1 = src.val; |
2856 | 736 | if ( !(modrm_reg & 4) ) |
2857 | 346 | goto far_call; |
2858 | 390 | goto far_jmp; |
2859 | 865 | case 6: /* push */ |
2860 | 865 | goto push; |
2861 | 4 | case 7: |
2862 | 4 | generate_exception(X86_EXC_UD); |
2863 | 4.16k | } |
2864 | 2.40k | break; |
2865 | | |
2866 | 2.76k | case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ |
2867 | 2.76k | seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr; |
2868 | 2.76k | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD); |
2869 | 2.76k | switch ( modrm_reg & 6 ) |
2870 | 2.76k | { |
2871 | 1.22k | case 0: /* sldt / str */ |
2872 | 1.22k | generate_exception_if(umip_active(ctxt, ops), X86_EXC_GP, 0); |
2873 | 1.22k | goto store_selector; |
2874 | 1.22k | case 2: /* lldt / ltr */ |
2875 | 374 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
2876 | 372 | if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 ) |
2877 | 43 | goto done; |
2878 | 329 | break; |
2879 | 1.16k | case 4: /* verr / verw */ |
2880 | 1.16k | _regs.eflags &= ~X86_EFLAGS_ZF; |
2881 | 1.16k | switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, |
2882 | 1.16k | &sreg, ctxt, ops) ) |
2883 | 1.16k | { |
2884 | 899 | case X86EMUL_OKAY: |
2885 | 899 | if ( sreg.s /* Excludes NUL selectors too. */ && |
2886 | 899 | ((modrm_reg & 1) ? ((sreg.type & 0xa) == 0x2) |
2887 | 612 | : ((sreg.type & 0xa) != 0x8)) ) |
2888 | 125 | _regs.eflags |= X86_EFLAGS_ZF; |
2889 | 899 | break; |
2890 | 264 | case X86EMUL_EXCEPTION: |
2891 | 264 | if ( ctxt->event_pending ) |
2892 | 1 | { |
2893 | 1 | ASSERT(ctxt->event.vector == X86_EXC_PF); |
2894 | 3 | default: |
2895 | 3 | goto done; |
2896 | 1 | } |
2897 | | /* Instead of the exception, ZF remains cleared. */ |
2898 | 263 | rc = X86EMUL_OKAY; |
2899 | 263 | break; |
2900 | 1.16k | } |
2901 | 1.16k | break; |
2902 | 1.16k | default: |
2903 | 1 | generate_exception_if(true, X86_EXC_UD); |
2904 | 0 | break; |
2905 | 2.76k | } |
2906 | 1.49k | break; |
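
The `sreg.type & 0xa` tests above rely on the descriptor type bits for non-system (S=1) segments: bit 3 distinguishes code from data and bit 1 is the writable (data) respectively readable (code) bit, so VERW accepts exactly writable data segments while VERR rejects only execute-only code. A stand-alone sketch (hypothetical helpers):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool verw_ok(uint8_t type)
    {
        return (type & 0xa) == 0x2;  /* data segment (bit 3 clear), writable (bit 1 set) */
    }

    static bool verr_ok(uint8_t type)
    {
        return (type & 0xa) != 0x8;  /* everything but execute-only code segments */
    }

    int main(void)
    {
        assert(verw_ok(0x3));        /* read/write data, accessed */
        assert(!verw_ok(0xb));       /* execute/read code */
        assert(verr_ok(0xb));        /* execute/read code is readable */
        assert(!verr_ok(0x9));       /* execute-only code is not */
        return 0;
    }
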
2907 | | |
2908 | 36.3k | case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ |
2909 | 36.3k | rc = x86emul_0f01(state, &_regs, &dst, ctxt, ops); |
2910 | 36.3k | goto dispatch_from_helper; |
2911 | | |
2912 | 1.42k | case X86EMUL_OPC(0x0f, 0x02): /* lar */ |
2913 | 1.42k | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD); |
2914 | 1.42k | _regs.eflags &= ~X86_EFLAGS_ZF; |
2915 | 1.42k | switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg, |
2916 | 1.42k | ctxt, ops) ) |
2917 | 1.42k | { |
2918 | 1.28k | case X86EMUL_OKAY: |
2919 | 1.28k | if ( !sreg.s ) |
2920 | 1.20k | { |
2921 | 1.20k | switch ( sreg.type ) |
2922 | 1.20k | { |
2923 | 73 | case 0x01: /* available 16-bit TSS */ |
2924 | 240 | case 0x03: /* busy 16-bit TSS */ |
2925 | 441 | case 0x04: /* 16-bit call gate */ |
2926 | 482 | case 0x05: /* 16/32-bit task gate */ |
2927 | 482 | if ( ctxt->lma ) |
2928 | 23 | break; |
2929 | | /* fall through */ |
2930 | 524 | case 0x02: /* LDT */ |
2931 | 590 | case 0x09: /* available 32/64-bit TSS */ |
2932 | 783 | case 0x0b: /* busy 32/64-bit TSS */ |
2933 | 982 | case 0x0c: /* 32/64-bit call gate */ |
2934 | 982 | _regs.eflags |= X86_EFLAGS_ZF; |
2935 | 982 | break; |
2936 | 1.20k | } |
2937 | 1.20k | } |
2938 | 74 | else |
2939 | 74 | _regs.eflags |= X86_EFLAGS_ZF; |
2940 | 1.28k | break; |
2941 | 1.28k | case X86EMUL_EXCEPTION: |
2942 | 138 | if ( ctxt->event_pending ) |
2943 | 1 | { |
2944 | 1 | ASSERT(ctxt->event.vector == X86_EXC_PF); |
2945 | 2 | default: |
2946 | 2 | goto done; |
2947 | 1 | } |
2948 | | /* Instead of the exception, ZF remains cleared. */ |
2949 | 137 | rc = X86EMUL_OKAY; |
2950 | 137 | break; |
2951 | 1.42k | } |
2952 | 1.42k | if ( _regs.eflags & X86_EFLAGS_ZF ) |
2953 | 1.05k | dst.val = ((sreg.attr & 0xff) << 8) | |
2954 | 1.05k | ((sreg.limit >> (sreg.g ? 12 : 0)) & 0xf0000) | |
2955 | 1.05k | ((sreg.attr & 0xf00) << 12); |
2956 | 364 | else |
2957 | 364 | dst.type = OP_NONE; |
2958 | 1.42k | break; |
2959 | | |
2960 | 2.57k | case X86EMUL_OPC(0x0f, 0x03): /* lsl */ |
2961 | 2.57k | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD); |
2962 | 2.57k | _regs.eflags &= ~X86_EFLAGS_ZF; |
2963 | 2.57k | switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg, |
2964 | 2.57k | ctxt, ops) ) |
2965 | 2.57k | { |
2966 | 1.82k | case X86EMUL_OKAY: |
2967 | 1.82k | if ( !sreg.s ) |
2968 | 1.36k | { |
2969 | 1.36k | switch ( sreg.type ) |
2970 | 1.36k | { |
2971 | 185 | case 0x01: /* available 16-bit TSS */ |
2972 | 411 | case 0x03: /* busy 16-bit TSS */ |
2973 | 411 | if ( ctxt->lma ) |
2974 | 45 | break; |
2975 | | /* fall through */ |
2976 | 441 | case 0x02: /* LDT */ |
2977 | 516 | case 0x09: /* available 32/64-bit TSS */ |
2978 | 848 | case 0x0b: /* busy 32/64-bit TSS */ |
2979 | 848 | _regs.eflags |= X86_EFLAGS_ZF; |
2980 | 848 | break; |
2981 | 1.36k | } |
2982 | 1.36k | } |
2983 | 463 | else |
2984 | 463 | _regs.eflags |= X86_EFLAGS_ZF; |
2985 | 1.82k | break; |
2986 | 1.82k | case X86EMUL_EXCEPTION: |
2987 | 744 | if ( ctxt->event_pending ) |
2988 | 3 | { |
2989 | 3 | ASSERT(ctxt->event.vector == X86_EXC_PF); |
2990 | 4 | default: |
2991 | 4 | goto done; |
2992 | 3 | } |
2993 | | /* Instead of the exception, ZF remains cleared. */ |
2994 | 741 | rc = X86EMUL_OKAY; |
2995 | 741 | break; |
2996 | 2.57k | } |
2997 | 2.56k | if ( _regs.eflags & X86_EFLAGS_ZF ) |
2998 | 1.31k | dst.val = sreg.limit; |
2999 | 1.25k | else |
3000 | 1.25k | dst.type = OP_NONE; |
3001 | 2.56k | break; |
3002 | | |
3003 | 203 | case X86EMUL_OPC(0x0f, 0x05): /* syscall */ |
3004 | | /* |
3005 | | * Inject #UD if syscall/sysret are disabled. EFER.SCE can't be set |
3006 | | * with the respective CPUID bit clear, so no need for an explicit |
3007 | | * check of that one. |
3008 | | */ |
3009 | 203 | fail_if(ops->read_msr == NULL); |
3010 | 202 | if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY ) |
3011 | 0 | goto done; |
3012 | 202 | generate_exception_if((msr_val & EFER_SCE) == 0, X86_EXC_UD); |
3013 | 201 | generate_exception_if(!amd_like(ctxt) && !mode_64bit(), X86_EXC_UD); |
3014 | | |
3015 | 201 | if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY ) |
3016 | 0 | goto done; |
3017 | | |
3018 | 201 | cs.sel = (msr_val >> 32) & ~3; /* SELECTOR_RPL_MASK */ |
3019 | 201 | sreg.sel = cs.sel + 8; |
3020 | | |
3021 | 201 | cs.base = sreg.base = 0; /* flat segment */ |
3022 | 201 | cs.limit = sreg.limit = ~0u; /* 4GB limit */ |
3023 | 201 | sreg.attr = 0xc93; /* G+DB+P+S+Data */ |
3024 | | |
3025 | 201 | #ifdef __x86_64__ |
3026 | 201 | if ( ctxt->lma ) |
3027 | 200 | { |
3028 | 200 | cs.attr = 0xa9b; /* G+L+P+S+Code */ |
3029 | | |
3030 | 200 | _regs.rcx = _regs.rip; |
3031 | 200 | _regs.r11 = _regs.eflags & ~X86_EFLAGS_RF; |
3032 | | |
3033 | 200 | if ( (rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR, |
3034 | 200 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3035 | 0 | goto done; |
3036 | 200 | _regs.rip = msr_val; |
3037 | | |
3038 | 200 | if ( (rc = ops->read_msr(MSR_SYSCALL_MASK, |
3039 | 200 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3040 | 0 | goto done; |
3041 | 200 | _regs.eflags &= ~(msr_val | X86_EFLAGS_RF); |
3042 | 200 | } |
3043 | 1 | else |
3044 | 1 | #endif |
3045 | 1 | { |
3046 | 1 | cs.attr = 0xc9b; /* G+DB+P+S+Code */ |
3047 | | |
3048 | 1 | _regs.r(cx) = _regs.eip; |
3049 | 1 | _regs.eip = msr_val; |
3050 | 1 | _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF); |
3051 | 1 | } |
3052 | | |
3053 | 201 | fail_if(ops->write_segment == NULL); |
3054 | 200 | if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) || |
3055 | 200 | (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) ) |
3056 | 5 | goto done; |
3057 | | |
3058 | 195 | if ( ctxt->lma ) |
3059 | | /* In particular mode_64bit() needs to return true from here on. */ |
3060 | 195 | ctxt->addr_size = ctxt->sp_size = 64; |
3061 | | |
3062 | | /* |
3063 | | * SYSCALL (unlike most instructions) evaluates its singlestep action |
3064 | | * based on the resulting EFLAGS.TF, not the starting EFLAGS.TF. |
3065 | | * |
3066 | | * As the #DB is raised after the CPL change and before the OS can |
3067 | | * switch stack, it is a large risk for privilege escalation. |
3068 | | * |
3069 | | * 64bit kernels should mask EFLAGS.TF in MSR_SYSCALL_MASK to avoid any |
3070 | | * vulnerability. Running the #DB handler on an IST stack is also a |
3071 | | * mitigation. |
3072 | | * |
3073 | | * 32bit kernels have no ability to mask EFLAGS.TF at all. |
3074 | | * Their only mitigation is to use a task gate for handling |
3075 | | * #DB (or to not enable EFER.SCE to start with). |
3076 | | */ |
3077 | 195 | singlestep = _regs.eflags & X86_EFLAGS_TF; |
3078 | 195 | break; |
3079 | | |
3080 | 777 | case X86EMUL_OPC(0x0f, 0x06): /* clts */ |
3081 | 777 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3082 | 776 | fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL)); |
3083 | 774 | if ( (rc = ops->read_cr(0, &dst.val, ctxt)) != X86EMUL_OKAY || |
3084 | 774 | (rc = ops->write_cr(0, dst.val & ~X86_CR0_TS, ctxt)) != X86EMUL_OKAY ) |
3085 | 4 | goto done; |
3086 | 770 | break; |
3087 | | |
3088 | 770 | case X86EMUL_OPC(0x0f, 0x07): /* sysret */ |
3089 | | /* |
3090 | | * Inject #UD if syscall/sysret are disabled. EFER.SCE can't be set |
3091 | | * with the respective CPUID bit clear, so no need for an explicit |
3092 | | * check of that one. |
3093 | | */ |
3094 | 40 | fail_if(!ops->read_msr); |
3095 | 39 | if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY ) |
3096 | 0 | goto done; |
3097 | 39 | generate_exception_if(!(msr_val & EFER_SCE), X86_EXC_UD); |
3098 | 38 | generate_exception_if(!amd_like(ctxt) && !mode_64bit(), X86_EXC_UD); |
3099 | 38 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3100 | 35 | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0); |
3101 | 35 | #ifdef __x86_64__ |
3102 | | /* |
3103 | | * Doing this for just Intel (rather than e.g. !amd_like()) as this |
3104 | | * in fact risks making guest OSes vulnerable to the equivalent of |
3105 | | * XSA-7 (CVE-2012-0217). |
3106 | | */ |
3107 | 35 | generate_exception_if(cp->x86_vendor == X86_VENDOR_INTEL && |
3108 | 35 | op_bytes == 8 && !is_canonical_address(_regs.rcx), |
3109 | 35 | X86_EXC_GP, 0); |
3110 | 35 | #endif |
3111 | | |
3112 | 35 | if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY ) |
3113 | 0 | goto done; |
3114 | | |
3115 | 35 | sreg.sel = ((msr_val >> 48) + 8) | 3; /* SELECTOR_RPL_MASK */ |
3116 | 35 | cs.sel = op_bytes == 8 ? sreg.sel + 8 : sreg.sel - 8; |
3117 | | |
3118 | 35 | cs.base = sreg.base = 0; /* flat segment */ |
3119 | 35 | cs.limit = sreg.limit = ~0u; /* 4GB limit */ |
3120 | 35 | cs.attr = 0xcfb; /* G+DB+P+DPL3+S+Code */ |
3121 | 35 | sreg.attr = 0xcf3; /* G+DB+P+DPL3+S+Data */ |
3122 | | |
3123 | | /* Only the selector part of SS gets updated by AMD and the like. */ |
3124 | 35 | if ( amd_like(ctxt) ) |
3125 | 34 | { |
3126 | 34 | fail_if(!ops->read_segment); |
3127 | 34 | if ( (rc = ops->read_segment(x86_seg_ss, &sreg, |
3128 | 34 | ctxt)) != X86EMUL_OKAY ) |
3129 | 0 | goto done; |
3130 | | |
3131 | | /* There's explicitly no RPL adjustment here. */ |
3132 | 34 | sreg.sel = (msr_val >> 48) + 8; |
3133 | | /* But DPL needs adjustment, for the new CPL to be correct. */ |
3134 | 34 | sreg.dpl = 3; |
3135 | 34 | } |
3136 | | |
3137 | 35 | #ifdef __x86_64__ |
3138 | 35 | if ( mode_64bit() ) |
3139 | 32 | { |
3140 | 32 | if ( op_bytes == 8 ) |
3141 | 23 | { |
3142 | 23 | cs.attr = 0xafb; /* G+L+P+DPL3+S+Code */ |
3143 | 23 | _regs.rip = _regs.rcx; |
3144 | 23 | } |
3145 | 9 | else |
3146 | 9 | _regs.rip = _regs.ecx; |
3147 | | |
3148 | 32 | _regs.eflags = _regs.r11 & ~(X86_EFLAGS_RF | X86_EFLAGS_VM); |
3149 | 32 | } |
3150 | 3 | else |
3151 | 3 | #endif |
3152 | 3 | { |
3153 | 3 | _regs.r(ip) = _regs.ecx; |
3154 | 3 | _regs.eflags |= X86_EFLAGS_IF; |
3155 | 3 | } |
3156 | | |
3157 | 35 | fail_if(!ops->write_segment); |
3158 | 34 | if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != X86EMUL_OKAY || |
3159 | 34 | (rc = ops->write_segment(x86_seg_ss, &sreg, |
3160 | 16 | ctxt)) != X86EMUL_OKAY ) |
3161 | 18 | goto done; |
3162 | | |
3163 | 16 | singlestep = _regs.eflags & X86_EFLAGS_TF; |
3164 | 16 | break; |
3165 | | |
3166 | 218 | case X86EMUL_OPC(0x0f, 0x08): /* invd */ |
3167 | 566 | case X86EMUL_OPC(0x0f, 0x09): /* wbinvd / wbnoinvd */ |
3168 | 566 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3169 | 565 | fail_if(!ops->cache_op); |
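 | | /* |
 | |  * For opcode 0x09 an F3 (repe) prefix selects wbnoinvd, but only when |
 | |  * the vCPU actually has that feature; otherwise plain wbinvd is used. |
 | |  * Opcode 0x08 always maps to invd. |
 | |  */ |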
3170 | 564 | if ( (rc = ops->cache_op(b == 0x09 ? !repe_prefix() || |
3171 | 344 | !vcpu_has_wbnoinvd() |
3172 | 344 | ? x86emul_wbinvd |
3173 | 344 | : x86emul_wbnoinvd |
3174 | 564 | : x86emul_invd, |
3175 | 564 | x86_seg_none, 0, |
3176 | 564 | ctxt)) != X86EMUL_OKAY ) |
3177 | 12 | goto done; |
3178 | 552 | break; |
3179 | | |
3180 | 552 | case X86EMUL_OPC(0x0f, 0x0b): /* ud2 */ |
3181 | 2 | case X86EMUL_OPC(0x0f, 0xb9): /* ud1 */ |
3182 | 6 | case X86EMUL_OPC(0x0f, 0xff): /* ud0 */ |
3183 | 6 | generate_exception(X86_EXC_UD); |
3184 | | |
3185 | 216 | case X86EMUL_OPC(0x0f, 0x0d): /* GrpP (prefetch) */ |
3186 | 414 | case X86EMUL_OPC(0x0f, 0x18): /* Grp16 (prefetch/nop) */ |
3187 | 1.74k | case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */ |
3188 | 1.74k | break; |
3189 | | |
3190 | 0 | #ifndef X86EMUL_NO_MMX |
3191 | | |
3192 | 1 | case X86EMUL_OPC(0x0f, 0x0e): /* femms */ |
3193 | 1 | host_and_vcpu_must_have(3dnow); |
3194 | 0 | asm volatile ( "femms" ); |
3195 | 0 | break; |
3196 | | |
3197 | 6 | case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */ |
3198 | 6 | if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) ) |
3199 | 6 | host_and_vcpu_must_have(3dnow); |
3200 | 5 | else if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) ) |
3201 | 5 | host_and_vcpu_must_have(3dnow_ext); |
3202 | 4 | else |
3203 | 4 | generate_exception(X86_EXC_UD); |
3204 | | |
3205 | 0 | get_fpu(X86EMUL_FPU_mmx); |
3206 | | |
3207 | 0 | d = DstReg | SrcMem; |
3208 | 0 | op_bytes = 8; |
3209 | 0 | state->simd_size = simd_other; |
3210 | 0 | goto simd_0f_imm8; |
3211 | | |
3212 | 0 | #endif /* !X86EMUL_NO_MMX */ |
3213 | | |
3214 | 0 | #if !defined(X86EMUL_NO_SIMD) && !defined(X86EMUL_NO_MMX) |
3215 | 0 | # define CASE_SIMD_PACKED_INT(pfx, opc) \ |
3216 | 523k | case X86EMUL_OPC(pfx, opc): \ |
3217 | 533k | case X86EMUL_OPC_66(pfx, opc) |
3218 | | #elif !defined(X86EMUL_NO_SIMD) |
3219 | | # define CASE_SIMD_PACKED_INT(pfx, opc) \ |
3220 | | case X86EMUL_OPC_66(pfx, opc) |
3221 | | #elif !defined(X86EMUL_NO_MMX) |
3222 | | # define CASE_SIMD_PACKED_INT(pfx, opc) \ |
3223 | | case X86EMUL_OPC(pfx, opc) |
3224 | | #else |
3225 | | # define CASE_SIMD_PACKED_INT(pfx, opc) C##pfx##_##opc |
3226 | | #endif |
3227 | | |
3228 | 0 | #ifndef X86EMUL_NO_SIMD |
3229 | | |
3230 | 0 | # define CASE_SIMD_PACKED_INT_VEX(pfx, opc) \ |
3231 | 492k | CASE_SIMD_PACKED_INT(pfx, opc): \ |
3232 | 499k | case X86EMUL_OPC_VEX_66(pfx, opc) |
3233 | | |
3234 | 0 | # define CASE_SIMD_ALL_FP(kind, pfx, opc) \ |
3235 | 160k | CASE_SIMD_PACKED_FP(kind, pfx, opc): \ |
3236 | 163k | CASE_SIMD_SCALAR_FP(kind, pfx, opc) |
3237 | 0 | # define CASE_SIMD_PACKED_FP(kind, pfx, opc) \ |
3238 | 272k | case X86EMUL_OPC##kind(pfx, opc): \ |
3239 | 279k | case X86EMUL_OPC##kind##_66(pfx, opc) |
3240 | 0 | # define CASE_SIMD_SCALAR_FP(kind, pfx, opc) \ |
3241 | 167k | case X86EMUL_OPC##kind##_F3(pfx, opc): \ |
3242 | 172k | case X86EMUL_OPC##kind##_F2(pfx, opc) |
3243 | 0 | # define CASE_SIMD_SINGLE_FP(kind, pfx, opc) \ |
3244 | 26.8k | case X86EMUL_OPC##kind(pfx, opc): \ |
3245 | 27.2k | case X86EMUL_OPC##kind##_F3(pfx, opc) |
3246 | | |
3247 | 0 | # define CASE_SIMD_ALL_FP_VEX(pfx, opc) \ |
3248 | 80.9k | CASE_SIMD_ALL_FP(, pfx, opc): \ |
3249 | 80.9k | CASE_SIMD_ALL_FP(_VEX, pfx, opc) |
3250 | 0 | # define CASE_SIMD_PACKED_FP_VEX(pfx, opc) \ |
3251 | 56.4k | CASE_SIMD_PACKED_FP(, pfx, opc): \ |
3252 | 58.1k | CASE_SIMD_PACKED_FP(_VEX, pfx, opc) |
3253 | 0 | # define CASE_SIMD_SCALAR_FP_VEX(pfx, opc) \ |
3254 | 2.32k | CASE_SIMD_SCALAR_FP(, pfx, opc): \ |
3255 | 2.62k | CASE_SIMD_SCALAR_FP(_VEX, pfx, opc) |
3256 | 0 | # define CASE_SIMD_SINGLE_FP_VEX(pfx, opc) \ |
3257 | 13.3k | CASE_SIMD_SINGLE_FP(, pfx, opc): \ |
3258 | 13.5k | CASE_SIMD_SINGLE_FP(_VEX, pfx, opc) |
3259 | | |
3260 | | #else |
3261 | | |
3262 | | # define CASE_SIMD_PACKED_INT_VEX(pfx, opc) \ |
3263 | | CASE_SIMD_PACKED_INT(pfx, opc) |
3264 | | |
3265 | | # define CASE_SIMD_ALL_FP(kind, pfx, opc) C##kind##pfx##_##opc |
3266 | | # define CASE_SIMD_PACKED_FP(kind, pfx, opc) Cp##kind##pfx##_##opc |
3267 | | # define CASE_SIMD_SCALAR_FP(kind, pfx, opc) Cs##kind##pfx##_##opc |
3268 | | # define CASE_SIMD_SINGLE_FP(kind, pfx, opc) C##kind##pfx##_##opc |
3269 | | |
3270 | | # define CASE_SIMD_ALL_FP_VEX(pfx, opc) CASE_SIMD_ALL_FP(, pfx, opc) |
3271 | | # define CASE_SIMD_PACKED_FP_VEX(pfx, opc) CASE_SIMD_PACKED_FP(, pfx, opc) |
3272 | | # define CASE_SIMD_SCALAR_FP_VEX(pfx, opc) CASE_SIMD_SCALAR_FP(, pfx, opc) |
3273 | | # define CASE_SIMD_SINGLE_FP_VEX(pfx, opc) CASE_SIMD_SINGLE_FP(, pfx, opc) |
3274 | | |
3275 | | #endif |
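 | | /* |
 | |  * Worked example (expansion follows from the definitions above): with |
 | |  * X86EMUL_NO_SIMD undefined, CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2b) |
 | |  * expands to |
 | |  *   case X86EMUL_OPC(0x0f, 0x2b):       (movntps) |
 | |  *   case X86EMUL_OPC_66(0x0f, 0x2b):    (movntpd) |
 | |  *   case X86EMUL_OPC_VEX(0x0f, 0x2b):   (vmovntps) |
 | |  *   case X86EMUL_OPC_VEX_66(0x0f, 0x2b) (vmovntpd) |
 | |  * i.e. one case label per legacy / 66 / VEX / VEX.66 encoding. |
 | |  */ |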
3276 | | |
3277 | 3 | CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b): /* movnts{s,d} xmm,mem */ |
3278 | 3 | host_and_vcpu_must_have(sse4a); |
3279 | | /* fall through */ |
3280 | 1.86k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2b): /* movntp{s,d} xmm,m128 */ |
3281 | | /* vmovntp{s,d} {x,y}mm,mem */ |
3282 | 1.86k | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
3283 | 695 | sfence = true; |
3284 | | /* fall through */ |
3285 | 9.64k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x10): /* mov{up,s}{s,d} xmm/mem,xmm */ |
3286 | | /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */ |
3287 | | /* vmovs{s,d} mem,xmm */ |
3288 | | /* vmovs{s,d} xmm,xmm,xmm */ |
3289 | 18.4k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x11): /* mov{up,s}{s,d} xmm,xmm/mem */ |
3290 | | /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */ |
3291 | | /* vmovs{s,d} xmm,mem */ |
3292 | | /* vmovs{s,d} xmm,xmm,xmm */ |
3293 | 18.4k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x14): /* unpcklp{s,d} xmm/m128,xmm */ |
3294 | | /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3295 | 15.3k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x15): /* unpckhp{s,d} xmm/m128,xmm */ |
3296 | | /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3297 | 17.3k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x28): /* movap{s,d} xmm/m128,xmm */ |
3298 | | /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */ |
3299 | 20.3k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x29): /* movap{s,d} xmm,xmm/m128 */ |
3300 | | /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */ |
3301 | 47.4k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x51): /* sqrt{p,s}{s,d} xmm/mem,xmm */ |
3302 | | /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */ |
3303 | | /* vsqrts{s,d} xmm/m32,xmm,xmm */ |
3304 | 47.4k | CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x52): /* rsqrt{p,s}s xmm/mem,xmm */ |
3305 | | /* vrsqrtps {x,y}mm/mem,{x,y}mm */ |
3306 | | /* vrsqrtss xmm/m32,xmm,xmm */ |
3307 | 27.9k | CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x53): /* rcp{p,s}s xmm/mem,xmm */ |
3308 | | /* vrcpps {x,y}mm/mem,{x,y}mm */ |
3309 | | /* vrcpss xmm/m32,xmm,xmm */ |
3310 | 29.4k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x54): /* andp{s,d} xmm/m128,xmm */ |
3311 | | /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3312 | 30.9k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x55): /* andnp{s,d} xmm/m128,xmm */ |
3313 | | /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3314 | 33.3k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x56): /* orp{s,d} xmm/m128,xmm */ |
3315 | | /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3316 | 35.6k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x57): /* xorp{s,d} xmm/m128,xmm */ |
3317 | | /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3318 | 76.1k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x58): /* add{p,s}{s,d} xmm/mem,xmm */ |
3319 | | /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3320 | 83.8k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x59): /* mul{p,s}{s,d} xmm/mem,xmm */ |
3321 | | /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3322 | 91.3k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x5c): /* sub{p,s}{s,d} xmm/mem,xmm */ |
3323 | | /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3324 | 96.7k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x5d): /* min{p,s}{s,d} xmm/mem,xmm */ |
3325 | | /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3326 | 103k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x5e): /* div{p,s}{s,d} xmm/mem,xmm */ |
3327 | | /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3328 | 112k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x5f): /* max{p,s}{s,d} xmm/mem,xmm */ |
3329 | | /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
3330 | 112k | simd_0f_fp: |
3331 | 18.7k | if ( vex.opcx == vex_none ) |
3332 | 11.9k | { |
3333 | 11.9k | if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) |
3334 | 5.06k | { |
3335 | 16.5k | simd_0f_sse2: |
3336 | 16.5k | vcpu_must_have(sse2); |
3337 | 16.5k | } |
3338 | 6.85k | else |
3339 | 11.9k | vcpu_must_have(sse); |
3340 | 24.4k | simd_0f_xmm: |
3341 | 24.4k | get_fpu(X86EMUL_FPU_xmm); |
3342 | 24.4k | } |
3343 | 6.84k | else |
3344 | 6.84k | { |
3345 | | /* vmovs{s,d} to/from memory have only two operands. */ |
3346 | 6.84k | if ( (b & ~1) == 0x10 && ea.type == OP_MEM ) |
3347 | 801 | d |= TwoOp; |
3348 | 18.9k | simd_0f_avx: |
3349 | 18.9k | host_and_vcpu_must_have(avx); |
3350 | 23.6k | simd_0f_ymm: |
3351 | 23.6k | get_fpu(X86EMUL_FPU_ymm); |
3352 | 23.6k | } |
3353 | 58.6k | simd_0f_common: |
3354 | 58.6k | opc = init_prefixes(stub); |
3355 | 0 | opc[0] = b; |
3356 | 58.6k | opc[1] = modrm; |
3357 | 58.6k | if ( ea.type == OP_MEM ) |
3358 | 35.6k | { |
3359 | | /* convert memory operand to (%rAX) */ |
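 | | /* |
 | |  * Note: "opc[1] &= 0x38" below keeps only the ModRM reg field and |
 | |  * forces mod=00, r/m=000, i.e. a plain (%rax) memory operand, while |
 | |  * dropping REX.B / setting the (inverted) VEX.B bit keeps the base |
 | |  * register from being extended to %r8, so the stub can later be run |
 | |  * with %rax pointing at an emulator-local buffer instead of the |
 | |  * guest address. |
 | |  */ |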
3360 | 35.6k | rex_prefix &= ~REX_B; |
3361 | 35.6k | vex.b = 1; |
3362 | 35.6k | opc[1] &= 0x38; |
3363 | 35.6k | } |
3364 | 58.6k | insn_bytes = PFX_BYTES + 2; |
3365 | 58.6k | break; |
3366 | | |
3367 | 5 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2b): /* vmovntp{s,d} [xyz]mm,mem */ |
3368 | 5 | generate_exception_if(ea.type != OP_MEM || evex.opmsk, X86_EXC_UD); |
3369 | 1 | sfence = true; |
3370 | | /* fall through */ |
3371 | 5 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x10): /* vmovup{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
3372 | 9 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm{k} */ |
3373 | | /* vmovs{s,d} xmm,xmm,xmm{k} */ |
3374 | 13 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x11): /* vmovup{s,d} [xyz]mm,[xyz]mm/mem{k} */ |
3375 | 19 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem{k} */ |
3376 | | /* vmovs{s,d} xmm,xmm,xmm{k} */ |
3377 | 23 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x28): /* vmovap{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
3378 | 26 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x29): /* vmovap{s,d} [xyz]mm,[xyz]mm/mem{k} */ |
3379 | | /* vmovs{s,d} to/from memory have only two operands. */ |
3380 | 26 | if ( (b & ~1) == 0x10 && ea.type == OP_MEM ) |
3381 | 8 | d |= TwoOp; |
3382 | 26 | generate_exception_if(evex.brs, X86_EXC_UD); |
3383 | | /* fall through */ |
3384 | 54 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x51): /* vsqrtp{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
3385 | | /* vsqrts{s,d} xmm/m32,xmm,xmm{k} */ |
3386 | 70 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x58): /* vadd{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3387 | 86 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x59): /* vmul{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3388 | 98 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5c): /* vsub{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3389 | 114 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5d): /* vmin{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3390 | 130 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5e): /* vdiv{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3391 | 146 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5f): /* vmax{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3392 | 146 | avx512f_all_fp: |
3393 | 43 | generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) || |
3394 | 43 | (ea.type != OP_REG && evex.brs && |
3395 | 43 | (evex.pfx & VEX_PREFIX_SCALAR_MASK))), |
3396 | 43 | X86_EXC_UD); |
3397 | 19 | host_and_vcpu_must_have(avx512f); |
3398 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
3399 | 0 | avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK); |
3400 | 0 | simd_zmm: |
3401 | 0 | get_fpu(X86EMUL_FPU_zmm); |
3402 | 0 | opc = init_evex(stub); |
3403 | 0 | opc[0] = b; |
3404 | 0 | opc[1] = modrm; |
3405 | 0 | if ( ea.type == OP_MEM ) |
3406 | 0 | { |
3407 | | /* convert memory operand to (%rAX) */ |
3408 | 0 | evex.b = 1; |
3409 | 0 | opc[1] &= 0x38; |
3410 | 0 | } |
3411 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
3412 | 0 | break; |
3413 | | |
3414 | 0 | #ifndef X86EMUL_NO_SIMD |
3415 | | |
3416 | 204 | case X86EMUL_OPC_66(0x0f, 0x12): /* movlpd m64,xmm */ |
3417 | 398 | case X86EMUL_OPC_VEX_66(0x0f, 0x12): /* vmovlpd m64,xmm,xmm */ |
3418 | 4.32k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x13): /* movlp{s,d} xmm,m64 */ |
3419 | | /* vmovlp{s,d} xmm,m64 */ |
3420 | 4.32k | case X86EMUL_OPC_66(0x0f, 0x16): /* movhpd m64,xmm */ |
3421 | 1.87k | case X86EMUL_OPC_VEX_66(0x0f, 0x16): /* vmovhpd m64,xmm,xmm */ |
3422 | 9.21k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x17): /* movhp{s,d} xmm,m64 */ |
3423 | | /* vmovhp{s,d} xmm,m64 */ |
3424 | 9.21k | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
3425 | | /* fall through */ |
3426 | 2.77k | case X86EMUL_OPC(0x0f, 0x12): /* movlps m64,xmm */ |
3427 | | /* movhlps xmm,xmm */ |
3428 | 2.83k | case X86EMUL_OPC_VEX(0x0f, 0x12): /* vmovlps m64,xmm,xmm */ |
3429 | | /* vmovhlps xmm,xmm,xmm */ |
3430 | 3.07k | case X86EMUL_OPC(0x0f, 0x16): /* movhps m64,xmm */ |
3431 | | /* movlhps xmm,xmm */ |
3432 | 3.13k | case X86EMUL_OPC_VEX(0x0f, 0x16): /* vmovhps m64,xmm,xmm */ |
3433 | | /* vmovlhps xmm,xmm,xmm */ |
3434 | 3.13k | generate_exception_if(vex.l, X86_EXC_UD); |
3435 | 3.13k | if ( (d & DstMask) != DstMem ) |
3436 | 1.49k | d &= ~TwoOp; |
3437 | 3.13k | op_bytes = 8; |
3438 | 3.13k | goto simd_0f_fp; |
3439 | | |
3440 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0x12): /* vmovlpd m64,xmm,xmm */ |
3441 | 11 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x13): /* vmovlp{s,d} xmm,m64 */ |
3442 | 11 | case X86EMUL_OPC_EVEX_66(0x0f, 0x16): /* vmovhpd m64,xmm,xmm */ |
3443 | 19 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x17): /* vmovhp{s,d} xmm,m64 */ |
3444 | 19 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
3445 | | /* fall through */ |
3446 | 10 | case X86EMUL_OPC_EVEX(0x0f, 0x12): /* vmovlps m64,xmm,xmm */ |
3447 | | /* vmovhlps xmm,xmm,xmm */ |
3448 | 11 | case X86EMUL_OPC_EVEX(0x0f, 0x16): /* vmovhps m64,xmm,xmm */ |
3449 | | /* vmovlhps xmm,xmm,xmm */ |
3450 | 11 | generate_exception_if((evex.lr || evex.opmsk || evex.brs || |
3451 | 11 | evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK)), |
3452 | 11 | X86_EXC_UD); |
3453 | 1 | host_and_vcpu_must_have(avx512f); |
3454 | 0 | if ( (d & DstMask) != DstMem ) |
3455 | 0 | d &= ~TwoOp; |
3456 | 0 | op_bytes = 8; |
3457 | 0 | goto simd_zmm; |
3458 | | |
3459 | 68 | case X86EMUL_OPC_F3(0x0f, 0x12): /* movsldup xmm/m128,xmm */ |
3460 | 134 | case X86EMUL_OPC_VEX_F3(0x0f, 0x12): /* vmovsldup {x,y}mm/mem,{x,y}mm */ |
3461 | 200 | case X86EMUL_OPC_F2(0x0f, 0x12): /* movddup xmm/m64,xmm */ |
3462 | 265 | case X86EMUL_OPC_VEX_F2(0x0f, 0x12): /* vmovddup {x,y}mm/mem,{x,y}mm */ |
3463 | 331 | case X86EMUL_OPC_F3(0x0f, 0x16): /* movshdup xmm/m128,xmm */ |
3464 | 365 | case X86EMUL_OPC_VEX_F3(0x0f, 0x16): /* vmovshdup {x,y}mm/mem,{x,y}mm */ |
3465 | 365 | d |= TwoOp; |
3466 | 365 | op_bytes = !(vex.pfx & VEX_PREFIX_DOUBLE_MASK) || vex.l |
3467 | 365 | ? 16 << vex.l : 8; |
3468 | 2.20k | simd_0f_sse3_avx: |
3469 | 2.20k | if ( vex.opcx != vex_none ) |
3470 | 1.19k | goto simd_0f_avx; |
3471 | 1.01k | host_and_vcpu_must_have(sse3); |
3472 | 1.01k | goto simd_0f_xmm; |
3473 | | |
3474 | 1.01k | case X86EMUL_OPC_EVEX_F3(0x0f, 0x12): /* vmovsldup [xyz]mm/mem,[xyz]mm{k} */ |
3475 | 4 | case X86EMUL_OPC_EVEX_F2(0x0f, 0x12): /* vmovddup [xyz]mm/mem,[xyz]mm{k} */ |
3476 | 5 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x16): /* vmovshdup [xyz]mm/mem,[xyz]mm{k} */ |
3477 | 5 | generate_exception_if((evex.brs || |
3478 | 5 | evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK)), |
3479 | 5 | X86_EXC_UD); |
3480 | 1 | host_and_vcpu_must_have(avx512f); |
3481 | 0 | avx512_vlen_check(false); |
3482 | 0 | d |= TwoOp; |
3483 | 0 | op_bytes = !(evex.pfx & VEX_PREFIX_DOUBLE_MASK) || evex.lr |
3484 | 0 | ? 16 << evex.lr : 8; |
3485 | 0 | fault_suppression = false; |
3486 | 0 | goto simd_zmm; |
3487 | | |
3488 | 3 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x14): /* vunpcklp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3489 | 7 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x15): /* vunpckhp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3490 | 7 | generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK), |
3491 | 7 | X86_EXC_UD); |
3492 | | /* fall through */ |
3493 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x76): /* vpermi2{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3494 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x77): /* vpermi2p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3495 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7e): /* vpermt2{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3496 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7f): /* vpermt2p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3497 | 6 | fault_suppression = false; |
3498 | | /* fall through */ |
3499 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0xdb): /* vpand{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3500 | 8 | case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3501 | 9 | case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3502 | 10 | case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3503 | 11 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x14): /* vprorv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3504 | 12 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x15): /* vprolv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3505 | 13 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3506 | 14 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3507 | 15 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3508 | 16 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3509 | 17 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x45): /* vpsrlv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3510 | 18 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x46): /* vpsrav{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3511 | 19 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x47): /* vpsllv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3512 | 20 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x4c): /* vrcp14p{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
3513 | 21 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x4e): /* vrsqrt14p{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
3514 | 22 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x64): /* vpblendm{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3515 | 23 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x65): /* vblendmp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
3516 | 60 | avx512f_no_sae: |
3517 | 60 | host_and_vcpu_must_have(avx512f); |
3518 | 0 | generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD); |
3519 | 0 | avx512_vlen_check(false); |
3520 | 0 | goto simd_zmm; |
3521 | | |
3522 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
3523 | | |
3524 | 317 | case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */ |
3525 | 320 | case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */ |
3526 | 428 | case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */ |
3527 | 430 | case X86EMUL_OPC(0x0f, 0x23): /* mov reg,dr */ |
3528 | 430 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3529 | 426 | if ( b & 2 ) |
3530 | 106 | { |
3531 | | /* Write to CR/DR. */ |
3532 | 106 | typeof(ops->write_cr) write = (b & 1) ? ops->write_dr |
3533 | 106 | : ops->write_cr; |
3534 | | |
3535 | 106 | fail_if(!write); |
3536 | 105 | rc = write(modrm_reg, src.val, ctxt); |
3537 | 105 | } |
3538 | 320 | else |
3539 | 320 | { |
3540 | | /* Read from CR/DR. */ |
3541 | 320 | typeof(ops->read_cr) read = (b & 1) ? ops->read_dr : ops->read_cr; |
3542 | | |
3543 | 320 | fail_if(!read); |
3544 | 318 | rc = read(modrm_reg, &dst.val, ctxt); |
3545 | 318 | } |
3546 | 423 | if ( rc != X86EMUL_OKAY ) |
3547 | 4 | goto done; |
3548 | 419 | break; |
3549 | | |
3550 | 419 | #if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD) |
3551 | | |
3552 | 420 | case X86EMUL_OPC_66(0x0f, 0x2a): /* cvtpi2pd mm/m64,xmm */ |
3553 | 420 | if ( ea.type == OP_REG ) |
3554 | 215 | { |
3555 | 574 | case X86EMUL_OPC(0x0f, 0x2a): /* cvtpi2ps mm/m64,xmm */ |
3556 | 1.61k | CASE_SIMD_PACKED_FP(, 0x0f, 0x2c): /* cvttp{s,d}2pi xmm/mem,mm */ |
3557 | 2.01k | CASE_SIMD_PACKED_FP(, 0x0f, 0x2d): /* cvtp{s,d}2pi xmm/mem,mm */ |
3558 | 2.01k | host_and_vcpu_must_have(mmx); |
3559 | 2.01k | } |
3560 | 1.24k | op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8; |
3561 | 1.24k | goto simd_0f_fp; |
3562 | | |
3563 | 0 | #endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */ |
3564 | | |
3565 | 2.90k | CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2a): /* {,v}cvtsi2s{s,d} r/m,xmm */ |
3566 | 2.90k | if ( vex.opcx == vex_none ) |
3567 | 750 | { |
3568 | 750 | if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) |
3569 | 750 | vcpu_must_have(sse2); |
3570 | 402 | else |
3571 | 750 | vcpu_must_have(sse); |
3572 | 750 | get_fpu(X86EMUL_FPU_xmm); |
3573 | 750 | } |
3574 | 162 | else |
3575 | 162 | { |
3576 | 162 | host_and_vcpu_must_have(avx); |
3577 | 162 | get_fpu(X86EMUL_FPU_ymm); |
3578 | 162 | } |
3579 | | |
3580 | 909 | if ( ea.type == OP_MEM ) |
3581 | 202 | { |
3582 | 202 | rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, |
3583 | 202 | rex_prefix & REX_W ? 8 : 4, ctxt, ops); |
3584 | 202 | if ( rc != X86EMUL_OKAY ) |
3585 | 16 | goto done; |
3586 | 202 | } |
3587 | 707 | else |
3588 | 707 | src.val = rex_prefix & REX_W ? *ea.reg : (uint32_t)*ea.reg; |
3589 | | |
3590 | 893 | state->simd_size = simd_none; |
3591 | 893 | goto simd_0f_rm; |
3592 | | |
3593 | 0 | #ifndef X86EMUL_NO_SIMD |
3594 | | |
3595 | 1 | case X86EMUL_OPC_EVEX_F3(5, 0x2a): /* vcvtsi2sh r/m,xmm,xmm */ |
3596 | 2 | case X86EMUL_OPC_EVEX_F3(5, 0x7b): /* vcvtusi2sh r/m,xmm,xmm */ |
3597 | 2 | host_and_vcpu_must_have(avx512_fp16); |
3598 | | /* fall through */ |
3599 | 1 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */ |
3600 | 7 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x7b): /* vcvtusi2s{s,d} r/m,xmm,xmm */ |
3601 | 7 | generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs), |
3602 | 7 | X86_EXC_UD); |
3603 | 2 | host_and_vcpu_must_have(avx512f); |
3604 | 0 | if ( !evex.brs ) |
3605 | 0 | avx512_vlen_check(true); |
3606 | 0 | get_fpu(X86EMUL_FPU_zmm); |
3607 | | |
3608 | 0 | if ( ea.type == OP_MEM ) |
3609 | 0 | { |
3610 | 0 | rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, |
3611 | 0 | rex_prefix & REX_W ? 8 : 4, ctxt, ops); |
3612 | 0 | if ( rc != X86EMUL_OKAY ) |
3613 | 0 | goto done; |
3614 | 0 | } |
3615 | 0 | else |
3616 | 0 | src.val = *ea.reg; |
3617 | | |
3618 | 0 | opc = init_evex(stub); |
3619 | 0 | opc[0] = b; |
3620 | | /* Convert memory/GPR source to %rAX. */ |
3621 | 0 | evex.b = 1; |
3622 | 0 | if ( !mode_64bit() ) |
3623 | 0 | evex.w = 0; |
3624 | | /* |
3625 | | * While SDM version 085 has explicit wording that embedded rounding is |
3626 | | * ignored, it's still not entirely unambiguous as to the exception |
3627 | | * type referred to. Be on the safe side for the stub. |
3628 | | */ |
3629 | 0 | if ( !evex.w && evex.pfx == vex_f2 ) |
3630 | 0 | { |
3631 | 0 | evex.brs = 0; |
3632 | 0 | evex.lr = 0; |
3633 | 0 | } |
3634 | 0 | opc[1] = (modrm & 0x38) | 0xc0; |
3635 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
3636 | 0 | opc[2] = 0xc3; |
3637 | |
|
3638 | 0 | copy_EVEX(opc, evex); |
3639 | 0 | invoke_stub("", "", "=g" (dummy) : "a" (src.val)); |
3640 | |
|
3641 | 0 | put_stub(stub); |
3642 | 0 | state->simd_size = simd_none; |
3643 | 0 | break; |
3644 | | |
3645 | 1.64k | CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2c): /* {,v}cvtts{s,d}2si xmm/mem,reg */ |
3646 | 5.12k | CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2d): /* {,v}cvts{s,d}2si xmm/mem,reg */ |
3647 | 5.12k | if ( vex.opcx == vex_none ) |
3648 | 1.01k | { |
3649 | 1.01k | if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) |
3650 | 1.01k | vcpu_must_have(sse2); |
3651 | 493 | else |
3652 | 1.01k | vcpu_must_have(sse); |
3653 | 1.01k | get_fpu(X86EMUL_FPU_xmm); |
3654 | 1.01k | } |
3655 | 778 | else |
3656 | 778 | { |
3657 | 778 | generate_exception_if(vex.reg != 0xf, X86_EXC_UD); |
3658 | 772 | host_and_vcpu_must_have(avx); |
3659 | 772 | get_fpu(X86EMUL_FPU_ymm); |
3660 | | |
3661 | | /* Work around erratum BT230. */ |
3662 | 771 | vex.l = 0; |
3663 | 771 | } |
3664 | | |
3665 | 1.78k | opc = init_prefixes(stub); |
3666 | 1.78k | cvts_2si: |
3667 | 1.78k | opc[0] = b; |
3668 | | /* Convert GPR destination to %rAX and memory operand to (%rCX). */ |
3669 | 1.78k | rex_prefix &= ~REX_R; |
3670 | 1.78k | vex.r = 1; |
3671 | 1.78k | evex.r = 1; |
3672 | 1.78k | if ( ea.type == OP_MEM ) |
3673 | 1.20k | { |
3674 | 1.20k | rex_prefix &= ~REX_B; |
3675 | 1.20k | vex.b = 1; |
3676 | 1.20k | evex.b = 1; |
3677 | 1.20k | opc[1] = 0x01; |
3678 | | |
3679 | 1.20k | rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, |
3680 | 1.20k | vex.pfx & VEX_PREFIX_DOUBLE_MASK |
3681 | 1.20k | ? 8 : 2 << !state->fp16, |
3682 | 1.20k | ctxt); |
3683 | 1.20k | if ( rc != X86EMUL_OKAY ) |
3684 | 30 | goto done; |
3685 | 1.20k | } |
3686 | 579 | else |
3687 | 579 | opc[1] = modrm & 0xc7; |
3688 | 1.75k | if ( !mode_64bit() ) |
3689 | 1.08k | { |
3690 | 1.08k | vex.w = 0; |
3691 | 1.08k | evex.w = 0; |
3692 | 1.08k | } |
3693 | 1.75k | if ( evex_encoded() ) |
3694 | 0 | { |
3695 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
3696 | 0 | copy_EVEX(opc, evex); |
3697 | 0 | } |
3698 | 1.75k | else |
3699 | 1.75k | { |
3700 | 1.75k | insn_bytes = PFX_BYTES + 2; |
3701 | 1.75k | copy_REX_VEX(opc, rex_prefix, vex); |
3702 | 1.75k | } |
3703 | 1.75k | opc[2] = 0xc3; |
3704 | | |
3705 | 1.75k | ea.reg = decode_gpr(&_regs, modrm_reg); |
3706 | 1.75k | invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp)); |
3707 | | |
3708 | 1.75k | put_stub(stub); |
3709 | 1.75k | state->simd_size = simd_none; |
3710 | 1.75k | break; |
3711 | | |
3712 | 1 | case X86EMUL_OPC_EVEX_F3(5, 0x2c): /* vcvttsh2si xmm/mem,reg */ |
3713 | 2 | case X86EMUL_OPC_EVEX_F3(5, 0x2d): /* vcvtsh2si xmm/mem,reg */ |
3714 | 3 | case X86EMUL_OPC_EVEX_F3(5, 0x78): /* vcvttsh2usi xmm/mem,reg */ |
3715 | 4 | case X86EMUL_OPC_EVEX_F3(5, 0x79): /* vcvtsh2usi xmm/mem,reg */ |
3716 | 4 | host_and_vcpu_must_have(avx512_fp16); |
3717 | | /* fall through */ |
3718 | 7 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */ |
3719 | 13 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ |
3720 | 18 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */ |
3721 | 23 | CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */ |
3722 | 23 | generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || |
3723 | 23 | evex.opmsk || |
3724 | 23 | (ea.type != OP_REG && evex.brs)), |
3725 | 23 | X86_EXC_UD); |
3726 | 2 | host_and_vcpu_must_have(avx512f); |
3727 | 0 | if ( !evex.brs ) |
3728 | 0 | avx512_vlen_check(true); |
3729 | 0 | get_fpu(X86EMUL_FPU_zmm); |
3730 | 0 | opc = init_evex(stub); |
3731 | 0 | goto cvts_2si; |
3732 | | |
3733 | 1.99k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2e): /* {,v}ucomis{s,d} xmm/mem,xmm */ |
3734 | 5.17k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2f): /* {,v}comis{s,d} xmm/mem,xmm */ |
3735 | 5.17k | if ( vex.opcx == vex_none ) |
3736 | 781 | { |
3737 | 781 | if ( vex.pfx ) |
3738 | 781 | vcpu_must_have(sse2); |
3739 | 373 | else |
3740 | 781 | vcpu_must_have(sse); |
3741 | 781 | get_fpu(X86EMUL_FPU_xmm); |
3742 | 781 | } |
3743 | 885 | else |
3744 | 885 | { |
3745 | 885 | generate_exception_if(vex.reg != 0xf, X86_EXC_UD); |
3746 | 881 | host_and_vcpu_must_have(avx); |
3747 | 881 | get_fpu(X86EMUL_FPU_ymm); |
3748 | 881 | } |
3749 | | |
3750 | 1.66k | opc = init_prefixes(stub); |
3751 | 1.66k | op_bytes = 4 << vex.pfx; |
3752 | 1.66k | vcomi: |
3753 | 1.66k | opc[0] = b; |
3754 | 1.66k | opc[1] = modrm; |
3755 | 1.66k | if ( ea.type == OP_MEM ) |
3756 | 1.24k | { |
3757 | 1.24k | rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, op_bytes, ctxt); |
3758 | 1.24k | if ( rc != X86EMUL_OKAY ) |
3759 | 24 | goto done; |
3760 | | |
3761 | | /* Convert memory operand to (%rAX). */ |
3762 | 1.21k | rex_prefix &= ~REX_B; |
3763 | 1.21k | vex.b = 1; |
3764 | 1.21k | evex.b = 1; |
3765 | 1.21k | opc[1] &= 0x38; |
3766 | 1.21k | } |
3767 | 1.63k | if ( evex_encoded() ) |
3768 | 0 | { |
3769 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
3770 | 0 | copy_EVEX(opc, evex); |
3771 | 0 | } |
3772 | 1.63k | else |
3773 | 1.63k | { |
3774 | 1.63k | insn_bytes = PFX_BYTES + 2; |
3775 | 1.63k | copy_REX_VEX(opc, rex_prefix, vex); |
3776 | 1.63k | } |
3777 | 1.63k | opc[2] = 0xc3; |
3778 | | |
3779 | 1.63k | _regs.eflags &= ~EFLAGS_MASK; |
3780 | 1.63k | invoke_stub("", |
3781 | 1.63k | _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), |
3782 | 1.63k | [eflags] "+g" (_regs.eflags), |
3783 | 1.63k | [tmp] "=&r" (dummy), "+m" (*mmvalp) |
3784 | 1.63k | : "a" (mmvalp), [mask] "i" (EFLAGS_MASK)); |
3785 | | |
3786 | 1.63k | put_stub(stub); |
3787 | 1.63k | ASSERT(!state->simd_size); |
3788 | 1.63k | break; |
3789 | | |
3790 | 1.63k | case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */ |
3791 | 2 | case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */ |
3792 | 2 | host_and_vcpu_must_have(avx512_fp16); |
3793 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
3794 | | /* fall through */ |
3795 | 5 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */ |
3796 | 14 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */ |
3797 | 14 | generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk || |
3798 | 14 | (ea.type != OP_REG && evex.brs) || |
3799 | 14 | evex.w != evex.pfx), |
3800 | 14 | X86_EXC_UD); |
3801 | 1 | host_and_vcpu_must_have(avx512f); |
3802 | 0 | if ( !evex.brs ) |
3803 | 0 | avx512_vlen_check(true); |
3804 | 0 | get_fpu(X86EMUL_FPU_zmm); |
3805 | | |
3806 | 0 | opc = init_evex(stub); |
3807 | 0 | op_bytes = 2 << (!state->fp16 + evex.w); |
3808 | 0 | goto vcomi; |
3809 | | |
3810 | 0 | #endif |
3811 | | |
3812 | 1.39k | case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */ |
3813 | 1.39k | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3814 | 1.38k | fail_if(ops->write_msr == NULL); |
3815 | 1.38k | if ( (rc = ops->write_msr(_regs.ecx, |
3816 | 1.38k | ((uint64_t)_regs.r(dx) << 32) | _regs.eax, |
3817 | 1.38k | ctxt)) != 0 ) |
3818 | 118 | goto done; |
3819 | 1.26k | break; |
3820 | | |
3821 | 1.26k | case X86EMUL_OPC(0x0f, 0x31): rdtsc: /* rdtsc */ |
3822 | 522 | if ( !mode_ring0() ) |
3823 | 253 | { |
3824 | 253 | fail_if(ops->read_cr == NULL); |
3825 | 252 | if ( (rc = ops->read_cr(4, &cr4, ctxt)) ) |
3826 | 0 | goto done; |
3827 | 252 | generate_exception_if(cr4 & X86_CR4_TSD, X86_EXC_GP, 0); |
3828 | 252 | } |
3829 | 520 | fail_if(ops->read_msr == NULL); |
3830 | 519 | if ( (rc = ops->read_msr(MSR_IA32_TSC, |
3831 | 519 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3832 | 21 | goto done; |
3833 | 498 | _regs.r(dx) = msr_val >> 32; |
3834 | 498 | _regs.r(ax) = (uint32_t)msr_val; |
3835 | 498 | break; |
3836 | | |
3837 | 329 | case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */ |
3838 | 329 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3839 | 325 | fail_if(ops->read_msr == NULL); |
3840 | 324 | if ( (rc = ops->read_msr(_regs.ecx, &msr_val, ctxt)) != X86EMUL_OKAY ) |
3841 | 118 | goto done; |
3842 | 206 | _regs.r(dx) = msr_val >> 32; |
3843 | 206 | _regs.r(ax) = (uint32_t)msr_val; |
3844 | 206 | break; |
3845 | | |
3846 | 856 | case X86EMUL_OPC(0x0f, 0x34): /* sysenter */ |
3847 | 856 | vcpu_must_have(sep); |
3848 | 856 | generate_exception_if(amd_like(ctxt) && ctxt->lma, X86_EXC_UD); |
3849 | 808 | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0); |
3850 | | |
3851 | 807 | fail_if(ops->read_msr == NULL); |
3852 | 807 | if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS, |
3853 | 807 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3854 | 0 | goto done; |
3855 | | |
3856 | 807 | generate_exception_if(!(msr_val & 0xfffc), X86_EXC_GP, 0); |
3857 | | |
3858 | 806 | _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF); |
3859 | | |
3860 | 806 | cs.sel = msr_val & ~3; /* SELECTOR_RPL_MASK */ |
3861 | 806 | cs.base = 0; /* flat segment */ |
3862 | 806 | cs.limit = ~0u; /* 4GB limit */ |
3863 | 806 | cs.attr = ctxt->lma ? 0xa9b /* G+L+P+S+Code */ |
3864 | 806 | : 0xc9b; /* G+DB+P+S+Code */ |
3865 | | |
3866 | 806 | sreg.sel = cs.sel + 8; |
3867 | 806 | sreg.base = 0; /* flat segment */ |
3868 | 806 | sreg.limit = ~0u; /* 4GB limit */ |
3869 | 806 | sreg.attr = 0xc93; /* G+DB+P+S+Data */ |
3870 | | |
3871 | 806 | if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_EIP, |
3872 | 806 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3873 | 0 | goto done; |
3874 | 806 | _regs.r(ip) = ctxt->lma ? msr_val : (uint32_t)msr_val; |
3875 | | |
3876 | 806 | if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_ESP, |
3877 | 806 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3878 | 0 | goto done; |
3879 | 806 | _regs.r(sp) = ctxt->lma ? msr_val : (uint32_t)msr_val; |
3880 | | |
3881 | 806 | fail_if(!ops->write_segment); |
3882 | 806 | if ( (rc = ops->write_segment(x86_seg_cs, &cs, |
3883 | 806 | ctxt)) != X86EMUL_OKAY || |
3884 | 806 | (rc = ops->write_segment(x86_seg_ss, &sreg, |
3885 | 803 | ctxt)) != X86EMUL_OKAY ) |
3886 | 4 | goto done; |
3887 | | |
3888 | 802 | if ( ctxt->lma ) |
3889 | | /* In particular mode_64bit() needs to return true from here on. */ |
3890 | 0 | ctxt->addr_size = ctxt->sp_size = 64; |
3891 | | |
3892 | 802 | singlestep = _regs.eflags & X86_EFLAGS_TF; |
3893 | 802 | break; |
3894 | | |
3895 | 272 | case X86EMUL_OPC(0x0f, 0x35): /* sysexit */ |
3896 | 272 | vcpu_must_have(sep); |
3897 | 272 | generate_exception_if(amd_like(ctxt) && ctxt->lma, X86_EXC_UD); |
3898 | 213 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
3899 | 209 | generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0); |
3900 | | |
3901 | 208 | fail_if(ops->read_msr == NULL); |
3902 | 207 | if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS, |
3903 | 207 | &msr_val, ctxt)) != X86EMUL_OKAY ) |
3904 | 0 | goto done; |
3905 | | |
3906 | 207 | generate_exception_if(!(msr_val & 0xfffc), X86_EXC_GP, 0); |
3907 | 207 | generate_exception_if(op_bytes == 8 && |
3908 | 207 | (!is_canonical_address(_regs.r(dx)) || |
3909 | 207 | !is_canonical_address(_regs.r(cx))), |
3910 | 207 | X86_EXC_GP, 0); |
3911 | | |
3912 | 207 | cs.sel = (msr_val | 3) + /* SELECTOR_RPL_MASK */ |
3913 | 207 | (op_bytes == 8 ? 32 : 16); |
3914 | 207 | cs.base = 0; /* flat segment */ |
3915 | 207 | cs.limit = ~0u; /* 4GB limit */ |
3916 | 207 | cs.attr = op_bytes == 8 ? 0xafb /* G+L+P+DPL3+S+Code */ |
3917 | 207 | : 0xcfb; /* G+DB+P+DPL3+S+Code */ |
3918 | | |
3919 | 207 | sreg.sel = cs.sel + 8; |
3920 | 207 | sreg.base = 0; /* flat segment */ |
3921 | 207 | sreg.limit = ~0u; /* 4GB limit */ |
3922 | 207 | sreg.attr = 0xcf3; /* G+DB+P+DPL3+S+Data */ |
3923 | | |
3924 | 207 | fail_if(ops->write_segment == NULL); |
3925 | 206 | if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 || |
3926 | 206 | (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 ) |
3927 | 3 | goto done; |
3928 | | |
3929 | 203 | _regs.r(ip) = op_bytes == 8 ? _regs.r(dx) : _regs.edx; |
3930 | 203 | _regs.r(sp) = op_bytes == 8 ? _regs.r(cx) : _regs.ecx; |
3931 | | |
3932 | 203 | singlestep = _regs.eflags & X86_EFLAGS_TF; |
3933 | 203 | break; |
3934 | | |
3935 | 3.34k | case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */ |
3936 | 3.34k | vcpu_must_have(cmov); |
3937 | 3.34k | if ( test_cc(b, _regs.eflags) ) |
3938 | 1.58k | dst.val = src.val; |
3939 | 3.34k | break; |
3940 | | |
3941 | 0 | #ifndef X86EMUL_NO_SIMD |
3942 | | |
3943 | 10 | case X86EMUL_OPC_VEX(0x0f, 0x4a): /* kadd{w,q} k,k,k */ |
3944 | 10 | if ( !vex.w ) |
3945 | 10 | host_and_vcpu_must_have(avx512dq); |
3946 | | /* fall through */ |
3947 | 8 | case X86EMUL_OPC_VEX(0x0f, 0x41): /* kand{w,q} k,k,k */ |
3948 | 11 | case X86EMUL_OPC_VEX_66(0x0f, 0x41): /* kand{b,d} k,k,k */ |
3949 | 13 | case X86EMUL_OPC_VEX(0x0f, 0x42): /* kandn{w,q} k,k,k */ |
3950 | 14 | case X86EMUL_OPC_VEX_66(0x0f, 0x42): /* kandn{b,d} k,k,k */ |
3951 | 16 | case X86EMUL_OPC_VEX(0x0f, 0x45): /* kor{w,q} k,k,k */ |
3952 | 17 | case X86EMUL_OPC_VEX_66(0x0f, 0x45): /* kor{b,d} k,k,k */ |
3953 | 18 | case X86EMUL_OPC_VEX(0x0f, 0x46): /* kxnor{w,q} k,k,k */ |
3954 | 19 | case X86EMUL_OPC_VEX_66(0x0f, 0x46): /* kxnor{b,d} k,k,k */ |
3955 | 21 | case X86EMUL_OPC_VEX(0x0f, 0x47): /* kxor{w,q} k,k,k */ |
3956 | 22 | case X86EMUL_OPC_VEX_66(0x0f, 0x47): /* kxor{b,d} k,k,k */ |
3957 | 25 | case X86EMUL_OPC_VEX_66(0x0f, 0x4a): /* kadd{b,d} k,k,k */ |
3958 | 25 | generate_exception_if(!vex.l, X86_EXC_UD); |
3959 | 16 | opmask_basic: |
3960 | 16 | if ( vex.w ) |
3961 | 16 | host_and_vcpu_must_have(avx512bw); |
3962 | 6 | else if ( vex.pfx ) |
3963 | 1 | host_and_vcpu_must_have(avx512dq); |
3964 | 9 | opmask_common: |
3965 | 9 | host_and_vcpu_must_have(avx512f); |
3966 | 0 | generate_exception_if(!vex.r || (mode_64bit() && !(vex.reg & 8)) || |
3967 | 0 | ea.type != OP_REG, X86_EXC_UD); |
3968 | | |
3969 | 0 | vex.reg |= 8; |
3970 | 0 | d &= ~TwoOp; |
3971 | |
3972 | 0 | get_fpu(X86EMUL_FPU_opmask); |
3973 | | |
3974 | 0 | opc = init_prefixes(stub); |
3975 | 0 | opc[0] = b; |
3976 | 0 | opc[1] = modrm; |
3977 | 0 | insn_bytes = PFX_BYTES + 2; |
3978 | |
3979 | 0 | state->simd_size = simd_other; |
3980 | 0 | op_bytes = 1; /* Any non-zero value will do. */ |
3981 | 0 | break; |
3982 | | |
3983 | 3 | case X86EMUL_OPC_VEX(0x0f, 0x44): /* knot{w,q} k,k */ |
3984 | 6 | case X86EMUL_OPC_VEX_66(0x0f, 0x44): /* knot{b,d} k,k */ |
3985 | 6 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
3986 | 1 | goto opmask_basic; |
3987 | | |
3988 | 2 | case X86EMUL_OPC_VEX(0x0f, 0x4b): /* kunpck{w,d}{d,q} k,k,k */ |
3989 | 2 | generate_exception_if(!vex.l, X86_EXC_UD); |
3990 | 1 | host_and_vcpu_must_have(avx512bw); |
3991 | 0 | goto opmask_common; |
3992 | | |
3993 | 10 | case X86EMUL_OPC_VEX_66(0x0f, 0x4b): /* kunpckbw k,k,k */ |
3994 | 10 | generate_exception_if(!vex.l || vex.w, X86_EXC_UD); |
3995 | 4 | goto opmask_common; |
3996 | | |
3997 | 4 | #endif /* X86EMUL_NO_SIMD */ |
3998 | | |
3999 | 3.04k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x50): /* movmskp{s,d} xmm,reg */ |
4000 | | /* vmovmskp{s,d} {x,y}mm,reg */ |
4001 | 3.60k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd7): /* pmovmskb {,x}mm,reg */ |
4002 | | /* vpmovmskb {x,y}mm,reg */ |
4003 | 3.60k | opc = init_prefixes(stub); |
4004 | 0 | opc[0] = b; |
4005 | | /* Convert GPR destination to %rAX. */ |
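 | | /* |
 | |  * Note: "modrm & 0xc7" below clears the ModRM reg field, making the |
 | |  * destination register 0 (%rax/%eax), and dropping REX.R / setting the |
 | |  * (inverted) VEX.R bit prevents extension to %r8; the stub's result is |
 | |  * then collected through the "=a" output of invoke_stub() further down. |
 | |  */ |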
4006 | 1.31k | rex_prefix &= ~REX_R; |
4007 | 1.31k | vex.r = 1; |
4008 | 1.31k | if ( !mode_64bit() ) |
4009 | 790 | vex.w = 0; |
4010 | 1.31k | opc[1] = modrm & 0xc7; |
4011 | 1.31k | insn_bytes = PFX_BYTES + 2; |
4012 | 1.94k | simd_0f_to_gpr: |
4013 | 1.94k | opc[insn_bytes - PFX_BYTES] = 0xc3; |
4014 | | |
4015 | 1.94k | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
4016 | | |
4017 | 1.91k | if ( vex.opcx == vex_none ) |
4018 | 1.25k | { |
4019 | 1.25k | if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) |
4020 | 1.25k | vcpu_must_have(sse2); |
4021 | 681 | else |
4022 | 681 | { |
4023 | 681 | if ( b != 0x50 ) |
4024 | 189 | { |
4025 | 189 | host_and_vcpu_must_have(mmx); |
4026 | 189 | vcpu_must_have(mmxext); |
4027 | 189 | } |
4028 | 492 | else |
4029 | 681 | vcpu_must_have(sse); |
4030 | 681 | } |
4031 | 1.25k | if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ) |
4032 | 1.06k | get_fpu(X86EMUL_FPU_xmm); |
4033 | 189 | else |
4034 | 189 | get_fpu(X86EMUL_FPU_mmx); |
4035 | 1.25k | } |
4036 | 661 | else |
4037 | 661 | { |
4038 | 661 | generate_exception_if(vex.reg != 0xf, X86_EXC_UD); |
4039 | 657 | if ( b == 0x50 || !vex.l ) |
4040 | 657 | host_and_vcpu_must_have(avx); |
4041 | 63 | else |
4042 | 657 | host_and_vcpu_must_have(avx2); |
4043 | 657 | get_fpu(X86EMUL_FPU_ymm); |
4044 | 657 | } |
4045 | | |
4046 | 1.91k | copy_REX_VEX(opc, rex_prefix, vex); |
4047 | 1.91k | invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); |
4048 | | |
4049 | 1.91k | put_stub(stub); |
4050 | | |
4051 | 1.91k | ASSERT(!state->simd_size); |
4052 | 1.91k | dst.bytes = 4; |
4053 | 1.91k | break; |
4054 | | |
4055 | 3 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x54): /* vandp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4056 | 7 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x55): /* vandnp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4057 | 11 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x56): /* vorp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4058 | 19 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x57): /* vxorp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4059 | 19 | generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) || |
4060 | 19 | (ea.type != OP_MEM && evex.brs)), |
4061 | 19 | X86_EXC_UD); |
4062 | 5 | host_and_vcpu_must_have(avx512dq); |
4063 | 0 | avx512_vlen_check(false); |
4064 | 0 | goto simd_zmm; |
4065 | | |
4066 | 4.35k | CASE_SIMD_ALL_FP_VEX(0x0f, 0x5a): /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */ |
4067 | | /* vcvtp{s,d}2p{s,d} {x,y}mm/mem,{x,y}mm */ |
4068 | | /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */ |
4069 | 4.35k | op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) + |
4070 | 4.35k | !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK)); |
4071 | 4.35k | simd_0f_cvt: |
4072 | 2.08k | if ( vex.opcx == vex_none ) |
4073 | 1.22k | goto simd_0f_sse2; |
4074 | 852 | goto simd_0f_avx; |
4075 | | |
4076 | 852 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5a): /* vcvtp{s,d}2p{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
4077 | | /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm{k} */ |
4078 | 5 | op_bytes = 4 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + evex.lr) + |
4079 | 5 | evex.w); |
4080 | 5 | goto avx512f_all_fp; |
4081 | | |
4082 | 0 | #ifndef X86EMUL_NO_SIMD |
4083 | | |
4084 | 2.17k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0x5b): /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */ |
4085 | | /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */ |
4086 | 2.17k | case X86EMUL_OPC_F3(0x0f, 0x5b): /* cvttps2dq xmm/mem,xmm */ |
4087 | 931 | case X86EMUL_OPC_VEX_F3(0x0f, 0x5b): /* vcvttps2dq {x,y}mm/mem,{x,y}mm */ |
4088 | 931 | d |= TwoOp; |
4089 | 931 | op_bytes = 16 << vex.l; |
4090 | 931 | goto simd_0f_cvt; |
4091 | | |
4092 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0x5b): /* vcvtps2dq [xyz]mm/mem,[xyz]mm{k} */ |
4093 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x5b): /* vcvttps2dq [xyz]mm/mem,[xyz]mm{k} */ |
4094 | 3 | generate_exception_if(evex.w, X86_EXC_UD); |
4095 | | /* fall through */ |
4096 | 4 | case X86EMUL_OPC_EVEX(0x0f, 0x5b): /* vcvtdq2ps [xyz]mm/mem,[xyz]mm{k} */ |
4097 | | /* vcvtqq2ps [xyz]mm/mem,{x,y}mm{k} */ |
4098 | 5 | case X86EMUL_OPC_EVEX_F2(0x0f, 0x7a): /* vcvtudq2ps [xyz]mm/mem,[xyz]mm{k} */ |
4099 | | /* vcvtuqq2ps [xyz]mm/mem,{x,y}mm{k} */ |
4100 | 5 | if ( evex.w ) |
4101 | 5 | host_and_vcpu_must_have(avx512dq); |
4102 | 4 | else |
4103 | 4 | { |
4104 | 5 | case X86EMUL_OPC_EVEX(0x0f, 0x78): /* vcvttp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */ |
4105 | 6 | case X86EMUL_OPC_EVEX(0x0f, 0x79): /* vcvtp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */ |
4106 | 6 | host_and_vcpu_must_have(avx512f); |
4107 | 6 | } |
4108 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
4109 | 0 | avx512_vlen_check(false); |
4110 | 0 | d |= TwoOp; |
4111 | 0 | op_bytes = 16 << evex.lr; |
4112 | 0 | goto simd_zmm; |
4113 | | |
4114 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
4115 | | |
4116 | 810 | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */ |
4117 | | /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4118 | 2.22k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */ |
4119 | | /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4120 | 3.71k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x62): /* punpckldq {,x}mm/mem,{,x}mm */ |
4121 | | /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4122 | 4.72k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x68): /* punpckhbw {,x}mm/mem,{,x}mm */ |
4123 | | /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4124 | 5.70k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x69): /* punpckhwd {,x}mm/mem,{,x}mm */ |
4125 | | /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4126 | 6.40k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6a): /* punpckhdq {,x}mm/mem,{,x}mm */ |
4127 | | /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4128 | 6.40k | op_bytes = vex.pfx ? 16 << vex.l : b & 8 ? 8 : 4; |
4129 | | /* fall through */ |
4130 | 7.90k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x63): /* packsswb {,x}mm/mem,{,x}mm */ |
4131 | | /* vpacksswb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4132 | 9.34k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x64): /* pcmpgtb {,x}mm/mem,{,x}mm */ |
4133 | | /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4134 | 10.7k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x65): /* pcmpgtw {,x}mm/mem,{,x}mm */ |
4135 | | /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4136 | 11.9k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x66): /* pcmpgtd {,x}mm/mem,{,x}mm */ |
4137 | | /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4138 | 13.2k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x67): /* packuswb {,x}mm/mem,{,x}mm */ |
4139 | | /* vpackuswb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4140 | 14.4k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6b): /* packssdw {,x}mm/mem,{,x}mm */ |
4141 | | /* vpackssdw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4142 | 14.4k | #ifndef X86EMUL_NO_SIMD |
4143 | 14.4k | case X86EMUL_OPC_66(0x0f, 0x6c): /* punpcklqdq xmm/m128,xmm */ |
4144 | 5.01k | case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4145 | 5.08k | case X86EMUL_OPC_66(0x0f, 0x6d): /* punpckhqdq xmm/m128,xmm */ |
4146 | 5.15k | case X86EMUL_OPC_VEX_66(0x0f, 0x6d): /* vpunpckhqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4147 | 17.2k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x74): /* pcmpeqb {,x}mm/mem,{,x}mm */ |
4148 | | /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4149 | 19.4k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x75): /* pcmpeqw {,x}mm/mem,{,x}mm */ |
4150 | | /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4151 | 20.6k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x76): /* pcmpeqd {,x}mm/mem,{,x}mm */ |
4152 | | /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4153 | 21.5k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd1): /* psrlw {,x}mm/mem,{,x}mm */ |
4154 | | /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */ |
4155 | 23.6k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd2): /* psrld {,x}mm/mem,{,x}mm */ |
4156 | | /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */ |
4157 | 26.1k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd3): /* psrlq {,x}mm/mem,{,x}mm */ |
4158 | | /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */ |
4159 | 26.1k | case X86EMUL_OPC_66(0x0f, 0xd4): /* paddq xmm/m128,xmm */ |
4160 | 9.38k | case X86EMUL_OPC_VEX_66(0x0f, 0xd4): /* vpaddq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4161 | 29.0k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd5): /* pmullw {,x}mm/mem,{,x}mm */ |
4162 | | /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4163 | 29.9k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd8): /* psubusb {,x}mm/mem,{,x}mm */ |
4164 | | /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4165 | 31.1k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd9): /* psubusw {,x}mm/mem,{,x}mm */ |
4166 | | /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4167 | 31.1k | case X86EMUL_OPC_66(0x0f, 0xda): /* pminub xmm/m128,xmm */ |
4168 | 10.7k | case X86EMUL_OPC_VEX_66(0x0f, 0xda): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4169 | 33.9k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdb): /* pand {,x}mm/mem,{,x}mm */ |
4170 | | /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4171 | 35.0k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdc): /* paddusb {,x}mm/mem,{,x}mm */ |
4172 | | /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4173 | 36.7k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdd): /* paddusw {,x}mm/mem,{,x}mm */ |
4174 | | /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4175 | 36.7k | case X86EMUL_OPC_66(0x0f, 0xde): /* pmaxub xmm/m128,xmm */ |
4176 | 12.5k | case X86EMUL_OPC_VEX_66(0x0f, 0xde): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4177 | 38.8k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdf): /* pandn {,x}mm/mem,{,x}mm */ |
4178 | | /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4179 | 38.8k | case X86EMUL_OPC_66(0x0f, 0xe0): /* pavgb xmm/m128,xmm */ |
4180 | 13.1k | case X86EMUL_OPC_VEX_66(0x0f, 0xe0): /* vpavgb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4181 | 39.7k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe1): /* psraw {,x}mm/mem,{,x}mm */ |
4182 | | /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */ |
4183 | 40.3k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe2): /* psrad {,x}mm/mem,{,x}mm */ |
4184 | | /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */ |
4185 | 40.3k | case X86EMUL_OPC_66(0x0f, 0xe3): /* pavgw xmm/m128,xmm */ |
4186 | 13.8k | case X86EMUL_OPC_VEX_66(0x0f, 0xe3): /* vpavgw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4187 | 13.9k | case X86EMUL_OPC_66(0x0f, 0xe4): /* pmulhuw xmm/m128,xmm */ |
4188 | 14.0k | case X86EMUL_OPC_VEX_66(0x0f, 0xe4): /* vpmulhuw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4189 | 42.4k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe5): /* pmulhw {,x}mm/mem,{,x}mm */ |
4190 | | /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4191 | 43.3k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe8): /* psubsb {,x}mm/mem,{,x}mm */ |
4192 | | /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4193 | 44.5k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe9): /* psubsw {,x}mm/mem,{,x}mm */ |
4194 | | /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4195 | 44.5k | case X86EMUL_OPC_66(0x0f, 0xea): /* pminsw xmm/m128,xmm */ |
4196 | 15.3k | case X86EMUL_OPC_VEX_66(0x0f, 0xea): /* vpminsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4197 | 47.5k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xeb): /* por {,x}mm/mem,{,x}mm */ |
4198 | | /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4199 | 48.6k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xec): /* paddsb {,x}mm/mem,{,x}mm */ |
4200 | | /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4201 | 49.4k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xed): /* paddsw {,x}mm/mem,{,x}mm */ |
4202 | | /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4203 | 49.4k | case X86EMUL_OPC_66(0x0f, 0xee): /* pmaxsw xmm/m128,xmm */ |
4204 | 16.9k | case X86EMUL_OPC_VEX_66(0x0f, 0xee): /* vpmaxsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4205 | 51.8k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xef): /* pxor {,x}mm/mem,{,x}mm */ |
4206 | | /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4207 | 53.2k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf1): /* psllw {,x}mm/mem,{,x}mm */ |
4208 | | /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */ |
4209 | 54.1k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf2): /* pslld {,x}mm/mem,{,x}mm */ |
4210 | | /* vpslld xmm/m128,{x,y}mm,{x,y}mm */ |
4211 | 55.6k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf3): /* psllq {,x}mm/mem,{,x}mm */ |
4212 | | /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */ |
4213 | 55.6k | case X86EMUL_OPC_66(0x0f, 0xf4): /* pmuludq xmm/m128,xmm */ |
4214 | 19.0k | case X86EMUL_OPC_VEX_66(0x0f, 0xf4): /* vpmuludq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4215 | 57.6k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf5): /* pmaddwd {,x}mm/mem,{,x}mm */ |
4216 | | /* vpmaddwd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4217 | 57.6k | case X86EMUL_OPC_66(0x0f, 0xf6): /* psadbw xmm/m128,xmm */ |
4218 | 19.6k | case X86EMUL_OPC_VEX_66(0x0f, 0xf6): /* vpsadbw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4219 | 59.7k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf8): /* psubb {,x}mm/mem,{,x}mm */ |
4220 | | /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4221 | 60.7k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf9): /* psubw {,x}mm/mem,{,x}mm */ |
4222 | | /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4223 | 63.2k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfa): /* psubd {,x}mm/mem,{,x}mm */ |
4224 | | /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4225 | 63.2k | case X86EMUL_OPC_66(0x0f, 0xfb): /* psubq xmm/m128,xmm */ |
4226 | 21.3k | case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4227 | 21.3k | #endif /* !X86EMUL_NO_SIMD */ |
4228 | 64.5k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfc): /* paddb {,x}mm/mem,{,x}mm */ |
4229 | | /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4230 | 66.1k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfd): /* paddw {,x}mm/mem,{,x}mm */ |
4231 | | /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4232 | 66.8k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfe): /* paddd {,x}mm/mem,{,x}mm */ |
4233 | | /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4234 | 66.8k | simd_0f_int: |
4235 | 24.6k | #ifndef X86EMUL_NO_SIMD |
4236 | 24.6k | if ( vex.opcx != vex_none ) |
4237 | 7.17k | { |
4238 | 7.26k | case X86EMUL_OPC_VEX_66(0x0f38, 0x00): /* vpshufb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4239 | 7.32k | case X86EMUL_OPC_VEX_66(0x0f38, 0x01): /* vphaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4240 | 7.39k | case X86EMUL_OPC_VEX_66(0x0f38, 0x02): /* vphaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4241 | 7.45k | case X86EMUL_OPC_VEX_66(0x0f38, 0x03): /* vphaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4242 | 7.52k | case X86EMUL_OPC_VEX_66(0x0f38, 0x04): /* vpmaddubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4243 | 7.54k | case X86EMUL_OPC_VEX_66(0x0f38, 0x05): /* vphsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4244 | 7.60k | case X86EMUL_OPC_VEX_66(0x0f38, 0x06): /* vphsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4245 | 7.67k | case X86EMUL_OPC_VEX_66(0x0f38, 0x07): /* vphsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4246 | 7.74k | case X86EMUL_OPC_VEX_66(0x0f38, 0x08): /* vpsignb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4247 | 7.80k | case X86EMUL_OPC_VEX_66(0x0f38, 0x09): /* vpsignw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4248 | 7.87k | case X86EMUL_OPC_VEX_66(0x0f38, 0x0a): /* vpsignd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4249 | 7.94k | case X86EMUL_OPC_VEX_66(0x0f38, 0x0b): /* vpmulhrsw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4250 | 8.00k | case X86EMUL_OPC_VEX_66(0x0f38, 0x1c): /* vpabsb {x,y}mm/mem,{x,y}mm */ |
4251 | 8.07k | case X86EMUL_OPC_VEX_66(0x0f38, 0x1d): /* vpabsw {x,y}mm/mem,{x,y}mm */ |
4252 | 8.13k | case X86EMUL_OPC_VEX_66(0x0f38, 0x1e): /* vpabsd {x,y}mm/mem,{x,y}mm */ |
4253 | 8.20k | case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4254 | 8.27k | case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4255 | 8.33k | case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4256 | 8.41k | case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4257 | 8.47k | case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4258 | 8.54k | case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4259 | 8.61k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminuw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4260 | 8.67k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3b): /* vpminud {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4261 | 8.76k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3c): /* vpmaxsb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4262 | 8.83k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3d): /* vpmaxsd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4263 | 8.90k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3e): /* vpmaxuw {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4264 | 8.96k | case X86EMUL_OPC_VEX_66(0x0f38, 0x3f): /* vpmaxud {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4265 | 9.16k | case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4266 | 9.16k | if ( !vex.l ) |
4267 | 7.51k | goto simd_0f_avx; |
4268 | | /* fall through */ |
4269 | 1.71k | case X86EMUL_OPC_VEX_66(0x0f38, 0x45): /* vpsrlv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4270 | 1.78k | case X86EMUL_OPC_VEX_66(0x0f38, 0x47): /* vpsllv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4271 | 2.42k | simd_0f_avx2: |
4272 | 2.42k | host_and_vcpu_must_have(avx2); |
4273 | 2.42k | goto simd_0f_ymm; |
4274 | 2.42k | } |
4275 | 17.4k | if ( vex.pfx ) |
4276 | 9.15k | goto simd_0f_sse2; |
4277 | 8.31k | #endif /* !X86EMUL_NO_SIMD */ |
4278 | 10.9k | simd_0f_mmx: |
4279 | 10.9k | host_and_vcpu_must_have(mmx); |
4280 | 10.9k | get_fpu(X86EMUL_FPU_mmx); |
4281 | 10.8k | goto simd_0f_common; |
4282 | | |
4283 | 10.8k | #ifndef X86EMUL_NO_SIMD |
4284 | | |
4285 | 10.8k | case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm */ |
4286 | 2 | generate_exception_if(evex.opmsk, X86_EXC_UD); |
4287 | | /* fall through */ |
4288 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0x60): /* vpunpcklbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4289 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0x61): /* vpunpcklwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4290 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0x68): /* vpunpckhbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4291 | 5 | case X86EMUL_OPC_EVEX_66(0x0f, 0x69): /* vpunpckhwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4292 | 5 | op_bytes = 16 << evex.lr; |
4293 | | /* fall through */ |
4294 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0x63): /* vpacksswb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4295 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0x67): /* vpackuswb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4296 | 8 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,[xyz]mm,[xyz]mm{k} */ |
4297 | 9 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,[xyz]mm,[xyz]mm{k} */ |
4298 | 10 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */ |
4299 | 11 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4300 | 12 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x00): /* vpshufb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4301 | 13 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x04): /* vpmaddubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4302 | 13 | fault_suppression = false; |
4303 | | /* fall through */ |
4304 | 14 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4305 | 15 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4306 | 16 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4307 | 17 | case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4308 | 18 | case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4309 | 19 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4310 | 20 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4311 | 21 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4312 | 22 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4313 | 23 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4314 | 24 | case X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4315 | 25 | case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4316 | 26 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4317 | 27 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4318 | 28 | case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4319 | 29 | case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4320 | 30 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x0b): /* vpmulhrsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4321 | 31 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1c): /* vpabsb [xyz]mm/mem,[xyz]mm{k} */ |
4322 | 32 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1d): /* vpabsw [xyz]mm/mem,[xyz]mm{k} */ |
4323 | 32 | host_and_vcpu_must_have(avx512bw); |
4324 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
4325 | 0 | elem_bytes = 1 << (b & 1); |
4326 | 0 | goto avx512f_no_sae; |
4327 | | |
4328 | 1 | case X86EMUL_OPC_EVEX_66(0x0f, 0x62): /* vpunpckldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4329 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6a): /* vpunpckhdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4330 | 3 | generate_exception_if(evex.w, X86_EXC_UD); |
4331 | 2 | fault_suppression = false; |
4332 | 2 | op_bytes = 16 << evex.lr; |
4333 | 2 | goto avx512f_no_sae; |
4334 | | |
4335 | 1 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */ |
4336 | 2 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */ |
4337 | 2 | op_bytes = 16 << evex.lr; |
4338 | | /* fall through */ |
4339 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0x64): /* vpcmpeqb [xyz]mm/mem,[xyz]mm,k{k} */ |
4340 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0x65): /* vpcmpeqw [xyz]mm/mem,[xyz]mm,k{k} */ |
4341 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0x66): /* vpcmpeqd [xyz]mm/mem,[xyz]mm,k{k} */ |
4342 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0x74): /* vpcmpgtb [xyz]mm/mem,[xyz]mm,k{k} */ |
4343 | 8 | case X86EMUL_OPC_EVEX_66(0x0f, 0x75): /* vpcmpgtw [xyz]mm/mem,[xyz]mm,k{k} */ |
4344 | 10 | case X86EMUL_OPC_EVEX_66(0x0f, 0x76): /* vpcmpgtd [xyz]mm/mem,[xyz]mm,k{k} */ |
4345 | 11 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x26): /* vptestm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */ |
4346 | 13 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x27): /* vptestm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */ |
4347 | 14 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x29): /* vpcmpeqq [xyz]mm/mem,[xyz]mm,k{k} */ |
4348 | 15 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x37): /* vpcmpgtq [xyz]mm/mem,[xyz]mm,k{k} */ |
4349 | 15 | generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD); |
4350 | 6 | if ( b & (ext == ext_0f38 ? 1 : 2) ) |
4351 | 4 | { |
4352 | 4 | generate_exception_if(b != 0x27 && evex.w != (b & 1), X86_EXC_UD); |
4353 | 2 | goto avx512f_no_sae; |
4354 | 4 | } |
4355 | 2 | host_and_vcpu_must_have(avx512bw); |
4356 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
4357 | 0 | elem_bytes = 1 << (ext == ext_0f ? b & 1 : evex.w); |
4358 | 0 | avx512_vlen_check(false); |
4359 | 0 | goto simd_zmm; |
4360 | | |
4361 | 1 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6b): /* vpackssdw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4362 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x2b): /* vpackusdw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4363 | 4 | generate_exception_if(evex.w || evex.brs, X86_EXC_UD); |
4364 | 1 | fault_suppression = false; |
4365 | 1 | goto avx512f_no_sae; |
4366 | | |
4367 | 1 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6c): /* vpunpcklqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4368 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6d): /* vpunpckhqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4369 | 2 | fault_suppression = false; |
4370 | | /* fall through */ |
4371 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4372 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4373 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
4374 | 5 | generate_exception_if(!evex.w, X86_EXC_UD); |
4375 | 2 | goto avx512f_no_sae; |
4376 | | |
4377 | 2 | #endif /* X86EMUL_NO_SIMD */ |
4378 | | |
4379 | 1.26k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */ |
4380 | | /* vmov{d,q} r/m,xmm */ |
4381 | 3.47k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */ |
4382 | | /* vmov{d,q} xmm,r/m */ |
4383 | 3.47k | if ( vex.opcx != vex_none ) |
4384 | 323 | { |
4385 | 323 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
4386 | 317 | host_and_vcpu_must_have(avx); |
4387 | 317 | get_fpu(X86EMUL_FPU_ymm); |
4388 | 317 | } |
4389 | 1.02k | else if ( vex.pfx ) |
4390 | 546 | { |
4391 | 546 | vcpu_must_have(sse2); |
4392 | 546 | get_fpu(X86EMUL_FPU_xmm); |
4393 | 546 | } |
4394 | 477 | else |
4395 | 477 | { |
4396 | 477 | host_and_vcpu_must_have(mmx); |
4397 | 477 | get_fpu(X86EMUL_FPU_mmx); |
4398 | 477 | } |
4399 | | |
4400 | 2.22k | simd_0f_rm: |
4401 | 2.22k | opc = init_prefixes(stub); |
4402 | 0 | opc[0] = b; |
4403 | | /* Convert memory/GPR operand to (%rAX). */ |
4404 | 2.22k | rex_prefix &= ~REX_B; |
4405 | 2.22k | vex.b = 1; |
4406 | 2.22k | if ( !mode_64bit() ) |
4407 | 969 | vex.w = 0; |
4408 | 2.22k | opc[1] = modrm & 0x38; |
4409 | 2.22k | insn_bytes = PFX_BYTES + 2; |
4410 | 2.22k | opc[2] = 0xc3; |
4411 | | |
4412 | 2.22k | copy_REX_VEX(opc, rex_prefix, vex); |
4413 | 2.22k | invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); |
4414 | 2.22k | dst.val = src.val; |
4415 | | |
4416 | 2.22k | put_stub(stub); |
4417 | 2.22k | ASSERT(!state->simd_size); |
4418 | 2.22k | break; |
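/*
 * Rough sketch of what the simd_0f_rm path runs: the stub buffer holds the
 * three prefix bytes laid down by init_prefixes(), the opcode byte b, a
 * ModRM byte with mod=00/rm=000 (the memory/GPR operand redirected to
 * (%rAX)), and a trailing RET (0xc3).  invoke_stub() then executes it with
 * %rax pointing at src.val, so the host CPU itself performs the transfer
 * between the vector register and the scratch slot, and dst.val = src.val
 * feeds the normal writeback path for the register-to-r/m direction.
 */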
4419 | | |
4420 | 2.22k | #ifndef X86EMUL_NO_SIMD |
4421 | | |
4422 | 2.22k | case X86EMUL_OPC_EVEX_66(5, 0x7e): /* vmovw xmm,r/m16 */ |
4423 | 2 | ASSERT(dst.bytes >= 4); |
4424 | 2 | if ( dst.type == OP_MEM ) |
4425 | 1 | dst.bytes = 2; |
4426 | | /* fall through */ |
4427 | 3 | case X86EMUL_OPC_EVEX_66(5, 0x6e): /* vmovw r/m16,xmm */ |
4428 | 3 | host_and_vcpu_must_have(avx512_fp16); |
4429 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
4430 | | /* fall through */ |
4431 | 1 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */ |
4432 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */ |
4433 | 7 | generate_exception_if((evex.lr || evex.opmsk || evex.brs || |
4434 | 7 | evex.reg != 0xf || !evex.RX), |
4435 | 7 | X86_EXC_UD); |
4436 | 1 | host_and_vcpu_must_have(avx512f); |
4437 | 0 | get_fpu(X86EMUL_FPU_zmm); |
4438 | | |
4439 | 0 | opc = init_evex(stub); |
4440 | 0 | opc[0] = b; |
4441 | | /* Convert memory/GPR operand to (%rAX). */ |
4442 | 0 | evex.b = 1; |
4443 | 0 | if ( !mode_64bit() ) |
4444 | 0 | evex.w = 0; |
4445 | 0 | opc[1] = modrm & 0x38; |
4446 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
4447 | 0 | opc[2] = 0xc3; |
4448 | |
4449 | 0 | copy_EVEX(opc, evex); |
4450 | 0 | invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); |
4451 | 0 | dst.val = src.val; |
4452 | |
4453 | 0 | put_stub(stub); |
4454 | 0 | ASSERT(!state->simd_size); |
4455 | 0 | break; |
4456 | | |
4457 | 402 | case X86EMUL_OPC_66(0x0f, 0xe7): /* movntdq xmm,m128 */ |
4458 | 608 | case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq {x,y}mm,mem */ |
4459 | 608 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
4460 | 608 | sfence = true; |
4461 | | /* fall through */ |
4462 | 674 | case X86EMUL_OPC_66(0x0f, 0x6f): /* movdqa xmm/m128,xmm */ |
4463 | 769 | case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa {x,y}mm/mem,{x,y}mm */ |
4464 | 835 | case X86EMUL_OPC_F3(0x0f, 0x6f): /* movdqu xmm/m128,xmm */ |
4465 | 906 | case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu {x,y}mm/mem,{x,y}mm */ |
4466 | 1.23k | case X86EMUL_OPC_66(0x0f, 0x7f): /* movdqa xmm,xmm/m128 */ |
4467 | 2.32k | case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/mem */ |
4468 | 2.52k | case X86EMUL_OPC_F3(0x0f, 0x7f): /* movdqu xmm,xmm/m128 */ |
4469 | 2.62k | case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */ |
4470 | 2.78k | movdqa: |
4471 | 2.78k | d |= TwoOp; |
4472 | 2.78k | op_bytes = 16 << vex.l; |
4473 | 2.78k | if ( vex.opcx != vex_none ) |
4474 | 1.69k | goto simd_0f_avx; |
4475 | 1.09k | goto simd_0f_sse2; |
4476 | | |
4477 | 1.09k | case X86EMUL_OPC_EVEX_66(0x0f, 0xe7): /* vmovntdq [xyz]mm,mem */ |
4478 | 4 | generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w, |
4479 | 4 | X86_EXC_UD); |
4480 | 1 | sfence = true; |
4481 | | /* fall through */ |
4482 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0x6f): /* vmovdqa{32,64} [xyz]mm/mem,[xyz]mm{k} */ |
4483 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x6f): /* vmovdqu{32,64} [xyz]mm/mem,[xyz]mm{k} */ |
4484 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0x7f): /* vmovdqa{32,64} [xyz]mm,[xyz]mm/mem{k} */ |
4485 | 5 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x7f): /* vmovdqu{32,64} [xyz]mm,[xyz]mm/mem{k} */ |
4486 | 6 | vmovdqa: |
4487 | 6 | generate_exception_if(evex.brs, X86_EXC_UD); |
4488 | 5 | d |= TwoOp; |
4489 | 5 | op_bytes = 16 << evex.lr; |
4490 | 5 | goto avx512f_no_sae; |
4491 | | |
4492 | 1 | case X86EMUL_OPC_EVEX_F2(0x0f, 0x6f): /* vmovdqu{8,16} [xyz]mm/mem,[xyz]mm{k} */ |
4493 | 2 | case X86EMUL_OPC_EVEX_F2(0x0f, 0x7f): /* vmovdqu{8,16} [xyz]mm,[xyz]mm/mem{k} */ |
4494 | 2 | host_and_vcpu_must_have(avx512bw); |
4495 | 0 | elem_bytes = 1 << evex.w; |
4496 | 0 | goto vmovdqa; |
4497 | | |
4498 | 198 | case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */ |
4499 | 198 | generate_exception_if(vex.l, X86_EXC_UD); |
4500 | 197 | d |= TwoOp; |
4501 | | /* fall through */ |
4502 | 320 | case X86EMUL_OPC_66(0x0f, 0xd6): /* movq xmm,xmm/m64 */ |
4503 | 320 | #endif /* !X86EMUL_NO_SIMD */ |
4504 | 320 | #ifndef X86EMUL_NO_MMX |
4505 | 392 | case X86EMUL_OPC(0x0f, 0x6f): /* movq mm/m64,mm */ |
4506 | 616 | case X86EMUL_OPC(0x0f, 0x7f): /* movq mm,mm/m64 */ |
4507 | 616 | #endif |
4508 | 616 | op_bytes = 8; |
4509 | 616 | goto simd_0f_int; |
4510 | | |
4511 | 0 | #ifndef X86EMUL_NO_SIMD |
4512 | 1.26k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0x70):/* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */ |
4513 | | /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */ |
4514 | 1.26k | case X86EMUL_OPC_F3(0x0f, 0x70): /* pshufhw $imm8,xmm/m128,xmm */ |
4515 | 940 | case X86EMUL_OPC_VEX_F3(0x0f, 0x70): /* vpshufhw $imm8,{x,y}mm/mem,{x,y}mm */ |
4516 | 1.18k | case X86EMUL_OPC_F2(0x0f, 0x70): /* pshuflw $imm8,xmm/m128,xmm */ |
4517 | 1.21k | case X86EMUL_OPC_VEX_F2(0x0f, 0x70): /* vpshuflw $imm8,{x,y}mm/mem,{x,y}mm */ |
4518 | 1.21k | d = (d & ~SrcMask) | SrcMem | TwoOp; |
4519 | 1.21k | op_bytes = vex.pfx ? 16 << vex.l : 8; |
4520 | 1.21k | #endif |
4521 | 2.54k | simd_0f_int_imm8: |
4522 | 2.54k | if ( vex.opcx != vex_none ) |
4523 | 1.23k | { |
4524 | 1.23k | #ifndef X86EMUL_NO_SIMD |
4525 | 1.29k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4526 | 1.36k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4527 | 1.42k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4528 | 1.42k | #endif |
4529 | 1.42k | if ( vex.l ) |
4530 | 233 | { |
4531 | 1.15k | simd_0f_imm8_avx2: |
4532 | 1.15k | host_and_vcpu_must_have(avx2); |
4533 | 1.15k | } |
4534 | 1.19k | else |
4535 | 1.19k | { |
4536 | 1.19k | #ifndef X86EMUL_NO_SIMD |
4537 | 1.25k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */ |
4538 | 1.32k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd $imm8,{x,y}mm/mem,{x,y}mm */ |
4539 | 1.51k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss $imm8,xmm/mem,xmm,xmm */ |
4540 | 1.55k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0b): /* vroundsd $imm8,xmm/mem,xmm,xmm */ |
4541 | 1.59k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4542 | 1.66k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4543 | 1.73k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4544 | 1.73k | #endif |
4545 | 3.34k | simd_0f_imm8_avx: |
4546 | 3.34k | host_and_vcpu_must_have(avx); |
4547 | 3.34k | } |
4548 | 4.49k | simd_0f_imm8_ymm: |
4549 | 4.49k | get_fpu(X86EMUL_FPU_ymm); |
4550 | 4.49k | } |
4551 | 1.31k | else if ( vex.pfx ) |
4552 | 760 | { |
4553 | 1.21k | simd_0f_imm8_sse2: |
4554 | 1.21k | vcpu_must_have(sse2); |
4555 | 1.21k | get_fpu(X86EMUL_FPU_xmm); |
4556 | 1.21k | } |
4557 | 551 | else |
4558 | 551 | { |
4559 | 551 | host_and_vcpu_must_have(mmx); |
4560 | 551 | vcpu_must_have(mmxext); |
4561 | 551 | get_fpu(X86EMUL_FPU_mmx); |
4562 | 551 | } |
4563 | 6.63k | simd_0f_imm8: |
4564 | 6.63k | opc = init_prefixes(stub); |
4565 | 0 | opc[0] = b; |
4566 | 6.63k | opc[1] = modrm; |
4567 | 6.63k | if ( ea.type == OP_MEM ) |
4568 | 4.17k | { |
4569 | | /* Convert memory operand to (%rAX). */ |
4570 | 4.17k | rex_prefix &= ~REX_B; |
4571 | 4.17k | vex.b = 1; |
4572 | 4.17k | opc[1] &= 0x38; |
4573 | 4.17k | } |
4574 | 6.63k | opc[2] = imm1; |
4575 | 6.63k | insn_bytes = PFX_BYTES + 3; |
4576 | 6.63k | break; |
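/*
 * Unlike the simd_0f_rm path further up, nothing is executed here yet: the
 * break leaves a fully assembled stub (prefixes, opcode, ModRM possibly
 * redirected to (%rAX), plus the immediate) with insn_bytes set, presumably
 * for the emulator's common SIMD tail outside this excerpt to bind the real
 * memory operand and invoke.
 */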
4577 | | |
4578 | 0 | #ifndef X86EMUL_NO_SIMD |
4579 | | |
4580 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0x70): /* vpshufd $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4581 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x70): /* vpshufhw $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4582 | 4 | case X86EMUL_OPC_EVEX_F2(0x0f, 0x70): /* vpshuflw $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4583 | 4 | if ( evex.pfx == vex_66 ) |
4584 | 2 | generate_exception_if(evex.w, X86_EXC_UD); |
4585 | 2 | else |
4586 | 2 | { |
4587 | 2 | host_and_vcpu_must_have(avx512bw); |
4588 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
4589 | 0 | } |
4590 | 1 | d = (d & ~SrcMask) | SrcMem | TwoOp; |
4591 | 1 | op_bytes = 16 << evex.lr; |
4592 | 1 | fault_suppression = false; |
4593 | 1 | goto avx512f_imm8_no_sae; |
4594 | | |
4595 | 1.00k | CASE_SIMD_PACKED_INT(0x0f, 0x71): /* Grp12 */ |
4596 | 1.01k | case X86EMUL_OPC_VEX_66(0x0f, 0x71): |
4597 | 2.61k | CASE_SIMD_PACKED_INT(0x0f, 0x72): /* Grp13 */ |
4598 | 2.61k | case X86EMUL_OPC_VEX_66(0x0f, 0x72): |
4599 | 1.44k | switch ( modrm_reg & 7 ) |
4600 | 1.44k | { |
4601 | 315 | case 2: /* psrl{w,d} $imm8,{,x}mm */ |
4602 | | /* vpsrl{w,d} $imm8,{x,y}mm,{x,y}mm */ |
4603 | 598 | case 4: /* psra{w,d} $imm8,{,x}mm */ |
4604 | | /* vpsra{w,d} $imm8,{x,y}mm,{x,y}mm */ |
4605 | 1.43k | case 6: /* psll{w,d} $imm8,{,x}mm */ |
4606 | | /* vpsll{w,d} $imm8,{x,y}mm,{x,y}mm */ |
4607 | 1.43k | break; |
4608 | 5 | default: |
4609 | 5 | goto unrecognized_insn; |
4610 | 1.44k | } |
4611 | 2.69k | simd_0f_shift_imm: |
4612 | 2.69k | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
4613 | | |
4614 | 2.68k | if ( vex.opcx != vex_none ) |
4615 | 625 | { |
4616 | 625 | if ( vex.l ) |
4617 | 625 | host_and_vcpu_must_have(avx2); |
4618 | 466 | else |
4619 | 625 | host_and_vcpu_must_have(avx); |
4620 | 625 | get_fpu(X86EMUL_FPU_ymm); |
4621 | 625 | } |
4622 | 2.06k | else if ( vex.pfx ) |
4623 | 1.14k | { |
4624 | 1.14k | vcpu_must_have(sse2); |
4625 | 1.14k | get_fpu(X86EMUL_FPU_xmm); |
4626 | 1.14k | } |
4627 | 920 | else |
4628 | 920 | { |
4629 | 920 | host_and_vcpu_must_have(mmx); |
4630 | 920 | get_fpu(X86EMUL_FPU_mmx); |
4631 | 920 | } |
4632 | | |
4633 | 2.67k | opc = init_prefixes(stub); |
4634 | 0 | opc[0] = b; |
4635 | 2.67k | opc[1] = modrm; |
4636 | 2.67k | opc[2] = imm1; |
4637 | 2.67k | insn_bytes = PFX_BYTES + 3; |
4638 | | |
4639 | 2.67k | #endif /* X86EMUL_NO_SIMD */ |
4640 | | |
4641 | 3.08k | simd_0f_reg_only: |
4642 | 3.08k | opc[insn_bytes - PFX_BYTES] = 0xc3; |
4643 | | |
4644 | 3.08k | copy_REX_VEX(opc, rex_prefix, vex); |
4645 | 3.08k | invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) ); |
4646 | | |
4647 | 3.08k | put_stub(stub); |
4648 | 3.08k | ASSERT(!state->simd_size); |
4649 | 3.08k | break; |
4650 | | |
4651 | 3.08k | #ifndef X86EMUL_NO_SIMD |
4652 | | |
4653 | 3.08k | case X86EMUL_OPC_EVEX_66(0x0f, 0x71): /* Grp12 */ |
4654 | 4 | switch ( modrm_reg & 7 ) |
4655 | 4 | { |
4656 | 1 | case 2: /* vpsrlw $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4657 | 2 | case 4: /* vpsraw $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4658 | 3 | case 6: /* vpsllw $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4659 | 4 | avx512bw_shift_imm: |
4660 | 4 | fault_suppression = false; |
4661 | 4 | op_bytes = 16 << evex.lr; |
4662 | 4 | state->simd_size = simd_packed_int; |
4663 | 4 | goto avx512bw_imm; |
4664 | 4 | } |
4665 | 1 | goto unrecognized_insn; |
4666 | | |
4667 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0x72): /* Grp13 */ |
4668 | 6 | switch ( modrm_reg & 7 ) |
4669 | 6 | { |
4670 | 1 | case 2: /* vpsrld $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4671 | 2 | case 6: /* vpslld $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4672 | 2 | generate_exception_if(evex.w, X86_EXC_UD); |
4673 | | /* fall through */ |
4674 | 2 | case 0: /* vpror{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4675 | 3 | case 1: /* vprol{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4676 | 4 | case 4: /* vpsra{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4677 | 6 | avx512f_shift_imm: |
4678 | 6 | op_bytes = 16 << evex.lr; |
4679 | 6 | state->simd_size = simd_packed_int; |
4680 | 6 | goto avx512f_imm8_no_sae; |
4681 | 6 | } |
4682 | 1 | goto unrecognized_insn; |
4683 | | |
4684 | 1 | #endif /* !X86EMUL_NO_SIMD */ |
4685 | 1 | #ifndef X86EMUL_NO_MMX |
4686 | | |
4687 | 423 | case X86EMUL_OPC(0x0f, 0x73): /* Grp14 */ |
4688 | 423 | switch ( modrm_reg & 7 ) |
4689 | 423 | { |
4690 | 303 | case 2: /* psrlq $imm8,mm */ |
4691 | 422 | case 6: /* psllq $imm8,mm */ |
4692 | 422 | goto simd_0f_shift_imm; |
4693 | 423 | } |
4694 | 1 | goto unrecognized_insn; |
4695 | | |
4696 | 1 | #endif /* !X86EMUL_NO_MMX */ |
4697 | 1 | #ifndef X86EMUL_NO_SIMD |
4698 | | |
4699 | 550 | case X86EMUL_OPC_66(0x0f, 0x73): |
4700 | 835 | case X86EMUL_OPC_VEX_66(0x0f, 0x73): |
4701 | 835 | switch ( modrm_reg & 7 ) |
4702 | 835 | { |
4703 | 78 | case 2: /* psrlq $imm8,xmm */ |
4704 | | /* vpsrlq $imm8,{x,y}mm,{x,y}mm */ |
4705 | 400 | case 3: /* psrldq $imm8,xmm */ |
4706 | | /* vpsrldq $imm8,{x,y}mm,{x,y}mm */ |
4707 | 603 | case 6: /* psllq $imm8,xmm */ |
4708 | | /* vpsllq $imm8,{x,y}mm,{x,y}mm */ |
4709 | 834 | case 7: /* pslldq $imm8,xmm */ |
4710 | | /* vpslldq $imm8,{x,y}mm,{x,y}mm */ |
4711 | 834 | goto simd_0f_shift_imm; |
4712 | 835 | } |
4713 | 1 | goto unrecognized_insn; |
4714 | | |
4715 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0x73): /* Grp14 */ |
4716 | 6 | switch ( modrm_reg & 7 ) |
4717 | 6 | { |
4718 | 1 | case 2: /* vpsrlq $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4719 | 3 | case 6: /* vpsllq $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
4720 | 3 | generate_exception_if(!evex.w, X86_EXC_UD); |
4721 | 2 | goto avx512f_shift_imm; |
4722 | 2 | case 3: /* vpsrldq $imm8,[xyz]mm/mem,[xyz]mm */ |
4723 | 2 | case 7: /* vpslldq $imm8,[xyz]mm/mem,[xyz]mm */ |
4724 | 2 | generate_exception_if(evex.opmsk, X86_EXC_UD); |
4725 | 1 | goto avx512bw_shift_imm; |
4726 | 6 | } |
4727 | 1 | goto unrecognized_insn; |
4728 | | |
4729 | 1 | #endif /* !X86EMUL_NO_SIMD */ |
4730 | | |
4731 | 1 | #ifndef X86EMUL_NO_MMX |
4732 | 207 | case X86EMUL_OPC(0x0f, 0x77): /* emms */ |
4733 | 207 | #endif |
4734 | 207 | #ifndef X86EMUL_NO_SIMD |
4735 | 690 | case X86EMUL_OPC_VEX(0x0f, 0x77): /* vzero{all,upper} */ |
4736 | 690 | if ( vex.opcx != vex_none ) |
4737 | 483 | { |
4738 | 483 | generate_exception_if(vex.reg != 0xf, X86_EXC_UD); |
4739 | 482 | host_and_vcpu_must_have(avx); |
4740 | 482 | get_fpu(X86EMUL_FPU_ymm); |
4741 | | |
4742 | 481 | #ifdef __x86_64__ |
4743 | 481 | if ( !mode_64bit() ) |
4744 | 285 | { |
4745 | | /* |
4746 | | * Can't use the actual instructions here, as we must not |
4747 | | * touch YMM8...YMM15. |
4748 | | */ |
4749 | 285 | if ( vex.l ) |
4750 | 76 | { |
4751 | | /* vpxor %xmmN, %xmmN, %xmmN */ |
4752 | 76 | asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" ); |
4753 | 76 | asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" ); |
4754 | 76 | asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" ); |
4755 | 76 | asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" ); |
4756 | 76 | asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" ); |
4757 | 76 | asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" ); |
4758 | 76 | asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" ); |
4759 | 76 | asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" ); |
4760 | 76 | } |
4761 | 209 | else |
4762 | 209 | { |
4763 | | /* vpor %xmmN, %xmmN, %xmmN */ |
4764 | 209 | asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" ); |
4765 | 209 | asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" ); |
4766 | 209 | asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" ); |
4767 | 209 | asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" ); |
4768 | 209 | asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" ); |
4769 | 209 | asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" ); |
4770 | 209 | asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" ); |
4771 | 209 | asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" ); |
4772 | 209 | } |
4773 | | |
4774 | 285 | ASSERT(!state->simd_size); |
4775 | 285 | break; |
4776 | 285 | } |
4777 | 481 | #endif |
4778 | 481 | } |
4779 | 207 | else |
4780 | 207 | #endif /* !X86EMUL_NO_SIMD */ |
4781 | 207 | { |
4782 | 207 | host_and_vcpu_must_have(mmx); |
4783 | 207 | get_fpu(X86EMUL_FPU_mmx); |
4784 | 207 | } |
4785 | | |
4786 | | /* Work around erratum BT36. */ |
4787 | 402 | vex.w = 0; |
4788 | | |
4789 | 402 | opc = init_prefixes(stub); |
4790 | 0 | opc[0] = b; |
4791 | 402 | insn_bytes = PFX_BYTES + 1; |
4792 | 402 | goto simd_0f_reg_only; |
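/*
 * The vpxor/vpor choice above relies on VEX.128 encoded instructions
 * zeroing bits 255:128 of their destination: vpxor clears a register
 * completely (vzeroall, vex.l set), while vpor with identical operands
 * keeps the low 128 bits and only drops the upper half (vzeroupper).
 * Either way only %xmm0..%xmm7 are written, leaving %ymm8..%ymm15 alone as
 * the comment above requires.
 */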
4793 | | |
4794 | 0 | #ifndef X86EMUL_NO_SIMD |
4795 | | |
4796 | 2 | case X86EMUL_OPC_66(0x0f, 0x78): /* Grp17 */ |
4797 | 2 | switch ( modrm_reg & 7 ) |
4798 | 2 | { |
4799 | 1 | case 0: /* extrq $imm8,$imm8,xmm */ |
4800 | 1 | break; |
4801 | 1 | default: |
4802 | 1 | goto unrecognized_insn; |
4803 | 2 | } |
4804 | | /* fall through */ |
4805 | 2 | case X86EMUL_OPC_F2(0x0f, 0x78): /* insertq $imm8,$imm8,xmm,xmm */ |
4806 | 2 | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
4807 | | |
4808 | 1 | host_and_vcpu_must_have(sse4a); |
4809 | 1 | get_fpu(X86EMUL_FPU_xmm); |
4810 | | |
4811 | 1 | opc = init_prefixes(stub); |
4812 | 0 | opc[0] = b; |
4813 | 1 | opc[1] = modrm; |
4814 | 1 | opc[2] = imm1; |
4815 | 1 | opc[3] = imm2; |
4816 | 1 | insn_bytes = PFX_BYTES + 4; |
4817 | 1 | goto simd_0f_reg_only; |
4818 | | |
4819 | 2 | case X86EMUL_OPC_66(0x0f, 0x79): /* extrq xmm,xmm */ |
4820 | 3 | case X86EMUL_OPC_F2(0x0f, 0x79): /* insertq xmm,xmm */ |
4821 | 3 | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
4822 | 1 | host_and_vcpu_must_have(sse4a); |
4823 | 1 | op_bytes = 8; |
4824 | 1 | goto simd_0f_xmm; |
4825 | | |
4826 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe6): /* vcvttpd2dq [xyz]mm/mem,{x,y}mm{k} */ |
4827 | 3 | case X86EMUL_OPC_EVEX_F2(0x0f, 0xe6): /* vcvtpd2dq [xyz]mm/mem,{x,y}mm{k} */ |
4828 | 3 | generate_exception_if(!evex.w, X86_EXC_UD); |
4829 | | /* fall through */ |
4830 | 4 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x7a): /* vcvtudq2pd {x,y}mm/mem,[xyz]mm{k} */ |
4831 | | /* vcvtuqq2pd [xyz]mm/mem,[xyz]mm{k} */ |
4832 | 7 | case X86EMUL_OPC_EVEX_F3(0x0f, 0xe6): /* vcvtdq2pd {x,y}mm/mem,[xyz]mm{k} */ |
4833 | | /* vcvtqq2pd [xyz]mm/mem,[xyz]mm{k} */ |
4834 | 7 | if ( evex.pfx != vex_f3 ) |
4835 | 7 | host_and_vcpu_must_have(avx512f); |
4836 | 6 | else if ( evex.w ) |
4837 | 2 | { |
4838 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0x78): /* vcvttps2uqq {x,y}mm/mem,[xyz]mm{k} */ |
4839 | | /* vcvttpd2uqq [xyz]mm/mem,[xyz]mm{k} */ |
4840 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0x79): /* vcvtps2uqq {x,y}mm/mem,[xyz]mm{k} */ |
4841 | | /* vcvtpd2uqq [xyz]mm/mem,[xyz]mm{k} */ |
4842 | 5 | case X86EMUL_OPC_EVEX_66(0x0f, 0x7a): /* vcvttps2qq {x,y}mm/mem,[xyz]mm{k} */ |
4843 | | /* vcvttpd2qq [xyz]mm/mem,[xyz]mm{k} */ |
4844 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0x7b): /* vcvtps2qq {x,y}mm/mem,[xyz]mm{k} */ |
4845 | | /* vcvtpd2qq [xyz]mm/mem,[xyz]mm{k} */ |
4846 | 7 | host_and_vcpu_must_have(avx512dq); |
4847 | 7 | } |
4848 | 4 | else |
4849 | 4 | { |
4850 | 4 | host_and_vcpu_must_have(avx512f); |
4851 | | /* |
4852 | | * While SDM version 085 has explicit wording that embedded |
4853 | | * rounding is ignored, it is still not entirely clear which |
4854 | | * exception type is referred to. Be on the safe side for the stub. |
4855 | | */ |
4856 | 0 | if ( ea.type != OP_MEM && evex.brs ) |
4857 | 0 | { |
4858 | 0 | evex.brs = 0; |
4859 | 0 | evex.lr = 2; |
4860 | 0 | } |
4861 | 0 | } |
4862 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
4863 | 0 | avx512_vlen_check(false); |
4864 | 0 | d |= TwoOp; |
4865 | 0 | op_bytes = 8 << (evex.w + evex.lr); |
4866 | 0 | goto simd_zmm; |
4867 | | |
4868 | 64 | case X86EMUL_OPC_F2(0x0f, 0xf0): /* lddqu m128,xmm */ |
4869 | 122 | case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */ |
4870 | 122 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
4871 | | /* fall through */ |
4872 | 188 | case X86EMUL_OPC_66(0x0f, 0x7c): /* haddpd xmm/m128,xmm */ |
4873 | 254 | case X86EMUL_OPC_F2(0x0f, 0x7c): /* haddps xmm/m128,xmm */ |
4874 | 342 | case X86EMUL_OPC_VEX_66(0x0f, 0x7c): /* vhaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4875 | 536 | case X86EMUL_OPC_VEX_F2(0x0f, 0x7c): /* vhaddps {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4876 | 698 | case X86EMUL_OPC_66(0x0f, 0x7d): /* hsubpd xmm/m128,xmm */ |
4877 | 764 | case X86EMUL_OPC_F2(0x0f, 0x7d): /* hsubps xmm/m128,xmm */ |
4878 | 1.08k | case X86EMUL_OPC_VEX_66(0x0f, 0x7d): /* vhsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4879 | 1.18k | case X86EMUL_OPC_VEX_F2(0x0f, 0x7d): /* vhsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4880 | 1.37k | case X86EMUL_OPC_66(0x0f, 0xd0): /* addsubpd xmm/m128,xmm */ |
4881 | 1.57k | case X86EMUL_OPC_F2(0x0f, 0xd0): /* addsubps xmm/m128,xmm */ |
4882 | 1.76k | case X86EMUL_OPC_VEX_66(0x0f, 0xd0): /* vaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4883 | 1.83k | case X86EMUL_OPC_VEX_F2(0x0f, 0xd0): /* vaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
4884 | 1.83k | op_bytes = 16 << vex.l; |
4885 | 1.83k | goto simd_0f_sse3_avx; |
4886 | | |
4887 | 66 | case X86EMUL_OPC_F3(0x0f, 0x7e): /* movq xmm/m64,xmm */ |
4888 | 132 | case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */ |
4889 | 132 | generate_exception_if(vex.l, X86_EXC_UD); |
4890 | 131 | op_bytes = 8; |
4891 | 131 | goto simd_0f_int; |
4892 | | |
4893 | 1 | case X86EMUL_OPC_EVEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */ |
4894 | 5 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */ |
4895 | 5 | generate_exception_if(evex.lr || !evex.w || evex.opmsk || evex.brs, |
4896 | 5 | X86_EXC_UD); |
4897 | 1 | host_and_vcpu_must_have(avx512f); |
4898 | 0 | d |= TwoOp; |
4899 | 0 | op_bytes = 8; |
4900 | 0 | goto simd_zmm; |
4901 | | |
4902 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
4903 | | |
4904 | 2.21k | case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */ |
4905 | 2.21k | if ( test_cc(b, _regs.eflags) ) |
4906 | 1.35k | jmp_rel((int32_t)src.val); |
4907 | 2.19k | adjust_bnd(ctxt, ops, vex.pfx); |
4908 | 2.19k | break; |
4909 | | |
4910 | 2.43k | case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */ |
4911 | 2.43k | dst.val = test_cc(b, _regs.eflags); |
4912 | 2.43k | break; |
4913 | | |
4914 | 0 | #ifndef X86EMUL_NO_SIMD |
4915 | | |
4916 | 1 | case X86EMUL_OPC_VEX(0x0f, 0x91): /* kmov{w,q} k,mem */ |
4917 | 3 | case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */ |
4918 | 3 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
4919 | | /* fall through */ |
4920 | 3 | case X86EMUL_OPC_VEX(0x0f, 0x90): /* kmov{w,q} k/mem,k */ |
4921 | 4 | case X86EMUL_OPC_VEX_66(0x0f, 0x90): /* kmov{b,d} k/mem,k */ |
4922 | 4 | generate_exception_if(vex.l || !vex.r, X86_EXC_UD); |
4923 | 1 | host_and_vcpu_must_have(avx512f); |
4924 | 0 | if ( vex.w ) |
4925 | 0 | { |
4926 | 0 | host_and_vcpu_must_have(avx512bw); |
4927 | 0 | op_bytes = 4 << !vex.pfx; |
4928 | 0 | } |
4929 | 0 | else if ( vex.pfx ) |
4930 | 0 | { |
4931 | 0 | host_and_vcpu_must_have(avx512dq); |
4932 | 0 | op_bytes = 1; |
4933 | 0 | } |
4934 | 0 | else |
4935 | 0 | op_bytes = 2; |
4936 | | |
4937 | 0 | get_fpu(X86EMUL_FPU_opmask); |
4938 | | |
4939 | 0 | opc = init_prefixes(stub); |
4940 | 0 | opc[0] = b; |
4941 | 0 | opc[1] = modrm; |
4942 | 0 | if ( ea.type == OP_MEM ) |
4943 | 0 | { |
4944 | | /* convert memory operand to (%rAX) */ |
4945 | 0 | vex.b = 1; |
4946 | 0 | opc[1] &= 0x38; |
4947 | 0 | } |
4948 | 0 | insn_bytes = PFX_BYTES + 2; |
4949 | 0 | break; |
4950 | | |
4951 | 1 | case X86EMUL_OPC_VEX(0x0f, 0x92): /* kmovw r32,k */ |
4952 | 4 | case X86EMUL_OPC_VEX_66(0x0f, 0x92): /* kmovb r32,k */ |
4953 | 7 | case X86EMUL_OPC_VEX_F2(0x0f, 0x92): /* kmov{d,q} reg,k */ |
4954 | 7 | generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || |
4955 | 7 | ea.type != OP_REG, X86_EXC_UD); |
4956 | | |
4957 | 1 | host_and_vcpu_must_have(avx512f); |
4958 | 0 | if ( vex.pfx == vex_f2 ) |
4959 | 0 | host_and_vcpu_must_have(avx512bw); |
4960 | 0 | else |
4961 | 0 | { |
4962 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
4963 | 0 | if ( vex.pfx ) |
4964 | 0 | host_and_vcpu_must_have(avx512dq); |
4965 | 0 | } |
4966 | | |
4967 | 0 | get_fpu(X86EMUL_FPU_opmask); |
4968 | | |
4969 | 0 | opc = init_prefixes(stub); |
4970 | 0 | opc[0] = b; |
4971 | | /* Convert GPR source to %rAX. */ |
4972 | 0 | vex.b = 1; |
4973 | 0 | if ( !mode_64bit() ) |
4974 | 0 | vex.w = 0; |
4975 | 0 | opc[1] = modrm & 0xf8; |
4976 | 0 | opc[2] = 0xc3; |
4977 | |
4978 | 0 | copy_VEX(opc, vex); |
4979 | 0 | ea.reg = decode_gpr(&_regs, modrm_rm); |
4980 | 0 | invoke_stub("", "", "=m" (dummy) : "a" (*ea.reg)); |
4981 | |
4982 | 0 | put_stub(stub); |
4983 | |
4984 | 0 | ASSERT(!state->simd_size); |
4985 | 0 | dst.type = OP_NONE; |
4986 | 0 | break; |
4987 | | |
4988 | 1 | case X86EMUL_OPC_VEX(0x0f, 0x93): /* kmovw k,r32 */ |
4989 | 5 | case X86EMUL_OPC_VEX_66(0x0f, 0x93): /* kmovb k,r32 */ |
4990 | 8 | case X86EMUL_OPC_VEX_F2(0x0f, 0x93): /* kmov{d,q} k,reg */ |
4991 | 8 | generate_exception_if(vex.l || vex.reg != 0xf || ea.type != OP_REG, |
4992 | 8 | X86_EXC_UD); |
4993 | 1 | dst = ea; |
4994 | 1 | dst.reg = decode_gpr(&_regs, modrm_reg); |
4995 | | |
4996 | 1 | host_and_vcpu_must_have(avx512f); |
4997 | 0 | if ( vex.pfx == vex_f2 ) |
4998 | 0 | { |
4999 | 0 | host_and_vcpu_must_have(avx512bw); |
5000 | 0 | dst.bytes = 4 << (mode_64bit() && vex.w); |
5001 | 0 | } |
5002 | 0 | else |
5003 | 0 | { |
5004 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
5005 | 0 | dst.bytes = 4; |
5006 | 0 | if ( vex.pfx ) |
5007 | 0 | host_and_vcpu_must_have(avx512dq); |
5008 | 0 | } |
5009 | | |
5010 | 0 | get_fpu(X86EMUL_FPU_opmask); |
5011 | | |
5012 | 0 | opc = init_prefixes(stub); |
5013 | 0 | opc[0] = b; |
5014 | | /* Convert GPR destination to %rAX. */ |
5015 | 0 | vex.r = 1; |
5016 | 0 | if ( !mode_64bit() ) |
5017 | 0 | vex.w = 0; |
5018 | 0 | opc[1] = modrm & 0xc7; |
5019 | 0 | opc[2] = 0xc3; |
5020 | |
5021 | 0 | copy_VEX(opc, vex); |
5022 | 0 | invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); |
5023 | |
5024 | 0 | put_stub(stub); |
5025 | |
5026 | 0 | ASSERT(!state->simd_size); |
5027 | 0 | break; |
5028 | | |
5029 | 9 | case X86EMUL_OPC_VEX(0x0f, 0x99): /* ktest{w,q} k,k */ |
5030 | 9 | if ( !vex.w ) |
5031 | 9 | host_and_vcpu_must_have(avx512dq); |
5032 | | /* fall through */ |
5033 | 5 | case X86EMUL_OPC_VEX(0x0f, 0x98): /* kortest{w,q} k,k */ |
5034 | 11 | case X86EMUL_OPC_VEX_66(0x0f, 0x98): /* kortest{b,d} k,k */ |
5035 | 12 | case X86EMUL_OPC_VEX_66(0x0f, 0x99): /* ktest{b,d} k,k */ |
5036 | 12 | generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || |
5037 | 12 | ea.type != OP_REG, X86_EXC_UD); |
5038 | 1 | host_and_vcpu_must_have(avx512f); |
5039 | 0 | if ( vex.w ) |
5040 | 0 | host_and_vcpu_must_have(avx512bw); |
5041 | 0 | else if ( vex.pfx ) |
5042 | 0 | host_and_vcpu_must_have(avx512dq); |
5043 | | |
5044 | 0 | get_fpu(X86EMUL_FPU_opmask); |
5045 | | |
5046 | 0 | opc = init_prefixes(stub); |
5047 | 0 | opc[0] = b; |
5048 | 0 | opc[1] = modrm; |
5049 | 0 | opc[2] = 0xc3; |
5050 | |
5051 | 0 | copy_VEX(opc, vex); |
5052 | 0 | _regs.eflags &= ~EFLAGS_MASK; |
5053 | 0 | invoke_stub("", |
5054 | 0 | _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), |
5055 | 0 | [eflags] "+g" (_regs.eflags), |
5056 | 0 | "=a" (dst.val), [tmp] "=&r" (dummy) |
5057 | 0 | : [mask] "i" (EFLAGS_MASK)); |
5058 | |
5059 | 0 | put_stub(stub); |
5060 | |
5061 | 0 | ASSERT(!state->simd_size); |
5062 | 0 | dst.type = OP_NONE; |
5063 | 0 | break; |
5064 | | |
5065 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
5066 | | |
5067 | 1.41k | case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */ |
5068 | 1.41k | msr_val = 0; |
5069 | 1.41k | fail_if(ops->cpuid == NULL); |
5070 | | |
5071 | | /* Speculatively read MSR_INTEL_MISC_FEATURES_ENABLES. */ |
5072 | 1.41k | if ( ops->read_msr && !mode_ring0() && |
5073 | 1.41k | (rc = ops->read_msr(MSR_INTEL_MISC_FEATURES_ENABLES, |
5074 | 470 | &msr_val, ctxt)) == X86EMUL_EXCEPTION ) |
5075 | 470 | { |
5076 | | /* Not implemented. Squash the exception and proceed normally. */ |
5077 | 470 | x86_emul_reset_event(ctxt); |
5078 | 470 | rc = X86EMUL_OKAY; |
5079 | 470 | } |
5080 | 1.41k | if ( rc != X86EMUL_OKAY ) |
5081 | 0 | goto done; |
5082 | | |
5083 | 1.41k | generate_exception_if((msr_val & MSR_MISC_FEATURES_CPUID_FAULTING), |
5084 | 1.41k | X86_EXC_GP, 0); /* Faulting active? (Inc. CPL test) */ |
5085 | | |
5086 | 1.41k | rc = ops->cpuid(_regs.eax, _regs.ecx, &leaf, ctxt); |
5087 | 1.41k | if ( rc != X86EMUL_OKAY ) |
5088 | 0 | goto done; |
5089 | 1.41k | _regs.r(ax) = leaf.a; |
5090 | 1.41k | _regs.r(bx) = leaf.b; |
5091 | 1.41k | _regs.r(cx) = leaf.c; |
5092 | 1.41k | _regs.r(dx) = leaf.d; |
5093 | 1.41k | break; |
5094 | | |
5095 | 1.00k | case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */ |
5096 | 1.00k | generate_exception_if(lock_prefix, X86_EXC_UD); |
5097 | | |
5098 | 1.00k | if ( ops->rmw && dst.type == OP_MEM && |
5099 | 1.00k | (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, |
5100 | 0 | dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) |
5101 | 0 | goto done; |
5102 | | |
5103 | 1.00k | emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); |
5104 | 1.00k | dst.type = OP_NONE; |
5105 | 1.00k | break; |
5106 | | |
5107 | 535 | case X86EMUL_OPC(0x0f, 0xa4): /* shld imm8,r,r/m */ |
5108 | 752 | case X86EMUL_OPC(0x0f, 0xa5): /* shld %%cl,r,r/m */ |
5109 | 951 | case X86EMUL_OPC(0x0f, 0xac): /* shrd imm8,r,r/m */ |
5110 | 2.18k | case X86EMUL_OPC(0x0f, 0xad): /* shrd %%cl,r,r/m */ { |
5111 | 2.18k | uint8_t shift, width = dst.bytes << 3; |
5112 | | |
5113 | 2.18k | generate_exception_if(lock_prefix, X86_EXC_UD); |
5114 | | |
5115 | 2.18k | if ( b & 1 ) |
5116 | 1.44k | shift = _regs.cl; |
5117 | 734 | else |
5118 | 734 | { |
5119 | 734 | shift = src.val; |
5120 | 734 | src.reg = decode_gpr(&_regs, modrm_reg); |
5121 | 734 | src.val = truncate_word(*src.reg, dst.bytes); |
5122 | 734 | } |
5123 | | |
5124 | 2.18k | if ( ops->rmw && dst.type == OP_MEM ) |
5125 | 0 | { |
5126 | 0 | ea.orig_val = shift; |
5127 | 0 | state->rmw = b & 8 ? rmw_shrd : rmw_shld; |
5128 | 0 | break; |
5129 | 0 | } |
5130 | | |
5131 | 2.18k | if ( (shift &= width - 1) == 0 ) |
5132 | 277 | break; |
5133 | 1.90k | dst.orig_val = dst.val; |
5134 | 1.90k | dst.val = (b & 8) ? |
5135 | | /* shrd */ |
5136 | 1.16k | ((dst.orig_val >> shift) | |
5137 | 1.16k | truncate_word(src.val << (width - shift), dst.bytes)) : |
5138 | | /* shld */ |
5139 | 1.90k | (truncate_word(dst.orig_val << shift, dst.bytes) | |
5140 | 736 | (src.val >> (width - shift))); |
5141 | 1.90k | _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF | |
5142 | 1.90k | X86_EFLAGS_PF | X86_EFLAGS_CF); |
5143 | 1.90k | if ( (dst.orig_val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 ) |
5144 | 876 | _regs.eflags |= X86_EFLAGS_CF; |
5145 | 1.90k | if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 ) |
5146 | 515 | _regs.eflags |= X86_EFLAGS_OF; |
5147 | 1.90k | _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? X86_EFLAGS_SF : 0; |
5148 | 1.90k | _regs.eflags |= (dst.val == 0) ? X86_EFLAGS_ZF : 0; |
5149 | 1.90k | _regs.eflags |= even_parity(dst.val) ? X86_EFLAGS_PF : 0; |
5150 | 1.90k | break; |
5151 | 2.18k | } |
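/*
 * Worked 16-bit example for shld $4: with dst = 0x1234 and src = 0xabcd the
 * result is truncate(0x1234 << 4) | (0xabcd >> 12) = 0x234a.  CF is the
 * last bit shifted out, i.e. bit (16 - 4) of the original dst, here 1, and
 * OF is set only if the sign bit changed, which it did not.
 */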
5152 | | |
5153 | 786 | case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */ |
5154 | 786 | if ( ops->rmw && dst.type == OP_MEM ) |
5155 | 0 | state->rmw = rmw_bts; |
5156 | 786 | else |
5157 | 786 | emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); |
5158 | 786 | break; |
5159 | | |
5160 | 846 | case X86EMUL_OPC(0x0f, 0xae): /* Grp15 */ |
5161 | 853 | case X86EMUL_OPC_66(0x0f, 0xae): |
5162 | 1.24k | case X86EMUL_OPC_F3(0x0f, 0xae): |
5163 | 1.24k | #ifndef X86EMUL_NO_SIMD |
5164 | 1.32k | case X86EMUL_OPC_VEX(0x0f, 0xae): |
5165 | 1.32k | #endif |
5166 | 1.32k | rc = x86emul_0fae(state, &_regs, &dst, &src, ctxt, ops, &fpu_type); |
5167 | 1.32k | goto dispatch_from_helper; |
5168 | | |
5169 | 558 | case X86EMUL_OPC(0x0f, 0xaf): /* imul */ |
5170 | 558 | emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags); |
5171 | 558 | break; |
5172 | | |
5173 | 1.16k | case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */ |
5174 | 1.16k | fail_if(!ops->cmpxchg); |
5175 | | |
5176 | 1.16k | if ( ops->rmw && dst.type == OP_MEM && |
5177 | 1.16k | (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, |
5178 | 0 | dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) |
5179 | 0 | goto done; |
5180 | | |
5181 | 1.16k | _regs.eflags &= ~EFLAGS_MASK; |
5182 | 1.16k | if ( !((dst.val ^ _regs.r(ax)) & |
5183 | 1.16k | (~0UL >> (8 * (sizeof(long) - dst.bytes)))) ) |
5184 | 525 | { |
5185 | | /* Success: write back to memory. */ |
5186 | 525 | if ( dst.type == OP_MEM ) |
5187 | 203 | { |
5188 | 203 | dst.val = _regs.r(ax); |
5189 | 203 | switch ( rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val, |
5190 | 203 | &src.val, dst.bytes, lock_prefix, |
5191 | 203 | ctxt) ) |
5192 | 203 | { |
5193 | 200 | case X86EMUL_OKAY: |
5194 | 200 | dst.type = OP_NONE; |
5195 | 200 | _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF; |
5196 | 200 | break; |
5197 | 0 | case X86EMUL_CMPXCHG_FAILED: |
5198 | 0 | rc = X86EMUL_OKAY; |
5199 | 0 | break; |
5200 | 3 | default: |
5201 | 3 | goto done; |
5202 | 203 | } |
5203 | 203 | } |
5204 | 322 | else |
5205 | 322 | { |
5206 | 322 | dst.val = src.val; |
5207 | 322 | _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF; |
5208 | 322 | } |
5209 | 525 | } |
5210 | 1.16k | if ( !(_regs.eflags & X86_EFLAGS_ZF) ) |
5211 | 639 | { |
5212 | | /* Failure: write the value we saw to EAX. */ |
5213 | 639 | dst.type = OP_REG; |
5214 | 639 | dst.reg = (unsigned long *)&_regs.r(ax); |
5215 | | /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */ |
5216 | 639 | src.val = _regs.r(ax); |
5217 | 639 | emulate_2op_SrcV("cmp", dst, src, _regs.eflags); |
5218 | 639 | ASSERT(!(_regs.eflags & X86_EFLAGS_ZF)); |
5219 | 639 | } |
5220 | 1.16k | break; |
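/*
 * On a successful compare the implicit CMP of two equal values yields a
 * zero result, so of the arithmetic flags only ZF and PF would be set by
 * hardware; OR-ing exactly those two bits above mirrors that, while the
 * failure path recomputes the full flag set via the "cmp" macro.
 */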
5221 | | |
5222 | 1.16k | case X86EMUL_OPC(0x0f, 0xb2): /* lss */ |
5223 | 272 | case X86EMUL_OPC(0x0f, 0xb4): /* lfs */ |
5224 | 474 | case X86EMUL_OPC(0x0f, 0xb5): /* lgs */ |
5225 | 474 | seg = b & 7; |
5226 | 474 | goto les; |
5227 | | |
5228 | 957 | case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */ |
5229 | 957 | if ( ops->rmw && dst.type == OP_MEM ) |
5230 | 0 | state->rmw = rmw_btr; |
5231 | 957 | else |
5232 | 957 | emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); |
5233 | 957 | break; |
5234 | | |
5235 | 957 | case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */ |
5236 | | /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ |
5237 | 67 | dst.reg = decode_gpr(&_regs, modrm_reg); |
5238 | 67 | dst.bytes = op_bytes; |
5239 | 67 | dst.val = (uint8_t)src.val; |
5240 | 67 | break; |
5241 | | |
5242 | 186 | case X86EMUL_OPC(0x0f, 0xb7): /* movzx rm16,r{16,32,64} */ |
5243 | 186 | dst.val = (uint16_t)src.val; |
5244 | 186 | break; |
5245 | | |
5246 | 442 | case X86EMUL_OPC_F3(0x0f, 0xb8): /* popcnt r/m,r */ |
5247 | 442 | host_and_vcpu_must_have(popcnt); |
5248 | 442 | asm ( "popcnt %1,%0" : "=r" (dst.val) : "rm" (src.val) ); |
5249 | 442 | _regs.eflags &= ~EFLAGS_MASK; |
5250 | 442 | if ( !dst.val ) |
5251 | 202 | _regs.eflags |= X86_EFLAGS_ZF; |
5252 | 442 | break; |
5253 | | |
5254 | 772 | case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */ |
5255 | 772 | switch ( modrm_reg & 7 ) |
5256 | 772 | { |
5257 | 198 | case 4: goto bt; |
5258 | 200 | case 5: goto bts; |
5259 | 194 | case 6: goto btr; |
5260 | 179 | case 7: goto btc; |
5261 | 1 | default: generate_exception(X86_EXC_UD); |
5262 | 772 | } |
5263 | 0 | break; |
5264 | | |
5265 | 970 | case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */ |
5266 | 970 | if ( ops->rmw && dst.type == OP_MEM ) |
5267 | 0 | state->rmw = rmw_btc; |
5268 | 970 | else |
5269 | 970 | emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); |
5270 | 970 | break; |
5271 | | |
5272 | 998 | case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */ |
5273 | 998 | { |
5274 | 998 | bool zf; |
5275 | | |
5276 | 998 | asm ( "bsf %2,%0" ASM_FLAG_OUT(, "; setz %1") |
5277 | 998 | : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf) |
5278 | 998 | : "rm" (src.val) ); |
5279 | 998 | _regs.eflags &= ~X86_EFLAGS_ZF; |
5280 | 998 | if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() ) |
5281 | 504 | { |
5282 | 504 | _regs.eflags &= ~X86_EFLAGS_CF; |
5283 | 504 | if ( zf ) |
5284 | 222 | { |
5285 | 222 | _regs.eflags |= X86_EFLAGS_CF; |
5286 | 222 | dst.val = op_bytes * 8; |
5287 | 222 | } |
5288 | 282 | else if ( !dst.val ) |
5289 | 197 | _regs.eflags |= X86_EFLAGS_ZF; |
5290 | 504 | } |
5291 | 494 | else if ( zf ) |
5292 | 236 | { |
5293 | 236 | _regs.eflags |= X86_EFLAGS_ZF; |
5294 | 236 | dst.type = OP_NONE; |
5295 | 236 | } |
5296 | 998 | break; |
5297 | 970 | } |
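/*
 * With an F3 prefix and BMI1 available this behaves as TZCNT: a zero source
 * sets CF and yields the operand width, a zero result sets ZF.  Otherwise
 * (no F3 prefix, or no BMI1) plain BSF semantics apply: a zero source only
 * sets ZF and the destination is left unmodified, hence dst.type = OP_NONE.
 */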
5298 | | |
5299 | 1.04k | case X86EMUL_OPC(0x0f, 0xbd): /* bsr or lzcnt */ |
5300 | 1.04k | { |
5301 | 1.04k | bool zf; |
5302 | | |
5303 | 1.04k | asm ( "bsr %2,%0" ASM_FLAG_OUT(, "; setz %1") |
5304 | 1.04k | : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf) |
5305 | 1.04k | : "rm" (src.val) ); |
5306 | 1.04k | _regs.eflags &= ~X86_EFLAGS_ZF; |
5307 | 1.04k | if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() ) |
5308 | 611 | { |
5309 | 611 | _regs.eflags &= ~X86_EFLAGS_CF; |
5310 | 611 | if ( zf ) |
5311 | 212 | { |
5312 | 212 | _regs.eflags |= X86_EFLAGS_CF; |
5313 | 212 | dst.val = op_bytes * 8; |
5314 | 212 | } |
5315 | 399 | else |
5316 | 399 | { |
5317 | 399 | dst.val = op_bytes * 8 - 1 - dst.val; |
5318 | 399 | if ( !dst.val ) |
5319 | 202 | _regs.eflags |= X86_EFLAGS_ZF; |
5320 | 399 | } |
5321 | 611 | } |
5322 | 434 | else if ( zf ) |
5323 | 218 | { |
5324 | 218 | _regs.eflags |= X86_EFLAGS_ZF; |
5325 | 218 | dst.type = OP_NONE; |
5326 | 218 | } |
5327 | 1.04k | break; |
5328 | 970 | } |
5329 | | |
5330 | 214 | case X86EMUL_OPC(0x0f, 0xbe): /* movsx rm8,r{16,32,64} */ |
5331 | | /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ |
5332 | 214 | dst.reg = decode_gpr(&_regs, modrm_reg); |
5333 | 214 | dst.bytes = op_bytes; |
5334 | 214 | dst.val = (int8_t)src.val; |
5335 | 214 | break; |
5336 | | |
5337 | 196 | case X86EMUL_OPC(0x0f, 0xbf): /* movsx rm16,r{16,32,64} */ |
5338 | 196 | dst.val = (int16_t)src.val; |
5339 | 196 | break; |
5340 | | |
5341 | 567 | case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */ |
5342 | 567 | if ( ops->rmw && dst.type == OP_MEM ) |
5343 | 0 | { |
5344 | 0 | state->rmw = rmw_xadd; |
5345 | 0 | break; |
5346 | 0 | } |
5347 | | /* Write back the register source. */ |
5348 | 567 | switch ( dst.bytes ) |
5349 | 567 | { |
5350 | 66 | case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break; |
5351 | 194 | case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break; |
5352 | 113 | case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */ |
5353 | 194 | case 8: *src.reg = dst.val; break; |
5354 | 567 | } |
5355 | 567 | goto add; |
5356 | | |
5357 | 5.52k | CASE_SIMD_ALL_FP_VEX(0x0f, 0xc2): /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */ |
5358 | | /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
5359 | 5.71k | CASE_SIMD_PACKED_FP_VEX(0x0f, 0xc6): /* shufp{s,d} $imm8,xmm/mem,xmm */ |
5360 | | /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
5361 | 5.71k | d = (d & ~SrcMask) | SrcMem; |
5362 | 5.71k | if ( vex.opcx == vex_none ) |
5363 | 863 | { |
5364 | 863 | if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) |
5365 | 450 | goto simd_0f_imm8_sse2; |
5366 | 413 | vcpu_must_have(sse); |
5367 | 413 | get_fpu(X86EMUL_FPU_xmm); |
5368 | 411 | goto simd_0f_imm8; |
5369 | 413 | } |
5370 | 664 | goto simd_0f_imm8_avx; |
5371 | | |
5372 | 664 | #ifndef X86EMUL_NO_SIMD |
5373 | | |
5374 | 664 | CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0xc2): /* vcmp{p,s}{s,d} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
5375 | 20 | generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) || |
5376 | 20 | (ea.type != OP_REG && evex.brs && |
5377 | 20 | (evex.pfx & VEX_PREFIX_SCALAR_MASK)) || |
5378 | 20 | !evex.r || !evex.R || evex.z), |
5379 | 20 | X86_EXC_UD); |
5380 | 1 | host_and_vcpu_must_have(avx512f); |
5381 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
5382 | 0 | avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK); |
5383 | 0 | simd_imm8_zmm: |
5384 | 0 | if ( (d & SrcMask) == SrcImmByte ) |
5385 | 0 | d = (d & ~SrcMask) | SrcMem; |
5386 | 0 | get_fpu(X86EMUL_FPU_zmm); |
5387 | 0 | opc = init_evex(stub); |
5388 | 0 | opc[0] = b; |
5389 | 0 | opc[1] = modrm; |
5390 | 0 | if ( ea.type == OP_MEM ) |
5391 | 0 | { |
5392 | | /* convert memory operand to (%rAX) */ |
5393 | 0 | evex.b = 1; |
5394 | 0 | opc[1] &= 0x38; |
5395 | 0 | } |
5396 | 0 | opc[2] = imm1; |
5397 | 0 | insn_bytes = EVEX_PFX_BYTES + 3; |
5398 | 0 | break; |
5399 | | |
5400 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
5401 | | |
5402 | 194 | case X86EMUL_OPC(0x0f, 0xc3): /* movnti */ |
5403 | | /* Ignore the non-temporal hint for now. */ |
5404 | 194 | vcpu_must_have(sse2); |
5405 | 194 | dst.val = src.val; |
5406 | 194 | sfence = true; |
5407 | 194 | break; |
5408 | | |
5409 | 903 | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc4): /* pinsrw $imm8,r32/m16,{,x}mm */ |
5410 | | /* vpinsrw $imm8,r32/m16,xmm,xmm */ |
5411 | 903 | generate_exception_if(vex.l, X86_EXC_UD); |
5412 | 441 | memcpy(mmvalp, &src.val, 2); |
5413 | 441 | ea.type = OP_MEM; |
5414 | 441 | state->simd_size = simd_other; |
5415 | 441 | goto simd_0f_int_imm8; |
5416 | | |
5417 | 0 | #ifndef X86EMUL_NO_SIMD |
5418 | | |
5419 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0xc4): /* vpinsrw $imm8,r32/m16,xmm,xmm */ |
5420 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */ |
5421 | 6 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */ |
5422 | 6 | generate_exception_if(evex.lr || evex.opmsk || evex.brs, X86_EXC_UD); |
5423 | 2 | if ( b & 2 ) |
5424 | 2 | host_and_vcpu_must_have(avx512dq); |
5425 | 1 | else |
5426 | 2 | host_and_vcpu_must_have(avx512bw); |
5427 | 0 | if ( !mode_64bit() ) |
5428 | 0 | evex.w = 0; |
5429 | 0 | memcpy(mmvalp, &src.val, src.bytes); |
5430 | 0 | ea.type = OP_MEM; |
5431 | 0 | d = SrcMem16; /* Fake for the common SIMD code below. */ |
5432 | 0 | state->simd_size = simd_other; |
5433 | 0 | goto avx512f_imm8_no_sae; |
5434 | | |
5435 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
5436 | | |
5437 | 977 | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc5): /* pextrw $imm8,{,x}mm,reg */ |
5438 | | /* vpextrw $imm8,xmm,reg */ |
5439 | 977 | generate_exception_if(vex.l, X86_EXC_UD); |
5440 | 629 | opc = init_prefixes(stub); |
5441 | 0 | opc[0] = b; |
5442 | | /* Convert GPR destination to %rAX. */ |
5443 | 629 | rex_prefix &= ~REX_R; |
5444 | 629 | vex.r = 1; |
5445 | 629 | if ( !mode_64bit() ) |
5446 | 262 | vex.w = 0; |
5447 | 629 | opc[1] = modrm & 0xc7; |
5448 | 629 | opc[2] = imm1; |
5449 | 629 | insn_bytes = PFX_BYTES + 3; |
5450 | 629 | goto simd_0f_to_gpr; |
5451 | | |
5452 | 0 | #ifndef X86EMUL_NO_SIMD |
5453 | | |
5454 | 4 | CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5455 | 4 | generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK), |
5456 | 4 | X86_EXC_UD); |
5457 | | /* fall through */ |
5458 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x03): /* valign{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5459 | 3 | fault_suppression = false; |
5460 | | /* fall through */ |
5461 | 4 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x25): /* vpternlog{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5462 | 17 | avx512f_imm8_no_sae: |
5463 | 17 | host_and_vcpu_must_have(avx512f); |
5464 | 0 | generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD); |
5465 | 0 | avx512_vlen_check(false); |
5466 | 0 | goto simd_imm8_zmm; |
5467 | | |
5468 | 0 | #endif /* X86EMUL_NO_SIMD */ |
5469 | | |
5470 | 1.87k | case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */ |
5471 | 1.87k | rc = x86emul_0fc7(state, &_regs, &dst, ctxt, ops, mmvalp); |
5472 | 1.87k | goto dispatch_from_helper; |
5473 | | |
5474 | 1.56k | case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */ |
5475 | 1.56k | dst.type = OP_REG; |
5476 | 1.56k | dst.reg = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3)); |
5477 | 1.56k | switch ( dst.bytes = op_bytes ) |
5478 | 1.56k | { |
5479 | 694 | default: /* case 2: */ |
5480 | | /* Undefined behaviour. Writes zero on all tested CPUs. */ |
5481 | 694 | dst.val = 0; |
5482 | 694 | break; |
5483 | 494 | case 4: |
5484 | 494 | #ifdef __x86_64__ |
5485 | 494 | asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) ); |
5486 | 494 | break; |
5487 | 380 | case 8: |
5488 | 380 | #endif |
5489 | 380 | asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) ); |
5490 | 380 | break; |
5491 | 1.56k | } |
5492 | 1.56k | break; |
5493 | | |
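/*
 * A standalone sketch (not part of this file) of the BSWAP handling
 * above.  With a 16-bit operand size the architectural result is
 * undefined; the emulator follows the observed behaviour of writing
 * zero.  __builtin_bswap32/64 are GCC/Clang builtins, used here purely
 * for illustration.
 */
#include <stdint.h>

static uint64_t bswap_model(uint64_t reg, unsigned int op_bytes)
{
    switch ( op_bytes )
    {
    case 2: return 0;                               /* undefined; 0 on tested CPUs */
    case 4: return __builtin_bswap32((uint32_t)reg);
    case 8: return __builtin_bswap64(reg);
    }
    return reg;
}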
5494 | 1.56k | #ifndef X86EMUL_NO_SIMD |
5495 | | |
5496 | 1.56k | case X86EMUL_OPC_EVEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,[xyz]mm,[xyz]mm{k} */ |
5497 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,[xyz]mm,[xyz]mm{k} */ |
5498 | 5 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe2): /* vpsra{d,q} xmm/m128,[xyz]mm,[xyz]mm{k} */ |
5499 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf2): /* vpslld xmm/m128,[xyz]mm,[xyz]mm{k} */ |
5500 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0xf3): /* vpsllq xmm/m128,[xyz]mm,[xyz]mm{k} */ |
5501 | 7 | generate_exception_if(evex.brs, X86_EXC_UD); |
5502 | | /* fall through */ |
5503 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x0c): /* vpermilps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5504 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x0d): /* vpermilpd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5505 | 6 | fault_suppression = false; |
5506 | 6 | if ( b == 0xe2 ) |
5507 | 1 | goto avx512f_no_sae; |
5508 | | /* fall through */ |
5509 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5510 | 6 | case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5511 | 7 | case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5512 | 7 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1e): /* vpabsd [xyz]mm/mem,[xyz]mm{k} */ |
5513 | 8 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1f): /* vpabsq [xyz]mm/mem,[xyz]mm{k} */ |
5514 | 8 | generate_exception_if(evex.w != (b & 1), X86_EXC_UD); |
5515 | 5 | goto avx512f_no_sae; |
5516 | | |
5517 | 5 | #endif /* !X86EMUL_NO_SIMD */ |
5518 | 5 | #ifndef X86EMUL_NO_MMX |
5519 | | |
5520 | 380 | case X86EMUL_OPC(0x0f, 0xd4): /* paddq mm/m64,mm */ |
5521 | 465 | case X86EMUL_OPC(0x0f, 0xf4): /* pmuludq mm/m64,mm */ |
5522 | 534 | case X86EMUL_OPC(0x0f, 0xfb): /* psubq mm/m64,mm */ |
5523 | 534 | vcpu_must_have(sse2); |
5524 | 534 | goto simd_0f_mmx; |
5525 | | |
5526 | 534 | #endif /* !X86EMUL_NO_MMX */ |
5527 | 534 | #if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD) |
5528 | | |
5529 | 534 | case X86EMUL_OPC_F3(0x0f, 0xd6): /* movq2dq mm,xmm */ |
5530 | 387 | case X86EMUL_OPC_F2(0x0f, 0xd6): /* movdq2q xmm,mm */ |
5531 | 387 | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
5532 | 381 | op_bytes = 8; |
5533 | 381 | host_and_vcpu_must_have(mmx); |
5534 | 381 | goto simd_0f_int; |
5535 | | |
5536 | 381 | #endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */ |
5537 | 381 | #ifndef X86EMUL_NO_MMX |
5538 | | |
5539 | 381 | case X86EMUL_OPC(0x0f, 0xe7): /* movntq mm,m64 */ |
5540 | 225 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
5541 | 224 | sfence = true; |
5542 | | /* fall through */ |
5543 | 347 | case X86EMUL_OPC(0x0f, 0xda): /* pminub mm/m64,mm */ |
5544 | 382 | case X86EMUL_OPC(0x0f, 0xde): /* pmaxub mm/m64,mm */ |
5545 | 709 | case X86EMUL_OPC(0x0f, 0xea): /* pminsw mm/m64,mm */ |
5546 | 942 | case X86EMUL_OPC(0x0f, 0xee): /* pmaxsw mm/m64,mm */ |
5547 | 1.13k | case X86EMUL_OPC(0x0f, 0xe0): /* pavgb mm/m64,mm */ |
5548 | 1.34k | case X86EMUL_OPC(0x0f, 0xe3): /* pavgw mm/m64,mm */ |
5549 | 1.53k | case X86EMUL_OPC(0x0f, 0xe4): /* pmulhuw mm/m64,mm */ |
5550 | 2.06k | case X86EMUL_OPC(0x0f, 0xf6): /* psadbw mm/m64,mm */ |
5551 | 2.06k | vcpu_must_have(mmxext); |
5552 | 2.06k | goto simd_0f_mmx; |
5553 | | |
5554 | 2.06k | #endif /* !X86EMUL_NO_MMX */ |
5555 | 2.06k | #ifndef X86EMUL_NO_SIMD |
5556 | | |
5557 | 2.06k | case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5558 | 2 | case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5559 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5560 | 4 | case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5561 | 5 | case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5562 | 5 | host_and_vcpu_must_have(avx512bw); |
5563 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
5564 | 0 | elem_bytes = b & 0x10 ? 1 : 2; |
5565 | 0 | goto avx512f_no_sae; |
5566 | | |
5567 | 101 | case X86EMUL_OPC_66(0x0f, 0xe6): /* cvttpd2dq xmm/mem,xmm */ |
5568 | 135 | case X86EMUL_OPC_VEX_66(0x0f, 0xe6): /* vcvttpd2dq {x,y}mm/mem,xmm */ |
5569 | 201 | case X86EMUL_OPC_F3(0x0f, 0xe6): /* cvtdq2pd xmm/mem,xmm */ |
5570 | 235 | case X86EMUL_OPC_VEX_F3(0x0f, 0xe6): /* vcvtdq2pd xmm/mem,{x,y}mm */ |
5571 | 301 | case X86EMUL_OPC_F2(0x0f, 0xe6): /* cvtpd2dq xmm/mem,xmm */ |
5572 | 335 | case X86EMUL_OPC_VEX_F2(0x0f, 0xe6): /* vcvtpd2dq {x,y}mm/mem,xmm */ |
5573 | 335 | d |= TwoOp; |
5574 | 335 | op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l); |
5575 | 335 | goto simd_0f_cvt; |
5576 | | |
5577 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
5578 | | |
5579 | 1.19k | CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* {,v}maskmov{q,dqu} {,x}mm,{,x}mm */ |
5580 | 1.19k | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
5581 | 508 | if ( vex.opcx != vex_none ) |
5582 | 128 | { |
5583 | 128 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
5584 | 126 | d |= TwoOp; |
5585 | 126 | host_and_vcpu_must_have(avx); |
5586 | 126 | get_fpu(X86EMUL_FPU_ymm); |
5587 | 126 | } |
5588 | 380 | else if ( vex.pfx ) |
5589 | 81 | { |
5590 | 81 | vcpu_must_have(sse2); |
5591 | 81 | get_fpu(X86EMUL_FPU_xmm); |
5592 | 81 | } |
5593 | 299 | else |
5594 | 299 | { |
5595 | 299 | host_and_vcpu_must_have(mmx); |
5596 | 299 | vcpu_must_have(mmxext); |
5597 | 299 | get_fpu(X86EMUL_FPU_mmx); |
5598 | 299 | } |
5599 | | |
5600 | | /* |
5601 | | * While we can't reasonably provide fully correct behavior here |
5602 | | * (in particular avoiding the memory read in anticipation of all |
5603 | | * bytes in the range eventually being written), we can (and should) |
5604 | | * still suppress the memory access if all mask bits are clear. Read |
5605 | | * the mask bits via {,v}pmovmskb for that purpose. |
5606 | | */ |
5607 | 503 | opc = init_prefixes(stub); |
5608 | 0 | opc[0] = 0xd7; /* {,v}pmovmskb */ |
5609 | | /* (Ab)use "sfence" for latching the original REX.R / VEX.R. */ |
5610 | 503 | sfence = rex_prefix & REX_R; |
5611 | | /* Convert GPR destination to %rAX. */ |
5612 | 503 | rex_prefix &= ~REX_R; |
5613 | 503 | vex.r = 1; |
5614 | 503 | if ( !mode_64bit() ) |
5615 | 208 | vex.w = 0; |
5616 | 503 | opc[1] = modrm & 0xc7; |
5617 | 503 | opc[2] = 0xc3; |
5618 | | |
5619 | 503 | copy_REX_VEX(opc, rex_prefix, vex); |
5620 | 503 | invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); |
5621 | | |
5622 | 503 | put_stub(stub); |
5623 | 503 | if ( !ea.val ) |
5624 | 14 | goto complete_insn; |
5625 | | |
5626 | 489 | opc = init_prefixes(stub); |
5627 | 0 | opc[0] = b; |
5628 | 489 | opc[1] = modrm; |
5629 | 489 | insn_bytes = PFX_BYTES + 2; |
5630 | | /* Restore high bit of XMM destination. */ |
5631 | 489 | if ( sfence ) |
5632 | 200 | { |
5633 | 200 | rex_prefix |= REX_R; |
5634 | 200 | vex.r = 0; |
5635 | 200 | } |
5636 | | |
5637 | 489 | ea.type = OP_MEM; |
5638 | 489 | ea.mem.off = truncate_ea(_regs.r(di)); |
5639 | 489 | sfence = true; |
5640 | 489 | break; |
5641 | | |
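/*
 * A minimal scalar sketch (not part of this file) of the MASKMOVQ
 * semantics emulated above: each byte whose mask byte has its top bit
 * set is stored at [rDI]; when no mask bit is set the memory access is
 * suppressed altogether, which is what the {,v}pmovmskb pre-check in the
 * emulator achieves before it runs the real instruction in the stub.
 */
#include <stdint.h>

static void maskmovq_model(const uint8_t src[8], const uint8_t mask[8],
                           uint8_t *dst /* byte at rDI */)
{
    unsigned int i, any = 0;

    for ( i = 0; i < 8; ++i )
        any |= mask[i] & 0x80;
    if ( !any )
        return;                    /* no memory access at all */

    for ( i = 0; i < 8; ++i )
        if ( mask[i] & 0x80 )
            dst[i] = src[i];
}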
5642 | 508 | CASE_SIMD_PACKED_INT(0x0f38, 0x00): /* pshufb {,x}mm/mem,{,x}mm */ |
5643 | 1.04k | CASE_SIMD_PACKED_INT(0x0f38, 0x01): /* phaddw {,x}mm/mem,{,x}mm */ |
5644 | 1.82k | CASE_SIMD_PACKED_INT(0x0f38, 0x02): /* phaddd {,x}mm/mem,{,x}mm */ |
5645 | 2.08k | CASE_SIMD_PACKED_INT(0x0f38, 0x03): /* phaddsw {,x}mm/mem,{,x}mm */ |
5646 | 2.58k | CASE_SIMD_PACKED_INT(0x0f38, 0x04): /* pmaddubsw {,x}mm/mem,{,x}mm */ |
5647 | 3.33k | CASE_SIMD_PACKED_INT(0x0f38, 0x05): /* phsubw {,x}mm/mem,{,x}mm */ |
5648 | 4.75k | CASE_SIMD_PACKED_INT(0x0f38, 0x06): /* phsubd {,x}mm/mem,{,x}mm */ |
5649 | 5.32k | CASE_SIMD_PACKED_INT(0x0f38, 0x07): /* phsubsw {,x}mm/mem,{,x}mm */ |
5650 | 5.88k | CASE_SIMD_PACKED_INT(0x0f38, 0x08): /* psignb {,x}mm/mem,{,x}mm */ |
5651 | 6.59k | CASE_SIMD_PACKED_INT(0x0f38, 0x09): /* psignw {,x}mm/mem,{,x}mm */ |
5652 | 7.26k | CASE_SIMD_PACKED_INT(0x0f38, 0x0a): /* psignd {,x}mm/mem,{,x}mm */ |
5653 | 7.79k | CASE_SIMD_PACKED_INT(0x0f38, 0x0b): /* pmulhrsw {,x}mm/mem,{,x}mm */ |
5654 | 8.30k | CASE_SIMD_PACKED_INT(0x0f38, 0x1c): /* pabsb {,x}mm/mem,{,x}mm */ |
5655 | 8.56k | CASE_SIMD_PACKED_INT(0x0f38, 0x1d): /* pabsw {,x}mm/mem,{,x}mm */ |
5656 | 8.76k | CASE_SIMD_PACKED_INT(0x0f38, 0x1e): /* pabsd {,x}mm/mem,{,x}mm */ |
5657 | 8.76k | host_and_vcpu_must_have(ssse3); |
5658 | 4.41k | if ( vex.pfx ) |
5659 | 1.51k | { |
5660 | 5.47k | simd_0f38_common: |
5661 | 5.47k | get_fpu(X86EMUL_FPU_xmm); |
5662 | 5.47k | } |
5663 | 2.90k | else |
5664 | 2.90k | { |
5665 | 2.90k | host_and_vcpu_must_have(mmx); |
5666 | 2.90k | get_fpu(X86EMUL_FPU_mmx); |
5667 | 2.90k | } |
5668 | 8.34k | opc = init_prefixes(stub); |
5669 | 0 | opc[0] = 0x38; |
5670 | 8.34k | opc[1] = b; |
5671 | 8.34k | opc[2] = modrm; |
5672 | 8.34k | if ( ea.type == OP_MEM ) |
5673 | 4.50k | { |
5674 | | /* Convert memory operand to (%rAX). */ |
5675 | 4.50k | rex_prefix &= ~REX_B; |
5676 | 4.50k | vex.b = 1; |
5677 | 4.50k | opc[2] &= 0x38; |
5678 | 4.50k | } |
5679 | 8.34k | insn_bytes = PFX_BYTES + 3; |
5680 | 8.34k | break; |
5681 | | |
5682 | 0 | #ifndef X86EMUL_NO_SIMD |
5683 | | |
5684 | 198 | case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */ |
5685 | 265 | case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */ |
5686 | 265 | generate_exception_if(!vex.l, X86_EXC_UD); |
5687 | | /* fall through */ |
5688 | 331 | case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */ |
5689 | 331 | if ( ea.type != OP_MEM ) |
5690 | 195 | { |
5691 | 195 | generate_exception_if(b & 2, X86_EXC_UD); |
5692 | 194 | host_and_vcpu_must_have(avx2); |
5693 | 194 | } |
5694 | | /* fall through */ |
5695 | 364 | case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
5696 | 438 | case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
5697 | 438 | generate_exception_if(vex.w, X86_EXC_UD); |
5698 | 432 | goto simd_0f_avx; |
5699 | | |
5700 | 495 | case X86EMUL_OPC_VEX_66(0x0f38, 0x0e): /* vtestps {x,y}mm/mem,{x,y}mm */ |
5701 | 569 | case X86EMUL_OPC_VEX_66(0x0f38, 0x0f): /* vtestpd {x,y}mm/mem,{x,y}mm */ |
5702 | 569 | generate_exception_if(vex.w, X86_EXC_UD); |
5703 | | /* fall through */ |
5704 | 636 | case X86EMUL_OPC_66(0x0f38, 0x17): /* ptest xmm/m128,xmm */ |
5705 | 671 | case X86EMUL_OPC_VEX_66(0x0f38, 0x17): /* vptest {x,y}mm/mem,{x,y}mm */ |
5706 | 671 | if ( vex.opcx == vex_none ) |
5707 | 73 | { |
5708 | 73 | host_and_vcpu_must_have(sse4_1); |
5709 | 73 | get_fpu(X86EMUL_FPU_xmm); |
5710 | 73 | } |
5711 | 598 | else |
5712 | 598 | { |
5713 | 598 | generate_exception_if(vex.reg != 0xf, X86_EXC_UD); |
5714 | 596 | host_and_vcpu_must_have(avx); |
5715 | 596 | get_fpu(X86EMUL_FPU_ymm); |
5716 | 596 | } |
5717 | | |
5718 | 667 | opc = init_prefixes(stub); |
5719 | 667 | if ( vex.opcx == vex_none ) |
5720 | 72 | opc++[0] = 0x38; |
5721 | 667 | opc[0] = b; |
5722 | 667 | opc[1] = modrm; |
5723 | 667 | if ( ea.type == OP_MEM ) |
5724 | 140 | { |
5725 | 140 | rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16 << vex.l, ctxt); |
5726 | 140 | if ( rc != X86EMUL_OKAY ) |
5727 | 11 | goto done; |
5728 | | |
5729 | | /* Convert memory operand to (%rAX). */ |
5730 | 129 | rex_prefix &= ~REX_B; |
5731 | 129 | vex.b = 1; |
5732 | 129 | opc[1] &= 0x38; |
5733 | 129 | } |
5734 | 656 | insn_bytes = PFX_BYTES + 2; |
5735 | 656 | opc[2] = 0xc3; |
5736 | 656 | if ( vex.opcx == vex_none ) |
5737 | 71 | { |
5738 | | /* Cover for extra prefix byte. */ |
5739 | 71 | --opc; |
5740 | 71 | ++insn_bytes; |
5741 | 71 | } |
5742 | | |
5743 | 656 | copy_REX_VEX(opc, rex_prefix, vex); |
5744 | 656 | emulate_stub("+m" (*mmvalp), "a" (mmvalp)); |
5745 | | |
5746 | 656 | put_stub(stub); |
5747 | 656 | state->simd_size = simd_none; |
5748 | 656 | dst.type = OP_NONE; |
5749 | 656 | break; |
5750 | | |
5751 | 66 | case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */ |
5752 | 260 | case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */ |
5753 | 454 | case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */ |
5754 | 674 | case X86EMUL_OPC_66(0x0f38, 0x23): /* pmovsxwd xmm/m64,xmm */ |
5755 | 734 | case X86EMUL_OPC_66(0x0f38, 0x24): /* pmovsxwq xmm/m32,xmm */ |
5756 | 930 | case X86EMUL_OPC_66(0x0f38, 0x25): /* pmovsxdq xmm/m64,xmm */ |
5757 | 998 | case X86EMUL_OPC_66(0x0f38, 0x30): /* pmovzxbw xmm/m64,xmm */ |
5758 | 1.06k | case X86EMUL_OPC_66(0x0f38, 0x31): /* pmovzxbd xmm/m32,xmm */ |
5759 | 1.12k | case X86EMUL_OPC_66(0x0f38, 0x32): /* pmovzxbq xmm/m16,xmm */ |
5760 | 1.16k | case X86EMUL_OPC_66(0x0f38, 0x33): /* pmovzxwd xmm/m64,xmm */ |
5761 | 1.24k | case X86EMUL_OPC_66(0x0f38, 0x34): /* pmovzxwq xmm/m32,xmm */ |
5762 | 1.45k | case X86EMUL_OPC_66(0x0f38, 0x35): /* pmovzxdq xmm/m64,xmm */ |
5763 | 1.45k | op_bytes = 16 >> pmov_convert_delta[b & 7]; |
5764 | | /* fall through */ |
5765 | 1.71k | case X86EMUL_OPC_66(0x0f38, 0x10): /* pblendvb XMM0,xmm/m128,xmm */ |
5766 | 2.02k | case X86EMUL_OPC_66(0x0f38, 0x14): /* blendvps XMM0,xmm/m128,xmm */ |
5767 | 2.09k | case X86EMUL_OPC_66(0x0f38, 0x15): /* blendvpd XMM0,xmm/m128,xmm */ |
5768 | 2.16k | case X86EMUL_OPC_66(0x0f38, 0x28): /* pmuldq xmm/m128,xmm */ |
5769 | 2.23k | case X86EMUL_OPC_66(0x0f38, 0x29): /* pcmpeqq xmm/m128,xmm */ |
5770 | 2.42k | case X86EMUL_OPC_66(0x0f38, 0x2b): /* packusdw xmm/m128,xmm */ |
5771 | 2.61k | case X86EMUL_OPC_66(0x0f38, 0x38): /* pminsb xmm/m128,xmm */ |
5772 | 2.71k | case X86EMUL_OPC_66(0x0f38, 0x39): /* pminsd xmm/m128,xmm */ |
5773 | 2.78k | case X86EMUL_OPC_66(0x0f38, 0x3a): /* pminub xmm/m128,xmm */ |
5774 | 2.86k | case X86EMUL_OPC_66(0x0f38, 0x3b): /* pminud xmm/m128,xmm */ |
5775 | 3.06k | case X86EMUL_OPC_66(0x0f38, 0x3c): /* pmaxsb xmm/m128,xmm */ |
5776 | 3.12k | case X86EMUL_OPC_66(0x0f38, 0x3d): /* pmaxsd xmm/m128,xmm */ |
5777 | 3.19k | case X86EMUL_OPC_66(0x0f38, 0x3e): /* pmaxub xmm/m128,xmm */ |
5778 | 3.22k | case X86EMUL_OPC_66(0x0f38, 0x3f): /* pmaxud xmm/m128,xmm */ |
5779 | 3.29k | case X86EMUL_OPC_66(0x0f38, 0x40): /* pmulld xmm/m128,xmm */ |
5780 | 3.48k | case X86EMUL_OPC_66(0x0f38, 0x41): /* phminposuw xmm/m128,xmm */ |
5781 | 3.48k | host_and_vcpu_must_have(sse4_1); |
5782 | 3.48k | goto simd_0f38_common; |
5783 | | |
5784 | 3.48k | case X86EMUL_OPC_EVEX_66(0x0f38, 0x10): /* vpsrlvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5785 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x11): /* vpsravw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5786 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x12): /* vpsllvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
5787 | 3 | host_and_vcpu_must_have(avx512bw); |
5788 | 0 | generate_exception_if(!evex.w || evex.brs, X86_EXC_UD); |
5789 | 0 | elem_bytes = 2; |
5790 | 0 | goto avx512f_no_sae; |
5791 | | |
5792 | 1 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x10): /* vpmovuswb [xyz]mm,{x,y}mm/mem{k} */ |
5793 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x20): /* vpmovsxbw {x,y}mm/mem,[xyz]mm{k} */ |
5794 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x20): /* vpmovswb [xyz]mm,{x,y}mm/mem{k} */ |
5795 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x30): /* vpmovzxbw {x,y}mm/mem,[xyz]mm{k} */ |
5796 | 5 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x30): /* vpmovwb [xyz]mm,{x,y}mm/mem{k} */ |
5797 | 5 | host_and_vcpu_must_have(avx512bw); |
5798 | 0 | if ( evex.pfx != vex_f3 ) |
5799 | 0 | { |
5800 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,[xyz]mm{k} */ |
5801 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,[xyz]mm{k} */ |
5802 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x23): /* vpmovsxwd {x,y}mm/mem,[xyz]mm{k} */ |
5803 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,[xyz]mm{k} */ |
5804 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x25): /* vpmovsxdq {x,y}mm/mem,[xyz]mm{k} */ |
5805 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,[xyz]mm{k} */ |
5806 | 7 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,[xyz]mm{k} */ |
5807 | 8 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x33): /* vpmovzxwd {x,y}mm/mem,[xyz]mm{k} */ |
5808 | 9 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,[xyz]mm{k} */ |
5809 | 10 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x35): /* vpmovzxdq {x,y}mm/mem,[xyz]mm{k} */ |
5810 | 10 | generate_exception_if(evex.w && (b & 7) == 5, X86_EXC_UD); |
5811 | 10 | } |
5812 | 0 | else |
5813 | 0 | { |
5814 | 1 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x11): /* vpmovusdb [xyz]mm,xmm/mem{k} */ |
5815 | 2 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x12): /* vpmovusqb [xyz]mm,xmm/mem{k} */ |
5816 | 4 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x13): /* vpmovusdw [xyz]mm,{x,y}mm/mem{k} */ |
5817 | 5 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x14): /* vpmovusqw [xyz]mm,xmm/mem{k} */ |
5818 | 6 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x15): /* vpmovusqd [xyz]mm,{x,y}mm/mem{k} */ |
5819 | 7 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x21): /* vpmovsdb [xyz]mm,xmm/mem{k} */ |
5820 | 8 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x22): /* vpmovsqb [xyz]mm,xmm/mem{k} */ |
5821 | 9 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x23): /* vpmovsdw [xyz]mm,{x,y}mm/mem{k} */ |
5822 | 10 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x24): /* vpmovsqw [xyz]mm,xmm/mem{k} */ |
5823 | 11 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x25): /* vpmovsqd [xyz]mm,{x,y}mm/mem{k} */ |
5824 | 12 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x31): /* vpmovdb [xyz]mm,xmm/mem{k} */ |
5825 | 13 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x32): /* vpmovqb [xyz]mm,xmm/mem{k} */ |
5826 | 14 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x33): /* vpmovdw [xyz]mm,{x,y}mm/mem{k} */ |
5827 | 15 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x34): /* vpmovqw [xyz]mm,xmm/mem{k} */ |
5828 | 16 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x35): /* vpmovqd [xyz]mm,{x,y}mm/mem{k} */ |
5829 | 16 | generate_exception_if(evex.w || (ea.type != OP_REG && evex.z), X86_EXC_UD); |
5830 | 11 | d = DstMem | SrcReg | TwoOp; |
5831 | 11 | } |
5832 | 20 | generate_exception_if(evex.brs, X86_EXC_UD); |
5833 | 14 | op_bytes = 64 >> (pmov_convert_delta[b & 7] + 2 - evex.lr); |
5834 | 14 | elem_bytes = (b & 7) < 3 ? 1 : (b & 7) != 5 ? 2 : 4; |
5835 | 14 | goto avx512f_no_sae; |
5836 | | |
5837 | 77 | case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */ |
5838 | 77 | generate_exception_if(vex.w, X86_EXC_UD); |
5839 | 71 | host_and_vcpu_must_have(f16c); |
5840 | 71 | op_bytes = 8 << vex.l; |
5841 | 71 | goto simd_0f_ymm; |
5842 | | |
5843 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x13): /* vcvtph2ps {x,y}mm/mem,[xyz]mm{k} */ |
5844 | 4 | generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs), X86_EXC_UD); |
5845 | 2 | host_and_vcpu_must_have(avx512f); |
5846 | 0 | if ( !evex.brs ) |
5847 | 0 | avx512_vlen_check(false); |
5848 | 0 | op_bytes = 8 << evex.lr; |
5849 | 0 | elem_bytes = 2; |
5850 | 0 | goto simd_zmm; |
5851 | | |
5852 | 69 | case X86EMUL_OPC_VEX_66(0x0f38, 0x16): /* vpermps ymm/m256,ymm,ymm */ |
5853 | 270 | case X86EMUL_OPC_VEX_66(0x0f38, 0x36): /* vpermd ymm/m256,ymm,ymm */ |
5854 | 270 | generate_exception_if(!vex.l || vex.w, X86_EXC_UD); |
5855 | 263 | goto simd_0f_avx2; |
5856 | | |
5857 | 263 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x16): /* vpermp{s,d} {y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
5858 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x36): /* vperm{d,q} {y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
5859 | 3 | generate_exception_if(!evex.lr, X86_EXC_UD); |
5860 | 1 | fault_suppression = false; |
5861 | 1 | goto avx512f_no_sae; |
5862 | | |
5863 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */ |
5864 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,[xyz]mm{k} */ |
5865 | 3 | op_bytes = elem_bytes; |
5866 | 3 | generate_exception_if(evex.w || evex.brs, X86_EXC_UD); |
5867 | 3 | avx512_broadcast: |
5868 | | /* |
5869 | | * For the respective code below the main switch() to work we need to |
5870 | | * fold op_mask here: A source element gets read whenever any of its |
5871 | | * respective destination elements' mask bits is set. |
5872 | | */ |
5873 | 3 | if ( fault_suppression ) |
5874 | 0 | { |
5875 | 0 | n = 1 << ((b & 3) - evex.w); |
5876 | 0 | EXPECT(elem_bytes > 0); |
5877 | 0 | ASSERT(op_bytes == n * elem_bytes); |
5878 | 0 | for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n ) |
5879 | 0 | op_mask |= (op_mask >> i) & ((1 << n) - 1); |
5880 | 0 | } |
5881 | 3 | goto avx512f_no_sae; |
5882 | | |
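/*
 * Sketch (not from this file) of the op_mask folding done at the
 * avx512_broadcast label above: a broadcast must read its n source
 * elements whenever any of the replicated destination copies is
 * unmasked, so every n-bit group of the destination mask is OR-ed into
 * the lowest group before the common masking code runs.
 */
#include <stdint.h>

static uint64_t fold_broadcast_mask(uint64_t op_mask, unsigned int n,
                                    unsigned int total_elems)
{
    unsigned int i;

    for ( i = n; i < total_elems; i += n )
        op_mask |= (op_mask >> i) & ((1ULL << n) - 1);

    return op_mask;
}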
5883 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */ |
5884 | | /* vbroadcastf64x4 m256,zmm{k} */ |
5885 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x5b): /* vbroadcasti32x8 m256,zmm{k} */ |
5886 | | /* vbroadcasti64x4 m256,zmm{k} */ |
5887 | 3 | generate_exception_if(ea.type != OP_MEM || evex.lr != 2, X86_EXC_UD); |
5888 | | /* fall through */ |
5889 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */ |
5890 | | /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */ |
5891 | 4 | generate_exception_if(!evex.lr, X86_EXC_UD); |
5892 | | /* fall through */ |
5893 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,[xyz]mm{k} */ |
5894 | | /* vbroadcasti32x2 xmm/m64,[xyz]mm{k} */ |
5895 | 4 | if ( b == 0x59 ) |
5896 | 1 | op_bytes = 8; |
5897 | 4 | generate_exception_if(evex.brs, X86_EXC_UD); |
5898 | 2 | if ( !evex.w ) |
5899 | 2 | host_and_vcpu_must_have(avx512dq); |
5900 | 1 | goto avx512_broadcast; |
5901 | | |
5902 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */ |
5903 | | /* vbroadcastf64x2 m128,{y,z}mm{k} */ |
5904 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x5a): /* vbroadcasti32x4 m128,{y,z}mm{k} */ |
5905 | | /* vbroadcasti64x2 m128,{y,z}mm{k} */ |
5906 | 6 | generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.brs, |
5907 | 6 | X86_EXC_UD); |
5908 | 2 | if ( evex.w ) |
5909 | 2 | host_and_vcpu_must_have(avx512dq); |
5910 | 1 | goto avx512_broadcast; |
5911 | | |
5912 | 34 | case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */ |
5913 | 100 | case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */ |
5914 | 295 | case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */ |
5915 | 364 | case X86EMUL_OPC_VEX_66(0x0f38, 0x23): /* vpmovsxwd xmm/mem,{x,y}mm */ |
5916 | 430 | case X86EMUL_OPC_VEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,{x,y}mm */ |
5917 | 499 | case X86EMUL_OPC_VEX_66(0x0f38, 0x25): /* vpmovsxdq xmm/mem,{x,y}mm */ |
5918 | 565 | case X86EMUL_OPC_VEX_66(0x0f38, 0x30): /* vpmovzxbw xmm/mem,{x,y}mm */ |
5919 | 633 | case X86EMUL_OPC_VEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,{x,y}mm */ |
5920 | 840 | case X86EMUL_OPC_VEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,{x,y}mm */ |
5921 | 903 | case X86EMUL_OPC_VEX_66(0x0f38, 0x33): /* vpmovzxwd xmm/mem,{x,y}mm */ |
5922 | 1.09k | case X86EMUL_OPC_VEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,{x,y}mm */ |
5923 | 1.17k | case X86EMUL_OPC_VEX_66(0x0f38, 0x35): /* vpmovzxdq xmm/mem,{x,y}mm */ |
5924 | 1.17k | op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l); |
5925 | 1.17k | goto simd_0f_int; |
5926 | | |
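/*
 * Sketch (not from this file) of how pmov_convert_delta is used above:
 * for the pmov{s,z}x / vpmov conversions one side of the operation is a
 * full vector while the other shrinks by one power of two per
 * conversion step, so the narrow operand's byte count is the vector
 * width shifted right by the table entry (with VEX.L / EVEX.L'L folded
 * into the vector width for the wider forms).
 */
#include <stdint.h>

static const uint8_t delta[6] = { 1, 2, 3, 1, 2, 1 }; /* bw bd bq wd wq dq */

static unsigned int pmov_narrow_bytes(uint8_t op_low3, unsigned int vl /* 0..2 */)
{
    /* 16 << vl is the full vector width in bytes. */
    return (16u << vl) >> delta[op_low3];
}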
5927 | 2 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x29): /* vpmov{b,w}2m [xyz]mm,k */ |
5928 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x39): /* vpmov{d,q}2m [xyz]mm,k */ |
5929 | 3 | generate_exception_if(!evex.r || !evex.R, X86_EXC_UD); |
5930 | | /* fall through */ |
5931 | 2 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x28): /* vpmovm2{b,w} k,[xyz]mm */ |
5932 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x38): /* vpmovm2{d,q} k,[xyz]mm */ |
5933 | 3 | if ( b & 0x10 ) |
5934 | 3 | host_and_vcpu_must_have(avx512dq); |
5935 | 2 | else |
5936 | 3 | host_and_vcpu_must_have(avx512bw); |
5937 | 0 | generate_exception_if(evex.opmsk || ea.type != OP_REG, X86_EXC_UD); |
5938 | 0 | d |= TwoOp; |
5939 | 0 | op_bytes = 16 << evex.lr; |
5940 | 0 | goto avx512f_no_sae; |
5941 | | |
5942 | 34 | case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */ |
5943 | 167 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */ |
5944 | 167 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
5945 | | /* Ignore the non-temporal hint for now, using movdqa instead. */ |
5946 | 166 | asm volatile ( "mfence" ::: "memory" ); |
5947 | 166 | b = 0x6f; |
5948 | 166 | if ( vex.opcx == vex_none ) |
5949 | 166 | vcpu_must_have(sse4_1); |
5950 | 132 | else |
5951 | 132 | { |
5952 | 132 | vex.opcx = vex_0f; |
5953 | 132 | if ( vex.l ) |
5954 | 132 | vcpu_must_have(avx2); |
5955 | 132 | } |
5956 | 166 | goto movdqa; |
5957 | | |
5958 | 166 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x2a): /* vmovntdqa mem,[xyz]mm */ |
5959 | 4 | generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w, |
5960 | 4 | X86_EXC_UD); |
5961 | | /* Ignore the non-temporal hint for now, using vmovdqa32 instead. */ |
5962 | 1 | asm volatile ( "mfence" ::: "memory" ); |
5963 | 1 | b = 0x6f; |
5964 | 1 | evex.opcx = vex_0f; |
5965 | 1 | goto vmovdqa; |
5966 | | |
5967 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x2a): /* vpbroadcastmb2q k,[xyz]mm */ |
5968 | 5 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x3a): /* vpbroadcastmw2d k,[xyz]mm */ |
5969 | 5 | generate_exception_if((ea.type != OP_REG || evex.opmsk || |
5970 | 5 | evex.w == ((b >> 4) & 1)), |
5971 | 5 | X86_EXC_UD); |
5972 | 1 | d |= TwoOp; |
5973 | 1 | op_bytes = 1; /* fake */ |
5974 | | /* fall through */ |
5975 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xc4): /* vpconflict{d,q} [xyz]mm/mem,[xyz]mm{k} */ |
5976 | 2 | fault_suppression = false; |
5977 | | /* fall through */ |
5978 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x44): /* vplzcnt{d,q} [xyz]mm/mem,[xyz]mm{k} */ |
5979 | 3 | host_and_vcpu_must_have(avx512cd); |
5980 | 0 | goto avx512f_no_sae; |
5981 | | |
5982 | 61 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2c): /* vmaskmovps mem,{x,y}mm,{x,y}mm */ |
5983 | 127 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2d): /* vmaskmovpd mem,{x,y}mm,{x,y}mm */ |
5984 | 269 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps {x,y}mm,{x,y}mm,mem */ |
5985 | 414 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd {x,y}mm,{x,y}mm,mem */ |
5986 | 414 | { |
5987 | 414 | typeof(vex) *pvex; |
5988 | | |
5989 | 414 | generate_exception_if(ea.type != OP_MEM || vex.w, X86_EXC_UD); |
5990 | 407 | host_and_vcpu_must_have(avx); |
5991 | 407 | elem_bytes = 4 << (b & 1); |
5992 | 612 | vmaskmov: |
5993 | 612 | get_fpu(X86EMUL_FPU_ymm); |
5994 | | |
5995 | | /* |
5996 | | * While we can't reasonably provide fully correct behavior here |
5997 | | * (in particular, for writes, avoiding the memory read in anticipation |
5998 | | * of all elements in the range eventually being written), we can (and |
5999 | | * should) still limit the memory access to the smallest possible range |
6000 | | * (suppressing it altogether if all mask bits are clear), to provide |
6001 | | * correct faulting behavior. Read the mask bits via vmovmskp{s,d} |
6002 | | * for that purpose. |
6003 | | */ |
6004 | 610 | opc = init_prefixes(stub); |
6005 | 610 | pvex = copy_VEX(opc, vex); |
6006 | 610 | pvex->opcx = vex_0f; |
6007 | 610 | if ( elem_bytes == 4 ) |
6008 | 404 | pvex->pfx = vex_none; |
6009 | 610 | opc[0] = 0x50; /* vmovmskp{s,d} */ |
6010 | | /* Use %rax as GPR destination and VEX.vvvv as source. */ |
6011 | 610 | pvex->r = 1; |
6012 | 610 | pvex->b = !mode_64bit() || (vex.reg >> 3); |
6013 | 610 | opc[1] = 0xc0 | (~vex.reg & 7); |
6014 | 610 | pvex->reg = 0xf; |
6015 | 610 | opc[2] = 0xc3; |
6016 | | |
6017 | 610 | invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); |
6018 | 610 | put_stub(stub); |
6019 | | |
6020 | 610 | evex.opmsk = 1; /* fake */ |
6021 | 610 | op_mask = ea.val; |
6022 | 610 | fault_suppression = true; |
6023 | | |
6024 | 610 | opc = init_prefixes(stub); |
6025 | 0 | opc[0] = b; |
6026 | | /* Convert memory operand to (%rAX). */ |
6027 | 610 | rex_prefix &= ~REX_B; |
6028 | 610 | vex.b = 1; |
6029 | 610 | opc[1] = modrm & 0x38; |
6030 | 610 | insn_bytes = PFX_BYTES + 2; |
6031 | | |
6032 | 610 | break; |
6033 | 610 | } |
6034 | | |
6035 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x2c): /* vscalefp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6036 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x42): /* vgetexpp{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
6037 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6038 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6039 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6040 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9a): /* vfmsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6041 | 7 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9c): /* vfnmadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6042 | 8 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9e): /* vfnmsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6043 | 9 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa6): /* vfmaddsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6044 | 10 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa7): /* vfmsubadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6045 | 11 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa8): /* vfmadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6046 | 12 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xaa): /* vfmsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6047 | 13 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xac): /* vfnmadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6048 | 14 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xae): /* vfnmsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6049 | 15 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb6): /* vfmaddsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6050 | 16 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb7): /* vfmsubadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6051 | 17 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb8): /* vfmadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6052 | 18 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xba): /* vfmsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6053 | 19 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xbc): /* vfnmadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6054 | 20 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xbe): /* vfnmsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6055 | 20 | host_and_vcpu_must_have(avx512f); |
6056 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
6057 | 0 | avx512_vlen_check(false); |
6058 | 0 | goto simd_zmm; |
6059 | | |
6060 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x2d): /* vscalefs{s,d} xmm/mem,xmm,xmm{k} */ |
6061 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x43): /* vgetexps{s,d} xmm/mem,xmm,xmm{k} */ |
6062 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x99): /* vfmadd132s{s,d} xmm/mem,xmm,xmm{k} */ |
6063 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9b): /* vfmsub132s{s,d} xmm/mem,xmm,xmm{k} */ |
6064 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9d): /* vfnmadd132s{s,d} xmm/mem,xmm,xmm{k} */ |
6065 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x9f): /* vfnmsub132s{s,d} xmm/mem,xmm,xmm{k} */ |
6066 | 7 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa9): /* vfmadd213s{s,d} xmm/mem,xmm,xmm{k} */ |
6067 | 8 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xab): /* vfmsub213s{s,d} xmm/mem,xmm,xmm{k} */ |
6068 | 9 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xad): /* vfnmadd213s{s,d} xmm/mem,xmm,xmm{k} */ |
6069 | 10 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xaf): /* vfnmsub213s{s,d} xmm/mem,xmm,xmm{k} */ |
6070 | 11 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb9): /* vfmadd231s{s,d} xmm/mem,xmm,xmm{k} */ |
6071 | 12 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xbb): /* vfmsub231s{s,d} xmm/mem,xmm,xmm{k} */ |
6072 | 13 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xbd): /* vfnmadd231s{s,d} xmm/mem,xmm,xmm{k} */ |
6073 | 14 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xbf): /* vfnmsub231s{s,d} xmm/mem,xmm,xmm{k} */ |
6074 | 14 | host_and_vcpu_must_have(avx512f); |
6075 | 0 | generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD); |
6076 | 0 | if ( !evex.brs ) |
6077 | 0 | avx512_vlen_check(true); |
6078 | 0 | goto simd_zmm; |
6079 | | |
6080 | 58 | case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */ |
6081 | 58 | host_and_vcpu_must_have(sse4_2); |
6082 | 58 | goto simd_0f38_common; |
6083 | | |
6084 | 58 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6085 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6086 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6087 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6088 | 4 | host_and_vcpu_must_have(avx512bw); |
6089 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
6090 | 0 | elem_bytes = b & 2 ?: 1; |
6091 | 0 | goto avx512f_no_sae; |
6092 | | |
6093 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6094 | 2 | if ( evex.w ) |
6095 | 2 | host_and_vcpu_must_have(avx512dq); |
6096 | 1 | goto avx512f_no_sae; |
6097 | | |
6098 | 66 | case X86EMUL_OPC_66(0x0f38, 0xdb): /* aesimc xmm/m128,xmm */ |
6099 | 185 | case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */ |
6100 | 199 | case X86EMUL_OPC_66(0x0f38, 0xdc): /* aesenc xmm/m128,xmm,xmm */ |
6101 | 265 | case X86EMUL_OPC_66(0x0f38, 0xdd): /* aesenclast xmm/m128,xmm,xmm */ |
6102 | 333 | case X86EMUL_OPC_66(0x0f38, 0xde): /* aesdec xmm/m128,xmm,xmm */ |
6103 | 529 | case X86EMUL_OPC_66(0x0f38, 0xdf): /* aesdeclast xmm/m128,xmm,xmm */ |
6104 | 529 | host_and_vcpu_must_have(aesni); |
6105 | 529 | if ( vex.opcx == vex_none ) |
6106 | 410 | goto simd_0f38_common; |
6107 | | /* fall through */ |
6108 | 185 | case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */ |
6109 | 185 | generate_exception_if(vex.l, X86_EXC_UD); |
6110 | 184 | goto simd_0f_avx; |
6111 | | |
6112 | 184 | case X86EMUL_OPC_VEX (0x0f38, 0x50): /* vpdpbuud [xy]mm/mem,[xy]mm,[xy]mm */ |
6113 | 2 | case X86EMUL_OPC_VEX_F3(0x0f38, 0x50): /* vpdpbsud [xy]mm/mem,[xy]mm,[xy]mm */ |
6114 | 3 | case X86EMUL_OPC_VEX_F2(0x0f38, 0x50): /* vpdpbssd [xy]mm/mem,[xy]mm,[xy]mm */ |
6115 | 4 | case X86EMUL_OPC_VEX (0x0f38, 0x51): /* vpdpbuuds [xy]mm/mem,[xy]mm,[xy]mm */ |
6116 | 5 | case X86EMUL_OPC_VEX_F3(0x0f38, 0x51): /* vpdpbsuds [xy]mm/mem,[xy]mm,[xy]mm */ |
6117 | 6 | case X86EMUL_OPC_VEX_F2(0x0f38, 0x51): /* vpdpbssds [xy]mm/mem,[xy]mm,[xy]mm */ |
6118 | 6 | host_and_vcpu_must_have(avx_vnni_int8); |
6119 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6120 | 0 | op_bytes = 16 << vex.l; |
6121 | 0 | goto simd_0f_ymm; |
6122 | | |
6123 | 1 | case X86EMUL_OPC_VEX_66(0x0f38, 0x50): /* vpdpbusd [xy]mm/mem,[xy]mm,[xy]mm */ |
6124 | 2 | case X86EMUL_OPC_VEX_66(0x0f38, 0x51): /* vpdpbusds [xy]mm/mem,[xy]mm,[xy]mm */ |
6125 | 3 | case X86EMUL_OPC_VEX_66(0x0f38, 0x52): /* vpdpwssd [xy]mm/mem,[xy]mm,[xy]mm */ |
6126 | 3 | case X86EMUL_OPC_VEX_66(0x0f38, 0x53): /* vpdpwssds [xy]mm/mem,[xy]mm,[xy]mm */ |
6127 | 3 | host_and_vcpu_must_have(avx_vnni); |
6128 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6129 | 0 | goto simd_0f_ymm; |
6130 | | |
6131 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x50): /* vpdpbusd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6132 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x51): /* vpdpbusds [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6133 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x52): /* vpdpwssd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6134 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x53): /* vpdpwssds [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6135 | 4 | host_and_vcpu_must_have(avx512_vnni); |
6136 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
6137 | 0 | goto avx512f_no_sae; |
6138 | | |
6139 | 1 | case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6140 | 2 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */ |
6141 | 2 | if ( evex.pfx == vex_f2 ) |
6142 | 1 | fault_suppression = false; |
6143 | 1 | else |
6144 | 1 | d |= TwoOp; |
6145 | | /* fall through */ |
6146 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6147 | 3 | host_and_vcpu_must_have(avx512_bf16); |
6148 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
6149 | 0 | op_bytes = 16 << evex.lr; |
6150 | 0 | goto avx512f_no_sae; |
6151 | | |
6152 | 68 | case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */ |
6153 | 120 | case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */ |
6154 | 180 | case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */ |
6155 | 248 | case X86EMUL_OPC_VEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,{x,y}mm */ |
6156 | 248 | op_bytes = 1 << ((!(b & 0x20) * 2) + (b & 1)); |
6157 | | /* fall through */ |
6158 | 315 | case X86EMUL_OPC_VEX_66(0x0f38, 0x46): /* vpsravd {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6159 | 315 | generate_exception_if(vex.w, X86_EXC_UD); |
6160 | 308 | goto simd_0f_avx2; |
6161 | | |
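/*
 * Sketch (not from this file) of the opcode-to-width mapping above:
 * vpbroadcast{b,w} are opcodes 0x78/0x79 and vpbroadcast{d,q} are
 * 0x58/0x59, so a clear bit 5 selects the dword/qword pair and bit 0
 * doubles the element size within each pair.
 */
#include <stdint.h>

static unsigned int vpbroadcast_elem_bytes(uint8_t op)
{
    return 1u << ((!(op & 0x20) * 2) + (op & 1)); /* 0x78->1 0x79->2 0x58->4 0x59->8 */
}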
6162 | 308 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x4d): /* vrcp14s{s,d} xmm/mem,xmm,xmm{k} */ |
6163 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x4f): /* vrsqrt14s{s,d} xmm/mem,xmm,xmm{k} */ |
6164 | 2 | host_and_vcpu_must_have(avx512f); |
6165 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
6166 | 0 | avx512_vlen_check(true); |
6167 | 0 | goto simd_zmm; |
6168 | | |
6169 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x8f): /* vpshufbitqmb [xyz]mm/mem,[xyz]mm,k{k} */ |
6170 | 5 | generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD); |
6171 | | /* fall through */ |
6172 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x54): /* vpopcnt{b,w} [xyz]mm/mem,[xyz]mm{k} */ |
6173 | 2 | host_and_vcpu_must_have(avx512_bitalg); |
6174 | | /* fall through */ |
6175 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x66): /* vpblendm{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6176 | 1 | host_and_vcpu_must_have(avx512bw); |
6177 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
6178 | 0 | elem_bytes = 1 << evex.w; |
6179 | 0 | goto avx512f_no_sae; |
6180 | | |
6181 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x55): /* vpopcnt{d,q} [xyz]mm/mem,[xyz]mm{k} */ |
6182 | 1 | host_and_vcpu_must_have(avx512_vpopcntdq); |
6183 | 0 | goto avx512f_no_sae; |
6184 | | |
6185 | 78 | case X86EMUL_OPC_VEX_66(0x0f38, 0x5a): /* vbroadcasti128 m128,ymm */ |
6186 | 78 | generate_exception_if(ea.type != OP_MEM || !vex.l || vex.w, X86_EXC_UD); |
6187 | 70 | goto simd_0f_avx2; |
6188 | | |
6189 | 70 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x62): /* vpexpand{b,w} [xyz]mm/mem,[xyz]mm{k} */ |
6190 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x63): /* vpcompress{b,w} [xyz]mm,[xyz]mm/mem{k} */ |
6191 | 2 | host_and_vcpu_must_have(avx512_vbmi2); |
6192 | 0 | elem_bytes = 1 << evex.w; |
6193 | | /* fall through */ |
6194 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x88): /* vexpandp{s,d} [xyz]mm/mem,[xyz]mm{k} */ |
6195 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x89): /* vpexpand{d,q} [xyz]mm/mem,[xyz]mm{k} */ |
6196 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x8a): /* vcompressp{s,d} [xyz]mm,[xyz]mm/mem{k} */ |
6197 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x8b): /* vpcompress{d,q} [xyz]mm,[xyz]mm/mem{k} */ |
6198 | 4 | host_and_vcpu_must_have(avx512f); |
6199 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
6200 | 0 | avx512_vlen_check(false); |
6201 | | /* |
6202 | | * For the respective code below the main switch() to work we need to |
6203 | | * compact op_mask here: Memory accesses are non-sparse even if the |
6204 | | * mask register has sparsely set bits. |
6205 | | */ |
6206 | 0 | if ( likely(fault_suppression) ) |
6207 | 0 | { |
6208 | 0 | n = 1 << ((b & 8 ? 2 : 4) + evex.lr - evex.w); |
6209 | 0 | EXPECT(elem_bytes > 0); |
6210 | 0 | ASSERT(op_bytes == n * elem_bytes); |
6211 | 0 | op_mask &= ~0ULL >> (64 - n); |
6212 | 0 | n = hweight64(op_mask); |
6213 | 0 | op_bytes = n * elem_bytes; |
6214 | 0 | if ( n ) |
6215 | 0 | op_mask = ~0ULL >> (64 - n); |
6216 | 0 | } |
6217 | 0 | goto simd_zmm; |
6218 | | |
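/*
 * Sketch (not from this file) of the op_mask compaction for
 * v{,p}compress / v{,p}expand above: the memory side of these insns is
 * always dense, so only the population count of the mask determines the
 * access width, and the mask used for the access becomes a contiguous
 * run of that many low bits.  Assumes 1 <= n_elems <= 64;
 * __builtin_popcountll is a GCC/Clang builtin used for illustration.
 */
#include <stdint.h>

static uint64_t compact_mask(uint64_t op_mask, unsigned int n_elems)
{
    unsigned int n;

    op_mask &= ~0ULL >> (64 - n_elems);
    n = __builtin_popcountll(op_mask);

    return n ? ~0ULL >> (64 - n) : 0;
}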
6219 | 1 | case X86EMUL_OPC_EVEX_F2(0x0f38, 0x68): /* vp2intersect{d,q} [xyz]mm/mem,[xyz]mm,k+1 */ |
6220 | 1 | host_and_vcpu_must_have(avx512_vp2intersect); |
6221 | 0 | generate_exception_if(evex.opmsk || !evex.r || !evex.R, X86_EXC_UD); |
6222 | 0 | op_bytes = 16 << evex.lr; |
6223 | 0 | goto avx512f_no_sae; |
6224 | | |
6225 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x70): /* vpshldvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6226 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x72): /* vpshrdvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6227 | 2 | generate_exception_if(!evex.w, X86_EXC_UD); |
6228 | 1 | elem_bytes = 2; |
6229 | | /* fall through */ |
6230 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x71): /* vpshldv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6231 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x73): /* vpshrdv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6232 | 3 | host_and_vcpu_must_have(avx512_vbmi2); |
6233 | 0 | goto avx512f_no_sae; |
6234 | | |
6235 | 2 | case X86EMUL_OPC_VEX (0x0f38, 0xb0): /* vcvtneoph2ps mem,[xy]mm */ |
6236 | 3 | case X86EMUL_OPC_VEX_66(0x0f38, 0xb0): /* vcvtneeph2ps mem,[xy]mm */ |
6237 | 4 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xb0): /* vcvtneebf162ps mem,[xy]mm */ |
6238 | 5 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xb0): /* vcvtneobf162ps mem,[xy]mm */ |
6239 | 5 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
6240 | | /* fall through */ |
6241 | 5 | case X86EMUL_OPC_VEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xy]mm/mem,xmm */ |
6242 | 5 | host_and_vcpu_must_have(avx_ne_convert); |
6243 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6244 | 0 | d |= TwoOp; |
6245 | 0 | op_bytes = 16 << vex.l; |
6246 | 0 | goto simd_0f_ymm; |
6247 | | |
6248 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x75): /* vpermi2{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6249 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7d): /* vpermt2{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6250 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x8d): /* vperm{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6251 | 3 | if ( !evex.w ) |
6252 | 3 | host_and_vcpu_must_have(avx512_vbmi); |
6253 | 2 | else |
6254 | 3 | host_and_vcpu_must_have(avx512bw); |
6255 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
6256 | 0 | fault_suppression = false; |
6257 | 0 | goto avx512f_no_sae; |
6258 | | |
6259 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,[xyz]mm{k} */ |
6260 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,[xyz]mm{k} */ |
6261 | 2 | host_and_vcpu_must_have(avx512bw); |
6262 | 0 | generate_exception_if(evex.w || evex.brs, X86_EXC_UD); |
6263 | 0 | op_bytes = elem_bytes = 1 << (b & 1); |
6264 | | /* See the comment at the avx512_broadcast label. */ |
6265 | 0 | op_mask |= !(b & 1 ? !(uint32_t)op_mask : !op_mask); |
6266 | 0 | goto avx512f_no_sae; |
6267 | | |
6268 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7a): /* vpbroadcastb r32,[xyz]mm{k} */ |
6269 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7b): /* vpbroadcastw r32,[xyz]mm{k} */ |
6270 | 2 | host_and_vcpu_must_have(avx512bw); |
6271 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
6272 | | /* fall through */ |
6273 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x7c): /* vpbroadcast{d,q} reg,[xyz]mm{k} */ |
6274 | 5 | generate_exception_if((ea.type != OP_REG || evex.brs || |
6275 | 5 | evex.reg != 0xf || !evex.RX), |
6276 | 5 | X86_EXC_UD); |
6277 | 1 | host_and_vcpu_must_have(avx512f); |
6278 | 0 | avx512_vlen_check(false); |
6279 | 0 | get_fpu(X86EMUL_FPU_zmm); |
6280 | | |
6281 | 0 | opc = init_evex(stub); |
6282 | 0 | opc[0] = b; |
6283 | | /* Convert GPR source to %rAX. */ |
6284 | 0 | evex.b = 1; |
6285 | 0 | if ( !mode_64bit() ) |
6286 | 0 | evex.w = 0; |
6287 | 0 | opc[1] = modrm & 0xf8; |
6288 | 0 | insn_bytes = EVEX_PFX_BYTES + 2; |
6289 | 0 | opc[2] = 0xc3; |
6290 | | 
6291 | 0 | copy_EVEX(opc, evex); |
6292 | 0 | invoke_stub("", "", "=g" (dummy) : "a" (src.val)); |
6293 | | 
6294 | 0 | put_stub(stub); |
6295 | 0 | ASSERT(!state->simd_size); |
6296 | 0 | break; |
6297 | | |
6298 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
6299 | | |
6300 | 482 | case X86EMUL_OPC_66(0x0f38, 0x82): /* invpcid reg,m128 */ |
6301 | 482 | vcpu_must_have(invpcid); |
6302 | 482 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
6303 | 481 | generate_exception_if(!mode_ring0(), X86_EXC_GP, 0); |
6304 | | |
6305 | 479 | if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, |
6306 | 479 | ctxt)) != X86EMUL_OKAY ) |
6307 | 4 | goto done; |
6308 | | |
6309 | 475 | generate_exception_if(mmvalp->xmm[0] & ~0xfff, X86_EXC_GP, 0); |
6310 | 404 | dst.val = mode_64bit() ? *dst.reg : (uint32_t)*dst.reg; |
6311 | | |
6312 | 404 | switch ( dst.val ) |
6313 | 404 | { |
6314 | 141 | case X86_INVPCID_INDIV_ADDR: |
6315 | 141 | generate_exception_if(!is_canonical_address(mmvalp->xmm[1]), |
6316 | 141 | X86_EXC_GP, 0); |
6317 | | /* fall through */ |
6318 | 247 | case X86_INVPCID_SINGLE_CTXT: |
6319 | 247 | if ( !mode_64bit() || !ops->read_cr ) |
6320 | 153 | cr4 = 0; |
6321 | 94 | else if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY ) |
6322 | 0 | goto done; |
6323 | 247 | generate_exception_if(!(cr4 & X86_CR4_PCIDE) && mmvalp->xmm[0], |
6324 | 247 | X86_EXC_GP, 0); |
6325 | 236 | break; |
6326 | 236 | case X86_INVPCID_ALL_INCL_GLOBAL: |
6327 | 136 | case X86_INVPCID_ALL_NON_GLOBAL: |
6328 | 136 | break; |
6329 | 4 | default: |
6330 | 4 | generate_exception(X86_EXC_GP, 0); |
6331 | 404 | } |
6332 | | |
6333 | 372 | fail_if(!ops->tlb_op); |
6334 | 371 | if ( (rc = ops->tlb_op(x86emul_invpcid, truncate_ea(mmvalp->xmm[1]), |
6335 | 371 | x86emul_invpcid_aux(mmvalp->xmm[0], dst.val), |
6336 | 371 | ctxt)) != X86EMUL_OKAY ) |
6337 | 1 | goto done; |
6338 | | |
6339 | 370 | state->simd_size = simd_none; |
6340 | 370 | break; |
6341 | | |
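/*
 * Sketch (not from this file) of the INVPCID operand checks performed
 * above.  The 16-byte memory descriptor carries the PCID in bits 0..11
 * of the first quadword and a linear address in the second; the register
 * operand selects the invalidation type 0..3.  CR4.PCIDE is treated as
 * clear outside 64-bit mode, mirroring the cr4 = 0 fallback above; the
 * canonical() callback is a placeholder for is_canonical_address().
 */
#include <stdint.h>
#include <stdbool.h>

static bool invpcid_operands_valid(uint64_t desc_lo, uint64_t desc_hi,
                                   uint64_t type, bool cr4_pcide,
                                   bool (*canonical)(uint64_t))
{
    if ( desc_lo & ~0xfffULL )          /* reserved descriptor bits */
        return false;

    switch ( type )
    {
    case 0:                             /* individual address */
        if ( !canonical(desc_hi) )
            return false;
        /* fall through */
    case 1:                             /* single context */
        if ( !cr4_pcide && desc_lo )
            return false;
        break;
    case 2: case 3:                     /* all contexts */
        break;
    default:
        return false;
    }

    return true;
}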
6342 | 0 | #ifndef X86EMUL_NO_SIMD |
6343 | | |
6344 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x83): /* vpmultishiftqb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6345 | 2 | generate_exception_if(!evex.w, X86_EXC_UD); |
6346 | 1 | host_and_vcpu_must_have(avx512_vbmi); |
6347 | 0 | fault_suppression = false; |
6348 | 0 | goto avx512f_no_sae; |
6349 | | |
6350 | 150 | case X86EMUL_OPC_VEX_66(0x0f38, 0x8c): /* vpmaskmov{d,q} mem,{x,y}mm,{x,y}mm */ |
6351 | 206 | case X86EMUL_OPC_VEX_66(0x0f38, 0x8e): /* vpmaskmov{d,q} {x,y}mm,{x,y}mm,mem */ |
6352 | 206 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
6353 | 205 | host_and_vcpu_must_have(avx2); |
6354 | 205 | elem_bytes = 4 << vex.w; |
6355 | 205 | goto vmaskmov; |
6356 | | |
6357 | 544 | case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */ |
6358 | 647 | case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */ |
6359 | 804 | case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */ |
6360 | 966 | case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */ |
6361 | 966 | { |
6362 | 966 | unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7); |
6363 | 966 | typeof(vex) *pvex; |
6364 | 966 | union { |
6365 | 966 | int32_t dw[8]; |
6366 | 966 | int64_t qw[4]; |
6367 | 966 | } index, mask; |
6368 | 966 | bool done = false; |
6369 | | |
6370 | 966 | ASSERT(ea.type == OP_MEM); |
6371 | 966 | generate_exception_if(modrm_reg == state->sib_index || |
6372 | 966 | modrm_reg == mask_reg || |
6373 | 966 | state->sib_index == mask_reg, X86_EXC_UD); |
6374 | 962 | generate_exception_if(!cpu_has_avx, X86_EXC_UD); |
6375 | 962 | vcpu_must_have(avx2); |
6376 | 962 | get_fpu(X86EMUL_FPU_ymm); |
6377 | | |
6378 | | /* Read destination, index, and mask registers. */ |
6379 | 961 | opc = init_prefixes(stub); |
6380 | 961 | pvex = copy_VEX(opc, vex); |
6381 | 961 | pvex->opcx = vex_0f; |
6382 | 961 | opc[0] = 0x7f; /* vmovdqa */ |
6383 | | /* Use (%rax) as destination and modrm_reg as source. */ |
6384 | 961 | pvex->r = !mode_64bit() || !(modrm_reg & 8); |
6385 | 961 | pvex->b = 1; |
6386 | 961 | opc[1] = (modrm_reg & 7) << 3; |
6387 | 961 | pvex->reg = 0xf; |
6388 | 961 | opc[2] = 0xc3; |
6389 | | |
6390 | 961 | invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); |
6391 | | |
6392 | 961 | pvex->pfx = vex_f3; /* vmovdqu */ |
6393 | | /* Switch to sib_index as source. */ |
6394 | 961 | pvex->r = !mode_64bit() || !(state->sib_index & 8); |
6395 | 961 | opc[1] = (state->sib_index & 7) << 3; |
6396 | | |
6397 | 961 | invoke_stub("", "", "=m" (index) : "a" (&index)); |
6398 | | |
6399 | | /* Switch to mask_reg as source. */ |
6400 | 961 | pvex->r = !mode_64bit() || !(mask_reg & 8); |
6401 | 961 | opc[1] = (mask_reg & 7) << 3; |
6402 | | |
6403 | 961 | invoke_stub("", "", "=m" (mask) : "a" (&mask)); |
6404 | 961 | put_stub(stub); |
6405 | | |
6406 | | /* Clear untouched parts of the destination and mask values. */ |
6407 | 961 | n = 1 << (2 + vex.l - ((b & 1) | vex.w)); |
6408 | 961 | op_bytes = 4 << vex.w; |
6409 | 961 | memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes); |
6410 | 961 | memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes); |
6411 | | |
6412 | 5.76k | for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i ) |
6413 | 4.86k | { |
6414 | 4.86k | if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 ) |
6415 | 1.07k | { |
6416 | 1.07k | unsigned long idx = b & 1 ? index.qw[i] : index.dw[i]; |
6417 | | |
6418 | 1.07k | rc = ops->read(ea.mem.seg, |
6419 | 1.07k | truncate_ea(ea.mem.off + |
6420 | 1.07k | (idx << state->sib_scale)), |
6421 | 1.07k | (void *)mmvalp + i * op_bytes, op_bytes, ctxt); |
6422 | 1.07k | if ( rc != X86EMUL_OKAY ) |
6423 | 65 | { |
6424 | | /* |
6425 | | * If we've made any progress and the access did not fault, |
6426 | | * force a retry instead. This is for example necessary to |
6427 | | * cope with the limited capacity of HVM's MMIO cache. |
6428 | | */ |
6429 | 65 | if ( rc != X86EMUL_EXCEPTION && done ) |
6430 | 0 | rc = X86EMUL_RETRY; |
6431 | 65 | break; |
6432 | 65 | } |
6433 | | |
6434 | | #ifdef __XEN__ |
6435 | | if ( i + 1 < n && local_events_need_delivery() ) |
6436 | | rc = X86EMUL_RETRY; |
6437 | | #endif |
6438 | | |
6439 | 1.01k | done = true; |
6440 | 1.01k | } |
6441 | | |
6442 | 4.80k | if ( vex.w ) |
6443 | 470 | mask.qw[i] = 0; |
6444 | 4.33k | else |
6445 | 4.33k | mask.dw[i] = 0; |
6446 | 4.80k | } |
6447 | | |
6448 | | /* Write destination and mask registers. */ |
6449 | 961 | opc = init_prefixes(stub); |
6450 | 961 | pvex = copy_VEX(opc, vex); |
6451 | 961 | pvex->opcx = vex_0f; |
6452 | 961 | opc[0] = 0x6f; /* vmovdqa */ |
6453 | | /* Use modrm_reg as destination and (%rax) as source. */ |
6454 | 961 | pvex->r = !mode_64bit() || !(modrm_reg & 8); |
6455 | 961 | pvex->b = 1; |
6456 | 961 | opc[1] = (modrm_reg & 7) << 3; |
6457 | 961 | pvex->reg = 0xf; |
6458 | 961 | opc[2] = 0xc3; |
6459 | | |
6460 | 961 | invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); |
6461 | | |
6462 | 961 | pvex->pfx = vex_f3; /* vmovdqu */ |
6463 | | /* Switch to mask_reg as destination. */ |
6464 | 961 | pvex->r = !mode_64bit() || !(mask_reg & 8); |
6465 | 961 | opc[1] = (mask_reg & 7) << 3; |
6466 | | |
6467 | 961 | invoke_stub("", "", "+m" (mask) : "a" (&mask)); |
6468 | 961 | put_stub(stub); |
6469 | | |
6470 | 961 | if ( rc != X86EMUL_OKAY ) |
6471 | 65 | goto done; |
6472 | | |
6473 | 896 | state->simd_size = simd_none; |
6474 | 896 | break; |
6475 | 961 | } |
6476 | | |
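/*
 * Minimal sketch (not from this file) of the per-element gather loop
 * above, shown for the dword/dword form: an element is loaded only when
 * the sign bit of its mask element is set, and that mask element is
 * zeroed once the element has completed, so a restarted instruction
 * (after a fault or a forced retry) does not repeat finished elements.
 * read_ok() stands in for ops->read(); names are illustrative only.
 */
#include <stdint.h>
#include <stdbool.h>

static bool gather_dword_model(uint32_t dst[8], int32_t mask[8],
                               const int32_t idx[8], unsigned int n,
                               const uint8_t *base, unsigned int scale_shift,
                               bool (*read_ok)(const uint8_t *addr, uint32_t *val))
{
    unsigned int i;

    for ( i = 0; i < n; ++i )
    {
        if ( mask[i] < 0 &&
             !read_ok(base + ((intptr_t)idx[i] << scale_shift), &dst[i]) )
            return false;   /* partial progress; mask records what is left */
        mask[i] = 0;
    }

    return true;
}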
6477 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x90): /* vpgatherd{d,q} mem,[xyz]mm{k} */ |
6478 | 5 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x91): /* vpgatherq{d,q} mem,[xyz]mm{k} */ |
6479 | 9 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x92): /* vgatherdp{s,d} mem,[xyz]mm{k} */ |
6480 | 10 | case X86EMUL_OPC_EVEX_66(0x0f38, 0x93): /* vgatherqp{s,d} mem,[xyz]mm{k} */ |
6481 | 10 | { |
6482 | 10 | typeof(evex) *pevex; |
6483 | 10 | union { |
6484 | 10 | int32_t dw[16]; |
6485 | 10 | int64_t qw[8]; |
6486 | 10 | } index; |
6487 | 10 | bool done = false; |
6488 | | |
6489 | 10 | ASSERT(ea.type == OP_MEM); |
6490 | 10 | generate_exception_if((!evex.opmsk || evex.brs || evex.z || |
6491 | 10 | evex.reg != 0xf || |
6492 | 10 | modrm_reg == state->sib_index), |
6493 | 10 | X86_EXC_UD); |
6494 | 4 | avx512_vlen_check(false); |
6495 | 1 | host_and_vcpu_must_have(avx512f); |
6496 | 0 | get_fpu(X86EMUL_FPU_zmm); |
6497 | | |
6498 | | /* Read destination and index registers. */ |
6499 | 0 | opc = init_evex(stub); |
6500 | 0 | pevex = copy_EVEX(opc, evex); |
6501 | 0 | pevex->opcx = vex_0f; |
6502 | 0 | opc[0] = 0x7f; /* vmovdqa{32,64} */ |
6503 | | /* |
6504 | | * The register writeback below has to retain masked-off elements, but |
6505 | | * needs to clear upper portions in the index-wider-than-data cases. |
6506 | | * Therefore read (and write below) the full register. The alternative |
6507 | | * would have been to fiddle with the mask register used. |
6508 | | */ |
6509 | 0 | pevex->opmsk = 0; |
6510 | | /* Use (%rax) as destination and modrm_reg as source. */ |
6511 | 0 | pevex->b = 1; |
6512 | 0 | opc[1] = (modrm_reg & 7) << 3; |
6513 | 0 | pevex->RX = 1; |
6514 | 0 | opc[2] = 0xc3; |
6515 | | 
6516 | 0 | invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); |
6517 | | 
6518 | 0 | pevex->pfx = vex_f3; /* vmovdqu{32,64} */ |
6519 | 0 | pevex->w = b & 1; |
6520 | | /* Switch to sib_index as source. */ |
6521 | 0 | pevex->r = !mode_64bit() || !(state->sib_index & 0x08); |
6522 | 0 | pevex->R = !mode_64bit() || !(state->sib_index & 0x10); |
6523 | 0 | opc[1] = (state->sib_index & 7) << 3; |
6524 | | 
6525 | 0 | invoke_stub("", "", "=m" (index) : "a" (&index)); |
6526 | 0 | put_stub(stub); |
6527 | | |
6528 | | /* Clear untouched parts of the destination and mask values. */ |
6529 | 0 | n = 1 << (2 + evex.lr - ((b & 1) | evex.w)); |
6530 | 0 | op_bytes = 4 << evex.w; |
6531 | 0 | memset((void *)mmvalp + n * op_bytes, 0, 64 - n * op_bytes); |
6532 | 0 | op_mask &= (1 << n) - 1; |
6533 | | 
6534 | 0 | for ( i = 0; op_mask; ++i ) |
6535 | 0 | { |
6536 | 0 | unsigned long idx = b & 1 ? index.qw[i] : index.dw[i]; |
6537 | | 
6538 | 0 | if ( !(op_mask & (1 << i)) ) |
6539 | 0 | continue; |
6540 | | |
6541 | 0 | rc = ops->read(ea.mem.seg, |
6542 | 0 | truncate_ea(ea.mem.off + |
6543 | 0 | (idx << state->sib_scale)), |
6544 | 0 | (void *)mmvalp + i * op_bytes, op_bytes, ctxt); |
6545 | 0 | if ( rc != X86EMUL_OKAY ) |
6546 | 0 | { |
6547 | | /* |
6548 | | * If we've made some progress and the access did not fault, |
6549 | | * force a retry instead. This is for example necessary to |
6550 | | * cope with the limited capacity of HVM's MMIO cache. |
6551 | | */ |
6552 | 0 | if ( rc != X86EMUL_EXCEPTION && done ) |
6553 | 0 | rc = X86EMUL_RETRY; |
6554 | 0 | break; |
6555 | 0 | } |
6556 | | |
6557 | 0 | op_mask &= ~(1 << i); |
6558 | 0 | done = true; |
6559 | |
6560 | | #ifdef __XEN__ |
6561 | | if ( op_mask && local_events_need_delivery() ) |
6562 | | { |
6563 | | rc = X86EMUL_RETRY; |
6564 | | break; |
6565 | | } |
6566 | | #endif |
6567 | 0 | } |
6568 | | |
6569 | | /* Write destination and mask registers. */ |
6570 | 0 | opc = init_evex(stub); |
6571 | 0 | pevex = copy_EVEX(opc, evex); |
6572 | 0 | pevex->opcx = vex_0f; |
6573 | 0 | opc[0] = 0x6f; /* vmovdqa{32,64} */ |
6574 | 0 | pevex->opmsk = 0; |
6575 | | /* Use modrm_reg as destination and (%rax) as source. */ |
6576 | 0 | pevex->b = 1; |
6577 | 0 | opc[1] = (modrm_reg & 7) << 3; |
6578 | 0 | pevex->RX = 1; |
6579 | 0 | opc[2] = 0xc3; |
6580 | |
6581 | 0 | invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); |
6582 | | |
6583 | | /* |
6584 | | * kmovw: This is VEX-encoded, so we can't use pevex. Avoid copy_VEX() etc |
6585 | | * as well, since we can easily use the 2-byte VEX form here. |
6586 | | */ |
6587 | 0 | opc -= EVEX_PFX_BYTES; |
6588 | 0 | opc[0] = 0xc5; |
6589 | 0 | opc[1] = 0xf8; |
6590 | 0 | opc[2] = 0x90; |
6591 | | /* Use (%rax) as source. */ |
6592 | 0 | opc[3] = evex.opmsk << 3; |
6593 | 0 | opc[4] = 0xc3; |
6594 | |
6595 | 0 | invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); |
6596 | 0 | put_stub(stub); |
6597 | |
6598 | 0 | if ( rc != X86EMUL_OKAY ) |
6599 | 0 | goto done; |
6600 | | |
6601 | 0 | state->simd_size = simd_none; |
6602 | 0 | break; |
6603 | 0 | } |
6604 | | |
6605 | 66 | case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6606 | 132 | case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6607 | 182 | case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6608 | 216 | case X86EMUL_OPC_VEX_66(0x0f38, 0x99): /* vfmadd132s{s,d} xmm/mem,xmm,xmm */ |
6609 | 282 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9a): /* vfmsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6610 | 348 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9b): /* vfmsub132s{s,d} xmm/mem,xmm,xmm */ |
6611 | 400 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9c): /* vfnmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6612 | 464 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9d): /* vfnmadd132s{s,d} xmm/mem,xmm,xmm */ |
6613 | 530 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9e): /* vfnmsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6614 | 596 | case X86EMUL_OPC_VEX_66(0x0f38, 0x9f): /* vfnmsub132s{s,d} xmm/mem,xmm,xmm */ |
6615 | 662 | case X86EMUL_OPC_VEX_66(0x0f38, 0xa6): /* vfmaddsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6616 | 696 | case X86EMUL_OPC_VEX_66(0x0f38, 0xa7): /* vfmsubadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6617 | 762 | case X86EMUL_OPC_VEX_66(0x0f38, 0xa8): /* vfmadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6618 | 827 | case X86EMUL_OPC_VEX_66(0x0f38, 0xa9): /* vfmadd213s{s,d} xmm/mem,xmm,xmm */ |
6619 | 893 | case X86EMUL_OPC_VEX_66(0x0f38, 0xaa): /* vfmsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6620 | 959 | case X86EMUL_OPC_VEX_66(0x0f38, 0xab): /* vfmsub213s{s,d} xmm/mem,xmm,xmm */ |
6621 | 1.02k | case X86EMUL_OPC_VEX_66(0x0f38, 0xac): /* vfnmadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6622 | 1.32k | case X86EMUL_OPC_VEX_66(0x0f38, 0xad): /* vfnmadd213s{s,d} xmm/mem,xmm,xmm */ |
6623 | 1.40k | case X86EMUL_OPC_VEX_66(0x0f38, 0xae): /* vfnmsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6624 | 1.59k | case X86EMUL_OPC_VEX_66(0x0f38, 0xaf): /* vfnmsub213s{s,d} xmm/mem,xmm,xmm */ |
6625 | 1.66k | case X86EMUL_OPC_VEX_66(0x0f38, 0xb6): /* vfmaddsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6626 | 1.71k | case X86EMUL_OPC_VEX_66(0x0f38, 0xb7): /* vfmsubadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6627 | 1.78k | case X86EMUL_OPC_VEX_66(0x0f38, 0xb8): /* vfmadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6628 | 1.85k | case X86EMUL_OPC_VEX_66(0x0f38, 0xb9): /* vfmadd231s{s,d} xmm/mem,xmm,xmm */ |
6629 | 1.91k | case X86EMUL_OPC_VEX_66(0x0f38, 0xba): /* vfmsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6630 | 1.98k | case X86EMUL_OPC_VEX_66(0x0f38, 0xbb): /* vfmsub231s{s,d} xmm/mem,xmm,xmm */ |
6631 | 2.04k | case X86EMUL_OPC_VEX_66(0x0f38, 0xbc): /* vfnmadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6632 | 2.11k | case X86EMUL_OPC_VEX_66(0x0f38, 0xbd): /* vfnmadd231s{s,d} xmm/mem,xmm,xmm */ |
6633 | 2.18k | case X86EMUL_OPC_VEX_66(0x0f38, 0xbe): /* vfnmsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6634 | 2.21k | case X86EMUL_OPC_VEX_66(0x0f38, 0xbf): /* vfnmsub231s{s,d} xmm/mem,xmm,xmm */ |
6635 | 2.21k | host_and_vcpu_must_have(fma); |
6636 | 2.21k | goto simd_0f_ymm; |
6637 | | |
6638 | 2.21k | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa0): /* vpscatterd{d,q} [xyz]mm,mem{k} */ |
6639 | 6 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa1): /* vpscatterq{d,q} [xyz]mm,mem{k} */ |
6640 | 7 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa2): /* vscatterdp{s,d} [xyz]mm,mem{k} */ |
6641 | 10 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xa3): /* vscatterqp{s,d} [xyz]mm,mem{k} */ |
6642 | 10 | { |
6643 | 10 | typeof(evex) *pevex; |
6644 | 10 | union { |
6645 | 10 | int32_t dw[16]; |
6646 | 10 | int64_t qw[8]; |
6647 | 10 | } index; |
6648 | 10 | bool done = false; |
6649 | | |
6650 | 10 | ASSERT(ea.type == OP_MEM); |
6651 | 10 | fail_if(!ops->write); |
6652 | 9 | generate_exception_if((!evex.opmsk || evex.brs || evex.z || |
6653 | 9 | evex.reg != 0xf || |
6654 | 9 | modrm_reg == state->sib_index), |
6655 | 9 | X86_EXC_UD); |
6656 | 5 | avx512_vlen_check(false); |
6657 | 1 | host_and_vcpu_must_have(avx512f); |
6658 | 0 | get_fpu(X86EMUL_FPU_zmm); |
6659 | | |
6660 | | /* Read source and index registers. */ |
6661 | 0 | opc = init_evex(stub); |
6662 | 0 | pevex = copy_EVEX(opc, evex); |
6663 | 0 | pevex->opcx = vex_0f; |
6664 | 0 | opc[0] = 0x7f; /* vmovdqa{32,64} */ |
6665 | | /* Use (%rax) as destination and modrm_reg as source. */ |
6666 | 0 | pevex->b = 1; |
6667 | 0 | opc[1] = (modrm_reg & 7) << 3; |
6668 | 0 | pevex->RX = 1; |
6669 | 0 | opc[2] = 0xc3; |
6670 | |
6671 | 0 | invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); |
6672 | |
6673 | 0 | pevex->pfx = vex_f3; /* vmovdqu{32,64} */ |
6674 | 0 | pevex->w = b & 1; |
6675 | | /* Switch to sib_index as source. */ |
6676 | 0 | pevex->r = !mode_64bit() || !(state->sib_index & 0x08); |
6677 | 0 | pevex->R = !mode_64bit() || !(state->sib_index & 0x10); |
6678 | 0 | opc[1] = (state->sib_index & 7) << 3; |
6679 | |
6680 | 0 | invoke_stub("", "", "=m" (index) : "a" (&index)); |
6681 | 0 | put_stub(stub); |
6682 | | |
6683 | | /* Clear untouched parts of the mask value. */ |
6684 | 0 | n = 1 << (2 + evex.lr - ((b & 1) | evex.w)); |
6685 | 0 | op_bytes = 4 << evex.w; |
6686 | 0 | op_mask &= (1 << n) - 1; |
6687 | |
6688 | 0 | for ( i = 0; op_mask; ++i ) |
6689 | 0 | { |
6690 | 0 | unsigned long idx = b & 1 ? index.qw[i] : index.dw[i]; |
6691 | 0 | unsigned long offs = truncate_ea(ea.mem.off + |
6692 | 0 | (idx << state->sib_scale)); |
6693 | 0 | unsigned int j, slot; |
6694 | |
6695 | 0 | if ( !(op_mask & (1 << i)) ) |
6696 | 0 | continue; |
6697 | | |
6698 | | /* |
6699 | | * hvmemul_linear_mmio_access() will find a cache slot based on |
6700 | | * linear address. hvmemul_phys_mmio_access() will crash the |
6701 | | * domain if observing varying data getting written to the same |
6702 | | * cache slot. Utilize that squashing earlier writes to fully |
6703 | | * overlapping addresses is permitted by the spec. We can't, |
6704 | | * however, drop the writes altogether, to maintain correct |
6705 | | * faulting behavior. Instead write the data from the last of |
6706 | | * the fully overlapping slots multiple times. |
6707 | | */ |
6708 | 0 | for ( j = (slot = i) + 1; j < n; ++j ) |
6709 | 0 | { |
6710 | 0 | idx = b & 1 ? index.qw[j] : index.dw[j]; |
6711 | 0 | if ( (op_mask & (1 << j)) && |
6712 | 0 | truncate_ea(ea.mem.off + |
6713 | 0 | (idx << state->sib_scale)) == offs ) |
6714 | 0 | slot = j; |
6715 | 0 | } |
6716 | |
6717 | 0 | rc = ops->write(ea.mem.seg, offs, |
6718 | 0 | (void *)mmvalp + slot * op_bytes, op_bytes, ctxt); |
6719 | 0 | if ( rc != X86EMUL_OKAY ) |
6720 | 0 | { |
6721 | | /* See comment in gather emulation. */ |
6722 | 0 | if ( rc != X86EMUL_EXCEPTION && done ) |
6723 | 0 | rc = X86EMUL_RETRY; |
6724 | 0 | break; |
6725 | 0 | } |
6726 | | |
6727 | 0 | op_mask &= ~(1 << i); |
6728 | 0 | done = true; |
6729 | |
6730 | | #ifdef __XEN__ |
6731 | | if ( op_mask && local_events_need_delivery() ) |
6732 | | { |
6733 | | rc = X86EMUL_RETRY; |
6734 | | break; |
6735 | | } |
6736 | | #endif |
6737 | 0 | } |
6738 | | |
6739 | | /* Write mask register. See comment in gather emulation. */ |
6740 | 0 | opc = get_stub(stub); |
6741 | 0 | opc[0] = 0xc5; |
6742 | 0 | opc[1] = 0xf8; |
6743 | 0 | opc[2] = 0x90; |
6744 | | /* Use (%rax) as source. */ |
6745 | 0 | opc[3] = evex.opmsk << 3; |
6746 | 0 | opc[4] = 0xc3; |
6747 | |
6748 | 0 | invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); |
6749 | 0 | put_stub(stub); |
6750 | |
6751 | 0 | if ( rc != X86EMUL_OKAY ) |
6752 | 0 | goto done; |
6753 | | |
6754 | 0 | state->simd_size = simd_none; |
6755 | 0 | break; |
6756 | 0 | } |
6757 | | |
6758 | 1 | case X86EMUL_OPC_VEX_66(0x0f38, 0xb1): /* vbcstnesh2ps mem,[xy]mm */ |
6759 | 2 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xb1): /* vbcstnebf162ps mem,[xy]mm */ |
6760 | 2 | host_and_vcpu_must_have(avx_ne_convert); |
6761 | 0 | generate_exception_if(vex.w || ea.type != OP_MEM, X86_EXC_UD); |
6762 | 0 | op_bytes = 2; |
6763 | 0 | goto simd_0f_ymm; |
6764 | | |
6765 | 1 | case X86EMUL_OPC_VEX_66(0x0f38, 0xb4): /* vpmadd52luq [xy]mm/mem,[xy]mm,[xy]mm */ |
6766 | 2 | case X86EMUL_OPC_VEX_66(0x0f38, 0xb5): /* vpmadd52huq [xy]mm/mem,[xy]mm,[xy]mm */ |
6767 | 2 | host_and_vcpu_must_have(avx_ifma); |
6768 | 0 | generate_exception_if(!vex.w, X86_EXC_UD); |
6769 | 0 | goto simd_0f_ymm; |
6770 | | |
6771 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb4): /* vpmadd52luq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6772 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xb5): /* vpmadd52huq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6773 | 2 | host_and_vcpu_must_have(avx512_ifma); |
6774 | 0 | generate_exception_if(!evex.w, X86_EXC_UD); |
6775 | 0 | goto avx512f_no_sae; |
6776 | | |
6777 | 1 | case X86EMUL_OPC(0x0f38, 0xc8): /* sha1nexte xmm/m128,xmm */ |
6778 | 2 | case X86EMUL_OPC(0x0f38, 0xc9): /* sha1msg1 xmm/m128,xmm */ |
6779 | 3 | case X86EMUL_OPC(0x0f38, 0xca): /* sha1msg2 xmm/m128,xmm */ |
6780 | 4 | case X86EMUL_OPC(0x0f38, 0xcb): /* sha256rnds2 XMM0,xmm/m128,xmm */ |
6781 | 5 | case X86EMUL_OPC(0x0f38, 0xcc): /* sha256msg1 xmm/m128,xmm */ |
6782 | 6 | case X86EMUL_OPC(0x0f38, 0xcd): /* sha256msg2 xmm/m128,xmm */ |
6783 | 6 | host_and_vcpu_must_have(sha); |
6784 | 6 | op_bytes = 16; |
6785 | 6 | goto simd_0f38_common; |
6786 | | |
6787 | 1 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xcb): /* vsha512rnds2 xmm,ymm,ymm */ |
6788 | 2 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xcc): /* vsha512msg1 xmm,ymm */ |
6789 | 3 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xcd): /* vsha512msg2 ymm,ymm */ |
6790 | 3 | host_and_vcpu_must_have(sha512); |
6791 | 0 | generate_exception_if(ea.type != OP_REG || vex.w || !vex.l, X86_EXC_UD); |
6792 | 0 | op_bytes = 32; |
6793 | 0 | goto simd_0f_ymm; |
6794 | | |
6795 | 1 | case X86EMUL_OPC_66(0x0f38, 0xcf): /* gf2p8mulb xmm/m128,xmm */ |
6796 | 1 | host_and_vcpu_must_have(gfni); |
6797 | 0 | goto simd_0f38_common; |
6798 | | |
6799 | 1 | case X86EMUL_OPC_VEX_66(0x0f38, 0xcf): /* vgf2p8mulb {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6800 | 1 | host_and_vcpu_must_have(gfni); |
6801 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6802 | 0 | goto simd_0f_avx; |
6803 | | |
6804 | 1 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xcf): /* vgf2p8mulb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
6805 | 1 | host_and_vcpu_must_have(gfni); |
6806 | 0 | generate_exception_if(evex.w || evex.brs, X86_EXC_UD); |
6807 | 0 | elem_bytes = 1; |
6808 | 0 | goto avx512f_no_sae; |
6809 | | |
6810 | 1 | case X86EMUL_OPC_VEX (0x0f38, 0xd2): /* vpdpwuud [xy]mm/mem,[xy]mm,[xy]mm */ |
6811 | 2 | case X86EMUL_OPC_VEX_66(0x0f38, 0xd2): /* vpdpwusd [xy]mm/mem,[xy]mm,[xy]mm */ |
6812 | 3 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xd2): /* vpdpwsud [xy]mm/mem,[xy]mm,[xy]mm */ |
6813 | 4 | case X86EMUL_OPC_VEX (0x0f38, 0xd3): /* vpdpwuuds [xy]mm/mem,[xy]mm,[xy]mm */ |
6814 | 5 | case X86EMUL_OPC_VEX_66(0x0f38, 0xd3): /* vpdpwusds [xy]mm/mem,[xy]mm,[xy]mm */ |
6815 | 6 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xd3): /* vpdpwsuds [xy]mm/mem,[xy]mm,[xy]mm */ |
6816 | 6 | host_and_vcpu_must_have(avx_vnni_int16); |
6817 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6818 | 0 | op_bytes = 16 << vex.l; |
6819 | 0 | goto simd_0f_ymm; |
6820 | | |
6821 | 7 | case X86EMUL_OPC_VEX (0x0f38, 0xda): /* vsm3msg1 xmm/mem,xmm,xmm */ |
6822 | 12 | case X86EMUL_OPC_VEX_66(0x0f38, 0xda): /* vsm3msg2 xmm/mem,xmm,xmm */ |
6823 | 12 | generate_exception_if(vex.w || vex.l, X86_EXC_UD); |
6824 | 5 | host_and_vcpu_must_have(sm3); |
6825 | 0 | goto simd_0f_ymm; |
6826 | | |
6827 | 1 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xda): /* vsm4key4 [xy]mm/mem,[xy]mm,[xy]mm */ |
6828 | 2 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xda): /* vsm4rnds4 [xy]mm/mem,[xy]mm,[xy]mm */ |
6829 | 2 | host_and_vcpu_must_have(sm4); |
6830 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
6831 | 0 | op_bytes = 16 << vex.l; |
6832 | 0 | goto simd_0f_ymm; |
6833 | | |
6834 | 66 | case X86EMUL_OPC_VEX_66(0x0f38, 0xdc): /* vaesenc {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6835 | 132 | case X86EMUL_OPC_VEX_66(0x0f38, 0xdd): /* vaesenclast {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6836 | 200 | case X86EMUL_OPC_VEX_66(0x0f38, 0xde): /* vaesdec {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6837 | 267 | case X86EMUL_OPC_VEX_66(0x0f38, 0xdf): /* vaesdeclast {x,y}mm/mem,{x,y}mm,{x,y}mm */ |
6838 | 267 | if ( !vex.l ) |
6839 | 267 | host_and_vcpu_must_have(aesni); |
6840 | 2 | else |
6841 | 267 | host_and_vcpu_must_have(vaes); |
6842 | 265 | goto simd_0f_avx; |
6843 | | |
6844 | 265 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xdc): /* vaesenc [xyz]mm/mem,[xyz]mm,[xyz]mm */ |
6845 | 2 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xdd): /* vaesenclast [xyz]mm/mem,[xyz]mm,[xyz]mm */ |
6846 | 3 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xde): /* vaesdec [xyz]mm/mem,[xyz]mm,[xyz]mm */ |
6847 | 4 | case X86EMUL_OPC_EVEX_66(0x0f38, 0xdf): /* vaesdeclast [xyz]mm/mem,[xyz]mm,[xyz]mm */ |
6848 | 4 | host_and_vcpu_must_have(vaes); |
6849 | 0 | generate_exception_if(evex.brs || evex.opmsk, X86_EXC_UD); |
6850 | 0 | goto avx512f_no_sae; |
6851 | | |
6852 | 0 | #endif /* !X86EMUL_NO_SIMD */ |
6853 | | |
6854 | 16 | case X86EMUL_OPC_VEX_66(0x0f38, 0xe0) ... |
6855 | 16 | X86EMUL_OPC_VEX_66(0x0f38, 0xef): /* cmp<cc>xadd r,r,m */ |
6856 | 16 | generate_exception_if(!mode_64bit() || dst.type != OP_MEM || vex.l, |
6857 | 16 | X86_EXC_UD); |
6858 | 1 | host_and_vcpu_must_have(cmpccxadd); |
6859 | 0 | fail_if(!ops->rmw); |
6860 | 0 | state->rmw = rmw_cmpccxadd; |
6861 | 0 | break; |
6862 | | |
6863 | 370 | case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */ |
6864 | 640 | case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */ |
6865 | 640 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
6866 | 637 | vcpu_must_have(movbe); |
6867 | 637 | switch ( op_bytes ) |
6868 | 637 | { |
6869 | 235 | case 2: |
6870 | 235 | asm ( "xchg %h0,%b0" : "=Q" (dst.val) |
6871 | 235 | : "0" (*(uint32_t *)&src.val) ); |
6872 | 235 | break; |
6873 | 206 | case 4: |
6874 | 206 | #ifdef __x86_64__ |
6875 | 206 | asm ( "bswap %k0" : "=r" (dst.val) |
6876 | 206 | : "0" (*(uint32_t *)&src.val) ); |
6877 | 206 | break; |
6878 | 196 | case 8: |
6879 | 196 | #endif |
6880 | 196 | asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) ); |
6881 | 196 | break; |
6882 | 0 | default: |
6883 | 0 | ASSERT_UNREACHABLE(); |
6884 | 0 | goto unhandleable; |
6885 | 637 | } |
6886 | 637 | break; |
6887 | | |
6888 | 637 | case X86EMUL_OPC_F2(0x0f38, 0xf0): /* crc32 r/m8, r{32,64} */ |
6889 | 332 | case X86EMUL_OPC_F2(0x0f38, 0xf1): /* crc32 r/m{16,32,64}, r{32,64} */ |
6890 | 332 | host_and_vcpu_must_have(sse4_2); |
6891 | 332 | dst.bytes = rex_prefix & REX_W ? 8 : 4; |
6892 | 332 | switch ( op_bytes ) |
6893 | 332 | { |
6894 | 0 | case 1: |
6895 | 0 | asm ( "crc32b %1,%k0" : "+r" (dst.val) |
6896 | 0 | : "qm" (*(uint8_t *)&src.val) ); |
6897 | 0 | break; |
6898 | 68 | case 2: |
6899 | 68 | asm ( "crc32w %1,%k0" : "+r" (dst.val) |
6900 | 68 | : "rm" (*(uint16_t *)&src.val) ); |
6901 | 68 | break; |
6902 | 191 | case 4: |
6903 | 191 | asm ( "crc32l %1,%k0" : "+r" (dst.val) |
6904 | 191 | : "rm" (*(uint32_t *)&src.val) ); |
6905 | 191 | break; |
6906 | 0 | #ifdef __x86_64__ |
6907 | 73 | case 8: |
6908 | 73 | asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); |
6909 | 73 | break; |
6910 | 0 | #endif |
6911 | 0 | default: |
6912 | 0 | ASSERT_UNREACHABLE(); |
6913 | 0 | goto unhandleable; |
6914 | 332 | } |
6915 | 332 | break; |
6916 | | |
6917 | 332 | case X86EMUL_OPC_VEX(0x0f38, 0xf2): /* andn r/m,r,r */ |
6918 | 260 | case X86EMUL_OPC_VEX(0x0f38, 0xf5): /* bzhi r,r/m,r */ |
6919 | 328 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */ |
6920 | 395 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */ |
6921 | 462 | case X86EMUL_OPC_VEX(0x0f38, 0xf7): /* bextr r,r/m,r */ |
6922 | 533 | case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */ |
6923 | 598 | case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */ |
6924 | 633 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */ |
6925 | 633 | { |
6926 | 633 | uint8_t *buf = get_stub(stub); |
6927 | 633 | typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]); |
6928 | | |
6929 | 633 | if ( b == 0xf5 || vex.pfx ) |
6930 | 633 | host_and_vcpu_must_have(bmi2); |
6931 | 134 | else |
6932 | 633 | host_and_vcpu_must_have(bmi1); |
6933 | 633 | generate_exception_if(vex.l, X86_EXC_UD); |
6934 | | |
6935 | 629 | buf[0] = 0xc4; |
6936 | 629 | *pvex = vex; |
6937 | 629 | pvex->b = 1; |
6938 | 629 | pvex->r = 1; |
6939 | 629 | if ( !mode_64bit() ) |
6940 | 218 | pvex->w = 0; |
6941 | 629 | pvex->reg = 0xf; /* rAX */ |
6942 | 629 | buf[3] = b; |
6943 | 629 | buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ |
6944 | 629 | buf[5] = 0xc3; |
6945 | | |
6946 | 629 | src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); |
6947 | 629 | emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg)); |
6948 | | |
6949 | 629 | put_stub(stub); |
6950 | 629 | break; |
6951 | 633 | } |
6952 | | |
6953 | 134 | case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */ |
6954 | 134 | { |
6955 | 134 | uint8_t *buf = get_stub(stub); |
6956 | 134 | typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]); |
6957 | | |
6958 | 134 | switch ( modrm_reg & 7 ) |
6959 | 134 | { |
6960 | 125 | case 1: /* blsr r,r/m */ |
6961 | 132 | case 2: /* blsmsk r,r/m */ |
6962 | 133 | case 3: /* blsi r,r/m */ |
6963 | 133 | host_and_vcpu_must_have(bmi1); |
6964 | 133 | break; |
6965 | 133 | default: |
6966 | 1 | goto unrecognized_insn; |
6967 | 134 | } |
6968 | | |
6969 | 133 | generate_exception_if(vex.l, X86_EXC_UD); |
6970 | | |
6971 | 132 | buf[0] = 0xc4; |
6972 | 132 | *pvex = vex; |
6973 | 132 | pvex->b = 1; |
6974 | 132 | pvex->r = 1; |
6975 | 132 | if ( !mode_64bit() ) |
6976 | 66 | pvex->w = 0; |
6977 | 132 | pvex->reg = 0xf; /* rAX */ |
6978 | 132 | buf[3] = b; |
6979 | 132 | buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ |
6980 | 132 | buf[5] = 0xc3; |
6981 | | |
6982 | 132 | dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); |
6983 | 132 | emulate_stub("=&a" (dst.val), "c" (&src.val)); |
6984 | | |
6985 | 132 | put_stub(stub); |
6986 | 132 | break; |
6987 | 133 | } |
6988 | | |
6989 | 429 | case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */ |
6990 | 915 | case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */ |
6991 | 915 | { |
6992 | 915 | unsigned int mask = rep_prefix() ? X86_EFLAGS_OF : X86_EFLAGS_CF; |
6993 | 915 | unsigned int aux = _regs.eflags & mask ? ~0 : 0; |
6994 | 915 | bool carry; |
6995 | | |
6996 | 915 | vcpu_must_have(adx); |
6997 | 915 | #ifdef __x86_64__ |
6998 | 915 | if ( op_bytes == 8 ) |
6999 | 194 | asm ( "add %[aux],%[aux]\n\t" |
7000 | 194 | "adc %[src],%[dst]\n\t" |
7001 | 194 | ASM_FLAG_OUT(, "setc %[carry]") |
7002 | 194 | : [dst] "+r" (dst.val), |
7003 | 194 | [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry), |
7004 | 194 | [aux] "+r" (aux) |
7005 | 194 | : [src] "rm" (src.val) ); |
7006 | 721 | else |
7007 | 721 | #endif |
7008 | 721 | asm ( "add %[aux],%[aux]\n\t" |
7009 | 721 | "adc %k[src],%k[dst]\n\t" |
7010 | 721 | ASM_FLAG_OUT(, "setc %[carry]") |
7011 | 721 | : [dst] "+r" (dst.val), |
7012 | 721 | [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry), |
7013 | 721 | [aux] "+r" (aux) |
7014 | 721 | : [src] "rm" (src.val) ); |
7015 | 915 | if ( carry ) |
7016 | 266 | _regs.eflags |= mask; |
7017 | 649 | else |
7018 | 649 | _regs.eflags &= ~mask; |
7019 | 915 | break; |
7020 | 915 | } |
7021 | | |
7022 | 602 | case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */ |
7023 | 602 | vcpu_must_have(bmi2); |
7024 | 602 | generate_exception_if(vex.l, X86_EXC_UD); |
7025 | 601 | ea.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); |
7026 | 601 | if ( mode_64bit() && vex.w ) |
7027 | 209 | asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val) |
7028 | 209 | : "0" (src.val), "rm" (_regs.r(dx)) ); |
7029 | 392 | else |
7030 | 392 | asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val) |
7031 | 392 | : "0" ((uint32_t)src.val), "rm" (_regs.edx) ); |
7032 | 601 | break; |
7033 | | |
7034 | 1 | case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */ |
7035 | 1 | host_and_vcpu_must_have(movdir64b); |
7036 | 0 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
7037 | 0 | src.val = truncate_ea(*dst.reg); |
7038 | 0 | generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops), |
7039 | 0 | X86_EXC_GP, 0); |
7040 | 0 | fail_if(!ops->blk); |
7041 | 0 | state->blk = blk_movdir; |
7042 | 0 | BUILD_BUG_ON(sizeof(*mmvalp) < 64); |
7043 | 0 | if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, |
7044 | 0 | ctxt)) != X86EMUL_OKAY || |
7045 | 0 | (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags, |
7046 | 0 | state, ctxt)) != X86EMUL_OKAY ) |
7047 | 0 | goto done; |
7048 | 0 | state->simd_size = simd_none; |
7049 | 0 | break; |
7050 | | |
7051 | 1 | case X86EMUL_OPC_F2(0x0f38, 0xf8): /* enqcmd r,m512 */ |
7052 | 2 | case X86EMUL_OPC_F3(0x0f38, 0xf8): /* enqcmds r,m512 */ |
7053 | 2 | host_and_vcpu_must_have(enqcmd); |
7054 | 0 | generate_exception_if(ea.type != OP_MEM, X86_EXC_UD); |
7055 | 0 | generate_exception_if(vex.pfx != vex_f2 && !mode_ring0(), X86_EXC_GP, 0); |
7056 | 0 | src.val = truncate_ea(*dst.reg); |
7057 | 0 | generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops), |
7058 | 0 | X86_EXC_GP, 0); |
7059 | 0 | fail_if(!ops->blk); |
7060 | 0 | BUILD_BUG_ON(sizeof(*mmvalp) < 64); |
7061 | 0 | if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, |
7062 | 0 | ctxt)) != X86EMUL_OKAY ) |
7063 | 0 | goto done; |
7064 | 0 | if ( vex.pfx == vex_f2 ) /* enqcmd */ |
7065 | 0 | { |
7066 | 0 | generate_exception_if(mmvalp->data32[0], X86_EXC_GP, 0); |
7067 | 0 | fail_if(!ops->read_msr); |
7068 | 0 | if ( (rc = ops->read_msr(MSR_PASID, &msr_val, |
7069 | 0 | ctxt)) != X86EMUL_OKAY ) |
7070 | 0 | goto done; |
7071 | 0 | generate_exception_if(!(msr_val & PASID_VALID), X86_EXC_GP, 0); |
7072 | 0 | mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK); |
7073 | 0 | } |
7074 | 0 | else |
7075 | 0 | generate_exception_if(mmvalp->data32[0] & 0x7ff00000, X86_EXC_GP, 0); |
7076 | 0 | state->blk = blk_enqcmd; |
7077 | 0 | if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags, |
7078 | 0 | state, ctxt)) != X86EMUL_OKAY ) |
7079 | 0 | goto done; |
7080 | 0 | state->simd_size = simd_none; |
7081 | 0 | break; |
7082 | | |
7083 | 1 | case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */ |
7084 | 1 | host_and_vcpu_must_have(movdiri); |
7085 | 0 | generate_exception_if(dst.type != OP_MEM, X86_EXC_UD); |
7086 | 0 | fail_if(!ops->blk); |
7087 | 0 | state->blk = blk_movdir; |
7088 | 0 | if ( (rc = ops->blk(dst.mem.seg, dst.mem.off, &src.val, op_bytes, |
7089 | 0 | &_regs.eflags, state, ctxt)) != X86EMUL_OKAY ) |
7090 | 0 | goto done; |
7091 | 0 | dst.type = OP_NONE; |
7092 | 0 | break; |
7093 | | |
7094 | 0 | #ifndef X86EMUL_NO_SIMD |
7095 | | |
7096 | 134 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */ |
7097 | 474 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */ |
7098 | 474 | generate_exception_if(!vex.l || !vex.w, X86_EXC_UD); |
7099 | 468 | goto simd_0f_imm8_avx2; |
7100 | | |
7101 | 468 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x00): /* vpermq $imm8,{y,z}mm/mem,{y,z}mm{k} */ |
7102 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x01): /* vpermpd $imm8,{y,z}mm/mem,{y,z}mm{k} */ |
7103 | 3 | generate_exception_if(!evex.lr || !evex.w, X86_EXC_UD); |
7104 | 1 | fault_suppression = false; |
7105 | 1 | goto avx512f_imm8_no_sae; |
7106 | | |
7107 | 35 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x38): /* vinserti128 $imm8,xmm/m128,ymm,ymm */ |
7108 | 351 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x39): /* vextracti128 $imm8,ymm,xmm/m128 */ |
7109 | 417 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x46): /* vperm2i128 $imm8,ymm/m256,ymm,ymm */ |
7110 | 417 | generate_exception_if(!vex.l, X86_EXC_UD); |
7111 | | /* fall through */ |
7112 | 456 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x02): /* vpblendd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7113 | 456 | generate_exception_if(vex.w, X86_EXC_UD); |
7114 | 450 | goto simd_0f_imm8_avx2; |
7115 | | |
7116 | 450 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x06): /* vperm2f128 $imm8,ymm/m256,ymm,ymm */ |
7117 | 160 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x18): /* vinsertf128 $imm8,xmm/m128,ymm,ymm */ |
7118 | 330 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x19): /* vextractf128 $imm8,ymm,xmm/m128 */ |
7119 | 330 | generate_exception_if(!vex.l, X86_EXC_UD); |
7120 | | /* fall through */ |
7121 | 396 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x04): /* vpermilps $imm8,{x,y}mm/mem,{x,y}mm */ |
7122 | 434 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x05): /* vpermilpd $imm8,{x,y}mm/mem,{x,y}mm */ |
7123 | 434 | generate_exception_if(vex.w, X86_EXC_UD); |
7124 | 428 | goto simd_0f_imm8_avx; |
7125 | | |
7126 | 428 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x04): /* vpermilps $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7127 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x05): /* vpermilpd $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7128 | 2 | generate_exception_if(evex.w != (b & 1), X86_EXC_UD); |
7129 | 1 | fault_suppression = false; |
7130 | 1 | goto avx512f_imm8_no_sae; |
7131 | | |
7132 | 66 | case X86EMUL_OPC_66(0x0f3a, 0x08): /* roundps $imm8,xmm/m128,xmm */ |
7133 | 125 | case X86EMUL_OPC_66(0x0f3a, 0x09): /* roundpd $imm8,xmm/m128,xmm */ |
7134 | 216 | case X86EMUL_OPC_66(0x0f3a, 0x0a): /* roundss $imm8,xmm/m128,xmm */ |
7135 | 457 | case X86EMUL_OPC_66(0x0f3a, 0x0b): /* roundsd $imm8,xmm/m128,xmm */ |
7136 | 491 | case X86EMUL_OPC_66(0x0f3a, 0x0c): /* blendps $imm8,xmm/m128,xmm */ |
7137 | 685 | case X86EMUL_OPC_66(0x0f3a, 0x0d): /* blendpd $imm8,xmm/m128,xmm */ |
7138 | 751 | case X86EMUL_OPC_66(0x0f3a, 0x0e): /* pblendw $imm8,xmm/m128,xmm */ |
7139 | 1.04k | case X86EMUL_OPC_66(0x0f3a, 0x40): /* dpps $imm8,xmm/m128,xmm */ |
7140 | 1.10k | case X86EMUL_OPC_66(0x0f3a, 0x41): /* dppd $imm8,xmm/m128,xmm */ |
7141 | 1.12k | case X86EMUL_OPC_66(0x0f3a, 0x42): /* mpsadbw $imm8,xmm/m128,xmm */ |
7142 | 1.12k | host_and_vcpu_must_have(sse4_1); |
7143 | 1.12k | goto simd_0f3a_common; |
7144 | | |
7145 | 1.12k | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0a): /* vrndscaless $imm8,xmm/mem,xmm,xmm{k} */ |
7146 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0b): /* vrndscalesd $imm8,xmm/mem,xmm,xmm{k} */ |
7147 | 3 | generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD); |
7148 | | /* fall through */ |
7149 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x08): /* vrndscaleps $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7150 | 4 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x09): /* vrndscalepd $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7151 | 4 | host_and_vcpu_must_have(avx512f); |
7152 | 0 | generate_exception_if(evex.w != (b & 1), X86_EXC_UD); |
7153 | 0 | avx512_vlen_check(b & 2); |
7154 | 0 | goto simd_imm8_zmm; |
7155 | | |
7156 | 3 | case X86EMUL_OPC_EVEX(0x0f3a, 0x0a): /* vrndscalesh $imm8,xmm/mem,xmm,xmm{k} */ |
7157 | 3 | generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD); |
7158 | | /* fall through */ |
7159 | 3 | case X86EMUL_OPC_EVEX(0x0f3a, 0x08): /* vrndscaleph $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7160 | 3 | host_and_vcpu_must_have(avx512_fp16); |
7161 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7162 | 0 | avx512_vlen_check(b & 2); |
7163 | 0 | goto simd_imm8_zmm; |
7164 | | |
7165 | 0 | #endif /* X86EMUL_NO_SIMD */ |
7166 | | |
7167 | 529 | CASE_SIMD_PACKED_INT(0x0f3a, 0x0f): /* palignr $imm8,{,x}mm/mem,{,x}mm */ |
7168 | 529 | host_and_vcpu_must_have(ssse3); |
7169 | 298 | if ( vex.pfx ) |
7170 | 67 | { |
7171 | 1.79k | simd_0f3a_common: |
7172 | 1.79k | get_fpu(X86EMUL_FPU_xmm); |
7173 | 1.79k | } |
7174 | 231 | else |
7175 | 231 | { |
7176 | 231 | host_and_vcpu_must_have(mmx); |
7177 | 231 | get_fpu(X86EMUL_FPU_mmx); |
7178 | 231 | } |
7179 | 2.01k | opc = init_prefixes(stub); |
7180 | 0 | opc[0] = 0x3a; |
7181 | 2.01k | opc[1] = b; |
7182 | 2.01k | opc[2] = modrm; |
7183 | 2.01k | if ( ea.type == OP_MEM ) |
7184 | 1.27k | { |
7185 | | /* Convert memory operand to (%rAX). */ |
7186 | 1.27k | rex_prefix &= ~REX_B; |
7187 | 1.27k | vex.b = 1; |
7188 | 1.27k | opc[2] &= 0x38; |
7189 | 1.27k | } |
7190 | 2.01k | opc[3] = imm1; |
7191 | 2.01k | insn_bytes = PFX_BYTES + 4; |
7192 | 2.01k | break; |
7193 | | |
7194 | 0 | #ifndef X86EMUL_NO_SIMD |
7195 | | |
7196 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x42): /* vdbpsadbw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7197 | 2 | generate_exception_if(evex.w, X86_EXC_UD); |
7198 | | /* fall through */ |
7199 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7200 | 2 | fault_suppression = false; |
7201 | 2 | goto avx512bw_imm; |
7202 | | |
7203 | 64 | case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */ |
7204 | 130 | case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */ |
7205 | 316 | case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */ |
7206 | 366 | case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */ |
7207 | 366 | host_and_vcpu_must_have(sse4_1); |
7208 | 366 | get_fpu(X86EMUL_FPU_xmm); |
7209 | | |
7210 | 365 | opc = init_prefixes(stub); |
7211 | 0 | opc++[0] = 0x3a; |
7212 | 1.38k | pextr: |
7213 | 1.38k | opc[0] = b; |
7214 | | /* Convert memory/GPR operand to (%rAX). */ |
7215 | 1.38k | rex_prefix &= ~REX_B; |
7216 | 1.38k | evex.b = vex.b = 1; |
7217 | 1.38k | if ( !mode_64bit() ) |
7218 | 929 | evex.w = vex.w = 0; |
7219 | 1.38k | opc[1] = modrm & 0x38; |
7220 | 1.38k | opc[2] = imm1; |
7221 | 1.38k | opc[3] = 0xc3; |
7222 | 1.38k | if ( vex.opcx == vex_none ) |
7223 | 365 | { |
7224 | | /* Cover for extra prefix byte. */ |
7225 | 365 | --opc; |
7226 | 365 | } |
7227 | | |
7228 | 1.38k | if ( evex_encoded() ) |
7229 | 0 | copy_EVEX(opc, evex); |
7230 | 1.38k | else |
7231 | 1.38k | copy_REX_VEX(opc, rex_prefix, vex); |
7232 | 1.38k | invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val)); |
7233 | 1.38k | put_stub(stub); |
7234 | | |
7235 | 1.38k | ASSERT(!state->simd_size); |
7236 | 1.38k | dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3); |
7237 | 1.38k | if ( b == 0x16 && (rex_prefix & REX_W) ) |
7238 | 67 | dst.bytes = 8; |
7239 | 1.38k | break; |
7240 | | |
7241 | 392 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */ |
7242 | 825 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */ |
7243 | 925 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */ |
7244 | 1.02k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */ |
7245 | 1.02k | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
7246 | 1.02k | host_and_vcpu_must_have(avx); |
7247 | 1.02k | get_fpu(X86EMUL_FPU_ymm); |
7248 | | |
7249 | | /* Work around erratum BT41. */ |
7250 | 1.01k | if ( !mode_64bit() ) |
7251 | 650 | vex.w = 0; |
7252 | | |
7253 | 1.01k | opc = init_prefixes(stub); |
7254 | 0 | goto pextr; |
7255 | | |
7256 | 3 | case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ |
7257 | 3 | generate_exception_if(ea.type != OP_REG || !evex.R, X86_EXC_UD); |
7258 | | /* Convert to alternative encoding: We want to use a memory operand. */ |
7259 | 1 | evex.opcx = ext_0f3a; |
7260 | 1 | b = 0x15; |
7261 | 1 | modrm <<= 3; |
7262 | 1 | evex.r = evex.b; |
7263 | 1 | evex.R = evex.x; |
7264 | | /* fall through */ |
7265 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */ |
7266 | 5 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */ |
7267 | 7 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */ |
7268 | 9 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */ |
7269 | 9 | generate_exception_if((evex.lr || evex.reg != 0xf || !evex.RX || |
7270 | 9 | evex.opmsk || evex.brs), |
7271 | 9 | X86_EXC_UD); |
7272 | 3 | if ( !(b & 2) ) |
7273 | 3 | host_and_vcpu_must_have(avx512bw); |
7274 | 2 | else if ( !(b & 1) ) |
7275 | 2 | host_and_vcpu_must_have(avx512dq); |
7276 | 1 | else |
7277 | 1 | host_and_vcpu_must_have(avx512f); |
7278 | 0 | get_fpu(X86EMUL_FPU_zmm); |
7279 | 0 | opc = init_evex(stub); |
7280 | 0 | goto pextr; |
7281 | | |
7282 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x18): /* vinsertf32x4 $imm8,xmm/m128,{y,z}mm{k} */ |
7283 | | /* vinsertf64x2 $imm8,xmm/m128,{y,z}mm{k} */ |
7284 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x19): /* vextractf32x4 $imm8,{y,z}mm,xmm/m128{k} */ |
7285 | | /* vextractf64x2 $imm8,{y,z}mm,xmm/m128{k} */ |
7286 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x38): /* vinserti32x4 $imm8,xmm/m128,{y,z}mm{k} */ |
7287 | | /* vinserti64x2 $imm8,xmm/m128,{y,z}mm{k} */ |
7288 | 5 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x39): /* vextracti32x4 $imm8,{y,z}mm,xmm/m128{k} */ |
7289 | | /* vextracti64x2 $imm8,{y,z}mm,xmm/m128{k} */ |
7290 | 5 | if ( evex.w ) |
7291 | 5 | host_and_vcpu_must_have(avx512dq); |
7292 | 3 | generate_exception_if(evex.brs, X86_EXC_UD); |
7293 | | /* fall through */ |
7294 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x23): /* vshuff32x4 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
7295 | | /* vshuff64x2 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
7296 | 4 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x43): /* vshufi32x4 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
7297 | | /* vshufi64x2 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */ |
7298 | 4 | generate_exception_if(!evex.lr, X86_EXC_UD); |
7299 | 2 | fault_suppression = false; |
7300 | 2 | goto avx512f_imm8_no_sae; |
7301 | | |
7302 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1a): /* vinsertf32x4 $imm8,ymm/m256,zmm{k} */ |
7303 | | /* vinsertf64x2 $imm8,ymm/m256,zmm{k} */ |
7304 | 4 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1b): /* vextractf32x8 $imm8,zmm,ymm/m256{k} */ |
7305 | | /* vextractf64x4 $imm8,zmm,ymm/m256{k} */ |
7306 | 5 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3a): /* vinserti32x4 $imm8,ymm/m256,zmm{k} */ |
7307 | | /* vinserti64x2 $imm8,ymm/m256,zmm{k} */ |
7308 | 6 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3b): /* vextracti32x8 $imm8,zmm,ymm/m256{k} */ |
7309 | | /* vextracti64x4 $imm8,zmm,ymm/m256{k} */ |
7310 | 6 | if ( !evex.w ) |
7311 | 6 | host_and_vcpu_must_have(avx512dq); |
7312 | 4 | generate_exception_if(evex.lr != 2 || evex.brs, X86_EXC_UD); |
7313 | 1 | fault_suppression = false; |
7314 | 1 | goto avx512f_imm8_no_sae; |
7315 | | |
7316 | 639 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */ |
7317 | 645 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,[xyz]mm,{x,y}mm/mem{k} */ |
7318 | 645 | { |
7319 | 645 | uint32_t mxcsr; |
7320 | | |
7321 | 645 | fail_if(!ops->write); |
7322 | 644 | if ( evex_encoded() ) |
7323 | 6 | { |
7324 | 6 | generate_exception_if((evex.w || evex.reg != 0xf || !evex.RX || |
7325 | 6 | (ea.type != OP_REG && (evex.z || evex.brs))), |
7326 | 6 | X86_EXC_UD); |
7327 | 2 | host_and_vcpu_must_have(avx512f); |
7328 | 0 | avx512_vlen_check(false); |
7329 | 0 | opc = init_evex(stub); |
7330 | 0 | } |
7331 | 638 | else |
7332 | 638 | { |
7333 | 638 | generate_exception_if(vex.w || vex.reg != 0xf, X86_EXC_UD); |
7334 | 623 | host_and_vcpu_must_have(f16c); |
7335 | 623 | opc = init_prefixes(stub); |
7336 | 623 | } |
7337 | | |
7338 | 623 | op_bytes = 8 << evex.lr; |
7339 | | |
7340 | 623 | opc[0] = b; |
7341 | 623 | opc[1] = modrm; |
7342 | 623 | if ( ea.type == OP_MEM ) |
7343 | 298 | { |
7344 | | /* Convert memory operand to (%rAX). */ |
7345 | 298 | vex.b = 1; |
7346 | 298 | evex.b = 1; |
7347 | 298 | opc[1] &= 0x38; |
7348 | 298 | } |
7349 | 623 | opc[2] = imm1; |
7350 | 623 | if ( evex_encoded() ) |
7351 | 0 | { |
7352 | 0 | unsigned int full = 0; |
7353 | |
7354 | 0 | insn_bytes = EVEX_PFX_BYTES + 3; |
7355 | 0 | copy_EVEX(opc, evex); |
7356 | |
7357 | 0 | if ( ea.type == OP_MEM && evex.opmsk ) |
7358 | 0 | { |
7359 | 0 | full = 0xffff >> (16 - op_bytes / 2); |
7360 | 0 | op_mask &= full; |
7361 | 0 | if ( !op_mask ) |
7362 | 0 | goto complete_insn; |
7363 | | |
7364 | 0 | first_byte = __builtin_ctz(op_mask); |
7365 | 0 | op_mask >>= first_byte; |
7366 | 0 | full >>= first_byte; |
7367 | 0 | first_byte <<= 1; |
7368 | 0 | op_bytes = (32 - __builtin_clz(op_mask)) << 1; |
7369 | | |
7370 | | /* |
7371 | | * We may need to read (parts of) the memory operand for the |
7372 | | * purpose of merging in order to avoid splitting the write |
7373 | | * below into multiple ones. |
7374 | | */ |
7375 | 0 | if ( op_mask != full && |
7376 | 0 | (rc = ops->read(ea.mem.seg, |
7377 | 0 | truncate_ea(ea.mem.off + first_byte), |
7378 | 0 | (void *)mmvalp + first_byte, op_bytes, |
7379 | 0 | ctxt)) != X86EMUL_OKAY ) |
7380 | 0 | goto done; |
7381 | 0 | } |
7382 | 0 | } |
7383 | 623 | else |
7384 | 623 | { |
7385 | 623 | insn_bytes = PFX_BYTES + 3; |
7386 | 623 | copy_VEX(opc, vex); |
7387 | 623 | } |
7388 | 623 | opc[3] = 0xc3; |
7389 | | |
7390 | | /* Latch MXCSR - we may need to restore it below. */ |
7391 | 623 | invoke_stub("stmxcsr %[mxcsr]", "", |
7392 | 623 | "=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp)); |
7393 | | |
7394 | 623 | put_stub(stub); |
7395 | | |
7396 | 623 | if ( ea.type == OP_MEM ) |
7397 | 298 | { |
7398 | 298 | rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + first_byte), |
7399 | 298 | (void *)mmvalp + first_byte, op_bytes, ctxt); |
7400 | 298 | if ( rc != X86EMUL_OKAY ) |
7401 | 6 | { |
7402 | 6 | asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) ); |
7403 | 6 | goto done; |
7404 | 6 | } |
7405 | 298 | } |
7406 | | |
7407 | 617 | state->simd_size = simd_none; |
7408 | 617 | break; |
7409 | 623 | } |
7410 | | |
7411 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1e): /* vpcmpu{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
7412 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1f): /* vpcmp{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
7413 | 4 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3e): /* vpcmpu{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
7414 | 6 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3f): /* vpcmp{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
7415 | 6 | generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD); |
7416 | 2 | if ( !(b & 0x20) ) |
7417 | 1 | goto avx512f_imm8_no_sae; |
7418 | 7 | avx512bw_imm: |
7419 | 7 | host_and_vcpu_must_have(avx512bw); |
7420 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
7421 | 0 | elem_bytes = 1 << evex.w; |
7422 | 0 | avx512_vlen_check(false); |
7423 | 0 | goto simd_imm8_zmm; |
7424 | | |
7425 | 211 | case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */ |
7426 | 405 | case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */ |
7427 | 405 | host_and_vcpu_must_have(sse4_1); |
7428 | 405 | memcpy(mmvalp, &src.val, src.bytes); |
7429 | 405 | ea.type = OP_MEM; |
7430 | 405 | d = SrcMem16; /* Fake for the common SIMD code below. */ |
7431 | 405 | state->simd_size = simd_other; |
7432 | 405 | goto simd_0f3a_common; |
7433 | | |
7434 | 506 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */ |
7435 | 688 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */ |
7436 | 688 | generate_exception_if(vex.l, X86_EXC_UD); |
7437 | 686 | if ( !mode_64bit() ) |
7438 | 317 | vex.w = 0; |
7439 | 686 | memcpy(mmvalp, &src.val, src.bytes); |
7440 | 686 | ea.type = OP_MEM; |
7441 | 686 | d = SrcMem16; /* Fake for the common SIMD code below. */ |
7442 | 686 | state->simd_size = simd_other; |
7443 | 686 | goto simd_0f_int_imm8; |
7444 | | |
7445 | 62 | case X86EMUL_OPC_66(0x0f3a, 0x21): /* insertps $imm8,xmm/m32,xmm */ |
7446 | 62 | host_and_vcpu_must_have(sse4_1); |
7447 | 62 | op_bytes = 4; |
7448 | 62 | goto simd_0f3a_common; |
7449 | | |
7450 | 64 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */ |
7451 | 64 | op_bytes = 4; |
7452 | | /* fall through */ |
7453 | 122 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7454 | 122 | generate_exception_if(vex.l, X86_EXC_UD); |
7455 | 121 | goto simd_0f_imm8_avx; |
7456 | | |
7457 | 121 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */ |
7458 | 1 | host_and_vcpu_must_have(avx512f); |
7459 | 0 | generate_exception_if(evex.lr || evex.w || evex.opmsk || evex.brs, |
7460 | 0 | X86_EXC_UD); |
7461 | 0 | op_bytes = 4; |
7462 | 0 | goto simd_imm8_zmm; |
7463 | | |
7464 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x50): /* vrangep{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7465 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x56): /* vreducep{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7466 | 1 | host_and_vcpu_must_have(avx512dq); |
7467 | | /* fall through */ |
7468 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x26): /* vgetmantp{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7469 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x54): /* vfixupimmp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7470 | 2 | host_and_vcpu_must_have(avx512f); |
7471 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7472 | 0 | avx512_vlen_check(false); |
7473 | 0 | goto simd_imm8_zmm; |
7474 | | |
7475 | 1 | case X86EMUL_OPC_EVEX(0x0f3a, 0x26): /* vgetmantph $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7476 | 2 | case X86EMUL_OPC_EVEX(0x0f3a, 0x56): /* vreduceph $imm8,[xyz]mm/mem,[xyz]mm{k} */ |
7477 | 2 | host_and_vcpu_must_have(avx512_fp16); |
7478 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7479 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7480 | 0 | avx512_vlen_check(false); |
7481 | 0 | goto simd_imm8_zmm; |
7482 | | |
7483 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x51): /* vranges{s,d} $imm8,xmm/mem,xmm,xmm{k} */ |
7484 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x57): /* vreduces{s,d} $imm8,xmm/mem,xmm,xmm{k} */ |
7485 | 2 | host_and_vcpu_must_have(avx512dq); |
7486 | | /* fall through */ |
7487 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x27): /* vgetmants{s,d} $imm8,xmm/mem,xmm,xmm{k} */ |
7488 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x55): /* vfixupimms{s,d} $imm8,xmm/mem,xmm,xmm{k} */ |
7489 | 2 | host_and_vcpu_must_have(avx512f); |
7490 | 0 | generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD); |
7491 | 0 | if ( !evex.brs ) |
7492 | 0 | avx512_vlen_check(true); |
7493 | 0 | goto simd_imm8_zmm; |
7494 | | |
7495 | 1 | case X86EMUL_OPC_EVEX(0x0f3a, 0x27): /* vgetmantsh $imm8,xmm/mem,xmm,xmm{k} */ |
7496 | 1 | case X86EMUL_OPC_EVEX(0x0f3a, 0x57): /* vreducesh $imm8,xmm/mem,xmm,xmm{k} */ |
7497 | 1 | host_and_vcpu_must_have(avx512_fp16); |
7498 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7499 | 0 | if ( !evex.brs ) |
7500 | 0 | avx512_vlen_check(true); |
7501 | 0 | else |
7502 | 0 | generate_exception_if(ea.type != OP_REG, X86_EXC_UD); |
7503 | 0 | goto simd_imm8_zmm; |
7504 | | |
7505 | 7 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */ |
7506 | 15 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */ |
7507 | 15 | if ( !vex.w ) |
7508 | 15 | host_and_vcpu_must_have(avx512dq); |
7509 | 9 | opmask_shift_imm: |
7510 | 9 | generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || |
7511 | 9 | ea.type != OP_REG, X86_EXC_UD); |
7512 | 1 | host_and_vcpu_must_have(avx512f); |
7513 | 0 | get_fpu(X86EMUL_FPU_opmask); |
7514 | 0 | op_bytes = 1; /* Any non-zero value will do. */ |
7515 | 0 | goto simd_0f_imm8; |
7516 | | |
7517 | 1 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x31): /* kshiftr{d,q} $imm8,k,k */ |
7518 | 2 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x33): /* kshiftl{d,q} $imm8,k,k */ |
7519 | 2 | host_and_vcpu_must_have(avx512bw); |
7520 | 0 | goto opmask_shift_imm; |
7521 | | |
7522 | 67 | case X86EMUL_OPC_66(0x0f3a, 0x44): /* pclmulqdq $imm8,xmm/m128,xmm */ |
7523 | 130 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7524 | 130 | host_and_vcpu_must_have(pclmulqdq); |
7525 | 130 | if ( vex.opcx == vex_none ) |
7526 | 67 | goto simd_0f3a_common; |
7527 | 63 | if ( vex.l ) |
7528 | 63 | host_and_vcpu_must_have(vpclmulqdq); |
7529 | 62 | goto simd_0f_imm8_avx; |
7530 | | |
7531 | 62 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm */ |
7532 | 1 | host_and_vcpu_must_have(vpclmulqdq); |
7533 | 0 | generate_exception_if(evex.brs || evex.opmsk, X86_EXC_UD); |
7534 | 0 | goto avx512f_imm8_no_sae; |
7535 | | |
7536 | 1 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x48): /* vpermil2ps $imm,{x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7537 | | /* vpermil2ps $imm,{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7538 | 2 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x49): /* vpermil2pd $imm,{x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7539 | | /* vpermil2pd $imm,{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7540 | 2 | host_and_vcpu_must_have(xop); |
7541 | 0 | goto simd_0f_imm8_ymm; |
7542 | | |
7543 | 224 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x4a): /* vblendvps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7544 | 295 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x4b): /* vblendvpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7545 | 295 | generate_exception_if(vex.w, X86_EXC_UD); |
7546 | 288 | goto simd_0f_imm8_avx; |
7547 | | |
7548 | 288 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x4c): /* vpblendvb {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7549 | 205 | generate_exception_if(vex.w, X86_EXC_UD); |
7550 | 199 | goto simd_0f_int_imm8; |
7551 | | |
7552 | 199 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7553 | | /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7554 | 2 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7555 | | /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7556 | 3 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7557 | | /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7558 | 4 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7559 | | /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7560 | 5 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7561 | | /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7562 | 6 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7563 | | /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7564 | 7 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */ |
7565 | | /* vfmaddss xmm/m32,xmm,xmm,xmm */ |
7566 | 8 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */ |
7567 | | /* vfmaddsd xmm/m64,xmm,xmm,xmm */ |
7568 | 9 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7569 | | /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7570 | 10 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7571 | | /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7572 | 11 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */ |
7573 | | /* vfmsubss xmm/m32,xmm,xmm,xmm */ |
7574 | 12 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */ |
7575 | | /* vfmsubsd xmm/m64,xmm,xmm,xmm */ |
7576 | 13 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7577 | | /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7578 | 14 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7579 | | /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7580 | 15 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */ |
7581 | | /* vfnmaddss xmm/m32,xmm,xmm,xmm */ |
7582 | 16 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */ |
7583 | | /* vfnmaddsd xmm/m64,xmm,xmm,xmm */ |
7584 | 17 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7585 | | /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7586 | 18 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7587 | | /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7588 | 19 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */ |
7589 | | /* vfnmsubss xmm/m32,xmm,xmm,xmm */ |
7590 | 20 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */ |
7591 | | /* vfnmsubsd xmm/m64,xmm,xmm,xmm */ |
7592 | 20 | host_and_vcpu_must_have(fma4); |
7593 | 0 | goto simd_0f_imm8_ymm; |
7594 | | |
7595 | 118 | case X86EMUL_OPC_66(0x0f3a, 0x60): /* pcmpestrm $imm8,xmm/m128,xmm */ |
7596 | 321 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */ |
7597 | 517 | case X86EMUL_OPC_66(0x0f3a, 0x61): /* pcmpestri $imm8,xmm/m128,xmm */ |
7598 | 773 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */ |
7599 | 839 | case X86EMUL_OPC_66(0x0f3a, 0x62): /* pcmpistrm $imm8,xmm/m128,xmm */ |
7600 | 907 | case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */ |
7601 | 975 | case X86EMUL_OPC_66(0x0f3a, 0x63): /* pcmpistri $imm8,xmm/m128,xmm */ |
7602 | 1.04k | case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */ |
7603 | 1.04k | if ( vex.opcx == vex_none ) |
7604 | 448 | { |
7605 | 448 | host_and_vcpu_must_have(sse4_2); |
7606 | 448 | get_fpu(X86EMUL_FPU_xmm); |
7607 | 448 | } |
7608 | 593 | else |
7609 | 593 | { |
7610 | 593 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
7611 | 585 | host_and_vcpu_must_have(avx); |
7612 | 585 | get_fpu(X86EMUL_FPU_ymm); |
7613 | 585 | } |
7614 | | |
7615 | 1.03k | opc = init_prefixes(stub); |
7616 | 1.03k | if ( vex.opcx == vex_none ) |
7617 | 447 | opc++[0] = 0x3a; |
7618 | 1.03k | opc[0] = b; |
7619 | 1.03k | opc[1] = modrm; |
7620 | 1.03k | if ( ea.type == OP_MEM ) |
7621 | 508 | { |
7622 | | /* Convert memory operand to (%rDI). */ |
7623 | 508 | rex_prefix &= ~REX_B; |
7624 | 508 | vex.b = 1; |
7625 | 508 | opc[1] &= 0x3f; |
7626 | 508 | opc[1] |= 0x07; |
7627 | | |
7628 | 508 | rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt); |
7629 | 508 | if ( rc != X86EMUL_OKAY ) |
7630 | 26 | goto done; |
7631 | 508 | } |
7632 | 1.00k | opc[2] = imm1; |
7633 | 1.00k | insn_bytes = PFX_BYTES + 3; |
7634 | 1.00k | opc[3] = 0xc3; |
7635 | 1.00k | if ( vex.opcx == vex_none ) |
7636 | 429 | { |
7637 | | /* Cover for extra prefix byte. */ |
7638 | 429 | --opc; |
7639 | 429 | ++insn_bytes; |
7640 | 429 | } |
7641 | | |
7642 | 1.00k | copy_REX_VEX(opc, rex_prefix, vex); |
7643 | 1.00k | #ifdef __x86_64__ |
7644 | 1.00k | if ( rex_prefix & REX_W ) |
7645 | 238 | emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp), |
7646 | 1.00k | "a" (_regs.rax), "d" (_regs.rdx)); |
7647 | 767 | else |
7648 | 767 | #endif |
7649 | 767 | emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp), |
7650 | 1.00k | "a" (_regs.eax), "d" (_regs.edx)); |
7651 | | |
7652 | 1.00k | state->simd_size = simd_none; |
7653 | 1.00k | if ( b & 1 ) |
7654 | 573 | _regs.r(cx) = (uint32_t)dst.val; |
7655 | 1.00k | dst.type = OP_NONE; |
7656 | 1.00k | break; |
7657 | | |
7658 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x66): /* vfpclassp{s,d} $imm8,[xyz]mm/mem,k{k} */ |
7659 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x67): /* vfpclasss{s,d} $imm8,xmm/mem,k{k} */ |
7660 | 2 | host_and_vcpu_must_have(avx512dq); |
7661 | 0 | generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD); |
7662 | 0 | if ( !(b & 1) ) |
7663 | 0 | goto avx512f_imm8_no_sae; |
7664 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
7665 | 0 | avx512_vlen_check(true); |
7666 | 0 | goto simd_imm8_zmm; |
7667 | | |
7668 | 1 | case X86EMUL_OPC_EVEX(0x0f3a, 0x66): /* vfpclassph $imm8,[xyz]mm/mem,k{k} */ |
7669 | 2 | case X86EMUL_OPC_EVEX(0x0f3a, 0x67): /* vfpclasssh $imm8,xmm/mem,k{k} */ |
7670 | 2 | host_and_vcpu_must_have(avx512_fp16); |
7671 | 0 | generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD); |
7672 | 0 | if ( !(b & 1) ) |
7673 | 0 | goto avx512f_imm8_no_sae; |
7674 | 0 | generate_exception_if(evex.brs, X86_EXC_UD); |
7675 | 0 | avx512_vlen_check(true); |
7676 | 0 | goto simd_imm8_zmm; |
7677 | | |
7678 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x70): /* vpshldw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7679 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x72): /* vpshrdw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7680 | 2 | generate_exception_if(!evex.w, X86_EXC_UD); |
7681 | 1 | elem_bytes = 2; |
7682 | | /* fall through */ |
7683 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x71): /* vpshld{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7684 | 3 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0x73): /* vpshrd{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7685 | 3 | host_and_vcpu_must_have(avx512_vbmi2); |
7686 | 0 | goto avx512f_imm8_no_sae; |
7687 | | |
7688 | 3 | case X86EMUL_OPC_EVEX_F3(0x0f3a, 0xc2): /* vcmpsh $imm8,xmm/mem,xmm,k{k} */ |
7689 | 3 | generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD); |
7690 | | /* fall through */ |
7691 | 3 | case X86EMUL_OPC_EVEX(0x0f3a, 0xc2): /* vcmpph $imm8,[xyz]mm/mem,[xyz]mm,k{k} */ |
7692 | 3 | host_and_vcpu_must_have(avx512_fp16); |
7693 | 0 | generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD); |
7694 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7695 | 0 | avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK); |
7696 | 0 | goto simd_imm8_zmm; |
7697 | | |
7698 | 1 | case X86EMUL_OPC(0x0f3a, 0xcc): /* sha1rnds4 $imm8,xmm/m128,xmm */ |
7699 | 1 | host_and_vcpu_must_have(sha); |
7700 | 1 | op_bytes = 16; |
7701 | 1 | goto simd_0f3a_common; |
7702 | | |
7703 | 1 | case X86EMUL_OPC_66(0x0f3a, 0xce): /* gf2p8affineqb $imm8,xmm/m128,xmm */ |
7704 | 2 | case X86EMUL_OPC_66(0x0f3a, 0xcf): /* gf2p8affineinvqb $imm8,xmm/m128,xmm */ |
7705 | 2 | host_and_vcpu_must_have(gfni); |
7706 | 0 | goto simd_0f3a_common; |
7707 | | |
7708 | 1 | case X86EMUL_OPC_VEX_66(0x0f3a, 0xce): /* vgf2p8affineqb $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7709 | 2 | case X86EMUL_OPC_VEX_66(0x0f3a, 0xcf): /* vgf2p8affineinvqb $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7710 | 2 | host_and_vcpu_must_have(gfni); |
7711 | 0 | generate_exception_if(!vex.w, X86_EXC_UD); |
7712 | 0 | goto simd_0f_imm8_avx; |
7713 | | |
7714 | 1 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0xce): /* vgf2p8affineqb $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7715 | 2 | case X86EMUL_OPC_EVEX_66(0x0f3a, 0xcf): /* vgf2p8affineinvqb $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7716 | 2 | host_and_vcpu_must_have(gfni); |
7717 | 0 | generate_exception_if(!evex.w, X86_EXC_UD); |
7718 | 0 | fault_suppression = false; |
7719 | 0 | goto avx512f_imm8_no_sae; |
7720 | | |
7721 | 1 | case X86EMUL_OPC_VEX_66(0x0f3a, 0xde): /* vsm3rnds2 $imm8,xmm/mem,xmm,xmm */ |
7722 | 1 | host_and_vcpu_must_have(sm3); |
7723 | 0 | generate_exception_if(vex.w || vex.l, X86_EXC_UD); |
7724 | 0 | op_bytes = 16; |
7725 | 0 | goto simd_0f_imm8_ymm; |
7726 | | |
7727 | 66 | case X86EMUL_OPC_66(0x0f3a, 0xdf): /* aeskeygenassist $imm8,xmm/m128,xmm */ |
7728 | 112 | case X86EMUL_OPC_VEX_66(0x0f3a, 0xdf): /* vaeskeygenassist $imm8,xmm/m128,xmm */ |
7729 | 112 | host_and_vcpu_must_have(aesni); |
7730 | 112 | if ( vex.opcx == vex_none ) |
7731 | 66 | goto simd_0f3a_common; |
7732 | 46 | generate_exception_if(vex.l, X86_EXC_UD); |
7733 | 46 | goto simd_0f_imm8_avx; |
7734 | | |
7735 | 46 | #endif /* X86EMUL_NO_SIMD */ |
7736 | | |
7737 | 301 | case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */ |
7738 | 301 | vcpu_must_have(bmi2); |
7739 | 301 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
7740 | 296 | if ( mode_64bit() && vex.w ) |
7741 | 66 | asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) ); |
7742 | 230 | else |
7743 | 230 | asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) ); |
7744 | 296 | break; |
7745 | | |
7746 | 0 | #ifndef X86EMUL_NO_SIMD |
7747 | | |
7748 | 1 | case X86EMUL_OPC_EVEX_F3(5, 0x10): /* vmovsh m16,xmm{k} */ |
7749 | | /* vmovsh xmm,xmm,xmm{k} */ |
7750 | 2 | case X86EMUL_OPC_EVEX_F3(5, 0x11): /* vmovsh xmm,m16{k} */ |
7751 | | /* vmovsh xmm,xmm,xmm{k} */ |
7752 | 2 | generate_exception_if(evex.brs, X86_EXC_UD); |
7753 | 1 | if ( ea.type == OP_MEM ) |
7754 | 1 | d |= TwoOp; |
7755 | 0 | else |
7756 | 0 | { |
7757 | 1 | case X86EMUL_OPC_EVEX_F3(5, 0x51): /* vsqrtsh xmm/m16,xmm,xmm{k} */ |
7758 | 1 | d &= ~TwoOp; |
7759 | 1 | } |
7760 | | /* fall through */ |
7761 | 3 | case X86EMUL_OPC_EVEX(5, 0x51): /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */ |
7762 | 9 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7763 | 13 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x59): /* vmul{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7764 | 17 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5c): /* vsub{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7765 | 21 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5d): /* vmin{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7766 | 25 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5e): /* vdiv{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7767 | 29 | CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5f): /* vmax{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7768 | 29 | host_and_vcpu_must_have(avx512_fp16); |
7769 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7770 | 0 | goto avx512f_all_fp; |
7771 | | |
7772 | 14 | CASE_SIMD_ALL_FP(_EVEX, 5, 0x5a): /* vcvtp{h,d}2p{h,d} [xyz]mm/mem,[xyz]mm{k} */ |
7773 | | /* vcvts{h,d}2s{h,d} xmm/mem,xmm,xmm{k} */ |
7774 | 14 | host_and_vcpu_must_have(avx512_fp16); |
7775 | 0 | if ( vex.pfx & VEX_PREFIX_SCALAR_MASK ) |
7776 | 0 | d &= ~TwoOp; |
7777 | 0 | op_bytes = 2 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + evex.lr) + |
7778 | 0 | 2 * evex.w); |
7779 | 0 | goto avx512f_all_fp; |
7780 | | |
7781 | 1 | case X86EMUL_OPC_EVEX (5, 0x5b): /* vcvtdq2ph [xyz]mm/mem,[xy]mm{k} */ |
7782 | | /* vcvtqq2ph [xyz]mm/mem,xmm{k} */ |
7783 | 2 | case X86EMUL_OPC_EVEX_F2(5, 0x7a): /* vcvtudq2ph [xyz]mm/mem,[xy]mm{k} */ |
7784 | | /* vcvtuqq2ph [xyz]mm/mem,xmm{k} */ |
7785 | 2 | host_and_vcpu_must_have(avx512_fp16); |
7786 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7787 | 0 | avx512_vlen_check(false); |
7788 | 0 | op_bytes = 16 << evex.lr; |
7789 | 0 | goto simd_zmm; |
7790 | | |
7791 | 1 | case X86EMUL_OPC_EVEX_66(5, 0x5b): /* vcvtph2dq [xy]mm/mem,[xyz]mm{k} */ |
7792 | 2 | case X86EMUL_OPC_EVEX_F3(5, 0x5b): /* vcvttph2dq [xy]mm/mem,[xyz]mm{k} */ |
7793 | 3 | case X86EMUL_OPC_EVEX (5, 0x78): /* vcvttph2udq [xy]mm/mem,[xyz]mm{k} */ |
7794 | 5 | case X86EMUL_OPC_EVEX (5, 0x79): /* vcvtph2udq [xy]mm/mem,[xyz]mm{k} */ |
7795 | 5 | host_and_vcpu_must_have(avx512_fp16); |
7796 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7797 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7798 | 0 | avx512_vlen_check(false); |
7799 | 0 | op_bytes = 8 << evex.lr; |
7800 | 0 | goto simd_zmm; |
7801 | | |
7802 | 1 | case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */ |
7803 | 2 | case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */ |
7804 | 3 | case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */ |
7805 | 5 | case X86EMUL_OPC_EVEX_66(5, 0x7b): /* vcvtph2qq xmm/mem,[xyz]mm{k} */ |
7806 | 5 | host_and_vcpu_must_have(avx512_fp16); |
7807 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7808 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7809 | 0 | avx512_vlen_check(false); |
7810 | 0 | op_bytes = 4 << (evex.w + evex.lr); |
7811 | 0 | goto simd_zmm; |
7812 | | |
7813 | 1 | case X86EMUL_OPC_EVEX (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */ |
7814 | 2 | case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */ |
7815 | 3 | case X86EMUL_OPC_EVEX (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */ |
7816 | 4 | case X86EMUL_OPC_EVEX_66(5, 0x7d): /* vcvtph2w [xyz]mm/mem,[xyz]mm{k} */ |
7817 | 5 | case X86EMUL_OPC_EVEX_F3(5, 0x7d): /* vcvtw2ph [xyz]mm/mem,[xyz]mm{k} */ |
7818 | 6 | case X86EMUL_OPC_EVEX_F2(5, 0x7d): /* vcvtuw2ph [xyz]mm/mem,[xyz]mm{k} */ |
7819 | 7 | case X86EMUL_OPC_EVEX_66(6, 0x13): /* vcvtph2psx [xy]mm/mem,[xyz]mm{k} */ |
7820 | 7 | op_bytes = 8 << ((ext == ext_map5) + evex.lr); |
7821 | | /* fall through */ |
7822 | 8 | case X86EMUL_OPC_EVEX_66(5, 0x1d): /* vcvtps2phx [xyz]mm/mem,[xy]mm{k} */ |
7823 | 9 | case X86EMUL_OPC_EVEX_66(6, 0x2c): /* vscalefph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7824 | 10 | case X86EMUL_OPC_EVEX_66(6, 0x42): /* vgetexpph [xyz]mm/mem,[xyz]mm{k} */ |
7825 | 11 | case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7826 | 12 | case X86EMUL_OPC_EVEX_66(6, 0x97): /* vfmsubadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7827 | 13 | case X86EMUL_OPC_EVEX_66(6, 0x98): /* vfmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7828 | 14 | case X86EMUL_OPC_EVEX_66(6, 0x9a): /* vfmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7829 | 15 | case X86EMUL_OPC_EVEX_66(6, 0x9c): /* vfnmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7830 | 16 | case X86EMUL_OPC_EVEX_66(6, 0x9e): /* vfnmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7831 | 17 | case X86EMUL_OPC_EVEX_66(6, 0xa6): /* vfmaddsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7832 | 18 | case X86EMUL_OPC_EVEX_66(6, 0xa7): /* vfmsubadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7833 | 19 | case X86EMUL_OPC_EVEX_66(6, 0xa8): /* vfmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7834 | 20 | case X86EMUL_OPC_EVEX_66(6, 0xaa): /* vfmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7835 | 21 | case X86EMUL_OPC_EVEX_66(6, 0xac): /* vfnmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7836 | 22 | case X86EMUL_OPC_EVEX_66(6, 0xae): /* vfnmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7837 | 23 | case X86EMUL_OPC_EVEX_66(6, 0xb6): /* vfmaddsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7838 | 24 | case X86EMUL_OPC_EVEX_66(6, 0xb7): /* vfmsubadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7839 | 25 | case X86EMUL_OPC_EVEX_66(6, 0xb8): /* vfmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7840 | 26 | case X86EMUL_OPC_EVEX_66(6, 0xba): /* vfmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7841 | 27 | case X86EMUL_OPC_EVEX_66(6, 0xbc): /* vfnmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7842 | 28 | case X86EMUL_OPC_EVEX_66(6, 0xbe): /* vfnmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7843 | 28 | host_and_vcpu_must_have(avx512_fp16); |
7844 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7845 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7846 | 0 | avx512_vlen_check(false); |
7847 | 0 | goto simd_zmm; |
7848 | | |
7849 | 1 | case X86EMUL_OPC_EVEX(5, 0x1d): /* vcvtss2sh xmm/mem,xmm,xmm{k} */ |
7850 | 2 | case X86EMUL_OPC_EVEX(6, 0x13): /* vcvtsh2ss xmm/mem,xmm,xmm{k} */ |
7851 | 3 | case X86EMUL_OPC_EVEX_66(6, 0x2d): /* vscalefsh xmm/m16,xmm,xmm{k} */ |
7852 | 4 | case X86EMUL_OPC_EVEX_66(6, 0x43): /* vgetexpsh xmm/m16,xmm,xmm{k} */ |
7853 | 5 | case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */ |
7854 | 6 | case X86EMUL_OPC_EVEX_66(6, 0x9b): /* vfmsub132sh xmm/m16,xmm,xmm{k} */ |
7855 | 7 | case X86EMUL_OPC_EVEX_66(6, 0x9d): /* vfnmadd132sh xmm/m16,xmm,xmm{k} */ |
7856 | 8 | case X86EMUL_OPC_EVEX_66(6, 0x9f): /* vfnmsub132sh xmm/m16,xmm,xmm{k} */ |
7857 | 9 | case X86EMUL_OPC_EVEX_66(6, 0xa9): /* vfmadd213sh xmm/m16,xmm,xmm{k} */ |
7858 | 10 | case X86EMUL_OPC_EVEX_66(6, 0xab): /* vfmsub213sh xmm/m16,xmm,xmm{k} */ |
7859 | 11 | case X86EMUL_OPC_EVEX_66(6, 0xad): /* vfnmadd213sh xmm/m16,xmm,xmm{k} */ |
7860 | 12 | case X86EMUL_OPC_EVEX_66(6, 0xaf): /* vfnmsub213sh xmm/m16,xmm,xmm{k} */ |
7861 | 13 | case X86EMUL_OPC_EVEX_66(6, 0xb9): /* vfmadd231sh xmm/m16,xmm,xmm{k} */ |
7862 | 14 | case X86EMUL_OPC_EVEX_66(6, 0xbb): /* vfmsub231sh xmm/m16,xmm,xmm{k} */ |
7863 | 15 | case X86EMUL_OPC_EVEX_66(6, 0xbd): /* vfnmadd231sh xmm/m16,xmm,xmm{k} */ |
7864 | 16 | case X86EMUL_OPC_EVEX_66(6, 0xbf): /* vfnmsub231sh xmm/m16,xmm,xmm{k} */ |
7865 | 16 | host_and_vcpu_must_have(avx512_fp16); |
7866 | 0 | generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs), |
7867 | 0 | X86_EXC_UD); |
7868 | 0 | if ( !evex.brs ) |
7869 | 0 | avx512_vlen_check(true); |
7870 | 0 | goto simd_zmm; |
7871 | | |
7872 | 1 | case X86EMUL_OPC_EVEX_66(6, 0x4c): /* vrcpph [xyz]mm/mem,[xyz]mm{k} */ |
7873 | 2 | case X86EMUL_OPC_EVEX_66(6, 0x4e): /* vrsqrtph [xyz]mm/mem,[xyz]mm{k} */ |
7874 | 2 | host_and_vcpu_must_have(avx512_fp16); |
7875 | 0 | generate_exception_if(evex.w, X86_EXC_UD); |
7876 | 0 | goto avx512f_no_sae; |
7877 | | |
7878 | 1 | case X86EMUL_OPC_EVEX_66(6, 0x4d): /* vrcpsh xmm/m16,xmm,xmm{k} */ |
7879 | 2 | case X86EMUL_OPC_EVEX_66(6, 0x4f): /* vrsqrtsh xmm/m16,xmm,xmm{k} */ |
7880 | 2 | host_and_vcpu_must_have(avx512_fp16); |
7881 | 0 | generate_exception_if(evex.w || evex.brs, X86_EXC_UD); |
7882 | 0 | avx512_vlen_check(true); |
7883 | 0 | goto simd_zmm; |
7884 | | |
7885 | 1 | case X86EMUL_OPC_EVEX_F3(6, 0x56): /* vfmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7886 | 2 | case X86EMUL_OPC_EVEX_F2(6, 0x56): /* vfcmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7887 | 3 | case X86EMUL_OPC_EVEX_F3(6, 0xd6): /* vfmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7888 | 4 | case X86EMUL_OPC_EVEX_F2(6, 0xd6): /* vfcmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ |
7889 | 4 | op_bytes = 16 << evex.lr; |
7890 | | /* fall through */ |
7891 | 5 | case X86EMUL_OPC_EVEX_F3(6, 0x57): /* vfmaddcsh xmm/m16,xmm,xmm{k} */ |
7892 | 6 | case X86EMUL_OPC_EVEX_F2(6, 0x57): /* vfcmaddcsh xmm/m16,xmm,xmm{k} */ |
7893 | 7 | case X86EMUL_OPC_EVEX_F3(6, 0xd7): /* vfmulcsh xmm/m16,xmm,xmm{k} */ |
7894 | 8 | case X86EMUL_OPC_EVEX_F2(6, 0xd7): /* vfcmulcsh xmm/m16,xmm,xmm{k} */ |
7895 | 8 | { |
7896 | 8 | unsigned int src1 = ~evex.reg; |
7897 | | |
7898 | 8 | host_and_vcpu_must_have(avx512_fp16); |
7899 | 0 | generate_exception_if(evex.w || ((b & 1) && ea.type != OP_REG && evex.brs), |
7900 | 0 | X86_EXC_UD); |
7901 | 0 | if ( mode_64bit() ) |
7902 | 0 | src1 = (src1 & 0xf) | (!evex.RX << 4); |
7903 | 0 | else |
7904 | 0 | src1 &= 7; |
7905 | 0 | generate_exception_if(modrm_reg == src1 || |
7906 | 0 | (ea.type != OP_MEM && modrm_reg == modrm_rm), |
7907 | 0 | X86_EXC_UD); |
7908 | 0 | if ( ea.type != OP_REG || !evex.brs ) |
7909 | 0 | avx512_vlen_check(b & 1); |
7910 | 0 | goto simd_zmm; |
7911 | 0 | } |
7912 | | |
7913 | 0 | case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */ |
7914 | 1 | case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */ |
7915 | 3 | case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */ |
7916 | 3 | case X86EMUL_OPC_XOP(08, 0x8e): /* vpmacssdd xmm,xmm/m128,xmm,xmm */ |
7917 | 4 | case X86EMUL_OPC_XOP(08, 0x8f): /* vpmacssdqh xmm,xmm/m128,xmm,xmm */ |
7918 | 5 | case X86EMUL_OPC_XOP(08, 0x95): /* vpmacsww xmm,xmm/m128,xmm,xmm */ |
7919 | 6 | case X86EMUL_OPC_XOP(08, 0x96): /* vpmacswd xmm,xmm/m128,xmm,xmm */ |
7920 | 7 | case X86EMUL_OPC_XOP(08, 0x97): /* vpmacsdql xmm,xmm/m128,xmm,xmm */ |
7921 | 8 | case X86EMUL_OPC_XOP(08, 0x9e): /* vpmacsdd xmm,xmm/m128,xmm,xmm */ |
7922 | 9 | case X86EMUL_OPC_XOP(08, 0x9f): /* vpmacsdqh xmm,xmm/m128,xmm,xmm */ |
7923 | 10 | case X86EMUL_OPC_XOP(08, 0xa6): /* vpmadcsswd xmm,xmm/m128,xmm,xmm */ |
7924 | 11 | case X86EMUL_OPC_XOP(08, 0xb6): /* vpmadcswd xmm,xmm/m128,xmm,xmm */ |
7925 | 12 | case X86EMUL_OPC_XOP(08, 0xc0): /* vprotb $imm,xmm/m128,xmm */ |
7926 | 13 | case X86EMUL_OPC_XOP(08, 0xc1): /* vprotw $imm,xmm/m128,xmm */ |
7927 | 14 | case X86EMUL_OPC_XOP(08, 0xc2): /* vprotd $imm,xmm/m128,xmm */ |
7928 | 15 | case X86EMUL_OPC_XOP(08, 0xc3): /* vprotq $imm,xmm/m128,xmm */ |
7929 | 17 | case X86EMUL_OPC_XOP(08, 0xcc): /* vpcomb $imm,xmm/m128,xmm,xmm */ |
7930 | 18 | case X86EMUL_OPC_XOP(08, 0xcd): /* vpcomw $imm,xmm/m128,xmm,xmm */ |
7931 | 19 | case X86EMUL_OPC_XOP(08, 0xce): /* vpcomd $imm,xmm/m128,xmm,xmm */ |
7932 | 21 | case X86EMUL_OPC_XOP(08, 0xcf): /* vpcomq $imm,xmm/m128,xmm,xmm */ |
7933 | 22 | case X86EMUL_OPC_XOP(08, 0xec): /* vpcomub $imm,xmm/m128,xmm,xmm */ |
7934 | 23 | case X86EMUL_OPC_XOP(08, 0xed): /* vpcomuw $imm,xmm/m128,xmm,xmm */ |
7935 | 24 | case X86EMUL_OPC_XOP(08, 0xee): /* vpcomud $imm,xmm/m128,xmm,xmm */ |
7936 | 25 | case X86EMUL_OPC_XOP(08, 0xef): /* vpcomuq $imm,xmm/m128,xmm,xmm */ |
7937 | 25 | generate_exception_if(vex.w, X86_EXC_UD); |
7938 | | /* fall through */ |
7939 | 20 | case X86EMUL_OPC_XOP(08, 0xa3): /* vpperm xmm/m128,xmm,xmm,xmm */ |
7940 | | /* vpperm xmm,xmm/m128,xmm,xmm */ |
7941 | 20 | generate_exception_if(vex.l, X86_EXC_UD); |
7942 | | /* fall through */ |
7943 | 11 | case X86EMUL_OPC_XOP(08, 0xa2): /* vpcmov {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */ |
7944 | | /* vpcmov {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */ |
7945 | 11 | host_and_vcpu_must_have(xop); |
7946 | 0 | goto simd_0f_imm8_ymm; |
7947 | | |
7948 | 0 | #endif /* X86EMUL_NO_SIMD */ |
7949 | | |
7950 | 2 | case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */ |
7951 | 2 | switch ( modrm_reg & 7 ) |
7952 | 2 | { |
7953 | 0 | case 1: /* blcfill r/m,r */ |
7954 | 1 | case 2: /* blsfill r/m,r */ |
7955 | 1 | case 3: /* blcs r/m,r */ |
7956 | 1 | case 4: /* tzmsk r/m,r */ |
7957 | 1 | case 5: /* blcic r/m,r */ |
7958 | 1 | case 6: /* blsic r/m,r */ |
7959 | 1 | case 7: /* t1mskc r/m,r */ |
7960 | 1 | host_and_vcpu_must_have(tbm); |
7961 | 0 | break; |
7962 | 1 | default: |
7963 | 1 | goto unrecognized_insn; |
7964 | 2 | } |
7965 | | |
7966 | 0 | xop_09_rm_rv: |
7967 | 0 | { |
7968 | 0 | uint8_t *buf = get_stub(stub); |
7969 | 0 | typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]); |
7970 | |
7971 | 0 | generate_exception_if(vex.l, X86_EXC_UD); |
7972 | | |
7973 | 0 | buf[0] = 0x8f; |
7974 | 0 | *pxop = vex; |
7975 | 0 | pxop->b = 1; |
7976 | 0 | pxop->r = 1; |
7977 | 0 | pxop->reg = 0xf; /* rAX */ |
7978 | 0 | buf[3] = b; |
7979 | 0 | buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ |
7980 | 0 | buf[5] = 0xc3; |
7981 | |
7982 | 0 | dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); |
7983 | 0 | emulate_stub([dst] "=&a" (dst.val), "c" (&src.val)); |
7984 | |
7985 | 0 | put_stub(stub); |
7986 | 0 | break; |
7987 | 0 | } |
7988 | | |
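Editorial sketch, not part of the source: the xop_09_rm_rv path above synthesises a six-byte executable stub and calls it via emulate_stub(), with the source operand's address in %rcx and the result coming back in %rax. Assuming the usual three-byte XOP prefix, the buffer has roughly the shape below; the concrete byte values are only an example (map 9, W=0, L=0, pp=0, vvvv selecting rAX, /1 i.e. blcfill), since the real ones are copied from the decoded 'vex' fields and ModRM byte:

    #include <stdint.h>

    /* Illustrative layout of the stub assembled by the code above. */
    static const uint8_t xop_stub_sketch[] = {
        0x8f,         /* XOP escape byte                                      */
        0xe9, 0x78,   /* example XOP prefix: map 9, R=X=B=1, vvvv=rAX, W=L=0  */
        0x01,         /* original opcode byte ('b'); 0x01 here = XOP group 1  */
        0x09,         /* example ModRM: mod=00, /1 (blcfill), r/m = (%rCX)    */
        0xc3,         /* near return back into the emulator                   */
    };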
7989 | 3 | case X86EMUL_OPC_XOP(09, 0x02): /* XOP Grp2 */ |
7990 | 3 | switch ( modrm_reg & 7 ) |
7991 | 3 | { |
7992 | 1 | case 1: /* blcmsk r/m,r */ |
7993 | 2 | case 6: /* blci r/m,r */ |
7994 | 2 | host_and_vcpu_must_have(tbm); |
7995 | 0 | goto xop_09_rm_rv; |
7996 | 3 | } |
7997 | 1 | goto unrecognized_insn; |
7998 | | |
7999 | 2 | case X86EMUL_OPC_XOP(09, 0x12): /* XOP Grp3 */ |
8000 | 2 | switch ( modrm_reg & 7 ) |
8001 | 2 | { |
8002 | 1 | case 0: /* llwpcb r */ |
8003 | 1 | case 1: /* slwpcb r */ |
8004 | | /* LWP is unsupported, so produce #UD unconditionally. */ |
8005 | 1 | generate_exception(X86_EXC_UD); |
8006 | 2 | } |
8007 | 1 | goto unrecognized_insn; |
8008 | | |
8009 | 1 | #ifndef X86EMUL_NO_SIMD |
8010 | | |
8011 | 1 | case X86EMUL_OPC_XOP(09, 0x82): /* vfrczss xmm/m128,xmm */ |
8012 | 2 | case X86EMUL_OPC_XOP(09, 0x83): /* vfrczsd xmm/m128,xmm */ |
8013 | 2 | generate_exception_if(vex.l, X86_EXC_UD); |
8014 | | /* fall through */ |
8015 | 2 | case X86EMUL_OPC_XOP(09, 0x80): /* vfrczps {x,y}mm/mem,{x,y}mm */ |
8016 | 3 | case X86EMUL_OPC_XOP(09, 0x81): /* vfrczpd {x,y}mm/mem,{x,y}mm */ |
8017 | 3 | host_and_vcpu_must_have(xop); |
8018 | 0 | generate_exception_if(vex.w, X86_EXC_UD); |
8019 | 0 | goto simd_0f_ymm; |
8020 | | |
8021 | 1 | case X86EMUL_OPC_XOP(09, 0xc1): /* vphaddbw xmm/m128,xmm */ |
8022 | 2 | case X86EMUL_OPC_XOP(09, 0xc2): /* vphaddbd xmm/m128,xmm */ |
8023 | 3 | case X86EMUL_OPC_XOP(09, 0xc3): /* vphaddbq xmm/m128,xmm */ |
8024 | 4 | case X86EMUL_OPC_XOP(09, 0xc6): /* vphaddwd xmm/m128,xmm */ |
8025 | 5 | case X86EMUL_OPC_XOP(09, 0xc7): /* vphaddwq xmm/m128,xmm */ |
8026 | 6 | case X86EMUL_OPC_XOP(09, 0xcb): /* vphadddq xmm/m128,xmm */ |
8027 | 7 | case X86EMUL_OPC_XOP(09, 0xd1): /* vphaddubw xmm/m128,xmm */ |
8028 | 8 | case X86EMUL_OPC_XOP(09, 0xd2): /* vphaddubd xmm/m128,xmm */ |
8029 | 9 | case X86EMUL_OPC_XOP(09, 0xd3): /* vphaddubq xmm/m128,xmm */ |
8030 | 10 | case X86EMUL_OPC_XOP(09, 0xd6): /* vphadduwd xmm/m128,xmm */ |
8031 | 11 | case X86EMUL_OPC_XOP(09, 0xd7): /* vphadduwq xmm/m128,xmm */ |
8032 | 12 | case X86EMUL_OPC_XOP(09, 0xdb): /* vphaddudq xmm/m128,xmm */ |
8033 | 13 | case X86EMUL_OPC_XOP(09, 0xe2): /* vphsubwd xmm/m128,xmm */ |
8034 | 14 | case X86EMUL_OPC_XOP(09, 0xe3): /* vphsubdq xmm/m128,xmm */ |
8035 | 15 | case X86EMUL_OPC_XOP(09, 0xe1): /* vphsubbw xmm/m128,xmm */ |
8036 | 15 | generate_exception_if(vex.w, X86_EXC_UD); |
8037 | | /* fall through */ |
8038 | 13 | case X86EMUL_OPC_XOP(09, 0x90): /* vprotb xmm/m128,xmm,xmm */ |
8039 | | /* vprotb xmm,xmm/m128,xmm */ |
8040 | 14 | case X86EMUL_OPC_XOP(09, 0x91): /* vprotw xmm/m128,xmm,xmm */ |
8041 | | /* vprotw xmm,xmm/m128,xmm */ |
8042 | 15 | case X86EMUL_OPC_XOP(09, 0x92): /* vprotd xmm/m128,xmm,xmm */ |
8043 | | /* vprotd xmm,xmm/m128,xmm */ |
8044 | 16 | case X86EMUL_OPC_XOP(09, 0x93): /* vprotq xmm/m128,xmm,xmm */ |
8045 | | /* vprotq xmm,xmm/m128,xmm */ |
8046 | 17 | case X86EMUL_OPC_XOP(09, 0x94): /* vpshlb xmm/m128,xmm,xmm */ |
8047 | | /* vpshlb xmm,xmm/m128,xmm */ |
8048 | 18 | case X86EMUL_OPC_XOP(09, 0x95): /* vpshlw xmm/m128,xmm,xmm */ |
8049 | | /* vpshlw xmm,xmm/m128,xmm */ |
8050 | 19 | case X86EMUL_OPC_XOP(09, 0x96): /* vpshld xmm/m128,xmm,xmm */ |
8051 | | /* vpshld xmm,xmm/m128,xmm */ |
8052 | 20 | case X86EMUL_OPC_XOP(09, 0x97): /* vpshlq xmm/m128,xmm,xmm */ |
8053 | | /* vpshlq xmm,xmm/m128,xmm */ |
8054 | 21 | case X86EMUL_OPC_XOP(09, 0x98): /* vpshab xmm/m128,xmm,xmm */ |
8055 | | /* vpshab xmm,xmm/m128,xmm */ |
8056 | 22 | case X86EMUL_OPC_XOP(09, 0x99): /* vpshaw xmm/m128,xmm,xmm */ |
8057 | | /* vpshaw xmm,xmm/m128,xmm */ |
8058 | 23 | case X86EMUL_OPC_XOP(09, 0x9a): /* vpshad xmm/m128,xmm,xmm */ |
8059 | | /* vpshad xmm,xmm/m128,xmm */ |
8060 | 24 | case X86EMUL_OPC_XOP(09, 0x9b): /* vpshaq xmm/m128,xmm,xmm */ |
8061 | | /* vpshaq xmm,xmm/m128,xmm */ |
8062 | 24 | generate_exception_if(vex.l, X86_EXC_UD); |
8063 | 14 | host_and_vcpu_must_have(xop); |
8064 | 0 | goto simd_0f_ymm; |
8065 | | |
8066 | 0 | #endif /* X86EMUL_NO_SIMD */ |
8067 | | |
8068 | 1 | case X86EMUL_OPC_XOP(0a, 0x10): /* bextr imm,r/m,r */ |
8069 | 1 | { |
8070 | 1 | uint8_t *buf = get_stub(stub); |
8071 | 1 | typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]); |
8072 | | |
8073 | 1 | host_and_vcpu_must_have(tbm); |
8074 | 0 | generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD); |
8075 | | |
8076 | 0 | if ( ea.type == OP_REG ) |
8077 | 0 | src.val = *ea.reg; |
8078 | 0 | else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes, |
8079 | 0 | ctxt, ops)) != X86EMUL_OKAY ) |
8080 | 0 | goto done; |
8081 | | |
8082 | 0 | buf[0] = 0x8f; |
8083 | 0 | *pxop = vex; |
8084 | 0 | pxop->b = 1; |
8085 | 0 | pxop->r = 1; |
8086 | 0 | buf[3] = b; |
8087 | 0 | buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ |
8088 | 0 | *(uint32_t *)(buf + 5) = imm1; |
8089 | 0 | buf[9] = 0xc3; |
8090 | |
8091 | 0 | emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val)); |
8092 | |
8093 | 0 | put_stub(stub); |
8094 | 0 | break; |
8095 | 0 | } |
8096 | | |
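Editorial sketch, not part of the source: the stub above replays TBM's immediate form of BEXTR on the host. The 32-bit immediate encodes the field to extract: bits 7:0 give the starting bit position and bits 15:8 the field length. Ignoring the flag outputs, the data result is roughly:

    #include <stdint.h>

    /* Illustrative only: the bit-field extraction done by BEXTR $imm32,r/m,r. */
    static uint64_t bextr_imm_sketch(uint64_t src, uint32_t imm32)
    {
        unsigned int start = imm32 & 0xff;
        unsigned int len = (imm32 >> 8) & 0xff;

        if ( start >= 64 || !len )
            return 0;
        src >>= start;

        return len < 64 ? src & ((1ULL << len) - 1) : src;
    }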
8097 | 2 | case X86EMUL_OPC_XOP(0a, 0x12): /* XOP Grp4 */ |
8098 | 2 | switch ( modrm_reg & 7 ) |
8099 | 2 | { |
8100 | 0 | case 0: /* lwpins $imm32,r/m,r */ |
8101 | 1 | case 1: /* lwpval $imm32,r/m,r */ |
8102 | | /* LWP is unsupported, so produce #UD unconditionally. */ |
8103 | 1 | generate_exception(X86_EXC_UD); |
8104 | 2 | } |
8105 | 1 | goto unrecognized_insn; |
8106 | | |
8107 | 863 | default: |
8108 | 863 | unimplemented_insn: __maybe_unused; |
8109 | 863 | rc = X86EMUL_UNIMPLEMENTED; |
8110 | 863 | goto done; |
8111 | 16 | unrecognized_insn: |
8112 | 16 | rc = X86EMUL_UNRECOGNIZED; |
8113 | 16 | goto done; |
8114 | | |
8115 | 102k | dispatch_from_helper: |
8116 | 102k | if ( rc == X86EMUL_OKAY ) |
8117 | 100k | break; |
8118 | | |
8119 | 1.28k | switch ( rc ) |
8120 | 1.28k | { |
8121 | 66 | case X86EMUL_rdtsc: |
8122 | 66 | goto rdtsc; |
8123 | | |
8124 | | #ifdef __XEN__ |
8125 | | case X86EMUL_stub_failure: |
8126 | | goto emulation_stub_failure; |
8127 | | #endif |
8128 | 1.28k | } |
8129 | | |
8130 | | /* Internally used state change indicators may not make it here. */ |
8131 | 1.22k | if ( rc < 0 ) |
8132 | 0 | { |
8133 | 0 | ASSERT_UNREACHABLE(); |
8134 | 0 | rc = X86EMUL_UNHANDLEABLE; |
8135 | 0 | } |
8136 | 1.22k | goto done; |
8137 | 602k | } |
8138 | | |
8139 | 588k | if ( state->rmw != rmw_NONE ) |
8140 | 0 | { |
8141 | 0 | ea.val = src.val; |
8142 | 0 | op_bytes = dst.bytes; |
8143 | 0 | state->stub_exn = &stub_exn; |
8144 | 0 | rc = ops->rmw(dst.mem.seg, dst.mem.off, dst.bytes, &_regs.eflags, |
8145 | 0 | state, ctxt); |
8146 | | #ifdef __XEN__ |
8147 | | if ( rc == X86EMUL_stub_failure ) |
8148 | | goto emulation_stub_failure; |
8149 | | #endif |
8150 | 0 | if ( rc != X86EMUL_OKAY ) |
8151 | 0 | goto done; |
8152 | | |
8153 | | /* Some operations require a register to be written. */ |
8154 | 0 | switch ( state->rmw ) |
8155 | 0 | { |
8156 | 0 | case rmw_cmpccxadd: |
8157 | 0 | case rmw_xchg: |
8158 | 0 | case rmw_xadd: |
8159 | 0 | switch ( dst.bytes ) |
8160 | 0 | { |
8161 | 0 | case 1: *(uint8_t *)src.reg = (uint8_t)ea.val; break; |
8162 | 0 | case 2: *(uint16_t *)src.reg = (uint16_t)ea.val; break; |
8163 | 0 | case 4: *src.reg = (uint32_t)ea.val; break; /* 64b reg: zero-extend */ |
8164 | 0 | case 8: *src.reg = ea.val; break; |
8165 | 0 | } |
8166 | 0 | break; |
8167 | | |
8168 | 0 | default: |
8169 | 0 | break; |
8170 | 0 | } |
8171 | | |
8172 | 0 | dst.type = OP_NONE; |
8173 | 0 | } |
8174 | 588k | else if ( state->simd_size != simd_none ) |
8175 | 76.7k | { |
8176 | 76.7k | generate_exception_if((vex.opcx && (d & TwoOp) && |
8177 | 76.7k | (vex.reg != 0xf || (evex_encoded() && !evex.RX))), |
8178 | 76.7k | X86_EXC_UD); |
8179 | | |
8180 | 76.7k | EXPECT(op_bytes); |
8181 | 76.7k | EXPECT(opc); |
8182 | | |
8183 | 76.7k | if ( evex_encoded() ) |
8184 | 0 | { |
8185 | 0 | opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3; |
8186 | 0 | copy_EVEX(opc, evex); |
8187 | 0 | } |
8188 | 76.7k | else |
8189 | 76.7k | { |
8190 | 76.7k | opc[insn_bytes - PFX_BYTES] = 0xc3; |
8191 | 76.7k | copy_REX_VEX(opc, rex_prefix, vex); |
8192 | 76.7k | } |
8193 | | |
8194 | 76.7k | if ( ea.type == OP_MEM ) |
8195 | 46.6k | { |
8196 | 46.6k | uint32_t mxcsr = 0; |
8197 | 46.6k | uint64_t full = 0; |
8198 | | |
8199 | 46.6k | if ( op_bytes < 16 || |
8200 | 46.6k | (vex.opcx |
8201 | 24.1k | ? /* vmov{{a,nt}p{s,d},{,nt}dqa,ntdq} are exceptions. */ |
8202 | 12.7k | ext == ext_0f |
8203 | 12.7k | ? ((b | 1) != 0x29 && b != 0x2b && |
8204 | 7.00k | ((b | 0x10) != 0x7f || vex.pfx != vex_66) && |
8205 | 7.00k | b != 0xe7) |
8206 | 12.7k | : (ext != ext_0f38 || b != 0x2a) |
8207 | 24.1k | : /* movup{s,d}, {,mask}movdqu, and lddqu are exceptions. */ |
8208 | 24.1k | ext == ext_0f && |
8209 | 11.4k | ((b | 1) == 0x11 || |
8210 | 9.41k | ((b | 0x10) == 0x7f && vex.pfx == vex_f3) || |
8211 | 9.41k | b == 0xf7 || b == 0xf0)) ) |
8212 | 35.2k | mxcsr = MXCSR_MM; |
8213 | 11.4k | else if ( vcpu_has_misalignsse() ) |
8214 | 11.4k | asm ( "stmxcsr %0" : "=m" (mxcsr) ); |
8215 | 46.6k | generate_exception_if(!(mxcsr & MXCSR_MM) && |
8216 | 46.6k | !is_aligned(ea.mem.seg, ea.mem.off, op_bytes, |
8217 | 46.6k | ctxt, ops), |
8218 | 46.6k | X86_EXC_GP, 0); |
8219 | | |
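Editorial note, not part of the source: the block above decides whether this memory access must honour SIMD alignment rules. Loosely, legacy-SSE accesses of 16 bytes or more fault on misalignment unless the vCPU exposes MISALIGNSSE and the host's MXCSR.MM bit is set, while most VEX/EVEX-encoded accesses don't care, the explicitly-aligned mov forms listed in the comments being the exceptions. A compressed restatement with hypothetical names (MXCSR_MM_BIT mirrors the MXCSR_MM constant used above):

    #include <stdbool.h>
    #include <stdint.h>

    #define MXCSR_MM_BIT (1u << 17)   /* AMD misaligned-SSE exception mask */

    /* Illustrative only: should a misaligned SIMD memory operand raise #GP(0)? */
    static bool simd_alignment_faults(bool insn_tolerates_misalign,
                                      uint32_t host_mxcsr, bool aligned)
    {
        if ( insn_tolerates_misalign || (host_mxcsr & MXCSR_MM_BIT) )
            return false;               /* no alignment requirement in force */

        return !aligned;                /* #GP(0) on a misaligned operand */
    }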
8220 | 46.3k | EXPECT(elem_bytes > 0); |
8221 | 46.3k | if ( evex.brs ) |
8222 | 0 | { |
8223 | 0 | ASSERT((d & DstMask) != DstMem); |
8224 | 0 | op_bytes = elem_bytes; |
8225 | 0 | } |
8226 | 46.3k | if ( evex.opmsk ) |
8227 | 610 | { |
8228 | 610 | ASSERT(!(op_bytes % elem_bytes)); |
8229 | 610 | full = ~0ULL >> (64 - op_bytes / elem_bytes); |
8230 | 610 | op_mask &= full; |
8231 | 610 | } |
8232 | 46.3k | if ( fault_suppression ) |
8233 | 610 | { |
8234 | 610 | if ( !op_mask ) |
8235 | 318 | goto simd_no_mem; |
8236 | 292 | if ( !evex.brs ) |
8237 | 292 | { |
8238 | 292 | first_byte = __builtin_ctzll(op_mask); |
8239 | 292 | op_mask >>= first_byte; |
8240 | 292 | full >>= first_byte; |
8241 | 292 | first_byte *= elem_bytes; |
8242 | 292 | op_bytes = (64 - __builtin_clzll(op_mask)) * elem_bytes; |
8243 | 292 | } |
8244 | 292 | } |
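Editorial sketch, not part of the source: when fault suppression applies and there is no embedded broadcast, the ctz/clz arithmetic above narrows the access to just the span between the lowest and highest set opmask bits. For example, with elem_bytes == 4 and op_mask == 0b0110, the access starts at byte 4 and covers 8 bytes. A self-contained restatement with hypothetical names:

    #include <stdint.h>

    /*
     * Illustrative only: narrow a masked vector access to the selected
     * elements.  'mask' must be non-zero (the code above bails out to
     * simd_no_mem when it is).
     */
    static void narrow_masked_access(uint64_t mask, unsigned int elem_bytes,
                                     unsigned int *first_byte,
                                     unsigned int *nr_bytes)
    {
        unsigned int lo = __builtin_ctzll(mask);       /* lowest set element  */
        unsigned int hi = 63 - __builtin_clzll(mask);  /* highest set element */

        *first_byte = lo * elem_bytes;
        *nr_bytes   = (hi - lo + 1) * elem_bytes;
    }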
8245 | | /* |
8246 | | * Independent of fault suppression we may need to read (parts of) |
8247 | | * the memory operand for the purpose of merging without splitting |
8248 | | * the write below into multiple ones. Note that the EVEX.Z check |
8249 | | * here isn't strictly needed, due to there not currently being |
8250 | | * any instructions allowing zeroing-merging on memory writes (and |
8251 | | * we raise #UD during DstMem processing far above in this case), |
8252 | | * yet conceptually the read is then unnecessary. |
8253 | | */ |
8254 | 46.0k | if ( evex.opmsk && !evex.z && (d & DstMask) == DstMem && |
8255 | 46.0k | op_mask != full ) |
8256 | 14 | d = (d & ~SrcMask) | SrcMem; |
8257 | | |
8258 | 46.0k | switch ( d & SrcMask ) |
8259 | 46.0k | { |
8260 | 37.3k | case SrcMem: |
8261 | 37.3k | rc = ops->read(ea.mem.seg, truncate_ea(ea.mem.off + first_byte), |
8262 | 37.3k | (void *)mmvalp + first_byte, op_bytes, |
8263 | 37.3k | ctxt); |
8264 | 37.3k | if ( rc != X86EMUL_OKAY ) |
8265 | 2.58k | goto done; |
8266 | | /* fall through */ |
8267 | 36.3k | case SrcMem16: |
8268 | 36.3k | dst.type = OP_NONE; |
8269 | 36.3k | break; |
8270 | 7.09k | default: |
8271 | 7.09k | EXPECT((d & DstMask) == DstMem); |
8272 | 7.09k | break; |
8273 | 46.0k | } |
8274 | 43.4k | if ( (d & DstMask) == DstMem ) |
8275 | 7.56k | { |
8276 | 7.56k | fail_if(!ops->write); /* Check before running the stub. */ |
8277 | 7.55k | if ( (d & SrcMask) == SrcMem ) |
8278 | 475 | d |= Mov; /* Force memory write to occur below. */ |
8279 | | |
8280 | 7.55k | switch ( ctxt->opcode ) |
8281 | 7.55k | { |
8282 | 10 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps */ |
8283 | 149 | case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd */ |
8284 | 198 | case X86EMUL_OPC_VEX_66(0x0f38, 0x8e): /* vpmaskmov{d,q} */ |
8285 | | /* These have merge semantics; force write to occur. */ |
8286 | 198 | d |= Mov; |
8287 | 198 | break; |
8288 | 7.36k | default: |
8289 | 7.36k | ASSERT(d & Mov); |
8290 | 7.36k | break; |
8291 | 7.55k | } |
8292 | | |
8293 | 7.55k | dst.type = OP_MEM; |
8294 | 7.55k | dst.bytes = op_bytes; |
8295 | 7.55k | dst.mem = ea.mem; |
8296 | 7.55k | } |
8297 | 43.4k | } |
8298 | 30.0k | else |
8299 | 30.0k | { |
8300 | 30.4k | simd_no_mem: |
8301 | 30.4k | dst.type = OP_NONE; |
8302 | 30.4k | } |
8303 | | |
8304 | | /* {,v}maskmov{q,dqu}, as an exception, uses rDI. */ |
8305 | 73.8k | if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK | |
8306 | 73.8k | X86EMUL_OPC_ENCODING_MASK)) != |
8307 | 73.8k | X86EMUL_OPC(0x0f, 0xf7)) ) |
8308 | 73.3k | invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); |
8309 | 463 | else |
8310 | 463 | invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp)); |
8311 | | |
8312 | 73.8k | put_stub(stub); |
8313 | 73.8k | } |
8314 | | |
8315 | 585k | switch ( dst.type ) |
8316 | 585k | { |
8317 | 199k | case OP_REG: |
8318 | | /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ |
8319 | 199k | switch ( dst.bytes ) |
8320 | 199k | { |
8321 | 28.1k | case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break; |
8322 | 115k | case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break; |
8323 | 43.1k | case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */ |
8324 | 9.64k | case 8: *dst.reg = dst.val; break; |
8325 | 199k | } |
8326 | 199k | break; |
8327 | 199k | case OP_MEM: |
8328 | 119k | if ( !(d & Mov) && (dst.orig_val == dst.val) && |
8329 | 119k | !ctxt->force_writeback ) |
8330 | 20.8k | /* nothing to do */; |
8331 | 98.5k | else if ( lock_prefix ) |
8332 | 732 | { |
8333 | 732 | fail_if(!ops->cmpxchg); |
8334 | 731 | rc = ops->cmpxchg( |
8335 | 731 | dst.mem.seg, dst.mem.off, &dst.orig_val, |
8336 | 731 | &dst.val, dst.bytes, true, ctxt); |
8337 | 731 | if ( rc == X86EMUL_CMPXCHG_FAILED ) |
8338 | 0 | rc = X86EMUL_RETRY; |
8339 | 731 | } |
8340 | 97.8k | else |
8341 | 97.8k | { |
8342 | 97.8k | fail_if(!ops->write); |
8343 | 97.6k | rc = ops->write(dst.mem.seg, truncate_ea(dst.mem.off + first_byte), |
8344 | 97.6k | !state->simd_size ? &dst.val |
8345 | 97.6k | : (void *)mmvalp + first_byte, |
8346 | 97.6k | dst.bytes, ctxt); |
8347 | 97.6k | if ( sfence ) |
8348 | 2.15k | asm volatile ( "sfence" ::: "memory" ); |
8349 | 97.6k | } |
8350 | 119k | if ( rc != 0 ) |
8351 | 1.15k | goto done; |
8352 | 118k | break; |
8353 | 266k | default: |
8354 | 266k | break; |
8355 | 585k | } |
8356 | | |
8357 | 590k | complete_insn: /* Commit shadow register state. */ |
8358 | 590k | put_fpu(fpu_type, false, state, ctxt, ops); |
8359 | 590k | fpu_type = X86EMUL_FPU_none; |
8360 | | |
8361 | | /* Zero the upper 32 bits of %rip if not in 64-bit mode. */ |
8362 | 590k | if ( !mode_64bit() ) |
8363 | 480k | _regs.r(ip) = (uint32_t)_regs.r(ip); |
8364 | | |
8365 | | /* Should a singlestep #DB be raised? */ |
8366 | 590k | if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss ) |
8367 | 111k | { |
8368 | 111k | ctxt->retire.singlestep = true; |
8369 | 111k | ctxt->retire.sti = false; |
8370 | 111k | } |
8371 | | |
8372 | 590k | if ( rc != X86EMUL_DONE ) |
8373 | 590k | *ctxt->regs = _regs; |
8374 | 0 | else |
8375 | 0 | { |
8376 | 0 | ctxt->regs->r(ip) = _regs.r(ip); |
8377 | 0 | rc = X86EMUL_OKAY; |
8378 | 0 | } |
8379 | | |
8380 | 590k | ctxt->regs->eflags &= ~X86_EFLAGS_RF; |
8381 | | |
8382 | 603k | done: |
8383 | 603k | put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops); |
8384 | 603k | put_stub(stub); |
8385 | 603k | return rc; |
8386 | 0 | #undef state |
8387 | | |
8388 | | #ifdef __XEN__ |
8389 | | emulation_stub_failure: |
8390 | | if ( stub_exn.info.fields.trapnr == X86_EXC_MF ) |
8391 | | generate_exception(X86_EXC_MF); |
8392 | | if ( stub_exn.info.fields.trapnr == X86_EXC_XM ) |
8393 | | { |
8394 | | if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY ) |
8395 | | cr4 = X86_CR4_OSXMMEXCPT; |
8396 | | generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? X86_EXC_XM : X86_EXC_UD); |
8397 | | } |
8398 | | gprintk(XENLOG_WARNING, |
8399 | | "exception %u (ec=%04x) in emulation stub (line %u)\n", |
8400 | | stub_exn.info.fields.trapnr, stub_exn.info.fields.ec, |
8401 | | stub_exn.line); |
8402 | | gprintk(XENLOG_INFO, " stub: %"__stringify(MAX_INST_LEN)"ph\n", |
8403 | | stub.func); |
8404 | | if ( stub_exn.info.fields.trapnr == X86_EXC_UD ) |
8405 | | generate_exception(X86_EXC_UD); |
8406 | | domain_crash(current->domain); |
8407 | | #endif |
8408 | | |
8409 | 0 | unhandleable: |
8410 | 0 | rc = X86EMUL_UNHANDLEABLE; |
8411 | 0 | goto done; |
8412 | 590k | } |
8413 | | |
8414 | | #undef op_bytes |
8415 | | #undef ad_bytes |
8416 | | #undef ext |
8417 | | #undef modrm |
8418 | | #undef modrm_mod |
8419 | | #undef modrm_reg |
8420 | | #undef modrm_rm |
8421 | | #undef rex_prefix |
8422 | | #undef lock_prefix |
8423 | | #undef vex |
8424 | | #undef ea |
8425 | | |
8426 | | int x86_emul_rmw( |
8427 | | void *ptr, |
8428 | | unsigned int bytes, |
8429 | | uint32_t *eflags, |
8430 | | struct x86_emulate_state *s, |
8431 | | struct x86_emulate_ctxt *ctxt) |
8432 | | #define stub_exn (*s->stub_exn) /* for invoke_stub() */ |
8433 | 0 | { |
8434 | 0 | unsigned long *dst = ptr; |
8435 | |
8436 | 0 | ASSERT(bytes == s->op_bytes); |
8437 | | |
8438 | | /* |
8439 | | * We cannot use Jcc below, as this code executes with the guest status flags |
8440 | | * loaded into the EFLAGS register. Hence our only choice is J{E,R}CXZ. |
8441 | | */ |
8442 | 0 | #ifdef __x86_64__ |
8443 | 0 | # define JCXZ "jrcxz" |
8444 | | #else |
8445 | | # define JCXZ "jecxz" |
8446 | | #endif |
8447 | | |
8448 | 0 | #define COND_LOCK(op) \ |
8449 | 0 | JCXZ " .L" #op "%=\n\t" \ |
8450 | 0 | "lock\n" \ |
8451 | 0 | ".L" #op "%=:\n\t" \ |
8452 | 0 | #op |
8453 | | |
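Editorial note, not part of the source: COND_LOCK() makes the LOCK prefix conditional at run time. The lock_prefix flag is passed to each asm block through the "c" constraint, and J{E,R}CXZ jumps over the bare 'lock' prefix byte whenever rCX is zero, so one stub body serves both the locked and unlocked variants of the instruction. For a 64-bit build, COND_LOCK(add) expands to roughly:

    /*
     * Illustrative expansion (the '42' stands in for the unique number GCC
     * substitutes for %=):
     *
     *     jrcxz .Ladd42          # lock_prefix == 0 -> skip the LOCK prefix
     *     lock
     * .Ladd42:
     *     add    <operands supplied by the _emulate_*op() macro>
     */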
8454 | 0 | switch ( s->rmw ) |
8455 | 0 | { |
8456 | 0 | #define UNOP(op) \ |
8457 | 0 | case rmw_##op: \ |
8458 | 0 | _emulate_1op(COND_LOCK(op), dst, bytes, *eflags, \ |
8459 | 0 | "c" ((long)s->lock_prefix) ); \ |
8460 | 0 | break |
8461 | 0 | #define BINOP(op, sfx) \ |
8462 | 0 | case rmw_##op: \ |
8463 | 0 | _emulate_2op_SrcV##sfx(COND_LOCK(op), \ |
8464 | 0 | s->ea.val, dst, bytes, *eflags, \ |
8465 | 0 | "c" ((long)s->lock_prefix) ); \ |
8466 | 0 | break |
8467 | 0 | #define SHIFT(op) \ |
8468 | 0 | case rmw_##op: \ |
8469 | 0 | ASSERT(!s->lock_prefix); \ |
8470 | 0 | _emulate_2op_SrcB(#op, s->ea.val, dst, bytes, *eflags); \ |
8471 | 0 | break |
8472 | | |
8473 | 0 | BINOP(adc, ); |
8474 | 0 | BINOP(add, ); |
8475 | 0 | BINOP(and, ); |
8476 | 0 | BINOP(btc, _nobyte); |
8477 | 0 | BINOP(bts, _nobyte); |
8478 | 0 | BINOP(btr, _nobyte); |
8479 | 0 | UNOP(dec); |
8480 | 0 | UNOP(inc); |
8481 | 0 | UNOP(neg); |
8482 | 0 | BINOP(or, ); |
8483 | 0 | SHIFT(rcl); |
8484 | 0 | SHIFT(rcr); |
8485 | 0 | SHIFT(rol); |
8486 | 0 | SHIFT(ror); |
8487 | 0 | SHIFT(sar); |
8488 | 0 | BINOP(sbb, ); |
8489 | 0 | SHIFT(shl); |
8490 | 0 | SHIFT(shr); |
8491 | 0 | BINOP(sub, ); |
8492 | 0 | BINOP(xor, ); |
8493 | | |
8494 | 0 | #undef UNOP |
8495 | 0 | #undef BINOP |
8496 | 0 | #undef SHIFT |
8497 | | |
8498 | 0 | #ifdef __x86_64__ |
8499 | 0 | case rmw_cmpccxadd: |
8500 | 0 | { |
8501 | 0 | struct x86_emulate_stub stub = {}; |
8502 | 0 | uint8_t *buf = get_stub(stub); |
8503 | 0 | typeof(s->vex) *pvex = container_of(buf + 1, typeof(s->vex), |
8504 | 0 | raw[0]); |
8505 | 0 | unsigned long dummy; |
8506 | |
8507 | 0 | buf[0] = 0xc4; |
8508 | 0 | *pvex = s->vex; |
8509 | 0 | pvex->b = 1; |
8510 | 0 | pvex->r = 1; |
8511 | 0 | pvex->reg = 0xf; /* rAX */ |
8512 | 0 | buf[3] = ctxt->opcode; |
8513 | 0 | buf[4] = 0x11; /* reg=rDX r/m=(%RCX) */ |
8514 | 0 | buf[5] = 0xc3; |
8515 | |
8516 | 0 | *eflags &= ~EFLAGS_MASK; |
8517 | 0 | invoke_stub("", |
8518 | 0 | _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), |
8519 | 0 | "+m" (*dst), "+d" (s->ea.val), |
8520 | 0 | [tmp] "=&r" (dummy), [eflags] "+g" (*eflags) |
8521 | 0 | : "a" (*decode_vex_gpr(s->vex.reg, ctxt->regs, ctxt)), |
8522 | 0 | "c" (dst), [mask] "i" (EFLAGS_MASK)); |
8523 | |
8524 | 0 | put_stub(stub); |
8525 | 0 | break; |
8526 | 0 | } |
8527 | 0 | #endif |
8528 | | |
8529 | 0 | case rmw_not: |
8530 | 0 | switch ( s->op_bytes ) |
8531 | 0 | { |
8532 | 0 | case 1: |
8533 | 0 | asm ( COND_LOCK(notb) " %0" |
8534 | 0 | : "+m" (*dst) : "c" ((long)s->lock_prefix) ); |
8535 | 0 | break; |
8536 | 0 | case 2: |
8537 | 0 | asm ( COND_LOCK(notw) " %0" |
8538 | 0 | : "+m" (*dst) : "c" ((long)s->lock_prefix) ); |
8539 | 0 | break; |
8540 | 0 | case 4: |
8541 | 0 | asm ( COND_LOCK(notl) " %0" |
8542 | 0 | : "+m" (*dst) : "c" ((long)s->lock_prefix) ); |
8543 | 0 | break; |
8544 | 0 | #ifdef __x86_64__ |
8545 | 0 | case 8: |
8546 | 0 | asm ( COND_LOCK(notq) " %0" |
8547 | 0 | : "+m" (*dst) : "c" ((long)s->lock_prefix) ); |
8548 | 0 | break; |
8549 | 0 | #endif |
8550 | 0 | } |
8551 | 0 | break; |
8552 | | |
8553 | 0 | case rmw_shld: |
8554 | 0 | ASSERT(!s->lock_prefix); |
8555 | 0 | _emulate_2op_SrcV_nobyte("shld", |
8556 | 0 | s->ea.val, dst, bytes, *eflags, |
8557 | 0 | "c" (s->ea.orig_val) ); |
8558 | 0 | break; |
8559 | | |
8560 | 0 | case rmw_shrd: |
8561 | 0 | ASSERT(!s->lock_prefix); |
8562 | 0 | _emulate_2op_SrcV_nobyte("shrd", |
8563 | 0 | s->ea.val, dst, bytes, *eflags, |
8564 | 0 | "c" (s->ea.orig_val) ); |
8565 | 0 | break; |
8566 | | |
8567 | 0 | case rmw_xadd: |
8568 | 0 | *eflags &= ~EFLAGS_MASK; |
8569 | 0 | switch ( s->op_bytes ) |
8570 | 0 | { |
8571 | 0 | unsigned long dummy; |
8572 | | |
8573 | 0 | #define XADD(sz, cst, mod) \ |
8574 | 0 | case sz: \ |
8575 | 0 | asm ( "" \ |
8576 | 0 | COND_LOCK(xadd) " %"#mod"[reg], %[mem]; " \ |
8577 | 0 | _POST_EFLAGS("[efl]", "[msk]", "[tmp]") \ |
8578 | 0 | : [reg] "+" #cst (s->ea.val), \ |
8579 | 0 | [mem] "+m" (*dst), \ |
8580 | 0 | [efl] "+g" (*eflags), \ |
8581 | 0 | [tmp] "=&r" (dummy) \ |
8582 | 0 | : "c" ((long)s->lock_prefix), \ |
8583 | 0 | [msk] "i" (EFLAGS_MASK) ); \ |
8584 | 0 | break |
8585 | 0 | XADD(1, q, b); |
8586 | 0 | XADD(2, r, w); |
8587 | 0 | XADD(4, r, k); |
8588 | 0 | #ifdef __x86_64__ |
8589 | 0 | XADD(8, r, ); |
8590 | 0 | #endif |
8591 | 0 | #undef XADD |
8592 | 0 | } |
8593 | 0 | break; |
8594 | | |
8595 | 0 | case rmw_xchg: |
8596 | 0 | switch ( s->op_bytes ) |
8597 | 0 | { |
8598 | 0 | case 1: |
8599 | 0 | asm ( "xchg %b0, %b1" : "+q" (s->ea.val), "+m" (*dst) ); |
8600 | 0 | break; |
8601 | 0 | case 2: |
8602 | 0 | asm ( "xchg %w0, %w1" : "+r" (s->ea.val), "+m" (*dst) ); |
8603 | 0 | break; |
8604 | 0 | case 4: |
8605 | 0 | #ifdef __x86_64__ |
8606 | 0 | asm ( "xchg %k0, %k1" : "+r" (s->ea.val), "+m" (*dst) ); |
8607 | 0 | break; |
8608 | 0 | case 8: |
8609 | 0 | #endif |
8610 | 0 | asm ( "xchg %0, %1" : "+r" (s->ea.val), "+m" (*dst) ); |
8611 | 0 | break; |
8612 | 0 | } |
8613 | 0 | break; |
8614 | | |
8615 | 0 | default: |
8616 | 0 | ASSERT_UNREACHABLE(); |
8617 | 0 | return X86EMUL_UNHANDLEABLE; |
8618 | 0 | } |
8619 | | |
8620 | 0 | #undef COND_LOCK |
8621 | 0 | #undef JCXZ |
8622 | | |
8623 | 0 | return X86EMUL_OKAY; |
8624 | |
8625 | | #if defined(__XEN__) && defined(__x86_64__) |
8626 | | emulation_stub_failure: |
8627 | | return X86EMUL_stub_failure; |
8628 | | #endif |
8629 | 0 | } |
8630 | | #undef stub_exn |
8631 | | |
8632 | | static void __init __maybe_unused build_assertions(void) |
8633 | 0 | { |
8634 | 0 | /* Check the values against SReg3 encoding in opcode/ModRM bytes. */ |
8635 | 0 | BUILD_BUG_ON(x86_seg_es != 0); |
8636 | 0 | BUILD_BUG_ON(x86_seg_cs != 1); |
8637 | 0 | BUILD_BUG_ON(x86_seg_ss != 2); |
8638 | 0 | BUILD_BUG_ON(x86_seg_ds != 3); |
8639 | 0 | BUILD_BUG_ON(x86_seg_fs != 4); |
8640 | 0 | BUILD_BUG_ON(x86_seg_gs != 5); |
8641 | 0 |
8642 | 0 | /* Check X86_ET_* against VMCB EVENTINJ and VMCS INTR_INFO type fields. */ |
8643 | 0 | BUILD_BUG_ON(X86_ET_EXT_INTR != 0); |
8644 | 0 | BUILD_BUG_ON(X86_ET_NMI != 2); |
8645 | 0 | BUILD_BUG_ON(X86_ET_HW_EXC != 3); |
8646 | 0 | BUILD_BUG_ON(X86_ET_SW_INT != 4); |
8647 | 0 | BUILD_BUG_ON(X86_ET_PRIV_SW_EXC != 5); |
8648 | 0 | BUILD_BUG_ON(X86_ET_SW_EXC != 6); |
8649 | 0 | BUILD_BUG_ON(X86_ET_OTHER != 7); |
8650 | 0 | } |
8651 | | |
8652 | | #ifndef NDEBUG |
8653 | | /* |
8654 | | * In debug builds, wrap x86_emulate() with some assertions about its expected |
8655 | | * behaviour. |
8656 | | */ |
8657 | | int x86_emulate_wrapper( |
8658 | | struct x86_emulate_ctxt *ctxt, |
8659 | | const struct x86_emulate_ops *ops) |
8660 | 613k | { |
8661 | 613k | unsigned long orig_ip = ctxt->regs->r(ip); |
8662 | 613k | int rc; |
8663 | | |
8664 | 613k | #ifdef __x86_64__ |
8665 | 613k | if ( mode_64bit() ) |
8666 | 613k | ASSERT(ctxt->lma); |
8667 | | #else |
8668 | | ASSERT(!ctxt->lma && !mode_64bit()); |
8669 | | #endif |
8670 | | |
8671 | 613k | rc = x86_emulate(ctxt, ops); |
8672 | | |
8673 | | /* |
8674 | | * X86EMUL_DONE is an internal signal in the emulator, and is not expected |
8675 | | * to ever escape out to callers. |
8676 | | */ |
8677 | 613k | ASSERT(rc != X86EMUL_DONE); |
8678 | | |
8679 | | /* |
8680 | | * Most retire flags should only be set for successful instruction |
8681 | | * emulation. |
8682 | | */ |
8683 | 613k | if ( rc != X86EMUL_OKAY ) |
8684 | 24.2k | { |
8685 | 24.2k | typeof(ctxt->retire) retire = ctxt->retire; |
8686 | | |
8687 | 24.2k | retire.unblock_nmi = false; |
8688 | 24.2k | ASSERT(!retire.raw); |
8689 | 24.2k | } |
8690 | | |
8691 | | /* All cases returning X86EMUL_EXCEPTION should have fault semantics. */ |
8692 | 613k | if ( rc == X86EMUL_EXCEPTION ) |
8693 | 613k | ASSERT(ctxt->regs->r(ip) == orig_ip); |
8694 | | |
8695 | | /* |
8696 | | * An event being pending should exactly match returning |
8697 | | * X86EMUL_EXCEPTION. (If this trips, the chances are a codepath has |
8698 | | * called hvm_inject_hw_exception() rather than using |
8699 | | * x86_emul_hw_exception(), or the invocation of a hook has caused an |
8700 | | * exception to be raised, while the caller was only checking for |
8701 | | * success/failure.) |
8702 | | */ |
8703 | 613k | ASSERT(ctxt->event_pending == (rc == X86EMUL_EXCEPTION)); |
8704 | | |
8705 | 613k | return rc; |
8706 | 613k | } |
8707 | | #endif |