Coverage Report

Created: 2025-05-11 06:41

/src/xen/tools/fuzz/x86_instruction_emulator/x86_emulate/x86_emulate.c
Line
Count
Source (jump to first uncovered line)
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
2
/******************************************************************************
3
 * x86_emulate.c
4
 *
5
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6
 *
7
 * Copyright (c) 2005-2007 Keir Fraser
8
 * Copyright (c) 2005-2007 XenSource Inc.
9
 */
10
11
#include "private.h"
12
13
/*
14
 * The next two tables are indexed by high opcode extension byte (the one
15
 * that's encoded like an immediate) nibble, with each table element then
16
 * bit-indexed by low opcode extension byte nibble.
17
 */
18
static const uint16_t _3dnow_table[16] = {
19
    [0x0] = (1 << 0xd) /* pi2fd */,
20
    [0x1] = (1 << 0xd) /* pf2id */,
21
    [0x9] = (1 << 0x0) /* pfcmpge */ |
22
            (1 << 0x4) /* pfmin */ |
23
            (1 << 0x6) /* pfrcp */ |
24
            (1 << 0x7) /* pfrsqrt */ |
25
            (1 << 0xa) /* pfsub */ |
26
            (1 << 0xe) /* pfadd */,
27
    [0xa] = (1 << 0x0) /* pfcmpgt */ |
28
            (1 << 0x4) /* pfmax */ |
29
            (1 << 0x6) /* pfrcpit1 */ |
30
            (1 << 0x7) /* pfrsqit1 */ |
31
            (1 << 0xa) /* pfsubr */ |
32
            (1 << 0xe) /* pfacc */,
33
    [0xb] = (1 << 0x0) /* pfcmpeq */ |
34
            (1 << 0x4) /* pfmul */ |
35
            (1 << 0x6) /* pfrcpit2 */ |
36
            (1 << 0x7) /* pmulhrw */ |
37
            (1 << 0xf) /* pavgusb */,
38
};
39
40
static const uint16_t _3dnow_ext_table[16] = {
41
    [0x0] = (1 << 0xc) /* pi2fw */,
42
    [0x1] = (1 << 0xc) /* pf2iw */,
43
    [0x8] = (1 << 0xa) /* pfnacc */ |
44
            (1 << 0xe) /* pfpnacc */,
45
    [0xb] = (1 << 0xb) /* pswapd */,
46
};
47
48
/* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
49
static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
50
51
static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
52
53
#ifdef __x86_64__
54
94.2k
# define PFX2 REX_PREFIX
55
#else
56
# define PFX2 0x3e
57
#endif
58
373k
#define PFX_BYTES 3
59
94.2k
#define init_prefixes(stub) ({ \
60
94.2k
    uint8_t *buf_ = get_stub(stub); \
61
94.2k
    buf_[0] = 0x3e; \
62
94.2k
    buf_[1] = PFX2; \
63
94.2k
    buf_[2] = 0x0f; \
64
94.2k
    buf_ + 3; \
65
94.2k
})
66
67
37.7k
#define copy_VEX(ptr, vex) ({ \
68
37.7k
    if ( !mode_64bit() ) \
69
37.7k
        (vex).reg |= 8; \
70
37.7k
    gcc11_wrap(ptr)[0 - PFX_BYTES] = ext < ext_8f08 ? 0xc4 : 0x8f; \
71
37.7k
    (ptr)[1 - PFX_BYTES] = (vex).raw[0]; \
72
37.7k
    (ptr)[2 - PFX_BYTES] = (vex).raw[1]; \
73
37.7k
    container_of((ptr) + 1 - PFX_BYTES, typeof(vex), raw[0]); \
74
37.7k
})
75
76
90.9k
#define copy_REX_VEX(ptr, rex, vex) do { \
77
90.9k
    if ( (vex).opcx != vex_none ) \
78
90.9k
        copy_VEX(ptr, vex); \
79
90.9k
    else \
80
90.9k
    { \
81
56.3k
        if ( (vex).pfx ) \
82
56.3k
            (ptr)[0 - PFX_BYTES] = sse_prefix[(vex).pfx - 1]; \
83
56.3k
        /* \
84
56.3k
         * "rex" is always zero for other than 64-bit mode, so OR-ing it \
85
56.3k
         * into any prefix (and not just REX_PREFIX) is safe on 32-bit \
86
56.3k
         * (test harness) builds. \
87
56.3k
         */ \
88
56.3k
        (ptr)[1 - PFX_BYTES] |= rex; \
89
56.3k
    } \
90
90.9k
} while (0)
91
92
0
#define EVEX_PFX_BYTES 4
93
0
#define init_evex(stub) ({ \
94
0
    uint8_t *buf_ = get_stub(stub); \
95
0
    buf_[0] = 0x62; \
96
0
    buf_ + EVEX_PFX_BYTES; \
97
0
})
98
99
0
#define copy_EVEX(ptr, evex) ({ \
100
0
    if ( !mode_64bit() ) \
101
0
        (evex).reg |= 8; \
102
0
    (ptr)[1 - EVEX_PFX_BYTES] = (evex).raw[0]; \
103
0
    (ptr)[2 - EVEX_PFX_BYTES] = (evex).raw[1]; \
104
0
    (ptr)[3 - EVEX_PFX_BYTES] = (evex).raw[2]; \
105
0
    container_of((ptr) + 1 - EVEX_PFX_BYTES, typeof(evex), raw[0]); \
106
0
})
107
108
67.1k
#define rep_prefix()   (vex.pfx >= vex_f3)
109
15.1k
#define repe_prefix()  (vex.pfx == vex_f3)
110
11.4k
#define repne_prefix() (vex.pfx == vex_f2)
111
112
/*
113
 * While proper alignment gets specified in mmval_t, this doesn't get honored
114
 * by the compiler for automatic variables. Use this helper to instantiate a
115
 * suitably aligned variable, producing a pointer to access it.
116
 */
117
#define DECLARE_ALIGNED(type, var)                                        \
118
613k
    long __##var[(sizeof(type) + __alignof(type)) / __alignof(long) - 1]; \
119
613k
    type *const var##p =                                                  \
120
613k
        (void *)(((long)__##var + __alignof(type) - __alignof(__##var))   \
121
613k
                 & -__alignof(type))
122
123
/* MXCSR bit definitions. */
124
35.2k
#define MXCSR_MM  (1U << 17)
125
126
/* Segment selector error code bits. */
127
#define ECODE_EXT (1 << 0)
128
#define ECODE_IDT (1 << 1)
129
#define ECODE_TI  (1 << 2)
130
131
/* Raw emulation: instruction has two explicit operands. */
132
#define __emulate_2op_nobyte(_op, src, dst, sz, eflags, wsx,wsy,wdx,wdy,   \
133
35.3k
                             lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy, extra...)   \
134
35.3k
do{ unsigned long _tmp;                                                    \
135
35.3k
    switch ( sz )                                                          \
136
35.3k
    {                                                                      \
137
20.6k
    case 2:                                                                \
138
20.6k
        asm volatile (                                                     \
139
20.6k
            _PRE_EFLAGS("0","4","2")                                       \
140
20.6k
            _op"w %"wsx"3,%"wdx"1; "                                       \
141
20.6k
            _POST_EFLAGS("0","4","2")                                      \
142
20.6k
            : "+g" (eflags), "+" wdy (*(dst)), "=&r" (_tmp)                \
143
20.6k
            : wsy (src), "i" (EFLAGS_MASK), ## extra );                    \
144
20.6k
        break;                                                             \
145
9.67k
    case 4:                                                                \
146
9.67k
        asm volatile (                                                     \
147
9.67k
            _PRE_EFLAGS("0","4","2")                                       \
148
9.67k
            _op"l %"lsx"3,%"ldx"1; "                                       \
149
9.67k
            _POST_EFLAGS("0","4","2")                                      \
150
9.67k
            : "+g" (eflags), "+" ldy (*(dst)), "=&r" (_tmp)                \
151
9.67k
            : lsy (src), "i" (EFLAGS_MASK), ## extra );                    \
152
9.67k
        break;                                                             \
153
5.03k
    case 8:                                                                \
154
5.03k
        __emulate_2op_8byte(_op, src, dst, eflags, qsx, qsy, qdx, qdy,     \
155
5.03k
                            ## extra);                                     \
156
5.03k
        break;                                                             \
157
35.3k
    }                                                                      \
158
35.3k
} while (0)
159
#define __emulate_2op(_op, src, dst, sz, eflags, _bx, by, wx, wy,          \
160
99.9k
                      lx, ly, qx, qy, extra...)                            \
161
99.9k
do{ unsigned long _tmp;                                                    \
162
99.9k
    switch ( sz )                                                          \
163
99.9k
    {                                                                      \
164
68.8k
    case 1:                                                                \
165
68.8k
        asm volatile (                                                     \
166
68.8k
            _PRE_EFLAGS("0","4","2")                                       \
167
68.8k
            _op"b %"_bx"3,%1; "                                            \
168
68.8k
            _POST_EFLAGS("0","4","2")                                      \
169
68.8k
            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
170
68.8k
            : by (src), "i" (EFLAGS_MASK), ##extra );                      \
171
68.8k
        break;                                                             \
172
31.1k
    default:                                                               \
173
31.1k
        __emulate_2op_nobyte(_op, src, dst, sz, eflags, wx, wy, "", "m",   \
174
31.1k
                             lx, ly, "", "m", qx, qy, "", "m", ##extra);   \
175
31.1k
        break;                                                             \
176
99.9k
    }                                                                      \
177
99.9k
} while (0)
178
/* Source operand is byte-sized and may be restricted to just %cl. */
179
#define _emulate_2op_SrcB(op, src, dst, sz, eflags)                        \
180
5.68k
    __emulate_2op(op, src, dst, sz, eflags,                                \
181
5.68k
                  "b", "c", "b", "c", "b", "c", "b", "c")
182
#define emulate_2op_SrcB(op, src, dst, eflags)                             \
183
5.68k
    _emulate_2op_SrcB(op, (src).val, &(dst).val, (dst).bytes, eflags)
184
/* Source operand is byte, word, long or quad sized. */
185
#define _emulate_2op_SrcV(op, src, dst, sz, eflags, extra...)              \
186
94.2k
    __emulate_2op(op, src, dst, sz, eflags,                                \
187
94.2k
                  "b", "q", "w", "r", _LO32, "r", "", "r", ##extra)
188
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                         \
189
94.2k
    _emulate_2op_SrcV(_op, (_src).val, &(_dst).val, (_dst).bytes, _eflags)
190
/* Source operand is word, long or quad sized. */
191
#define _emulate_2op_SrcV_nobyte(op, src, dst, sz, eflags, extra...)       \
192
3.71k
    __emulate_2op_nobyte(op, src, dst, sz, eflags, "w", "r", "", "m",      \
193
3.71k
                         _LO32, "r", "", "m", "", "r", "", "m", ##extra)
194
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)                  \
195
3.71k
    _emulate_2op_SrcV_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes,   \
196
3.71k
                             _eflags)
197
/* Operands are word, long or quad sized and source may be in memory. */
198
#define emulate_2op_SrcV_srcmem(_op, _src, _dst, _eflags)                  \
199
558
    __emulate_2op_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes,       \
200
558
                         _eflags, "", "m", "w", "r",                       \
201
558
                         "", "m", _LO32, "r", "", "m", "", "r")
202
203
/* Instruction has only one explicit operand (no source operand). */
204
110k
#define _emulate_1op(_op, dst, sz, eflags, extra...)                       \
205
110k
do{ unsigned long _tmp;                                                    \
206
110k
    switch ( sz )                                                          \
207
110k
    {                                                                      \
208
620
    case 1:                                                                \
209
620
        asm volatile (                                                     \
210
620
            _PRE_EFLAGS("0","3","2")                                       \
211
620
            _op"b %1; "                                                    \
212
620
            _POST_EFLAGS("0","3","2")                                      \
213
620
            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
214
620
            : "i" (EFLAGS_MASK), ##extra );                                \
215
620
        break;                                                             \
216
86.1k
    case 2:                                                                \
217
86.1k
        asm volatile (                                                     \
218
86.1k
            _PRE_EFLAGS("0","3","2")                                       \
219
86.1k
            _op"w %1; "                                                    \
220
86.1k
            _POST_EFLAGS("0","3","2")                                      \
221
86.1k
            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
222
86.1k
            : "i" (EFLAGS_MASK), ##extra );                                \
223
86.1k
        break;                                                             \
224
22.8k
    case 4:                                                                \
225
22.8k
        asm volatile (                                                     \
226
22.8k
            _PRE_EFLAGS("0","3","2")                                       \
227
22.8k
            _op"l %1; "                                                    \
228
22.8k
            _POST_EFLAGS("0","3","2")                                      \
229
22.8k
            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
230
22.8k
            : "i" (EFLAGS_MASK), ##extra );                                \
231
22.8k
        break;                                                             \
232
590
    case 8:                                                                \
233
590
        __emulate_1op_8byte(_op, dst, eflags, ##extra);                    \
234
590
        break;                                                             \
235
110k
    }                                                                      \
236
110k
} while (0)
237
#define emulate_1op(op, dst, eflags)                                       \
238
110k
    _emulate_1op(op, &(dst).val, (dst).bytes, eflags)
239
240
/* Emulate an instruction with quadword operands (x86/64 only). */
241
#if defined(__x86_64__)
242
#define __emulate_2op_8byte(_op, src, dst, eflags,                      \
243
5.03k
                            qsx, qsy, qdx, qdy, extra...)               \
244
5.03k
do{ asm volatile (                                                      \
245
5.03k
        _PRE_EFLAGS("0","4","2")                                        \
246
5.03k
        _op"q %"qsx"3,%"qdx"1; "                                        \
247
5.03k
        _POST_EFLAGS("0","4","2")                                       \
248
5.03k
        : "+g" (eflags), "+" qdy (*(dst)), "=&r" (_tmp)                 \
249
5.03k
        : qsy (src), "i" (EFLAGS_MASK), ##extra );                      \
250
5.03k
} while (0)
251
590
#define __emulate_1op_8byte(_op, dst, eflags, extra...)                 \
252
590
do{ asm volatile (                                                      \
253
590
        _PRE_EFLAGS("0","3","2")                                        \
254
590
        _op"q %1; "                                                     \
255
590
        _POST_EFLAGS("0","3","2")                                       \
256
590
        : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                    \
257
590
        : "i" (EFLAGS_MASK), ##extra );                                 \
258
590
} while (0)
259
#elif defined(__i386__)
260
#define __emulate_2op_8byte(op, src, dst, eflags, qsx, qsy, qdx, qdy, extra...)
261
#define __emulate_1op_8byte(op, dst, eflags, extra...)
262
#endif /* __i386__ */
263
264
2.42k
#define emulate_stub(dst, src...) do {                                  \
265
2.42k
    unsigned long tmp;                                                  \
266
2.42k
    invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"),                 \
267
2.42k
                _POST_EFLAGS("[efl]", "[msk]", "[tmp]"),                \
268
2.42k
                dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs.eflags)       \
269
2.42k
                : [msk] "i" (EFLAGS_MASK), ## src);                     \
270
2.42k
} while (0)
271
272
/*
273
 * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
274
 * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
275
 */
276
static bool even_parity(uint8_t v)
277
22.6k
{
278
22.6k
    asm ( "test %1,%1" ASM_FLAG_OUT(, "; setp %0")
279
22.6k
          : ASM_FLAG_OUT("=@ccp", "=qm") (v) : "q" (v) );
280
281
22.6k
    return v;
282
22.6k
}
283
284
/* Update address held in a register, based on addressing mode. */
285
99.2k
#define _register_address_increment(reg, inc, byte_width)               \
286
99.2k
do {                                                                    \
287
99.2k
    int _inc = (inc); /* signed type ensures sign extension to long */  \
288
99.2k
    unsigned int _width = (byte_width);                                 \
289
99.2k
    if ( _width == sizeof(unsigned long) )                              \
290
99.2k
        (reg) += _inc;                                                  \
291
99.2k
    else if ( mode_64bit() )                                            \
292
65.7k
        (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1);          \
293
65.7k
    else                                                                \
294
65.7k
        (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) |               \
295
63.4k
                (((reg) + _inc) & ((1UL << (_width << 3)) - 1));        \
296
99.2k
} while (0)
297
#define register_address_adjust(reg, adj)                               \
298
39.0k
    _register_address_increment(reg,                                    \
299
39.0k
                                _regs.eflags & X86_EFLAGS_DF ?          \
300
39.0k
                                -(adj) : (adj),                         \
301
39.0k
                                ad_bytes)
302
303
42.7k
#define sp_pre_dec(dec) ({                                              \
304
42.7k
    _register_address_increment(_regs.r(sp), -(dec), ctxt->sp_size/8);  \
305
42.7k
    truncate_word(_regs.r(sp), ctxt->sp_size/8);                        \
306
42.7k
})
307
17.3k
#define sp_post_inc(inc) ({                                             \
308
17.3k
    unsigned long sp = truncate_word(_regs.r(sp), ctxt->sp_size/8);     \
309
17.3k
    _register_address_increment(_regs.r(sp), (inc), ctxt->sp_size/8);   \
310
17.3k
    sp;                                                                 \
311
17.3k
})
312
313
9.25k
#define jmp_rel(rel)                                                    \
314
9.25k
do {                                                                    \
315
9.25k
    unsigned long ip = _regs.r(ip) + (int)(rel);                        \
316
9.25k
    if ( op_bytes == 2 && (amd_like(ctxt) || !mode_64bit()) )           \
317
9.25k
        ip = (uint16_t)ip;                                              \
318
9.25k
    else if ( !mode_64bit() )                                           \
319
4.22k
        ip = (uint32_t)ip;                                              \
320
9.25k
    rc = ops->insn_fetch(ip, NULL, 0, ctxt);                            \
321
9.25k
    if ( rc ) goto done;                                                \
322
9.25k
    _regs.r(ip) = ip;                                                   \
323
9.02k
    singlestep = _regs.eflags & X86_EFLAGS_TF;                          \
324
9.02k
} while (0)
325
326
1.72k
#define validate_far_branch(cs, ip) ({                                  \
327
1.72k
    if ( sizeof(ip) <= 4 ) {                                            \
328
348
        ASSERT(!ctxt->lma);                                             \
329
348
        generate_exception_if((ip) > (cs)->limit, X86_EXC_GP, 0);       \
330
348
    } else                                                              \
331
1.72k
        generate_exception_if(ctxt->lma && (cs)->l                      \
332
1.72k
                              ? !is_canonical_address(ip)               \
333
1.72k
                              : (ip) > (cs)->limit, X86_EXC_GP, 0);     \
334
1.72k
})
335
336
1.20k
#define commit_far_branch(cs, newip) (                                  \
337
1.20k
        ({                                                              \
338
1.20k
            validate_far_branch(cs, newip);                             \
339
1.20k
            _regs.r(ip) = (newip);                                      \
340
1.08k
            singlestep = _regs.eflags & X86_EFLAGS_TF;                  \
341
1.08k
        }),                                                             \
342
1.08k
        ops->write_segment(x86_seg_cs, cs, ctxt)                        \
343
1.08k
    )
344
345
int x86emul_get_fpu(
346
    enum x86_emulate_fpu_type type,
347
    struct x86_emulate_ctxt *ctxt,
348
    const struct x86_emulate_ops *ops)
349
155k
{
350
155k
    uint64_t xcr0;
351
155k
    int rc;
352
353
155k
    fail_if(!ops->get_fpu);
354
155k
    ASSERT(type != X86EMUL_FPU_none);
355
356
155k
    if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||
357
155k
         ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
358
119k
    {
359
119k
        ASSERT(!ctxt->event_pending);
360
119k
        xcr0 = 0;
361
119k
    }
362
363
155k
    switch ( type )
364
155k
    {
365
0
    case X86EMUL_FPU_zmm:
366
0
        if ( !(xcr0 & X86_XCR0_ZMM) || !(xcr0 & X86_XCR0_HI_ZMM) ||
367
0
             !(xcr0 & X86_XCR0_OPMASK) )
368
0
            return X86EMUL_UNHANDLEABLE;
369
        /* fall through */
370
35.9k
    case X86EMUL_FPU_ymm:
371
35.9k
        if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_YMM) )
372
49
            return X86EMUL_UNHANDLEABLE;
373
35.8k
        break;
374
375
35.8k
    case X86EMUL_FPU_opmask:
376
255
        if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_OPMASK) )
377
255
            return X86EMUL_UNHANDLEABLE;
378
0
        break;
379
380
119k
    default:
381
119k
        break;
382
155k
    }
383
384
154k
    rc = (ops->get_fpu)(type, ctxt);
385
386
154k
    if ( rc == X86EMUL_OKAY )
387
154k
    {
388
154k
        unsigned long cr0;
389
390
154k
        fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
391
392
154k
        fail_if(!ops->read_cr);
393
154k
        if ( type >= X86EMUL_FPU_xmm )
394
75.5k
        {
395
75.5k
            unsigned long cr4;
396
397
75.5k
            rc = ops->read_cr(4, &cr4, ctxt);
398
75.5k
            if ( rc != X86EMUL_OKAY )
399
0
                return rc;
400
75.5k
            generate_exception_if(!(cr4 & ((type == X86EMUL_FPU_xmm)
401
75.5k
                                           ? X86_CR4_OSFXSR : X86_CR4_OSXSAVE)),
402
75.5k
                                  X86_EXC_UD);
403
75.5k
        }
404
405
154k
        rc = ops->read_cr(0, &cr0, ctxt);
406
154k
        if ( rc != X86EMUL_OKAY )
407
0
            return rc;
408
154k
        if ( type >= X86EMUL_FPU_ymm )
409
35.8k
        {
410
            /* Should be unreachable if VEX decoding is working correctly. */
411
35.8k
            ASSERT((cr0 & X86_CR0_PE) && !(ctxt->regs->eflags & X86_EFLAGS_VM));
412
35.8k
        }
413
154k
        if ( cr0 & X86_CR0_EM )
414
5.70k
        {
415
5.70k
            generate_exception_if(type == X86EMUL_FPU_fpu, X86_EXC_NM);
416
5.70k
            generate_exception_if(type == X86EMUL_FPU_mmx, X86_EXC_UD);
417
5.69k
            generate_exception_if(type == X86EMUL_FPU_xmm, X86_EXC_UD);
418
5.69k
        }
419
154k
        generate_exception_if((cr0 & X86_CR0_TS) &&
420
154k
                              (type != X86EMUL_FPU_wait || (cr0 & X86_CR0_MP)),
421
154k
                              X86_EXC_NM);
422
154k
    }
423
424
155k
 done:
425
155k
    return rc;
426
154k
}
427
428
static void put_fpu(
429
    enum x86_emulate_fpu_type type,
430
    bool failed_late,
431
    const struct x86_emulate_state *state,
432
    struct x86_emulate_ctxt *ctxt,
433
    const struct x86_emulate_ops *ops)
434
1.19M
{
435
1.19M
    if ( unlikely(failed_late) && type == X86EMUL_FPU_fpu )
436
59
        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
437
1.19M
    else if ( unlikely(type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
438
60.3k
    {
439
60.3k
        struct x86_emul_fpu_aux aux = {
440
60.3k
            .ip = ctxt->regs->r(ip),
441
60.3k
            .cs = ctxt->regs->cs,
442
60.3k
            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
443
60.3k
        };
444
60.3k
        struct segment_register sreg;
445
446
60.3k
        if ( ops->read_segment &&
447
60.3k
             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
448
51.0k
            aux.cs = sreg.sel;
449
60.3k
        if ( state->ea.type == OP_MEM )
450
8.45k
        {
451
8.45k
            aux.dp = state->ea.mem.off;
452
8.45k
            if ( state->ea.mem.seg == x86_seg_cs )
453
455
                aux.ds = aux.cs;
454
7.99k
            else if ( ops->read_segment &&
455
7.99k
                      ops->read_segment(state->ea.mem.seg, &sreg,
456
6.06k
                                        ctxt) == X86EMUL_OKAY )
457
6.06k
                aux.ds = sreg.sel;
458
#ifdef __XEN__
459
            /*
460
             * While generally the expectation is that input structures are
461
             * fully populated, the selector fields under ctxt->regs normally
462
             * aren't set, with the exception of CS and SS for PV domains.
463
             * Read the real selector registers for PV, and assert that HVM
464
             * invocations always set a properly functioning ->read_segment()
465
             * hook.
466
             */
467
            else if ( is_pv_vcpu(current) )
468
                switch ( state->ea.mem.seg )
469
                {
470
                case x86_seg_ds: aux.ds = read_sreg(ds);  break;
471
                case x86_seg_es: aux.ds = read_sreg(es);  break;
472
                case x86_seg_fs: aux.ds = read_sreg(fs);  break;
473
                case x86_seg_gs: aux.ds = read_sreg(gs);  break;
474
                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
475
                default:         ASSERT_UNREACHABLE();    break;
476
                }
477
            else
478
                ASSERT_UNREACHABLE();
479
#else
480
1.93k
            else
481
1.93k
                switch ( state->ea.mem.seg )
482
1.93k
                {
483
955
                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
484
200
                case x86_seg_es: aux.ds = ctxt->regs->es; break;
485
194
                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
486
282
                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
487
307
                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
488
0
                default:         ASSERT_UNREACHABLE();    break;
489
1.93k
                }
490
8.45k
#endif
491
8.45k
            aux.dval = true;
492
8.45k
        }
493
60.3k
        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
494
60.3k
    }
495
1.13M
    else if ( type != X86EMUL_FPU_none && ops->put_fpu )
496
95.4k
        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
497
1.19M
}
498
499
static inline unsigned long get_loop_count(
500
    const struct cpu_user_regs *regs,
501
    int ad_bytes)
502
37.1k
{
503
37.1k
    return (ad_bytes > 4) ? regs->r(cx)
504
37.1k
                          : (ad_bytes < 4) ? regs->cx : regs->ecx;
505
37.1k
}
506
507
static inline void put_loop_count(
508
    struct cpu_user_regs *regs,
509
    int ad_bytes,
510
    unsigned long count)
511
16.1k
{
512
16.1k
    if ( ad_bytes == 2 )
513
4.67k
        regs->cx = count;
514
11.5k
    else
515
11.5k
        regs->r(cx) = ad_bytes == 4 ? (uint32_t)count : count;
516
16.1k
}
517
518
36.0k
#define get_rep_prefix(extend_si, extend_di) ({                         \
519
36.0k
    unsigned long max_reps = 1;                                         \
520
36.0k
    if ( rep_prefix() )                                                 \
521
36.0k
        max_reps = get_loop_count(&_regs, ad_bytes);                    \
522
36.0k
    if ( max_reps == 0 )                                                \
523
36.0k
    {                                                                   \
524
5.04k
        /*                                                              \
525
5.04k
         * Skip the instruction if no repetitions are required, but     \
526
5.04k
         * zero extend relevant registers first when using 32-bit       \
527
5.04k
         * addressing in 64-bit mode.                                   \
528
5.04k
         */                                                             \
529
5.04k
        if ( !amd_like(ctxt) && mode_64bit() && ad_bytes == 4 )         \
530
5.04k
        {                                                               \
531
0
            _regs.r(cx) = 0;                                            \
532
0
            if ( extend_si ) _regs.r(si) = (uint32_t)_regs.r(si);       \
533
0
            if ( extend_di ) _regs.r(di) = (uint32_t)_regs.r(di);       \
534
0
        }                                                               \
535
5.04k
        goto complete_insn;                                             \
536
5.04k
    }                                                                   \
537
36.0k
    if ( max_reps > 1 && (_regs.eflags & X86_EFLAGS_TF) &&              \
538
31.0k
         !is_branch_step(ctxt, ops) )                                   \
539
31.0k
        max_reps = 1;                                                   \
540
31.0k
    max_reps;                                                           \
541
31.0k
})
542
543
static void __put_rep_prefix(
544
    struct cpu_user_regs *int_regs,
545
    struct cpu_user_regs *ext_regs,
546
    int ad_bytes,
547
    unsigned long reps_completed)
548
14.0k
{
549
14.0k
    unsigned long ecx = get_loop_count(int_regs, ad_bytes);
550
551
    /* Reduce counter appropriately, and repeat instruction if non-zero. */
552
14.0k
    ecx -= reps_completed;
553
14.0k
    if ( ecx != 0 )
554
12.3k
        int_regs->r(ip) = ext_regs->r(ip);
555
556
14.0k
    put_loop_count(int_regs, ad_bytes, ecx);
557
14.0k
}
558
559
30.2k
#define put_rep_prefix(reps_completed) ({                               \
560
30.2k
    if ( rep_prefix() )                                                 \
561
30.2k
    {                                                                   \
562
14.0k
        __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
563
14.0k
        if ( unlikely(rc == X86EMUL_EXCEPTION) )                        \
564
14.0k
            goto complete_insn;                                         \
565
14.0k
    }                                                                   \
566
30.2k
})
567
568
/* Clip maximum repetitions so that the index register at most just wraps. */
569
26.0k
#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({                  \
570
26.0k
    unsigned long todo__, ea__ = truncate_ea(ea);                         \
571
26.0k
    if ( !(_regs.eflags & X86_EFLAGS_DF) )                                \
572
26.0k
        todo__ = truncate_ea(-ea__) / (bytes_per_rep);                    \
573
26.0k
    else if ( truncate_ea(ea__ + (bytes_per_rep) - 1) < ea__ )            \
574
11.9k
        todo__ = 1;                                                       \
575
11.9k
    else                                                                  \
576
11.9k
        todo__ = ea__ / (bytes_per_rep) + 1;                              \
577
26.0k
    if ( !todo__ )                                                        \
578
26.0k
        (reps) = 1;                                                       \
579
26.0k
    else if ( todo__ < (reps) )                                           \
580
24.9k
        (reps) = todo__;                                                  \
581
26.0k
    ea__;                                                                 \
582
26.0k
})
583
584
/*
585
 * Unsigned multiplication with double-word result.
586
 * IN:  Multiplicand=m[0], Multiplier=m[1]
587
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
588
 */
589
static bool mul_dbl(unsigned long m[2])
590
533
{
591
533
    bool rc;
592
593
533
    asm ( "mul %1" ASM_FLAG_OUT(, "; seto %2")
594
533
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );
595
596
533
    return rc;
597
533
}
598
599
/*
600
 * Signed multiplication with double-word result.
601
 * IN:  Multiplicand=m[0], Multiplier=m[1]
602
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
603
 */
604
static bool imul_dbl(unsigned long m[2])
605
446
{
606
446
    bool rc;
607
608
446
    asm ( "imul %1" ASM_FLAG_OUT(, "; seto %2")
609
446
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );
610
611
446
    return rc;
612
446
}
613
614
/*
615
 * Unsigned division of double-word dividend.
616
 * IN:  Dividend=u[1]:u[0], Divisor=v
617
 * OUT: Return 1: #DE
618
 *      Return 0: Quotient=u[0], Remainder=u[1]
619
 */
620
static bool div_dbl(unsigned long u[2], unsigned long v)
621
2.85k
{
622
2.85k
    if ( (v == 0) || (u[1] >= v) )
623
144
        return 1;
624
2.70k
    asm ( "div"__OS" %2" : "+a" (u[0]), "+d" (u[1]) : "rm" (v) );
625
2.70k
    return 0;
626
2.85k
}
627
628
/*
629
 * Signed division of double-word dividend.
630
 * IN:  Dividend=u[1]:u[0], Divisor=v
631
 * OUT: Return 1: #DE
632
 *      Return 0: Quotient=u[0], Remainder=u[1]
633
 * NB. We don't use idiv directly as it's moderately hard to work out
634
 *     ahead of time whether it will #DE, which we cannot allow to happen.
635
 */
636
static bool idiv_dbl(unsigned long u[2], unsigned long v)
637
1.79k
{
638
1.79k
    bool negu = (long)u[1] < 0, negv = (long)v < 0;
639
640
    /* u = abs(u) */
641
1.79k
    if ( negu )
642
497
    {
643
497
        u[1] = ~u[1];
644
497
        if ( (u[0] = -u[0]) == 0 )
645
69
            u[1]++;
646
497
    }
647
648
    /* abs(u) / abs(v) */
649
1.79k
    if ( div_dbl(u, negv ? -v : v) )
650
87
        return 1;
651
652
    /* Remainder has same sign as dividend. It cannot overflow. */
653
1.70k
    if ( negu )
654
443
        u[1] = -u[1];
655
656
    /* Quotient is overflowed if sign bit is set. */
657
1.70k
    if ( negu ^ negv )
658
858
    {
659
858
        if ( (long)u[0] >= 0 )
660
748
            u[0] = -u[0];
661
110
        else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */
662
76
            return 1;
663
858
    }
664
846
    else if ( (long)u[0] < 0 )
665
70
        return 1;
666
667
1.55k
    return 0;
668
1.70k
}
669
670
static bool
671
test_cc(
672
    unsigned int condition, unsigned int flags)
673
17.4k
{
674
17.4k
    int rc = 0;
675
676
17.4k
    switch ( (condition & 15) >> 1 )
677
17.4k
    {
678
2.84k
    case 0: /* o */
679
2.84k
        rc |= (flags & X86_EFLAGS_OF);
680
2.84k
        break;
681
2.06k
    case 1: /* b/c/nae */
682
2.06k
        rc |= (flags & X86_EFLAGS_CF);
683
2.06k
        break;
684
2.04k
    case 2: /* z/e */
685
2.04k
        rc |= (flags & X86_EFLAGS_ZF);
686
2.04k
        break;
687
2.15k
    case 3: /* be/na */
688
2.15k
        rc |= (flags & (X86_EFLAGS_CF | X86_EFLAGS_ZF));
689
2.15k
        break;
690
2.42k
    case 4: /* s */
691
2.42k
        rc |= (flags & X86_EFLAGS_SF);
692
2.42k
        break;
693
1.70k
    case 5: /* p/pe */
694
1.70k
        rc |= (flags & X86_EFLAGS_PF);
695
1.70k
        break;
696
2.27k
    case 7: /* le/ng */
697
2.27k
        rc |= (flags & X86_EFLAGS_ZF);
698
        /* fall through */
699
4.17k
    case 6: /* l/nge */
700
4.17k
        rc |= (!(flags & X86_EFLAGS_SF) != !(flags & X86_EFLAGS_OF));
701
4.17k
        break;
702
17.4k
    }
703
704
    /* Odd condition identifiers (lsb == 1) have inverted sense. */
705
17.4k
    return (!!rc ^ (condition & 1));
706
17.4k
}
707
708
int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt,
709
                    const struct x86_emulate_ops *ops)
710
621k
{
711
621k
    struct segment_register reg;
712
713
621k
    if ( ctxt->regs->eflags & X86_EFLAGS_VM )
714
82.9k
        return 3;
715
716
538k
    if ( (ops->read_segment == NULL) ||
717
538k
         ops->read_segment(x86_seg_ss, &reg, ctxt) )
718
90.0k
        return -1;
719
720
448k
    return reg.dpl;
721
538k
}
722
723
static int
724
_mode_iopl(
725
    struct x86_emulate_ctxt *ctxt,
726
    const struct x86_emulate_ops  *ops)
727
19.8k
{
728
19.8k
    int cpl = x86emul_get_cpl(ctxt, ops);
729
19.8k
    if ( cpl == -1 )
730
15
        return -1;
731
19.8k
    return cpl <= MASK_EXTR(ctxt->regs->eflags, X86_EFLAGS_IOPL);
732
19.8k
}
733
734
19.8k
#define mode_iopl() ({                          \
735
19.8k
    int _iopl = _mode_iopl(ctxt, ops);          \
736
19.8k
    fail_if(_iopl < 0);                         \
737
19.8k
    _iopl;                                      \
738
19.8k
})
739
#define mode_vif() ({                                        \
740
    cr4 = 0;                                                 \
741
    if ( ops->read_cr && x86emul_get_cpl(ctxt, ops) == 3 )   \
742
    {                                                        \
743
        rc = ops->read_cr(4, &cr4, ctxt);                    \
744
        if ( rc != X86EMUL_OKAY ) goto done;                 \
745
    }                                                        \
746
    !!(cr4 & (_regs.eflags & X86_EFLAGS_VM ? X86_CR4_VME : X86_CR4_PVI)); \
747
})
748
749
static int ioport_access_check(
750
    unsigned int first_port,
751
    unsigned int bytes,
752
    struct x86_emulate_ctxt *ctxt,
753
    const struct x86_emulate_ops *ops)
754
15.1k
{
755
15.1k
    unsigned long iobmp;
756
15.1k
    struct segment_register tr;
757
15.1k
    int rc = X86EMUL_OKAY;
758
759
15.1k
    if ( !(ctxt->regs->eflags & X86_EFLAGS_VM) && mode_iopl() )
760
14.5k
        return X86EMUL_OKAY;
761
762
588
    fail_if(ops->read_segment == NULL);
763
    /*
764
     * X86EMUL_DONE coming back here may be used to defer the port
765
     * permission check to the respective ioport hook.
766
     */
767
586
    if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 )
768
0
        return rc == X86EMUL_DONE ? X86EMUL_OKAY : rc;
769
770
    /* Ensure the TSS has an io-bitmap-offset field. */
771
586
    generate_exception_if(tr.type != 0xb, X86_EXC_GP, 0);
772
773
561
    switch ( rc = read_ulong(x86_seg_tr, 0x66, &iobmp, 2, ctxt, ops) )
774
561
    {
775
538
    case X86EMUL_OKAY:
776
538
        break;
777
778
11
    case X86EMUL_EXCEPTION:
779
11
        generate_exception_if(!ctxt->event_pending, X86_EXC_GP, 0);
780
        /* fallthrough */
781
782
2
    default:
783
2
        return rc;
784
561
    }
785
786
    /* Read two bytes including byte containing first port. */
787
538
    switch ( rc = read_ulong(x86_seg_tr, iobmp + first_port / 8,
788
538
                             &iobmp, 2, ctxt, ops) )
789
538
    {
790
515
    case X86EMUL_OKAY:
791
515
        break;
792
793
22
    case X86EMUL_EXCEPTION:
794
22
        generate_exception_if(!ctxt->event_pending, X86_EXC_GP, 0);
795
        /* fallthrough */
796
797
2
    default:
798
2
        return rc;
799
538
    }
800
801
515
    generate_exception_if(iobmp & (((1 << bytes) - 1) << (first_port & 7)),
802
515
                          X86_EXC_GP, 0);
803
804
584
 done:
805
584
    return rc;
806
515
}
807
808
static int
809
realmode_load_seg(
810
    enum x86_segment seg,
811
    uint16_t sel,
812
    struct segment_register *sreg,
813
    struct x86_emulate_ctxt *ctxt,
814
    const struct x86_emulate_ops *ops)
815
3.13k
{
816
3.13k
    int rc;
817
818
3.13k
    if ( !ops->read_segment )
819
1
        return X86EMUL_UNHANDLEABLE;
820
821
3.12k
    if ( (rc = ops->read_segment(seg, sreg, ctxt)) == X86EMUL_OKAY )
822
3.12k
    {
823
3.12k
        sreg->sel  = sel;
824
3.12k
        sreg->base = (uint32_t)sel << 4;
825
3.12k
    }
826
827
3.12k
    return rc;
828
3.13k
}
829
830
/*
831
 * Passing in x86_seg_none means
832
 * - suppress any exceptions other than #PF,
833
 * - don't commit any state.
834
 */
835
static int
836
protmode_load_seg(
837
    enum x86_segment seg,
838
    uint16_t sel, bool is_ret,
839
    struct segment_register *sreg,
840
    struct x86_emulate_ctxt *ctxt,
841
    const struct x86_emulate_ops *ops)
842
7.07k
{
843
7.07k
    const struct cpu_policy *cp = ctxt->cpu_policy;
844
7.07k
    enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr;
845
7.07k
    struct { uint32_t a, b; } desc, desc_hi = {};
846
7.07k
    uint8_t dpl, rpl;
847
7.07k
    int cpl = x86emul_get_cpl(ctxt, ops);
848
7.07k
    uint32_t a_flag = 0x100;
849
7.07k
    int rc, fault_type = X86_EXC_GP;
850
851
7.07k
    if ( cpl < 0 )
852
5
        return X86EMUL_UNHANDLEABLE;
853
854
    /* NULL selector? */
855
7.07k
    if ( (sel & 0xfffc) == 0 )
856
1.49k
    {
857
1.49k
        switch ( seg )
858
1.49k
        {
859
68
        case x86_seg_ss:
860
68
            if ( mode_64bit() && (cpl != 3) && (cpl == sel) )
861
1.46k
        default:
862
1.46k
                break;
863
            /* fall through */
864
35
        case x86_seg_cs:
865
36
        case x86_seg_tr:
866
36
            goto raise_exn;
867
1.49k
        }
868
1.46k
        if ( seg == x86_seg_none || !_amd_like(cp) || vcpu_has_nscb() ||
869
1.46k
             !ops->read_segment ||
870
1.46k
             ops->read_segment(seg, sreg, ctxt) != X86EMUL_OKAY )
871
535
            memset(sreg, 0, sizeof(*sreg));
872
927
        else
873
927
            sreg->attr = 0;
874
1.46k
        sreg->sel = sel;
875
876
        /* Since CPL == SS.DPL, we need to put back DPL. */
877
1.46k
        if ( seg == x86_seg_ss )
878
64
            sreg->dpl = sel;
879
880
1.46k
        return X86EMUL_OKAY;
881
1.49k
    }
882
883
    /* System segment descriptors must reside in the GDT. */
884
5.57k
    if ( is_x86_system_segment(seg) && (sel & 4) )
885
1
        goto raise_exn;
886
887
5.57k
    switch ( rc = ops->read(sel_seg, sel & 0xfff8, &desc, sizeof(desc), ctxt) )
888
5.57k
    {
889
5.49k
    case X86EMUL_OKAY:
890
5.49k
        break;
891
892
78
    case X86EMUL_EXCEPTION:
893
78
        if ( !ctxt->event_pending )
894
64
            goto raise_exn;
895
        /* fallthrough */
896
897
16
    default:
898
16
        return rc;
899
5.57k
    }
900
901
    /* System segments must have S flag == 0. */
902
5.49k
    if ( is_x86_system_segment(seg) && (desc.b & (1u << 12)) )
903
1
        goto raise_exn;
904
    /* User segments must have S flag == 1. */
905
5.49k
    if ( is_x86_user_segment(seg) && !(desc.b & (1u << 12)) )
906
28
        goto raise_exn;
907
908
5.46k
    dpl = (desc.b >> 13) & 3;
909
5.46k
    rpl = sel & 3;
910
911
5.46k
    switch ( seg )
912
5.46k
    {
913
484
    case x86_seg_cs:
914
        /* Code segment? */
915
484
        if ( !(desc.b & (1u<<11)) )
916
5
            goto raise_exn;
917
479
        if ( is_ret
918
479
             ? /*
919
                * Really rpl < cpl, but our sole caller doesn't handle
920
                * privilege level changes.
921
                */
922
223
               rpl != cpl || (desc.b & (1 << 10) ? dpl > rpl : dpl != rpl)
923
479
             : desc.b & (1 << 10)
924
               /* Conforming segment: check DPL against CPL. */
925
256
               ? dpl > cpl
926
               /* Non-conforming segment: check RPL and DPL against CPL. */
927
256
               : rpl > cpl || dpl != cpl )
928
6
            goto raise_exn;
929
        /*
930
         * 64-bit code segments (L bit set) must have D bit clear.
931
         * Experimentally in long mode, the L and D bits are checked before
932
         * the Present bit.
933
         */
934
473
        if ( ctxt->lma && (desc.b & (1 << 21)) && (desc.b & (1 << 22)) )
935
14
            goto raise_exn;
936
459
        sel = (sel ^ rpl) | cpl;
937
459
        break;
938
72
    case x86_seg_ss:
939
        /* Writable data segment? */
940
72
        if ( (desc.b & (5u<<9)) != (1u<<9) )
941
1
            goto raise_exn;
942
71
        if ( (dpl != cpl) || (dpl != rpl) )
943
2
            goto raise_exn;
944
69
        break;
945
116
    case x86_seg_ldtr:
946
        /* LDT system segment? */
947
116
        if ( (desc.b & (15u<<8)) != (2u<<8) )
948
2
            goto raise_exn;
949
114
        a_flag = 0;
950
114
        break;
951
55
    case x86_seg_tr:
952
        /* Available TSS system segment? */
953
55
        if ( (desc.b & (15u<<8)) != (9u<<8) )
954
4
            goto raise_exn;
955
51
        a_flag = 0x200; /* busy flag */
956
51
        break;
957
161
    default:
958
        /* Readable code or data segment? */
959
161
        if ( (desc.b & (5u<<9)) == (4u<<9) )
960
1
            goto raise_exn;
961
        /* Non-conforming segment: check DPL against RPL and CPL. */
962
160
        if ( ((desc.b & (6u<<9)) != (6u<<9)) &&
963
160
             ((dpl < cpl) || (dpl < rpl)) )
964
2
            goto raise_exn;
965
158
        break;
966
4.57k
    case x86_seg_none:
967
        /* Non-conforming segment: check DPL against RPL and CPL. */
968
4.57k
        if ( ((desc.b & (0x1c << 8)) != (0x1c << 8)) &&
969
4.57k
             ((dpl < cpl) || (dpl < rpl)) )
970
770
            return X86EMUL_EXCEPTION;
971
3.80k
        a_flag = 0;
972
3.80k
        break;
973
5.46k
    }
974
975
    /* Segment present in memory? */
976
4.65k
    if ( !(desc.b & (1 << 15)) && seg != x86_seg_none )
977
9
    {
978
9
        fault_type = seg != x86_seg_ss ? X86_EXC_NP : X86_EXC_SS;
979
9
        goto raise_exn;
980
9
    }
981
982
4.64k
    if ( !is_x86_user_segment(seg) )
983
3.97k
    {
984
        /*
985
         * Whether to use an 8- or 16-byte descriptor in long mode depends
986
         * on sub-mode, descriptor type, and vendor:
987
         * - non-system descriptors are always 8-byte ones,
988
         * - system descriptors are always 16-byte ones in 64-bit mode,
989
         * - (call) gates are always 16-byte ones,
990
         * - other system descriptors in compatibility mode have
991
         *   - only their low 8-byte bytes read on Intel,
992
         *   - all 16 bytes read with the high 8 bytes ignored on AMD.
993
         */
994
3.97k
        bool wide = desc.b & 0x1000
995
3.97k
                    ? false : (desc.b & 0xf00) != 0xc00 && !_amd_like(cp)
996
2.82k
                               ? mode_64bit() : ctxt->lma;
997
998
3.97k
        if ( wide )
999
617
        {
1000
617
            switch ( rc = ops->read(sel_seg, (sel & 0xfff8) + 8,
1001
617
                                    &desc_hi, sizeof(desc_hi), ctxt) )
1002
617
            {
1003
602
            case X86EMUL_OKAY:
1004
602
                break;
1005
1006
14
            case X86EMUL_EXCEPTION:
1007
14
                if ( !ctxt->event_pending )
1008
13
                    goto raise_exn;
1009
                /* fall through */
1010
2
            default:
1011
2
                return rc;
1012
617
            }
1013
602
            if ( !mode_64bit() && _amd_like(cp) && (desc.b & 0xf00) != 0xc00 )
1014
67
                desc_hi.b = desc_hi.a = 0;
1015
602
            if ( (desc_hi.b & 0x00001f00) ||
1016
602
                 (seg != x86_seg_none &&
1017
285
                  !is_canonical_address((uint64_t)desc_hi.a << 32)) )
1018
337
                goto raise_exn;
1019
602
        }
1020
3.97k
    }
1021
1022
    /* Ensure Accessed flag is set. */
1023
4.29k
    if ( a_flag && !(desc.b & a_flag) )
1024
226
    {
1025
226
        uint32_t new_desc_b = desc.b | a_flag;
1026
1027
226
        fail_if(!ops->cmpxchg);
1028
225
        switch ( (rc = ops->cmpxchg(sel_seg, (sel & 0xfff8) + 4, &desc.b,
1029
225
                                    &new_desc_b, sizeof(desc.b), true, ctxt)) )
1030
225
        {
1031
210
        case X86EMUL_OKAY:
1032
210
            break;
1033
1034
14
        case X86EMUL_EXCEPTION:
1035
14
            if ( !ctxt->event_pending )
1036
0
                goto raise_exn;
1037
            /* fallthrough */
1038
1039
15
        default:
1040
15
            return rc;
1041
1042
0
        case X86EMUL_CMPXCHG_FAILED:
1043
0
            return X86EMUL_RETRY;
1044
225
        }
1045
1046
        /* Force the Accessed flag in our local copy. */
1047
210
        desc.b = new_desc_b;
1048
210
    }
1049
1050
4.28k
    sreg->base = (((uint64_t)desc_hi.a << 32) |
1051
4.28k
                  ((desc.b <<  0) & 0xff000000u) |
1052
4.28k
                  ((desc.b << 16) & 0x00ff0000u) |
1053
4.28k
                  ((desc.a >> 16) & 0x0000ffffu));
1054
4.28k
    sreg->attr = (((desc.b >>  8) & 0x00ffu) |
1055
4.28k
                  ((desc.b >> 12) & 0x0f00u));
1056
4.28k
    sreg->limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
1057
4.28k
    if ( sreg->g )
1058
551
        sreg->limit = (sreg->limit << 12) | 0xfffu;
1059
4.28k
    sreg->sel = sel;
1060
4.28k
    return X86EMUL_OKAY;
1061
1062
526
 raise_exn:
1063
526
    generate_exception_if(seg != x86_seg_none, fault_type, sel & 0xfffc);
1064
371
    rc = X86EMUL_EXCEPTION;
1065
527
 done:
1066
527
    return rc;
1067
371
}
1068
1069
static int
1070
load_seg(
1071
    enum x86_segment seg,
1072
    uint16_t sel, bool is_ret,
1073
    struct segment_register *sreg,
1074
    struct x86_emulate_ctxt *ctxt,
1075
    const struct x86_emulate_ops *ops)
1076
5.05k
{
1077
5.05k
    struct segment_register reg;
1078
5.05k
    int rc;
1079
1080
5.05k
    if ( !ops->write_segment )
1081
5
        return X86EMUL_UNHANDLEABLE;
1082
1083
5.04k
    if ( !sreg )
1084
3.22k
        sreg = &reg;
1085
1086
5.04k
    if ( in_protmode(ctxt, ops) )
1087
1.91k
        rc = protmode_load_seg(seg, sel, is_ret, sreg, ctxt, ops);
1088
3.13k
    else
1089
3.13k
        rc = realmode_load_seg(seg, sel, sreg, ctxt, ops);
1090
1091
5.04k
    if ( !rc && sreg == &reg )
1092
3.13k
        rc = ops->write_segment(seg, sreg, ctxt);
1093
1094
5.04k
    return rc;
1095
5.05k
}
1096
1097
/* Map GPRs by ModRM encoding to their offset within struct cpu_user_regs. */
1098
const uint8_t cpu_user_regs_gpr_offsets[] = {
1099
    offsetof(struct cpu_user_regs, r(ax)),
1100
    offsetof(struct cpu_user_regs, r(cx)),
1101
    offsetof(struct cpu_user_regs, r(dx)),
1102
    offsetof(struct cpu_user_regs, r(bx)),
1103
    offsetof(struct cpu_user_regs, r(sp)),
1104
    offsetof(struct cpu_user_regs, r(bp)),
1105
    offsetof(struct cpu_user_regs, r(si)),
1106
    offsetof(struct cpu_user_regs, r(di)),
1107
#ifdef __x86_64__
1108
    offsetof(struct cpu_user_regs, r8),
1109
    offsetof(struct cpu_user_regs, r9),
1110
    offsetof(struct cpu_user_regs, r10),
1111
    offsetof(struct cpu_user_regs, r11),
1112
    offsetof(struct cpu_user_regs, r12),
1113
    offsetof(struct cpu_user_regs, r13),
1114
    offsetof(struct cpu_user_regs, r14),
1115
    offsetof(struct cpu_user_regs, r15),
1116
#endif
1117
};
1118
1119
static void *_decode_gpr(
1120
    struct cpu_user_regs *regs, unsigned int modrm_reg, bool legacy)
1121
205k
{
1122
205k
    static const uint8_t byte_reg_offsets[] = {
1123
205k
        offsetof(struct cpu_user_regs, al),
1124
205k
        offsetof(struct cpu_user_regs, cl),
1125
205k
        offsetof(struct cpu_user_regs, dl),
1126
205k
        offsetof(struct cpu_user_regs, bl),
1127
205k
        offsetof(struct cpu_user_regs, ah),
1128
205k
        offsetof(struct cpu_user_regs, ch),
1129
205k
        offsetof(struct cpu_user_regs, dh),
1130
205k
        offsetof(struct cpu_user_regs, bh),
1131
205k
    };
1132
1133
205k
    if ( !legacy )
1134
128k
        return decode_gpr(regs, modrm_reg);
1135
1136
    /* Check that the array is a power of two. */
1137
77.5k
    BUILD_BUG_ON(ARRAY_SIZE(byte_reg_offsets) &
1138
77.5k
                 (ARRAY_SIZE(byte_reg_offsets) - 1));
1139
1140
77.5k
    ASSERT(modrm_reg < ARRAY_SIZE(byte_reg_offsets));
1141
1142
    /* Note that this also acts as array_access_nospec() stand-in. */
1143
77.5k
    modrm_reg &= ARRAY_SIZE(byte_reg_offsets) - 1;
1144
1145
77.5k
    return (void *)regs + byte_reg_offsets[modrm_reg];
1146
77.5k
}
1147
1148
static unsigned long *decode_vex_gpr(
1149
    unsigned int vex_reg, struct cpu_user_regs *regs,
1150
    const struct x86_emulate_ctxt *ctxt)
1151
1.36k
{
1152
1.36k
    return decode_gpr(regs, ~vex_reg & (mode_64bit() ? 0xf : 7));
1153
1.36k
}
1154
1155
9
#define avx512_vlen_check(lig) do { \
1156
9
    switch ( evex.lr ) \
1157
9
    { \
1158
2
    default: \
1159
2
        generate_exception(X86_EXC_UD); \
1160
2
    case 2: \
1161
2
        break; \
1162
5
    case 0: case 1: \
1163
5
        if ( !(lig) ) \
1164
5
            host_and_vcpu_must_have(avx512vl); \
1165
5
        break; \
1166
9
    } \
1167
9
} while ( false )
1168
1169
static bool is_branch_step(struct x86_emulate_ctxt *ctxt,
1170
                           const struct x86_emulate_ops *ops)
1171
164k
{
1172
164k
    uint64_t debugctl;
1173
164k
    int rc = X86EMUL_UNHANDLEABLE;
1174
1175
164k
    if ( !ops->read_msr ||
1176
164k
         (rc = ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl,
1177
107k
                             ctxt)) != X86EMUL_OKAY )
1178
56.8k
    {
1179
56.8k
        if ( rc == X86EMUL_EXCEPTION )
1180
0
            x86_emul_reset_event(ctxt);
1181
56.8k
        debugctl = 0;
1182
56.8k
    }
1183
1184
164k
    return debugctl & IA32_DEBUGCTLMSR_BTF;
1185
164k
}
1186
1187
static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
1188
                       const struct x86_emulate_ops *ops, enum vex_pfx pfx)
1189
14.6k
{
1190
14.6k
    uint64_t xcr0, bndcfg;
1191
14.6k
    int rc;
1192
1193
14.6k
    if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
1194
14.6k
        return;
1195
1196
0
    if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY ||
1197
0
         !(xcr0 & X86_XCR0_BNDREGS) || !(xcr0 & X86_XCR0_BNDCSR) )
1198
0
    {
1199
0
        ASSERT(!ctxt->event_pending);
1200
0
        return;
1201
0
    }
1202
1203
0
    if ( !mode_ring0() )
1204
0
        bndcfg = read_bndcfgu();
1205
0
    else if ( !ops->read_msr ||
1206
0
              (rc = ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg,
1207
0
                                  ctxt)) != X86EMUL_OKAY )
1208
0
    {
1209
0
        if ( rc == X86EMUL_EXCEPTION )
1210
0
            x86_emul_reset_event(ctxt);
1211
0
        return;
1212
0
    }
1213
0
    if ( (bndcfg & IA32_BNDCFGS_ENABLE) && !(bndcfg & IA32_BNDCFGS_PRESERVE) )
1214
0
    {
1215
        /*
1216
         * Using BNDMK or any other MPX instruction here is pointless, as
1217
         * we run with MPX disabled ourselves, and hence they're all no-ops.
1218
         * Therefore we have two ways to clear BNDn: Enable MPX temporarily
1219
         * (in which case executing any suitable non-prefixed branch
1220
         * instruction would do), or use XRSTOR.
1221
         */
1222
0
        xstate_set_init(X86_XCR0_BNDREGS);
1223
0
    }
1224
0
 done:;
1225
0
}
1226
1227
int cf_check x86emul_unhandleable_rw(
1228
    enum x86_segment seg,
1229
    unsigned long offset,
1230
    void *p_data,
1231
    unsigned int bytes,
1232
    struct x86_emulate_ctxt *ctxt)
1233
0
{
1234
0
    return X86EMUL_UNHANDLEABLE;
1235
0
}
1236
1237
/* Helper definitions. */
1238
711k
#define op_bytes (state->op_bytes)
1239
39.2k
#define ad_bytes (state->ad_bytes)
1240
67.6k
#define ext (state->ext)
1241
90.4k
#define modrm (state->modrm)
1242
#define modrm_mod (state->modrm_mod)
1243
205k
#define modrm_reg (state->modrm_reg)
1244
131k
#define modrm_rm (state->modrm_rm)
1245
385k
#define rex_prefix (state->rex_prefix)
1246
102k
#define lock_prefix (state->lock_prefix)
1247
428k
#define vex (state->vex)
1248
1.01M
#define evex (state->evex)
1249
82.8k
#define evex_encoded() (evex.mbs)
1250
1.95M
#define ea (state->ea)
1251
1252
/* Undo DEBUG wrapper. */
1253
#undef x86_emulate
1254
1255
int
1256
x86_emulate(
1257
    struct x86_emulate_ctxt *ctxt,
1258
    const struct x86_emulate_ops *ops)
1259
613k
{
1260
    /* Shadow copy of register state. Committed on successful emulation. */
1261
613k
    struct cpu_user_regs _regs = *ctxt->regs;
1262
613k
    const struct cpu_policy *__maybe_unused cp = ctxt->cpu_policy;
1263
613k
    struct x86_emulate_state state;
1264
613k
    int rc;
1265
613k
    uint8_t b, d, *opc = NULL;
1266
613k
    unsigned int first_byte = 0, elem_bytes, insn_bytes = 0;
1267
613k
    uint64_t op_mask = ~0ULL;
1268
613k
    bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
1269
613k
      !is_branch_step(ctxt, ops);
1270
613k
    bool sfence = false, fault_suppression = false;
1271
613k
    struct operand src = { .reg = PTR_POISON };
1272
613k
    struct operand dst = { .reg = PTR_POISON };
1273
613k
    unsigned long cr4;
1274
613k
    enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none;
1275
613k
    struct x86_emulate_stub stub = {};
1276
613k
    DECLARE_ALIGNED(mmval_t, mmval);
1277
613k
    struct stub_exn stub_exn = {};
1278
1279
613k
    ASSERT(ops->read);
1280
1281
613k
    init_context(ctxt);
1282
1283
613k
    generate_exception_if((mode_vif() &&
1284
613k
                           (_regs.eflags & X86_EFLAGS_VIF) &&
1285
613k
                           (_regs.eflags & X86_EFLAGS_VIP)),
1286
613k
                          X86_EXC_GP, 0);
1287
1288
613k
    rc = x86emul_decode(&state, ctxt, ops);
1289
613k
    if ( rc != X86EMUL_OKAY )
1290
10.3k
        return rc;
1291
1292
    /* Sync rIP to post decode value. */
1293
603k
    _regs.r(ip) = state.ip;
1294
1295
603k
    if ( ops->validate )
1296
0
    {
1297
0
#ifndef NDEBUG
1298
0
        state.caller = __builtin_return_address(0);
1299
0
#endif
1300
0
        rc = ops->validate(&state, ctxt);
1301
0
#ifndef NDEBUG
1302
0
        state.caller = NULL;
1303
0
#endif
1304
0
        if ( rc == X86EMUL_DONE )
1305
0
            goto complete_insn;
1306
0
        if ( rc != X86EMUL_OKAY )
1307
0
            return rc;
1308
0
    }
1309
1310
603k
    b = ctxt->opcode;
1311
603k
    d = state.desc;
1312
8.20M
#define state (&state)
1313
603k
    elem_bytes = 2 << (!state->fp16 + evex.w);
1314
1315
603k
    generate_exception_if(state->not_64bit && mode_64bit(), X86_EXC_UD);
1316
1317
603k
    if ( ea.type == OP_REG )
1318
131k
        ea.reg = _decode_gpr(&_regs, modrm_rm, (d & ByteOp) && !rex_prefix && !vex.opcx);
1319
1320
603k
    memset(mmvalp, 0xaa /* arbitrary */, sizeof(*mmvalp));
1321
1322
    /* Decode and fetch the source operand: register, memory or immediate. */
1323
603k
    switch ( d & SrcMask )
1324
603k
    {
1325
342k
    case SrcNone: /* case SrcImplicit: */
1326
342k
        src.type = OP_NONE;
1327
342k
        break;
1328
69.7k
    case SrcReg:
1329
69.7k
        src.type = OP_REG;
1330
69.7k
        if ( d & ByteOp )
1331
45.2k
        {
1332
45.2k
            src.reg = _decode_gpr(&_regs, modrm_reg, !rex_prefix && !vex.opcx);
1333
45.2k
            src.val = *(uint8_t *)src.reg;
1334
45.2k
            src.bytes = 1;
1335
45.2k
        }
1336
24.4k
        else
1337
24.4k
        {
1338
24.4k
            src.reg = decode_gpr(&_regs, modrm_reg);
1339
24.4k
            switch ( (src.bytes = op_bytes) )
1340
24.4k
            {
1341
10.4k
            case 2: src.val = *(uint16_t *)src.reg; break;
1342
9.39k
            case 4: src.val = *(uint32_t *)src.reg; break;
1343
3.11k
            case 8: src.val = *(uint64_t *)src.reg; break;
1344
24.4k
            }
1345
24.4k
        }
1346
69.7k
        break;
1347
69.7k
    case SrcMem16:
1348
18.8k
        ea.bytes = 2;
1349
18.8k
        goto srcmem_common;
1350
110k
    case SrcMem:
1351
110k
        if ( state->simd_size != simd_none )
1352
71.1k
            break;
1353
39.1k
        ea.bytes = (d & ByteOp) ? 1 : op_bytes;
1354
58.0k
    srcmem_common:
1355
58.0k
        src = ea;
1356
58.0k
        if ( src.type == OP_REG )
1357
27.3k
        {
1358
27.3k
            switch ( src.bytes )
1359
27.3k
            {
1360
1.82k
            case 1: src.val = *(uint8_t  *)src.reg; break;
1361
17.3k
            case 2: src.val = *(uint16_t *)src.reg; break;
1362
4.93k
            case 4: src.val = *(uint32_t *)src.reg; break;
1363
3.27k
            case 8: src.val = *(uint64_t *)src.reg; break;
1364
27.3k
            }
1365
27.3k
        }
1366
30.6k
        else if ( (rc = read_ulong(src.mem.seg, src.mem.off,
1367
30.6k
                                   &src.val, src.bytes, ctxt, ops)) )
1368
237
            goto done;
1369
57.7k
        break;
1370
57.7k
    case SrcImm:
1371
34.3k
        if ( !(d & ByteOp) )
1372
17.7k
            src.bytes = op_bytes != 8 ? op_bytes : 4;
1373
16.6k
        else
1374
16.6k
        {
1375
43.0k
    case SrcImmByte:
1376
43.0k
            src.bytes = 1;
1377
43.0k
        }
1378
60.7k
        src.type  = OP_IMM;
1379
60.7k
        src.val   = imm1;
1380
60.7k
        break;
1381
1.42k
    case SrcImm16:
1382
1.42k
        src.type  = OP_IMM;
1383
1.42k
        src.bytes = 2;
1384
1.42k
        src.val   = imm1;
1385
1.42k
        break;
1386
603k
    }
1387
1388
603k
#ifndef X86EMUL_NO_SIMD
1389
    /* With a memory operand, fetch the mask register in use (if any). */
1390
603k
    if ( ea.type == OP_MEM && evex.opmsk &&
1391
603k
         x86emul_get_fpu(fpu_type = X86EMUL_FPU_opmask,
1392
607
                         ctxt, ops) == X86EMUL_OKAY )
1393
0
    {
1394
0
        uint8_t *stb = get_stub(stub);
1395
1396
        /* KMOV{W,Q} %k<n>, (%rax) */
1397
0
        stb[0] = 0xc4;
1398
0
        stb[1] = 0xe1;
1399
0
        stb[2] = cpu_has_avx512bw ? 0xf8 : 0x78;
1400
0
        stb[3] = 0x91;
1401
0
        stb[4] = evex.opmsk << 3;
1402
0
        insn_bytes = 5;
1403
0
        stb[5] = 0xc3;
1404
1405
0
        invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
1406
1407
0
        insn_bytes = 0;
1408
0
        put_stub(stub);
1409
1410
0
        fault_suppression = true;
1411
0
    }
1412
1413
603k
    if ( fpu_type == X86EMUL_FPU_opmask )
1414
607
    {
1415
        /* Squash (side) effects of the x86emul_get_fpu() above. */
1416
607
        x86_emul_reset_event(ctxt);
1417
607
        put_fpu(X86EMUL_FPU_opmask, false, state, ctxt, ops);
1418
607
        fpu_type = X86EMUL_FPU_none;
1419
607
    }
1420
603k
#endif /* !X86EMUL_NO_SIMD */
1421
1422
    /* Decode (but don't fetch) the destination operand: register or memory. */
1423
603k
    switch ( d & DstMask )
1424
603k
    {
1425
420k
    case DstNone: /* case DstImplicit: */
1426
        /*
1427
         * The only implicit-operands instructions allowed a LOCK prefix are
1428
         * CMPXCHG{8,16}B (MOV CRn is being handled elsewhere).
1429
         */
1430
420k
        generate_exception_if(lock_prefix &&
1431
420k
                              (vex.opcx || ext != ext_0f || b != 0xc7 ||
1432
420k
                               (modrm_reg & 7) != 1 || ea.type != OP_MEM),
1433
420k
                              X86_EXC_UD);
1434
420k
        dst.type = OP_NONE;
1435
420k
        break;
1436
1437
97.7k
    case DstReg:
1438
97.7k
        generate_exception_if(lock_prefix, X86_EXC_UD);
1439
97.7k
        dst.type = OP_REG;
1440
97.7k
        if ( d & ByteOp )
1441
26.7k
        {
1442
26.7k
            dst.reg = _decode_gpr(&_regs, modrm_reg, !rex_prefix && !vex.opcx);
1443
26.7k
            dst.val = *(uint8_t *)dst.reg;
1444
26.7k
            dst.bytes = 1;
1445
26.7k
        }
1446
70.9k
        else
1447
70.9k
        {
1448
70.9k
            dst.reg = decode_gpr(&_regs, modrm_reg);
1449
70.9k
            switch ( (dst.bytes = op_bytes) )
1450
70.9k
            {
1451
26.0k
            case 2: dst.val = *(uint16_t *)dst.reg; break;
1452
17.2k
            case 4: dst.val = *(uint32_t *)dst.reg; break;
1453
8.85k
            case 8: dst.val = *(uint64_t *)dst.reg; break;
1454
70.9k
            }
1455
70.9k
        }
1456
97.7k
        break;
1457
97.7k
    case DstBitBase:
1458
3.82k
        if ( ea.type == OP_MEM )
1459
2.19k
        {
1460
            /*
1461
             * Instructions such as bt can reference an arbitrary offset from
1462
             * their memory operand, but the instruction doing the actual
1463
             * emulation needs the appropriate op_bytes read from memory.
1464
             * Adjust both the source register and memory operand to make an
1465
             * equivalent instruction.
1466
             *
1467
             * EA       += BitOffset DIV op_bytes*8
1468
             * BitOffset = BitOffset MOD op_bytes*8
1469
             * DIV truncates towards negative infinity.
1470
             * MOD always produces a positive result.
1471
             */
1472
2.19k
            if ( op_bytes == 2 )
1473
1.02k
                src.val = (int16_t)src.val;
1474
1.16k
            else if ( op_bytes == 4 )
1475
786
                src.val = (int32_t)src.val;
1476
2.19k
            if ( (long)src.val < 0 )
1477
731
                ea.mem.off -=
1478
731
                    op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L));
1479
1.46k
            else
1480
1.46k
                ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L);
1481
2.19k
            ea.mem.off = truncate_ea(ea.mem.off);
1482
2.19k
        }
1483
1484
        /* Bit index always truncated to within range. */
1485
3.82k
        src.val &= (op_bytes << 3) - 1;
1486
1487
3.82k
        d = (d & ~DstMask) | DstMem;
1488
        /* Becomes a normal DstMem operation from here on. */
1489
3.82k
        fallthrough;
1490
84.5k
    case DstMem:
1491
84.5k
        generate_exception_if(ea.type == OP_MEM && evex.z, X86_EXC_UD);
1492
84.5k
        if ( state->simd_size != simd_none )
1493
10.1k
        {
1494
10.1k
            generate_exception_if(lock_prefix, X86_EXC_UD);
1495
10.1k
            break;
1496
10.1k
        }
1497
74.3k
        ea.bytes = (d & ByteOp) ? 1 : op_bytes;
1498
74.3k
        dst = ea;
1499
74.3k
        if ( dst.type == OP_REG )
1500
14.6k
        {
1501
14.6k
            generate_exception_if(lock_prefix, X86_EXC_UD);
1502
14.6k
            switch ( dst.bytes )
1503
14.6k
            {
1504
4.76k
            case 1: dst.val = *(uint8_t  *)dst.reg; break;
1505
3.67k
            case 2: dst.val = *(uint16_t *)dst.reg; break;
1506
4.03k
            case 4: dst.val = *(uint32_t *)dst.reg; break;
1507
2.20k
            case 8: dst.val = *(uint64_t *)dst.reg; break;
1508
14.6k
            }
1509
14.6k
        }
1510
59.6k
        else if ( d & Mov ) /* optimisation - avoid slow emulated read */
1511
7.43k
        {
1512
            /* Lock prefix is allowed only on RMW instructions. */
1513
7.43k
            generate_exception_if(lock_prefix, X86_EXC_UD);
1514
7.43k
            fail_if(!ops->write);
1515
7.43k
        }
1516
52.2k
        else if ( !ops->rmw )
1517
52.2k
        {
1518
52.2k
            fail_if(lock_prefix ? !ops->cmpxchg : !ops->write);
1519
52.1k
            if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
1520
52.1k
                                  &dst.val, dst.bytes, ctxt, ops)) )
1521
514
                goto done;
1522
51.6k
            dst.orig_val = dst.val;
1523
51.6k
        }
1524
73.7k
        break;
1525
603k
    }
1526
1527
602k
    switch ( ctxt->opcode )
1528
602k
    {
1529
0
        enum x86_segment seg;
1530
0
        struct segment_register cs, sreg;
1531
0
        struct cpuid_leaf leaf;
1532
0
        uint64_t msr_val;
1533
0
        unsigned int i, n;
1534
0
        unsigned long dummy;
1535
1536
40.6k
    case 0x00: case 0x01: add: /* add reg,mem */
1537
40.6k
        if ( ops->rmw && dst.type == OP_MEM )
1538
0
            state->rmw = rmw_add;
1539
40.6k
        else
1540
40.6k
        {
1541
45.3k
    case 0x02 ... 0x05: /* add */
1542
45.3k
            emulate_2op_SrcV("add", src, dst, _regs.eflags);
1543
45.3k
        }
1544
45.3k
        break;
1545
1546
45.3k
    case 0x08: case 0x09: or: /* or reg,mem */
1547
1.26k
        if ( ops->rmw && dst.type == OP_MEM )
1548
0
            state->rmw = rmw_or;
1549
1.26k
        else
1550
1.26k
        {
1551
4.45k
    case 0x0a ... 0x0d: /* or */
1552
4.45k
            emulate_2op_SrcV("or", src, dst, _regs.eflags);
1553
4.45k
        }
1554
4.45k
        break;
1555
1556
4.45k
    case 0x10: case 0x11: adc: /* adc reg,mem */
1557
1.12k
        if ( ops->rmw && dst.type == OP_MEM )
1558
0
            state->rmw = rmw_adc;
1559
1.12k
        else
1560
1.12k
        {
1561
3.71k
    case 0x12 ... 0x15: /* adc */
1562
3.71k
            emulate_2op_SrcV("adc", src, dst, _regs.eflags);
1563
3.71k
        }
1564
3.71k
        break;
1565
1566
3.71k
    case 0x18: case 0x19: sbb: /* sbb reg,mem */
1567
1.06k
        if ( ops->rmw && dst.type == OP_MEM )
1568
0
            state->rmw = rmw_sbb;
1569
1.06k
        else
1570
1.06k
        {
1571
4.01k
    case 0x1a ... 0x1d: /* sbb */
1572
4.01k
            emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
1573
4.01k
        }
1574
4.01k
        break;
1575
1576
4.01k
    case 0x20: case 0x21: and: /* and reg,mem */
1577
1.43k
        if ( ops->rmw && dst.type == OP_MEM )
1578
0
            state->rmw = rmw_and;
1579
1.43k
        else
1580
1.43k
        {
1581
5.70k
    case 0x22 ... 0x25: /* and */
1582
5.70k
            emulate_2op_SrcV("and", src, dst, _regs.eflags);
1583
5.70k
        }
1584
5.70k
        break;
1585
1586
5.70k
    case 0x28: case 0x29: sub: /* sub reg,mem */
1587
1.57k
        if ( ops->rmw && dst.type == OP_MEM )
1588
0
            state->rmw = rmw_sub;
1589
1.57k
        else
1590
1.57k
        {
1591
5.10k
    case 0x2a ... 0x2d: /* sub */
1592
5.10k
            emulate_2op_SrcV("sub", src, dst, _regs.eflags);
1593
5.10k
        }
1594
5.10k
        break;
1595
1596
5.10k
    case 0x30: case 0x31: xor: /* xor reg,mem */
1597
1.66k
        if ( ops->rmw && dst.type == OP_MEM )
1598
0
            state->rmw = rmw_xor;
1599
1.66k
        else
1600
1.66k
        {
1601
8.68k
    case 0x32 ... 0x35: /* xor */
1602
8.68k
            emulate_2op_SrcV("xor", src, dst, _regs.eflags);
1603
8.68k
        }
1604
8.68k
        break;
1605
1606
8.68k
    case 0x38: case 0x39: cmp: /* cmp reg,mem */
1607
2.23k
        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
1608
2.23k
        dst.type = OP_NONE;
1609
2.23k
        break;
1610
1611
5.30k
    case 0x3a ... 0x3d: /* cmp */
1612
5.30k
        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
1613
5.30k
        dst.type = OP_NONE;
1614
5.30k
        break;
1615
1616
1.16k
    case 0x06: /* push %%es */
1617
1.91k
    case 0x0e: /* push %%cs */
1618
4.95k
    case 0x16: /* push %%ss */
1619
5.37k
    case 0x1e: /* push %%ds */
1620
5.57k
    case X86EMUL_OPC(0x0f, 0xa0): /* push %%fs */
1621
5.76k
    case X86EMUL_OPC(0x0f, 0xa8): /* push %%gs */
1622
5.76k
        fail_if(ops->read_segment == NULL);
1623
5.76k
        if ( (rc = ops->read_segment((b >> 3) & 7, &sreg,
1624
5.76k
                                     ctxt)) != X86EMUL_OKAY )
1625
0
            goto done;
1626
5.76k
        src.val = sreg.sel;
1627
5.76k
        goto push;
1628
1629
539
    case 0x07: /* pop %%es */
1630
924
    case 0x17: /* pop %%ss */
1631
1.24k
    case 0x1f: /* pop %%ds */
1632
1.32k
    case X86EMUL_OPC(0x0f, 0xa1): /* pop %%fs */
1633
1.74k
    case X86EMUL_OPC(0x0f, 0xa9): /* pop %%gs */
1634
1.74k
        fail_if(ops->write_segment == NULL);
1635
        /* 64-bit mode: POP defaults to a 64-bit operand. */
1636
1.73k
        if ( mode_64bit() && (op_bytes == 4) )
1637
257
            op_bytes = 8;
1638
1.73k
        seg = (b >> 3) & 7;
1639
1.73k
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &dst.val,
1640
1.73k
                              op_bytes, ctxt, ops)) != X86EMUL_OKAY ||
1641
1.73k
             (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
1642
88
            goto done;
1643
1.64k
        if ( seg == x86_seg_ss )
1644
366
            ctxt->retire.mov_ss = true;
1645
1.64k
        break;
1646
1647
5.95k
    case 0x27: /* daa */
1648
19.7k
    case 0x2f: /* das */ {
1649
19.7k
        uint8_t al = _regs.al;
1650
19.7k
        unsigned int eflags = _regs.eflags;
1651
1652
19.7k
        _regs.eflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_AF | X86_EFLAGS_SF |
1653
19.7k
                          X86_EFLAGS_ZF | X86_EFLAGS_PF);
1654
19.7k
        if ( ((al & 0x0f) > 9) || (eflags & X86_EFLAGS_AF) )
1655
8.61k
        {
1656
8.61k
            _regs.eflags |= X86_EFLAGS_AF;
1657
8.61k
            if ( b == 0x2f && (al < 6 || (eflags & X86_EFLAGS_CF)) )
1658
3.64k
                _regs.eflags |= X86_EFLAGS_CF;
1659
8.61k
            _regs.al += (b == 0x27) ? 6 : -6;
1660
8.61k
        }
1661
19.7k
        if ( (al > 0x99) || (eflags & X86_EFLAGS_CF) )
1662
8.49k
        {
1663
8.49k
            _regs.al += (b == 0x27) ? 0x60 : -0x60;
1664
8.49k
            _regs.eflags |= X86_EFLAGS_CF;
1665
8.49k
        }
1666
19.7k
        _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
1667
19.7k
        _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
1668
19.7k
        _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
1669
19.7k
        break;
1670
5.95k
    }
1671
1672
1.07k
    case 0x37: /* aaa */
1673
3.25k
    case 0x3f: /* aas */
1674
3.25k
        _regs.eflags &= ~X86_EFLAGS_CF;
1675
3.25k
        if ( (_regs.al > 9) || (_regs.eflags & X86_EFLAGS_AF) )
1676
2.02k
        {
1677
2.02k
            _regs.al += (b == 0x37) ? 6 : -6;
1678
2.02k
            _regs.ah += (b == 0x37) ? 1 : -1;
1679
2.02k
            _regs.eflags |= X86_EFLAGS_CF | X86_EFLAGS_AF;
1680
2.02k
        }
1681
3.25k
        _regs.al &= 0x0f;
1682
3.25k
        break;
1683
1684
107k
    case 0x40 ... 0x4f: /* inc/dec reg */
1685
107k
        dst.type  = OP_REG;
1686
107k
        dst.reg   = decode_gpr(&_regs, b & 7);
1687
107k
        dst.bytes = op_bytes;
1688
107k
        dst.val   = *dst.reg;
1689
107k
        if ( b & 8 )
1690
107k
            emulate_1op("dec", dst, _regs.eflags);
1691
32.7k
        else
1692
107k
            emulate_1op("inc", dst, _regs.eflags);
1693
107k
        break;
1694
1695
107k
    case 0x50 ... 0x57: /* push reg */
1696
25.6k
        src.val = *decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3));
1697
25.6k
        goto push;
1698
1699
5.26k
    case 0x58 ... 0x5f: /* pop reg */
1700
5.26k
        dst.type  = OP_REG;
1701
5.26k
        dst.reg   = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3));
1702
5.26k
        dst.bytes = op_bytes;
1703
5.26k
        if ( mode_64bit() && (dst.bytes == 4) )
1704
706
            dst.bytes = 8;
1705
5.26k
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
1706
5.26k
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
1707
106
            goto done;
1708
5.16k
        break;
1709
1710
5.16k
    case 0x60: /* pusha */
1711
434
        fail_if(!ops->write);
1712
433
        ea.val = _regs.esp;
1713
3.72k
        for ( i = 0; i < 8; i++ )
1714
3.32k
        {
1715
3.32k
            void *reg = decode_gpr(&_regs, i);
1716
1717
3.32k
            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
1718
3.32k
                                  reg != &_regs.esp ? reg : &ea.val,
1719
3.32k
                                  op_bytes, ctxt)) != 0 )
1720
30
                goto done;
1721
3.32k
        }
1722
403
        break;
1723
1724
528
    case 0x61: /* popa */
1725
4.56k
        for ( i = 0; i < 8; i++ )
1726
4.07k
        {
1727
4.07k
            void *reg = decode_gpr(&_regs, 7 - i);
1728
1729
4.07k
            if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
1730
4.07k
                                  &dst.val, op_bytes, ctxt, ops)) != 0 )
1731
36
                goto done;
1732
4.03k
            if ( reg == &_regs.r(sp) )
1733
508
                continue;
1734
3.52k
            if ( op_bytes == 2 )
1735
3.33k
                *(uint16_t *)reg = dst.val;
1736
195
            else
1737
195
                *(unsigned long *)reg = dst.val;
1738
3.52k
        }
1739
492
        break;
1740
1741
837
    case 0x62: /* bound */ {
1742
837
        int lb, ub, idx;
1743
1744
837
        generate_exception_if(src.type != OP_MEM, X86_EXC_UD);
1745
835
        if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + op_bytes),
1746
835
                              &ea.val, op_bytes, ctxt, ops)) )
1747
3
            goto done;
1748
832
        ub  = (op_bytes == 2) ? (int16_t)ea.val   : (int32_t)ea.val;
1749
832
        lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
1750
832
        idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
1751
832
        generate_exception_if((idx < lb) || (idx > ub), X86_EXC_BR);
1752
762
        dst.type = OP_NONE;
1753
762
        break;
1754
832
    }
1755
1756
1.41k
    case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
1757
1.41k
        if ( mode_64bit() )
1758
634
        {
1759
            /* movsxd */
1760
634
            if ( ea.type == OP_REG )
1761
263
                src.val = *ea.reg;
1762
371
            else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
1763
371
                                       (op_bytes == 2 && !amd_like(ctxt)
1764
371
                                        ? 2 : 4),
1765
371
                                       ctxt, ops)) )
1766
16
                goto done;
1767
618
            dst.val = (int32_t)src.val;
1768
618
        }
1769
780
        else
1770
780
        {
1771
            /* arpl */
1772
780
            unsigned int src_rpl = dst.val & 3;
1773
1774
780
            generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD);
1775
1776
778
            dst = ea;
1777
778
            dst.bytes = 2;
1778
778
            if ( dst.type == OP_REG )
1779
462
                dst.val = *dst.reg;
1780
316
            else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
1781
316
                                       &dst.val, 2, ctxt, ops)) )
1782
16
                goto done;
1783
762
            if ( src_rpl > (dst.val & 3) )
1784
232
            {
1785
232
                _regs.eflags |= X86_EFLAGS_ZF;
1786
232
                dst.val = (dst.val & ~3) | src_rpl;
1787
232
            }
1788
530
            else
1789
530
            {
1790
530
                _regs.eflags &= ~X86_EFLAGS_ZF;
1791
530
                dst.type = OP_NONE;
1792
530
            }
1793
762
        }
1794
1.38k
        break;
1795
1796
1.38k
    case 0x68: /* push imm{16,32,64} */
1797
970
    case 0x6a: /* push imm8 */
1798
34.9k
    push:
1799
34.9k
        ASSERT(d & Mov); /* writeback needed */
1800
34.9k
        dst.type  = OP_MEM;
1801
34.9k
        dst.bytes = mode_64bit() && (op_bytes == 4) ? 8 : op_bytes;
1802
34.9k
        dst.val = src.val;
1803
34.9k
        dst.mem.seg = x86_seg_ss;
1804
34.9k
        dst.mem.off = sp_pre_dec(dst.bytes);
1805
34.9k
        break;
1806
1807
217
    case 0x69: /* imul imm16/32 */
1808
1.45k
    case 0x6b: /* imul imm8 */
1809
1.45k
        if ( ea.type == OP_REG )
1810
745
            dst.val = *ea.reg;
1811
711
        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
1812
711
                                   &dst.val, op_bytes, ctxt, ops)) )
1813
5
            goto done;
1814
1.45k
        goto imul;
1815
1816
6.25k
    case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
1817
6.25k
        unsigned long nr_reps;
1818
6.25k
        unsigned int port = _regs.dx;
1819
1820
6.25k
        dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
1821
6.25k
        if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
1822
22
            goto done;
1823
6.23k
        nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */);
1824
5.44k
        dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
1825
5.44k
        dst.mem.seg = x86_seg_es;
1826
        /* Try the presumably most efficient approach first. */
1827
5.44k
        if ( !ops->rep_ins )
1828
2.26k
            nr_reps = 1;
1829
5.44k
        rc = X86EMUL_UNHANDLEABLE;
1830
5.44k
        if ( nr_reps == 1 && ops->read_io && ops->write )
1831
3.77k
        {
1832
3.77k
            rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
1833
3.77k
            if ( rc != X86EMUL_UNHANDLEABLE )
1834
3.57k
                nr_reps = 0;
1835
3.77k
        }
1836
5.44k
        if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_ins )
1837
1.65k
            rc = ops->rep_ins(port, dst.mem.seg, dst.mem.off, dst.bytes,
1838
1.65k
                              &nr_reps, ctxt);
1839
5.44k
        if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
1840
213
        {
1841
213
            fail_if(!ops->read_io || !ops->write);
1842
203
            if ( (rc = ops->read_io(port, dst.bytes, &dst.val, ctxt)) != 0 )
1843
1
                goto done;
1844
202
            nr_reps = 0;
1845
202
        }
1846
5.43k
        if ( !nr_reps && rc == X86EMUL_OKAY )
1847
3.59k
        {
1848
3.59k
            dst.type = OP_MEM;
1849
3.59k
            nr_reps = 1;
1850
3.59k
        }
1851
5.43k
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
1852
5.43k
        put_rep_prefix(nr_reps);
1853
5.19k
        if ( rc != X86EMUL_OKAY )
1854
255
            goto done;
1855
4.93k
        break;
1856
5.19k
    }
1857
1858
7.11k
    case 0x6e ... 0x6f: /* outs %esi,%dx */ {
1859
7.11k
        unsigned long nr_reps;
1860
7.11k
        unsigned int port = _regs.dx;
1861
1862
7.11k
        dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
1863
7.11k
        if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
1864
37
            goto done;
1865
7.07k
        nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */);
1866
6.25k
        ea.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
1867
        /* Try the presumably most efficient approach first. */
1868
6.25k
        if ( !ops->rep_outs )
1869
868
            nr_reps = 1;
1870
6.25k
        rc = X86EMUL_UNHANDLEABLE;
1871
6.25k
        if ( nr_reps == 1 && ops->write_io )
1872
2.73k
        {
1873
2.73k
            rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val, dst.bytes,
1874
2.73k
                            ctxt, ops);
1875
2.73k
            if ( rc != X86EMUL_UNHANDLEABLE )
1876
1.78k
                nr_reps = 0;
1877
2.73k
        }
1878
6.25k
        if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_outs )
1879
4.14k
            rc = ops->rep_outs(ea.mem.seg, ea.mem.off, port, dst.bytes,
1880
4.14k
                               &nr_reps, ctxt);
1881
6.25k
        if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
1882
326
        {
1883
326
            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val,
1884
326
                                  dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
1885
27
                goto done;
1886
299
            fail_if(ops->write_io == NULL);
1887
296
            nr_reps = 0;
1888
296
        }
1889
6.22k
        if ( !nr_reps && rc == X86EMUL_OKAY )
1890
1.94k
        {
1891
1.94k
            if ( (rc = ops->write_io(port, dst.bytes, dst.val, ctxt)) != 0 )
1892
22
                goto done;
1893
1.91k
            nr_reps = 1;
1894
1.91k
        }
1895
6.20k
        register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
1896
6.20k
        put_rep_prefix(nr_reps);
1897
5.96k
        if ( rc != X86EMUL_OKAY )
1898
216
            goto done;
1899
5.74k
        break;
1900
5.96k
    }
1901
1902
9.42k
    case 0x70 ... 0x7f: /* jcc (short) */
1903
9.42k
        if ( test_cc(b, _regs.eflags) )
1904
3.76k
            jmp_rel((int32_t)src.val);
1905
9.35k
        adjust_bnd(ctxt, ops, vex.pfx);
1906
9.35k
        break;
1907
1908
2.71k
    case 0x80: case 0x81: case 0x82: case 0x83: /* Grp1 */
1909
2.71k
        switch ( modrm_reg & 7 )
1910
2.71k
        {
1911
845
        case 0: goto add;
1912
289
        case 1: goto or;
1913
210
        case 2: goto adc;
1914
278
        case 3: goto sbb;
1915
87
        case 4: goto and;
1916
243
        case 5: goto sub;
1917
213
        case 6: goto xor;
1918
548
        case 7:
1919
548
            dst.val = imm1;
1920
548
            goto cmp;
1921
2.71k
        }
1922
0
        break;
1923
1924
445
    case 0xa8 ... 0xa9: /* test imm,%%eax */
1925
1.86k
    case 0x84 ... 0x85: test: /* test */
1926
1.86k
        emulate_2op_SrcV("test", src, dst, _regs.eflags);
1927
1.86k
        dst.type = OP_NONE;
1928
1.86k
        break;
1929
1930
3.34k
    case 0x86 ... 0x87: xchg: /* xchg */
1931
        /*
1932
         * The lock prefix is implied for this insn (and setting it for the
1933
         * register operands case here is benign to subsequent code).
1934
         */
1935
3.34k
        lock_prefix = 1;
1936
3.34k
        if ( ops->rmw && dst.type == OP_MEM )
1937
0
        {
1938
0
            state->rmw = rmw_xchg;
1939
0
            break;
1940
0
        }
1941
        /* Write back the register source. */
1942
3.34k
        switch ( dst.bytes )
1943
3.34k
        {
1944
257
        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
1945
1.00k
        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
1946
1.44k
        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
1947
645
        case 8: *src.reg = dst.val; break;
1948
3.34k
        }
1949
        /* Arrange for write back of the memory destination. */
1950
3.34k
        dst.val = src.val;
1951
3.34k
        break;
1952
1953
223
    case 0xc6: /* Grp11: mov / xabort */
1954
266
    case 0xc7: /* Grp11: mov / xbegin */
1955
266
        if ( modrm == 0xf8 && vcpu_has_rtm() )
1956
0
        {
1957
            /*
1958
             * xbegin unconditionally aborts, xabort is unconditionally
1959
             * a nop. It also does not truncate the destination address to
1960
             * 16 bits when 16-bit operand size is in effect.
1961
             */
1962
0
            if ( b & 1 )
1963
0
            {
1964
0
                op_bytes = 4;
1965
0
                jmp_rel((int32_t)src.val);
1966
0
                _regs.r(ax) = 0;
1967
0
            }
1968
0
            dst.type = OP_NONE;
1969
0
            break;
1970
0
        }
1971
266
        generate_exception_if((modrm_reg & 7) != 0, X86_EXC_UD);
1972
233
        fallthrough;
1973
1.52k
    case 0x88 ... 0x8b: /* mov */
1974
1.90k
    case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
1975
2.06k
    case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
1976
2.06k
        dst.val = src.val;
1977
2.06k
        break;
1978
1979
245
    case 0x8c: /* mov Sreg,r/m */
1980
245
        seg = modrm_reg & 7; /* REX.R is ignored. */
1981
245
        generate_exception_if(!is_x86_user_segment(seg), X86_EXC_UD);
1982
1.46k
    store_selector:
1983
1.46k
        fail_if(ops->read_segment == NULL);
1984
1.46k
        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 )
1985
0
            goto done;
1986
1.46k
        dst.val = sreg.sel;
1987
1.46k
        if ( dst.type == OP_MEM )
1988
1.21k
            dst.bytes = 2;
1989
1.46k
        break;
1990
1991
240
    case 0x8d: /* lea */
1992
240
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
1993
239
        dst.val = ea.mem.off;
1994
239
        break;
1995
1996
335
    case 0x8e: /* mov r/m,Sreg */
1997
335
        seg = modrm_reg & 7; /* REX.R is ignored. */
1998
335
        generate_exception_if(!is_x86_user_segment(seg) ||
1999
335
                              seg == x86_seg_cs, X86_EXC_UD);
2000
332
        if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
2001
11
            goto done;
2002
321
        if ( seg == x86_seg_ss )
2003
63
            ctxt->retire.mov_ss = true;
2004
321
        dst.type = OP_NONE;
2005
321
        break;
2006
2007
877
    case 0x8f: /* pop (sole member of Grp1a) */
2008
877
        generate_exception_if((modrm_reg & 7) != 0, X86_EXC_UD);
2009
        /* 64-bit mode: POP defaults to a 64-bit operand. */
2010
874
        if ( mode_64bit() && (dst.bytes == 4) )
2011
215
            dst.bytes = 8;
2012
874
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
2013
874
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
2014
22
            goto done;
2015
852
        break;
2016
2017
852
    case 0x90: /* nop / xchg %%r8,%%rax */
2018
947
    case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
2019
947
        if ( !(rex_prefix & REX_B) )
2020
590
            break; /* nop / pause */
2021
        /* fall through */
2022
2023
2.75k
    case 0x91 ... 0x97: /* xchg reg,%%rax */
2024
2.75k
        dst.type = OP_REG;
2025
2.75k
        dst.bytes = op_bytes;
2026
2.75k
        dst.reg  = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3));
2027
2.75k
        dst.val  = *dst.reg;
2028
2.75k
        goto xchg;
2029
2030
698
    case 0x98: /* cbw/cwde/cdqe */
2031
698
        switch ( op_bytes )
2032
698
        {
2033
187
        case 2: _regs.ax = (int8_t)_regs.ax; break; /* cbw */
2034
302
        case 4: _regs.r(ax) = (uint32_t)(int16_t)_regs.r(ax); break; /* cwde */
2035
209
        case 8: _regs.r(ax) = (int32_t)_regs.r(ax); break; /* cdqe */
2036
698
        }
2037
698
        break;
2038
2039
834
    case 0x99: /* cwd/cdq/cqo */
2040
834
        switch ( op_bytes )
2041
834
        {
2042
295
        case 2: _regs.dx = -((int16_t)_regs.ax < 0); break;
2043
292
        case 4: _regs.r(dx) = (uint32_t)-((int32_t)_regs.eax < 0); break;
2044
0
#ifdef __x86_64__
2045
247
        case 8: _regs.rdx = -((int64_t)_regs.rax < 0); break;
2046
834
#endif
2047
834
        }
2048
834
        break;
2049
2050
834
    case 0x9a: /* call (far, absolute) */
2051
217
        ASSERT(!mode_64bit());
2052
563
    far_call:
2053
563
        fail_if(!ops->read_segment || !ops->write);
2054
2055
562
        if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
2056
562
             (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
2057
562
             (validate_far_branch(&cs, imm1),
2058
492
              src.val = sreg.sel,
2059
492
              rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
2060
492
                              &src.val, op_bytes, ctxt)) ||
2061
562
             (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
2062
472
                              &_regs.r(ip), op_bytes, ctxt)) ||
2063
562
             (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
2064
85
            goto done;
2065
2066
477
        _regs.r(ip) = imm1;
2067
477
        singlestep = _regs.eflags & X86_EFLAGS_TF;
2068
477
        break;
2069
2070
0
#ifndef X86EMUL_NO_FPU
2071
5
    case 0x9b:  /* wait/fwait */
2072
62.6k
    case 0xd8 ... 0xdf: /* FPU */
2073
62.6k
        state->stub_exn = &stub_exn;
2074
62.6k
        rc = x86emul_fpu(state, &_regs, &dst, &src, ctxt, ops,
2075
62.6k
                         &insn_bytes, &fpu_type, mmvalp);
2076
62.6k
        goto dispatch_from_helper;
2077
0
#endif
2078
2079
858
    case 0x9c: /* pushf */
2080
858
        if ( (_regs.eflags & X86_EFLAGS_VM) &&
2081
858
             MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 )
2082
414
        {
2083
414
            cr4 = 0;
2084
414
            if ( op_bytes == 2 && ops->read_cr )
2085
412
            {
2086
412
                rc = ops->read_cr(4, &cr4, ctxt);
2087
412
                if ( rc != X86EMUL_OKAY )
2088
0
                    goto done;
2089
412
            }
2090
414
            generate_exception_if(!(cr4 & X86_CR4_VME), X86_EXC_GP, 0);
2091
412
            src.val = (_regs.flags & ~X86_EFLAGS_IF) | X86_EFLAGS_IOPL;
2092
412
            if ( _regs.eflags & X86_EFLAGS_VIF )
2093
194
                src.val |= X86_EFLAGS_IF;
2094
412
        }
2095
444
        else
2096
444
            src.val = _regs.r(flags) & ~(X86_EFLAGS_VM | X86_EFLAGS_RF);
2097
856
        goto push;
2098
2099
1.81k
    case 0x9d: /* popf */ {
2100
        /*
2101
         * Bits which may not be modified by this instruction. RF is handled
2102
         * uniformly during instruction retirement.
2103
         */
2104
1.81k
        uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
2105
2106
1.81k
        cr4 = 0;
2107
1.81k
        if ( !mode_ring0() )
2108
1.54k
        {
2109
1.54k
            if ( _regs.eflags & X86_EFLAGS_VM )
2110
867
            {
2111
867
                if ( op_bytes == 2 && ops->read_cr )
2112
477
                {
2113
477
                    rc = ops->read_cr(4, &cr4, ctxt);
2114
477
                    if ( rc != X86EMUL_OKAY )
2115
0
                        goto done;
2116
477
                }
2117
                /* All IOPL != 3 POPFs fail, except in vm86 mode. */
2118
867
                generate_exception_if(!(cr4 & X86_CR4_VME) &&
2119
867
                                      MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3,
2120
867
                                      X86_EXC_GP, 0);
2121
867
            }
2122
            /*
2123
             * IOPL cannot be modified outside of CPL 0.  IF cannot be
2124
             * modified if IOPL < CPL.
2125
             */
2126
1.54k
            mask |= X86_EFLAGS_IOPL;
2127
1.54k
            if ( !mode_iopl() )
2128
577
                mask |= X86_EFLAGS_IF;
2129
1.54k
        }
2130
        /* 64-bit mode: POPF defaults to a 64-bit operand. */
2131
1.81k
        if ( mode_64bit() && (op_bytes == 4) )
2132
112
            op_bytes = 8;
2133
1.81k
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
2134
1.81k
                              &dst.val, op_bytes, ctxt, ops)) != 0 )
2135
36
            goto done;
2136
1.77k
        if ( op_bytes == 2 )
2137
1.13k
        {
2138
            /* 16-bit POPF preserves the upper 16 bits of EFLAGS. */
2139
1.13k
            dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u);
2140
            /* VME processing only applies at IOPL != 3. */
2141
1.13k
            if ( (cr4 & X86_CR4_VME) &&
2142
1.13k
                 MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 )
2143
277
            {
2144
277
                generate_exception_if(dst.val & X86_EFLAGS_TF, X86_EXC_GP, 0);
2145
276
                if ( dst.val & X86_EFLAGS_IF )
2146
206
                {
2147
206
                    generate_exception_if(_regs.eflags & X86_EFLAGS_VIP,
2148
206
                                          X86_EXC_GP, 0);
2149
205
                    dst.val |= X86_EFLAGS_VIF;
2150
205
                }
2151
70
                else
2152
70
                    dst.val &= ~X86_EFLAGS_VIF;
2153
275
                mask &= ~X86_EFLAGS_VIF;
2154
275
            }
2155
1.13k
        }
2156
1.77k
        dst.val &= EFLAGS_MODIFIABLE;
2157
1.77k
        _regs.eflags &= mask;
2158
1.77k
        _regs.eflags |= (dst.val & ~mask) | X86_EFLAGS_MBS;
2159
1.77k
        break;
2160
1.77k
    }
2161
2162
409
    case 0x9e: /* sahf */
2163
409
        if ( mode_64bit() )
2164
409
            vcpu_must_have(lahf_lm);
2165
409
        *(uint8_t *)&_regs.eflags = (_regs.ah & EFLAGS_MASK) | X86_EFLAGS_MBS;
2166
409
        break;
2167
2168
270
    case 0x9f: /* lahf */
2169
270
        if ( mode_64bit() )
2170
270
            vcpu_must_have(lahf_lm);
2171
270
        _regs.ah = (_regs.eflags & EFLAGS_MASK) | X86_EFLAGS_MBS;
2172
270
        break;
2173
2174
5.63k
    case 0xa4 ... 0xa5: /* movs */ {
2175
5.63k
        unsigned long nr_reps = get_rep_prefix(true, true);
2176
2177
4.98k
        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
2178
4.98k
        dst.mem.seg = x86_seg_es;
2179
4.98k
        dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
2180
4.98k
        src.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
2181
4.98k
        if ( (nr_reps == 1) || !ops->rep_movs ||
2182
4.98k
             ((rc = ops->rep_movs(ea.mem.seg, src.mem.off,
2183
1.30k
                                  dst.mem.seg, dst.mem.off, dst.bytes,
2184
1.30k
                                  &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
2185
4.04k
        {
2186
4.04k
            if ( (rc = read_ulong(ea.mem.seg, src.mem.off,
2187
4.04k
                                  &dst.val, dst.bytes, ctxt, ops)) != 0 )
2188
370
                goto done;
2189
3.67k
            dst.type = OP_MEM;
2190
3.67k
            nr_reps = 1;
2191
3.67k
        }
2192
4.61k
        register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
2193
4.61k
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
2194
4.61k
        put_rep_prefix(nr_reps);
2195
4.37k
        if ( rc != X86EMUL_OKAY )
2196
0
            goto done;
2197
4.37k
        break;
2198
4.37k
    }
2199
2200
5.11k
    case 0xa6 ... 0xa7: /* cmps */ {
2201
5.11k
        unsigned long next_eip = _regs.r(ip);
2202
2203
5.11k
        get_rep_prefix(false, false /* don't extend RSI/RDI */);
2204
4.43k
        src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
2205
4.43k
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
2206
4.43k
                              &dst.val, dst.bytes, ctxt, ops)) ||
2207
4.43k
             (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
2208
4.29k
                              &src.val, src.bytes, ctxt, ops)) )
2209
164
            goto done;
2210
4.26k
        register_address_adjust(_regs.r(si), dst.bytes);
2211
4.26k
        register_address_adjust(_regs.r(di), src.bytes);
2212
4.26k
        put_rep_prefix(1);
2213
        /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
2214
4.26k
        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
2215
4.26k
        if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
2216
4.26k
             (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
2217
1.25k
            _regs.r(ip) = next_eip;
2218
4.26k
        break;
2219
4.26k
    }
2220
2221
4.89k
    case 0xaa ... 0xab: /* stos */ {
2222
4.89k
        unsigned long nr_reps = get_rep_prefix(false, true);
2223
2224
0
        dst.bytes = src.bytes;
2225
4.33k
        dst.mem.seg = x86_seg_es;
2226
4.33k
        dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
2227
4.33k
        if ( (nr_reps == 1) || !ops->rep_stos ||
2228
4.33k
             ((rc = ops->rep_stos(&src.val,
2229
1.13k
                                  dst.mem.seg, dst.mem.off, dst.bytes,
2230
1.13k
                                  &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
2231
3.26k
        {
2232
3.26k
            dst.val = src.val;
2233
3.26k
            dst.type = OP_MEM;
2234
3.26k
            nr_reps = 1;
2235
3.26k
            rc = X86EMUL_OKAY;
2236
3.26k
        }
2237
4.33k
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
2238
4.33k
        put_rep_prefix(nr_reps);
2239
4.12k
        if ( rc != X86EMUL_OKAY )
2240
0
            goto done;
2241
4.12k
        break;
2242
4.12k
    }
2243
2244
4.12k
    case 0xac ... 0xad: /* lods */
2245
3.23k
        get_rep_prefix(false, false /* don't extend RSI/RDI */);
2246
2.48k
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
2247
2.48k
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
2248
114
            goto done;
2249
2.37k
        register_address_adjust(_regs.r(si), dst.bytes);
2250
2.37k
        put_rep_prefix(1);
2251
2.37k
        break;
2252
2253
3.87k
    case 0xae ... 0xaf: /* scas */ {
2254
3.87k
        unsigned long next_eip = _regs.r(ip);
2255
2256
3.87k
        get_rep_prefix(false, false /* don't extend RSI/RDI */);
2257
3.08k
        if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
2258
3.08k
                              &dst.val, src.bytes, ctxt, ops)) != 0 )
2259
99
            goto done;
2260
2.98k
        register_address_adjust(_regs.r(di), src.bytes);
2261
2.98k
        put_rep_prefix(1);
2262
        /* cmp: %%eax - *%%edi ==> src=%%eax,dst=*%%edi ==> src - dst */
2263
2.98k
        dst.bytes = src.bytes;
2264
2.98k
        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
2265
2.98k
        if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
2266
2.98k
             (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
2267
787
            _regs.r(ip) = next_eip;
2268
2.98k
        break;
2269
2.98k
    }
2270
2271
2.56k
    case 0xb0 ... 0xb7: /* mov imm8,r8 */
2272
2.56k
        dst.reg = _decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3),
2273
2.56k
                              !rex_prefix);
2274
2.56k
        dst.val = src.val;
2275
2.56k
        break;
2276
2277
1.98k
    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
2278
1.98k
        dst.reg = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3));
2279
1.98k
        dst.val = src.val;
2280
1.98k
        break;
2281
2282
5.68k
    case 0xc0 ... 0xc1: grp2: /* Grp2 */
2283
5.68k
        generate_exception_if(lock_prefix, X86_EXC_UD);
2284
2285
5.68k
        switch ( modrm_reg & 7 )
2286
5.68k
        {
2287
0
#define GRP2(name, ext) \
2288
5.68k
        case ext: \
2289
5.68k
            if ( ops->rmw && dst.type == OP_MEM ) \
2290
5.68k
                state->rmw = rmw_##name; \
2291
5.68k
            else \
2292
5.68k
                emulate_2op_SrcB(#name, src, dst, _regs.eflags); \
2293
5.68k
            break
2294
2295
903
        GRP2(rol, 0);
2296
903
        GRP2(ror, 1);
2297
896
        GRP2(rcl, 2);
2298
1.08k
        GRP2(rcr, 3);
2299
1.08k
        case 6: /* sal/shl alias */
2300
719
        GRP2(shl, 4);
2301
805
        GRP2(shr, 5);
2302
5.68k
        GRP2(sar, 7);
2303
5.68k
#undef GRP2
2304
5.68k
        }
2305
5.68k
        break;
2306
2307
5.68k
    case 0xc2: /* ret imm16 (near) */
2308
924
    case 0xc3: /* ret (near) */
2309
924
        op_bytes = (op_bytes == 4 || !amd_like(ctxt)) && mode_64bit()
2310
924
                   ? 8 : op_bytes;
2311
924
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
2312
924
                              &dst.val, op_bytes, ctxt, ops)) != 0 ||
2313
924
             (rc = ops->insn_fetch(dst.val, NULL, 0, ctxt)) )
2314
35
            goto done;
2315
889
        _regs.r(ip) = dst.val;
2316
889
        adjust_bnd(ctxt, ops, vex.pfx);
2317
889
        break;
2318
2319
235
    case 0xc4: /* les */
2320
382
    case 0xc5: /* lds */
2321
382
        seg = (b & 1) * 3; /* es = 0, ds = 3 */
2322
856
    les:
2323
856
        generate_exception_if(src.type != OP_MEM, X86_EXC_UD);
2324
849
        if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + src.bytes),
2325
849
                              &dst.val, 2, ctxt, ops)) != X86EMUL_OKAY )
2326
23
            goto done;
2327
826
        ASSERT(is_x86_user_segment(seg));
2328
826
        if ( (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
2329
9
            goto done;
2330
817
        dst.val = src.val;
2331
817
        break;
2332
2333
1.11k
    case 0xc8: /* enter imm16,imm8 */
2334
1.11k
        dst.type = OP_REG;
2335
1.11k
        dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
2336
1.11k
        dst.reg = (unsigned long *)&_regs.r(bp);
2337
1.11k
        fail_if(!ops->write);
2338
1.10k
        if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
2339
1.10k
                              &_regs.r(bp), dst.bytes, ctxt)) )
2340
17
            goto done;
2341
1.09k
        dst.val = _regs.r(sp);
2342
2343
1.09k
        n = imm2 & 31;
2344
1.09k
        if ( n )
2345
768
        {
2346
1.42k
            for ( i = 1; i < n; i++ )
2347
690
            {
2348
690
                unsigned long ebp, temp_data;
2349
690
                ebp = truncate_word(_regs.r(bp) - i*dst.bytes, ctxt->sp_size/8);
2350
690
                if ( (rc = read_ulong(x86_seg_ss, ebp,
2351
690
                                      &temp_data, dst.bytes, ctxt, ops)) ||
2352
690
                     (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
2353
667
                                      &temp_data, dst.bytes, ctxt)) )
2354
30
                    goto done;
2355
690
            }
2356
738
            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
2357
738
                                  &dst.val, dst.bytes, ctxt)) )
2358
8
                goto done;
2359
738
        }
2360
2361
1.05k
        sp_pre_dec(src.val);
2362
1.05k
        break;
2363
2364
859
    case 0xc9: /* leave */
2365
        /* First writeback, to %%esp. */
2366
859
        dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
2367
859
        if ( dst.bytes == 2 )
2368
214
            _regs.sp = _regs.bp;
2369
645
        else
2370
645
            _regs.r(sp) = dst.bytes == 4 ? _regs.ebp : _regs.r(bp);
2371
2372
        /* Second writeback, to %%ebp. */
2373
859
        dst.type = OP_REG;
2374
859
        dst.reg = (unsigned long *)&_regs.r(bp);
2375
859
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
2376
859
                              &dst.val, dst.bytes, ctxt, ops)) )
2377
24
            goto done;
2378
835
        break;
2379
2380
835
    case 0xca: /* ret imm16 (far) */
2381
374
    case 0xcb: /* ret (far) */
2382
374
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
2383
374
                              &dst.val, op_bytes, ctxt, ops)) ||
2384
374
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
2385
364
                              &src.val, op_bytes, ctxt, ops)) ||
2386
374
             (rc = load_seg(x86_seg_cs, src.val, 1, &cs, ctxt, ops)) ||
2387
374
             (rc = commit_far_branch(&cs, dst.val)) )
2388
38
            goto done;
2389
336
        break;
2390
2391
393
    case 0xce: /* into */
2392
393
        if ( !(_regs.eflags & X86_EFLAGS_OF) )
2393
391
            break;
2394
        /* Fallthrough */
2395
6
    case 0xcc: /* int3 */
2396
9
    case 0xcd: /* int imm8 */
2397
10
    case 0xf1: /* int1 (icebp) */
2398
10
        ASSERT(!ctxt->event_pending);
2399
10
        switch ( ctxt->opcode )
2400
10
        {
2401
4
        case 0xcc: /* int3 */
2402
4
            ctxt->event.vector = X86_EXC_BP;
2403
4
            ctxt->event.type = X86_ET_SW_EXC;
2404
4
            break;
2405
3
        case 0xcd: /* int imm8 */
2406
3
            ctxt->event.vector = imm1;
2407
3
            ctxt->event.type = X86_ET_SW_INT;
2408
3
            break;
2409
2
        case 0xce: /* into */
2410
2
            ctxt->event.vector = X86_EXC_OF;
2411
2
            ctxt->event.type = X86_ET_SW_EXC;
2412
2
            break;
2413
1
        case 0xf1: /* icebp */
2414
1
            ctxt->event.vector = X86_EXC_DB;
2415
1
            ctxt->event.type = X86_ET_PRIV_SW_EXC;
2416
1
            break;
2417
10
        }
2418
10
        ctxt->event.error_code = X86_EVENT_NO_EC;
2419
10
        ctxt->event.insn_len = _regs.r(ip) - ctxt->regs->r(ip);
2420
10
        ctxt->event_pending = true;
2421
10
        rc = X86EMUL_EXCEPTION;
2422
10
        goto done;
2423
2424
362
    case 0xcf: /* iret */ {
2425
362
        unsigned long sel, eip, eflags;
2426
362
        uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
2427
2428
362
        fail_if(!in_realmode(ctxt, ops));
2429
357
        ctxt->retire.unblock_nmi = true;
2430
357
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
2431
357
                              &eip, op_bytes, ctxt, ops)) ||
2432
357
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
2433
353
                              &sel, op_bytes, ctxt, ops)) ||
2434
357
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
2435
352
                              &eflags, op_bytes, ctxt, ops)) )
2436
7
            goto done;
2437
350
        if ( op_bytes == 2 )
2438
282
            eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u);
2439
350
        eflags &= EFLAGS_MODIFIABLE;
2440
350
        _regs.eflags &= mask;
2441
350
        _regs.eflags |= (eflags & ~mask) | X86_EFLAGS_MBS;
2442
350
        if ( (rc = load_seg(x86_seg_cs, sel, 1, &cs, ctxt, ops)) ||
2443
350
             (rc = commit_far_branch(&cs, (uint32_t)eip)) )
2444
10
            goto done;
2445
340
        break;
2446
350
    }
2447
2448
1.07k
    case 0xd0 ... 0xd1: /* Grp2 */
2449
1.07k
        src.val = 1;
2450
1.07k
        goto grp2;
2451
2452
2.56k
    case 0xd2 ... 0xd3: /* Grp2 */
2453
2.56k
        src.val = _regs.cl;
2454
2.56k
        goto grp2;
2455
2456
170
    case 0xd4: /* aam */
2457
1.06k
    case 0xd5: /* aad */
2458
1.06k
        n = (uint8_t)src.val;
2459
1.06k
        if ( b & 0x01 )
2460
896
            _regs.ax = (uint8_t)(_regs.al + (_regs.ah * n));
2461
170
        else
2462
170
        {
2463
170
            generate_exception_if(!n, X86_EXC_DE);
2464
168
            _regs.al = _regs.al % n;
2465
168
            _regs.ah = _regs.al / n;
2466
168
        }
2467
1.06k
        _regs.eflags &= ~(X86_EFLAGS_SF | X86_EFLAGS_ZF | X86_EFLAGS_PF);
2468
1.06k
        _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
2469
1.06k
        _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
2470
1.06k
        _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
2471
1.06k
        break;
2472
2473
453
    case 0xd6: /* salc */
2474
453
        _regs.al = (_regs.eflags & X86_EFLAGS_CF) ? 0xff : 0x00;
2475
453
        break;
2476
2477
626
    case 0xd7: /* xlat */ {
2478
626
        unsigned long al;
2479
2480
626
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(bx) + _regs.al),
2481
626
                              &al, 1, ctxt, ops)) != 0 )
2482
7
            goto done;
2483
619
        _regs.al = al;
2484
619
        break;
2485
626
    }
2486
2487
2.16k
    case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
2488
2.16k
        unsigned long count = get_loop_count(&_regs, ad_bytes);
2489
2.16k
        int do_jmp = !(_regs.eflags & X86_EFLAGS_ZF); /* loopnz */
2490
2491
2.16k
        if ( b == 0xe1 )
2492
752
            do_jmp = !do_jmp; /* loopz */
2493
1.41k
        else if ( b == 0xe2 )
2494
1.05k
            do_jmp = 1; /* loop */
2495
2.16k
        if ( count != 1 && do_jmp )
2496
1.19k
            jmp_rel((int32_t)src.val);
2497
2.09k
        put_loop_count(&_regs, ad_bytes, count - 1);
2498
2.09k
        break;
2499
2.16k
    }
2500
2501
1.40k
    case 0xe3: /* jcxz/jecxz (short) */
2502
1.40k
        if ( !get_loop_count(&_regs, ad_bytes) )
2503
806
            jmp_rel((int32_t)src.val);
2504
1.38k
        break;
2505
2506
1.38k
    case 0xe4: /* in imm8,%al */
2507
343
    case 0xe5: /* in imm8,%eax */
2508
421
    case 0xe6: /* out %al,imm8 */
2509
566
    case 0xe7: /* out %eax,imm8 */
2510
979
    case 0xec: /* in %dx,%al */
2511
1.19k
    case 0xed: /* in %dx,%eax */
2512
1.27k
    case 0xee: /* out %al,%dx */
2513
1.74k
    case 0xef: /* out %eax,%dx */ {
2514
1.74k
        unsigned int port = ((b < 0xe8) ? (uint8_t)src.val : _regs.dx);
2515
2516
1.74k
        op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
2517
1.74k
        if ( (rc = ioport_access_check(port, op_bytes, ctxt, ops)) != 0 )
2518
15
            goto done;
2519
1.72k
        if ( b & 2 )
2520
756
        {
2521
            /* out */
2522
756
            fail_if(ops->write_io == NULL);
2523
755
            rc = ops->write_io(port, op_bytes, _regs.eax, ctxt);
2524
755
        }
2525
969
        else
2526
969
        {
2527
            /* in */
2528
969
            dst.bytes = op_bytes;
2529
969
            fail_if(ops->read_io == NULL);
2530
966
            rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
2531
966
        }
2532
1.72k
        if ( rc != 0 )
2533
40
        {
2534
40
            if ( rc == X86EMUL_DONE )
2535
0
                goto complete_insn;
2536
40
            goto done;
2537
40
        }
2538
1.68k
        break;
2539
1.72k
    }
2540
2541
1.68k
    case 0xe8: /* call (near) */ {
2542
778
        int32_t rel = src.val;
2543
2544
778
        op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
2545
778
        src.val = _regs.r(ip);
2546
778
        jmp_rel(rel);
2547
771
        adjust_bnd(ctxt, ops, vex.pfx);
2548
771
        goto push;
2549
778
    }
2550
2551
482
    case 0xe9: /* jmp (near) */
2552
1.34k
    case 0xeb: /* jmp (short) */
2553
1.34k
        jmp_rel((int32_t)src.val);
2554
1.31k
        if ( !(b & 2) )
2555
478
            adjust_bnd(ctxt, ops, vex.pfx);
2556
1.31k
        break;
2557
2558
169
    case 0xea: /* jmp (far, absolute) */
2559
169
        ASSERT(!mode_64bit());
2560
559
    far_jmp:
2561
559
        if ( (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
2562
559
             (rc = commit_far_branch(&cs, imm1)) )
2563
72
            goto done;
2564
487
        break;
2565
2566
487
    case 0xf4: /* hlt */
2567
223
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
2568
221
        ctxt->retire.hlt = true;
2569
221
        break;
2570
2571
432
    case 0xf5: /* cmc */
2572
432
        _regs.eflags ^= X86_EFLAGS_CF;
2573
432
        break;
2574
2575
8.21k
    case 0xf6 ... 0xf7: /* Grp3 */
2576
8.21k
        if ( (d & DstMask) == DstEax )
2577
6.11k
            dst.reg = (unsigned long *)&_regs.r(ax);
2578
8.21k
        switch ( modrm_reg & 7 )
2579
8.21k
        {
2580
0
            unsigned long u[2], v;
2581
2582
944
        case 0 ... 1: /* test */
2583
944
            dst.val = imm1;
2584
944
            dst.bytes = src.bytes;
2585
944
            goto test;
2586
226
        case 2: /* not */
2587
226
            if ( ops->rmw && dst.type == OP_MEM )
2588
0
                state->rmw = rmw_not;
2589
226
            else
2590
226
                dst.val = ~dst.val;
2591
226
            break;
2592
931
        case 3: /* neg */
2593
931
            if ( ops->rmw && dst.type == OP_MEM )
2594
0
                state->rmw = rmw_neg;
2595
931
            else
2596
931
                emulate_1op("neg", dst, _regs.eflags);
2597
931
            break;
2598
1.86k
        case 4: /* mul */
2599
1.86k
            _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
2600
1.86k
            switch ( dst.bytes )
2601
1.86k
            {
2602
388
            case 1:
2603
388
                dst.val = _regs.al;
2604
388
                dst.val *= src.val;
2605
388
                if ( (uint8_t)dst.val != (uint16_t)dst.val )
2606
199
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2607
388
                dst.bytes = 2;
2608
388
                break;
2609
413
            case 2:
2610
413
                dst.val = _regs.ax;
2611
413
                dst.val *= src.val;
2612
413
                if ( (uint16_t)dst.val != (uint32_t)dst.val )
2613
218
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2614
413
                _regs.dx = dst.val >> 16;
2615
413
                break;
2616
0
#ifdef __x86_64__
2617
532
            case 4:
2618
532
                dst.val = _regs.eax;
2619
532
                dst.val *= src.val;
2620
532
                if ( (uint32_t)dst.val != dst.val )
2621
289
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2622
532
                _regs.rdx = dst.val >> 32;
2623
532
                break;
2624
0
#endif
2625
533
            default:
2626
533
                u[0] = src.val;
2627
533
                u[1] = _regs.r(ax);
2628
533
                if ( mul_dbl(u) )
2629
343
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2630
533
                _regs.r(dx) = u[1];
2631
533
                dst.val = u[0];
2632
533
                break;
2633
1.86k
            }
2634
1.86k
            break;
2635
1.86k
        case 5: /* imul */
2636
1.39k
            dst.val = _regs.r(ax);
2637
2.84k
        imul:
2638
2.84k
            _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
2639
2.84k
            switch ( dst.bytes )
2640
2.84k
            {
2641
283
            case 1:
2642
283
                dst.val = (int8_t)src.val * (int8_t)dst.val;
2643
283
                if ( (int8_t)dst.val != (int16_t)dst.val )
2644
224
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2645
283
                ASSERT(b > 0x6b);
2646
283
                dst.bytes = 2;
2647
283
                break;
2648
1.17k
            case 2:
2649
1.17k
                dst.val = ((uint32_t)(int16_t)src.val *
2650
1.17k
                           (uint32_t)(int16_t)dst.val);
2651
1.17k
                if ( (int16_t)dst.val != (int32_t)dst.val )
2652
552
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2653
1.17k
                if ( b > 0x6b )
2654
449
                    _regs.dx = dst.val >> 16;
2655
1.17k
                break;
2656
0
#ifdef __x86_64__
2657
939
            case 4:
2658
939
                dst.val = ((uint64_t)(int32_t)src.val *
2659
939
                           (uint64_t)(int32_t)dst.val);
2660
939
                if ( (int32_t)dst.val != dst.val )
2661
593
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2662
939
                if ( b > 0x6b )
2663
540
                    _regs.rdx = dst.val >> 32;
2664
939
                break;
2665
0
#endif
2666
446
            default:
2667
446
                u[0] = src.val;
2668
446
                u[1] = dst.val;
2669
446
                if ( imul_dbl(u) )
2670
226
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
2671
446
                if ( b > 0x6b )
2672
124
                    _regs.r(dx) = u[1];
2673
446
                dst.val = u[0];
2674
446
                break;
2675
2.84k
            }
2676
2.84k
            break;
2677
2.84k
        case 6: /* div */
2678
1.06k
            switch ( src.bytes )
2679
1.06k
            {
2680
160
            case 1:
2681
160
                u[0] = _regs.ax;
2682
160
                u[1] = 0;
2683
160
                v    = (uint8_t)src.val;
2684
160
                generate_exception_if(
2685
160
                    div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
2686
160
                    X86_EXC_DE);
2687
152
                dst.val = (uint8_t)u[0];
2688
152
                _regs.ah = u[1];
2689
152
                break;
2690
255
            case 2:
2691
255
                u[0] = (_regs.edx << 16) | _regs.ax;
2692
255
                u[1] = 0;
2693
255
                v    = (uint16_t)src.val;
2694
255
                generate_exception_if(
2695
255
                    div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
2696
255
                    X86_EXC_DE);
2697
234
                dst.val = (uint16_t)u[0];
2698
234
                _regs.dx = u[1];
2699
234
                break;
2700
0
#ifdef __x86_64__
2701
318
            case 4:
2702
318
                u[0] = (_regs.rdx << 32) | _regs.eax;
2703
318
                u[1] = 0;
2704
318
                v    = (uint32_t)src.val;
2705
318
                generate_exception_if(
2706
318
                    div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
2707
318
                    X86_EXC_DE);
2708
262
                dst.val   = (uint32_t)u[0];
2709
262
                _regs.rdx = (uint32_t)u[1];
2710
262
                break;
2711
0
#endif
2712
329
            default:
2713
329
                u[0] = _regs.r(ax);
2714
329
                u[1] = _regs.r(dx);
2715
329
                v    = src.val;
2716
329
                generate_exception_if(div_dbl(u, v), X86_EXC_DE);
2717
275
                dst.val     = u[0];
2718
275
                _regs.r(dx) = u[1];
2719
275
                break;
2720
1.06k
            }
2721
923
            break;
2722
1.79k
        case 7: /* idiv */
2723
1.79k
            switch ( src.bytes )
2724
1.79k
            {
2725
254
            case 1:
2726
254
                u[0] = (int16_t)_regs.ax;
2727
254
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
2728
254
                v    = (int8_t)src.val;
2729
254
                generate_exception_if(
2730
254
                    idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
2731
254
                    X86_EXC_DE);
2732
246
                dst.val = (int8_t)u[0];
2733
246
                _regs.ah = u[1];
2734
246
                break;
2735
473
            case 2:
2736
473
                u[0] = (int32_t)((_regs.edx << 16) | _regs.ax);
2737
473
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
2738
473
                v    = (int16_t)src.val;
2739
473
                generate_exception_if(
2740
473
                    idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
2741
473
                    X86_EXC_DE);
2742
433
                dst.val = (int16_t)u[0];
2743
433
                _regs.dx = u[1];
2744
433
                break;
2745
0
#ifdef __x86_64__
2746
455
            case 4:
2747
455
                u[0] = (_regs.rdx << 32) | _regs.eax;
2748
455
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
2749
455
                v    = (int32_t)src.val;
2750
455
                generate_exception_if(
2751
455
                    idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
2752
455
                    X86_EXC_DE);
2753
310
                dst.val   = (int32_t)u[0];
2754
310
                _regs.rdx = (uint32_t)u[1];
2755
310
                break;
2756
0
#endif
2757
609
            default:
2758
609
                u[0] = _regs.r(ax);
2759
609
                u[1] = _regs.r(dx);
2760
609
                v    = src.val;
2761
609
                generate_exception_if(idiv_dbl(u, v), X86_EXC_DE);
2762
382
                dst.val     = u[0];
2763
382
                _regs.r(dx) = u[1];
2764
382
                break;
2765
1.79k
            }
2766
1.37k
            break;
2767
8.21k
        }
2768
8.16k
        break;
2769
2770
8.16k
    case 0xf8: /* clc */
2771
991
        _regs.eflags &= ~X86_EFLAGS_CF;
2772
991
        break;
2773
2774
3.08k
    case 0xf9: /* stc */
2775
3.08k
        _regs.eflags |= X86_EFLAGS_CF;
2776
3.08k
        break;
2777
2778
1.60k
    case 0xfa: /* cli */
2779
1.60k
        if ( mode_iopl() )
2780
872
            _regs.eflags &= ~X86_EFLAGS_IF;
2781
737
        else
2782
737
        {
2783
737
            generate_exception_if(!mode_vif(), X86_EXC_GP, 0);
2784
733
            _regs.eflags &= ~X86_EFLAGS_VIF;
2785
733
        }
2786
1.60k
        break;
2787
2788
1.91k
    case 0xfb: /* sti */
2789
1.91k
        if ( mode_iopl() )
2790
1.03k
        {
2791
1.03k
            if ( !(_regs.eflags & X86_EFLAGS_IF) )
2792
555
                ctxt->retire.sti = true;
2793
1.03k
            _regs.eflags |= X86_EFLAGS_IF;
2794
1.03k
        }
2795
882
        else
2796
882
        {
2797
882
            generate_exception_if((_regs.eflags & X86_EFLAGS_VIP) ||
2798
882
          !mode_vif(),
2799
882
                                  X86_EXC_GP, 0);
2800
878
            if ( !(_regs.eflags & X86_EFLAGS_VIF) )
2801
530
                ctxt->retire.sti = true;
2802
878
            _regs.eflags |= X86_EFLAGS_VIF;
2803
878
        }
2804
1.91k
        break;
2805
2806
1.91k
    case 0xfc: /* cld */
2807
427
        _regs.eflags &= ~X86_EFLAGS_DF;
2808
427
        break;
2809
2810
1.02k
    case 0xfd: /* std */
2811
1.02k
        _regs.eflags |= X86_EFLAGS_DF;
2812
1.02k
        break;
2813
2814
353
    case 0xfe: /* Grp4 */
2815
353
        generate_exception_if((modrm_reg & 7) >= 2, X86_EXC_UD);
2816
        /* Fallthrough. */
2817
4.16k
    case 0xff: /* Grp5 */
2818
4.16k
        switch ( modrm_reg & 7 )
2819
4.16k
        {
2820
702
        case 0: /* inc */
2821
702
            if ( ops->rmw && dst.type == OP_MEM )
2822
0
                state->rmw = rmw_inc;
2823
702
            else
2824
702
                emulate_1op("inc", dst, _regs.eflags);
2825
702
            break;
2826
854
        case 1: /* dec */
2827
854
            if ( ops->rmw && dst.type == OP_MEM )
2828
0
                state->rmw = rmw_dec;
2829
854
            else
2830
854
                emulate_1op("dec", dst, _regs.eflags);
2831
854
            break;
2832
854
        case 2: /* call (near) */
2833
76
            dst.val = _regs.r(ip);
2834
76
            if ( (rc = ops->insn_fetch(src.val, NULL, 0, ctxt)) )
2835
1
                goto done;
2836
75
            _regs.r(ip) = src.val;
2837
75
            src.val = dst.val;
2838
75
            adjust_bnd(ctxt, ops, vex.pfx);
2839
75
            goto push;
2840
854
        case 4: /* jmp (near) */
2841
854
            if ( (rc = ops->insn_fetch(src.val, NULL, 0, ctxt)) )
2842
7
                goto done;
2843
847
            _regs.r(ip) = src.val;
2844
847
            dst.type = OP_NONE;
2845
847
            adjust_bnd(ctxt, ops, vex.pfx);
2846
847
            break;
2847
380
        case 3: /* call (far, absolute indirect) */
2848
806
        case 5: /* jmp (far, absolute indirect) */
2849
806
            generate_exception_if(src.type != OP_MEM, X86_EXC_UD);
2850
2851
803
            if ( (rc = read_ulong(src.mem.seg,
2852
803
                                  truncate_ea(src.mem.off + op_bytes),
2853
803
                                  &imm2, 2, ctxt, ops)) )
2854
67
                goto done;
2855
803
            imm1 = src.val;
2856
736
            if ( !(modrm_reg & 4) )
2857
346
                goto far_call;
2858
390
            goto far_jmp;
2859
865
        case 6: /* push */
2860
865
            goto push;
2861
4
        case 7:
2862
4
            generate_exception(X86_EXC_UD);
2863
4.16k
        }
2864
2.40k
        break;
2865
2866
2.76k
    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
2867
2.76k
        seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
2868
2.76k
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD);
2869
2.76k
        switch ( modrm_reg & 6 )
2870
2.76k
        {
2871
1.22k
        case 0: /* sldt / str */
2872
1.22k
            generate_exception_if(umip_active(ctxt, ops), X86_EXC_GP, 0);
2873
1.22k
            goto store_selector;
2874
1.22k
        case 2: /* lldt / ltr */
2875
374
            generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
2876
372
            if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
2877
43
                goto done;
2878
329
            break;
2879
1.16k
        case 4: /* verr / verw */
2880
1.16k
            _regs.eflags &= ~X86_EFLAGS_ZF;
2881
1.16k
            switch ( rc = protmode_load_seg(x86_seg_none, src.val, false,
2882
1.16k
                                            &sreg, ctxt, ops) )
2883
1.16k
            {
2884
899
            case X86EMUL_OKAY:
2885
899
                if ( sreg.s /* Excludes NUL selectors too. */ &&
2886
899
                     ((modrm_reg & 1) ? ((sreg.type & 0xa) == 0x2)
2887
612
                                      : ((sreg.type & 0xa) != 0x8)) )
2888
125
                    _regs.eflags |= X86_EFLAGS_ZF;
2889
899
                break;
2890
264
            case X86EMUL_EXCEPTION:
2891
264
                if ( ctxt->event_pending )
2892
1
                {
2893
1
                    ASSERT(ctxt->event.vector == X86_EXC_PF);
2894
3
            default:
2895
3
                    goto done;
2896
1
                }
2897
                /* Instead of the exception, ZF remains cleared. */
2898
263
                rc = X86EMUL_OKAY;
2899
263
                break;
2900
1.16k
            }
2901
1.16k
            break;
2902
1.16k
        default:
2903
1
            generate_exception_if(true, X86_EXC_UD);
2904
0
            break;
2905
2.76k
        }
2906
1.49k
        break;
2907
2908
36.3k
    case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */
2909
36.3k
        rc = x86emul_0f01(state, &_regs, &dst, ctxt, ops);
2910
36.3k
        goto dispatch_from_helper;
2911
2912
1.42k
    case X86EMUL_OPC(0x0f, 0x02): /* lar */
2913
1.42k
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD);
2914
1.42k
        _regs.eflags &= ~X86_EFLAGS_ZF;
2915
1.42k
        switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
2916
1.42k
                                        ctxt, ops) )
2917
1.42k
        {
2918
1.28k
        case X86EMUL_OKAY:
2919
1.28k
            if ( !sreg.s )
2920
1.20k
            {
2921
1.20k
                switch ( sreg.type )
2922
1.20k
                {
2923
73
                case 0x01: /* available 16-bit TSS */
2924
240
                case 0x03: /* busy 16-bit TSS */
2925
441
                case 0x04: /* 16-bit call gate */
2926
482
                case 0x05: /* 16/32-bit task gate */
2927
482
                    if ( ctxt->lma )
2928
23
                        break;
2929
                    /* fall through */
2930
524
                case 0x02: /* LDT */
2931
590
                case 0x09: /* available 32/64-bit TSS */
2932
783
                case 0x0b: /* busy 32/64-bit TSS */
2933
982
                case 0x0c: /* 32/64-bit call gate */
2934
982
                    _regs.eflags |= X86_EFLAGS_ZF;
2935
982
                    break;
2936
1.20k
                }
2937
1.20k
            }
2938
74
            else
2939
74
                _regs.eflags |= X86_EFLAGS_ZF;
2940
1.28k
            break;
2941
1.28k
        case X86EMUL_EXCEPTION:
2942
138
            if ( ctxt->event_pending )
2943
1
            {
2944
1
                ASSERT(ctxt->event.vector == X86_EXC_PF);
2945
2
        default:
2946
2
                goto done;
2947
1
            }
2948
            /* Instead of the exception, ZF remains cleared. */
2949
137
            rc = X86EMUL_OKAY;
2950
137
            break;
2951
1.42k
        }
2952
1.42k
        if ( _regs.eflags & X86_EFLAGS_ZF )
2953
1.05k
            dst.val = ((sreg.attr & 0xff) << 8) |
2954
1.05k
                      ((sreg.limit >> (sreg.g ? 12 : 0)) & 0xf0000) |
2955
1.05k
                      ((sreg.attr & 0xf00) << 12);
2956
364
        else
2957
364
            dst.type = OP_NONE;
2958
1.42k
        break;
2959
2960
2.57k
    case X86EMUL_OPC(0x0f, 0x03): /* lsl */
2961
2.57k
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_UD);
2962
2.57k
        _regs.eflags &= ~X86_EFLAGS_ZF;
2963
2.57k
        switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
2964
2.57k
                                        ctxt, ops) )
2965
2.57k
        {
2966
1.82k
        case X86EMUL_OKAY:
2967
1.82k
            if ( !sreg.s )
2968
1.36k
            {
2969
1.36k
                switch ( sreg.type )
2970
1.36k
                {
2971
185
                case 0x01: /* available 16-bit TSS */
2972
411
                case 0x03: /* busy 16-bit TSS */
2973
411
                    if ( ctxt->lma )
2974
45
                        break;
2975
                    /* fall through */
2976
441
                case 0x02: /* LDT */
2977
516
                case 0x09: /* available 32/64-bit TSS */
2978
848
                case 0x0b: /* busy 32/64-bit TSS */
2979
848
                    _regs.eflags |= X86_EFLAGS_ZF;
2980
848
                    break;
2981
1.36k
                }
2982
1.36k
            }
2983
463
            else
2984
463
                _regs.eflags |= X86_EFLAGS_ZF;
2985
1.82k
            break;
2986
1.82k
        case X86EMUL_EXCEPTION:
2987
744
            if ( ctxt->event_pending )
2988
3
            {
2989
3
                ASSERT(ctxt->event.vector == X86_EXC_PF);
2990
4
        default:
2991
4
                goto done;
2992
3
            }
2993
            /* Instead of the exception, ZF remains cleared. */
2994
741
            rc = X86EMUL_OKAY;
2995
741
            break;
2996
2.57k
        }
2997
2.56k
        if ( _regs.eflags & X86_EFLAGS_ZF )
2998
1.31k
            dst.val = sreg.limit;
2999
1.25k
        else
3000
1.25k
            dst.type = OP_NONE;
3001
2.56k
        break;
3002
3003
203
    case X86EMUL_OPC(0x0f, 0x05): /* syscall */
3004
        /*
3005
         * Inject #UD if syscall/sysret are disabled. EFER.SCE can't be set
3006
         * with the respective CPUID bit clear, so no need for an explicit
3007
         * check of that one.
3008
         */
3009
203
        fail_if(ops->read_msr == NULL);
3010
202
        if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY )
3011
0
            goto done;
3012
202
        generate_exception_if((msr_val & EFER_SCE) == 0, X86_EXC_UD);
3013
201
        generate_exception_if(!amd_like(ctxt) && !mode_64bit(), X86_EXC_UD);
3014
3015
201
        if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY )
3016
0
            goto done;
3017
3018
201
        cs.sel = (msr_val >> 32) & ~3; /* SELECTOR_RPL_MASK */
3019
201
        sreg.sel = cs.sel + 8;
3020
3021
201
        cs.base = sreg.base = 0; /* flat segment */
3022
201
        cs.limit = sreg.limit = ~0u;  /* 4GB limit */
3023
201
        sreg.attr = 0xc93; /* G+DB+P+S+Data */
3024
3025
201
#ifdef __x86_64__
3026
201
        if ( ctxt->lma )
3027
200
        {
3028
200
            cs.attr = 0xa9b; /* L+DB+P+S+Code */
3029
3030
200
            _regs.rcx = _regs.rip;
3031
200
            _regs.r11 = _regs.eflags & ~X86_EFLAGS_RF;
3032
3033
200
            if ( (rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR,
3034
200
                                     &msr_val, ctxt)) != X86EMUL_OKAY )
3035
0
                goto done;
3036
200
            _regs.rip = msr_val;
3037
3038
200
            if ( (rc = ops->read_msr(MSR_SYSCALL_MASK,
3039
200
                                     &msr_val, ctxt)) != X86EMUL_OKAY )
3040
0
                goto done;
3041
200
            _regs.eflags &= ~(msr_val | X86_EFLAGS_RF);
3042
200
        }
3043
1
        else
3044
1
#endif
3045
1
        {
3046
1
            cs.attr = 0xc9b; /* G+DB+P+S+Code */
3047
3048
1
            _regs.r(cx) = _regs.eip;
3049
1
            _regs.eip = msr_val;
3050
1
            _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
3051
1
        }
3052
3053
201
        fail_if(ops->write_segment == NULL);
3054
200
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ||
3055
200
             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) )
3056
5
            goto done;
3057
3058
195
        if ( ctxt->lma )
3059
            /* In particular mode_64bit() needs to return true from here on. */
3060
195
            ctxt->addr_size = ctxt->sp_size = 64;
3061
3062
        /*
3063
         * SYSCALL (unlike most instructions) evaluates its singlestep action
3064
         * based on the resulting EFLAGS.TF, not the starting EFLAGS.TF.
3065
         *
3066
         * As the #DB is raised after the CPL change and before the OS can
3067
         * switch stack, it is a large risk for privilege escalation.
3068
         *
3069
         * 64bit kernels should mask EFLAGS.TF in MSR_SYSCALL_MASK to avoid any
3070
         * vulnerability.  Running the #DB handler on an IST stack is also a
3071
         * mitigation.
3072
         *
3073
         * 32bit kernels have no ability to mask EFLAGS.TF at all.
3074
         * Their only mitigation is to use a task gate for handling
3075
         * #DB (or to not use enable EFER.SCE to start with).
3076
         */
3077
195
        singlestep = _regs.eflags & X86_EFLAGS_TF;
3078
195
        break;
3079
3080
777
    case X86EMUL_OPC(0x0f, 0x06): /* clts */
3081
777
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3082
776
        fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
3083
774
        if ( (rc = ops->read_cr(0, &dst.val, ctxt)) != X86EMUL_OKAY ||
3084
774
             (rc = ops->write_cr(0, dst.val & ~X86_CR0_TS, ctxt)) != X86EMUL_OKAY )
3085
4
            goto done;
3086
770
        break;
3087
3088
770
    case X86EMUL_OPC(0x0f, 0x07): /* sysret */
3089
        /*
3090
         * Inject #UD if syscall/sysret are disabled. EFER.SCE can't be set
3091
         * with the respective CPUID bit clear, so no need for an explicit
3092
         * check of that one.
3093
         */
3094
40
        fail_if(!ops->read_msr);
3095
39
        if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY )
3096
0
            goto done;
3097
39
        generate_exception_if(!(msr_val & EFER_SCE), X86_EXC_UD);
3098
38
        generate_exception_if(!amd_like(ctxt) && !mode_64bit(), X86_EXC_UD);
3099
38
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3100
35
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0);
3101
35
#ifdef __x86_64__
3102
        /*
3103
         * Doing this for just Intel (rather than e.g. !amd_like()) as this is
3104
         * in fact risking to make guest OSes vulnerable to the equivalent of
3105
         * XSA-7 (CVE-2012-0217).
3106
         */
3107
35
        generate_exception_if(cp->x86_vendor == X86_VENDOR_INTEL &&
3108
35
                              op_bytes == 8 && !is_canonical_address(_regs.rcx),
3109
35
                              X86_EXC_GP, 0);
3110
35
#endif
3111
3112
35
        if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY )
3113
0
            goto done;
3114
3115
35
        sreg.sel = ((msr_val >> 48) + 8) | 3; /* SELECTOR_RPL_MASK */
3116
35
        cs.sel = op_bytes == 8 ? sreg.sel + 8 : sreg.sel - 8;
3117
3118
35
        cs.base = sreg.base = 0; /* flat segment */
3119
35
        cs.limit = sreg.limit = ~0u; /* 4GB limit */
3120
35
        cs.attr = 0xcfb; /* G+DB+P+DPL3+S+Code */
3121
35
        sreg.attr = 0xcf3; /* G+DB+P+DPL3+S+Data */
3122
3123
        /* Only the selector part of SS gets updated by AMD and alike. */
3124
35
        if ( amd_like(ctxt) )
3125
34
        {
3126
34
            fail_if(!ops->read_segment);
3127
34
            if ( (rc = ops->read_segment(x86_seg_ss, &sreg,
3128
34
                                         ctxt)) != X86EMUL_OKAY )
3129
0
                goto done;
3130
3131
            /* There's explicitly no RPL adjustment here. */
3132
34
            sreg.sel = (msr_val >> 48) + 8;
3133
            /* But DPL needs adjustment, for the new CPL to be correct. */
3134
34
            sreg.dpl = 3;
3135
34
        }
3136
3137
35
#ifdef __x86_64__
3138
35
        if ( mode_64bit() )
3139
32
        {
3140
32
            if ( op_bytes == 8 )
3141
23
            {
3142
23
                cs.attr = 0xafb; /* L+DB+P+DPL3+S+Code */
3143
23
                _regs.rip = _regs.rcx;
3144
23
            }
3145
9
            else
3146
9
                _regs.rip = _regs.ecx;
3147
3148
32
            _regs.eflags = _regs.r11 & ~(X86_EFLAGS_RF | X86_EFLAGS_VM);
3149
32
        }
3150
3
        else
3151
3
#endif
3152
3
        {
3153
3
            _regs.r(ip) = _regs.ecx;
3154
3
            _regs.eflags |= X86_EFLAGS_IF;
3155
3
        }
3156
3157
35
        fail_if(!ops->write_segment);
3158
34
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != X86EMUL_OKAY ||
3159
34
             (rc = ops->write_segment(x86_seg_ss, &sreg,
3160
16
                                      ctxt)) != X86EMUL_OKAY )
3161
18
            goto done;
3162
3163
16
        singlestep = _regs.eflags & X86_EFLAGS_TF;
3164
16
        break;
3165
3166
218
    case X86EMUL_OPC(0x0f, 0x08): /* invd */
3167
566
    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd / wbnoinvd */
3168
566
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3169
565
        fail_if(!ops->cache_op);
3170
564
        if ( (rc = ops->cache_op(b == 0x09 ? !repe_prefix() ||
3171
344
                                             !vcpu_has_wbnoinvd()
3172
344
                                             ? x86emul_wbinvd
3173
344
                                             : x86emul_wbnoinvd
3174
564
                                           : x86emul_invd,
3175
564
                                 x86_seg_none, 0,
3176
564
                                 ctxt)) != X86EMUL_OKAY )
3177
12
            goto done;
3178
552
        break;
3179
3180
552
    case X86EMUL_OPC(0x0f, 0x0b): /* ud2 */
3181
2
    case X86EMUL_OPC(0x0f, 0xb9): /* ud1 */
3182
6
    case X86EMUL_OPC(0x0f, 0xff): /* ud0 */
3183
6
        generate_exception(X86_EXC_UD);
3184
3185
216
    case X86EMUL_OPC(0x0f, 0x0d): /* GrpP (prefetch) */
3186
414
    case X86EMUL_OPC(0x0f, 0x18): /* Grp16 (prefetch/nop) */
3187
1.74k
    case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
3188
1.74k
        break;
3189
3190
0
#ifndef X86EMUL_NO_MMX
3191
3192
1
    case X86EMUL_OPC(0x0f, 0x0e): /* femms */
3193
1
        host_and_vcpu_must_have(3dnow);
3194
0
        asm volatile ( "femms" );
3195
0
        break;
3196
3197
6
    case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */
3198
6
        if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
3199
6
            host_and_vcpu_must_have(3dnow);
3200
5
        else if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
3201
5
            host_and_vcpu_must_have(3dnow_ext);
3202
4
        else
3203
4
            generate_exception(X86_EXC_UD);
3204
3205
0
        get_fpu(X86EMUL_FPU_mmx);
3206
3207
0
        d = DstReg | SrcMem;
3208
0
        op_bytes = 8;
3209
0
        state->simd_size = simd_other;
3210
0
        goto simd_0f_imm8;
3211
3212
0
#endif /* !X86EMUL_NO_MMX */
3213
3214
0
#if !defined(X86EMUL_NO_SIMD) && !defined(X86EMUL_NO_MMX)
3215
0
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
3216
523k
    case X86EMUL_OPC(pfx, opc):              \
3217
533k
    case X86EMUL_OPC_66(pfx, opc)
3218
#elif !defined(X86EMUL_NO_SIMD)
3219
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
3220
    case X86EMUL_OPC_66(pfx, opc)
3221
#elif !defined(X86EMUL_NO_MMX)
3222
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
3223
    case X86EMUL_OPC(pfx, opc)
3224
#else
3225
# define CASE_SIMD_PACKED_INT(pfx, opc) C##pfx##_##opc
3226
#endif
3227
3228
0
#ifndef X86EMUL_NO_SIMD
3229
3230
0
# define CASE_SIMD_PACKED_INT_VEX(pfx, opc)  \
3231
492k
    CASE_SIMD_PACKED_INT(pfx, opc):          \
3232
499k
    case X86EMUL_OPC_VEX_66(pfx, opc)
3233
3234
0
# define CASE_SIMD_ALL_FP(kind, pfx, opc)    \
3235
160k
    CASE_SIMD_PACKED_FP(kind, pfx, opc):     \
3236
163k
    CASE_SIMD_SCALAR_FP(kind, pfx, opc)
3237
0
# define CASE_SIMD_PACKED_FP(kind, pfx, opc) \
3238
272k
    case X86EMUL_OPC##kind(pfx, opc):        \
3239
279k
    case X86EMUL_OPC##kind##_66(pfx, opc)
3240
0
# define CASE_SIMD_SCALAR_FP(kind, pfx, opc) \
3241
167k
    case X86EMUL_OPC##kind##_F3(pfx, opc):   \
3242
172k
    case X86EMUL_OPC##kind##_F2(pfx, opc)
3243
0
# define CASE_SIMD_SINGLE_FP(kind, pfx, opc) \
3244
26.8k
    case X86EMUL_OPC##kind(pfx, opc):        \
3245
27.2k
    case X86EMUL_OPC##kind##_F3(pfx, opc)
3246
3247
0
# define CASE_SIMD_ALL_FP_VEX(pfx, opc)      \
3248
80.9k
    CASE_SIMD_ALL_FP(, pfx, opc):            \
3249
80.9k
    CASE_SIMD_ALL_FP(_VEX, pfx, opc)
3250
0
# define CASE_SIMD_PACKED_FP_VEX(pfx, opc)   \
3251
56.4k
    CASE_SIMD_PACKED_FP(, pfx, opc):         \
3252
58.1k
    CASE_SIMD_PACKED_FP(_VEX, pfx, opc)
3253
0
# define CASE_SIMD_SCALAR_FP_VEX(pfx, opc)   \
3254
2.32k
    CASE_SIMD_SCALAR_FP(, pfx, opc):         \
3255
2.62k
    CASE_SIMD_SCALAR_FP(_VEX, pfx, opc)
3256
0
# define CASE_SIMD_SINGLE_FP_VEX(pfx, opc)   \
3257
13.3k
    CASE_SIMD_SINGLE_FP(, pfx, opc):         \
3258
13.5k
    CASE_SIMD_SINGLE_FP(_VEX, pfx, opc)
3259
3260
#else
3261
3262
# define CASE_SIMD_PACKED_INT_VEX(pfx, opc)  \
3263
    CASE_SIMD_PACKED_INT(pfx, opc)
3264
3265
# define CASE_SIMD_ALL_FP(kind, pfx, opc)    C##kind##pfx##_##opc
3266
# define CASE_SIMD_PACKED_FP(kind, pfx, opc) Cp##kind##pfx##_##opc
3267
# define CASE_SIMD_SCALAR_FP(kind, pfx, opc) Cs##kind##pfx##_##opc
3268
# define CASE_SIMD_SINGLE_FP(kind, pfx, opc) C##kind##pfx##_##opc
3269
3270
# define CASE_SIMD_ALL_FP_VEX(pfx, opc)    CASE_SIMD_ALL_FP(, pfx, opc)
3271
# define CASE_SIMD_PACKED_FP_VEX(pfx, opc) CASE_SIMD_PACKED_FP(, pfx, opc)
3272
# define CASE_SIMD_SCALAR_FP_VEX(pfx, opc) CASE_SIMD_SCALAR_FP(, pfx, opc)
3273
# define CASE_SIMD_SINGLE_FP_VEX(pfx, opc) CASE_SIMD_SINGLE_FP(, pfx, opc)
3274
3275
#endif
3276
3277
3
    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
3278
3
        host_and_vcpu_must_have(sse4a);
3279
        /* fall through */
3280
1.86k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2b):   /* movntp{s,d} xmm,m128 */
3281
                                           /* vmovntp{s,d} {x,y}mm,mem */
3282
1.86k
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
3283
695
        sfence = true;
3284
        /* fall through */
3285
9.64k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x10):      /* mov{up,s}{s,d} xmm/mem,xmm */
3286
                                           /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */
3287
                                           /* vmovs{s,d} mem,xmm */
3288
                                           /* vmovs{s,d} xmm,xmm,xmm */
3289
18.4k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x11):      /* mov{up,s}{s,d} xmm,xmm/mem */
3290
                                           /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */
3291
                                           /* vmovs{s,d} xmm,mem */
3292
                                           /* vmovs{s,d} xmm,xmm,xmm */
3293
18.4k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x14):   /* unpcklp{s,d} xmm/m128,xmm */
3294
                                           /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3295
15.3k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x15):   /* unpckhp{s,d} xmm/m128,xmm */
3296
                                           /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3297
17.3k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x28):   /* movap{s,d} xmm/m128,xmm */
3298
                                           /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */
3299
20.3k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x29):   /* movap{s,d} xmm,xmm/m128 */
3300
                                           /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */
3301
47.4k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x51):      /* sqrt{p,s}{s,d} xmm/mem,xmm */
3302
                                           /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */
3303
                                           /* vsqrts{s,d} xmm/m32,xmm,xmm */
3304
47.4k
    CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x52):   /* rsqrt{p,s}s xmm/mem,xmm */
3305
                                           /* vrsqrtps {x,y}mm/mem,{x,y}mm */
3306
                                           /* vrsqrtss xmm/m32,xmm,xmm */
3307
27.9k
    CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x53):   /* rcp{p,s}s xmm/mem,xmm */
3308
                                           /* vrcpps {x,y}mm/mem,{x,y}mm */
3309
                                           /* vrcpss xmm/m32,xmm,xmm */
3310
29.4k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x54):   /* andp{s,d} xmm/m128,xmm */
3311
                                           /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3312
30.9k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x55):   /* andnp{s,d} xmm/m128,xmm */
3313
                                           /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3314
33.3k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x56):   /* orp{s,d} xmm/m128,xmm */
3315
                                           /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3316
35.6k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x57):   /* xorp{s,d} xmm/m128,xmm */
3317
                                           /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3318
76.1k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x58):      /* add{p,s}{s,d} xmm/mem,xmm */
3319
                                           /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3320
83.8k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x59):      /* mul{p,s}{s,d} xmm/mem,xmm */
3321
                                           /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3322
91.3k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5c):      /* sub{p,s}{s,d} xmm/mem,xmm */
3323
                                           /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3324
96.7k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5d):      /* min{p,s}{s,d} xmm/mem,xmm */
3325
                                           /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3326
103k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5e):      /* div{p,s}{s,d} xmm/mem,xmm */
3327
                                           /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3328
112k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5f):      /* max{p,s}{s,d} xmm/mem,xmm */
3329
                                           /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
3330
112k
    simd_0f_fp:
3331
18.7k
        if ( vex.opcx == vex_none )
3332
11.9k
        {
3333
11.9k
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
3334
5.06k
            {
3335
16.5k
    simd_0f_sse2:
3336
16.5k
                vcpu_must_have(sse2);
3337
16.5k
            }
3338
6.85k
            else
3339
11.9k
                vcpu_must_have(sse);
3340
24.4k
    simd_0f_xmm:
3341
24.4k
            get_fpu(X86EMUL_FPU_xmm);
3342
24.4k
        }
3343
6.84k
        else
3344
6.84k
        {
3345
            /* vmovs{s,d} to/from memory have only two operands. */
3346
6.84k
            if ( (b & ~1) == 0x10 && ea.type == OP_MEM )
3347
801
                d |= TwoOp;
3348
18.9k
    simd_0f_avx:
3349
18.9k
            host_and_vcpu_must_have(avx);
3350
23.6k
    simd_0f_ymm:
3351
23.6k
            get_fpu(X86EMUL_FPU_ymm);
3352
23.6k
        }
3353
58.6k
    simd_0f_common:
3354
58.6k
        opc = init_prefixes(stub);
3355
0
        opc[0] = b;
3356
58.6k
        opc[1] = modrm;
3357
58.6k
        if ( ea.type == OP_MEM )
3358
35.6k
        {
3359
            /* convert memory operand to (%rAX) */
3360
35.6k
            rex_prefix &= ~REX_B;
3361
35.6k
            vex.b = 1;
3362
35.6k
            opc[1] &= 0x38;
3363
35.6k
        }
3364
58.6k
        insn_bytes = PFX_BYTES + 2;
3365
58.6k
        break;
3366
3367
5
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2b): /* vmovntp{s,d} [xyz]mm,mem */
3368
5
        generate_exception_if(ea.type != OP_MEM || evex.opmsk, X86_EXC_UD);
3369
1
        sfence = true;
3370
        /* fall through */
3371
5
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x10): /* vmovup{s,d} [xyz]mm/mem,[xyz]mm{k} */
3372
9
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm{k} */
3373
                                            /* vmovs{s,d} xmm,xmm,xmm{k} */
3374
13
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x11): /* vmovup{s,d} [xyz]mm,[xyz]mm/mem{k} */
3375
19
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem{k} */
3376
                                            /* vmovs{s,d} xmm,xmm,xmm{k} */
3377
23
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x28): /* vmovap{s,d} [xyz]mm/mem,[xyz]mm{k} */
3378
26
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x29): /* vmovap{s,d} [xyz]mm,[xyz]mm/mem{k} */
3379
        /* vmovs{s,d} to/from memory have only two operands. */
3380
26
        if ( (b & ~1) == 0x10 && ea.type == OP_MEM )
3381
8
            d |= TwoOp;
3382
26
        generate_exception_if(evex.brs, X86_EXC_UD);
3383
        /* fall through */
3384
54
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x51):    /* vsqrtp{s,d} [xyz]mm/mem,[xyz]mm{k} */
3385
                                            /* vsqrts{s,d} xmm/m32,xmm,xmm{k} */
3386
70
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x58):    /* vadd{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3387
86
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x59):    /* vmul{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3388
98
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5c):    /* vsub{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3389
114
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5d):    /* vmin{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3390
130
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5e):    /* vdiv{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3391
146
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5f):    /* vmax{p,s}{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3392
146
    avx512f_all_fp:
3393
43
        generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) ||
3394
43
                               (ea.type != OP_REG && evex.brs &&
3395
43
                                (evex.pfx & VEX_PREFIX_SCALAR_MASK))),
3396
43
                              X86_EXC_UD);
3397
19
        host_and_vcpu_must_have(avx512f);
3398
0
        if ( ea.type != OP_REG || !evex.brs )
3399
0
            avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK);
3400
0
    simd_zmm:
3401
0
        get_fpu(X86EMUL_FPU_zmm);
3402
0
        opc = init_evex(stub);
3403
0
        opc[0] = b;
3404
0
        opc[1] = modrm;
3405
0
        if ( ea.type == OP_MEM )
3406
0
        {
3407
            /* convert memory operand to (%rAX) */
3408
0
            evex.b = 1;
3409
0
            opc[1] &= 0x38;
3410
0
        }
3411
0
        insn_bytes = EVEX_PFX_BYTES + 2;
3412
0
        break;
3413
3414
0
#ifndef X86EMUL_NO_SIMD
3415
3416
204
    case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
3417
398
    case X86EMUL_OPC_VEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
3418
4.32k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x13):   /* movlp{s,d} xmm,m64 */
3419
                                           /* vmovlp{s,d} xmm,m64 */
3420
4.32k
    case X86EMUL_OPC_66(0x0f, 0x16):       /* movhpd m64,xmm */
3421
1.87k
    case X86EMUL_OPC_VEX_66(0x0f, 0x16):   /* vmovhpd m64,xmm,xmm */
3422
9.21k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x17):   /* movhp{s,d} xmm,m64 */
3423
                                           /* vmovhp{s,d} xmm,m64 */
3424
9.21k
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
3425
        /* fall through */
3426
2.77k
    case X86EMUL_OPC(0x0f, 0x12):          /* movlps m64,xmm */
3427
                                           /* movhlps xmm,xmm */
3428
2.83k
    case X86EMUL_OPC_VEX(0x0f, 0x12):      /* vmovlps m64,xmm,xmm */
3429
                                           /* vmovhlps xmm,xmm,xmm */
3430
3.07k
    case X86EMUL_OPC(0x0f, 0x16):          /* movhps m64,xmm */
3431
                                           /* movlhps xmm,xmm */
3432
3.13k
    case X86EMUL_OPC_VEX(0x0f, 0x16):      /* vmovhps m64,xmm,xmm */
3433
                                           /* vmovlhps xmm,xmm,xmm */
3434
3.13k
        generate_exception_if(vex.l, X86_EXC_UD);
3435
3.13k
        if ( (d & DstMask) != DstMem )
3436
1.49k
            d &= ~TwoOp;
3437
3.13k
        op_bytes = 8;
3438
3.13k
        goto simd_0f_fp;
3439
3440
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
3441
11
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x13): /* vmovlp{s,d} xmm,m64 */
3442
11
    case X86EMUL_OPC_EVEX_66(0x0f, 0x16):   /* vmovhpd m64,xmm,xmm */
3443
19
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x17): /* vmovhp{s,d} xmm,m64 */
3444
19
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
3445
        /* fall through */
3446
10
    case X86EMUL_OPC_EVEX(0x0f, 0x12):      /* vmovlps m64,xmm,xmm */
3447
                                            /* vmovhlps xmm,xmm,xmm */
3448
11
    case X86EMUL_OPC_EVEX(0x0f, 0x16):      /* vmovhps m64,xmm,xmm */
3449
                                            /* vmovlhps xmm,xmm,xmm */
3450
11
        generate_exception_if((evex.lr || evex.opmsk || evex.brs ||
3451
11
                               evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK)),
3452
11
                              X86_EXC_UD);
3453
1
        host_and_vcpu_must_have(avx512f);
3454
0
        if ( (d & DstMask) != DstMem )
3455
0
            d &= ~TwoOp;
3456
0
        op_bytes = 8;
3457
0
        goto simd_zmm;
3458
3459
68
    case X86EMUL_OPC_F3(0x0f, 0x12):       /* movsldup xmm/m128,xmm */
3460
134
    case X86EMUL_OPC_VEX_F3(0x0f, 0x12):   /* vmovsldup {x,y}mm/mem,{x,y}mm */
3461
200
    case X86EMUL_OPC_F2(0x0f, 0x12):       /* movddup xmm/m64,xmm */
3462
265
    case X86EMUL_OPC_VEX_F2(0x0f, 0x12):   /* vmovddup {x,y}mm/mem,{x,y}mm */
3463
331
    case X86EMUL_OPC_F3(0x0f, 0x16):       /* movshdup xmm/m128,xmm */
3464
365
    case X86EMUL_OPC_VEX_F3(0x0f, 0x16):   /* vmovshdup {x,y}mm/mem,{x,y}mm */
3465
365
        d |= TwoOp;
3466
365
        op_bytes = !(vex.pfx & VEX_PREFIX_DOUBLE_MASK) || vex.l
3467
365
                   ? 16 << vex.l : 8;
3468
2.20k
    simd_0f_sse3_avx:
3469
2.20k
        if ( vex.opcx != vex_none )
3470
1.19k
            goto simd_0f_avx;
3471
1.01k
        host_and_vcpu_must_have(sse3);
3472
1.01k
        goto simd_0f_xmm;
3473
3474
1.01k
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x12):   /* vmovsldup [xyz]mm/mem,[xyz]mm{k} */
3475
4
    case X86EMUL_OPC_EVEX_F2(0x0f, 0x12):   /* vmovddup [xyz]mm/mem,[xyz]mm{k} */
3476
5
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x16):   /* vmovshdup [xyz]mm/mem,[xyz]mm{k} */
3477
5
        generate_exception_if((evex.brs ||
3478
5
                               evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK)),
3479
5
                              X86_EXC_UD);
3480
1
        host_and_vcpu_must_have(avx512f);
3481
0
        avx512_vlen_check(false);
3482
0
        d |= TwoOp;
3483
0
        op_bytes = !(evex.pfx & VEX_PREFIX_DOUBLE_MASK) || evex.lr
3484
0
                   ? 16 << evex.lr : 8;
3485
0
        fault_suppression = false;
3486
0
        goto simd_zmm;
3487
3488
3
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x14): /* vunpcklp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3489
7
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x15): /* vunpckhp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3490
7
        generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK),
3491
7
                              X86_EXC_UD);
3492
        /* fall through */
3493
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x76): /* vpermi2{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3494
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x77): /* vpermi2p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3495
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7e): /* vpermt2{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3496
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7f): /* vpermt2p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3497
6
        fault_suppression = false;
3498
        /* fall through */
3499
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0xdb): /* vpand{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3500
8
    case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3501
9
    case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3502
10
    case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3503
11
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x14): /* vprorv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3504
12
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x15): /* vprolv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3505
13
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3506
14
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3507
15
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3508
16
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3509
17
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x45): /* vpsrlv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3510
18
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x46): /* vpsrav{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3511
19
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x47): /* vpsllv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3512
20
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x4c): /* vrcp14p{s,d} [xyz]mm/mem,[xyz]mm{k} */
3513
21
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x4e): /* vrsqrt14p{s,d} [xyz]mm/mem,[xyz]mm{k} */
3514
22
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x64): /* vpblendm{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3515
23
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x65): /* vblendmp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
3516
60
    avx512f_no_sae:
3517
60
        host_and_vcpu_must_have(avx512f);
3518
0
        generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD);
3519
0
        avx512_vlen_check(false);
3520
0
        goto simd_zmm;
3521
3522
0
#endif /* !X86EMUL_NO_SIMD */
3523
3524
317
    case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */
3525
320
    case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */
3526
428
    case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */
3527
430
    case X86EMUL_OPC(0x0f, 0x23): /* mov reg,dr */
3528
430
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3529
426
        if ( b & 2 )
3530
106
        {
3531
            /* Write to CR/DR. */
3532
106
            typeof(ops->write_cr) write = (b & 1) ? ops->write_dr
3533
106
                                                  : ops->write_cr;
3534
3535
106
            fail_if(!write);
3536
105
            rc = write(modrm_reg, src.val, ctxt);
3537
105
        }
3538
320
        else
3539
320
        {
3540
            /* Read from CR/DR. */
3541
320
            typeof(ops->read_cr) read = (b & 1) ? ops->read_dr : ops->read_cr;
3542
3543
320
            fail_if(!read);
3544
318
            rc = read(modrm_reg, &dst.val, ctxt);
3545
318
        }
3546
423
        if ( rc != X86EMUL_OKAY )
3547
4
            goto done;
3548
419
        break;
3549
3550
419
#if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD)
3551
3552
420
    case X86EMUL_OPC_66(0x0f, 0x2a):       /* cvtpi2pd mm/m64,xmm */
3553
420
        if ( ea.type == OP_REG )
3554
215
        {
3555
574
    case X86EMUL_OPC(0x0f, 0x2a):          /* cvtpi2ps mm/m64,xmm */
3556
1.61k
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2c):     /* cvttp{s,d}2pi xmm/mem,mm */
3557
2.01k
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2d):     /* cvtp{s,d}2pi xmm/mem,mm */
3558
2.01k
            host_and_vcpu_must_have(mmx);
3559
2.01k
        }
3560
1.24k
        op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8;
3561
1.24k
        goto simd_0f_fp;
3562
3563
0
#endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */
3564
3565
2.90k
    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2a):   /* {,v}cvtsi2s{s,d} r/m,xmm */
3566
2.90k
        if ( vex.opcx == vex_none )
3567
750
        {
3568
750
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
3569
750
                vcpu_must_have(sse2);
3570
402
            else
3571
750
                vcpu_must_have(sse);
3572
750
            get_fpu(X86EMUL_FPU_xmm);
3573
750
        }
3574
162
        else
3575
162
        {
3576
162
            host_and_vcpu_must_have(avx);
3577
162
            get_fpu(X86EMUL_FPU_ymm);
3578
162
        }
3579
3580
909
        if ( ea.type == OP_MEM )
3581
202
        {
3582
202
            rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
3583
202
                            rex_prefix & REX_W ? 8 : 4, ctxt, ops);
3584
202
            if ( rc != X86EMUL_OKAY )
3585
16
                goto done;
3586
202
        }
3587
707
        else
3588
707
            src.val = rex_prefix & REX_W ? *ea.reg : (uint32_t)*ea.reg;
3589
3590
893
        state->simd_size = simd_none;
3591
893
        goto simd_0f_rm;
3592
3593
0
#ifndef X86EMUL_NO_SIMD
3594
3595
1
    case X86EMUL_OPC_EVEX_F3(5, 0x2a):      /* vcvtsi2sh r/m,xmm,xmm */
3596
2
    case X86EMUL_OPC_EVEX_F3(5, 0x7b):      /* vcvtusi2sh r/m,xmm,xmm */
3597
2
        host_and_vcpu_must_have(avx512_fp16);
3598
        /* fall through */
3599
1
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
3600
7
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x7b): /* vcvtusi2s{s,d} r/m,xmm,xmm */
3601
7
        generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
3602
7
                              X86_EXC_UD);
3603
2
        host_and_vcpu_must_have(avx512f);
3604
0
        if ( !evex.brs )
3605
0
            avx512_vlen_check(true);
3606
0
        get_fpu(X86EMUL_FPU_zmm);
3607
3608
0
        if ( ea.type == OP_MEM )
3609
0
        {
3610
0
            rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
3611
0
                            rex_prefix & REX_W ? 8 : 4, ctxt, ops);
3612
0
            if ( rc != X86EMUL_OKAY )
3613
0
                goto done;
3614
0
        }
3615
0
        else
3616
0
            src.val = *ea.reg;
3617
3618
0
        opc = init_evex(stub);
3619
0
        opc[0] = b;
3620
        /* Convert memory/GPR source to %rAX. */
3621
0
        evex.b = 1;
3622
0
        if ( !mode_64bit() )
3623
0
            evex.w = 0;
3624
        /*
3625
         * While SDM version 085 has explicit wording towards embedded rounding
3626
         * being ignored, it's still not entirely unambiguous with the exception
3627
         * type referred to. Be on the safe side for the stub.
3628
         */
3629
0
        if ( !evex.w && evex.pfx == vex_f2 )
3630
0
        {
3631
0
            evex.brs = 0;
3632
0
            evex.lr = 0;
3633
0
        }
3634
0
        opc[1] = (modrm & 0x38) | 0xc0;
3635
0
        insn_bytes = EVEX_PFX_BYTES + 2;
3636
0
        opc[2] = 0xc3;
3637
3638
0
        copy_EVEX(opc, evex);
3639
0
        invoke_stub("", "", "=g" (dummy) : "a" (src.val));
3640
3641
0
        put_stub(stub);
3642
0
        state->simd_size = simd_none;
3643
0
        break;
3644
3645
1.64k
    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2c):   /* {,v}cvtts{s,d}2si xmm/mem,reg */
3646
5.12k
    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2d):   /* {,v}cvts{s,d}2si xmm/mem,reg */
3647
5.12k
        if ( vex.opcx == vex_none )
3648
1.01k
        {
3649
1.01k
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
3650
1.01k
                vcpu_must_have(sse2);
3651
493
            else
3652
1.01k
                vcpu_must_have(sse);
3653
1.01k
            get_fpu(X86EMUL_FPU_xmm);
3654
1.01k
        }
3655
778
        else
3656
778
        {
3657
778
            generate_exception_if(vex.reg != 0xf, X86_EXC_UD);
3658
772
            host_and_vcpu_must_have(avx);
3659
772
            get_fpu(X86EMUL_FPU_ymm);
3660
3661
            /* Work around erratum BT230. */
3662
771
            vex.l = 0;
3663
771
        }
3664
3665
1.78k
        opc = init_prefixes(stub);
3666
1.78k
    cvts_2si:
3667
1.78k
        opc[0] = b;
3668
        /* Convert GPR destination to %rAX and memory operand to (%rCX). */
3669
1.78k
        rex_prefix &= ~REX_R;
3670
1.78k
        vex.r = 1;
3671
1.78k
        evex.r = 1;
3672
1.78k
        if ( ea.type == OP_MEM )
3673
1.20k
        {
3674
1.20k
            rex_prefix &= ~REX_B;
3675
1.20k
            vex.b = 1;
3676
1.20k
            evex.b = 1;
3677
1.20k
            opc[1] = 0x01;
3678
3679
1.20k
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
3680
1.20k
                           vex.pfx & VEX_PREFIX_DOUBLE_MASK
3681
1.20k
                           ? 8 : 2 << !state->fp16,
3682
1.20k
                           ctxt);
3683
1.20k
            if ( rc != X86EMUL_OKAY )
3684
30
                goto done;
3685
1.20k
        }
3686
579
        else
3687
579
            opc[1] = modrm & 0xc7;
3688
1.75k
        if ( !mode_64bit() )
3689
1.08k
        {
3690
1.08k
            vex.w = 0;
3691
1.08k
            evex.w = 0;
3692
1.08k
        }
3693
1.75k
        if ( evex_encoded() )
3694
0
        {
3695
0
            insn_bytes = EVEX_PFX_BYTES + 2;
3696
0
            copy_EVEX(opc, evex);
3697
0
        }
3698
1.75k
        else
3699
1.75k
        {
3700
1.75k
            insn_bytes = PFX_BYTES + 2;
3701
1.75k
            copy_REX_VEX(opc, rex_prefix, vex);
3702
1.75k
        }
3703
1.75k
        opc[2] = 0xc3;
3704
3705
1.75k
        ea.reg = decode_gpr(&_regs, modrm_reg);
3706
1.75k
        invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
3707
3708
1.75k
        put_stub(stub);
3709
1.75k
        state->simd_size = simd_none;
3710
1.75k
        break;
3711
3712
1
    case X86EMUL_OPC_EVEX_F3(5, 0x2c):      /* vcvttsh2si xmm/mem,reg */
3713
2
    case X86EMUL_OPC_EVEX_F3(5, 0x2d):      /* vcvtsh2si xmm/mem,reg */
3714
3
    case X86EMUL_OPC_EVEX_F3(5, 0x78):      /* vcvttsh2usi xmm/mem,reg */
3715
4
    case X86EMUL_OPC_EVEX_F3(5, 0x79):      /* vcvtsh2usi xmm/mem,reg */
3716
4
        host_and_vcpu_must_have(avx512_fp16);
3717
        /* fall through */
3718
7
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
3719
13
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
3720
18
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
3721
23
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */
3722
23
        generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R ||
3723
23
                               evex.opmsk ||
3724
23
                               (ea.type != OP_REG && evex.brs)),
3725
23
                              X86_EXC_UD);
3726
2
        host_and_vcpu_must_have(avx512f);
3727
0
        if ( !evex.brs )
3728
0
            avx512_vlen_check(true);
3729
0
        get_fpu(X86EMUL_FPU_zmm);
3730
0
        opc = init_evex(stub);
3731
0
        goto cvts_2si;
3732
3733
1.99k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2e):   /* {,v}ucomis{s,d} xmm/mem,xmm */
3734
5.17k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2f):   /* {,v}comis{s,d} xmm/mem,xmm */
3735
5.17k
        if ( vex.opcx == vex_none )
3736
781
        {
3737
781
            if ( vex.pfx )
3738
781
                vcpu_must_have(sse2);
3739
373
            else
3740
781
                vcpu_must_have(sse);
3741
781
            get_fpu(X86EMUL_FPU_xmm);
3742
781
        }
3743
885
        else
3744
885
        {
3745
885
            generate_exception_if(vex.reg != 0xf, X86_EXC_UD);
3746
881
            host_and_vcpu_must_have(avx);
3747
881
            get_fpu(X86EMUL_FPU_ymm);
3748
881
        }
3749
3750
1.66k
        opc = init_prefixes(stub);
3751
1.66k
        op_bytes = 4 << vex.pfx;
3752
1.66k
    vcomi:
3753
1.66k
        opc[0] = b;
3754
1.66k
        opc[1] = modrm;
3755
1.66k
        if ( ea.type == OP_MEM )
3756
1.24k
        {
3757
1.24k
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, op_bytes, ctxt);
3758
1.24k
            if ( rc != X86EMUL_OKAY )
3759
24
                goto done;
3760
3761
            /* Convert memory operand to (%rAX). */
3762
1.21k
            rex_prefix &= ~REX_B;
3763
1.21k
            vex.b = 1;
3764
1.21k
            evex.b = 1;
3765
1.21k
            opc[1] &= 0x38;
3766
1.21k
        }
3767
1.63k
        if ( evex_encoded() )
3768
0
        {
3769
0
            insn_bytes = EVEX_PFX_BYTES + 2;
3770
0
            copy_EVEX(opc, evex);
3771
0
        }
3772
1.63k
        else
3773
1.63k
        {
3774
1.63k
            insn_bytes = PFX_BYTES + 2;
3775
1.63k
            copy_REX_VEX(opc, rex_prefix, vex);
3776
1.63k
        }
3777
1.63k
        opc[2] = 0xc3;
3778
3779
1.63k
        _regs.eflags &= ~EFLAGS_MASK;
3780
1.63k
        invoke_stub("",
3781
1.63k
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
3782
1.63k
                    [eflags] "+g" (_regs.eflags),
3783
1.63k
                    [tmp] "=&r" (dummy), "+m" (*mmvalp)
3784
1.63k
                    : "a" (mmvalp), [mask] "i" (EFLAGS_MASK));
3785
3786
1.63k
        put_stub(stub);
3787
1.63k
        ASSERT(!state->simd_size);
3788
1.63k
        break;
3789
3790
1.63k
    case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
3791
2
    case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
3792
2
        host_and_vcpu_must_have(avx512_fp16);
3793
0
        generate_exception_if(evex.w, X86_EXC_UD);
3794
        /* fall through */
3795
5
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
3796
14
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
3797
14
        generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
3798
14
                               (ea.type != OP_REG && evex.brs) ||
3799
14
                               evex.w != evex.pfx),
3800
14
                              X86_EXC_UD);
3801
1
        host_and_vcpu_must_have(avx512f);
3802
0
        if ( !evex.brs )
3803
0
            avx512_vlen_check(true);
3804
0
        get_fpu(X86EMUL_FPU_zmm);
3805
3806
0
        opc = init_evex(stub);
3807
0
        op_bytes = 2 << (!state->fp16 + evex.w);
3808
0
        goto vcomi;
3809
3810
0
#endif
3811
3812
1.39k
    case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
3813
1.39k
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3814
1.38k
        fail_if(ops->write_msr == NULL);
3815
1.38k
        if ( (rc = ops->write_msr(_regs.ecx,
3816
1.38k
                                  ((uint64_t)_regs.r(dx) << 32) | _regs.eax,
3817
1.38k
                                  ctxt)) != 0 )
3818
118
            goto done;
3819
1.26k
        break;
3820
3821
1.26k
    case X86EMUL_OPC(0x0f, 0x31): rdtsc: /* rdtsc */
3822
522
        if ( !mode_ring0() )
3823
253
        {
3824
253
            fail_if(ops->read_cr == NULL);
3825
252
            if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
3826
0
                goto done;
3827
252
            generate_exception_if(cr4 & X86_CR4_TSD, X86_EXC_GP, 0);
3828
252
        }
3829
520
        fail_if(ops->read_msr == NULL);
3830
519
        if ( (rc = ops->read_msr(MSR_IA32_TSC,
3831
519
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
3832
21
            goto done;
3833
498
        _regs.r(dx) = msr_val >> 32;
3834
498
        _regs.r(ax) = (uint32_t)msr_val;
3835
498
        break;
3836
3837
329
    case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */
3838
329
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3839
325
        fail_if(ops->read_msr == NULL);
3840
324
        if ( (rc = ops->read_msr(_regs.ecx, &msr_val, ctxt)) != X86EMUL_OKAY )
3841
118
            goto done;
3842
206
        _regs.r(dx) = msr_val >> 32;
3843
206
        _regs.r(ax) = (uint32_t)msr_val;
3844
206
        break;
3845
3846
856
    case X86EMUL_OPC(0x0f, 0x34): /* sysenter */
3847
856
        vcpu_must_have(sep);
3848
856
        generate_exception_if(amd_like(ctxt) && ctxt->lma, X86_EXC_UD);
3849
808
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0);
3850
3851
807
        fail_if(ops->read_msr == NULL);
3852
807
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
3853
807
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
3854
0
            goto done;
3855
3856
807
        generate_exception_if(!(msr_val & 0xfffc), X86_EXC_GP, 0);
3857
3858
806
        _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
3859
3860
806
        cs.sel = msr_val & ~3; /* SELECTOR_RPL_MASK */
3861
806
        cs.base = 0;   /* flat segment */
3862
806
        cs.limit = ~0u;  /* 4GB limit */
3863
806
        cs.attr = ctxt->lma ? 0xa9b  /* G+L+P+S+Code */
3864
806
                            : 0xc9b; /* G+DB+P+S+Code */
3865
3866
806
        sreg.sel = cs.sel + 8;
3867
806
        sreg.base = 0;   /* flat segment */
3868
806
        sreg.limit = ~0u;  /* 4GB limit */
3869
806
        sreg.attr = 0xc93; /* G+DB+P+S+Data */
3870
3871
806
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_EIP,
3872
806
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
3873
0
            goto done;
3874
806
        _regs.r(ip) = ctxt->lma ? msr_val : (uint32_t)msr_val;
3875
3876
806
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_ESP,
3877
806
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
3878
0
            goto done;
3879
806
        _regs.r(sp) = ctxt->lma ? msr_val : (uint32_t)msr_val;
3880
3881
806
        fail_if(!ops->write_segment);
3882
806
        if ( (rc = ops->write_segment(x86_seg_cs, &cs,
3883
806
                                      ctxt)) != X86EMUL_OKAY ||
3884
806
             (rc = ops->write_segment(x86_seg_ss, &sreg,
3885
803
                                      ctxt)) != X86EMUL_OKAY )
3886
4
            goto done;
3887
3888
802
        if ( ctxt->lma )
3889
            /* In particular mode_64bit() needs to return true from here on. */
3890
0
            ctxt->addr_size = ctxt->sp_size = 64;
3891
3892
802
        singlestep = _regs.eflags & X86_EFLAGS_TF;
3893
802
        break;
3894
3895
272
    case X86EMUL_OPC(0x0f, 0x35): /* sysexit */
3896
272
        vcpu_must_have(sep);
3897
272
        generate_exception_if(amd_like(ctxt) && ctxt->lma, X86_EXC_UD);
3898
213
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
3899
209
        generate_exception_if(!in_protmode(ctxt, ops), X86_EXC_GP, 0);
3900
3901
208
        fail_if(ops->read_msr == NULL);
3902
207
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
3903
207
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
3904
0
            goto done;
3905
3906
207
        generate_exception_if(!(msr_val & 0xfffc), X86_EXC_GP, 0);
3907
207
        generate_exception_if(op_bytes == 8 &&
3908
207
                              (!is_canonical_address(_regs.r(dx)) ||
3909
207
                               !is_canonical_address(_regs.r(cx))),
3910
207
                              X86_EXC_GP, 0);
3911
3912
207
        cs.sel = (msr_val | 3) + /* SELECTOR_RPL_MASK */
3913
207
                 (op_bytes == 8 ? 32 : 16);
3914
207
        cs.base = 0;   /* flat segment */
3915
207
        cs.limit = ~0u;  /* 4GB limit */
3916
207
        cs.attr = op_bytes == 8 ? 0xafb  /* L+DB+P+DPL3+S+Code */
3917
207
                                : 0xcfb; /* G+DB+P+DPL3+S+Code */
3918
3919
207
        sreg.sel = cs.sel + 8;
3920
207
        sreg.base = 0;   /* flat segment */
3921
207
        sreg.limit = ~0u;  /* 4GB limit */
3922
207
        sreg.attr = 0xcf3; /* G+DB+P+DPL3+S+Data */
3923
3924
207
        fail_if(ops->write_segment == NULL);
3925
206
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
3926
206
             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
3927
3
            goto done;
3928
3929
203
        _regs.r(ip) = op_bytes == 8 ? _regs.r(dx) : _regs.edx;
3930
203
        _regs.r(sp) = op_bytes == 8 ? _regs.r(cx) : _regs.ecx;
3931
3932
203
        singlestep = _regs.eflags & X86_EFLAGS_TF;
3933
203
        break;
3934
3935
3.34k
    case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */
3936
3.34k
        vcpu_must_have(cmov);
3937
3.34k
        if ( test_cc(b, _regs.eflags) )
3938
1.58k
            dst.val = src.val;
3939
3.34k
        break;
3940
3941
0
#ifndef X86EMUL_NO_SIMD
3942
3943
10
    case X86EMUL_OPC_VEX(0x0f, 0x4a):    /* kadd{w,q} k,k,k */
3944
10
        if ( !vex.w )
3945
10
            host_and_vcpu_must_have(avx512dq);
3946
        /* fall through */
3947
8
    case X86EMUL_OPC_VEX(0x0f, 0x41):    /* kand{w,q} k,k,k */
3948
11
    case X86EMUL_OPC_VEX_66(0x0f, 0x41): /* kand{b,d} k,k,k */
3949
13
    case X86EMUL_OPC_VEX(0x0f, 0x42):    /* kandn{w,q} k,k,k */
3950
14
    case X86EMUL_OPC_VEX_66(0x0f, 0x42): /* kandn{b,d} k,k,k */
3951
16
    case X86EMUL_OPC_VEX(0x0f, 0x45):    /* kor{w,q} k,k,k */
3952
17
    case X86EMUL_OPC_VEX_66(0x0f, 0x45): /* kor{b,d} k,k,k */
3953
18
    case X86EMUL_OPC_VEX(0x0f, 0x46):    /* kxnor{w,q} k,k,k */
3954
19
    case X86EMUL_OPC_VEX_66(0x0f, 0x46): /* kxnor{b,d} k,k,k */
3955
21
    case X86EMUL_OPC_VEX(0x0f, 0x47):    /* kxor{w,q} k,k,k */
3956
22
    case X86EMUL_OPC_VEX_66(0x0f, 0x47): /* kxor{b,d} k,k,k */
3957
25
    case X86EMUL_OPC_VEX_66(0x0f, 0x4a): /* kadd{b,d} k,k,k */
3958
25
        generate_exception_if(!vex.l, X86_EXC_UD);
3959
16
    opmask_basic:
3960
16
        if ( vex.w )
3961
16
            host_and_vcpu_must_have(avx512bw);
3962
6
        else if ( vex.pfx )
3963
1
            host_and_vcpu_must_have(avx512dq);
3964
9
    opmask_common:
3965
9
        host_and_vcpu_must_have(avx512f);
3966
0
        generate_exception_if(!vex.r || (mode_64bit() && !(vex.reg & 8)) ||
3967
0
                              ea.type != OP_REG, X86_EXC_UD);
3968
3969
0
        vex.reg |= 8;
3970
0
        d &= ~TwoOp;
3971
3972
0
        get_fpu(X86EMUL_FPU_opmask);
3973
3974
0
        opc = init_prefixes(stub);
3975
0
        opc[0] = b;
3976
0
        opc[1] = modrm;
3977
0
        insn_bytes = PFX_BYTES + 2;
3978
3979
0
        state->simd_size = simd_other;
3980
0
        op_bytes = 1; /* Any non-zero value will do. */
3981
0
        break;
3982
3983
3
    case X86EMUL_OPC_VEX(0x0f, 0x44):    /* knot{w,q} k,k */
3984
6
    case X86EMUL_OPC_VEX_66(0x0f, 0x44): /* knot{b,d} k,k */
3985
6
        generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
3986
1
        goto opmask_basic;
3987
3988
2
    case X86EMUL_OPC_VEX(0x0f, 0x4b):    /* kunpck{w,d}{d,q} k,k,k */
3989
2
        generate_exception_if(!vex.l, X86_EXC_UD);
3990
1
        host_and_vcpu_must_have(avx512bw);
3991
0
        goto opmask_common;
3992
3993
10
    case X86EMUL_OPC_VEX_66(0x0f, 0x4b): /* kunpckbw k,k,k */
3994
10
        generate_exception_if(!vex.l || vex.w, X86_EXC_UD);
3995
4
        goto opmask_common;
3996
3997
4
#endif /* X86EMUL_NO_SIMD */
3998
3999
3.04k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x50):   /* movmskp{s,d} xmm,reg */
4000
                                           /* vmovmskp{s,d} {x,y}mm,reg */
4001
3.60k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd7):  /* pmovmskb {,x}mm,reg */
4002
                                           /* vpmovmskb {x,y}mm,reg */
4003
3.60k
        opc = init_prefixes(stub);
4004
0
        opc[0] = b;
4005
        /* Convert GPR destination to %rAX. */
4006
1.31k
        rex_prefix &= ~REX_R;
4007
1.31k
        vex.r = 1;
4008
1.31k
        if ( !mode_64bit() )
4009
790
            vex.w = 0;
4010
1.31k
        opc[1] = modrm & 0xc7;
4011
1.31k
        insn_bytes = PFX_BYTES + 2;
4012
1.94k
    simd_0f_to_gpr:
4013
1.94k
        opc[insn_bytes - PFX_BYTES] = 0xc3;
4014
4015
1.94k
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
4016
4017
1.91k
        if ( vex.opcx == vex_none )
4018
1.25k
        {
4019
1.25k
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
4020
1.25k
                vcpu_must_have(sse2);
4021
681
            else
4022
681
            {
4023
681
                if ( b != 0x50 )
4024
189
                {
4025
189
                    host_and_vcpu_must_have(mmx);
4026
189
                    vcpu_must_have(mmxext);
4027
189
                }
4028
492
                else
4029
681
                    vcpu_must_have(sse);
4030
681
            }
4031
1.25k
            if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
4032
1.06k
                get_fpu(X86EMUL_FPU_xmm);
4033
189
            else
4034
189
                get_fpu(X86EMUL_FPU_mmx);
4035
1.25k
        }
4036
661
        else
4037
661
        {
4038
661
            generate_exception_if(vex.reg != 0xf, X86_EXC_UD);
4039
657
            if ( b == 0x50 || !vex.l )
4040
657
                host_and_vcpu_must_have(avx);
4041
63
            else
4042
657
                host_and_vcpu_must_have(avx2);
4043
657
            get_fpu(X86EMUL_FPU_ymm);
4044
657
        }
4045
4046
1.91k
        copy_REX_VEX(opc, rex_prefix, vex);
4047
1.91k
        invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
4048
4049
1.91k
        put_stub(stub);
4050
4051
1.91k
        ASSERT(!state->simd_size);
4052
1.91k
        dst.bytes = 4;
4053
1.91k
        break;
4054
4055
3
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x54): /* vandp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4056
7
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x55): /* vandnp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4057
11
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x56): /* vorp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4058
19
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x57): /* vxorp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4059
19
        generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) ||
4060
19
                               (ea.type != OP_MEM && evex.brs)),
4061
19
                              X86_EXC_UD);
4062
5
        host_and_vcpu_must_have(avx512dq);
4063
0
        avx512_vlen_check(false);
4064
0
        goto simd_zmm;
4065
4066
4.35k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5a):      /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
4067
                                           /* vcvtp{s,d}2p{s,d} {x,y}mm/mem,{x,y}mm */
4068
                                           /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */
4069
4.35k
        op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) +
4070
4.35k
                         !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK));
4071
4.35k
    simd_0f_cvt:
4072
2.08k
        if ( vex.opcx == vex_none )
4073
1.22k
            goto simd_0f_sse2;
4074
852
        goto simd_0f_avx;
4075
4076
852
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0x5a):   /* vcvtp{s,d}2p{s,d} [xyz]mm/mem,[xyz]mm{k} */
4077
                                           /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm{k} */
4078
5
        op_bytes = 4 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + evex.lr) +
4079
5
                         evex.w);
4080
5
        goto avx512f_all_fp;
4081
4082
0
#ifndef X86EMUL_NO_SIMD
4083
4084
2.17k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x5b):   /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
4085
                                           /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
4086
2.17k
    case X86EMUL_OPC_F3(0x0f, 0x5b):       /* cvttps2dq xmm/mem,xmm */
4087
931
    case X86EMUL_OPC_VEX_F3(0x0f, 0x5b):   /* vcvttps2dq {x,y}mm/mem,{x,y}mm */
4088
931
        d |= TwoOp;
4089
931
        op_bytes = 16 << vex.l;
4090
931
        goto simd_0f_cvt;
4091
4092
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0x5b): /* vcvtps2dq [xyz]mm/mem,[xyz]mm{k} */
4093
3
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x5b): /* vcvttps2dq [xyz]mm/mem,[xyz]mm{k} */
4094
3
        generate_exception_if(evex.w, X86_EXC_UD);
4095
        /* fall through */
4096
4
    case X86EMUL_OPC_EVEX(0x0f, 0x5b):    /* vcvtdq2ps [xyz]mm/mem,[xyz]mm{k} */
4097
                                          /* vcvtqq2ps [xyz]mm/mem,{x,y}mm{k} */
4098
5
    case X86EMUL_OPC_EVEX_F2(0x0f, 0x7a): /* vcvtudq2ps [xyz]mm/mem,[xyz]mm{k} */
4099
                                          /* vcvtuqq2ps [xyz]mm/mem,{x,y}mm{k} */
4100
5
        if ( evex.w )
4101
5
            host_and_vcpu_must_have(avx512dq);
4102
4
        else
4103
4
        {
4104
5
    case X86EMUL_OPC_EVEX(0x0f, 0x78):    /* vcvttp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */
4105
6
    case X86EMUL_OPC_EVEX(0x0f, 0x79):    /* vcvtp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */
4106
6
            host_and_vcpu_must_have(avx512f);
4107
6
        }
4108
0
        if ( ea.type != OP_REG || !evex.brs )
4109
0
            avx512_vlen_check(false);
4110
0
        d |= TwoOp;
4111
0
        op_bytes = 16 << evex.lr;
4112
0
        goto simd_zmm;
4113
4114
0
#endif /* !X86EMUL_NO_SIMD */
4115
4116
810
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */
4117
                                          /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4118
2.22k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */
4119
                                          /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4120
3.71k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x62): /* punpckldq {,x}mm/mem,{,x}mm */
4121
                                          /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4122
4.72k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x68): /* punpckhbw {,x}mm/mem,{,x}mm */
4123
                                          /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4124
5.70k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x69): /* punpckhwd {,x}mm/mem,{,x}mm */
4125
                                          /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4126
6.40k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6a): /* punpckhdq {,x}mm/mem,{,x}mm */
4127
                                          /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4128
6.40k
        op_bytes = vex.pfx ? 16 << vex.l : b & 8 ? 8 : 4;
4129
        /* fall through */
4130
7.90k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x63): /* packssbw {,x}mm/mem,{,x}mm */
4131
                                          /* vpackssbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4132
9.34k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x64): /* pcmpgtb {,x}mm/mem,{,x}mm */
4133
                                          /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4134
10.7k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x65): /* pcmpgtw {,x}mm/mem,{,x}mm */
4135
                                          /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4136
11.9k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x66): /* pcmpgtd {,x}mm/mem,{,x}mm */
4137
                                          /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4138
13.2k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x67): /* packusbw {,x}mm/mem,{,x}mm */
4139
                                          /* vpackusbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4140
14.4k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6b): /* packsswd {,x}mm/mem,{,x}mm */
4141
                                          /* vpacksswd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4142
14.4k
#ifndef X86EMUL_NO_SIMD
4143
14.4k
    case X86EMUL_OPC_66(0x0f, 0x6c):     /* punpcklqdq xmm/m128,xmm */
4144
5.01k
    case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4145
5.08k
    case X86EMUL_OPC_66(0x0f, 0x6d):     /* punpckhqdq xmm/m128,xmm */
4146
5.15k
    case X86EMUL_OPC_VEX_66(0x0f, 0x6d): /* vpunpckhqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4147
17.2k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x74): /* pcmpeqb {,x}mm/mem,{,x}mm */
4148
                                          /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4149
19.4k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x75): /* pcmpeqw {,x}mm/mem,{,x}mm */
4150
                                          /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4151
20.6k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x76): /* pcmpeqd {,x}mm/mem,{,x}mm */
4152
                                          /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4153
21.5k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd1): /* psrlw {,x}mm/mem,{,x}mm */
4154
                                          /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */
4155
23.6k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd2): /* psrld {,x}mm/mem,{,x}mm */
4156
                                          /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */
4157
26.1k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd3): /* psrlq {,x}mm/mem,{,x}mm */
4158
                                          /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */
4159
26.1k
    case X86EMUL_OPC_66(0x0f, 0xd4):     /* paddq xmm/m128,xmm */
4160
9.38k
    case X86EMUL_OPC_VEX_66(0x0f, 0xd4): /* vpaddq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4161
29.0k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd5): /* pmullw {,x}mm/mem,{,x}mm */
4162
                                          /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4163
29.9k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd8): /* psubusb {,x}mm/mem,{,x}mm */
4164
                                          /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4165
31.1k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd9): /* psubusw {,x}mm/mem,{,x}mm */
4166
                                          /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4167
31.1k
    case X86EMUL_OPC_66(0x0f, 0xda):     /* pminub xmm/m128,xmm */
4168
10.7k
    case X86EMUL_OPC_VEX_66(0x0f, 0xda): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
4169
33.9k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdb): /* pand {,x}mm/mem,{,x}mm */
4170
                                          /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */
4171
35.0k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdc): /* paddusb {,x}mm/mem,{,x}mm */
4172
                                          /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4173
36.7k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdd): /* paddusw {,x}mm/mem,{,x}mm */
4174
                                          /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4175
36.7k
    case X86EMUL_OPC_66(0x0f, 0xde):     /* pmaxub xmm/m128,xmm */
4176
12.5k
    case X86EMUL_OPC_VEX_66(0x0f, 0xde): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
4177
38.8k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdf): /* pandn {,x}mm/mem,{,x}mm */
4178
                                          /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */
4179
38.8k
    case X86EMUL_OPC_66(0x0f, 0xe0):     /* pavgb xmm/m128,xmm */
4180
13.1k
    case X86EMUL_OPC_VEX_66(0x0f, 0xe0): /* vpavgb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4181
39.7k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe1): /* psraw {,x}mm/mem,{,x}mm */
4182
                                          /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */
4183
40.3k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe2): /* psrad {,x}mm/mem,{,x}mm */
4184
                                          /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */
4185
40.3k
    case X86EMUL_OPC_66(0x0f, 0xe3):     /* pavgw xmm/m128,xmm */
4186
13.8k
    case X86EMUL_OPC_VEX_66(0x0f, 0xe3): /* vpavgw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4187
13.9k
    case X86EMUL_OPC_66(0x0f, 0xe4):     /* pmulhuw xmm/m128,xmm */
4188
14.0k
    case X86EMUL_OPC_VEX_66(0x0f, 0xe4): /* vpmulhuw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4189
42.4k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe5): /* pmulhw {,x}mm/mem,{,x}mm */
4190
                                          /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4191
43.3k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe8): /* psubsb {,x}mm/mem,{,x}mm */
4192
                                          /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4193
44.5k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe9): /* psubsw {,x}mm/mem,{,x}mm */
4194
                                          /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4195
44.5k
    case X86EMUL_OPC_66(0x0f, 0xea):     /* pminsw xmm/m128,xmm */
4196
15.3k
    case X86EMUL_OPC_VEX_66(0x0f, 0xea): /* vpminsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4197
47.5k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xeb): /* por {,x}mm/mem,{,x}mm */
4198
                                          /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */
4199
48.6k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xec): /* paddsb {,x}mm/mem,{,x}mm */
4200
                                          /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4201
49.4k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xed): /* paddsw {,x}mm/mem,{,x}mm */
4202
                                          /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4203
49.4k
    case X86EMUL_OPC_66(0x0f, 0xee):     /* pmaxsw xmm/m128,xmm */
4204
16.9k
    case X86EMUL_OPC_VEX_66(0x0f, 0xee): /* vpmaxsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4205
51.8k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xef): /* pxor {,x}mm/mem,{,x}mm */
4206
                                          /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */
4207
53.2k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf1): /* psllw {,x}mm/mem,{,x}mm */
4208
                                          /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */
4209
54.1k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf2): /* pslld {,x}mm/mem,{,x}mm */
4210
                                          /* vpslld xmm/m128,{x,y}mm,{x,y}mm */
4211
55.6k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf3): /* psllq {,x}mm/mem,{,x}mm */
4212
                                          /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */
4213
55.6k
    case X86EMUL_OPC_66(0x0f, 0xf4):     /* pmuludq xmm/m128,xmm */
4214
19.0k
    case X86EMUL_OPC_VEX_66(0x0f, 0xf4): /* vpmuludq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4215
57.6k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf5): /* pmaddwd {,x}mm/mem,{,x}mm */
4216
                                          /* vpmaddwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4217
57.6k
    case X86EMUL_OPC_66(0x0f, 0xf6):     /* psadbw xmm/m128,xmm */
4218
19.6k
    case X86EMUL_OPC_VEX_66(0x0f, 0xf6): /* vpsadbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4219
59.7k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf8): /* psubb {,x}mm/mem,{,x}mm */
4220
                                          /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4221
60.7k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf9): /* psubw {,x}mm/mem,{,x}mm */
4222
                                          /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4223
63.2k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfa): /* psubd {,x}mm/mem,{,x}mm */
4224
                                          /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4225
63.2k
    case X86EMUL_OPC_66(0x0f, 0xfb):     /* psubq xmm/m128,xmm */
4226
21.3k
    case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4227
21.3k
#endif /* !X86EMUL_NO_SIMD */
4228
64.5k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfc): /* paddb {,x}mm/mem,{,x}mm */
4229
                                          /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4230
66.1k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfd): /* paddw {,x}mm/mem,{,x}mm */
4231
                                          /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4232
66.8k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfe): /* paddd {,x}mm/mem,{,x}mm */
4233
                                          /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4234
66.8k
    simd_0f_int:
4235
24.6k
#ifndef X86EMUL_NO_SIMD
4236
24.6k
        if ( vex.opcx != vex_none )
4237
7.17k
        {
4238
7.26k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x00): /* vpshufb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4239
7.32k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x01): /* vphaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4240
7.39k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x02): /* vphaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4241
7.45k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x03): /* vphaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4242
7.52k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x04): /* vpmaddubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4243
7.54k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x05): /* vphsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4244
7.60k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x06): /* vphsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4245
7.67k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x07): /* vphsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4246
7.74k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x08): /* vpsignb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4247
7.80k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x09): /* vpsignw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4248
7.87k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0a): /* vpsignd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4249
7.94k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0b): /* vpmulhrsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4250
8.00k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1c): /* vpabsb {x,y}mm/mem,{x,y}mm */
4251
8.07k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1d): /* vpabsw {x,y}mm/mem,{x,y}mm */
4252
8.13k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1e): /* vpabsd {x,y}mm/mem,{x,y}mm */
4253
8.20k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4254
8.27k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4255
8.33k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
4256
8.41k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */
4257
8.47k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4258
8.54k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4259
8.61k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
4260
8.67k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3b): /* vpminud {x,y}mm/mem,{x,y}mm,{x,y}mm */
4261
8.76k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3c): /* vpmaxsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
4262
8.83k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3d): /* vpmaxsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4263
8.90k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3e): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
4264
8.96k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3f): /* vpmaxud {x,y}mm/mem,{x,y}mm,{x,y}mm */
4265
9.16k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld {x,y}mm/mem,{x,y}mm,{x,y}mm */
4266
9.16k
            if ( !vex.l )
4267
7.51k
                goto simd_0f_avx;
4268
            /* fall through */
4269
1.71k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x45): /* vpsrlv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */
4270
1.78k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x47): /* vpsllv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */
4271
2.42k
    simd_0f_avx2:
4272
2.42k
            host_and_vcpu_must_have(avx2);
4273
2.42k
            goto simd_0f_ymm;
4274
2.42k
        }
4275
17.4k
        if ( vex.pfx )
4276
9.15k
            goto simd_0f_sse2;
4277
8.31k
#endif /* !X86EMUL_NO_SIMD */
4278
10.9k
    simd_0f_mmx:
4279
10.9k
        host_and_vcpu_must_have(mmx);
4280
10.9k
        get_fpu(X86EMUL_FPU_mmx);
4281
10.8k
        goto simd_0f_common;
4282
4283
10.8k
#ifndef X86EMUL_NO_SIMD
4284
4285
10.8k
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm */
4286
2
        generate_exception_if(evex.opmsk, X86_EXC_UD);
4287
        /* fall through */
4288
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0x60): /* vpunpcklbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4289
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0x61): /* vpunpcklwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4290
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0x68): /* vpunpckhbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4291
5
    case X86EMUL_OPC_EVEX_66(0x0f, 0x69): /* vpunpckhwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4292
5
        op_bytes = 16 << evex.lr;
4293
        /* fall through */
4294
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0x63): /* vpacksswb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4295
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0x67): /* vpackuswb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4296
8
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,[xyz]mm,[xyz]mm{k} */
4297
9
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,[xyz]mm,[xyz]mm{k} */
4298
10
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */
4299
11
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4300
12
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x00): /* vpshufb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4301
13
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x04): /* vpmaddubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4302
13
        fault_suppression = false;
4303
        /* fall through */
4304
14
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4305
15
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4306
16
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4307
17
    case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4308
18
    case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4309
19
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4310
20
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4311
21
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4312
22
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4313
23
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4314
24
    case X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4315
25
    case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4316
26
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4317
27
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4318
28
    case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4319
29
    case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4320
30
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x0b): /* vpmulhrsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4321
31
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1c): /* vpabsb [xyz]mm/mem,[xyz]mm{k} */
4322
32
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1d): /* vpabsw [xyz]mm/mem,[xyz]mm{k} */
4323
32
        host_and_vcpu_must_have(avx512bw);
4324
0
        generate_exception_if(evex.brs, X86_EXC_UD);
4325
0
        elem_bytes = 1 << (b & 1);
4326
0
        goto avx512f_no_sae;
4327
4328
1
    case X86EMUL_OPC_EVEX_66(0x0f, 0x62): /* vpunpckldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4329
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6a): /* vpunpckhdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4330
3
        generate_exception_if(evex.w, X86_EXC_UD);
4331
2
        fault_suppression = false;
4332
2
        op_bytes = 16 << evex.lr;
4333
2
        goto avx512f_no_sae;
4334
4335
1
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
4336
2
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
4337
2
        op_bytes = 16 << evex.lr;
4338
        /* fall through */
4339
3
    case X86EMUL_OPC_EVEX_66(0x0f,   0x64): /* vpcmpeqb [xyz]mm/mem,[xyz]mm,k{k} */
4340
4
    case X86EMUL_OPC_EVEX_66(0x0f,   0x65): /* vpcmpeqw [xyz]mm/mem,[xyz]mm,k{k} */
4341
6
    case X86EMUL_OPC_EVEX_66(0x0f,   0x66): /* vpcmpeqd [xyz]mm/mem,[xyz]mm,k{k} */
4342
7
    case X86EMUL_OPC_EVEX_66(0x0f,   0x74): /* vpcmpgtb [xyz]mm/mem,[xyz]mm,k{k} */
4343
8
    case X86EMUL_OPC_EVEX_66(0x0f,   0x75): /* vpcmpgtw [xyz]mm/mem,[xyz]mm,k{k} */
4344
10
    case X86EMUL_OPC_EVEX_66(0x0f,   0x76): /* vpcmpgtd [xyz]mm/mem,[xyz]mm,k{k} */
4345
11
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x26): /* vptestm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
4346
13
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x27): /* vptestm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
4347
14
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x29): /* vpcmpeqq [xyz]mm/mem,[xyz]mm,k{k} */
4348
15
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x37): /* vpcmpgtq [xyz]mm/mem,[xyz]mm,k{k} */
4349
15
        generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD);
4350
6
        if ( b & (ext == ext_0f38 ? 1 : 2) )
4351
4
        {
4352
4
            generate_exception_if(b != 0x27 && evex.w != (b & 1), X86_EXC_UD);
4353
2
            goto avx512f_no_sae;
4354
4
        }
4355
2
        host_and_vcpu_must_have(avx512bw);
4356
0
        generate_exception_if(evex.brs, X86_EXC_UD);
4357
0
        elem_bytes = 1 << (ext == ext_0f ? b & 1 : evex.w);
4358
0
        avx512_vlen_check(false);
4359
0
        goto simd_zmm;
4360
4361
1
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6b): /* vpackssdw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4362
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x2b): /* vpackusdw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4363
4
        generate_exception_if(evex.w || evex.brs, X86_EXC_UD);
4364
1
        fault_suppression = false;
4365
1
        goto avx512f_no_sae;
4366
4367
1
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6c): /* vpunpcklqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4368
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6d): /* vpunpckhqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4369
2
        fault_suppression = false;
4370
        /* fall through */
4371
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4372
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4373
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
4374
5
        generate_exception_if(!evex.w, X86_EXC_UD);
4375
2
        goto avx512f_no_sae;
4376
4377
2
#endif /* X86EMUL_NO_SIMD */
4378
4379
1.26k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */
4380
                                          /* vmov{d,q} r/m,xmm */
4381
3.47k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */
4382
                                          /* vmov{d,q} xmm,r/m */
4383
3.47k
        if ( vex.opcx != vex_none )
4384
323
        {
4385
323
            generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
4386
317
            host_and_vcpu_must_have(avx);
4387
317
            get_fpu(X86EMUL_FPU_ymm);
4388
317
        }
4389
1.02k
        else if ( vex.pfx )
4390
546
        {
4391
546
            vcpu_must_have(sse2);
4392
546
            get_fpu(X86EMUL_FPU_xmm);
4393
546
        }
4394
477
        else
4395
477
        {
4396
477
            host_and_vcpu_must_have(mmx);
4397
477
            get_fpu(X86EMUL_FPU_mmx);
4398
477
        }
4399
4400
2.22k
    simd_0f_rm:
4401
2.22k
        opc = init_prefixes(stub);
4402
0
        opc[0] = b;
4403
        /* Convert memory/GPR operand to (%rAX). */
4404
2.22k
        rex_prefix &= ~REX_B;
4405
2.22k
        vex.b = 1;
4406
2.22k
        if ( !mode_64bit() )
4407
969
            vex.w = 0;
4408
2.22k
        opc[1] = modrm & 0x38;
4409
2.22k
        insn_bytes = PFX_BYTES + 2;
4410
2.22k
        opc[2] = 0xc3;
4411
4412
2.22k
        copy_REX_VEX(opc, rex_prefix, vex);
4413
2.22k
        invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
4414
2.22k
        dst.val = src.val;
4415
4416
2.22k
        put_stub(stub);
4417
2.22k
        ASSERT(!state->simd_size);
4418
2.22k
        break;
4419
4420
2.22k
#ifndef X86EMUL_NO_SIMD
4421
4422
2.22k
    case X86EMUL_OPC_EVEX_66(5, 0x7e): /* vmovw xmm,r/m16 */
4423
2
        ASSERT(dst.bytes >= 4);
4424
2
        if ( dst.type == OP_MEM )
4425
1
            dst.bytes = 2;
4426
        /* fall through */
4427
3
    case X86EMUL_OPC_EVEX_66(5, 0x6e): /* vmovw r/m16,xmm */
4428
3
        host_and_vcpu_must_have(avx512_fp16);
4429
0
        generate_exception_if(evex.w, X86_EXC_UD);
4430
        /* fall through */
4431
1
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
4432
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
4433
7
        generate_exception_if((evex.lr || evex.opmsk || evex.brs ||
4434
7
                               evex.reg != 0xf || !evex.RX),
4435
7
                              X86_EXC_UD);
4436
1
        host_and_vcpu_must_have(avx512f);
4437
0
        get_fpu(X86EMUL_FPU_zmm);
4438
4439
0
        opc = init_evex(stub);
4440
0
        opc[0] = b;
4441
        /* Convert memory/GPR operand to (%rAX). */
4442
0
        evex.b = 1;
4443
0
        if ( !mode_64bit() )
4444
0
            evex.w = 0;
4445
0
        opc[1] = modrm & 0x38;
4446
0
        insn_bytes = EVEX_PFX_BYTES + 2;
4447
0
        opc[2] = 0xc3;
4448
4449
0
        copy_EVEX(opc, evex);
4450
0
        invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
4451
0
        dst.val = src.val;
4452
4453
0
        put_stub(stub);
4454
0
        ASSERT(!state->simd_size);
4455
0
        break;
4456
4457
402
    case X86EMUL_OPC_66(0x0f, 0xe7):     /* movntdq xmm,m128 */
4458
608
    case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq {x,y}mm,mem */
4459
608
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
4460
608
        sfence = true;
4461
        /* fall through */
4462
674
    case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
4463
769
    case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa {x,y}mm/mem,{x,y}mm */
4464
835
    case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
4465
906
    case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu {x,y}mm/mem,{x,y}mm */
4466
1.23k
    case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
4467
2.32k
    case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/mem */
4468
2.52k
    case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
4469
2.62k
    case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */
4470
2.78k
    movdqa:
4471
2.78k
        d |= TwoOp;
4472
2.78k
        op_bytes = 16 << vex.l;
4473
2.78k
        if ( vex.opcx != vex_none )
4474
1.69k
            goto simd_0f_avx;
4475
1.09k
        goto simd_0f_sse2;
4476
4477
1.09k
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe7): /* vmovntdq [xyz]mm,mem */
4478
4
        generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w,
4479
4
                              X86_EXC_UD);
4480
1
        sfence = true;
4481
        /* fall through */
4482
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0x6f): /* vmovdqa{32,64} [xyz]mm/mem,[xyz]mm{k} */
4483
3
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x6f): /* vmovdqu{32,64} [xyz]mm/mem,[xyz]mm{k} */
4484
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0x7f): /* vmovdqa{32,64} [xyz]mm,[xyz]mm/mem{k} */
4485
5
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x7f): /* vmovdqu{32,64} [xyz]mm,[xyz]mm/mem{k} */
4486
6
    vmovdqa:
4487
6
        generate_exception_if(evex.brs, X86_EXC_UD);
4488
5
        d |= TwoOp;
4489
5
        op_bytes = 16 << evex.lr;
4490
5
        goto avx512f_no_sae;
4491
4492
1
    case X86EMUL_OPC_EVEX_F2(0x0f, 0x6f): /* vmovdqu{8,16} [xyz]mm/mem,[xyz]mm{k} */
4493
2
    case X86EMUL_OPC_EVEX_F2(0x0f, 0x7f): /* vmovdqu{8,16} [xyz]mm,[xyz]mm/mem{k} */
4494
2
        host_and_vcpu_must_have(avx512bw);
4495
0
        elem_bytes = 1 << evex.w;
4496
0
        goto vmovdqa;
4497
4498
198
    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
4499
198
        generate_exception_if(vex.l, X86_EXC_UD);
4500
197
        d |= TwoOp;
4501
        /* fall through */
4502
320
    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
4503
320
#endif /* !X86EMUL_NO_SIMD */
4504
320
#ifndef X86EMUL_NO_MMX
4505
392
    case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
4506
616
    case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
4507
616
#endif
4508
616
        op_bytes = 8;
4509
616
        goto simd_0f_int;
4510
4511
0
#ifndef X86EMUL_NO_SIMD
4512
1.26k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x70):/* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
4513
                                         /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
4514
1.26k
    case X86EMUL_OPC_F3(0x0f, 0x70):     /* pshufhw $imm8,xmm/m128,xmm */
4515
940
    case X86EMUL_OPC_VEX_F3(0x0f, 0x70): /* vpshufhw $imm8,{x,y}mm/mem,{x,y}mm */
4516
1.18k
    case X86EMUL_OPC_F2(0x0f, 0x70):     /* pshuflw $imm8,xmm/m128,xmm */
4517
1.21k
    case X86EMUL_OPC_VEX_F2(0x0f, 0x70): /* vpshuflw $imm8,{x,y}mm/mem,{x,y}mm */
4518
1.21k
        d = (d & ~SrcMask) | SrcMem | TwoOp;
4519
1.21k
        op_bytes = vex.pfx ? 16 << vex.l : 8;
4520
1.21k
#endif
4521
2.54k
    simd_0f_int_imm8:
4522
2.54k
        if ( vex.opcx != vex_none )
4523
1.23k
        {
4524
1.23k
#ifndef X86EMUL_NO_SIMD
4525
1.29k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4526
1.36k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4527
1.42k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4528
1.42k
#endif
4529
1.42k
            if ( vex.l )
4530
233
            {
4531
1.15k
    simd_0f_imm8_avx2:
4532
1.15k
                host_and_vcpu_must_have(avx2);
4533
1.15k
            }
4534
1.19k
            else
4535
1.19k
            {
4536
1.19k
#ifndef X86EMUL_NO_SIMD
4537
1.25k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */
4538
1.32k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd $imm8,{x,y}mm/mem,{x,y}mm */
4539
1.51k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss $imm8,xmm/mem,xmm,xmm */
4540
1.55k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0b): /* vroundsd $imm8,xmm/mem,xmm,xmm */
4541
1.59k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4542
1.66k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4543
1.73k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
4544
1.73k
#endif
4545
3.34k
    simd_0f_imm8_avx:
4546
3.34k
                host_and_vcpu_must_have(avx);
4547
3.34k
            }
4548
4.49k
    simd_0f_imm8_ymm:
4549
4.49k
            get_fpu(X86EMUL_FPU_ymm);
4550
4.49k
        }
4551
1.31k
        else if ( vex.pfx )
4552
760
        {
4553
1.21k
    simd_0f_imm8_sse2:
4554
1.21k
            vcpu_must_have(sse2);
4555
1.21k
            get_fpu(X86EMUL_FPU_xmm);
4556
1.21k
        }
4557
551
        else
4558
551
        {
4559
551
            host_and_vcpu_must_have(mmx);
4560
551
            vcpu_must_have(mmxext);
4561
551
            get_fpu(X86EMUL_FPU_mmx);
4562
551
        }
4563
6.63k
    simd_0f_imm8:
4564
6.63k
        opc = init_prefixes(stub);
4565
0
        opc[0] = b;
4566
6.63k
        opc[1] = modrm;
4567
6.63k
        if ( ea.type == OP_MEM )
4568
4.17k
        {
4569
            /* Convert memory operand to (%rAX). */
4570
4.17k
            rex_prefix &= ~REX_B;
4571
4.17k
            vex.b = 1;
4572
4.17k
            opc[1] &= 0x38;
4573
4.17k
        }
4574
6.63k
        opc[2] = imm1;
4575
6.63k
        insn_bytes = PFX_BYTES + 3;
4576
6.63k
        break;
4577
4578
0
#ifndef X86EMUL_NO_SIMD
4579
4580
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0x70): /* vpshufd $imm8,[xyz]mm/mem,[xyz]mm{k} */
4581
3
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x70): /* vpshufhw $imm8,[xyz]mm/mem,[xyz]mm{k} */
4582
4
    case X86EMUL_OPC_EVEX_F2(0x0f, 0x70): /* vpshuflw $imm8,[xyz]mm/mem,[xyz]mm{k} */
4583
4
        if ( evex.pfx == vex_66 )
4584
2
            generate_exception_if(evex.w, X86_EXC_UD);
4585
2
        else
4586
2
        {
4587
2
            host_and_vcpu_must_have(avx512bw);
4588
0
            generate_exception_if(evex.brs, X86_EXC_UD);
4589
0
        }
4590
1
        d = (d & ~SrcMask) | SrcMem | TwoOp;
4591
1
        op_bytes = 16 << evex.lr;
4592
1
        fault_suppression = false;
4593
1
        goto avx512f_imm8_no_sae;
4594
4595
1.00k
    CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
4596
1.01k
    case X86EMUL_OPC_VEX_66(0x0f, 0x71):
4597
2.61k
    CASE_SIMD_PACKED_INT(0x0f, 0x72):    /* Grp13 */
4598
2.61k
    case X86EMUL_OPC_VEX_66(0x0f, 0x72):
4599
1.44k
        switch ( modrm_reg & 7 )
4600
1.44k
        {
4601
315
        case 2: /* psrl{w,d} $imm8,{,x}mm */
4602
                /* vpsrl{w,d} $imm8,{x,y}mm,{x,y}mm */
4603
598
        case 4: /* psra{w,d} $imm8,{,x}mm */
4604
                /* vpsra{w,d} $imm8,{x,y}mm,{x,y}mm */
4605
1.43k
        case 6: /* psll{w,d} $imm8,{,x}mm */
4606
                /* vpsll{w,d} $imm8,{x,y}mm,{x,y}mm */
4607
1.43k
            break;
4608
5
        default:
4609
5
            goto unrecognized_insn;
4610
1.44k
        }
4611
2.69k
    simd_0f_shift_imm:
4612
2.69k
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
4613
4614
2.68k
        if ( vex.opcx != vex_none )
4615
625
        {
4616
625
            if ( vex.l )
4617
625
                host_and_vcpu_must_have(avx2);
4618
466
            else
4619
625
                host_and_vcpu_must_have(avx);
4620
625
            get_fpu(X86EMUL_FPU_ymm);
4621
625
        }
4622
2.06k
        else if ( vex.pfx )
4623
1.14k
        {
4624
1.14k
            vcpu_must_have(sse2);
4625
1.14k
            get_fpu(X86EMUL_FPU_xmm);
4626
1.14k
        }
4627
920
        else
4628
920
        {
4629
920
            host_and_vcpu_must_have(mmx);
4630
920
            get_fpu(X86EMUL_FPU_mmx);
4631
920
        }
4632
4633
2.67k
        opc = init_prefixes(stub);
4634
0
        opc[0] = b;
4635
2.67k
        opc[1] = modrm;
4636
2.67k
        opc[2] = imm1;
4637
2.67k
        insn_bytes = PFX_BYTES + 3;
4638
4639
2.67k
#endif /* X86EMUL_NO_SIMD */
4640
4641
3.08k
    simd_0f_reg_only:
4642
3.08k
        opc[insn_bytes - PFX_BYTES] = 0xc3;
4643
4644
3.08k
        copy_REX_VEX(opc, rex_prefix, vex);
4645
3.08k
        invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
4646
4647
3.08k
        put_stub(stub);
4648
3.08k
        ASSERT(!state->simd_size);
4649
3.08k
        break;
4650
4651
3.08k
#ifndef X86EMUL_NO_SIMD
4652
4653
3.08k
    case X86EMUL_OPC_EVEX_66(0x0f, 0x71): /* Grp12 */
4654
4
        switch ( modrm_reg & 7 )
4655
4
        {
4656
1
        case 2: /* vpsrlw $imm8,[xyz]mm/mem,[xyz]mm{k} */
4657
2
        case 4: /* vpsraw $imm8,[xyz]mm/mem,[xyz]mm{k} */
4658
3
        case 6: /* vpsllw $imm8,[xyz]mm/mem,[xyz]mm{k} */
4659
4
        avx512bw_shift_imm:
4660
4
            fault_suppression = false;
4661
4
            op_bytes = 16 << evex.lr;
4662
4
            state->simd_size = simd_packed_int;
4663
4
            goto avx512bw_imm;
4664
4
        }
4665
1
        goto unrecognized_insn;
4666
4667
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0x72): /* Grp13 */
4668
6
        switch ( modrm_reg & 7 )
4669
6
        {
4670
1
        case 2: /* vpsrld $imm8,[xyz]mm/mem,[xyz]mm{k} */
4671
2
        case 6: /* vpslld $imm8,[xyz]mm/mem,[xyz]mm{k} */
4672
2
            generate_exception_if(evex.w, X86_EXC_UD);
4673
            /* fall through */
4674
2
        case 0: /* vpror{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */
4675
3
        case 1: /* vprol{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */
4676
4
        case 4: /* vpsra{d,q} $imm8,[xyz]mm/mem,[xyz]mm{k} */
4677
6
        avx512f_shift_imm:
4678
6
            op_bytes = 16 << evex.lr;
4679
6
            state->simd_size = simd_packed_int;
4680
6
            goto avx512f_imm8_no_sae;
4681
6
        }
4682
1
        goto unrecognized_insn;
4683
4684
1
#endif /* !X86EMUL_NO_SIMD */
4685
1
#ifndef X86EMUL_NO_MMX
4686
4687
423
    case X86EMUL_OPC(0x0f, 0x73):        /* Grp14 */
4688
423
        switch ( modrm_reg & 7 )
4689
423
        {
4690
303
        case 2: /* psrlq $imm8,mm */
4691
422
        case 6: /* psllq $imm8,mm */
4692
422
            goto simd_0f_shift_imm;
4693
423
        }
4694
1
        goto unrecognized_insn;
4695
4696
1
#endif /* !X86EMUL_NO_MMX */
4697
1
#ifndef X86EMUL_NO_SIMD
4698
4699
550
    case X86EMUL_OPC_66(0x0f, 0x73):
4700
835
    case X86EMUL_OPC_VEX_66(0x0f, 0x73):
4701
835
        switch ( modrm_reg & 7 )
4702
835
        {
4703
78
        case 2: /* psrlq $imm8,xmm */
4704
                /* vpsrlq $imm8,{x,y}mm,{x,y}mm */
4705
400
        case 3: /* psrldq $imm8,xmm */
4706
                /* vpsrldq $imm8,{x,y}mm,{x,y}mm */
4707
603
        case 6: /* psllq $imm8,xmm */
4708
                /* vpsllq $imm8,{x,y}mm,{x,y}mm */
4709
834
        case 7: /* pslldq $imm8,xmm */
4710
                /* vpslldq $imm8,{x,y}mm,{x,y}mm */
4711
834
            goto simd_0f_shift_imm;
4712
835
        }
4713
1
        goto unrecognized_insn;
4714
4715
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0x73): /* Grp14 */
4716
6
        switch ( modrm_reg & 7 )
4717
6
        {
4718
1
        case 2: /* vpsrlq $imm8,[xyz]mm/mem,[xyz]mm{k} */
4719
3
        case 6: /* vpsllq $imm8,[xyz]mm/mem,[xyz]mm{k} */
4720
3
            generate_exception_if(!evex.w, X86_EXC_UD);
4721
2
            goto avx512f_shift_imm;
4722
2
        case 3: /* vpsrldq $imm8,[xyz]mm/mem,[xyz]mm */
4723
2
        case 7: /* vpslldq $imm8,[xyz]mm/mem,[xyz]mm */
4724
2
            generate_exception_if(evex.opmsk, X86_EXC_UD);
4725
1
            goto avx512bw_shift_imm;
4726
6
        }
4727
1
        goto unrecognized_insn;
4728
4729
1
#endif /* !X86EMUL_NO_SIMD */
4730
4731
1
#ifndef X86EMUL_NO_MMX
4732
207
    case X86EMUL_OPC(0x0f, 0x77):        /* emms */
4733
207
#endif
4734
207
#ifndef X86EMUL_NO_SIMD
4735
690
    case X86EMUL_OPC_VEX(0x0f, 0x77):    /* vzero{all,upper} */
4736
690
        if ( vex.opcx != vex_none )
4737
483
        {
4738
483
            generate_exception_if(vex.reg != 0xf, X86_EXC_UD);
4739
482
            host_and_vcpu_must_have(avx);
4740
482
            get_fpu(X86EMUL_FPU_ymm);
4741
4742
481
#ifdef __x86_64__
4743
481
            if ( !mode_64bit() )
4744
285
            {
4745
                /*
4746
                 * Can't use the actual instructions here, as we must not
4747
                 * touch YMM8...YMM15.
4748
                 */
4749
285
                if ( vex.l )
4750
76
                {
4751
                    /* vpxor %xmmN, %xmmN, %xmmN */
4752
76
                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
4753
76
                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
4754
76
                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
4755
76
                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
4756
76
                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
4757
76
                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
4758
76
                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
4759
76
                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
4760
76
                }
4761
209
                else
4762
209
                {
4763
                    /* vpor %xmmN, %xmmN, %xmmN */
4764
209
                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
4765
209
                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
4766
209
                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
4767
209
                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
4768
209
                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
4769
209
                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
4770
209
                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
4771
209
                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
4772
209
                }
4773
4774
285
                ASSERT(!state->simd_size);
4775
285
                break;
4776
285
            }
4777
481
#endif
4778
481
        }
4779
207
        else
4780
207
#endif /* !X86EMUL_NO_SIMD */
4781
207
        {
4782
207
            host_and_vcpu_must_have(mmx);
4783
207
            get_fpu(X86EMUL_FPU_mmx);
4784
207
        }
4785
4786
        /* Work around erratum BT36. */
4787
402
        vex.w = 0;
4788
4789
402
        opc = init_prefixes(stub);
4790
0
        opc[0] = b;
4791
402
        insn_bytes = PFX_BYTES + 1;
4792
402
        goto simd_0f_reg_only;
4793
4794
0
#ifndef X86EMUL_NO_SIMD
4795
4796
2
    case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
4797
2
        switch ( modrm_reg & 7 )
4798
2
        {
4799
1
        case 0: /* extrq $imm8,$imm8,xmm */
4800
1
            break;
4801
1
        default:
4802
1
            goto unrecognized_insn;
4803
2
        }
4804
        /* fall through */
4805
2
    case X86EMUL_OPC_F2(0x0f, 0x78):     /* insertq $imm8,$imm8,xmm,xmm */
4806
2
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
4807
4808
1
        host_and_vcpu_must_have(sse4a);
4809
1
        get_fpu(X86EMUL_FPU_xmm);
4810
4811
1
        opc = init_prefixes(stub);
4812
0
        opc[0] = b;
4813
1
        opc[1] = modrm;
4814
1
        opc[2] = imm1;
4815
1
        opc[3] = imm2;
4816
1
        insn_bytes = PFX_BYTES + 4;
4817
1
        goto simd_0f_reg_only;
4818
4819
2
    case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
4820
3
    case X86EMUL_OPC_F2(0x0f, 0x79):     /* insertq xmm,xmm */
4821
3
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
4822
1
        host_and_vcpu_must_have(sse4a);
4823
1
        op_bytes = 8;
4824
1
        goto simd_0f_xmm;
4825
4826
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe6):   /* vcvttpd2dq [xyz]mm/mem,{x,y}mm{k} */
4827
3
    case X86EMUL_OPC_EVEX_F2(0x0f, 0xe6):   /* vcvtpd2dq [xyz]mm/mem,{x,y}mm{k} */
4828
3
        generate_exception_if(!evex.w, X86_EXC_UD);
4829
        /* fall through */
4830
4
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x7a):   /* vcvtudq2pd {x,y}mm/mem,[xyz]mm{k} */
4831
                                            /* vcvtuqq2pd [xyz]mm/mem,[xyz]mm{k} */
4832
7
    case X86EMUL_OPC_EVEX_F3(0x0f, 0xe6):   /* vcvtdq2pd {x,y}mm/mem,[xyz]mm{k} */
4833
                                            /* vcvtqq2pd [xyz]mm/mem,[xyz]mm{k} */
4834
7
        if ( evex.pfx != vex_f3 )
4835
7
            host_and_vcpu_must_have(avx512f);
4836
6
        else if ( evex.w )
4837
2
        {
4838
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0x78):   /* vcvttps2uqq {x,y}mm/mem,[xyz]mm{k} */
4839
                                            /* vcvttpd2uqq [xyz]mm/mem,[xyz]mm{k} */
4840
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0x79):   /* vcvtps2uqq {x,y}mm/mem,[xyz]mm{k} */
4841
                                            /* vcvtpd2uqq [xyz]mm/mem,[xyz]mm{k} */
4842
5
    case X86EMUL_OPC_EVEX_66(0x0f, 0x7a):   /* vcvttps2qq {x,y}mm/mem,[xyz]mm{k} */
4843
                                            /* vcvttpd2qq [xyz]mm/mem,[xyz]mm{k} */
4844
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0x7b):   /* vcvtps2qq {x,y}mm/mem,[xyz]mm{k} */
4845
                                            /* vcvtpd2qq [xyz]mm/mem,[xyz]mm{k} */
4846
7
            host_and_vcpu_must_have(avx512dq);
4847
7
        }
4848
4
        else
4849
4
        {
4850
4
            host_and_vcpu_must_have(avx512f);
4851
            /*
4852
             * While SDM version 085 has explicit wording towards embedded
4853
             * rounding being ignored, it's still not entirely unambiguous with
4854
             * the exception type referred to. Be on the safe side for the stub.
4855
             */
4856
0
            if ( ea.type != OP_MEM && evex.brs )
4857
0
            {
4858
0
                evex.brs = 0;
4859
0
                evex.lr = 2;
4860
0
            }
4861
0
        }
4862
0
        if ( ea.type != OP_REG || !evex.brs )
4863
0
            avx512_vlen_check(false);
4864
0
        d |= TwoOp;
4865
0
        op_bytes = 8 << (evex.w + evex.lr);
4866
0
        goto simd_zmm;
4867
4868
64
    case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu m128,xmm */
4869
122
    case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */
4870
122
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
4871
        /* fall through */
4872
188
    case X86EMUL_OPC_66(0x0f, 0x7c):     /* haddpd xmm/m128,xmm */
4873
254
    case X86EMUL_OPC_F2(0x0f, 0x7c):     /* haddps xmm/m128,xmm */
4874
342
    case X86EMUL_OPC_VEX_66(0x0f, 0x7c): /* vhaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4875
536
    case X86EMUL_OPC_VEX_F2(0x0f, 0x7c): /* vhaddps {x,y}mm/mem,{x,y}mm,{x,y}mm */
4876
698
    case X86EMUL_OPC_66(0x0f, 0x7d):     /* hsubpd xmm/m128,xmm */
4877
764
    case X86EMUL_OPC_F2(0x0f, 0x7d):     /* hsubps xmm/m128,xmm */
4878
1.08k
    case X86EMUL_OPC_VEX_66(0x0f, 0x7d): /* vhsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4879
1.18k
    case X86EMUL_OPC_VEX_F2(0x0f, 0x7d): /* vhsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
4880
1.37k
    case X86EMUL_OPC_66(0x0f, 0xd0):     /* addsubpd xmm/m128,xmm */
4881
1.57k
    case X86EMUL_OPC_F2(0x0f, 0xd0):     /* addsubps xmm/m128,xmm */
4882
1.76k
    case X86EMUL_OPC_VEX_66(0x0f, 0xd0): /* vaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
4883
1.83k
    case X86EMUL_OPC_VEX_F2(0x0f, 0xd0): /* vaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
4884
1.83k
        op_bytes = 16 << vex.l;
4885
1.83k
        goto simd_0f_sse3_avx;
4886
4887
66
    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
4888
132
    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
4889
132
        generate_exception_if(vex.l, X86_EXC_UD);
4890
131
        op_bytes = 8;
4891
131
        goto simd_0f_int;
4892
4893
1
    case X86EMUL_OPC_EVEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
4894
5
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
4895
5
        generate_exception_if(evex.lr || !evex.w || evex.opmsk || evex.brs,
4896
5
                              X86_EXC_UD);
4897
1
        host_and_vcpu_must_have(avx512f);
4898
0
        d |= TwoOp;
4899
0
        op_bytes = 8;
4900
0
        goto simd_zmm;
4901
4902
0
#endif /* !X86EMUL_NO_SIMD */
4903
4904
2.21k
    case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */
4905
2.21k
        if ( test_cc(b, _regs.eflags) )
4906
1.35k
            jmp_rel((int32_t)src.val);
4907
2.19k
        adjust_bnd(ctxt, ops, vex.pfx);
4908
2.19k
        break;
4909
4910
2.43k
    case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */
4911
2.43k
        dst.val = test_cc(b, _regs.eflags);
4912
2.43k
        break;
4913
4914
0
#ifndef X86EMUL_NO_SIMD
4915
4916
1
    case X86EMUL_OPC_VEX(0x0f, 0x91):    /* kmov{w,q} k,mem */
4917
3
    case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */
4918
3
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
4919
        /* fall through */
4920
3
    case X86EMUL_OPC_VEX(0x0f, 0x90):    /* kmov{w,q} k/mem,k */
4921
4
    case X86EMUL_OPC_VEX_66(0x0f, 0x90): /* kmov{b,d} k/mem,k */
4922
4
        generate_exception_if(vex.l || !vex.r, X86_EXC_UD);
4923
1
        host_and_vcpu_must_have(avx512f);
4924
0
        if ( vex.w )
4925
0
        {
4926
0
            host_and_vcpu_must_have(avx512bw);
4927
0
            op_bytes = 4 << !vex.pfx;
4928
0
        }
4929
0
        else if ( vex.pfx )
4930
0
        {
4931
0
            host_and_vcpu_must_have(avx512dq);
4932
0
            op_bytes = 1;
4933
0
        }
4934
0
        else
4935
0
            op_bytes = 2;
4936
4937
0
        get_fpu(X86EMUL_FPU_opmask);
4938
4939
0
        opc = init_prefixes(stub);
4940
0
        opc[0] = b;
4941
0
        opc[1] = modrm;
4942
0
        if ( ea.type == OP_MEM )
4943
0
        {
4944
            /* convert memory operand to (%rAX) */
4945
0
            vex.b = 1;
4946
0
            opc[1] &= 0x38;
4947
0
        }
4948
0
        insn_bytes = PFX_BYTES + 2;
4949
0
        break;
4950
4951
1
    case X86EMUL_OPC_VEX(0x0f, 0x92):    /* kmovw r32,k */
4952
4
    case X86EMUL_OPC_VEX_66(0x0f, 0x92): /* kmovb r32,k */
4953
7
    case X86EMUL_OPC_VEX_F2(0x0f, 0x92): /* kmov{d,q} reg,k */
4954
7
        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
4955
7
                              ea.type != OP_REG, X86_EXC_UD);
4956
4957
1
        host_and_vcpu_must_have(avx512f);
4958
0
        if ( vex.pfx == vex_f2 )
4959
0
            host_and_vcpu_must_have(avx512bw);
4960
0
        else
4961
0
        {
4962
0
            generate_exception_if(vex.w, X86_EXC_UD);
4963
0
            if ( vex.pfx )
4964
0
                host_and_vcpu_must_have(avx512dq);
4965
0
        }
4966
4967
0
        get_fpu(X86EMUL_FPU_opmask);
4968
4969
0
        opc = init_prefixes(stub);
4970
0
        opc[0] = b;
4971
        /* Convert GPR source to %rAX. */
4972
0
        vex.b = 1;
4973
0
        if ( !mode_64bit() )
4974
0
            vex.w = 0;
4975
0
        opc[1] = modrm & 0xf8;
4976
0
        opc[2] = 0xc3;
4977
4978
0
        copy_VEX(opc, vex);
4979
0
        ea.reg = decode_gpr(&_regs, modrm_rm);
4980
0
        invoke_stub("", "", "=m" (dummy) : "a" (*ea.reg));
4981
4982
0
        put_stub(stub);
4983
4984
0
        ASSERT(!state->simd_size);
4985
0
        dst.type = OP_NONE;
4986
0
        break;
4987
4988
1
    case X86EMUL_OPC_VEX(0x0f, 0x93):    /* kmovw k,r32 */
4989
5
    case X86EMUL_OPC_VEX_66(0x0f, 0x93): /* kmovb k,r32 */
4990
8
    case X86EMUL_OPC_VEX_F2(0x0f, 0x93): /* kmov{d,q} k,reg */
4991
8
        generate_exception_if(vex.l || vex.reg != 0xf || ea.type != OP_REG,
4992
8
                              X86_EXC_UD);
4993
1
        dst = ea;
4994
1
        dst.reg = decode_gpr(&_regs, modrm_reg);
4995
4996
1
        host_and_vcpu_must_have(avx512f);
4997
0
        if ( vex.pfx == vex_f2 )
4998
0
        {
4999
0
            host_and_vcpu_must_have(avx512bw);
5000
0
            dst.bytes = 4 << (mode_64bit() && vex.w);
5001
0
        }
5002
0
        else
5003
0
        {
5004
0
            generate_exception_if(vex.w, X86_EXC_UD);
5005
0
            dst.bytes = 4;
5006
0
            if ( vex.pfx )
5007
0
                host_and_vcpu_must_have(avx512dq);
5008
0
        }
5009
5010
0
        get_fpu(X86EMUL_FPU_opmask);
5011
5012
0
        opc = init_prefixes(stub);
5013
0
        opc[0] = b;
5014
        /* Convert GPR destination to %rAX. */
5015
0
        vex.r = 1;
5016
0
        if ( !mode_64bit() )
5017
0
            vex.w = 0;
5018
0
        opc[1] = modrm & 0xc7;
5019
0
        opc[2] = 0xc3;
5020
5021
0
        copy_VEX(opc, vex);
5022
0
        invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
5023
5024
0
        put_stub(stub);
5025
5026
0
        ASSERT(!state->simd_size);
5027
0
        break;
5028
5029
9
    case X86EMUL_OPC_VEX(0x0f, 0x99):    /* ktest{w,q} k,k */
5030
9
        if ( !vex.w )
5031
9
            host_and_vcpu_must_have(avx512dq);
5032
        /* fall through */
5033
5
    case X86EMUL_OPC_VEX(0x0f, 0x98):    /* kortest{w,q} k,k */
5034
11
    case X86EMUL_OPC_VEX_66(0x0f, 0x98): /* kortest{b,d} k,k */
5035
12
    case X86EMUL_OPC_VEX_66(0x0f, 0x99): /* ktest{b,d} k,k */
5036
12
        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
5037
12
                              ea.type != OP_REG, X86_EXC_UD);
5038
1
        host_and_vcpu_must_have(avx512f);
5039
0
        if ( vex.w )
5040
0
            host_and_vcpu_must_have(avx512bw);
5041
0
        else if ( vex.pfx )
5042
0
            host_and_vcpu_must_have(avx512dq);
5043
5044
0
        get_fpu(X86EMUL_FPU_opmask);
5045
5046
0
        opc = init_prefixes(stub);
5047
0
        opc[0] = b;
5048
0
        opc[1] = modrm;
5049
0
        opc[2] = 0xc3;
5050
5051
0
        copy_VEX(opc, vex);
5052
0
        _regs.eflags &= ~EFLAGS_MASK;
5053
0
        invoke_stub("",
5054
0
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
5055
0
                    [eflags] "+g" (_regs.eflags),
5056
0
                    "=a" (dst.val), [tmp] "=&r" (dummy)
5057
0
                    : [mask] "i" (EFLAGS_MASK));
5058
5059
0
        put_stub(stub);
5060
5061
0
        ASSERT(!state->simd_size);
5062
0
        dst.type = OP_NONE;
5063
0
        break;
5064
5065
0
#endif /* !X86EMUL_NO_SIMD */
5066
5067
1.41k
    case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
5068
1.41k
        msr_val = 0;
5069
1.41k
        fail_if(ops->cpuid == NULL);
5070
5071
        /* Speculatively read MSR_INTEL_MISC_FEATURES_ENABLES. */
5072
1.41k
        if ( ops->read_msr && !mode_ring0() &&
5073
1.41k
             (rc = ops->read_msr(MSR_INTEL_MISC_FEATURES_ENABLES,
5074
470
                                 &msr_val, ctxt)) == X86EMUL_EXCEPTION )
5075
470
        {
5076
            /* Not implemented.  Squash the exception and proceed normally. */
5077
470
            x86_emul_reset_event(ctxt);
5078
470
            rc = X86EMUL_OKAY;
5079
470
        }
5080
1.41k
        if ( rc != X86EMUL_OKAY )
5081
0
            goto done;
5082
5083
1.41k
        generate_exception_if((msr_val & MSR_MISC_FEATURES_CPUID_FAULTING),
5084
1.41k
                              X86_EXC_GP, 0); /* Faulting active? (Inc. CPL test) */
5085
5086
1.41k
        rc = ops->cpuid(_regs.eax, _regs.ecx, &leaf, ctxt);
5087
1.41k
        if ( rc != X86EMUL_OKAY )
5088
0
            goto done;
5089
1.41k
        _regs.r(ax) = leaf.a;
5090
1.41k
        _regs.r(bx) = leaf.b;
5091
1.41k
        _regs.r(cx) = leaf.c;
5092
1.41k
        _regs.r(dx) = leaf.d;
5093
1.41k
        break;
5094
5095
1.00k
    case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */
5096
1.00k
        generate_exception_if(lock_prefix, X86_EXC_UD);
5097
5098
1.00k
        if ( ops->rmw && dst.type == OP_MEM &&
5099
1.00k
             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
5100
0
                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
5101
0
            goto done;
5102
5103
1.00k
        emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
5104
1.00k
        dst.type = OP_NONE;
5105
1.00k
        break;
5106
5107
535
    case X86EMUL_OPC(0x0f, 0xa4): /* shld imm8,r,r/m */
5108
752
    case X86EMUL_OPC(0x0f, 0xa5): /* shld %%cl,r,r/m */
5109
951
    case X86EMUL_OPC(0x0f, 0xac): /* shrd imm8,r,r/m */
5110
2.18k
    case X86EMUL_OPC(0x0f, 0xad): /* shrd %%cl,r,r/m */ {
5111
2.18k
        uint8_t shift, width = dst.bytes << 3;
5112
5113
2.18k
        generate_exception_if(lock_prefix, X86_EXC_UD);
5114
5115
2.18k
        if ( b & 1 )
5116
1.44k
            shift = _regs.cl;
5117
734
        else
5118
734
        {
5119
734
            shift = src.val;
5120
734
            src.reg = decode_gpr(&_regs, modrm_reg);
5121
734
            src.val = truncate_word(*src.reg, dst.bytes);
5122
734
        }
5123
5124
2.18k
        if ( ops->rmw && dst.type == OP_MEM )
5125
0
        {
5126
0
            ea.orig_val = shift;
5127
0
            state->rmw = b & 8 ? rmw_shrd : rmw_shld;
5128
0
            break;
5129
0
        }
5130
5131
2.18k
        if ( (shift &= width - 1) == 0 )
5132
277
            break;
5133
1.90k
        dst.orig_val = dst.val;
5134
1.90k
        dst.val = (b & 8) ?
5135
                  /* shrd */
5136
1.16k
                  ((dst.orig_val >> shift) |
5137
1.16k
                   truncate_word(src.val << (width - shift), dst.bytes)) :
5138
                  /* shld */
5139
1.90k
                  (truncate_word(dst.orig_val << shift, dst.bytes) |
5140
736
                   (src.val >> (width - shift)));
5141
1.90k
        _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF |
5142
1.90k
                          X86_EFLAGS_PF | X86_EFLAGS_CF);
5143
1.90k
        if ( (dst.orig_val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
5144
876
            _regs.eflags |= X86_EFLAGS_CF;
5145
1.90k
        if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
5146
515
            _regs.eflags |= X86_EFLAGS_OF;
5147
1.90k
        _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? X86_EFLAGS_SF : 0;
5148
1.90k
        _regs.eflags |= (dst.val == 0) ? X86_EFLAGS_ZF : 0;
5149
1.90k
        _regs.eflags |= even_parity(dst.val) ? X86_EFLAGS_PF : 0;
5150
1.90k
        break;
5151
2.18k
    }
5152
5153
786
    case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */
5154
786
        if ( ops->rmw && dst.type == OP_MEM )
5155
0
            state->rmw = rmw_bts;
5156
786
        else
5157
786
            emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
5158
786
        break;
5159
5160
846
    case X86EMUL_OPC(0x0f, 0xae): /* Grp15 */
5161
853
    case X86EMUL_OPC_66(0x0f, 0xae):
5162
1.24k
    case X86EMUL_OPC_F3(0x0f, 0xae):
5163
1.24k
#ifndef X86EMUL_NO_SIMD
5164
1.32k
    case X86EMUL_OPC_VEX(0x0f, 0xae):
5165
1.32k
#endif
5166
1.32k
        rc = x86emul_0fae(state, &_regs, &dst, &src, ctxt, ops, &fpu_type);
5167
1.32k
        goto dispatch_from_helper;
5168
5169
558
    case X86EMUL_OPC(0x0f, 0xaf): /* imul */
5170
558
        emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags);
5171
558
        break;
5172
5173
1.16k
    case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
5174
1.16k
        fail_if(!ops->cmpxchg);
5175
5176
1.16k
        if ( ops->rmw && dst.type == OP_MEM &&
5177
1.16k
             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
5178
0
                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
5179
0
            goto done;
5180
5181
1.16k
        _regs.eflags &= ~EFLAGS_MASK;
5182
1.16k
        if ( !((dst.val ^ _regs.r(ax)) &
5183
1.16k
               (~0UL >> (8 * (sizeof(long) - dst.bytes)))) )
5184
525
        {
5185
            /* Success: write back to memory. */
5186
525
            if ( dst.type == OP_MEM )
5187
203
            {
5188
203
                dst.val = _regs.r(ax);
5189
203
                switch ( rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
5190
203
                                           &src.val, dst.bytes, lock_prefix,
5191
203
                                           ctxt) )
5192
203
                {
5193
200
                case X86EMUL_OKAY:
5194
200
                    dst.type = OP_NONE;
5195
200
                    _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
5196
200
                    break;
5197
0
                case X86EMUL_CMPXCHG_FAILED:
5198
0
                    rc = X86EMUL_OKAY;
5199
0
                    break;
5200
3
                default:
5201
3
                    goto done;
5202
203
                }
5203
203
            }
5204
322
            else
5205
322
            {
5206
322
                dst.val = src.val;
5207
322
                _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
5208
322
            }
5209
525
        }
5210
1.16k
        if ( !(_regs.eflags & X86_EFLAGS_ZF) )
5211
639
        {
5212
            /* Failure: write the value we saw to EAX. */
5213
639
            dst.type = OP_REG;
5214
639
            dst.reg  = (unsigned long *)&_regs.r(ax);
5215
            /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
5216
639
            src.val = _regs.r(ax);
5217
639
            emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
5218
639
            ASSERT(!(_regs.eflags & X86_EFLAGS_ZF));
5219
639
        }
5220
1.16k
        break;
5221
5222
1.16k
    case X86EMUL_OPC(0x0f, 0xb2): /* lss */
5223
272
    case X86EMUL_OPC(0x0f, 0xb4): /* lfs */
5224
474
    case X86EMUL_OPC(0x0f, 0xb5): /* lgs */
5225
474
        seg = b & 7;
5226
474
        goto les;
5227
5228
957
    case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */
5229
957
        if ( ops->rmw && dst.type == OP_MEM )
5230
0
            state->rmw = rmw_btr;
5231
957
        else
5232
957
            emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
5233
957
        break;
5234
5235
957
    case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */
5236
        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
5237
67
        dst.reg   = decode_gpr(&_regs, modrm_reg);
5238
67
        dst.bytes = op_bytes;
5239
67
        dst.val   = (uint8_t)src.val;
5240
67
        break;
5241
5242
186
    case X86EMUL_OPC(0x0f, 0xb7): /* movzx rm16,r{16,32,64} */
5243
186
        dst.val = (uint16_t)src.val;
5244
186
        break;
5245
5246
442
    case X86EMUL_OPC_F3(0x0f, 0xb8): /* popcnt r/m,r */
5247
442
        host_and_vcpu_must_have(popcnt);
5248
442
        asm ( "popcnt %1,%0" : "=r" (dst.val) : "rm" (src.val) );
5249
442
        _regs.eflags &= ~EFLAGS_MASK;
5250
442
        if ( !dst.val )
5251
202
            _regs.eflags |= X86_EFLAGS_ZF;
5252
442
        break;
5253
5254
772
    case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */
5255
772
        switch ( modrm_reg & 7 )
5256
772
        {
5257
198
        case 4: goto bt;
5258
200
        case 5: goto bts;
5259
194
        case 6: goto btr;
5260
179
        case 7: goto btc;
5261
1
        default: generate_exception(X86_EXC_UD);
5262
772
        }
5263
0
        break;
5264
5265
970
    case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */
5266
970
        if ( ops->rmw && dst.type == OP_MEM )
5267
0
            state->rmw = rmw_btc;
5268
970
        else
5269
970
            emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
5270
970
        break;
5271
5272
998
    case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */
5273
998
    {
5274
998
        bool zf;
5275
5276
998
        asm ( "bsf %2,%0" ASM_FLAG_OUT(, "; setz %1")
5277
998
              : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
5278
998
              : "rm" (src.val) );
5279
998
        _regs.eflags &= ~X86_EFLAGS_ZF;
5280
998
        if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
5281
504
        {
5282
504
            _regs.eflags &= ~X86_EFLAGS_CF;
5283
504
            if ( zf )
5284
222
            {
5285
222
                _regs.eflags |= X86_EFLAGS_CF;
5286
222
                dst.val = op_bytes * 8;
5287
222
            }
5288
282
            else if ( !dst.val )
5289
197
                _regs.eflags |= X86_EFLAGS_ZF;
5290
504
        }
5291
494
        else if ( zf )
5292
236
        {
5293
236
            _regs.eflags |= X86_EFLAGS_ZF;
5294
236
            dst.type = OP_NONE;
5295
236
        }
5296
998
        break;
5297
970
    }
5298
5299
1.04k
    case X86EMUL_OPC(0x0f, 0xbd): /* bsr or lzcnt */
5300
1.04k
    {
5301
1.04k
        bool zf;
5302
5303
1.04k
        asm ( "bsr %2,%0" ASM_FLAG_OUT(, "; setz %1")
5304
1.04k
              : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
5305
1.04k
              : "rm" (src.val) );
5306
1.04k
        _regs.eflags &= ~X86_EFLAGS_ZF;
5307
1.04k
        if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
5308
611
        {
5309
611
            _regs.eflags &= ~X86_EFLAGS_CF;
5310
611
            if ( zf )
5311
212
            {
5312
212
                _regs.eflags |= X86_EFLAGS_CF;
5313
212
                dst.val = op_bytes * 8;
5314
212
            }
5315
399
            else
5316
399
            {
5317
399
                dst.val = op_bytes * 8 - 1 - dst.val;
5318
399
                if ( !dst.val )
5319
202
                    _regs.eflags |= X86_EFLAGS_ZF;
5320
399
            }
5321
611
        }
5322
434
        else if ( zf )
5323
218
        {
5324
218
            _regs.eflags |= X86_EFLAGS_ZF;
5325
218
            dst.type = OP_NONE;
5326
218
        }
5327
1.04k
        break;
5328
970
    }
5329
5330
214
    case X86EMUL_OPC(0x0f, 0xbe): /* movsx rm8,r{16,32,64} */
5331
        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
5332
214
        dst.reg   = decode_gpr(&_regs, modrm_reg);
5333
214
        dst.bytes = op_bytes;
5334
214
        dst.val   = (int8_t)src.val;
5335
214
        break;
5336
5337
196
    case X86EMUL_OPC(0x0f, 0xbf): /* movsx rm16,r{16,32,64} */
5338
196
        dst.val = (int16_t)src.val;
5339
196
        break;
5340
5341
567
    case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */
5342
567
        if ( ops->rmw && dst.type == OP_MEM )
5343
0
        {
5344
0
            state->rmw = rmw_xadd;
5345
0
            break;
5346
0
        }
5347
        /* Write back the register source. */
5348
567
        switch ( dst.bytes )
5349
567
        {
5350
66
        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
5351
194
        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
5352
113
        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
5353
194
        case 8: *src.reg = dst.val; break;
5354
567
        }
5355
567
        goto add;
5356
5357
5.52k
    CASE_SIMD_ALL_FP_VEX(0x0f, 0xc2):      /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */
5358
                                           /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
5359
5.71k
    CASE_SIMD_PACKED_FP_VEX(0x0f, 0xc6):   /* shufp{s,d} $imm8,xmm/mem,xmm */
5360
                                           /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
5361
5.71k
        d = (d & ~SrcMask) | SrcMem;
5362
5.71k
        if ( vex.opcx == vex_none )
5363
863
        {
5364
863
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
5365
450
                goto simd_0f_imm8_sse2;
5366
413
            vcpu_must_have(sse);
5367
413
            get_fpu(X86EMUL_FPU_xmm);
5368
411
            goto simd_0f_imm8;
5369
413
        }
5370
664
        goto simd_0f_imm8_avx;
5371
5372
664
#ifndef X86EMUL_NO_SIMD
5373
5374
664
    CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0xc2): /* vcmp{p,s}{s,d} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
5375
20
        generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) ||
5376
20
                               (ea.type != OP_REG && evex.brs &&
5377
20
                                (evex.pfx & VEX_PREFIX_SCALAR_MASK)) ||
5378
20
                               !evex.r || !evex.R || evex.z),
5379
20
                              X86_EXC_UD);
5380
1
        host_and_vcpu_must_have(avx512f);
5381
0
        if ( ea.type != OP_REG || !evex.brs )
5382
0
            avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK);
5383
0
    simd_imm8_zmm:
5384
0
        if ( (d & SrcMask) == SrcImmByte )
5385
0
            d = (d & ~SrcMask) | SrcMem;
5386
0
        get_fpu(X86EMUL_FPU_zmm);
5387
0
        opc = init_evex(stub);
5388
0
        opc[0] = b;
5389
0
        opc[1] = modrm;
5390
0
        if ( ea.type == OP_MEM )
5391
0
        {
5392
            /* convert memory operand to (%rAX) */
5393
0
            evex.b = 1;
5394
0
            opc[1] &= 0x38;
5395
0
        }
5396
0
        opc[2] = imm1;
5397
0
        insn_bytes = EVEX_PFX_BYTES + 3;
5398
0
        break;
5399
5400
0
#endif /* !X86EMUL_NO_SIMD */
5401
5402
194
    case X86EMUL_OPC(0x0f, 0xc3): /* movnti */
5403
        /* Ignore the non-temporal hint for now. */
5404
194
        vcpu_must_have(sse2);
5405
194
        dst.val = src.val;
5406
194
        sfence = true;
5407
194
        break;
5408
5409
903
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc4):  /* pinsrw $imm8,r32/m16,{,x}mm */
5410
                                           /* vpinsrw $imm8,r32/m16,xmm,xmm */
5411
903
        generate_exception_if(vex.l, X86_EXC_UD);
5412
441
        memcpy(mmvalp, &src.val, 2);
5413
441
        ea.type = OP_MEM;
5414
441
        state->simd_size = simd_other;
5415
441
        goto simd_0f_int_imm8;
5416
5417
0
#ifndef X86EMUL_NO_SIMD
5418
5419
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0xc4):   /* vpinsrw $imm8,r32/m16,xmm,xmm */
5420
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
5421
6
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
5422
6
        generate_exception_if(evex.lr || evex.opmsk || evex.brs, X86_EXC_UD);
5423
2
        if ( b & 2 )
5424
2
            host_and_vcpu_must_have(avx512dq);
5425
1
        else
5426
2
            host_and_vcpu_must_have(avx512bw);
5427
0
        if ( !mode_64bit() )
5428
0
            evex.w = 0;
5429
0
        memcpy(mmvalp, &src.val, src.bytes);
5430
0
        ea.type = OP_MEM;
5431
0
        d = SrcMem16; /* Fake for the common SIMD code below. */
5432
0
        state->simd_size = simd_other;
5433
0
        goto avx512f_imm8_no_sae;
5434
5435
0
#endif /* !X86EMUL_NO_SIMD */
5436
5437
977
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc5):  /* pextrw $imm8,{,x}mm,reg */
5438
                                           /* vpextrw $imm8,xmm,reg */
5439
977
        generate_exception_if(vex.l, X86_EXC_UD);
5440
629
        opc = init_prefixes(stub);
5441
0
        opc[0] = b;
5442
        /* Convert GPR destination to %rAX. */
5443
629
        rex_prefix &= ~REX_R;
5444
629
        vex.r = 1;
5445
629
        if ( !mode_64bit() )
5446
262
            vex.w = 0;
5447
629
        opc[1] = modrm & 0xc7;
5448
629
        opc[2] = imm1;
5449
629
        insn_bytes = PFX_BYTES + 3;
5450
629
        goto simd_0f_to_gpr;
5451
5452
0
#ifndef X86EMUL_NO_SIMD
5453
5454
4
    CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5455
4
        generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK),
5456
4
                              X86_EXC_UD);
5457
        /* fall through */
5458
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x03): /* valign{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5459
3
        fault_suppression = false;
5460
        /* fall through */
5461
4
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x25): /* vpternlog{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5462
17
    avx512f_imm8_no_sae:
5463
17
        host_and_vcpu_must_have(avx512f);
5464
0
        generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD);
5465
0
        avx512_vlen_check(false);
5466
0
        goto simd_imm8_zmm;
5467
5468
0
#endif /* X86EMUL_NO_SIMD */
5469
5470
1.87k
    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
5471
1.87k
        rc =  x86emul_0fc7(state, &_regs, &dst, ctxt, ops, mmvalp);
5472
1.87k
        goto dispatch_from_helper;
5473
5474
1.56k
    case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */
5475
1.56k
        dst.type = OP_REG;
5476
1.56k
        dst.reg  = decode_gpr(&_regs, (b & 7) | ((rex_prefix & 1) << 3));
5477
1.56k
        switch ( dst.bytes = op_bytes )
5478
1.56k
        {
5479
694
        default: /* case 2: */
5480
            /* Undefined behaviour. Writes zero on all tested CPUs. */
5481
694
            dst.val = 0;
5482
694
            break;
5483
494
        case 4:
5484
494
#ifdef __x86_64__
5485
494
            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
5486
494
            break;
5487
380
        case 8:
5488
380
#endif
5489
380
            asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) );
5490
380
            break;
5491
1.56k
        }
5492
1.56k
        break;
5493
5494
1.56k
#ifndef X86EMUL_NO_SIMD
5495
5496
1.56k
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,[xyz]mm,[xyz]mm{k} */
5497
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,[xyz]mm,[xyz]mm{k} */
5498
5
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe2): /* vpsra{d,q} xmm/m128,[xyz]mm,[xyz]mm{k} */
5499
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf2): /* vpslld xmm/m128,[xyz]mm,[xyz]mm{k} */
5500
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0xf3): /* vpsllq xmm/m128,[xyz]mm,[xyz]mm{k} */
5501
7
        generate_exception_if(evex.brs, X86_EXC_UD);
5502
        /* fall through */
5503
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x0c): /* vpermilps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5504
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x0d): /* vpermilpd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5505
6
        fault_suppression = false;
5506
6
        if ( b == 0xe2 )
5507
1
            goto avx512f_no_sae;
5508
        /* fall through */
5509
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5510
6
    case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5511
7
    case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5512
7
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1e): /* vpabsd [xyz]mm/mem,[xyz]mm{k} */
5513
8
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1f): /* vpabsq [xyz]mm/mem,[xyz]mm{k} */
5514
8
        generate_exception_if(evex.w != (b & 1), X86_EXC_UD);
5515
5
        goto avx512f_no_sae;
5516
5517
5
#endif /* !X86EMUL_NO_SIMD */
5518
5
#ifndef X86EMUL_NO_MMX
5519
5520
380
    case X86EMUL_OPC(0x0f, 0xd4):        /* paddq mm/m64,mm */
5521
465
    case X86EMUL_OPC(0x0f, 0xf4):        /* pmuludq mm/m64,mm */
5522
534
    case X86EMUL_OPC(0x0f, 0xfb):        /* psubq mm/m64,mm */
5523
534
        vcpu_must_have(sse2);
5524
534
        goto simd_0f_mmx;
5525
5526
534
#endif /* !X86EMUL_NO_MMX */
5527
534
#if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD)
5528
5529
534
    case X86EMUL_OPC_F3(0x0f, 0xd6):     /* movq2dq mm,xmm */
5530
387
    case X86EMUL_OPC_F2(0x0f, 0xd6):     /* movdq2q xmm,mm */
5531
387
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
5532
381
        op_bytes = 8;
5533
381
        host_and_vcpu_must_have(mmx);
5534
381
        goto simd_0f_int;
5535
5536
381
#endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */
5537
381
#ifndef X86EMUL_NO_MMX
5538
5539
381
    case X86EMUL_OPC(0x0f, 0xe7):        /* movntq mm,m64 */
5540
225
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
5541
224
        sfence = true;
5542
        /* fall through */
5543
347
    case X86EMUL_OPC(0x0f, 0xda):        /* pminub mm/m64,mm */
5544
382
    case X86EMUL_OPC(0x0f, 0xde):        /* pmaxub mm/m64,mm */
5545
709
    case X86EMUL_OPC(0x0f, 0xea):        /* pminsw mm/m64,mm */
5546
942
    case X86EMUL_OPC(0x0f, 0xee):        /* pmaxsw mm/m64,mm */
5547
1.13k
    case X86EMUL_OPC(0x0f, 0xe0):        /* pavgb mm/m64,mm */
5548
1.34k
    case X86EMUL_OPC(0x0f, 0xe3):        /* pavgw mm/m64,mm */
5549
1.53k
    case X86EMUL_OPC(0x0f, 0xe4):        /* pmulhuw mm/m64,mm */
5550
2.06k
    case X86EMUL_OPC(0x0f, 0xf6):        /* psadbw mm/m64,mm */
5551
2.06k
        vcpu_must_have(mmxext);
5552
2.06k
        goto simd_0f_mmx;
5553
5554
2.06k
#endif /* !X86EMUL_NO_MMX */
5555
2.06k
#ifndef X86EMUL_NO_SIMD
5556
5557
2.06k
    case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5558
2
    case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5559
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5560
4
    case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5561
5
    case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5562
5
        host_and_vcpu_must_have(avx512bw);
5563
0
        generate_exception_if(evex.brs, X86_EXC_UD);
5564
0
        elem_bytes = b & 0x10 ? 1 : 2;
5565
0
        goto avx512f_no_sae;
5566
5567
101
    case X86EMUL_OPC_66(0x0f, 0xe6):       /* cvttpd2dq xmm/mem,xmm */
5568
135
    case X86EMUL_OPC_VEX_66(0x0f, 0xe6):   /* vcvttpd2dq {x,y}mm/mem,xmm */
5569
201
    case X86EMUL_OPC_F3(0x0f, 0xe6):       /* cvtdq2pd xmm/mem,xmm */
5570
235
    case X86EMUL_OPC_VEX_F3(0x0f, 0xe6):   /* vcvtdq2pd xmm/mem,{x,y}mm */
5571
301
    case X86EMUL_OPC_F2(0x0f, 0xe6):       /* cvtpd2dq xmm/mem,xmm */
5572
335
    case X86EMUL_OPC_VEX_F2(0x0f, 0xe6):   /* vcvtpd2dq {x,y}mm/mem,xmm */
5573
335
        d |= TwoOp;
5574
335
        op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l);
5575
335
        goto simd_0f_cvt;
5576
5577
0
#endif /* !X86EMUL_NO_SIMD */
5578
5579
1.19k
    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* {,v}maskmov{q,dqu} {,x}mm,{,x}mm */
5580
1.19k
        generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
5581
508
        if ( vex.opcx != vex_none )
5582
128
        {
5583
128
            generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
5584
126
            d |= TwoOp;
5585
126
            host_and_vcpu_must_have(avx);
5586
126
            get_fpu(X86EMUL_FPU_ymm);
5587
126
        }
5588
380
        else if ( vex.pfx )
5589
81
        {
5590
81
            vcpu_must_have(sse2);
5591
81
            get_fpu(X86EMUL_FPU_xmm);
5592
81
        }
5593
299
        else
5594
299
        {
5595
299
            host_and_vcpu_must_have(mmx);
5596
299
            vcpu_must_have(mmxext);
5597
299
            get_fpu(X86EMUL_FPU_mmx);
5598
299
        }
5599
5600
        /*
5601
         * While we can't reasonably provide fully correct behavior here
5602
         * (in particular avoiding the memory read in anticipation of all
5603
         * bytes in the range eventually being written), we can (and should)
5604
         * still suppress the memory access if all mask bits are clear. Read
5605
         * the mask bits via {,v}pmovmskb for that purpose.
5606
         */
5607
503
        opc = init_prefixes(stub);
5608
0
        opc[0] = 0xd7; /* {,v}pmovmskb */
5609
        /* (Ab)use "sfence" for latching the original REX.R / VEX.R. */
5610
503
        sfence = rex_prefix & REX_R;
5611
        /* Convert GPR destination to %rAX. */
5612
503
        rex_prefix &= ~REX_R;
5613
503
        vex.r = 1;
5614
503
        if ( !mode_64bit() )
5615
208
            vex.w = 0;
5616
503
        opc[1] = modrm & 0xc7;
5617
503
        opc[2] = 0xc3;
5618
5619
503
        copy_REX_VEX(opc, rex_prefix, vex);
5620
503
        invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
5621
5622
503
        put_stub(stub);
5623
503
        if ( !ea.val )
5624
14
            goto complete_insn;
5625
5626
489
        opc = init_prefixes(stub);
5627
0
        opc[0] = b;
5628
489
        opc[1] = modrm;
5629
489
        insn_bytes = PFX_BYTES + 2;
5630
        /* Restore high bit of XMM destination. */
5631
489
        if ( sfence )
5632
200
        {
5633
200
            rex_prefix |= REX_R;
5634
200
            vex.r = 0;
5635
200
        }
5636
5637
489
        ea.type = OP_MEM;
5638
489
        ea.mem.off = truncate_ea(_regs.r(di));
5639
489
        sfence = true;
5640
489
        break;
5641
5642
508
    CASE_SIMD_PACKED_INT(0x0f38, 0x00): /* pshufb {,x}mm/mem,{,x}mm */
5643
1.04k
    CASE_SIMD_PACKED_INT(0x0f38, 0x01): /* phaddw {,x}mm/mem,{,x}mm */
5644
1.82k
    CASE_SIMD_PACKED_INT(0x0f38, 0x02): /* phaddd {,x}mm/mem,{,x}mm */
5645
2.08k
    CASE_SIMD_PACKED_INT(0x0f38, 0x03): /* phaddsw {,x}mm/mem,{,x}mm */
5646
2.58k
    CASE_SIMD_PACKED_INT(0x0f38, 0x04): /* pmaddubsw {,x}mm/mem,{,x}mm */
5647
3.33k
    CASE_SIMD_PACKED_INT(0x0f38, 0x05): /* phsubw {,x}mm/mem,{,x}mm */
5648
4.75k
    CASE_SIMD_PACKED_INT(0x0f38, 0x06): /* phsubd {,x}mm/mem,{,x}mm */
5649
5.32k
    CASE_SIMD_PACKED_INT(0x0f38, 0x07): /* phsubsw {,x}mm/mem,{,x}mm */
5650
5.88k
    CASE_SIMD_PACKED_INT(0x0f38, 0x08): /* psignb {,x}mm/mem,{,x}mm */
5651
6.59k
    CASE_SIMD_PACKED_INT(0x0f38, 0x09): /* psignw {,x}mm/mem,{,x}mm */
5652
7.26k
    CASE_SIMD_PACKED_INT(0x0f38, 0x0a): /* psignd {,x}mm/mem,{,x}mm */
5653
7.79k
    CASE_SIMD_PACKED_INT(0x0f38, 0x0b): /* pmulhrsw {,x}mm/mem,{,x}mm */
5654
8.30k
    CASE_SIMD_PACKED_INT(0x0f38, 0x1c): /* pabsb {,x}mm/mem,{,x}mm */
5655
8.56k
    CASE_SIMD_PACKED_INT(0x0f38, 0x1d): /* pabsw {,x}mm/mem,{,x}mm */
5656
8.76k
    CASE_SIMD_PACKED_INT(0x0f38, 0x1e): /* pabsd {,x}mm/mem,{,x}mm */
5657
8.76k
        host_and_vcpu_must_have(ssse3);
5658
4.41k
        if ( vex.pfx )
5659
1.51k
        {
5660
5.47k
    simd_0f38_common:
5661
5.47k
            get_fpu(X86EMUL_FPU_xmm);
5662
5.47k
        }
5663
2.90k
        else
5664
2.90k
        {
5665
2.90k
            host_and_vcpu_must_have(mmx);
5666
2.90k
            get_fpu(X86EMUL_FPU_mmx);
5667
2.90k
        }
5668
8.34k
        opc = init_prefixes(stub);
5669
0
        opc[0] = 0x38;
5670
8.34k
        opc[1] = b;
5671
8.34k
        opc[2] = modrm;
5672
8.34k
        if ( ea.type == OP_MEM )
5673
4.50k
        {
5674
            /* Convert memory operand to (%rAX). */
5675
4.50k
            rex_prefix &= ~REX_B;
5676
4.50k
            vex.b = 1;
5677
4.50k
            opc[2] &= 0x38;
5678
4.50k
        }
5679
8.34k
        insn_bytes = PFX_BYTES + 3;
5680
8.34k
        break;
5681
5682
0
#ifndef X86EMUL_NO_SIMD
5683
5684
198
    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
5685
265
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
5686
265
        generate_exception_if(!vex.l, X86_EXC_UD);
5687
        /* fall through */
5688
331
    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */
5689
331
        if ( ea.type != OP_MEM )
5690
195
        {
5691
195
            generate_exception_if(b & 2, X86_EXC_UD);
5692
194
            host_and_vcpu_must_have(avx2);
5693
194
        }
5694
        /* fall through */
5695
364
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */
5696
438
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5697
438
        generate_exception_if(vex.w, X86_EXC_UD);
5698
432
        goto simd_0f_avx;
5699
5700
495
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0e): /* vtestps {x,y}mm/mem,{x,y}mm */
5701
569
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0f): /* vtestpd {x,y}mm/mem,{x,y}mm */
5702
569
        generate_exception_if(vex.w, X86_EXC_UD);
5703
        /* fall through */
5704
636
    case X86EMUL_OPC_66(0x0f38, 0x17):     /* ptest xmm/m128,xmm */
5705
671
    case X86EMUL_OPC_VEX_66(0x0f38, 0x17): /* vptest {x,y}mm/mem,{x,y}mm */
5706
671
        if ( vex.opcx == vex_none )
5707
73
        {
5708
73
            host_and_vcpu_must_have(sse4_1);
5709
73
            get_fpu(X86EMUL_FPU_xmm);
5710
73
        }
5711
598
        else
5712
598
        {
5713
598
            generate_exception_if(vex.reg != 0xf, X86_EXC_UD);
5714
596
            host_and_vcpu_must_have(avx);
5715
596
            get_fpu(X86EMUL_FPU_ymm);
5716
596
        }
5717
5718
667
        opc = init_prefixes(stub);
5719
667
        if ( vex.opcx == vex_none )
5720
72
            opc++[0] = 0x38;
5721
667
        opc[0] = b;
5722
667
        opc[1] = modrm;
5723
667
        if ( ea.type == OP_MEM )
5724
140
        {
5725
140
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16 << vex.l, ctxt);
5726
140
            if ( rc != X86EMUL_OKAY )
5727
11
                goto done;
5728
5729
            /* Convert memory operand to (%rAX). */
5730
129
            rex_prefix &= ~REX_B;
5731
129
            vex.b = 1;
5732
129
            opc[1] &= 0x38;
5733
129
        }
5734
656
        insn_bytes = PFX_BYTES + 2;
5735
656
        opc[2] = 0xc3;
5736
656
        if ( vex.opcx == vex_none )
5737
71
        {
5738
            /* Cover for extra prefix byte. */
5739
71
            --opc;
5740
71
            ++insn_bytes;
5741
71
        }
5742
5743
656
        copy_REX_VEX(opc, rex_prefix, vex);
5744
656
        emulate_stub("+m" (*mmvalp), "a" (mmvalp));
5745
5746
656
        put_stub(stub);
5747
656
        state->simd_size = simd_none;
5748
656
        dst.type = OP_NONE;
5749
656
        break;
5750
5751
66
    case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
5752
260
    case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
5753
454
    case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
5754
674
    case X86EMUL_OPC_66(0x0f38, 0x23): /* pmovsxwd xmm/m64,xmm */
5755
734
    case X86EMUL_OPC_66(0x0f38, 0x24): /* pmovsxwq xmm/m32,xmm */
5756
930
    case X86EMUL_OPC_66(0x0f38, 0x25): /* pmovsxdq xmm/m64,xmm */
5757
998
    case X86EMUL_OPC_66(0x0f38, 0x30): /* pmovzxbw xmm/m64,xmm */
5758
1.06k
    case X86EMUL_OPC_66(0x0f38, 0x31): /* pmovzxbd xmm/m32,xmm */
5759
1.12k
    case X86EMUL_OPC_66(0x0f38, 0x32): /* pmovzxbq xmm/m16,xmm */
5760
1.16k
    case X86EMUL_OPC_66(0x0f38, 0x33): /* pmovzxwd xmm/m64,xmm */
5761
1.24k
    case X86EMUL_OPC_66(0x0f38, 0x34): /* pmovzxwq xmm/m32,xmm */
5762
1.45k
    case X86EMUL_OPC_66(0x0f38, 0x35): /* pmovzxdq xmm/m64,xmm */
5763
1.45k
        op_bytes = 16 >> pmov_convert_delta[b & 7];
5764
        /* fall through */
5765
1.71k
    case X86EMUL_OPC_66(0x0f38, 0x10): /* pblendvb XMM0,xmm/m128,xmm */
5766
2.02k
    case X86EMUL_OPC_66(0x0f38, 0x14): /* blendvps XMM0,xmm/m128,xmm */
5767
2.09k
    case X86EMUL_OPC_66(0x0f38, 0x15): /* blendvpd XMM0,xmm/m128,xmm */
5768
2.16k
    case X86EMUL_OPC_66(0x0f38, 0x28): /* pmuldq xmm/m128,xmm */
5769
2.23k
    case X86EMUL_OPC_66(0x0f38, 0x29): /* pcmpeqq xmm/m128,xmm */
5770
2.42k
    case X86EMUL_OPC_66(0x0f38, 0x2b): /* packusdw xmm/m128,xmm */
5771
2.61k
    case X86EMUL_OPC_66(0x0f38, 0x38): /* pminsb xmm/m128,xmm */
5772
2.71k
    case X86EMUL_OPC_66(0x0f38, 0x39): /* pminsd xmm/m128,xmm */
5773
2.78k
    case X86EMUL_OPC_66(0x0f38, 0x3a): /* pminub xmm/m128,xmm */
5774
2.86k
    case X86EMUL_OPC_66(0x0f38, 0x3b): /* pminud xmm/m128,xmm */
5775
3.06k
    case X86EMUL_OPC_66(0x0f38, 0x3c): /* pmaxsb xmm/m128,xmm */
5776
3.12k
    case X86EMUL_OPC_66(0x0f38, 0x3d): /* pmaxsd xmm/m128,xmm */
5777
3.19k
    case X86EMUL_OPC_66(0x0f38, 0x3e): /* pmaxub xmm/m128,xmm */
5778
3.22k
    case X86EMUL_OPC_66(0x0f38, 0x3f): /* pmaxud xmm/m128,xmm */
5779
3.29k
    case X86EMUL_OPC_66(0x0f38, 0x40): /* pmulld xmm/m128,xmm */
5780
3.48k
    case X86EMUL_OPC_66(0x0f38, 0x41): /* phminposuw xmm/m128,xmm */
5781
3.48k
        host_and_vcpu_must_have(sse4_1);
5782
3.48k
        goto simd_0f38_common;
5783
5784
3.48k
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x10): /* vpsrlvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5785
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x11): /* vpsravw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5786
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x12): /* vpsllvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
5787
3
        host_and_vcpu_must_have(avx512bw);
5788
0
        generate_exception_if(!evex.w || evex.brs, X86_EXC_UD);
5789
0
        elem_bytes = 2;
5790
0
        goto avx512f_no_sae;
5791
5792
1
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x10): /* vpmovuswb [xyz]mm,{x,y}mm/mem{k} */
5793
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x20): /* vpmovsxbw {x,y}mm/mem,[xyz]mm{k} */
5794
3
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x20): /* vpmovswb [xyz]mm,{x,y}mm/mem{k} */
5795
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x30): /* vpmovzxbw {x,y}mm/mem,[xyz]mm{k} */
5796
5
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x30): /* vpmovwb [xyz]mm,{x,y}mm/mem{k} */
5797
5
        host_and_vcpu_must_have(avx512bw);
5798
0
        if ( evex.pfx != vex_f3 )
5799
0
        {
5800
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,[xyz]mm{k} */
5801
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,[xyz]mm{k} */
5802
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x23): /* vpmovsxwd {x,y}mm/mem,[xyz]mm{k} */
5803
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,[xyz]mm{k} */
5804
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x25): /* vpmovsxdq {x,y}mm/mem,[xyz]mm{k} */
5805
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,[xyz]mm{k} */
5806
7
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,[xyz]mm{k} */
5807
8
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x33): /* vpmovzxwd {x,y}mm/mem,[xyz]mm{k} */
5808
9
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,[xyz]mm{k} */
5809
10
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x35): /* vpmovzxdq {x,y}mm/mem,[xyz]mm{k} */
5810
10
            generate_exception_if(evex.w && (b & 7) == 5, X86_EXC_UD);
5811
10
        }
5812
0
        else
5813
0
        {
5814
1
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x11): /* vpmovusdb [xyz]mm,xmm/mem{k} */
5815
2
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x12): /* vpmovusqb [xyz]mm,xmm/mem{k} */
5816
4
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x13): /* vpmovusdw [xyz]mm,{x,y}mm/mem{k} */
5817
5
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x14): /* vpmovusqw [xyz]mm,xmm/mem{k} */
5818
6
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x15): /* vpmovusqd [xyz]mm,{x,y}mm/mem{k} */
5819
7
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x21): /* vpmovsdb [xyz]mm,xmm/mem{k} */
5820
8
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x22): /* vpmovsqb [xyz]mm,xmm/mem{k} */
5821
9
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x23): /* vpmovsdw [xyz]mm,{x,y}mm/mem{k} */
5822
10
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x24): /* vpmovsqw [xyz]mm,xmm/mem{k} */
5823
11
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x25): /* vpmovsqd [xyz]mm,{x,y}mm/mem{k} */
5824
12
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x31): /* vpmovdb [xyz]mm,xmm/mem{k} */
5825
13
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x32): /* vpmovqb [xyz]mm,xmm/mem{k} */
5826
14
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x33): /* vpmovdw [xyz]mm,{x,y}mm/mem{k} */
5827
15
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x34): /* vpmovqw [xyz]mm,xmm/mem{k} */
5828
16
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x35): /* vpmovqd [xyz]mm,{x,y}mm/mem{k} */
5829
16
            generate_exception_if(evex.w || (ea.type != OP_REG && evex.z), X86_EXC_UD);
5830
11
            d = DstMem | SrcReg | TwoOp;
5831
11
        }
5832
20
        generate_exception_if(evex.brs, X86_EXC_UD);
5833
14
        op_bytes = 64 >> (pmov_convert_delta[b & 7] + 2 - evex.lr);
5834
14
        elem_bytes = (b & 7) < 3 ? 1 : (b & 7) != 5 ? 2 : 4;
5835
14
        goto avx512f_no_sae;
5836
5837
77
    case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */
5838
77
        generate_exception_if(vex.w, X86_EXC_UD);
5839
71
        host_and_vcpu_must_have(f16c);
5840
71
        op_bytes = 8 << vex.l;
5841
71
        goto simd_0f_ymm;
5842
5843
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x13): /* vcvtph2ps {x,y}mm/mem,[xyz]mm{k} */
5844
4
        generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs), X86_EXC_UD);
5845
2
        host_and_vcpu_must_have(avx512f);
5846
0
        if ( !evex.brs )
5847
0
            avx512_vlen_check(false);
5848
0
        op_bytes = 8 << evex.lr;
5849
0
        elem_bytes = 2;
5850
0
        goto simd_zmm;
5851
5852
69
    case X86EMUL_OPC_VEX_66(0x0f38, 0x16): /* vpermps ymm/m256,ymm,ymm */
5853
270
    case X86EMUL_OPC_VEX_66(0x0f38, 0x36): /* vpermd ymm/m256,ymm,ymm */
5854
270
        generate_exception_if(!vex.l || vex.w, X86_EXC_UD);
5855
263
        goto simd_0f_avx2;
5856
5857
263
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x16): /* vpermp{s,d} {y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
5858
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x36): /* vperm{d,q} {y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
5859
3
        generate_exception_if(!evex.lr, X86_EXC_UD);
5860
1
        fault_suppression = false;
5861
1
        goto avx512f_no_sae;
5862
5863
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */
5864
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,[xyz]mm{k} */
5865
3
        op_bytes = elem_bytes;
5866
3
        generate_exception_if(evex.w || evex.brs, X86_EXC_UD);
5867
3
    avx512_broadcast:
5868
        /*
5869
         * For the respective code below the main switch() to work we need to
5870
         * fold op_mask here: A source element gets read whenever any of its
5871
         * respective destination elements' mask bits is set.
5872
         */
5873
3
        if ( fault_suppression )
5874
0
        {
5875
0
            n = 1 << ((b & 3) - evex.w);
5876
0
            EXPECT(elem_bytes > 0);
5877
0
            ASSERT(op_bytes == n * elem_bytes);
5878
0
            for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n )
5879
0
                op_mask |= (op_mask >> i) & ((1 << n) - 1);
5880
0
        }
5881
3
        goto avx512f_no_sae;
5882
5883
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */
5884
                                            /* vbroadcastf64x4 m256,zmm{k} */
5885
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x5b): /* vbroadcasti32x8 m256,zmm{k} */
5886
                                            /* vbroadcasti64x4 m256,zmm{k} */
5887
3
        generate_exception_if(ea.type != OP_MEM || evex.lr != 2, X86_EXC_UD);
5888
        /* fall through */
5889
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */
5890
                                            /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */
5891
4
        generate_exception_if(!evex.lr, X86_EXC_UD);
5892
        /* fall through */
5893
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,[xyz]mm{k} */
5894
                                            /* vbroadcasti32x2 xmm/m64,[xyz]mm{k} */
5895
4
        if ( b == 0x59 )
5896
1
            op_bytes = 8;
5897
4
        generate_exception_if(evex.brs, X86_EXC_UD);
5898
2
        if ( !evex.w )
5899
2
            host_and_vcpu_must_have(avx512dq);
5900
1
        goto avx512_broadcast;
5901
5902
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */
5903
                                            /* vbroadcastf64x2 m128,{y,z}mm{k} */
5904
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x5a): /* vbroadcasti32x4 m128,{y,z}mm{k} */
5905
                                            /* vbroadcasti64x2 m128,{y,z}mm{k} */
5906
6
        generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.brs,
5907
6
                              X86_EXC_UD);
5908
2
        if ( evex.w )
5909
2
            host_and_vcpu_must_have(avx512dq);
5910
1
        goto avx512_broadcast;
5911
5912
34
    case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
5913
100
    case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
5914
295
    case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
5915
364
    case X86EMUL_OPC_VEX_66(0x0f38, 0x23): /* vpmovsxwd xmm/mem,{x,y}mm */
5916
430
    case X86EMUL_OPC_VEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,{x,y}mm */
5917
499
    case X86EMUL_OPC_VEX_66(0x0f38, 0x25): /* vpmovsxdq xmm/mem,{x,y}mm */
5918
565
    case X86EMUL_OPC_VEX_66(0x0f38, 0x30): /* vpmovzxbw xmm/mem,{x,y}mm */
5919
633
    case X86EMUL_OPC_VEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,{x,y}mm */
5920
840
    case X86EMUL_OPC_VEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,{x,y}mm */
5921
903
    case X86EMUL_OPC_VEX_66(0x0f38, 0x33): /* vpmovzxwd xmm/mem,{x,y}mm */
5922
1.09k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,{x,y}mm */
5923
1.17k
    case X86EMUL_OPC_VEX_66(0x0f38, 0x35): /* vpmovzxdq xmm/mem,{x,y}mm */
5924
1.17k
        op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
5925
1.17k
        goto simd_0f_int;
5926
5927
2
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x29): /* vpmov{b,w}2m [xyz]mm,k */
5928
3
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x39): /* vpmov{d,q}2m [xyz]mm,k */
5929
3
        generate_exception_if(!evex.r || !evex.R, X86_EXC_UD);
5930
        /* fall through */
5931
2
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x28): /* vpmovm2{b,w} k,[xyz]mm */
5932
3
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x38): /* vpmovm2{d,q} k,[xyz]mm */
5933
3
        if ( b & 0x10 )
5934
3
            host_and_vcpu_must_have(avx512dq);
5935
2
        else
5936
3
            host_and_vcpu_must_have(avx512bw);
5937
0
        generate_exception_if(evex.opmsk || ea.type != OP_REG, X86_EXC_UD);
5938
0
        d |= TwoOp;
5939
0
        op_bytes = 16 << evex.lr;
5940
0
        goto avx512f_no_sae;
5941
5942
34
    case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
5943
167
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
5944
167
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
5945
        /* Ignore the non-temporal hint for now, using movdqa instead. */
5946
166
        asm volatile ( "mfence" ::: "memory" );
5947
166
        b = 0x6f;
5948
166
        if ( vex.opcx == vex_none )
5949
166
            vcpu_must_have(sse4_1);
5950
132
        else
5951
132
        {
5952
132
            vex.opcx = vex_0f;
5953
132
            if ( vex.l )
5954
132
                vcpu_must_have(avx2);
5955
132
        }
5956
166
        goto movdqa;
5957
5958
166
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x2a): /* vmovntdqa mem,[xyz]mm */
5959
4
        generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w,
5960
4
                              X86_EXC_UD);
5961
        /* Ignore the non-temporal hint for now, using vmovdqa32 instead. */
5962
1
        asm volatile ( "mfence" ::: "memory" );
5963
1
        b = 0x6f;
5964
1
        evex.opcx = vex_0f;
5965
1
        goto vmovdqa;
5966
5967
3
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x2a): /* vpbroadcastmb2q k,[xyz]mm */
5968
5
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x3a): /* vpbroadcastmw2d k,[xyz]mm */
5969
5
        generate_exception_if((ea.type != OP_REG || evex.opmsk ||
5970
5
                               evex.w == ((b >> 4) & 1)),
5971
5
                              X86_EXC_UD);
5972
1
        d |= TwoOp;
5973
1
        op_bytes = 1; /* fake */
5974
        /* fall through */
5975
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xc4): /* vpconflict{d,q} [xyz]mm/mem,[xyz]mm{k} */
5976
2
        fault_suppression = false;
5977
        /* fall through */
5978
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x44): /* vplzcnt{d,q} [xyz]mm/mem,[xyz]mm{k} */
5979
3
        host_and_vcpu_must_have(avx512cd);
5980
0
        goto avx512f_no_sae;
5981
5982
61
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2c): /* vmaskmovps mem,{x,y}mm,{x,y}mm */
5983
127
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2d): /* vmaskmovpd mem,{x,y}mm,{x,y}mm */
5984
269
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps {x,y}mm,{x,y}mm,mem */
5985
414
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd {x,y}mm,{x,y}mm,mem */
5986
414
    {
5987
414
        typeof(vex) *pvex;
5988
5989
414
        generate_exception_if(ea.type != OP_MEM || vex.w, X86_EXC_UD);
5990
407
        host_and_vcpu_must_have(avx);
5991
407
        elem_bytes = 4 << (b & 1);
5992
612
    vmaskmov:
5993
612
        get_fpu(X86EMUL_FPU_ymm);
5994
5995
        /*
5996
         * While we can't reasonably provide fully correct behavior here
5997
         * (in particular, for writes, avoiding the memory read in anticipation
5998
         * of all elements in the range eventually being written), we can (and
5999
         * should) still limit the memory access to the smallest possible range
6000
         * (suppressing it altogether if all mask bits are clear), to provide
6001
         * correct faulting behavior. Read the mask bits via vmovmskp{s,d}
6002
         * for that purpose.
6003
         */
6004
610
        opc = init_prefixes(stub);
6005
610
        pvex = copy_VEX(opc, vex);
6006
610
        pvex->opcx = vex_0f;
6007
610
        if ( elem_bytes == 4 )
6008
404
            pvex->pfx = vex_none;
6009
610
        opc[0] = 0x50; /* vmovmskp{s,d} */
6010
        /* Use %rax as GPR destination and VEX.vvvv as source. */
6011
610
        pvex->r = 1;
6012
610
        pvex->b = !mode_64bit() || (vex.reg >> 3);
6013
610
        opc[1] = 0xc0 | (~vex.reg & 7);
6014
610
        pvex->reg = 0xf;
6015
610
        opc[2] = 0xc3;
6016
6017
610
        invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
6018
610
        put_stub(stub);
6019
6020
610
        evex.opmsk = 1; /* fake */
6021
610
        op_mask = ea.val;
6022
610
        fault_suppression = true;
6023
6024
610
        opc = init_prefixes(stub);
6025
0
        opc[0] = b;
6026
        /* Convert memory operand to (%rAX). */
6027
610
        rex_prefix &= ~REX_B;
6028
610
        vex.b = 1;
6029
610
        opc[1] = modrm & 0x38;
6030
610
        insn_bytes = PFX_BYTES + 2;
6031
6032
610
        break;
6033
610
    }
6034
6035
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x2c): /* vscalefp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6036
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x42): /* vgetexpp{s,d} [xyz]mm/mem,[xyz]mm{k} */
6037
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6038
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6039
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6040
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9a): /* vfmsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6041
7
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9c): /* vfnmadd132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6042
8
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9e): /* vfnmsub132p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6043
9
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa6): /* vfmaddsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6044
10
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa7): /* vfmsubadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6045
11
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa8): /* vfmadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6046
12
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xaa): /* vfmsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6047
13
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xac): /* vfnmadd213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6048
14
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xae): /* vfnmsub213p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6049
15
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb6): /* vfmaddsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6050
16
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb7): /* vfmsubadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6051
17
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb8): /* vfmadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6052
18
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xba): /* vfmsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6053
19
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xbc): /* vfnmadd231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6054
20
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xbe): /* vfnmsub231p{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6055
20
        host_and_vcpu_must_have(avx512f);
6056
0
        if ( ea.type != OP_REG || !evex.brs )
6057
0
            avx512_vlen_check(false);
6058
0
        goto simd_zmm;
6059
6060
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x2d): /* vscalefs{s,d} xmm/mem,xmm,xmm{k} */
6061
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x43): /* vgetexps{s,d} xmm/mem,xmm,xmm{k} */
6062
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x99): /* vfmadd132s{s,d} xmm/mem,xmm,xmm{k} */
6063
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9b): /* vfmsub132s{s,d} xmm/mem,xmm,xmm{k} */
6064
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9d): /* vfnmadd132s{s,d} xmm/mem,xmm,xmm{k} */
6065
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x9f): /* vfnmsub132s{s,d} xmm/mem,xmm,xmm{k} */
6066
7
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa9): /* vfmadd213s{s,d} xmm/mem,xmm,xmm{k} */
6067
8
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xab): /* vfmsub213s{s,d} xmm/mem,xmm,xmm{k} */
6068
9
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xad): /* vfnmadd213s{s,d} xmm/mem,xmm,xmm{k} */
6069
10
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xaf): /* vfnmsub213s{s,d} xmm/mem,xmm,xmm{k} */
6070
11
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb9): /* vfmadd231s{s,d} xmm/mem,xmm,xmm{k} */
6071
12
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xbb): /* vfmsub231s{s,d} xmm/mem,xmm,xmm{k} */
6072
13
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xbd): /* vfnmadd231s{s,d} xmm/mem,xmm,xmm{k} */
6073
14
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xbf): /* vfnmsub231s{s,d} xmm/mem,xmm,xmm{k} */
6074
14
        host_and_vcpu_must_have(avx512f);
6075
0
        generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
6076
0
        if ( !evex.brs )
6077
0
            avx512_vlen_check(true);
6078
0
        goto simd_zmm;
6079
6080
58
    case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */
6081
58
        host_and_vcpu_must_have(sse4_2);
6082
58
        goto simd_0f38_common;
6083
6084
58
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6085
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6086
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6087
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6088
4
        host_and_vcpu_must_have(avx512bw);
6089
0
        generate_exception_if(evex.brs, X86_EXC_UD);
6090
0
        elem_bytes = b & 2 ?: 1;
6091
0
        goto avx512f_no_sae;
6092
6093
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6094
2
        if ( evex.w )
6095
2
            host_and_vcpu_must_have(avx512dq);
6096
1
        goto avx512f_no_sae;
6097
6098
66
    case X86EMUL_OPC_66(0x0f38, 0xdb):     /* aesimc xmm/m128,xmm */
6099
185
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
6100
199
    case X86EMUL_OPC_66(0x0f38, 0xdc):     /* aesenc xmm/m128,xmm,xmm */
6101
265
    case X86EMUL_OPC_66(0x0f38, 0xdd):     /* aesenclast xmm/m128,xmm,xmm */
6102
333
    case X86EMUL_OPC_66(0x0f38, 0xde):     /* aesdec xmm/m128,xmm,xmm */
6103
529
    case X86EMUL_OPC_66(0x0f38, 0xdf):     /* aesdeclast xmm/m128,xmm,xmm */
6104
529
        host_and_vcpu_must_have(aesni);
6105
529
        if ( vex.opcx == vex_none )
6106
410
            goto simd_0f38_common;
6107
        /* fall through */
6108
185
    case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */
6109
185
        generate_exception_if(vex.l, X86_EXC_UD);
6110
184
        goto simd_0f_avx;
6111
6112
184
    case X86EMUL_OPC_VEX   (0x0f38, 0x50): /* vpdpbuud [xy]mm/mem,[xy]mm,[xy]mm */
6113
2
    case X86EMUL_OPC_VEX_F3(0x0f38, 0x50): /* vpdpbsud [xy]mm/mem,[xy]mm,[xy]mm */
6114
3
    case X86EMUL_OPC_VEX_F2(0x0f38, 0x50): /* vpdpbssd [xy]mm/mem,[xy]mm,[xy]mm */
6115
4
    case X86EMUL_OPC_VEX   (0x0f38, 0x51): /* vpdpbuuds [xy]mm/mem,[xy]mm,[xy]mm */
6116
5
    case X86EMUL_OPC_VEX_F3(0x0f38, 0x51): /* vpdpbsuds [xy]mm/mem,[xy]mm,[xy]mm */
6117
6
    case X86EMUL_OPC_VEX_F2(0x0f38, 0x51): /* vpdpbssds [xy]mm/mem,[xy]mm,[xy]mm */
6118
6
        host_and_vcpu_must_have(avx_vnni_int8);
6119
0
        generate_exception_if(vex.w, X86_EXC_UD);
6120
0
        op_bytes = 16 << vex.l;
6121
0
        goto simd_0f_ymm;
6122
6123
1
    case X86EMUL_OPC_VEX_66(0x0f38, 0x50): /* vpdpbusd [xy]mm/mem,[xy]mm,[xy]mm */
6124
2
    case X86EMUL_OPC_VEX_66(0x0f38, 0x51): /* vpdpbusds [xy]mm/mem,[xy]mm,[xy]mm */
6125
3
    case X86EMUL_OPC_VEX_66(0x0f38, 0x52): /* vpdpwssd [xy]mm/mem,[xy]mm,[xy]mm */
6126
3
    case X86EMUL_OPC_VEX_66(0x0f38, 0x53): /* vpdpwssds [xy]mm/mem,[xy]mm,[xy]mm */
6127
3
        host_and_vcpu_must_have(avx_vnni);
6128
0
        generate_exception_if(vex.w, X86_EXC_UD);
6129
0
        goto simd_0f_ymm;
6130
6131
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x50): /* vpdpbusd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6132
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x51): /* vpdpbusds [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6133
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x52): /* vpdpwssd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6134
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x53): /* vpdpwssds [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6135
4
        host_and_vcpu_must_have(avx512_vnni);
6136
0
        generate_exception_if(evex.w, X86_EXC_UD);
6137
0
        goto avx512f_no_sae;
6138
6139
1
    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6140
2
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */
6141
2
        if ( evex.pfx == vex_f2 )
6142
1
            fault_suppression = false;
6143
1
        else
6144
1
            d |= TwoOp;
6145
        /* fall through */
6146
3
    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6147
3
        host_and_vcpu_must_have(avx512_bf16);
6148
0
        generate_exception_if(evex.w, X86_EXC_UD);
6149
0
        op_bytes = 16 << evex.lr;
6150
0
        goto avx512f_no_sae;
6151
6152
68
    case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */
6153
120
    case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */
6154
180
    case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */
6155
248
    case X86EMUL_OPC_VEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,{x,y}mm */
6156
248
        op_bytes = 1 << ((!(b & 0x20) * 2) + (b & 1));
6157
        /* fall through */
6158
315
    case X86EMUL_OPC_VEX_66(0x0f38, 0x46): /* vpsravd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6159
315
        generate_exception_if(vex.w, X86_EXC_UD);
6160
308
        goto simd_0f_avx2;
6161
6162
308
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x4d): /* vrcp14s{s,d} xmm/mem,xmm,xmm{k} */
6163
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x4f): /* vrsqrt14s{s,d} xmm/mem,xmm,xmm{k} */
6164
2
        host_and_vcpu_must_have(avx512f);
6165
0
        generate_exception_if(evex.brs, X86_EXC_UD);
6166
0
        avx512_vlen_check(true);
6167
0
        goto simd_zmm;
6168
6169
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x8f): /* vpshufbitqmb [xyz]mm/mem,[xyz]mm,k{k} */
6170
5
        generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD);
6171
        /* fall through */
6172
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x54): /* vpopcnt{b,w} [xyz]mm/mem,[xyz]mm{k} */
6173
2
        host_and_vcpu_must_have(avx512_bitalg);
6174
        /* fall through */
6175
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x66): /* vpblendm{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6176
1
        host_and_vcpu_must_have(avx512bw);
6177
0
        generate_exception_if(evex.brs, X86_EXC_UD);
6178
0
        elem_bytes = 1 << evex.w;
6179
0
        goto avx512f_no_sae;
6180
6181
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x55): /* vpopcnt{d,q} [xyz]mm/mem,[xyz]mm{k} */
6182
1
        host_and_vcpu_must_have(avx512_vpopcntdq);
6183
0
        goto avx512f_no_sae;
6184
6185
78
    case X86EMUL_OPC_VEX_66(0x0f38, 0x5a): /* vbroadcasti128 m128,ymm */
6186
78
        generate_exception_if(ea.type != OP_MEM || !vex.l || vex.w, X86_EXC_UD);
6187
70
        goto simd_0f_avx2;
6188
6189
70
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x62): /* vpexpand{b,w} [xyz]mm/mem,[xyz]mm{k} */
6190
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x63): /* vpcompress{b,w} [xyz]mm,[xyz]mm/mem{k} */
6191
2
        host_and_vcpu_must_have(avx512_vbmi2);
6192
0
        elem_bytes = 1 << evex.w;
6193
        /* fall through */
6194
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x88): /* vexpandp{s,d} [xyz]mm/mem,[xyz]mm{k} */
6195
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x89): /* vpexpand{d,q} [xyz]mm/mem,[xyz]mm{k} */
6196
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x8a): /* vcompressp{s,d} [xyz]mm,[xyz]mm/mem{k} */
6197
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x8b): /* vpcompress{d,q} [xyz]mm,[xyz]mm/mem{k} */
6198
4
        host_and_vcpu_must_have(avx512f);
6199
0
        generate_exception_if(evex.brs, X86_EXC_UD);
6200
0
        avx512_vlen_check(false);
6201
        /*
6202
         * For the respective code below the main switch() to work we need to
6203
         * compact op_mask here: Memory accesses are non-sparse even if the
6204
         * mask register has sparsely set bits.
6205
         */
6206
0
        if ( likely(fault_suppression) )
6207
0
        {
6208
0
            n = 1 << ((b & 8 ? 2 : 4) + evex.lr - evex.w);
6209
0
            EXPECT(elem_bytes > 0);
6210
0
            ASSERT(op_bytes == n * elem_bytes);
6211
0
            op_mask &= ~0ULL >> (64 - n);
6212
0
            n = hweight64(op_mask);
6213
0
            op_bytes = n * elem_bytes;
6214
0
            if ( n )
6215
0
                op_mask = ~0ULL >> (64 - n);
6216
0
        }
6217
0
        goto simd_zmm;
6218
6219
1
    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x68): /* vp2intersect{d,q} [xyz]mm/mem,[xyz]mm,k+1 */
6220
1
        host_and_vcpu_must_have(avx512_vp2intersect);
6221
0
        generate_exception_if(evex.opmsk || !evex.r || !evex.R, X86_EXC_UD);
6222
0
        op_bytes = 16 << evex.lr;
6223
0
        goto avx512f_no_sae;
6224
6225
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x70): /* vpshldvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6226
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x72): /* vpshrdvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6227
2
        generate_exception_if(!evex.w, X86_EXC_UD);
6228
1
        elem_bytes = 2;
6229
        /* fall through */
6230
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x71): /* vpshldv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6231
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x73): /* vpshrdv{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6232
3
        host_and_vcpu_must_have(avx512_vbmi2);
6233
0
        goto avx512f_no_sae;
6234
6235
2
    case X86EMUL_OPC_VEX   (0x0f38, 0xb0): /* vcvtneoph2ps mem,[xy]mm */
6236
3
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb0): /* vcvtneeph2ps mem,[xy]mm */
6237
4
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xb0): /* vcvtneebf162ps mem,[xy]mm */
6238
5
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xb0): /* vcvtneobf162ps mem,[xy]mm */
6239
5
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
6240
        /* fall through */
6241
5
    case X86EMUL_OPC_VEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xy]mm/mem,xmm */
6242
5
        host_and_vcpu_must_have(avx_ne_convert);
6243
0
        generate_exception_if(vex.w, X86_EXC_UD);
6244
0
        d |= TwoOp;
6245
0
        op_bytes = 16 << vex.l;
6246
0
        goto simd_0f_ymm;
6247
6248
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x75): /* vpermi2{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6249
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7d): /* vpermt2{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6250
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x8d): /* vperm{b,w} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6251
3
        if ( !evex.w )
6252
3
            host_and_vcpu_must_have(avx512_vbmi);
6253
2
        else
6254
3
            host_and_vcpu_must_have(avx512bw);
6255
0
        generate_exception_if(evex.brs, X86_EXC_UD);
6256
0
        fault_suppression = false;
6257
0
        goto avx512f_no_sae;
6258
6259
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,[xyz]mm{k} */
6260
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,[xyz]mm{k} */
6261
2
        host_and_vcpu_must_have(avx512bw);
6262
0
        generate_exception_if(evex.w || evex.brs, X86_EXC_UD);
6263
0
        op_bytes = elem_bytes = 1 << (b & 1);
6264
        /* See the comment at the avx512_broadcast label. */
6265
0
        op_mask |= !(b & 1 ? !(uint32_t)op_mask : !op_mask);
6266
0
        goto avx512f_no_sae;
6267
6268
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7a): /* vpbroadcastb r32,[xyz]mm{k} */
6269
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7b): /* vpbroadcastw r32,[xyz]mm{k} */
6270
2
        host_and_vcpu_must_have(avx512bw);
6271
0
        generate_exception_if(evex.w, X86_EXC_UD);
6272
        /* fall through */
6273
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x7c): /* vpbroadcast{d,q} reg,[xyz]mm{k} */
6274
5
        generate_exception_if((ea.type != OP_REG || evex.brs ||
6275
5
                               evex.reg != 0xf || !evex.RX),
6276
5
                              X86_EXC_UD);
6277
1
        host_and_vcpu_must_have(avx512f);
6278
0
        avx512_vlen_check(false);
6279
0
        get_fpu(X86EMUL_FPU_zmm);
6280
6281
0
        opc = init_evex(stub);
6282
0
        opc[0] = b;
6283
        /* Convert GPR source to %rAX. */
6284
0
        evex.b = 1;
6285
0
        if ( !mode_64bit() )
6286
0
            evex.w = 0;
6287
0
        opc[1] = modrm & 0xf8;
6288
0
        insn_bytes = EVEX_PFX_BYTES + 2;
6289
0
        opc[2] = 0xc3;
6290
6291
0
        copy_EVEX(opc, evex);
6292
0
        invoke_stub("", "", "=g" (dummy) : "a" (src.val));
6293
6294
0
        put_stub(stub);
6295
0
        ASSERT(!state->simd_size);
6296
0
        break;
6297
6298
0
#endif /* !X86EMUL_NO_SIMD */
6299
6300
482
    case X86EMUL_OPC_66(0x0f38, 0x82): /* invpcid reg,m128 */
6301
482
        vcpu_must_have(invpcid);
6302
482
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
6303
481
        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
6304
6305
479
        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16,
6306
479
                             ctxt)) != X86EMUL_OKAY )
6307
4
            goto done;
6308
6309
475
        generate_exception_if(mmvalp->xmm[0] & ~0xfff, X86_EXC_GP, 0);
6310
404
        dst.val = mode_64bit() ? *dst.reg : (uint32_t)*dst.reg;
6311
6312
404
        switch ( dst.val )
6313
404
        {
6314
141
        case X86_INVPCID_INDIV_ADDR:
6315
141
             generate_exception_if(!is_canonical_address(mmvalp->xmm[1]),
6316
141
                                   X86_EXC_GP, 0);
6317
             /* fall through */
6318
247
        case X86_INVPCID_SINGLE_CTXT:
6319
247
             if ( !mode_64bit() || !ops->read_cr )
6320
153
                 cr4 = 0;
6321
94
             else if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
6322
0
                 goto done;
6323
247
             generate_exception_if(!(cr4 & X86_CR4_PCIDE) && mmvalp->xmm[0],
6324
247
                                   X86_EXC_GP, 0);
6325
236
             break;
6326
236
        case X86_INVPCID_ALL_INCL_GLOBAL:
6327
136
        case X86_INVPCID_ALL_NON_GLOBAL:
6328
136
             break;
6329
4
        default:
6330
4
             generate_exception(X86_EXC_GP, 0);
6331
404
        }
6332
6333
372
        fail_if(!ops->tlb_op);
6334
371
        if ( (rc = ops->tlb_op(x86emul_invpcid, truncate_ea(mmvalp->xmm[1]),
6335
371
                               x86emul_invpcid_aux(mmvalp->xmm[0], dst.val),
6336
371
                               ctxt)) != X86EMUL_OKAY )
6337
1
            goto done;
6338
6339
370
        state->simd_size = simd_none;
6340
370
        break;
6341
6342
0
#ifndef X86EMUL_NO_SIMD
6343
6344
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x83): /* vpmultishiftqb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6345
2
        generate_exception_if(!evex.w, X86_EXC_UD);
6346
1
        host_and_vcpu_must_have(avx512_vbmi);
6347
0
        fault_suppression = false;
6348
0
        goto avx512f_no_sae;
6349
6350
150
    case X86EMUL_OPC_VEX_66(0x0f38, 0x8c): /* vpmaskmov{d,q} mem,{x,y}mm,{x,y}mm */
6351
206
    case X86EMUL_OPC_VEX_66(0x0f38, 0x8e): /* vpmaskmov{d,q} {x,y}mm,{x,y}mm,mem */
6352
206
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
6353
205
        host_and_vcpu_must_have(avx2);
6354
205
        elem_bytes = 4 << vex.w;
6355
205
        goto vmaskmov;
6356
6357
544
    case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */
6358
647
    case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */
6359
804
    case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */
6360
966
    case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */
6361
966
    {
6362
966
        unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7);
6363
966
        typeof(vex) *pvex;
6364
966
        union {
6365
966
            int32_t dw[8];
6366
966
            int64_t qw[4];
6367
966
        } index, mask;
6368
966
        bool done = false;
6369
6370
966
        ASSERT(ea.type == OP_MEM);
6371
966
        generate_exception_if(modrm_reg == state->sib_index ||
6372
966
                              modrm_reg == mask_reg ||
6373
966
                              state->sib_index == mask_reg, X86_EXC_UD);
6374
962
        generate_exception_if(!cpu_has_avx, X86_EXC_UD);
6375
962
        vcpu_must_have(avx2);
6376
962
        get_fpu(X86EMUL_FPU_ymm);
6377
6378
        /* Read destination, index, and mask registers. */
6379
961
        opc = init_prefixes(stub);
6380
961
        pvex = copy_VEX(opc, vex);
6381
961
        pvex->opcx = vex_0f;
6382
961
        opc[0] = 0x7f; /* vmovdqa */
6383
        /* Use (%rax) as destination and modrm_reg as source. */
6384
961
        pvex->r = !mode_64bit() || !(modrm_reg & 8);
6385
961
        pvex->b = 1;
6386
961
        opc[1] = (modrm_reg & 7) << 3;
6387
961
        pvex->reg = 0xf;
6388
961
        opc[2] = 0xc3;
6389
6390
961
        invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
6391
6392
961
        pvex->pfx = vex_f3; /* vmovdqu */
6393
        /* Switch to sib_index as source. */
6394
961
        pvex->r = !mode_64bit() || !(state->sib_index & 8);
6395
961
        opc[1] = (state->sib_index & 7) << 3;
6396
6397
961
        invoke_stub("", "", "=m" (index) : "a" (&index));
6398
6399
        /* Switch to mask_reg as source. */
6400
961
        pvex->r = !mode_64bit() || !(mask_reg & 8);
6401
961
        opc[1] = (mask_reg & 7) << 3;
6402
6403
961
        invoke_stub("", "", "=m" (mask) : "a" (&mask));
6404
961
        put_stub(stub);
6405
6406
        /* Clear untouched parts of the destination and mask values. */
6407
961
        n = 1 << (2 + vex.l - ((b & 1) | vex.w));
6408
961
        op_bytes = 4 << vex.w;
6409
961
        memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes);
6410
961
        memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes);
6411
6412
5.76k
        for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i )
6413
4.86k
        {
6414
4.86k
            if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 )
6415
1.07k
            {
6416
1.07k
                unsigned long idx = b & 1 ? index.qw[i] : index.dw[i];
6417
6418
1.07k
                rc = ops->read(ea.mem.seg,
6419
1.07k
                               truncate_ea(ea.mem.off +
6420
1.07k
                                           (idx << state->sib_scale)),
6421
1.07k
                               (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
6422
1.07k
                if ( rc != X86EMUL_OKAY )
6423
65
                {
6424
                    /*
6425
                     * If we've made any progress and the access did not fault,
6426
                     * force a retry instead. This is for example necessary to
6427
                     * cope with the limited capacity of HVM's MMIO cache.
6428
                     */
6429
65
                    if ( rc != X86EMUL_EXCEPTION && done )
6430
0
                        rc = X86EMUL_RETRY;
6431
65
                    break;
6432
65
                }
6433
6434
#ifdef __XEN__
6435
                if ( i + 1 < n && local_events_need_delivery() )
6436
                    rc = X86EMUL_RETRY;
6437
#endif
6438
6439
1.01k
                done = true;
6440
1.01k
            }
6441
6442
4.80k
            if ( vex.w )
6443
470
                mask.qw[i] = 0;
6444
4.33k
            else
6445
4.33k
                mask.dw[i] = 0;
6446
4.80k
        }
6447
6448
        /* Write destination and mask registers. */
6449
961
        opc = init_prefixes(stub);
6450
961
        pvex = copy_VEX(opc, vex);
6451
961
        pvex->opcx = vex_0f;
6452
961
        opc[0] = 0x6f; /* vmovdqa */
6453
        /* Use modrm_reg as destination and (%rax) as source. */
6454
961
        pvex->r = !mode_64bit() || !(modrm_reg & 8);
6455
961
        pvex->b = 1;
6456
961
        opc[1] = (modrm_reg & 7) << 3;
6457
961
        pvex->reg = 0xf;
6458
961
        opc[2] = 0xc3;
6459
6460
961
        invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
6461
6462
961
        pvex->pfx = vex_f3; /* vmovdqu */
6463
        /* Switch to mask_reg as destination. */
6464
961
        pvex->r = !mode_64bit() || !(mask_reg & 8);
6465
961
        opc[1] = (mask_reg & 7) << 3;
6466
6467
961
        invoke_stub("", "", "+m" (mask) : "a" (&mask));
6468
961
        put_stub(stub);
6469
6470
961
        if ( rc != X86EMUL_OKAY )
6471
65
            goto done;
6472
6473
896
        state->simd_size = simd_none;
6474
896
        break;
6475
961
    }
6476
6477
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x90): /* vpgatherd{d,q} mem,[xyz]mm{k} */
6478
5
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x91): /* vpgatherq{d,q} mem,[xyz]mm{k} */
6479
9
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x92): /* vgatherdp{s,d} mem,[xyz]mm{k} */
6480
10
    case X86EMUL_OPC_EVEX_66(0x0f38, 0x93): /* vgatherqp{s,d} mem,[xyz]mm{k} */
6481
10
    {
6482
10
        typeof(evex) *pevex;
6483
10
        union {
6484
10
            int32_t dw[16];
6485
10
            int64_t qw[8];
6486
10
        } index;
6487
10
        bool done = false;
6488
6489
10
        ASSERT(ea.type == OP_MEM);
6490
10
        generate_exception_if((!evex.opmsk || evex.brs || evex.z ||
6491
10
                               evex.reg != 0xf ||
6492
10
                               modrm_reg == state->sib_index),
6493
10
                              X86_EXC_UD);
6494
4
        avx512_vlen_check(false);
6495
1
        host_and_vcpu_must_have(avx512f);
6496
0
        get_fpu(X86EMUL_FPU_zmm);
6497
6498
        /* Read destination and index registers. */
6499
0
        opc = init_evex(stub);
6500
0
        pevex = copy_EVEX(opc, evex);
6501
0
        pevex->opcx = vex_0f;
6502
0
        opc[0] = 0x7f; /* vmovdqa{32,64} */
6503
        /*
6504
         * The register writeback below has to retain masked-off elements, but
6505
         * needs to clear upper portions in the index-wider-than-data cases.
6506
         * Therefore read (and write below) the full register. The alternative
6507
         * would have been to fiddle with the mask register used.
6508
         */
6509
0
        pevex->opmsk = 0;
6510
        /* Use (%rax) as destination and modrm_reg as source. */
6511
0
        pevex->b = 1;
6512
0
        opc[1] = (modrm_reg & 7) << 3;
6513
0
        pevex->RX = 1;
6514
0
        opc[2] = 0xc3;
6515
6516
0
        invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
6517
6518
0
        pevex->pfx = vex_f3; /* vmovdqu{32,64} */
6519
0
        pevex->w = b & 1;
6520
        /* Switch to sib_index as source. */
6521
0
        pevex->r = !mode_64bit() || !(state->sib_index & 0x08);
6522
0
        pevex->R = !mode_64bit() || !(state->sib_index & 0x10);
6523
0
        opc[1] = (state->sib_index & 7) << 3;
6524
6525
0
        invoke_stub("", "", "=m" (index) : "a" (&index));
6526
0
        put_stub(stub);
6527
6528
        /* Clear untouched parts of the destination and mask values. */
6529
0
        n = 1 << (2 + evex.lr - ((b & 1) | evex.w));
6530
0
        op_bytes = 4 << evex.w;
6531
0
        memset((void *)mmvalp + n * op_bytes, 0, 64 - n * op_bytes);
6532
0
        op_mask &= (1 << n) - 1;
6533
6534
0
        for ( i = 0; op_mask; ++i )
6535
0
        {
6536
0
            unsigned long idx = b & 1 ? index.qw[i] : index.dw[i];
6537
6538
0
            if ( !(op_mask & (1 << i)) )
6539
0
                continue;
6540
6541
0
            rc = ops->read(ea.mem.seg,
6542
0
                           truncate_ea(ea.mem.off +
6543
0
                                       (idx << state->sib_scale)),
6544
0
                           (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
6545
0
            if ( rc != X86EMUL_OKAY )
6546
0
            {
6547
                /*
6548
                 * If we've made some progress and the access did not fault,
6549
                 * force a retry instead. This is for example necessary to
6550
                 * cope with the limited capacity of HVM's MMIO cache.
6551
                 */
6552
0
                if ( rc != X86EMUL_EXCEPTION && done )
6553
0
                    rc = X86EMUL_RETRY;
6554
0
                break;
6555
0
            }
6556
6557
0
            op_mask &= ~(1 << i);
6558
0
            done = true;
6559
6560
#ifdef __XEN__
6561
            if ( op_mask && local_events_need_delivery() )
6562
            {
6563
                rc = X86EMUL_RETRY;
6564
                break;
6565
            }
6566
#endif
6567
0
        }
6568
6569
        /* Write destination and mask registers. */
6570
0
        opc = init_evex(stub);
6571
0
        pevex = copy_EVEX(opc, evex);
6572
0
        pevex->opcx = vex_0f;
6573
0
        opc[0] = 0x6f; /* vmovdqa{32,64} */
6574
0
        pevex->opmsk = 0;
6575
        /* Use modrm_reg as destination and (%rax) as source. */
6576
0
        pevex->b = 1;
6577
0
        opc[1] = (modrm_reg & 7) << 3;
6578
0
        pevex->RX = 1;
6579
0
        opc[2] = 0xc3;
6580
6581
0
        invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
6582
6583
        /*
6584
         * kmovw: This is VEX-encoded, so we can't use pevex. Avoid copy_VEX() etc
6585
         * as well, since we can easily use the 2-byte VEX form here.
6586
         */
6587
0
        opc -= EVEX_PFX_BYTES;
6588
0
        opc[0] = 0xc5;
6589
0
        opc[1] = 0xf8;
6590
0
        opc[2] = 0x90;
6591
        /* Use (%rax) as source. */
6592
0
        opc[3] = evex.opmsk << 3;
6593
0
        opc[4] = 0xc3;
6594
6595
0
        invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
6596
0
        put_stub(stub);
6597
6598
0
        if ( rc != X86EMUL_OKAY )
6599
0
            goto done;
6600
6601
0
        state->simd_size = simd_none;
6602
0
        break;
6603
0
    }
6604
6605
66
    case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6606
132
    case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6607
182
    case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6608
216
    case X86EMUL_OPC_VEX_66(0x0f38, 0x99): /* vfmadd132s{s,d} xmm/mem,xmm,xmm */
6609
282
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9a): /* vfmsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6610
348
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9b): /* vfmsub132s{s,d} xmm/mem,xmm,xmm */
6611
400
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9c): /* vfnmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6612
464
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9d): /* vfnmadd132s{s,d} xmm/mem,xmm,xmm */
6613
530
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9e): /* vfnmsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6614
596
    case X86EMUL_OPC_VEX_66(0x0f38, 0x9f): /* vfnmsub132s{s,d} xmm/mem,xmm,xmm */
6615
662
    case X86EMUL_OPC_VEX_66(0x0f38, 0xa6): /* vfmaddsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6616
696
    case X86EMUL_OPC_VEX_66(0x0f38, 0xa7): /* vfmsubadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6617
762
    case X86EMUL_OPC_VEX_66(0x0f38, 0xa8): /* vfmadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6618
827
    case X86EMUL_OPC_VEX_66(0x0f38, 0xa9): /* vfmadd213s{s,d} xmm/mem,xmm,xmm */
6619
893
    case X86EMUL_OPC_VEX_66(0x0f38, 0xaa): /* vfmsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6620
959
    case X86EMUL_OPC_VEX_66(0x0f38, 0xab): /* vfmsub213s{s,d} xmm/mem,xmm,xmm */
6621
1.02k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xac): /* vfnmadd213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6622
1.32k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xad): /* vfnmadd213s{s,d} xmm/mem,xmm,xmm */
6623
1.40k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xae): /* vfnmsub213p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6624
1.59k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xaf): /* vfnmsub213s{s,d} xmm/mem,xmm,xmm */
6625
1.66k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb6): /* vfmaddsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6626
1.71k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb7): /* vfmsubadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6627
1.78k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb8): /* vfmadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6628
1.85k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb9): /* vfmadd231s{s,d} xmm/mem,xmm,xmm */
6629
1.91k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xba): /* vfmsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6630
1.98k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xbb): /* vfmsub231s{s,d} xmm/mem,xmm,xmm */
6631
2.04k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xbc): /* vfnmadd231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6632
2.11k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xbd): /* vfnmadd231s{s,d} xmm/mem,xmm,xmm */
6633
2.18k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xbe): /* vfnmsub231p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
6634
2.21k
    case X86EMUL_OPC_VEX_66(0x0f38, 0xbf): /* vfnmsub231s{s,d} xmm/mem,xmm,xmm */
6635
2.21k
        host_and_vcpu_must_have(fma);
6636
2.21k
        goto simd_0f_ymm;
6637
6638
2.21k
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa0): /* vpscatterd{d,q} [xyz]mm,mem{k} */
6639
6
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa1): /* vpscatterq{d,q} [xyz]mm,mem{k} */
6640
7
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa2): /* vscatterdp{s,d} [xyz]mm,mem{k} */
6641
10
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xa3): /* vscatterqp{s,d} [xyz]mm,mem{k} */
6642
10
    {
6643
10
        typeof(evex) *pevex;
6644
10
        union {
6645
10
            int32_t dw[16];
6646
10
            int64_t qw[8];
6647
10
        } index;
6648
10
        bool done = false;
6649
6650
10
        ASSERT(ea.type == OP_MEM);
6651
10
        fail_if(!ops->write);
6652
9
        generate_exception_if((!evex.opmsk || evex.brs || evex.z ||
6653
9
                               evex.reg != 0xf ||
6654
9
                               modrm_reg == state->sib_index),
6655
9
                              X86_EXC_UD);
6656
5
        avx512_vlen_check(false);
6657
1
        host_and_vcpu_must_have(avx512f);
6658
0
        get_fpu(X86EMUL_FPU_zmm);
6659
6660
        /* Read source and index registers. */
6661
0
        opc = init_evex(stub);
6662
0
        pevex = copy_EVEX(opc, evex);
6663
0
        pevex->opcx = vex_0f;
6664
0
        opc[0] = 0x7f; /* vmovdqa{32,64} */
6665
        /* Use (%rax) as destination and modrm_reg as source. */
6666
0
        pevex->b = 1;
6667
0
        opc[1] = (modrm_reg & 7) << 3;
6668
0
        pevex->RX = 1;
6669
0
        opc[2] = 0xc3;
6670
6671
0
        invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
6672
6673
0
        pevex->pfx = vex_f3; /* vmovdqu{32,64} */
6674
0
        pevex->w = b & 1;
6675
        /* Switch to sib_index as source. */
6676
0
        pevex->r = !mode_64bit() || !(state->sib_index & 0x08);
6677
0
        pevex->R = !mode_64bit() || !(state->sib_index & 0x10);
6678
0
        opc[1] = (state->sib_index & 7) << 3;
6679
6680
0
        invoke_stub("", "", "=m" (index) : "a" (&index));
6681
0
        put_stub(stub);
6682
6683
        /* Clear untouched parts of the mask value. */
6684
0
        n = 1 << (2 + evex.lr - ((b & 1) | evex.w));
6685
0
        op_bytes = 4 << evex.w;
6686
0
        op_mask &= (1 << n) - 1;
6687
6688
0
        for ( i = 0; op_mask; ++i )
6689
0
        {
6690
0
            unsigned long idx = b & 1 ? index.qw[i] : index.dw[i];
6691
0
            unsigned long offs = truncate_ea(ea.mem.off +
6692
0
                                             (idx << state->sib_scale));
6693
0
            unsigned int j, slot;
6694
6695
0
            if ( !(op_mask & (1 << i)) )
6696
0
                continue;
6697
6698
            /*
6699
             * hvmemul_linear_mmio_access() will find a cache slot based on
6700
             * linear address.  hvmemul_phys_mmio_access() will crash the
6701
             * domain if observing varying data getting written to the same
6702
             * cache slot.  Utilize that squashing earlier writes to fully
6703
             * overlapping addresses is permitted by the spec.  We can't,
6704
             * however, drop the writes altogether, to maintain correct
6705
             * faulting behavior.  Instead write the data from the last of
6706
             * the fully overlapping slots multiple times.
6707
             */
6708
0
            for ( j = (slot = i) + 1; j < n; ++j )
6709
0
            {
6710
0
                idx = b & 1 ? index.qw[j] : index.dw[j];
6711
0
                if ( (op_mask & (1 << j)) &&
6712
0
                     truncate_ea(ea.mem.off +
6713
0
                                 (idx << state->sib_scale)) == offs )
6714
0
                    slot = j;
6715
0
            }
6716
6717
0
            rc = ops->write(ea.mem.seg, offs,
6718
0
                            (void *)mmvalp + slot * op_bytes, op_bytes, ctxt);
6719
0
            if ( rc != X86EMUL_OKAY )
6720
0
            {
6721
                /* See comment in gather emulation. */
6722
0
                if ( rc != X86EMUL_EXCEPTION && done )
6723
0
                    rc = X86EMUL_RETRY;
6724
0
                break;
6725
0
            }
6726
6727
0
            op_mask &= ~(1 << i);
6728
0
            done = true;
6729
6730
#ifdef __XEN__
6731
            if ( op_mask && local_events_need_delivery() )
6732
            {
6733
                rc = X86EMUL_RETRY;
6734
                break;
6735
            }
6736
#endif
6737
0
        }
6738
6739
        /* Write mask register. See comment in gather emulation. */
6740
0
        opc = get_stub(stub);
6741
0
        opc[0] = 0xc5;
6742
0
        opc[1] = 0xf8;
6743
0
        opc[2] = 0x90;
6744
        /* Use (%rax) as source. */
6745
0
        opc[3] = evex.opmsk << 3;
6746
0
        opc[4] = 0xc3;
6747
6748
0
        invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
6749
0
        put_stub(stub);
6750
6751
0
        if ( rc != X86EMUL_OKAY )
6752
0
            goto done;
6753
6754
0
        state->simd_size = simd_none;
6755
0
        break;
6756
0
    }
6757
6758
1
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb1): /* vbcstnesh2ps mem,[xy]mm */
6759
2
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xb1): /* vbcstnebf162ps mem,[xy]mm */
6760
2
        host_and_vcpu_must_have(avx_ne_convert);
6761
0
        generate_exception_if(vex.w || ea.type != OP_MEM, X86_EXC_UD);
6762
0
        op_bytes = 2;
6763
0
        goto simd_0f_ymm;
6764
6765
1
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb4): /* vpmadd52luq [xy]mm/mem,[xy]mm,[xy]mm */
6766
2
    case X86EMUL_OPC_VEX_66(0x0f38, 0xb5): /* vpmadd52huq [xy]mm/mem,[xy]mm,[xy]mm */
6767
2
        host_and_vcpu_must_have(avx_ifma);
6768
0
        generate_exception_if(!vex.w, X86_EXC_UD);
6769
0
        goto simd_0f_ymm;
6770
6771
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb4): /* vpmadd52luq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6772
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xb5): /* vpmadd52huq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6773
2
        host_and_vcpu_must_have(avx512_ifma);
6774
0
        generate_exception_if(!evex.w, X86_EXC_UD);
6775
0
        goto avx512f_no_sae;
6776
6777
1
    case X86EMUL_OPC(0x0f38, 0xc8):     /* sha1nexte xmm/m128,xmm */
6778
2
    case X86EMUL_OPC(0x0f38, 0xc9):     /* sha1msg1 xmm/m128,xmm */
6779
3
    case X86EMUL_OPC(0x0f38, 0xca):     /* sha1msg2 xmm/m128,xmm */
6780
4
    case X86EMUL_OPC(0x0f38, 0xcb):     /* sha256rnds2 XMM0,xmm/m128,xmm */
6781
5
    case X86EMUL_OPC(0x0f38, 0xcc):     /* sha256msg1 xmm/m128,xmm */
6782
6
    case X86EMUL_OPC(0x0f38, 0xcd):     /* sha256msg2 xmm/m128,xmm */
6783
6
        host_and_vcpu_must_have(sha);
6784
6
        op_bytes = 16;
6785
6
        goto simd_0f38_common;
6786
6787
1
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xcb): /* vsha512rnds2 xmm,ymm,ymm */
6788
2
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xcc): /* vsha512msg1 xmm,ymm */
6789
3
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xcd): /* vsha512msg2 ymm,ymm */
6790
3
        host_and_vcpu_must_have(sha512);
6791
0
        generate_exception_if(ea.type != OP_REG || vex.w || !vex.l, X86_EXC_UD);
6792
0
        op_bytes = 32;
6793
0
        goto simd_0f_ymm;
6794
6795
1
    case X86EMUL_OPC_66(0x0f38, 0xcf):      /* gf2p8mulb xmm/m128,xmm */
6796
1
        host_and_vcpu_must_have(gfni);
6797
0
        goto simd_0f38_common;
6798
6799
1
    case X86EMUL_OPC_VEX_66(0x0f38, 0xcf):  /* vgf2p8mulb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6800
1
        host_and_vcpu_must_have(gfni);
6801
0
        generate_exception_if(vex.w, X86_EXC_UD);
6802
0
        goto simd_0f_avx;
6803
6804
1
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xcf): /* vgf2p8mulb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
6805
1
        host_and_vcpu_must_have(gfni);
6806
0
        generate_exception_if(evex.w || evex.brs, X86_EXC_UD);
6807
0
        elem_bytes = 1;
6808
0
        goto avx512f_no_sae;
6809
6810
1
    case X86EMUL_OPC_VEX   (0x0f38, 0xd2): /* vpdpwuud [xy]mm/mem,[xy]mm,[xy]mm */
6811
2
    case X86EMUL_OPC_VEX_66(0x0f38, 0xd2): /* vpdpwusd [xy]mm/mem,[xy]mm,[xy]mm */
6812
3
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xd2): /* vpdpwsud [xy]mm/mem,[xy]mm,[xy]mm */
6813
4
    case X86EMUL_OPC_VEX   (0x0f38, 0xd3): /* vpdpwuuds [xy]mm/mem,[xy]mm,[xy]mm */
6814
5
    case X86EMUL_OPC_VEX_66(0x0f38, 0xd3): /* vpdpwusds [xy]mm/mem,[xy]mm,[xy]mm */
6815
6
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xd3): /* vpdpwsuds [xy]mm/mem,[xy]mm,[xy]mm */
6816
6
        host_and_vcpu_must_have(avx_vnni_int16);
6817
0
        generate_exception_if(vex.w, X86_EXC_UD);
6818
0
        op_bytes = 16 << vex.l;
6819
0
        goto simd_0f_ymm;
6820
6821
7
    case X86EMUL_OPC_VEX   (0x0f38, 0xda): /* vsm3msg1 xmm/mem,xmm,xmm */
6822
12
    case X86EMUL_OPC_VEX_66(0x0f38, 0xda): /* vsm3msg2 xmm/mem,xmm,xmm */
6823
12
        generate_exception_if(vex.w || vex.l, X86_EXC_UD);
6824
5
        host_and_vcpu_must_have(sm3);
6825
0
        goto simd_0f_ymm;
6826
6827
1
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xda): /* vsm4key4 [xy]mm/mem,[xy]mm,[xy]mm */
6828
2
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xda): /* vsm4rnds4 [xy]mm/mem,[xy]mm,[xy]mm */
6829
2
        host_and_vcpu_must_have(sm4);
6830
0
        generate_exception_if(vex.w, X86_EXC_UD);
6831
0
        op_bytes = 16 << vex.l;
6832
0
        goto simd_0f_ymm;
6833
6834
66
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdc):  /* vaesenc {x,y}mm/mem,{x,y}mm,{x,y}mm */
6835
132
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdd):  /* vaesenclast {x,y}mm/mem,{x,y}mm,{x,y}mm */
6836
200
    case X86EMUL_OPC_VEX_66(0x0f38, 0xde):  /* vaesdec {x,y}mm/mem,{x,y}mm,{x,y}mm */
6837
267
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdf):  /* vaesdeclast {x,y}mm/mem,{x,y}mm,{x,y}mm */
6838
267
        if ( !vex.l )
6839
267
            host_and_vcpu_must_have(aesni);
6840
2
        else
6841
267
            host_and_vcpu_must_have(vaes);
6842
265
        goto simd_0f_avx;
6843
6844
265
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xdc): /* vaesenc [xyz]mm/mem,[xyz]mm,[xyz]mm */
6845
2
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xdd): /* vaesenclast [xyz]mm/mem,[xyz]mm,[xyz]mm */
6846
3
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xde): /* vaesdec [xyz]mm/mem,[xyz]mm,[xyz]mm */
6847
4
    case X86EMUL_OPC_EVEX_66(0x0f38, 0xdf): /* vaesdeclast [xyz]mm/mem,[xyz]mm,[xyz]mm */
6848
4
        host_and_vcpu_must_have(vaes);
6849
0
        generate_exception_if(evex.brs || evex.opmsk, X86_EXC_UD);
6850
0
        goto avx512f_no_sae;
6851
6852
0
#endif /* !X86EMUL_NO_SIMD */
6853
6854
16
    case X86EMUL_OPC_VEX_66(0x0f38, 0xe0) ...
6855
16
         X86EMUL_OPC_VEX_66(0x0f38, 0xef): /* cmp<cc>xadd r,r,m */
6856
16
        generate_exception_if(!mode_64bit() || dst.type != OP_MEM || vex.l,
6857
16
                              X86_EXC_UD);
6858
1
        host_and_vcpu_must_have(cmpccxadd);
6859
0
        fail_if(!ops->rmw);
6860
0
        state->rmw = rmw_cmpccxadd;
6861
0
        break;
6862
6863
370
    case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
6864
640
    case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
6865
640
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
6866
637
        vcpu_must_have(movbe);
6867
637
        switch ( op_bytes )
6868
637
        {
6869
235
        case 2:
6870
235
            asm ( "xchg %h0,%b0" : "=Q" (dst.val)
6871
235
                                 : "0" (*(uint32_t *)&src.val) );
6872
235
            break;
6873
206
        case 4:
6874
206
#ifdef __x86_64__
6875
206
            asm ( "bswap %k0" : "=r" (dst.val)
6876
206
                              : "0" (*(uint32_t *)&src.val) );
6877
206
            break;
6878
196
        case 8:
6879
196
#endif
6880
196
            asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
6881
196
            break;
6882
0
        default:
6883
0
            ASSERT_UNREACHABLE();
6884
0
            goto unhandleable;
6885
637
        }
6886
637
        break;
6887
6888
637
    case X86EMUL_OPC_F2(0x0f38, 0xf0): /* crc32 r/m8, r{32,64} */
6889
332
    case X86EMUL_OPC_F2(0x0f38, 0xf1): /* crc32 r/m{16,32,64}, r{32,64} */
6890
332
        host_and_vcpu_must_have(sse4_2);
6891
332
        dst.bytes = rex_prefix & REX_W ? 8 : 4;
6892
332
        switch ( op_bytes )
6893
332
        {
6894
0
        case 1:
6895
0
            asm ( "crc32b %1,%k0" : "+r" (dst.val)
6896
0
                                  : "qm" (*(uint8_t *)&src.val) );
6897
0
            break;
6898
68
        case 2:
6899
68
            asm ( "crc32w %1,%k0" : "+r" (dst.val)
6900
68
                                  : "rm" (*(uint16_t *)&src.val) );
6901
68
            break;
6902
191
        case 4:
6903
191
            asm ( "crc32l %1,%k0" : "+r" (dst.val)
6904
191
                                  : "rm" (*(uint32_t *)&src.val) );
6905
191
            break;
6906
0
#ifdef __x86_64__
6907
73
        case 8:
6908
73
            asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
6909
73
            break;
6910
0
#endif
6911
0
        default:
6912
0
            ASSERT_UNREACHABLE();
6913
0
            goto unhandleable;
6914
332
        }
6915
332
        break;
6916
6917
332
    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
6918
260
    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
6919
328
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
6920
395
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
6921
462
    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
6922
533
    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
6923
598
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
6924
633
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
6925
633
    {
6926
633
        uint8_t *buf = get_stub(stub);
6927
633
        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
6928
6929
633
        if ( b == 0xf5 || vex.pfx )
6930
633
            host_and_vcpu_must_have(bmi2);
6931
134
        else
6932
633
            host_and_vcpu_must_have(bmi1);
6933
633
        generate_exception_if(vex.l, X86_EXC_UD);
6934
6935
629
        buf[0] = 0xc4;
6936
629
        *pvex = vex;
6937
629
        pvex->b = 1;
6938
629
        pvex->r = 1;
6939
629
        if ( !mode_64bit() )
6940
218
            pvex->w = 0;
6941
629
        pvex->reg = 0xf; /* rAX */
6942
629
        buf[3] = b;
6943
629
        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
6944
629
        buf[5] = 0xc3;
6945
6946
629
        src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
6947
629
        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
6948
6949
629
        put_stub(stub);
6950
629
        break;
6951
633
    }
6952
6953
134
    case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
6954
134
    {
6955
134
        uint8_t *buf = get_stub(stub);
6956
134
        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
6957
6958
134
        switch ( modrm_reg & 7 )
6959
134
        {
6960
125
        case 1: /* blsr r,r/m */
6961
132
        case 2: /* blsmsk r,r/m */
6962
133
        case 3: /* blsi r,r/m */
6963
133
            host_and_vcpu_must_have(bmi1);
6964
133
            break;
6965
133
        default:
6966
1
            goto unrecognized_insn;
6967
134
        }
6968
6969
133
        generate_exception_if(vex.l, X86_EXC_UD);
6970
6971
132
        buf[0] = 0xc4;
6972
132
        *pvex = vex;
6973
132
        pvex->b = 1;
6974
132
        pvex->r = 1;
6975
132
        if ( !mode_64bit() )
6976
66
            pvex->w = 0;
6977
132
        pvex->reg = 0xf; /* rAX */
6978
132
        buf[3] = b;
6979
132
        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
6980
132
        buf[5] = 0xc3;
6981
6982
132
        dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
6983
132
        emulate_stub("=&a" (dst.val), "c" (&src.val));
6984
6985
132
        put_stub(stub);
6986
132
        break;
6987
133
    }
6988
6989
429
    case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
6990
915
    case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
6991
915
    {
6992
915
        unsigned int mask = rep_prefix() ? X86_EFLAGS_OF : X86_EFLAGS_CF;
6993
915
        unsigned int aux = _regs.eflags & mask ? ~0 : 0;
6994
915
        bool carry;
6995
6996
915
        vcpu_must_have(adx);
6997
915
#ifdef __x86_64__
6998
915
        if ( op_bytes == 8 )
6999
194
            asm ( "add %[aux],%[aux]\n\t"
7000
194
                  "adc %[src],%[dst]\n\t"
7001
194
                  ASM_FLAG_OUT(, "setc %[carry]")
7002
194
                  : [dst] "+r" (dst.val),
7003
194
                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
7004
194
                    [aux] "+r" (aux)
7005
194
                  : [src] "rm" (src.val) );
7006
721
        else
7007
721
#endif
7008
721
            asm ( "add %[aux],%[aux]\n\t"
7009
721
                  "adc %k[src],%k[dst]\n\t"
7010
721
                  ASM_FLAG_OUT(, "setc %[carry]")
7011
721
                  : [dst] "+r" (dst.val),
7012
721
                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
7013
721
                    [aux] "+r" (aux)
7014
721
                  : [src] "rm" (src.val) );
7015
915
        if ( carry )
7016
266
            _regs.eflags |= mask;
7017
649
        else
7018
649
            _regs.eflags &= ~mask;
7019
915
        break;
7020
915
    }
7021
7022
602
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
7023
602
        vcpu_must_have(bmi2);
7024
602
        generate_exception_if(vex.l, X86_EXC_UD);
7025
601
        ea.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7026
601
        if ( mode_64bit() && vex.w )
7027
209
            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
7028
209
                            : "0" (src.val), "rm" (_regs.r(dx)) );
7029
392
        else
7030
392
            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
7031
392
                            : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
7032
601
        break;
7033
7034
1
    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
7035
1
        host_and_vcpu_must_have(movdir64b);
7036
0
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
7037
0
        src.val = truncate_ea(*dst.reg);
7038
0
        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
7039
0
                              X86_EXC_GP, 0);
7040
0
        fail_if(!ops->blk);
7041
0
        state->blk = blk_movdir;
7042
0
        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
7043
0
        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
7044
0
                             ctxt)) != X86EMUL_OKAY ||
7045
0
             (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
7046
0
                            state, ctxt)) != X86EMUL_OKAY )
7047
0
            goto done;
7048
0
        state->simd_size = simd_none;
7049
0
        break;
7050
7051
1
    case X86EMUL_OPC_F2(0x0f38, 0xf8): /* enqcmd r,m512 */
7052
2
    case X86EMUL_OPC_F3(0x0f38, 0xf8): /* enqcmds r,m512 */
7053
2
        host_and_vcpu_must_have(enqcmd);
7054
0
        generate_exception_if(ea.type != OP_MEM, X86_EXC_UD);
7055
0
        generate_exception_if(vex.pfx != vex_f2 && !mode_ring0(), X86_EXC_GP, 0);
7056
0
        src.val = truncate_ea(*dst.reg);
7057
0
        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
7058
0
                              X86_EXC_GP, 0);
7059
0
        fail_if(!ops->blk);
7060
0
        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
7061
0
        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
7062
0
                             ctxt)) != X86EMUL_OKAY )
7063
0
            goto done;
7064
0
        if ( vex.pfx == vex_f2 ) /* enqcmd */
7065
0
        {
7066
0
            generate_exception_if(mmvalp->data32[0], X86_EXC_GP, 0);
7067
0
            fail_if(!ops->read_msr);
7068
0
            if ( (rc = ops->read_msr(MSR_PASID, &msr_val,
7069
0
                                     ctxt)) != X86EMUL_OKAY )
7070
0
                goto done;
7071
0
            generate_exception_if(!(msr_val & PASID_VALID), X86_EXC_GP, 0);
7072
0
            mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK);
7073
0
        }
7074
0
        else
7075
0
            generate_exception_if(mmvalp->data32[0] & 0x7ff00000, X86_EXC_GP, 0);
7076
0
        state->blk = blk_enqcmd;
7077
0
        if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
7078
0
                            state, ctxt)) != X86EMUL_OKAY )
7079
0
            goto done;
7080
0
        state->simd_size = simd_none;
7081
0
        break;
7082
7083
1
    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
7084
1
        host_and_vcpu_must_have(movdiri);
7085
0
        generate_exception_if(dst.type != OP_MEM, X86_EXC_UD);
7086
0
        fail_if(!ops->blk);
7087
0
        state->blk = blk_movdir;
7088
0
        if ( (rc = ops->blk(dst.mem.seg, dst.mem.off, &src.val, op_bytes,
7089
0
                            &_regs.eflags, state, ctxt)) != X86EMUL_OKAY )
7090
0
            goto done;
7091
0
        dst.type = OP_NONE;
7092
0
        break;
7093
7094
0
#ifndef X86EMUL_NO_SIMD
7095
7096
134
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
7097
474
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */
7098
474
        generate_exception_if(!vex.l || !vex.w, X86_EXC_UD);
7099
468
        goto simd_0f_imm8_avx2;
7100
7101
468
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x00): /* vpermq $imm8,{y,z}mm/mem,{y,z}mm{k} */
7102
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x01): /* vpermpd $imm8,{y,z}mm/mem,{y,z}mm{k} */
7103
3
        generate_exception_if(!evex.lr || !evex.w, X86_EXC_UD);
7104
1
        fault_suppression = false;
7105
1
        goto avx512f_imm8_no_sae;
7106
7107
35
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x38): /* vinserti128 $imm8,xmm/m128,ymm,ymm */
7108
351
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x39): /* vextracti128 $imm8,ymm,xmm/m128 */
7109
417
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x46): /* vperm2i128 $imm8,ymm/m256,ymm,ymm */
7110
417
        generate_exception_if(!vex.l, X86_EXC_UD);
7111
        /* fall through */
7112
456
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x02): /* vpblendd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7113
456
        generate_exception_if(vex.w, X86_EXC_UD);
7114
450
        goto simd_0f_imm8_avx2;
7115
7116
450
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x06): /* vperm2f128 $imm8,ymm/m256,ymm,ymm */
7117
160
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x18): /* vinsertf128 $imm8,xmm/m128,ymm,ymm */
7118
330
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x19): /* vextractf128 $imm8,ymm,xmm/m128 */
7119
330
        generate_exception_if(!vex.l, X86_EXC_UD);
7120
        /* fall through */
7121
396
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x04): /* vpermilps $imm8,{x,y}mm/mem,{x,y}mm */
7122
434
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x05): /* vpermilpd $imm8,{x,y}mm/mem,{x,y}mm */
7123
434
        generate_exception_if(vex.w, X86_EXC_UD);
7124
428
        goto simd_0f_imm8_avx;
7125
7126
428
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x04): /* vpermilps $imm8,[xyz]mm/mem,[xyz]mm{k} */
7127
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x05): /* vpermilpd $imm8,[xyz]mm/mem,[xyz]mm{k} */
7128
2
        generate_exception_if(evex.w != (b & 1), X86_EXC_UD);
7129
1
        fault_suppression = false;
7130
1
        goto avx512f_imm8_no_sae;
7131
7132
66
    case X86EMUL_OPC_66(0x0f3a, 0x08): /* roundps $imm8,xmm/m128,xmm */
7133
125
    case X86EMUL_OPC_66(0x0f3a, 0x09): /* roundpd $imm8,xmm/m128,xmm */
7134
216
    case X86EMUL_OPC_66(0x0f3a, 0x0a): /* roundss $imm8,xmm/m128,xmm */
7135
457
    case X86EMUL_OPC_66(0x0f3a, 0x0b): /* roundsd $imm8,xmm/m128,xmm */
7136
491
    case X86EMUL_OPC_66(0x0f3a, 0x0c): /* blendps $imm8,xmm/m128,xmm */
7137
685
    case X86EMUL_OPC_66(0x0f3a, 0x0d): /* blendpd $imm8,xmm/m128,xmm */
7138
751
    case X86EMUL_OPC_66(0x0f3a, 0x0e): /* pblendw $imm8,xmm/m128,xmm */
7139
1.04k
    case X86EMUL_OPC_66(0x0f3a, 0x40): /* dpps $imm8,xmm/m128,xmm */
7140
1.10k
    case X86EMUL_OPC_66(0x0f3a, 0x41): /* dppd $imm8,xmm/m128,xmm */
7141
1.12k
    case X86EMUL_OPC_66(0x0f3a, 0x42): /* mpsadbw $imm8,xmm/m128,xmm */
7142
1.12k
        host_and_vcpu_must_have(sse4_1);
7143
1.12k
        goto simd_0f3a_common;
7144
7145
1.12k
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0a): /* vrndscaless $imm8,xmm/mem,xmm,xmm{k} */
7146
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0b): /* vrndscalesd $imm8,xmm/mem,xmm,xmm{k} */
7147
3
        generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
7148
        /* fall through */
7149
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x08): /* vrndscaleps $imm8,[xyz]mm/mem,[xyz]mm{k} */
7150
4
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x09): /* vrndscalepd $imm8,[xyz]mm/mem,[xyz]mm{k} */
7151
4
        host_and_vcpu_must_have(avx512f);
7152
0
        generate_exception_if(evex.w != (b & 1), X86_EXC_UD);
7153
0
        avx512_vlen_check(b & 2);
7154
0
        goto simd_imm8_zmm;
7155
7156
3
    case X86EMUL_OPC_EVEX(0x0f3a, 0x0a): /* vrndscalesh $imm8,xmm/mem,xmm,xmm{k} */
7157
3
        generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
7158
        /* fall through */
7159
3
    case X86EMUL_OPC_EVEX(0x0f3a, 0x08): /* vrndscaleph $imm8,[xyz]mm/mem,[xyz]mm{k} */
7160
3
        host_and_vcpu_must_have(avx512_fp16);
7161
0
        generate_exception_if(evex.w, X86_EXC_UD);
7162
0
        avx512_vlen_check(b & 2);
7163
0
        goto simd_imm8_zmm;
7164
7165
0
#endif /* X86EMUL_NO_SIMD */
7166
7167
529
    CASE_SIMD_PACKED_INT(0x0f3a, 0x0f): /* palignr $imm8,{,x}mm/mem,{,x}mm */
7168
529
        host_and_vcpu_must_have(ssse3);
7169
298
        if ( vex.pfx )
7170
67
        {
7171
1.79k
    simd_0f3a_common:
7172
1.79k
            get_fpu(X86EMUL_FPU_xmm);
7173
1.79k
        }
7174
231
        else
7175
231
        {
7176
231
            host_and_vcpu_must_have(mmx);
7177
231
            get_fpu(X86EMUL_FPU_mmx);
7178
231
        }
7179
2.01k
        opc = init_prefixes(stub);
7180
0
        opc[0] = 0x3a;
7181
2.01k
        opc[1] = b;
7182
2.01k
        opc[2] = modrm;
7183
2.01k
        if ( ea.type == OP_MEM )
7184
1.27k
        {
7185
            /* Convert memory operand to (%rAX). */
7186
1.27k
            rex_prefix &= ~REX_B;
7187
1.27k
            vex.b = 1;
7188
1.27k
            opc[2] &= 0x38;
7189
1.27k
        }
7190
2.01k
        opc[3] = imm1;
7191
2.01k
        insn_bytes = PFX_BYTES + 4;
7192
2.01k
        break;
7193
7194
0
#ifndef X86EMUL_NO_SIMD
7195
7196
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x42): /* vdbpsadbw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7197
2
        generate_exception_if(evex.w, X86_EXC_UD);
7198
        /* fall through */
7199
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7200
2
        fault_suppression = false;
7201
2
        goto avx512bw_imm;
7202
7203
64
    case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
7204
130
    case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */
7205
316
    case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
7206
366
    case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
7207
366
        host_and_vcpu_must_have(sse4_1);
7208
366
        get_fpu(X86EMUL_FPU_xmm);
7209
7210
365
        opc = init_prefixes(stub);
7211
0
        opc++[0] = 0x3a;
7212
1.38k
    pextr:
7213
1.38k
        opc[0] = b;
7214
        /* Convert memory/GPR operand to (%rAX). */
7215
1.38k
        rex_prefix &= ~REX_B;
7216
1.38k
        evex.b = vex.b = 1;
7217
1.38k
        if ( !mode_64bit() )
7218
929
            evex.w = vex.w = 0;
7219
1.38k
        opc[1] = modrm & 0x38;
7220
1.38k
        opc[2] = imm1;
7221
1.38k
        opc[3] = 0xc3;
7222
1.38k
        if ( vex.opcx == vex_none )
7223
365
        {
7224
            /* Cover for extra prefix byte. */
7225
365
            --opc;
7226
365
        }
7227
7228
1.38k
        if ( evex_encoded() )
7229
0
            copy_EVEX(opc, evex);
7230
1.38k
        else
7231
1.38k
            copy_REX_VEX(opc, rex_prefix, vex);
7232
1.38k
        invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
7233
1.38k
        put_stub(stub);
7234
7235
1.38k
        ASSERT(!state->simd_size);
7236
1.38k
        dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
7237
1.38k
        if ( b == 0x16 && (rex_prefix & REX_W) )
7238
67
            dst.bytes = 8;
7239
1.38k
        break;
7240
7241
392
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
7242
825
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
7243
925
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
7244
1.02k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
7245
1.02k
        generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
7246
1.02k
        host_and_vcpu_must_have(avx);
7247
1.02k
        get_fpu(X86EMUL_FPU_ymm);
7248
7249
        /* Work around erratum BT41. */
7250
1.01k
        if ( !mode_64bit() )
7251
650
            vex.w = 0;
7252
7253
1.01k
        opc = init_prefixes(stub);
7254
0
        goto pextr;
7255
7256
3
    case X86EMUL_OPC_EVEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
7257
3
        generate_exception_if(ea.type != OP_REG || !evex.R, X86_EXC_UD);
7258
        /* Convert to alternative encoding: We want to use a memory operand. */
7259
1
        evex.opcx = ext_0f3a;
7260
1
        b = 0x15;
7261
1
        modrm <<= 3;
7262
1
        evex.r = evex.b;
7263
1
        evex.R = evex.x;
7264
        /* fall through */
7265
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
7266
5
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
7267
7
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
7268
9
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
7269
9
        generate_exception_if((evex.lr || evex.reg != 0xf || !evex.RX ||
7270
9
                               evex.opmsk || evex.brs),
7271
9
                              X86_EXC_UD);
7272
3
        if ( !(b & 2) )
7273
3
            host_and_vcpu_must_have(avx512bw);
7274
2
        else if ( !(b & 1) )
7275
2
            host_and_vcpu_must_have(avx512dq);
7276
1
        else
7277
1
            host_and_vcpu_must_have(avx512f);
7278
0
        get_fpu(X86EMUL_FPU_zmm);
7279
0
        opc = init_evex(stub);
7280
0
        goto pextr;
7281
7282
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x18): /* vinsertf32x4 $imm8,xmm/m128,{y,z}mm{k} */
7283
                                            /* vinsertf64x2 $imm8,xmm/m128,{y,z}mm{k} */
7284
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x19): /* vextractf32x4 $imm8,{y,z}mm,xmm/m128{k} */
7285
                                            /* vextractf64x2 $imm8,{y,z}mm,xmm/m128{k} */
7286
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x38): /* vinserti32x4 $imm8,xmm/m128,{y,z}mm{k} */
7287
                                            /* vinserti64x2 $imm8,xmm/m128,{y,z}mm{k} */
7288
5
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x39): /* vextracti32x4 $imm8,{y,z}mm,xmm/m128{k} */
7289
                                            /* vextracti64x2 $imm8,{y,z}mm,xmm/m128{k} */
7290
5
        if ( evex.w )
7291
5
            host_and_vcpu_must_have(avx512dq);
7292
3
        generate_exception_if(evex.brs, X86_EXC_UD);
7293
        /* fall through */
7294
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x23): /* vshuff32x4 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
7295
                                            /* vshuff64x2 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
7296
4
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x43): /* vshufi32x4 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
7297
                                            /* vshufi64x2 $imm8,{y,z}mm/mem,{y,z}mm,{y,z}mm{k} */
7298
4
        generate_exception_if(!evex.lr, X86_EXC_UD);
7299
2
        fault_suppression = false;
7300
2
        goto avx512f_imm8_no_sae;
7301
7302
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1a): /* vinsertf32x4 $imm8,ymm/m256,zmm{k} */
7303
                                            /* vinsertf64x2 $imm8,ymm/m256,zmm{k} */
7304
4
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1b): /* vextractf32x8 $imm8,zmm,ymm/m256{k} */
7305
                                            /* vextractf64x4 $imm8,zmm,ymm/m256{k} */
7306
5
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3a): /* vinserti32x4 $imm8,ymm/m256,zmm{k} */
7307
                                            /* vinserti64x2 $imm8,ymm/m256,zmm{k} */
7308
6
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3b): /* vextracti32x8 $imm8,zmm,ymm/m256{k} */
7309
                                            /* vextracti64x4 $imm8,zmm,ymm/m256{k} */
7310
6
        if ( !evex.w )
7311
6
            host_and_vcpu_must_have(avx512dq);
7312
4
        generate_exception_if(evex.lr != 2 || evex.brs, X86_EXC_UD);
7313
1
        fault_suppression = false;
7314
1
        goto avx512f_imm8_no_sae;
7315
7316
639
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */
7317
645
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,[xyz]mm,{x,y}mm/mem{k} */
7318
645
    {
7319
645
        uint32_t mxcsr;
7320
7321
645
        fail_if(!ops->write);
7322
644
        if ( evex_encoded() )
7323
6
        {
7324
6
            generate_exception_if((evex.w || evex.reg != 0xf || !evex.RX ||
7325
6
                                   (ea.type != OP_REG && (evex.z || evex.brs))),
7326
6
                                  X86_EXC_UD);
7327
2
            host_and_vcpu_must_have(avx512f);
7328
0
            avx512_vlen_check(false);
7329
0
            opc = init_evex(stub);
7330
0
        }
7331
638
        else
7332
638
        {
7333
638
            generate_exception_if(vex.w || vex.reg != 0xf, X86_EXC_UD);
7334
623
            host_and_vcpu_must_have(f16c);
7335
623
            opc = init_prefixes(stub);
7336
623
        }
7337
7338
623
        op_bytes = 8 << evex.lr;
7339
7340
623
        opc[0] = b;
7341
623
        opc[1] = modrm;
7342
623
        if ( ea.type == OP_MEM )
7343
298
        {
7344
            /* Convert memory operand to (%rAX). */
7345
298
            vex.b = 1;
7346
298
            evex.b = 1;
7347
298
            opc[1] &= 0x38;
7348
298
        }
7349
623
        opc[2] = imm1;
7350
623
        if ( evex_encoded() )
7351
0
        {
7352
0
            unsigned int full = 0;
7353
7354
0
            insn_bytes = EVEX_PFX_BYTES + 3;
7355
0
            copy_EVEX(opc, evex);
7356
7357
0
            if ( ea.type == OP_MEM && evex.opmsk )
7358
0
            {
7359
0
                full = 0xffff >> (16 - op_bytes / 2);
7360
0
                op_mask &= full;
7361
0
                if ( !op_mask )
7362
0
                    goto complete_insn;
7363
7364
0
                first_byte = __builtin_ctz(op_mask);
7365
0
                op_mask >>= first_byte;
7366
0
                full >>= first_byte;
7367
0
                first_byte <<= 1;
7368
0
                op_bytes = (32 - __builtin_clz(op_mask)) << 1;
7369
7370
                /*
7371
                 * We may need to read (parts of) the memory operand for the
7372
                 * purpose of merging in order to avoid splitting the write
7373
                 * below into multiple ones.
7374
                 */
7375
0
                if ( op_mask != full &&
7376
0
                     (rc = ops->read(ea.mem.seg,
7377
0
                                     truncate_ea(ea.mem.off + first_byte),
7378
0
                                     (void *)mmvalp + first_byte, op_bytes,
7379
0
                                     ctxt)) != X86EMUL_OKAY )
7380
0
                    goto done;
7381
0
            }
7382
0
        }
7383
623
        else
7384
623
        {
7385
623
            insn_bytes = PFX_BYTES + 3;
7386
623
            copy_VEX(opc, vex);
7387
623
        }
7388
623
        opc[3] = 0xc3;
7389
7390
        /* Latch MXCSR - we may need to restore it below. */
7391
623
        invoke_stub("stmxcsr %[mxcsr]", "",
7392
623
                    "=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp));
7393
7394
623
        put_stub(stub);
7395
7396
623
        if ( ea.type == OP_MEM )
7397
298
        {
7398
298
            rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + first_byte),
7399
298
                            (void *)mmvalp + first_byte, op_bytes, ctxt);
7400
298
            if ( rc != X86EMUL_OKAY )
7401
6
            {
7402
6
                asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
7403
6
                goto done;
7404
6
            }
7405
298
        }
7406
7407
617
        state->simd_size = simd_none;
7408
617
        break;
7409
623
    }
7410
7411
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1e): /* vpcmpu{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
7412
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1f): /* vpcmp{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
7413
4
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3e): /* vpcmpu{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
7414
6
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3f): /* vpcmp{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
7415
6
        generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD);
7416
2
        if ( !(b & 0x20) )
7417
1
            goto avx512f_imm8_no_sae;
7418
7
    avx512bw_imm:
7419
7
        host_and_vcpu_must_have(avx512bw);
7420
0
        generate_exception_if(evex.brs, X86_EXC_UD);
7421
0
        elem_bytes = 1 << evex.w;
7422
0
        avx512_vlen_check(false);
7423
0
        goto simd_imm8_zmm;
7424
7425
211
    case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
7426
405
    case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
7427
405
        host_and_vcpu_must_have(sse4_1);
7428
405
        memcpy(mmvalp, &src.val, src.bytes);
7429
405
        ea.type = OP_MEM;
7430
405
        d = SrcMem16; /* Fake for the common SIMD code below. */
7431
405
        state->simd_size = simd_other;
7432
405
        goto simd_0f3a_common;
7433
7434
506
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
7435
688
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
7436
688
        generate_exception_if(vex.l, X86_EXC_UD);
7437
686
        if ( !mode_64bit() )
7438
317
            vex.w = 0;
7439
686
        memcpy(mmvalp, &src.val, src.bytes);
7440
686
        ea.type = OP_MEM;
7441
686
        d = SrcMem16; /* Fake for the common SIMD code below. */
7442
686
        state->simd_size = simd_other;
7443
686
        goto simd_0f_int_imm8;
7444
7445
62
    case X86EMUL_OPC_66(0x0f3a, 0x21): /* insertps $imm8,xmm/m32,xmm */
7446
62
        host_and_vcpu_must_have(sse4_1);
7447
62
        op_bytes = 4;
7448
62
        goto simd_0f3a_common;
7449
7450
64
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */
7451
64
        op_bytes = 4;
7452
        /* fall through */
7453
122
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7454
122
        generate_exception_if(vex.l, X86_EXC_UD);
7455
121
        goto simd_0f_imm8_avx;
7456
7457
121
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */
7458
1
        host_and_vcpu_must_have(avx512f);
7459
0
        generate_exception_if(evex.lr || evex.w || evex.opmsk || evex.brs,
7460
0
                              X86_EXC_UD);
7461
0
        op_bytes = 4;
7462
0
        goto simd_imm8_zmm;
7463
7464
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x50): /* vrangep{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7465
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x56): /* vreducep{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
7466
1
        host_and_vcpu_must_have(avx512dq);
7467
        /* fall through */
7468
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x26): /* vgetmantp{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
7469
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x54): /* vfixupimmp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7470
2
        host_and_vcpu_must_have(avx512f);
7471
0
        if ( ea.type != OP_REG || !evex.brs )
7472
0
            avx512_vlen_check(false);
7473
0
        goto simd_imm8_zmm;
7474
7475
1
    case X86EMUL_OPC_EVEX(0x0f3a, 0x26): /* vgetmantph $imm8,[xyz]mm/mem,[xyz]mm{k} */
7476
2
    case X86EMUL_OPC_EVEX(0x0f3a, 0x56): /* vreduceph $imm8,[xyz]mm/mem,[xyz]mm{k} */
7477
2
        host_and_vcpu_must_have(avx512_fp16);
7478
0
        generate_exception_if(evex.w, X86_EXC_UD);
7479
0
        if ( ea.type != OP_REG || !evex.brs )
7480
0
            avx512_vlen_check(false);
7481
0
        goto simd_imm8_zmm;
7482
7483
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x51): /* vranges{s,d} $imm8,xmm/mem,xmm,xmm{k} */
7484
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x57): /* vreduces{s,d} $imm8,xmm/mem,xmm,xmm{k} */
7485
2
        host_and_vcpu_must_have(avx512dq);
7486
        /* fall through */
7487
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x27): /* vgetmants{s,d} $imm8,xmm/mem,xmm,xmm{k} */
7488
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x55): /* vfixupimms{s,d} $imm8,xmm/mem,xmm,xmm{k} */
7489
2
        host_and_vcpu_must_have(avx512f);
7490
0
        generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
7491
0
        if ( !evex.brs )
7492
0
            avx512_vlen_check(true);
7493
0
        goto simd_imm8_zmm;
7494
7495
1
    case X86EMUL_OPC_EVEX(0x0f3a, 0x27): /* vgetmantsh $imm8,xmm/mem,xmm,xmm{k} */
7496
1
    case X86EMUL_OPC_EVEX(0x0f3a, 0x57): /* vreducesh $imm8,xmm/mem,xmm,xmm{k} */
7497
1
        host_and_vcpu_must_have(avx512_fp16);
7498
0
        generate_exception_if(evex.w, X86_EXC_UD);
7499
0
        if ( !evex.brs )
7500
0
            avx512_vlen_check(true);
7501
0
        else
7502
0
            generate_exception_if(ea.type != OP_REG, X86_EXC_UD);
7503
0
        goto simd_imm8_zmm;
7504
7505
7
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */
7506
15
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */
7507
15
        if ( !vex.w )
7508
15
            host_and_vcpu_must_have(avx512dq);
7509
9
    opmask_shift_imm:
7510
9
        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
7511
9
                              ea.type != OP_REG, X86_EXC_UD);
7512
1
        host_and_vcpu_must_have(avx512f);
7513
0
        get_fpu(X86EMUL_FPU_opmask);
7514
0
        op_bytes = 1; /* Any non-zero value will do. */
7515
0
        goto simd_0f_imm8;
7516
7517
1
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x31): /* kshiftr{d,q} $imm8,k,k */
7518
2
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x33): /* kshiftl{d,q} $imm8,k,k */
7519
2
        host_and_vcpu_must_have(avx512bw);
7520
0
        goto opmask_shift_imm;
7521
7522
67
    case X86EMUL_OPC_66(0x0f3a, 0x44):     /* pclmulqdq $imm8,xmm/m128,xmm */
7523
130
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7524
130
        host_and_vcpu_must_have(pclmulqdq);
7525
130
        if ( vex.opcx == vex_none )
7526
67
            goto simd_0f3a_common;
7527
63
        if ( vex.l )
7528
63
            host_and_vcpu_must_have(vpclmulqdq);
7529
62
        goto simd_0f_imm8_avx;
7530
7531
62
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm */
7532
1
        host_and_vcpu_must_have(vpclmulqdq);
7533
0
        generate_exception_if(evex.brs || evex.opmsk, X86_EXC_UD);
7534
0
        goto avx512f_imm8_no_sae;
7535
7536
1
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x48): /* vpermil2ps $imm,{x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7537
                                           /* vpermil2ps $imm,{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7538
2
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x49): /* vpermil2pd $imm,{x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7539
                                           /* vpermil2pd $imm,{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7540
2
        host_and_vcpu_must_have(xop);
7541
0
        goto simd_0f_imm8_ymm;
7542
7543
224
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4a): /* vblendvps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7544
295
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4b): /* vblendvpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7545
295
        generate_exception_if(vex.w, X86_EXC_UD);
7546
288
        goto simd_0f_imm8_avx;
7547
7548
288
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4c): /* vpblendvb {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7549
205
        generate_exception_if(vex.w, X86_EXC_UD);
7550
199
        goto simd_0f_int_imm8;
7551
7552
199
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7553
                                           /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7554
2
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7555
                                           /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7556
3
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7557
                                           /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7558
4
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7559
                                           /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7560
5
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7561
                                           /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7562
6
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7563
                                           /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7564
7
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */
7565
                                           /* vfmaddss xmm/m32,xmm,xmm,xmm */
7566
8
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */
7567
                                           /* vfmaddsd xmm/m64,xmm,xmm,xmm */
7568
9
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7569
                                           /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7570
10
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7571
                                           /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7572
11
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */
7573
                                           /* vfmsubss xmm/m32,xmm,xmm,xmm */
7574
12
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */
7575
                                           /* vfmsubsd xmm/m64,xmm,xmm,xmm */
7576
13
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7577
                                           /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7578
14
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7579
                                           /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7580
15
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */
7581
                                           /* vfnmaddss xmm/m32,xmm,xmm,xmm */
7582
16
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */
7583
                                           /* vfnmaddsd xmm/m64,xmm,xmm,xmm */
7584
17
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7585
                                           /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7586
18
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7587
                                           /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7588
19
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */
7589
                                           /* vfnmsubss xmm/m32,xmm,xmm,xmm */
7590
20
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */
7591
                                           /* vfnmsubsd xmm/m64,xmm,xmm,xmm */
7592
20
        host_and_vcpu_must_have(fma4);
7593
0
        goto simd_0f_imm8_ymm;
7594
7595
118
    case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
7596
321
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
7597
517
    case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
7598
773
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */
7599
839
    case X86EMUL_OPC_66(0x0f3a, 0x62):     /* pcmpistrm $imm8,xmm/m128,xmm */
7600
907
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */
7601
975
    case X86EMUL_OPC_66(0x0f3a, 0x63):     /* pcmpistri $imm8,xmm/m128,xmm */
7602
1.04k
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */
7603
1.04k
        if ( vex.opcx == vex_none )
7604
448
        {
7605
448
            host_and_vcpu_must_have(sse4_2);
7606
448
            get_fpu(X86EMUL_FPU_xmm);
7607
448
        }
7608
593
        else
7609
593
        {
7610
593
            generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
7611
585
            host_and_vcpu_must_have(avx);
7612
585
            get_fpu(X86EMUL_FPU_ymm);
7613
585
        }
7614
7615
1.03k
        opc = init_prefixes(stub);
7616
1.03k
        if ( vex.opcx == vex_none )
7617
447
            opc++[0] = 0x3a;
7618
1.03k
        opc[0] = b;
7619
1.03k
        opc[1] = modrm;
7620
1.03k
        if ( ea.type == OP_MEM )
7621
508
        {
7622
            /* Convert memory operand to (%rDI). */
7623
508
            rex_prefix &= ~REX_B;
7624
508
            vex.b = 1;
7625
508
            opc[1] &= 0x3f;
7626
508
            opc[1] |= 0x07;
7627
7628
508
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt);
7629
508
            if ( rc != X86EMUL_OKAY )
7630
26
                goto done;
7631
508
        }
7632
1.00k
        opc[2] = imm1;
7633
1.00k
        insn_bytes = PFX_BYTES + 3;
7634
1.00k
        opc[3] = 0xc3;
7635
1.00k
        if ( vex.opcx == vex_none )
7636
429
        {
7637
            /* Cover for extra prefix byte. */
7638
429
            --opc;
7639
429
            ++insn_bytes;
7640
429
        }
7641
7642
1.00k
        copy_REX_VEX(opc, rex_prefix, vex);
7643
1.00k
#ifdef __x86_64__
7644
1.00k
        if ( rex_prefix & REX_W )
7645
238
            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
7646
1.00k
                         "a" (_regs.rax), "d" (_regs.rdx));
7647
767
        else
7648
767
#endif
7649
767
            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
7650
1.00k
                         "a" (_regs.eax), "d" (_regs.edx));
7651
7652
1.00k
        state->simd_size = simd_none;
7653
1.00k
        if ( b & 1 )
7654
573
            _regs.r(cx) = (uint32_t)dst.val;
7655
1.00k
        dst.type = OP_NONE;
7656
1.00k
        break;
7657
7658
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x66): /* vfpclassp{s,d} $imm8,[xyz]mm/mem,k{k} */
7659
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x67): /* vfpclasss{s,d} $imm8,xmm/mem,k{k} */
7660
2
        host_and_vcpu_must_have(avx512dq);
7661
0
        generate_exception_if(!evex.r || !evex.R || evex.z, X86_EXC_UD);
7662
0
        if ( !(b & 1) )
7663
0
            goto avx512f_imm8_no_sae;
7664
0
        generate_exception_if(evex.brs, X86_EXC_UD);
7665
0
        avx512_vlen_check(true);
7666
0
        goto simd_imm8_zmm;
7667
7668
1
    case X86EMUL_OPC_EVEX(0x0f3a, 0x66): /* vfpclassph $imm8,[xyz]mm/mem,k{k} */
7669
2
    case X86EMUL_OPC_EVEX(0x0f3a, 0x67): /* vfpclasssh $imm8,xmm/mem,k{k} */
7670
2
        host_and_vcpu_must_have(avx512_fp16);
7671
0
        generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD);
7672
0
        if ( !(b & 1) )
7673
0
            goto avx512f_imm8_no_sae;
7674
0
        generate_exception_if(evex.brs, X86_EXC_UD);
7675
0
        avx512_vlen_check(true);
7676
0
        goto simd_imm8_zmm;
7677
7678
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x70): /* vpshldw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7679
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x72): /* vpshrdw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7680
2
        generate_exception_if(!evex.w, X86_EXC_UD);
7681
1
        elem_bytes = 2;
7682
        /* fall through */
7683
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x71): /* vpshld{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7684
3
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x73): /* vpshrd{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7685
3
        host_and_vcpu_must_have(avx512_vbmi2);
7686
0
        goto avx512f_imm8_no_sae;
7687
7688
3
    case X86EMUL_OPC_EVEX_F3(0x0f3a, 0xc2): /* vcmpsh $imm8,xmm/mem,xmm,k{k} */
7689
3
        generate_exception_if(ea.type != OP_REG && evex.brs, X86_EXC_UD);
7690
        /* fall through */
7691
3
    case X86EMUL_OPC_EVEX(0x0f3a, 0xc2): /* vcmpph $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
7692
3
        host_and_vcpu_must_have(avx512_fp16);
7693
0
        generate_exception_if(evex.w || !evex.r || !evex.R || evex.z, X86_EXC_UD);
7694
0
        if ( ea.type != OP_REG || !evex.brs )
7695
0
            avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK);
7696
0
        goto simd_imm8_zmm;
7697
7698
1
    case X86EMUL_OPC(0x0f3a, 0xcc):     /* sha1rnds4 $imm8,xmm/m128,xmm */
7699
1
        host_and_vcpu_must_have(sha);
7700
1
        op_bytes = 16;
7701
1
        goto simd_0f3a_common;
7702
7703
1
    case X86EMUL_OPC_66(0x0f3a, 0xce):      /* gf2p8affineqb $imm8,xmm/m128,xmm */
7704
2
    case X86EMUL_OPC_66(0x0f3a, 0xcf):      /* gf2p8affineinvqb $imm8,xmm/m128,xmm */
7705
2
        host_and_vcpu_must_have(gfni);
7706
0
        goto simd_0f3a_common;
7707
7708
1
    case X86EMUL_OPC_VEX_66(0x0f3a, 0xce):  /* vgf2p8affineqb $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7709
2
    case X86EMUL_OPC_VEX_66(0x0f3a, 0xcf):  /* vgf2p8affineinvqb $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7710
2
        host_and_vcpu_must_have(gfni);
7711
0
        generate_exception_if(!vex.w, X86_EXC_UD);
7712
0
        goto simd_0f_imm8_avx;
7713
7714
1
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0xce): /* vgf2p8affineqb $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7715
2
    case X86EMUL_OPC_EVEX_66(0x0f3a, 0xcf): /* vgf2p8affineinvqb $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7716
2
        host_and_vcpu_must_have(gfni);
7717
0
        generate_exception_if(!evex.w, X86_EXC_UD);
7718
0
        fault_suppression = false;
7719
0
        goto avx512f_imm8_no_sae;
7720
7721
1
    case X86EMUL_OPC_VEX_66(0x0f3a, 0xde): /* vsm3rnds2 $imm8,xmm/mem,xmm,xmm */
7722
1
        host_and_vcpu_must_have(sm3);
7723
0
        generate_exception_if(vex.w || vex.l, X86_EXC_UD);
7724
0
        op_bytes = 16;
7725
0
        goto simd_0f_imm8_ymm;
7726
7727
66
    case X86EMUL_OPC_66(0x0f3a, 0xdf):     /* aeskeygenassist $imm8,xmm/m128,xmm */
7728
112
    case X86EMUL_OPC_VEX_66(0x0f3a, 0xdf): /* vaeskeygenassist $imm8,xmm/m128,xmm */
7729
112
        host_and_vcpu_must_have(aesni);
7730
112
        if ( vex.opcx == vex_none )
7731
66
            goto simd_0f3a_common;
7732
46
        generate_exception_if(vex.l, X86_EXC_UD);
7733
46
        goto simd_0f_imm8_avx;
7734
7735
46
#endif /* X86EMUL_NO_SIMD */
7736
7737
301
    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
7738
301
        vcpu_must_have(bmi2);
7739
301
        generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
7740
296
        if ( mode_64bit() && vex.w )
7741
66
            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
7742
230
        else
7743
230
            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
7744
296
        break;
7745
7746
0
#ifndef X86EMUL_NO_SIMD
7747
7748
1
    case X86EMUL_OPC_EVEX_F3(5, 0x10):   /* vmovsh m16,xmm{k} */
7749
                                         /* vmovsh xmm,xmm,xmm{k} */
7750
2
    case X86EMUL_OPC_EVEX_F3(5, 0x11):   /* vmovsh xmm,m16{k} */
7751
                                         /* vmovsh xmm,xmm,xmm{k} */
7752
2
        generate_exception_if(evex.brs, X86_EXC_UD);
7753
1
        if ( ea.type == OP_MEM )
7754
1
            d |= TwoOp;
7755
0
        else
7756
0
        {
7757
1
    case X86EMUL_OPC_EVEX_F3(5, 0x51):   /* vsqrtsh xmm/m16,xmm,xmm{k} */
7758
1
            d &= ~TwoOp;
7759
1
        }
7760
        /* fall through */
7761
3
    case X86EMUL_OPC_EVEX(5, 0x51):      /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */
7762
9
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7763
13
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x59): /* vmul{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7764
17
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5c): /* vsub{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7765
21
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5d): /* vmin{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7766
25
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5e): /* vdiv{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7767
29
    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5f): /* vmax{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7768
29
        host_and_vcpu_must_have(avx512_fp16);
7769
0
        generate_exception_if(evex.w, X86_EXC_UD);
7770
0
        goto avx512f_all_fp;
7771
7772
14
    CASE_SIMD_ALL_FP(_EVEX, 5, 0x5a):  /* vcvtp{h,d}2p{h,d} [xyz]mm/mem,[xyz]mm{k} */
7773
                                       /* vcvts{h,d}2s{h,d} xmm/mem,xmm,xmm{k} */
7774
14
        host_and_vcpu_must_have(avx512_fp16);
7775
0
        if ( vex.pfx & VEX_PREFIX_SCALAR_MASK )
7776
0
            d &= ~TwoOp;
7777
0
        op_bytes = 2 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + evex.lr) +
7778
0
                         2 * evex.w);
7779
0
        goto avx512f_all_fp;
7780
7781
1
    case X86EMUL_OPC_EVEX   (5, 0x5b): /* vcvtdq2ph [xyz]mm/mem,[xy]mm{k} */
7782
                                       /* vcvtqq2ph [xyz]mm/mem,xmm{k} */
7783
2
    case X86EMUL_OPC_EVEX_F2(5, 0x7a): /* vcvtudq2ph [xyz]mm/mem,[xy]mm{k} */
7784
                                       /* vcvtuqq2ph [xyz]mm/mem,xmm{k} */
7785
2
        host_and_vcpu_must_have(avx512_fp16);
7786
0
        if ( ea.type != OP_REG || !evex.brs )
7787
0
            avx512_vlen_check(false);
7788
0
        op_bytes = 16 << evex.lr;
7789
0
        goto simd_zmm;
7790
7791
1
    case X86EMUL_OPC_EVEX_66(5, 0x5b): /* vcvtph2dq [xy]mm/mem,[xyz]mm{k} */
7792
2
    case X86EMUL_OPC_EVEX_F3(5, 0x5b): /* vcvttph2dq [xy]mm/mem,[xyz]mm{k} */
7793
3
    case X86EMUL_OPC_EVEX   (5, 0x78): /* vcvttph2udq [xy]mm/mem,[xyz]mm{k} */
7794
5
    case X86EMUL_OPC_EVEX   (5, 0x79): /* vcvtph2udq [xy]mm/mem,[xyz]mm{k} */
7795
5
        host_and_vcpu_must_have(avx512_fp16);
7796
0
        generate_exception_if(evex.w, X86_EXC_UD);
7797
0
        if ( ea.type != OP_REG || !evex.brs )
7798
0
            avx512_vlen_check(false);
7799
0
        op_bytes = 8 << evex.lr;
7800
0
        goto simd_zmm;
7801
7802
1
    case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */
7803
2
    case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */
7804
3
    case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */
7805
5
    case X86EMUL_OPC_EVEX_66(5, 0x7b): /* vcvtph2qq xmm/mem,[xyz]mm{k} */
7806
5
        host_and_vcpu_must_have(avx512_fp16);
7807
0
        generate_exception_if(evex.w, X86_EXC_UD);
7808
0
        if ( ea.type != OP_REG || !evex.brs )
7809
0
            avx512_vlen_check(false);
7810
0
        op_bytes = 4 << (evex.w + evex.lr);
7811
0
        goto simd_zmm;
7812
7813
1
    case X86EMUL_OPC_EVEX   (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */
7814
2
    case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */
7815
3
    case X86EMUL_OPC_EVEX   (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */
7816
4
    case X86EMUL_OPC_EVEX_66(5, 0x7d): /* vcvtph2w [xyz]mm/mem,[xyz]mm{k} */
7817
5
    case X86EMUL_OPC_EVEX_F3(5, 0x7d): /* vcvtw2ph [xyz]mm/mem,[xyz]mm{k} */
7818
6
    case X86EMUL_OPC_EVEX_F2(5, 0x7d): /* vcvtuw2ph [xyz]mm/mem,[xyz]mm{k} */
7819
7
    case X86EMUL_OPC_EVEX_66(6, 0x13): /* vcvtph2psx [xy]mm/mem,[xyz]mm{k} */
7820
7
        op_bytes = 8 << ((ext == ext_map5) + evex.lr);
7821
        /* fall through */
7822
8
    case X86EMUL_OPC_EVEX_66(5, 0x1d): /* vcvtps2phx [xyz]mm/mem,[xy]mm{k} */
7823
9
    case X86EMUL_OPC_EVEX_66(6, 0x2c): /* vscalefph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7824
10
    case X86EMUL_OPC_EVEX_66(6, 0x42): /* vgetexpph [xyz]mm/mem,[xyz]mm{k} */
7825
11
    case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7826
12
    case X86EMUL_OPC_EVEX_66(6, 0x97): /* vfmsubadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7827
13
    case X86EMUL_OPC_EVEX_66(6, 0x98): /* vfmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7828
14
    case X86EMUL_OPC_EVEX_66(6, 0x9a): /* vfmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7829
15
    case X86EMUL_OPC_EVEX_66(6, 0x9c): /* vfnmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7830
16
    case X86EMUL_OPC_EVEX_66(6, 0x9e): /* vfnmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7831
17
    case X86EMUL_OPC_EVEX_66(6, 0xa6): /* vfmaddsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7832
18
    case X86EMUL_OPC_EVEX_66(6, 0xa7): /* vfmsubadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7833
19
    case X86EMUL_OPC_EVEX_66(6, 0xa8): /* vfmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7834
20
    case X86EMUL_OPC_EVEX_66(6, 0xaa): /* vfmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7835
21
    case X86EMUL_OPC_EVEX_66(6, 0xac): /* vfnmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7836
22
    case X86EMUL_OPC_EVEX_66(6, 0xae): /* vfnmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7837
23
    case X86EMUL_OPC_EVEX_66(6, 0xb6): /* vfmaddsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7838
24
    case X86EMUL_OPC_EVEX_66(6, 0xb7): /* vfmsubadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7839
25
    case X86EMUL_OPC_EVEX_66(6, 0xb8): /* vfmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7840
26
    case X86EMUL_OPC_EVEX_66(6, 0xba): /* vfmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7841
27
    case X86EMUL_OPC_EVEX_66(6, 0xbc): /* vfnmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7842
28
    case X86EMUL_OPC_EVEX_66(6, 0xbe): /* vfnmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7843
28
        host_and_vcpu_must_have(avx512_fp16);
7844
0
        generate_exception_if(evex.w, X86_EXC_UD);
7845
0
        if ( ea.type != OP_REG || !evex.brs )
7846
0
            avx512_vlen_check(false);
7847
0
        goto simd_zmm;
7848
7849
1
    case X86EMUL_OPC_EVEX(5, 0x1d):    /* vcvtss2sh xmm/mem,xmm,xmm{k} */
7850
2
    case X86EMUL_OPC_EVEX(6, 0x13):    /* vcvtsh2ss xmm/mem,xmm,xmm{k} */
7851
3
    case X86EMUL_OPC_EVEX_66(6, 0x2d): /* vscalefsh xmm/m16,xmm,xmm{k} */
7852
4
    case X86EMUL_OPC_EVEX_66(6, 0x43): /* vgetexpsh xmm/m16,xmm,xmm{k} */
7853
5
    case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */
7854
6
    case X86EMUL_OPC_EVEX_66(6, 0x9b): /* vfmsub132sh xmm/m16,xmm,xmm{k} */
7855
7
    case X86EMUL_OPC_EVEX_66(6, 0x9d): /* vfnmadd132sh xmm/m16,xmm,xmm{k} */
7856
8
    case X86EMUL_OPC_EVEX_66(6, 0x9f): /* vfnmsub132sh xmm/m16,xmm,xmm{k} */
7857
9
    case X86EMUL_OPC_EVEX_66(6, 0xa9): /* vfmadd213sh xmm/m16,xmm,xmm{k} */
7858
10
    case X86EMUL_OPC_EVEX_66(6, 0xab): /* vfmsub213sh xmm/m16,xmm,xmm{k} */
7859
11
    case X86EMUL_OPC_EVEX_66(6, 0xad): /* vfnmadd213sh xmm/m16,xmm,xmm{k} */
7860
12
    case X86EMUL_OPC_EVEX_66(6, 0xaf): /* vfnmsub213sh xmm/m16,xmm,xmm{k} */
7861
13
    case X86EMUL_OPC_EVEX_66(6, 0xb9): /* vfmadd231sh xmm/m16,xmm,xmm{k} */
7862
14
    case X86EMUL_OPC_EVEX_66(6, 0xbb): /* vfmsub231sh xmm/m16,xmm,xmm{k} */
7863
15
    case X86EMUL_OPC_EVEX_66(6, 0xbd): /* vfnmadd231sh xmm/m16,xmm,xmm{k} */
7864
16
    case X86EMUL_OPC_EVEX_66(6, 0xbf): /* vfnmsub231sh xmm/m16,xmm,xmm{k} */
7865
16
        host_and_vcpu_must_have(avx512_fp16);
7866
0
        generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs),
7867
0
                              X86_EXC_UD);
7868
0
        if ( !evex.brs )
7869
0
            avx512_vlen_check(true);
7870
0
        goto simd_zmm;
7871
7872
1
    case X86EMUL_OPC_EVEX_66(6, 0x4c): /* vrcpph [xyz]mm/mem,[xyz]mm{k} */
7873
2
    case X86EMUL_OPC_EVEX_66(6, 0x4e): /* vrsqrtph [xyz]mm/mem,[xyz]mm{k} */
7874
2
        host_and_vcpu_must_have(avx512_fp16);
7875
0
        generate_exception_if(evex.w, X86_EXC_UD);
7876
0
        goto avx512f_no_sae;
7877
7878
1
    case X86EMUL_OPC_EVEX_66(6, 0x4d): /* vrcpsh xmm/m16,xmm,xmm{k} */
7879
2
    case X86EMUL_OPC_EVEX_66(6, 0x4f): /* vrsqrtsh xmm/m16,xmm,xmm{k} */
7880
2
        host_and_vcpu_must_have(avx512_fp16);
7881
0
        generate_exception_if(evex.w || evex.brs, X86_EXC_UD);
7882
0
        avx512_vlen_check(true);
7883
0
        goto simd_zmm;
7884
7885
1
    case X86EMUL_OPC_EVEX_F3(6, 0x56): /* vfmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7886
2
    case X86EMUL_OPC_EVEX_F2(6, 0x56): /* vfcmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7887
3
    case X86EMUL_OPC_EVEX_F3(6, 0xd6): /* vfmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7888
4
    case X86EMUL_OPC_EVEX_F2(6, 0xd6): /* vfcmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
7889
4
        op_bytes = 16 << evex.lr;
7890
        /* fall through */
7891
5
    case X86EMUL_OPC_EVEX_F3(6, 0x57): /* vfmaddcsh xmm/m16,xmm,xmm{k} */
7892
6
    case X86EMUL_OPC_EVEX_F2(6, 0x57): /* vfcmaddcsh xmm/m16,xmm,xmm{k} */
7893
7
    case X86EMUL_OPC_EVEX_F3(6, 0xd7): /* vfmulcsh xmm/m16,xmm,xmm{k} */
7894
8
    case X86EMUL_OPC_EVEX_F2(6, 0xd7): /* vfcmulcsh xmm/m16,xmm,xmm{k} */
7895
8
    {
7896
8
        unsigned int src1 = ~evex.reg;
7897
7898
8
        host_and_vcpu_must_have(avx512_fp16);
7899
0
        generate_exception_if(evex.w || ((b & 1) && ea.type != OP_REG && evex.brs),
7900
0
                              X86_EXC_UD);
7901
0
        if ( mode_64bit() )
7902
0
            src1 = (src1 & 0xf) | (!evex.RX << 4);
7903
0
        else
7904
0
            src1 &= 7;
7905
0
        generate_exception_if(modrm_reg == src1 ||
7906
0
                              (ea.type != OP_MEM && modrm_reg == modrm_rm),
7907
0
                              X86_EXC_UD);
7908
0
        if ( ea.type != OP_REG || !evex.brs )
7909
0
            avx512_vlen_check(b & 1);
7910
0
        goto simd_zmm;
7911
0
    }
7912
7913
0
    case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
7914
1
    case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
7915
3
    case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
7916
3
    case X86EMUL_OPC_XOP(08, 0x8e): /* vpmacssdd xmm,xmm/m128,xmm,xmm */
7917
4
    case X86EMUL_OPC_XOP(08, 0x8f): /* vpmacssdqh xmm,xmm/m128,xmm,xmm */
7918
5
    case X86EMUL_OPC_XOP(08, 0x95): /* vpmacsww xmm,xmm/m128,xmm,xmm */
7919
6
    case X86EMUL_OPC_XOP(08, 0x96): /* vpmacswd xmm,xmm/m128,xmm,xmm */
7920
7
    case X86EMUL_OPC_XOP(08, 0x97): /* vpmacsdql xmm,xmm/m128,xmm,xmm */
7921
8
    case X86EMUL_OPC_XOP(08, 0x9e): /* vpmacsdd xmm,xmm/m128,xmm,xmm */
7922
9
    case X86EMUL_OPC_XOP(08, 0x9f): /* vpmacsdqh xmm,xmm/m128,xmm,xmm */
7923
10
    case X86EMUL_OPC_XOP(08, 0xa6): /* vpmadcsswd xmm,xmm/m128,xmm,xmm */
7924
11
    case X86EMUL_OPC_XOP(08, 0xb6): /* vpmadcswd xmm,xmm/m128,xmm,xmm */
7925
12
    case X86EMUL_OPC_XOP(08, 0xc0): /* vprotb $imm,xmm/m128,xmm */
7926
13
    case X86EMUL_OPC_XOP(08, 0xc1): /* vprotw $imm,xmm/m128,xmm */
7927
14
    case X86EMUL_OPC_XOP(08, 0xc2): /* vprotd $imm,xmm/m128,xmm */
7928
15
    case X86EMUL_OPC_XOP(08, 0xc3): /* vprotq $imm,xmm/m128,xmm */
7929
17
    case X86EMUL_OPC_XOP(08, 0xcc): /* vpcomb $imm,xmm/m128,xmm,xmm */
7930
18
    case X86EMUL_OPC_XOP(08, 0xcd): /* vpcomw $imm,xmm/m128,xmm,xmm */
7931
19
    case X86EMUL_OPC_XOP(08, 0xce): /* vpcomd $imm,xmm/m128,xmm,xmm */
7932
21
    case X86EMUL_OPC_XOP(08, 0xcf): /* vpcomq $imm,xmm/m128,xmm,xmm */
7933
22
    case X86EMUL_OPC_XOP(08, 0xec): /* vpcomub $imm,xmm/m128,xmm,xmm */
7934
23
    case X86EMUL_OPC_XOP(08, 0xed): /* vpcomuw $imm,xmm/m128,xmm,xmm */
7935
24
    case X86EMUL_OPC_XOP(08, 0xee): /* vpcomud $imm,xmm/m128,xmm,xmm */
7936
25
    case X86EMUL_OPC_XOP(08, 0xef): /* vpcomuq $imm,xmm/m128,xmm,xmm */
7937
25
        generate_exception_if(vex.w, X86_EXC_UD);
7938
        /* fall through */
7939
20
    case X86EMUL_OPC_XOP(08, 0xa3): /* vpperm xmm/m128,xmm,xmm,xmm */
7940
                                    /* vpperm xmm,xmm/m128,xmm,xmm */
7941
20
        generate_exception_if(vex.l, X86_EXC_UD);
7942
        /* fall through */
7943
11
    case X86EMUL_OPC_XOP(08, 0xa2): /* vpcmov {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
7944
                                    /* vpcmov {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7945
11
        host_and_vcpu_must_have(xop);
7946
0
        goto simd_0f_imm8_ymm;
7947
7948
0
#endif /* X86EMUL_NO_SIMD */
7949
7950
2
    case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
7951
2
        switch ( modrm_reg & 7 )
7952
2
        {
7953
0
        case 1: /* blcfill r/m,r */
7954
1
        case 2: /* blsfill r/m,r */
7955
1
        case 3: /* blcs r/m,r */
7956
1
        case 4: /* tzmsk r/m,r */
7957
1
        case 5: /* blcic r/m,r */
7958
1
        case 6: /* blsic r/m,r */
7959
1
        case 7: /* t1mskc r/m,r */
7960
1
            host_and_vcpu_must_have(tbm);
7961
0
            break;
7962
1
        default:
7963
1
            goto unrecognized_insn;
7964
2
        }
7965
7966
0
    xop_09_rm_rv:
7967
0
    {
7968
0
        uint8_t *buf = get_stub(stub);
7969
0
        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
7970
7971
0
        generate_exception_if(vex.l, X86_EXC_UD);
7972
7973
0
        buf[0] = 0x8f;
7974
0
        *pxop = vex;
7975
0
        pxop->b = 1;
7976
0
        pxop->r = 1;
7977
0
        pxop->reg = 0xf; /* rAX */
7978
0
        buf[3] = b;
7979
0
        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
7980
0
        buf[5] = 0xc3;
7981
7982
0
        dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7983
0
        emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
7984
7985
0
        put_stub(stub);
7986
0
        break;
7987
0
    }
7988
7989
3
    case X86EMUL_OPC_XOP(09, 0x02): /* XOP Grp2 */
7990
3
        switch ( modrm_reg & 7 )
7991
3
        {
7992
1
        case 1: /* blcmsk r/m,r */
7993
2
        case 6: /* blci r/m,r */
7994
2
            host_and_vcpu_must_have(tbm);
7995
0
            goto xop_09_rm_rv;
7996
3
        }
7997
1
        goto unrecognized_insn;
7998
7999
2
    case X86EMUL_OPC_XOP(09, 0x12): /* XOP Grp3 */
8000
2
        switch ( modrm_reg & 7 )
8001
2
        {
8002
1
        case 0: /* llwpcb r */
8003
1
        case 1: /* slwpcb r */
8004
            /* LWP is unsupported, so produce #UD unconditionally. */
8005
1
            generate_exception(X86_EXC_UD);
8006
2
        }
8007
1
        goto unrecognized_insn;
8008
8009
1
#ifndef X86EMUL_NO_SIMD
8010
8011
1
    case X86EMUL_OPC_XOP(09, 0x82): /* vfrczss xmm/m128,xmm */
8012
2
    case X86EMUL_OPC_XOP(09, 0x83): /* vfrczsd xmm/m128,xmm */
8013
2
        generate_exception_if(vex.l, X86_EXC_UD);
8014
        /* fall through */
8015
2
    case X86EMUL_OPC_XOP(09, 0x80): /* vfrczps {x,y}mm/mem,{x,y}mm */
8016
3
    case X86EMUL_OPC_XOP(09, 0x81): /* vfrczpd {x,y}mm/mem,{x,y}mm */
8017
3
        host_and_vcpu_must_have(xop);
8018
0
        generate_exception_if(vex.w, X86_EXC_UD);
8019
0
        goto simd_0f_ymm;
8020
8021
1
    case X86EMUL_OPC_XOP(09, 0xc1): /* vphaddbw xmm/m128,xmm */
8022
2
    case X86EMUL_OPC_XOP(09, 0xc2): /* vphaddbd xmm/m128,xmm */
8023
3
    case X86EMUL_OPC_XOP(09, 0xc3): /* vphaddbq xmm/m128,xmm */
8024
4
    case X86EMUL_OPC_XOP(09, 0xc6): /* vphaddwd xmm/m128,xmm */
8025
5
    case X86EMUL_OPC_XOP(09, 0xc7): /* vphaddwq xmm/m128,xmm */
8026
6
    case X86EMUL_OPC_XOP(09, 0xcb): /* vphadddq xmm/m128,xmm */
8027
7
    case X86EMUL_OPC_XOP(09, 0xd1): /* vphaddubw xmm/m128,xmm */
8028
8
    case X86EMUL_OPC_XOP(09, 0xd2): /* vphaddubd xmm/m128,xmm */
8029
9
    case X86EMUL_OPC_XOP(09, 0xd3): /* vphaddubq xmm/m128,xmm */
8030
10
    case X86EMUL_OPC_XOP(09, 0xd6): /* vphadduwd xmm/m128,xmm */
8031
11
    case X86EMUL_OPC_XOP(09, 0xd7): /* vphadduwq xmm/m128,xmm */
8032
12
    case X86EMUL_OPC_XOP(09, 0xdb): /* vphaddudq xmm/m128,xmm */
8033
13
    case X86EMUL_OPC_XOP(09, 0xe2): /* vphsubwd xmm/m128,xmm */
8034
14
    case X86EMUL_OPC_XOP(09, 0xe3): /* vphsubdq xmm/m128,xmm */
8035
15
    case X86EMUL_OPC_XOP(09, 0xe1): /* vphsubbw xmm/m128,xmm */
8036
15
        generate_exception_if(vex.w, X86_EXC_UD);
8037
        /* fall through */
8038
13
    case X86EMUL_OPC_XOP(09, 0x90): /* vprotb xmm/m128,xmm,xmm */
8039
                                    /* vprotb xmm,xmm/m128,xmm */
8040
14
    case X86EMUL_OPC_XOP(09, 0x91): /* vprotw xmm/m128,xmm,xmm */
8041
                                    /* vprotw xmm,xmm/m128,xmm */
8042
15
    case X86EMUL_OPC_XOP(09, 0x92): /* vprotd xmm/m128,xmm,xmm */
8043
                                    /* vprotd xmm,xmm/m128,xmm */
8044
16
    case X86EMUL_OPC_XOP(09, 0x93): /* vprotq xmm/m128,xmm,xmm */
8045
                                    /* vprotq xmm,xmm/m128,xmm */
8046
17
    case X86EMUL_OPC_XOP(09, 0x94): /* vpshlb xmm/m128,xmm,xmm */
8047
                                    /* vpshlb xmm,xmm/m128,xmm */
8048
18
    case X86EMUL_OPC_XOP(09, 0x95): /* vpshlw xmm/m128,xmm,xmm */
8049
                                    /* vpshlw xmm,xmm/m128,xmm */
8050
19
    case X86EMUL_OPC_XOP(09, 0x96): /* vpshld xmm/m128,xmm,xmm */
8051
                                    /* vpshld xmm,xmm/m128,xmm */
8052
20
    case X86EMUL_OPC_XOP(09, 0x97): /* vpshlq xmm/m128,xmm,xmm */
8053
                                    /* vpshlq xmm,xmm/m128,xmm */
8054
21
    case X86EMUL_OPC_XOP(09, 0x98): /* vpshab xmm/m128,xmm,xmm */
8055
                                    /* vpshab xmm,xmm/m128,xmm */
8056
22
    case X86EMUL_OPC_XOP(09, 0x99): /* vpshaw xmm/m128,xmm,xmm */
8057
                                    /* vpshaw xmm,xmm/m128,xmm */
8058
23
    case X86EMUL_OPC_XOP(09, 0x9a): /* vpshad xmm/m128,xmm,xmm */
8059
                                    /* vpshad xmm,xmm/m128,xmm */
8060
24
    case X86EMUL_OPC_XOP(09, 0x9b): /* vpshaq xmm/m128,xmm,xmm */
8061
                                    /* vpshaq xmm,xmm/m128,xmm */
8062
24
        generate_exception_if(vex.l, X86_EXC_UD);
8063
14
        host_and_vcpu_must_have(xop);
8064
0
        goto simd_0f_ymm;
8065
8066
0
#endif /* X86EMUL_NO_SIMD */
8067
8068
1
    case X86EMUL_OPC_XOP(0a, 0x10): /* bextr imm,r/m,r */
8069
1
    {
8070
1
        uint8_t *buf = get_stub(stub);
8071
1
        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
8072
8073
1
        host_and_vcpu_must_have(tbm);
8074
0
        generate_exception_if(vex.l || vex.reg != 0xf, X86_EXC_UD);
8075
8076
0
        if ( ea.type == OP_REG )
8077
0
            src.val = *ea.reg;
8078
0
        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
8079
0
                                   ctxt, ops)) != X86EMUL_OKAY )
8080
0
            goto done;
8081
8082
0
        buf[0] = 0x8f;
8083
0
        *pxop = vex;
8084
0
        pxop->b = 1;
8085
0
        pxop->r = 1;
8086
0
        buf[3] = b;
8087
0
        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
8088
0
        *(uint32_t *)(buf + 5) = imm1;
8089
0
        buf[9] = 0xc3;
8090
8091
0
        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
8092
8093
0
        put_stub(stub);
8094
0
        break;
8095
0
    }
8096
8097
2
    case X86EMUL_OPC_XOP(0a, 0x12): /* XOP Grp4 */
8098
2
        switch ( modrm_reg & 7 )
8099
2
        {
8100
0
        case 0: /* lwpins $imm32,r/m,r */
8101
1
        case 1: /* lwpval $imm32,r/m,r */
8102
            /* LWP is unsupported, so produce #UD unconditionally. */
8103
1
            generate_exception(X86_EXC_UD);
8104
2
        }
8105
1
        goto unrecognized_insn;
8106
8107
863
    default:
8108
863
    unimplemented_insn: __maybe_unused;
8109
863
        rc = X86EMUL_UNIMPLEMENTED;
8110
863
        goto done;
8111
16
    unrecognized_insn:
8112
16
        rc = X86EMUL_UNRECOGNIZED;
8113
16
        goto done;
8114
8115
102k
    dispatch_from_helper:
8116
102k
        if ( rc == X86EMUL_OKAY )
8117
100k
            break;
8118
8119
1.28k
        switch ( rc )
8120
1.28k
        {
8121
66
        case X86EMUL_rdtsc:
8122
66
            goto rdtsc;
8123
8124
#ifdef __XEN__
8125
        case X86EMUL_stub_failure:
8126
            goto emulation_stub_failure;
8127
#endif
8128
1.28k
        }
8129
8130
        /* Internally used state change indicators may not make it here. */
8131
1.22k
        if ( rc < 0 )
8132
0
        {
8133
0
            ASSERT_UNREACHABLE();
8134
0
            rc = X86EMUL_UNHANDLEABLE;
8135
0
        }
8136
1.22k
        goto done;
8137
602k
    }
8138
8139
588k
    if ( state->rmw != rmw_NONE )
8140
0
    {
8141
0
        ea.val = src.val;
8142
0
        op_bytes = dst.bytes;
8143
0
        state->stub_exn = &stub_exn;
8144
0
        rc = ops->rmw(dst.mem.seg, dst.mem.off, dst.bytes, &_regs.eflags,
8145
0
                      state, ctxt);
8146
#ifdef __XEN__
8147
        if ( rc == X86EMUL_stub_failure )
8148
            goto emulation_stub_failure;
8149
#endif
8150
0
        if ( rc != X86EMUL_OKAY )
8151
0
            goto done;
8152
8153
        /* Some operations require a register to be written. */
8154
0
        switch ( state->rmw )
8155
0
        {
8156
0
        case rmw_cmpccxadd:
8157
0
        case rmw_xchg:
8158
0
        case rmw_xadd:
8159
0
            switch ( dst.bytes )
8160
0
            {
8161
0
            case 1: *(uint8_t  *)src.reg = (uint8_t)ea.val; break;
8162
0
            case 2: *(uint16_t *)src.reg = (uint16_t)ea.val; break;
8163
0
            case 4: *src.reg = (uint32_t)ea.val; break; /* 64b reg: zero-extend */
8164
0
            case 8: *src.reg = ea.val; break;
8165
0
            }
8166
0
            break;
8167
8168
0
        default:
8169
0
            break;
8170
0
        }
8171
8172
0
        dst.type = OP_NONE;
8173
0
    }
8174
588k
    else if ( state->simd_size != simd_none )
8175
76.7k
    {
8176
76.7k
        generate_exception_if((vex.opcx && (d & TwoOp) &&
8177
76.7k
                               (vex.reg != 0xf || (evex_encoded() && !evex.RX))),
8178
76.7k
                              X86_EXC_UD);
8179
8180
76.7k
        EXPECT(op_bytes);
8181
76.7k
        EXPECT(opc);
8182
8183
76.7k
        if ( evex_encoded() )
8184
0
        {
8185
0
            opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3;
8186
0
            copy_EVEX(opc, evex);
8187
0
        }
8188
76.7k
        else
8189
76.7k
        {
8190
76.7k
            opc[insn_bytes - PFX_BYTES] = 0xc3;
8191
76.7k
            copy_REX_VEX(opc, rex_prefix, vex);
8192
76.7k
        }
8193
8194
76.7k
        if ( ea.type == OP_MEM )
8195
46.6k
        {
8196
46.6k
            uint32_t mxcsr = 0;
8197
46.6k
            uint64_t full = 0;
8198
8199
46.6k
            if ( op_bytes < 16 ||
8200
46.6k
                 (vex.opcx
8201
24.1k
                  ? /* vmov{{a,nt}p{s,d},{,nt}dqa,ntdq} are exceptions. */
8202
12.7k
                    ext == ext_0f
8203
12.7k
                    ? ((b | 1) != 0x29 && b != 0x2b &&
8204
7.00k
                       ((b | 0x10) != 0x7f || vex.pfx != vex_66) &&
8205
7.00k
                       b != 0xe7)
8206
12.7k
                    : (ext != ext_0f38 || b != 0x2a)
8207
24.1k
                  : /* movup{s,d}, {,mask}movdqu, and lddqu are exceptions. */
8208
24.1k
                    ext == ext_0f &&
8209
11.4k
                    ((b | 1) == 0x11 ||
8210
9.41k
                     ((b | 0x10) == 0x7f && vex.pfx == vex_f3) ||
8211
9.41k
                     b == 0xf7 || b == 0xf0)) )
8212
35.2k
                mxcsr = MXCSR_MM;
8213
11.4k
            else if ( vcpu_has_misalignsse() )
8214
11.4k
                asm ( "stmxcsr %0" : "=m" (mxcsr) );
8215
46.6k
            generate_exception_if(!(mxcsr & MXCSR_MM) &&
8216
46.6k
                                  !is_aligned(ea.mem.seg, ea.mem.off, op_bytes,
8217
46.6k
                                              ctxt, ops),
8218
46.6k
                                  X86_EXC_GP, 0);
8219
8220
46.3k
            EXPECT(elem_bytes > 0);
8221
46.3k
            if ( evex.brs )
8222
0
            {
8223
0
                ASSERT((d & DstMask) != DstMem);
8224
0
                op_bytes = elem_bytes;
8225
0
            }
8226
46.3k
            if ( evex.opmsk )
8227
610
            {
8228
610
                ASSERT(!(op_bytes % elem_bytes));
8229
610
                full = ~0ULL >> (64 - op_bytes / elem_bytes);
8230
610
                op_mask &= full;
8231
610
            }
8232
46.3k
            if ( fault_suppression )
8233
610
            {
8234
610
                if ( !op_mask )
8235
318
                    goto simd_no_mem;
8236
292
                if ( !evex.brs )
8237
292
                {
8238
292
                    first_byte = __builtin_ctzll(op_mask);
8239
292
                    op_mask >>= first_byte;
8240
292
                    full >>= first_byte;
8241
292
                    first_byte *= elem_bytes;
8242
292
                    op_bytes = (64 - __builtin_clzll(op_mask)) * elem_bytes;
8243
292
                }
8244
292
            }
8245
            /*
8246
             * Independent of fault suppression we may need to read (parts of)
8247
             * the memory operand for the purpose of merging without splitting
8248
             * the write below into multiple ones. Note that the EVEX.Z check
8249
             * here isn't strictly needed, due to there not currently being
8250
             * any instructions allowing zeroing-merging on memory writes (and
8251
             * we raise #UD during DstMem processing far above in this case),
8252
             * yet conceptually the read is then unnecessary.
8253
             */
8254
46.0k
            if ( evex.opmsk && !evex.z && (d & DstMask) == DstMem &&
8255
46.0k
                 op_mask != full )
8256
14
                d = (d & ~SrcMask) | SrcMem;
8257
8258
46.0k
            switch ( d & SrcMask )
8259
46.0k
            {
8260
37.3k
            case SrcMem:
8261
37.3k
                rc = ops->read(ea.mem.seg, truncate_ea(ea.mem.off + first_byte),
8262
37.3k
                               (void *)mmvalp + first_byte, op_bytes,
8263
37.3k
                               ctxt);
8264
37.3k
                if ( rc != X86EMUL_OKAY )
8265
2.58k
                    goto done;
8266
                /* fall through */
8267
36.3k
            case SrcMem16:
8268
36.3k
                dst.type = OP_NONE;
8269
36.3k
                break;
8270
7.09k
            default:
8271
7.09k
                EXPECT((d & DstMask) == DstMem);
8272
7.09k
                break;
8273
46.0k
            }
8274
43.4k
            if ( (d & DstMask) == DstMem )
8275
7.56k
            {
8276
7.56k
                fail_if(!ops->write); /* Check before running the stub. */
8277
7.55k
                if ( (d & SrcMask) == SrcMem )
8278
475
                    d |= Mov; /* Force memory write to occur below. */
8279
8280
7.55k
                switch ( ctxt->opcode )
8281
7.55k
                {
8282
10
                case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps */
8283
149
                case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd */
8284
198
                case X86EMUL_OPC_VEX_66(0x0f38, 0x8e): /* vpmaskmov{d,q} */
8285
                    /* These have merge semantics; force write to occur. */
8286
198
                    d |= Mov;
8287
198
                    break;
8288
7.36k
                default:
8289
7.36k
                    ASSERT(d & Mov);
8290
7.36k
                    break;
8291
7.55k
                }
8292
8293
7.55k
                dst.type = OP_MEM;
8294
7.55k
                dst.bytes = op_bytes;
8295
7.55k
                dst.mem = ea.mem;
8296
7.55k
            }
8297
43.4k
        }
8298
30.0k
        else
8299
30.0k
        {
8300
30.4k
        simd_no_mem:
8301
30.4k
            dst.type = OP_NONE;
8302
30.4k
        }
8303
8304
        /* {,v}maskmov{q,dqu}, as an exception, uses rDI. */
8305
73.8k
        if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
8306
73.8k
                                      X86EMUL_OPC_ENCODING_MASK)) !=
8307
73.8k
                    X86EMUL_OPC(0x0f, 0xf7)) )
8308
73.3k
            invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
8309
463
        else
8310
463
            invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
8311
8312
73.8k
        put_stub(stub);
8313
73.8k
    }
8314
8315
585k
    switch ( dst.type )
8316
585k
    {
8317
199k
    case OP_REG:
8318
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
8319
199k
        switch ( dst.bytes )
8320
199k
        {
8321
28.1k
        case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
8322
115k
        case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
8323
43.1k
        case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
8324
9.64k
        case 8: *dst.reg = dst.val; break;
8325
199k
        }
8326
199k
        break;
8327
199k
    case OP_MEM:
8328
119k
        if ( !(d & Mov) && (dst.orig_val == dst.val) &&
8329
119k
             !ctxt->force_writeback )
8330
20.8k
            /* nothing to do */;
8331
98.5k
        else if ( lock_prefix )
8332
732
        {
8333
732
            fail_if(!ops->cmpxchg);
8334
731
            rc = ops->cmpxchg(
8335
731
                dst.mem.seg, dst.mem.off, &dst.orig_val,
8336
731
                &dst.val, dst.bytes, true, ctxt);
8337
731
            if ( rc == X86EMUL_CMPXCHG_FAILED )
8338
0
                rc = X86EMUL_RETRY;
8339
731
        }
8340
97.8k
        else
8341
97.8k
        {
8342
97.8k
            fail_if(!ops->write);
8343
97.6k
            rc = ops->write(dst.mem.seg, truncate_ea(dst.mem.off + first_byte),
8344
97.6k
                            !state->simd_size ? &dst.val
8345
97.6k
                                              : (void *)mmvalp + first_byte,
8346
97.6k
                            dst.bytes, ctxt);
8347
97.6k
            if ( sfence )
8348
2.15k
                asm volatile ( "sfence" ::: "memory" );
8349
97.6k
        }
8350
119k
        if ( rc != 0 )
8351
1.15k
            goto done;
8352
118k
        break;
8353
266k
    default:
8354
266k
        break;
8355
585k
    }
8356
8357
590k
 complete_insn: /* Commit shadow register state. */
8358
590k
    put_fpu(fpu_type, false, state, ctxt, ops);
8359
590k
    fpu_type = X86EMUL_FPU_none;
8360
8361
    /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
8362
590k
    if ( !mode_64bit() )
8363
480k
        _regs.r(ip) = (uint32_t)_regs.r(ip);
8364
8365
    /* Should a singlestep #DB be raised? */
8366
590k
    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
8367
111k
    {
8368
111k
        ctxt->retire.singlestep = true;
8369
111k
        ctxt->retire.sti = false;
8370
111k
    }
8371
8372
590k
    if ( rc != X86EMUL_DONE )
8373
590k
        *ctxt->regs = _regs;
8374
0
    else
8375
0
    {
8376
0
        ctxt->regs->r(ip) = _regs.r(ip);
8377
0
        rc = X86EMUL_OKAY;
8378
0
    }
8379
8380
590k
    ctxt->regs->eflags &= ~X86_EFLAGS_RF;
8381
8382
603k
 done:
8383
603k
    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
8384
603k
    put_stub(stub);
8385
603k
    return rc;
8386
0
#undef state
8387
8388
#ifdef __XEN__
8389
 emulation_stub_failure:
8390
    if ( stub_exn.info.fields.trapnr == X86_EXC_MF )
8391
        generate_exception(X86_EXC_MF);
8392
    if ( stub_exn.info.fields.trapnr == X86_EXC_XM )
8393
    {
8394
        if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
8395
            cr4 = X86_CR4_OSXMMEXCPT;
8396
        generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? X86_EXC_XM : X86_EXC_UD);
8397
    }
8398
    gprintk(XENLOG_WARNING,
8399
            "exception %u (ec=%04x) in emulation stub (line %u)\n",
8400
            stub_exn.info.fields.trapnr, stub_exn.info.fields.ec,
8401
            stub_exn.line);
8402
    gprintk(XENLOG_INFO, "  stub: %"__stringify(MAX_INST_LEN)"ph\n",
8403
            stub.func);
8404
    if ( stub_exn.info.fields.trapnr == X86_EXC_UD )
8405
        generate_exception(X86_EXC_UD);
8406
    domain_crash(current->domain);
8407
#endif
8408
8409
0
 unhandleable:
8410
0
    rc = X86EMUL_UNHANDLEABLE;
8411
0
    goto done;
8412
590k
}
8413
8414
#undef op_bytes
8415
#undef ad_bytes
8416
#undef ext
8417
#undef modrm
8418
#undef modrm_mod
8419
#undef modrm_reg
8420
#undef modrm_rm
8421
#undef rex_prefix
8422
#undef lock_prefix
8423
#undef vex
8424
#undef ea
8425
8426
int x86_emul_rmw(
8427
    void *ptr,
8428
    unsigned int bytes,
8429
    uint32_t *eflags,
8430
    struct x86_emulate_state *s,
8431
    struct x86_emulate_ctxt *ctxt)
8432
#define stub_exn (*s->stub_exn) /* for invoke_stub() */
8433
0
{
8434
0
    unsigned long *dst = ptr;
8435
8436
0
    ASSERT(bytes == s->op_bytes);
8437
8438
/*
8439
 * We cannot use Jcc below, as this code executes with the guest status flags
8440
 * loaded into the EFLAGS register. Hence our only choice is J{E,R}CXZ.
8441
 */
8442
0
#ifdef __x86_64__
8443
0
# define JCXZ "jrcxz"
8444
#else
8445
# define JCXZ "jecxz"
8446
#endif
8447
8448
0
#define COND_LOCK(op) \
8449
0
    JCXZ " .L" #op "%=\n\t" \
8450
0
    "lock\n" \
8451
0
    ".L" #op "%=:\n\t" \
8452
0
    #op
8453
8454
0
    switch ( s->rmw )
8455
0
    {
8456
0
#define UNOP(op) \
8457
0
    case rmw_##op: \
8458
0
        _emulate_1op(COND_LOCK(op), dst, bytes, *eflags, \
8459
0
                     "c" ((long)s->lock_prefix) ); \
8460
0
        break
8461
0
#define BINOP(op, sfx) \
8462
0
    case rmw_##op: \
8463
0
        _emulate_2op_SrcV##sfx(COND_LOCK(op), \
8464
0
                               s->ea.val, dst, bytes, *eflags, \
8465
0
                               "c" ((long)s->lock_prefix) ); \
8466
0
        break
8467
0
#define SHIFT(op) \
8468
0
    case rmw_##op: \
8469
0
        ASSERT(!s->lock_prefix); \
8470
0
        _emulate_2op_SrcB(#op, s->ea.val, dst, bytes, *eflags); \
8471
0
        break
8472
8473
0
    BINOP(adc, );
8474
0
    BINOP(add, );
8475
0
    BINOP(and, );
8476
0
    BINOP(btc, _nobyte);
8477
0
    BINOP(bts, _nobyte);
8478
0
    BINOP(btr, _nobyte);
8479
0
     UNOP(dec);
8480
0
     UNOP(inc);
8481
0
     UNOP(neg);
8482
0
    BINOP(or, );
8483
0
    SHIFT(rcl);
8484
0
    SHIFT(rcr);
8485
0
    SHIFT(rol);
8486
0
    SHIFT(ror);
8487
0
    SHIFT(sar);
8488
0
    BINOP(sbb, );
8489
0
    SHIFT(shl);
8490
0
    SHIFT(shr);
8491
0
    BINOP(sub, );
8492
0
    BINOP(xor, );
8493
8494
0
#undef UNOP
8495
0
#undef BINOP
8496
0
#undef SHIFT
8497
8498
0
#ifdef __x86_64__
8499
0
    case rmw_cmpccxadd:
8500
0
    {
8501
0
        struct x86_emulate_stub stub = {};
8502
0
        uint8_t *buf = get_stub(stub);
8503
0
        typeof(s->vex) *pvex = container_of(buf + 1, typeof(s->vex),
8504
0
                                            raw[0]);
8505
0
        unsigned long dummy;
8506
8507
0
        buf[0] = 0xc4;
8508
0
        *pvex = s->vex;
8509
0
        pvex->b = 1;
8510
0
        pvex->r = 1;
8511
0
        pvex->reg = 0xf; /* rAX */
8512
0
        buf[3] = ctxt->opcode;
8513
0
        buf[4] = 0x11; /* reg=rDX r/m=(%RCX) */
8514
0
        buf[5] = 0xc3;
8515
8516
0
        *eflags &= ~EFLAGS_MASK;
8517
0
        invoke_stub("",
8518
0
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
8519
0
                    "+m" (*dst), "+d" (s->ea.val),
8520
0
                    [tmp] "=&r" (dummy), [eflags] "+g" (*eflags)
8521
0
                    : "a" (*decode_vex_gpr(s->vex.reg, ctxt->regs, ctxt)),
8522
0
                      "c" (dst), [mask] "i" (EFLAGS_MASK));
8523
8524
0
        put_stub(stub);
8525
0
        break;
8526
0
    }
8527
0
#endif
8528
8529
0
    case rmw_not:
8530
0
        switch ( s->op_bytes )
8531
0
        {
8532
0
        case 1:
8533
0
            asm ( COND_LOCK(notb) " %0"
8534
0
                  : "+m" (*dst) : "c" ((long)s->lock_prefix) );
8535
0
            break;
8536
0
        case 2:
8537
0
            asm ( COND_LOCK(notw) " %0"
8538
0
                  : "+m" (*dst) : "c" ((long)s->lock_prefix) );
8539
0
            break;
8540
0
        case 4:
8541
0
            asm ( COND_LOCK(notl) " %0"
8542
0
                  : "+m" (*dst) : "c" ((long)s->lock_prefix) );
8543
0
            break;
8544
0
#ifdef __x86_64__
8545
0
        case 8:
8546
0
            asm ( COND_LOCK(notq) " %0"
8547
0
                  : "+m" (*dst) : "c" ((long)s->lock_prefix) );
8548
0
            break;
8549
0
#endif
8550
0
        }
8551
0
        break;
8552
8553
0
    case rmw_shld:
8554
0
        ASSERT(!s->lock_prefix);
8555
0
        _emulate_2op_SrcV_nobyte("shld",
8556
0
                                 s->ea.val, dst, bytes, *eflags,
8557
0
                                 "c" (s->ea.orig_val) );
8558
0
        break;
8559
8560
0
    case rmw_shrd:
8561
0
        ASSERT(!s->lock_prefix);
8562
0
        _emulate_2op_SrcV_nobyte("shrd",
8563
0
                                 s->ea.val, dst, bytes, *eflags,
8564
0
                                 "c" (s->ea.orig_val) );
8565
0
        break;
8566
8567
0
    case rmw_xadd:
8568
0
        *eflags &= ~EFLAGS_MASK;
8569
0
        switch ( s->op_bytes )
8570
0
        {
8571
0
            unsigned long dummy;
8572
8573
0
#define XADD(sz, cst, mod) \
8574
0
        case sz: \
8575
0
            asm ( "" \
8576
0
                  COND_LOCK(xadd) " %"#mod"[reg], %[mem]; " \
8577
0
                  _POST_EFLAGS("[efl]", "[msk]", "[tmp]") \
8578
0
                  : [reg] "+" #cst (s->ea.val), \
8579
0
                    [mem] "+m" (*dst), \
8580
0
                    [efl] "+g" (*eflags), \
8581
0
                    [tmp] "=&r" (dummy) \
8582
0
                  : "c" ((long)s->lock_prefix), \
8583
0
                    [msk] "i" (EFLAGS_MASK) ); \
8584
0
            break
8585
0
        XADD(1, q, b);
8586
0
        XADD(2, r, w);
8587
0
        XADD(4, r, k);
8588
0
#ifdef __x86_64__
8589
0
        XADD(8, r, );
8590
0
#endif
8591
0
#undef XADD
8592
0
        }
8593
0
        break;
8594
8595
0
    case rmw_xchg:
8596
0
        switch ( s->op_bytes )
8597
0
        {
8598
0
        case 1:
8599
0
            asm ( "xchg %b0, %b1" : "+q" (s->ea.val), "+m" (*dst) );
8600
0
            break;
8601
0
        case 2:
8602
0
            asm ( "xchg %w0, %w1" : "+r" (s->ea.val), "+m" (*dst) );
8603
0
            break;
8604
0
        case 4:
8605
0
#ifdef __x86_64__
8606
0
            asm ( "xchg %k0, %k1" : "+r" (s->ea.val), "+m" (*dst) );
8607
0
            break;
8608
0
        case 8:
8609
0
#endif
8610
0
            asm ( "xchg %0, %1" : "+r" (s->ea.val), "+m" (*dst) );
8611
0
            break;
8612
0
        }
8613
0
        break;
8614
8615
0
    default:
8616
0
        ASSERT_UNREACHABLE();
8617
0
        return X86EMUL_UNHANDLEABLE;
8618
0
    }
8619
8620
0
#undef COND_LOCK
8621
0
#undef JCXZ
8622
8623
0
    return X86EMUL_OKAY;
8624
8625
#if defined(__XEN__) && defined(__x86_64__)
8626
 emulation_stub_failure:
8627
    return X86EMUL_stub_failure;
8628
#endif
8629
0
}
8630
#undef stub_exn
8631
8632
static void __init __maybe_unused build_assertions(void)
8633
0
{
8634
0
    /* Check the values against SReg3 encoding in opcode/ModRM bytes. */
8635
0
    BUILD_BUG_ON(x86_seg_es != 0);
8636
0
    BUILD_BUG_ON(x86_seg_cs != 1);
8637
0
    BUILD_BUG_ON(x86_seg_ss != 2);
8638
0
    BUILD_BUG_ON(x86_seg_ds != 3);
8639
0
    BUILD_BUG_ON(x86_seg_fs != 4);
8640
0
    BUILD_BUG_ON(x86_seg_gs != 5);
8641
0
8642
0
    /* Check X86_ET_* against VMCB EVENTINJ and VMCS INTR_INFO type fields. */
8643
0
    BUILD_BUG_ON(X86_ET_EXT_INTR    != 0);
8644
0
    BUILD_BUG_ON(X86_ET_NMI         != 2);
8645
0
    BUILD_BUG_ON(X86_ET_HW_EXC      != 3);
8646
0
    BUILD_BUG_ON(X86_ET_SW_INT      != 4);
8647
0
    BUILD_BUG_ON(X86_ET_PRIV_SW_EXC != 5);
8648
0
    BUILD_BUG_ON(X86_ET_SW_EXC      != 6);
8649
0
    BUILD_BUG_ON(X86_ET_OTHER       != 7);
8650
0
}
8651
8652
#ifndef NDEBUG
8653
/*
8654
 * In debug builds, wrap x86_emulate() with some assertions about its expected
8655
 * behaviour.
8656
 */
8657
int x86_emulate_wrapper(
8658
    struct x86_emulate_ctxt *ctxt,
8659
    const struct x86_emulate_ops *ops)
8660
613k
{
8661
613k
    unsigned long orig_ip = ctxt->regs->r(ip);
8662
613k
    int rc;
8663
8664
613k
#ifdef __x86_64__
8665
613k
    if ( mode_64bit() )
8666
613k
        ASSERT(ctxt->lma);
8667
#else
8668
    ASSERT(!ctxt->lma && !mode_64bit());
8669
#endif
8670
8671
613k
    rc = x86_emulate(ctxt, ops);
8672
8673
    /*
8674
     * X86EMUL_DONE is an internal signal in the emulator, and is not expected
8675
     * to ever escape out to callers.
8676
     */
8677
613k
    ASSERT(rc != X86EMUL_DONE);
8678
8679
    /*
8680
     * Most retire flags should only be set for successful instruction
8681
     * emulation.
8682
     */
8683
613k
    if ( rc != X86EMUL_OKAY )
8684
24.2k
    {
8685
24.2k
        typeof(ctxt->retire) retire = ctxt->retire;
8686
8687
24.2k
        retire.unblock_nmi = false;
8688
24.2k
        ASSERT(!retire.raw);
8689
24.2k
    }
8690
8691
    /* All cases returning X86EMUL_EXCEPTION should have fault semantics. */
8692
613k
    if ( rc == X86EMUL_EXCEPTION )
8693
613k
        ASSERT(ctxt->regs->r(ip) == orig_ip);
8694
8695
    /*
8696
     * An event being pending should exactly match returning
8697
     * X86EMUL_EXCEPTION.  (If this trips, the chances are a codepath has
8698
     * called hvm_inject_hw_exception() rather than using
8699
     * x86_emul_hw_exception(), or the invocation of a hook has caused an
8700
     * exception to be raised, while the caller was only checking for
8701
     * success/failure.)
8702
     */
8703
613k
    ASSERT(ctxt->event_pending == (rc == X86EMUL_EXCEPTION));
8704
8705
613k
    return rc;
8706
613k
}
8707
#endif