Coverage Report

Created: 2025-05-12 07:09

/src/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
  return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - ESP
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - RSP
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#define TMP_REG1  (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_FREG  (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
  0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
  0, 1, 2, 3, 4, 5, 6, 7, 0
};

#define CHECK_EXTRA_REGS(p, w, do) \
  if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
    w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
    p = SLJIT_MEM1(SLJIT_SP); \
    do; \
  }

#else /* SLJIT_CONFIG_X86_32 */

#define TMP_REG2  (SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   r12 is therefore better used as a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  0, 0, 6, 7, 1, 0,  3,  2,  4, 5,  5,  6,  7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
  0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
  0, 0, 1, 2, 3, 5, 6, 7, 0, 1,  2,  3,  4,  5,  6,  7, 4
};
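
/* Worked example (annotation, not part of the original source): the lmap
   tables are the map tables masked to the three ModRM bits. In the System V
   table above, reg_map[8] == 12 (r12) and reg_lmap[8] == 12 & 0x7 == 4; the
   missing fourth bit is supplied by a REX prefix, defined just below. */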

#define REX_W   0x48
#define REX_R   0x44
#define REX_X   0x42
#define REX_B   0x41
#define REX   0x40
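
/* Illustration (annotation, not part of the original source): these prefix
   bytes OR together. For instance "mov rax, r12" can be emitted as

     0x4c 0x89 0xe0   == (REX_W | REX_R), MOV_rm_r, (MOD_REG | (4 << 3) | 0)

   where REX_W selects the 64-bit operand size and REX_R extends the ModRM
   reg field so that it can name r12 (reg_lmap value 4). */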

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x)    ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)   ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define U8(v)     ((sljit_u8)(v))

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS    ((sljit_uw)0x000010)
#define EX86_SHIFT_INS    ((sljit_uw)0x000020)
#define EX86_BYTE_ARG   ((sljit_uw)0x000040)
#define EX86_HALF_ARG   ((sljit_uw)0x000080)
/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
#define EX86_REX    ((sljit_uw)0x000100)
#define EX86_NO_REXW    ((sljit_uw)0x000200)
#define EX86_PREF_66    ((sljit_uw)0x000400)
#define EX86_PREF_F2    ((sljit_uw)0x000800)
#define EX86_PREF_F3    ((sljit_uw)0x001000)
#define EX86_SSE2_OP1   ((sljit_uw)0x002000)
#define EX86_SSE2_OP2   ((sljit_uw)0x004000)
#define EX86_SSE2   (EX86_SSE2_OP1 | EX86_SSE2_OP2)
#define EX86_VEX_EXT    ((sljit_uw)0x008000)
/* Op flags for emit_vex_instruction: */
#define VEX_OP_0F38   ((sljit_uw)0x010000)
#define VEX_OP_0F3A   ((sljit_uw)0x020000)
#define VEX_SSE2_OPV    ((sljit_uw)0x040000)
#define VEX_AUTO_W    ((sljit_uw)0x080000)
#define VEX_W     ((sljit_uw)0x100000)
#define VEX_256     ((sljit_uw)0x200000)

#define EX86_SELECT_66(op)  (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD     (/* BINARY */ 0 << 3)
#define ADD_EAX_i32   0x05
#define ADD_r_rm    0x03
#define ADD_rm_r    0x01
#define ADDSD_x_xm    0x58
#define ADC     (/* BINARY */ 2 << 3)
#define ADC_EAX_i32   0x15
#define ADC_r_rm    0x13
#define ADC_rm_r    0x11
#define AND     (/* BINARY */ 4 << 3)
#define AND_EAX_i32   0x25
#define AND_r_rm    0x23
#define AND_rm_r    0x21
#define ANDPD_x_xm    0x54
#define BSR_r_rm    (/* GROUP_0F */ 0xbd)
#define BSF_r_rm    (/* GROUP_0F */ 0xbc)
#define BSWAP_r     (/* GROUP_0F */ 0xc8)
#define CALL_i32    0xe8
#define CALL_rm     (/* GROUP_FF */ 2 << 3)
#define CDQ     0x99
#define CMOVE_r_rm    (/* GROUP_0F */ 0x44)
#define CMP     (/* BINARY */ 7 << 3)
#define CMP_EAX_i32   0x3d
#define CMP_r_rm    0x3b
#define CMP_rm_r    0x39
#define CMPS_x_xm   0xc2
#define CMPXCHG_rm_r    0xb1
#define CMPXCHG_rm8_r   0xb0
#define CVTPD2PS_x_xm   0x5a
#define CVTPS2PD_x_xm   0x5a
#define CVTSI2SD_x_rm   0x2a
#define CVTTSD2SI_r_xm    0x2c
#define DIV     (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm    0x5e
#define EXTRACTPS_x_xm    0x17
#define FLDS      0xd9
#define FLDL      0xdd
#define FSTPS     0xd9
#define FSTPD     0xdd
#define INSERTPS_x_xm   0x21
#define INT3      0xcc
#define IDIV      (/* GROUP_F7 */ 7 << 3)
#define IMUL      (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm   (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8    0x6b
#define IMUL_r_rm_i32   0x69
#define JL_i8     0x7c
#define JE_i8     0x74
#define JNC_i8      0x73
#define JNE_i8      0x75
#define JMP_i8      0xeb
#define JMP_i32     0xe9
#define JMP_rm      (/* GROUP_FF */ 4 << 3)
#define LEA_r_m     0x8d
#define LOOP_i8     0xe2
#define LZCNT_r_rm    (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm    0x8b
#define MOV_r_i32   0xb8
#define MOV_rm_r    0x89
#define MOV_rm_i32    0xc7
#define MOV_rm8_i8    0xc6
#define MOV_rm8_r8    0x88
#define MOVAPS_x_xm   0x28
#define MOVAPS_xm_x   0x29
#define MOVD_x_rm   0x6e
#define MOVD_rm_x   0x7e
#define MOVDDUP_x_xm    0x12
#define MOVDQA_x_xm   0x6f
#define MOVDQA_xm_x   0x7f
#define MOVDQU_x_xm   0x6f
#define MOVHLPS_x_x   0x12
#define MOVHPD_m_x    0x17
#define MOVHPD_x_m    0x16
#define MOVLHPS_x_x   0x16
#define MOVLPD_m_x    0x13
#define MOVLPD_x_m    0x12
#define MOVMSKPS_r_x    (/* GROUP_0F */ 0x50)
#define MOVQ_x_xm   (/* GROUP_0F */ 0x7e)
#define MOVSD_x_xm    0x10
#define MOVSD_xm_x    0x11
#define MOVSHDUP_x_xm   0x16
#define MOVSXD_r_rm   0x63
#define MOVSX_r_rm8   (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
#define MOVUPS_x_xm   0x10
#define MOVZX_r_rm8   (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
#define MUL     (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm    0x59
#define NEG_rm      (/* GROUP_F7 */ 3 << 3)
#define NOP     0x90
#define NOT_rm      (/* GROUP_F7 */ 2 << 3)
#define OR      (/* BINARY */ 1 << 3)
#define OR_r_rm     0x0b
#define OR_EAX_i32    0x0d
#define OR_rm_r     0x09
#define OR_rm8_r8   0x08
#define ORPD_x_xm   0x56
#define PACKSSWB_x_xm   (/* GROUP_0F */ 0x63)
#define PAND_x_xm   0xdb
#define PCMPEQD_x_xm    0x76
#define PINSRB_x_rm_i8    0x20
#define PINSRW_x_rm_i8    0xc4
#define PINSRD_x_rm_i8    0x22
#define PEXTRB_rm_x_i8    0x14
#define PEXTRW_rm_x_i8    0x15
#define PEXTRD_rm_x_i8    0x16
#define PMOVMSKB_r_x    (/* GROUP_0F */ 0xd7)
#define PMOVSXBD_x_xm   0x21
#define PMOVSXBQ_x_xm   0x22
#define PMOVSXBW_x_xm   0x20
#define PMOVSXDQ_x_xm   0x25
#define PMOVSXWD_x_xm   0x23
#define PMOVSXWQ_x_xm   0x24
#define PMOVZXBD_x_xm   0x31
#define PMOVZXBQ_x_xm   0x32
#define PMOVZXBW_x_xm   0x30
#define PMOVZXDQ_x_xm   0x35
#define PMOVZXWD_x_xm   0x33
#define PMOVZXWQ_x_xm   0x34
#define POP_r     0x58
#define POP_rm      0x8f
#define POPF      0x9d
#define POR_x_xm    0xeb
#define PREFETCH    0x18
#define PSHUFB_x_xm   0x00
#define PSHUFD_x_xm   0x70
#define PSHUFLW_x_xm    0x70
#define PSRLDQ_x    0x73
#define PSLLD_x_i8    0x72
#define PSLLQ_x_i8    0x73
#define PUSH_i32    0x68
#define PUSH_r      0x50
#define PUSH_rm     (/* GROUP_FF */ 6 << 3)
#define PUSHF     0x9c
#define PXOR_x_xm   0xef
#define ROL     (/* SHIFT */ 0 << 3)
#define ROR     (/* SHIFT */ 1 << 3)
#define RET_near    0xc3
#define RET_i16     0xc2
#define SBB     (/* BINARY */ 3 << 3)
#define SBB_EAX_i32   0x1d
#define SBB_r_rm    0x1b
#define SBB_rm_r    0x19
#define SAR     (/* SHIFT */ 7 << 3)
#define SHL     (/* SHIFT */ 4 << 3)
#define SHLD      (/* GROUP_0F */ 0xa5)
#define SHRD      (/* GROUP_0F */ 0xad)
#define SHR     (/* SHIFT */ 5 << 3)
#define SHUFPS_x_xm   0xc6
#define SUB     (/* BINARY */ 5 << 3)
#define SUB_EAX_i32   0x2d
#define SUB_r_rm    0x2b
#define SUB_rm_r    0x29
#define SUBSD_x_xm    0x5c
#define TEST_EAX_i32    0xa9
#define TEST_rm_r   0x85
#define TZCNT_r_rm    (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm    0x2e
#define UNPCKLPD_x_xm   0x14
#define UNPCKLPS_x_xm   0x14
#define VBROADCASTSD_x_xm 0x19
#define VBROADCASTSS_x_xm 0x18
#define VEXTRACTF128_x_ym 0x19
#define VEXTRACTI128_x_ym 0x39
#define VINSERTF128_y_y_xm  0x18
#define VINSERTI128_y_y_xm  0x38
#define VPBROADCASTB_x_xm 0x78
#define VPBROADCASTD_x_xm 0x58
#define VPBROADCASTQ_x_xm 0x59
#define VPBROADCASTW_x_xm 0x79
#define VPERMPD_y_ym    0x01
#define VPERMQ_y_ym   0x00
#define XCHG_EAX_r    0x90
#define XCHG_r_rm   0x87
#define XOR     (/* BINARY */ 6 << 3)
#define XOR_EAX_i32   0x35
#define XOR_r_rm    0x33
#define XOR_rm_r    0x31
#define XORPD_x_xm    0x57

#define GROUP_0F    0x0f
#define GROUP_66    0x66
#define GROUP_F3    0xf3
#define GROUP_F7    0xf7
#define GROUP_FF    0xff
#define GROUP_BINARY_81   0x81
#define GROUP_BINARY_83   0x83
#define GROUP_SHIFT_1   0xd1
#define GROUP_SHIFT_N   0xc1
#define GROUP_SHIFT_CL    0xd3
#define GROUP_LOCK    0xf0

#define MOD_REG     0xc0
#define MOD_DISP8   0x40

#define INC_SIZE(s)   (*inst++ = U8(s), compiler->size += (s))

#define PUSH_REG(r)   (*inst++ = U8(PUSH_r + (r)))
#define POP_REG(r)    (*inst++ = U8(POP_r + (r)))
#define RET()     (*inst++ = RET_near)
#define RET_I16(n)    (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)

#define SLJIT_INST_LABEL  255
#define SLJIT_INST_JUMP   254
#define SLJIT_INST_MOV_ADDR 253
#define SLJIT_INST_CONST  252

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different
   threads if they detect the CPU features at the same time. */
#define CPU_FEATURE_DETECTED    0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2    0x002
#endif
#define CPU_FEATURE_SSE41   0x004
#define CPU_FEATURE_LZCNT   0x008
#define CPU_FEATURE_TZCNT   0x010
#define CPU_FEATURE_CMOV    0x020
#define CPU_FEATURE_AVX     0x040
#define CPU_FEATURE_AVX2    0x080
#define CPU_FEATURE_OSXSAVE   0x100

static sljit_u32 cpu_feature_list = 0;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#elif defined(__INTEL_COMPILER)
#include <cpuid.h>
#endif

#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
  || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))
#include <immintrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
  SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
  SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
  SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void execute_cpu_id(sljit_u32 info[4])
{
#if (defined(_MSC_VER) && _MSC_VER >= 1400) \
  || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7)

  __cpuidex((int*)info, (int)info[0], (int)info[2]);

#elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900)

  __get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3);

#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \
  && (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)

  /* Intel syntax. */
  __asm {
    mov esi, info
    mov eax, [esi]
    mov ecx, [esi + 8]
    cpuid
    mov [esi], eax
    mov [esi + 4], ebx
    mov [esi + 8], ecx
    mov [esi + 12], edx
  }

#else

  __asm__ __volatile__ (
    "cpuid\n"
    : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
    : "0" (info[0]), "2" (info[2])
  );

#endif
}

static sljit_u32 execute_get_xcr0_low(void)
{
  sljit_u32 xcr0;

#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
  || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))

  xcr0 = (sljit_u32)_xgetbv(0);

#elif defined(__TINYC__)

  __asm__ (
    "xorl %%ecx, %%ecx\n"
    ".byte 0x0f\n"
    ".byte 0x01\n"
    ".byte 0xd0\n"
    : "=a" (xcr0)
    :
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
    : "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
    : "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
  );

#elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \
  || (defined(__clang__) && __clang_major__ < 14) \
  || (defined(__GNUC__) && __GNUC__ < 3) \
  || defined(__SUNPRO_C) || defined(__SUNPRO_CC)

  /* AT&T syntax. */
  __asm__ (
    "xorl %%ecx, %%ecx\n"
    "xgetbv\n"
    : "=a" (xcr0)
    :
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
    : "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
    : "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
  );

#elif defined(_MSC_VER)

  /* Intel syntax. */
  __asm {
    xor ecx, ecx
    xgetbv
    mov xcr0, eax
  }

#else

  __asm__ (
    "xor{l %%ecx, %%ecx | ecx, ecx}\n"
    "xgetbv\n"
    : "=a" (xcr0)
    :
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
    : "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
    : "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
  );

#endif
  return xcr0;
}

static void get_cpu_features(void)
{
  sljit_u32 feature_list = CPU_FEATURE_DETECTED;
  sljit_u32 info[4] = {0};
  sljit_u32 max_id;

  execute_cpu_id(info);
  max_id = info[0];

  if (max_id >= 7) {
    info[0] = 7;
    info[2] = 0;
    execute_cpu_id(info);

    if (info[1] & 0x8)
      feature_list |= CPU_FEATURE_TZCNT;
    if (info[1] & 0x20)
      feature_list |= CPU_FEATURE_AVX2;
  }

  if (max_id >= 1) {
    info[0] = 1;
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
    /* Winchip 2 and Cyrix MII bugs */
    info[1] = info[2] = 0;
#endif
    execute_cpu_id(info);

    if (info[2] & 0x80000)
      feature_list |= CPU_FEATURE_SSE41;
    if (info[2] & 0x8000000)
      feature_list |= CPU_FEATURE_OSXSAVE;
    if (info[2] & 0x10000000)
      feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    if (info[3] & 0x4000000)
      feature_list |= CPU_FEATURE_SSE2;
#endif
    if (info[3] & 0x8000)
      feature_list |= CPU_FEATURE_CMOV;
  }

  info[0] = 0x80000000;
  execute_cpu_id(info);
  max_id = info[0];

  if (max_id >= 0x80000001) {
    info[0] = 0x80000001;
    execute_cpu_id(info);

    if (info[2] & 0x20)
      feature_list |= CPU_FEATURE_LZCNT;
  }

  if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
    feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);

  cpu_feature_list = feature_list;
}
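
/* Note (annotation, not part of the original source): bit 2 of XCR0 is the
   AVX (YMM) state bit, so the check above clears CPU_FEATURE_AVX and
   CPU_FEATURE_AVX2 when the OS has not enabled YMM state saving through
   XSAVE, even if the CPU itself reports AVX support. */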

static sljit_u8 get_jump_code(sljit_uw type)
{
  switch (type) {
  case SLJIT_EQUAL:
  case SLJIT_ATOMIC_STORED:
  case SLJIT_F_EQUAL:
  case SLJIT_UNORDERED_OR_EQUAL:
    return 0x84 /* je */;

  case SLJIT_NOT_EQUAL:
  case SLJIT_ATOMIC_NOT_STORED:
  case SLJIT_F_NOT_EQUAL:
  case SLJIT_ORDERED_NOT_EQUAL:
    return 0x85 /* jne */;

  case SLJIT_LESS:
  case SLJIT_CARRY:
  case SLJIT_F_LESS:
  case SLJIT_UNORDERED_OR_LESS:
  case SLJIT_UNORDERED_OR_GREATER:
    return 0x82 /* jc */;

  case SLJIT_GREATER_EQUAL:
  case SLJIT_NOT_CARRY:
  case SLJIT_F_GREATER_EQUAL:
  case SLJIT_ORDERED_GREATER_EQUAL:
  case SLJIT_ORDERED_LESS_EQUAL:
    return 0x83 /* jae */;

  case SLJIT_GREATER:
  case SLJIT_F_GREATER:
  case SLJIT_ORDERED_LESS:
  case SLJIT_ORDERED_GREATER:
    return 0x87 /* jnbe */;

  case SLJIT_LESS_EQUAL:
  case SLJIT_F_LESS_EQUAL:
  case SLJIT_UNORDERED_OR_GREATER_EQUAL:
  case SLJIT_UNORDERED_OR_LESS_EQUAL:
    return 0x86 /* jbe */;

  case SLJIT_SIG_LESS:
    return 0x8c /* jl */;

  case SLJIT_SIG_GREATER_EQUAL:
    return 0x8d /* jnl */;

  case SLJIT_SIG_GREATER:
    return 0x8f /* jnle */;

  case SLJIT_SIG_LESS_EQUAL:
    return 0x8e /* jle */;

  case SLJIT_OVERFLOW:
    return 0x80 /* jo */;

  case SLJIT_NOT_OVERFLOW:
    return 0x81 /* jno */;

  case SLJIT_UNORDERED:
  case SLJIT_ORDERED_EQUAL: /* NaN. */
    return 0x8a /* jp */;

  case SLJIT_ORDERED:
  case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
    return 0x8b /* jpo */;
  }
  return 0;
}
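
/* Note (annotation, not part of the original source): the values returned
   above are the second byte of the two-byte 0x0f 0x8x near form. The one-byte
   short form of the same condition is always the near opcode minus 0x10
   (e.g. je: 0x84 -> 0x74), which is why detect_near_jump_type() and
   emit_cmov_generic() below emit U8(get_jump_code(type) - 0x10) for rel8
   branches. */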

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
#endif /* SLJIT_CONFIG_X86_32 */

static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
  sljit_uw type = jump->flags >> TYPE_SHIFT;
  sljit_s32 short_jump;
  sljit_uw label_addr;
  sljit_uw jump_addr;

  jump_addr = (sljit_uw)code_ptr;
  if (!(jump->flags & JUMP_ADDR)) {
    label_addr = (sljit_uw)(code + jump->u.label->size);

    if (jump->u.label->size > jump->addr)
      jump_addr = (sljit_uw)(code + jump->addr);
  } else
    label_addr = jump->u.target - (sljit_uw)executable_offset;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  if ((sljit_sw)(label_addr - (jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN)
    return detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_64 */

  short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f;

  if (type == SLJIT_JUMP) {
    if (short_jump)
      *code_ptr++ = JMP_i8;
    else
      *code_ptr++ = JMP_i32;
  } else if (type > SLJIT_JUMP) {
    short_jump = 0;
    *code_ptr++ = CALL_i32;
  } else if (short_jump) {
    *code_ptr++ = U8(get_jump_code(type) - 0x10);
  } else {
    *code_ptr++ = GROUP_0F;
    *code_ptr++ = get_jump_code(type);
  }

  jump->addr = (sljit_uw)code_ptr;

  if (short_jump) {
    jump->flags |= PATCH_MB;
    code_ptr += sizeof(sljit_s8);
  } else {
    jump->flags |= PATCH_MW;
    code_ptr += sizeof(sljit_s32);
  }

  return code_ptr;
}

static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
  sljit_uw flags = jump->flags;
  sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
  sljit_uw jump_addr = jump->addr;
  SLJIT_UNUSED_ARG(executable_offset);

  if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
#else /* SLJIT_CONFIG_X86_32 */
    if (flags & PATCH_MD) {
      SLJIT_ASSERT(addr > HALFWORD_MAX);
      sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
      return;
    }

    if (flags & PATCH_MW) {
      addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
      SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
    } else {
      SLJIT_ASSERT(addr <= HALFWORD_MAX);
    }
    sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
#endif /* !SLJIT_CONFIG_X86_32 */
    return;
  }

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
    SLJIT_ASSERT(!(flags & JUMP_ADDR));
    sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
    return;
  }
#endif /* SLJIT_CONFIG_X86_64 */

  addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);

  if (flags & PATCH_MB) {
    addr -= sizeof(sljit_s8);
    SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
    *(sljit_u8*)jump_addr = U8(addr);
    return;
  } else if (flags & PATCH_MW) {
    addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
#else /* !SLJIT_CONFIG_X86_32 */
    SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
    sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
#endif /* SLJIT_CONFIG_X86_32 */
  }
}

static sljit_u8 *process_extended_label(sljit_u8 *code_ptr, struct sljit_extended_label *ext_label)
{
  sljit_uw mask;
  sljit_u8 *ptr = code_ptr;

  SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);
  mask = ext_label->data;

  code_ptr = (sljit_u8*)(((sljit_uw)code_ptr + mask) & ~mask);

  while (ptr < code_ptr)
    *ptr++ = NOP;

  return code_ptr;
}

static void reduce_code_size(struct sljit_compiler *compiler)
{
  struct sljit_label *label;
  struct sljit_jump *jump;
  sljit_uw next_label_size;
  sljit_uw next_jump_addr;
  sljit_uw next_min_addr;
  sljit_uw size_reduce = 0;
  sljit_sw diff;
  sljit_uw type;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  sljit_uw size_reduce_max;
#endif /* SLJIT_DEBUG */

  label = compiler->labels;
  jump = compiler->jumps;

  next_label_size = SLJIT_GET_NEXT_SIZE(label);
  next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);

  while (1) {
    next_min_addr = next_label_size;
    if (next_jump_addr < next_min_addr)
      next_min_addr = next_jump_addr;

    if (next_min_addr == SLJIT_MAX_ADDRESS)
      break;

    if (next_min_addr == next_label_size) {
      label->size -= size_reduce;

      label = label->next;
      next_label_size = SLJIT_GET_NEXT_SIZE(label);
    }

    if (next_min_addr != next_jump_addr)
      continue;

    jump->addr -= size_reduce;
    if (!(jump->flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
      size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
#endif /* SLJIT_DEBUG */

      if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
        if (jump->flags & JUMP_ADDR) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
          if (jump->u.target <= 0xffffffffl)
            size_reduce += sizeof(sljit_s32);
#endif /* SLJIT_CONFIG_X86_64 */
        } else {
          /* Unit size: instruction. */
          diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
          if (jump->u.label->size > jump->addr) {
            SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
            diff -= (sljit_sw)size_reduce;
          }
          type = jump->flags >> TYPE_SHIFT;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
          if (type == SLJIT_JUMP) {
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
              size_reduce += JUMP_MAX_SIZE - 2;
            else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
              size_reduce += JUMP_MAX_SIZE - 5;
          } else if (type < SLJIT_JUMP) {
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
              size_reduce += CJUMP_MAX_SIZE - 2;
            else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
              size_reduce += CJUMP_MAX_SIZE - 6;
          } else {
            if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
              size_reduce += JUMP_MAX_SIZE - 5;
          }
#else /* !SLJIT_CONFIG_X86_64 */
          if (type == SLJIT_JUMP) {
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
              size_reduce += JUMP_MAX_SIZE - 2;
          } else if (type < SLJIT_JUMP) {
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
              size_reduce += CJUMP_MAX_SIZE - 2;
          }
#endif /* SLJIT_CONFIG_X86_64 */
        }
      }

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
      jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    } else {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
      size_reduce_max = size_reduce + 10;
#endif /* SLJIT_DEBUG */

      if (!(jump->flags & JUMP_ADDR)) {
        diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3);

        if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
          size_reduce += 3;
      } else if (jump->u.target <= 0xffffffffl)
        size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5;

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
      jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#endif /* SLJIT_CONFIG_X86_64 */
    }

    jump = jump->next;
    next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
  }

  compiler->size -= size_reduce;
}
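
/* Worked example (annotation, not part of the original source): the "+ 2" in
   the rel8 bounds above accounts for the jump instruction's own size, since
   diff is measured from the start of the jump while the displacement is
   relative to its end. A short jump is 2 bytes (opcode + rel8), so a target
   2 bytes past the jump's start encodes as displacement 0; hence diff may
   reach 0x7f + 2 forward and -0x80 + 2 backward. The "+ 5" and "+ 6" bounds
   apply the same correction for the 5-byte jmp rel32 and 6-byte 0x0f jcc
   rel32 forms. */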

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
  struct sljit_memory_fragment *buf;
  sljit_u8 *code;
  sljit_u8 *code_ptr;
  sljit_u8 *buf_ptr;
  sljit_u8 *buf_end;
  sljit_u8 len;
  sljit_sw executable_offset;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  sljit_uw addr;
#endif /* SLJIT_DEBUG */

  struct sljit_label *label;
  struct sljit_jump *jump;
  struct sljit_const *const_;

  CHECK_ERROR_PTR();
  CHECK_PTR(check_sljit_generate_code(compiler));

  reduce_code_size(compiler);

  /* Second code generation pass. */
  code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
  PTR_FAIL_WITH_EXEC_IF(code);

  reverse_buf(compiler);
  buf = compiler->buf;

  code_ptr = code;
  label = compiler->labels;
  jump = compiler->jumps;
  const_ = compiler->consts;

  do {
    buf_ptr = buf->memory;
    buf_end = buf_ptr + buf->used_size;
    do {
      len = *buf_ptr++;
      SLJIT_ASSERT(len > 0);
      if (len < SLJIT_INST_CONST) {
        /* The code is already generated. */
        SLJIT_MEMCPY(code_ptr, buf_ptr, len);
        code_ptr += len;
        buf_ptr += len;
      } else {
        switch (len) {
        case SLJIT_INST_LABEL:
          if (label->u.index >= SLJIT_LABEL_ALIGNED)
            code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);

          label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
          label->size = (sljit_uw)(code_ptr - code);
          label = label->next;
          break;
        case SLJIT_INST_JUMP:
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
          addr = (sljit_uw)code_ptr;
#endif /* SLJIT_DEBUG */
          if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
            code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
          else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
            code_ptr = detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_32 */
          }

          SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0xff));
          jump = jump->next;
          break;
        case SLJIT_INST_MOV_ADDR:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
          code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
#endif /* SLJIT_CONFIG_X86_64 */
          jump->addr = (sljit_uw)code_ptr;
          jump = jump->next;
          break;
        default:
          SLJIT_ASSERT(len == SLJIT_INST_CONST);
          const_->addr = (sljit_uw)code_ptr;
          const_ = const_->next;
          break;
        }
      }
    } while (buf_ptr < buf_end);

    SLJIT_ASSERT(buf_ptr == buf_end);
    buf = buf->next;
  } while (buf);

  SLJIT_ASSERT(!label);
  SLJIT_ASSERT(!jump);
  SLJIT_ASSERT(!const_);
  SLJIT_ASSERT(code_ptr <= code + compiler->size);

  jump = compiler->jumps;
  while (jump) {
    generate_jump_or_mov_addr(jump, executable_offset);
    jump = jump->next;
  }

  compiler->error = SLJIT_ERR_COMPILED;
  compiler->executable_offset = executable_offset;
  compiler->executable_size = (sljit_uw)(code_ptr - code);

  code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

  SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
  return (void*)code;
}
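
/* Usage sketch (an assumption based on the public sljit API, not part of the
   original source): a typical client builds instructions through the
   sljit_emit_* functions and then calls sljit_generate_code() once, which
   runs reduce_code_size() followed by the second pass above:

     struct sljit_compiler *compiler = sljit_create_compiler(NULL);
     ...emit instructions, labels and jumps...
     void *code = sljit_generate_code(compiler, 0, NULL);
     sljit_free_compiler(compiler);
     ...call into code, later release it with sljit_free_code(code, NULL)...
*/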

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
  switch (feature_type) {
  case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
    return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    if (cpu_feature_list == 0)
      get_cpu_features();
    return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
    return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  case SLJIT_HAS_VIRTUAL_REGISTERS:
    return 1;
#endif /* SLJIT_CONFIG_X86_32 */

  case SLJIT_HAS_CLZ:
    if (cpu_feature_list == 0)
      get_cpu_features();

    return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;

  case SLJIT_HAS_CTZ:
    if (cpu_feature_list == 0)
      get_cpu_features();

    return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;

  case SLJIT_HAS_CMOV:
    if (cpu_feature_list == 0)
      get_cpu_features();
    return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;

  case SLJIT_HAS_REV:
  case SLJIT_HAS_ROT:
  case SLJIT_HAS_PREFETCH:
  case SLJIT_HAS_COPY_F32:
  case SLJIT_HAS_COPY_F64:
  case SLJIT_HAS_ATOMIC:
  case SLJIT_HAS_MEMORY_BARRIER:
    return 1;

#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
  case SLJIT_HAS_AVX:
    if (cpu_feature_list == 0)
      get_cpu_features();
    return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
  case SLJIT_HAS_AVX2:
    if (cpu_feature_list == 0)
      get_cpu_features();
    return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
  case SLJIT_HAS_SIMD:
    if (cpu_feature_list == 0)
      get_cpu_features();
    return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
#endif /* SLJIT_IS_FPU_AVAILABLE */
  default:
    return 0;
  }
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
  switch (type) {
  case SLJIT_ORDERED_EQUAL:
  case SLJIT_UNORDERED_OR_NOT_EQUAL:
    return 2;
  }

  return 0;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
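
/* Worked example (annotation, not part of the original source): BINARY_OPCODE
   packs the four encodings of a binary ALU operation into one word. For ADD,
   ADD_EAX_i32 == 0x05, ADD_r_rm == 0x03, ADD_rm_r == 0x01 and ADD == 0, so
   BINARY_OPCODE(ADD) == 0x05030100; the emitter later selects the byte that
   matches the operand combination. */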

#define BINARY_IMM32(op_imm, immw, arg, argw) \
  do { \
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
    FAIL_IF(!inst); \
    *(inst + 1) |= (op_imm); \
  } while (0)

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
  do { \
    if (IS_HALFWORD(immw) || compiler->mode32) { \
      BINARY_IMM32(op_imm, immw, arg, argw); \
    } \
    else { \
      FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
      inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
      FAIL_IF(!inst); \
      *inst = (op_mr); \
    } \
  } while (0)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
  FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else /* !SLJIT_CONFIG_X86_64 */

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
  BINARY_IMM32(op_imm, immw, arg, argw)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
  FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif /* SLJIT_CONFIG_X86_64 */

static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
{
  sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  FAIL_IF(!inst);
  INC_SIZE(1);
  *inst = byte;
  return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  sljit_s32 dst, sljit_sw dstw,
  sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
  sljit_uw op,
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
  sljit_uw op,
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
  sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
  sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
  sljit_s32 src1, sljit_sw src1w,
  sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
  sljit_s32 dst_reg,
  sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
  /* Emit endbr32/endbr64 when CET is enabled. */
  sljit_u8 *inst;
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  FAIL_IF(!inst);
  INC_SIZE(4);
  inst[0] = GROUP_F3;
  inst[1] = GROUP_0F;
  inst[2] = 0x1e;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  inst[3] = 0xfb;
#else /* !SLJIT_CONFIG_X86_32 */
  inst[3] = 0xfa;
#endif /* SLJIT_CONFIG_X86_32 */
#else /* !SLJIT_CONFIG_X86_CET */
  SLJIT_UNUSED_ARG(compiler);
#endif /* SLJIT_CONFIG_X86_CET */
  return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)

static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
  sljit_u8 *inst;
  sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  size = 5;
#else
  size = 4;
#endif

  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  FAIL_IF(!inst);
  INC_SIZE(size);
  *inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
  inst[0] = GROUP_0F;
  inst[1] = 0x1e;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
#else
  inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
#endif
  return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
  sljit_u8 *inst;
  sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  size = 5;
#else
  size = 4;
#endif

  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  FAIL_IF(!inst);
  INC_SIZE(size);
  *inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
  inst[0] = GROUP_0F;
  inst[1] = 0xae;
  inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
  return SLJIT_SUCCESS;
}

#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */

static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
  return _get_ssp() != 0;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
  return 0;
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
}

static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
  sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
  sljit_u8 *inst, *jz_after_cmp_inst;
  sljit_uw size_jz_after_cmp_inst;

  sljit_uw size_before_rdssp_inst = compiler->size;

  /* Generate "RDSSP TMP_REG1". */
  FAIL_IF(emit_rdssp(compiler, TMP_REG1));

  /* Load return address on shadow stack into TMP_REG1. */
  EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);

  /* Compare return address against TMP_REG1. */
  FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));

  /* Generate JZ to skip the shadow stack adjustment when the shadow
     stack matches the normal stack. */
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  FAIL_IF(!inst);
  INC_SIZE(2);
  *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
  size_jz_after_cmp_inst = compiler->size;
  jz_after_cmp_inst = inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  /* REX_W is not necessary. */
  compiler->mode32 = 1;
#endif
  /* Load 1 into TMP_REG1. */
  EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);

  /* Generate "INCSSP TMP_REG1". */
  FAIL_IF(emit_incssp(compiler, TMP_REG1));

  /* Jump back to "RDSSP TMP_REG1" to check the shadow stack again. */
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  FAIL_IF(!inst);
  INC_SIZE(2);
  inst[0] = JMP_i8;
  inst[1] = size_before_rdssp_inst - compiler->size;

  *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
  SLJIT_UNUSED_ARG(compiler);
  SLJIT_UNUSED_ARG(src);
  SLJIT_UNUSED_ARG(srcw);
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
  return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  sljit_s32 dst, sljit_sw dstw,
  sljit_s32 src, sljit_sw srcw)
{
  sljit_u8* inst;

  if (FAST_IS_REG(src)) {
    inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
    FAIL_IF(!inst);
    *inst = MOV_rm_r;
    return SLJIT_SUCCESS;
  }

  if (src == SLJIT_IMM) {
    if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
      if (!compiler->mode32) {
        if (NOT_HALFWORD(srcw))
          return emit_load_imm64(compiler, dst, srcw);
      }
      else
        return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
#endif
    }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
      /* Immediate to memory move. Only SLJIT_MOV operation copies
         an immediate directly into memory so TMP_REG1 can be used. */
      FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
      FAIL_IF(!inst);
      *inst = MOV_rm_r;
      return SLJIT_SUCCESS;
    }
#endif
    inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
    FAIL_IF(!inst);
    *inst = MOV_rm_i32;
    return SLJIT_SUCCESS;
  }
  if (FAST_IS_REG(dst)) {
    inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
    FAIL_IF(!inst);
    *inst = MOV_r_rm;
    return SLJIT_SUCCESS;
  }

  /* Memory to memory move. Only SLJIT_MOV operation copies
     data from memory to memory so TMP_REG1 can be used. */
  inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  FAIL_IF(!inst);
  *inst = MOV_r_rm;
  inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  FAIL_IF(!inst);
  *inst = MOV_rm_r;
  return SLJIT_SUCCESS;
}

static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
  sljit_s32 dst_reg,
  sljit_s32 src, sljit_sw srcw)
{
  sljit_u8* inst;
  sljit_uw size;

  SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);

  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  FAIL_IF(!inst);
  INC_SIZE(2);
  inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);

  size = compiler->size;
  EMIT_MOV(compiler, dst_reg, 0, src, srcw);

  inst[1] = U8(compiler->size - size);
  return SLJIT_SUCCESS;
}
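
/* Note (annotation, not part of the original source): emit_cmov_generic()
   synthesizes a conditional move for CPUs without CMOV by emitting a short
   jump with the inverted condition (type ^ 0x1) over an unconditional mov;
   inst[1], the rel8 displacement, is back-patched with the mov's size once
   that mov has been emitted. */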
1408
1409
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1410
2.75M
{
1411
2.75M
  sljit_u8 *inst;
1412
2.75M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1413
2.75M
  sljit_uw size;
1414
2.75M
#endif
1415
1416
2.75M
  CHECK_ERROR();
1417
2.75M
  CHECK(check_sljit_emit_op0(compiler, op));
1418
1419
2.75M
  switch (GET_OPCODE(op)) {
1420
0
  case SLJIT_BREAKPOINT:
1421
0
    return emit_byte(compiler, INT3);
1422
0
  case SLJIT_NOP:
1423
0
    return emit_byte(compiler, NOP);
1424
0
  case SLJIT_LMUL_UW:
1425
0
  case SLJIT_LMUL_SW:
1426
0
  case SLJIT_DIVMOD_UW:
1427
0
  case SLJIT_DIVMOD_SW:
1428
0
  case SLJIT_DIV_UW:
1429
0
  case SLJIT_DIV_SW:
1430
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1431
#ifdef _WIN64
1432
    SLJIT_ASSERT(
1433
      reg_map[SLJIT_R0] == 0
1434
      && reg_map[SLJIT_R1] == 2
1435
      && reg_map[TMP_REG1] > 7);
1436
#else
1437
0
    SLJIT_ASSERT(
1438
0
      reg_map[SLJIT_R0] == 0
1439
0
      && reg_map[SLJIT_R1] < 7
1440
0
      && reg_map[TMP_REG1] == 2);
1441
0
#endif
1442
0
    compiler->mode32 = op & SLJIT_32;
1443
0
#endif
1444
0
    SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1445
1446
0
    op = GET_OPCODE(op);
1447
0
    if ((op | 0x2) == SLJIT_DIV_UW) {
1448
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1449
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1450
      inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1451
#else
1452
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1453
0
#endif
1454
0
      FAIL_IF(!inst);
1455
0
      *inst = XOR_r_rm;
1456
0
    }
1457
1458
0
    if ((op | 0x2) == SLJIT_DIV_SW) {
1459
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1460
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1461
#endif
1462
1463
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1464
      FAIL_IF(emit_byte(compiler, CDQ));
1465
#else
1466
0
      if (!compiler->mode32) {
1467
0
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1468
0
        FAIL_IF(!inst);
1469
0
        INC_SIZE(2);
1470
0
        inst[0] = REX_W;
1471
0
        inst[1] = CDQ;
1472
0
      } else
1473
0
        FAIL_IF(emit_byte(compiler, CDQ));
1474
0
#endif
1475
0
    }
1476
1477
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1478
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1479
    FAIL_IF(!inst);
1480
    INC_SIZE(2);
1481
    inst[0] = GROUP_F7;
1482
    inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1483
#else /* !SLJIT_CONFIG_X86_32 */
1484
#ifdef _WIN64
1485
    size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1486
#else /* !_WIN64 */
1487
0
    size = (!compiler->mode32) ? 3 : 2;
1488
0
#endif /* _WIN64 */
1489
0
    inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1490
0
    FAIL_IF(!inst);
1491
0
    INC_SIZE(size);
1492
#ifdef _WIN64
1493
    if (!compiler->mode32)
1494
      *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1495
    else if (op >= SLJIT_DIVMOD_UW)
1496
      *inst++ = REX_B;
1497
    inst[0] = GROUP_F7;
1498
    inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1499
#else /* !_WIN64 */
1500
0
    if (!compiler->mode32)
1501
0
      *inst++ = REX_W;
1502
0
    inst[0] = GROUP_F7;
1503
0
    inst[1] = MOD_REG | reg_map[SLJIT_R1];
1504
0
#endif /* _WIN64 */
1505
0
#endif /* SLJIT_CONFIG_X86_32 */
1506
0
    switch (op) {
1507
0
    case SLJIT_LMUL_UW:
1508
0
      inst[1] |= MUL;
1509
0
      break;
1510
0
    case SLJIT_LMUL_SW:
1511
0
      inst[1] |= IMUL;
1512
0
      break;
1513
0
    case SLJIT_DIVMOD_UW:
1514
0
    case SLJIT_DIV_UW:
1515
0
      inst[1] |= DIV;
1516
0
      break;
1517
0
    case SLJIT_DIVMOD_SW:
1518
0
    case SLJIT_DIV_SW:
1519
0
      inst[1] |= IDIV;
1520
0
      break;
1521
0
    }
1522
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1523
0
    if (op <= SLJIT_DIVMOD_SW)
1524
0
      EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1525
#else
1526
    if (op >= SLJIT_DIV_UW)
1527
      EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1528
#endif
1529
0
    break;
1530
0
  case SLJIT_MEMORY_BARRIER:
1531
0
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
1532
0
    FAIL_IF(!inst);
1533
0
    INC_SIZE(3);
1534
0
    inst[0] = GROUP_0F;
1535
0
    inst[1] = 0xae;
1536
0
    inst[2] = 0xf0;
1537
0
    return SLJIT_SUCCESS;
1538
2.69M
  case SLJIT_ENDBR:
1539
2.69M
    return emit_endbranch(compiler);
1540
59.4k
  case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1541
59.4k
    return skip_frames_before_return(compiler);
1542
2.75M
  }
1543
1544
0
  return SLJIT_SUCCESS;
1545
2.75M
}
1546
1547
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1548
  sljit_s32 dst, sljit_sw dstw,
1549
  sljit_s32 src, sljit_sw srcw)
1550
111M
{
1551
111M
  sljit_u8* inst;
1552
111M
  sljit_s32 dst_r;
1553
1554
111M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1555
111M
  compiler->mode32 = 0;
1556
111M
#endif
1557
1558
111M
  if (src == SLJIT_IMM) {
1559
0
    if (FAST_IS_REG(dst)) {
1560
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1561
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1562
#else
1563
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1564
0
      FAIL_IF(!inst);
1565
0
      *inst = MOV_rm_i32;
1566
0
      return SLJIT_SUCCESS;
1567
0
#endif
1568
0
    }
1569
0
    inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1570
0
    FAIL_IF(!inst);
1571
0
    *inst = MOV_rm8_i8;
1572
0
    return SLJIT_SUCCESS;
1573
0
  }
1574
1575
111M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1576
1577
111M
  if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1578
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1579
    if (reg_map[src] >= 4) {
1580
      SLJIT_ASSERT(dst_r == TMP_REG1);
1581
      EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1582
    } else
1583
      dst_r = src;
1584
#else
1585
0
    dst_r = src;
1586
0
#endif
1587
111M
  } else {
1588
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1589
    if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1590
      /* Both src and dst are registers. */
1591
      SLJIT_ASSERT(FAST_IS_REG(dst));
1592
1593
      if (src == dst && !sign) {
1594
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1595
        FAIL_IF(!inst);
1596
        *(inst + 1) |= AND;
1597
        return SLJIT_SUCCESS;
1598
      }
1599
1600
      EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1601
      src = TMP_REG1;
1602
      srcw = 0;
1603
    }
1604
#endif /* SLJIT_CONFIG_X86_32 */
1605
1606
    /* Here src is either a memory address or, on x86-32, a register with reg_map[src] < 4. */
1607
111M
    FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
1608
111M
  }
1609
1610
111M
  if (dst & SLJIT_MEM) {
1611
0
    inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1612
0
    FAIL_IF(!inst);
1613
0
    *inst = MOV_rm8_r8;
1614
0
  }
1615
1616
111M
  return SLJIT_SUCCESS;
1617
111M
}
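/* Illustration (not part of this module): the semantics behind the sign flag
   above. MOVZX zero-extends the low byte, MOVSX sign-extends it; the casts
   below compute exactly what the two instructions produce. The reg_map >= 4
   special cases exist because on x86-32 only encodings 0..3 (EAX, ECX, EDX,
   EBX) have an addressable low-byte register; encodings 4..7 name AH..BH
   instead. */
#include <stdint.h>
#include <assert.h>

static void byte_extension_demo(void)
{
  uint8_t b = 0xF0;
  int64_t movzx_result = (uint64_t)b;  /* MOVZX: 0x00000000000000F0 == 240 */
  int64_t movsx_result = (int8_t)b;    /* MOVSX: 0xFFFFFFFFFFFFFFF0 == -16 */
  assert(movzx_result == 240 && movsx_result == -16);
}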
1618
1619
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1620
  sljit_s32 src, sljit_sw srcw)
1621
0
{
1622
0
  sljit_u8* inst;
1623
1624
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1625
0
  compiler->mode32 = 1;
1626
0
#endif
1627
1628
0
  inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1629
0
  FAIL_IF(!inst);
1630
0
  inst[0] = GROUP_0F;
1631
0
  inst[1] = PREFETCH;
1632
1633
0
  if (op == SLJIT_PREFETCH_L1)
1634
0
    inst[2] |= (1 << 3);
1635
0
  else if (op == SLJIT_PREFETCH_L2)
1636
0
    inst[2] |= (2 << 3);
1637
0
  else if (op == SLJIT_PREFETCH_L3)
1638
0
    inst[2] |= (3 << 3);
1639
1640
0
  return SLJIT_SUCCESS;
1641
0
}
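/* Illustration (not part of this module): the (n << 3) adjustments above set
   the /reg field of the ModRM byte that follows 0F 18, which selects the
   prefetch hint: /0 prefetchnta (SLJIT_PREFETCH_ONCE), /1 prefetcht0 (L1),
   /2 prefetcht1 (L2), /3 prefetcht2 (L3). A fixed-form sketch: */
#include <stdint.h>

/* Encode "prefetcht0 [rax]": 0F 18 /1 with mod = 00 and rm = rax. */
static int encode_prefetcht0_rax(uint8_t *buf)
{
  buf[0] = 0x0F;                /* GROUP_0F */
  buf[1] = 0x18;                /* PREFETCH */
  buf[2] = (uint8_t)(1 << 3);   /* ModRM 0x08: mod=00 (memory), /1 = t0, rm=000 = [rax] */
  return 3;
}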
1642
1643
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1644
  sljit_s32 dst, sljit_sw dstw,
1645
  sljit_s32 src, sljit_sw srcw)
1646
18.7M
{
1647
18.7M
  sljit_u8* inst;
1648
18.7M
  sljit_s32 dst_r;
1649
1650
18.7M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1651
18.7M
  compiler->mode32 = 0;
1652
18.7M
#endif
1653
1654
18.7M
  if (src == SLJIT_IMM) {
1655
0
    if (FAST_IS_REG(dst)) {
1656
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1657
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1658
#else
1659
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1660
0
      FAIL_IF(!inst);
1661
0
      *inst = MOV_rm_i32;
1662
0
      return SLJIT_SUCCESS;
1663
0
#endif
1664
0
    }
1665
0
    inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1666
0
    FAIL_IF(!inst);
1667
0
    *inst = MOV_rm_i32;
1668
0
    return SLJIT_SUCCESS;
1669
0
  }
1670
1671
18.7M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1672
1673
18.7M
  if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1674
0
    dst_r = src;
1675
18.7M
  else
1676
18.7M
    FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));
1677
1678
18.7M
  if (dst & SLJIT_MEM) {
1679
0
    inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1680
0
    FAIL_IF(!inst);
1681
0
    *inst = MOV_rm_r;
1682
0
  }
1683
1684
18.7M
  return SLJIT_SUCCESS;
1685
18.7M
}
1686
1687
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1688
  sljit_s32 dst, sljit_sw dstw,
1689
  sljit_s32 src, sljit_sw srcw)
1690
87.5k
{
1691
87.5k
  sljit_u8* inst;
1692
1693
87.5k
  if (dst == src && dstw == srcw) {
1694
    /* Same input and output */
1695
87.5k
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1696
87.5k
    FAIL_IF(!inst);
1697
87.5k
    inst[0] = GROUP_F7;
1698
87.5k
    inst[1] |= opcode;
1699
87.5k
    return SLJIT_SUCCESS;
1700
87.5k
  }
1701
1702
0
  if (FAST_IS_REG(dst)) {
1703
0
    EMIT_MOV(compiler, dst, 0, src, srcw);
1704
0
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1705
0
    FAIL_IF(!inst);
1706
0
    inst[0] = GROUP_F7;
1707
0
    inst[1] |= opcode;
1708
0
    return SLJIT_SUCCESS;
1709
0
  }
1710
1711
0
  EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1712
0
  inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1713
0
  FAIL_IF(!inst);
1714
0
  inst[0] = GROUP_F7;
1715
0
  inst[1] |= opcode;
1716
0
  EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1717
0
  return SLJIT_SUCCESS;
1718
0
}
1719
1720
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1721
static const sljit_sw emit_clz_arg = 32 + 31;
1722
static const sljit_sw emit_ctz_arg = 32;
1723
#endif
1724
1725
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
1726
  sljit_s32 dst, sljit_sw dstw,
1727
  sljit_s32 src, sljit_sw srcw)
1728
0
{
1729
0
  sljit_u8* inst;
1730
0
  sljit_s32 dst_r;
1731
0
  sljit_sw max;
1732
1733
0
  SLJIT_ASSERT(cpu_feature_list != 0);
1734
1735
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1736
1737
0
  if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
1738
0
    FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));
1739
1740
0
    if (dst & SLJIT_MEM)
1741
0
      EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1742
0
    return SLJIT_SUCCESS;
1743
0
  }
1744
1745
0
  FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));
1746
1747
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1748
  max = is_clz ? (32 + 31) : 32;
1749
1750
  if (cpu_feature_list & CPU_FEATURE_CMOV) {
1751
    if (dst_r != TMP_REG1) {
1752
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
1753
      inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1754
    }
1755
    else
1756
      inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
1757
1758
    FAIL_IF(!inst);
1759
    inst[0] = GROUP_0F;
1760
    inst[1] = CMOVE_r_rm;
1761
  }
1762
  else
1763
    FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1764
1765
  if (is_clz) {
1766
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1767
    FAIL_IF(!inst);
1768
    *(inst + 1) |= XOR;
1769
  }
1770
#else
1771
0
  if (is_clz)
1772
0
    max = compiler->mode32 ? (32 + 31) : (64 + 63);
1773
0
  else
1774
0
    max = compiler->mode32 ? 32 : 64;
1775
1776
0
  if (cpu_feature_list & CPU_FEATURE_CMOV) {
1777
0
    EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
1778
0
    FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
1779
0
  } else
1780
0
    FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1781
1782
0
  if (is_clz) {
1783
0
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
1784
0
    FAIL_IF(!inst);
1785
0
    *(inst + 1) |= XOR;
1786
0
  }
1787
0
#endif
1788
1789
0
  if (dst & SLJIT_MEM)
1790
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1791
0
  return SLJIT_SUCCESS;
1792
0
}
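/* Illustration (not part of this module): the arithmetic behind the BSR
   fallback above. For nonzero 32-bit x, BSR returns the index of the highest
   set bit, so clz(x) = 31 - bsr(x) = bsr(x) ^ 31 (bsr(x) is in 0..31). When
   x == 0, BSR leaves its destination undefined and sets ZF, so the CMOV first
   loads 32 + 31; the final XOR 31 then turns that into the required 32. The
   64-bit path is the same with 64 + 63 and XOR 63 (hence "max >> 1"). */
#include <stdint.h>

static unsigned clz32_bsr_style(uint32_t x)
{
  unsigned r = 32 + 31;         /* the value CMOVed in when ZF is set */
  if (x != 0) {
    r = 31;                     /* software stand-in for BSR */
    while (!(x & 0x80000000u)) {
      x <<= 1;
      r--;
    }
  }
  return r ^ 31;                /* clz32_bsr_style(0) == 32, (1) == 31, (1u << 31) == 0 */
}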
1793
1794
static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
1795
  sljit_s32 op,
1796
  sljit_s32 dst, sljit_sw dstw,
1797
  sljit_s32 src, sljit_sw srcw)
1798
0
{
1799
0
  sljit_u8 *inst;
1800
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1801
0
  sljit_uw size;
1802
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1803
0
  sljit_u8 rex = 0;
1804
#else /* !SLJIT_CONFIG_X86_64 */
1805
  sljit_s32 dst_is_ereg = op & SLJIT_32;
1806
#endif /* SLJIT_CONFIG_X86_64 */
1807
1808
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1809
0
  if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
1810
0
    compiler->mode32 = 1;
1811
#else /* !SLJIT_CONFIG_X86_64 */
1812
  op &= ~SLJIT_32;
1813
#endif /* SLJIT_CONFIG_X86_64 */
1814
1815
0
  if (src != dst_r) {
1816
    /* Only the lower 16 bits are read for eregs. */
1817
0
    if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1818
0
      FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
1819
0
    else
1820
0
      EMIT_MOV(compiler, dst_r, 0, src, srcw);
1821
0
  }
1822
1823
0
  size = 2;
1824
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1825
0
  if (!compiler->mode32)
1826
0
    rex = REX_W;
1827
1828
0
  if (reg_map[dst_r] >= 8)
1829
0
    rex |= REX_B;
1830
1831
0
  if (rex != 0)
1832
0
    size++;
1833
0
#endif /* SLJIT_CONFIG_X86_64 */
1834
1835
0
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1836
0
  FAIL_IF(!inst);
1837
0
  INC_SIZE(size);
1838
1839
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1840
0
  if (rex != 0)
1841
0
    *inst++ = rex;
1842
1843
0
  inst[0] = GROUP_0F;
1844
0
  inst[1] = BSWAP_r | reg_lmap[dst_r];
1845
#else /* !SLJIT_CONFIG_X86_64 */
1846
  inst[0] = GROUP_0F;
1847
  inst[1] = BSWAP_r | reg_map[dst_r];
1848
#endif /* SLJIT_CONFIG_X86_64 */
1849
1850
0
  if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1851
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1852
0
    size = compiler->mode32 ? 16 : 48;
1853
#else /* !SLJIT_CONFIG_X86_64 */
1854
    size = 16;
1855
#endif /* SLJIT_CONFIG_X86_64 */
1856
1857
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
1858
0
    FAIL_IF(!inst);
1859
0
    if (op == SLJIT_REV_U16)
1860
0
      inst[1] |= SHR;
1861
0
    else
1862
0
      inst[1] |= SAR;
1863
0
  }
1864
1865
0
  if (dst & SLJIT_MEM) {
1866
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1867
    if (dst_is_ereg)
1868
      op = SLJIT_REV;
1869
#endif /* SLJIT_CONFIG_X86_32 */
1870
0
    if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1871
0
      return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
1872
1873
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1874
0
  }
1875
1876
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1877
0
  if (op == SLJIT_REV_S32) {
1878
0
    compiler->mode32 = 0;
1879
0
    inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1880
0
    FAIL_IF(!inst);
1881
0
    *inst = MOVSXD_r_rm;
1882
0
  }
1883
0
#endif /* SLJIT_CONFIG_X86_64 */
1884
1885
0
  return SLJIT_SUCCESS;
1886
0
}
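/* Illustration (not part of this module): why the 16-bit reverse above is
   BSWAP followed by a shift. BSWAP reverses all four (or eight) bytes of the
   register, parking the two bytes of interest in the top half; shifting right
   by 16 (48 in 64-bit mode) brings them back down, with SHR zero-extending
   for REV_U16 and SAR sign-extending for REV_S16. A 32-bit model, assuming
   arithmetic right shift of negative values: */
#include <stdint.h>
#include <assert.h>

static uint32_t bswap32(uint32_t v)   /* software stand-in for BSWAP */
{
  return (v << 24) | ((v & 0xFF00u) << 8) | ((v >> 8) & 0xFF00u) | (v >> 24);
}

static void rev16_demo(void)
{
  uint32_t x = 0x000012EFu;                /* only the low 16 bits matter */
  uint32_t u = bswap32(x) >> 16;           /* REV_U16: 0x0000EF12 */
  int32_t  s = (int32_t)bswap32(x) >> 16;  /* REV_S16: 0xFFFFEF12 */
  assert(u == 0xEF12u && s == (int32_t)0xFFFFEF12);
}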
1887
1888
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1889
  sljit_s32 dst, sljit_sw dstw,
1890
  sljit_s32 src, sljit_sw srcw)
1891
546M
{
1892
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1893
  sljit_s32 dst_is_ereg = 0;
1894
#else /* !SLJIT_CONFIG_X86_32 */
1895
546M
  sljit_s32 op_flags = GET_ALL_FLAGS(op);
1896
546M
#endif /* SLJIT_CONFIG_X86_32 */
1897
1898
546M
  CHECK_ERROR();
1899
546M
  CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1900
546M
  ADJUST_LOCAL_OFFSET(dst, dstw);
1901
546M
  ADJUST_LOCAL_OFFSET(src, srcw);
1902
1903
546M
  CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1904
546M
  CHECK_EXTRA_REGS(src, srcw, (void)0);
1905
546M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1906
546M
  compiler->mode32 = op_flags & SLJIT_32;
1907
546M
#endif /* SLJIT_CONFIG_X86_64 */
1908
1909
546M
  op = GET_OPCODE(op);
1910
1911
546M
  if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1912
546M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1913
546M
    compiler->mode32 = 0;
1914
546M
#endif /* SLJIT_CONFIG_X86_64 */
1915
1916
546M
    if (FAST_IS_REG(src) && src == dst) {
1917
0
      if (!TYPE_CAST_NEEDED(op))
1918
0
        return SLJIT_SUCCESS;
1919
0
    }
1920
1921
546M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1922
546M
    if (op_flags & SLJIT_32) {
1923
0
      if (src & SLJIT_MEM) {
1924
0
        if (op == SLJIT_MOV_S32)
1925
0
          op = SLJIT_MOV_U32;
1926
0
      }
1927
0
      else if (src == SLJIT_IMM) {
1928
0
        if (op == SLJIT_MOV_U32)
1929
0
          op = SLJIT_MOV_S32;
1930
0
      }
1931
0
    }
1932
546M
#endif /* SLJIT_CONFIG_X86_64 */
1933
1934
546M
    if (src == SLJIT_IMM) {
1935
79.2M
      switch (op) {
1936
0
      case SLJIT_MOV_U8:
1937
0
        srcw = (sljit_u8)srcw;
1938
0
        break;
1939
0
      case SLJIT_MOV_S8:
1940
0
        srcw = (sljit_s8)srcw;
1941
0
        break;
1942
0
      case SLJIT_MOV_U16:
1943
0
        srcw = (sljit_u16)srcw;
1944
0
        break;
1945
0
      case SLJIT_MOV_S16:
1946
0
        srcw = (sljit_s16)srcw;
1947
0
        break;
1948
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1949
12.3M
      case SLJIT_MOV_U32:
1950
12.3M
        srcw = (sljit_u32)srcw;
1951
12.3M
        break;
1952
0
      case SLJIT_MOV_S32:
1953
0
        srcw = (sljit_s32)srcw;
1954
0
        break;
1955
79.2M
#endif /* SLJIT_CONFIG_X86_64 */
1956
79.2M
      }
1957
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1958
      if (SLJIT_UNLIKELY(dst_is_ereg))
1959
        return emit_mov(compiler, dst, dstw, src, srcw);
1960
#endif /* SLJIT_CONFIG_X86_32 */
1961
79.2M
    }
1962
1963
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1964
    if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1965
      SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1966
      dst = TMP_REG1;
1967
    }
1968
#endif /* SLJIT_CONFIG_X86_32 */
1969
1970
546M
    switch (op) {
1971
365M
    case SLJIT_MOV:
1972
365M
    case SLJIT_MOV_P:
1973
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1974
    case SLJIT_MOV_U32:
1975
    case SLJIT_MOV_S32:
1976
    case SLJIT_MOV32:
1977
#endif /* SLJIT_CONFIG_X86_32 */
1978
365M
      EMIT_MOV(compiler, dst, dstw, src, srcw);
1979
365M
      break;
1980
111M
    case SLJIT_MOV_U8:
1981
111M
      FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1982
111M
      break;
1983
111M
    case SLJIT_MOV_S8:
1984
0
      FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1985
0
      break;
1986
18.7M
    case SLJIT_MOV_U16:
1987
18.7M
      FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1988
18.7M
      break;
1989
18.7M
    case SLJIT_MOV_S16:
1990
0
      FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1991
0
      break;
1992
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1993
19.4M
    case SLJIT_MOV_U32:
1994
19.4M
      FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1995
19.4M
      break;
1996
31.4M
    case SLJIT_MOV_S32:
1997
31.4M
      FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1998
31.4M
      break;
1999
31.4M
    case SLJIT_MOV32:
2000
0
      compiler->mode32 = 1;
2001
0
      EMIT_MOV(compiler, dst, dstw, src, srcw);
2002
0
      compiler->mode32 = 0;
2003
0
      break;
2004
546M
#endif /* SLJIT_CONFIG_X86_64 */
2005
546M
    }
2006
2007
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2008
    if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
2009
      return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
2010
#endif /* SLJIT_CONFIG_X86_32 */
2011
546M
    return SLJIT_SUCCESS;
2012
546M
  }
2013
2014
0
  switch (op) {
2015
0
  case SLJIT_CLZ:
2016
0
  case SLJIT_CTZ:
2017
0
    return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
2018
0
  case SLJIT_REV:
2019
0
  case SLJIT_REV_U16:
2020
0
  case SLJIT_REV_S16:
2021
0
  case SLJIT_REV_U32:
2022
0
  case SLJIT_REV_S32:
2023
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2024
    if (dst_is_ereg)
2025
      op |= SLJIT_32;
2026
#endif /* SLJIT_CONFIG_X86_32 */
2027
0
    return emit_bswap(compiler, op, dst, dstw, src, srcw);
2028
0
  }
2029
2030
0
  return SLJIT_SUCCESS;
2031
0
}
2032
2033
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
2034
  sljit_u32 op_types,
2035
  sljit_s32 dst, sljit_sw dstw,
2036
  sljit_s32 src1, sljit_sw src1w,
2037
  sljit_s32 src2, sljit_sw src2w)
2038
148M
{
2039
148M
  sljit_u8* inst;
2040
148M
  sljit_u8 op_eax_imm = U8(op_types >> 24);
2041
148M
  sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2042
148M
  sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2043
148M
  sljit_u8 op_imm = U8(op_types & 0xff);
2044
2045
148M
  if (dst == src1 && dstw == src1w) {
2046
143M
    if (src2 == SLJIT_IMM) {
2047
128M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2048
128M
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2049
#else
2050
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2051
#endif
2052
6.88M
        BINARY_EAX_IMM(op_eax_imm, src2w);
2053
6.88M
      }
2054
121M
      else {
2055
121M
        BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2056
121M
      }
2057
128M
    }
2058
14.3M
    else if (FAST_IS_REG(dst)) {
2059
14.3M
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2060
14.3M
      FAIL_IF(!inst);
2061
14.3M
      *inst = op_rm;
2062
14.3M
    }
2063
19.4k
    else if (FAST_IS_REG(src2)) {
2064
      /* Special exception for sljit_emit_op_flags. */
2065
19.4k
      inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2066
19.4k
      FAIL_IF(!inst);
2067
19.4k
      *inst = op_mr;
2068
19.4k
    }
2069
0
    else {
2070
0
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2071
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2072
0
      FAIL_IF(!inst);
2073
0
      *inst = op_mr;
2074
0
    }
2075
143M
    return SLJIT_SUCCESS;
2076
143M
  }
2077
2078
  /* Only for cumulative operations. */
2079
5.04M
  if (dst == src2 && dstw == src2w) {
2080
0
    if (src1 == SLJIT_IMM) {
2081
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2082
0
      if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2083
#else
2084
      if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
2085
#endif
2086
0
        BINARY_EAX_IMM(op_eax_imm, src1w);
2087
0
      }
2088
0
      else {
2089
0
        BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
2090
0
      }
2091
0
    }
2092
0
    else if (FAST_IS_REG(dst)) {
2093
0
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
2094
0
      FAIL_IF(!inst);
2095
0
      *inst = op_rm;
2096
0
    }
2097
0
    else if (FAST_IS_REG(src1)) {
2098
0
      inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
2099
0
      FAIL_IF(!inst);
2100
0
      *inst = op_mr;
2101
0
    }
2102
0
    else {
2103
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2104
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2105
0
      FAIL_IF(!inst);
2106
0
      *inst = op_mr;
2107
0
    }
2108
0
    return SLJIT_SUCCESS;
2109
0
  }
2110
2111
  /* General version. */
2112
5.04M
  if (FAST_IS_REG(dst)) {
2113
5.04M
    EMIT_MOV(compiler, dst, 0, src1, src1w);
2114
5.04M
    if (src2 == SLJIT_IMM) {
2115
5.04M
      BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2116
5.04M
    }
2117
0
    else {
2118
0
      inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2119
0
      FAIL_IF(!inst);
2120
0
      *inst = op_rm;
2121
0
    }
2122
5.04M
  }
2123
0
  else {
2124
    /* This version requires fewer memory writes. */
2125
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2126
0
    if (src2 == SLJIT_IMM) {
2127
0
      BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2128
0
    }
2129
0
    else {
2130
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2131
0
      FAIL_IF(!inst);
2132
0
      *inst = op_rm;
2133
0
    }
2134
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2135
0
  }
2136
2137
5.04M
  return SLJIT_SUCCESS;
2138
5.04M
}
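/* Illustration (not part of this module): the op_types layout unpacked at
   the top of emit_cum_binary. Judging from the shifts, the packing (done by
   the BINARY_OPCODE macro defined earlier in this file) appears to be:
   bits 31..24 the EAX short-form opcode, 23..16 the reg,r/m opcode, 15..8
   the r/m,reg opcode, and 7..0 the /reg selector for the 81/83 immediate
   group. A hypothetical ADD entry under that assumption: */
#include <stdint.h>

#define PACK_BINARY(eax_imm, rm, mr, imm_sel) \
  (((uint32_t)(eax_imm) << 24) | ((uint32_t)(rm) << 16) | \
   ((uint32_t)(mr) << 8) | (uint32_t)(imm_sel))

static const uint32_t add_op_types =
  PACK_BINARY(0x05, 0x03, 0x01, 0 << 3);
  /* 05: ADD EAX,imm32; 03: ADD r,r/m; 01: ADD r/m,r; /0: 81/83 group ADD */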
2139
2140
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
2141
  sljit_u32 op_types,
2142
  sljit_s32 dst, sljit_sw dstw,
2143
  sljit_s32 src1, sljit_sw src1w,
2144
  sljit_s32 src2, sljit_sw src2w)
2145
63.0M
{
2146
63.0M
  sljit_u8* inst;
2147
63.0M
  sljit_u8 op_eax_imm = U8(op_types >> 24);
2148
63.0M
  sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2149
63.0M
  sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2150
63.0M
  sljit_u8 op_imm = U8(op_types & 0xff);
2151
2152
63.0M
  if (dst == src1 && dstw == src1w) {
2153
60.7M
    if (src2 == SLJIT_IMM) {
2154
60.3M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2155
60.3M
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2156
#else
2157
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2158
#endif
2159
775k
        BINARY_EAX_IMM(op_eax_imm, src2w);
2160
775k
      }
2161
59.5M
      else {
2162
59.5M
        BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2163
59.5M
      }
2164
60.3M
    }
2165
464k
    else if (FAST_IS_REG(dst)) {
2166
464k
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2167
464k
      FAIL_IF(!inst);
2168
464k
      *inst = op_rm;
2169
464k
    }
2170
0
    else if (FAST_IS_REG(src2)) {
2171
0
      inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2172
0
      FAIL_IF(!inst);
2173
0
      *inst = op_mr;
2174
0
    }
2175
0
    else {
2176
0
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2177
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2178
0
      FAIL_IF(!inst);
2179
0
      *inst = op_mr;
2180
0
    }
2181
60.7M
    return SLJIT_SUCCESS;
2182
60.7M
  }
2183
2184
  /* General version. */
2185
2.24M
  if (FAST_IS_REG(dst) && dst != src2) {
2186
2.24M
    EMIT_MOV(compiler, dst, 0, src1, src1w);
2187
2.24M
    if (src2 == SLJIT_IMM) {
2188
1.18M
      BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2189
1.18M
    }
2190
1.05M
    else {
2191
1.05M
      inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2192
1.05M
      FAIL_IF(!inst);
2193
1.05M
      *inst = op_rm;
2194
1.05M
    }
2195
2.24M
  }
2196
0
  else {
2197
    /* This version requires less memory writing. */
2198
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2199
0
    if (src2 == SLJIT_IMM) {
2200
0
      BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2201
0
    }
2202
0
    else {
2203
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2204
0
      FAIL_IF(!inst);
2205
0
      *inst = op_rm;
2206
0
    }
2207
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2208
0
  }
2209
2210
2.24M
  return SLJIT_SUCCESS;
2211
2.24M
}
2212
2213
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
2214
  sljit_s32 dst, sljit_sw dstw,
2215
  sljit_s32 src1, sljit_sw src1w,
2216
  sljit_s32 src2, sljit_sw src2w)
2217
0
{
2218
0
  sljit_u8* inst;
2219
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2220
2221
  /* Register destination. */
2222
0
  if (dst_r == src1 && src2 != SLJIT_IMM) {
2223
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2224
0
  } else if (dst_r == src2 && src1 != SLJIT_IMM) {
2225
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
2226
0
  } else if (src1 == SLJIT_IMM) {
2227
0
    if (src2 == SLJIT_IMM) {
2228
0
      EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
2229
0
      src2 = dst_r;
2230
0
      src2w = 0;
2231
0
    }
2232
2233
0
    if (src1w <= 127 && src1w >= -128) {
2234
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2235
0
      FAIL_IF(!inst);
2236
0
      *inst = IMUL_r_rm_i8;
2237
2238
0
      FAIL_IF(emit_byte(compiler, U8(src1w)));
2239
0
    }
2240
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2241
    else {
2242
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2243
      FAIL_IF(!inst);
2244
      *inst = IMUL_r_rm_i32;
2245
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2246
      FAIL_IF(!inst);
2247
      INC_SIZE(4);
2248
      sljit_unaligned_store_sw(inst, src1w);
2249
    }
2250
#else
2251
0
    else if (IS_HALFWORD(src1w)) {
2252
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2253
0
      FAIL_IF(!inst);
2254
0
      *inst = IMUL_r_rm_i32;
2255
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2256
0
      FAIL_IF(!inst);
2257
0
      INC_SIZE(4);
2258
0
      sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
2259
0
    }
2260
0
    else {
2261
0
      if (dst_r != src2)
2262
0
        EMIT_MOV(compiler, dst_r, 0, src2, src2w);
2263
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
2264
0
      FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2265
0
    }
2266
0
#endif
2267
0
  }
2268
0
  else if (src2 == SLJIT_IMM) {
2269
    /* Note: src1 is NOT immediate. */
2270
2271
0
    if (src2w <= 127 && src2w >= -128) {
2272
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2273
0
      FAIL_IF(!inst);
2274
0
      *inst = IMUL_r_rm_i8;
2275
2276
0
      FAIL_IF(emit_byte(compiler, U8(src2w)));
2277
0
    }
2278
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2279
    else {
2280
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2281
      FAIL_IF(!inst);
2282
      *inst = IMUL_r_rm_i32;
2283
2284
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2285
      FAIL_IF(!inst);
2286
      INC_SIZE(4);
2287
      sljit_unaligned_store_sw(inst, src2w);
2288
    }
2289
#else
2290
0
    else if (IS_HALFWORD(src2w)) {
2291
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2292
0
      FAIL_IF(!inst);
2293
0
      *inst = IMUL_r_rm_i32;
2294
2295
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2296
0
      FAIL_IF(!inst);
2297
0
      INC_SIZE(4);
2298
0
      sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2299
0
    } else {
2300
0
      if (dst_r != src1)
2301
0
        EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2302
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2303
0
      FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2304
0
    }
2305
0
#endif
2306
0
  } else {
2307
    /* Neither argument is immediate. */
2308
0
    if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2309
0
      dst_r = TMP_REG1;
2310
0
    EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2311
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2312
0
  }
2313
2314
0
  if (dst & SLJIT_MEM)
2315
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2316
2317
0
  return SLJIT_SUCCESS;
2318
0
}
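/* Illustration (not part of this module): the three IMUL encodings the
   function above chooses between, keyed on immediate width:
     0F AF /r   imul r, r/m            (IMUL_r_rm, no immediate)
     6B /r ib   imul r, r/m, imm8      (IMUL_r_rm_i8, -128..127, sign-extended)
     69 /r id   imul r, r/m, imm32     (IMUL_r_rm_i32)
   A hypothetical selector mirroring that logic: */
#include <stdint.h>

static uint8_t imul_opcode_for_imm(int64_t imm, int *imm_bytes)
{
  if (imm >= -128 && imm <= 127) {
    *imm_bytes = 1;
    return 0x6B;
  }
  *imm_bytes = 4;   /* on x86-64, valid only when imm fits in 32 bits;
                       otherwise the code above loads it into TMP_REG2 first */
  return 0x69;
}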
2319
2320
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2321
  sljit_s32 dst, sljit_sw dstw,
2322
  sljit_s32 src1, sljit_sw src1w,
2323
  sljit_s32 src2, sljit_sw src2w)
2324
195M
{
2325
195M
  sljit_u8* inst;
2326
195M
  sljit_s32 dst_r, done = 0;
2327
2328
  /* These cases are better left to the normal code path. */
2329
195M
  if (dst == src1 && dstw == src1w)
2330
161M
    return SLJIT_ERR_UNSUPPORTED;
2331
34.1M
  if (dst == src2 && dstw == src2w)
2332
0
    return SLJIT_ERR_UNSUPPORTED;
2333
2334
34.1M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2335
2336
34.1M
  if (FAST_IS_REG(src1)) {
2337
32.8M
    if (FAST_IS_REG(src2)) {
2338
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2339
0
      FAIL_IF(!inst);
2340
0
      *inst = LEA_r_m;
2341
0
      done = 1;
2342
0
    }
2343
32.8M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2344
32.8M
    if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
2345
32.8M
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2346
#else
2347
    if (src2 == SLJIT_IMM) {
2348
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2349
#endif
2350
32.8M
      FAIL_IF(!inst);
2351
32.8M
      *inst = LEA_r_m;
2352
32.8M
      done = 1;
2353
32.8M
    }
2354
32.8M
  }
2355
1.23M
  else if (FAST_IS_REG(src2)) {
2356
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2357
0
    if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
2358
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2359
#else
2360
    if (src1 == SLJIT_IMM) {
2361
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2362
#endif
2363
0
      FAIL_IF(!inst);
2364
0
      *inst = LEA_r_m;
2365
0
      done = 1;
2366
0
    }
2367
0
  }
2368
2369
34.1M
  if (done) {
2370
32.8M
    if (dst_r == TMP_REG1)
2371
536k
      return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2372
32.3M
    return SLJIT_SUCCESS;
2373
32.8M
  }
2374
1.23M
  return SLJIT_ERR_UNSUPPORTED;
2375
34.1M
}
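/* Illustration (not part of this module): LEA computes an effective-address
   sum without writing EFLAGS, so a flagless three-operand add (or subtract
   of an immediate) maps onto a single instruction: "lea dst, [src1 + src2]"
   or "lea dst, [src1 + imm]". A fixed-form encoding sketch: */
#include <stdint.h>

/* Encode "lea eax, [rcx + 8]": 8D /r with mod = 01 and a disp8. */
static int encode_lea_eax_rcx_8(uint8_t *buf)
{
  buf[0] = 0x8D;   /* LEA_r_m */
  buf[1] = 0x41;   /* ModRM: mod=01, reg=eax(000), rm=rcx(001) */
  buf[2] = 0x08;   /* disp8 */
  return 3;
}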
2376
2377
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2378
  sljit_s32 src1, sljit_sw src1w,
2379
  sljit_s32 src2, sljit_sw src2w)
2380
379M
{
2381
379M
  sljit_u8* inst;
2382
2383
379M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2384
379M
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2385
#else
2386
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2387
#endif
2388
60.9M
    BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2389
60.9M
    return SLJIT_SUCCESS;
2390
60.9M
  }
2391
2392
318M
  if (FAST_IS_REG(src1)) {
2393
311M
    if (src2 == SLJIT_IMM) {
2394
140M
      BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2395
140M
    }
2396
170M
    else {
2397
170M
      inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2398
170M
      FAIL_IF(!inst);
2399
170M
      *inst = CMP_r_rm;
2400
170M
    }
2401
311M
    return SLJIT_SUCCESS;
2402
311M
  }
2403
2404
7.00M
  if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
2405
6.43M
    inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2406
6.43M
    FAIL_IF(!inst);
2407
6.43M
    *inst = CMP_rm_r;
2408
6.43M
    return SLJIT_SUCCESS;
2409
6.43M
  }
2410
2411
572k
  if (src2 == SLJIT_IMM) {
2412
561k
    if (src1 == SLJIT_IMM) {
2413
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2414
0
      src1 = TMP_REG1;
2415
0
      src1w = 0;
2416
0
    }
2417
561k
    BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2418
561k
  }
2419
11.0k
  else {
2420
11.0k
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2421
11.0k
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2422
11.0k
    FAIL_IF(!inst);
2423
11.0k
    *inst = CMP_r_rm;
2424
11.0k
  }
2425
572k
  return SLJIT_SUCCESS;
2426
572k
}
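/* Illustration (not part of this module): why the EAX short form above is
   taken only for immediates outside -128..127. Approximate lengths with
   32-bit operands and no prefixes:
     83 /7 ib   cmp r/m, imm8    -> 3 bytes  (best whenever imm8 fits)
     3D id      cmp eax, imm32   -> 5 bytes  (accumulator form, no ModRM)
     81 /7 id   cmp r/m, imm32   -> 6 bytes
   so against EAX with a wide immediate the short form saves one byte. A
   hypothetical length chooser mirroring the test: */
static int cmp_imm_length(long imm, int dst_is_eax)
{
  if (imm >= -128 && imm <= 127)
    return 3;                    /* 83 /7 ib */
  return dst_is_eax ? 5 : 6;     /* 3D id vs. 81 /7 id */
}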
2427
2428
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2429
  sljit_s32 src1, sljit_sw src1w,
2430
  sljit_s32 src2, sljit_sw src2w)
2431
35.3M
{
2432
35.3M
  sljit_u8* inst;
2433
2434
35.3M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2435
35.3M
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2436
#else
2437
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2438
#endif
2439
1.87M
    BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2440
1.87M
    return SLJIT_SUCCESS;
2441
1.87M
  }
2442
2443
33.5M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2444
33.5M
  if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2445
#else
2446
  if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
2447
#endif
2448
0
    BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2449
0
    return SLJIT_SUCCESS;
2450
0
  }
2451
2452
33.5M
  if (src1 != SLJIT_IMM) {
2453
33.5M
    if (src2 == SLJIT_IMM) {
2454
31.6M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2455
31.6M
      if (IS_HALFWORD(src2w) || compiler->mode32) {
2456
31.6M
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2457
31.6M
        FAIL_IF(!inst);
2458
31.6M
        *inst = GROUP_F7;
2459
31.6M
      } else {
2460
0
        FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
2461
0
        inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
2462
0
        FAIL_IF(!inst);
2463
0
        *inst = TEST_rm_r;
2464
0
      }
2465
#else
2466
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2467
      FAIL_IF(!inst);
2468
      *inst = GROUP_F7;
2469
#endif
2470
31.6M
      return SLJIT_SUCCESS;
2471
31.6M
    }
2472
1.84M
    else if (FAST_IS_REG(src1)) {
2473
1.84M
      inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2474
1.84M
      FAIL_IF(!inst);
2475
1.84M
      *inst = TEST_rm_r;
2476
1.84M
      return SLJIT_SUCCESS;
2477
1.84M
    }
2478
33.5M
  }
2479
2480
0
  if (src2 != SLJIT_IMM) {
2481
0
    if (src1 == SLJIT_IMM) {
2482
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2483
0
      if (IS_HALFWORD(src1w) || compiler->mode32) {
2484
0
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2485
0
        FAIL_IF(!inst);
2486
0
        *inst = GROUP_F7;
2487
0
      }
2488
0
      else {
2489
0
        FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2490
0
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2491
0
        FAIL_IF(!inst);
2492
0
        *inst = TEST_rm_r;
2493
0
      }
2494
#else
2495
      inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2496
      FAIL_IF(!inst);
2497
      *inst = GROUP_F7;
2498
#endif
2499
0
      return SLJIT_SUCCESS;
2500
0
    }
2501
0
    else if (FAST_IS_REG(src2)) {
2502
0
      inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2503
0
      FAIL_IF(!inst);
2504
0
      *inst = TEST_rm_r;
2505
0
      return SLJIT_SUCCESS;
2506
0
    }
2507
0
  }
2508
2509
0
  EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2510
0
  if (src2 == SLJIT_IMM) {
2511
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2512
0
    if (IS_HALFWORD(src2w) || compiler->mode32) {
2513
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2514
0
      FAIL_IF(!inst);
2515
0
      *inst = GROUP_F7;
2516
0
    }
2517
0
    else {
2518
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2519
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2520
0
      FAIL_IF(!inst);
2521
0
      *inst = TEST_rm_r;
2522
0
    }
2523
#else
2524
    inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2525
    FAIL_IF(!inst);
2526
    *inst = GROUP_F7;
2527
#endif
2528
0
  }
2529
0
  else {
2530
0
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2531
0
    FAIL_IF(!inst);
2532
0
    *inst = TEST_rm_r;
2533
0
  }
2534
0
  return SLJIT_SUCCESS;
2535
0
}
2536
2537
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2538
  sljit_u8 mode,
2539
  sljit_s32 dst, sljit_sw dstw,
2540
  sljit_s32 src1, sljit_sw src1w,
2541
  sljit_s32 src2, sljit_sw src2w)
2542
28.1M
{
2543
28.1M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2544
28.1M
  sljit_s32 mode32;
2545
28.1M
#endif
2546
28.1M
  sljit_u8* inst;
2547
2548
28.1M
  if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
2549
28.1M
    if (dst == src1 && dstw == src1w) {
2550
15.3M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2551
15.3M
      FAIL_IF(!inst);
2552
15.3M
      inst[1] |= mode;
2553
15.3M
      return SLJIT_SUCCESS;
2554
15.3M
    }
2555
12.8M
    if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2556
1.83M
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2557
1.83M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2558
1.83M
      FAIL_IF(!inst);
2559
1.83M
      inst[1] |= mode;
2560
1.83M
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2561
1.83M
      return SLJIT_SUCCESS;
2562
1.83M
    }
2563
10.9M
    if (FAST_IS_REG(dst)) {
2564
10.9M
      EMIT_MOV(compiler, dst, 0, src1, src1w);
2565
10.9M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2566
10.9M
      FAIL_IF(!inst);
2567
10.9M
      inst[1] |= mode;
2568
10.9M
      return SLJIT_SUCCESS;
2569
10.9M
    }
2570
2571
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2572
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2573
0
    FAIL_IF(!inst);
2574
0
    inst[1] |= mode;
2575
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2576
0
    return SLJIT_SUCCESS;
2577
0
  }
2578
2579
3.48k
  if (dst == SLJIT_PREF_SHIFT_REG) {
2580
3.48k
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2581
3.48k
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2582
3.48k
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2583
3.48k
    FAIL_IF(!inst);
2584
3.48k
    inst[1] |= mode;
2585
3.48k
    return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2586
3.48k
  }
2587
2588
0
  if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2589
0
    if (src1 != dst)
2590
0
      EMIT_MOV(compiler, dst, 0, src1, src1w);
2591
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2592
0
    mode32 = compiler->mode32;
2593
0
    compiler->mode32 = 0;
2594
0
#endif
2595
0
    EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2596
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2597
0
    compiler->mode32 = mode32;
2598
0
#endif
2599
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2600
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2601
0
    FAIL_IF(!inst);
2602
0
    inst[1] |= mode;
2603
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2604
0
    compiler->mode32 = 0;
2605
0
#endif
2606
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2607
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2608
0
    compiler->mode32 = mode32;
2609
0
#endif
2610
0
    return SLJIT_SUCCESS;
2611
0
  }
2612
2613
  /* This case is complex since ecx itself may be used for
2614
     addressing, which must be supported as well. */
2615
0
  EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2616
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2617
  EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2618
#else /* !SLJIT_CONFIG_X86_32 */
2619
0
  mode32 = compiler->mode32;
2620
0
  compiler->mode32 = 0;
2621
0
  EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2622
0
  compiler->mode32 = mode32;
2623
0
#endif /* SLJIT_CONFIG_X86_32 */
2624
2625
0
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2626
0
  inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2627
0
  FAIL_IF(!inst);
2628
0
  inst[1] |= mode;
2629
2630
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2631
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2632
#else
2633
0
  compiler->mode32 = 0;
2634
0
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2635
0
  compiler->mode32 = mode32;
2636
0
#endif /* SLJIT_CONFIG_X86_32 */
2637
2638
0
  if (dst != TMP_REG1)
2639
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2640
2641
0
  return SLJIT_SUCCESS;
2642
0
}
2643
2644
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2645
  sljit_u8 mode, sljit_s32 set_flags,
2646
  sljit_s32 dst, sljit_sw dstw,
2647
  sljit_s32 src1, sljit_sw src1w,
2648
  sljit_s32 src2, sljit_sw src2w)
2649
28.1M
{
2650
  /* The CPU does not set flags if the shift count is 0. */
2651
28.1M
  if (src2 == SLJIT_IMM) {
2652
22.9M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2653
22.9M
    src2w &= compiler->mode32 ? 0x1f : 0x3f;
2654
#else /* !SLJIT_CONFIG_X86_64 */
2655
    src2w &= 0x1f;
2656
#endif /* SLJIT_CONFIG_X86_64 */
2657
22.9M
    if (src2w != 0)
2658
22.9M
      return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2659
2660
0
    if (!set_flags)
2661
0
      return emit_mov(compiler, dst, dstw, src1, src1w);
2662
    /* OR dst, src, 0 */
2663
0
    return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2664
0
      dst, dstw, src1, src1w, SLJIT_IMM, 0);
2665
0
  }
2666
2667
5.23M
  if (!set_flags)
2668
5.23M
    return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2669
2670
0
  if (!FAST_IS_REG(dst))
2671
0
    FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2672
2673
0
  FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2674
2675
0
  if (FAST_IS_REG(dst))
2676
0
    return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2677
0
  return SLJIT_SUCCESS;
2678
0
}
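/* Illustration (not part of this module): the hardware masks the shift count
   before anything else, which is what the "src2w &=" lines above mirror; a
   masked count of zero then leaves the flags untouched, so the function
   regenerates them with a CMP against zero (or an "OR dst, 0" for a constant
   zero count). A software model of the masking: */
#include <stdint.h>

static uint32_t shl32(uint32_t v, unsigned count) { return v << (count & 0x1f); }
static uint64_t shl64(uint64_t v, unsigned count) { return v << (count & 0x3f); }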
2679
2680
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2681
  sljit_s32 dst, sljit_sw dstw,
2682
  sljit_s32 src1, sljit_sw src1w,
2683
  sljit_s32 src2, sljit_sw src2w)
2684
261M
{
2685
261M
  CHECK_ERROR();
2686
261M
  CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2687
261M
  ADJUST_LOCAL_OFFSET(dst, dstw);
2688
261M
  ADJUST_LOCAL_OFFSET(src1, src1w);
2689
261M
  ADJUST_LOCAL_OFFSET(src2, src2w);
2690
2691
261M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
2692
261M
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2693
261M
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2694
261M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2695
261M
  compiler->mode32 = op & SLJIT_32;
2696
261M
#endif
2697
2698
261M
  switch (GET_OPCODE(op)) {
2699
141M
  case SLJIT_ADD:
2700
141M
    if (!HAS_FLAGS(op)) {
2701
141M
      if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2702
21.9M
        return compiler->error;
2703
141M
    }
2704
119M
    return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2705
119M
      dst, dstw, src1, src1w, src2, src2w);
2706
0
  case SLJIT_ADDC:
2707
0
    return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2708
0
      dst, dstw, src1, src1w, src2, src2w);
2709
63.5M
  case SLJIT_SUB:
2710
63.5M
    if (src1 == SLJIT_IMM && src1w == 0)
2711
0
      return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2712
2713
63.5M
    if (!HAS_FLAGS(op)) {
2714
44.4M
      if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2715
541k
        return compiler->error;
2716
43.9M
      if (FAST_IS_REG(dst) && src2 == dst) {
2717
87.5k
        FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2718
87.5k
        return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2719
87.5k
      }
2720
43.9M
    }
2721
2722
62.9M
    return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2723
62.9M
      dst, dstw, src1, src1w, src2, src2w);
2724
0
  case SLJIT_SUBC:
2725
0
    return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2726
0
      dst, dstw, src1, src1w, src2, src2w);
2727
0
  case SLJIT_MUL:
2728
0
    return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2729
8.12M
  case SLJIT_AND:
2730
8.12M
    return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2731
8.12M
      dst, dstw, src1, src1w, src2, src2w);
2732
20.3M
  case SLJIT_OR:
2733
20.3M
    return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2734
20.3M
      dst, dstw, src1, src1w, src2, src2w);
2735
72.8k
  case SLJIT_XOR:
2736
72.8k
    if (!HAS_FLAGS(op)) {
2737
66.5k
      if (src2 == SLJIT_IMM && src2w == -1)
2738
0
        return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
2739
66.5k
      if (src1 == SLJIT_IMM && src1w == -1)
2740
0
        return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
2741
66.5k
    }
2742
2743
72.8k
    return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2744
72.8k
      dst, dstw, src1, src1w, src2, src2w);
2745
22.4M
  case SLJIT_SHL:
2746
22.4M
  case SLJIT_MSHL:
2747
22.4M
    return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2748
22.4M
      dst, dstw, src1, src1w, src2, src2w);
2749
5.71M
  case SLJIT_LSHR:
2750
5.71M
  case SLJIT_MLSHR:
2751
5.71M
    return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2752
5.71M
      dst, dstw, src1, src1w, src2, src2w);
2753
0
  case SLJIT_ASHR:
2754
0
  case SLJIT_MASHR:
2755
0
    return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2756
0
      dst, dstw, src1, src1w, src2, src2w);
2757
0
  case SLJIT_ROTL:
2758
0
    return emit_shift_with_flags(compiler, ROL, 0,
2759
0
      dst, dstw, src1, src1w, src2, src2w);
2760
0
  case SLJIT_ROTR:
2761
0
    return emit_shift_with_flags(compiler, ROR, 0,
2762
0
      dst, dstw, src1, src1w, src2, src2w);
2763
261M
  }
2764
2765
0
  return SLJIT_SUCCESS;
2766
261M
}
2767
2768
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2769
  sljit_s32 src1, sljit_sw src1w,
2770
  sljit_s32 src2, sljit_sw src2w)
2771
414M
{
2772
414M
  sljit_s32 opcode = GET_OPCODE(op);
2773
2774
414M
  CHECK_ERROR();
2775
414M
  CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2776
2777
414M
  if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2778
0
    SLJIT_SKIP_CHECKS(compiler);
2779
0
    return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2780
0
  }
2781
2782
414M
  ADJUST_LOCAL_OFFSET(src1, src1w);
2783
414M
  ADJUST_LOCAL_OFFSET(src2, src2w);
2784
2785
414M
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2786
414M
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2787
414M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2788
414M
  compiler->mode32 = op & SLJIT_32;
2789
414M
#endif
2790
2791
414M
  if (opcode == SLJIT_SUB)
2792
379M
    return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2793
2794
35.3M
  return emit_test_binary(compiler, src1, src1w, src2, src2w);
2795
414M
}
2796
2797
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2798
  sljit_s32 dst_reg,
2799
  sljit_s32 src1, sljit_sw src1w,
2800
  sljit_s32 src2, sljit_sw src2w)
2801
0
{
2802
0
  sljit_u8* inst;
2803
0
  sljit_sw dstw = 0;
2804
0
2805
0
  CHECK_ERROR();
2806
0
  CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2807
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
2808
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
2809
0
2810
0
  CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2811
0
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2812
0
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2813
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2814
0
  compiler->mode32 = op & SLJIT_32;
2815
0
#endif
2816
0
2817
0
  switch (GET_OPCODE(op)) {
2818
0
  case SLJIT_MULADD:
2819
0
    FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
2820
0
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
2821
0
    FAIL_IF(!inst);
2822
0
    *inst = ADD_rm_r;
2823
0
    return SLJIT_SUCCESS;
2824
0
  }
2825
0
2826
0
  return SLJIT_SUCCESS;
2827
0
}
2828
2829
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2830
  sljit_s32 dst_reg,
2831
  sljit_s32 src1_reg,
2832
  sljit_s32 src2_reg,
2833
  sljit_s32 src3, sljit_sw src3w)
2834
0
{
2835
0
  sljit_s32 is_rotate, is_left, move_src1;
2836
0
  sljit_u8* inst;
2837
0
  sljit_sw src1w = 0;
2838
0
  sljit_sw dstw = 0;
2839
0
  /* The whole register must be saved even for 32 bit operations. */
2840
0
  sljit_u8 restore_ecx = 0;
2841
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2842
0
  sljit_sw src2w = 0;
2843
0
  sljit_s32 restore_sp4 = 0;
2844
0
#endif /* SLJIT_CONFIG_X86_32 */
2845
0
2846
0
  CHECK_ERROR();
2847
0
  CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2848
0
  ADJUST_LOCAL_OFFSET(src3, src3w);
2849
0
2850
0
  CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2851
0
  CHECK_EXTRA_REGS(src3, src3w, (void)0);
2852
0
2853
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2854
0
  compiler->mode32 = op & SLJIT_32;
2855
0
#endif /* SLJIT_CONFIG_X86_64 */
2856
0
2857
0
  if (src3 == SLJIT_IMM) {
2858
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2859
0
    src3w &= 0x1f;
2860
0
#else /* !SLJIT_CONFIG_X86_32 */
2861
0
    src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2862
0
#endif /* SLJIT_CONFIG_X86_32 */
2863
0
2864
0
    if (src3w == 0)
2865
0
      return SLJIT_SUCCESS;
2866
0
  }
2867
0
2868
0
  is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2869
0
2870
0
  is_rotate = (src1_reg == src2_reg);
2871
0
  CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
2872
0
  CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
2873
0
2874
0
  if (is_rotate)
2875
0
    return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
2876
0
2877
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2878
0
  if (src2_reg & SLJIT_MEM) {
2879
0
    EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
2880
0
    src2_reg = TMP_REG1;
2881
0
  }
2882
0
#endif /* SLJIT_CONFIG_X86_32 */
2883
0
2884
0
  if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
2885
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2886
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2887
0
    src1_reg = TMP_REG1;
2888
0
    src1w = 0;
2889
0
#else /* !SLJIT_CONFIG_X86_64 */
2890
0
    if (src2_reg != TMP_REG1) {
2891
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2892
0
      src1_reg = TMP_REG1;
2893
0
      src1w = 0;
2894
0
    } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2895
0
      restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2896
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2897
0
      EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2898
0
      src1_reg = restore_sp4;
2899
0
      src1w = 0;
2900
0
    } else {
2901
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2902
0
      restore_sp4 = src1_reg;
2903
0
    }
2904
0
#endif /* SLJIT_CONFIG_X86_64 */
2905
0
2906
0
    if (src3 != SLJIT_PREF_SHIFT_REG)
2907
0
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2908
0
  } else {
2909
0
    if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2910
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2911
0
      compiler->mode32 = 0;
2912
0
#endif /* SLJIT_CONFIG_X86_64 */
2913
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2914
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2915
0
      compiler->mode32 = op & SLJIT_32;
2916
0
#endif /* SLJIT_CONFIG_X86_64 */
2917
0
      src2_reg = TMP_REG1;
2918
0
      restore_ecx = 1;
2919
0
    }
2920
0
2921
0
    move_src1 = 0;
2922
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2923
0
    if (dst_reg != src1_reg) {
2924
0
      if (dst_reg != src3) {
2925
0
        EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2926
0
        src1_reg = dst_reg;
2927
0
        src1w = 0;
2928
0
      } else
2929
0
        move_src1 = 1;
2930
0
    }
2931
0
#else /* !SLJIT_CONFIG_X86_64 */
2932
0
    if (dst_reg & SLJIT_MEM) {
2933
0
      if (src2_reg != TMP_REG1) {
2934
0
        EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2935
0
        src1_reg = TMP_REG1;
2936
0
        src1w = 0;
2937
0
      } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2938
0
        restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2939
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2940
0
        EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2941
0
        src1_reg = restore_sp4;
2942
0
        src1w = 0;
2943
0
      } else {
2944
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2945
0
        restore_sp4 = src1_reg;
2946
0
      }
2947
0
    } else if (dst_reg != src1_reg) {
2948
0
      if (dst_reg != src3) {
2949
0
        EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2950
0
        src1_reg = dst_reg;
2951
0
        src1w = 0;
2952
0
      } else
2953
0
        move_src1 = 1;
2954
0
    }
2955
0
#endif /* SLJIT_CONFIG_X86_64 */
2956
0
2957
0
    if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2958
0
      if (!restore_ecx) {
2959
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2960
0
        compiler->mode32 = 0;
2961
0
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2962
0
        compiler->mode32 = op & SLJIT_32;
2963
0
        restore_ecx = 1;
2964
0
#else /* !SLJIT_CONFIG_X86_64 */
2965
0
        if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
2966
0
          EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2967
0
          restore_ecx = 1;
2968
0
        } else {
2969
0
          EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2970
0
          restore_ecx = 2;
2971
0
        }
2972
0
#endif /* SLJIT_CONFIG_X86_64 */
2973
0
      }
2974
0
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2975
0
    }
2976
0
2977
0
    if (move_src1) {
2978
0
      EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2979
0
      src1_reg = dst_reg;
2980
0
      src1w = 0;
2981
0
    }
2982
0
  }
2983
0
2984
0
  inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
2985
0
  FAIL_IF(!inst);
2986
0
  inst[0] = GROUP_0F;
2987
0
2988
0
  if (src3 == SLJIT_IMM) {
2989
0
    inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2990
0
2991
0
    /* Immediate argument is added separately. */
2992
0
    FAIL_IF(emit_byte(compiler, U8(src3w)));
2993
0
  } else
2994
0
    inst[1] = U8(is_left ? SHLD : SHRD);
2995
0
2996
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2997
0
  if (restore_ecx) {
2998
0
    compiler->mode32 = 0;
2999
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
3000
0
  }
3001
0
3002
0
  if (src1_reg != dst_reg) {
3003
0
    compiler->mode32 = op & SLJIT_32;
3004
0
    return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
3005
0
  }
3006
0
#else /* !SLJIT_CONFIG_X86_64 */
3007
0
  if (restore_ecx)
3008
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
3009
0
3010
0
  if (src1_reg != dst_reg)
3011
0
    EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
3012
0
3013
0
  if (restore_sp4)
3014
0
    return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
3015
0
#endif /* SLJIT_CONFIG_X86_32 */
3016
0
3017
0
  return SLJIT_SUCCESS;
3018
0
}
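/* Illustration (not part of this module): the SHLD/SHRD core this function
   arranges its operands for. A 64-bit software model of the two funnel
   shifts, assuming the count is already masked and nonzero (the function
   returns early when it is zero): */
#include <stdint.h>

static uint64_t shld64(uint64_t dst, uint64_t src, unsigned n)  /* n in 1..63 */
{
  return (dst << n) | (src >> (64 - n));
}

static uint64_t shrd64(uint64_t dst, uint64_t src, unsigned n)  /* n in 1..63 */
{
  return (dst >> n) | (src << (64 - n));
}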
3019
3020
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3021
  sljit_s32 src, sljit_sw srcw)
3022
177k
{
3023
177k
  CHECK_ERROR();
3024
177k
  CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3025
177k
  ADJUST_LOCAL_OFFSET(src, srcw);
3026
3027
177k
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3028
3029
177k
  switch (op) {
3030
177k
  case SLJIT_FAST_RETURN:
3031
177k
    return emit_fast_return(compiler, src, srcw);
3032
0
  case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3033
    /* Don't adjust the shadow stack if it isn't enabled. */
3034
0
    if (!cpu_has_shadow_stack ())
3035
0
      return SLJIT_SUCCESS;
3036
0
    return adjust_shadow_stack(compiler, src, srcw);
3037
0
  case SLJIT_PREFETCH_L1:
3038
0
  case SLJIT_PREFETCH_L2:
3039
0
  case SLJIT_PREFETCH_L3:
3040
0
  case SLJIT_PREFETCH_ONCE:
3041
0
    return emit_prefetch(compiler, op, src, srcw);
3042
177k
  }
3043
3044
0
  return SLJIT_SUCCESS;
3045
177k
}
3046
3047
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3048
  sljit_s32 dst, sljit_sw dstw)
3049
157k
{
3050
157k
  CHECK_ERROR();
3051
157k
  CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3052
157k
  ADJUST_LOCAL_OFFSET(dst, dstw);
3053
3054
157k
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3055
3056
157k
  switch (op) {
3057
157k
  case SLJIT_FAST_ENTER:
3058
157k
    return emit_fast_enter(compiler, dst, dstw);
3059
0
  case SLJIT_GET_RETURN_ADDRESS:
3060
0
    return sljit_emit_get_return_address(compiler, dst, dstw);
3061
157k
  }
3062
3063
0
  return SLJIT_SUCCESS;
3064
157k
}
3065
3066
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3067
837k
{
3068
837k
  CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3069
3070
837k
  if (type == SLJIT_GP_REGISTER) {
3071
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3072
    if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
3073
      return -1;
3074
#endif /* SLJIT_CONFIG_X86_32 */
3075
620k
    return reg_map[reg];
3076
620k
  }
3077
3078
217k
  if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
3079
0
    return -1;
3080
3081
217k
  return freg_map[reg];
3082
217k
}
3083
3084
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3085
  void *instruction, sljit_u32 size)
3086
194k
{
3087
194k
  sljit_u8 *inst;
3088
3089
194k
  CHECK_ERROR();
3090
194k
  CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3091
3092
194k
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3093
194k
  FAIL_IF(!inst);
3094
194k
  INC_SIZE(size);
3095
194k
  SLJIT_MEMCPY(inst, instruction, size);
3096
194k
  return SLJIT_SUCCESS;
3097
194k
}
3098
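Because sljit_emit_op_custom copies the given bytes verbatim into the instruction stream, a caller can inject any instruction the backend has no dedicated opcode for. A hedged usage sketch (the MFENCE encoding 0F AE F0 is taken from the Intel SDM; the wrapper name is hypothetical and assumes an already-created compiler):

/* Emit a memory fence on x86 through the raw-bytes escape hatch. */
static sljit_s32 emit_mfence(struct sljit_compiler *compiler)
{
  static sljit_u8 mfence[3] = { 0x0f, 0xae, 0xf0 }; /* MFENCE */
  return sljit_emit_op_custom(compiler, mfence, sizeof(mfence));
}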
3099
/* --------------------------------------------------------------------- */
3100
/*  Floating point operators                                             */
3101
/* --------------------------------------------------------------------- */
3102
3103
/* Up to 3 words of alignment padding + 4 * 16 byte constants. */
3104
static sljit_u32 sse2_data[3 + (4 * 4)];
3105
static sljit_u32 *sse2_buffer;
3106
3107
static void init_compiler(void)
3108
1
{
3109
1
  get_cpu_features();
3110
3111
  /* Align to 16 bytes. */
3112
1
  sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
3113
3114
  /* Single precision constants (each constant is 16 bytes long). */
3115
1
  sse2_buffer[0] = 0x80000000;
3116
1
  sse2_buffer[4] = 0x7fffffff;
3117
  /* Double precision constants (each constant is 16 bytes long). */
3118
1
  sse2_buffer[8] = 0;
3119
1
  sse2_buffer[9] = 0x80000000;
3120
1
  sse2_buffer[12] = 0xffffffff;
3121
1
  sse2_buffer[13] = 0x7fffffff;
3122
1
}
3123
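init_compiler above places four 16-byte constants in sse2_data, aligning the 4-byte-aligned array up to a 16-byte boundary, which is why three extra sljit_u32 slots of slack are reserved. In little-endian word order the aligned buffer reads: word 0 = 0x80000000 (float sign-flip mask), word 4 = 0x7fffffff (float abs mask), words 8..9 = 0x8000000000000000 (double sign-flip mask), words 12..13 = 0x7fffffffffffffff (double abs mask); the remaining words stay zero because the array has static storage duration. A hypothetical check of the double abs mask, not part of sljit:

#include <stdint.h>
#include <string.h>

/* Applying the mask built from words [12..13] clears only the sign bit. */
static double apply_abs_mask(double x)
{
  uint64_t bits, mask = 0x7fffffffffffffffULL; /* words 12..13 above */
  memcpy(&bits, &x, sizeof(bits));
  bits &= mask;
  memcpy(&x, &bits, sizeof(x));
  return x; /* apply_abs_mask(-1.5) == 1.5 */
}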
3124
static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
3125
  sljit_uw op,
3126
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3127
147M
{
3128
147M
  sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
3129
147M
  FAIL_IF(!inst);
3130
147M
  inst[0] = GROUP_0F;
3131
147M
  inst[1] = op & 0xff;
3132
147M
  return SLJIT_SUCCESS;
3133
147M
}
3134
3135
static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
3136
  sljit_uw op,
3137
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3138
0
{
3139
0
  sljit_u8 *inst;
3140
3141
0
  SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
3142
3143
0
  inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
3144
0
  FAIL_IF(!inst);
3145
0
  inst[0] = GROUP_0F;
3146
0
  inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
3147
0
  inst[2] = op & 0xff;
3148
0
  return SLJIT_SUCCESS;
3149
0
}
3150
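emit_groupf emits two-byte opcodes from the 0F map, while emit_groupf_ext inserts the extra 0F 38 or 0F 3A escape byte for the three-byte opcode maps. A standalone sketch of that byte selection, with hypothetical flag names rather than sljit's:

#include <stdint.h>
#include <stddef.h>

enum { MY_OP_0F38 = 1u << 16, MY_OP_0F3A = 1u << 17 }; /* hypothetical flags */

static size_t put_opcode(uint8_t *buf, uint32_t op)
{
  size_t i = 0;
  buf[i++] = 0x0f;                      /* two-byte escape */
  if (op & MY_OP_0F38)
    buf[i++] = 0x38;                    /* three-byte map 0F 38 */
  else if (op & MY_OP_0F3A)
    buf[i++] = 0x3a;                    /* three-byte map 0F 3A */
  buf[i++] = (uint8_t)(op & 0xff);      /* final opcode byte */
  return i;
}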
3151
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
3152
  sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3153
0
{
3154
0
  return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
3155
0
}
3156
3157
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
3158
  sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
3159
0
{
3160
0
  return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
3161
0
}
3162
3163
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3164
  sljit_s32 dst, sljit_sw dstw,
3165
  sljit_s32 src, sljit_sw srcw)
3166
0
{
3167
0
  sljit_s32 dst_r;
3168
3169
0
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3170
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3171
3172
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3173
0
  if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3174
0
    compiler->mode32 = 0;
3175
0
#endif
3176
3177
0
  FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
3178
3179
0
  if (dst & SLJIT_MEM)
3180
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3181
0
  return SLJIT_SUCCESS;
3182
0
}
3183
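CVTTSD2SI is the truncating form of the conversion, so the result rounds toward zero regardless of the current MXCSR rounding mode, which is exactly the C cast semantics sljit needs here. A one-line reminder of that behavior (illustrative, not sljit code):

#include <stdio.h>

int main(void)
{
  /* CVTTSD2SI truncates toward zero, matching the C cast: */
  printf("%d %d\n", (int)1.9, (int)-1.9); /* prints "1 -1" */
  return 0;
}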
3184
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3185
  sljit_s32 dst, sljit_sw dstw,
3186
  sljit_s32 src, sljit_sw srcw)
3187
0
{
3188
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3189
3190
0
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3191
3192
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3193
0
  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3194
0
    compiler->mode32 = 0;
3195
0
#endif
3196
3197
0
  if (src == SLJIT_IMM) {
3198
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3199
0
    if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3200
0
      srcw = (sljit_s32)srcw;
3201
0
#endif
3202
0
    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
3203
0
    src = TMP_REG1;
3204
0
    srcw = 0;
3205
0
  }
3206
3207
0
  FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
3208
3209
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3210
0
  compiler->mode32 = 1;
3211
0
#endif
3212
0
  if (dst_r == TMP_FREG)
3213
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3214
0
  return SLJIT_SUCCESS;
3215
0
}
3216
3217
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3218
  sljit_s32 src1, sljit_sw src1w,
3219
  sljit_s32 src2, sljit_sw src2w)
3220
0
{
3221
0
  switch (GET_FLAG_TYPE(op)) {
3222
0
  case SLJIT_ORDERED_EQUAL:
3223
    /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
3224
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3225
0
    FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
3226
3227
    /* EQ */
3228
0
    FAIL_IF(emit_byte(compiler, 0));
3229
3230
0
    src1 = TMP_FREG;
3231
0
    src2 = TMP_FREG;
3232
0
    src2w = 0;
3233
0
    break;
3234
3235
0
  case SLJIT_ORDERED_LESS:
3236
0
  case SLJIT_UNORDERED_OR_GREATER:
3237
    /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
3238
0
    if (!FAST_IS_REG(src2)) {
3239
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3240
0
      src2 = TMP_FREG;
3241
0
    }
3242
3243
0
    return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
3244
0
  }
3245
3246
0
  if (!FAST_IS_REG(src1)) {
3247
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3248
0
    src1 = TMP_FREG;
3249
0
  }
3250
3251
0
  return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
3252
0
}
3253
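The operand juggling above exists because UCOMISD maps the comparison onto ZF/PF/CF in a way that would force a separate parity test for some predicates unless the operands are swapped; compiling SLJIT_ORDERED_LESS as an "above" test with src1/src2 reversed keeps it a single flag check. A small model of the flag mapping as documented in the Intel SDM (illustrative only, not sljit code):

#include <math.h>

/* UCOMISD flag results: unordered -> ZF=PF=CF=1; a > b -> 0,0,0;
   a < b -> CF=1; a == b -> ZF=1. */
static void ucomisd_flags(double a, double b, int *zf, int *pf, int *cf)
{
  if (isnan(a) || isnan(b)) { *zf = *pf = *cf = 1; return; }
  *pf = 0;
  *zf = (a == b);
  *cf = (a < b);
}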
3254
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3255
  sljit_s32 dst, sljit_sw dstw,
3256
  sljit_s32 src, sljit_sw srcw)
3257
0
{
3258
0
  sljit_s32 dst_r;
3259
0
  sljit_u8 *inst;
3260
3261
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3262
0
  compiler->mode32 = 1;
3263
0
#endif
3264
3265
0
  CHECK_ERROR();
3266
0
  SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3267
3268
0
  if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3269
0
    if (FAST_IS_REG(dst))
3270
0
      return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3271
0
    if (FAST_IS_REG(src))
3272
0
      return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3273
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3274
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3275
0
  }
3276
3277
0
  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
3278
0
    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3279
0
    if (FAST_IS_REG(src)) {
3280
      /* We overwrite the high bits of the source. From SLJIT's point of view,
3281
         this is not an issue.
3282
         Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
3283
0
      FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3284
0
    } else {
3285
0
      FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3286
0
      src = TMP_FREG;
3287
0
    }
3288
3289
0
    FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3290
0
    if (dst_r == TMP_FREG)
3291
0
      return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3292
0
    return SLJIT_SUCCESS;
3293
0
  }
3294
3295
0
  if (FAST_IS_REG(dst)) {
3296
0
    dst_r = (dst == src) ? TMP_FREG : dst;
3297
3298
0
    if (src & SLJIT_MEM)
3299
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3300
3301
0
    FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3302
3303
0
    inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
3304
0
    inst[0] = GROUP_0F;
3305
    /* Same as PSRLD_x / PSRLQ_x */
3306
0
    inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3307
3308
0
    if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3309
0
      inst[2] |= 2 << 3;
3310
0
      FAIL_IF(emit_byte(compiler, 1));
3311
0
    } else {
3312
0
      inst[2] |= 6 << 3;
3313
0
      FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3314
0
    }
3315
3316
0
    if (dst_r != TMP_FREG)
3317
0
      dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3318
0
    return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3319
0
  }
3320
3321
0
  FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3322
3323
0
  switch (GET_OPCODE(op)) {
3324
0
  case SLJIT_NEG_F64:
3325
0
    FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3326
0
    break;
3327
3328
0
  case SLJIT_ABS_F64:
3329
0
    FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3330
0
    break;
3331
0
  }
3332
3333
0
  return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3334
0
}
3335
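On the register path of sljit_emit_fop1 above, the neg/abs mask is materialized without touching memory: PCMPEQD of a register with itself yields all ones, and a single logical shift then carves out either the sign bit or everything but the sign bit. The same trick in intrinsics form (a sketch under SSE2, not sljit code):

#include <emmintrin.h>

static __m128i abs_mask_ps(void)
{
  __m128i ones = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128());
  return _mm_srli_epi32(ones, 1);   /* PSRLD: 0x7fffffff per 32-bit lane */
}

static __m128i sign_mask_pd(void)
{
  __m128i ones = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128());
  return _mm_slli_epi64(ones, 63);  /* PSLLQ: 0x8000... per 64-bit lane */
}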
3336
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3337
  sljit_s32 dst, sljit_sw dstw,
3338
  sljit_s32 src1, sljit_sw src1w,
3339
  sljit_s32 src2, sljit_sw src2w)
3340
0
{
3341
0
  sljit_s32 dst_r;
3342
0
3343
0
  CHECK_ERROR();
3344
0
  CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3345
0
  ADJUST_LOCAL_OFFSET(dst, dstw);
3346
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3347
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
3348
0
3349
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3350
0
  compiler->mode32 = 1;
3351
0
#endif
3352
0
3353
0
  if (FAST_IS_REG(dst)) {
3354
0
    dst_r = dst;
3355
0
    if (dst == src1)
3356
0
      ; /* Do nothing here. */
3357
0
    else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {
3358
0
      /* Swap arguments. */
3359
0
      src2 = src1;
3360
0
      src2w = src1w;
3361
0
    } else if (dst != src2)
3362
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3363
0
    else {
3364
0
      dst_r = TMP_FREG;
3365
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3366
0
    }
3367
0
  } else {
3368
0
    dst_r = TMP_FREG;
3369
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3370
0
  }
3371
0
3372
0
  switch (GET_OPCODE(op)) {
3373
0
  case SLJIT_ADD_F64:
3374
0
    FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3375
0
    break;
3376
0
3377
0
  case SLJIT_SUB_F64:
3378
0
    FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3379
0
    break;
3380
0
3381
0
  case SLJIT_MUL_F64:
3382
0
    FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3383
0
    break;
3384
0
3385
0
  case SLJIT_DIV_F64:
3386
0
    FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3387
0
    break;
3388
0
  }
3389
0
3390
0
  if (dst_r != dst)
3391
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3392
0
  return SLJIT_SUCCESS;
3393
0
}
3394
3395
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3396
  sljit_s32 dst_freg,
3397
  sljit_s32 src1, sljit_sw src1w,
3398
  sljit_s32 src2, sljit_sw src2w)
3399
0
{
3400
0
  sljit_uw pref;
3401
0
3402
0
  CHECK_ERROR();
3403
0
  CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3404
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3405
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
3406
0
3407
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3408
0
  compiler->mode32 = 1;
3409
0
#endif
3410
0
3411
0
  if (dst_freg == src1) {
3412
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3413
0
    pref = EX86_SELECT_66(op) | EX86_SSE2;
3414
0
    FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
3415
0
    FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3416
0
    return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
3417
0
  }
3418
0
3419
0
  if (src1 & SLJIT_MEM) {
3420
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3421
0
    src1 = TMP_FREG;
3422
0
    src1w = 0;
3423
0
  }
3424
0
3425
0
  if (dst_freg != src2)
3426
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
3427
0
3428
0
  pref = EX86_SELECT_66(op) | EX86_SSE2;
3429
0
  FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
3430
0
  FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3431
0
  return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
3432
0
}
3433
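The XOR/AND/XOR chain in sljit_emit_fop2r is the classic branch-free sign merge: dst = src1 ^ ((src1 ^ src2) & sign_mask) keeps src1's magnitude and adopts src2's sign bit. An integer-domain sketch of the same computation (illustrative, not sljit code):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Replace the sign bit of src1 with the sign bit of src2. */
static double merge_sign(double src1, double src2)
{
  uint64_t a, b;
  memcpy(&a, &src1, sizeof(a));
  memcpy(&b, &src2, sizeof(b));
  a ^= (a ^ b) & 0x8000000000000000ULL;
  memcpy(&src1, &a, sizeof(a));
  return src1;
}

int main(void)
{
  printf("%f\n", merge_sign(3.5, -1.0));  /* -3.500000 */
  printf("%f\n", merge_sign(-2.0, 1.0));  /*  2.000000 */
  return 0;
}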
3434
/* --------------------------------------------------------------------- */
3435
/*  Conditional instructions                                             */
3436
/* --------------------------------------------------------------------- */
3437
3438
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3439
250M
{
3440
250M
  sljit_u8 *inst;
3441
250M
  struct sljit_label *label;
3442
3443
250M
  CHECK_ERROR_PTR();
3444
250M
  CHECK_PTR(check_sljit_emit_label(compiler));
3445
3446
250M
  if (compiler->last_label && compiler->last_label->size == compiler->size)
3447
63.5M
    return compiler->last_label;
3448
3449
186M
  label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3450
186M
  PTR_FAIL_IF(!label);
3451
186M
  set_label(label, compiler);
3452
3453
186M
  inst = (sljit_u8*)ensure_buf(compiler, 1);
3454
186M
  PTR_FAIL_IF(!inst);
3455
186M
  inst[0] = SLJIT_INST_LABEL;
3456
3457
186M
  return label;
3458
186M
}
3459
3460
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
3461
  sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
3462
0
{
3463
0
  sljit_uw mask, size;
3464
0
  sljit_u8 *inst;
3465
0
  struct sljit_label *label;
3466
0
  struct sljit_label *next_label;
3467
0
  struct sljit_extended_label *ext_label;
3468
0
3469
0
  CHECK_ERROR_PTR();
3470
0
  CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));
3471
0
3472
0
  sljit_reset_read_only_buffers(buffers);
3473
0
3474
0
  if (alignment <= SLJIT_LABEL_ALIGN_1) {
3475
0
    SLJIT_SKIP_CHECKS(compiler);
3476
0
    label = sljit_emit_label(compiler);
3477
0
    PTR_FAIL_IF(!label);
3478
0
  } else {
3479
0
    /* The used space is filled with NOPs. */
3480
0
    mask = ((sljit_uw)1 << alignment) - 1;
3481
0
    compiler->size += mask;
3482
0
3483
0
    inst = (sljit_u8*)ensure_buf(compiler, 1);
3484
0
    PTR_FAIL_IF(!inst);
3485
0
    inst[0] = SLJIT_INST_LABEL;
3486
0
3487
0
    ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
3488
0
    PTR_FAIL_IF(!ext_label);
3489
0
    set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
3490
0
    label = &ext_label->label;
3491
0
  }
3492
0
3493
0
  if (buffers == NULL)
3494
0
    return label;
3495
0
3496
0
  next_label = label;
3497
0
3498
0
  while (1) {
3499
0
    buffers->u.label = next_label;
3500
0
    size = buffers->size;
3501
0
3502
0
    while (size >= 4) {
3503
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
3504
0
      PTR_FAIL_IF(!inst);
3505
0
      INC_SIZE(4);
3506
0
      inst[0] = NOP;
3507
0
      inst[1] = NOP;
3508
0
      inst[2] = NOP;
3509
0
      inst[3] = NOP;
3510
0
      size -= 4;
3511
0
    }
3512
0
3513
0
    if (size > 0) {
3514
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3515
0
      PTR_FAIL_IF(!inst);
3516
0
      INC_SIZE(size);
3517
0
3518
0
      do {
3519
0
        *inst++ = NOP;
3520
0
      } while (--size != 0);
3521
0
    }
3522
0
3523
0
    buffers = buffers->next;
3524
0
3525
0
    if (buffers == NULL)
3526
0
      break;
3527
0
3528
0
    SLJIT_SKIP_CHECKS(compiler);
3529
0
    next_label = sljit_emit_label(compiler);
3530
0
    PTR_FAIL_IF(!next_label);
3531
0
  }
3532
0
3533
0
  return label;
3534
0
}
3535
3536
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3537
541M
{
3538
541M
  sljit_u8 *inst;
3539
541M
  struct sljit_jump *jump;
3540
3541
541M
  CHECK_ERROR_PTR();
3542
541M
  CHECK_PTR(check_sljit_emit_jump(compiler, type));
3543
3544
541M
  jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3545
541M
  PTR_FAIL_IF_NULL(jump);
3546
541M
  set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3547
541M
  type &= 0xff;
3548
3549
541M
  jump->addr = compiler->size;
3550
  /* Worst case size. */
3551
541M
  compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
3552
541M
  inst = (sljit_u8*)ensure_buf(compiler, 1);
3553
541M
  PTR_FAIL_IF_NULL(inst);
3554
3555
541M
  inst[0] = SLJIT_INST_JUMP;
3556
541M
  return jump;
3557
541M
}
3558
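Reserving JUMP_MAX_SIZE or CJUMP_MAX_SIZE up front reflects the usual two-pass scheme: the worst-case encoding is assumed first, and the final pass emits the short form when the displacement fits. A sketch of the size difference for a conditional jump (buf, cond, and disp are hypothetical; disp is measured from the end of the instruction):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

static size_t emit_jcc(uint8_t *buf, uint8_t cond, int32_t disp)
{
  if (disp >= -128 && disp <= 127) {
    buf[0] = (uint8_t)(0x70 | cond);   /* Jcc rel8: 2 bytes */
    buf[1] = (uint8_t)disp;
    return 2;
  }
  buf[0] = 0x0f;
  buf[1] = (uint8_t)(0x80 | cond);     /* Jcc rel32: 6 bytes */
  memcpy(buf + 2, &disp, 4);
  return 6;
}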
3559
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3560
7.18M
{
3561
7.18M
  sljit_u8 *inst;
3562
7.18M
  struct sljit_jump *jump;
3563
3564
7.18M
  CHECK_ERROR();
3565
7.18M
  CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3566
7.18M
  ADJUST_LOCAL_OFFSET(src, srcw);
3567
3568
7.18M
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3569
3570
7.18M
  if (src == SLJIT_IMM) {
3571
6.82M
    jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3572
6.82M
    FAIL_IF_NULL(jump);
3573
6.82M
    set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3574
6.82M
    jump->u.target = (sljit_uw)srcw;
3575
3576
6.82M
    jump->addr = compiler->size;
3577
    /* Worst case size. */
3578
6.82M
    compiler->size += JUMP_MAX_SIZE;
3579
6.82M
    inst = (sljit_u8*)ensure_buf(compiler, 1);
3580
6.82M
    FAIL_IF_NULL(inst);
3581
3582
6.82M
    inst[0] = SLJIT_INST_JUMP;
3583
6.82M
  } else {
3584
363k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3585
    /* REX_W is not necessary (src is not immediate). */
3586
363k
    compiler->mode32 = 1;
3587
363k
#endif
3588
363k
    inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3589
363k
    FAIL_IF(!inst);
3590
363k
    inst[0] = GROUP_FF;
3591
363k
    inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3592
363k
  }
3593
7.18M
  return SLJIT_SUCCESS;
3594
7.18M
}
3595
3596
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3597
  sljit_s32 dst, sljit_sw dstw,
3598
  sljit_s32 type)
3599
11.5M
{
3600
11.5M
  sljit_u8 *inst;
3601
11.5M
  sljit_u8 cond_set;
3602
11.5M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3603
11.5M
  sljit_s32 reg;
3604
11.5M
  sljit_uw size;
3605
11.5M
#endif /* SLJIT_CONFIG_X86_64 */
3606
  /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3607
11.5M
  sljit_s32 dst_save = dst;
3608
11.5M
  sljit_sw dstw_save = dstw;
3609
3610
11.5M
  CHECK_ERROR();
3611
11.5M
  CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3612
3613
11.5M
  ADJUST_LOCAL_OFFSET(dst, dstw);
3614
11.5M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3615
3616
  /* setcc = jcc + 0x10. */
3617
11.5M
  cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3618
3619
11.5M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3620
11.5M
  if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3621
4.40M
    size = 3 + 2;
3622
4.40M
    if (reg_map[TMP_REG1] >= 4)
3623
0
      size += 1 + 1;
3624
4.40M
    else if (reg_map[dst] >= 4)
3625
0
      size++;
3626
3627
4.40M
    inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3628
4.40M
    FAIL_IF(!inst);
3629
4.40M
    INC_SIZE(size);
3630
    /* Set low register to conditional flag. */
3631
4.40M
    if (reg_map[TMP_REG1] >= 4)
3632
0
      *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3633
3634
4.40M
    inst[0] = GROUP_0F;
3635
4.40M
    inst[1] = cond_set;
3636
4.40M
    inst[2] = MOD_REG | reg_lmap[TMP_REG1];
3637
4.40M
    inst += 3;
3638
3639
4.40M
    if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4)
3640
0
      *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3641
3642
4.40M
    inst[0] = OR_rm8_r8;
3643
4.40M
    inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3644
4.40M
    return SLJIT_SUCCESS;
3645
4.40M
  }
3646
3647
7.17M
  reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3648
3649
7.17M
  size = 3 + (reg_map[reg] >= 4) + 4;
3650
7.17M
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3651
7.17M
  FAIL_IF(!inst);
3652
7.17M
  INC_SIZE(size);
3653
  /* Set low register to conditional flag. */
3654
3655
7.17M
  if (reg_map[reg] >= 4)
3656
1.74k
    *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
3657
3658
7.17M
  inst[0] = GROUP_0F;
3659
7.17M
  inst[1] = cond_set;
3660
7.17M
  inst[2] = MOD_REG | reg_lmap[reg];
3661
3662
7.17M
  inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3663
  /* The movzx instruction does not affect flags. */
3664
7.17M
  inst[4] = GROUP_0F;
3665
7.17M
  inst[5] = MOVZX_r_rm8;
3666
7.17M
  inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3667
3668
7.17M
  if (reg != TMP_REG1)
3669
5.55M
    return SLJIT_SUCCESS;
3670
3671
1.62M
  if (GET_OPCODE(op) < SLJIT_ADD) {
3672
0
    compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3673
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3674
0
  }
3675
3676
1.62M
  SLJIT_SKIP_CHECKS(compiler);
3677
1.62M
  return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3678
3679
#else /* !SLJIT_CONFIG_X86_64 */
3680
  SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
3681
3682
  /* The SLJIT_CONFIG_X86_32 code path starts here. */
3683
  if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3684
    /* Low byte is accessible. */
3685
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3686
    FAIL_IF(!inst);
3687
    INC_SIZE(3 + 3);
3688
    /* Set low byte to conditional flag. */
3689
    inst[0] = GROUP_0F;
3690
    inst[1] = cond_set;
3691
    inst[2] = U8(MOD_REG | reg_map[dst]);
3692
3693
    inst[3] = GROUP_0F;
3694
    inst[4] = MOVZX_r_rm8;
3695
    inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3696
    return SLJIT_SUCCESS;
3697
  }
3698
3699
  if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3700
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
3701
    FAIL_IF(!inst);
3702
    INC_SIZE(3 + 2);
3703
3704
    /* Set low byte to conditional flag. */
3705
    inst[0] = GROUP_0F;
3706
    inst[1] = cond_set;
3707
    inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3708
3709
    inst[3] = OR_rm8_r8;
3710
    inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
3711
    return SLJIT_SUCCESS;
3712
  }
3713
3714
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3715
  FAIL_IF(!inst);
3716
  INC_SIZE(3 + 3);
3717
  /* Set low byte to conditional flag. */
3718
  inst[0] = GROUP_0F;
3719
  inst[1] = cond_set;
3720
  inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3721
3722
  inst[3] = GROUP_0F;
3723
  inst[4] = MOVZX_r_rm8;
3724
  inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
3725
3726
  if (GET_OPCODE(op) < SLJIT_ADD)
3727
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3728
3729
  SLJIT_SKIP_CHECKS(compiler);
3730
  return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3731
#endif /* SLJIT_CONFIG_X86_64 */
3732
1.62M
}
3733
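Both paths above pair SETcc with a register-to-itself MOVZX because SETcc writes only the low byte, and MOVZX, unlike most ALU instructions, widens it to the full register without disturbing the just-computed flags. A byte-level sketch for a low register (no REX handling, so reg must be 0..3; names are hypothetical):

#include <stdint.h>
#include <stddef.h>

static size_t put_setcc_movzx(uint8_t *buf, uint8_t cond, uint8_t reg)
{
  size_t i = 0;
  buf[i++] = 0x0f;
  buf[i++] = (uint8_t)(0x90 | cond);            /* SETcc r/m8 */
  buf[i++] = (uint8_t)(0xc0 | reg);             /* ModRM: register direct */
  buf[i++] = 0x0f;
  buf[i++] = 0xb6;                              /* MOVZX r32, r/m8 */
  buf[i++] = (uint8_t)(0xc0 | (reg << 3) | reg);
  return i;
}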
3734
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3735
  sljit_s32 dst_freg,
3736
  sljit_s32 src1, sljit_sw src1w,
3737
  sljit_s32 src2_freg)
3738
0
{
3739
0
  sljit_u8* inst;
3740
0
  sljit_uw size;
3741
0
3742
0
  CHECK_ERROR();
3743
0
  CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3744
0
3745
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3746
0
3747
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3748
0
  compiler->mode32 = 1;
3749
0
#endif /* SLJIT_CONFIG_X86_64 */
3750
0
3751
0
  if (dst_freg != src2_freg) {
3752
0
    if (dst_freg == src1) {
3753
0
      src1 = src2_freg;
3754
0
      src1w = 0;
3755
0
      type ^= 0x1;
3756
0
    } else
3757
0
      FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
3758
0
  }
3759
0
3760
0
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
3761
0
  FAIL_IF(!inst);
3762
0
  INC_SIZE(2);
3763
0
  inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
3764
0
3765
0
  size = compiler->size;
3766
0
  FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
3767
0
3768
0
  inst[1] = U8(compiler->size - size);
3769
0
  return SLJIT_SUCCESS;
3770
0
}
3771
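sljit_emit_fselect implements select as a conditional skip: it emits the inverted Jcc with an empty rel8 displacement, emits the load, and then back-patches inst[1] with the number of bytes just produced. The same pattern in a standalone sketch (buffer management and names are hypothetical):

#include <stdint.h>
#include <stddef.h>

/* Emit "JNE over; <body>" and back-patch the 8-bit displacement once
   the body size is known. Returns the new length. */
static size_t emit_guarded(uint8_t *buf, size_t len,
  size_t (*emit_body)(uint8_t *, size_t))
{
  size_t jcc = len;
  buf[len++] = 0x75;              /* JNE rel8, condition already inverted */
  buf[len++] = 0x00;              /* displacement placeholder */
  len = emit_body(buf, len);      /* body must stay within 127 bytes */
  buf[jcc + 1] = (uint8_t)(len - (jcc + 2));
  return len;
}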
3772
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3773
  sljit_s32 vreg,
3774
  sljit_s32 srcdst, sljit_sw srcdstw)
3775
108k
{
3776
108k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3777
108k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3778
108k
  sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3779
108k
  sljit_uw op;
3780
3781
108k
  CHECK_ERROR();
3782
108k
  CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));
3783
3784
108k
  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3785
3786
108k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3787
108k
  compiler->mode32 = 1;
3788
108k
#endif /* SLJIT_CONFIG_X86_64 */
3789
3790
108k
  switch (reg_size) {
3791
108k
  case 4:
3792
108k
    op = EX86_SSE2;
3793
108k
    break;
3794
0
  case 5:
3795
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3796
0
      return SLJIT_ERR_UNSUPPORTED;
3797
0
    op = EX86_SSE2 | VEX_256;
3798
0
    break;
3799
0
  default:
3800
0
    return SLJIT_ERR_UNSUPPORTED;
3801
108k
  }
3802
3803
108k
  if (!(srcdst & SLJIT_MEM))
3804
0
    alignment = reg_size;
3805
3806
108k
  if (type & SLJIT_SIMD_FLOAT) {
3807
0
    if (elem_size == 2 || elem_size == 3) {
3808
0
      op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
3809
3810
0
      if (elem_size == 3)
3811
0
        op |= EX86_PREF_66;
3812
3813
0
      if (type & SLJIT_SIMD_STORE)
3814
0
        op += 1;
3815
0
    } else
3816
0
      return SLJIT_ERR_UNSUPPORTED;
3817
108k
  } else {
3818
108k
    op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
3819
108k
      | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
3820
108k
  }
3821
3822
108k
  if (type & SLJIT_SIMD_TEST)
3823
0
    return SLJIT_SUCCESS;
3824
3825
108k
  if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
3826
0
    return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw);
3827
3828
108k
  return emit_groupf(compiler, op, vreg, srcdst, srcdstw);
3829
108k
}
3830
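The alignment argument above decides between MOVAPS/MOVDQA, which fault on an unaligned operand, and MOVUPS/MOVDQU, which accept any address. In intrinsics terms the choice being made is (a sketch, not sljit code):

#include <emmintrin.h> /* SSE2 */

static __m128i load_vec(const void *p, int aligned16)
{
  return aligned16 ? _mm_load_si128((const __m128i *)p)   /* MOVDQA */
                   : _mm_loadu_si128((const __m128i *)p); /* MOVDQU */
}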
3831
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3832
  sljit_s32 vreg,
3833
  sljit_s32 src, sljit_sw srcw)
3834
0
{
3835
0
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3836
0
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3837
0
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3838
0
  sljit_u8 *inst;
3839
0
  sljit_u8 opcode = 0;
3840
0
  sljit_uw op;
3841
0
3842
0
  CHECK_ERROR();
3843
0
  CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));
3844
0
3845
0
  ADJUST_LOCAL_OFFSET(src, srcw);
3846
0
3847
0
  if (!(type & SLJIT_SIMD_FLOAT)) {
3848
0
    CHECK_EXTRA_REGS(src, srcw, (void)0);
3849
0
  }
3850
0
3851
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3852
0
  if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3853
0
    return SLJIT_ERR_UNSUPPORTED;
3854
0
#else /* !SLJIT_CONFIG_X86_32 */
3855
0
  compiler->mode32 = 1;
3856
0
3857
0
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3858
0
    return SLJIT_ERR_UNSUPPORTED;
3859
0
#endif /* SLJIT_CONFIG_X86_32 */
3860
0
3861
0
  if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
3862
0
    return SLJIT_ERR_UNSUPPORTED;
3863
0
3864
0
  if (type & SLJIT_SIMD_TEST)
3865
0
    return SLJIT_SUCCESS;
3866
0
3867
0
  if (reg_size == 5)
3868
0
    use_vex = 1;
3869
0
3870
0
  if (use_vex && src != SLJIT_IMM) {
3871
0
    op = 0;
3872
0
3873
0
    switch (elem_size) {
3874
0
    case 0:
3875
0
      if (cpu_feature_list & CPU_FEATURE_AVX2)
3876
0
        op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3877
0
      break;
3878
0
    case 1:
3879
0
      if (cpu_feature_list & CPU_FEATURE_AVX2)
3880
0
        op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3881
0
      break;
3882
0
    case 2:
3883
0
      if (type & SLJIT_SIMD_FLOAT) {
3884
0
        if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
3885
0
          op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3886
0
      } else if (cpu_feature_list & CPU_FEATURE_AVX2)
3887
0
        op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3888
0
      break;
3889
0
    default:
3890
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3891
0
      if (!(type & SLJIT_SIMD_FLOAT)) {
3892
0
        if (cpu_feature_list & CPU_FEATURE_AVX2)
3893
0
          op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3894
0
        break;
3895
0
      }
3896
0
#endif /* SLJIT_CONFIG_X86_64 */
3897
0
3898
0
      if (reg_size == 5)
3899
0
        op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3900
0
      break;
3901
0
    }
3902
0
3903
0
    if (op != 0) {
3904
0
      if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
3905
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3906
0
        if (elem_size >= 3)
3907
0
          compiler->mode32 = 0;
3908
0
#endif /* SLJIT_CONFIG_X86_64 */
3909
0
        FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
3910
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3911
0
        compiler->mode32 = 1;
3912
0
#endif /* SLJIT_CONFIG_X86_64 */
3913
0
        src = vreg;
3914
0
        srcw = 0;
3915
0
      }
3916
0
3917
0
      if (reg_size == 5)
3918
0
        op |= VEX_256;
3919
0
3920
0
      return emit_vex_instruction(compiler, op, vreg, 0, src, srcw);
3921
0
    }
3922
0
  }
3923
0
3924
0
  if (type & SLJIT_SIMD_FLOAT) {
3925
0
    if (src == SLJIT_IMM) {
3926
0
      if (use_vex)
3927
0
        return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);
3928
0
3929
0
      return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0);
3930
0
    }
3931
0
3932
0
    SLJIT_ASSERT(reg_size == 4);
3933
0
3934
0
    if (use_vex) {
3935
0
      if (elem_size == 3)
3936
0
        return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw);
3937
0
3938
0
      SLJIT_ASSERT(!(src & SLJIT_MEM));
3939
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
3940
0
      return emit_byte(compiler, 0);
3941
0
    }
3942
0
3943
0
    if (elem_size == 2 && vreg != src) {
3944
0
      FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw));
3945
0
      src = vreg;
3946
0
      srcw = 0;
3947
0
    }
3948
0
3949
0
    op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
3950
0
    FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw));
3951
0
3952
0
    if (elem_size == 2)
3953
0
      return emit_byte(compiler, 0);
3954
0
    return SLJIT_SUCCESS;
3955
0
  }
3956
0
3957
0
  if (src == SLJIT_IMM) {
3958
0
    if (elem_size == 0) {
3959
0
      srcw = (sljit_u8)srcw;
3960
0
      srcw |= srcw << 8;
3961
0
      srcw |= srcw << 16;
3962
0
      elem_size = 2;
3963
0
    } else if (elem_size == 1) {
3964
0
      srcw = (sljit_u16)srcw;
3965
0
      srcw |= srcw << 16;
3966
0
      elem_size = 2;
3967
0
    }
3968
0
3969
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3970
0
    if (elem_size == 2 && (sljit_s32)srcw == -1)
3971
0
      srcw = -1;
3972
0
#endif /* SLJIT_CONFIG_X86_64 */
3973
0
3974
0
    if (srcw == 0 || srcw == -1) {
3975
0
      if (use_vex)
3976
0
        return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);
3977
0
3978
0
      return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0);
3979
0
    }
3980
0
3981
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3982
0
    if (elem_size == 3)
3983
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
3984
0
    else
3985
0
#endif /* SLJIT_CONFIG_X86_64 */
3986
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3987
0
3988
0
    src = TMP_REG1;
3989
0
    srcw = 0;
3990
0
3991
0
  }
3992
0
3993
0
  op = 2;
3994
0
  opcode = MOVD_x_rm;
3995
0
3996
0
  switch (elem_size) {
3997
0
  case 0:
3998
0
    if (!FAST_IS_REG(src)) {
3999
0
      opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
4000
0
      op = 3;
4001
0
    }
4002
0
    break;
4003
0
  case 1:
4004
0
    if (!FAST_IS_REG(src))
4005
0
      opcode = PINSRW_x_rm_i8;
4006
0
    break;
4007
0
  case 2:
4008
0
    break;
4009
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4010
0
  case 3:
4011
0
    /* MOVQ */
4012
0
    compiler->mode32 = 0;
4013
0
    break;
4014
0
#endif /* SLJIT_CONFIG_X86_64 */
4015
0
  }
4016
0
4017
0
  if (use_vex) {
4018
0
    if (opcode != MOVD_x_rm) {
4019
0
      op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
4020
0
      FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw));
4021
0
    } else
4022
0
      FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
4023
0
  } else {
4024
0
    inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw);
4025
0
    FAIL_IF(!inst);
4026
0
    inst[0] = GROUP_0F;
4027
0
    inst[1] = opcode;
4028
0
4029
0
    if (op == 3) {
4030
0
      SLJIT_ASSERT(opcode == 0x3a);
4031
0
      inst[2] = PINSRB_x_rm_i8;
4032
0
    }
4033
0
  }
4034
0
4035
0
  if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) {
4036
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4037
0
    op = VPBROADCASTD_x_xm;
4038
0
#else /* !SLJIT_CONFIG_X86_32 */
4039
0
    op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
4040
0
#endif /* SLJIT_CONFIG_X86_32 */
4041
0
    return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4042
0
  }
4043
0
4044
0
  SLJIT_ASSERT(reg_size == 4);
4045
0
4046
0
  if (opcode != MOVD_x_rm)
4047
0
    FAIL_IF(emit_byte(compiler, 0));
4048
0
4049
0
  switch (elem_size) {
4050
0
  case 0:
4051
0
    if (use_vex) {
4052
0
      FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4053
0
      return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0);
4054
0
    }
4055
0
    FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4056
0
    return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
4057
0
  case 1:
4058
0
    if (use_vex)
4059
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0));
4060
0
    else
4061
0
      FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0));
4062
0
    FAIL_IF(emit_byte(compiler, 0));
4063
0
    /* fallthrough */
4064
0
  default:
4065
0
    if (use_vex)
4066
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
4067
0
    else
4068
0
      FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
4069
0
    return emit_byte(compiler, 0);
4070
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4071
0
  case 3:
4072
0
    compiler->mode32 = 1;
4073
0
    if (use_vex)
4074
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
4075
0
    else
4076
0
      FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
4077
0
    return emit_byte(compiler, 0x44);
4078
0
#endif /* SLJIT_CONFIG_X86_64 */
4079
0
  }
4080
0
}
4081
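Without AVX2's VPBROADCAST family, the integer fallback in sljit_emit_simd_replicate moves the GPR into the vector register (MOVD) and then replicates lane 0 with PSHUFD using an all-zero selector. An equivalent intrinsics sketch for the 32-bit case (illustrative only):

#include <emmintrin.h>

static __m128i broadcast32(int x)
{
  __m128i v = _mm_cvtsi32_si128(x);  /* MOVD xmm, r32 */
  return _mm_shuffle_epi32(v, 0);    /* PSHUFD imm8 = 0: lane 0 everywhere */
}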
4082
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4083
  sljit_s32 vreg, sljit_s32 lane_index,
4084
  sljit_s32 srcdst, sljit_sw srcdstw)
4085
59.1k
{
4086
59.1k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4087
59.1k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4088
59.1k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4089
59.1k
  sljit_u8 *inst;
4090
59.1k
  sljit_u8 opcode = 0;
4091
59.1k
  sljit_uw op;
4092
59.1k
  sljit_s32 vreg_orig = vreg;
4093
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4094
  sljit_s32 srcdst_is_ereg = 0;
4095
  sljit_s32 srcdst_orig = 0;
4096
  sljit_sw srcdstw_orig = 0;
4097
#endif /* SLJIT_CONFIG_X86_32 */
4098
4099
59.1k
  CHECK_ERROR();
4100
59.1k
  CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));
4101
4102
59.1k
  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4103
4104
59.1k
  if (reg_size == 5) {
4105
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4106
0
      return SLJIT_ERR_UNSUPPORTED;
4107
0
    use_vex = 1;
4108
59.1k
  } else if (reg_size != 4)
4109
0
    return SLJIT_ERR_UNSUPPORTED;
4110
4111
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4112
  if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
4113
    return SLJIT_ERR_UNSUPPORTED;
4114
#else /* SLJIT_CONFIG_X86_32 */
4115
59.1k
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4116
0
    return SLJIT_ERR_UNSUPPORTED;
4117
59.1k
#endif /* SLJIT_CONFIG_X86_32 */
4118
4119
59.1k
  if (type & SLJIT_SIMD_TEST)
4120
0
    return SLJIT_SUCCESS;
4121
4122
59.1k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4123
59.1k
  compiler->mode32 = 1;
4124
#else /* !SLJIT_CONFIG_X86_64 */
4125
  if (!(type & SLJIT_SIMD_FLOAT)) {
4126
    CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
4127
4128
    if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
4129
      srcdst_orig = srcdst;
4130
      srcdstw_orig = srcdstw;
4131
      srcdst = TMP_REG1;
4132
      srcdstw = 0;
4133
    }
4134
  }
4135
#endif /* SLJIT_CONFIG_X86_64 */
4136
4137
59.1k
  if (type & SLJIT_SIMD_LANE_ZERO) {
4138
59.1k
    if (lane_index == 0) {
4139
59.1k
      if (!(type & SLJIT_SIMD_FLOAT)) {
4140
59.1k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4141
59.1k
        if (elem_size == 3) {
4142
0
          compiler->mode32 = 0;
4143
0
          elem_size = 2;
4144
0
        }
4145
59.1k
#endif /* SLJIT_CONFIG_X86_64 */
4146
59.1k
        if (srcdst == SLJIT_IMM) {
4147
36.0k
          if (elem_size == 0)
4148
0
            srcdstw = (sljit_u8)srcdstw;
4149
36.0k
          else if (elem_size == 1)
4150
0
            srcdstw = (sljit_u16)srcdstw;
4151
4152
36.0k
          EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4153
36.0k
          srcdst = TMP_REG1;
4154
36.0k
          srcdstw = 0;
4155
36.0k
          elem_size = 2;
4156
36.0k
        }
4157
4158
59.1k
        if (elem_size == 2) {
4159
59.1k
          if (use_vex)
4160
0
            return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
4161
59.1k
          return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw);
4162
59.1k
        }
4163
59.1k
      } else if (srcdst & SLJIT_MEM) {
4164
0
        SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
4165
4166
0
        if (use_vex)
4167
0
          return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw);
4168
0
        return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw);
4169
0
      } else if (elem_size == 3) {
4170
0
        if (use_vex)
4171
0
          return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0);
4172
0
        return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0);
4173
0
      } else if (use_vex) {
4174
0
        FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4175
0
        return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0);
4176
0
      }
4177
59.1k
    }
4178
4179
0
    if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4180
0
      vreg = TMP_FREG;
4181
0
      lane_index -= (1 << (4 - elem_size));
4182
0
    } else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
4183
0
      if (use_vex)
4184
0
        FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
4185
0
      else
4186
0
        FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
4187
0
      srcdst = TMP_FREG;
4188
0
      srcdstw = 0;
4189
0
    }
4190
4191
0
    op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
4192
0
      | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
4193
4194
0
    if (use_vex)
4195
0
      FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
4196
0
    else
4197
0
      FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0));
4198
0
  } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4199
0
    FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
4200
0
    FAIL_IF(emit_byte(compiler, 1));
4201
4202
0
    vreg = TMP_FREG;
4203
0
    lane_index -= (1 << (4 - elem_size));
4204
0
  }
4205
4206
0
  if (type & SLJIT_SIMD_FLOAT) {
4207
0
    if (elem_size == 3) {
4208
0
      if (srcdst & SLJIT_MEM) {
4209
0
        if (type & SLJIT_SIMD_STORE)
4210
0
          op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
4211
0
        else
4212
0
          op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
4213
4214
        /* VEX prefix clears upper bits of the target register. */
4215
0
        if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG))
4216
0
          FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
4217
0
            | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw));
4218
0
        else
4219
0
          FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw));
4220
4221
        /* In case of store, vreg is not TMP_FREG. */
4222
0
      } else if (type & SLJIT_SIMD_STORE) {
4223
0
        if (lane_index == 1) {
4224
0
          if (use_vex)
4225
0
            return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
4226
0
          return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0);
4227
0
        }
4228
0
        if (use_vex)
4229
0
          return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
4230
0
        return emit_sse2_load(compiler, 0, srcdst, vreg, 0);
4231
0
      } else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) {
4232
0
        if (lane_index == 1)
4233
0
          FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
4234
0
        else
4235
0
          FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
4236
0
      } else {
4237
0
        if (lane_index == 1)
4238
0
          FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0));
4239
0
        else
4240
0
          FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0));
4241
0
      }
4242
0
    } else if (type & SLJIT_SIMD_STORE) {
4243
0
      if (lane_index == 0) {
4244
0
        if (use_vex)
4245
0
          return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV),
4246
0
            vreg, ((srcdst & SLJIT_MEM) ? 0 : srcdst), srcdst, srcdstw);
4247
0
        return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg);
4248
0
      }
4249
4250
0
      if (srcdst & SLJIT_MEM) {
4251
0
        if (use_vex)
4252
0
          FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw));
4253
0
        else
4254
0
          FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
4255
0
        return emit_byte(compiler, U8(lane_index));
4256
0
      }
4257
4258
0
      if (use_vex) {
4259
0
        FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0));
4260
0
        return emit_byte(compiler, U8(lane_index));
4261
0
      }
4262
4263
0
      if (srcdst == vreg)
4264
0
        op = SHUFPS_x_xm | EX86_SSE2;
4265
0
      else {
4266
0
        switch (lane_index) {
4267
0
        case 1:
4268
0
          op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
4269
0
          break;
4270
0
        case 2:
4271
0
          op = MOVHLPS_x_x | EX86_SSE2;
4272
0
          break;
4273
0
        default:
4274
0
          SLJIT_ASSERT(lane_index == 3);
4275
0
          op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
4276
0
          break;
4277
0
        }
4278
0
      }
4279
4280
0
      FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0));
4281
4282
0
      op &= 0xff;
4283
0
      if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
4284
0
        return emit_byte(compiler, U8(lane_index));
4285
4286
0
      return SLJIT_SUCCESS;
4287
0
    } else {
4288
0
      if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
4289
0
        FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
4290
0
        FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
4291
0
      } else
4292
0
        FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst));
4293
0
    }
4294
4295
0
    if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE))
4296
0
      return SLJIT_SUCCESS;
4297
4298
0
    SLJIT_ASSERT(reg_size == 5);
4299
4300
0
    if (type & SLJIT_SIMD_LANE_ZERO) {
4301
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
4302
0
      return emit_byte(compiler, 0x4e);
4303
0
    }
4304
4305
0
    FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
4306
0
    return emit_byte(compiler, 1);
4307
0
  }
4308
4309
0
  if (srcdst == SLJIT_IMM) {
4310
0
    EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4311
0
    srcdst = TMP_REG1;
4312
0
    srcdstw = 0;
4313
0
  }
4314
4315
0
  op = 3;
4316
4317
0
  switch (elem_size) {
4318
0
  case 0:
4319
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4320
0
    break;
4321
0
  case 1:
4322
0
    if (!(type & SLJIT_SIMD_STORE)) {
4323
0
      op = 2;
4324
0
      opcode = PINSRW_x_rm_i8;
4325
0
    } else
4326
0
      opcode = PEXTRW_rm_x_i8;
4327
0
    break;
4328
0
  case 2:
4329
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4330
0
    break;
4331
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4332
0
  case 3:
4333
    /* PINSRQ / PEXTRQ */
4334
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4335
0
    compiler->mode32 = 0;
4336
0
    break;
4337
0
#endif /* SLJIT_CONFIG_X86_64 */
4338
0
  }
4339
4340
0
  if (use_vex && (type & SLJIT_SIMD_STORE)) {
4341
0
    op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
4342
0
    FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw));
4343
0
  } else {
4344
0
    inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
4345
0
    FAIL_IF(!inst);
4346
0
    inst[0] = GROUP_0F;
4347
4348
0
    if (op == 3) {
4349
0
      inst[1] = 0x3a;
4350
0
      inst[2] = opcode;
4351
0
    } else
4352
0
      inst[1] = opcode;
4353
0
  }
4354
4355
0
  FAIL_IF(emit_byte(compiler, U8(lane_index)));
4356
4357
0
  if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4358
0
    if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4359
0
      SLJIT_ASSERT(reg_size == 5);
4360
4361
0
      if (type & SLJIT_SIMD_LANE_ZERO) {
4362
0
        FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
4363
0
        return emit_byte(compiler, 0x4e);
4364
0
      }
4365
4366
0
      FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
4367
0
      return emit_byte(compiler, 1);
4368
0
    }
4369
4370
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4371
    if (srcdst_orig & SLJIT_MEM)
4372
      return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4373
#endif /* SLJIT_CONFIG_X86_32 */
4374
0
    return SLJIT_SUCCESS;
4375
0
  }
4376
4377
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4378
0
  if (elem_size >= 3)
4379
0
    return SLJIT_SUCCESS;
4380
4381
0
  compiler->mode32 = (type & SLJIT_32);
4382
4383
0
  op = 2;
4384
4385
0
  if (elem_size == 0)
4386
0
    op |= EX86_REX;
4387
4388
0
  if (elem_size == 2) {
4389
0
    if (type & SLJIT_32)
4390
0
      return SLJIT_SUCCESS;
4391
4392
0
    SLJIT_ASSERT(!(compiler->mode32));
4393
0
    op = 1;
4394
0
  }
4395
4396
0
  inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
4397
0
  FAIL_IF(!inst);
4398
4399
0
  if (op != 1) {
4400
0
    inst[0] = GROUP_0F;
4401
0
    inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4402
0
  } else
4403
0
    inst[0] = MOVSXD_r_rm;
4404
#else /* !SLJIT_CONFIG_X86_64 */
4405
  if (elem_size >= 2)
4406
    return SLJIT_SUCCESS;
4407
4408
  FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4409
    (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4410
4411
  if (srcdst_orig & SLJIT_MEM)
4412
    return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4413
#endif /* SLJIT_CONFIG_X86_64 */
4414
0
  return SLJIT_SUCCESS;
4415
0
}
4416
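For 32-bit lanes, sljit_emit_simd_lane_mov reduces to PINSRD/PEXTRD with the lane index encoded as the trailing immediate byte, plus an optional sign extension when SLJIT_SIMD_LANE_SIGNED loads into a GPR. The SSE4.1 intrinsics equivalents of the two directions (a sketch; lane 2 chosen arbitrarily):

#include <smmintrin.h> /* SSE4.1 */

static int extract_lane2(__m128i v)           /* PEXTRD r32, xmm, 2 */
{
  return _mm_extract_epi32(v, 2);
}

static __m128i insert_lane2(__m128i v, int x) /* PINSRD xmm, r32, 2 */
{
  return _mm_insert_epi32(v, x, 2);
}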
4417
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4418
  sljit_s32 vreg,
4419
  sljit_s32 src, sljit_s32 src_lane_index)
4420
59.1k
{
4421
59.1k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4422
59.1k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4423
59.1k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4424
59.1k
  sljit_uw pref;
4425
59.1k
  sljit_u8 byte;
4426
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4427
  sljit_s32 opcode3 = TMP_REG1;
4428
#else /* !SLJIT_CONFIG_X86_32 */
4429
59.1k
  sljit_s32 opcode3 = SLJIT_S0;
4430
59.1k
#endif /* SLJIT_CONFIG_X86_32 */
4431
4432
59.1k
  CHECK_ERROR();
4433
59.1k
  CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));
4434
4435
59.1k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4436
59.1k
  compiler->mode32 = 1;
4437
59.1k
#endif /* SLJIT_CONFIG_X86_64 */
4438
59.1k
  SLJIT_ASSERT(reg_map[opcode3] == 3);
4439
4440
59.1k
  if (reg_size == 5) {
4441
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4442
0
      return SLJIT_ERR_UNSUPPORTED;
4443
0
    use_vex = 1;
4444
59.1k
  } else if (reg_size != 4)
4445
0
    return SLJIT_ERR_UNSUPPORTED;
4446
4447
59.1k
  if (type & SLJIT_SIMD_FLOAT) {
4448
0
    pref = 0;
4449
0
    byte = U8(src_lane_index);
4450
4451
0
    if (elem_size == 3) {
4452
0
      if (type & SLJIT_SIMD_TEST)
4453
0
        return SLJIT_SUCCESS;
4454
4455
0
      if (reg_size == 5) {
4456
0
        if (src_lane_index == 0)
4457
0
          return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4458
4459
0
        FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4460
4461
0
        byte = U8(byte | (byte << 2));
4462
0
        return emit_byte(compiler, U8(byte | (byte << 4)));
4463
0
      }
4464
4465
0
      if (src_lane_index == 0) {
4466
0
        if (use_vex)
4467
0
          return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0);
4468
0
        return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0);
4469
0
      }
4470
4471
      /* Changes it to SHUFPD_x_xm. */
4472
0
      pref = EX86_PREF_66;
4473
0
    } else if (elem_size != 2)
4474
0
      return SLJIT_ERR_UNSUPPORTED;
4475
0
    else if (type & SLJIT_SIMD_TEST)
4476
0
      return SLJIT_SUCCESS;
4477
4478
0
    if (reg_size == 5) {
4479
0
      SLJIT_ASSERT(elem_size == 2);
4480
4481
0
      if (src_lane_index == 0)
4482
0
        return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4483
4484
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4485
4486
0
      byte = 0x44;
4487
0
      if (src_lane_index >= 4) {
4488
0
        byte = 0xee;
4489
0
        src_lane_index -= 4;
4490
0
      }
4491
4492
0
      FAIL_IF(emit_byte(compiler, byte));
4493
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
4494
0
      byte = U8(src_lane_index);
4495
0
    } else if (use_vex) {
4496
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
4497
0
    } else {
4498
0
      if (vreg != src)
4499
0
        FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0));
4500
4501
0
      FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0));
4502
0
    }
4503
4504
0
    if (elem_size == 2) {
4505
0
      byte = U8(byte | (byte << 2));
4506
0
      byte = U8(byte | (byte << 4));
4507
0
    } else
4508
0
      byte = U8(byte | (byte << 1));
4509
4510
0
    return emit_byte(compiler, U8(byte));
4511
0
  }
4512
4513
59.1k
  if (type & SLJIT_SIMD_TEST)
4514
0
    return SLJIT_SUCCESS;
4515
4516
59.1k
  if (elem_size == 0) {
4517
0
    if (reg_size == 5 && src_lane_index >= 16) {
4518
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4519
0
      FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
4520
0
      src_lane_index &= 0x7;
4521
0
      src = vreg;
4522
0
    }
4523
4524
0
    if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
4525
0
      pref = 0;
4526
4527
0
      if ((src_lane_index & 0x3) == 0) {
4528
0
        pref = EX86_PREF_66;
4529
0
        byte = U8(src_lane_index >> 2);
4530
0
      } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
4531
0
        pref = EX86_PREF_F2;
4532
0
        byte = U8(src_lane_index >> 1);
4533
0
      } else {
4534
0
        if (!use_vex) {
4535
0
          if (vreg != src)
4536
0
            FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));
4537
4538
0
          FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0));
4539
0
        } else
4540
0
          FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0));
4541
4542
0
        FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
4543
0
      }
4544
4545
0
      if (pref != 0) {
4546
0
        if (use_vex)
4547
0
          FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
4548
0
        else
4549
0
          FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
4550
0
        FAIL_IF(emit_byte(compiler, byte));
4551
0
      }
4552
4553
0
      src = vreg;
4554
0
    }
4555
4556
0
    if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
4557
0
      return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4558
4559
0
    SLJIT_ASSERT(reg_size == 4);
4560
0
    FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4561
0
    return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
4562
0
  }
4563
4564
59.1k
  if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
4565
0
    switch (elem_size) {
4566
0
    case 1:
4567
0
      pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4568
0
      break;
4569
0
    case 2:
4570
0
      pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4571
0
      break;
4572
0
    default:
4573
0
      pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4574
0
      break;
4575
0
    }
4576
4577
0
    if (reg_size == 5)
4578
0
      pref |= VEX_256;
4579
4580
0
    return emit_vex_instruction(compiler, pref, vreg, 0, src, 0);
4581
0
  }
4582
4583
59.1k
  if (reg_size == 5) {
4584
0
    switch (elem_size) {
4585
0
    case 1:
4586
0
      byte = U8(src_lane_index & 0x3);
4587
0
      src_lane_index >>= 2;
4588
0
      pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
4589
0
      break;
4590
0
    case 2:
4591
0
      byte = U8(src_lane_index & 0x3);
4592
0
      src_lane_index >>= 1;
4593
0
      pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
4594
0
      break;
4595
0
    case 3:
4596
0
      pref = 0;
4597
0
      break;
4598
0
    default:
4599
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4600
0
      return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
4601
0
    }
4602
4603
0
    if (pref != 0) {
4604
0
      FAIL_IF(emit_vex_instruction(compiler, pref, vreg, 0, src, 0));
4605
0
      byte = U8(byte | (byte << 2));
4606
0
      FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4607
4608
0
      if (src_lane_index == 0)
4609
0
        return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4610
4611
0
      src = vreg;
4612
0
    }
4613
4614
0
    FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4615
0
    byte = U8(src_lane_index);
4616
0
    byte = U8(byte | (byte << 2));
4617
0
    return emit_byte(compiler, U8(byte | (byte << 4)));
4618
0
  }
4619
4620
59.1k
  switch (elem_size) {
4621
0
  case 1:
4622
0
    byte = U8(src_lane_index & 0x3);
4623
0
    src_lane_index >>= 1;
4624
0
    pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
4625
4626
0
    if (use_vex)
4627
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
4628
0
    else
4629
0
      FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
4630
0
    byte = U8(byte | (byte << 2));
4631
0
    FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4632
4633
0
    if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
4634
0
      return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4635
4636
0
    src = vreg;
4637
    /* fallthrough */
4638
59.1k
  case 2:
4639
59.1k
    byte = U8(src_lane_index);
4640
59.1k
    byte = U8(byte | (byte << 2));
4641
59.1k
    break;
4642
0
  default:
4643
0
    byte = U8(src_lane_index << 1);
4644
0
    byte = U8(byte | (byte << 2) | 0x4);
4645
0
    break;
4646
59.1k
  }
4647
4648
59.1k
  if (use_vex)
4649
0
    FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, src, 0));
4650
59.1k
  else
4651
59.1k
    FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));
4652
59.1k
  return emit_byte(compiler, U8(byte | (byte << 4)));
4653
59.1k
}
4654
4655
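
The integer replicate path above ends by building a PSHUFD immediate from src_lane_index: for 32-bit elements the 2-bit lane index is copied into all four selector fields (byte | byte << 2, then | byte << 4). As an illustration of why that immediate broadcasts one lane, here is a minimal stand-alone scalar model of PSHUFD's selector semantics; the name pshufd_model and the sample values are ours, not part of sljit.

#include <stdint.h>
#include <stdio.h>

/* Scalar model of PSHUFD xmm, xmm/m128, imm8:
   destination lane i takes source lane ((imm >> (2 * i)) & 3). */
static void pshufd_model(uint32_t dst[4], const uint32_t src[4], uint8_t imm)
{
  for (int i = 0; i < 4; i++)
    dst[i] = src[(imm >> (2 * i)) & 3];
}

int main(void)
{
  const uint32_t src[4] = { 10, 20, 30, 40 };
  uint32_t dst[4];
  unsigned lane = 2; /* src_lane_index for a 32-bit element */

  /* byte = lane | lane << 2; imm = byte | byte << 4, as in the
     elem_size == 2 case above: every 2-bit field selects 'lane'. */
  uint8_t byte = (uint8_t)(lane | (lane << 2));
  uint8_t imm = (uint8_t)(byte | (byte << 4));

  pshufd_model(dst, src, imm);
  printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]); /* 30 30 30 30 */
  return 0;
}
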
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4656
  sljit_s32 vreg,
4657
  sljit_s32 src, sljit_sw srcw)
4658
0
{
4659
0
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4660
0
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4661
0
  sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4662
0
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4663
0
  sljit_u8 opcode;
4664
0
4665
0
  CHECK_ERROR();
4666
0
  CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));
4667
0
4668
0
  ADJUST_LOCAL_OFFSET(src, srcw);
4669
0
4670
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4671
0
  compiler->mode32 = 1;
4672
0
#endif /* SLJIT_CONFIG_X86_64 */
4673
0
4674
0
  if (reg_size == 5) {
4675
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4676
0
      return SLJIT_ERR_UNSUPPORTED;
4677
0
    use_vex = 1;
4678
0
  } else if (reg_size != 4)
4679
0
    return SLJIT_ERR_UNSUPPORTED;
4680
0
4681
0
  if (type & SLJIT_SIMD_FLOAT) {
4682
0
    if (elem_size != 2 || elem2_size != 3)
4683
0
      return SLJIT_ERR_UNSUPPORTED;
4684
0
4685
0
    if (type & SLJIT_SIMD_TEST)
4686
0
      return SLJIT_SUCCESS;
4687
0
4688
0
    if (use_vex)
4689
0
      return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw);
4690
0
    return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw);
4691
0
  }
4692
0
4693
0
  switch (elem_size) {
4694
0
  case 0:
4695
0
    if (elem2_size == 1)
4696
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4697
0
    else if (elem2_size == 2)
4698
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4699
0
    else if (elem2_size == 3)
4700
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4701
0
    else
4702
0
      return SLJIT_ERR_UNSUPPORTED;
4703
0
    break;
4704
0
  case 1:
4705
0
    if (elem2_size == 2)
4706
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4707
0
    else if (elem2_size == 3)
4708
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4709
0
    else
4710
0
      return SLJIT_ERR_UNSUPPORTED;
4711
0
    break;
4712
0
  case 2:
4713
0
    if (elem2_size == 3)
4714
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4715
0
    else
4716
0
      return SLJIT_ERR_UNSUPPORTED;
4717
0
    break;
4718
0
  default:
4719
0
    return SLJIT_ERR_UNSUPPORTED;
4720
0
  }
4721
0
4722
0
  if (type & SLJIT_SIMD_TEST)
4723
0
    return SLJIT_SUCCESS;
4724
0
4725
0
  if (use_vex)
4726
0
    return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw);
4727
0
  return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw);
4728
0
}
4729
4730
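
sljit_emit_simd_extend above picks a PMOVSX/PMOVZX variant from the (elem_size, elem2_size) pair. A minimal scalar model of the byte-to-word case, compiled stand-alone; pmovxbw_model and the sample data are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Scalar model of PMOVSXBW / PMOVZXBW: widen the low 8 source bytes
   to 8 words, with sign or zero extension. */
static void pmovxbw_model(int16_t dst[8], const uint8_t src[8], int sign_extend)
{
  for (int i = 0; i < 8; i++)
    dst[i] = sign_extend ? (int16_t)(int8_t)src[i] : (int16_t)src[i];
}

int main(void)
{
  const uint8_t src[8] = { 0x7f, 0x80, 0xff, 0, 1, 2, 3, 4 };
  int16_t s[8], z[8];

  pmovxbw_model(s, src, 1); /* signed:   127, -128,  -1, ... */
  pmovxbw_model(z, src, 0); /* unsigned: 127,  128, 255, ... */
  printf("%d %d / %d %d\n", s[1], s[2], z[1], z[2]);
  return 0;
}
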
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4731
  sljit_s32 vreg,
4732
  sljit_s32 dst, sljit_sw dstw)
4733
87.5k
{
4734
87.5k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4735
87.5k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4736
87.5k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4737
87.5k
  sljit_s32 dst_r;
4738
87.5k
  sljit_uw op;
4739
87.5k
  sljit_u8 *inst;
4740
4741
87.5k
  CHECK_ERROR();
4742
87.5k
  CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));
4743
4744
87.5k
  ADJUST_LOCAL_OFFSET(dst, dstw);
4745
4746
87.5k
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
4747
87.5k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4748
87.5k
  compiler->mode32 = 1;
4749
87.5k
#endif /* SLJIT_CONFIG_X86_64 */
4750
4751
87.5k
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4752
0
    return SLJIT_ERR_UNSUPPORTED;
4753
4754
87.5k
  if (reg_size == 4) {
4755
87.5k
    if (type & SLJIT_SIMD_TEST)
4756
0
      return SLJIT_SUCCESS;
4757
4758
87.5k
    op = EX86_PREF_66 | EX86_SSE2_OP2;
4759
4760
87.5k
    switch (elem_size) {
4761
0
    case 1:
4762
0
      if (use_vex)
4763
0
        FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0));
4764
0
      else
4765
0
        FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0));
4766
0
      vreg = TMP_FREG;
4767
0
      break;
4768
0
    case 2:
4769
0
      op = EX86_SSE2_OP2;
4770
0
      break;
4771
87.5k
    }
4772
4773
87.5k
    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4774
87.5k
    op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x;
4775
4776
87.5k
    if (use_vex)
4777
0
      FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
4778
87.5k
    else
4779
87.5k
      FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0));
4780
4781
87.5k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4782
87.5k
    compiler->mode32 = type & SLJIT_32;
4783
87.5k
#endif /* SLJIT_CONFIG_X86_64 */
4784
4785
87.5k
    if (elem_size == 1) {
4786
0
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
4787
0
      FAIL_IF(!inst);
4788
0
      inst[1] |= SHR;
4789
0
    }
4790
4791
87.5k
    if (dst_r == TMP_REG1)
4792
0
      return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4793
4794
87.5k
    return SLJIT_SUCCESS;
4795
87.5k
  }
4796
4797
0
  if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
4798
0
    return SLJIT_ERR_UNSUPPORTED;
4799
4800
0
  if (type & SLJIT_SIMD_TEST)
4801
0
    return SLJIT_SUCCESS;
4802
4803
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4804
4805
0
  if (elem_size == 1) {
4806
0
    FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
4807
0
    FAIL_IF(emit_byte(compiler, 1));
4808
0
    FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0));
4809
0
    FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
4810
0
  } else {
4811
0
    op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
4812
4813
0
    if (elem_size == 0)
4814
0
      op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
4815
0
    else if (elem_size == 3)
4816
0
      op |= EX86_PREF_66;
4817
4818
0
    FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
4819
0
  }
4820
4821
0
  if (dst_r == TMP_REG1) {
4822
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4823
0
    compiler->mode32 = type & SLJIT_32;
4824
0
#endif /* SLJIT_CONFIG_X86_64 */
4825
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4826
0
  }
4827
4828
0
  return SLJIT_SUCCESS;
4829
0
}
4830
4831
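
The reg_size == 4 path above extracts one bit per lane with PMOVMSKB (or MOVMSKPS/MOVMSKPD for floats). For 16-bit lanes it first narrows with PACKSSWB, which saturates signed words to signed bytes and therefore preserves each word's sign bit; the SHR by 8 then keeps the eight mask bits contributed by vreg. A scalar sketch of the underlying sign-mask idea, with names of our own choosing:

#include <stdint.h>
#include <stdio.h>

/* Scalar model of the sign-mask extraction behind PMOVMSKB/MOVMSKPS:
   collect the most significant bit of every lane into an integer. */
static uint32_t sign_mask16_model(const int16_t lanes[8])
{
  uint32_t mask = 0;
  for (int i = 0; i < 8; i++)
    mask |= (uint32_t)(((uint16_t)lanes[i]) >> 15) << i;
  return mask;
}

int main(void)
{
  const int16_t v[8] = { -1, 2, -3, 4, 5, -6, 7, 8 };
  printf("0x%02x\n", sign_mask16_model(v)); /* 0x25: lanes 0, 2 and 5 */
  return 0;
}
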
static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4832
  sljit_s32 dst_vreg, sljit_s32 src_vreg)
4833
0
{
4834
0
  sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4835
4836
0
  SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4837
4838
0
  if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4839
0
    op |= EX86_PREF_66;
4840
4841
0
  return emit_groupf(compiler, op, dst_vreg, src_vreg, 0);
4842
0
}
4843
4844
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4845
  sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
4846
21.0k
{
4847
21.0k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4848
21.0k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4849
21.0k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4850
21.0k
  sljit_uw op = 0;
4851
21.0k
  sljit_uw mov_op = 0;
4852
4853
21.0k
  CHECK_ERROR();
4854
21.0k
  CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
4855
21.0k
  ADJUST_LOCAL_OFFSET(src2, src2w);
4856
4857
21.0k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4858
21.0k
  compiler->mode32 = 1;
4859
21.0k
#endif /* SLJIT_CONFIG_X86_64 */
4860
4861
21.0k
  if (reg_size == 5) {
4862
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4863
0
      return SLJIT_ERR_UNSUPPORTED;
4864
21.0k
  } else if (reg_size != 4)
4865
0
    return SLJIT_ERR_UNSUPPORTED;
4866
4867
21.0k
  if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4868
0
    return SLJIT_ERR_UNSUPPORTED;
4869
4870
21.0k
  switch (SLJIT_SIMD_GET_OPCODE(type)) {
4871
21.0k
  case SLJIT_SIMD_OP2_AND:
4872
21.0k
    op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4873
4874
21.0k
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4875
21.0k
      op |= EX86_PREF_66;
4876
21.0k
    break;
4877
0
  case SLJIT_SIMD_OP2_OR:
4878
0
    op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4879
4880
0
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4881
0
      op |= EX86_PREF_66;
4882
0
    break;
4883
0
  case SLJIT_SIMD_OP2_XOR:
4884
0
    op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
4885
4886
0
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4887
0
      op |= EX86_PREF_66;
4888
0
    break;
4889
4890
0
  case SLJIT_SIMD_OP2_SHUFFLE:
4891
0
    if (reg_size != 4)
4892
0
      return SLJIT_ERR_UNSUPPORTED;
4893
4894
0
    op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38;
4895
0
    break;
4896
21.0k
  }
4897
4898
21.0k
  if (type & SLJIT_SIMD_TEST)
4899
0
    return SLJIT_SUCCESS;
4900
4901
21.0k
  if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) {
4902
0
    mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2;
4903
0
    if (use_vex)
4904
0
      FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w));
4905
0
    else
4906
0
      FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w));
4907
4908
0
    src2 = TMP_FREG;
4909
0
    src2w = 0;
4910
0
  }
4911
4912
21.0k
  if (reg_size == 5 || use_vex) {
4913
0
    if (reg_size == 5)
4914
0
      op |= VEX_256;
4915
4916
0
    return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w);
4917
0
  }
4918
4919
21.0k
  if (dst_vreg != src1_vreg) {
4920
0
    if (dst_vreg == src2) {
4921
0
      if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
4922
0
        FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2));
4923
0
        FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
4924
0
        src2 = TMP_FREG;
4925
0
        src2w = 0;
4926
0
      } else
4927
0
        src2 = src1_vreg;
4928
0
    } else
4929
0
      FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
4930
0
  }
4931
4932
21.0k
  if (op & (VEX_OP_0F38 | VEX_OP_0F3A))
4933
0
    return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
4934
21.0k
  return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
4935
21.0k
}
4936
4937
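
SLJIT_SIMD_OP2_SHUFFLE above is lowered to PSHUFB. A stand-alone scalar model of PSHUFB's byte-selector semantics, useful for reading the shuffle case; pshufb_model and the sample vectors are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Scalar model of PSHUFB xmm, xmm/m128: each selector byte picks a
   source byte by its low 4 bits, or zeroes the lane when bit 7 is set. */
static void pshufb_model(uint8_t dst[16], const uint8_t src[16], const uint8_t sel[16])
{
  for (int i = 0; i < 16; i++)
    dst[i] = (sel[i] & 0x80) ? 0 : src[sel[i] & 0x0f];
}

int main(void)
{
  uint8_t src[16], sel[16], dst[16];

  for (int i = 0; i < 16; i++) {
    src[i] = (uint8_t)(i * 16);
    sel[i] = (uint8_t)(15 - i); /* reverse the vector */
  }
  sel[0] = 0x80; /* and zero lane 0 */

  pshufb_model(dst, src, sel);
  printf("%u %u %u\n", dst[0], dst[1], dst[15]); /* 0 224 0 */
  return 0;
}
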
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4938
  sljit_s32 dst_reg,
4939
  sljit_s32 mem_reg)
4940
0
{
4941
0
  CHECK_ERROR();
4942
0
  CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4943
0
4944
0
  if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
4945
0
    return SLJIT_ERR_UNSUPPORTED;
4946
0
4947
0
  if (op & SLJIT_ATOMIC_TEST)
4948
0
    return SLJIT_SUCCESS;
4949
0
4950
0
  SLJIT_SKIP_CHECKS(compiler);
4951
0
  return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4952
0
}
4953
4954
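
sljit_emit_atomic_load above only filters out the forms it cannot guarantee and then delegates to a plain sljit_emit_op1 move, since aligned loads are naturally atomic on x86. For comparison, a relaxed C11 atomic load compiles to the same single MOV; this snippet is our illustration, not sljit API.

#include <stdatomic.h>
#include <stdint.h>

/* On x86 an aligned 32-bit load is one MOV; a relaxed atomic load
   compiles to exactly that. */
static uint32_t atomic_load32(const _Atomic uint32_t *p)
{
  return atomic_load_explicit(p, memory_order_relaxed);
}

int main(void)
{
  _Atomic uint32_t x = 42;
  return (int)atomic_load32(&x) - 42; /* exits 0 */
}
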
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4955
  sljit_s32 src_reg,
4956
  sljit_s32 mem_reg,
4957
  sljit_s32 temp_reg)
4958
0
{
4959
0
  sljit_uw pref;
4960
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4961
0
  sljit_s32 saved_reg = TMP_REG1;
4962
0
  sljit_s32 swap_tmp = 0;
4963
0
  sljit_sw srcw = 0;
4964
0
  sljit_sw tempw = 0;
4965
0
#endif /* SLJIT_CONFIG_X86_32 */
4966
0
4967
0
  CHECK_ERROR();
4968
0
  CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4969
0
  CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
4970
0
  CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
4971
0
4972
0
  SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
4973
0
  SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
4974
0
4975
0
  if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
4976
0
    return SLJIT_ERR_UNSUPPORTED;
4977
0
4978
0
  if (op & SLJIT_ATOMIC_TEST)
4979
0
    return SLJIT_SUCCESS;
4980
0
4981
0
  op = GET_OPCODE(op);
4982
0
4983
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4984
0
  if (temp_reg == SLJIT_TMP_DEST_REG) {
4985
0
    FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));
4986
0
4987
0
    if (src_reg == SLJIT_R0)
4988
0
      src_reg = TMP_REG1;
4989
0
    if (mem_reg == SLJIT_R0)
4990
0
      mem_reg = TMP_REG1;
4991
0
4992
0
    temp_reg = SLJIT_R0;
4993
0
    swap_tmp = 1;
4994
0
  }
4995
0
4996
0
  /* Src is a virtual register or its low byte is not accessible. */
4997
0
  if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
4998
0
    SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG);
4999
0
5000
0
    if (swap_tmp) {
5001
0
      saved_reg = (mem_reg != SLJIT_R1) ? SLJIT_R1 : SLJIT_R2;
5002
0
5003
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0);
5004
0
      EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw);
5005
0
    } else
5006
0
      EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
5007
0
5008
0
    src_reg = saved_reg;
5009
0
5010
0
    if (mem_reg == src_reg)
5011
0
      mem_reg = saved_reg;
5012
0
  }
5013
0
#endif /* SLJIT_CONFIG_X86_32 */
5014
0
5015
0
  if (temp_reg != SLJIT_R0) {
5016
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5017
0
    compiler->mode32 = 0;
5018
0
5019
0
    EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0);
5020
0
    EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);
5021
0
5022
0
    if (src_reg == SLJIT_R0)
5023
0
      src_reg = TMP_REG2;
5024
0
    if (mem_reg == SLJIT_R0)
5025
0
      mem_reg = TMP_REG2;
5026
0
#else /* !SLJIT_CONFIG_X86_64 */
5027
0
    SLJIT_ASSERT(!swap_tmp);
5028
0
5029
0
    if (src_reg == TMP_REG1) {
5030
0
      if (mem_reg == SLJIT_R0) {
5031
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
5032
0
        EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
5033
0
        EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5034
0
5035
0
        mem_reg = SLJIT_R1;
5036
0
        saved_reg = SLJIT_R1;
5037
0
      } else {
5038
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
5039
0
        EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5040
0
        saved_reg = SLJIT_R0;
5041
0
      }
5042
0
    } else {
5043
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0);
5044
0
      EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5045
0
5046
0
      if (src_reg == SLJIT_R0)
5047
0
        src_reg = TMP_REG1;
5048
0
      if (mem_reg == SLJIT_R0)
5049
0
        mem_reg = TMP_REG1;
5050
0
    }
5051
0
#endif /* SLJIT_CONFIG_X86_64 */
5052
0
  }
5053
0
5054
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5055
0
  compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
5056
0
#endif /* SLJIT_CONFIG_X86_64 */
5057
0
5058
0
  /* Lock prefix. */
5059
0
  FAIL_IF(emit_byte(compiler, GROUP_LOCK));
5060
0
5061
0
  pref = 0;
5062
0
  if (op == SLJIT_MOV_U16)
5063
0
    pref = EX86_HALF_ARG | EX86_PREF_66;
5064
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5065
0
  if (op == SLJIT_MOV_U8)
5066
0
    pref = EX86_REX;
5067
0
#endif /* SLJIT_CONFIG_X86_64 */
5068
0
5069
0
  FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
5070
0
5071
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5072
0
  if (swap_tmp) {
5073
0
    SLJIT_ASSERT(temp_reg == SLJIT_R0);
5074
0
    FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));
5075
0
5076
0
    if (saved_reg != TMP_REG1)
5077
0
      return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0);
5078
0
    return SLJIT_SUCCESS;
5079
0
  }
5080
0
#endif /* SLJIT_CONFIG_X86_32 */
5081
0
5082
0
  if (temp_reg != SLJIT_R0) {
5083
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5084
0
    compiler->mode32 = 0;
5085
0
    return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0);
5086
0
#else /* !SLJIT_CONFIG_X86_64 */
5087
0
    EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0);
5088
0
    if (saved_reg == SLJIT_R1)
5089
0
      return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0);
5090
0
#endif /* SLJIT_CONFIG_X86_64 */
5091
0
  }
5092
0
  return SLJIT_SUCCESS;
5093
0
}
5094
5095
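
The store above goes to some length to place temp_reg in EAX/RAX because CMPXCHG implicitly compares the accumulator with memory and writes src_reg only on a match. A scalar model of the instruction's core semantics (the LOCK prefix's atomicity guarantee is not modelled, and the helper name is ours):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of CMPXCHG r/m32, r32: if *mem equals *eax, store src
   and report success; otherwise load the current value into *eax. */
static int cmpxchg_model(uint32_t *mem, uint32_t *eax, uint32_t src)
{
  if (*mem == *eax) {
    *mem = src;
    return 1; /* ZF set */
  }
  *eax = *mem;
  return 0; /* ZF clear */
}

int main(void)
{
  uint32_t mem = 5, eax = 5;

  if (cmpxchg_model(&mem, &eax, 9))   /* expected 5, found 5: stores 9 */
    printf("stored, mem = %u\n", mem);

  eax = 5;
  if (!cmpxchg_model(&mem, &eax, 7))  /* expected 5, found 9: eax = 9 */
    printf("failed, eax = %u\n", eax);
  return 0;
}
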
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
5096
10.3M
{
5097
10.3M
  CHECK_ERROR();
5098
10.3M
  CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
5099
10.3M
  ADJUST_LOCAL_OFFSET(dst, dstw);
5100
10.3M
  ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
5101
5102
10.3M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
5103
5104
10.3M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5105
10.3M
  compiler->mode32 = 0;
5106
10.3M
#endif
5107
5108
10.3M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5109
10.3M
  if (NOT_HALFWORD(offset)) {
5110
0
    FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
5111
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
5112
    SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
5113
    return compiler->error;
5114
#else
5115
0
    return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
5116
0
#endif
5117
0
  }
5118
10.3M
#endif
5119
5120
10.3M
  if (offset != 0)
5121
10.3M
    return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
5122
9.40k
  return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
5123
10.3M
}
5124
5125
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
5126
  sljit_s32 dst, sljit_sw dstw,
5127
  sljit_sw init_value)
5128
0
{
5129
0
  sljit_u8 *inst;
5130
0
  struct sljit_const *const_;
5131
0
  sljit_s32 reg;
5132
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5133
0
  sljit_s32 dst_is_ereg = 0;
5134
0
#endif /* SLJIT_CONFIG_X86_32 */
5135
0
5136
0
  CHECK_ERROR_PTR();
5137
0
  CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
5138
0
  ADJUST_LOCAL_OFFSET(dst, dstw);
5139
0
5140
0
  CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
5141
0
5142
0
  const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
5143
0
  PTR_FAIL_IF(!const_);
5144
0
  set_const(const_, compiler);
5145
0
5146
0
  switch (GET_OPCODE(op)) {
5147
0
  case SLJIT_MOV_U8:
5148
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5149
0
    compiler->mode32 = (op & SLJIT_32);
5150
0
#endif /* SLJIT_CONFIG_X86_64 */
5151
0
5152
0
    if ((init_value & 0x100) != 0)
5153
0
      init_value = init_value | -(sljit_sw)0x100;
5154
0
    else
5155
0
      init_value = (sljit_u8)init_value;
5156
0
5157
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5158
0
    if (dst_is_ereg) {
5159
0
      if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))
5160
0
        return NULL;
5161
0
      dst = 0;
5162
0
      break;
5163
0
    }
5164
0
#endif /* SLJIT_CONFIG_X86_32 */
5165
0
5166
0
    reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
5167
0
5168
0
    if (emit_mov(compiler, reg, 0, SLJIT_IMM, init_value))
5169
0
      return NULL;
5170
0
    break;
5171
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5172
0
  case SLJIT_MOV:
5173
0
    compiler->mode32 = 0;
5174
0
    reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
5175
0
5176
0
    if (emit_load_imm64(compiler, reg, init_value))
5177
0
      return NULL;
5178
0
    break;
5179
0
#endif /* SLJIT_CONFIG_X86_64 */
5180
0
  default:
5181
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5182
0
    compiler->mode32 = (op == SLJIT_MOV32);
5183
0
#endif /* SLJIT_CONFIG_X86_64 */
5184
0
5185
0
    if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))
5186
0
      return NULL;
5187
0
    dst = 0;
5188
0
    break;
5189
0
  }
5190
0
5191
0
  inst = (sljit_u8*)ensure_buf(compiler, 1);
5192
0
  PTR_FAIL_IF(!inst);
5193
0
5194
0
  inst[0] = SLJIT_INST_CONST;
5195
0
5196
0
  if (dst & SLJIT_MEM) {
5197
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5198
0
    if (op == SLJIT_MOV) {
5199
0
      if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
5200
0
        return NULL;
5201
0
      return const_;
5202
0
    }
5203
0
#endif
5204
0
5205
0
    if (emit_mov_byte(compiler, 0, dst, dstw, TMP_REG1, 0))
5206
0
      return NULL;
5207
0
  }
5208
0
5209
0
  return const_;
5210
0
}
5211
5212
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
5213
  sljit_s32 dst, sljit_sw dstw)
5214
2.69M
{
5215
2.69M
  struct sljit_jump *jump;
5216
2.69M
  sljit_u8 *inst;
5217
2.69M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5218
2.69M
  sljit_s32 reg;
5219
2.69M
#endif /* SLJIT_CONFIG_X86_64 */
5220
2.69M
  SLJIT_UNUSED_ARG(op);
5221
5222
2.69M
  CHECK_ERROR_PTR();
5223
2.69M
  CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
5224
2.69M
  ADJUST_LOCAL_OFFSET(dst, dstw);
5225
5226
2.69M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
5227
5228
2.69M
  jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
5229
2.69M
  PTR_FAIL_IF(!jump);
5230
2.69M
  set_mov_addr(jump, compiler, 0);
5231
5232
2.69M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5233
2.69M
  compiler->mode32 = 0;
5234
2.69M
  if (dst & SLJIT_MEM)
5235
2.69M
    reg = TMP_REG1;
5236
0
  else
5237
0
    reg = (op != SLJIT_ADD_ABS_ADDR) ? dst : TMP_REG2;
5238
5239
2.69M
  PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
5240
2.69M
  jump->addr = compiler->size;
5241
5242
2.69M
  if (reg_map[reg] >= 8)
5243
0
    jump->flags |= MOV_ADDR_HI;
5244
#else /* !SLJIT_CONFIG_X86_64 */
5245
  if (op == SLJIT_ADD_ABS_ADDR) {
5246
    if (dst != SLJIT_R0) {
5247
      /* Must not be a signed byte argument. */
5248
      inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0x100, dst, dstw);
5249
      PTR_FAIL_IF(!inst);
5250
      *(inst + 1) |= ADD;
5251
    } else
5252
      PTR_FAIL_IF(emit_do_imm(compiler, ADD_EAX_i32, 0));
5253
  } else {
5254
    PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
5255
  }
5256
#endif /* SLJIT_CONFIG_X86_64 */
5257
5258
2.69M
  inst = (sljit_u8*)ensure_buf(compiler, 1);
5259
2.69M
  PTR_FAIL_IF(!inst);
5260
5261
2.69M
  inst[0] = SLJIT_INST_MOV_ADDR;
5262
5263
2.69M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5264
2.69M
  if (op == SLJIT_ADD_ABS_ADDR) {
5265
0
    inst = emit_x86_instruction(compiler, 1, reg, 0, dst, dstw);
5266
0
    PTR_FAIL_IF(!inst);
5267
0
    *inst = ADD_rm_r;
5268
2.69M
  } else if (dst & SLJIT_MEM)
5269
2.69M
    PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
5270
2.69M
#endif /* SLJIT_CONFIG_X86_64 */
5271
5272
2.69M
  return jump;
5273
2.69M
}
5274
5275
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
5276
0
{
5277
0
  SLJIT_UNUSED_ARG(executable_offset);
5278
0
5279
0
  SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
5280
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5281
0
  sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
5282
0
#else
5283
0
  sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
5284
0
#endif
5285
0
  SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
5286
0
}
5287
5288
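
On x86-32 the patch above rewrites a rel32 displacement, which the CPU measures from the first byte after the 4-byte field, hence the addr + 4 term; x86-64 stores the absolute 64-bit target instead. A worked example of the displacement arithmetic, with made-up addresses and executable_offset taken as 0:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  /* Hypothetical addresses: the rel32 field lives at 'addr' and the
     jump should land on 'new_target'. */
  uintptr_t addr = 0x1000;       /* address of the 4-byte displacement */
  uintptr_t new_target = 0x1050; /* desired jump destination */

  /* rel32 is relative to the first byte after the displacement. */
  int32_t rel32 = (int32_t)(new_target - (addr + 4));
  printf("rel32 = %d\n", rel32); /* 76 */
  return 0;
}
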
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
5289
0
{
5290
0
  void *start_addr;
5291
0
  SLJIT_UNUSED_ARG(executable_offset);
5292
0
5293
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5294
0
  if (op == SLJIT_MOV) {
5295
0
    start_addr = (void*)(addr - sizeof(sljit_sw));
5296
0
    SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);
5297
0
    sljit_unaligned_store_sw(start_addr, new_constant);
5298
0
    SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);
5299
0
    return;
5300
0
  }
5301
0
#endif
5302
0
5303
0
  start_addr = (void*)(addr - sizeof(sljit_s32));
5304
0
5305
0
  if ((op | SLJIT_32) == SLJIT_MOV32_U8) {
5306
0
    if ((new_constant & 0x100) != 0)
5307
0
      new_constant = new_constant | -(sljit_sw)0x100;
5308
0
    else
5309
0
      new_constant = (sljit_u8)new_constant;
5310
0
  }
5311
0
5312
0
  SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);
5313
0
  sljit_unaligned_store_s32(start_addr, (sljit_s32)new_constant);
5314
0
  SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);
5315
0
}