/src/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c
Line | Count | Source |
1 | | /* |
2 | | * Stack-less Just-In-Time compiler |
3 | | * |
4 | | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without modification, are |
7 | | * permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this list of |
10 | | * conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, this list |
13 | | * of conditions and the following disclaimer in the documentation and/or other materials |
14 | | * provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY |
17 | | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT |
19 | | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
21 | | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
22 | | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
24 | | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) |
28 | 0 | { |
29 | 0 | return "x86" SLJIT_CPUINFO; |
30 | 0 | } |
31 | | |
32 | | /* |
33 | | 32b register indexes: |
34 | | 0 - EAX |
35 | | 1 - ECX |
36 | | 2 - EDX |
37 | | 3 - EBX |
38 | | 4 - ESP |
39 | | 5 - EBP |
40 | | 6 - ESI |
41 | | 7 - EDI |
42 | | */ |
43 | | |
44 | | /* |
45 | | 64b register indexes: |
46 | | 0 - RAX |
47 | | 1 - RCX |
48 | | 2 - RDX |
49 | | 3 - RBX |
50 | | 4 - RSP |
51 | | 5 - RBP |
52 | | 6 - RSI |
53 | | 7 - RDI |
54 | | 8 - R8 - From here on, a REX prefix is required |
55 | | 9 - R9 |
56 | | 10 - R10 |
57 | | 11 - R11 |
58 | | 12 - R12 |
59 | | 13 - R13 |
60 | | 14 - R14 |
61 | | 15 - R15 |
62 | | */ |
63 | | |
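 | | /* Editorial sketch (not part of the original source): these indexes are |
 | | exactly the 3-bit register fields of a ModRM byte. A register-to-register |
 | | form is composed as MOD_REG | (reg << 3) | rm, with MOD_REG defined as |
 | | 0xc0 further below. For example, "mov eax, ecx" encodes as: |
 | | |
 | | 8B C1 ; MOV_r_rm (0x8b), ModRM = 0xc0 | (0 << 3) | 1 = 0xc1 |
 | | */ |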
64 | 294M | #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) |
65 | 0 | #define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) |
66 | | |
67 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
68 | | |
69 | | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { |
70 | | 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3 |
71 | | }; |
72 | | |
73 | | static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { |
74 | | 0, 1, 2, 3, 4, 5, 6, 7, 0 |
75 | | }; |
76 | | |
77 | | #define CHECK_EXTRA_REGS(p, w, do) \ |
78 | | if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ |
79 | | w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ |
80 | | p = SLJIT_MEM1(SLJIT_SP); \ |
81 | | do; \ |
82 | | } |
83 | | |
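 | | /* Sketch (editorial, not part of the original source): SLJIT_R3..SLJIT_S3 |
 | | map to reg_map entry 0, i.e. they have no hardware register on x86-32 and |
 | | live in stack slots instead. The macro rewrites the operand pair in place; |
 | | for p == SLJIT_R3 it yields |
 | | |
 | | w = 2 * SSIZE_OF(sw);        (the slot offset above SLJIT_SP) |
 | | p = SLJIT_MEM1(SLJIT_SP);    (i.e. an [esp + w] memory operand) |
 | | */ |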
84 | | #else /* SLJIT_CONFIG_X86_32 */ |
85 | | |
86 | 98.6M | #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) |
87 | | |
88 | | /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present". |
89 | | Note: avoid using r12 and r13 for memory addressing; |
90 | | therefore r12 is better used as a higher saved register. */ |
91 | | #ifndef _WIN64 |
92 | | /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ |
93 | | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
94 | | 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 |
95 | | }; |
96 | | /* low-map. reg_map & 0x7. */ |
97 | | static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
98 | | 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 |
99 | | }; |
100 | | #else |
101 | | /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */ |
102 | | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
103 | | 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10 |
104 | | }; |
105 | | /* low-map. reg_map & 0x7. */ |
106 | | static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
107 | | 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2 |
108 | | }; |
109 | | #endif |
110 | | |
111 | | /* Args: xmm0-xmm3 */ |
112 | | static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { |
113 | | 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4 |
114 | | }; |
115 | | /* low-map. freg_map & 0x7. */ |
116 | | static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { |
117 | | 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4 |
118 | | }; |
119 | | |
120 | 794M | #define REX_W 0x48 |
121 | 131M | #define REX_R 0x44 |
122 | 2.60M | #define REX_X 0x42 |
123 | 290M | #define REX_B 0x41 |
124 | 876 | #define REX 0x40 |
125 | | |
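 | | /* Editorial note: these follow the 0100WRXB encoding of the REX prefix. |
 | | REX_W (0x48) selects 64-bit operand size, REX_R extends the ModRM reg |
 | | field, REX_X the SIB index, and REX_B the ModRM rm / SIB base field, so |
 | | the bits can simply be OR-ed together, e.g.: |
 | | |
 | | REX_W | REX_R | REX_B == 0x4d  (64-bit op, both register fields >= 8) |
 | | */ |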
126 | | #ifndef _WIN64 |
127 | 2.84G | #define HALFWORD_MAX 0x7fffffffl |
128 | 1.39G | #define HALFWORD_MIN -0x80000000l |
129 | | #else |
130 | | #define HALFWORD_MAX 0x7fffffffll |
131 | | #define HALFWORD_MIN -0x80000000ll |
132 | | #endif |
133 | | |
134 | 720M | #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) |
135 | 417M | #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) |
136 | | |
137 | | #define CHECK_EXTRA_REGS(p, w, do) |
138 | | |
139 | | #endif /* SLJIT_CONFIG_X86_32 */ |
140 | | |
141 | 3.35G | #define U8(v) ((sljit_u8)(v)) |
142 | | |
143 | | /* Size flags for emit_x86_instruction: */ |
144 | 1.13G | #define EX86_BIN_INS ((sljit_uw)0x000010) |
145 | 984M | #define EX86_SHIFT_INS ((sljit_uw)0x000020) |
146 | 543M | #define EX86_BYTE_ARG ((sljit_uw)0x000040) |
147 | 199M | #define EX86_HALF_ARG ((sljit_uw)0x000080) |
148 | | /* Size flags for both emit_x86_instruction and emit_vex_instruction: */ |
149 | 146M | #define EX86_REX ((sljit_uw)0x000100) |
150 | 627M | #define EX86_NO_REXW ((sljit_uw)0x000200) |
151 | 1.54G | #define EX86_PREF_66 ((sljit_uw)0x000400) |
152 | 1.54G | #define EX86_PREF_F2 ((sljit_uw)0x000800) |
153 | 1.54G | #define EX86_PREF_F3 ((sljit_uw)0x001000) |
154 | 1.09G | #define EX86_SSE2_OP1 ((sljit_uw)0x002000) |
155 | 426M | #define EX86_SSE2_OP2 ((sljit_uw)0x004000) |
156 | 176k | #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) |
157 | 774M | #define EX86_VEX_EXT ((sljit_uw)0x008000) |
158 | | /* Op flags for emit_vex_instruction: */ |
159 | 28.9k | #define VEX_OP_0F38 ((sljit_uw)0x010000) |
160 | 28.9k | #define VEX_OP_0F3A ((sljit_uw)0x020000) |
161 | 0 | #define VEX_SSE2_OPV ((sljit_uw)0x040000) |
162 | 0 | #define VEX_AUTO_W ((sljit_uw)0x080000) |
163 | 0 | #define VEX_W ((sljit_uw)0x100000) |
164 | 147k | #define VEX_256 ((sljit_uw)0x200000) |
165 | | |
166 | 0 | #define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66) |
167 | | #define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) |
168 | | |
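 | | /* Sketch (editorial; the exact call sites live elsewhere in this file): |
 | | these flags are OR-ed into the opcode word passed to the emitters, e.g. a |
 | | hypothetical double-precision add could be requested as |
 | | |
 | | emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst, src, srcw); |
 | | |
 | | selecting the F2 0F 58 /r (addsd) encoding. */ |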
169 | | /* --------------------------------------------------------------------- */ |
170 | | /* Instruction forms */ |
171 | | /* --------------------------------------------------------------------- */ |
172 | | |
173 | | #define ADD (/* BINARY */ 0 << 3) |
174 | 46.6M | #define ADD_EAX_i32 0x05 |
175 | 46.6M | #define ADD_r_rm 0x03 |
176 | 46.6M | #define ADD_rm_r 0x01 |
177 | | #define ADDSD_x_xm 0x58 |
178 | | #define ADC (/* BINARY */ 2 << 3) |
179 | 0 | #define ADC_EAX_i32 0x15 |
180 | 0 | #define ADC_r_rm 0x13 |
181 | 0 | #define ADC_rm_r 0x11 |
182 | | #define AND (/* BINARY */ 4 << 3) |
183 | 1.73M | #define AND_EAX_i32 0x25 |
184 | 1.73M | #define AND_r_rm 0x23 |
185 | 1.73M | #define AND_rm_r 0x21 |
186 | 0 | #define ANDPD_x_xm 0x54 |
187 | | #define BSR_r_rm (/* GROUP_0F */ 0xbd) |
188 | | #define BSF_r_rm (/* GROUP_0F */ 0xbc) |
189 | 0 | #define BSWAP_r (/* GROUP_0F */ 0xc8) |
190 | 18.8M | #define CALL_i32 0xe8 |
191 | | #define CALL_rm (/* GROUP_FF */ 2 << 3) |
192 | 0 | #define CDQ 0x99 |
193 | | #define CMOVE_r_rm (/* GROUP_0F */ 0x44) |
194 | | #define CMP (/* BINARY */ 7 << 3) |
195 | | #define CMP_EAX_i32 0x3d |
196 | 48.0M | #define CMP_r_rm 0x3b |
197 | 642k | #define CMP_rm_r 0x39 |
198 | | #define CMPS_x_xm 0xc2 |
199 | | #define CMPXCHG_rm_r 0xb1 |
200 | | #define CMPXCHG_rm8_r 0xb0 |
201 | | #define CVTPD2PS_x_xm 0x5a |
202 | | #define CVTPS2PD_x_xm 0x5a |
203 | | #define CVTSI2SD_x_rm 0x2a |
204 | | #define CVTTSD2SI_r_xm 0x2c |
205 | 0 | #define DIV (/* GROUP_F7 */ 6 << 3) |
206 | | #define DIVSD_x_xm 0x5e |
207 | | #define EXTRACTPS_x_xm 0x17 |
208 | | #define FLDS 0xd9 |
209 | | #define FLDL 0xdd |
210 | | #define FSTPS 0xd9 |
211 | | #define FSTPD 0xdd |
212 | | #define INSERTPS_x_xm 0x21 |
213 | 0 | #define INT3 0xcc |
214 | 0 | #define IDIV (/* GROUP_F7 */ 7 << 3) |
215 | 0 | #define IMUL (/* GROUP_F7 */ 5 << 3) |
216 | | #define IMUL_r_rm (/* GROUP_0F */ 0xaf) |
217 | 0 | #define IMUL_r_rm_i8 0x6b |
218 | 0 | #define IMUL_r_rm_i32 0x69 |
219 | 0 | #define JL_i8 0x7c |
220 | | #define JE_i8 0x74 |
221 | | #define JNC_i8 0x73 |
222 | | #define JNE_i8 0x75 |
223 | 6.48M | #define JMP_i8 0xeb |
224 | 83.7M | #define JMP_i32 0xe9 |
225 | | #define JMP_rm (/* GROUP_FF */ 4 << 3) |
226 | 8.81M | #define LEA_r_m 0x8d |
227 | | #define LOOP_i8 0xe2 |
228 | | #define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd) |
229 | 185M | #define MOV_r_rm 0x8b |
230 | 8.04M | #define MOV_r_i32 0xb8 |
231 | 118M | #define MOV_rm_r 0x89 |
232 | 73.7M | #define MOV_rm_i32 0xc7 |
233 | 0 | #define MOV_rm8_i8 0xc6 |
234 | 0 | #define MOV_rm8_r8 0x88 |
235 | 0 | #define MOVAPS_x_xm 0x28 |
236 | | #define MOVAPS_xm_x 0x29 |
237 | 77.5k | #define MOVD_x_rm 0x6e |
238 | | #define MOVD_rm_x 0x7e |
239 | 0 | #define MOVDDUP_x_xm 0x12 |
240 | 147k | #define MOVDQA_x_xm 0x6f |
241 | 0 | #define MOVDQA_xm_x 0x7f |
242 | 0 | #define MOVDQU_x_xm 0x6f |
243 | 0 | #define MOVHLPS_x_x 0x12 |
244 | 0 | #define MOVHPD_m_x 0x17 |
245 | 0 | #define MOVHPD_x_m 0x16 |
246 | | #define MOVLHPS_x_x 0x16 |
247 | 0 | #define MOVLPD_m_x 0x13 |
248 | 0 | #define MOVLPD_x_m 0x12 |
249 | 118k | #define MOVMSKPS_r_x (/* GROUP_0F */ 0x50) |
250 | 0 | #define MOVQ_x_xm (/* GROUP_0F */ 0x7e) |
251 | 0 | #define MOVSD_x_xm 0x10 |
252 | 0 | #define MOVSD_xm_x 0x11 |
253 | 0 | #define MOVSHDUP_x_xm 0x16 |
254 | 152k | #define MOVSXD_r_rm 0x63 |
255 | | #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) |
256 | | #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) |
257 | 0 | #define MOVUPS_x_xm 0x10 |
258 | 66.0M | #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) |
259 | | #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) |
260 | 0 | #define MUL (/* GROUP_F7 */ 4 << 3) |
261 | | #define MULSD_x_xm 0x59 |
262 | 28.9k | #define NEG_rm (/* GROUP_F7 */ 3 << 3) |
263 | 0 | #define NOP 0x90 |
264 | 0 | #define NOT_rm (/* GROUP_F7 */ 2 << 3) |
265 | | #define OR (/* BINARY */ 1 << 3) |
266 | 2.42M | #define OR_r_rm 0x0b |
267 | 2.42M | #define OR_EAX_i32 0x0d |
268 | 2.42M | #define OR_rm_r 0x09 |
269 | 990k | #define OR_rm8_r8 0x08 |
270 | 0 | #define ORPD_x_xm 0x56 |
271 | | #define PACKSSWB_x_xm (/* GROUP_0F */ 0x63) |
272 | 57.9k | #define PAND_x_xm 0xdb |
273 | | #define PCMPEQD_x_xm 0x76 |
274 | 0 | #define PINSRB_x_rm_i8 0x20 |
275 | 0 | #define PINSRW_x_rm_i8 0xc4 |
276 | 0 | #define PINSRD_x_rm_i8 0x22 |
277 | 0 | #define PEXTRB_rm_x_i8 0x14 |
278 | 0 | #define PEXTRW_rm_x_i8 0x15 |
279 | 0 | #define PEXTRD_rm_x_i8 0x16 |
280 | 118k | #define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7) |
281 | | #define PMOVSXBD_x_xm 0x21 |
282 | | #define PMOVSXBQ_x_xm 0x22 |
283 | | #define PMOVSXBW_x_xm 0x20 |
284 | | #define PMOVSXDQ_x_xm 0x25 |
285 | | #define PMOVSXWD_x_xm 0x23 |
286 | | #define PMOVSXWQ_x_xm 0x24 |
287 | | #define PMOVZXBD_x_xm 0x31 |
288 | | #define PMOVZXBQ_x_xm 0x32 |
289 | | #define PMOVZXBW_x_xm 0x30 |
290 | | #define PMOVZXDQ_x_xm 0x35 |
291 | | #define PMOVZXWD_x_xm 0x33 |
292 | | #define PMOVZXWQ_x_xm 0x34 |
293 | | #define POP_r 0x58 |
294 | 70.2k | #define POP_rm 0x8f |
295 | | #define POPF 0x9d |
296 | 0 | #define POR_x_xm 0xeb |
297 | 0 | #define PREFETCH 0x18 |
298 | 0 | #define PSHUFB_x_xm 0x00 |
299 | 0 | #define PSHUFD_x_xm 0x70 |
300 | 0 | #define PSHUFLW_x_xm 0x70 |
301 | | #define PSRLDQ_x 0x73 |
302 | 0 | #define PSLLD_x_i8 0x72 |
303 | 0 | #define PSLLQ_x_i8 0x73 |
304 | | #define PUSH_i32 0x68 |
305 | | #define PUSH_r 0x50 |
306 | 0 | #define PUSH_rm (/* GROUP_FF */ 6 << 3) |
307 | | #define PUSHF 0x9c |
308 | 0 | #define PXOR_x_xm 0xef |
309 | 0 | #define ROL (/* SHIFT */ 0 << 3) |
310 | 0 | #define ROR (/* SHIFT */ 1 << 3) |
311 | 189k | #define RET_near 0xc3 |
312 | | #define RET_i16 0xc2 |
313 | | #define SBB (/* BINARY */ 3 << 3) |
314 | 0 | #define SBB_EAX_i32 0x1d |
315 | 0 | #define SBB_r_rm 0x1b |
316 | 0 | #define SBB_rm_r 0x19 |
317 | 62.8k | #define SAR (/* SHIFT */ 7 << 3) |
318 | 4.47M | #define SHL (/* SHIFT */ 4 << 3) |
319 | | #define SHLD (/* GROUP_0F */ 0xa5) |
320 | | #define SHRD (/* GROUP_0F */ 0xad) |
321 | 1.51M | #define SHR (/* SHIFT */ 5 << 3) |
322 | 0 | #define SHUFPS_x_xm 0xc6 |
323 | | #define SUB (/* BINARY */ 5 << 3) |
324 | 30.1M | #define SUB_EAX_i32 0x2d |
325 | 30.1M | #define SUB_r_rm 0x2b |
326 | 30.1M | #define SUB_rm_r 0x29 |
327 | | #define SUBSD_x_xm 0x5c |
328 | | #define TEST_EAX_i32 0xa9 |
329 | 635k | #define TEST_rm_r 0x85 |
330 | | #define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc) |
331 | 0 | #define UCOMISD_x_xm 0x2e |
332 | | #define UNPCKLPD_x_xm 0x14 |
333 | | #define UNPCKLPS_x_xm 0x14 |
334 | 0 | #define VBROADCASTSD_x_xm 0x19 |
335 | 0 | #define VBROADCASTSS_x_xm 0x18 |
336 | | #define VEXTRACTF128_x_ym 0x19 |
337 | | #define VEXTRACTI128_x_ym 0x39 |
338 | | #define VINSERTF128_y_y_xm 0x18 |
339 | | #define VINSERTI128_y_y_xm 0x38 |
340 | 0 | #define VPBROADCASTB_x_xm 0x78 |
341 | 0 | #define VPBROADCASTD_x_xm 0x58 |
342 | 0 | #define VPBROADCASTQ_x_xm 0x59 |
343 | 0 | #define VPBROADCASTW_x_xm 0x79 |
344 | | #define VPERMPD_y_ym 0x01 |
345 | | #define VPERMQ_y_ym 0x00 |
346 | | #define XCHG_EAX_r 0x90 |
347 | | #define XCHG_r_rm 0x87 |
348 | 0 | #define XOR (/* BINARY */ 6 << 3) |
349 | 25.0k | #define XOR_EAX_i32 0x35 |
350 | 25.0k | #define XOR_r_rm 0x33 |
351 | 25.0k | #define XOR_rm_r 0x31 |
352 | 0 | #define XORPD_x_xm 0x57 |
353 | | |
354 | 402M | #define GROUP_0F 0x0f |
355 | | #define GROUP_66 0x66 |
356 | | #define GROUP_F3 0xf3 |
357 | 2.16M | #define GROUP_F7 0xf7 |
358 | 8.06M | #define GROUP_FF 0xff |
359 | 47.1M | #define GROUP_BINARY_81 0x81 |
360 | 96.1M | #define GROUP_BINARY_83 0x83 |
361 | 1.06M | #define GROUP_SHIFT_1 0xd1 |
362 | 3.72M | #define GROUP_SHIFT_N 0xc1 |
363 | 1.26M | #define GROUP_SHIFT_CL 0xd3 |
364 | | #define GROUP_LOCK 0xf0 |
365 | | |
366 | 67.0M | #define MOD_REG 0xc0 |
367 | | #define MOD_DISP8 0x40 |
368 | | |
369 | 1.09G | #define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) |
370 | | |
371 | 441k | #define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) |
372 | 343k | #define POP_REG(r) (*inst++ = U8(POP_r + (r))) |
373 | 127k | #define RET() (*inst++ = RET_near) |
374 | | #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) |
375 | | |
376 | 254M | #define SLJIT_INST_LABEL 255 |
377 | 744M | #define SLJIT_INST_JUMP 254 |
378 | 399k | #define SLJIT_INST_MOV_ADDR 253 |
379 | 1.59G | #define SLJIT_INST_CONST 252 |
380 | | |
381 | | /* Multithreading does not affect these static variables, since they store |
382 | | built-in CPU features. Therefore they can safely be overwritten by different |
383 | | threads if they detect the CPU features at the same time. */ |
384 | 1 | #define CPU_FEATURE_DETECTED 0x001 |
385 | | #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
386 | | #define CPU_FEATURE_SSE2 0x002 |
387 | | #endif |
388 | 69.7k | #define CPU_FEATURE_SSE41 0x004 |
389 | 1 | #define CPU_FEATURE_LZCNT 0x008 |
390 | 1 | #define CPU_FEATURE_TZCNT 0x010 |
391 | 91.2M | #define CPU_FEATURE_CMOV 0x020 |
392 | 449k | #define CPU_FEATURE_AVX 0x040 |
393 | 77.5k | #define CPU_FEATURE_AVX2 0x080 |
394 | 2 | #define CPU_FEATURE_OSXSAVE 0x100 |
395 | | |
396 | | static sljit_u32 cpu_feature_list = 0; |
397 | | |
398 | | #ifdef _WIN32_WCE |
399 | | #include <cmnintrin.h> |
400 | | #elif defined(_MSC_VER) && _MSC_VER >= 1400 |
401 | | #include <intrin.h> |
402 | | #elif defined(__INTEL_COMPILER) |
403 | | #include <cpuid.h> |
404 | | #endif |
405 | | |
406 | | #if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \ |
407 | | || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__)) |
408 | | #include <immintrin.h> |
409 | | #endif |
410 | | |
411 | | /******************************************************/ |
412 | | /* Unaligned-store functions */ |
413 | | /******************************************************/ |
414 | | |
415 | | static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) |
416 | 0 | { |
417 | 0 | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
418 | 0 | } |
419 | | |
420 | | static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) |
421 | 618M | { |
422 | 618M | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
423 | 618M | } |
424 | | |
425 | | static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) |
426 | 101M | { |
427 | 101M | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
428 | 101M | } |
429 | | |
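 | | /* Editorial note: going through SLJIT_MEMCPY sidesteps alignment and |
 | | strict-aliasing constraints when patching immediates inside the code |
 | | stream; on x86 any reasonable compiler lowers these to a single mov. */ |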
430 | | /******************************************************/ |
431 | | /* Utility functions */ |
432 | | /******************************************************/ |
433 | | |
434 | | static void execute_cpu_id(sljit_u32 info[4]) |
435 | 5 | { |
436 | | #if (defined(_MSC_VER) && _MSC_VER >= 1400) \ |
437 | | || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7) |
438 | | |
439 | | __cpuidex((int*)info, (int)info[0], (int)info[2]); |
440 | | |
441 | | #elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) |
442 | | |
443 | | __get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3); |
444 | | |
445 | | #elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \ |
446 | | && (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) |
447 | | |
448 | | /* Intel syntax. */ |
449 | | __asm { |
450 | | mov esi, info |
451 | | mov eax, [esi] |
452 | | mov ecx, [esi + 8] |
453 | | cpuid |
454 | | mov [esi], eax |
455 | | mov [esi + 4], ebx |
456 | | mov [esi + 8], ecx |
457 | | mov [esi + 12], edx |
458 | | } |
459 | | |
460 | | #else |
461 | | |
462 | 5 | __asm__ __volatile__ ( |
463 | 5 | "cpuid\n" |
464 | 5 | : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3]) |
465 | 5 | : "0" (info[0]), "2" (info[2]) |
466 | 5 | ); |
467 | | |
468 | 5 | #endif |
469 | 5 | } |
470 | | |
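 | | /* Usage sketch (editorial, not part of the original source): the leaf is |
 | | passed in info[0] and the subleaf in info[2]; EAX..EDX come back in |
 | | info[0..3] regardless of which variant above is compiled in: |
 | | |
 | | sljit_u32 info[4] = { 0, 0, 0, 0 }; |
 | | execute_cpu_id(info); |
 | | -> info[0] = highest supported basic leaf, info[1]/info[3]/info[2] = vendor |
 | | |
 | | get_cpu_features() below relies on exactly this convention. */ |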
471 | | static sljit_u32 execute_get_xcr0_low(void) |
472 | 1 | { |
473 | 1 | sljit_u32 xcr0; |
474 | | |
475 | | #if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \ |
476 | | || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__)) |
477 | | |
478 | | xcr0 = (sljit_u32)_xgetbv(0); |
479 | | |
480 | | #elif defined(__TINYC__) |
481 | | |
482 | | __asm__ ( |
483 | | "xorl %%ecx, %%ecx\n" |
484 | | ".byte 0x0f\n" |
485 | | ".byte 0x01\n" |
486 | | ".byte 0xd0\n" |
487 | | : "=a" (xcr0) |
488 | | : |
489 | | #if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 |
490 | | : "ecx", "edx" |
491 | | #else /* !SLJIT_CONFIG_X86_32 */ |
492 | | : "rcx", "rdx" |
493 | | #endif /* SLJIT_CONFIG_X86_32 */ |
494 | | ); |
495 | | |
496 | | #elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \ |
497 | | || (defined(__clang__) && __clang_major__ < 14) \ |
498 | | || (defined(__GNUC__) && __GNUC__ < 3) \ |
499 | | || defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
500 | | |
501 | | /* AT&T syntax. */ |
502 | | __asm__ ( |
503 | | "xorl %%ecx, %%ecx\n" |
504 | | "xgetbv\n" |
505 | | : "=a" (xcr0) |
506 | | : |
507 | | #if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 |
508 | | : "ecx", "edx" |
509 | | #else /* !SLJIT_CONFIG_X86_32 */ |
510 | | : "rcx", "rdx" |
511 | | #endif /* SLJIT_CONFIG_X86_32 */ |
512 | | ); |
513 | | |
514 | | #elif defined(_MSC_VER) |
515 | | |
516 | | /* Intel syntax. */ |
517 | | __asm { |
518 | | xor ecx, ecx |
519 | | xgetbv |
520 | | mov xcr0, eax |
521 | | } |
522 | | |
523 | | #else |
524 | | |
525 | 1 | __asm__ ( |
526 | 1 | "xor{l %%ecx, %%ecx | ecx, ecx}\n" |
527 | 1 | "xgetbv\n" |
528 | 1 | : "=a" (xcr0) |
529 | 1 | : |
530 | | #if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 |
531 | | : "ecx", "edx" |
532 | | #else /* !SLJIT_CONFIG_X86_32 */ |
533 | 1 | : "rcx", "rdx" |
534 | 1 | #endif /* SLJIT_CONFIG_X86_32 */ |
535 | 1 | ); |
536 | | |
537 | 1 | #endif |
538 | 1 | return xcr0; |
539 | 1 | } |
540 | | |
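 | | /* Editorial note: in XCR0, bit 1 covers SSE (XMM) state and bit 2 covers |
 | | AVX (YMM) state. get_cpu_features() below tests bit 2 (the 0x4 mask) to |
 | | confirm the OS actually saves YMM registers before trusting the CPU's |
 | | AVX bits. */ |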
541 | | static void get_cpu_features(void) |
542 | 1 | { |
543 | 1 | sljit_u32 feature_list = CPU_FEATURE_DETECTED; |
544 | 1 | sljit_u32 info[4] = {0}; |
545 | 1 | sljit_u32 max_id; |
546 | | |
547 | 1 | execute_cpu_id(info); |
548 | 1 | max_id = info[0]; |
549 | | |
550 | 1 | if (max_id >= 7) { |
551 | 1 | info[0] = 7; |
552 | 1 | info[2] = 0; |
553 | 1 | execute_cpu_id(info); |
554 | | |
555 | 1 | if (info[1] & 0x8) |
556 | 1 | feature_list |= CPU_FEATURE_TZCNT; |
557 | 1 | if (info[1] & 0x20) |
558 | 1 | feature_list |= CPU_FEATURE_AVX2; |
559 | 1 | } |
560 | | |
561 | 1 | if (max_id >= 1) { |
562 | 1 | info[0] = 1; |
563 | | #if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 |
564 | | /* Winchip 2 and Cyrix MII bugs */ |
565 | | info[1] = info[2] = 0; |
566 | | #endif |
567 | 1 | execute_cpu_id(info); |
568 | | |
569 | 1 | if (info[2] & 0x80000) |
570 | 1 | feature_list |= CPU_FEATURE_SSE41; |
571 | 1 | if (info[2] & 0x8000000) |
572 | 1 | feature_list |= CPU_FEATURE_OSXSAVE; |
573 | 1 | if (info[2] & 0x10000000) |
574 | 1 | feature_list |= CPU_FEATURE_AVX; |
575 | | #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
576 | | if (info[3] & 0x4000000) |
577 | | feature_list |= CPU_FEATURE_SSE2; |
578 | | #endif |
579 | 1 | if (info[3] & 0x8000) |
580 | 1 | feature_list |= CPU_FEATURE_CMOV; |
581 | 1 | } |
582 | | |
583 | 1 | info[0] = 0x80000000; |
584 | 1 | execute_cpu_id(info); |
585 | 1 | max_id = info[0]; |
586 | | |
587 | 1 | if (max_id >= 0x80000001) { |
588 | 1 | info[0] = 0x80000001; |
589 | 1 | execute_cpu_id(info); |
590 | | |
591 | 1 | if (info[2] & 0x20) |
592 | 1 | feature_list |= CPU_FEATURE_LZCNT; |
593 | 1 | } |
594 | | |
595 | 1 | if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0) |
596 | 0 | feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2); |
597 | | |
598 | 1 | cpu_feature_list = feature_list; |
599 | 1 | } |
600 | | |
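 | | /* Editorial reference for the magic masks above (Intel SDM vol. 2, CPUID): |
 | | leaf 7 / EBX:  bit 3 = BMI1 (provides TZCNT), bit 5 = AVX2 |
 | | leaf 1 / ECX:  bit 19 = SSE4.1, bit 27 = OSXSAVE, bit 28 = AVX |
 | | leaf 1 / EDX:  bit 15 = CMOV, bit 26 = SSE2 |
 | | leaf 0x80000001 / ECX: bit 5 = LZCNT (ABM) */ |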
601 | | static sljit_u8 get_jump_code(sljit_uw type) |
602 | 411M | { |
603 | 411M | switch (type) { |
604 | 164M | case SLJIT_EQUAL: |
605 | 164M | case SLJIT_ATOMIC_STORED: |
606 | 164M | case SLJIT_F_EQUAL: |
607 | 164M | case SLJIT_UNORDERED_OR_EQUAL: |
608 | 164M | return 0x84 /* je */; |
609 | | |
610 | 123M | case SLJIT_NOT_EQUAL: |
611 | 123M | case SLJIT_ATOMIC_NOT_STORED: |
612 | 123M | case SLJIT_F_NOT_EQUAL: |
613 | 123M | case SLJIT_ORDERED_NOT_EQUAL: |
614 | 123M | return 0x85 /* jne */; |
615 | | |
616 | 18.7M | case SLJIT_LESS: |
617 | 18.7M | case SLJIT_CARRY: |
618 | 18.7M | case SLJIT_F_LESS: |
619 | 18.7M | case SLJIT_UNORDERED_OR_LESS: |
620 | 18.7M | case SLJIT_UNORDERED_OR_GREATER: |
621 | 18.7M | return 0x82 /* jc */; |
622 | | |
623 | 7.05M | case SLJIT_GREATER_EQUAL: |
624 | 7.05M | case SLJIT_NOT_CARRY: |
625 | 7.05M | case SLJIT_F_GREATER_EQUAL: |
626 | 7.05M | case SLJIT_ORDERED_GREATER_EQUAL: |
627 | 7.05M | case SLJIT_ORDERED_LESS_EQUAL: |
628 | 7.05M | return 0x83 /* jae */; |
629 | | |
630 | 86.6M | case SLJIT_GREATER: |
631 | 86.6M | case SLJIT_F_GREATER: |
632 | 86.6M | case SLJIT_ORDERED_LESS: |
633 | 86.6M | case SLJIT_ORDERED_GREATER: |
634 | 86.6M | return 0x87 /* jnbe */; |
635 | | |
636 | 3.49M | case SLJIT_LESS_EQUAL: |
637 | 3.49M | case SLJIT_F_LESS_EQUAL: |
638 | 3.49M | case SLJIT_UNORDERED_OR_GREATER_EQUAL: |
639 | 3.49M | case SLJIT_UNORDERED_OR_LESS_EQUAL: |
640 | 3.49M | return 0x86 /* jbe */; |
641 | | |
642 | 0 | case SLJIT_SIG_LESS: |
643 | 0 | return 0x8c /* jl */; |
644 | | |
645 | 0 | case SLJIT_SIG_GREATER_EQUAL: |
646 | 0 | return 0x8d /* jnl */; |
647 | | |
648 | 7.86M | case SLJIT_SIG_GREATER: |
649 | 7.86M | return 0x8f /* jnle */; |
650 | | |
651 | 7.70k | case SLJIT_SIG_LESS_EQUAL: |
652 | 7.70k | return 0x8e /* jle */; |
653 | | |
654 | 0 | case SLJIT_OVERFLOW: |
655 | 0 | return 0x80 /* jo */; |
656 | | |
657 | 0 | case SLJIT_NOT_OVERFLOW: |
658 | 0 | return 0x81 /* jno */; |
659 | | |
660 | 0 | case SLJIT_UNORDERED: |
661 | 0 | case SLJIT_ORDERED_EQUAL: /* NaN. */ |
662 | 0 | return 0x8a /* jp */; |
663 | | |
664 | 0 | case SLJIT_ORDERED: |
665 | 0 | case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */ |
666 | 0 | return 0x8b /* jpo */; |
667 | 411M | } |
668 | 0 | return 0; |
669 | 411M | } |
670 | | |
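 | | /* Editorial note: the values returned above are the second byte of the |
 | | two-byte near form (0F 8x rel32). Subtracting 0x10 yields the one-byte |
 | | short form (7x rel8), which is how detect_near_jump_type() and |
 | | emit_cmov_generic() derive their short conditional jumps, e.g.: |
 | | |
 | | get_jump_code(SLJIT_EQUAL) == 0x84          (0F 84 = je rel32) |
 | | get_jump_code(SLJIT_EQUAL) - 0x10 == 0x74   (74 = je rel8) |
 | | */ |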
671 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
672 | | static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); |
673 | | #else /* !SLJIT_CONFIG_X86_32 */ |
674 | | static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr); |
675 | | static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset); |
676 | | #endif /* SLJIT_CONFIG_X86_32 */ |
677 | | |
678 | | static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) |
679 | 372M | { |
680 | 372M | sljit_uw type = jump->flags >> TYPE_SHIFT; |
681 | 372M | sljit_s32 short_jump; |
682 | 372M | sljit_uw label_addr; |
683 | 372M | sljit_uw jump_addr; |
684 | | |
685 | 372M | jump_addr = (sljit_uw)code_ptr; |
686 | 372M | if (!(jump->flags & JUMP_ADDR)) { |
687 | 364M | label_addr = (sljit_uw)(code + jump->u.label->size); |
688 | | |
689 | 364M | if (jump->u.label->size > jump->addr) |
690 | 335M | jump_addr = (sljit_uw)(code + jump->addr); |
691 | 364M | } else |
692 | 8.04M | label_addr = jump->u.target - (sljit_uw)executable_offset; |
693 | | |
694 | 372M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
695 | 372M | if ((sljit_sw)(label_addr - (jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN) |
696 | 8.04M | return detect_far_jump_type(jump, code_ptr); |
697 | 364M | #endif /* SLJIT_CONFIG_X86_64 */ |
698 | | |
699 | 364M | short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f; |
700 | | |
701 | 364M | if (type == SLJIT_JUMP) { |
702 | 90.2M | if (short_jump) |
703 | 6.48M | *code_ptr++ = JMP_i8; |
704 | 83.7M | else |
705 | 83.7M | *code_ptr++ = JMP_i32; |
706 | 274M | } else if (type > SLJIT_JUMP) { |
707 | 18.8M | short_jump = 0; |
708 | 18.8M | *code_ptr++ = CALL_i32; |
709 | 255M | } else if (short_jump) { |
710 | 78.8M | *code_ptr++ = U8(get_jump_code(type) - 0x10); |
711 | 176M | } else { |
712 | 176M | *code_ptr++ = GROUP_0F; |
713 | 176M | *code_ptr++ = get_jump_code(type); |
714 | 176M | } |
715 | | |
716 | 364M | jump->addr = (sljit_uw)code_ptr; |
717 | | |
718 | 364M | if (short_jump) { |
719 | 85.3M | jump->flags |= PATCH_MB; |
720 | 85.3M | code_ptr += sizeof(sljit_s8); |
721 | 279M | } else { |
722 | 279M | jump->flags |= PATCH_MW; |
723 | 279M | code_ptr += sizeof(sljit_s32); |
724 | 279M | } |
725 | | |
726 | 364M | return code_ptr; |
727 | 372M | } |
728 | | |
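 | | /* Editorial note: the +2, +5 and +6 adjustments above measure the distance |
 | | from the end of the emitted instruction, since x86 relative jumps are |
 | | encoded relative to the next instruction: 2 bytes for the short form |
 | | (opcode + rel8), 5 for jmp/call rel32, and 6 for the 0F 8x rel32 form. */ |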
729 | | static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) |
730 | 372M | { |
731 | 372M | sljit_uw flags = jump->flags; |
732 | 372M | sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; |
733 | 372M | sljit_uw jump_addr = jump->addr; |
734 | 372M | SLJIT_UNUSED_ARG(executable_offset); |
735 | | |
736 | 372M | if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) { |
737 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
738 | | sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); |
739 | | #else /* SLJIT_CONFIG_X86_32 */ |
740 | 199k | if (flags & PATCH_MD) { |
741 | 0 | SLJIT_ASSERT(addr > HALFWORD_MAX); |
742 | 0 | sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); |
743 | 0 | return; |
744 | 0 | } |
745 | | |
746 | 199k | if (flags & PATCH_MW) { |
747 | 199k | addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); |
748 | 199k | SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); |
749 | 199k | } else { |
750 | 0 | SLJIT_ASSERT(addr <= HALFWORD_MAX); |
751 | 0 | } |
752 | 199k | sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr); |
753 | 199k | #endif /* !SLJIT_CONFIG_X86_32 */ |
754 | 199k | return; |
755 | 199k | } |
756 | | |
757 | 372M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
758 | 372M | if (SLJIT_UNLIKELY(flags & PATCH_MD)) { |
759 | 0 | SLJIT_ASSERT(!(flags & JUMP_ADDR)); |
760 | 0 | sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); |
761 | 0 | return; |
762 | 0 | } |
763 | 372M | #endif /* SLJIT_CONFIG_X86_64 */ |
764 | | |
765 | 372M | addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); |
766 | | |
767 | 372M | if (flags & PATCH_MB) { |
768 | 85.3M | addr -= sizeof(sljit_s8); |
769 | 85.3M | SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80); |
770 | 85.3M | *(sljit_u8*)jump_addr = U8(addr); |
771 | 85.3M | return; |
772 | 287M | } else if (flags & PATCH_MW) { |
773 | 279M | addr -= sizeof(sljit_s32); |
774 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
775 | | sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); |
776 | | #else /* !SLJIT_CONFIG_X86_32 */ |
777 | 279M | SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); |
778 | 279M | sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr); |
779 | 279M | #endif /* SLJIT_CONFIG_X86_32 */ |
780 | 279M | } |
781 | 372M | } |
782 | | |
783 | | static sljit_u8 *process_extended_label(sljit_u8 *code_ptr, struct sljit_extended_label *ext_label) |
784 | 0 | { |
785 | 0 | sljit_uw mask; |
786 | 0 | sljit_u8 *ptr = code_ptr; |
787 | |
788 | 0 | SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED); |
789 | 0 | mask = ext_label->data; |
790 | |
791 | 0 | code_ptr = (sljit_u8*)(((sljit_uw)code_ptr + mask) & ~mask); |
792 | |
793 | 0 | while (ptr < code_ptr) |
794 | 0 | *ptr++ = NOP; |
795 | |
796 | 0 | return code_ptr; |
797 | 0 | } |
798 | | |
799 | | static void reduce_code_size(struct sljit_compiler *compiler) |
800 | 62.8k | { |
801 | 62.8k | struct sljit_label *label; |
802 | 62.8k | struct sljit_jump *jump; |
803 | 62.8k | sljit_uw next_label_size; |
804 | 62.8k | sljit_uw next_jump_addr; |
805 | 62.8k | sljit_uw next_min_addr; |
806 | 62.8k | sljit_uw size_reduce = 0; |
807 | 62.8k | sljit_sw diff; |
808 | 62.8k | sljit_uw type; |
809 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
810 | | sljit_uw size_reduce_max; |
811 | | #endif /* SLJIT_DEBUG */ |
812 | | |
813 | 62.8k | label = compiler->labels; |
814 | 62.8k | jump = compiler->jumps; |
815 | | |
816 | 62.8k | next_label_size = SLJIT_GET_NEXT_SIZE(label); |
817 | 62.8k | next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); |
818 | | |
819 | 483M | while (1) { |
820 | 483M | next_min_addr = next_label_size; |
821 | 483M | if (next_jump_addr < next_min_addr) |
822 | 356M | next_min_addr = next_jump_addr; |
823 | | |
824 | 483M | if (next_min_addr == SLJIT_MAX_ADDRESS) |
825 | 62.8k | break; |
826 | | |
827 | 483M | if (next_min_addr == next_label_size) { |
828 | 127M | label->size -= size_reduce; |
829 | | |
830 | 127M | label = label->next; |
831 | 127M | next_label_size = SLJIT_GET_NEXT_SIZE(label); |
832 | 127M | } |
833 | | |
834 | 483M | if (next_min_addr != next_jump_addr) |
835 | 111M | continue; |
836 | | |
837 | 372M | jump->addr -= size_reduce; |
838 | 372M | if (!(jump->flags & JUMP_MOV_ADDR)) { |
839 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
840 | | size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE); |
841 | | #endif /* SLJIT_DEBUG */ |
842 | | |
843 | 372M | if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { |
844 | 372M | if (jump->flags & JUMP_ADDR) { |
845 | 8.04M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
846 | 8.04M | if (jump->u.target <= 0xffffffffl) |
847 | 0 | size_reduce += sizeof(sljit_s32); |
848 | 8.04M | #endif /* SLJIT_CONFIG_X86_64 */ |
849 | 364M | } else { |
850 | | /* Unit size: instruction. */ |
851 | 364M | diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; |
852 | 364M | if (jump->u.label->size > jump->addr) { |
853 | 335M | SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); |
854 | 335M | diff -= (sljit_sw)size_reduce; |
855 | 335M | } |
856 | 364M | type = jump->flags >> TYPE_SHIFT; |
857 | | |
858 | 364M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
859 | 364M | if (type == SLJIT_JUMP) { |
860 | 90.2M | if (diff <= 0x7f + 2 && diff >= -0x80 + 2) |
861 | 5.52M | size_reduce += JUMP_MAX_SIZE - 2; |
862 | 84.6M | else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) |
863 | 84.6M | size_reduce += JUMP_MAX_SIZE - 5; |
864 | 274M | } else if (type < SLJIT_JUMP) { |
865 | 255M | if (diff <= 0x7f + 2 && diff >= -0x80 + 2) |
866 | 62.6M | size_reduce += CJUMP_MAX_SIZE - 2; |
867 | 192M | else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6) |
868 | 192M | size_reduce += CJUMP_MAX_SIZE - 6; |
869 | 255M | } else { |
870 | 18.8M | if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) |
871 | 18.8M | size_reduce += JUMP_MAX_SIZE - 5; |
872 | 18.8M | } |
873 | | #else /* !SLJIT_CONFIG_X86_64 */ |
874 | | if (type == SLJIT_JUMP) { |
875 | | if (diff <= 0x7f + 2 && diff >= -0x80 + 2) |
876 | | size_reduce += JUMP_MAX_SIZE - 2; |
877 | | } else if (type < SLJIT_JUMP) { |
878 | | if (diff <= 0x7f + 2 && diff >= -0x80 + 2) |
879 | | size_reduce += CJUMP_MAX_SIZE - 2; |
880 | | } |
881 | | #endif /* SLJIT_CONFIG_X86_64 */ |
882 | 364M | } |
883 | 372M | } |
884 | | |
885 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
886 | | jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; |
887 | | #endif /* SLJIT_DEBUG */ |
888 | 372M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
889 | 372M | } else { |
890 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
891 | | size_reduce_max = size_reduce + 10; |
892 | | #endif /* SLJIT_DEBUG */ |
893 | | |
894 | 199k | if (!(jump->flags & JUMP_ADDR)) { |
895 | 199k | diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3); |
896 | | |
897 | 199k | if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) |
898 | 199k | size_reduce += 3; |
899 | 199k | } else if (jump->u.target <= 0xffffffffl) |
900 | 0 | size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5; |
901 | | |
902 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
903 | | jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; |
904 | | #endif /* SLJIT_DEBUG */ |
905 | 199k | #endif /* SLJIT_CONFIG_X86_64 */ |
906 | 199k | } |
907 | | |
908 | 372M | jump = jump->next; |
909 | 372M | next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); |
910 | 372M | } |
911 | | |
912 | 62.8k | compiler->size -= size_reduce; |
913 | 62.8k | } |
914 | | |
915 | | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) |
916 | 62.8k | { |
917 | 62.8k | struct sljit_memory_fragment *buf; |
918 | 62.8k | sljit_u8 *code; |
919 | 62.8k | sljit_u8 *code_ptr; |
920 | 62.8k | sljit_u8 *buf_ptr; |
921 | 62.8k | sljit_u8 *buf_end; |
922 | 62.8k | sljit_u8 len; |
923 | 62.8k | sljit_sw executable_offset; |
924 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
925 | | sljit_uw addr; |
926 | | #endif /* SLJIT_DEBUG */ |
927 | | |
928 | 62.8k | struct sljit_label *label; |
929 | 62.8k | struct sljit_jump *jump; |
930 | 62.8k | struct sljit_const *const_; |
931 | | |
932 | 62.8k | CHECK_ERROR_PTR(); |
933 | 62.8k | CHECK_PTR(check_sljit_generate_code(compiler)); |
934 | | |
935 | 62.8k | reduce_code_size(compiler); |
936 | | |
937 | | /* Second code generation pass. */ |
938 | 62.8k | code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset); |
939 | 62.8k | PTR_FAIL_WITH_EXEC_IF(code); |
940 | | |
941 | 62.8k | reverse_buf(compiler); |
942 | 62.8k | buf = compiler->buf; |
943 | | |
944 | 62.8k | code_ptr = code; |
945 | 62.8k | label = compiler->labels; |
946 | 62.8k | jump = compiler->jumps; |
947 | 62.8k | const_ = compiler->consts; |
948 | | |
949 | 1.86M | do { |
950 | 1.86M | buf_ptr = buf->memory; |
951 | 1.86M | buf_end = buf_ptr + buf->used_size; |
952 | 1.59G | do { |
953 | 1.59G | len = *buf_ptr++; |
954 | 1.59G | SLJIT_ASSERT(len > 0); |
955 | 1.59G | if (len < SLJIT_INST_CONST) { |
956 | | /* The code is already generated. */ |
957 | 1.09G | SLJIT_MEMCPY(code_ptr, buf_ptr, len); |
958 | 1.09G | code_ptr += len; |
959 | 1.09G | buf_ptr += len; |
960 | 1.09G | } else { |
961 | 500M | switch (len) { |
962 | 127M | case SLJIT_INST_LABEL: |
963 | 127M | if (label->u.index >= SLJIT_LABEL_ALIGNED) |
964 | 0 | code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label); |
965 | | |
966 | 127M | label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); |
967 | 127M | label->size = (sljit_uw)(code_ptr - code); |
968 | 127M | label = label->next; |
969 | 127M | break; |
970 | 372M | case SLJIT_INST_JUMP: |
971 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
972 | | addr = (sljit_uw)code_ptr; |
973 | | #endif /* SLJIT_DEBUG */ |
974 | 372M | if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) |
975 | 372M | code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset); |
976 | 0 | else { |
977 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
978 | | code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset); |
979 | | #else /* !SLJIT_CONFIG_X86_32 */ |
980 | 0 | code_ptr = detect_far_jump_type(jump, code_ptr); |
981 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
982 | 0 | } |
983 | | |
984 | 372M | SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0xff)); |
985 | 372M | jump = jump->next; |
986 | 372M | break; |
987 | 199k | case SLJIT_INST_MOV_ADDR: |
988 | 199k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
989 | 199k | code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset); |
990 | 199k | #endif /* SLJIT_CONFIG_X86_64 */ |
991 | 199k | jump->addr = (sljit_uw)code_ptr; |
992 | 199k | jump = jump->next; |
993 | 199k | break; |
994 | 0 | default: |
995 | 0 | SLJIT_ASSERT(len == SLJIT_INST_CONST); |
996 | 0 | const_->addr = (sljit_uw)code_ptr; |
997 | 0 | const_ = const_->next; |
998 | 0 | break; |
999 | 500M | } |
1000 | 500M | } |
1001 | 1.59G | } while (buf_ptr < buf_end); |
1002 | | |
1003 | 1.86M | SLJIT_ASSERT(buf_ptr == buf_end); |
1004 | 1.86M | buf = buf->next; |
1005 | 1.86M | } while (buf); |
1006 | | |
1007 | 62.8k | SLJIT_ASSERT(!label); |
1008 | 62.8k | SLJIT_ASSERT(!jump); |
1009 | 62.8k | SLJIT_ASSERT(!const_); |
1010 | 62.8k | SLJIT_ASSERT(code_ptr <= code + compiler->size); |
1011 | | |
1012 | 62.8k | jump = compiler->jumps; |
1013 | 372M | while (jump) { |
1014 | 372M | generate_jump_or_mov_addr(jump, executable_offset); |
1015 | 372M | jump = jump->next; |
1016 | 372M | } |
1017 | | |
1018 | 62.8k | compiler->error = SLJIT_ERR_COMPILED; |
1019 | 62.8k | compiler->executable_offset = executable_offset; |
1020 | 62.8k | compiler->executable_size = (sljit_uw)(code_ptr - code); |
1021 | | |
1022 | 62.8k | code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); |
1023 | | |
1024 | 62.8k | SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1); |
1025 | 62.8k | return (void*)code; |
1026 | 62.8k | } |
1027 | | |
1028 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) |
1029 | 91.3M | { |
1030 | 91.3M | switch (feature_type) { |
1031 | 0 | case SLJIT_HAS_FPU: |
1032 | | #ifdef SLJIT_IS_FPU_AVAILABLE |
1033 | | return (SLJIT_IS_FPU_AVAILABLE) != 0; |
1034 | | #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
1035 | | if (cpu_feature_list == 0) |
1036 | | get_cpu_features(); |
1037 | | return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; |
1038 | | #else /* SLJIT_DETECT_SSE2 */ |
1039 | 0 | return 1; |
1040 | 0 | #endif /* SLJIT_DETECT_SSE2 */ |
1041 | | |
1042 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1043 | | case SLJIT_HAS_VIRTUAL_REGISTERS: |
1044 | | return 1; |
1045 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1046 | | |
1047 | 0 | case SLJIT_HAS_CLZ: |
1048 | 0 | if (cpu_feature_list == 0) |
1049 | 0 | get_cpu_features(); |
1050 | |
1051 | 0 | return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2; |
1052 | | |
1053 | 0 | case SLJIT_HAS_CTZ: |
1054 | 0 | if (cpu_feature_list == 0) |
1055 | 0 | get_cpu_features(); |
1056 | |
1057 | 0 | return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2; |
1058 | | |
1059 | 91.2M | case SLJIT_HAS_CMOV: |
1060 | 91.2M | if (cpu_feature_list == 0) |
1061 | 0 | get_cpu_features(); |
1062 | 91.2M | return (cpu_feature_list & CPU_FEATURE_CMOV) != 0; |
1063 | | |
1064 | 0 | case SLJIT_HAS_REV: |
1065 | 0 | case SLJIT_HAS_ROT: |
1066 | 0 | case SLJIT_HAS_PREFETCH: |
1067 | 0 | case SLJIT_HAS_COPY_F32: |
1068 | 0 | case SLJIT_HAS_COPY_F64: |
1069 | 0 | case SLJIT_HAS_ATOMIC: |
1070 | 0 | case SLJIT_HAS_MEMORY_BARRIER: |
1071 | 0 | return 1; |
1072 | | |
1073 | 0 | #if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE |
1074 | 0 | case SLJIT_HAS_AVX: |
1075 | 0 | if (cpu_feature_list == 0) |
1076 | 0 | get_cpu_features(); |
1077 | 0 | return (cpu_feature_list & CPU_FEATURE_AVX) != 0; |
1078 | 0 | case SLJIT_HAS_AVX2: |
1079 | 0 | if (cpu_feature_list == 0) |
1080 | 0 | get_cpu_features(); |
1081 | 0 | return (cpu_feature_list & CPU_FEATURE_AVX2) != 0; |
1082 | 69.7k | case SLJIT_HAS_SIMD: |
1083 | 69.7k | if (cpu_feature_list == 0) |
1084 | 0 | get_cpu_features(); |
1085 | 69.7k | return (cpu_feature_list & CPU_FEATURE_SSE41) != 0; |
1086 | 0 | #endif /* SLJIT_IS_FPU_AVAILABLE */ |
1087 | 7.86k | default: |
1088 | 7.86k | return 0; |
1089 | 91.3M | } |
1090 | 91.3M | } |
1091 | | |
1092 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) |
1093 | 0 | { |
1094 | 0 | switch (type) { |
1095 | 0 | case SLJIT_ORDERED_EQUAL: |
1096 | 0 | case SLJIT_UNORDERED_OR_NOT_EQUAL: |
1097 | 0 | return 2; |
1098 | 0 | } |
1099 | 0 | |
1100 | 0 | return 0; |
1101 | 0 | } |
1102 | | |
1103 | | /* --------------------------------------------------------------------- */ |
1104 | | /* Operators */ |
1105 | | /* --------------------------------------------------------------------- */ |
1106 | | |
1107 | 80.9M | #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) |
1108 | | |
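 | | /* Worked example (editorial): the macro packs all four encodings of one |
 | | ALU operation into a single word, using the defines near the top of this |
 | | file: |
 | | |
 | | BINARY_OPCODE(ADD) == (0x05 << 24) | (0x03 << 16) | (0x01 << 8) | 0x00 |
 | | == 0x05030100 |
 | | |
 | | i.e. ADD_EAX_i32, ADD_r_rm, ADD_rm_r and the /digit group code. */ |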
1109 | | #define BINARY_IMM32(op_imm, immw, arg, argw) \ |
1110 | 143M | do { \ |
1111 | 143M | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ |
1112 | 143M | FAIL_IF(!inst); \ |
1113 | 143M | *(inst + 1) |= (op_imm); \ |
1114 | 143M | } while (0) |
1115 | | |
1116 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1117 | | |
1118 | | #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ |
1119 | 233M | do { \ |
1120 | 233M | if (IS_HALFWORD(immw) || compiler->mode32) { \ |
1121 | 143M | BINARY_IMM32(op_imm, immw, arg, argw); \ |
1122 | 143M | } \ |
1123 | 233M | else { \ |
1124 | 89.9M | FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \ |
1125 | 89.9M | inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ |
1126 | 89.9M | FAIL_IF(!inst); \ |
1127 | 89.9M | *inst = (op_mr); \ |
1128 | 89.9M | } \ |
1129 | 233M | } while (0) |
1130 | | |
1131 | | #define BINARY_EAX_IMM(op_eax_imm, immw) \ |
1132 | 161M | FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) |
1133 | | |
1134 | | #else /* !SLJIT_CONFIG_X86_64 */ |
1135 | | |
1136 | | #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ |
1137 | | BINARY_IMM32(op_imm, immw, arg, argw) |
1138 | | |
1139 | | #define BINARY_EAX_IMM(op_eax_imm, immw) \ |
1140 | | FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) |
1141 | | |
1142 | | #endif /* SLJIT_CONFIG_X86_64 */ |
1143 | | |
1144 | | static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte) |
1145 | 167k | { |
1146 | 167k | sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1147 | 167k | FAIL_IF(!inst); |
1148 | 167k | INC_SIZE(1); |
1149 | 167k | *inst = byte; |
1150 | 167k | return SLJIT_SUCCESS; |
1151 | 167k | } |
1152 | | |
1153 | | static sljit_s32 emit_mov(struct sljit_compiler *compiler, |
1154 | | sljit_s32 dst, sljit_sw dstw, |
1155 | | sljit_s32 src, sljit_sw srcw); |
1156 | | |
1157 | | #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ |
1158 | 345M | FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); |
1159 | | |
1160 | | static sljit_s32 emit_groupf(struct sljit_compiler *compiler, |
1161 | | sljit_uw op, |
1162 | | sljit_s32 dst, sljit_s32 src, sljit_sw srcw); |
1163 | | |
1164 | | static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, |
1165 | | sljit_uw op, |
1166 | | sljit_s32 dst, sljit_s32 src, sljit_sw srcw); |
1167 | | |
1168 | | static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, |
1169 | | sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); |
1170 | | |
1171 | | static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, |
1172 | | sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); |
1173 | | |
1174 | | static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, |
1175 | | sljit_s32 src1, sljit_sw src1w, |
1176 | | sljit_s32 src2, sljit_sw src2w); |
1177 | | |
1178 | | static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, |
1179 | | sljit_s32 dst_reg, |
1180 | | sljit_s32 src, sljit_sw srcw); |
1181 | | |
1182 | | static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) |
1183 | 262k | { |
1184 | | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) |
1185 | | /* Emit endbr32/endbr64 when CET is enabled. */ |
1186 | | sljit_u8 *inst; |
1187 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
1188 | | FAIL_IF(!inst); |
1189 | | INC_SIZE(4); |
1190 | | inst[0] = GROUP_F3; |
1191 | | inst[1] = GROUP_0F; |
1192 | | inst[2] = 0x1e; |
1193 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1194 | | inst[3] = 0xfb; |
1195 | | #else /* !SLJIT_CONFIG_X86_32 */ |
1196 | | inst[3] = 0xfa; |
1197 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1198 | | #else /* !SLJIT_CONFIG_X86_CET */ |
1199 | 262k | SLJIT_UNUSED_ARG(compiler); |
1200 | 262k | #endif /* SLJIT_CONFIG_X86_CET */ |
1201 | 262k | return SLJIT_SUCCESS; |
1202 | 262k | } |
1203 | | |
1204 | | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
1205 | | |
1206 | | static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg) |
1207 | | { |
1208 | | sljit_u8 *inst; |
1209 | | sljit_s32 size; |
1210 | | |
1211 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1212 | | size = 5; |
1213 | | #else |
1214 | | size = 4; |
1215 | | #endif |
1216 | | |
1217 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
1218 | | FAIL_IF(!inst); |
1219 | | INC_SIZE(size); |
1220 | | *inst++ = GROUP_F3; |
1221 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1222 | | *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); |
1223 | | #endif |
1224 | | inst[0] = GROUP_0F; |
1225 | | inst[1] = 0x1e; |
1226 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1227 | | inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]); |
1228 | | #else |
1229 | | inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]); |
1230 | | #endif |
1231 | | return SLJIT_SUCCESS; |
1232 | | } |
1233 | | |
1234 | | static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg) |
1235 | | { |
1236 | | sljit_u8 *inst; |
1237 | | sljit_s32 size; |
1238 | | |
1239 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1240 | | size = 5; |
1241 | | #else |
1242 | | size = 4; |
1243 | | #endif |
1244 | | |
1245 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
1246 | | FAIL_IF(!inst); |
1247 | | INC_SIZE(size); |
1248 | | *inst++ = GROUP_F3; |
1249 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1250 | | *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); |
1251 | | #endif |
1252 | | inst[0] = GROUP_0F; |
1253 | | inst[1] = 0xae; |
1254 | | inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7); |
1255 | | return SLJIT_SUCCESS; |
1256 | | } |
1257 | | |
1258 | | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
1259 | | |
1260 | | static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) |
1261 | 62.8k | { |
1262 | | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
1263 | | return _get_ssp() != 0; |
1264 | | #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ |
1265 | 62.8k | return 0; |
1266 | 62.8k | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
1267 | 62.8k | } |
1268 | | |
1269 | | static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, |
1270 | | sljit_s32 src, sljit_sw srcw) |
1271 | 0 | { |
1272 | | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
1273 | | sljit_u8 *inst, *jz_after_cmp_inst; |
1274 | | sljit_uw size_jz_after_cmp_inst; |
1275 | | |
1276 | | sljit_uw size_before_rdssp_inst = compiler->size; |
1277 | | |
1278 | | /* Generate "RDSSP TMP_REG1". */ |
1279 | | FAIL_IF(emit_rdssp(compiler, TMP_REG1)); |
1280 | | |
1281 | | /* Load return address on shadow stack into TMP_REG1. */ |
1282 | | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); |
1283 | | |
1284 | | /* Compare return address against TMP_REG1. */ |
1285 | | FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); |
1286 | | |
1287 | | /* Generate JZ to skip the shadow stack adjustment when the shadow |
1288 | | stack matches the normal stack. */ |
1289 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1290 | | FAIL_IF(!inst); |
1291 | | INC_SIZE(2); |
1292 | | *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10; |
1293 | | size_jz_after_cmp_inst = compiler->size; |
1294 | | jz_after_cmp_inst = inst; |
1295 | | |
1296 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1297 | | /* REX_W is not necessary. */ |
1298 | | compiler->mode32 = 1; |
1299 | | #endif |
1300 | | /* Load 1 into TMP_REG1. */ |
1301 | | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); |
1302 | | |
1303 | | /* Generate "INCSSP TMP_REG1". */ |
1304 | | FAIL_IF(emit_incssp(compiler, TMP_REG1)); |
1305 | | |
1306 | | /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */ |
1307 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1308 | | FAIL_IF(!inst); |
1309 | | INC_SIZE(2); |
1310 | | inst[0] = JMP_i8; |
1311 | | inst[1] = size_before_rdssp_inst - compiler->size; |
1312 | | |
1313 | | *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst; |
1314 | | #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ |
1315 | 0 | SLJIT_UNUSED_ARG(compiler); |
1316 | 0 | SLJIT_UNUSED_ARG(src); |
1317 | 0 | SLJIT_UNUSED_ARG(srcw); |
1318 | 0 | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
1319 | 0 | return SLJIT_SUCCESS; |
1320 | 0 | } |
1321 | | |
1322 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1323 | | #include "sljitNativeX86_32.c" |
1324 | | #else |
1325 | | #include "sljitNativeX86_64.c" |
1326 | | #endif |
1327 | | |
1328 | | static sljit_s32 emit_mov(struct sljit_compiler *compiler, |
1329 | | sljit_s32 dst, sljit_sw dstw, |
1330 | | sljit_s32 src, sljit_sw srcw) |
1331 | 353M | { |
1332 | 353M | sljit_u8* inst; |
1333 | | |
1334 | 353M | if (FAST_IS_REG(src)) { |
1335 | 109M | inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); |
1336 | 109M | FAIL_IF(!inst); |
1337 | 109M | *inst = MOV_rm_r; |
1338 | 109M | return SLJIT_SUCCESS; |
1339 | 109M | } |
1340 | | |
1341 | 244M | if (src == SLJIT_IMM) { |
1342 | 58.6M | if (FAST_IS_REG(dst)) { |
1343 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1344 | | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1345 | | #else |
1346 | 4.73M | if (!compiler->mode32) { |
1347 | 4.68M | if (NOT_HALFWORD(srcw)) |
1348 | 608k | return emit_load_imm64(compiler, dst, srcw); |
1349 | 4.68M | } |
1350 | 46.4k | else |
1351 | 46.4k | return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw); |
1352 | 4.73M | #endif |
1353 | 4.73M | } |
1354 | 57.9M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1355 | 57.9M | if (!compiler->mode32 && NOT_HALFWORD(srcw)) { |
1356 | | /* Immediate-to-memory move. Only the SLJIT_MOV operation copies |
1357 | | an immediate directly into memory, so TMP_REG1 can be used. */ |
1358 | 24.5k | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); |
1359 | 24.5k | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1360 | 24.5k | FAIL_IF(!inst); |
1361 | 24.5k | *inst = MOV_rm_r; |
1362 | 24.5k | return SLJIT_SUCCESS; |
1363 | 24.5k | } |
1364 | 57.9M | #endif |
1365 | 57.9M | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); |
1366 | 57.9M | FAIL_IF(!inst); |
1367 | 57.9M | *inst = MOV_rm_i32; |
1368 | 57.9M | return SLJIT_SUCCESS; |
1369 | 57.9M | } |
1370 | 185M | if (FAST_IS_REG(dst)) { |
1371 | 183M | inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); |
1372 | 183M | FAIL_IF(!inst); |
1373 | 183M | *inst = MOV_r_rm; |
1374 | 183M | return SLJIT_SUCCESS; |
1375 | 183M | } |
1376 | | |
1377 | | /* Memory-to-memory move. Only the SLJIT_MOV operation copies |
1378 | | data from memory to memory, so TMP_REG1 can be used. */ |
1379 | 1.54M | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); |
1380 | 1.54M | FAIL_IF(!inst); |
1381 | 1.54M | *inst = MOV_r_rm; |
1382 | 1.54M | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1383 | 1.54M | FAIL_IF(!inst); |
1384 | 1.54M | *inst = MOV_rm_r; |
1385 | 1.54M | return SLJIT_SUCCESS; |
1386 | 1.54M | } |
1387 | | |
1388 | | static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, |
1389 | | sljit_s32 dst_reg, |
1390 | | sljit_s32 src, sljit_sw srcw) |
1391 | 0 | { |
1392 | 0 | sljit_u8* inst; |
1393 | 0 | sljit_uw size; |
1394 | |
1395 | 0 | SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); |
1396 | |
1397 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1398 | 0 | FAIL_IF(!inst); |
1399 | 0 | INC_SIZE(2); |
1400 | 0 | inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10); |
1401 | |
1402 | 0 | size = compiler->size; |
1403 | 0 | EMIT_MOV(compiler, dst_reg, 0, src, srcw); |
1404 | |
1405 | 0 | inst[1] = U8(compiler->size - size); |
1406 | 0 | return SLJIT_SUCCESS; |
1407 | 0 | } |
1408 | | |
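 | | /* Editorial note: this is the classic CMOV fallback. The condition is |
 | | inverted (type ^ 0x1) and emitted as a short jump over the MOV, so e.g. a |
 | | conditional move on SLJIT_EQUAL becomes: |
 | | |
 | | 75 xx   jne  .skip   (xx patched to the MOV's length via inst[1]) |
 | | 8B ..   mov  dst_reg, src |
 | | .skip: |
 | | */ |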
1409 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) |
1410 | 262k | { |
1411 | 262k | sljit_u8 *inst; |
1412 | 262k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1413 | 262k | sljit_uw size; |
1414 | 262k | #endif |
1415 | | |
1416 | 262k | CHECK_ERROR(); |
1417 | 262k | CHECK(check_sljit_emit_op0(compiler, op)); |
1418 | | |
1419 | 262k | switch (GET_OPCODE(op)) { |
1420 | 0 | case SLJIT_BREAKPOINT: |
1421 | 0 | return emit_byte(compiler, INT3); |
1422 | 0 | case SLJIT_NOP: |
1423 | 0 | return emit_byte(compiler, NOP); |
1424 | 0 | case SLJIT_LMUL_UW: |
1425 | 0 | case SLJIT_LMUL_SW: |
1426 | 0 | case SLJIT_DIVMOD_UW: |
1427 | 0 | case SLJIT_DIVMOD_SW: |
1428 | 0 | case SLJIT_DIV_UW: |
1429 | 0 | case SLJIT_DIV_SW: |
1430 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1431 | | #ifdef _WIN64 |
1432 | | SLJIT_ASSERT( |
1433 | | reg_map[SLJIT_R0] == 0 |
1434 | | && reg_map[SLJIT_R1] == 2 |
1435 | | && reg_map[TMP_REG1] > 7); |
1436 | | #else |
1437 | 0 | SLJIT_ASSERT( |
1438 | 0 | reg_map[SLJIT_R0] == 0 |
1439 | 0 | && reg_map[SLJIT_R1] < 7 |
1440 | 0 | && reg_map[TMP_REG1] == 2); |
1441 | 0 | #endif |
1442 | 0 | compiler->mode32 = op & SLJIT_32; |
1443 | 0 | #endif |
1444 | 0 | SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); |
1445 | |
1446 | 0 | op = GET_OPCODE(op); |
1447 | 0 | if ((op | 0x2) == SLJIT_DIV_UW) { |
1448 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) |
1449 | | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); |
1450 | | inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); |
1451 | | #else |
1452 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); |
1453 | 0 | #endif |
1454 | 0 | FAIL_IF(!inst); |
1455 | 0 | *inst = XOR_r_rm; |
1456 | 0 | } |
1457 | | |
1458 | 0 | if ((op | 0x2) == SLJIT_DIV_SW) { |
1459 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) |
1460 | | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); |
1461 | | #endif |
1462 | | 
1463 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1464 | | FAIL_IF(emit_byte(compiler, CDQ)); |
1465 | | #else |
1466 | 0 | if (!compiler->mode32) { |
1467 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1468 | 0 | FAIL_IF(!inst); |
1469 | 0 | INC_SIZE(2); |
1470 | 0 | inst[0] = REX_W; |
1471 | 0 | inst[1] = CDQ; |
1472 | 0 | } else |
1473 | 0 | FAIL_IF(emit_byte(compiler, CDQ)); |
1474 | 0 | #endif |
1475 | 0 | } |
1476 | | |
1477 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1478 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1479 | | FAIL_IF(!inst); |
1480 | | INC_SIZE(2); |
1481 | | inst[0] = GROUP_F7; |
1482 | | inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); |
1483 | | #else /* !SLJIT_CONFIG_X86_32 */ |
1484 | | #ifdef _WIN64 |
1485 | | size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; |
1486 | | #else /* !_WIN64 */ |
1487 | 0 | size = (!compiler->mode32) ? 3 : 2; |
1488 | 0 | #endif /* _WIN64 */ |
1489 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
1490 | 0 | FAIL_IF(!inst); |
1491 | 0 | INC_SIZE(size); |
1492 | | #ifdef _WIN64 |
1493 | | if (!compiler->mode32) |
1494 | | *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); |
1495 | | else if (op >= SLJIT_DIVMOD_UW) |
1496 | | *inst++ = REX_B; |
1497 | | inst[0] = GROUP_F7; |
1498 | | inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); |
1499 | | #else /* !_WIN64 */ |
1500 | 0 | if (!compiler->mode32) |
1501 | 0 | *inst++ = REX_W; |
1502 | 0 | inst[0] = GROUP_F7; |
1503 | 0 | inst[1] = MOD_REG | reg_map[SLJIT_R1]; |
1504 | 0 | #endif /* _WIN64 */ |
1505 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
1506 | 0 | switch (op) { |
1507 | 0 | case SLJIT_LMUL_UW: |
1508 | 0 | inst[1] |= MUL; |
1509 | 0 | break; |
1510 | 0 | case SLJIT_LMUL_SW: |
1511 | 0 | inst[1] |= IMUL; |
1512 | 0 | break; |
1513 | 0 | case SLJIT_DIVMOD_UW: |
1514 | 0 | case SLJIT_DIV_UW: |
1515 | 0 | inst[1] |= DIV; |
1516 | 0 | break; |
1517 | 0 | case SLJIT_DIVMOD_SW: |
1518 | 0 | case SLJIT_DIV_SW: |
1519 | 0 | inst[1] |= IDIV; |
1520 | 0 | break; |
1521 | 0 | } |
1522 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) |
1523 | 0 | if (op <= SLJIT_DIVMOD_SW) |
1524 | 0 | EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); |
1525 | | #else |
1526 | | if (op >= SLJIT_DIV_UW) |
1527 | | EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); |
1528 | | #endif |
1529 | 0 | break; |
1530 | 0 | case SLJIT_MEMORY_BARRIER: |
1531 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); |
1532 | 0 | FAIL_IF(!inst); |
1533 | 0 | INC_SIZE(3); |
1534 | 0 | inst[0] = GROUP_0F; |
1535 | 0 | inst[1] = 0xae; |
1536 | 0 | inst[2] = 0xf0; |
1537 | 0 | return SLJIT_SUCCESS; |
1538 | 199k | case SLJIT_ENDBR: |
1539 | 199k | return emit_endbranch(compiler); |
1540 | 62.8k | case SLJIT_SKIP_FRAMES_BEFORE_RETURN: |
1541 | 62.8k | return skip_frames_before_return(compiler); |
1542 | 262k | } |
1543 | | |
1544 | 0 | return SLJIT_SUCCESS; |
1545 | 262k | } |
1546 | | |
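
All six LMUL/DIVMOD/DIV opcodes above collapse onto the one-byte F7 group: the operation is selected by the ModRM reg field (/4 = mul, /5 = imul, /6 = div, /7 = idiv) and works implicitly on (E/R)DX:(E/R)AX, which is why the switch merely ORs MUL/IMUL/DIV/IDIV into inst[1]. A hedged sketch of that ModRM composition (the enum values follow Intel's /digit notation; the helper name is invented):

    #include <stdint.h>

    enum { F7_MUL = 4, F7_IMUL = 5, F7_DIV = 6, F7_IDIV = 7 };

    /* mod = 11b (register direct), reg = sub-opcode, rm = operand register */
    static uint8_t f7_modrm(unsigned subop, unsigned rm)
    {
        return (uint8_t)(0xC0 | (subop << 3) | (rm & 7));
    }
    /* Example: { 0xF7, f7_modrm(F7_DIV, 1) } encodes "div ecx", i.e.
       EDX:EAX / ECX with the quotient in EAX and the remainder in EDX. */
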
1547 | | static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, |
1548 | | sljit_s32 dst, sljit_sw dstw, |
1549 | | sljit_s32 src, sljit_sw srcw) |
1550 | 2.19M | { |
1551 | 2.19M | sljit_u8* inst; |
1552 | 2.19M | sljit_s32 dst_r; |
1553 | | |
1554 | 2.19M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1555 | 2.19M | compiler->mode32 = 0; |
1556 | 2.19M | #endif |
1557 | | |
1558 | 2.19M | if (src == SLJIT_IMM) { |
1559 | 0 | if (FAST_IS_REG(dst)) { |
1560 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1561 | | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1562 | | #else |
1563 | 0 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); |
1564 | 0 | FAIL_IF(!inst); |
1565 | 0 | *inst = MOV_rm_i32; |
1566 | 0 | return SLJIT_SUCCESS; |
1567 | 0 | #endif |
1568 | 0 | } |
1569 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); |
1570 | 0 | FAIL_IF(!inst); |
1571 | 0 | *inst = MOV_rm8_i8; |
1572 | 0 | return SLJIT_SUCCESS; |
1573 | 0 | } |
1574 | | |
1575 | 2.19M | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1576 | | |
1577 | 2.19M | if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { |
1578 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1579 | | if (reg_map[src] >= 4) { |
1580 | | SLJIT_ASSERT(dst_r == TMP_REG1); |
1581 | | EMIT_MOV(compiler, TMP_REG1, 0, src, 0); |
1582 | | } else |
1583 | | dst_r = src; |
1584 | | #else |
1585 | 0 | dst_r = src; |
1586 | 0 | #endif |
1587 | 2.19M | } else { |
1588 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1589 | | if (FAST_IS_REG(src) && reg_map[src] >= 4) { |
1590 | | /* Both src and dst are registers. */ |
1591 | | SLJIT_ASSERT(FAST_IS_REG(dst)); |
1592 | | |
1593 | | if (src == dst && !sign) { |
1594 | | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); |
1595 | | FAIL_IF(!inst); |
1596 | | *(inst + 1) |= AND; |
1597 | | return SLJIT_SUCCESS; |
1598 | | } |
1599 | | |
1600 | | EMIT_MOV(compiler, TMP_REG1, 0, src, 0); |
1601 | | src = TMP_REG1; |
1602 | | srcw = 0; |
1603 | | } |
1604 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1605 | | |
1606 | | /* Here src is either a memory operand or a register with reg_map[src] < 4 on x86_32. */ |
1607 | 2.19M | FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw)); |
1608 | 2.19M | } |
1609 | | |
1610 | 2.19M | if (dst & SLJIT_MEM) { |
1611 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); |
1612 | 0 | FAIL_IF(!inst); |
1613 | 0 | *inst = MOV_rm8_r8; |
1614 | 0 | } |
1615 | | |
1616 | 2.19M | return SLJIT_SUCCESS; |
1617 | 2.19M | } |
1618 | | |
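
The x86_32 special cases above exist because only AL, CL, DL and BL (reg_map < 4) are byte-addressable without a REX prefix; higher-mapped registers are staged through TMP_REG1, and the src == dst unsigned case avoids the temporary entirely by masking in place. The three strategies, as plain C semantics (illustrative only):

    #include <stdint.h>

    static uint32_t u8_via_movzx(uint32_t src) { return (uint8_t)src; } /* movzx r32, r/m8 */
    static int32_t  s8_via_movsx(uint32_t src) { return (int8_t)src; }  /* movsx r32, r/m8 */
    static uint32_t u8_in_place(uint32_t reg)  { return reg & 0xff; }   /* and r32, 0xff */
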
1619 | | static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, |
1620 | | sljit_s32 src, sljit_sw srcw) |
1621 | 0 | { |
1622 | 0 | sljit_u8* inst; |
1623 | | 
1624 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1625 | 0 | compiler->mode32 = 1; |
1626 | 0 | #endif |
1627 | | 
1628 | 0 | inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); |
1629 | 0 | FAIL_IF(!inst); |
1630 | 0 | inst[0] = GROUP_0F; |
1631 | 0 | inst[1] = PREFETCH; |
1632 | | 
1633 | 0 | if (op == SLJIT_PREFETCH_L1) |
1634 | 0 | inst[2] |= (1 << 3); |
1635 | 0 | else if (op == SLJIT_PREFETCH_L2) |
1636 | 0 | inst[2] |= (2 << 3); |
1637 | 0 | else if (op == SLJIT_PREFETCH_L3) |
1638 | 0 | inst[2] |= (3 << 3); |
1639 | | 
1640 | 0 | return SLJIT_SUCCESS; |
1641 | 0 | } |
1642 | | |
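
PREFETCHh is encoded as 0F 18 /r, and the hint lives in the ModRM reg field: /0 = prefetchnta, /1 = prefetcht0 (L1), /2 = prefetcht1 (L2), /3 = prefetcht2 (L3). That is what the inst[2] |= (n << 3) lines above edit. A small sketch under those assumptions (helper name invented):

    #include <stdint.h>

    /* Rewrite bits 5:3 of a ModRM byte to select the prefetch hint (0..3). */
    static uint8_t set_prefetch_hint(uint8_t modrm, unsigned hint)
    {
        return (uint8_t)((modrm & ~0x38u) | ((hint & 3u) << 3));
    }
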
1643 | | static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, |
1644 | | sljit_s32 dst, sljit_sw dstw, |
1645 | | sljit_s32 src, sljit_sw srcw) |
1646 | 1.13M | { |
1647 | 1.13M | sljit_u8* inst; |
1648 | 1.13M | sljit_s32 dst_r; |
1649 | | |
1650 | 1.13M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1651 | 1.13M | compiler->mode32 = 0; |
1652 | 1.13M | #endif |
1653 | | |
1654 | 1.13M | if (src == SLJIT_IMM) { |
1655 | 0 | if (FAST_IS_REG(dst)) { |
1656 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1657 | | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1658 | | #else |
1659 | 0 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); |
1660 | 0 | FAIL_IF(!inst); |
1661 | 0 | *inst = MOV_rm_i32; |
1662 | 0 | return SLJIT_SUCCESS; |
1663 | 0 | #endif |
1664 | 0 | } |
1665 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); |
1666 | 0 | FAIL_IF(!inst); |
1667 | 0 | *inst = MOV_rm_i32; |
1668 | 0 | return SLJIT_SUCCESS; |
1669 | 0 | } |
1670 | | |
1671 | 1.13M | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1672 | | |
1673 | 1.13M | if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) |
1674 | 0 | dst_r = src; |
1675 | 1.13M | else |
1676 | 1.13M | FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw)); |
1677 | | |
1678 | 1.13M | if (dst & SLJIT_MEM) { |
1679 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); |
1680 | 0 | FAIL_IF(!inst); |
1681 | 0 | *inst = MOV_rm_r; |
1682 | 0 | } |
1683 | | |
1684 | 1.13M | return SLJIT_SUCCESS; |
1685 | 1.13M | } |
1686 | | |
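
Half-word stores above reuse the 32-bit MOV opcodes under the 0x66 operand-size prefix (the EX86_PREF_66 flag); loads instead widen through MOVZX/MOVSX, so they need no prefix. One illustrative byte sequence (not emitted by this file verbatim):

    /* 66 89 01: mov word ptr [ecx], ax -- the 0x66 prefix turns the
       32-bit store (89 /r) into a 16-bit one. */
    static const unsigned char mov_m16_r16[] = { 0x66, 0x89, 0x01 };
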
1687 | | static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, |
1688 | | sljit_s32 dst, sljit_sw dstw, |
1689 | | sljit_s32 src, sljit_sw srcw) |
1690 | 28.9k | { |
1691 | 28.9k | sljit_u8* inst; |
1692 | | |
1693 | 28.9k | if (dst == src && dstw == srcw) { |
1694 | | /* Same input and output */ |
1695 | 28.9k | inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); |
1696 | 28.9k | FAIL_IF(!inst); |
1697 | 28.9k | inst[0] = GROUP_F7; |
1698 | 28.9k | inst[1] |= opcode; |
1699 | 28.9k | return SLJIT_SUCCESS; |
1700 | 28.9k | } |
1701 | | |
1702 | 0 | if (FAST_IS_REG(dst)) { |
1703 | 0 | EMIT_MOV(compiler, dst, 0, src, srcw); |
1704 | 0 | inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); |
1705 | 0 | FAIL_IF(!inst); |
1706 | 0 | inst[0] = GROUP_F7; |
1707 | 0 | inst[1] |= opcode; |
1708 | 0 | return SLJIT_SUCCESS; |
1709 | 0 | } |
1710 | | |
1711 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); |
1712 | 0 | inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); |
1713 | 0 | FAIL_IF(!inst); |
1714 | 0 | inst[0] = GROUP_F7; |
1715 | 0 | inst[1] |= opcode; |
1716 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1717 | 0 | return SLJIT_SUCCESS; |
1718 | 0 | } |
1719 | | |
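
emit_unary leans on the same F7 group as the multiply/divide code in sljit_emit_op0, this time with the NOT (/2) and NEG (/3) sub-opcodes ORed into the ModRM byte left behind by emit_x86_instruction. Two concrete encodings, for illustration:

    /* F7 /2 = not, F7 /3 = neg (ModRM: mod=11, reg=/digit, rm=register). */
    static const unsigned char not_ecx[] = { 0xF7, 0xD1 }; /* not ecx */
    static const unsigned char neg_eax[] = { 0xF7, 0xD8 }; /* neg eax */
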
1720 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1721 | | static const sljit_sw emit_clz_arg = 32 + 31; |
1722 | | static const sljit_sw emit_ctz_arg = 32; |
1723 | | #endif |
1724 | | |
1725 | | static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, |
1726 | | sljit_s32 dst, sljit_sw dstw, |
1727 | | sljit_s32 src, sljit_sw srcw) |
1728 | 0 | { |
1729 | 0 | sljit_u8* inst; |
1730 | 0 | sljit_s32 dst_r; |
1731 | 0 | sljit_sw max; |
1732 | | 
1733 | 0 | SLJIT_ASSERT(cpu_feature_list != 0); |
1734 | | 
1735 | 0 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1736 | | 
1737 | 0 | if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) { |
1738 | 0 | FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw)); |
1739 | | 
1740 | 0 | if (dst & SLJIT_MEM) |
1741 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1742 | 0 | return SLJIT_SUCCESS; |
1743 | 0 | } |
1744 | | |
1745 | 0 | FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw)); |
1746 | | |
1747 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1748 | | max = is_clz ? (32 + 31) : 32; |
1749 | | |
1750 | | if (cpu_feature_list & CPU_FEATURE_CMOV) { |
1751 | | if (dst_r != TMP_REG1) { |
1752 | | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max); |
1753 | | inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); |
1754 | | } |
1755 | | else |
1756 | | inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg); |
1757 | | |
1758 | | FAIL_IF(!inst); |
1759 | | inst[0] = GROUP_0F; |
1760 | | inst[1] = CMOVE_r_rm; |
1761 | | } |
1762 | | else |
1763 | | FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); |
1764 | | |
1765 | | if (is_clz) { |
1766 | | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); |
1767 | | FAIL_IF(!inst); |
1768 | | *(inst + 1) |= XOR; |
1769 | | } |
1770 | | #else |
1771 | 0 | if (is_clz) |
1772 | 0 | max = compiler->mode32 ? (32 + 31) : (64 + 63); |
1773 | 0 | else |
1774 | 0 | max = compiler->mode32 ? 32 : 64; |
1775 | | 
1776 | 0 | if (cpu_feature_list & CPU_FEATURE_CMOV) { |
1777 | 0 | EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max); |
1778 | 0 | FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0)); |
1779 | 0 | } else |
1780 | 0 | FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); |
1781 | | |
1782 | 0 | if (is_clz) { |
1783 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0); |
1784 | 0 | FAIL_IF(!inst); |
1785 | 0 | *(inst + 1) |= XOR; |
1786 | 0 | } |
1787 | 0 | #endif |
1788 | | |
1789 | 0 | if (dst & SLJIT_MEM) |
1790 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1791 | 0 | return SLJIT_SUCCESS; |
1792 | 0 | } |
1793 | | |
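
Without LZCNT/TZCNT, CLZ is synthesized from BSR: BSR returns the index of the highest set bit but leaves the destination undefined (with ZF set) for a zero input, so the code CMOVs in a fallback constant and then XORs with 31 (or 63), exploiting clz(x) = 31 - bsr(x) = bsr(x) ^ 31 for 0 <= bsr(x) <= 31. A worked 32-bit sketch of the same arithmetic:

    #include <stdint.h>

    /* Mirror of the BSR + CMOVE + XOR sequence above, in plain C. */
    static unsigned clz32_via_bsr(uint32_t x)
    {
        unsigned idx = 32 + 31;        /* cmove: fallback when x == 0 (ZF set) */
        if (x != 0) {
            idx = 31;                  /* software stand-in for bsr */
            while (!(x & 0x80000000u)) {
                x <<= 1;
                idx--;
            }
        }
        return idx ^ 31;               /* nonzero: 31 - bsr(x); zero: 63 ^ 31 == 32 */
    }
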
1794 | | static sljit_s32 emit_bswap(struct sljit_compiler *compiler, |
1795 | | sljit_s32 op, |
1796 | | sljit_s32 dst, sljit_sw dstw, |
1797 | | sljit_s32 src, sljit_sw srcw) |
1798 | 0 | { |
1799 | 0 | sljit_u8 *inst; |
1800 | 0 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1801 | 0 | sljit_uw size; |
1802 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1803 | 0 | sljit_u8 rex = 0; |
1804 | | #else /* !SLJIT_CONFIG_X86_64 */ |
1805 | | sljit_s32 dst_is_ereg = op & SLJIT_32; |
1806 | | #endif /* SLJIT_CONFIG_X86_64 */ |
1807 | | 
1808 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1809 | 0 | if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32) |
1810 | 0 | compiler->mode32 = 1; |
1811 | | #else /* !SLJIT_CONFIG_X86_64 */ |
1812 | | op &= ~SLJIT_32; |
1813 | | #endif /* SLJIT_CONFIG_X86_64 */ |
1814 | | 
1815 | 0 | if (src != dst_r) { |
1816 | | /* Only the lower 16 bits are read for eregs. */ |
1817 | 0 | if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) |
1818 | 0 | FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw)); |
1819 | 0 | else |
1820 | 0 | EMIT_MOV(compiler, dst_r, 0, src, srcw); |
1821 | 0 | } |
1822 | | |
1823 | 0 | size = 2; |
1824 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1825 | 0 | if (!compiler->mode32) |
1826 | 0 | rex = REX_W; |
1827 | | 
1828 | 0 | if (reg_map[dst_r] >= 8) |
1829 | 0 | rex |= REX_B; |
1830 | | 
1831 | 0 | if (rex != 0) |
1832 | 0 | size++; |
1833 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
1834 | | 
1835 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
1836 | 0 | FAIL_IF(!inst); |
1837 | 0 | INC_SIZE(size); |
1838 | | 
1839 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1840 | 0 | if (rex != 0) |
1841 | 0 | *inst++ = rex; |
1842 | | 
1843 | 0 | inst[0] = GROUP_0F; |
1844 | 0 | inst[1] = BSWAP_r | reg_lmap[dst_r]; |
1845 | | #else /* !SLJIT_CONFIG_X86_64 */ |
1846 | | inst[0] = GROUP_0F; |
1847 | | inst[1] = BSWAP_r | reg_map[dst_r]; |
1848 | | #endif /* SLJIT_CONFIG_X86_64 */ |
1849 | | 
1850 | 0 | if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { |
1851 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1852 | 0 | size = compiler->mode32 ? 16 : 48; |
1853 | | #else /* !SLJIT_CONFIG_X86_64 */ |
1854 | | size = 16; |
1855 | | #endif /* SLJIT_CONFIG_X86_64 */ |
1856 | | 
1857 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0); |
1858 | 0 | FAIL_IF(!inst); |
1859 | 0 | if (op == SLJIT_REV_U16) |
1860 | 0 | inst[1] |= SHR; |
1861 | 0 | else |
1862 | 0 | inst[1] |= SAR; |
1863 | 0 | } |
1864 | | |
1865 | 0 | if (dst & SLJIT_MEM) { |
1866 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1867 | | if (dst_is_ereg) |
1868 | | op = SLJIT_REV; |
1869 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1870 | 0 | if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) |
1871 | 0 | return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0); |
1872 | | |
1873 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
1874 | 0 | } |
1875 | | |
1876 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1877 | 0 | if (op == SLJIT_REV_S32) { |
1878 | 0 | compiler->mode32 = 0; |
1879 | 0 | inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); |
1880 | 0 | FAIL_IF(!inst); |
1881 | 0 | *inst = MOVSXD_r_rm; |
1882 | 0 | } |
1883 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
1884 | | |
1885 | 0 | return SLJIT_SUCCESS; |
1886 | 0 | } |
1887 | | |
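
For REV_U16/REV_S16 the helper byte-swaps the whole register and then shifts the interesting half back down from the top: SHR for the unsigned variant, SAR for the signed one (16 bits in 32-bit mode, 48 in 64-bit mode). The 32-bit case as plain C, for illustration:

    #include <stdint.h>

    static uint32_t bswap32(uint32_t x)
    {
        return (x << 24) | ((x & 0xff00) << 8) | ((x >> 8) & 0xff00) | (x >> 24);
    }
    static uint32_t rev_u16(uint32_t x) { return bswap32(x) >> 16; }          /* shr 16 */
    static int32_t  rev_s16(uint32_t x) { return (int32_t)bswap32(x) >> 16; } /* sar 16 */
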
1888 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, |
1889 | | sljit_s32 dst, sljit_sw dstw, |
1890 | | sljit_s32 src, sljit_sw srcw) |
1891 | 356M | { |
1892 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1893 | | sljit_s32 dst_is_ereg = 0; |
1894 | | #else /* !SLJIT_CONFIG_X86_32 */ |
1895 | 356M | sljit_s32 op_flags = GET_ALL_FLAGS(op); |
1896 | 356M | #endif /* SLJIT_CONFIG_X86_32 */ |
1897 | | |
1898 | 356M | CHECK_ERROR(); |
1899 | 356M | CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); |
1900 | 356M | ADJUST_LOCAL_OFFSET(dst, dstw); |
1901 | 356M | ADJUST_LOCAL_OFFSET(src, srcw); |
1902 | | |
1903 | 356M | CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); |
1904 | 356M | CHECK_EXTRA_REGS(src, srcw, (void)0); |
1905 | 356M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1906 | 356M | compiler->mode32 = op_flags & SLJIT_32; |
1907 | 356M | #endif /* SLJIT_CONFIG_X86_64 */ |
1908 | | |
1909 | 356M | op = GET_OPCODE(op); |
1910 | | |
1911 | 356M | if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { |
1912 | 356M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1913 | 356M | compiler->mode32 = 0; |
1914 | 356M | #endif /* SLJIT_CONFIG_X86_64 */ |
1915 | | |
1916 | 356M | if (FAST_IS_REG(src) && src == dst) { |
1917 | 0 | if (!TYPE_CAST_NEEDED(op)) |
1918 | 0 | return SLJIT_SUCCESS; |
1919 | 0 | } |
1920 | | |
1921 | 356M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1922 | 356M | if (op_flags & SLJIT_32) { |
1923 | 0 | if (src & SLJIT_MEM) { |
1924 | 0 | if (op == SLJIT_MOV_S32) |
1925 | 0 | op = SLJIT_MOV_U32; |
1926 | 0 | } |
1927 | 0 | else if (src == SLJIT_IMM) { |
1928 | 0 | if (op == SLJIT_MOV_U32) |
1929 | 0 | op = SLJIT_MOV_S32; |
1930 | 0 | } |
1931 | 0 | } |
1932 | 356M | #endif /* SLJIT_CONFIG_X86_64 */ |
1933 | | |
1934 | 356M | if (src == SLJIT_IMM) { |
1935 | 72.4M | switch (op) { |
1936 | 0 | case SLJIT_MOV_U8: |
1937 | 0 | srcw = (sljit_u8)srcw; |
1938 | 0 | break; |
1939 | 0 | case SLJIT_MOV_S8: |
1940 | 0 | srcw = (sljit_s8)srcw; |
1941 | 0 | break; |
1942 | 0 | case SLJIT_MOV_U16: |
1943 | 0 | srcw = (sljit_u16)srcw; |
1944 | 0 | break; |
1945 | 0 | case SLJIT_MOV_S16: |
1946 | 0 | srcw = (sljit_s16)srcw; |
1947 | 0 | break; |
1948 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1949 | 15.7M | case SLJIT_MOV_U32: |
1950 | 15.7M | srcw = (sljit_u32)srcw; |
1951 | 15.7M | break; |
1952 | 0 | case SLJIT_MOV_S32: |
1953 | 0 | srcw = (sljit_s32)srcw; |
1954 | 0 | break; |
1955 | 72.4M | #endif /* SLJIT_CONFIG_X86_64 */ |
1956 | 72.4M | } |
1957 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1958 | | if (SLJIT_UNLIKELY(dst_is_ereg)) |
1959 | | return emit_mov(compiler, dst, dstw, src, srcw); |
1960 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1961 | 72.4M | } |
1962 | | |
1963 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1964 | | if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { |
1965 | | SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); |
1966 | | dst = TMP_REG1; |
1967 | | } |
1968 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1969 | | |
1970 | 356M | switch (op) { |
1971 | 214M | case SLJIT_MOV: |
1972 | 214M | case SLJIT_MOV_P: |
1973 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1974 | | case SLJIT_MOV_U32: |
1975 | | case SLJIT_MOV_S32: |
1976 | | case SLJIT_MOV32: |
1977 | | #endif /* SLJIT_CONFIG_X86_32 */ |
1978 | 214M | EMIT_MOV(compiler, dst, dstw, src, srcw); |
1979 | 214M | break; |
1980 | 2.19M | case SLJIT_MOV_U8: |
1981 | 2.19M | FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); |
1982 | 2.19M | break; |
1983 | 2.19M | case SLJIT_MOV_S8: |
1984 | 0 | FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); |
1985 | 0 | break; |
1986 | 1.13M | case SLJIT_MOV_U16: |
1987 | 1.13M | FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); |
1988 | 1.13M | break; |
1989 | 1.13M | case SLJIT_MOV_S16: |
1990 | 0 | FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); |
1991 | 0 | break; |
1992 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1993 | 137M | case SLJIT_MOV_U32: |
1994 | 137M | FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); |
1995 | 137M | break; |
1996 | 137M | case SLJIT_MOV_S32: |
1997 | 152k | FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); |
1998 | 152k | break; |
1999 | 152k | case SLJIT_MOV32: |
2000 | 0 | compiler->mode32 = 1; |
2001 | 0 | EMIT_MOV(compiler, dst, dstw, src, srcw); |
2002 | 0 | compiler->mode32 = 0; |
2003 | 0 | break; |
2004 | 356M | #endif /* SLJIT_CONFIG_X86_64 */ |
2005 | 356M | } |
2006 | | |
2007 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2008 | | if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) |
2009 | | return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); |
2010 | | #endif /* SLJIT_CONFIG_X86_32 */ |
2011 | 356M | return SLJIT_SUCCESS; |
2012 | 356M | } |
2013 | | |
2014 | 0 | switch (op) { |
2015 | 0 | case SLJIT_CLZ: |
2016 | 0 | case SLJIT_CTZ: |
2017 | 0 | return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw); |
2018 | 0 | case SLJIT_REV: |
2019 | 0 | case SLJIT_REV_U16: |
2020 | 0 | case SLJIT_REV_S16: |
2021 | 0 | case SLJIT_REV_U32: |
2022 | 0 | case SLJIT_REV_S32: |
2023 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2024 | | if (dst_is_ereg) |
2025 | | op |= SLJIT_32; |
2026 | | #endif /* SLJIT_CONFIG_X86_32 */ |
2027 | 0 | return emit_bswap(compiler, op, dst, dstw, src, srcw); |
2028 | 0 | } |
2029 | | |
2030 | 0 | return SLJIT_SUCCESS; |
2031 | 0 | } |
2032 | | |
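
Two 64-bit details in the dispatcher above are worth spelling out: immediates are pre-narrowed in the switch so the emitters see the exact bit pattern to store, and SLJIT_MOV_U32 needs no explicit zero-extension because every 32-bit x86-64 instruction already clears the upper half of its destination, while MOV_S32 goes through emit_mov_int's sign-extending path. In C terms (illustrative):

    #include <stdint.h>

    static uint64_t mov_u32(uint32_t v) { return v; } /* mov eax, esi: upper half cleared */
    static int64_t  mov_s32(int32_t v)  { return v; } /* movsxd rax, esi: sign-extended */
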
2033 | | static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, |
2034 | | sljit_u32 op_types, |
2035 | | sljit_s32 dst, sljit_sw dstw, |
2036 | | sljit_s32 src1, sljit_sw src1w, |
2037 | | sljit_s32 src2, sljit_sw src2w) |
2038 | 50.8M | { |
2039 | 50.8M | sljit_u8* inst; |
2040 | 50.8M | sljit_u8 op_eax_imm = U8(op_types >> 24); |
2041 | 50.8M | sljit_u8 op_rm = U8((op_types >> 16) & 0xff); |
2042 | 50.8M | sljit_u8 op_mr = U8((op_types >> 8) & 0xff); |
2043 | 50.8M | sljit_u8 op_imm = U8(op_types & 0xff); |
2044 | | |
2045 | 50.8M | if (dst == src1 && dstw == src1w) { |
2046 | 49.5M | if (src2 == SLJIT_IMM) { |
2047 | 46.7M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2048 | 46.7M | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2049 | | #else |
2050 | | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { |
2051 | | #endif |
2052 | 147k | BINARY_EAX_IMM(op_eax_imm, src2w); |
2053 | 147k | } |
2054 | 46.5M | else { |
2055 | 46.5M | BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); |
2056 | 46.5M | } |
2057 | 46.7M | } |
2058 | 2.79M | else if (FAST_IS_REG(dst)) { |
2059 | 2.79M | inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); |
2060 | 2.79M | FAIL_IF(!inst); |
2061 | 2.79M | *inst = op_rm; |
2062 | 2.79M | } |
2063 | 3.49k | else if (FAST_IS_REG(src2)) { |
2064 | | /* Special exception for sljit_emit_op_flags. */ |
2065 | 3.49k | inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); |
2066 | 3.49k | FAIL_IF(!inst); |
2067 | 3.49k | *inst = op_mr; |
2068 | 3.49k | } |
2069 | 0 | else { |
2070 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); |
2071 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
2072 | 0 | FAIL_IF(!inst); |
2073 | 0 | *inst = op_mr; |
2074 | 0 | } |
2075 | 49.5M | return SLJIT_SUCCESS; |
2076 | 49.5M | } |
2077 | | |
2078 | | /* Only for cumulative operations. */ |
2079 | 1.28M | if (dst == src2 && dstw == src2w) { |
2080 | 0 | if (src1 == SLJIT_IMM) { |
2081 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2082 | 0 | if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { |
2083 | | #else |
2084 | | if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { |
2085 | | #endif |
2086 | 0 | BINARY_EAX_IMM(op_eax_imm, src1w); |
2087 | 0 | } |
2088 | 0 | else { |
2089 | 0 | BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); |
2090 | 0 | } |
2091 | 0 | } |
2092 | 0 | else if (FAST_IS_REG(dst)) { |
2093 | 0 | inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); |
2094 | 0 | FAIL_IF(!inst); |
2095 | 0 | *inst = op_rm; |
2096 | 0 | } |
2097 | 0 | else if (FAST_IS_REG(src1)) { |
2098 | 0 | inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); |
2099 | 0 | FAIL_IF(!inst); |
2100 | 0 | *inst = op_mr; |
2101 | 0 | } |
2102 | 0 | else { |
2103 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2104 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
2105 | 0 | FAIL_IF(!inst); |
2106 | 0 | *inst = op_mr; |
2107 | 0 | } |
2108 | 0 | return SLJIT_SUCCESS; |
2109 | 0 | } |
2110 | | |
2111 | | /* General version. */ |
2112 | 1.28M | if (FAST_IS_REG(dst)) { |
2113 | 1.28M | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2114 | 1.28M | if (src2 == SLJIT_IMM) { |
2115 | 1.28M | BINARY_IMM(op_imm, op_mr, src2w, dst, 0); |
2116 | 1.28M | } |
2117 | 0 | else { |
2118 | 0 | inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); |
2119 | 0 | FAIL_IF(!inst); |
2120 | 0 | *inst = op_rm; |
2121 | 0 | } |
2122 | 1.28M | } |
2123 | 0 | else { |
2124 | | /* This version requires fewer memory writes. */ |
2125 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2126 | 0 | if (src2 == SLJIT_IMM) { |
2127 | 0 | BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); |
2128 | 0 | } |
2129 | 0 | else { |
2130 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2131 | 0 | FAIL_IF(!inst); |
2132 | 0 | *inst = op_rm; |
2133 | 0 | } |
2134 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2135 | 0 | } |
2136 | | |
2137 | 1.28M | return SLJIT_SUCCESS; |
2138 | 1.28M | } |
2139 | | |
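
Both binary emitters take their four related opcodes packed into a single sljit_u32, unpacked above as op_eax_imm (bits 31:24), op_rm (23:16), op_mr (15:8) and op_imm (7:0). A hedged reconstruction of that packing for ADD (the macro name mirrors the BINARY_OPCODE calls in sljit_emit_op2; the layout is read back from the shifts above):

    #include <stdint.h>

    #define PACK_BINARY(eax_imm, rm, mr, imm_digit) \
        (((uint32_t)(eax_imm) << 24) | ((uint32_t)(rm) << 16) | \
         ((uint32_t)(mr) << 8) | (uint32_t)(imm_digit))

    /* ADD: 05 = add eax,imm32; 03 = add r,r/m; 01 = add r/m,r;
       /0 is its digit in the 80/81/83 immediate group. */
    static const uint32_t add_op_types = PACK_BINARY(0x05, 0x03, 0x01, 0 << 3);
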
2140 | | static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, |
2141 | | sljit_u32 op_types, |
2142 | | sljit_s32 dst, sljit_sw dstw, |
2143 | | sljit_s32 src1, sljit_sw src1w, |
2144 | | sljit_s32 src2, sljit_sw src2w) |
2145 | 30.1M | { |
2146 | 30.1M | sljit_u8* inst; |
2147 | 30.1M | sljit_u8 op_eax_imm = U8(op_types >> 24); |
2148 | 30.1M | sljit_u8 op_rm = U8((op_types >> 16) & 0xff); |
2149 | 30.1M | sljit_u8 op_mr = U8((op_types >> 8) & 0xff); |
2150 | 30.1M | sljit_u8 op_imm = U8(op_types & 0xff); |
2151 | | |
2152 | 30.1M | if (dst == src1 && dstw == src1w) { |
2153 | 27.0M | if (src2 == SLJIT_IMM) { |
2154 | 26.8M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2155 | 26.8M | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2156 | | #else |
2157 | | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { |
2158 | | #endif |
2159 | 2.18M | BINARY_EAX_IMM(op_eax_imm, src2w); |
2160 | 2.18M | } |
2161 | 24.6M | else { |
2162 | 24.6M | BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); |
2163 | 24.6M | } |
2164 | 26.8M | } |
2165 | 236k | else if (FAST_IS_REG(dst)) { |
2166 | 236k | inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); |
2167 | 236k | FAIL_IF(!inst); |
2168 | 236k | *inst = op_rm; |
2169 | 236k | } |
2170 | 0 | else if (FAST_IS_REG(src2)) { |
2171 | 0 | inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); |
2172 | 0 | FAIL_IF(!inst); |
2173 | 0 | *inst = op_mr; |
2174 | 0 | } |
2175 | 0 | else { |
2176 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); |
2177 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
2178 | 0 | FAIL_IF(!inst); |
2179 | 0 | *inst = op_mr; |
2180 | 0 | } |
2181 | 27.0M | return SLJIT_SUCCESS; |
2182 | 27.0M | } |
2183 | | |
2184 | | /* General version. */ |
2185 | 3.07M | if (FAST_IS_REG(dst) && dst != src2) { |
2186 | 3.07M | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2187 | 3.07M | if (src2 == SLJIT_IMM) { |
2188 | 2.68M | BINARY_IMM(op_imm, op_mr, src2w, dst, 0); |
2189 | 2.68M | } |
2190 | 390k | else { |
2191 | 390k | inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); |
2192 | 390k | FAIL_IF(!inst); |
2193 | 390k | *inst = op_rm; |
2194 | 390k | } |
2195 | 3.07M | } |
2196 | 0 | else { |
2197 | | /* This version requires fewer memory writes. */ |
2198 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2199 | 0 | if (src2 == SLJIT_IMM) { |
2200 | 0 | BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); |
2201 | 0 | } |
2202 | 0 | else { |
2203 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2204 | 0 | FAIL_IF(!inst); |
2205 | 0 | *inst = op_rm; |
2206 | 0 | } |
2207 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2208 | 0 | } |
2209 | | |
2210 | 3.07M | return SLJIT_SUCCESS; |
2211 | 3.07M | } |
2212 | | |
2213 | | static sljit_s32 emit_mul(struct sljit_compiler *compiler, |
2214 | | sljit_s32 dst, sljit_sw dstw, |
2215 | | sljit_s32 src1, sljit_sw src1w, |
2216 | | sljit_s32 src2, sljit_sw src2w) |
2217 | 0 | { |
2218 | 0 | sljit_u8* inst; |
2219 | 0 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
2220 | | |
2221 | | /* Register destination. */ |
2222 | 0 | if (dst_r == src1 && src2 != SLJIT_IMM) { |
2223 | 0 | FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); |
2224 | 0 | } else if (dst_r == src2 && src1 != SLJIT_IMM) { |
2225 | 0 | FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w)); |
2226 | 0 | } else if (src1 == SLJIT_IMM) { |
2227 | 0 | if (src2 == SLJIT_IMM) { |
2228 | 0 | EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); |
2229 | 0 | src2 = dst_r; |
2230 | 0 | src2w = 0; |
2231 | 0 | } |
2232 | | |
2233 | 0 | if (src1w <= 127 && src1w >= -128) { |
2234 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
2235 | 0 | FAIL_IF(!inst); |
2236 | 0 | *inst = IMUL_r_rm_i8; |
2237 | | 
2238 | 0 | FAIL_IF(emit_byte(compiler, U8(src1w))); |
2239 | 0 | } |
2240 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2241 | | else { |
2242 | | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
2243 | | FAIL_IF(!inst); |
2244 | | *inst = IMUL_r_rm_i32; |
2245 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2246 | | FAIL_IF(!inst); |
2247 | | INC_SIZE(4); |
2248 | | sljit_unaligned_store_sw(inst, src1w); |
2249 | | } |
2250 | | #else |
2251 | 0 | else if (IS_HALFWORD(src1w)) { |
2252 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
2253 | 0 | FAIL_IF(!inst); |
2254 | 0 | *inst = IMUL_r_rm_i32; |
2255 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2256 | 0 | FAIL_IF(!inst); |
2257 | 0 | INC_SIZE(4); |
2258 | 0 | sljit_unaligned_store_s32(inst, (sljit_s32)src1w); |
2259 | 0 | } |
2260 | 0 | else { |
2261 | 0 | if (dst_r != src2) |
2262 | 0 | EMIT_MOV(compiler, dst_r, 0, src2, src2w); |
2263 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); |
2264 | 0 | FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); |
2265 | 0 | } |
2266 | 0 | #endif |
2267 | 0 | } |
2268 | 0 | else if (src2 == SLJIT_IMM) { |
2269 | | /* Note: src1 is NOT immediate. */ |
2270 | | 
2271 | 0 | if (src2w <= 127 && src2w >= -128) { |
2272 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
2273 | 0 | FAIL_IF(!inst); |
2274 | 0 | *inst = IMUL_r_rm_i8; |
2275 | | 
2276 | 0 | FAIL_IF(emit_byte(compiler, U8(src2w))); |
2277 | 0 | } |
2278 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2279 | | else { |
2280 | | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
2281 | | FAIL_IF(!inst); |
2282 | | *inst = IMUL_r_rm_i32; |
2283 | | |
2284 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2285 | | FAIL_IF(!inst); |
2286 | | INC_SIZE(4); |
2287 | | sljit_unaligned_store_sw(inst, src2w); |
2288 | | } |
2289 | | #else |
2290 | 0 | else if (IS_HALFWORD(src2w)) { |
2291 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
2292 | 0 | FAIL_IF(!inst); |
2293 | 0 | *inst = IMUL_r_rm_i32; |
2294 | | 
2295 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2296 | 0 | FAIL_IF(!inst); |
2297 | 0 | INC_SIZE(4); |
2298 | 0 | sljit_unaligned_store_s32(inst, (sljit_s32)src2w); |
2299 | 0 | } else { |
2300 | 0 | if (dst_r != src1) |
2301 | 0 | EMIT_MOV(compiler, dst_r, 0, src1, src1w); |
2302 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); |
2303 | 0 | FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); |
2304 | 0 | } |
2305 | 0 | #endif |
2306 | 0 | } else { |
2307 | | /* Neither argument is immediate. */ |
2308 | 0 | if (ADDRESSING_DEPENDS_ON(src2, dst_r)) |
2309 | 0 | dst_r = TMP_REG1; |
2310 | 0 | EMIT_MOV(compiler, dst_r, 0, src1, src1w); |
2311 | 0 | FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); |
2312 | 0 | } |
2313 | | |
2314 | 0 | if (dst & SLJIT_MEM) |
2315 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2316 | | 
2317 | 0 | return SLJIT_SUCCESS; |
2318 | 0 | } |
2319 | | |
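
Immediate multiplies above use IMUL's three-operand forms: 6B /r ib when the constant fits a signed byte, 69 /r id for a 32-bit immediate, and otherwise the constant is first loaded into TMP_REG2 and folded with the two-operand 0F AF form. Sample encodings (illustrative):

    /* 6B C1 07:          imul eax, ecx, 7    (imm8, sign-extended)
       69 C1 00 01 00 00: imul eax, ecx, 256  (imm32)
       0F AF C1:          imul eax, ecx       (two-operand form) */
    static const unsigned char imul_i8[]  = { 0x6B, 0xC1, 0x07 };
    static const unsigned char imul_i32[] = { 0x69, 0xC1, 0x00, 0x01, 0x00, 0x00 };
    static const unsigned char imul_rr[]  = { 0x0F, 0xAF, 0xC1 };
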
2320 | | static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, |
2321 | | sljit_s32 dst, sljit_sw dstw, |
2322 | | sljit_s32 src1, sljit_sw src1w, |
2323 | | sljit_s32 src2, sljit_sw src2w) |
2324 | 79.9M | { |
2325 | 79.9M | sljit_u8* inst; |
2326 | 79.9M | sljit_s32 dst_r, done = 0; |
2327 | | |
2328 | | /* These cases are better handled by the normal code path. */ |
2329 | 79.9M | if (dst == src1 && dstw == src1w) |
2330 | 68.6M | return SLJIT_ERR_UNSUPPORTED; |
2331 | 11.3M | if (dst == src2 && dstw == src2w) |
2332 | 0 | return SLJIT_ERR_UNSUPPORTED; |
2333 | | |
2334 | 11.3M | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
2335 | | |
2336 | 11.3M | if (FAST_IS_REG(src1)) { |
2337 | 8.61M | if (FAST_IS_REG(src2)) { |
2338 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); |
2339 | 0 | FAIL_IF(!inst); |
2340 | 0 | *inst = LEA_r_m; |
2341 | 0 | done = 1; |
2342 | 0 | } |
2343 | 8.61M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2344 | 8.61M | if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2345 | 8.61M | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); |
2346 | | #else |
2347 | | if (src2 == SLJIT_IMM) { |
2348 | | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); |
2349 | | #endif |
2350 | 8.61M | FAIL_IF(!inst); |
2351 | 8.61M | *inst = LEA_r_m; |
2352 | 8.61M | done = 1; |
2353 | 8.61M | } |
2354 | 8.61M | } |
2355 | 2.70M | else if (FAST_IS_REG(src2)) { |
2356 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2357 | 0 | if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) { |
2358 | 0 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); |
2359 | | #else |
2360 | | if (src1 == SLJIT_IMM) { |
2361 | | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); |
2362 | | #endif |
2363 | 0 | FAIL_IF(!inst); |
2364 | 0 | *inst = LEA_r_m; |
2365 | 0 | done = 1; |
2366 | 0 | } |
2367 | 0 | } |
2368 | | |
2369 | 11.3M | if (done) { |
2370 | 8.61M | if (dst_r == TMP_REG1) |
2371 | 123k | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
2372 | 8.49M | return SLJIT_SUCCESS; |
2373 | 8.61M | } |
2374 | 2.70M | return SLJIT_ERR_UNSUPPORTED; |
2375 | 11.3M | } |
2376 | | |
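
emit_lea_binary exists because LEA runs an addition through the address-generation unit without touching EFLAGS, letting a flagless SLJIT_ADD (or SUB of an immediate) fold a move and an add into one instruction; it punts with SLJIT_ERR_UNSUPPORTED when dst aliases a source, where a plain ADD is shorter. The two shapes it emits, as raw bytes (illustrative):

    /* 8D 04 0B: lea eax, [ebx+ecx]   (reg + reg, via SIB)
       8D 43 2A: lea eax, [ebx+42]    (reg + disp8) */
    static const unsigned char lea_reg_reg[] = { 0x8D, 0x04, 0x0B };
    static const unsigned char lea_reg_imm[] = { 0x8D, 0x43, 0x2A };
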
2377 | | static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, |
2378 | | sljit_s32 src1, sljit_sw src1w, |
2379 | | sljit_s32 src2, sljit_sw src2w) |
2380 | 365M | { |
2381 | 365M | sljit_u8* inst; |
2382 | | |
2383 | 365M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2384 | 365M | if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2385 | | #else |
2386 | | if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) { |
2387 | | #endif |
2388 | 158M | BINARY_EAX_IMM(CMP_EAX_i32, src2w); |
2389 | 158M | return SLJIT_SUCCESS; |
2390 | 158M | } |
2391 | | |
2392 | 206M | if (FAST_IS_REG(src1)) { |
2393 | 205M | if (src2 == SLJIT_IMM) { |
2394 | 157M | BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); |
2395 | 157M | } |
2396 | 48.0M | else { |
2397 | 48.0M | inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); |
2398 | 48.0M | FAIL_IF(!inst); |
2399 | 48.0M | *inst = CMP_r_rm; |
2400 | 48.0M | } |
2401 | 205M | return SLJIT_SUCCESS; |
2402 | 205M | } |
2403 | | |
2404 | 768k | if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) { |
2405 | 642k | inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); |
2406 | 642k | FAIL_IF(!inst); |
2407 | 642k | *inst = CMP_rm_r; |
2408 | 642k | return SLJIT_SUCCESS; |
2409 | 642k | } |
2410 | | |
2411 | 125k | if (src2 == SLJIT_IMM) { |
2412 | 123k | if (src1 == SLJIT_IMM) { |
2413 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2414 | 0 | src1 = TMP_REG1; |
2415 | 0 | src1w = 0; |
2416 | 0 | } |
2417 | 123k | BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); |
2418 | 123k | } |
2419 | 2.44k | else { |
2420 | 2.44k | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2421 | 2.44k | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2422 | 2.44k | FAIL_IF(!inst); |
2423 | 2.44k | *inst = CMP_r_rm; |
2424 | 2.44k | } |
2425 | 125k | return SLJIT_SUCCESS; |
2426 | 125k | } |
2427 | | |
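
The compare helper picks among three immediate encodings: the sign-extended byte form whenever it fits, the short accumulator form (no ModRM byte) when src1 is EAX/RAX and the immediate is wide, and the general 81 /7 form otherwise; two memory operands are staged through TMP_REG1. For illustration:

    /* 83 F9 05:          cmp ecx, 5    (imm8, sign-extended)
       3D 00 02 00 00:    cmp eax, 512  (accumulator short form, CMP_EAX_i32)
       81 F9 00 02 00 00: cmp ecx, 512  (general imm32 form) */
    static const unsigned char cmp_imm8[]    = { 0x83, 0xF9, 0x05 };
    static const unsigned char cmp_eax_i32[] = { 0x3D, 0x00, 0x02, 0x00, 0x00 };
    static const unsigned char cmp_imm32[]   = { 0x81, 0xF9, 0x00, 0x02, 0x00, 0x00 };
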
2428 | | static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, |
2429 | | sljit_s32 src1, sljit_sw src1w, |
2430 | | sljit_s32 src2, sljit_sw src2w) |
2431 | 2.98M | { |
2432 | 2.98M | sljit_u8* inst; |
2433 | | |
2434 | 2.98M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2435 | 2.98M | if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2436 | | #else |
2437 | | if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) { |
2438 | | #endif |
2439 | 213k | BINARY_EAX_IMM(TEST_EAX_i32, src2w); |
2440 | 213k | return SLJIT_SUCCESS; |
2441 | 213k | } |
2442 | | |
2443 | 2.76M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2444 | 2.76M | if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { |
2445 | | #else |
2446 | | if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) { |
2447 | | #endif |
2448 | 0 | BINARY_EAX_IMM(TEST_EAX_i32, src1w); |
2449 | 0 | return SLJIT_SUCCESS; |
2450 | 0 | } |
2451 | | |
2452 | 2.76M | if (src1 != SLJIT_IMM) { |
2453 | 2.76M | if (src2 == SLJIT_IMM) { |
2454 | 2.13M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2455 | 2.13M | if (IS_HALFWORD(src2w) || compiler->mode32) { |
2456 | 2.13M | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); |
2457 | 2.13M | FAIL_IF(!inst); |
2458 | 2.13M | *inst = GROUP_F7; |
2459 | 2.13M | } else { |
2460 | 0 | FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w)); |
2461 | 0 | inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w); |
2462 | 0 | FAIL_IF(!inst); |
2463 | 0 | *inst = TEST_rm_r; |
2464 | 0 | } |
2465 | | #else |
2466 | | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); |
2467 | | FAIL_IF(!inst); |
2468 | | *inst = GROUP_F7; |
2469 | | #endif |
2470 | 2.13M | return SLJIT_SUCCESS; |
2471 | 2.13M | } |
2472 | 635k | else if (FAST_IS_REG(src1)) { |
2473 | 635k | inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); |
2474 | 635k | FAIL_IF(!inst); |
2475 | 635k | *inst = TEST_rm_r; |
2476 | 635k | return SLJIT_SUCCESS; |
2477 | 635k | } |
2478 | 2.76M | } |
2479 | | |
2480 | 0 | if (src2 != SLJIT_IMM) { |
2481 | 0 | if (src1 == SLJIT_IMM) { |
2482 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2483 | 0 | if (IS_HALFWORD(src1w) || compiler->mode32) { |
2484 | 0 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); |
2485 | 0 | FAIL_IF(!inst); |
2486 | 0 | *inst = GROUP_F7; |
2487 | 0 | } |
2488 | 0 | else { |
2489 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); |
2490 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2491 | 0 | FAIL_IF(!inst); |
2492 | 0 | *inst = TEST_rm_r; |
2493 | 0 | } |
2494 | | #else |
2495 | | inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); |
2496 | | FAIL_IF(!inst); |
2497 | | *inst = GROUP_F7; |
2498 | | #endif |
2499 | 0 | return SLJIT_SUCCESS; |
2500 | 0 | } |
2501 | 0 | else if (FAST_IS_REG(src2)) { |
2502 | 0 | inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); |
2503 | 0 | FAIL_IF(!inst); |
2504 | 0 | *inst = TEST_rm_r; |
2505 | 0 | return SLJIT_SUCCESS; |
2506 | 0 | } |
2507 | 0 | } |
2508 | | |
2509 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2510 | 0 | if (src2 == SLJIT_IMM) { |
2511 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2512 | 0 | if (IS_HALFWORD(src2w) || compiler->mode32) { |
2513 | 0 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); |
2514 | 0 | FAIL_IF(!inst); |
2515 | 0 | *inst = GROUP_F7; |
2516 | 0 | } |
2517 | 0 | else { |
2518 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); |
2519 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); |
2520 | 0 | FAIL_IF(!inst); |
2521 | 0 | *inst = TEST_rm_r; |
2522 | 0 | } |
2523 | | #else |
2524 | | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); |
2525 | | FAIL_IF(!inst); |
2526 | | *inst = GROUP_F7; |
2527 | | #endif |
2528 | 0 | } |
2529 | 0 | else { |
2530 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2531 | 0 | FAIL_IF(!inst); |
2532 | 0 | *inst = TEST_rm_r; |
2533 | 0 | } |
2534 | 0 | return SLJIT_SUCCESS; |
2535 | 0 | } |
2536 | | |
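
TEST mirrors CMP's operand shapes but has no sign-extended imm8 form, so immediates always go through the F7-group /0 (the GROUP_F7 stores above) or the accumulator short form; with two register/memory operands either order works because TEST is symmetric. Illustrative encodings:

    /* A9 01 00 00 00:    test eax, 1   (accumulator short form)
       F7 C1 01 00 00 00: test ecx, 1   (F7 /0, imm32; no imm8 form exists)
       85 C8:             test eax, ecx (rm,r; symmetric) */
    static const unsigned char test_eax[] = { 0xA9, 0x01, 0x00, 0x00, 0x00 };
    static const unsigned char test_i32[] = { 0xF7, 0xC1, 0x01, 0x00, 0x00, 0x00 };
    static const unsigned char test_rr[]  = { 0x85, 0xC8 };
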
2537 | | static sljit_s32 emit_shift(struct sljit_compiler *compiler, |
2538 | | sljit_u8 mode, |
2539 | | sljit_s32 dst, sljit_sw dstw, |
2540 | | sljit_s32 src1, sljit_sw src1w, |
2541 | | sljit_s32 src2, sljit_sw src2w) |
2542 | 6.05M | { |
2543 | 6.05M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2544 | 6.05M | sljit_s32 mode32; |
2545 | 6.05M | #endif |
2546 | 6.05M | sljit_u8* inst; |
2547 | | |
2548 | 6.05M | if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) { |
2549 | 6.05M | if (dst == src1 && dstw == src1w) { |
2550 | 3.56M | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); |
2551 | 3.56M | FAIL_IF(!inst); |
2552 | 3.56M | inst[1] |= mode; |
2553 | 3.56M | return SLJIT_SUCCESS; |
2554 | 3.56M | } |
2555 | 2.48M | if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { |
2556 | 631k | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2557 | 631k | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2558 | 631k | FAIL_IF(!inst); |
2559 | 631k | inst[1] |= mode; |
2560 | 631k | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2561 | 631k | return SLJIT_SUCCESS; |
2562 | 631k | } |
2563 | 1.85M | if (FAST_IS_REG(dst)) { |
2564 | 1.85M | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2565 | 1.85M | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); |
2566 | 1.85M | FAIL_IF(!inst); |
2567 | 1.85M | inst[1] |= mode; |
2568 | 1.85M | return SLJIT_SUCCESS; |
2569 | 1.85M | } |
2570 | | |
2571 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2572 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); |
2573 | 0 | FAIL_IF(!inst); |
2574 | 0 | inst[1] |= mode; |
2575 | 0 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2576 | 0 | return SLJIT_SUCCESS; |
2577 | 0 | } |
2578 | | |
2579 | 1.75k | if (dst == SLJIT_PREF_SHIFT_REG) { |
2580 | 1.75k | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2581 | 1.75k | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2582 | 1.75k | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2583 | 1.75k | FAIL_IF(!inst); |
2584 | 1.75k | inst[1] |= mode; |
2585 | 1.75k | return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2586 | 1.75k | } |
2587 | | |
2588 | 0 | if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { |
2589 | 0 | if (src1 != dst) |
2590 | 0 | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2591 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2592 | 0 | mode32 = compiler->mode32; |
2593 | 0 | compiler->mode32 = 0; |
2594 | 0 | #endif |
2595 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2596 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2597 | 0 | compiler->mode32 = mode32; |
2598 | 0 | #endif |
2599 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2600 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); |
2601 | 0 | FAIL_IF(!inst); |
2602 | 0 | inst[1] |= mode; |
2603 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2604 | 0 | compiler->mode32 = 0; |
2605 | 0 | #endif |
2606 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2607 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2608 | 0 | compiler->mode32 = mode32; |
2609 | 0 | #endif |
2610 | 0 | return SLJIT_SUCCESS; |
2611 | 0 | } |
2612 | | |
2613 | | /* This case is complex, since ecx itself may be used for |
2614 | | addressing, and that use must keep working as well. */ |
2615 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2616 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2617 | | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); |
2618 | | #else /* !SLJIT_CONFIG_X86_32 */ |
2619 | 0 | mode32 = compiler->mode32; |
2620 | 0 | compiler->mode32 = 0; |
2621 | 0 | EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); |
2622 | 0 | compiler->mode32 = mode32; |
2623 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
2624 | | 
2625 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2626 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2627 | 0 | FAIL_IF(!inst); |
2628 | 0 | inst[1] |= mode; |
2629 | | 
2630 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2631 | | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); |
2632 | | #else |
2633 | 0 | compiler->mode32 = 0; |
2634 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); |
2635 | 0 | compiler->mode32 = mode32; |
2636 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
2637 | | 
2638 | 0 | if (dst != TMP_REG1) |
2639 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
2640 | | |
2641 | 0 | return SLJIT_SUCCESS; |
2642 | 0 | } |
2643 | | |
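
Variable shifts on x86 take their count in CL only, so when dst or src2 conflicts with ECX the helper above rotates values through TMP_REG1 (plus TMP_REG2 or a stack slot on 32-bit) and restores ECX afterwards, toggling mode32 off so the whole register is preserved. A minimal sketch of that dance in plain C (stand-ins, not emitted code):

    #include <stdint.h>

    /* "dst = src1 << src2" when the count is not already in ECX. */
    static uint32_t shl_via_cl(uint32_t *ecx, uint32_t src1, uint32_t src2)
    {
        uint32_t tmp1 = src1;       /* EMIT_MOV(TMP_REG1, src1)          */
        uint32_t saved = *ecx;      /* stash ECX (TMP_REG2 / stack slot) */
        *ecx = src2;                /* the count must live in CL         */
        tmp1 <<= (*ecx & 0x1f);     /* shl tmp1, cl                      */
        *ecx = saved;               /* restore ECX                       */
        return tmp1;                /* moved on to dst afterwards        */
    }
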
2644 | | static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, |
2645 | | sljit_u8 mode, sljit_s32 set_flags, |
2646 | | sljit_s32 dst, sljit_sw dstw, |
2647 | | sljit_s32 src1, sljit_sw src1w, |
2648 | | sljit_s32 src2, sljit_sw src2w) |
2649 | 6.05M | { |
2650 | | /* The CPU does not set flags if the shift count is 0. */ |
2651 | 6.05M | if (src2 == SLJIT_IMM) { |
2652 | 4.79M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2653 | 4.79M | src2w &= compiler->mode32 ? 0x1f : 0x3f; |
2654 | | #else /* !SLJIT_CONFIG_X86_64 */ |
2655 | | src2w &= 0x1f; |
2656 | | #endif /* SLJIT_CONFIG_X86_64 */ |
2657 | 4.79M | if (src2w != 0) |
2658 | 4.79M | return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); |
2659 | | |
2660 | 0 | if (!set_flags) |
2661 | 0 | return emit_mov(compiler, dst, dstw, src1, src1w); |
2662 | | /* OR dst, src, 0 */ |
2663 | 0 | return emit_cum_binary(compiler, BINARY_OPCODE(OR), |
2664 | 0 | dst, dstw, src1, src1w, SLJIT_IMM, 0); |
2665 | 0 | } |
2666 | | |
2667 | 1.26M | if (!set_flags) |
2668 | 1.26M | return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); |
2669 | | |
2670 | 0 | if (!FAST_IS_REG(dst)) |
2671 | 0 | FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); |
2672 | | |
2673 | 0 | FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); |
2674 | | |
2675 | 0 | if (FAST_IS_REG(dst)) |
2676 | 0 | return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); |
2677 | 0 | return SLJIT_SUCCESS; |
2678 | 0 | } |
2679 | | |
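
emit_shift_with_flags exists because x86 leaves EFLAGS untouched when the (masked) shift count is zero: a constant count of 0 therefore degenerates to a plain MOV when no flags are wanted, or to the cumulative "OR dst, 0" above, whose only job is to set ZF/SF from the unchanged value. That fallback instruction as bytes (illustrative):

    static const unsigned char or_eax_0[] = { 0x83, 0xC8, 0x00 }; /* or eax, 0 */
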
2680 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, |
2681 | | sljit_s32 dst, sljit_sw dstw, |
2682 | | sljit_s32 src1, sljit_sw src1w, |
2683 | | sljit_s32 src2, sljit_sw src2w) |
2684 | 87.5M | { |
2685 | 87.5M | CHECK_ERROR(); |
2686 | 87.5M | CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); |
2687 | 87.5M | ADJUST_LOCAL_OFFSET(dst, dstw); |
2688 | 87.5M | ADJUST_LOCAL_OFFSET(src1, src1w); |
2689 | 87.5M | ADJUST_LOCAL_OFFSET(src2, src2w); |
2690 | | |
2691 | 87.5M | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
2692 | 87.5M | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2693 | 87.5M | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2694 | 87.5M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2695 | 87.5M | compiler->mode32 = op & SLJIT_32; |
2696 | 87.5M | #endif |
2697 | | |
2698 | 87.5M | switch (GET_OPCODE(op)) { |
2699 | 46.9M | case SLJIT_ADD: |
2700 | 46.9M | if (!HAS_FLAGS(op)) { |
2701 | 46.9M | if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) |
2702 | 378k | return compiler->error; |
2703 | 46.9M | } |
2704 | 46.6M | return emit_cum_binary(compiler, BINARY_OPCODE(ADD), |
2705 | 46.6M | dst, dstw, src1, src1w, src2, src2w); |
2706 | 0 | case SLJIT_ADDC: |
2707 | 0 | return emit_cum_binary(compiler, BINARY_OPCODE(ADC), |
2708 | 0 | dst, dstw, src1, src1w, src2, src2w); |
2709 | 30.3M | case SLJIT_SUB: |
2710 | 30.3M | if (src1 == SLJIT_IMM && src1w == 0) |
2711 | 0 | return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); |
2712 | | |
2713 | 30.3M | if (!HAS_FLAGS(op)) { |
2714 | 25.2M | if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) |
2715 | 177k | return compiler->error; |
2716 | 25.0M | if (FAST_IS_REG(dst) && src2 == dst) { |
2717 | 28.9k | FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); |
2718 | 28.9k | return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); |
2719 | 28.9k | } |
2720 | 25.0M | } |
2721 | | |
2722 | 30.1M | return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), |
2723 | 30.1M | dst, dstw, src1, src1w, src2, src2w); |
2724 | 0 | case SLJIT_SUBC: |
2725 | 0 | return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), |
2726 | 0 | dst, dstw, src1, src1w, src2, src2w); |
2727 | 0 | case SLJIT_MUL: |
2728 | 0 | return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); |
2729 | 1.73M | case SLJIT_AND: |
2730 | 1.73M | return emit_cum_binary(compiler, BINARY_OPCODE(AND), |
2731 | 1.73M | dst, dstw, src1, src1w, src2, src2w); |
2732 | 2.42M | case SLJIT_OR: |
2733 | 2.42M | return emit_cum_binary(compiler, BINARY_OPCODE(OR), |
2734 | 2.42M | dst, dstw, src1, src1w, src2, src2w); |
2735 | 25.0k | case SLJIT_XOR: |
2736 | 25.0k | if (!HAS_FLAGS(op)) { |
2737 | 21.2k | if (src2 == SLJIT_IMM && src2w == -1) |
2738 | 0 | return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w); |
2739 | 21.2k | if (src1 == SLJIT_IMM && src1w == -1) |
2740 | 0 | return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w); |
2741 | 21.2k | } |
2742 | | |
2743 | 25.0k | return emit_cum_binary(compiler, BINARY_OPCODE(XOR), |
2744 | 25.0k | dst, dstw, src1, src1w, src2, src2w); |
2745 | 4.47M | case SLJIT_SHL: |
2746 | 4.47M | case SLJIT_MSHL: |
2747 | 4.47M | return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), |
2748 | 4.47M | dst, dstw, src1, src1w, src2, src2w); |
2749 | 1.51M | case SLJIT_LSHR: |
2750 | 1.51M | case SLJIT_MLSHR: |
2751 | 1.51M | return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), |
2752 | 1.51M | dst, dstw, src1, src1w, src2, src2w); |
2753 | 62.8k | case SLJIT_ASHR: |
2754 | 62.8k | case SLJIT_MASHR: |
2755 | 62.8k | return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), |
2756 | 62.8k | dst, dstw, src1, src1w, src2, src2w); |
2757 | 0 | case SLJIT_ROTL: |
2758 | 0 | return emit_shift_with_flags(compiler, ROL, 0, |
2759 | 0 | dst, dstw, src1, src1w, src2, src2w); |
2760 | 0 | case SLJIT_ROTR: |
2761 | 0 | return emit_shift_with_flags(compiler, ROR, 0, |
2762 | 0 | dst, dstw, src1, src1w, src2, src2w); |
2763 | 87.5M | } |
2764 | | |
2765 | 0 | return SLJIT_SUCCESS; |
2766 | 87.5M | } |
2767 | | |
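
Before dispatching, sljit_emit_op2 strength-reduces a few algebraic identities: subtracting from zero becomes NEG, XOR with -1 becomes NOT, and a flag-free ADD/SUB first tries the LEA path shown earlier. In C terms (illustrative):

    #include <stdint.h>

    static int32_t sub_as_neg(int32_t x) { return 0 - x; }  /* SLJIT_SUB 0, x  -> neg */
    static int32_t xor_as_not(int32_t x) { return x ^ -1; } /* SLJIT_XOR x, -1 -> not */
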
2768 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, |
2769 | | sljit_s32 src1, sljit_sw src1w, |
2770 | | sljit_s32 src2, sljit_sw src2w) |
2771 | 368M | { |
2772 | 368M | sljit_s32 opcode = GET_OPCODE(op); |
2773 | | |
2774 | 368M | CHECK_ERROR(); |
2775 | 368M | CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); |
2776 | | |
2777 | 368M | if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { |
2778 | 0 | SLJIT_SKIP_CHECKS(compiler); |
2779 | 0 | return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); |
2780 | 0 | } |
2781 | | |
2782 | 368M | ADJUST_LOCAL_OFFSET(src1, src1w); |
2783 | 368M | ADJUST_LOCAL_OFFSET(src2, src2w); |
2784 | | |
2785 | 368M | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2786 | 368M | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2787 | 368M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2788 | 368M | compiler->mode32 = op & SLJIT_32; |
2789 | 368M | #endif |
2790 | | |
2791 | 368M | if (opcode == SLJIT_SUB) |
2792 | 365M | return emit_cmp_binary(compiler, src1, src1w, src2, src2w); |
2793 | | |
2794 | 2.98M | return emit_test_binary(compiler, src1, src1w, src2, src2w); |
2795 | 368M | } |
2796 | | |
2797 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, |
2798 | | sljit_s32 dst_reg, |
2799 | | sljit_s32 src1, sljit_sw src1w, |
2800 | | sljit_s32 src2, sljit_sw src2w) |
2801 | 0 | { |
2802 | 0 | sljit_u8* inst; |
2803 | 0 | sljit_sw dstw = 0; |
2804 | 0 |
|
2805 | 0 | CHECK_ERROR(); |
2806 | 0 | CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); |
2807 | 0 | ADJUST_LOCAL_OFFSET(src1, src1w); |
2808 | 0 | ADJUST_LOCAL_OFFSET(src2, src2w); |
2809 | 0 |
|
2810 | 0 | CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); |
2811 | 0 | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2812 | 0 | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2813 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2814 | 0 | compiler->mode32 = op & SLJIT_32; |
2815 | 0 | #endif |
2816 | 0 |
|
2817 | 0 | switch (GET_OPCODE(op)) { |
2818 | 0 | case SLJIT_MULADD: |
2819 | 0 | FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w)); |
2820 | 0 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw); |
2821 | 0 | FAIL_IF(!inst); |
2822 | 0 | *inst = ADD_rm_r; |
2823 | 0 | return SLJIT_SUCCESS; |
2824 | 0 | } |
2825 | 0 |
|
2826 | 0 | return SLJIT_SUCCESS; |
2827 | 0 | } |
2828 | | |
2829 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, |
2830 | | sljit_s32 dst_reg, |
2831 | | sljit_s32 src1_reg, |
2832 | | sljit_s32 src2_reg, |
2833 | | sljit_s32 src3, sljit_sw src3w) |
2834 | 0 | { |
2835 | 0 | sljit_s32 is_rotate, is_left, move_src1; |
2836 | 0 | sljit_u8* inst; |
2837 | 0 | sljit_sw src1w = 0; |
2838 | 0 | sljit_sw dstw = 0; |
2839 | 0 | /* The whole register must be saved even for 32 bit operations. */ |
2840 | 0 | sljit_u8 restore_ecx = 0; |
2841 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2842 | 0 | sljit_sw src2w = 0; |
2843 | 0 | sljit_s32 restore_sp4 = 0; |
2844 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
2845 | 0 |
|
2846 | 0 | CHECK_ERROR(); |
2847 | 0 | CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); |
2848 | 0 | ADJUST_LOCAL_OFFSET(src3, src3w); |
2849 | 0 |
|
2850 | 0 | CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); |
2851 | 0 | CHECK_EXTRA_REGS(src3, src3w, (void)0); |
2852 | 0 |
|
2853 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2854 | 0 | compiler->mode32 = op & SLJIT_32; |
2855 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2856 | 0 |
2857 | 0 | if (src3 == SLJIT_IMM) { |
2858 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2859 | 0 | src3w &= 0x1f; |
2860 | 0 | #else /* !SLJIT_CONFIG_X86_32 */ |
2861 | 0 | src3w &= (op & SLJIT_32) ? 0x1f : 0x3f; |
2862 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
2863 | 0 |
2864 | 0 | if (src3w == 0) |
2865 | 0 | return SLJIT_SUCCESS; |
2866 | 0 | } |
2867 | 0 |
2868 | 0 | is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); |
2869 | 0 |
2870 | 0 | is_rotate = (src1_reg == src2_reg); |
2871 | 0 | CHECK_EXTRA_REGS(src1_reg, src1w, (void)0); |
2872 | 0 | CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); |
2873 | 0 |
2874 | 0 | if (is_rotate) |
2875 | 0 | return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w); |
2876 | 0 |
2877 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2878 | 0 | if (src2_reg & SLJIT_MEM) { |
2879 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); |
2880 | 0 | src2_reg = TMP_REG1; |
2881 | 0 | } |
2882 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
2883 | 0 |
2884 | 0 | if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) { |
2885 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2886 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); |
2887 | 0 | src1_reg = TMP_REG1; |
2888 | 0 | src1w = 0; |
2889 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
2890 | 0 | if (src2_reg != TMP_REG1) { |
2891 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); |
2892 | 0 | src1_reg = TMP_REG1; |
2893 | 0 | src1w = 0; |
2894 | 0 | } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) { |
2895 | 0 | restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0; |
2896 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0); |
2897 | 0 | EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w); |
2898 | 0 | src1_reg = restore_sp4; |
2899 | 0 | src1w = 0; |
2900 | 0 | } else { |
2901 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0); |
2902 | 0 | restore_sp4 = src1_reg; |
2903 | 0 | } |
2904 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2905 | 0 |
2906 | 0 | if (src3 != SLJIT_PREF_SHIFT_REG) |
2907 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w); |
2908 | 0 | } else { |
2909 | 0 | if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { |
2910 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2911 | 0 | compiler->mode32 = 0; |
2912 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2913 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2914 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2915 | 0 | compiler->mode32 = op & SLJIT_32; |
2916 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2917 | 0 | src2_reg = TMP_REG1; |
2918 | 0 | restore_ecx = 1; |
2919 | 0 | } |
2920 | 0 |
2921 | 0 | move_src1 = 0; |
2922 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2923 | 0 | if (dst_reg != src1_reg) { |
2924 | 0 | if (dst_reg != src3) { |
2925 | 0 | EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); |
2926 | 0 | src1_reg = dst_reg; |
2927 | 0 | src1w = 0; |
2928 | 0 | } else |
2929 | 0 | move_src1 = 1; |
2930 | 0 | } |
2931 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
2932 | 0 | if (dst_reg & SLJIT_MEM) { |
2933 | 0 | if (src2_reg != TMP_REG1) { |
2934 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); |
2935 | 0 | src1_reg = TMP_REG1; |
2936 | 0 | src1w = 0; |
2937 | 0 | } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) { |
2938 | 0 | restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0; |
2939 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0); |
2940 | 0 | EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w); |
2941 | 0 | src1_reg = restore_sp4; |
2942 | 0 | src1w = 0; |
2943 | 0 | } else { |
2944 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0); |
2945 | 0 | restore_sp4 = src1_reg; |
2946 | 0 | } |
2947 | 0 | } else if (dst_reg != src1_reg) { |
2948 | 0 | if (dst_reg != src3) { |
2949 | 0 | EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); |
2950 | 0 | src1_reg = dst_reg; |
2951 | 0 | src1w = 0; |
2952 | 0 | } else |
2953 | 0 | move_src1 = 1; |
2954 | 0 | } |
2955 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2956 | 0 |
2957 | 0 | if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { |
2958 | 0 | if (!restore_ecx) { |
2959 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2960 | 0 | compiler->mode32 = 0; |
2961 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2962 | 0 | compiler->mode32 = op & SLJIT_32; |
2963 | 0 | restore_ecx = 1; |
2964 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
2965 | 0 | if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) { |
2966 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2967 | 0 | restore_ecx = 1; |
2968 | 0 | } else { |
2969 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); |
2970 | 0 | restore_ecx = 2; |
2971 | 0 | } |
2972 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
2973 | 0 | } |
2974 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w); |
2975 | 0 | } |
2976 | 0 |
2977 | 0 | if (move_src1) { |
2978 | 0 | EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); |
2979 | 0 | src1_reg = dst_reg; |
2980 | 0 | src1w = 0; |
2981 | 0 | } |
2982 | 0 | } |
2983 | 0 |
2984 | 0 | inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w); |
2985 | 0 | FAIL_IF(!inst); |
2986 | 0 | inst[0] = GROUP_0F; |
2987 | 0 |
2988 | 0 | if (src3 == SLJIT_IMM) { |
2989 | 0 | inst[1] = U8((is_left ? SHLD : SHRD) - 1); |
2990 | 0 |
2991 | 0 | /* Immediate argument is added separately. */ |
2992 | 0 | FAIL_IF(emit_byte(compiler, U8(src3w))); |
2993 | 0 | } else |
2994 | 0 | inst[1] = U8(is_left ? SHLD : SHRD); |
2995 | 0 |
2996 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2997 | 0 | if (restore_ecx) { |
2998 | 0 | compiler->mode32 = 0; |
2999 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
3000 | 0 | } |
3001 | 0 |
3002 | 0 | if (src1_reg != dst_reg) { |
3003 | 0 | compiler->mode32 = op & SLJIT_32; |
3004 | 0 | return emit_mov(compiler, dst_reg, dstw, src1_reg, 0); |
3005 | 0 | } |
3006 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
3007 | 0 | if (restore_ecx) |
3008 | 0 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0); |
3009 | 0 |
3010 | 0 | if (src1_reg != dst_reg) |
3011 | 0 | EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0); |
3012 | 0 |
3013 | 0 | if (restore_sp4) |
3014 | 0 | return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32)); |
3015 | 0 | #endif /* SLJIT_CONFIG_X86_64 */
3016 | 0 |
3017 | 0 | return SLJIT_SUCCESS; |
3018 | 0 | } |
3019 | | |
3020 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, |
3021 | | sljit_s32 src, sljit_sw srcw) |
3022 | 127k | { |
3023 | 127k | CHECK_ERROR(); |
3024 | 127k | CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); |
3025 | 127k | ADJUST_LOCAL_OFFSET(src, srcw); |
3026 | | |
3027 | 127k | CHECK_EXTRA_REGS(src, srcw, (void)0); |
3028 | | |
3029 | 127k | switch (op) { |
3030 | 127k | case SLJIT_FAST_RETURN: |
3031 | 127k | return emit_fast_return(compiler, src, srcw); |
3032 | 0 | case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: |
3033 | | /* Don't adjust shadow stack if it isn't enabled. */ |
3034 | 0 | if (!cpu_has_shadow_stack ()) |
3035 | 0 | return SLJIT_SUCCESS; |
3036 | 0 | return adjust_shadow_stack(compiler, src, srcw); |
3037 | 0 | case SLJIT_PREFETCH_L1: |
3038 | 0 | case SLJIT_PREFETCH_L2: |
3039 | 0 | case SLJIT_PREFETCH_L3: |
3040 | 0 | case SLJIT_PREFETCH_ONCE: |
3041 | 0 | return emit_prefetch(compiler, op, src, srcw); |
3042 | 127k | } |
3043 | | |
3044 | 0 | return SLJIT_SUCCESS; |
3045 | 127k | } |
3046 | | |
3047 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, |
3048 | | sljit_s32 dst, sljit_sw dstw) |
3049 | 126k | { |
3050 | 126k | CHECK_ERROR(); |
3051 | 126k | CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); |
3052 | 126k | ADJUST_LOCAL_OFFSET(dst, dstw); |
3053 | | |
3054 | 126k | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
3055 | | |
3056 | 126k | switch (op) { |
3057 | 126k | case SLJIT_FAST_ENTER: |
3058 | 126k | return emit_fast_enter(compiler, dst, dstw); |
3059 | 0 | case SLJIT_GET_RETURN_ADDRESS: |
3060 | 0 | return sljit_emit_get_return_address(compiler, dst, dstw); |
3061 | 126k | } |
3062 | | |
3063 | 0 | return SLJIT_SUCCESS; |
3064 | 126k | } |
3065 | | |
3066 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) |
3067 | 672k | { |
3068 | 672k | CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); |
3069 | | |
3070 | 672k | if (type == SLJIT_GP_REGISTER) { |
3071 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
3072 | | if (reg >= SLJIT_R3 && reg <= SLJIT_R8) |
3073 | | return -1; |
3074 | | #endif /* SLJIT_CONFIG_X86_32 */ |
3075 | 378k | return reg_map[reg]; |
3076 | 378k | } |
3077 | | |
3078 | 294k | if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512) |
3079 | 0 | return -1; |
3080 | | |
3081 | 294k | return freg_map[reg]; |
3082 | 294k | } |
3083 | | |
3084 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, |
3085 | | void *instruction, sljit_u32 size) |
3086 | 254k | { |
3087 | 254k | sljit_u8 *inst; |
3088 | | |
3089 | 254k | CHECK_ERROR(); |
3090 | 254k | CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); |
3091 | | |
3092 | 254k | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
3093 | 254k | FAIL_IF(!inst); |
3094 | 254k | INC_SIZE(size); |
3095 | 254k | SLJIT_MEMCPY(inst, instruction, size); |
3096 | 254k | return SLJIT_SUCCESS; |
3097 | 254k | } |
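 | | /* sljit_emit_op_custom copies raw machine code into the stream, which allows
 | | instructions the compiler does not model. A minimal sketch (x86 UD2 assumed):
 | | sljit_u8 ud2[2] = { 0x0f, 0x0b };
 | | sljit_emit_op_custom(compiler, ud2, 2);
 | | The bytes are emitted verbatim; the caller is responsible for their validity. */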
3098 | | |
3099 | | /* --------------------------------------------------------------------- */ |
3100 | | /* Floating point operators */ |
3101 | | /* --------------------------------------------------------------------- */ |
3102 | | |
3103 | | /* Alignment padding (3 words) + 4 constants, 16 bytes each. */
3104 | | static sljit_u32 sse2_data[3 + (4 * 4)]; |
3105 | | static sljit_u32 *sse2_buffer; |
3106 | | |
3107 | | static void init_compiler(void) |
3108 | 1 | { |
3109 | 1 | get_cpu_features(); |
3110 | | |
3111 | | /* Align to 16 bytes. */ |
3112 | 1 | sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); |
3113 | | |
3114 | | /* Single precision constants (each constant is 16 bytes long). */
3115 | 1 | sse2_buffer[0] = 0x80000000; |
3116 | 1 | sse2_buffer[4] = 0x7fffffff; |
3117 | | /* Double precision constants (each constant is 16 bytes long). */
3118 | 1 | sse2_buffer[8] = 0; |
3119 | 1 | sse2_buffer[9] = 0x80000000; |
3120 | 1 | sse2_buffer[12] = 0xffffffff; |
3121 | 1 | sse2_buffer[13] = 0x7fffffff; |
3122 | 1 | } |
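 | | /* Resulting table (four 16-byte, 16-byte aligned slots; unset words stay zero,
 | | so XORPD/ANDPD leave the upper lanes untouched):
 | | sse2_buffer + 0 : 0x80000000 (low word) - float sign-bit mask (negate)
 | | sse2_buffer + 4 : 0x7fffffff (low word) - float abs mask
 | | sse2_buffer + 8 : 0x8000000000000000 (low qword) - double sign-bit mask
 | | sse2_buffer + 12 : 0x7fffffffffffffff (low qword) - double abs mask
 | | The slots are used below as memory operands of XORPD/ANDPD. */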
3123 | | |
3124 | | static sljit_s32 emit_groupf(struct sljit_compiler *compiler, |
3125 | | sljit_uw op, |
3126 | | sljit_s32 dst, sljit_s32 src, sljit_sw srcw) |
3127 | 92.9M | { |
3128 | 92.9M | sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw); |
3129 | 92.9M | FAIL_IF(!inst); |
3130 | 92.9M | inst[0] = GROUP_0F; |
3131 | 92.9M | inst[1] = op & 0xff; |
3132 | 92.9M | return SLJIT_SUCCESS; |
3133 | 92.9M | } |
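 | | /* emit_groupf encodes the common two-byte (0F-escaped) SSE forms: the low byte
 | | of 'op' is the second opcode byte, the remaining bits are prefix/operand flags
 | | for emit_x86_instruction. For example MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2
 | | produces F2 0F 10 /r. */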
3134 | | |
3135 | | static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, |
3136 | | sljit_uw op, |
3137 | | sljit_s32 dst, sljit_s32 src, sljit_sw srcw) |
3138 | 0 | { |
3139 | 0 | sljit_u8 *inst; |
3140 | |
3141 | 0 | SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A))); |
3142 | |
3143 | 0 | inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw); |
3144 | 0 | FAIL_IF(!inst); |
3145 | 0 | inst[0] = GROUP_0F; |
3146 | 0 | inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A); |
3147 | 0 | inst[2] = op & 0xff; |
3148 | 0 | return SLJIT_SUCCESS; |
3149 | 0 | } |
3150 | | |
3151 | | static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, |
3152 | | sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) |
3153 | 0 | { |
3154 | 0 | return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw); |
3155 | 0 | } |
3156 | | |
3157 | | static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, |
3158 | | sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) |
3159 | 0 | { |
3160 | 0 | return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw); |
3161 | 0 | } |
3162 | | |
3163 | | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, |
3164 | | sljit_s32 dst, sljit_sw dstw, |
3165 | | sljit_s32 src, sljit_sw srcw) |
3166 | 0 | { |
3167 | 0 | sljit_s32 dst_r; |
3168 | |
3169 | 0 | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
3170 | 0 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
3171 | |
3172 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3173 | 0 | if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) |
3174 | 0 | compiler->mode32 = 0; |
3175 | 0 | #endif |
3176 | |
3177 | 0 | FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw)); |
3178 | | |
3179 | 0 | if (dst & SLJIT_MEM) |
3180 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
3181 | 0 | return SLJIT_SUCCESS; |
3182 | 0 | } |
3183 | | |
3184 | | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, |
3185 | | sljit_s32 dst, sljit_sw dstw, |
3186 | | sljit_s32 src, sljit_sw srcw) |
3187 | 0 | { |
3188 | 0 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; |
3189 | |
3190 | 0 | CHECK_EXTRA_REGS(src, srcw, (void)0); |
3191 | |
3192 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3193 | 0 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) |
3194 | 0 | compiler->mode32 = 0; |
3195 | 0 | #endif |
3196 | |
3197 | 0 | if (src == SLJIT_IMM) { |
3198 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3199 | 0 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) |
3200 | 0 | srcw = (sljit_s32)srcw; |
3201 | 0 | #endif |
3202 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); |
3203 | 0 | src = TMP_REG1; |
3204 | 0 | srcw = 0; |
3205 | 0 | } |
3206 | | |
3207 | 0 | FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw)); |
3208 | | |
3209 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3210 | 0 | compiler->mode32 = 1; |
3211 | 0 | #endif |
3212 | 0 | if (dst_r == TMP_FREG) |
3213 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
3214 | 0 | return SLJIT_SUCCESS; |
3215 | 0 | } |
3216 | | |
3217 | | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, |
3218 | | sljit_s32 src1, sljit_sw src1w, |
3219 | | sljit_s32 src2, sljit_sw src2w) |
3220 | 0 | { |
3221 | 0 | switch (GET_FLAG_TYPE(op)) { |
3222 | 0 | case SLJIT_ORDERED_EQUAL: |
3223 | | /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */ |
3224 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
3225 | 0 | FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w)); |
3226 | | |
3227 | | /* EQ */ |
3228 | 0 | FAIL_IF(emit_byte(compiler, 0)); |
3229 | | |
3230 | 0 | src1 = TMP_FREG; |
3231 | 0 | src2 = TMP_FREG; |
3232 | 0 | src2w = 0; |
3233 | 0 | break; |
3234 | | |
3235 | 0 | case SLJIT_ORDERED_LESS: |
3236 | 0 | case SLJIT_UNORDERED_OR_GREATER: |
3237 | | /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */ |
3238 | 0 | if (!FAST_IS_REG(src2)) { |
3239 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); |
3240 | 0 | src2 = TMP_FREG; |
3241 | 0 | } |
3242 | | |
3243 | 0 | return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w); |
3244 | 0 | } |
3245 | | |
3246 | 0 | if (!FAST_IS_REG(src1)) { |
3247 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
3248 | 0 | src1 = TMP_FREG; |
3249 | 0 | } |
3250 | | |
3251 | 0 | return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w); |
3252 | 0 | } |
3253 | | |
3254 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, |
3255 | | sljit_s32 dst, sljit_sw dstw, |
3256 | | sljit_s32 src, sljit_sw srcw) |
3257 | 0 | { |
3258 | 0 | sljit_s32 dst_r; |
3259 | 0 | sljit_u8 *inst; |
3260 | |
3261 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3262 | 0 | compiler->mode32 = 1; |
3263 | 0 | #endif |
3264 | |
3265 | 0 | CHECK_ERROR(); |
3266 | 0 | SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); |
3267 | |
3268 | 0 | if (GET_OPCODE(op) == SLJIT_MOV_F64) { |
3269 | 0 | if (FAST_IS_REG(dst)) |
3270 | 0 | return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); |
3271 | 0 | if (FAST_IS_REG(src)) |
3272 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); |
3273 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); |
3274 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
3275 | 0 | } |
3276 | | |
3277 | 0 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { |
3278 | 0 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; |
3279 | 0 | if (FAST_IS_REG(src)) { |
3280 | | /* We overwrite the high bits of the source. From SLJIT's point of view,
3281 | | this is not an issue.
3282 | | Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ |
3283 | 0 | FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0)); |
3284 | 0 | } else { |
3285 | 0 | FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); |
3286 | 0 | src = TMP_FREG; |
3287 | 0 | } |
3288 | | |
3289 | 0 | FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0)); |
3290 | 0 | if (dst_r == TMP_FREG) |
3291 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
3292 | 0 | return SLJIT_SUCCESS; |
3293 | 0 | } |
3294 | | |
3295 | 0 | if (FAST_IS_REG(dst)) { |
3296 | 0 | dst_r = (dst == src) ? TMP_FREG : dst; |
3297 | |
3298 | 0 | if (src & SLJIT_MEM) |
3299 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); |
3300 | | |
3301 | 0 | FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0)); |
3302 | | |
3303 | 0 | inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
 | 0 | FAIL_IF(!inst);
3304 | 0 | inst[0] = GROUP_0F;
3305 | | /* Same as PSRLD_x / PSRLQ_x */ |
3306 | 0 | inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8; |
3307 | |
3308 | 0 | if (GET_OPCODE(op) == SLJIT_ABS_F64) { |
3309 | 0 | inst[2] |= 2 << 3; |
3310 | 0 | FAIL_IF(emit_byte(compiler, 1)); |
3311 | 0 | } else { |
3312 | 0 | inst[2] |= 6 << 3; |
3313 | 0 | FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63))); |
3314 | 0 | } |
3315 | | |
3316 | 0 | if (dst_r != TMP_FREG) |
3317 | 0 | dst_r = (src & SLJIT_MEM) ? TMP_FREG : src; |
3318 | 0 | return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0); |
3319 | 0 | } |
3320 | | |
3321 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); |
3322 | | |
3323 | 0 | switch (GET_OPCODE(op)) { |
3324 | 0 | case SLJIT_NEG_F64: |
3325 | 0 | FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); |
3326 | 0 | break; |
3327 | | |
3328 | 0 | case SLJIT_ABS_F64: |
3329 | 0 | FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12))); |
3330 | 0 | break; |
3331 | 0 | } |
3332 | | |
3333 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
3334 | 0 | } |
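 | | /* In the register path above the NEG/ABS masks are built without a load:
 | | PCMPEQD x, x sets all 128 bits, then the all-ones value is shifted (ModRM /6
 | | selects PSLL, /2 selects PSRL). A left shift by 31/63 keeps only the sign bit
 | | for XORPD (negate); a right shift by 1 clears it for ANDPD (absolute value). */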
3335 | | |
3336 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, |
3337 | | sljit_s32 dst, sljit_sw dstw, |
3338 | | sljit_s32 src1, sljit_sw src1w, |
3339 | | sljit_s32 src2, sljit_sw src2w) |
3340 | 0 | { |
3341 | 0 | sljit_s32 dst_r; |
3342 | 0 |
3343 | 0 | CHECK_ERROR(); |
3344 | 0 | CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); |
3345 | 0 | ADJUST_LOCAL_OFFSET(dst, dstw); |
3346 | 0 | ADJUST_LOCAL_OFFSET(src1, src1w); |
3347 | 0 | ADJUST_LOCAL_OFFSET(src2, src2w); |
3348 | 0 |
3349 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3350 | 0 | compiler->mode32 = 1; |
3351 | 0 | #endif |
3352 | 0 |
3353 | 0 | if (FAST_IS_REG(dst)) { |
3354 | 0 | dst_r = dst; |
3355 | 0 | if (dst == src1) |
3356 | 0 | ; /* Do nothing here. */ |
3357 | 0 | else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) { |
3358 | 0 | /* Swap arguments. */ |
3359 | 0 | src2 = src1; |
3360 | 0 | src2w = src1w; |
3361 | 0 | } else if (dst != src2) |
3362 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); |
3363 | 0 | else { |
3364 | 0 | dst_r = TMP_FREG; |
3365 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
3366 | 0 | } |
3367 | 0 | } else { |
3368 | 0 | dst_r = TMP_FREG; |
3369 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
3370 | 0 | } |
3371 | 0 |
3372 | 0 | switch (GET_OPCODE(op)) { |
3373 | 0 | case SLJIT_ADD_F64: |
3374 | 0 | FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); |
3375 | 0 | break; |
3376 | 0 |
3377 | 0 | case SLJIT_SUB_F64: |
3378 | 0 | FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); |
3379 | 0 | break; |
3380 | 0 |
3381 | 0 | case SLJIT_MUL_F64: |
3382 | 0 | FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); |
3383 | 0 | break; |
3384 | 0 |
3385 | 0 | case SLJIT_DIV_F64: |
3386 | 0 | FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); |
3387 | 0 | break; |
3388 | 0 | } |
3389 | 0 |
3390 | 0 | if (dst_r != dst) |
3391 | 0 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
3392 | 0 | return SLJIT_SUCCESS; |
3393 | 0 | } |
3394 | | |
3395 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, |
3396 | | sljit_s32 dst_freg, |
3397 | | sljit_s32 src1, sljit_sw src1w, |
3398 | | sljit_s32 src2, sljit_sw src2w) |
3399 | 0 | { |
3400 | 0 | sljit_uw pref; |
3401 | 0 |
3402 | 0 | CHECK_ERROR(); |
3403 | 0 | CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); |
3404 | 0 | ADJUST_LOCAL_OFFSET(src1, src1w); |
3405 | 0 | ADJUST_LOCAL_OFFSET(src2, src2w); |
3406 | 0 |
3407 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3408 | 0 | compiler->mode32 = 1; |
3409 | 0 | #endif |
3410 | 0 |
3411 | 0 | if (dst_freg == src1) { |
3412 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); |
3413 | 0 | pref = EX86_SELECT_66(op) | EX86_SSE2; |
3414 | 0 | FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w)); |
3415 | 0 | FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); |
3416 | 0 | return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0); |
3417 | 0 | } |
3418 | 0 |
3419 | 0 | if (src1 & SLJIT_MEM) { |
3420 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
3421 | 0 | src1 = TMP_FREG; |
3422 | 0 | src1w = 0; |
3423 | 0 | } |
3424 | 0 |
3425 | 0 | if (dst_freg != src2) |
3426 | 0 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w)); |
3427 | 0 |
3428 | 0 | pref = EX86_SELECT_66(op) | EX86_SSE2; |
3429 | 0 | FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w)); |
3430 | 0 | FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); |
3431 | 0 | return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w); |
3432 | 0 | } |
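 | | /* The XORPD/ANDPD/XORPD triple is the classic bit-select ((x ^ y) & m) ^ y:
 | | with the sign-bit mask as m it takes the sign bit from src2 and every other
 | | bit from src1, i.e. a branchless copysign. */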
3433 | | |
3434 | | /* --------------------------------------------------------------------- */ |
3435 | | /* Conditional instructions */ |
3436 | | /* --------------------------------------------------------------------- */ |
3437 | | |
3438 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) |
3439 | 148M | { |
3440 | 148M | sljit_u8 *inst; |
3441 | 148M | struct sljit_label *label; |
3442 | | |
3443 | 148M | CHECK_ERROR_PTR(); |
3444 | 148M | CHECK_PTR(check_sljit_emit_label(compiler)); |
3445 | | |
3446 | 148M | if (compiler->last_label && compiler->last_label->size == compiler->size) |
3447 | 21.0M | return compiler->last_label; |
3448 | | |
3449 | 127M | label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); |
3450 | 127M | PTR_FAIL_IF(!label); |
3451 | 127M | set_label(label, compiler); |
3452 | | |
3453 | 127M | inst = (sljit_u8*)ensure_buf(compiler, 1); |
3454 | 127M | PTR_FAIL_IF(!inst); |
3455 | 127M | inst[0] = SLJIT_INST_LABEL; |
3456 | | |
3457 | 127M | return label; |
3458 | 127M | } |
3459 | | |
3460 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler, |
3461 | | sljit_s32 alignment, struct sljit_read_only_buffer *buffers) |
3462 | 0 | { |
3463 | 0 | sljit_uw mask, size; |
3464 | 0 | sljit_u8 *inst; |
3465 | 0 | struct sljit_label *label; |
3466 | 0 | struct sljit_label *next_label; |
3467 | 0 | struct sljit_extended_label *ext_label; |
3468 | 0 |
3469 | 0 | CHECK_ERROR_PTR(); |
3470 | 0 | CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers)); |
3471 | 0 |
3472 | 0 | sljit_reset_read_only_buffers(buffers); |
3473 | 0 |
3474 | 0 | if (alignment <= SLJIT_LABEL_ALIGN_1) { |
3475 | 0 | SLJIT_SKIP_CHECKS(compiler); |
3476 | 0 | label = sljit_emit_label(compiler); |
3477 | 0 | PTR_FAIL_IF(!label); |
3478 | 0 | } else { |
3479 | 0 | /* The used space is filled with NOPs. */ |
3480 | 0 | mask = ((sljit_uw)1 << alignment) - 1; |
3481 | 0 | compiler->size += mask; |
3482 | 0 |
3483 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1); |
3484 | 0 | PTR_FAIL_IF(!inst); |
3485 | 0 | inst[0] = SLJIT_INST_LABEL; |
3486 | 0 |
3487 | 0 | ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label)); |
3488 | 0 | PTR_FAIL_IF(!ext_label); |
3489 | 0 | set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask); |
3490 | 0 | label = &ext_label->label; |
3491 | 0 | } |
3492 | 0 |
3493 | 0 | if (buffers == NULL) |
3494 | 0 | return label; |
3495 | 0 |
3496 | 0 | next_label = label; |
3497 | 0 |
3498 | 0 | while (1) { |
3499 | 0 | buffers->u.label = next_label; |
3500 | 0 | size = buffers->size; |
3501 | 0 |
3502 | 0 | while (size >= 4) { |
3503 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
3504 | 0 | PTR_FAIL_IF(!inst); |
3505 | 0 | INC_SIZE(4); |
3506 | 0 | inst[0] = NOP; |
3507 | 0 | inst[1] = NOP; |
3508 | 0 | inst[2] = NOP; |
3509 | 0 | inst[3] = NOP; |
3510 | 0 | size -= 4; |
3511 | 0 | } |
3512 | 0 |
3513 | 0 | if (size > 0) { |
3514 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
3515 | 0 | PTR_FAIL_IF(!inst); |
3516 | 0 | INC_SIZE(size); |
3517 | 0 |
3518 | 0 | do { |
3519 | 0 | *inst++ = NOP; |
3520 | 0 | } while (--size != 0); |
3521 | 0 | } |
3522 | 0 |
3523 | 0 | buffers = buffers->next; |
3524 | 0 |
3525 | 0 | if (buffers == NULL) |
3526 | 0 | break; |
3527 | 0 |
3528 | 0 | SLJIT_SKIP_CHECKS(compiler); |
3529 | 0 | next_label = sljit_emit_label(compiler); |
3530 | 0 | PTR_FAIL_IF(!next_label); |
3531 | 0 | } |
3532 | 0 |
3533 | 0 | return label; |
3534 | 0 | } |
3535 | | |
3536 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) |
3537 | 364M | { |
3538 | 364M | sljit_u8 *inst; |
3539 | 364M | struct sljit_jump *jump; |
3540 | | |
3541 | 364M | CHECK_ERROR_PTR(); |
3542 | 364M | CHECK_PTR(check_sljit_emit_jump(compiler, type)); |
3543 | | |
3544 | 364M | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
3545 | 364M | PTR_FAIL_IF_NULL(jump); |
3546 | 364M | set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); |
3547 | 364M | type &= 0xff; |
3548 | | |
3549 | 364M | jump->addr = compiler->size; |
3550 | | /* Worst case size. */ |
3551 | 364M | compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE; |
3552 | 364M | inst = (sljit_u8*)ensure_buf(compiler, 1); |
3553 | 364M | PTR_FAIL_IF_NULL(inst); |
3554 | | |
3555 | 364M | inst[0] = SLJIT_INST_JUMP; |
3556 | 364M | return jump; |
3557 | 364M | } |
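 | | /* A jump is only a placeholder at this point: the one-byte SLJIT_INST_JUMP
 | | marker is buffered and compiler->size is advanced by the worst-case encoding.
 | | The final code generation pass selects the short or near form and patches the
 | | actual displacement. */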
3558 | | |
3559 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) |
3560 | 8.06M | { |
3561 | 8.06M | sljit_u8 *inst; |
3562 | 8.06M | struct sljit_jump *jump; |
3563 | | |
3564 | 8.06M | CHECK_ERROR(); |
3565 | 8.06M | CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); |
3566 | 8.06M | ADJUST_LOCAL_OFFSET(src, srcw); |
3567 | | |
3568 | 8.06M | CHECK_EXTRA_REGS(src, srcw, (void)0); |
3569 | | |
3570 | 8.06M | if (src == SLJIT_IMM) { |
3571 | 8.04M | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
3572 | 8.04M | FAIL_IF_NULL(jump); |
3573 | 8.04M | set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); |
3574 | 8.04M | jump->u.target = (sljit_uw)srcw; |
3575 | | |
3576 | 8.04M | jump->addr = compiler->size; |
3577 | | /* Worst case size. */ |
3578 | 8.04M | compiler->size += JUMP_MAX_SIZE; |
3579 | 8.04M | inst = (sljit_u8*)ensure_buf(compiler, 1); |
3580 | 8.04M | FAIL_IF_NULL(inst); |
3581 | | |
3582 | 8.04M | inst[0] = SLJIT_INST_JUMP; |
3583 | 8.04M | } else { |
3584 | 20.2k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3585 | | /* REX_W is not necessary (src is not immediate). */ |
3586 | 20.2k | compiler->mode32 = 1; |
3587 | 20.2k | #endif |
3588 | 20.2k | inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); |
3589 | 20.2k | FAIL_IF(!inst); |
3590 | 20.2k | inst[0] = GROUP_FF; |
3591 | 20.2k | inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); |
3592 | 20.2k | } |
3593 | 8.06M | return SLJIT_SUCCESS; |
3594 | 8.06M | } |
3595 | | |
3596 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, |
3597 | | sljit_s32 dst, sljit_sw dstw, |
3598 | | sljit_s32 type) |
3599 | 67.0M | { |
3600 | 67.0M | sljit_u8 *inst; |
3601 | 67.0M | sljit_u8 cond_set; |
3602 | 67.0M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3603 | 67.0M | sljit_s32 reg; |
3604 | 67.0M | sljit_uw size; |
3605 | 67.0M | #endif /* SLJIT_CONFIG_X86_64 */
3606 | | /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ |
3607 | 67.0M | sljit_s32 dst_save = dst; |
3608 | 67.0M | sljit_sw dstw_save = dstw; |
3609 | | |
3610 | 67.0M | CHECK_ERROR(); |
3611 | 67.0M | CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); |
3612 | | |
3613 | 67.0M | ADJUST_LOCAL_OFFSET(dst, dstw); |
3614 | 67.0M | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
3615 | | |
3616 | | /* setcc = jcc + 0x10. */ |
3617 | 67.0M | cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); |
3618 | | |
3619 | 67.0M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3620 | 67.0M | if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { |
3621 | 990k | size = 3 + 2; |
3622 | 990k | if (reg_map[TMP_REG1] >= 4) |
3623 | 0 | size += 1 + 1; |
3624 | 990k | else if (reg_map[dst] >= 4) |
3625 | 0 | size++; |
3626 | | |
3627 | 990k | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
3628 | 990k | FAIL_IF(!inst); |
3629 | 990k | INC_SIZE(size); |
3630 | | /* Set low register to conditional flag. */ |
3631 | 990k | if (reg_map[TMP_REG1] >= 4) |
3632 | 0 | *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; |
3633 | | |
3634 | 990k | inst[0] = GROUP_0F; |
3635 | 990k | inst[1] = cond_set; |
3636 | 990k | inst[2] = MOD_REG | reg_lmap[TMP_REG1]; |
3637 | 990k | inst += 3; |
3638 | | |
3639 | 990k | if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4) |
3640 | 0 | *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); |
3641 | | |
3642 | 990k | inst[0] = OR_rm8_r8; |
3643 | 990k | inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); |
3644 | 990k | return SLJIT_SUCCESS; |
3645 | 990k | } |
3646 | | |
3647 | 66.0M | reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; |
3648 | | |
3649 | 66.0M | size = 3 + (reg_map[reg] >= 4) + 4; |
3650 | 66.0M | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
3651 | 66.0M | FAIL_IF(!inst); |
3652 | 66.0M | INC_SIZE(size); |
3653 | | /* Set low register to conditional flag. */ |
3654 | | |
3655 | 66.0M | if (reg_map[reg] >= 4) |
3656 | 876 | *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; |
3657 | | |
3658 | 66.0M | inst[0] = GROUP_0F; |
3659 | 66.0M | inst[1] = cond_set; |
3660 | 66.0M | inst[2] = MOD_REG | reg_lmap[reg]; |
3661 | | |
3662 | 66.0M | inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); |
3663 | | /* The movzx instruction does not affect flags. */ |
3664 | 66.0M | inst[4] = GROUP_0F; |
3665 | 66.0M | inst[5] = MOVZX_r_rm8; |
3666 | 66.0M | inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); |
3667 | | |
3668 | 66.0M | if (reg != TMP_REG1) |
3669 | 65.6M | return SLJIT_SUCCESS; |
3670 | | |
3671 | 416k | if (GET_OPCODE(op) < SLJIT_ADD) { |
3672 | 0 | compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; |
3673 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
3674 | 0 | } |
3675 | | |
3676 | 416k | SLJIT_SKIP_CHECKS(compiler); |
3677 | 416k | return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); |
3678 | | |
3679 | | #else /* !SLJIT_CONFIG_X86_64 */ |
3680 | | SLJIT_ASSERT(reg_map[TMP_REG1] < 4); |
3681 | | |
3682 | | /* The SLJIT_CONFIG_X86_32 code path starts here. */ |
3683 | | if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) { |
3684 | | /* Low byte is accessible. */ |
3685 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); |
3686 | | FAIL_IF(!inst); |
3687 | | INC_SIZE(3 + 3); |
3688 | | /* Set low byte to conditional flag. */ |
3689 | | inst[0] = GROUP_0F; |
3690 | | inst[1] = cond_set; |
3691 | | inst[2] = U8(MOD_REG | reg_map[dst]); |
3692 | | |
3693 | | inst[3] = GROUP_0F; |
3694 | | inst[4] = MOVZX_r_rm8; |
3695 | | inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); |
3696 | | return SLJIT_SUCCESS; |
3697 | | } |
3698 | | |
3699 | | if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { |
3700 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2); |
3701 | | FAIL_IF(!inst); |
3702 | | INC_SIZE(3 + 2); |
3703 | | |
3704 | | /* Set low byte to conditional flag. */ |
3705 | | inst[0] = GROUP_0F; |
3706 | | inst[1] = cond_set; |
3707 | | inst[2] = U8(MOD_REG | reg_map[TMP_REG1]); |
3708 | | |
3709 | | inst[3] = OR_rm8_r8; |
3710 | | inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]); |
3711 | | return SLJIT_SUCCESS; |
3712 | | } |
3713 | | |
3714 | | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); |
3715 | | FAIL_IF(!inst); |
3716 | | INC_SIZE(3 + 3); |
3717 | | /* Set low byte to conditional flag. */ |
3718 | | inst[0] = GROUP_0F; |
3719 | | inst[1] = cond_set; |
3720 | | inst[2] = U8(MOD_REG | reg_map[TMP_REG1]); |
3721 | | |
3722 | | inst[3] = GROUP_0F; |
3723 | | inst[4] = MOVZX_r_rm8; |
3724 | | inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]); |
3725 | | |
3726 | | if (GET_OPCODE(op) < SLJIT_ADD) |
3727 | | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
3728 | | |
3729 | | SLJIT_SKIP_CHECKS(compiler); |
3730 | | return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); |
3731 | | #endif /* SLJIT_CONFIG_X86_64 */ |
3732 | 416k | } |
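 | | /* The conversion above relies on the fixed 0x10 distance between the Jcc and
 | | SETcc opcode maps: e.g. get_jump_code(SLJIT_EQUAL) yields 0F 84 (JE), and
 | | adding 0x10 gives 0F 94 (SETE r/m8). The SETcc result is widened with MOVZX,
 | | which leaves the just-computed flags intact. */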
3733 | | |
3734 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, |
3735 | | sljit_s32 dst_freg, |
3736 | | sljit_s32 src1, sljit_sw src1w, |
3737 | | sljit_s32 src2_freg) |
3738 | 0 | { |
3739 | 0 | sljit_u8* inst; |
3740 | 0 | sljit_uw size; |
3741 | 0 |
3742 | 0 | CHECK_ERROR(); |
3743 | 0 | CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); |
3744 | 0 |
3745 | 0 | ADJUST_LOCAL_OFFSET(src1, src1w); |
3746 | 0 |
3747 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3748 | 0 | compiler->mode32 = 1; |
3749 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3750 | 0 |
3751 | 0 | if (dst_freg != src2_freg) { |
3752 | 0 | if (dst_freg == src1) { |
3753 | 0 | src1 = src2_freg; |
3754 | 0 | src1w = 0; |
3755 | 0 | type ^= 0x1; |
3756 | 0 | } else |
3757 | 0 | FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0)); |
3758 | 0 | } |
3759 | 0 |
3760 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
3761 | 0 | FAIL_IF(!inst); |
3762 | 0 | INC_SIZE(2); |
3763 | 0 | inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10); |
3764 | 0 |
3765 | 0 | size = compiler->size; |
3766 | 0 | FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w)); |
3767 | 0 |
3768 | 0 | inst[1] = U8(compiler->size - size); |
3769 | 0 | return SLJIT_SUCCESS; |
3770 | 0 | } |
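 | | /* fselect is implemented as a skipped load rather than a real CMOV: a short
 | | Jcc with the inverted condition is emitted with a zero displacement, the load
 | | of src1 follows, and inst[1] is back-patched with the load's byte length, so
 | | the load is jumped over when the condition is false. */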
3771 | | |
3772 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, |
3773 | | sljit_s32 vreg, |
3774 | | sljit_s32 srcdst, sljit_sw srcdstw) |
3775 | 147k | { |
3776 | 147k | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
3777 | 147k | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
3778 | 147k | sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); |
3779 | 147k | sljit_uw op; |
3780 | | |
3781 | 147k | CHECK_ERROR(); |
3782 | 147k | CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); |
3783 | | |
3784 | 147k | ADJUST_LOCAL_OFFSET(srcdst, srcdstw); |
3785 | | |
3786 | 147k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3787 | 147k | compiler->mode32 = 1; |
3788 | 147k | #endif /* SLJIT_CONFIG_X86_64 */ |
3789 | | |
3790 | 147k | switch (reg_size) { |
3791 | 147k | case 4: |
3792 | 147k | op = EX86_SSE2; |
3793 | 147k | break; |
3794 | 0 | case 5: |
3795 | 0 | if (!(cpu_feature_list & CPU_FEATURE_AVX2)) |
3796 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3797 | 0 | op = EX86_SSE2 | VEX_256; |
3798 | 0 | break; |
3799 | 0 | default: |
3800 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3801 | 147k | } |
3802 | | |
3803 | 147k | if (!(srcdst & SLJIT_MEM)) |
3804 | 0 | alignment = reg_size; |
3805 | | |
3806 | 147k | if (type & SLJIT_SIMD_FLOAT) { |
3807 | 0 | if (elem_size == 2 || elem_size == 3) { |
3808 | 0 | op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm; |
3809 | |
3810 | 0 | if (elem_size == 3) |
3811 | 0 | op |= EX86_PREF_66; |
3812 | |
3813 | 0 | if (type & SLJIT_SIMD_STORE) |
3814 | 0 | op += 1; |
3815 | 0 | } else |
3816 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3817 | 147k | } else { |
3818 | 147k | op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm) |
3819 | 147k | | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3); |
3820 | 147k | } |
3821 | | |
3822 | 147k | if (type & SLJIT_SIMD_TEST) |
3823 | 0 | return SLJIT_SUCCESS; |
3824 | | |
3825 | 147k | if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) |
3826 | 0 | return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw); |
3827 | | |
3828 | 147k | return emit_groupf(compiler, op, vreg, srcdst, srcdstw); |
3829 | 147k | } |
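 | | /* Aligned and unaligned vector moves differ only in the mandatory prefix:
 | | 66 0F 6F/7F is MOVDQA while F3 0F 6F/7F is MOVDQU, so the alignment hint just
 | | selects EX86_PREF_66 or EX86_PREF_F3; for the float forms the store opcode is
 | | the load opcode plus one (MOVAPS 0F 28/29, MOVUPS 0F 10/11). */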
3830 | | |
3831 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, |
3832 | | sljit_s32 vreg, |
3833 | | sljit_s32 src, sljit_sw srcw) |
3834 | 0 | { |
3835 | 0 | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
3836 | 0 | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
3837 | 0 | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
3838 | 0 | sljit_u8 *inst; |
3839 | 0 | sljit_u8 opcode = 0; |
3840 | 0 | sljit_uw op; |
3841 | 0 |
3842 | 0 | CHECK_ERROR(); |
3843 | 0 | CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); |
3844 | 0 |
3845 | 0 | ADJUST_LOCAL_OFFSET(src, srcw); |
3846 | 0 |
3847 | 0 | if (!(type & SLJIT_SIMD_FLOAT)) { |
3848 | 0 | CHECK_EXTRA_REGS(src, srcw, (void)0); |
3849 | 0 | } |
3850 | 0 |
3851 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
3852 | 0 | if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) |
3853 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3854 | 0 | #else /* !SLJIT_CONFIG_X86_32 */ |
3855 | 0 | compiler->mode32 = 1; |
3856 | 0 |
3857 | 0 | if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) |
3858 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3859 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
3860 | 0 |
3861 | 0 | if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))) |
3862 | 0 | return SLJIT_ERR_UNSUPPORTED; |
3863 | 0 |
3864 | 0 | if (type & SLJIT_SIMD_TEST) |
3865 | 0 | return SLJIT_SUCCESS; |
3866 | 0 |
3867 | 0 | if (reg_size == 5) |
3868 | 0 | use_vex = 1; |
3869 | 0 |
3870 | 0 | if (use_vex && src != SLJIT_IMM) { |
3871 | 0 | op = 0; |
3872 | 0 |
3873 | 0 | switch (elem_size) { |
3874 | 0 | case 0: |
3875 | 0 | if (cpu_feature_list & CPU_FEATURE_AVX2) |
3876 | 0 | op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3877 | 0 | break; |
3878 | 0 | case 1: |
3879 | 0 | if (cpu_feature_list & CPU_FEATURE_AVX2) |
3880 | 0 | op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3881 | 0 | break; |
3882 | 0 | case 2: |
3883 | 0 | if (type & SLJIT_SIMD_FLOAT) { |
3884 | 0 | if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM))) |
3885 | 0 | op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3886 | 0 | } else if (cpu_feature_list & CPU_FEATURE_AVX2) |
3887 | 0 | op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3888 | 0 | break; |
3889 | 0 | default: |
3890 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3891 | 0 | if (!(type & SLJIT_SIMD_FLOAT)) { |
3892 | 0 | if (cpu_feature_list & CPU_FEATURE_AVX2) |
3893 | 0 | op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3894 | 0 | break; |
3895 | 0 | } |
3896 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3897 | 0 |
3898 | 0 | if (reg_size == 5) |
3899 | 0 | op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
3900 | 0 | break; |
3901 | 0 | } |
3902 | 0 |
3903 | 0 | if (op != 0) { |
3904 | 0 | if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) { |
3905 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3906 | 0 | if (elem_size >= 3) |
3907 | 0 | compiler->mode32 = 0; |
3908 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3909 | 0 | FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw)); |
3910 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3911 | 0 | compiler->mode32 = 1; |
3912 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3913 | 0 | src = vreg; |
3914 | 0 | srcw = 0; |
3915 | 0 | } |
3916 | 0 |
3917 | 0 | if (reg_size == 5) |
3918 | 0 | op |= VEX_256; |
3919 | 0 |
3920 | 0 | return emit_vex_instruction(compiler, op, vreg, 0, src, srcw); |
3921 | 0 | } |
3922 | 0 | } |
3923 | 0 |
3924 | 0 | if (type & SLJIT_SIMD_FLOAT) { |
3925 | 0 | if (src == SLJIT_IMM) { |
3926 | 0 | if (use_vex) |
3927 | 0 | return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0); |
3928 | 0 |
3929 | 0 | return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0); |
3930 | 0 | } |
3931 | 0 |
3932 | 0 | SLJIT_ASSERT(reg_size == 4); |
3933 | 0 |
3934 | 0 | if (use_vex) { |
3935 | 0 | if (elem_size == 3) |
3936 | 0 | return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw); |
3937 | 0 |
3938 | 0 | SLJIT_ASSERT(!(src & SLJIT_MEM)); |
3939 | 0 | FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0)); |
3940 | 0 | return emit_byte(compiler, 0); |
3941 | 0 | } |
3942 | 0 |
3943 | 0 | if (elem_size == 2 && vreg != src) { |
3944 | 0 | FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw)); |
3945 | 0 | src = vreg; |
3946 | 0 | srcw = 0; |
3947 | 0 | } |
3948 | 0 |
3949 | 0 | op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2; |
3950 | 0 | FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw)); |
3951 | 0 |
3952 | 0 | if (elem_size == 2) |
3953 | 0 | return emit_byte(compiler, 0); |
3954 | 0 | return SLJIT_SUCCESS; |
3955 | 0 | } |
3956 | 0 |
3957 | 0 | if (src == SLJIT_IMM) { |
3958 | 0 | if (elem_size == 0) { |
3959 | 0 | srcw = (sljit_u8)srcw; |
3960 | 0 | srcw |= srcw << 8; |
3961 | 0 | srcw |= srcw << 16; |
3962 | 0 | elem_size = 2; |
3963 | 0 | } else if (elem_size == 1) { |
3964 | 0 | srcw = (sljit_u16)srcw; |
3965 | 0 | srcw |= srcw << 16; |
3966 | 0 | elem_size = 2; |
3967 | 0 | } |
3968 | 0 |
3969 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3970 | 0 | if (elem_size == 2 && (sljit_s32)srcw == -1) |
3971 | 0 | srcw = -1; |
3972 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3973 | 0 |
3974 | 0 | if (srcw == 0 || srcw == -1) { |
3975 | 0 | if (use_vex) |
3976 | 0 | return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0); |
3977 | 0 |
3978 | 0 | return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0); |
3979 | 0 | } |
3980 | 0 |
3981 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3982 | 0 | if (elem_size == 3) |
3983 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); |
3984 | 0 | else |
3985 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
3986 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); |
3987 | 0 |
3988 | 0 | src = TMP_REG1; |
3989 | 0 | srcw = 0; |
3990 | 0 |
3991 | 0 | } |
3992 | 0 |
3993 | 0 | op = 2; |
3994 | 0 | opcode = MOVD_x_rm; |
3995 | 0 |
3996 | 0 | switch (elem_size) { |
3997 | 0 | case 0: |
3998 | 0 | if (!FAST_IS_REG(src)) { |
3999 | 0 | opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */; |
4000 | 0 | op = 3; |
4001 | 0 | } |
4002 | 0 | break; |
4003 | 0 | case 1: |
4004 | 0 | if (!FAST_IS_REG(src)) |
4005 | 0 | opcode = PINSRW_x_rm_i8; |
4006 | 0 | break; |
4007 | 0 | case 2: |
4008 | 0 | break; |
4009 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4010 | 0 | case 3: |
4011 | 0 | /* MOVQ */ |
4012 | 0 | compiler->mode32 = 0; |
4013 | 0 | break; |
4014 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
4015 | 0 | } |
4016 | 0 |
4017 | 0 | if (use_vex) { |
4018 | 0 | if (opcode != MOVD_x_rm) { |
4019 | 0 | op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode; |
4020 | 0 | FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw)); |
4021 | 0 | } else |
4022 | 0 | FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw)); |
4023 | 0 | } else { |
4024 | 0 | inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw); |
4025 | 0 | FAIL_IF(!inst); |
4026 | 0 | inst[0] = GROUP_0F; |
4027 | 0 | inst[1] = opcode; |
4028 | 0 |
4029 | 0 | if (op == 3) { |
4030 | 0 | SLJIT_ASSERT(opcode == 0x3a); |
4031 | 0 | inst[2] = PINSRB_x_rm_i8; |
4032 | 0 | } |
4033 | 0 | } |
4034 | 0 |
4035 | 0 | if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) { |
4036 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4037 | 0 | op = VPBROADCASTD_x_xm; |
4038 | 0 | #else /* !SLJIT_CONFIG_X86_32 */ |
4039 | 0 | op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; |
4040 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
4041 | 0 | return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); |
4042 | 0 | } |
4043 | 0 |
4044 | 0 | SLJIT_ASSERT(reg_size == 4); |
4045 | 0 |
4046 | 0 | if (opcode != MOVD_x_rm) |
4047 | 0 | FAIL_IF(emit_byte(compiler, 0)); |
4048 | 0 |
4049 | 0 | switch (elem_size) { |
4050 | 0 | case 0: |
4051 | 0 | if (use_vex) { |
4052 | 0 | FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); |
4053 | 0 | return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0); |
4054 | 0 | } |
4055 | 0 | FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); |
4056 | 0 | return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0); |
4057 | 0 | case 1: |
4058 | 0 | if (use_vex) |
4059 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0)); |
4060 | 0 | else |
4061 | 0 | FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0)); |
4062 | 0 | FAIL_IF(emit_byte(compiler, 0)); |
4063 | 0 | /* fallthrough */ |
4064 | 0 | default: |
4065 | 0 | if (use_vex) |
4066 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0)); |
4067 | 0 | else |
4068 | 0 | FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0)); |
4069 | 0 | return emit_byte(compiler, 0); |
4070 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4071 | 0 | case 3: |
4072 | 0 | compiler->mode32 = 1; |
4073 | 0 | if (use_vex) |
4074 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0)); |
4075 | 0 | else |
4076 | 0 | FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0)); |
4077 | 0 | return emit_byte(compiler, 0x44); |
4078 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
4079 | 0 | } |
4080 | 0 | } |
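 | | /* Integer immediates are splatted in a GP register first: a byte 0x5A becomes
 | | 0x5a5a5a5a via srcw |= srcw << 8; srcw |= srcw << 16;, is moved in with MOVD,
 | | then broadcast with PSHUFD imm8 0 (imm8 0x44 pairs the low qword on x86-64).
 | | All-zero and all-ones patterns short-circuit to PXOR / PCMPEQD instead. */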
4081 | | |
4082 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, |
4083 | | sljit_s32 vreg, sljit_s32 lane_index, |
4084 | | sljit_s32 srcdst, sljit_sw srcdstw) |
4085 | 77.5k | { |
4086 | 77.5k | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
4087 | 77.5k | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
4088 | 77.5k | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
4089 | 77.5k | sljit_u8 *inst; |
4090 | 77.5k | sljit_u8 opcode = 0; |
4091 | 77.5k | sljit_uw op; |
4092 | 77.5k | sljit_s32 vreg_orig = vreg; |
4093 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4094 | | sljit_s32 srcdst_is_ereg = 0; |
4095 | | sljit_s32 srcdst_orig = 0; |
4096 | | sljit_sw srcdstw_orig = 0; |
4097 | | #endif /* SLJIT_CONFIG_X86_32 */ |
4098 | | |
4099 | 77.5k | CHECK_ERROR(); |
4100 | 77.5k | CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); |
4101 | | |
4102 | 77.5k | ADJUST_LOCAL_OFFSET(srcdst, srcdstw); |
4103 | | |
4104 | 77.5k | if (reg_size == 5) { |
4105 | 0 | if (!(cpu_feature_list & CPU_FEATURE_AVX2)) |
4106 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4107 | 0 | use_vex = 1; |
4108 | 77.5k | } else if (reg_size != 4) |
4109 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4110 | | |
4111 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4112 | | if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2) |
4113 | | return SLJIT_ERR_UNSUPPORTED; |
4114 | | #else /* SLJIT_CONFIG_X86_32 */ |
4115 | 77.5k | if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) |
4116 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4117 | 77.5k | #endif /* SLJIT_CONFIG_X86_32 */ |
4118 | | |
4119 | 77.5k | if (type & SLJIT_SIMD_TEST) |
4120 | 0 | return SLJIT_SUCCESS; |
4121 | | |
4122 | 77.5k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4123 | 77.5k | compiler->mode32 = 1; |
4124 | | #else /* !SLJIT_CONFIG_X86_64 */ |
4125 | | if (!(type & SLJIT_SIMD_FLOAT)) { |
4126 | | CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1); |
4127 | | |
4128 | | if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) { |
4129 | | srcdst_orig = srcdst; |
4130 | | srcdstw_orig = srcdstw; |
4131 | | srcdst = TMP_REG1; |
4132 | | srcdstw = 0; |
4133 | | } |
4134 | | } |
4135 | | #endif /* SLJIT_CONFIG_X86_64 */ |
4136 | | |
4137 | 77.5k | if (type & SLJIT_SIMD_LANE_ZERO) { |
4138 | 77.5k | if (lane_index == 0) { |
4139 | 77.5k | if (!(type & SLJIT_SIMD_FLOAT)) { |
4140 | 77.5k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4141 | 77.5k | if (elem_size == 3) { |
4142 | 0 | compiler->mode32 = 0; |
4143 | 0 | elem_size = 2; |
4144 | 0 | } |
4145 | 77.5k | #endif /* SLJIT_CONFIG_X86_64 */ |
4146 | 77.5k | if (srcdst == SLJIT_IMM) { |
4147 | 46.4k | if (elem_size == 0) |
4148 | 0 | srcdstw = (sljit_u8)srcdstw; |
4149 | 46.4k | else if (elem_size == 1) |
4150 | 0 | srcdstw = (sljit_u16)srcdstw; |
4151 | | |
4152 | 46.4k | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); |
4153 | 46.4k | srcdst = TMP_REG1; |
4154 | 46.4k | srcdstw = 0; |
4155 | 46.4k | elem_size = 2; |
4156 | 46.4k | } |
4157 | | |
4158 | 77.5k | if (elem_size == 2) { |
4159 | 77.5k | if (use_vex) |
4160 | 0 | return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw); |
4161 | 77.5k | return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw); |
4162 | 77.5k | } |
4163 | 77.5k | } else if (srcdst & SLJIT_MEM) { |
4164 | 0 | SLJIT_ASSERT(elem_size == 2 || elem_size == 3); |
4165 | |
4166 | 0 | if (use_vex) |
4167 | 0 | return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw); |
4168 | 0 | return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw); |
4169 | 0 | } else if (elem_size == 3) { |
4170 | 0 | if (use_vex) |
4171 | 0 | return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0); |
4172 | 0 | return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0); |
4173 | 0 | } else if (use_vex) { |
4174 | 0 | FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); |
4175 | 0 | return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0); |
4176 | 0 | } |
4177 | 77.5k | } |
4178 | | |
4179 | 0 | if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { |
4180 | 0 | vreg = TMP_FREG; |
4181 | 0 | lane_index -= (1 << (4 - elem_size)); |
4182 | 0 | } else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) { |
4183 | 0 | if (use_vex) |
4184 | 0 | FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw)); |
4185 | 0 | else |
4186 | 0 | FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); |
4187 | 0 | srcdst = TMP_FREG; |
4188 | 0 | srcdstw = 0; |
4189 | 0 | } |
4190 | | |
4191 | 0 | op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0) |
4192 | 0 | | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2; |
4193 | |
4194 | 0 | if (use_vex) |
4195 | 0 | FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0)); |
4196 | 0 | else |
4197 | 0 | FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0)); |
4198 | 0 | } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { |
4199 | 0 | FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0)); |
4200 | 0 | FAIL_IF(emit_byte(compiler, 1)); |
4201 | | |
4202 | 0 | vreg = TMP_FREG; |
4203 | 0 | lane_index -= (1 << (4 - elem_size)); |
4204 | 0 | } |
4205 | | |
4206 | 0 | if (type & SLJIT_SIMD_FLOAT) { |
4207 | 0 | if (elem_size == 3) { |
4208 | 0 | if (srcdst & SLJIT_MEM) { |
4209 | 0 | if (type & SLJIT_SIMD_STORE) |
4210 | 0 | op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x; |
4211 | 0 | else |
4212 | 0 | op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; |
4213 | | |
4214 | | /* VEX prefix clears upper bits of the target register. */ |
4215 | 0 | if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG)) |
4216 | 0 | FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2 |
4217 | 0 | | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw)); |
4218 | 0 | else |
4219 | 0 | FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw)); |
4220 | | |
4221 | | /* In case of store, vreg is not TMP_FREG. */ |
4222 | 0 | } else if (type & SLJIT_SIMD_STORE) { |
4223 | 0 | if (lane_index == 1) { |
4224 | 0 | if (use_vex) |
4225 | 0 | return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0); |
4226 | 0 | return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0); |
4227 | 0 | } |
4228 | 0 | if (use_vex) |
4229 | 0 | return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0); |
4230 | 0 | return emit_sse2_load(compiler, 0, srcdst, vreg, 0); |
4231 | 0 | } else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) { |
4232 | 0 | if (lane_index == 1) |
4233 | 0 | FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0)); |
4234 | 0 | else |
4235 | 0 | FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0)); |
4236 | 0 | } else { |
4237 | 0 | if (lane_index == 1) |
4238 | 0 | FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0)); |
4239 | 0 | else |
4240 | 0 | FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0)); |
4241 | 0 | } |
4242 | 0 | } else if (type & SLJIT_SIMD_STORE) { |
4243 | 0 | if (lane_index == 0) { |
4244 | 0 | if (use_vex) |
4245 | 0 | return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), |
4246 | 0 | vreg, ((srcdst & SLJIT_MEM) ? 0 : srcdst), srcdst, srcdstw); |
4247 | 0 | return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg); |
4248 | 0 | } |
4249 | | |
4250 | 0 | if (srcdst & SLJIT_MEM) { |
4251 | 0 | if (use_vex) |
4252 | 0 | FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw)); |
4253 | 0 | else |
4254 | 0 | FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw)); |
4255 | 0 | return emit_byte(compiler, U8(lane_index)); |
4256 | 0 | } |
4257 | | |
4258 | 0 | if (use_vex) { |
4259 | 0 | FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0)); |
4260 | 0 | return emit_byte(compiler, U8(lane_index)); |
4261 | 0 | } |
4262 | | |
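 | | /* With distinct registers, lanes 1 and 2 reach position 0 without an immediate byte (MOVSHDUP duplicates the odd lanes, MOVHLPS moves the high half down); lane 3 and the in-place case use PSHUFD/SHUFPS, which take a selector byte. */ |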
4263 | 0 | if (srcdst == vreg) |
4264 | 0 | op = SHUFPS_x_xm | EX86_SSE2; |
4265 | 0 | else { |
4266 | 0 | switch (lane_index) { |
4267 | 0 | case 1: |
4268 | 0 | op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2; |
4269 | 0 | break; |
4270 | 0 | case 2: |
4271 | 0 | op = MOVHLPS_x_x | EX86_SSE2; |
4272 | 0 | break; |
4273 | 0 | default: |
4274 | 0 | SLJIT_ASSERT(lane_index == 3); |
4275 | 0 | op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2; |
4276 | 0 | break; |
4277 | 0 | } |
4278 | 0 | } |
4279 | | |
4280 | 0 | FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0)); |
4281 | | |
4282 | 0 | op &= 0xff; |
4283 | 0 | if (op == SHUFPS_x_xm || op == PSHUFD_x_xm) |
4284 | 0 | return emit_byte(compiler, U8(lane_index)); |
4285 | | |
4286 | 0 | return SLJIT_SUCCESS; |
4287 | 0 | } else { |
4288 | 0 | if (lane_index != 0 || (srcdst & SLJIT_MEM)) { |
4289 | 0 | FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw)); |
4290 | 0 | FAIL_IF(emit_byte(compiler, U8(lane_index << 4))); |
4291 | 0 | } else |
4292 | 0 | FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst)); |
4293 | 0 | } |
4294 | | |
4295 | 0 | if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE)) |
4296 | 0 | return SLJIT_SUCCESS; |
4297 | | |
4298 | 0 | SLJIT_ASSERT(reg_size == 5); |
4299 | |
4300 | 0 | if (type & SLJIT_SIMD_LANE_ZERO) { |
4301 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0)); |
4302 | 0 | return emit_byte(compiler, 0x4e); |
4303 | 0 | } |
4304 | | |
4305 | 0 | FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0)); |
4306 | 0 | return emit_byte(compiler, 1); |
4307 | 0 | } |
4308 | | |
4309 | 0 | if (srcdst == SLJIT_IMM) { |
4310 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); |
4311 | 0 | srcdst = TMP_REG1; |
4312 | 0 | srcdstw = 0; |
4313 | 0 | } |
4314 | | |
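 | | /* For the PEXTR/PINSR forms below, op records the opcode-map length: 3 selects the three-byte 0F 3A map, 2 the two-byte 0F map (PINSRW only). */ |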
4315 | 0 | op = 3; |
4316 | |
4317 | 0 | switch (elem_size) { |
4318 | 0 | case 0: |
4319 | 0 | opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8; |
4320 | 0 | break; |
4321 | 0 | case 1: |
4322 | 0 | if (!(type & SLJIT_SIMD_STORE)) { |
4323 | 0 | op = 2; |
4324 | 0 | opcode = PINSRW_x_rm_i8; |
4325 | 0 | } else |
4326 | 0 | opcode = PEXTRW_rm_x_i8; |
4327 | 0 | break; |
4328 | 0 | case 2: |
4329 | 0 | opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; |
4330 | 0 | break; |
4331 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4332 | 0 | case 3: |
4333 | | /* PINSRQ / PEXTRQ */ |
4334 | 0 | opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; |
4335 | 0 | compiler->mode32 = 0; |
4336 | 0 | break; |
4337 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
4338 | 0 | } |
4339 | | |
4340 | 0 | if (use_vex && (type & SLJIT_SIMD_STORE)) { |
4341 | 0 | op = opcode | ((op == 3) ? VEX_OP_0F3A : 0); |
4342 | 0 | FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw)); |
4343 | 0 | } else { |
4344 | 0 | inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw); |
4345 | 0 | FAIL_IF(!inst); |
4346 | 0 | inst[0] = GROUP_0F; |
4347 | |
4348 | 0 | if (op == 3) { |
4349 | 0 | inst[1] = 0x3a; |
4350 | 0 | inst[2] = opcode; |
4351 | 0 | } else |
4352 | 0 | inst[1] = opcode; |
4353 | 0 | } |
4354 | | |
4355 | 0 | FAIL_IF(emit_byte(compiler, U8(lane_index))); |
4356 | | |
4357 | 0 | if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) { |
4358 | 0 | if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) { |
4359 | 0 | SLJIT_ASSERT(reg_size == 5); |
4360 | |
4361 | 0 | if (type & SLJIT_SIMD_LANE_ZERO) { |
4362 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0)); |
4363 | 0 | return emit_byte(compiler, 0x4e); |
4364 | 0 | } |
4365 | | |
4366 | 0 | FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0)); |
4367 | 0 | return emit_byte(compiler, 1); |
4368 | 0 | } |
4369 | | |
4370 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4371 | | if (srcdst_orig & SLJIT_MEM) |
4372 | | return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); |
4373 | | #endif /* SLJIT_CONFIG_X86_32 */ |
4374 | 0 | return SLJIT_SUCCESS; |
4375 | 0 | } |
4376 | | |
4377 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4378 | 0 | if (elem_size >= 3) |
4379 | 0 | return SLJIT_SUCCESS; |
4380 | | |
4381 | 0 | compiler->mode32 = (type & SLJIT_32); |
4382 | |
4383 | 0 | op = 2; |
4384 | |
4385 | 0 | if (elem_size == 0) |
4386 | 0 | op |= EX86_REX; |
4387 | |
4388 | 0 | if (elem_size == 2) { |
4389 | 0 | if (type & SLJIT_32) |
4390 | 0 | return SLJIT_SUCCESS; |
4391 | | |
4392 | 0 | SLJIT_ASSERT(!(compiler->mode32)); |
4393 | 0 | op = 1; |
4394 | 0 | } |
4395 | | |
4396 | 0 | inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0); |
4397 | 0 | FAIL_IF(!inst); |
4398 | | |
4399 | 0 | if (op != 1) { |
4400 | 0 | inst[0] = GROUP_0F; |
4401 | 0 | inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16); |
4402 | 0 | } else |
4403 | 0 | inst[0] = MOVSXD_r_rm; |
4404 | | #else /* !SLJIT_CONFIG_X86_64 */ |
4405 | | if (elem_size >= 2) |
4406 | | return SLJIT_SUCCESS; |
4407 | | |
4408 | | FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16, |
4409 | | (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0)); |
4410 | | |
4411 | | if (srcdst_orig & SLJIT_MEM) |
4412 | | return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); |
4413 | | #endif /* SLJIT_CONFIG_X86_64 */ |
4414 | 0 | return SLJIT_SUCCESS; |
4415 | 0 | } |
4416 | | |
4417 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, |
4418 | | sljit_s32 vreg, |
4419 | | sljit_s32 src, sljit_s32 src_lane_index) |
4420 | 77.5k | { |
4421 | 77.5k | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
4422 | 77.5k | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
4423 | 77.5k | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
4424 | 77.5k | sljit_uw pref; |
4425 | 77.5k | sljit_u8 byte; |
4426 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4427 | | sljit_s32 opcode3 = TMP_REG1; |
4428 | | #else /* !SLJIT_CONFIG_X86_32 */ |
4429 | 77.5k | sljit_s32 opcode3 = SLJIT_S0; |
4430 | 77.5k | #endif /* SLJIT_CONFIG_X86_32 */ |
4431 | | |
4432 | 77.5k | CHECK_ERROR(); |
4433 | 77.5k | CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); |
4434 | | |
4435 | 77.5k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4436 | 77.5k | compiler->mode32 = 1; |
4437 | 77.5k | #endif /* SLJIT_CONFIG_X86_64 */ |
4438 | 77.5k | SLJIT_ASSERT(reg_map[opcode3] == 3); |
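 | | /* opcode3 is an encoding trick, not a real operand: any register whose reg_map value is 3 supplies the /3 opcode-extension field required by PSRLDQ (66 0F 73 /3). */ |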
4439 | | |
4440 | 77.5k | if (reg_size == 5) { |
4441 | 0 | if (!(cpu_feature_list & CPU_FEATURE_AVX2)) |
4442 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4443 | 0 | use_vex = 1; |
4444 | 77.5k | } else if (reg_size != 4) |
4445 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4446 | | |
4447 | 77.5k | if (type & SLJIT_SIMD_FLOAT) { |
4448 | 0 | pref = 0; |
4449 | 0 | byte = U8(src_lane_index); |
4450 | 0 |
4451 | 0 | if (elem_size == 3) { |
4452 | 0 | if (type & SLJIT_SIMD_TEST) |
4453 | 0 | return SLJIT_SUCCESS; |
4454 | | |
4455 | 0 | if (reg_size == 5) { |
4456 | 0 | if (src_lane_index == 0) |
4457 | 0 | return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); |
4458 | | |
4459 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); |
4460 | | |
4461 | 0 | byte = U8(byte | (byte << 2)); |
4462 | 0 | return emit_byte(compiler, U8(byte | (byte << 4))); |
4463 | 0 | } |
4464 | | |
4465 | 0 | if (src_lane_index == 0) { |
4466 | 0 | if (use_vex) |
4467 | 0 | return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0); |
4468 | 0 | return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0); |
4469 | 0 | } |
4470 | | |
4471 | | /* Changes it to SHUFPD_x_xm. */ |
4472 | 0 | pref = EX86_PREF_66; |
4473 | 0 | } else if (elem_size != 2) |
4474 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4475 | 0 | else if (type & SLJIT_SIMD_TEST) |
4476 | 0 | return SLJIT_SUCCESS; |
4477 | | |
4478 | 0 | if (reg_size == 5) { |
4479 | 0 | SLJIT_ASSERT(elem_size == 2); |
4480 | 0 |
4481 | 0 | if (src_lane_index == 0) |
4482 | 0 | return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); |
4483 | | |
4484 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); |
4485 | | |
4486 | 0 | byte = 0x44; |
4487 | 0 | if (src_lane_index >= 4) { |
4488 | 0 | byte = 0xee; |
4489 | 0 | src_lane_index -= 4; |
4490 | 0 | } |
4491 | 0 |
4492 | 0 | FAIL_IF(emit_byte(compiler, byte)); |
4493 | 0 | FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0)); |
4494 | 0 | byte = U8(src_lane_index); |
4495 | 0 | } else if (use_vex) { |
4496 | 0 | FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0)); |
4497 | 0 | } else { |
4498 | 0 | if (vreg != src) |
4499 | 0 | FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0)); |
4500 | | |
4501 | 0 | FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0)); |
4502 | 0 | } |
4503 | | |
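 | | /* Build the shuffle immediate by replicating the lane index into every selector field: four 2-bit fields for single precision, two 1-bit fields for double precision. */ |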
4504 | 0 | if (elem_size == 2) { |
4505 | 0 | byte = U8(byte | (byte << 2)); |
4506 | 0 | byte = U8(byte | (byte << 4)); |
4507 | 0 | } else |
4508 | 0 | byte = U8(byte | (byte << 1)); |
4509 | 0 |
4510 | 0 | return emit_byte(compiler, U8(byte)); |
4511 | 0 | } |
4512 | | |
4513 | 77.5k | if (type & SLJIT_SIMD_TEST) |
4514 | 0 | return SLJIT_SUCCESS; |
4515 | | |
4516 | 77.5k | if (elem_size == 0) { |
4517 | 0 | if (reg_size == 5 && src_lane_index >= 16) { |
4518 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); |
4519 | 0 | FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa)); |
4520 | 0 | src_lane_index &= 0x7; |
4521 | 0 | src = vreg; |
4522 | 0 | } |
4523 | | |
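 | | /* Opcode 0F 70 encodes PSHUFD, PSHUFLW or PSHUFHW depending on the 66/F2/F3 prefix: a 4-aligned byte index fits a PSHUFD field and an even index below 8 a PSHUFLW field; any other index is first shifted to byte 0 with PSRLDQ. */ |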
4524 | 0 | if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) { |
4525 | 0 | pref = 0; |
4526 | 0 |
4527 | 0 | if ((src_lane_index & 0x3) == 0) { |
4528 | 0 | pref = EX86_PREF_66; |
4529 | 0 | byte = U8(src_lane_index >> 2); |
4530 | 0 | } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) { |
4531 | 0 | pref = EX86_PREF_F2; |
4532 | 0 | byte = U8(src_lane_index >> 1); |
4533 | 0 | } else { |
4534 | 0 | if (!use_vex) { |
4535 | 0 | if (vreg != src) |
4536 | 0 | FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0)); |
4537 | | |
4538 | 0 | FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0)); |
4539 | 0 | } else |
4540 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0)); |
4541 | | |
4542 | 0 | FAIL_IF(emit_byte(compiler, U8(src_lane_index))); |
4543 | 0 | } |
4544 | | |
4545 | 0 | if (pref != 0) { |
4546 | 0 | if (use_vex) |
4547 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0)); |
4548 | 0 | else |
4549 | 0 | FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0)); |
4550 | 0 | FAIL_IF(emit_byte(compiler, byte)); |
4551 | 0 | } |
4552 | | |
4553 | 0 | src = vreg; |
4554 | 0 | } |
4555 | | |
4556 | 0 | if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2)) |
4557 | 0 | return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); |
4558 | | |
4559 | 0 | SLJIT_ASSERT(reg_size == 4); |
4560 | 0 | FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); |
4561 | 0 | return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0); |
4562 | 0 | } |
4563 | | |
4564 | 77.5k | if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) { |
4565 | 0 | switch (elem_size) { |
4566 | 0 | case 1: |
4567 | 0 | pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
4568 | 0 | break; |
4569 | 0 | case 2: |
4570 | 0 | pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
4571 | 0 | break; |
4572 | 0 | default: |
4573 | 0 | pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; |
4574 | 0 | break; |
4575 | 0 | } |
4576 | | |
4577 | 0 | if (reg_size == 5) |
4578 | 0 | pref |= VEX_256; |
4579 | 0 |
4580 | 0 | return emit_vex_instruction(compiler, pref, vreg, 0, src, 0); |
4581 | 0 | } |
4582 | | |
4583 | 77.5k | if (reg_size == 5) { |
4584 | 0 | switch (elem_size) { |
4585 | 0 | case 1: |
4586 | 0 | byte = U8(src_lane_index & 0x3); |
4587 | 0 | src_lane_index >>= 2; |
4588 | 0 | pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2; |
4589 | 0 | break; |
4590 | 0 | case 2: |
4591 | 0 | byte = U8(src_lane_index & 0x3); |
4592 | 0 | src_lane_index >>= 1; |
4593 | 0 | pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2; |
4594 | 0 | break; |
4595 | 0 | case 3: |
4596 | 0 | pref = 0; |
4597 | 0 | break; |
4598 | 0 | default: |
4599 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); |
4600 | 0 | return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee)); |
4601 | 0 | } |
4602 | | |
4603 | 0 | if (pref != 0) { |
4604 | 0 | FAIL_IF(emit_vex_instruction(compiler, pref, vreg, 0, src, 0)); |
4605 | 0 | byte = U8(byte | (byte << 2)); |
4606 | 0 | FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); |
4607 | | |
4608 | 0 | if (src_lane_index == 0) |
4609 | 0 | return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); |
4610 | | |
4611 | 0 | src = vreg; |
4612 | 0 | } |
4613 | | |
4614 | 0 | FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); |
4615 | 0 | byte = U8(src_lane_index); |
4616 | 0 | byte = U8(byte | (byte << 2)); |
4617 | 0 | return emit_byte(compiler, U8(byte | (byte << 4))); |
4618 | 0 | } |
4619 | | |
4620 | 77.5k | switch (elem_size) { |
4621 | 0 | case 1: |
4622 | 0 | byte = U8(src_lane_index & 0x3); |
4623 | 0 | src_lane_index >>= 1; |
4624 | 0 | pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3; |
4625 | 0 |
4626 | 0 | if (use_vex) |
4627 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0)); |
4628 | 0 | else |
4629 | 0 | FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0)); |
4630 | 0 | byte = U8(byte | (byte << 2)); |
4631 | 0 | FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); |
4632 | | |
4633 | 0 | if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2) |
4634 | 0 | return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); |
4635 | | |
4636 | 0 | src = vreg; |
4637 | | /* fallthrough */ |
4638 | 77.5k | case 2: |
4639 | 77.5k | byte = U8(src_lane_index); |
4640 | 77.5k | byte = U8(byte | (byte << 2)); |
4641 | 77.5k | break; |
4642 | 0 | default: |
4643 | 0 | byte = U8(src_lane_index << 1); |
4644 | 0 | byte = U8(byte | (byte << 2) | 0x4); |
4645 | 0 | break; |
4646 | 77.5k | } |
4647 | | |
4648 | 77.5k | if (use_vex) |
4649 | 0 | FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, src, 0)); |
4650 | 77.5k | else |
4651 | 77.5k | FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0)); |
4652 | 77.5k | return emit_byte(compiler, U8(byte | (byte << 4))); |
4653 | 77.5k | } |
4654 | | |
4655 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, |
4656 | | sljit_s32 vreg, |
4657 | | sljit_s32 src, sljit_sw srcw) |
4658 | 0 | { |
4659 | 0 | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
4660 | 0 | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
4661 | 0 | sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); |
4662 | 0 | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
4663 | 0 | sljit_u8 opcode; |
4664 | 0 |
4665 | 0 | CHECK_ERROR(); |
4666 | 0 | CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); |
4667 | 0 |
4668 | 0 | ADJUST_LOCAL_OFFSET(src, srcw); |
4669 | 0 |
4670 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4671 | 0 | compiler->mode32 = 1; |
4672 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
4673 | 0 |
4674 | 0 | if (reg_size == 5) { |
4675 | 0 | if (!(cpu_feature_list & CPU_FEATURE_AVX2)) |
4676 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4677 | 0 | use_vex = 1; |
4678 | 0 | } else if (reg_size != 4) |
4679 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4680 | 0 |
4681 | 0 | if (type & SLJIT_SIMD_FLOAT) { |
4682 | 0 | if (elem_size != 2 || elem2_size != 3) |
4683 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4684 | 0 |
4685 | 0 | if (type & SLJIT_SIMD_TEST) |
4686 | 0 | return SLJIT_SUCCESS; |
4687 | 0 |
4688 | 0 | if (use_vex) |
4689 | 0 | return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw); |
4690 | 0 | return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw); |
4691 | 0 | } |
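 | | /* The integer cases map directly onto SSE4.1 PMOVSX/PMOVZX; the suffix letters name the source and destination element widths (B/W/D/Q). */ |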
4692 | 0 |
4693 | 0 | switch (elem_size) { |
4694 | 0 | case 0: |
4695 | 0 | if (elem2_size == 1) |
4696 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm; |
4697 | 0 | else if (elem2_size == 2) |
4698 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm; |
4699 | 0 | else if (elem2_size == 3) |
4700 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm; |
4701 | 0 | else |
4702 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4703 | 0 | break; |
4704 | 0 | case 1: |
4705 | 0 | if (elem2_size == 2) |
4706 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm; |
4707 | 0 | else if (elem2_size == 3) |
4708 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm; |
4709 | 0 | else |
4710 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4711 | 0 | break; |
4712 | 0 | case 2: |
4713 | 0 | if (elem2_size == 3) |
4714 | 0 | opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm; |
4715 | 0 | else |
4716 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4717 | 0 | break; |
4718 | 0 | default: |
4719 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4720 | 0 | } |
4721 | 0 |
4722 | 0 | if (type & SLJIT_SIMD_TEST) |
4723 | 0 | return SLJIT_SUCCESS; |
4724 | 0 |
4725 | 0 | if (use_vex) |
4726 | 0 | return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw); |
4727 | 0 | return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw); |
4728 | 0 | } |
4729 | | |
4730 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, |
4731 | | sljit_s32 vreg, |
4732 | | sljit_s32 dst, sljit_sw dstw) |
4733 | 118k | { |
4734 | 118k | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
4735 | 118k | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
4736 | 118k | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
4737 | 118k | sljit_s32 dst_r; |
4738 | 118k | sljit_uw op; |
4739 | 118k | sljit_u8 *inst; |
4740 | | |
4741 | 118k | CHECK_ERROR(); |
4742 | 118k | CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); |
4743 | | |
4744 | 118k | ADJUST_LOCAL_OFFSET(dst, dstw); |
4745 | | |
4746 | 118k | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
4747 | 118k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4748 | 118k | compiler->mode32 = 1; |
4749 | 118k | #endif /* SLJIT_CONFIG_X86_64 */ |
4750 | | |
4751 | 118k | if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) |
4752 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4753 | | |
4754 | 118k | if (reg_size == 4) { |
4755 | 118k | if (type & SLJIT_SIMD_TEST) |
4756 | 0 | return SLJIT_SUCCESS; |
4757 | | |
4758 | 118k | op = EX86_PREF_66 | EX86_SSE2_OP2; |
4759 | | |
4760 | 118k | switch (elem_size) { |
4761 | 0 | case 1: |
4762 | 0 | if (use_vex) |
4763 | 0 | FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0)); |
4764 | 0 | else |
4765 | 0 | FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0)); |
4766 | 0 | vreg = TMP_FREG; |
4767 | 0 | break; |
4768 | 0 | case 2: |
4769 | 0 | op = EX86_SSE2_OP2; |
4770 | 0 | break; |
4771 | 118k | } |
4772 | | |
4773 | 118k | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
4774 | 118k | op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x; |
4775 | | |
4776 | 118k | if (use_vex) |
4777 | 0 | FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0)); |
4778 | 118k | else |
4779 | 118k | FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0)); |
4780 | | |
4781 | 118k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4782 | 118k | compiler->mode32 = type & SLJIT_32; |
4783 | 118k | #endif /* SLJIT_CONFIG_X86_64 */ |
4784 | | |
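 | | /* For 16-bit elements the sign bytes produced by PACKSSWB occupy the upper half of the register read by PMOVMSKB, so the mask is shifted right by 8 to drop the lower byte. */ |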
4785 | 118k | if (elem_size == 1) { |
4786 | 0 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0); |
4787 | 0 | FAIL_IF(!inst); |
4788 | 0 | inst[1] |= SHR; |
4789 | 0 | } |
4790 | | |
4791 | 118k | if (dst_r == TMP_REG1) |
4792 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
4793 | | |
4794 | 118k | return SLJIT_SUCCESS; |
4795 | 118k | } |
4796 | | |
4797 | 0 | if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)) |
4798 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4799 | | |
4800 | 0 | if (type & SLJIT_SIMD_TEST) |
4801 | 0 | return SLJIT_SUCCESS; |
4802 | | |
4803 | 0 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
4804 | 0 |
4805 | 0 | if (elem_size == 1) { |
4806 | 0 | FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0)); |
4807 | 0 | FAIL_IF(emit_byte(compiler, 1)); |
4808 | 0 | FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0)); |
4809 | 0 | FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0)); |
4810 | 0 | } else { |
4811 | 0 | op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; |
4812 | 0 |
4813 | 0 | if (elem_size == 0) |
4814 | 0 | op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2; |
4815 | 0 | else if (elem_size == 3) |
4816 | 0 | op |= EX86_PREF_66; |
4817 | 0 |
4818 | 0 | FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0)); |
4819 | 0 | } |
4820 | | |
4821 | 0 | if (dst_r == TMP_REG1) { |
4822 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4823 | 0 | compiler->mode32 = type & SLJIT_32; |
4824 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
4825 | 0 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
4826 | 0 | } |
4827 | | |
4828 | 0 | return SLJIT_SUCCESS; |
4829 | 0 | } |
4830 | | |
4831 | | static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, |
4832 | | sljit_s32 dst_vreg, sljit_s32 src_vreg) |
4833 | 0 | { |
4834 | 0 | sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2; |
4835 | |
4836 | 0 | SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4); |
4837 | |
4838 | 0 | if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3) |
4839 | 0 | op |= EX86_PREF_66; |
4840 | |
4841 | 0 | return emit_groupf(compiler, op, dst_vreg, src_vreg, 0); |
4842 | 0 | } |
4843 | | |
4844 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, |
4845 | | sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) |
4846 | 28.9k | { |
4847 | 28.9k | sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); |
4848 | 28.9k | sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); |
4849 | 28.9k | sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); |
4850 | 28.9k | sljit_uw op = 0; |
4851 | 28.9k | sljit_uw mov_op = 0; |
4852 | | |
4853 | 28.9k | CHECK_ERROR(); |
4854 | 28.9k | CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); |
4855 | 28.9k | ADJUST_LOCAL_OFFSET(src2, src2w); |
4856 | | |
4857 | 28.9k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4858 | 28.9k | compiler->mode32 = 1; |
4859 | 28.9k | #endif /* SLJIT_CONFIG_X86_64 */ |
4860 | | |
4861 | 28.9k | if (reg_size == 5) { |
4862 | 0 | if (!(cpu_feature_list & CPU_FEATURE_AVX2)) |
4863 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4864 | 28.9k | } else if (reg_size != 4) |
4865 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4866 | | |
4867 | 28.9k | if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) |
4868 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4869 | | |
4870 | 28.9k | switch (SLJIT_SIMD_GET_OPCODE(type)) { |
4871 | 28.9k | case SLJIT_SIMD_OP2_AND: |
4872 | 28.9k | op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm; |
4873 | | |
4874 | 28.9k | if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) |
4875 | 28.9k | op |= EX86_PREF_66; |
4876 | 28.9k | break; |
4877 | 0 | case SLJIT_SIMD_OP2_OR: |
4878 | 0 | op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm; |
4879 | |
4880 | 0 | if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) |
4881 | 0 | op |= EX86_PREF_66; |
4882 | 0 | break; |
4883 | 0 | case SLJIT_SIMD_OP2_XOR: |
4884 | 0 | op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm; |
4885 | |
4886 | 0 | if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) |
4887 | 0 | op |= EX86_PREF_66; |
4888 | 0 | break; |
4889 | | |
4890 | 0 | case SLJIT_SIMD_OP2_SHUFFLE: |
4891 | 0 | if (reg_size != 4) |
4892 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4893 | | |
4894 | 0 | op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38; |
4895 | 0 | break; |
4896 | 28.9k | } |
4897 | | |
4898 | 28.9k | if (type & SLJIT_SIMD_TEST) |
4899 | 0 | return SLJIT_SUCCESS; |
4900 | | |
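 | | /* The elem2 size field appears to encode the memory operand's alignment guarantee: when it is below the register size, the operand is loaded with an unaligned MOVUPS/MOVDQU first, since the legacy SSE ALU forms fault on misaligned 16-byte accesses. */ |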
4901 | 28.9k | if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) { |
4902 | 0 | mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2; |
4903 | 0 | if (use_vex) |
4904 | 0 | FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w)); |
4905 | 0 | else |
4906 | 0 | FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w)); |
4907 | | |
4908 | 0 | src2 = TMP_FREG; |
4909 | 0 | src2w = 0; |
4910 | 0 | } |
4911 | | |
4912 | 28.9k | if (reg_size == 5 || use_vex) { |
4913 | 0 | if (reg_size == 5) |
4914 | 0 | op |= VEX_256; |
4915 | |
4916 | 0 | return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w); |
4917 | 0 | } |
4918 | | |
4919 | 28.9k | if (dst_vreg != src1_vreg) { |
4920 | 0 | if (dst_vreg == src2) { |
4921 | 0 | if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) { |
4922 | 0 | FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2)); |
4923 | 0 | FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg)); |
4924 | 0 | src2 = TMP_FREG; |
4925 | 0 | src2w = 0; |
4926 | 0 | } else |
4927 | 0 | src2 = src1_vreg; |
4928 | 0 | } else |
4929 | 0 | FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg)); |
4930 | 0 | } |
4931 | | |
4932 | 28.9k | if (op & (VEX_OP_0F38 | VEX_OP_0F3A)) |
4933 | 0 | return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w); |
4934 | 28.9k | return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w); |
4935 | 28.9k | } |
4936 | | |
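 | | /* Aligned loads are inherently atomic on x86, so the load half of a CAS pair is an ordinary MOV; sign-extending loads and the load-store (USE_LS) model are rejected instead. */ |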
4937 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, |
4938 | | sljit_s32 dst_reg, |
4939 | | sljit_s32 mem_reg) |
4940 | 0 | { |
4941 | 0 | CHECK_ERROR(); |
4942 | 0 | CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); |
4943 | 0 |
4944 | 0 | if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32) |
4945 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4946 | 0 |
4947 | 0 | if (op & SLJIT_ATOMIC_TEST) |
4948 | 0 | return SLJIT_SUCCESS; |
4949 | 0 |
4950 | 0 | SLJIT_SKIP_CHECKS(compiler); |
4951 | 0 | return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); |
4952 | 0 | } |
4953 | | |
4954 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, |
4955 | | sljit_s32 src_reg, |
4956 | | sljit_s32 mem_reg, |
4957 | | sljit_s32 temp_reg) |
4958 | 0 | { |
4959 | 0 | sljit_uw pref; |
4960 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4961 | 0 | sljit_s32 saved_reg = TMP_REG1; |
4962 | 0 | sljit_s32 swap_tmp = 0; |
4963 | 0 | sljit_sw srcw = 0; |
4964 | 0 | sljit_sw tempw = 0; |
4965 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
4966 | 0 |
4967 | 0 | CHECK_ERROR(); |
4968 | 0 | CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); |
4969 | 0 | CHECK_EXTRA_REGS(src_reg, srcw, (void)0); |
4970 | 0 | CHECK_EXTRA_REGS(temp_reg, tempw, (void)0); |
4971 | 0 |
4972 | 0 | SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP)); |
4973 | 0 | SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP)); |
4974 | 0 |
4975 | 0 | if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32) |
4976 | 0 | return SLJIT_ERR_UNSUPPORTED; |
4977 | 0 |
4978 | 0 | if (op & SLJIT_ATOMIC_TEST) |
4979 | 0 | return SLJIT_SUCCESS; |
4980 | 0 |
4981 | 0 | op = GET_OPCODE(op); |
4982 | 0 |
4983 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4984 | 0 | if (temp_reg == SLJIT_TMP_DEST_REG) { |
4985 | 0 | FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1])); |
4986 | 0 |
4987 | 0 | if (src_reg == SLJIT_R0) |
4988 | 0 | src_reg = TMP_REG1; |
4989 | 0 | if (mem_reg == SLJIT_R0) |
4990 | 0 | mem_reg = TMP_REG1; |
4991 | 0 |
4992 | 0 | temp_reg = SLJIT_R0; |
4993 | 0 | swap_tmp = 1; |
4994 | 0 | } |
4995 | 0 |
4996 | 0 | /* Src is a virtual register or its low byte is not accessible. */ |
4997 | 0 | if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) { |
4998 | 0 | SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG); |
4999 | 0 |
5000 | 0 | if (swap_tmp) { |
5001 | 0 | saved_reg = (mem_reg != SLJIT_R1) ? SLJIT_R1 : SLJIT_R2; |
5002 | 0 |
5003 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0); |
5004 | 0 | EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw); |
5005 | 0 | } else |
5006 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw); |
5007 | 0 |
5008 | 0 | src_reg = saved_reg; |
5009 | 0 |
5010 | 0 | if (mem_reg == src_reg) |
5011 | 0 | mem_reg = saved_reg; |
5012 | 0 | } |
5013 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
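 | | /* CMPXCHG compares implicitly with the accumulator, so the expected value must sit in SLJIT_R0 (EAX/RAX); the moves below rotate the clashing registers out of the way and restore them afterwards. */ |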
5014 | 0 |
5015 | 0 | if (temp_reg != SLJIT_R0) { |
5016 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5017 | 0 | compiler->mode32 = 0; |
5018 | 0 |
5019 | 0 | EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0); |
5020 | 0 | EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0); |
5021 | 0 |
5022 | 0 | if (src_reg == SLJIT_R0) |
5023 | 0 | src_reg = TMP_REG2; |
5024 | 0 | if (mem_reg == SLJIT_R0) |
5025 | 0 | mem_reg = TMP_REG2; |
5026 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
5027 | 0 | SLJIT_ASSERT(!swap_tmp); |
5028 | 0 |
5029 | 0 | if (src_reg == TMP_REG1) { |
5030 | 0 | if (mem_reg == SLJIT_R0) { |
5031 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0); |
5032 | 0 | EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0); |
5033 | 0 | EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); |
5034 | 0 |
5035 | 0 | mem_reg = SLJIT_R1; |
5036 | 0 | saved_reg = SLJIT_R1; |
5037 | 0 | } else { |
5038 | 0 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); |
5039 | 0 | EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); |
5040 | 0 | saved_reg = SLJIT_R0; |
5041 | 0 | } |
5042 | 0 | } else { |
5043 | 0 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0); |
5044 | 0 | EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); |
5045 | 0 |
5046 | 0 | if (src_reg == SLJIT_R0) |
5047 | 0 | src_reg = TMP_REG1; |
5048 | 0 | if (mem_reg == SLJIT_R0) |
5049 | 0 | mem_reg = TMP_REG1; |
5050 | 0 | } |
5051 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5052 | 0 | } |
5053 | 0 |
5054 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5055 | 0 | compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P; |
5056 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5057 | 0 |
5058 | 0 | /* Lock prefix. */ |
5059 | 0 | FAIL_IF(emit_byte(compiler, GROUP_LOCK)); |
5060 | 0 |
5061 | 0 | pref = 0; |
5062 | 0 | if (op == SLJIT_MOV_U16) |
5063 | 0 | pref = EX86_HALF_ARG | EX86_PREF_66; |
5064 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5065 | 0 | if (op == SLJIT_MOV_U8) |
5066 | 0 | pref = EX86_REX; |
5067 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5068 | 0 |
5069 | 0 | FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0)); |
5070 | 0 |
5071 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
5072 | 0 | if (swap_tmp) { |
5073 | 0 | SLJIT_ASSERT(temp_reg == SLJIT_R0); |
5074 | 0 | FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1])); |
5075 | 0 |
5076 | 0 | if (saved_reg != TMP_REG1) |
5077 | 0 | return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0); |
5078 | 0 | return SLJIT_SUCCESS; |
5079 | 0 | } |
5080 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
5081 | 0 |
5082 | 0 | if (temp_reg != SLJIT_R0) { |
5083 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5084 | 0 | compiler->mode32 = 0; |
5085 | 0 | return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0); |
5086 | 0 | #else /* !SLJIT_CONFIG_X86_64 */ |
5087 | 0 | EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0); |
5088 | 0 | if (saved_reg == SLJIT_R1) |
5089 | 0 | return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0); |
5090 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5091 | 0 | } |
5092 | 0 | return SLJIT_SUCCESS; |
5093 | 0 | } |
5094 | | |
5095 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) |
5096 | 8.07M | { |
5097 | 8.07M | CHECK_ERROR(); |
5098 | 8.07M | CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); |
5099 | 8.07M | ADJUST_LOCAL_OFFSET(dst, dstw); |
5100 | 8.07M | ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); |
5101 | | |
5102 | 8.07M | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
5103 | | |
5104 | 8.07M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5105 | 8.07M | compiler->mode32 = 0; |
5106 | 8.07M | #endif |
5107 | | |
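 | | /* An offset outside the signed 32-bit range cannot be an LEA displacement, so it is materialized in TMP_REG1 and combined with SLJIT_SP by a register+register LEA. */ |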
5108 | 8.07M | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5109 | 8.07M | if (NOT_HALFWORD(offset)) { |
5110 | 0 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); |
5111 | | #if (defined SLJIT_DEBUG && SLJIT_DEBUG) |
5112 | | SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); |
5113 | | return compiler->error; |
5114 | | #else |
5115 | 0 | return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); |
5116 | 0 | #endif |
5117 | 0 | } |
5118 | 8.07M | #endif |
5119 | | |
5120 | 8.07M | if (offset != 0) |
5121 | 8.06M | return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); |
5122 | 15.4k | return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); |
5123 | 8.07M | } |
5124 | | |
5125 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op, |
5126 | | sljit_s32 dst, sljit_sw dstw, |
5127 | | sljit_sw init_value) |
5128 | 0 | { |
5129 | 0 | sljit_u8 *inst; |
5130 | 0 | struct sljit_const *const_; |
5131 | 0 | sljit_s32 reg; |
5132 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
5133 | 0 | sljit_s32 dst_is_ereg = 0; |
5134 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
5135 | 0 |
5136 | 0 | CHECK_ERROR_PTR(); |
5137 | 0 | CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value)); |
5138 | 0 | ADJUST_LOCAL_OFFSET(dst, dstw); |
5139 | 0 |
5140 | 0 | CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); |
5141 | 0 |
5142 | 0 | const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); |
5143 | 0 | PTR_FAIL_IF(!const_); |
5144 | 0 | set_const(const_, compiler); |
5145 | 0 |
5146 | 0 | switch (GET_OPCODE(op)) { |
5147 | 0 | case SLJIT_MOV_U8: |
5148 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5149 | 0 | compiler->mode32 = (op & SLJIT_32); |
5150 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5151 | 0 |
5152 | 0 | if ((init_value & 0x100) != 0) |
5153 | 0 | init_value = init_value | -(sljit_sw)0x100; |
5154 | 0 | else |
5155 | 0 | init_value = (sljit_u8)init_value; |
5156 | 0 |
5157 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
5158 | 0 | if (dst_is_ereg) { |
5159 | 0 | if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value)) |
5160 | 0 | return NULL; |
5161 | 0 | dst = 0; |
5162 | 0 | break; |
5163 | 0 | } |
5164 | 0 | #endif /* SLJIT_CONFIG_X86_32 */ |
5165 | 0 |
5166 | 0 | reg = FAST_IS_REG(dst) ? dst : TMP_REG1; |
5167 | 0 |
5168 | 0 | if (emit_mov(compiler, reg, 0, SLJIT_IMM, init_value)) |
5169 | 0 | return NULL; |
5170 | 0 | break; |
5171 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5172 | 0 | case SLJIT_MOV: |
5173 | 0 | compiler->mode32 = 0; |
5174 | 0 | reg = FAST_IS_REG(dst) ? dst : TMP_REG1; |
5175 | 0 |
5176 | 0 | if (emit_load_imm64(compiler, reg, init_value)) |
5177 | 0 | return NULL; |
5178 | 0 | break; |
5179 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5180 | 0 | default: |
5181 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5182 | 0 | compiler->mode32 = (op == SLJIT_MOV32); |
5183 | 0 | #endif /* SLJIT_CONFIG_X86_64 */ |
5184 | 0 |
5185 | 0 | if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value)) |
5186 | 0 | return NULL; |
5187 | 0 | dst = 0; |
5188 | 0 | break; |
5189 | 0 | } |
5190 | 0 |
5191 | 0 | inst = (sljit_u8*)ensure_buf(compiler, 1); |
5192 | 0 | PTR_FAIL_IF(!inst); |
5193 | 0 |
5194 | 0 | inst[0] = SLJIT_INST_CONST; |
5195 | 0 |
5196 | 0 | if (dst & SLJIT_MEM) { |
5197 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5198 | 0 | if (op == SLJIT_MOV) { |
5199 | 0 | if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) |
5200 | 0 | return NULL; |
5201 | 0 | return const_; |
5202 | 0 | } |
5203 | 0 | #endif |
5204 | 0 |
5205 | 0 | if (emit_mov_byte(compiler, 0, dst, dstw, TMP_REG1, 0)) |
5206 | 0 | return NULL; |
5207 | 0 | } |
5208 | 0 |
5209 | 0 | return const_; |
5210 | 0 | } |
5211 | | |
5212 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op, |
5213 | | sljit_s32 dst, sljit_sw dstw) |
5214 | 199k | { |
5215 | 199k | struct sljit_jump *jump; |
5216 | 199k | sljit_u8 *inst; |
5217 | 199k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5218 | 199k | sljit_s32 reg; |
5219 | 199k | #endif /* SLJIT_CONFIG_X86_64 */ |
5220 | 199k | SLJIT_UNUSED_ARG(op); |
5221 | | |
5222 | 199k | CHECK_ERROR_PTR(); |
5223 | 199k | CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw)); |
5224 | 199k | ADJUST_LOCAL_OFFSET(dst, dstw); |
5225 | | |
5226 | 199k | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
5227 | | |
5228 | 199k | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
5229 | 199k | PTR_FAIL_IF(!jump); |
5230 | 199k | set_mov_addr(jump, compiler, 0); |
5231 | | |
5232 | 199k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5233 | 199k | compiler->mode32 = 0; |
5234 | 199k | if (dst & SLJIT_MEM) |
5235 | 199k | reg = TMP_REG1; |
5236 | 0 | else |
5237 | 0 | reg = (op != SLJIT_ADD_ABS_ADDR) ? dst : TMP_REG2; |
5238 | | |
5239 | 199k | PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0)); |
5240 | 199k | jump->addr = compiler->size; |
5241 | | |
5242 | 199k | if (reg_map[reg] >= 8) |
5243 | 0 | jump->flags |= MOV_ADDR_HI; |
5244 | | #else /* !SLJIT_CONFIG_X86_64 */ |
5245 | | if (op == SLJIT_ADD_ABS_ADDR) { |
5246 | | if (dst != SLJIT_R0) { |
5247 | | /* Must not be a signed byte argument. */ |
5248 | | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0x100, dst, dstw); |
5249 | | PTR_FAIL_IF(!inst); |
5250 | | *(inst + 1) |= ADD; |
5251 | | } else |
5252 | | PTR_FAIL_IF(emit_do_imm(compiler, ADD_EAX_i32, 0)); |
5253 | | } else { |
5254 | | PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)); |
5255 | | } |
5256 | | #endif /* SLJIT_CONFIG_X86_64 */ |
5257 | | |
5258 | 199k | inst = (sljit_u8*)ensure_buf(compiler, 1); |
5259 | 199k | PTR_FAIL_IF(!inst); |
5260 | | |
5261 | 199k | inst[0] = SLJIT_INST_MOV_ADDR; |
5262 | | |
5263 | 199k | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5264 | 199k | if (op == SLJIT_ADD_ABS_ADDR) { |
5265 | 0 | inst = emit_x86_instruction(compiler, 1, reg, 0, dst, dstw); |
5266 | 0 | PTR_FAIL_IF(!inst); |
5267 | 0 | *inst = ADD_rm_r; |
5268 | 199k | } else if (dst & SLJIT_MEM) |
5269 | 199k | PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0)); |
5270 | 199k | #endif /* SLJIT_CONFIG_X86_64 */ |
5271 | | |
5272 | 199k | return jump; |
5273 | 199k | } |
5274 | | |
5275 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) |
5276 | 0 | { |
5277 | 0 | SLJIT_UNUSED_ARG(executable_offset); |
5278 | 0 |
5279 | 0 | SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0); |
5280 | 0 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
5281 | 0 | sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset)); |
5282 | 0 | #else |
5283 | 0 | sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target); |
5284 | 0 | #endif |
5285 | 0 | SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1); |
5286 | 0 | } |
5287 | | |
5288 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset) |
5289 | 0 | { |
5290 | 0 | void *start_addr; |
5291 | 0 | SLJIT_UNUSED_ARG(executable_offset); |
5292 | 0 |
5293 | 0 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
5294 | 0 | if (op == SLJIT_MOV) { |
5295 | 0 | start_addr = (void*)(addr - sizeof(sljit_sw)); |
5296 | 0 | SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0); |
5297 | 0 | sljit_unaligned_store_sw(start_addr, new_constant); |
5298 | 0 | SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1); |
5299 | 0 | return; |
5300 | 0 | } |
5301 | 0 | #endif |
5302 | 0 |
5303 | 0 | start_addr = (void*)(addr - sizeof(sljit_s32)); |
5304 | 0 |
5305 | 0 | if ((op | SLJIT_32) == SLJIT_MOV32_U8) { |
5306 | 0 | if ((new_constant & 0x100) != 0) |
5307 | 0 | new_constant = new_constant | -(sljit_sw)0x100; |
5308 | 0 | else |
5309 | 0 | new_constant = (sljit_u8)new_constant; |
5310 | 0 | } |
5311 | 0 |
5312 | 0 | SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0); |
5313 | 0 | sljit_unaligned_store_s32(start_addr, (sljit_s32)new_constant); |
5314 | 0 | SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1); |
5315 | 0 | } |