Coverage Report

Created: 2025-05-12 07:08

/src/pcre2/src/pcre2_jit_compile.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#if defined(__has_feature)
43
#if __has_feature(memory_sanitizer)
44
#include <sanitizer/msan_interface.h>
45
#endif /* __has_feature(memory_sanitizer) */
46
#endif /* defined(__has_feature) */
47
48
#include "pcre2_internal.h"
49
50
#ifdef SUPPORT_JIT
51
52
/* All-in-one: Since we use the JIT compiler only from here,
53
we just include it. This way we don't need to touch the build
54
system files. */
55
56
#define SLJIT_CONFIG_AUTO 1
57
#define SLJIT_CONFIG_STATIC 1
58
#define SLJIT_VERBOSE 0
59
60
#ifdef PCRE2_DEBUG
61
#define SLJIT_DEBUG 1
62
#else
63
#define SLJIT_DEBUG 0
64
#endif
65
66
7.49M
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
67
7.49M
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
68
69
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
70
7.49M
{
71
7.49M
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
72
7.49M
return allocator->malloc(size, allocator->memory_data);
73
7.49M
}
74
75
static void pcre2_jit_free(void *ptr, void *allocator_data)
76
7.49M
{
77
7.49M
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
78
7.49M
allocator->free(ptr, allocator->memory_data);
79
7.49M
}
80
81
#include "../deps/sljit/sljit_src/sljitLir.c"
82
83
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
84
#error Unsupported architecture
85
#endif
86
87
/* Defines for debugging purposes. */
88
89
/* 1 - Use unoptimized capturing brackets.
90
   2 - Enable capture_last_ptr (includes option 1). */
91
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
92
93
/* 1 - Always have a control head. */
94
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
95
96
/* Allocate memory for the regex stack on the real machine stack.
97
Fast, but limited size. */
98
0
#define MACHINE_STACK_SIZE 32768
99
100
/* Growth rate for stack allocated by the OS. Should be a multiple
101
of page size. */
102
0
#define STACK_GROWTH_RATE 8192
103
104
/* Enable to check that the allocation could destroy temporaries. */
105
#if defined SLJIT_DEBUG && SLJIT_DEBUG
106
#define DESTROY_REGISTERS 1
107
#endif
108
109
/*
110
Short summary of the backtracking mechanism employed by the jit code generator:
111
112
The code generator follows the recursive nature of the PERL compatible regular
113
expressions. The basic blocks of regular expressions are condition checkers
114
which execute different commands depending on the result of the condition check.
115
The relationship between the operators can be horizontal (concatenation) and
116
vertical (sub-expression) (See struct backtrack_common for more details).
117
118
  'ab' - 'a' and 'b' regexps are concatenated
119
  'a+' - 'a' is the sub-expression of the '+' operator
120
121
The condition checkers are boolean (true/false) checkers. Machine code is generated
122
for the checker itself and for the actions depending on the result of the checker.
123
The 'true' case is called as the matching path (expected path), and the other is called as
124
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
125
branches on the matching path.
126
127
 Greedy star operator (*) :
128
   Matching path: match happens.
129
   Backtrack path: match failed.
130
 Non-greedy star operator (*?) :
131
   Matching path: no need to perform a match.
132
   Backtrack path: match is required.
133
134
The following example shows the code generated for a capturing bracket
135
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
136
we have the following regular expression:
137
138
   A(B|C)D
139
140
The generated code will be the following:
141
142
 A matching path
143
 '(' matching path (pushing arguments to the stack)
144
 B matching path
145
 ')' matching path (pushing arguments to the stack)
146
 D matching path
147
 return with successful match
148
149
 D backtrack path
150
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
151
 B backtrack path
152
 C expected path
153
 jump to D matching path
154
 C backtrack path
155
 A backtrack path
156
157
 Notice that the order of the backtrack code paths is the opposite of the fast
158
 code paths. In this way the topmost value on the stack always belongs
159
 to the current backtrack code path. The backtrack path must check
160
 whether there is a next alternative. If so, it needs to jump back to
161
 the matching path eventually. Otherwise it needs to clear out its own stack
162
 frame and continue the execution on the backtrack code paths.
163
*/
164
165
/*
166
Saved stack frames:
167
168
Atomic blocks and asserts require reloading the values of private data
169
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
170
are not necessarily known at compile time, thus we need a dynamic restore
171
mechanism.
172
173
The stack frames are stored in a chain list, and have the following format:
174
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
175
176
Thus we can restore the private data to a particular point in the stack.
177
*/
178
179
typedef struct jit_arguments {
180
  /* Pointers first. */
181
  struct sljit_stack *stack;
182
  PCRE2_SPTR str;
183
  PCRE2_SPTR begin;
184
  PCRE2_SPTR end;
185
  pcre2_match_data *match_data;
186
  PCRE2_SPTR startchar_ptr;
187
  PCRE2_UCHAR *mark_ptr;
188
  int (*callout)(pcre2_callout_block *, void *);
189
  void *callout_data;
190
  /* Everything else after. */
191
  sljit_uw offset_limit;
192
  sljit_u32 limit_match;
193
  sljit_u32 oveccount;
194
  sljit_u32 options;
195
} jit_arguments;
196
197
249k
#define JIT_NUMBER_OF_COMPILE_MODES 3
198
199
typedef struct executable_functions {
200
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
201
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
202
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
203
  sljit_u32 top_bracket;
204
  sljit_u32 limit_match;
205
} executable_functions;
206
207
typedef struct jump_list {
208
  struct sljit_jump *jump;
209
  struct jump_list *next;
210
} jump_list;
211
212
typedef struct stub_list {
213
  struct sljit_jump *start;
214
  struct sljit_label *quit;
215
  struct stub_list *next;
216
} stub_list;
217
218
enum frame_types {
219
  no_frame = -1,
220
  no_stack = -2
221
};
222
223
enum control_types {
224
  type_mark = 0,
225
  type_then_trap = 1
226
};
227
228
enum  early_fail_types {
229
  type_skip = 0,
230
  type_fail = 1,
231
  type_fail_range = 2
232
};
233
234
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
235
236
/* The following structure is the key data type for the recursive
237
code generator. It is allocated by compile_matchingpath, and contains
238
the arguments for compile_backtrackingpath. Must be the first member
239
of its descendants. */
240
typedef struct backtrack_common {
241
  /* Backtracking path of an opcode, which falls back
242
     to our opcode, if it cannot resume matching. */
243
  struct backtrack_common *prev;
244
  /* Backtracks for opcodes without backtracking path.
245
     These opcodes are between 'prev' and the current
246
     opcode, and they never resume the match. */
247
  jump_list *simple_backtracks;
248
  /* Internal backtracking list for block constructs
249
     which contains other opcodes, such as brackets,
250
     asserts, conditionals, etc. */
251
  struct backtrack_common *top;
252
  /* Backtracks used internally by the opcode. For component
253
     opcodes, this list is also used by those opcodes without
254
     backtracking path which follows the 'top' backtrack. */
255
  jump_list *own_backtracks;
256
  /* Opcode pointer. */
257
  PCRE2_SPTR cc;
258
} backtrack_common;
259
260
typedef struct assert_backtrack {
261
  backtrack_common common;
262
  jump_list *condfailed;
263
  /* Less than 0 if a frame is not needed. */
264
  int framesize;
265
  /* Points to our private memory word on the stack. */
266
  int private_data_ptr;
267
  /* For iterators. */
268
  struct sljit_label *matchingpath;
269
} assert_backtrack;
270
271
typedef struct bracket_backtrack {
272
  backtrack_common common;
273
  /* Where to continue if an alternative is successfully matched. */
274
  struct sljit_label *alternative_matchingpath;
275
  /* For rmin and rmax iterators. */
276
  struct sljit_label *recursive_matchingpath;
277
  /* For greedy ? operator. */
278
  struct sljit_label *zero_matchingpath;
279
  /* Contains the branches of a failed condition. */
280
  union {
281
    /* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */
282
    jump_list *no_capture;
283
    assert_backtrack *assert;
284
    /* For OP_ONCE. Less than 0 if not needed. */
285
    int framesize;
286
  } u;
287
  /* For brackets with >3 alternatives. */
288
  struct sljit_jump *matching_mov_addr;
289
  /* Points to our private memory word on the stack. */
290
  int private_data_ptr;
291
} bracket_backtrack;
292
293
typedef struct bracketpos_backtrack {
294
  backtrack_common common;
295
  /* Points to our private memory word on the stack. */
296
  int private_data_ptr;
297
  /* Reverting stack is needed. */
298
  int framesize;
299
  /* Allocated stack size. */
300
  int stacksize;
301
} bracketpos_backtrack;
302
303
typedef struct braminzero_backtrack {
304
  backtrack_common common;
305
  struct sljit_label *matchingpath;
306
} braminzero_backtrack;
307
308
typedef struct char_iterator_backtrack {
309
  backtrack_common common;
310
  /* Next iteration. */
311
  struct sljit_label *matchingpath;
312
  /* Creating a range based on the next character. */
313
  struct {
314
    unsigned int othercasebit;
315
    PCRE2_UCHAR chr;
316
    BOOL charpos_enabled;
317
  } charpos;
318
} char_iterator_backtrack;
319
320
typedef struct ref_iterator_backtrack {
321
  backtrack_common common;
322
  /* Next iteration. */
323
  struct sljit_label *matchingpath;
324
} ref_iterator_backtrack;
325
326
typedef struct recurse_entry {
327
  struct recurse_entry *next;
328
  /* Contains the function entry label. */
329
  struct sljit_label *entry_label;
330
  /* Contains the backtrack label. */
331
  struct sljit_label *backtrack_label;
332
  /* Collects the entry calls until the function is created. */
333
  jump_list *entry_calls;
334
  /* Collects the backtrack calls until the function is created. */
335
  jump_list *backtrack_calls;
336
  /* Points to the starting opcode. */
337
  sljit_sw start;
338
  /* Start of caller arguments. */
339
  PCRE2_SPTR arg_start;
340
  /* Size of caller arguments in bytes. */
341
  sljit_uw arg_size;
342
} recurse_entry;
343
344
typedef struct recurse_backtrack {
345
  backtrack_common common;
346
  /* Return to the matching path. */
347
  struct sljit_label *matchingpath;
348
  /* Recursive pattern. */
349
  recurse_entry *entry;
350
  /* Pattern is inlined. */
351
  BOOL inlined_pattern;
352
} recurse_backtrack;
353
354
typedef struct vreverse_backtrack {
355
  backtrack_common common;
356
  /* Return to the matching path. */
357
  struct sljit_label *matchingpath;
358
} vreverse_backtrack;
359
360
403k
#define OP_THEN_TRAP OP_TABLE_LENGTH
361
362
typedef struct then_trap_backtrack {
363
  backtrack_common common;
364
  /* If then_trap is not NULL, this structure contains the real
365
  then_trap for the backtracking path. */
366
  struct then_trap_backtrack *then_trap;
367
  /* Points to the starting opcode. */
368
  sljit_sw start;
369
  /* Exit point for the then opcodes of this alternative. */
370
  jump_list *quit;
371
  /* Frame size of the current alternative. */
372
  int framesize;
373
} then_trap_backtrack;
374
375
659k
#define MAX_N_CHARS 12
376
49.2k
#define MAX_DIFF_CHARS 5
377
378
typedef struct fast_forward_char_data {
379
  /* Number of characters in the chars array, 255 for any character. */
380
  sljit_u8 count;
381
  /* Number of last UTF-8 characters in the chars array. */
382
  sljit_u8 last_count;
383
  /* Available characters in the current position. */
384
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
385
} fast_forward_char_data;
386
387
6.87M
#define MAX_CLASS_RANGE_SIZE 4
388
4.01M
#define MAX_CLASS_CHARS_SIZE 3
389
390
typedef struct compiler_common {
391
  /* The sljit generic compiler. */
392
  struct sljit_compiler *compiler;
393
  /* Compiled regular expression. */
394
  pcre2_real_code *re;
395
  /* First byte code. */
396
  PCRE2_SPTR start;
397
  /* Maps private data offset to each opcode. */
398
  sljit_s32 *private_data_ptrs;
399
  /* Chain list of read-only data ptrs. */
400
  void *read_only_data_head;
401
  /* Bitset which tells which capture brackets can be optimized. */
402
  sljit_u8 *optimized_cbrackets;
403
  /* Bitset for tracking capture bracket status. */
404
  sljit_u8 *cbracket_bitset;
405
  /* Tells whether the starting offset is a target of then. */
406
  sljit_u8 *then_offsets;
407
  /* Current position where a THEN must jump. */
408
  then_trap_backtrack *then_trap;
409
  /* Starting offset of private data for capturing brackets. */
410
  sljit_s32 cbra_ptr;
411
#if defined SLJIT_DEBUG && SLJIT_DEBUG
412
  /* End offset of locals for assertions. */
413
  sljit_s32 locals_size;
414
#endif
415
  /* Output vector starting point. Must be divisible by 2. */
416
  sljit_s32 ovector_start;
417
  /* Points to the starting character of the current match. */
418
  sljit_s32 start_ptr;
419
  /* Last known position of the requested byte. */
420
  sljit_s32 req_char_ptr;
421
  /* Head of the last recursion. */
422
  sljit_s32 recursive_head_ptr;
423
  /* First inspected character for partial matching.
424
     (Needed for avoiding zero length partial matches.) */
425
  sljit_s32 start_used_ptr;
426
  /* Starting pointer for partial soft matches. */
427
  sljit_s32 hit_start;
428
  /* Pointer of the match end position. */
429
  sljit_s32 match_end_ptr;
430
  /* Points to the marked string. */
431
  sljit_s32 mark_ptr;
432
  /* Head of the recursive control verb management chain.
433
     Each item must have a previous offset and type
434
     (see control_types) values. See do_search_mark. */
435
  sljit_s32 control_head_ptr;
436
  /* The offset of the saved STR_END in the outermost
437
     scan substring block. Since scan substring restores
438
     STR_END after a match, it is enough to restore
439
     STR_END inside a scan substring block. */
440
  sljit_s32 restore_end_ptr;
441
  /* Points to the last matched capture block index. */
442
  sljit_s32 capture_last_ptr;
443
  /* Fast forward skipping byte code pointer. */
444
  PCRE2_SPTR fast_forward_bc_ptr;
445
  /* Locals used by fast fail optimization. */
446
  sljit_s32 early_fail_start_ptr;
447
  sljit_s32 early_fail_end_ptr;
448
  /* Byte length of optimized_cbrackets and cbracket_bitset. */
449
  sljit_u32 cbracket_bitset_length;
450
  /* Variables used by recursive call generator. */
451
  sljit_s32 recurse_bitset_size;
452
  uint8_t *recurse_bitset;
453
454
  /* Flipped and lower case tables. */
455
  const sljit_u8 *fcc;
456
  sljit_sw lcc;
457
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
458
  int mode;
459
  /* TRUE, when empty match is accepted for partial matching. */
460
  BOOL allow_empty_partial;
461
  /* TRUE, when minlength is greater than 0. */
462
  BOOL might_be_empty;
463
  /* \K is found in the pattern. */
464
  BOOL has_set_som;
465
  /* (*SKIP:arg) is found in the pattern. */
466
  BOOL has_skip_arg;
467
  /* (*THEN) is found in the pattern. */
468
  BOOL has_then;
469
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
470
  BOOL has_skip_in_assert_back;
471
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
472
  BOOL local_quit_available;
473
  /* Currently in a positive assertion. */
474
  BOOL in_positive_assertion;
475
  /* Newline control. */
476
  int nltype;
477
  sljit_u32 nlmax;
478
  sljit_u32 nlmin;
479
  int newline;
480
  int bsr_nltype;
481
  sljit_u32 bsr_nlmax;
482
  sljit_u32 bsr_nlmin;
483
  /* Dollar endonly. */
484
  int endonly;
485
  /* Tables. */
486
  sljit_sw ctypes;
487
  /* Named capturing brackets. */
488
  PCRE2_SPTR name_table;
489
  sljit_sw name_count;
490
  sljit_sw name_entry_size;
491
492
  /* Labels and jump lists. */
493
  struct sljit_label *partialmatchlabel;
494
  struct sljit_label *quit_label;
495
  struct sljit_label *abort_label;
496
  struct sljit_label *accept_label;
497
  struct sljit_label *ff_newline_shortcut;
498
  stub_list *stubs;
499
  recurse_entry *entries;
500
  recurse_entry *currententry;
501
  jump_list *partialmatch;
502
  jump_list *quit;
503
  jump_list *positive_assertion_quit;
504
  jump_list *abort;
505
  jump_list *failed_match;
506
  jump_list *accept;
507
  jump_list *calllimit;
508
  jump_list *stackalloc;
509
  jump_list *revertframes;
510
  jump_list *wordboundary;
511
  jump_list *ucp_wordboundary;
512
  jump_list *anynewline;
513
  jump_list *hspace;
514
  jump_list *vspace;
515
  jump_list *casefulcmp;
516
  jump_list *caselesscmp;
517
  jump_list *reset_match;
518
  /* Same as reset_match, but resets the STR_PTR as well. */
519
  jump_list *restart_match;
520
  BOOL unset_backref;
521
  BOOL alt_circumflex;
522
#ifdef SUPPORT_UNICODE
523
  BOOL utf;
524
  BOOL invalid_utf;
525
  BOOL ucp;
526
  /* Points to saving area for iref. */
527
  jump_list *getucd;
528
  jump_list *getucdtype;
529
#if PCRE2_CODE_UNIT_WIDTH == 8
530
  jump_list *utfreadchar;
531
  jump_list *utfreadtype8;
532
  jump_list *utfpeakcharback;
533
#endif
534
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
535
  jump_list *utfreadchar_invalid;
536
  jump_list *utfreadnewline_invalid;
537
  jump_list *utfmoveback_invalid;
538
  jump_list *utfpeakcharback_invalid;
539
#endif
540
#endif /* SUPPORT_UNICODE */
541
} compiler_common;
542
543
/* For byte_sequence_compare. */
544
545
typedef struct compare_context {
546
  int length;
547
  int sourcereg;
548
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
549
  int ucharptr;
550
  union {
551
    sljit_s32 asint;
552
    sljit_u16 asushort;
553
#if PCRE2_CODE_UNIT_WIDTH == 8
554
    sljit_u8 asbyte;
555
    sljit_u8 asuchars[4];
556
#elif PCRE2_CODE_UNIT_WIDTH == 16
557
    sljit_u16 asuchars[2];
558
#elif PCRE2_CODE_UNIT_WIDTH == 32
559
    sljit_u32 asuchars[1];
560
#endif
561
  } c;
562
  union {
563
    sljit_s32 asint;
564
    sljit_u16 asushort;
565
#if PCRE2_CODE_UNIT_WIDTH == 8
566
    sljit_u8 asbyte;
567
    sljit_u8 asuchars[4];
568
#elif PCRE2_CODE_UNIT_WIDTH == 16
569
    sljit_u16 asuchars[2];
570
#elif PCRE2_CODE_UNIT_WIDTH == 32
571
    sljit_u32 asuchars[1];
572
#endif
573
  } oc;
574
#endif
575
} compare_context;
576
577
/* Undefine sljit macros. */
578
#undef CMP
579
580
/* Used for accessing the elements of the stack. */
581
9.59M
#define STACK(i)      ((i) * SSIZE_OF(sw))
582
583
#ifdef SLJIT_PREF_SHIFT_REG
584
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
585
/* Nothing. */
586
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
587
#define SHIFT_REG_IS_R3
588
#else
589
#error "Unsupported shift register"
590
#endif
591
#endif
592
593
112M
#define TMP1          SLJIT_R0
594
#ifdef SHIFT_REG_IS_R3
595
51.3M
#define TMP2          SLJIT_R3
596
7.35M
#define TMP3          SLJIT_R2
597
#else
598
#define TMP2          SLJIT_R2
599
#define TMP3          SLJIT_R3
600
#endif
601
0
#define STR_PTR       SLJIT_R1
602
5.65k
#define STR_END       SLJIT_S0
603
23.7k
#define STACK_TOP     SLJIT_S1
604
795k
#define STACK_LIMIT   SLJIT_S2
605
801k
#define COUNT_MATCH   SLJIT_S3
606
795k
#define ARGUMENTS     SLJIT_S4
607
690k
#define RETURN_ADDR   SLJIT_R4
608
609
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
610
#define HAS_VIRTUAL_REGISTERS 1
611
#else
612
3.75M
#define HAS_VIRTUAL_REGISTERS 0
613
#endif
614
615
/* Local space layout. */
616
/* Max limit of recursions. */
617
#define LIMIT_MATCH      (0 * sizeof(sljit_sw))
618
/* Local variables. Their number is computed by check_opcode_types. */
619
137k
#define LOCAL0           (1 * sizeof(sljit_sw))
620
4.04k
#define LOCAL1           (2 * sizeof(sljit_sw))
621
714k
#define LOCAL2           (3 * sizeof(sljit_sw))
622
130k
#define LOCAL3           (4 * sizeof(sljit_sw))
623
#define LOCAL4           (5 * sizeof(sljit_sw))
624
/* The output vector is stored on the stack, and contains pointers
625
to characters. The vector data is divided into two groups: the first
626
group contains the start / end character pointers, and the second is
627
the start pointers when the end of the capturing group has not yet been reached. */
628
5.21M
#define OVECTOR_START    (common->ovector_start)
629
5.15M
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
630
2.43M
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
631
62.7M
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
632
633
#if PCRE2_CODE_UNIT_WIDTH == 8
634
#define MOV_UCHAR  SLJIT_MOV_U8
635
#define IN_UCHARS(x) (x)
636
#elif PCRE2_CODE_UNIT_WIDTH == 16
637
6.05k
#define MOV_UCHAR  SLJIT_MOV_U16
638
#define UCHAR_SHIFT (1)
639
268M
#define IN_UCHARS(x) ((x) * 2)
640
#elif PCRE2_CODE_UNIT_WIDTH == 32
641
#define MOV_UCHAR  SLJIT_MOV_U32
642
#define UCHAR_SHIFT (2)
643
#define IN_UCHARS(x) ((x) * 4)
644
#else
645
#error Unsupported compiling mode
646
#endif
647
648
/* Shortcuts. */
649
#define DEFINE_COMPILER \
650
349M
  struct sljit_compiler *compiler = common->compiler
651
#define OP1(op, dst, dstw, src, srcw) \
652
395M
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
653
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
654
154M
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
655
#define OP2U(op, src1, src1w, src2, src2w) \
656
128M
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
657
#define OP_SRC(op, src, srcw) \
658
115k
  sljit_emit_op_src(compiler, (op), (src), (srcw))
659
#define LABEL() \
660
97.6M
  sljit_emit_label(compiler)
661
#define JUMP(type) \
662
132M
  sljit_emit_jump(compiler, (type))
663
#define JUMPTO(type, label) \
664
39.6M
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
665
#define JUMPHERE(jump) \
666
84.1M
  sljit_set_label((jump), sljit_emit_label(compiler))
667
#define SET_LABEL(jump, label) \
668
230M
  sljit_set_label((jump), (label))
669
#define CMP(type, src1, src1w, src2, src2w) \
670
183M
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
671
#define CMPTO(type, src1, src1w, src2, src2w, label) \
672
8.79M
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
673
#define OP_FLAGS(op, dst, dstw, type) \
674
50.3M
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
675
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
676
64.7M
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
677
#define GET_LOCAL_BASE(dst, dstw, offset) \
678
8.57M
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
679
680
3.94M
#define READ_CHAR_MAX ((sljit_u32)0xffffffff)
681
682
#define INVALID_UTF_CHAR -1
683
#define UNASSIGNED_UTF_CHAR 888
684
685
#if defined SUPPORT_UNICODE
686
#if PCRE2_CODE_UNIT_WIDTH == 8
687
688
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
689
  { \
690
  if (ptr[0] <= 0x7f) \
691
    c = *ptr++; \
692
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
693
    { \
694
    c = ptr[1] - 0x80; \
695
    \
696
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
697
      { \
698
      c |= (ptr[0] - 0xc0) << 6; \
699
      ptr += 2; \
700
      } \
701
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
702
      { \
703
      c = c << 6 | (ptr[2] - 0x80); \
704
      \
705
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
706
        { \
707
        c |= (ptr[0] - 0xe0) << 12; \
708
        ptr += 3; \
709
        \
710
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
711
          { \
712
          invalid_action; \
713
          } \
714
        } \
715
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
716
        { \
717
        c = c << 6 | (ptr[3] - 0x80); \
718
        \
719
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
720
          { \
721
          c |= (ptr[0] - 0xf0) << 18; \
722
          ptr += 4; \
723
          \
724
          if (c >= 0x110000 || c < 0x10000) \
725
            { \
726
            invalid_action; \
727
            } \
728
          } \
729
        else \
730
          { \
731
          invalid_action; \
732
          } \
733
        } \
734
      else \
735
        { \
736
        invalid_action; \
737
        } \
738
      } \
739
    else \
740
      { \
741
      invalid_action; \
742
      } \
743
    } \
744
  else \
745
    { \
746
    invalid_action; \
747
    } \
748
  }
749
750
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
751
  { \
752
  c = ptr[-1]; \
753
  if (c <= 0x7f) \
754
    ptr--; \
755
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
756
    { \
757
    c -= 0x80; \
758
    \
759
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
760
      { \
761
      c |= (ptr[-2] - 0xc0) << 6; \
762
      ptr -= 2; \
763
      } \
764
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
765
      { \
766
      c = c << 6 | (ptr[-2] - 0x80); \
767
      \
768
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
769
        { \
770
        c |= (ptr[-3] - 0xe0) << 12; \
771
        ptr -= 3; \
772
        \
773
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
774
          { \
775
          invalid_action; \
776
          } \
777
        } \
778
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
779
        { \
780
        c = c << 6 | (ptr[-3] - 0x80); \
781
        \
782
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
783
          { \
784
          c |= (ptr[-4] - 0xf0) << 18; \
785
          ptr -= 4; \
786
          \
787
          if (c >= 0x110000 || c < 0x10000) \
788
            { \
789
            invalid_action; \
790
            } \
791
          } \
792
        else \
793
          { \
794
          invalid_action; \
795
          } \
796
        } \
797
      else \
798
        { \
799
        invalid_action; \
800
        } \
801
      } \
802
    else \
803
      { \
804
      invalid_action; \
805
      } \
806
    } \
807
  else \
808
    { \
809
    invalid_action; \
810
    } \
811
  }
812
813
#elif PCRE2_CODE_UNIT_WIDTH == 16
814
815
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
816
0
  { \
817
0
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
818
0
    c = *ptr++; \
819
0
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
820
0
    { \
821
0
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
822
0
    ptr += 2; \
823
0
    } \
824
0
  else \
825
0
    { \
826
0
    invalid_action; \
827
0
    } \
828
0
  }
829
830
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
831
0
  { \
832
0
  c = ptr[-1]; \
833
0
  if (c < 0xd800 || c >= 0xe000) \
834
0
    ptr--; \
835
0
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
836
0
    { \
837
0
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
838
0
    ptr -= 2; \
839
0
    } \
840
0
  else \
841
0
    { \
842
0
    invalid_action; \
843
0
    } \
844
0
  }
845
846
847
#elif PCRE2_CODE_UNIT_WIDTH == 32
848
849
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
850
  { \
851
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
852
    c = *ptr++; \
853
  else \
854
    { \
855
    invalid_action; \
856
    } \
857
  }
858
859
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
860
  { \
861
  c = ptr[-1]; \
862
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
863
    ptr--; \
864
  else \
865
    { \
866
    invalid_action; \
867
    } \
868
  }
869
870
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
871
#endif /* SUPPORT_UNICODE */
872
873
/* Returns the position just past the closing KET of the bracket that
starts at cc. Starting from the opening opcode, the value read by
GET(cc, 1) is added to cc repeatedly for as long as the opcode reached
is OP_ALT; the opcode finally reached is asserted to be one of the KET
opcodes, and it is skipped together with its LINK_SIZE field. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + (1 + LINK_SIZE);
}
881
882
/* Counts the alternatives of the bracket that starts at cc. The first
alternative is always counted; each OP_ALT reached by adding the value
read by GET(cc, 1) to cc contributes one more. The opcode that ends the
walk is asserted to be one of the KET opcodes. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Step over the first alternative, then over every further one. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  alternatives++;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
895
896
/* Reports whether any alternative of the lookbehind assertion starting
at cc has OP_VREVERSE as the opcode located at cc[1 + LINK_SIZE].
Alternatives are visited by adding the value read by GET(cc, 1) to cc
for as long as the opcode reached is OP_ALT. */
static BOOL find_vreverse(PCRE2_SPTR cc)
{
  SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);

  for (;;)
    {
    if (cc[1 + LINK_SIZE] == OP_VREVERSE)
      return TRUE;

    cc += GET(cc, 1);
    if (*cc != OP_ALT)
      break;
    }

  return FALSE;
}
910
911
/* Functions which might need modification for every newly supported opcode:
912
 next_opcode
913
 check_opcode_types
914
 set_private_data_ptrs
915
 get_framesize
916
 init_frame
917
 get_recurse_data_length
918
 copy_recurse_data
919
 compile_matchingpath
920
 compile_backtrackingpath
921
*/
922
923
/* Returns the address of the opcode which follows the one at cc.

Returns NULL for OP_ANYBYTE when common->utf is set (Unicode builds only),
and for any opcode which is not listed below. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes whose length is fully described by the OP_lengths table. */

  /* Anchors, boundaries and single character types. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  /* Class / reference repeats. */
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  /* Classes, references, recursion, callouts. */
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  /* Bracket structure. */
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ASSERT_SCS:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  /* Verbs and the remaining fixed length opcodes. */
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes followed by a character: in UTF mode the character
  may occupy additional code units. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */

  /* Type repeats: stop on the type opcode which follows the repeat,
  so it is processed as an opcode of its own. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_ECLASS:
  case OP_XCLASS:
  SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: the argument length is stored in cc[1]. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1129
1130
/* Computes the local area size required by the back reference opcode at cc
and returns the larger of that value and current_locals_size.

Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */
static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)
{
int required = 2 * SSIZE_OF(sw);

SLJIT_UNUSED_ARG(common);

#ifdef SUPPORT_UNICODE
/* Caseless references need one more word in utf / ucp mode. */
if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp))
  required = 3 * SSIZE_OF(sw);
#endif

/* Although do_casefulcmp() uses only one local, the allocate_stack()
calls during the repeat destroys LOCAL1 variables. */
cc += PRIV(OP_lengths)[*cc];
if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE)
  required += 2 * SSIZE_OF(sw);

if (current_locals_size >= required)
  return current_locals_size;
return required;
}
1149
1150
/* Tests the bit which belongs to capture_index in the
common->optimized_cbrackets bit set (eight capture indexes per byte). */
static SLJIT_INLINE BOOL is_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)
{
const sljit_s32 byte_index = capture_index >> 3;
const sljit_u8 bit_mask = (sljit_u8)(1 << (capture_index & 0x7));

return (common->optimized_cbrackets[byte_index] & bit_mask) != 0;
}
1155
1156
/* Clears the bit which belongs to capture_index in the
common->optimized_cbrackets bit set (eight capture indexes per byte). */
static SLJIT_INLINE void clear_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)
{
const sljit_s32 byte_index = capture_index >> 3;
const sljit_u8 bit = (sljit_u8)(1 << (capture_index & 0x7));

common->optimized_cbrackets[byte_index] &= (sljit_u8)~bit;
}
1161
1162
/* Scans the byte code between cc and ccend. During the scan it:
 - records features in common (has_set_som, might_be_empty, has_then,
   has_skip_arg, has_skip_in_assert_back, control_head_ptr),
 - clears the optimized flag of every capture bracket which is referenced,
 - computes the size of the local area and reserves it, together with the
   optional mark / recursive head / capture last slots, by advancing
   common->ovector_start.

Returns FALSE when an unsupported construct is found, TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
const sljit_s32 three_words = 3 * SSIZE_OF(sw);
sljit_s32 locals_size = 2 * SSIZE_OF(sw);
/* End of the outermost lookbehind / non-atomic assertion seen so far. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;
PCRE2_SPTR slot;
int entries;
BOOL needs_recursive_head = FALSE;
BOOL needs_capture_last = FALSE;
BOOL needs_mark = FALSE;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_TYPEUPTO:
    case OP_TYPEEXACT:
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= three_words)
      locals_size = three_words;
    cc += (2 + IMM2_SIZE) - 1;
    break;

    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    if (cc[1] == OP_EXTUNI && locals_size <= three_words)
      locals_size = three_words;
    cc += 2 - 1;
    break;

    case OP_TYPEPOSUPTO:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && locals_size <= three_words)
      locals_size = three_words;
#endif
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= three_words)
      locals_size = three_words;
    cc += (2 + IMM2_SIZE) - 1;
    break;

    case OP_REFI:
    case OP_REF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    clear_optimized_cbracket(common, GET2(cc, 1));
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ASSERT_SCS:
    slot = bracketend(cc);
    if (slot > assert_na_end)
      assert_na_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    clear_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE));
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    clear_optimized_cbracket(common, GET2(cc, 1));
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREFI:
    case OP_DNREF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    /* Fall through */
    case OP_DNCREF:
    /* Every capture bracket which shares the name is referenced. */
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (entries = GET2(cc, 1 + IMM2_SIZE); entries > 0; entries--)
      {
      clear_optimized_cbracket(common, GET2(slot, 0));
      slot += common->name_entry_size;
      }
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    needs_recursive_head = TRUE;
    cc += 1 + LINK_SIZE;
    while (*cc == OP_CREF)
      {
      clear_optimized_cbracket(common, GET2(cc, 1));
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    needs_capture_last = TRUE;
    if (*cc == OP_CALLOUT)
      cc += PRIV(OP_lengths)[OP_CALLOUT];
    else
      cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    slot = bracketend(cc);
    if (slot > assert_back_end)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_MARK:
    needs_mark = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_ASSERT_ACCEPT:
    /* Not supported inside the assertions tracked by assert_na_end. */
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    case OP_CRPOSRANGE:
    /* The second value can be 0 for infinite repeats. */
    if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= three_words)
      locals_size = three_words;
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_POSUPTO:
    case OP_POSUPTOI:
    case OP_NOTPOSUPTO:
    case OP_NOTPOSUPTOI:
    if (common->utf && locals_size <= three_words)
      locals_size = three_words;
#endif
    /* Fall through */
    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
common->locals_size = locals_size;
#endif

/* Reserve the local area first, the optional slots follow it. */
if (locals_size > 0)
  common->ovector_start += locals_size;

if (needs_mark)
  {
  SLJIT_ASSERT(common->mark_ptr == 0);
  common->mark_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (needs_recursive_head)
  {
  SLJIT_ASSERT(common->recursive_head_ptr == 0);
  common->recursive_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (needs_capture_last)
  {
  SLJIT_ASSERT(common->capture_last_ptr == 0);
  common->capture_last_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

return TRUE;
}
1377
1378
334k
/* Saturation value of the early fail state tracked by detect_early_fail():
the start argument must be below it, the scan of an alternative stops once
the state reaches it, and it is also the value returned when no further
enhancement is possible. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1379
1380
/*
1381
  Start represents the number of allowed early fail enhancements
1382
1383
  The 0-3 values have a special meaning:
1384
    0 - skip is allowed for all iterators
1385
    1 - fail is allowed for all iterators
1386
    2 - fail is allowed for greedy iterators
1387
    3 - only ranged early fail is allowed
1388
  >3 - (start - 3) number of remaining ranged early fails allowed
1389
1390
return: the updated value of start
1391
*/
1392
/* Scans the alternatives of the group at cc (OP_ONCE, OP_BRA or an
optimized OP_CBRA) and assigns early fail / skip private data to the
iterators found at the beginning of the alternatives.

 common             - compiler state; private_data_ptrs, fast_forward_bc_ptr,
                      early_fail_start_ptr and early_fail_end_ptr are updated
 cc                 - first opcode of the group
 private_data_start - next free private data offset, advanced for each slot
 depth              - nesting level, nested groups are not entered when the
                      level is 4 or more
 start              - early fail state on entry (see the description above)

Returns the updated early fail state. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR group_end;
PCRE2_SPTR iter_start;
int best = 0;
int count, nested;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE)));
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* Skip is not allowed when the group has more than one alternative. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

do
  {
  count = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  for (;;)
    {
    /* Set only when the current opcode is an iterator which can
    receive an early fail / skip slot. */
    iter_start = NULL;

    switch(*cc)
      {
      case OP_SOD: case OP_SOM: case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
      case OP_EODN: case OP_EOD:
      case OP_CIRC: case OP_CIRCM:
      case OP_DOLL: case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT: case OP_DIGIT:
      case OP_NOT_WHITESPACE: case OP_WHITESPACE:
      case OP_NOT_WORDCHAR: case OP_WORDCHAR:
      case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
      case OP_NOT_HSPACE: case OP_HSPACE:
      case OP_NOT_VSPACE: case OP_VSPACE:
      if (count < 1)
        count = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      if (count < 3)
        count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1)
        count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR: case OP_CHARI:
      case OP_NOT: case OP_NOTI:
      if (count < 1)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_TYPESTAR: case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        iter_start = cc - 1;
        break;
        }

      if (count < 3)
        count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1)
        count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3)
        count = 3;
      cc += 1;
      continue;

      case OP_MINSTAR: case OP_MINPLUS:
      case OP_MINSTARI: case OP_MINPLUSI:
      case OP_NOTMINSTAR: case OP_NOTMINPLUS:
      case OP_NOTMINSTARI: case OP_NOTMINPLUSI:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_STAR: case OP_PLUS:
      case OP_POSSTAR: case OP_POSPLUS:

      case OP_STARI: case OP_PLUSI:
      case OP_POSSTARI: case OP_POSPLUSI:

      case OP_NOTSTAR: case OP_NOTPLUS:
      case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:

      case OP_NOTSTARI: case OP_NOTPLUSI:
      case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
      iter_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (count < 1)
        count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO:
      case OP_UPTOI: case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO: case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI: case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY:
      case OP_QUERYI: case OP_MINQUERYI: case OP_POSQUERYI:
      case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTPOSQUERY:
      case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTPOSQUERYI:
      if (count < 3)
        count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      case OP_ECLASS:
#endif
      iter_start = cc;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));
#else
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* Inspect the repeat which may follow the class. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2)
          count = 3;
        /* Fall through */

        case OP_CRSTAR: case OP_CRPLUS:
        case OP_CRPOSSTAR: case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE: case OP_CRMINRANGE: case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1)
            count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY:
        cc++;
        if (count < 3)
          count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1)
          count = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      nested = count;
      if (count < 1)
        count = 1;

      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      /* Only non-repeated groups with an optimized capture are entered. */
      group_end = bracketend(cc);
      if (group_end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && !is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE))))
        break;

      nested = detect_early_fail(common, cc, private_data_start, depth + 1, nested);

      if (nested > count)
        count = nested;

      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = group_end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    /* The scan of this alternative ends at a non-iterator opcode. */
    if (iter_start == NULL)
      break;

    if (count == 0)
      {
      /* Skip is still allowed. */
      common->fast_forward_bc_ptr = iter_start;
      common->private_data_ptrs[(iter_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else if (count < 3)
      {
      /* Simple early fail: one private data word. */
      common->private_data_ptrs[(iter_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = 4;
      }
    else
      {
      /* Ranged early fail: two private data words. */
      common->private_data_ptrs[(iter_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += 2 * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count++;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* An alternative which was not scanned to its end saturates the result. */
  if (*cc != OP_ALT && *cc != OP_KET)
    best = EARLY_FAIL_ENHANCE_MAX;
  else if (best < count)
    best = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return best;
}
1767
1768
/* Maps the repeat opcode at cc (which follows a class) to a value between
0 and 2; 0 is returned for any opcode which is not listed below.
NOTE(review): the result looks like the number of private data words the
iterator needs - confirm against the callers. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;
sljit_u32 limit;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero upper bound is handled like the star / plus forms. */
  if (upper == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;

  span = upper - lower;
  limit = (sljit_u32)(*cc == OP_CRRANGE ? 0 : 1);
  return (span > limit) ? 2 : (int)span;

  default:
  return 0;
  }
}
1799
1800
/* Checks whether the group at begin is followed by identical copies of
itself, optionally followed by nested BRAZERO / BRAMINZERO copies. When
such a sequence is found, the total length, the repeat type (OP_EXACT,
OP_UPTO or OP_MINUPTO) and the repeat count are recorded in
common->private_data_ptrs.

Returns TRUE when the group is part of a detected repeat. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
sljit_sw length = end - begin;
sljit_s32 min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the identical copies which directly follow the group. */
next = end;
min = 1;
while (*next == *begin)
  {
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;

  /* Each optional copy is nested: BRAZERO, OP_BRA, copy of the group. */
  while (next[0] == type && next[1] == OP_BRA && next[2 + LINK_SIZE] == *begin)
    {
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost optional copy has no OP_BRA wrapper. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapper must be closed by a plain OP_KET. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        /* Patterns must fit into an int32 even for link-size=4. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = (sljit_s32)(next_end - max_end);
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = (sljit_s32)(max_end - end);
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1886
1887
/* Single character iterators (plain, caseless and negated variants) for
which set_private_data_ptrs() requests one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators for which set_private_data_ptrs() requests
two private data words. The opcode is followed by the character only. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators for which set_private_data_ptrs() requests
two private data words. The opcode is followed by an IMM2_SIZE sized
count and then the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:
1924
1925
/* Character type iterators for which set_private_data_ptrs() requests
one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators for which set_private_data_ptrs() requests
two private data words, unless the iterated type is OP_EXTUNI. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators which carry an IMM2_SIZE sized count. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1938
1939
/* Hands out private data slots to the opcodes of the compiled pattern.

The byte code is scanned from common->start up to (not including) ccend.
For every opcode which needs private data, the offset of its slot is
stored in common->private_data_ptrs, indexed by the distance of the opcode
from common->start. Offsets are allocated upwards from *private_data_start
and the first unused offset is written back to *private_data_start. The
scan is abandoned as soon as the next offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR repeat_end = NULL;
PCRE2_SPTR alt;
int next_slot = *private_data_start;
int slot_words, skip, bracket_skip;
BOOL check_repeat = TRUE;

while (cc < ccend && next_slot <= SLJIT_MAX_LOCAL_SIZE)
  {
  slot_words = 0;
  skip = 0;
  bracket_skip = 0;

  /* A bracket which directly follows a zero iteration opcode is not
  examined by detect_repeat() at this point. */
  if (check_repeat
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    {
    /* These brackets are converted to repeats, so the single character
    iterators inside them must not receive a global private slot. */
    repeat_end = bracketend(cc);
    }
  check_repeat = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry right after the OP_KET holds the number of code
    units to jump over; in that case the OP_KET itself gets a slot. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always own exactly one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);

    /* One more word is reserved (and the following entry is flagged
    with 1) when find_vreverse() reports a match. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      next_slot += sizeof(sljit_sw);
      }

    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_ASSERT_SCS:
    /* Two words. */
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += 2 * sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND: only a conditional closed by a repeating
    ket receives a slot, otherwise its entry is cleared. */
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      }
    else
      common->private_data_ptrs[cc - common->start] = 0;
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    skip = 1;
    check_repeat = FALSE;
    break;

    /* For the character iterators a negative skip value means that the
    opcode ends with a character, which may be longer than one code unit
    in UTF mode (handled after the switch). */
    CASE_ITERATOR_PRIVATE_DATA_1
    slot_words = 1;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    slot_words = 2;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    slot_words = 2;
    skip = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slot_words = 1;
    skip = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_EXTUNI)
      slot_words = 2;
    skip = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_EXTUNI)
      slot_words = 2;
    skip = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    slot_words = 2;
    skip = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    skip = 1 + 32 / sizeof(PCRE2_UCHAR);
    slot_words = get_class_iterator_size(cc + skip);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    skip = GET(cc, 1);
    slot_words = get_class_iterator_size(cc + skip);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators which are not inside a repeated bracket get a
  private slot instead of allocating their data on the stack. This test
  must be done before cc is moved past the opcode. */
  if (slot_words > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw) * slot_words;
    }

  if (skip > 0)
    cc += skip;
  else if (skip < 0)
    {
    cc += -skip;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }

  if (bracket_skip > 0)
    {
    if (cc >= repeat_end)
      {
      /* Only a bracket which is not closed by a plain OP_KET keeps
      its end as the new limit. */
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += bracket_skip;
    }
  }

*private_data_start = next_slot;
}
2142
2143
/* Test-and-set on common->cbracket_bitset: marks capture_index as processed
and returns TRUE only if it was already marked before this call. */
static SLJIT_INLINE BOOL is_cbracket_processed(compiler_common *common, sljit_s32 capture_index)
{
sljit_u8 *slot = common->cbracket_bitset + (capture_index >> 3);
sljit_u8 mask = (sljit_u8)(1 << (capture_index & 0x7));
BOOL seen_before = (*slot & mask) != 0;

*slot |= mask;
return seen_before;
}
2152
2153
/* Returns with a frame_types (always < 0) if no need for frame. */
2154
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2155
3.56M
{
2156
3.56M
int length = 0;
2157
3.56M
int possessive = 0;
2158
3.56M
int offset;
2159
3.56M
BOOL stack_restore = FALSE;
2160
3.56M
BOOL setsom_found = recursive;
2161
3.56M
BOOL setmark_found = recursive;
2162
/* The last capture is a local variable even for recursions. */
2163
3.56M
BOOL capture_last_found = FALSE;
2164
2165
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2166
SLJIT_ASSERT(common->control_head_ptr != 0);
2167
*needs_control_head = TRUE;
2168
#else
2169
3.56M
*needs_control_head = FALSE;
2170
3.56M
#endif
2171
2172
3.56M
memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);
2173
2174
3.56M
if (ccend == NULL)
2175
3.36M
  {
2176
3.36M
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2177
3.36M
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2178
120k
    {
2179
120k
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2180
    /* This is correct regardless of common->capture_last_ptr. */
2181
120k
    capture_last_found = TRUE;
2182
120k
    }
2183
3.36M
  cc = next_opcode(common, cc);
2184
3.36M
  }
2185
2186
3.56M
SLJIT_ASSERT(cc != NULL);
2187
14.1G
while (cc < ccend)
2188
14.1G
  switch(*cc)
2189
14.1G
    {
2190
132k
    case OP_SET_SOM:
2191
132k
    SLJIT_ASSERT(common->has_set_som);
2192
132k
    stack_restore = TRUE;
2193
132k
    if (!setsom_found)
2194
1.23k
      {
2195
1.23k
      length += 2;
2196
1.23k
      setsom_found = TRUE;
2197
1.23k
      }
2198
132k
    cc += 1;
2199
132k
    break;
2200
2201
2.59M
    case OP_MARK:
2202
5.32M
    case OP_COMMIT_ARG:
2203
12.7M
    case OP_PRUNE_ARG:
2204
12.7M
    case OP_THEN_ARG:
2205
12.7M
    SLJIT_ASSERT(common->mark_ptr != 0);
2206
12.7M
    stack_restore = TRUE;
2207
12.7M
    if (!setmark_found)
2208
41.4k
      {
2209
41.4k
      length += 2;
2210
41.4k
      setmark_found = TRUE;
2211
41.4k
      }
2212
12.7M
    if (common->control_head_ptr != 0)
2213
1.68M
      *needs_control_head = TRUE;
2214
12.7M
    cc += 1 + 2 + cc[1];
2215
12.7M
    break;
2216
2217
667M
    case OP_RECURSE:
2218
667M
    stack_restore = TRUE;
2219
667M
    if (common->has_set_som && !setsom_found)
2220
10.3k
      {
2221
10.3k
      length += 2;
2222
10.3k
      setsom_found = TRUE;
2223
10.3k
      }
2224
667M
    if (common->mark_ptr != 0 && !setmark_found)
2225
26.9k
      {
2226
26.9k
      length += 2;
2227
26.9k
      setmark_found = TRUE;
2228
26.9k
      }
2229
667M
    if (common->capture_last_ptr != 0 && !capture_last_found)
2230
68.9k
      {
2231
68.9k
      length += 2;
2232
68.9k
      capture_last_found = TRUE;
2233
68.9k
      }
2234
2235
667M
    cc += 1 + LINK_SIZE;
2236
667M
    while (*cc == OP_CREF)
2237
26.2k
      {
2238
26.2k
      offset = GET2(cc, 1);
2239
26.2k
      if (!is_cbracket_processed(common, offset))
2240
1.67k
        length += 3;
2241
26.2k
      cc += 1 + IMM2_SIZE;
2242
26.2k
      }
2243
667M
    break;
2244
2245
106M
    case OP_CBRA:
2246
110M
    case OP_CBRAPOS:
2247
113M
    case OP_SCBRA:
2248
125M
    case OP_SCBRAPOS:
2249
125M
    stack_restore = TRUE;
2250
125M
    if (common->capture_last_ptr != 0 && !capture_last_found)
2251
92.6k
      {
2252
92.6k
      length += 2;
2253
92.6k
      capture_last_found = TRUE;
2254
92.6k
      }
2255
2256
125M
    offset = GET2(cc, 1 + LINK_SIZE);
2257
125M
    if (!is_cbracket_processed(common, offset))
2258
2.96M
      length += 3;
2259
125M
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2260
125M
    break;
2261
2262
19.7M
    case OP_THEN:
2263
19.7M
    stack_restore = TRUE;
2264
19.7M
    if (common->control_head_ptr != 0)
2265
19.7M
      *needs_control_head = TRUE;
2266
19.7M
    cc ++;
2267
19.7M
    break;
2268
2269
2.94G
    default:
2270
2.94G
    stack_restore = TRUE;
2271
    /* Fall through. */
2272
2273
2.95G
    case OP_NOT_WORD_BOUNDARY:
2274
2.95G
    case OP_WORD_BOUNDARY:
2275
2.96G
    case OP_NOT_DIGIT:
2276
2.97G
    case OP_DIGIT:
2277
3.02G
    case OP_NOT_WHITESPACE:
2278
3.08G
    case OP_WHITESPACE:
2279
3.08G
    case OP_NOT_WORDCHAR:
2280
3.11G
    case OP_WORDCHAR:
2281
3.12G
    case OP_ANY:
2282
3.12G
    case OP_ALLANY:
2283
3.12G
    case OP_ANYBYTE:
2284
3.16G
    case OP_NOTPROP:
2285
3.24G
    case OP_PROP:
2286
3.31G
    case OP_ANYNL:
2287
3.32G
    case OP_NOT_HSPACE:
2288
3.32G
    case OP_HSPACE:
2289
3.33G
    case OP_NOT_VSPACE:
2290
3.33G
    case OP_VSPACE:
2291
3.40G
    case OP_EXTUNI:
2292
3.42G
    case OP_EODN:
2293
3.42G
    case OP_EOD:
2294
3.43G
    case OP_CIRC:
2295
3.43G
    case OP_CIRCM:
2296
3.49G
    case OP_DOLL:
2297
3.49G
    case OP_DOLLM:
2298
9.76G
    case OP_CHAR:
2299
12.5G
    case OP_CHARI:
2300
12.5G
    case OP_NOT:
2301
12.5G
    case OP_NOTI:
2302
2303
12.6G
    case OP_EXACT:
2304
12.6G
    case OP_POSSTAR:
2305
12.6G
    case OP_POSPLUS:
2306
12.6G
    case OP_POSQUERY:
2307
12.7G
    case OP_POSUPTO:
2308
2309
12.7G
    case OP_EXACTI:
2310
12.7G
    case OP_POSSTARI:
2311
12.7G
    case OP_POSPLUSI:
2312
12.7G
    case OP_POSQUERYI:
2313
12.7G
    case OP_POSUPTOI:
2314
2315
12.7G
    case OP_NOTEXACT:
2316
12.7G
    case OP_NOTPOSSTAR:
2317
12.7G
    case OP_NOTPOSPLUS:
2318
12.7G
    case OP_NOTPOSQUERY:
2319
12.7G
    case OP_NOTPOSUPTO:
2320
2321
12.7G
    case OP_NOTEXACTI:
2322
12.7G
    case OP_NOTPOSSTARI:
2323
12.7G
    case OP_NOTPOSPLUSI:
2324
12.7G
    case OP_NOTPOSQUERYI:
2325
12.7G
    case OP_NOTPOSUPTOI:
2326
2327
12.7G
    case OP_TYPEEXACT:
2328
12.7G
    case OP_TYPEPOSSTAR:
2329
12.7G
    case OP_TYPEPOSPLUS:
2330
12.7G
    case OP_TYPEPOSQUERY:
2331
12.8G
    case OP_TYPEPOSUPTO:
2332
2333
12.8G
    case OP_CLASS:
2334
12.8G
    case OP_NCLASS:
2335
12.8G
    case OP_XCLASS:
2336
12.8G
    case OP_ECLASS:
2337
2338
13.2G
    case OP_CALLOUT:
2339
13.2G
    case OP_CALLOUT_STR:
2340
2341
13.2G
    case OP_NOT_UCP_WORD_BOUNDARY:
2342
13.2G
    case OP_UCP_WORD_BOUNDARY:
2343
2344
13.2G
    cc = next_opcode(common, cc);
2345
13.2G
    SLJIT_ASSERT(cc != NULL);
2346
13.2G
    break;
2347
14.1G
    }
2348
2349
/* Possessive quantifiers can use a special case. */
2350
3.56M
if (SLJIT_UNLIKELY(possessive == length))
2351
2.16M
  return stack_restore ? no_frame : no_stack;
2352
2353
1.40M
if (length > 0)
2354
1.40M
  return length + 1;
2355
0
return stack_restore ? no_frame : no_stack;
2356
1.40M
}
2357
2358
/* Helpers of init_frame(). Both of them use the local variables
"compiler" and "stackpos" of that function. */

/* Saves a local variable: the negated offset of the local is stored
first, then its current value (two words). */
#define INIT_FRAME_PUSH_LOCAL(local_offset) \
  do \
    { \
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), (local_offset)); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -(local_offset)); \
    stackpos -= SSIZE_OF(sw); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); \
    stackpos -= SSIZE_OF(sw); \
    } \
  while (0)

/* Saves an ovector pair: the offset of the first member is stored first,
then the current value of both members (three words). */
#define INIT_FRAME_PUSH_CAPTURE(first_index) \
  do \
    { \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(first_index)); \
    stackpos -= SSIZE_OF(sw); \
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(first_index)); \
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR((first_index) + 1)); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); \
    stackpos -= SSIZE_OF(sw); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); \
    stackpos -= SSIZE_OF(sw); \
    } \
  while (0)

/* Emits the code which builds the frame of the code between cc and ccend.

The opcodes are visited in the same way as get_framesize() does, and each
kind of item (start of match, mark, last capture, every distinct capture
bracket) is saved once, starting at stack index stackpos and moving
towards stacktop. The frame is closed by a zero word. When ccend is NULL,
cc must point to a bracket and its content is processed. */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL som_saved = FALSE;
BOOL mark_saved = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_saved = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* Forget every capture bracket recorded by a previous scan. */
memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A possessive capturing bracket is processed by the loop below. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

/* The data is restored by do_revertframes(). */
SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!som_saved)
      {
      INIT_FRAME_PUSH_LOCAL(OVECTOR(0));
      som_saved = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!mark_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->mark_ptr);
      mark_saved = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion may change any of the locals, so all of those which
    are in use are saved. */
    if (common->has_set_som && !som_saved)
      {
      INIT_FRAME_PUSH_LOCAL(OVECTOR(0));
      som_saved = TRUE;
      }
    if (common->mark_ptr != 0 && !mark_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->mark_ptr);
      mark_saved = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->capture_last_ptr);
      capture_last_saved = TRUE;
      }
    cc += 1 + LINK_SIZE;
    /* Capture references which follow the recursion. */
    while (*cc == OP_CREF)
      {
      offset = GET2(cc, 1);
      if (!is_cbracket_processed(common, offset))
        {
        offset <<= 1;
        INIT_FRAME_PUSH_CAPTURE(offset);
        }
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
      INIT_FRAME_PUSH_LOCAL(common->capture_last_ptr);
      capture_last_saved = TRUE;
      }

    offset = GET2(cc, 1 + LINK_SIZE);
    if (!is_cbracket_processed(common, offset))
      {
      offset <<= 1;
      INIT_FRAME_PUSH_CAPTURE(offset);
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

/* The zero word terminates the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}

#undef INIT_FRAME_PUSH_CAPTURE
#undef INIT_FRAME_PUSH_LOCAL
2505
2506
14.3M
#define RECURSE_TMP_REG_COUNT 3
2507
2508
/* State of a sequence of memory to memory copies which rotates through
RECURSE_TMP_REG_COUNT temporary registers. Each load is emitted at once
by delayed_mem_copy_move(), while the matching store is postponed until the
same register is needed again or delayed_mem_copy_finish() is called. */
typedef struct delayed_mem_copy_status {
2509
  /* Compiler which receives the instructions, set by delayed_mem_copy_init(). */
  struct sljit_compiler *compiler;
2510
  /* Base register of the pending store of each slot, -1 when there is none. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2511
  /* Offset of the pending store of each slot. */
  sljit_s32 store_offsets[RECURSE_TMP_REG_COUNT];
2512
  /* Registers which carry the copied values. They are read (not written)
     by delayed_mem_copy_init(), so the caller must fill them in before. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2513
  /* Where the original content of tmp_regs[i] is kept while the register
     is in use. It is only used when sljit_get_register_index() returns
     a negative value for it, otherwise it must be the same as tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2514
  /* Index of the slot used by the next delayed_mem_copy_move() call. */
  int next_tmp_reg;
2515
} delayed_mem_copy_status;
2516
2517
/* Prepares status for a new sequence of delayed copies: no store is
pending and the first temporary register is used first. The tmp_regs and
saved_tmp_regs members must already be set by the caller. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 means that the slot has no pending store. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2531
2532
/* Copies one word from [load_base + load_offset] to
[store_base + store_offset] through the next temporary register.

Only the load is emitted now. The store is recorded in status and it is
emitted when the register is needed again or when
delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_s32 store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* The register still holds a value: emit its postponed store. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the register: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2556
2557
/* Emits every store which is still pending in status, starting with the
slot that would be used next (the oldest one), and restores the temporary
registers whose original content was saved. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, saved_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* The slot was never used. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}
2580
2581
#undef RECURSE_TMP_REG_COUNT
2582
2583
/* Test-and-set on common->recurse_bitset.

bit_index is a byte offset which must be a multiple of sizeof(sljit_sw);
it is converted to a word index, and that index selects the bit. Returns
TRUE, after setting the bit, when the word is seen for the first time, and
FALSE when its bit was already set. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
sljit_sw word_index;
uint8_t *cell;
uint8_t flag;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

word_index = bit_index >> SLJIT_WORD_SHIFT;

SLJIT_ASSERT((word_index >> 3) < common->recurse_bitset_size);

cell = common->recurse_bitset + (word_index >> 3);
flag = 1 << (word_index & 0x7);

if ((*cell & flag) != 0)
  return FALSE;

*cell |= flag;
return TRUE;
}
2603
2604
/* Bit flags collected by get_recurse_data_length() in its recurse_flags
variable while it scans the body of a recursion. */
enum get_recurse_flags {
2605
  /* NOTE(review): not set in the part of get_recurse_data_length()
     inspected here; the meaning is presumably "a quit is needed" - confirm. */
  recurse_flag_quit_found = (1 << 0),
2606
  /* NOTE(review): presumably set when OP_ACCEPT is found - confirm. */
  recurse_flag_accept_found = (1 << 1),
2607
  /* Set by OP_SET_SOM, and by OP_RECURSE when common->has_set_som is set. */
  recurse_flag_setsom_found = (1 << 2),
2608
  /* Set by OP_MARK, OP_COMMIT_ARG, OP_PRUNE_ARG and OP_THEN_ARG, and by
     OP_RECURSE when common->mark_ptr is non-zero. */
  recurse_flag_setmark_found = (1 << 3),
2609
  /* Always set when DEBUG_FORCE_CONTROL_HEAD is enabled.
     NOTE(review): presumably also set by verbs using the control head. */
  recurse_flag_control_head_found = (1 << 4),
2610
  /* Set while the OP_CREF arguments which follow an OP_RECURSE are being
     processed; cleared after the last of them. */
  recurse_flag_recurse_arg = (1 << 5),
2611
};
2612
2613
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2614
10.5k
{
2615
10.5k
int length = 1;
2616
10.5k
int size, offset;
2617
10.5k
PCRE2_SPTR alternative, cref;
2618
10.5k
uint32_t recurse_flags = 0;
2619
2620
10.5k
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2621
2622
10.5k
if (common->currententry->arg_size > 0)
2623
60
  {
2624
60
  cref = common->currententry->arg_start;
2625
2626
60
  do
2627
60
    {
2628
60
    offset = GET2(cref, 1);
2629
60
    recurse_check_bit(common, OVECTOR(offset << 1));
2630
60
    cref += 1 + IMM2_SIZE;
2631
60
    }
2632
60
  while (*cref == OP_CREF);
2633
60
  }
2634
2635
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2636
SLJIT_ASSERT(common->control_head_ptr != 0);
2637
recurse_flags |= recurse_flag_control_head_found;
2638
#endif
2639
2640
/* Calculate the sum of the private machine words. */
2641
35.3M
while (cc < ccend)
2642
35.3M
  {
2643
35.3M
  size = 0;
2644
35.3M
  switch(*cc)
2645
35.3M
    {
2646
4.88k
    case OP_SET_SOM:
2647
4.88k
    SLJIT_ASSERT(common->has_set_som);
2648
4.88k
    recurse_flags |= recurse_flag_setsom_found;
2649
4.88k
    cc += 1;
2650
4.88k
    break;
2651
2652
396k
    case OP_RECURSE:
2653
396k
    if (common->has_set_som)
2654
5.68k
      recurse_flags |= recurse_flag_setsom_found;
2655
396k
    if (common->mark_ptr != 0)
2656
21.6k
      recurse_flags |= recurse_flag_setmark_found;
2657
396k
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2658
1.31k
      length++;
2659
396k
    cc += 1 + LINK_SIZE;
2660
396k
    if (*cc == OP_CREF)
2661
629
      recurse_flags |= recurse_flag_recurse_arg;
2662
396k
    break;
2663
2664
1.62M
    case OP_KET:
2665
1.62M
    offset = PRIVATE_DATA(cc);
2666
1.62M
    if (offset != 0)
2667
42.0k
      {
2668
42.0k
      if (recurse_check_bit(common, offset))
2669
42.0k
        length++;
2670
42.0k
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2671
42.0k
      cc += PRIVATE_DATA(cc + 1);
2672
42.0k
      }
2673
1.62M
    cc += 1 + LINK_SIZE;
2674
1.62M
    break;
2675
2676
123k
    case OP_ASSERT:
2677
254k
    case OP_ASSERT_NOT:
2678
276k
    case OP_ASSERTBACK:
2679
388k
    case OP_ASSERTBACK_NOT:
2680
767k
    case OP_ASSERT_NA:
2681
792k
    case OP_ASSERTBACK_NA:
2682
814k
    case OP_ONCE:
2683
856k
    case OP_SCRIPT_RUN:
2684
872k
    case OP_BRAPOS:
2685
901k
    case OP_SBRA:
2686
950k
    case OP_SBRAPOS:
2687
951k
    case OP_SCOND:
2688
951k
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2689
951k
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2690
951k
      length++;
2691
951k
    cc += 1 + LINK_SIZE;
2692
951k
    break;
2693
2694
2.03k
    case OP_CREF:
2695
2.03k
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
2696
629
      {
2697
629
      offset = GET2(cc, 1);
2698
629
      if (recurse_check_bit(common, OVECTOR(offset << 1)))
2699
88
        {
2700
88
        SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2701
88
        length += 2;
2702
88
        }
2703
2704
629
      if (cc[1 + IMM2_SIZE] != OP_CREF)
2705
629
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
2706
629
      }
2707
2.03k
    cc += 1 + IMM2_SIZE;
2708
2.03k
    break;
2709
2710
0
    case OP_ASSERT_SCS:
2711
0
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2712
0
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2713
0
      length += 2;
2714
0
    cc += 1 + LINK_SIZE;
2715
0
    break;
2716
2717
490k
    case OP_CBRA:
2718
558k
    case OP_SCBRA:
2719
558k
    offset = GET2(cc, 1 + LINK_SIZE);
2720
558k
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2721
19.8k
      {
2722
19.8k
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2723
19.8k
      length += 2;
2724
19.8k
      }
2725
558k
    if (!is_optimized_cbracket(common, offset) && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2726
5.74k
      length++;
2727
558k
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2728
262
      length++;
2729
558k
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2730
558k
    break;
2731
2732
7.03k
    case OP_CBRAPOS:
2733
36.3k
    case OP_SCBRAPOS:
2734
36.3k
    offset = GET2(cc, 1 + LINK_SIZE);
2735
36.3k
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2736
8.73k
      {
2737
8.73k
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2738
8.73k
      length += 2;
2739
8.73k
      }
2740
36.3k
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2741
8.74k
      length++;
2742
36.3k
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2743
36.3k
      length++;
2744
36.3k
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2745
39
      length++;
2746
36.3k
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2747
36.3k
    break;
2748
2749
8.18k
    case OP_COND:
2750
    /* Might be a hidden SCOND. */
2751
8.18k
    alternative = cc + GET(cc, 1);
2752
8.18k
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2753
0
      length++;
2754
8.18k
    cc += 1 + LINK_SIZE;
2755
8.18k
    break;
2756
2757
247k
    CASE_ITERATOR_PRIVATE_DATA_1
2758
247k
    offset = PRIVATE_DATA(cc);
2759
247k
    if (offset != 0 && recurse_check_bit(common, offset))
2760
226k
      length++;
2761
247k
    cc += 2;
2762
247k
#ifdef SUPPORT_UNICODE
2763
247k
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2764
247k
#endif
2765
247k
    break;
2766
2767
151k
    CASE_ITERATOR_PRIVATE_DATA_2A
2768
151k
    offset = PRIVATE_DATA(cc);
2769
151k
    if (offset != 0 && recurse_check_bit(common, offset))
2770
136k
      {
2771
136k
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2772
136k
      length += 2;
2773
136k
      }
2774
151k
    cc += 2;
2775
151k
#ifdef SUPPORT_UNICODE
2776
151k
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2777
151k
#endif
2778
151k
    break;
2779
2780
34.3k
    CASE_ITERATOR_PRIVATE_DATA_2B
2781
34.3k
    offset = PRIVATE_DATA(cc);
2782
34.3k
    if (offset != 0 && recurse_check_bit(common, offset))
2783
26.8k
      {
2784
26.8k
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2785
26.8k
      length += 2;
2786
26.8k
      }
2787
34.3k
    cc += 2 + IMM2_SIZE;
2788
34.3k
#ifdef SUPPORT_UNICODE
2789
34.3k
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2790
34.3k
#endif
2791
34.3k
    break;
2792
2793
94.2k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2794
94.2k
    offset = PRIVATE_DATA(cc);
2795
94.2k
    if (offset != 0 && recurse_check_bit(common, offset))
2796
79.7k
      length++;
2797
94.2k
    cc += 1;
2798
94.2k
    break;
2799
2800
161k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2801
161k
    offset = PRIVATE_DATA(cc);
2802
161k
    if (offset != 0 && recurse_check_bit(common, offset))
2803
115k
      {
2804
115k
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2805
115k
      length += 2;
2806
115k
      }
2807
161k
    cc += 1;
2808
161k
    break;
2809
2810
67.4k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2811
67.4k
    offset = PRIVATE_DATA(cc);
2812
67.4k
    if (offset != 0 && recurse_check_bit(common, offset))
2813
52.6k
      {
2814
52.6k
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2815
52.6k
      length += 2;
2816
52.6k
      }
2817
67.4k
    cc += 1 + IMM2_SIZE;
2818
67.4k
    break;
2819
2820
30.5k
    case OP_CLASS:
2821
65.7k
    case OP_NCLASS:
2822
65.7k
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2823
312k
    case OP_XCLASS:
2824
324k
    case OP_ECLASS:
2825
324k
    size = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2826
#else
2827
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2828
#endif
2829
2830
324k
    offset = PRIVATE_DATA(cc);
2831
324k
    if (offset != 0 && recurse_check_bit(common, offset))
2832
140k
      length += get_class_iterator_size(cc + size);
2833
324k
    cc += size;
2834
324k
    break;
2835
2836
7.13k
    case OP_MARK:
2837
11.8k
    case OP_COMMIT_ARG:
2838
19.5k
    case OP_PRUNE_ARG:
2839
20.5k
    case OP_THEN_ARG:
2840
20.5k
    SLJIT_ASSERT(common->mark_ptr != 0);
2841
20.5k
    recurse_flags |= recurse_flag_setmark_found;
2842
20.5k
    if (common->control_head_ptr != 0)
2843
4.16k
      recurse_flags |= recurse_flag_control_head_found;
2844
20.5k
    if (*cc != OP_MARK)
2845
13.4k
      recurse_flags |= recurse_flag_quit_found;
2846
2847
20.5k
    cc += 1 + 2 + cc[1];
2848
20.5k
    break;
2849
2850
50.5k
    case OP_PRUNE:
2851
178k
    case OP_SKIP:
2852
196k
    case OP_COMMIT:
2853
196k
    recurse_flags |= recurse_flag_quit_found;
2854
196k
    cc++;
2855
196k
    break;
2856
2857
5.94k
    case OP_SKIP_ARG:
2858
5.94k
    recurse_flags |= recurse_flag_quit_found;
2859
5.94k
    cc += 1 + 2 + cc[1];
2860
5.94k
    break;
2861
2862
49.1k
    case OP_THEN:
2863
49.1k
    SLJIT_ASSERT(common->control_head_ptr != 0);
2864
49.1k
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2865
49.1k
    cc++;
2866
49.1k
    break;
2867
2868
6.64k
    case OP_ACCEPT:
2869
11.3k
    case OP_ASSERT_ACCEPT:
2870
11.3k
    recurse_flags |= recurse_flag_accept_found;
2871
11.3k
    cc++;
2872
11.3k
    break;
2873
2874
30.4M
    default:
2875
30.4M
    cc = next_opcode(common, cc);
2876
30.4M
    SLJIT_ASSERT(cc != NULL);
2877
30.4M
    break;
2878
35.3M
    }
2879
35.3M
  }
2880
10.5k
SLJIT_ASSERT(cc == ccend);
2881
2882
10.5k
if (recurse_flags & recurse_flag_control_head_found)
2883
1.04k
  length++;
2884
10.5k
if (recurse_flags & recurse_flag_quit_found)
2885
2.01k
  {
2886
2.01k
  if (recurse_flags & recurse_flag_setsom_found)
2887
268
    length++;
2888
2.01k
  if (recurse_flags & recurse_flag_setmark_found)
2889
453
    length++;
2890
2.01k
  }
2891
2892
10.5k
*result_flags = recurse_flags;
2893
10.5k
return length;
2894
10.5k
}
2895
2896
/* Transfer modes accepted by the 'type' argument of copy_recurse_data().
As selected by the conditions inside that function:
  recurse_copy_from_global           - from_sp is TRUE; the recursive head and
                                       the private, shared and kept-shared
                                       slots are all processed.
  recurse_copy_private_to_global     - the recursive head and the private
                                       slots only.
  recurse_copy_shared_to_global      - the shared and kept-shared slots only.
  recurse_copy_kept_shared_to_global - the kept-shared slots only.
  recurse_swap_global                - the recursive head and the private and
                                       shared slots, with a move issued in
                                       both directions; TMP2 is the base
                                       register instead of STACK_TOP. */
enum copy_recurse_data_types {
2897
  recurse_copy_from_global,
2898
  recurse_copy_private_to_global,
2899
  recurse_copy_shared_to_global,
2900
  recurse_copy_kept_shared_to_global,
2901
  recurse_swap_global
2902
};
2903
2904
/* Walks the compiled pattern from cc up to ccend and, for every opcode that
owns local slots, issues delayed_mem_copy_move() calls that pair each slot
(addressed through SLJIT_SP) with consecutive machine words addressed through
base_reg, starting at STACK(stackptr).

Slots are collected per opcode into three groups: private, shared and
kept-shared. The 'type' argument (one of copy_recurse_data_types) selects
which groups are actually transferred; a group that is not transferred still
advances stackptr by its size, so the word layout is the same for every type.

recurse_check_bit() is consulted before a slot is added to a group.
NOTE(review): presumably it reports whether the slot is seen for the first
time in this walk (the bitset is cleared on entry) - confirm against
recurse_check_bit(). The operand order of delayed_mem_copy_move() is likewise
not visible here; it looks like (load base, load offset, store base, store
offset) - confirm.

recurse_flags: recurse_flag_quit_found enables the kept-shared handling of
OVECTOR(0) and mark_ptr; recurse_flag_recurse_arg is set and cleared locally
while the OP_CREF items following an OP_RECURSE are walked.

On exit the function asserts cc == ccend and stackptr == STACK(stacktop). */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2905
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2906
43.9k
{
2907
43.9k
delayed_mem_copy_status status;
2908
43.9k
PCRE2_SPTR alternative, cref;
2909
43.9k
sljit_sw private_srcw[2];
2910
43.9k
sljit_sw shared_srcw[3];
2911
43.9k
sljit_sw kept_shared_srcw[2];
2912
43.9k
int private_count, shared_count, kept_shared_count;
2913
43.9k
int from_sp, base_reg, offset, i;
2914
/* Start this walk with an all-zero recurse bitset. */
2915
43.9k
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2916
/* Run recurse_check_bit() on the OVECTOR start slot of every OP_CREF argument
of the current entry; the return value is ignored here. */
2917
43.9k
if (common->currententry->arg_size > 0)
2918
256
  {
2919
256
  cref = common->currententry->arg_start;
2920
2921
256
  do
2922
256
    {
2923
256
    offset = GET2(cref, 1);
2924
256
    recurse_check_bit(common, OVECTOR(offset << 1));
2925
256
    cref += 1 + IMM2_SIZE;
2926
256
    }
2927
256
  while (*cref == OP_CREF);
2928
256
  }
2929
2930
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2931
SLJIT_ASSERT(common->control_head_ptr != 0);
2932
recurse_check_bit(common, common->control_head_ptr);
2933
#endif
2934
/* Choose the direction flag and the base register for the requested type. */
2935
43.9k
switch (type)
2936
43.9k
  {
2937
10.5k
  case recurse_copy_from_global:
2938
10.5k
  from_sp = TRUE;
2939
10.5k
  base_reg = STACK_TOP;
2940
10.5k
  break;
2941
2942
10.5k
  case recurse_copy_private_to_global:
2943
11.8k
  case recurse_copy_shared_to_global:
2944
12.4k
  case recurse_copy_kept_shared_to_global:
2945
12.4k
  from_sp = FALSE;
2946
12.4k
  base_reg = STACK_TOP;
2947
12.4k
  break;
2948
2949
21.0k
  default:
2950
21.0k
  SLJIT_ASSERT(type == recurse_swap_global);
2951
21.0k
  from_sp = FALSE;
2952
21.0k
  base_reg = TMP2;
2953
21.0k
  break;
2954
43.9k
  }
2955
/* Convert both slot indices with STACK(); from here on they are offsets. */
2956
43.9k
stackptr = STACK(stackptr);
2957
43.9k
stacktop = STACK(stacktop);
2958
2959
43.9k
status.tmp_regs[0] = TMP1;
2960
43.9k
status.saved_tmp_regs[0] = TMP1;
2961
2962
43.9k
if (base_reg != TMP2)
2963
22.9k
  {
2964
22.9k
  status.tmp_regs[1] = TMP2;
2965
22.9k
  status.saved_tmp_regs[1] = TMP2;
2966
22.9k
  }
2967
21.0k
else
2968
21.0k
  {
2969
21.0k
  status.saved_tmp_regs[1] = RETURN_ADDR;
2970
21.0k
  if (HAS_VIRTUAL_REGISTERS)
2971
0
    status.tmp_regs[1] = STR_PTR;
2972
21.0k
  else
2973
21.0k
    status.tmp_regs[1] = RETURN_ADDR;
2974
21.0k
  }
2975
2976
43.9k
status.saved_tmp_regs[2] = TMP3;
2977
43.9k
if (HAS_VIRTUAL_REGISTERS)
2978
0
  status.tmp_regs[2] = STR_END;
2979
43.9k
else
2980
43.9k
  status.tmp_regs[2] = TMP3;
2981
2982
43.9k
delayed_mem_copy_init(&status, common);
2983
/* The first word is paired with recursive_head_ptr. The two shared-only types
do not transfer it, but stackptr is advanced past it in every case. */
2984
43.9k
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2985
42.0k
  {
2986
42.0k
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2987
2988
42.0k
  if (!from_sp)
2989
31.5k
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2990
2991
42.0k
  if (from_sp || type == recurse_swap_global)
2992
31.5k
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2993
42.0k
  }
2994
2995
43.9k
stackptr += sizeof(sljit_sw);
2996
2997
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2998
if (type != recurse_copy_shared_to_global)
2999
  {
3000
  if (!from_sp)
3001
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
3002
3003
  if (from_sp || type == recurse_swap_global)
3004
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
3005
  }
3006
3007
stackptr += sizeof(sljit_sw);
3008
#endif
3009
/* Per opcode: collect the slots it owns into the three groups, then transfer
or skip them in the code that follows the switch. */
3010
154M
while (cc < ccend)
3011
154M
  {
3012
154M
  private_count = 0;
3013
154M
  shared_count = 0;
3014
154M
  kept_shared_count = 0;
3015
3016
154M
  switch(*cc)
3017
154M
    {
3018
23.1k
    case OP_SET_SOM:
3019
23.1k
    SLJIT_ASSERT(common->has_set_som);
3020
23.1k
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
3021
677
      {
3022
677
      kept_shared_srcw[0] = OVECTOR(0);
3023
677
      kept_shared_count = 1;
3024
677
      }
3025
23.1k
    cc += 1;
3026
23.1k
    break;
3027
3028
1.67M
    case OP_RECURSE:
3029
1.67M
    if (recurse_flags & recurse_flag_quit_found)
3030
505k
      {
3031
505k
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
3032
724
        {
3033
724
        kept_shared_srcw[0] = OVECTOR(0);
3034
724
        kept_shared_count = 1;
3035
724
        }
3036
505k
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
3037
1.52k
        {
3038
1.52k
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
3039
1.52k
        kept_shared_count++;
3040
1.52k
        }
3041
505k
      }
3042
3043
1.67M
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
3044
5.41k
      {
3045
5.41k
      shared_srcw[0] = common->capture_last_ptr;
3046
5.41k
      shared_count = 1;
3047
5.41k
      }
3048
3049
1.67M
    cc += 1 + LINK_SIZE;
3050
1.67M
    if (*cc == OP_CREF)
3051
3.12k
      recurse_flags |= recurse_flag_recurse_arg;
3052
1.67M
    break;
3053
3054
6.86M
    case OP_KET:
3055
6.86M
    private_srcw[0] = PRIVATE_DATA(cc);
3056
6.86M
    if (private_srcw[0] != 0)
3057
171k
      {
3058
171k
      if (recurse_check_bit(common, private_srcw[0]))
3059
171k
        private_count = 1;
3060
171k
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
3061
171k
      cc += PRIVATE_DATA(cc + 1);
3062
171k
      }
3063
6.86M
    cc += 1 + LINK_SIZE;
3064
6.86M
    break;
3065
3066
533k
    case OP_ASSERT:
3067
1.07M
    case OP_ASSERT_NOT:
3068
1.16M
    case OP_ASSERTBACK:
3069
1.62M
    case OP_ASSERTBACK_NOT:
3070
3.22M
    case OP_ASSERT_NA:
3071
3.33M
    case OP_ASSERTBACK_NA:
3072
3.43M
    case OP_ONCE:
3073
3.61M
    case OP_SCRIPT_RUN:
3074
3.69M
    case OP_BRAPOS:
3075
3.80M
    case OP_SBRA:
3076
4.00M
    case OP_SBRAPOS:
3077
4.01M
    case OP_SCOND:
3078
4.01M
    private_srcw[0] = PRIVATE_DATA(cc);
3079
4.01M
    if (recurse_check_bit(common, private_srcw[0]))
3080
4.01M
      private_count = 1;
3081
4.01M
    cc += 1 + LINK_SIZE;
3082
4.01M
    break;
3083
3084
8.75k
    case OP_CREF:
3085
8.75k
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
3086
3.12k
      {
3087
3.12k
      offset = GET2(cc, 1);
3088
3.12k
      shared_srcw[0] = OVECTOR(offset << 1);
3089
3.12k
      if (recurse_check_bit(common, shared_srcw[0]))
3090
393
        {
3091
393
        shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
3092
393
        SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
3093
393
        shared_count = 2;
3094
393
        }
3095
3096
3.12k
      if (cc[1 + IMM2_SIZE] != OP_CREF)
3097
3.12k
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
3098
3.12k
      }
3099
8.75k
    cc += 1 + IMM2_SIZE;
3100
8.75k
    break;
3101
3102
0
    case OP_ASSERT_SCS:
3103
0
    private_srcw[0] = PRIVATE_DATA(cc);
3104
0
    private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3105
0
    if (recurse_check_bit(common, private_srcw[0]))
3106
0
      private_count = 2;
3107
0
    cc += 1 + LINK_SIZE;
3108
0
    break;
3109
3110
2.08M
    case OP_CBRA:
3111
2.37M
    case OP_SCBRA:
3112
2.37M
    offset = GET2(cc, 1 + LINK_SIZE);
3113
2.37M
    shared_srcw[0] = OVECTOR(offset << 1);
3114
2.37M
    if (recurse_check_bit(common, shared_srcw[0]))
3115
89.0k
      {
3116
89.0k
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
3117
89.0k
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
3118
89.0k
      shared_count = 2;
3119
89.0k
      }
3120
3121
2.37M
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
3122
1.21k
      {
3123
1.21k
      shared_srcw[shared_count] = common->capture_last_ptr;
3124
1.21k
      shared_count++;
3125
1.21k
      }
3126
3127
2.37M
    if (!is_optimized_cbracket(common, offset))
3128
881k
      {
3129
881k
      private_srcw[0] = OVECTOR_PRIV(offset);
3130
881k
      if (recurse_check_bit(common, private_srcw[0]))
3131
25.8k
        private_count = 1;
3132
881k
      }
3133
3134
2.37M
    cc += 1 + LINK_SIZE + IMM2_SIZE;
3135
2.37M
    break;
3136
3137
30.5k
    case OP_CBRAPOS:
3138
158k
    case OP_SCBRAPOS:
3139
158k
    offset = GET2(cc, 1 + LINK_SIZE);
3140
158k
    shared_srcw[0] = OVECTOR(offset << 1);
3141
158k
    if (recurse_check_bit(common, shared_srcw[0]))
3142
38.4k
      {
3143
38.4k
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
3144
38.4k
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
3145
38.4k
      shared_count = 2;
3146
38.4k
      }
3147
3148
158k
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
3149
161
      {
3150
161
      shared_srcw[shared_count] = common->capture_last_ptr;
3151
161
      shared_count++;
3152
161
      }
3153
3154
158k
    private_srcw[0] = PRIVATE_DATA(cc);
3155
158k
    if (recurse_check_bit(common, private_srcw[0]))
3156
158k
      private_count = 1;
3157
3158
158k
    offset = OVECTOR_PRIV(offset);
3159
158k
    if (recurse_check_bit(common, offset))
3160
38.5k
      {
3161
38.5k
      private_srcw[private_count] = offset;
3162
38.5k
      private_count++;
3163
38.5k
      }
3164
158k
    cc += 1 + LINK_SIZE + IMM2_SIZE;
3165
158k
    break;
3166
3167
34.6k
    case OP_COND:
3168
    /* Might be a hidden SCOND. */
3169
34.6k
    alternative = cc + GET(cc, 1);
3170
34.6k
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
3171
0
      {
3172
0
      private_srcw[0] = PRIVATE_DATA(cc);
3173
0
      if (recurse_check_bit(common, private_srcw[0]))
3174
0
        private_count = 1;
3175
0
      }
3176
34.6k
    cc += 1 + LINK_SIZE;
3177
34.6k
    break;
3178
3179
1.09M
    CASE_ITERATOR_PRIVATE_DATA_1
3180
1.09M
    private_srcw[0] = PRIVATE_DATA(cc);
3181
1.09M
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3182
996k
      private_count = 1;
3183
1.09M
    cc += 2;
3184
1.09M
#ifdef SUPPORT_UNICODE
3185
1.09M
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3186
1.09M
#endif
3187
1.09M
    break;
3188
3189
659k
    CASE_ITERATOR_PRIVATE_DATA_2A
3190
659k
    private_srcw[0] = PRIVATE_DATA(cc);
3191
659k
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3192
595k
      {
3193
595k
      private_count = 2;
3194
595k
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3195
595k
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3196
595k
      }
3197
659k
    cc += 2;
3198
659k
#ifdef SUPPORT_UNICODE
3199
659k
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3200
659k
#endif
3201
659k
    break;
3202
3203
150k
    CASE_ITERATOR_PRIVATE_DATA_2B
3204
150k
    private_srcw[0] = PRIVATE_DATA(cc);
3205
150k
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3206
118k
      {
3207
118k
      private_count = 2;
3208
118k
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3209
118k
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3210
118k
      }
3211
150k
    cc += 2 + IMM2_SIZE;
3212
150k
#ifdef SUPPORT_UNICODE
3213
150k
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3214
150k
#endif
3215
150k
    break;
3216
3217
422k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3218
422k
    private_srcw[0] = PRIVATE_DATA(cc);
3219
422k
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3220
360k
      private_count = 1;
3221
422k
    cc += 1;
3222
422k
    break;
3223
3224
698k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3225
698k
    private_srcw[0] = PRIVATE_DATA(cc);
3226
698k
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3227
500k
      {
3228
500k
      private_count = 2;
3229
500k
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3230
500k
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3231
500k
      }
3232
698k
    cc += 1;
3233
698k
    break;
3234
3235
300k
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3236
300k
    private_srcw[0] = PRIVATE_DATA(cc);
3237
300k
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3238
240k
      {
3239
240k
      private_count = 2;
3240
240k
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3241
240k
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3242
240k
      }
3243
300k
    cc += 1 + IMM2_SIZE;
3244
300k
    break;
3245
3246
130k
    case OP_CLASS:
3247
284k
    case OP_NCLASS:
3248
284k
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3249
1.33M
    case OP_XCLASS:
3250
1.38M
    case OP_ECLASS:
3251
1.38M
    i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3252
#else
3253
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3254
#endif
3255
1.38M
    if (PRIVATE_DATA(cc) != 0)
3256
610k
      {
3257
610k
      private_count = 1;
3258
610k
      private_srcw[0] = PRIVATE_DATA(cc);
3259
610k
      switch(get_class_iterator_size(cc + i))
3260
610k
        {
3261
193k
        case 1:
3262
193k
        break;
3263
3264
416k
        case 2:
3265
416k
        if (recurse_check_bit(common, private_srcw[0]))
3266
416k
          {
3267
416k
          private_count = 2;
3268
416k
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3269
416k
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3270
416k
          }
3271
416k
        break;
3272
3273
0
        default:
3274
0
        SLJIT_UNREACHABLE();
3275
0
        break;
3276
610k
        }
3277
610k
      }
3278
1.38M
    cc += i;
3279
1.38M
    break;
3280
3281
31.6k
    case OP_MARK:
3282
55.2k
    case OP_COMMIT_ARG:
3283
86.5k
    case OP_PRUNE_ARG:
3284
90.6k
    case OP_THEN_ARG:
3285
90.6k
    SLJIT_ASSERT(common->mark_ptr != 0);
3286
90.6k
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3287
761
      {
3288
761
      kept_shared_srcw[0] = common->mark_ptr;
3289
761
      kept_shared_count = 1;
3290
761
      }
3291
90.6k
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3292
985
      {
3293
985
      private_srcw[0] = common->control_head_ptr;
3294
985
      private_count = 1;
3295
985
      }
3296
90.6k
    cc += 1 + 2 + cc[1];
3297
90.6k
    break;
3298
3299
211k
    case OP_THEN:
3300
211k
    SLJIT_ASSERT(common->control_head_ptr != 0);
3301
211k
    if (recurse_check_bit(common, common->control_head_ptr))
3302
4.00k
      {
3303
4.00k
      private_srcw[0] = common->control_head_ptr;
3304
4.00k
      private_count = 1;
3305
4.00k
      }
3306
211k
    cc++;
3307
211k
    break;
3308
    /* Every other opcode owns no slots: step over it and restart the loop
    without reaching the transfer code below. */
3309
134M
    default:
3310
134M
    cc = next_opcode(common, cc);
3311
134M
    SLJIT_ASSERT(cc != NULL);
3312
134M
    continue;
3313
154M
    }
3314
  /* Private slots. Types that do not transfer them still advance stackptr. */
3315
20.1M
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3316
18.9M
    {
3317
18.9M
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3318
3319
27.9M
    for (i = 0; i < private_count; i++)
3320
8.99M
      {
3321
8.99M
      SLJIT_ASSERT(private_srcw[i] != 0);
3322
3323
8.99M
      if (!from_sp)
3324
6.74M
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)private_srcw[i]);
3325
3326
8.99M
      if (from_sp || type == recurse_swap_global)
3327
6.74M
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3328
3329
8.99M
      stackptr += sizeof(sljit_sw);
3330
8.99M
      }
3331
18.9M
    }
3332
1.24M
  else
3333
1.24M
    stackptr += sizeof(sljit_sw) * private_count;
3334
  /* Shared slots, handled the same way. */
3335
20.1M
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3336
15.1M
    {
3337
15.1M
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3338
3339
15.3M
    for (i = 0; i < shared_count; i++)
3340
193k
      {
3341
193k
      SLJIT_ASSERT(shared_srcw[i] != 0);
3342
3343
193k
      if (!from_sp)
3344
134k
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)shared_srcw[i]);
3345
3346
193k
      if (from_sp || type == recurse_swap_global)
3347
176k
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3348
3349
193k
      stackptr += sizeof(sljit_sw);
3350
193k
      }
3351
15.1M
    }
3352
4.98M
  else
3353
4.98M
    stackptr += sizeof(sljit_sw) * shared_count;
3354
  /* Kept-shared slots; these are not transferred by recurse_swap_global. */
3355
20.1M
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3356
5.97M
    {
3357
5.97M
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3358
3359
5.97M
    for (i = 0; i < kept_shared_count; i++)
3360
1.52k
      {
3361
1.52k
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3362
3363
1.52k
      if (!from_sp)
3364
799
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)kept_shared_srcw[i]);
3365
3366
1.52k
      if (from_sp || type == recurse_swap_global)
3367
721
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3368
3369
1.52k
      stackptr += sizeof(sljit_sw);
3370
1.52k
      }
3371
5.97M
    }
3372
14.1M
  else
3373
14.1M
    stackptr += sizeof(sljit_sw) * kept_shared_count;
3374
20.1M
  }
3375
3376
43.9k
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3377
3378
43.9k
delayed_mem_copy_finish(&status);
3379
43.9k
}
3380
3381
/* Scans the bracket that starts at cc (its end is taken from bracketend(cc))
and stores 1 through current_offset whenever an opcode in the range
OP_THEN..OP_THEN_ARG is met while current_offset is not NULL.

When the bracket is treated as having alternatives, current_offset is pointed
at the common->then_offsets entry of the first opcode of each alternative
(after any leading OP_REVERSE / OP_VREVERSE, and for the first alternative
also OP_CREF / OP_DNCREF). Nested brackets in the range OP_ASSERT..OP_SCOND
are processed by a recursive call that receives the current value of
current_offset.

Returns the end of the bracket; if the closing opcode is an OP_KET with
non-zero PRIVATE_DATA, the value is advanced by PRIVATE_DATA(cc + 1). */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3382
539k
{
3383
539k
PCRE2_SPTR end = bracketend(cc);
3384
539k
BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3385
3386
/* Assert captures *THEN verb even if it has no alternatives. */
3387
539k
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
3388
159k
  current_offset = NULL;
3389
380k
else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)
3390
92.9k
  has_alternatives = TRUE;
3391
/* A conditional block never captures. */
3392
287k
else if (*cc == OP_COND || *cc == OP_SCOND)
3393
23.5k
  has_alternatives = FALSE;
3394
3395
539k
cc = next_opcode(common, cc);
3396
3397
539k
if (has_alternatives)
3398
201k
  {
3399
201k
  switch (*cc)
3400
201k
    {
3401
16.7k
    case OP_REVERSE:
3402
16.7k
    case OP_CREF:
3403
16.7k
      cc += 1 + IMM2_SIZE;
3404
16.7k
      break;
3405
34.0k
    case OP_VREVERSE:
3406
34.0k
    case OP_DNCREF:
3407
34.0k
      cc += 1 + 2 * IMM2_SIZE;
3408
34.0k
      break;
3409
201k
    }
3410
3411
201k
  current_offset = common->then_offsets + (cc - common->start);
3412
201k
  }
3413
3414
11.1M
while (cc < end)
3415
10.6M
  {
3416
10.6M
  if (*cc >= OP_ASSERT && *cc <= OP_SCOND)
3417
537k
    {
3418
537k
    cc = set_then_offsets(common, cc, current_offset);
3419
537k
    continue;
3420
537k
    }
3421
3422
10.0M
  if (*cc == OP_ALT && has_alternatives)
3423
151k
    {
3424
151k
    cc += 1 + LINK_SIZE;
3425
3426
151k
    if (*cc == OP_REVERSE)
3427
50.4k
      cc += 1 + IMM2_SIZE;
3428
101k
    else if (*cc == OP_VREVERSE)
3429
8.02k
      cc += 1 + 2 * IMM2_SIZE;
3430
3431
151k
    current_offset = common->then_offsets + (cc - common->start);
3432
151k
    continue;
3433
151k
    }
3434
3435
9.91M
  if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3436
162k
    *current_offset = 1;
3437
9.91M
  cc = next_opcode(common, cc);
3438
9.91M
  }
3439
3440
539k
cc = end - 1 - LINK_SIZE;
3441
3442
/* Ignore repeats. */
3443
539k
if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)
3444
2.55k
  end += PRIVATE_DATA(cc + 1);
3445
3446
539k
return end;
3447
539k
}
3448
3449
#undef CASE_ITERATOR_PRIVATE_DATA_1
3450
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3451
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3452
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3453
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3454
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3455
3456
/* Returns TRUE when value has at most one bit set. Note that this test is
also TRUE for value == 0, which is not a power of two. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3457
5.64M
{
3458
5.64M
return (value & (value - 1)) == 0;
3459
5.64M
}
3460
3461
/* Walks the singly linked jump list and applies SET_LABEL(jump, label) to
every entry. The list itself is not modified. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3462
52.1M
{
3463
282M
while (list != NULL)
3464
230M
  {
3465
  /* sljit_set_label is clever enough to do nothing
3466
  if either the jump or the label is NULL. */
3467
230M
  SET_LABEL(list->jump, label);
3468
230M
  list = list->next;
3469
230M
  }
3470
52.1M
}
3471
3472
/* Prepends jump to the list whose head is *list. The node comes from
sljit_alloc_memory(); if that returns NULL, the jump is not recorded and
*list is left unchanged. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3473
230M
{
3474
230M
jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3475
230M
if (list_item)
3476
230M
  {
3477
230M
  list_item->next = *list;
3478
230M
  list_item->jump = jump;
3479
230M
  *list = list_item;
3480
230M
  }
3481
230M
}
3482
3483
/* Records a stub at the head of common->stubs. 'start' is stored as the jump
to be bound later by flush_stubs(), and 'quit' is a label created here with
LABEL(). Nothing is recorded if sljit_alloc_memory() returns NULL. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
3484
25.4M
{
3485
25.4M
DEFINE_COMPILER;
3486
25.4M
stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
3487
3488
25.4M
if (list_item)
3489
25.4M
  {
3490
25.4M
  list_item->start = start;
3491
25.4M
  list_item->quit = LABEL();
3492
25.4M
  list_item->next = common->stubs;
3493
25.4M
  common->stubs = list_item;
3494
25.4M
  }
3495
25.4M
}
3496
3497
/* Emits the code for every stub recorded in common->stubs. For each one, the
'start' jump is bound to the current position, a SLJIT_FAST_CALL jump is
added to the common->stackalloc list, and an unconditional jump back to the
stub's 'quit' label is emitted. The stub list is emptied afterwards. */
static void flush_stubs(compiler_common *common)
3498
434k
{
3499
434k
DEFINE_COMPILER;
3500
434k
stub_list *list_item = common->stubs;
3501
3502
25.8M
while (list_item)
3503
25.4M
  {
3504
25.4M
  JUMPHERE(list_item->start);
3505
25.4M
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
3506
25.4M
  JUMPTO(SLJIT_JUMP, list_item->quit);
3507
25.4M
  list_item = list_item->next;
3508
25.4M
  }
3509
434k
common->stubs = NULL;
3510
434k
}
3511
3512
/* Emits code that decrements COUNT_MATCH by one and, when the result is
zero, takes a jump that is added to the common->calllimit list. */
static SLJIT_INLINE void count_match(compiler_common *common)
3513
10.0M
{
3514
10.0M
DEFINE_COMPILER;
3515
3516
10.0M
OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3517
10.0M
add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3518
10.0M
}
3519
3520
/* Emits code that lowers STACK_TOP by size * SSIZE_OF(sw) and registers a
stub (see add_stub) that is entered when the new STACK_TOP compares less than
STACK_LIMIT. size must be positive. When DESTROY_REGISTERS is defined, extra
code overwrites TMP1, TMP3 and RETURN_ADDR (and, in SLJIT_DEBUG builds,
LOCAL0 and LOCAL1) with the value 12345. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, sljit_s32 size)
3521
25.4M
{
3522
/* May destroy all locals and registers except TMP2. */
3523
25.4M
DEFINE_COMPILER;
3524
3525
25.4M
SLJIT_ASSERT(size > 0);
3526
25.4M
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3527
#ifdef DESTROY_REGISTERS
3528
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3529
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3530
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3531
#if defined SLJIT_DEBUG && SLJIT_DEBUG
3532
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
3533
/* These two are also used by the stackalloc calls. */
3534
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);
3535
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);
3536
#endif
3537
#endif
3538
25.4M
add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3539
25.4M
}
3540
3541
/* Emits code that raises STACK_TOP by size * SSIZE_OF(sw), the reverse of
the adjustment made by allocate_stack(). size must be positive. */
static SLJIT_INLINE void free_stack(compiler_common *common, sljit_s32 size)
3542
23.1M
{
3543
23.1M
DEFINE_COMPILER;
3544
3545
23.1M
SLJIT_ASSERT(size > 0);
3546
23.1M
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3547
23.1M
}
3548
3549
/* Allocates a chunk of size bytes plus one leading sljit_uw with
SLJIT_MALLOC. The leading word stores the previous value of
common->read_only_data_head, and the new chunk becomes the head, so all
chunks form a singly linked list.

Returns a pointer just past the leading link word, or NULL when the compiler
is already in an error state or the allocation fails (in which case the
compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
3550
584
{
3551
584
DEFINE_COMPILER;
3552
584
sljit_uw *result;
3553
3554
584
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3555
0
  return NULL;
3556
3557
584
result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
3558
584
if (SLJIT_UNLIKELY(result == NULL))
3559
0
  {
3560
0
  sljit_set_compiler_memory_error(compiler);
3561
0
  return NULL;
3562
0
  }
3563
3564
584
*(void**)result = common->read_only_data_head;
3565
584
common->read_only_data_head = (void *)result;
3566
584
return result + 1;
3567
584
}
3568
3569
/* Emits code that loads jit_arguments.begin minus one code unit into
SLJIT_R0 and stores that value into the ovector slots after the first one.
length must be greater than 1.

For length < 8 the stores to OVECTOR(1) .. OVECTOR(length - 1) are unrolled.
Otherwise a loop performing length - 1 stores is emitted, using SLJIT_R1 as
the pointer and SLJIT_R2 as the counter. The loop uses a pre-update store
when the sljit_emit_mem_update() probe with SLJIT_MEM_SUPP returns
SLJIT_SUCCESS, and a plain store followed by a pointer increment otherwise. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3570
62.3k
{
3571
62.3k
DEFINE_COMPILER;
3572
62.3k
struct sljit_label *loop;
3573
62.3k
sljit_s32 i;
3574
3575
/* At this point we can freely use all temporary registers. */
3576
62.3k
SLJIT_ASSERT(length > 1);
3577
/* TMP1 returns with begin - 1. */
3578
62.3k
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3579
62.3k
if (length < 8)
3580
59.8k
  {
3581
143k
  for (i = 1; i < length; i++)
3582
83.5k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3583
59.8k
  }
3584
2.49k
else
3585
2.49k
  {
3586
2.49k
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3587
0
    {
3588
0
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3589
0
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3590
0
    loop = LABEL();
3591
0
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3592
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3593
0
    JUMPTO(SLJIT_NOT_ZERO, loop);
3594
0
    }
3595
2.49k
  else
3596
2.49k
    {
3597
2.49k
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3598
2.49k
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3599
2.49k
    loop = LABEL();
3600
2.49k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3601
2.49k
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3602
2.49k
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3603
2.49k
    JUMPTO(SLJIT_NOT_ZERO, loop);
3604
2.49k
    }
3605
2.49k
  }
3606
62.3k
}
3607
3608
/* Emits code that stores zero into every machine word of the local area
from common->early_fail_start_ptr up to (not including)
common->early_fail_end_ptr. The area must not be empty.

Three code shapes are produced depending on the area size:
  - exactly one word: a single store of an immediate zero;
  - up to six words: one store per word;
  - larger: a loop that clears three words per iteration, followed by one or
    two extra stores for the remainder (word count modulo 3).

Except in the single-word case, the zero comes from TMP3 (loaded with 0
first) when sljit_get_register_index() is non-negative for TMP3 and the CPU
does not report SLJIT_HAS_ZERO_REGISTER; otherwise an immediate is used. The
loop form uses TMP1 as the running pointer and TMP2 as the end pointer. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3609
12.6k
{
3610
12.6k
DEFINE_COMPILER;
3611
12.6k
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3612
12.6k
sljit_u32 uncleared_size;
3613
12.6k
sljit_s32 src = SLJIT_IMM;
3614
12.6k
sljit_s32 i;
3615
12.6k
struct sljit_label *loop;
3616
3617
12.6k
SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3618
3619
12.6k
if (size == sizeof(sljit_sw))
3620
3.45k
  {
3621
3.45k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3622
3.45k
  return;
3623
3.45k
  }
3624
3625
9.23k
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3626
9.23k
  {
3627
9.23k
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3628
9.23k
  src = TMP3;
3629
9.23k
  }
3630
3631
9.23k
if (size <= 6 * sizeof(sljit_sw))
3632
7.89k
  {
3633
34.3k
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3634
26.4k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3635
7.89k
  return;
3636
7.89k
  }
3637
3638
1.33k
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3639
/* Bytes left over after the three-words-per-iteration loop. */
3640
1.33k
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3641
3642
1.33k
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3643
3644
1.33k
loop = LABEL();
3645
1.33k
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3646
1.33k
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3647
1.33k
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3648
1.33k
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3649
1.33k
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3650
3651
1.33k
if (uncleared_size >= sizeof(sljit_sw))
3652
964
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3653
3654
1.33k
if (uncleared_size >= 2 * sizeof(sljit_sw))
3655
473
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3656
1.33k
}
3657
3658
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
  /* Emits code which overwrites the slots OVECTOR(2) .. OVECTOR(length - 1)
  with the value stored in OVECTOR(1), clears the mark and control head
  slots when they are in use, and finally reloads TMP1 (from start_ptr)
  and STACK_TOP (from the "end" field of the sljit stack). */
  DEFINE_COMPILER;
  struct sljit_label *fill_loop;
  int slot;

  SLJIT_ASSERT(length > 1);

  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (length > 2)
  {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  }

  if (length < 8)
  {
    /* Only a few slots: emit one store for each of them. */
    slot = 2;
    while (slot < length)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
      slot++;
    }
  }
  else if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  {
    /* Store with pre-update is available: TMP2 starts one word before the
    first slot to be written. STACK_TOP is used as the loop counter. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    fill_loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }
  else
  {
    /* Generic loop: store, advance TMP2 by one word, count down STACK_TOP. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    fill_loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }

  /* Start reloading STACK_TOP. With virtual registers the arguments pointer
  is copied first and dereferenced after the two stores below. */
  if (HAS_VIRTUAL_REGISTERS)
  {
    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
  }
  else
  {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
  }

  if (common->mark_ptr != 0)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  }

  if (common->control_head_ptr != 0)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  }

  if (HAS_VIRTUAL_REGISTERS)
  {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
  }

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3711
3712
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
  /* Walks a chain of records linked through their first word. The second
  word is the record type. For a type_mark record the third word is a
  pointer to a name and the fourth word is the value returned when that
  name equals skip_arg. Returns 0 when the chain ends without a match. */
  for (; current != NULL; current = (sljit_sw*)current[0])
  {
    if (current[1] == type_mark)
    {
      if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
      {
        return current[3];
      }
    }
    else if (current[1] != type_then_trap)
    {
      /* Only the two record types above are expected here. */
      SLJIT_UNREACHABLE();
    }

    /* The link is either the terminator or points to a higher address. */
    SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  }

  return 0;
}
3735
3736
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
  /* Emits the code which converts the pointer pairs stored in the local
  ovector area into PCRE2_SIZE offsets (relative to the "begin" argument),
  writes them to the ovector of the match data, and then computes the
  return value of the match in SLJIT_RETURN_REG. */
  DEFINE_COMPILER;
  struct sljit_label *copy_loop;
  struct sljit_label *scan_loop;
  BOOL use_pre_update;
  sljit_s32 store_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

  /* At this point we can freely use all registers. */
  OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

  if (!HAS_VIRTUAL_REGISTERS)
  {
    OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
    if (common->mark_ptr != 0)
    {
      OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    }
    OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
    if (common->mark_ptr != 0)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
    }
    OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
  else
  {
    /* The arguments pointer is first copied into SLJIT_R0. */
    OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
    if (common->mark_ptr != 0)
    {
      OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    }
    OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
    if (common->mark_ptr != 0)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
    }
    OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
      SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

  /* Query whether a load with pre-update of the base register is supported. */
  use_pre_update = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (use_pre_update ? sizeof(sljit_sw) : 0));
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

  /* Copy loop: SLJIT_R1 counts down from the oveccount argument. */
  copy_loop = LABEL();

  if (!use_pre_update)
  {
    OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
    OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
  else
  {
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
  }

  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
  OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
  /* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

  SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
  OP1(store_opcode, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, copy_loop);

  /* Calculate the return value, which is the maximum ovector value. */
  if (topbracket <= 1)
  {
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    return;
  }

  /* Scan the pair start slots downwards while they still hold the value
  saved in SLJIT_S2 (the original content of OVECTOR(1)). */
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
  {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    scan_loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, scan_loop);
  }
  else
  {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    scan_loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, scan_loop);
  }

  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3832
3833
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
  /* Emits the code which reports a partial match: the return register is set
  to PCRE2_ERROR_PARTIAL, the first two ovector entries of the match data
  receive the offsets of the partial match start and of STR_END (both
  relative to the "begin" argument), and control is passed to "quit". */
  DEFINE_COMPILER;
  sljit_s32 size_store_op = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
  sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;

  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
    && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

  if (args_reg != ARGUMENTS)
  {
    OP1(SLJIT_MOV, args_reg, 0, ARGUMENTS, 0);
  }

  /* The start is taken from hit_start in soft mode, start_ptr otherwise. */
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
    common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

  /* Store match begin and end. */
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

  /* First entry: start pointer converted to a code unit offset. */
  OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP1(size_store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

  /* Second entry: STR_END converted the same way. */
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP1(size_store_op, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

  JUMPTO(SLJIT_JUMP, quit);
}
3870
3871
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
  /* In the partial matching modes, emits code which stores STR_PTR into the
  start_used_ptr slot unless the comparison below skips the store. Emits
  nothing in the other modes. May destroy TMP1. */
  DEFINE_COMPILER;
  struct sljit_jump *skip_store;

  if (common->mode != PCRE2_JIT_PARTIAL_SOFT && common->mode != PCRE2_JIT_PARTIAL_HARD)
  {
    return;
  }

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    /* The value of -1 must be kept for start_used_ptr! */
    OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
    /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Overwriting
    is not necessary when start_used_ptr == STR_PTR, but it does no harm
    either. */
    skip_store = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
  else
  {
    skip_store = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(skip_store);
}
3894
3895
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
  /* Returns non-zero when the character at cc differs from its other case.
  With utf or ucp set, characters above 127 are looked up with
  UCD_OTHERCASE; everything else uses the common->fcc table. */
  unsigned int ch = *cc;

#ifdef SUPPORT_UNICODE
  if (common->utf || common->ucp)
  {
    if (common->utf)
    {
      /* Decode the complete character. */
      GETCHAR(ch, cc);
    }

    if (ch > 127)
    {
      return ch != UCD_OTHERCASE(ch);
    }

    return common->fcc[ch] != ch;
  }
#endif

  /* The table is only consulted for values accepted by MAX_255. */
  if (!MAX_255(ch))
  {
    return FALSE;
  }

  return common->fcc[ch] != ch;
}
3920
3921
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
  /* Returns the other case of c: UCD_OTHERCASE for characters above 127
  when utf or ucp is set, a lookup through common->fcc otherwise. */
#ifdef SUPPORT_UNICODE
  if (c > 127 && (common->utf || common->ucp))
  {
    return UCD_OTHERCASE(c);
  }
#endif

  return TABLE_GET(c, common->fcc, c);
}
3930
3931
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
  /* Detects whether the character at cc and its other case differ in exactly
  one bit. Returns 0 when they do not. Otherwise the low 8 bits of the result
  hold the differing bit (shifted down into a single byte) and the bits above
  them hold the position value computed below. The character must have an
  other case (asserted). */
  unsigned int c, oc, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  int extra;
#endif

  c = *cc;

#ifdef SUPPORT_UNICODE
  if (common->utf)
  {
    GETCHAR(c, cc);
  }

  if (!common->utf && !common->ucp)
  {
    oc = TABLE_GET(c, common->fcc, c);
  }
  else if (c > 127)
  {
    oc = UCD_OTHERCASE(c);
  }
  else
  {
    oc = common->fcc[c];
  }
#else
  oc = TABLE_GET(c, common->fcc, c);
#endif

  SLJIT_ASSERT(c != oc);

  diff = c ^ oc;

#ifndef EBCDIC
  /* Optimized for English alphabet. */
  if (c <= 127 && diff == 0x20)
  {
    return (0 << 8) | 0x20;
  }
#endif

  /* Since c != oc, they must have at least 1 bit difference. */
  if (!is_powerof2(diff))
  {
    return 0;
  }

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
  if (common->utf && c > 127)
  {
    /* Start from the number of extra bytes and step back one byte for every
    group of six low-order bits which does not contain the differing bit. */
    extra = GET_EXTRALEN(*cc);
    while ((diff & 0x3f) == 0)
    {
      extra--;
      diff >>= 6;
    }
    return (extra << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
  return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
  if (common->utf && c > 65535)
  {
    /* A difference in the low ten bits is reported with position 2 or 3;
    a higher difference is shifted down and handled by the generic case. */
    if (diff < (1u << 10))
    {
      return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
    }
    diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
  return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
4009
4010
static void check_partial(compiler_common *common, BOOL force)
{
  /* Emits the code which checks whether a partial match has occurred.
  Does not modify registers. Nothing is emitted in PCRE2_JIT_COMPLETE mode,
  and "force" must not be set in that mode. */
  DEFINE_COMPILER;
  struct sljit_jump *skip = NULL;

  SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

  if (common->mode == PCRE2_JIT_COMPLETE)
  {
    return;
  }

  /* Optional guard which jumps over the partial match handling. */
  if (!(force || common->allow_empty_partial))
  {
    skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }
  else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    /* Soft mode: only clear the hit_start slot. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    /* The label does not exist yet: queue the jump instead. */
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  if (skip != NULL)
  {
    JUMPHERE(skip);
  }
}
4039
4040
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
  /* Emits an end-of-subject check whose jumps are collected in end_reached.
  In the partial matching modes it also emits the partial match handling.
  Does not affect registers. Usually used in a tight spot. */
  DEFINE_COMPILER;
  struct sljit_jump *not_at_end;

  if (common->mode == PCRE2_JIT_COMPLETE)
  {
    add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  /* Both partial modes start with the same start_used_ptr comparison. */
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(not_at_end);
}
4069
4070
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
  /* Emits an end-of-subject check whose failure jumps are collected in
  backtracks. In the partial matching modes it also emits the partial
  match handling. */
  DEFINE_COMPILER;
  struct sljit_jump *not_at_end;

  if (common->mode == PCRE2_JIT_COMPLETE)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  /* Partial matching mode. */
  not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  if (!common->allow_empty_partial)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  }
  else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    /* Soft mode: clear the hit_start slot and backtrack. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(not_at_end);
}
4102
4103
static void process_partial_match(compiler_common *common)
{
  /* Emits the partial match handling for the two partial matching modes.
  Nothing is emitted in any other mode. */
  DEFINE_COMPILER;
  struct sljit_jump *skip_clear;

  /* Partial matching mode. */
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    /* Clear hit_start unless start_used_ptr >= STR_PTR. */
    skip_clear = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    JUMPHERE(skip_clear);
    return;
  }

  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  {
    return;
  }

  /* Hard mode: go to the partial match exit when start_used_ptr < STR_PTR. */
  if (common->partialmatchlabel != NULL)
  {
    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  }
}
4123
4124
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
  /* Emits a jump to "label" which is taken while STR_PTR is below STR_END,
  followed by the partial match handling of process_partial_match(). */
  DEFINE_COMPILER;

  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

  process_partial_match(common);
}
4131
4132
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
  /* Reads the character into TMP1, keeps STR_PTR.
  Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.
  When "max" is below the first value which needs multi-unit decoding
  (128 for 8 bit, 0xd800 for 16 bit UTF), only the first code unit is
  loaded. */
  DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

  SLJIT_UNUSED_ARG(max);
  SLJIT_UNUSED_ARG(dst);
  SLJIT_UNUSED_ARG(dstw);
  SLJIT_UNUSED_ARG(backtracks);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf)
  {
    if (max < 128)
    {
      return;
    }

    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    /* STR_PTR is saved in dst around the call of the reader helper. */
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
    JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (common->utf)
  {
    if (max < 0xd800)
    {
      return;
    }

    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

    if (!common->invalid_utf)
    {
      /* TMP2 contains the high surrogate. */
      single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    else
    {
      /* Surrogates are decoded and validated by the reader helper;
      STR_PTR is saved in dst around the call. */
      single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
      OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
      if (backtracks != NULL)
      {
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
      }
    }

    JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (common->invalid_utf)
  {
    if (max < 0xd800)
    {
      return;
    }

    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

    if (backtracks != NULL)
    {
      /* Values from 0x110000 upwards and surrogates backtrack. */
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
    else
    {
      /* No backtrack list: replace such values with INVALID_UTF_CHAR. */
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
      SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4215
4216
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
  /* Reads one character back without moving STR_PTR. TMP2 must
  contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.
  In the 8 and 16 bit UTF paths only the preceding code unit is loaded
  when "max" is below 128 or 0xd800 respectively. */
  DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

  SLJIT_UNUSED_ARG(max);
  SLJIT_UNUSED_ARG(backtracks);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf)
  {
    if (max < 128)
    {
      return;
    }

    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    if (!common->invalid_utf)
    {
      add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
    }
    else
    {
      add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
      if (backtracks != NULL)
      {
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
      }
    }
    JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (common->utf)
  {
    if (max < 0xd800)
    {
      return;
    }

    if (!common->invalid_utf)
    {
      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
      single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
      /* TMP2 contains the low surrogate. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    else
    {
      /* Units from 0xd800 upwards are handled by the helper. */
      single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
      add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
      if (backtracks != NULL)
      {
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
      }
    }

    JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (common->invalid_utf)
  {
    /* Values from 0x110000 upwards and surrogates backtrack. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4283
4284
5.81M
/* Option bits tested by read_char() in its "options" argument. */
/* When set, read_char() does not take its early "max" return in UTF mode,
so STR_PTR is also updated for multi-unit characters. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
4285
7.89k
/* In invalid UTF mode, selects the utfreadnewline_invalid helper instead of
utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
4286
7.89k
/* Combination of the two bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
4287
304k
/* When set, read_char() skips its invalid UTF path even if
common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4288
4289
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4290
  jump_list **backtracks, sljit_u32 options)
4291
6.20M
{
4292
/* Reads the precise value of a character into TMP1, if the character is
4293
between min and max (c >= min && c <= max). Otherwise it returns with a value
4294
outside the range. Does not check STR_END. */
4295
6.20M
DEFINE_COMPILER;
4296
6.20M
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4297
6.20M
struct sljit_jump *jump;
4298
6.20M
#endif
4299
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4300
struct sljit_jump *jump2;
4301
#endif
4302
4303
6.20M
SLJIT_UNUSED_ARG(min);
4304
6.20M
SLJIT_UNUSED_ARG(max);
4305
6.20M
SLJIT_UNUSED_ARG(backtracks);
4306
6.20M
SLJIT_UNUSED_ARG(options);
4307
6.20M
SLJIT_ASSERT(min <= max);
4308
4309
6.20M
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4310
6.20M
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4311
4312
6.20M
#ifdef SUPPORT_UNICODE
4313
#if PCRE2_CODE_UNIT_WIDTH == 8
4314
if (common->utf)
4315
  {
4316
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4317
4318
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4319
    {
4320
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4321
4322
    if (options & READ_CHAR_UTF8_NEWLINE)
4323
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4324
    else
4325
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4326
4327
    if (backtracks != NULL)
4328
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4329
    JUMPHERE(jump);
4330
    return;
4331
    }
4332
4333
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4334
  if (min >= 0x10000)
4335
    {
4336
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4337
    if (options & READ_CHAR_UPDATE_STR_PTR)
4338
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4339
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4340
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4341
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4342
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4343
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4344
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4345
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4346
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4347
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4348
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4349
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4350
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4351
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4352
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4353
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4354
    JUMPHERE(jump2);
4355
    if (options & READ_CHAR_UPDATE_STR_PTR)
4356
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4357
    }
4358
  else if (min >= 0x800 && max <= 0xffff)
4359
    {
4360
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4361
    if (options & READ_CHAR_UPDATE_STR_PTR)
4362
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4363
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4364
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4365
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4366
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4367
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4368
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4369
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4370
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4371
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4372
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4373
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4374
    JUMPHERE(jump2);
4375
    if (options & READ_CHAR_UPDATE_STR_PTR)
4376
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4377
    }
4378
  else if (max >= 0x800)
4379
    {
4380
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4381
    }
4382
  else if (max < 128)
4383
    {
4384
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4385
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4386
    }
4387
  else
4388
    {
4389
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4390
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4391
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4392
    else
4393
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4394
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4395
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4396
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4397
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4398
    if (options & READ_CHAR_UPDATE_STR_PTR)
4399
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4400
    }
4401
  JUMPHERE(jump);
4402
  }
4403
#elif PCRE2_CODE_UNIT_WIDTH == 16
4404
6.20M
if (common->utf)
4405
3.27M
  {
4406
3.27M
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4407
4408
2.31M
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4409
0
    {
4410
0
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4411
0
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4412
4413
0
    if (options & READ_CHAR_UTF8_NEWLINE)
4414
0
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4415
0
    else
4416
0
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4417
4418
0
    if (backtracks != NULL)
4419
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4420
0
    JUMPHERE(jump);
4421
0
    return;
4422
0
    }
4423
4424
2.31M
  if (max >= 0x10000)
4425
1.34M
    {
4426
1.34M
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4427
1.34M
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4428
    /* TMP2 contains the high surrogate. */
4429
1.34M
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4430
1.34M
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4431
1.34M
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4432
1.34M
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4433
1.34M
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4434
1.34M
    JUMPHERE(jump);
4435
1.34M
    return;
4436
1.34M
    }
4437
4438
  /* Skip low surrogate if necessary. */
4439
966k
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4440
4441
966k
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4442
966k
    {
4443
966k
    if (options & READ_CHAR_UPDATE_STR_PTR)
4444
598k
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4445
966k
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4446
966k
    if (options & READ_CHAR_UPDATE_STR_PTR)
4447
598k
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4448
966k
    if (max >= 0xd800)
4449
402k
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4450
966k
    }
4451
0
  else
4452
0
    {
4453
0
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4454
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4455
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4456
0
    if (max >= 0xd800)
4457
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4458
0
    JUMPHERE(jump);
4459
0
    }
4460
966k
  }
4461
#elif PCRE2_CODE_UNIT_WIDTH == 32
4462
if (common->invalid_utf)
4463
  {
4464
  if (backtracks != NULL)
4465
    {
4466
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4467
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4468
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4469
    }
4470
  else
4471
    {
4472
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4473
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4474
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4475
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4476
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4477
    }
4478
  }
4479
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4480
6.20M
#endif /* SUPPORT_UNICODE */
4481
6.20M
}
4482
4483
static void skip_valid_char(compiler_common *common)
{
/* Emits code that moves STR_PTR past one character. No validity or STR_END
checks are generated here. In UTF-8 and UTF-16 mode TMP1 is overwritten. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  /* Fetch the leading code unit and step over it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Lead bytes below 0xc0 need no further adjustment. For the others the
  value found in utf8_table4 (indexed by lead byte - 0xc0) is added to
  STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* Units below 0xd800 are complete characters. Otherwise TMP1 becomes the
  flag "(unit & 0xfc00) == 0xd800", which is shifted left by one and added
  to STR_PTR, so one extra 16 bit unit is skipped after such a unit. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Without UTF every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4513
4514
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4515
4516
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether looking at character codes below 128 is enough to decide a
match. This is the case when bytes 16..31 of the bitmap are uniform: every
byte is 0xff when nclass is set, and every byte is zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4532
4533
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Loads the ctypes entry of the next character into TMP1 and advances
STR_PTR. The entry is only meaningful for characters below 128; the code
relies on every ctypes entry above 127 being zero. STR_END is not checked.
TMP2 is used as scratch.

When negated is set, the rest of a multi-byte character is consumed as well.
In invalid UTF mode this is done by utfreadchar_invalid, and an ill-formed
sequence jumps to backtracks; TMP1 is zero afterwards. */
DEFINE_COMPILER;
struct sljit_jump *below_128;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

below_128 = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Add the utf8_table4 entry of the lead byte to STR_PTR. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* The decoder expects the first byte in TMP1. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  /* The decoded character is not below 128, so its type is zero. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }

JUMPHERE(below_128);
}
4568
4569
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4570
4571
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type (ctypes entry) of the next character into TMP1
and updates STR_PTR. Does not check STR_END for the first code unit. TMP2 is
used as scratch. Characters above 255 have no ctypes entry and yield zero.

backtracks: receives jumps taken for ill-formed input in invalid UTF mode.
negated:    when set, the whole character is consumed even if its type is
            known to be zero (the caller matches on "type bit not set"). */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256, so only
    the second byte is read here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    /* If TMP1 is in 0x80-0xbf range, TMP2 is also increased by (0x2 << 6),
    which compensates for subtracting 0xc2 instead of 0xc0 above. */
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be a continuation byte (0x80-0xbf). The check has
    to be done on TMP1 (second byte - 0x80): TMP2 holds the combined value,
    which is at least 0x80 for every well formed sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): do_utfreadchar_invalid documents that it expects the
    first byte in TMP1, but at this point TMP1 holds the ctypes value loaded
    above and the first byte is in TMP2 (read_char7_type copies TMP2 to TMP1
    before the same call). Confirm whether a move is missing here. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch free form: select STR_PTR + 1 unit when TMP2 < 0x400. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a unit outside 0xd800-0xdfff needs no further work.
  Otherwise it must be a high surrogate (0xd800-0xdbff) that is followed,
  before STR_END, by a low surrogate (0xdc00-0xdfff). */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4689
4690
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code that steps STR_PTR back by one character. STR_PTR and TMP1 are
changed. With must_be_valid set TMP2 is left alone; otherwise the caller has
to put the start of the subject buffer into TMP2, and TMP2 is destroyed.
STR_PTR is not modified for invalid character sequences. When backtracks is
not NULL, the invalid UTF paths add a jump to it for ill-formed input. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *retry;

if (common->utf)
  {
  if (must_be_valid || !common->invalid_utf)
    {
    /* Well formed input: step back over bytes for as long as the byte that
    was just passed is a continuation byte (10xxxxxx). */
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, retry);
    return;
    }

  /* Possibly ill-formed input: a byte below 0x80 is done after one step,
  everything else is handled by the utfmoveback_invalid helper, which
  leaves zero in TMP1 on failure. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  JUMPHERE(done);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Units outside 0xd800-0xdfff are done; surrogates are handled by the
    utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(done);
    return;
    }

  /* Skip low surrogate if necessary: TMP1 becomes the flag
  "(unit & 0xfc00) == 0xdc00", scaled to a code unit size. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above are rejected before moving. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: step back only when the value is below 0x110000. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width case: one code unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4776
4777
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character in TMP1 is a newline of the given nltype and
adds the resulting jump(s) to backtracks. With jumpifmatch set the jump is
taken when the character is a newline, otherwise when it is not. TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the newline is either CR or NL. */
if (!jumpifmatch)
  {
  /* CR bypasses the "not NL" test. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4809
4810
#ifdef SUPPORT_UNICODE
4811
4812
#if PCRE2_CODE_UNIT_WIDTH == 8
4813
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoder for a multi-byte UTF-8 character; no validity checks are
made. On entry TMP1 holds the first byte (>= 0xc0) and STR_PTR points to the
byte after it. On return TMP1 holds the character value and STR_PTR has been
moved past the character. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP1 = (byte0 << 6) | (byte1 & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero among the length bits of byte0. Bit 0x800
is the 0x20 bit of byte0 after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: remove the shifted 0xc0 marker. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At least three bytes: merge byte2 as well. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is the 0x10 bit of byte0 after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: remove the shifted 0xe0 marker. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: remove the shifted 0xf0 marker, then merge byte3. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4857
4858
static void do_utfreadtype8(compiler_common *common)
{
/* Fast lookup of the character type of a multi-byte UTF-8 character; no
validity checks are made. On entry TMP2 holds the first byte (>= 0xc0) and
STR_PTR points to the byte after it. On return TMP1 holds the ctypes entry,
or zero for characters that have no entry, and STR_PTR has been moved past
the character. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* The 0x20 bit of the first byte is clear only for two byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that the
character is at least 256. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. Longer sequences get
type zero, and the utf8_table4 entry of the first byte is added to
STR_PTR. */
JUMPHERE(three_or_more);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4893
4894
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow, checking decoder for a multi-byte UTF-8 character. On entry TMP1
holds the first byte (>= 0xc0) and STR_PTR points to the byte after it. On
return TMP1 holds the character value, or INVALID_UTF_CHAR when the sequence
is ill-formed or runs past STR_END. STR_PTR is undefined for invalid
characters. TMP2 is used as scratch.

When the CPU has conditional moves, several checks replace TMP1 with a value
that a later check turns into INVALID_UTF_CHAR instead of branching; the
matching bad[] entries are then NULL. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *bad_label;
struct sljit_jump *bad[11];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: TMP1 is the character, only one byte was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* 0x20000 is rejected by the second range check of the three byte path. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
  bad[2] = NULL;
  }
else
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

three_byte_tail = LABEL();

/* TMP1 is the character + 0x20000 here. Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
  bad[3] = NULL;
  }
else
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject values below 0x800 (these do not need three bytes). */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[4] = NULL;
  }
else
  bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Zero is rejected by the range check below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
  bad[5] = NULL;
  }
else
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 becomes the character - 0x10000, which must be below 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  bad[6] = NULL;
  }
else
  bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes follow the first byte. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[10] = NULL;
  }
else
  bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common exit for every failed check that was emitted as a branch. */
bad_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(bad[i], bad_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5052
5053
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoder for a multi-byte UTF-8 character, specialized for newline
detection. On entry TMP1 holds the first byte (>= 0xc0) and STR_PTR points
to the byte after it. On return TMP1 holds 0x85, 0x2028 or 0x2029 when one of
these is decoded (only when the newline type is NLTYPE_ANY), and
INVALID_UTF_CHAR in every other case, after the following continuation bytes
have been skipped. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *at_end;
struct sljit_jump *lead_c2;
struct sljit_jump *lead_e2;
struct sljit_jump *not_trail;
struct sljit_jump *loop_end;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* Use a post-update load when the target supports it. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read was not a continuation byte: step back before it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Read the second byte. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only the 0xc2 and 0xe2 first bytes can start a newline. */
lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte that was read last. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_trail = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
loop_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read was not a continuation byte: step back before it. */
JUMPHERE(not_trail);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(at_end);
JUMPHERE(loop_end);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(lead_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(lead_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be a continuation byte; its low six bits are then
combined with 0x2000. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5143
5144
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the helper that moves STR_PTR back over one multi-byte UTF-8
character, checking byte ranges on the way.

As used below: TMP1 holds a code unit that must be less than 0xc0, and TMP2
is the lowest address STR_PTR may reach. NOTE(review): the pointer arithmetic
suggests that the caller has already stepped STR_PTR back by one code unit
and loaded that unit into TMP1 - confirm at the call sites.

On success TMP1 is set to 1 and STR_PTR addresses the accepted lead byte.
On failure TMP1 is set to 0 and STR_PTR ends up one code unit above the
value it had on entry. */
DEFINE_COMPILER;
struct sljit_jump *not_lead;
struct sljit_jump *near_start;
struct sljit_jump *before_start;
struct sljit_jump *reject_far[4];
struct sljit_jump *reject_near[2];
struct sljit_label *accept;
struct sljit_label *reject;
sljit_s32 idx;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
reject_far[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Common case: all three candidate lead positions are at or above TMP2. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Lead byte of a two-byte sequence (0xc0-0xdf)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a two-byte lead: the unit must be in 0x80-0xbf to continue. Then
check for the lead byte of a three-byte sequence (0xe0-0xef). */
JUMPHERE(not_lead);
reject_far[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a three-byte lead: again require 0x80-0xbf, then accept only
0xf0-0xf4 as the lead byte of a four-byte sequence. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
reject_far[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
reject_far[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit: STR_PTR already addresses the lead byte. */
accept = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to TMP2: each position is bounds checked before it is read.
Two-byte sequence first. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

before_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, accept);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
reject_near[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
reject_near[1] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, accept);

/* A four-byte sequence cannot fit on this path. Failure exit for the
position two units below the entry value: add three to restore. */
reject = LABEL();
sljit_set_label(reject_near[0], reject);
sljit_set_label(reject_near[1], reject);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* STR_PTR is one unit below the entry value here; moving it two further
down lets this path share the "add four" failure exit below. */
JUMPHERE(before_start);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the position three units below the entry value. */
reject = LABEL();
for (idx = 3; idx >= 0; idx--)
  sljit_set_label(reject_far[idx], reject);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5239
5240
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the helper that peeks at the multi-byte UTF-8 character ending just
before STR_PTR. STR_PTR is left untouched; the decoded value is built in TMP1
and TMP2 is used as scratch. No range or bounds checks are emitted here, so
the input is presumably known to be valid UTF-8 - confirm at the callers. */
DEFINE_COMPILER;
struct sljit_jump *is_two_byte;
struct sljit_jump *is_three_byte;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A lead byte in 0xc0-0xdf at offset -2 means a two-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
is_two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A lead byte in 0xe0-0xef at offset -3 means a three-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
is_three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise four bytes: combine the lead at -4 with the unit at -3, whose
0xe0 bias is converted to the 0x80 continuation bias. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the unit at -2. */
JUMPHERE(is_three_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the unit at -1. */
JUMPHERE(is_two_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5276
5277
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the validating helper that peeks at the multi-byte UTF-8 character
ending just before STR_PTR. STR_PTR is not modified.

As used below: TMP1 holds a code unit that must be less than 0xc0 (presumably
the unit at offset -1, already loaded by the caller - confirm at the call
sites) and TMP2 holds the lowest readable address. TMP2 is clobbered. The
result in TMP1 is the decoded character or INVALID_UTF_CHAR. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_select = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *near_start;
struct sljit_jump *longer;
struct sljit_label *two_byte_tail;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid;
/* Entries 2, 3 and 5 stay NULL when conditional select is available. */
struct sljit_jump *bad[8];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Take the slow path when at most three code units lie between TMP2 and
STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
near_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence: lead byte in 0xc2-0xdf at offset -2. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
longer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* TMP1 is in the 0x80-0xbf range and TMP2 was biased by 0xc2, so the sum
also adds the missing (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The unit at -2 must be in 0x80-0xbf; fold its payload into TMP1. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence: lead byte in 0xe0-0xef at offset -3. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Values in 0xd800-0xdfff are rejected. With select they are mapped to a
value that the following "less than 0x800" test turns into
INVALID_UTF_CHAR. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_select)
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  bad[2] = NULL;
  }

/* Values below 0x800 are rejected as well. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_select)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[3] = NULL;
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The unit at -3 must be in 0x80-0xbf; fold its payload into TMP1. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence: lead byte at offset -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* TMP1 was reduced by 0x10000 above, so the parts are combined with ADD
rather than OR. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* The biased value must be below 0x100000. */
if (!use_select)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  bad[5] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Slow path. Branch away when at most two code units are available;
otherwise two and three byte sequences are still possible. */
JUMPHERE(near_start);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
near_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_tail);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most two code units are available: fail when there are fewer than
two, otherwise only a two-byte sequence can match. */
JUMPHERE(near_start);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

/* Common failure exit. NULL entries are passed to sljit_set_label() as in
the original code. */
invalid = LABEL();
idx = 0;
while (idx < 8)
  {
  sljit_set_label(bad[idx], invalid);
  idx++;
  }

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5408
5409
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5410
5411
#if PCRE2_CODE_UNIT_WIDTH == 16
5412
5413
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow path that decodes a UTF-16 surrogate pair. TMP1 holds the
first code unit (>= 0xd800). The decoded character, or INVALID_UTF_CHAR, is
returned in TMP1. STR_PTR is undefined for invalid characters.

NOTE(review): the arithmetic below (TMP2 << 10, plus 0x10000) only yields a
code point if TMP2 holds the first unit minus 0xd800 on entry - confirm at
the call sites. */
DEFINE_COMPILER;
struct sljit_jump *not_high;
struct sljit_jump *at_end;
struct sljit_jump *not_low;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* The first unit must be below 0xdc00 and a second unit must exist. */
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in 0xdc00-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
not_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_high);
JUMPHERE(at_end);
JUMPHERE(not_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5444
5445
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow path of the newline-specialized UTF-16 reader. TMP1 holds
the first code unit (>= 0xd800). The exact character value is not computed:
TMP1 is set to 0x10000 when the first unit is below 0xdc00 and another code
unit is available, and to INVALID_UTF_CHAR otherwise. STR_PTR advances by one
code unit only when the next unit is in 0xdc00-0xdfff. TMP2 is used as
scratch. */
DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *not_high;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the second unit is in 0xdc00-0xdfff, 0 otherwise;
it is then scaled to a byte offset and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(at_end);
JUMPHERE(not_high);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5476
5477
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the UTF-16 helper that moves STR_PTR one more code unit back when
the unit at offset -1 is in 0xd800-0xdbff.

As used below: the value in TMP1 must be at least 0x400 and TMP2 must be
below STR_PTR, otherwise the helper fails. NOTE(review): TMP1 is presumably a
biased copy of the code unit already stepped over and TMP2 the start of the
subject - confirm at the call sites.

On success TMP1 is 1 and STR_PTR is one code unit lower; on failure TMP1 is 0
and STR_PTR is one code unit higher than on entry. */
DEFINE_COMPILER;
struct sljit_jump *bad_unit;
struct sljit_jump *at_start;
struct sljit_jump *not_high;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

bad_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(bad_unit);
JUMPHERE(at_start);
JUMPHERE(not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5504
5505
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the validating UTF-16 helper that peeks at the character ending
just before STR_PTR. STR_PTR is not modified.

As used below: TMP1 holds a code unit (presumably the one at offset -1,
already loaded by the caller - confirm at the call sites) and TMP2 holds the
lowest readable address. TMP2 is clobbered. The result in TMP1 is the
character value or INVALID_UTF_CHAR. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *not_low;
struct sljit_jump *no_room;
struct sljit_jump *not_high;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Values from 0xe000 upwards are returned unchanged. Anything below 0xdc00
is rejected, and a second unit must be readable at offset -2. */
single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The unit at -2 must be in 0xd800-0xdbff; combine both halves. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_low);
JUMPHERE(no_room);
JUMPHERE(not_high);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5536
5537
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5538
5539
/* Constants used by the generated two-stage UCD lookup. 127 is 128 - 1 and
7 is log2(128), so both values are only correct while UCD_BLOCK_SIZE is 128;
do_getucd() and do_getucdtype() assert this before using them. */
5540
#define UCD_BLOCK_MASK 127
5541
#define UCD_BLOCK_SHIFT 7
5542
5543
static void do_getucd(compiler_common *common)
{
/* Emits the helper that looks up the UCD record index of the character
passed in TMP1. The 16-bit value read from ucd_stage2 is returned in TMP2.
TMP1 is overwritten with the stage 2 index used for that read; no chartype
is loaded by this helper. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* Sanity checks on the record that UNASSIGNED_UTF_CHAR resolves to
(dummy_ucd_record). */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown);
SLJIT_ASSERT(dummy->chartype == ucp_Cn);
SLJIT_ASSERT(dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0);
SLJIT_ASSERT(dummy->other_case == 0);
#endif

/* The shift/mask constants and the record size are hard coded. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128);
SLJIT_ASSERT(sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before indexing the tables. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: stage 1 value * block size + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5582
5583
static void do_getucdtype(compiler_common *common)
{
/* Emits the helper that looks up the UCD record of the character passed in
TMP1 and returns its chartype byte in TMP1. TMP2 is left holding the record
index multiplied by four. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* Sanity checks on the record that UNASSIGNED_UTF_CHAR resolves to
(dummy_ucd_record). */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown);
SLJIT_ASSERT(dummy->chartype == ucp_Cn);
SLJIT_ASSERT(dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0);
SLJIT_ASSERT(dummy->other_case == 0);
#endif

/* The shift/mask constants and the multiplication by 12 are hard coded. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128);
SLJIT_ASSERT(sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before indexing the tables. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: stage 1 value * block size + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* The record index is scaled by 12 without a multiply: TMP2 becomes
index * 4, which is added once to the base address and once more, shifted
left by one, through the addressing mode of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5629
5630
#endif /* SUPPORT_UNICODE */
5631
5632
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the code that runs before the first match attempt and the code
that advances STR_PTR by one character.

With PCRE2_FIRSTLINE the end of the first line is located and stored in the
match_end_ptr stack slot. Otherwise, with PCRE2_USE_OFFSET_LIMIT, the limit
is converted to an address, clamped to STR_END and stored in the same slot;
a limit below STR_PTR jumps to the abort list with PCRE2_ERROR_NOMATCH.

An unconditional jump then skips the advance code, so the first pass starts
at the current STR_PTR. The returned label marks the start of the advance
code (presumably the target used to move on to the next start position -
confirm at the caller). */
DEFINE_COMPILER;
struct sljit_label *advance;
struct sljit_label *scan_loop;
struct sljit_label *second_unit = NULL;
struct sljit_jump *skip_advance;
struct sljit_jump *scan_end;
struct sljit_jump *limit_unset;
struct sljit_jump *limit_in_range;
struct sljit_jump *nl_at_end = NULL;
struct sljit_jump *nl_done = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL firstline = (overall_options & PCRE2_FIRSTLINE) != 0;
/* TRUE when the advance code also tests for a two code unit newline. */
BOOL newlinecheck = !hascrorlf && !firstline
  && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255);
BOOL readuchar;

SLJIT_ASSERT(common->abort_label == NULL);

if (firstline)
  {
  /* Find the end of the first line. STR_PTR is saved in TMP3 meanwhile. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: compare adjacent units. */
    scan_loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    scan_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, scan_loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, scan_loop);
    JUMPHERE(scan_end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    scan_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    scan_loop = LABEL();
    /* Repeated stores to the same slot do not create a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, scan_loop);
    JUMPHERE(scan_end);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether the offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* When the limit is PCRE2_UNSET, STR_END is stored unchanged. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  limit_unset = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  limit_in_range = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(limit_in_range);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(limit_unset);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

skip_advance = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the advance code when the first unit of the newline was
  seen: step over it, and over the next unit as well if that is the second
  unit of the newline. */
  second_unit = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  nl_at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  nl_done = JUMP(SLJIT_JUMP);
  }

advance = LABEL();

/* Increasing STR_PTR here requires one less jump in the most common case.
The current code unit is loaded first when a later step inspects it. */
readuchar = newlinecheck;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, second_unit);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units (0x80-0xbf) up to STR_END. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For lead bytes from 0xc0 upwards, the number of extra code units is
  read from utf8_table4. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units (0xdc00-0xdfff) up to STR_END. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Step over one more code unit when the unit just passed is in
  0xd800-0xdbff. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
JUMPHERE(skip_advance);

if (newlinecheck)
  {
  JUMPHERE(nl_at_end);
  JUMPHERE(nl_done);
  }

return advance;
}
5822
5823
5824
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Adds chr to the character set stored in chars.

A count of 255 means the set is saturated and is never changed again. A
character already present is ignored. When a new character does not fit
(the set already holds MAX_DIFF_CHARS entries), count is set to 255.
last_count is updated only when last is non-zero: it is set to 1 for the
first character of an empty set and incremented for later additions. */
sljit_u32 used = chars->count;
sljit_u32 idx;

if (used == 255)
  return;

if (used != 0)
  {
  /* Nothing to do when the character is already recorded. */
  idx = 0;
  while (idx < used)
    {
    if (chars->chars[idx] == chr)
      return;
    idx++;
    }

  if (used >= MAX_DIFF_CHARS)
    {
    chars->count = 255;
    return;
    }

  chars->chars[used] = chr;
  chars->count = used + 1;

  if (last)
    chars->last_count++;
  return;
  }

/* First character of an empty set. */
chars->count = 1;
chars->chars[0] = chr;

if (last)
  chars->last_count = 1;
}
5857
5858
/* Number of entries in each of the fixed-size work stacks declared by
scan_prefix() (cc_stack, chars_stack and next_alternative_stack). The value
can be increased if needed: patterns such as /(a|){33}b/ can exhaust the
stack.

Note: /(a|){29}b/ already stops scan_prefix() because it reaches the maximum
step_count. */
5863
294k
#define SCAN_PREFIX_STACK_END 32
5864
5865
/*
scan_prefix() stores the prefix string in the chars array.
Each element of the chars array is either a small character
set or "any" (count is set to 255).

Examples (the chars array is represented by a simple regex):

/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
/a[a-z]b+c/ prefix: a.b (length: 3)
/ab?cd/ prefix: a[bc][cd] (length: 3)
/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)

The length is returned by scan_prefix(). The length is
less than or equal to the minimum length of the pattern.
*/
5880
5881
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
5882
47.1k
{
5883
47.1k
fast_forward_char_data *chars_start = chars;
5884
47.1k
fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
5885
47.1k
PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
5886
47.1k
fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
5887
47.1k
sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
5888
47.1k
BOOL last, any, class, caseless;
5889
47.1k
int stack_ptr, step_count, repeat, len, len_save;
5890
47.1k
sljit_u32 chr; /* Any unicode character. */
5891
47.1k
sljit_u8 *bytes, *bytes_end, byte;
5892
47.1k
PCRE2_SPTR alternative, cc_save, oc;
5893
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5894
PCRE2_UCHAR othercase[4];
5895
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5896
47.1k
PCRE2_UCHAR othercase[2];
5897
#else
5898
PCRE2_UCHAR othercase[1];
5899
#endif
5900
5901
47.1k
repeat = 1;
5902
47.1k
stack_ptr = 0;
5903
47.1k
step_count = 10000;
5904
3.78M
while (TRUE)
5905
3.78M
  {
5906
3.78M
  if (--step_count == 0)
5907
106
    return 0;
5908
5909
3.78M
  SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);
5910
5911
3.78M
  if (chars >= chars_end)
5912
576k
    {
5913
576k
    if (stack_ptr == 0)
5914
46.9k
      return (int)(chars_end - chars_start);
5915
5916
529k
    --stack_ptr;
5917
529k
    cc = cc_stack[stack_ptr];
5918
529k
    chars = chars_stack[stack_ptr];
5919
5920
529k
    if (chars >= chars_end)
5921
28.3k
      continue;
5922
5923
500k
    if (next_alternative_stack[stack_ptr] != 0)
5924
354k
      {
5925
      /* When an alternative is processed, the
5926
      next alternative is pushed onto the stack. */
5927
354k
      SLJIT_ASSERT(*cc == OP_ALT);
5928
354k
      alternative = cc + GET(cc, 1);
5929
354k
      if (*alternative == OP_ALT)
5930
236k
        {
5931
236k
        SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
5932
236k
        SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
5933
236k
        SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
5934
236k
        cc_stack[stack_ptr] = alternative;
5935
236k
        stack_ptr++;
5936
236k
        }
5937
354k
      cc += 1 + LINK_SIZE;
5938
354k
      }
5939
500k
    }
5940
5941
3.71M
  last = TRUE;
5942
3.71M
  any = FALSE;
5943
3.71M
  class = FALSE;
5944
3.71M
  caseless = FALSE;
5945
5946
3.71M
  switch (*cc)
5947
3.71M
    {
5948
203k
    case OP_CHARI:
5949
203k
    caseless = TRUE;
5950
    /* Fall through */
5951
802k
    case OP_CHAR:
5952
802k
    last = FALSE;
5953
802k
    cc++;
5954
802k
    break;
5955
5956
1.18k
    case OP_SOD:
5957
2.32k
    case OP_SOM:
5958
2.71k
    case OP_SET_SOM:
5959
9.44k
    case OP_NOT_WORD_BOUNDARY:
5960
17.7k
    case OP_WORD_BOUNDARY:
5961
19.1k
    case OP_EODN:
5962
20.4k
    case OP_EOD:
5963
23.2k
    case OP_CIRC:
5964
23.9k
    case OP_CIRCM:
5965
58.1k
    case OP_DOLL:
5966
60.8k
    case OP_DOLLM:
5967
63.6k
    case OP_NOT_UCP_WORD_BOUNDARY:
5968
65.0k
    case OP_UCP_WORD_BOUNDARY:
5969
    /* Zero width assertions. */
5970
65.0k
    cc++;
5971
65.0k
    continue;
5972
5973
87.9k
    case OP_ASSERT:
5974
214k
    case OP_ASSERT_NOT:
5975
286k
    case OP_ASSERTBACK:
5976
602k
    case OP_ASSERTBACK_NOT:
5977
1.64M
    case OP_ASSERT_NA:
5978
1.67M
    case OP_ASSERTBACK_NA:
5979
1.67M
    case OP_ASSERT_SCS:
5980
1.67M
    cc = bracketend(cc);
5981
1.67M
    continue;
5982
5983
643
    case OP_PLUSI:
5984
6.06k
    case OP_MINPLUSI:
5985
9.16k
    case OP_POSPLUSI:
5986
9.16k
    caseless = TRUE;
5987
    /* Fall through */
5988
11.9k
    case OP_PLUS:
5989
19.3k
    case OP_MINPLUS:
5990
38.6k
    case OP_POSPLUS:
5991
38.6k
    cc++;
5992
38.6k
    break;
5993
5994
2.02k
    case OP_EXACTI:
5995
2.02k
    caseless = TRUE;
5996
    /* Fall through */
5997
6.50k
    case OP_EXACT:
5998
6.50k
    repeat = GET2(cc, 1);
5999
6.50k
    last = FALSE;
6000
6.50k
    cc += 1 + IMM2_SIZE;
6001
6.50k
    break;
6002
6003
3.36k
    case OP_QUERYI:
6004
23.5k
    case OP_MINQUERYI:
6005
32.9k
    case OP_POSQUERYI:
6006
32.9k
    caseless = TRUE;
6007
    /* Fall through */
6008
59.4k
    case OP_QUERY:
6009
66.4k
    case OP_MINQUERY:
6010
108k
    case OP_POSQUERY:
6011
108k
    len = 1;
6012
108k
    cc++;
6013
108k
#ifdef SUPPORT_UNICODE
6014
108k
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6015
108k
#endif
6016
108k
    if (stack_ptr >= SCAN_PREFIX_STACK_END)
6017
492
      {
6018
492
      chars_end = chars;
6019
492
      continue;
6020
492
      }
6021
6022
108k
    cc_stack[stack_ptr] = cc + len;
6023
108k
    chars_stack[stack_ptr] = chars;
6024
108k
    next_alternative_stack[stack_ptr] = 0;
6025
108k
    stack_ptr++;
6026
6027
108k
    last = FALSE;
6028
108k
    break;
6029
6030
251k
    case OP_KET:
6031
251k
    cc += 1 + LINK_SIZE;
6032
251k
    continue;
6033
6034
393k
    case OP_ALT:
6035
393k
    cc += GET(cc, 1);
6036
393k
    continue;
6037
6038
14.6k
    case OP_ONCE:
6039
108k
    case OP_BRA:
6040
111k
    case OP_BRAPOS:
6041
204k
    case OP_CBRA:
6042
208k
    case OP_CBRAPOS:
6043
208k
    alternative = cc + GET(cc, 1);
6044
208k
    if (*alternative == OP_ALT)
6045
147k
      {
6046
147k
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
6047
305
        {
6048
305
        chars_end = chars;
6049
305
        continue;
6050
305
        }
6051
6052
146k
      cc_stack[stack_ptr] = alternative;
6053
146k
      chars_stack[stack_ptr] = chars;
6054
146k
      next_alternative_stack[stack_ptr] = 1;
6055
146k
      stack_ptr++;
6056
146k
      }
6057
6058
208k
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
6059
97.1k
      cc += IMM2_SIZE;
6060
208k
    cc += 1 + LINK_SIZE;
6061
208k
    continue;
6062
6063
44.8k
    case OP_CLASS:
6064
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6065
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
6066
      {
6067
      chars_end = chars;
6068
      continue;
6069
      }
6070
#endif
6071
44.8k
    class = TRUE;
6072
44.8k
    break;
6073
6074
2.53k
    case OP_NCLASS:
6075
2.53k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6076
2.53k
    if (common->utf)
6077
102
      {
6078
102
      chars_end = chars;
6079
102
      continue;
6080
102
      }
6081
2.43k
#endif
6082
2.43k
    class = TRUE;
6083
2.43k
    break;
6084
6085
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
6086
10.8k
    case OP_XCLASS:
6087
11.3k
    case OP_ECLASS:
6088
11.3k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6089
11.3k
    if (common->utf)
6090
454
      {
6091
454
      chars_end = chars;
6092
454
      continue;
6093
454
      }
6094
10.8k
#endif
6095
10.8k
    any = TRUE;
6096
10.8k
    cc += GET(cc, 1);
6097
10.8k
    break;
6098
0
#endif
6099
6100
5.62k
    case OP_DIGIT:
6101
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6102
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6103
      {
6104
      chars_end = chars;
6105
      continue;
6106
      }
6107
#endif
6108
5.62k
    any = TRUE;
6109
5.62k
    cc++;
6110
5.62k
    break;
6111
6112
3.05k
    case OP_WHITESPACE:
6113
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6114
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6115
      {
6116
      chars_end = chars;
6117
      continue;
6118
      }
6119
#endif
6120
3.05k
    any = TRUE;
6121
3.05k
    cc++;
6122
3.05k
    break;
6123
6124
2.52k
    case OP_WORDCHAR:
6125
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
6126
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6127
      {
6128
      chars_end = chars;
6129
      continue;
6130
      }
6131
#endif
6132
2.52k
    any = TRUE;
6133
2.52k
    cc++;
6134
2.52k
    break;
6135
6136
1.90k
    case OP_NOT:
6137
2.74k
    case OP_NOTI:
6138
2.74k
    cc++;
6139
    /* Fall through. */
6140
6.96k
    case OP_NOT_DIGIT:
6141
11.7k
    case OP_NOT_WHITESPACE:
6142
16.4k
    case OP_NOT_WORDCHAR:
6143
18.5k
    case OP_ANY:
6144
21.8k
    case OP_ALLANY:
6145
21.8k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6146
21.8k
    if (common->utf)
6147
324
      {
6148
324
      chars_end = chars;
6149
324
      continue;
6150
324
      }
6151
21.4k
#endif
6152
21.4k
    any = TRUE;
6153
21.4k
    cc++;
6154
21.4k
    break;
6155
6156
0
#ifdef SUPPORT_UNICODE
6157
5.94k
    case OP_NOTPROP:
6158
16.4k
    case OP_PROP:
6159
16.4k
#if PCRE2_CODE_UNIT_WIDTH != 32
6160
16.4k
    if (common->utf)
6161
437
      {
6162
437
      chars_end = chars;
6163
437
      continue;
6164
437
      }
6165
16.0k
#endif
6166
16.0k
    any = TRUE;
6167
16.0k
    cc += 1 + 2;
6168
16.0k
    break;
6169
0
#endif
6170
6171
1.45k
    case OP_TYPEEXACT:
6172
1.45k
    repeat = GET2(cc, 1);
6173
1.45k
    cc += 1 + IMM2_SIZE;
6174
1.45k
    continue;
6175
6176
1.89k
    case OP_NOTEXACT:
6177
3.27k
    case OP_NOTEXACTI:
6178
3.27k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6179
3.27k
    if (common->utf)
6180
79
      {
6181
79
      chars_end = chars;
6182
79
      continue;
6183
79
      }
6184
3.19k
#endif
6185
3.19k
    any = TRUE;
6186
3.19k
    repeat = GET2(cc, 1);
6187
3.19k
    cc += 1 + IMM2_SIZE + 1;
6188
3.19k
    break;
6189
6190
47.4k
    default:
6191
47.4k
    chars_end = chars;
6192
47.4k
    continue;
6193
3.71M
    }
6194
6195
1.06M
  SLJIT_ASSERT(chars < chars_end);
6196
6197
1.06M
  if (any)
6198
62.7k
    {
6199
62.7k
    do
6200
68.7k
      {
6201
68.7k
      chars->count = 255;
6202
68.7k
      chars++;
6203
68.7k
      }
6204
68.7k
    while (--repeat > 0 && chars < chars_end);
6205
6206
62.7k
    repeat = 1;
6207
62.7k
    continue;
6208
62.7k
    }
6209
6210
1.00M
  if (class)
6211
47.2k
    {
6212
47.2k
    bytes = (sljit_u8*) (cc + 1);
6213
47.2k
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
6214
6215
47.2k
    SLJIT_ASSERT(last == TRUE && repeat == 1);
6216
47.2k
    switch (*cc)
6217
47.2k
      {
6218
13.3k
      case OP_CRQUERY:
6219
13.7k
      case OP_CRMINQUERY:
6220
22.3k
      case OP_CRPOSQUERY:
6221
22.3k
      last = FALSE;
6222
      /* Fall through */
6223
28.1k
      case OP_CRSTAR:
6224
30.2k
      case OP_CRMINSTAR:
6225
39.0k
      case OP_CRPOSSTAR:
6226
39.0k
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
6227
48
        {
6228
48
        chars_end = chars;
6229
48
        continue;
6230
48
        }
6231
6232
38.9k
      cc_stack[stack_ptr] = ++cc;
6233
38.9k
      chars_stack[stack_ptr] = chars;
6234
38.9k
      next_alternative_stack[stack_ptr] = 0;
6235
38.9k
      stack_ptr++;
6236
38.9k
      break;
6237
6238
3.77k
      default:
6239
4.94k
      case OP_CRPLUS:
6240
5.49k
      case OP_CRMINPLUS:
6241
5.74k
      case OP_CRPOSPLUS:
6242
5.74k
      break;
6243
6244
684
      case OP_CRRANGE:
6245
1.21k
      case OP_CRMINRANGE:
6246
2.52k
      case OP_CRPOSRANGE:
6247
2.52k
      repeat = GET2(cc, 1);
6248
2.52k
      if (repeat <= 0)
6249
98
        {
6250
98
        chars_end = chars;
6251
98
        continue;
6252
98
        }
6253
6254
2.42k
      last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
6255
2.42k
      cc += 1 + 2 * IMM2_SIZE;
6256
2.42k
      break;
6257
47.2k
      }
6258
6259
47.1k
    do
6260
48.8k
      {
6261
48.8k
      if (bytes[31] & 0x80)
6262
2.59k
        chars->count = 255;
6263
46.2k
      else if (chars->count != 255)
6264
4.44k
        {
6265
4.44k
        bytes_end = bytes + 32;
6266
4.44k
        chr = 0;
6267
4.44k
        do
6268
126k
          {
6269
126k
          byte = *bytes++;
6270
126k
          SLJIT_ASSERT((chr & 0x7) == 0);
6271
126k
          if (byte == 0)
6272
117k
            chr += 8;
6273
8.31k
          else
6274
8.31k
            {
6275
8.31k
            do
6276
35.0k
              {
6277
35.0k
              if ((byte & 0x1) != 0)
6278
11.9k
                add_prefix_char(chr, chars, TRUE);
6279
35.0k
              byte >>= 1;
6280
35.0k
              chr++;
6281
35.0k
              }
6282
35.0k
            while (byte != 0);
6283
8.31k
            chr = (chr + 7) & (sljit_u32)(~7);
6284
8.31k
            }
6285
126k
          }
6286
126k
        while (chars->count != 255 && bytes < bytes_end);
6287
4.44k
        bytes = bytes_end - 32;
6288
4.44k
        }
6289
6290
48.8k
      chars++;
6291
48.8k
      }
6292
48.8k
    while (--repeat > 0 && chars < chars_end);
6293
6294
47.1k
    repeat = 1;
6295
47.1k
    if (last)
6296
24.1k
      chars_end = chars;
6297
47.1k
    continue;
6298
47.2k
    }
6299
6300
955k
  len = 1;
6301
955k
#ifdef SUPPORT_UNICODE
6302
955k
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6303
955k
#endif
6304
6305
955k
  if (caseless && char_has_othercase(common, cc))
6306
49.3k
    {
6307
49.3k
#ifdef SUPPORT_UNICODE
6308
49.3k
    if (common->utf)
6309
33.6k
      {
6310
33.6k
      GETCHAR(chr, cc);
6311
33.6k
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
6312
0
        {
6313
0
        chars_end = chars;
6314
0
        continue;
6315
0
        }
6316
33.6k
      }
6317
15.6k
    else
6318
15.6k
#endif
6319
15.6k
      {
6320
15.6k
      chr = *cc;
6321
15.6k
#ifdef SUPPORT_UNICODE
6322
15.6k
      if (common->ucp && chr > 127)
6323
5.33k
        {
6324
5.33k
        chr = UCD_OTHERCASE(chr);
6325
5.33k
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
6326
5.33k
        }
6327
10.3k
      else
6328
10.3k
#endif
6329
10.3k
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
6330
15.6k
      }
6331
49.3k
    }
6332
906k
  else
6333
906k
    {
6334
906k
    caseless = FALSE;
6335
906k
    othercase[0] = 0; /* Stops compiler warning - PH */
6336
906k
    }
6337
6338
955k
  len_save = len;
6339
955k
  cc_save = cc;
6340
968k
  while (TRUE)
6341
968k
    {
6342
968k
    oc = othercase;
6343
968k
    do
6344
969k
      {
6345
969k
      len--;
6346
6347
969k
      chr = *cc;
6348
969k
      add_prefix_char(*cc, chars, len == 0);
6349
6350
969k
      if (caseless)
6351
49.8k
        add_prefix_char(*oc, chars, len == 0);
6352
6353
969k
      chars++;
6354
969k
      cc++;
6355
969k
      oc++;
6356
969k
      }
6357
969k
    while (len > 0 && chars < chars_end);
6358
6359
968k
    if (--repeat == 0 || chars >= chars_end)
6360
955k
      break;
6361
6362
12.4k
    len = len_save;
6363
12.4k
    cc = cc_save;
6364
12.4k
    }
6365
6366
955k
  repeat = 1;
6367
955k
  if (last)
6368
38.6k
    chars_end = chars;
6369
955k
  }
6370
47.1k
}
6371
6372
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6373
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
6374
58
{
6375
#if PCRE2_CODE_UNIT_WIDTH == 8
6376
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
6377
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
6378
#elif PCRE2_CODE_UNIT_WIDTH == 16
6379
58
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
6380
58
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
6381
#else
6382
#error "Unknown code width"
6383
#endif
6384
58
}
6385
#endif
6386
6387
#include "pcre2_jit_simd_inc.h"
6388
6389
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6390
6391
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
6392
22.8k
{
6393
22.8k
  sljit_s32 i, j, max_i = 0, max_j = 0;
6394
22.8k
  sljit_u32 max_pri = 0;
6395
22.8k
  sljit_s32 max_offset = max_fast_forward_char_pair_offset();
6396
22.8k
  PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
6397
6398
109k
  for (i = max - 1; i >= 1; i--)
6399
86.6k
    {
6400
86.6k
    if (chars[i].last_count > 2)
6401
76.3k
      {
6402
76.3k
      a1 = chars[i].chars[0];
6403
76.3k
      a2 = chars[i].chars[1];
6404
76.3k
      a_pri = chars[i].last_count;
6405
6406
76.3k
      j = i - max_offset;
6407
76.3k
      if (j < 0)
6408
56.3k
        j = 0;
6409
6410
384k
      while (j < i)
6411
308k
        {
6412
308k
        b_pri = chars[j].last_count;
6413
308k
        if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
6414
283k
          {
6415
283k
          b1 = chars[j].chars[0];
6416
283k
          b2 = chars[j].chars[1];
6417
6418
283k
          if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6419
241k
            {
6420
241k
            max_pri = a_pri + b_pri;
6421
241k
            max_i = i;
6422
241k
            max_j = j;
6423
241k
            }
6424
283k
          }
6425
308k
        j++;
6426
308k
        }
6427
76.3k
      }
6428
86.6k
    }
6429
6430
22.8k
if (max_pri == 0)
6431
9.18k
  return FALSE;
6432
6433
13.6k
fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
6434
13.6k
return TRUE;
6435
22.8k
}
6436
6437
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6438
6439
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
6440
7.78k
{
6441
7.78k
DEFINE_COMPILER;
6442
7.78k
struct sljit_label *start;
6443
7.78k
struct sljit_jump *match;
6444
7.78k
struct sljit_jump *partial_quit;
6445
7.78k
PCRE2_UCHAR mask;
6446
7.78k
BOOL has_match_end = (common->match_end_ptr != 0);
6447
6448
7.78k
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
6449
6450
7.78k
if (has_match_end)
6451
1.17k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6452
6453
7.78k
if (offset > 0)
6454
488
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6455
6456
7.78k
if (has_match_end)
6457
1.17k
  {
6458
1.17k
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6459
6460
1.17k
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6461
1.17k
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6462
1.17k
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6463
1.17k
  }
6464
6465
7.78k
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6466
6467
7.78k
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
6468
7.78k
  {
6469
7.78k
  fast_forward_char_simd(common, char1, char2, offset);
6470
6471
7.78k
  if (offset > 0)
6472
488
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6473
6474
7.78k
  if (has_match_end)
6475
1.17k
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6476
7.78k
  return;
6477
7.78k
  }
6478
6479
0
#endif
6480
6481
0
start = LABEL();
6482
6483
0
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6484
0
if (common->mode == PCRE2_JIT_COMPLETE)
6485
0
  add_jump(compiler, &common->failed_match, partial_quit);
6486
6487
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6488
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6489
6490
0
if (char1 == char2)
6491
0
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
6492
0
else
6493
0
  {
6494
0
  mask = char1 ^ char2;
6495
0
  if (is_powerof2(mask))
6496
0
    {
6497
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6498
0
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
6499
0
    }
6500
0
  else
6501
0
    {
6502
0
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
6503
0
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
6504
0
    JUMPHERE(match);
6505
0
    }
6506
0
  }
6507
6508
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6509
0
if (common->utf && offset > 0)
6510
0
  {
6511
0
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
6512
0
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6513
0
  }
6514
0
#endif
6515
6516
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6517
6518
0
if (common->mode != PCRE2_JIT_COMPLETE)
6519
0
  JUMPHERE(partial_quit);
6520
6521
0
if (has_match_end)
6522
0
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6523
0
}
6524
6525
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6526
47.1k
{
6527
47.1k
DEFINE_COMPILER;
6528
47.1k
struct sljit_label *start;
6529
47.1k
struct sljit_jump *match;
6530
47.1k
fast_forward_char_data chars[MAX_N_CHARS];
6531
47.1k
sljit_s32 offset;
6532
47.1k
PCRE2_UCHAR mask;
6533
47.1k
PCRE2_UCHAR *char_set, *char_set_end;
6534
47.1k
int i, max, from;
6535
47.1k
int range_right = -1, range_len;
6536
47.1k
sljit_u8 *update_table = NULL;
6537
47.1k
BOOL in_range;
6538
6539
612k
for (i = 0; i < MAX_N_CHARS; i++)
6540
565k
  {
6541
565k
  chars[i].count = 0;
6542
565k
  chars[i].last_count = 0;
6543
565k
  }
6544
6545
47.1k
max = scan_prefix(common, common->start, chars);
6546
6547
47.1k
if (max < 1)
6548
24.2k
  return FALSE;
6549
6550
/* Convert last_count to priority. */
6551
132k
for (i = 0; i < max; i++)
6552
109k
  {
6553
109k
  SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6554
6555
109k
  switch (chars[i].count)
6556
109k
    {
6557
83
    case 0:
6558
83
    chars[i].count = 255;
6559
83
    chars[i].last_count = 0;
6560
83
    break;
6561
6562
84.2k
    case 1:
6563
84.2k
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6564
    /* Simplifies algorithms later. */
6565
84.2k
    chars[i].chars[1] = chars[i].chars[0];
6566
84.2k
    break;
6567
6568
10.0k
    case 2:
6569
10.0k
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6570
6571
10.0k
    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6572
1.69k
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6573
8.32k
    else
6574
8.32k
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6575
10.0k
    break;
6576
6577
15.0k
    default:
6578
15.0k
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6579
15.0k
    break;
6580
109k
    }
6581
109k
  }
6582
6583
22.8k
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6584
22.8k
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6585
13.6k
  return TRUE;
6586
9.18k
#endif
6587
6588
9.18k
in_range = FALSE;
6589
/* Prevent compiler "uninitialized" warning */
6590
9.18k
from = 0;
6591
9.18k
range_len = 4 /* minimum length */ - 1;
6592
36.9k
for (i = 0; i <= max; i++)
6593
27.7k
  {
6594
27.7k
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6595
2.50k
    {
6596
2.50k
    range_len = i - from;
6597
2.50k
    range_right = i - 1;
6598
2.50k
    }
6599
6600
27.7k
  if (i < max && chars[i].count < 255)
6601
11.1k
    {
6602
11.1k
    SLJIT_ASSERT(chars[i].count > 0);
6603
11.1k
    if (!in_range)
6604
6.63k
      {
6605
6.63k
      in_range = TRUE;
6606
6.63k
      from = i;
6607
6.63k
      }
6608
11.1k
    }
6609
16.5k
  else
6610
16.5k
    in_range = FALSE;
6611
27.7k
  }
6612
6613
9.18k
if (range_right >= 0)
6614
584
  {
6615
584
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6616
584
  if (update_table == NULL)
6617
0
    return TRUE;
6618
584
  memset(update_table, IN_UCHARS(range_len), 256);
6619
6620
4.83k
  for (i = 0; i < range_len; i++)
6621
4.25k
    {
6622
4.25k
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6623
6624
4.25k
    char_set = chars[range_right - i].chars;
6625
4.25k
    char_set_end = char_set + chars[range_right - i].count;
6626
4.25k
    do
6627
9.03k
      {
6628
9.03k
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6629
2.55k
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6630
9.03k
      char_set++;
6631
9.03k
      }
6632
9.03k
    while (char_set < char_set_end);
6633
4.25k
    }
6634
584
  }
6635
6636
9.18k
offset = -1;
6637
/* Scan forward. */
6638
27.7k
for (i = 0; i < max; i++)
6639
18.5k
  {
6640
18.5k
  if (range_right == i)
6641
584
    continue;
6642
6643
17.9k
  if (offset == -1)
6644
13.5k
    {
6645
13.5k
    if (chars[i].last_count >= 2)
6646
5.76k
      offset = i;
6647
13.5k
    }
6648
4.48k
  else if (chars[offset].last_count < chars[i].last_count)
6649
47
    offset = i;
6650
17.9k
  }
6651
6652
9.18k
SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6653
6654
9.18k
if (range_right < 0)
6655
8.60k
  {
6656
8.60k
  if (offset < 0)
6657
3.28k
    return FALSE;
6658
  /* Works regardless the value is 1 or 2. */
6659
5.31k
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6660
5.31k
  return TRUE;
6661
8.60k
  }
6662
6663
584
SLJIT_ASSERT(range_right != offset);
6664
6665
584
if (common->match_end_ptr != 0)
6666
70
  {
6667
70
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6668
70
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6669
70
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6670
70
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6671
70
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6672
70
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6673
70
  }
6674
514
else
6675
514
  {
6676
514
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6677
514
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6678
514
  }
6679
6680
584
SLJIT_ASSERT(range_right >= 0);
6681
6682
584
if (!HAS_VIRTUAL_REGISTERS)
6683
584
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6684
6685
584
start = LABEL();
6686
584
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6687
6688
584
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6689
584
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6690
#else
6691
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6692
#endif
6693
6694
584
if (!HAS_VIRTUAL_REGISTERS)
6695
584
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6696
0
else
6697
0
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6698
6699
584
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6700
584
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6701
6702
584
if (offset >= 0)
6703
451
  {
6704
451
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6705
451
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6706
6707
451
  if (chars[offset].count == 1)
6708
281
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6709
170
  else
6710
170
    {
6711
170
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6712
170
    if (is_powerof2(mask))
6713
20
      {
6714
20
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6715
20
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6716
20
      }
6717
150
    else
6718
150
      {
6719
150
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6720
150
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6721
150
      JUMPHERE(match);
6722
150
      }
6723
170
    }
6724
451
  }
6725
6726
584
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6727
584
if (common->utf && offset != 0)
6728
58
  {
6729
58
  if (offset < 0)
6730
38
    {
6731
38
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6732
38
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6733
38
    }
6734
20
  else
6735
20
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6736
6737
58
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6738
6739
58
  if (offset < 0)
6740
38
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6741
58
  }
6742
584
#endif
6743
6744
584
if (offset >= 0)
6745
451
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6746
6747
584
if (common->match_end_ptr != 0)
6748
70
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6749
514
else
6750
514
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6751
584
return TRUE;
6752
9.18k
}
6753
6754
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6755
2.46k
{
6756
2.46k
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6757
2.46k
PCRE2_UCHAR oc;
6758
6759
2.46k
oc = first_char;
6760
2.46k
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6761
299
  {
6762
299
  oc = TABLE_GET(first_char, common->fcc, first_char);
6763
299
#if defined SUPPORT_UNICODE
6764
299
  if (first_char > 127 && (common->utf || common->ucp))
6765
119
    oc = UCD_OTHERCASE(first_char);
6766
299
#endif
6767
299
  }
6768
6769
2.46k
fast_forward_first_char2(common, first_char, oc, 0);
6770
2.46k
}
6771
6772
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6773
403
{
6774
403
DEFINE_COMPILER;
6775
403
struct sljit_label *loop;
6776
403
struct sljit_jump *lastchar = NULL;
6777
403
struct sljit_jump *firstchar;
6778
403
struct sljit_jump *quit = NULL;
6779
403
struct sljit_jump *foundcr = NULL;
6780
403
struct sljit_jump *notfoundnl;
6781
403
jump_list *newline = NULL;
6782
6783
403
if (common->match_end_ptr != 0)
6784
79
  {
6785
79
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6786
79
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6787
79
  }
6788
6789
403
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6790
0
  {
6791
0
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6792
0
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6793
0
    {
6794
0
    if (HAS_VIRTUAL_REGISTERS)
6795
0
      {
6796
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6797
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6798
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6799
0
      }
6800
0
    else
6801
0
      {
6802
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6803
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6804
0
      }
6805
0
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6806
6807
0
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6808
0
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6809
0
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6810
0
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6811
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6812
0
#endif
6813
0
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6814
6815
0
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6816
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6817
0
    }
6818
0
  else
6819
0
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6820
0
    {
6821
0
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6822
0
    if (HAS_VIRTUAL_REGISTERS)
6823
0
      {
6824
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6825
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6826
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6827
0
      }
6828
0
    else
6829
0
      {
6830
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6831
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6832
0
      }
6833
0
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6834
6835
0
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6836
0
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6837
0
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6838
0
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6839
0
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6840
0
#endif
6841
0
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6842
6843
0
    loop = LABEL();
6844
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6845
0
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6846
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6847
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6848
0
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6849
0
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6850
6851
0
    JUMPHERE(quit);
6852
0
    JUMPHERE(lastchar);
6853
0
    }
6854
6855
0
  JUMPHERE(firstchar);
6856
6857
0
  if (common->match_end_ptr != 0)
6858
0
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6859
0
  return;
6860
0
  }
6861
6862
403
if (HAS_VIRTUAL_REGISTERS)
6863
0
  {
6864
0
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6865
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6866
0
  }
6867
403
else
6868
403
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6869
6870
/* Example: match /^/ to \r\n from offset 1. */
6871
403
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6872
6873
403
if (common->nltype == NLTYPE_ANY)
6874
0
  move_back(common, NULL, FALSE);
6875
403
else
6876
403
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6877
6878
403
loop = LABEL();
6879
403
common->ff_newline_shortcut = loop;
6880
6881
403
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6882
403
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6883
403
  {
6884
403
  if (common->nltype == NLTYPE_ANYCRLF)
6885
0
    {
6886
0
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6887
0
    if (common->mode != PCRE2_JIT_COMPLETE)
6888
0
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6889
6890
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6891
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6892
0
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6893
0
    }
6894
403
   else
6895
403
    {
6896
403
    fast_forward_char_simd(common, common->newline, common->newline, 0);
6897
6898
403
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6899
403
    if (common->mode != PCRE2_JIT_COMPLETE)
6900
0
      {
6901
0
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6902
0
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6903
0
      }
6904
403
    }
6905
403
  }
6906
0
else
6907
0
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6908
0
  {
6909
0
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6910
0
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6911
0
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6912
0
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6913
0
  check_newlinechar(common, common->nltype, &newline, FALSE);
6914
0
  set_jumps(newline, loop);
6915
0
  }
6916
6917
403
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6918
0
  {
6919
0
  if (quit == NULL)
6920
0
    {
6921
0
    quit = JUMP(SLJIT_JUMP);
6922
0
    JUMPHERE(foundcr);
6923
0
    }
6924
6925
0
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6926
0
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6927
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6928
0
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6929
0
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6930
0
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6931
0
#endif
6932
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6933
0
  JUMPHERE(notfoundnl);
6934
0
  JUMPHERE(quit);
6935
0
  }
6936
6937
403
if (lastchar)
6938
0
  JUMPHERE(lastchar);
6939
403
JUMPHERE(firstchar);
6940
6941
403
if (common->match_end_ptr != 0)
6942
79
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6943
403
}
6944
6945
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6946
6947
/* Emits the start-bitmap fast-forward scan.

The generated code reads one code unit at a time from STR_PTR and keeps
looping until a unit passes the test built from common->re->start_bitmap.
STR_PTR is then stepped back by one code unit so that it addresses the unit
that passed. When STR_PTR reaches STR_END: in PCRE2_JIT_COMPLETE mode the
code jumps to common->failed_match; in the other modes the scan just stops
(without the step back). If common->match_end_ptr is non-zero, STR_END is
saved in RETURN_ADDR, adjusted for the duration of the scan, and restored
before the function returns. TMP1, TMP2 and (without virtual registers) TMP3
are used as scratch. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6948
9.94k
{
6949
9.94k
DEFINE_COMPILER;
6950
9.94k
const sljit_u8 *start_bits = common->re->start_bitmap;
6951
9.94k
struct sljit_label *start;
6952
9.94k
struct sljit_jump *partial_quit;
6953
9.94k
#if PCRE2_CODE_UNIT_WIDTH != 8
6954
9.94k
struct sljit_jump *found = NULL;
6955
9.94k
#endif
6956
9.94k
jump_list *matches = NULL;
6957
6958
9.94k
if (common->match_end_ptr != 0)
6959
1.08k
  {
/* TMP1 = stored match end + one code unit; STR_END is replaced by TMP1 when
STR_END is greater (presumably SELECT picks its first source when the
condition holds - confirm against the SELECT macro). The original STR_END is
kept in RETURN_ADDR. */
6960
1.08k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6961
1.08k
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6962
1.08k
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6963
1.08k
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6964
1.08k
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6965
1.08k
  }
6966
6967
9.94k
/* Top of the scan loop. */
start = LABEL();
6968
6969
9.94k
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6970
9.94k
if (common->mode == PCRE2_JIT_COMPLETE)
6971
9.94k
  add_jump(compiler, &common->failed_match, partial_quit);
6972
6973
9.94k
/* Load the current code unit into TMP1 and advance STR_PTR past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6974
9.94k
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6975
6976
9.94k
/* First try the compact range/char-list test; fall back to an explicit
bitmap lookup when optimize_class() cannot handle the bitmap. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6977
4.05k
  {
6978
4.05k
#if PCRE2_CODE_UNIT_WIDTH != 8
/* The bitmap only covers values 0..255. If bit 255 is set, any code unit
>= 255 is accepted immediately; otherwise such units continue the scan. */
6979
4.05k
  if ((start_bits[31] & 0x80) != 0)
6980
3.77k
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6981
284
  else
6982
284
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6983
#elif defined SUPPORT_UNICODE
6984
  if (common->utf && is_char7_bitset(start_bits, FALSE))
6985
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6986
#endif
/* TMP2 = bit index within the byte, TMP1 = bitmap byte for this value. */
6987
4.05k
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6988
4.05k
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6989
4.05k
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6990
4.05k
  if (!HAS_VIRTUAL_REGISTERS)
6991
4.05k
    {
6992
4.05k
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6993
4.05k
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6994
4.05k
    }
6995
0
  else
6996
0
    {
6997
0
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6998
0
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6999
0
    }
/* Bit not set: keep scanning. */
7000
4.05k
  JUMPTO(SLJIT_ZERO, start);
7001
4.05k
  }
7002
5.88k
else
/* The jumps collected by optimize_class() are bound to the loop start, so
taking one of them continues the scan. */
7003
5.88k
  set_jumps(matches, start);
7004
7005
9.94k
#if PCRE2_CODE_UNIT_WIDTH != 8
7006
9.94k
if (found != NULL)
7007
3.77k
  JUMPHERE(found);
7008
9.94k
#endif
7009
7010
9.94k
/* Undo the advance so STR_PTR addresses the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7011
7012
9.94k
if (common->mode != PCRE2_JIT_COMPLETE)
7013
0
  JUMPHERE(partial_quit);
7014
7015
9.94k
if (common->match_end_ptr != 0)
7016
1.08k
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
7017
9.94k
}
7018
7019
/* Emits a forward search for a required code unit.

Arguments:
  common         compiler state; common->req_char_ptr must be non-zero
  req_char       the required code unit
  caseless       when TRUE, the other case of req_char is accepted as well
  has_firstchar  when TRUE, the search starts one code unit after STR_PTR

Returns the list of jumps that are taken when the search fails.

The generated code skips the search entirely when STR_PTR plus
IN_UCHARS(REQ_CU_MAX) * 100 is still below STR_END, or when STR_PTR is below
the position stored in the req_char_ptr stack slot. Otherwise the position
left in TMP1 after the search is written back to that slot. TMP1 and TMP2
are used as scratch. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
7020
34.4k
{
7021
34.4k
DEFINE_COMPILER;
7022
34.4k
struct sljit_label *loop;
7023
34.4k
struct sljit_jump *toolong;
7024
34.4k
struct sljit_jump *already_found;
7025
34.4k
struct sljit_jump *found;
7026
34.4k
struct sljit_jump *found_oc = NULL;
7027
34.4k
jump_list *not_found = NULL;
7028
34.4k
sljit_u32 oc, bit;
7029
7030
34.4k
SLJIT_ASSERT(common->req_char_ptr != 0);
7031
34.4k
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
7032
34.4k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
7033
34.4k
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
7034
34.4k
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
7035
7036
34.4k
/* TMP1 becomes the search cursor. */
if (has_firstchar)
7037
17.2k
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7038
17.2k
else
7039
17.2k
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
7040
7041
34.4k
/* oc is the other-case form of req_char, or req_char itself when the search
is caseful. */
oc = req_char;
7042
34.4k
if (caseless)
7043
1.04k
  {
7044
1.04k
  oc = TABLE_GET(req_char, common->fcc, req_char);
7045
1.04k
#if defined SUPPORT_UNICODE
7046
1.04k
  if (req_char > 127 && (common->utf || common->ucp))
7047
677
    oc = UCD_OTHERCASE(req_char);
7048
1.04k
#endif
7049
1.04k
  }
7050
7051
34.4k
#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
7052
34.4k
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
7053
34.4k
  {
/* NOTE(review): presumably leaves the match position in TMP1, as the scalar
loop below does - confirm in fast_requested_char_simd(). */
7054
34.4k
  not_found = fast_requested_char_simd(common, req_char, oc);
7055
34.4k
  }
7056
0
else
7057
0
#endif
7058
0
  {
/* Scalar fallback: one code unit per iteration. */
7059
0
  loop = LABEL();
7060
0
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
7061
7062
0
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
7063
7064
0
  if (req_char == oc)
7065
0
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
7066
0
  else
7067
0
    {
7068
0
    bit = req_char ^ oc;
/* When the two cases differ in exactly one bit, force that bit on and do a
single comparison; otherwise compare against both values. */
7069
0
    if (is_powerof2(bit))
7070
0
      {
7071
0
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
7072
0
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
7073
0
      }
7074
0
    else
7075
0
      {
7076
0
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
7077
0
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
7078
0
      }
7079
0
    }
7080
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7081
0
  JUMPTO(SLJIT_JUMP, loop);
7082
7083
0
  JUMPHERE(found);
7084
0
  if (found_oc)
7085
0
    JUMPHERE(found_oc);
7086
0
  }
7087
7088
34.4k
/* Remember where the required code unit was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
7089
7090
34.4k
JUMPHERE(already_found);
7091
34.4k
JUMPHERE(toolong);
7092
34.4k
return not_found;
7093
34.4k
}
7094
7095
/* Emits the fast-call helper that reverts saved frame values.

The helper is entered with SLJIT_FAST_ENTER (return address kept in
RETURN_ADDR) and repeatedly inspects the word just below STACK_TOP:
  > 0   the word is an offset from the local base; the next two words on the
        stack are written to that location and the one after it, and three
        words are popped;
  < 0   the word is a negated offset from the local base; the next word on
        the stack is written to that location and two words are popped;
  == 0  the helper returns through RETURN_ADDR.
TMP1, TMP2 and (without virtual registers) TMP3 are used as scratch. */
static void do_revertframes(compiler_common *common)
7096
3.80k
{
7097
3.80k
DEFINE_COMPILER;
7098
3.80k
struct sljit_jump *jump;
7099
3.80k
struct sljit_label *mainloop;
7100
7101
3.80k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7102
3.80k
GET_LOCAL_BASE(TMP1, 0, 0);
7103
7104
/* Drop frames until we reach STACK_TOP. */
7105
3.80k
mainloop = LABEL();
7106
3.80k
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
7107
3.80k
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
7108
3.80k
jump = JUMP(SLJIT_SIG_LESS_EQUAL);
7109
7110
3.80k
/* Positive entry: TMP2 = local base + offset, then restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7111
3.80k
if (HAS_VIRTUAL_REGISTERS)
7112
0
  {
7113
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
7114
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
7115
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
7116
0
  }
7117
3.80k
else
7118
3.80k
  {
/* TMP1 is borrowed as a data register here, so the local base is reloaded
into it before the loop continues. */
7119
3.80k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
7120
3.80k
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
7121
3.80k
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
7122
3.80k
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
7123
3.80k
  GET_LOCAL_BASE(TMP1, 0, 0);
7124
3.80k
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
7125
3.80k
  }
7126
3.80k
JUMPTO(SLJIT_JUMP, mainloop);
7127
7128
3.80k
JUMPHERE(jump);
/* NOTE(review): presumably re-declares to sljit that the flags produced by
the comparison above are still valid at this jump target - confirm against
the sljit_set_current_flags() documentation. */
7129
3.80k
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
7130
3.80k
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
7131
/* End of reverting values. */
7132
3.80k
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7133
7134
3.80k
JUMPHERE(jump);
7135
3.80k
/* Negative entry: TMP2 = local base - (negative offset), restore one word. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);
7136
3.80k
if (HAS_VIRTUAL_REGISTERS)
7137
0
  {
7138
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
7139
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
7140
0
  }
7141
3.80k
else
7142
3.80k
  {
7143
3.80k
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
7144
3.80k
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
7145
3.80k
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
7146
3.80k
  }
7147
3.80k
JUMPTO(SLJIT_JUMP, mainloop);
7148
3.80k
}
7149
7150
/* Bit-mask helpers for sets of Unicode character types: bit N of a mask
stands for the type whose ucp_* value is N. */
#ifdef SUPPORT_UNICODE
7151
609k
/* Mask with a single type bit. */
#define UCPCAT(bit) (1 << (bit))
7152
181k
/* Mask with two type bits. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
7153
7.81k
/* Mask with three type bits. */
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
7154
425k
/* Mask with every bit from start to end, both inclusive. */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
7155
189k
/* All types from ucp_Ll to ucp_Lu. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
7156
189k
/* All types from ucp_Nd to ucp_No. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
7157
2.62M
/* All types from 0 to ucp_Zs. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
7158
#endif
7159
7160
/* Emits the fast-call helper that tests for a word boundary at STR_PTR.

Arguments:
  common   compiler state
  ucp      when TRUE (and SUPPORT_UNICODE is defined), word characters are
           decided from the Unicode type returned by the getucdtype helper;
           otherwise from the ctype_word bit of common->ctypes

The return address is kept in the LOCAL0 stack slot. The generated code puts
the "is a word character" value (0 or 1) of the previous character in TMP3
and of the current character in TMP2, then returns with TMP2 = TMP2 XOR TMP3
and the zero flag set from that result. TMP3 stays 0 when STR_PTR is not
above the subject start. With common->invalid_utf, two extra return paths
exist: TMP2 = -1 when the previous character is invalid and the current one
reads as INVALID_UTF_CHAR, and TMP2 = TMP3 when only the current character is
invalid. */
static void check_wordboundary(compiler_common *common, BOOL ucp)
7161
4.04k
{
7162
4.04k
DEFINE_COMPILER;
7163
4.04k
struct sljit_jump *skipread;
7164
4.04k
jump_list *skipread_list = NULL;
7165
4.04k
#ifdef SUPPORT_UNICODE
7166
4.04k
struct sljit_label *valid_utf;
7167
4.04k
jump_list *invalid_utf1 = NULL;
7168
4.04k
#endif /* SUPPORT_UNICODE */
7169
4.04k
jump_list *invalid_utf2 = NULL;
7170
4.04k
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
7171
4.04k
struct sljit_jump *jump;
7172
4.04k
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
7173
7174
4.04k
SLJIT_UNUSED_ARG(ucp);
7175
4.04k
/* The shift-by-4 extraction used below relies on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
7176
7177
4.04k
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
7178
4.04k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7179
/* Get type of the previous char, and put it to TMP3. */
7180
4.04k
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7181
4.04k
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7182
4.04k
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
7183
4.04k
/* No previous character at (or before) the subject start. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
7184
7185
4.04k
#ifdef SUPPORT_UNICODE
7186
4.04k
if (common->invalid_utf)
7187
0
  {
7188
0
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
7189
7190
0
  if (common->mode != PCRE2_JIT_COMPLETE)
7191
0
    {
/* TMP1 (the character) and STR_PTR are preserved around the step back that
precedes check_start_used_ptr(). */
7192
0
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7193
0
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
7194
0
    move_back(common, NULL, TRUE);
7195
0
    check_start_used_ptr(common);
7196
0
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7197
0
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
7198
0
    }
7199
0
  }
7200
4.04k
else
7201
4.04k
#endif /* SUPPORT_UNICODE */
7202
4.04k
  {
7203
4.04k
  if (common->mode == PCRE2_JIT_COMPLETE)
7204
4.04k
    peek_char_back(common, READ_CHAR_MAX, NULL);
7205
0
  else
7206
0
    {
/* Step back, call check_start_used_ptr(), then read forward again with
STR_PTR updated. */
7207
0
    move_back(common, NULL, TRUE);
7208
0
    check_start_used_ptr(common);
7209
0
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
7210
0
    }
7211
4.04k
  }
7212
7213
/* Testing char type. */
7214
4.04k
#ifdef SUPPORT_UNICODE
7215
4.04k
if (ucp)
7216
1.10k
  {
/* TMP2 = 1 << type; the character is a word character when that bit is in
the Mn | Pc | L* | N* mask. */
7217
1.10k
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7218
1.10k
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7219
1.10k
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7220
1.10k
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
7221
1.10k
  }
7222
2.93k
else
7223
2.93k
#endif /* SUPPORT_UNICODE */
7224
2.93k
  {
7225
2.93k
#if PCRE2_CODE_UNIT_WIDTH != 8
/* Values above 255 skip the table lookup, leaving TMP3 at 0. */
7226
2.93k
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7227
#elif defined SUPPORT_UNICODE
7228
  /* Here TMP3 has already been zeroed. */
7229
  jump = NULL;
7230
  if (common->utf)
7231
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7232
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7233
2.93k
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
7234
2.93k
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
7235
2.93k
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
7236
2.93k
#if PCRE2_CODE_UNIT_WIDTH != 8
7237
2.93k
  JUMPHERE(jump);
7238
#elif defined SUPPORT_UNICODE
7239
  if (jump != NULL)
7240
    JUMPHERE(jump);
7241
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7242
2.93k
  }
7243
4.04k
JUMPHERE(skipread);
7244
7245
4.04k
/* Now the current character: TMP2 defaults to 0 and keeps that value when
the jumps collected in skipread_list by check_str_end() are taken. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7246
4.04k
check_str_end(common, &skipread_list);
7247
4.04k
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);
7248
7249
/* Testing char type. This is a code duplication. */
7250
4.04k
#ifdef SUPPORT_UNICODE
7251
7252
4.04k
/* Re-entry point used by the invalid_utf1 handler at the end. */
valid_utf = LABEL();
7253
7254
4.04k
if (ucp)
7255
1.10k
  {
7256
1.10k
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7257
1.10k
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7258
1.10k
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7259
1.10k
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
7260
1.10k
  }
7261
2.93k
else
7262
2.93k
#endif /* SUPPORT_UNICODE */
7263
2.93k
  {
7264
2.93k
#if PCRE2_CODE_UNIT_WIDTH != 8
7265
  /* TMP2 may be destroyed by peek_char. */
7266
2.93k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7267
2.93k
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7268
#elif defined SUPPORT_UNICODE
7269
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7270
  jump = NULL;
7271
  if (common->utf)
7272
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7273
#endif
7274
2.93k
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
7275
2.93k
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
7276
2.93k
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7277
2.93k
#if PCRE2_CODE_UNIT_WIDTH != 8
7278
2.93k
  JUMPHERE(jump);
7279
#elif defined SUPPORT_UNICODE
7280
  if (jump != NULL)
7281
    JUMPHERE(jump);
7282
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7283
2.93k
  }
7284
4.04k
set_jumps(skipread_list, LABEL());
7285
7286
4.04k
/* Result: non-zero exactly when the two word-character values differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7287
4.04k
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
7288
4.04k
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7289
7290
4.04k
#ifdef SUPPORT_UNICODE
7291
4.04k
if (common->invalid_utf)
7292
0
  {
/* Previous character was invalid: if the current one is readable, continue
at valid_utf (TMP3 is still 0); otherwise return TMP2 = -1. */
7293
0
  set_jumps(invalid_utf1, LABEL());
7294
7295
0
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);
7296
0
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
7297
7298
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7299
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
7300
0
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7301
7302
0
/* Current character was invalid: return the previous character's value. */
  set_jumps(invalid_utf2, LABEL());
7303
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7304
0
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
7305
0
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7306
0
  }
7307
4.04k
#endif /* SUPPORT_UNICODE */
7308
4.04k
}
7309
7310
/* Tries to test a 256-bit class bitmap with a few range comparisons.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes, one bit per value 0..255)
  nclass      selects how the end of the bitmap is treated: a boundary at 256
              is recorded when the last bit is 0 and nclass is TRUE, or when
              the last bit is 1 and nclass is FALSE
  invert      when TRUE, the sense of the emitted test is inverted
  backtracks  jump list that receives the emitted conditional jumps

Returns TRUE when comparison code on TMP1 was emitted (or no code was needed),
FALSE when the bitmap has more value changes than the supported number of
range boundaries. In the FALSE case nothing has been emitted. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7311
1.73M
{
7312
/* May destroy TMP1. */
7313
1.73M
DEFINE_COMPILER;
7314
1.73M
int ranges[MAX_CLASS_RANGE_SIZE];
7315
1.73M
sljit_u8 bit, cbit, all;
7316
1.73M
int i, byte, length = 0;
7317
7318
1.73M
bit = bits[0] & 0x1;
7319
/* All bits will be zero or one (since bit is zero or one). */
7320
1.73M
all = (sljit_u8)-bit;
7321
7322
60.7M
/* Record in ranges[] every position where the bit value changes. Whole
bytes equal to the current value are skipped eight positions at a time. */
for (i = 0; i < 256; )
7323
60.0M
  {
7324
60.0M
  byte = i >> 3;
7325
60.0M
  if ((i & 0x7) == 0 && bits[byte] == all)
7326
28.3M
    i += 8;
7327
31.7M
  else
7328
31.7M
    {
7329
31.7M
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
7330
31.7M
    if (cbit != bit)
7331
6.87M
      {
7332
6.87M
      if (length >= MAX_CLASS_RANGE_SIZE)
7333
1.06M
        return FALSE;
7334
5.80M
      ranges[length] = i;
7335
5.80M
      length++;
7336
5.80M
      bit = cbit;
7337
5.80M
      all = (sljit_u8)-cbit; /* sign extend bit into byte */
7338
5.80M
      }
7339
30.6M
    i++;
7340
30.6M
    }
7341
60.0M
  }
7342
7343
667k
/* Add a final boundary at 256 when the last bit value and nclass call for
one (see the function header). */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
7344
3.58k
  {
7345
3.58k
  if (length >= MAX_CLASS_RANGE_SIZE)
7346
1.41k
    return FALSE;
7347
2.16k
  ranges[length] = 256;
7348
2.16k
  length++;
7349
2.16k
  }
7350
7351
666k
if (length < 0 || length > 4)
7352
0
  return FALSE;
7353
7354
666k
/* From here on, bit is the (possibly inverted) value for character 0. */
bit = bits[0] & 0x1;
7355
666k
if (invert) bit ^= 0x1;
7356
7357
/* No character is accepted. */
7358
666k
if (length == 0 && bit == 0)
7359
87.0k
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7360
7361
666k
switch(length)
7362
666k
  {
7363
97.6k
  case 0:
7364
  /* When bit != 0, all characters are accepted. */
7365
97.6k
  return TRUE;
7366
7367
81.9k
  case 1:
/* One boundary: a single unsigned comparison. */
7368
81.9k
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7369
81.9k
  return TRUE;
7370
7371
159k
  case 2:
/* One interval [ranges[0], ranges[1]): subtract the base and do a range
check, or an equality check when the interval holds a single value. */
7372
159k
  if (ranges[0] + 1 != ranges[1])
7373
63.8k
    {
7374
63.8k
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7375
63.8k
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7376
63.8k
    }
7377
96.0k
  else
7378
96.0k
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7379
159k
  return TRUE;
7380
7381
156k
  case 3:
7382
156k
  if (bit != 0)
7383
73.6k
    {
7384
73.6k
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7385
73.6k
    if (ranges[0] + 1 != ranges[1])
7386
67.0k
      {
7387
67.0k
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7388
67.0k
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7389
67.0k
      }
7390
6.56k
    else
7391
6.56k
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7392
73.6k
    return TRUE;
7393
73.6k
    }
7394
7395
82.5k
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7396
82.5k
  if (ranges[1] + 1 != ranges[2])
7397
16.1k
    {
7398
16.1k
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7399
16.1k
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7400
16.1k
    }
7401
66.4k
  else
7402
66.4k
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7403
82.5k
  return TRUE;
7404
7405
170k
  case 4:
/* Two intervals of equal length whose bases differ in exactly one bit:
OR that bit into TMP1 and test only the upper interval. */
7406
170k
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7407
170k
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7408
170k
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
7409
170k
      && is_powerof2(ranges[2] - ranges[0]))
7410
9.53k
    {
7411
9.53k
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7412
9.53k
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7413
9.53k
    if (ranges[2] + 1 != ranges[3])
7414
2.90k
      {
7415
2.90k
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7416
2.90k
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7417
2.90k
      }
7418
6.62k
    else
7419
6.62k
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7420
9.53k
    return TRUE;
7421
9.53k
    }
7422
7423
161k
  if (bit != 0)
7424
53.1k
    {
/* i tracks how much has already been subtracted from TMP1, so the second
interval test can compensate for it. */
7425
53.1k
    i = 0;
7426
53.1k
    if (ranges[0] + 1 != ranges[1])
7427
35.5k
      {
7428
35.5k
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7429
35.5k
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7430
35.5k
      i = ranges[0];
7431
35.5k
      }
7432
17.5k
    else
7433
17.5k
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7434
7435
53.1k
    if (ranges[2] + 1 != ranges[3])
7436
7.22k
      {
7437
7.22k
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7438
7.22k
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7439
7.22k
      }
7440
45.8k
    else
7441
45.8k
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7442
53.1k
    return TRUE;
7443
53.1k
    }
7444
7445
108k
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7446
108k
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7447
108k
  if (ranges[1] + 1 != ranges[2])
7448
97.3k
    {
7449
97.3k
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7450
97.3k
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7451
97.3k
    }
7452
10.7k
  else
7453
10.7k
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7454
108k
  return TRUE;
7455
7456
0
  default:
7457
0
  SLJIT_UNREACHABLE();
7458
0
  return FALSE;
7459
666k
  }
7460
666k
}
7461
7462
/* Tries to test a class bitmap as a short list of individual characters.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes)
  nclass      when TRUE, the complement of each bitmap byte is listed and the
              sense of the final test is flipped
  invert      when TRUE, the sense of the final test is flipped (again)
  backtracks  jump list that receives the single emitted conditional jump

Returns FALSE, with nothing emitted, when the CPU lacks SLJIT_HAS_CMOV, when
more than MAX_CLASS_CHARS_SIZE list entries would be needed, or when no bit is
selected. Otherwise emits compare-and-select code that leaves a non-zero value
in TMP2 exactly when TMP1 equals one of the listed characters, followed by one
jump on TMP2, and returns TRUE. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7463
1.06M
{
7464
/* May destroy TMP1. */
7465
1.06M
DEFINE_COMPILER;
7466
1.06M
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
7467
1.06M
uint8_t byte;
7468
1.06M
sljit_s32 type;
7469
1.06M
int i, j, k, len, c;
7470
7471
1.06M
if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
7472
0
  return FALSE;
7473
7474
1.06M
len = 0;
7475
7476
11.5M
/* Collect the selected characters. A character c with bit 0x20 set whose
partner c - 0x20 is already listed is merged into that entry: the entry
becomes c | 0x100, where 0x100 marks a pair that differs only in bit 0x20. */
for (i = 0; i < 32; i++)
7477
11.3M
  {
7478
11.3M
  byte = bits[i];
7479
7480
11.3M
  if (nclass)
7481
3.53M
    byte = (sljit_u8)~byte;
7482
7483
11.3M
  j = 0;
7484
18.5M
  while (byte != 0)
7485
8.04M
    {
7486
8.04M
    if (byte & 0x1)
7487
4.17M
      {
7488
4.17M
      c = i * 8 + j;
7489
7490
4.17M
      k = len;
7491
7492
4.17M
      if ((c & 0x20) != 0)
7493
1.73M
        {
7494
4.65M
        for (k = 0; k < len; k++)
7495
3.07M
          if (char_list[k] == c - 0x20)
7496
154k
            {
7497
154k
            char_list[k] |= 0x120;
7498
154k
            break;
7499
154k
            }
7500
1.73M
        }
7501
7502
4.17M
/* k == len means no partner was merged, so c needs its own entry. */
      if (k == len)
7503
4.01M
        {
7504
4.01M
        if (len >= MAX_CLASS_CHARS_SIZE)
7505
868k
          return FALSE;
7506
7507
3.14M
        char_list[len++] = (uint16_t) c;
7508
3.14M
        }
7509
4.17M
      }
7510
7511
7.17M
    byte >>= 1;
7512
7.17M
    j++;
7513
7.17M
    }
7514
11.3M
  }
7515
7516
196k
if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */
7517
7518
196k
i = 0;
7519
196k
j = 0;
7520
7521
196k
/* TMP2 is the match accumulator. Character 0 needs a flag materialisation
because selecting TMP1 (== 0) into TMP2 could not signal a match. */
if (char_list[0] == 0)
7522
131k
  {
7523
131k
  i++;
7524
131k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
7525
131k
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
7526
131k
  }
7527
65.0k
else
7528
65.0k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7529
7530
608k
/* First pass: plain entries. Merged pairs are only counted in j here. */
while (i < len)
7531
412k
  {
7532
412k
  if ((char_list[i] & 0x100) != 0)
7533
84.1k
    j++;
7534
328k
  else
7535
328k
    {
7536
328k
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
7537
328k
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7538
328k
    }
7539
412k
  i++;
7540
412k
  }
7541
7542
196k
/* Second pass: merged pairs, compared after forcing bit 0x20 on in TMP1. */
if (j != 0)
7543
75.5k
  {
7544
75.5k
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
7545
7546
257k
  for (i = 0; i < len; i++)
7547
181k
    if ((char_list[i] & 0x100) != 0)
7548
84.1k
      {
7549
84.1k
      j--;
7550
84.1k
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
7551
84.1k
      SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7552
84.1k
      }
7553
75.5k
  }
7554
7555
196k
if (invert)
7556
162k
  nclass = !nclass;
7557
7558
196k
/* Jump when TMP2 is zero (or, with the flipped sense, non-zero). */
type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
7559
196k
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
7560
196k
return TRUE;
7561
196k
}
7562
7563
/* Emits a compact test for a class bitmap if one of the two strategies can
handle it: optimize_class_ranges() is tried first, then
optimize_class_chars(). All arguments are passed through unchanged. Returns
TRUE when either strategy succeeded, FALSE when neither did. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7564
1.73M
{
7565
/* May destroy TMP1. */
7566
1.73M
if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
7567
666k
  return TRUE;
7568
1.06M
return optimize_class_chars(common, bits, nclass, invert, backtracks);
7569
1.73M
}
7570
7571
/* Emits the fast-call helper for the "any newline" test. The helper is
entered with SLJIT_FAST_ENTER and returns through RETURN_ADDR with a non-zero
TMP2 (and the zero flag clear) when the character in TMP1 is one of the
tested values. TMP1 is modified by the non-EBCDIC code. */
static void check_anynewline(compiler_common *common)
7572
3.72k
{
7573
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7574
3.72k
DEFINE_COMPILER;
7575
7576
3.72k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7577
7578
#ifdef EBCDIC
/* EBCDIC: LF, VT, FF, CR and NEL are compared one by one. */
7579
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);
7580
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7581
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);
7582
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7583
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);
7584
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7585
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);
7586
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7587
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);
7588
#else
/* Rebase TMP1 to CHAR_LF so that LF..CR becomes one unsigned range check;
NEL is then compared relative to the same base. */
7589
3.72k
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);
7590
3.72k
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);
7591
3.72k
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7592
3.72k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);
7593
3.72k
#endif
7594
3.72k
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7595
#if PCRE2_CODE_UNIT_WIDTH == 8
7596
if (common->utf)
7597
  {
7598
#endif
/* Setting the low bit lets one comparison cover both 0x2028 and 0x2029. */
7599
3.72k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7600
3.72k
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7601
3.72k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);
7602
#if PCRE2_CODE_UNIT_WIDTH == 8
7603
  }
7604
#endif
7605
3.72k
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7606
3.72k
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7607
3.72k
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7608
3.72k
}
7609
7610
/* Emits the fast-call helper for the horizontal-space test. The helper is
entered with SLJIT_FAST_ENTER and returns through RETURN_ADDR with a non-zero
TMP2 (and the zero flag clear) when the character in TMP1 is one of the
tested values: HT, SPACE and NBSP always; additionally U+1680, U+180E,
U+2000..U+200A, U+202F, U+205F and U+3000 in the wide/Unicode builds (for
8-bit code units only when common->utf is set). TMP1 is modified on that
extended path. */
static void check_hspace(compiler_common *common)
7611
10.1k
{
7612
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
7613
10.1k
DEFINE_COMPILER;
7614
7615
10.1k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7616
7617
10.1k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_HT);
7618
10.1k
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7619
10.1k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_SPACE);
7620
10.1k
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7621
10.1k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NBSP);
7622
10.1k
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7623
#if PCRE2_CODE_UNIT_WIDTH == 8
7624
if (common->utf)
7625
  {
7626
#endif
7627
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7628
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7629
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7630
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7631
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* Rebase TMP1 to 0x2000: the range 0x2000..0x200A becomes one unsigned
comparison and the remaining values are compared relative to that base. */
7632
10.1k
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7633
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7634
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7635
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7636
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7637
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7638
10.1k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7639
10.1k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7640
#if PCRE2_CODE_UNIT_WIDTH == 8
7641
  }
7642
#endif
7643
10.1k
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7644
10.1k
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7645
7646
10.1k
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7647
10.1k
}
7648
7649
/* Emits the fast-call helper for the vertical-space test. The helper is
entered with SLJIT_FAST_ENTER and returns through RETURN_ADDR with a non-zero
TMP2 (and the zero flag clear) when the character in TMP1 is one of the
tested values. The emitted instruction sequence is the same as the one in
check_anynewline(). TMP1 is modified by the non-EBCDIC code. */
static void check_vspace(compiler_common *common)
7650
4.27k
{
7651
/* Check whether TMP1 contains a vertical space character (LF..CR, NEL, and
with Unicode/wide code units 0x2028 or 0x2029). TMP2 destroyed. */
7652
4.27k
DEFINE_COMPILER;
7653
7654
4.27k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7655
7656
#ifdef EBCDIC
/* EBCDIC: LF, VT, FF, CR and NEL are compared one by one. */
7657
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);
7658
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7659
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);
7660
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7661
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);
7662
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7663
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);
7664
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7665
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);
7666
#else
/* Rebase TMP1 to CHAR_LF so that LF..CR becomes one unsigned range check;
NEL is then compared relative to the same base. */
7667
4.27k
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);
7668
4.27k
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);
7669
4.27k
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7670
4.27k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);
7671
4.27k
#endif
7672
4.27k
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7673
#if PCRE2_CODE_UNIT_WIDTH == 8
7674
if (common->utf)
7675
  {
7676
#endif
/* Setting the low bit lets one comparison cover both 0x2028 and 0x2029. */
7677
4.27k
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7678
4.27k
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7679
4.27k
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);
7680
#if PCRE2_CODE_UNIT_WIDTH == 8
7681
  }
7682
#endif
7683
4.27k
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7684
4.27k
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7685
7686
4.27k
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7687
4.27k
}
7688
7689
/* Emits the shared case-sensitive comparison routine (reached through
common->casefulcmp by compile_ref_matchingpath).

Register contract as visible in the emitted code:
  TMP1     points to the first text; it is overwritten with the return
           address before the routine returns
  STR_PTR  on entry is TMP2 past the start of the second text; the routine
           first subtracts TMP2 from it and then advances it while comparing
  TMP2     remaining length, decreased by IN_UCHARS(1) per compared unit

The loop leaves early on the first mismatch, so TMP2 is zero on return only
when every unit compared equal. The return address is parked in LOCAL0 because
the routine needs the registers as scratch. */
static void do_casefulcmp(compiler_common *common)
7690
2.62k
{
7691
2.62k
DEFINE_COMPILER;
7692
2.62k
struct sljit_jump *jump;
7693
2.62k
struct sljit_label *label;
7694
2.62k
int char1_reg;
7695
2.62k
int char2_reg;
7696
7697
2.62k
/* Pick the two registers that hold the characters being compared. With
virtual registers STR_END / STACK_TOP are borrowed; their values are parked in
TMP3 / RETURN_ADDR below and restored before returning. */
if (HAS_VIRTUAL_REGISTERS)
7698
0
  {
7699
0
  char1_reg = STR_END;
7700
0
  char2_reg = STACK_TOP;
7701
0
  }
7702
2.62k
else
7703
2.62k
  {
7704
2.62k
  char1_reg = TMP3;
7705
2.62k
  char2_reg = RETURN_ADDR;
7706
2.62k
  }
7707
7708
/* Update ref_update_local_size() when this changes. */
7709
2.62k
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
7710
2.62k
/* Save the return address in LOCAL0 and rewind STR_PTR to the start of the
text to compare. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7711
2.62k
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7712
7713
2.62k
if (char1_reg == STR_END)
7714
0
  {
7715
0
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7716
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7717
0
  }
7718
7719
2.62k
/* Three loop variants. The SLJIT_MEM_SUPP calls only probe whether the target
supports the post- / pre-update addressing form. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7720
0
  {
7721
0
  /* Variant 1: loads with post-increment. */
  label = LABEL();
7722
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7723
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7724
0
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7725
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7726
0
  JUMPTO(SLJIT_NOT_ZERO, label);
7727
7728
0
  JUMPHERE(jump);
7729
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7730
0
  }
7731
2.62k
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7732
0
  {
7733
0
  /* Variant 2: loads with pre-increment. Both pointers are moved back by one
  unit first, and STR_PTR is corrected by one unit after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7734
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7735
7736
0
  label = LABEL();
7737
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7738
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7739
0
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7740
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7741
0
  JUMPTO(SLJIT_NOT_ZERO, label);
7742
7743
0
  JUMPHERE(jump);
7744
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7745
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7746
0
  }
7747
2.62k
else
7748
2.62k
  {
7749
2.62k
  /* Variant 3: plain loads followed by explicit pointer increments. */
  label = LABEL();
7750
2.62k
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7751
2.62k
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7752
2.62k
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7753
2.62k
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7754
2.62k
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7755
2.62k
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7756
2.62k
  JUMPTO(SLJIT_NOT_ZERO, label);
7757
7758
2.62k
  JUMPHERE(jump);
7759
2.62k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7760
2.62k
  }
7761
7762
2.62k
/* Give the borrowed registers their original values back. */
if (char1_reg == STR_END)
7763
0
  {
7764
0
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7765
0
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7766
0
  }
7767
7768
2.62k
/* TMP1 holds the return address reloaded from LOCAL0. */
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7769
2.62k
}
7770
7771
/* Emits the shared table-driven caseless comparison routine (reached through
common->caselesscmp by compile_ref_matchingpath). It has the same shape as
do_casefulcmp: TMP1 points to the first text, STR_PTR is rewound by TMP2 on
entry, and TMP2 is decreased by IN_UCHARS(1) per compared unit, so it is zero
on return only if everything matched. Before the comparison each unit is
translated through the common->lcc table; when the code unit width is not 8,
units above 255 are compared untranslated.

LOCAL0 holds the return address and LOCAL1 the saved STR_END, whose register
is used for the first character. */
static void do_caselesscmp(compiler_common *common)
7772
400
{
7773
400
DEFINE_COMPILER;
7774
400
struct sljit_jump *jump;
7775
400
struct sljit_label *label;
7776
400
int char1_reg = STR_END;
7777
400
int char2_reg;
7778
400
int lcc_table;
7779
400
int opt_type = 0;
7780
7781
400
/* Register selection; with virtual registers STACK_TOP / STACK_LIMIT are
borrowed and parked in TMP3 / RETURN_ADDR further down. */
if (HAS_VIRTUAL_REGISTERS)
7782
0
  {
7783
0
  char2_reg = STACK_TOP;
7784
0
  lcc_table = STACK_LIMIT;
7785
0
  }
7786
400
else
7787
400
  {
7788
400
  char2_reg = RETURN_ADDR;
7789
400
  lcc_table = TMP3;
7790
400
  }
7791
7792
400
/* Probe for post- (1) or pre-update (2) load support; 0 means plain loads. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7793
0
  opt_type = 1;
7794
400
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7795
0
  opt_type = 2;
7796
7797
/* Update ref_update_local_size() when this changes. */
7798
400
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
7799
400
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7800
400
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7801
7802
400
/* Save STR_END; its register becomes char1_reg. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);
7803
7804
400
if (char2_reg == STACK_TOP)
7805
0
  {
7806
0
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7807
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7808
0
  }
7809
7810
400
OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7811
7812
400
/* Loop head: load one unit from each text using the selected variant. */
if (opt_type == 1)
7813
0
  {
7814
0
  label = LABEL();
7815
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7816
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7817
0
  }
7818
400
else if (opt_type == 2)
7819
0
  {
7820
0
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7821
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7822
7823
0
  label = LABEL();
7824
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7825
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7826
0
  }
7827
400
else
7828
400
  {
7829
400
  /* Plain loads: TMP1 is advanced here, STR_PTR after the table lookups. */
  label = LABEL();
7830
400
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7831
400
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7832
400
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7833
400
  }
7834
7835
400
/* Translate both units through the lcc table; wide units above 255 skip the
lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
7836
400
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7837
400
#endif
7838
400
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7839
400
#if PCRE2_CODE_UNIT_WIDTH != 8
7840
400
JUMPHERE(jump);
7841
400
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7842
400
#endif
7843
400
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7844
400
#if PCRE2_CODE_UNIT_WIDTH != 8
7845
400
JUMPHERE(jump);
7846
400
#endif
7847
7848
400
if (opt_type == 0)
7849
400
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7850
7851
400
/* Leave on mismatch (TMP2 stays non-zero), otherwise count down and loop. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7852
400
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7853
400
JUMPTO(SLJIT_NOT_ZERO, label);
7854
7855
400
JUMPHERE(jump);
7856
400
/* Reload the return address into TMP1. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7857
7858
400
if (opt_type == 2)
7859
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7860
7861
400
/* Restore the borrowed registers and STR_END. */
if (char2_reg == STACK_TOP)
7862
0
  {
7863
0
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7864
0
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7865
0
  }
7866
7867
400
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
7868
400
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7869
400
}
7870
7871
#include "pcre2_jit_char_inc.h"
7872
7873
/* Emits the matching-path code for the simple (zero-width) assertion opcodes:
OP_SOD, OP_SOM, the four word boundary opcodes, OP_EODN, OP_EOD, OP_DOLL,
OP_DOLLM, OP_CIRC and OP_CIRCM.

Arguments:
  common      compiler state
  type        the assertion opcode to compile
  cc          current position in the byte code; returned unchanged
  backtracks  list that receives every jump taken when the assertion fails

Returns cc. Any other opcode is treated as unreachable. */
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
7874
1.80M
{
7875
1.80M
DEFINE_COMPILER;
7876
1.80M
struct sljit_jump *jump[4];
7877
7878
1.80M
switch(type)
7879
1.80M
  {
7880
145k
  /* Fails unless STR_PTR equals the "begin" field of the arguments. */
  case OP_SOD:
7881
145k
  if (HAS_VIRTUAL_REGISTERS)
7882
0
    {
7883
0
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7884
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7885
0
    }
7886
145k
  else
7887
145k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
7888
145k
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7889
145k
  return cc;
7890
7891
33.5k
  /* Fails unless STR_PTR equals the "str" field of the arguments. */
  case OP_SOM:
7892
33.5k
  if (HAS_VIRTUAL_REGISTERS)
7893
0
    {
7894
0
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7895
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7896
0
    }
7897
33.5k
  else
7898
33.5k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
7899
33.5k
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7900
33.5k
  return cc;
7901
7902
100k
  /* Word boundaries: fast-call the shared helper (plain or UCP flavour) and
  then branch on its result. */
  case OP_NOT_WORD_BOUNDARY:
7903
177k
  case OP_WORD_BOUNDARY:
7904
221k
  case OP_NOT_UCP_WORD_BOUNDARY:
7905
259k
  case OP_UCP_WORD_BOUNDARY:
7906
259k
  add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
7907
259k
#ifdef SUPPORT_UNICODE
7908
259k
  if (common->invalid_utf)
7909
0
    {
7910
0
    /* Here the result is tested as a value in TMP2: the negated opcodes fail
    when it is non-zero, the others when it is <= 0 (signed comparison). */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7911
0
    return cc;
7912
0
    }
7913
259k
#endif /* SUPPORT_UNICODE */
7914
259k
  /* Otherwise the result is taken from the zero flag; the compiler is told
  that the flag is valid after the call. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
7915
259k
  add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
7916
259k
  return cc;
7917
7918
379k
  /* Succeeds at the end of the subject, or when only a final newline
  remains. jump[0] is the "already at the end" shortcut. */
  case OP_EODN:
7919
  /* Requires rather complex checks. */
7920
379k
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
7921
379k
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7922
0
    {
7923
0
    /* Fixed two-unit newline: exactly two units must remain and both must
    match. In partial modes a shorter remainder whose first unit matches goes
    through check_partial() before backtracking. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7924
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7925
0
    if (common->mode == PCRE2_JIT_COMPLETE)
7926
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7927
0
    else
7928
0
      {
7929
0
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
7930
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
7931
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
7932
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
7933
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
7934
0
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
7935
0
      check_partial(common, TRUE);
7936
0
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7937
0
      JUMPHERE(jump[1]);
7938
0
      }
7939
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7940
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7941
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7942
0
    }
7943
379k
  else if (common->nltype == NLTYPE_FIXED)
7944
379k
    {
7945
379k
    /* Fixed one-unit newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7946
379k
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7947
379k
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7948
379k
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
7949
379k
    }
7950
0
  else
7951
0
    {
7952
0
    /* Other newline types. A leading CR is handled first: CR as the very
    last unit (jump[2]) or CR followed by NL as the last two units (jump[3])
    succeed; anything else after a CR fails. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7953
0
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
7954
0
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7955
0
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
7956
0
    jump[2] = JUMP(SLJIT_GREATER);
7957
0
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
7958
    /* Equal. */
7959
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7960
0
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
7961
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7962
7963
0
    /* The current unit is not CR. */
    JUMPHERE(jump[1]);
7964
0
    if (common->nltype == NLTYPE_ANYCRLF)
7965
0
      {
7966
0
      /* Only a final NL is acceptable. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7967
0
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
7968
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
7969
0
      }
7970
0
    else
7971
0
      {
7972
0
      /* Read one character (STR_PTR is saved in TMP3 and restored on
      success); it must end the subject and be accepted by the shared
      anynewline helper. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7973
0
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
7974
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
7975
0
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
7976
0
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
7977
0
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
7978
0
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7979
0
      }
7980
0
    JUMPHERE(jump[2]);
7981
0
    JUMPHERE(jump[3]);
7982
0
    }
7983
379k
  JUMPHERE(jump[0]);
7984
379k
  if (common->mode != PCRE2_JIT_COMPLETE)
7985
0
    check_partial(common, TRUE);
7986
379k
  return cc;
7987
7988
173k
  /* Fails while any subject unit remains. */
  case OP_EOD:
7989
173k
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
7990
173k
  if (common->mode != PCRE2_JIT_COMPLETE)
7991
0
    check_partial(common, TRUE);
7992
173k
  return cc;
7993
7994
351k
  /* Fails when the PCRE2_NOTEOL option bit is set; otherwise behaves like
  OP_EODN, or like a strict end-of-subject test when common->endonly is set. */
  case OP_DOLL:
7995
351k
  if (HAS_VIRTUAL_REGISTERS)
7996
0
    {
7997
0
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7998
0
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7999
0
    }
8000
351k
  else
8001
351k
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8002
351k
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8003
8004
351k
  if (!common->endonly)
8005
276k
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8006
75.5k
  else
8007
75.5k
    {
8008
75.5k
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8009
75.5k
    check_partial(common, FALSE);
8010
75.5k
    }
8011
351k
  return cc;
8012
8013
98.1k
  /* At the end of the subject: fails if PCRE2_NOTEOL is set, otherwise
  succeeds (jump[0]). Before the end (jump[1]): the text at STR_PTR must be a
  newline. */
  case OP_DOLLM:
8014
98.1k
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8015
98.1k
  if (HAS_VIRTUAL_REGISTERS)
8016
0
    {
8017
0
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8018
0
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8019
0
    }
8020
98.1k
  else
8021
98.1k
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8022
98.1k
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8023
98.1k
  check_partial(common, FALSE);
8024
98.1k
  jump[0] = JUMP(SLJIT_JUMP);
8025
98.1k
  JUMPHERE(jump[1]);
8026
8027
98.1k
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8028
0
    {
8029
0
    /* Fixed two-unit newline: both units must be present and match. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8030
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8031
0
    if (common->mode == PCRE2_JIT_COMPLETE)
8032
0
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8033
0
    else
8034
0
      {
8035
0
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8036
      /* STR_PTR = STR_END - IN_UCHARS(1) */
8037
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8038
0
      check_partial(common, TRUE);
8039
0
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8040
0
      JUMPHERE(jump[1]);
8041
0
      }
8042
8043
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8044
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8045
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8046
0
    }
8047
98.1k
  else
8048
98.1k
    {
8049
98.1k
    /* Look at the next character without consuming it and test it against
    the configured newline type. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
8050
98.1k
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8051
98.1k
    }
8052
98.1k
  JUMPHERE(jump[0]);
8053
98.1k
  return cc;
8054
8055
288k
  /* Fails when STR_PTR is past "begin" or when PCRE2_NOTBOL is set. */
  case OP_CIRC:
8056
288k
  if (HAS_VIRTUAL_REGISTERS)
8057
0
    {
8058
0
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8059
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8060
0
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8061
0
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8062
0
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8063
0
    }
8064
288k
  else
8065
288k
    {
8066
288k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8067
288k
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8068
288k
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8069
288k
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8070
288k
    }
8071
288k
  return cc;
8072
8073
74.8k
  /* At "begin": fails if PCRE2_NOTBOL is set, otherwise succeeds (jump[0]).
  Past "begin" (jump[1]): the text just before STR_PTR must be a newline.
  TMP2 keeps the "begin" pointer for the two-unit newline bounds check. */
  case OP_CIRCM:
8074
  /* TMP2 might be used by peek_char_back. */
8075
74.8k
  if (HAS_VIRTUAL_REGISTERS)
8076
0
    {
8077
0
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8078
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8079
0
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8080
0
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8081
0
    }
8082
74.8k
  else
8083
74.8k
    {
8084
74.8k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8085
74.8k
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8086
74.8k
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8087
74.8k
    }
8088
74.8k
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8089
74.8k
  jump[0] = JUMP(SLJIT_JUMP);
8090
74.8k
  JUMPHERE(jump[1]);
8091
8092
74.8k
  /* Unless alt_circumflex is set, a position at the very end of the subject
  does not match. */
  if (!common->alt_circumflex)
8093
50.4k
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8094
8095
74.8k
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8096
0
    {
8097
0
    /* Fixed two-unit newline: both preceding units must exist (not before
    "begin") and match. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8098
0
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8099
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8100
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8101
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8102
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8103
0
    }
8104
74.8k
  else
8105
74.8k
    {
8106
74.8k
    peek_char_back(common, common->nlmax, backtracks);
8107
74.8k
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8108
74.8k
    }
8109
74.8k
  JUMPHERE(jump[0]);
8110
74.8k
  return cc;
8111
1.80M
  }
8112
0
SLJIT_UNREACHABLE();
8113
0
return cc;
8114
1.80M
}
8115
8116
/* Forward definitions. */
8117
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8118
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8119
8120
/* Allocates a zero-filled backtrack record of "size" bytes with
sljit_alloc_memory(), records "ccstart" in its cc field and pushes it onto the
parent->top list (the previous top is kept in the prev field). If the compiler
reports an error after the allocation, the enclosing function returns "error".
The macro relies on local variables named compiler, backtrack and parent being
in scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
8121
22.9M
  do \
8122
22.9M
    { \
8123
22.9M
    backtrack = sljit_alloc_memory(compiler, (size)); \
8124
22.9M
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8125
22.9M
      return error; \
8126
22.9M
    memset(backtrack, 0, size); \
8127
22.9M
    backtrack->prev = parent->top; \
8128
22.9M
    backtrack->cc = (ccstart); \
8129
22.9M
    parent->top = backtrack; \
8130
22.9M
    } \
8131
22.9M
  while (0)
8132
8133
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function returns without a value. Relies on local
variables named compiler, backtrack and parent being in scope. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
8134
3.13M
  do \
8135
3.13M
    { \
8136
3.13M
    backtrack = sljit_alloc_memory(compiler, (size)); \
8137
3.13M
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8138
3.13M
      return; \
8139
3.13M
    memset(backtrack, 0, size); \
8140
3.13M
    backtrack->prev = parent->top; \
8141
3.13M
    backtrack->cc = (ccstart); \
8142
3.13M
    parent->top = backtrack; \
8143
3.13M
    } \
8144
3.13M
  while (0)
8145
8146
22.8M
#define BACKTRACK_AS(type) ((type *)backtrack)
8147
8148
/* Emits the code that selects which group of a duplicate-name reference
(OP_DNREF / OP_DNREFI) is used. The name table entries belonging to the
reference are visited in order; the first group whose OVECTOR start value
differs from OVECTOR(1) (the marker that compile_ref_matchingpath compares
against to detect an unset group) is selected. If none of the earlier entries
qualifies, the last entry is used.

Arguments:
  common      compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL (and common->unset_backref is not set), receives a
              jump taken if the last candidate is unset as well

The emitted code clobbers TMP1. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
8149
0
{
8150
/* The OVECTOR offset goes to TMP2. */
8151
0
DEFINE_COMPILER;
8152
0
/* The opcode carries two 16-bit operands: the index of the first name table
entry and the number of entries. */
int count = GET2(cc, 1 + IMM2_SIZE);
8153
0
PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
8154
0
unsigned int offset;
8155
0
jump_list *found = NULL;
8156
8157
0
SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
8158
8159
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8160
8161
0
/* All entries but the last: set TMP2 for the candidate and leave through
"found" as soon as its start value differs from the marker in TMP1. */
count--;
8162
0
while (count-- > 0)
8163
0
  {
8164
0
  offset = GET2(slot, 0) << 1;
8165
0
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
8166
0
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
8167
0
  slot += common->name_entry_size;
8168
0
  }
8169
8170
0
/* Last entry: always selected; optionally backtrack when it is unset. */
offset = GET2(slot, 0) << 1;
8171
0
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
8172
0
if (backtracks != NULL && !common->unset_backref)
8173
0
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
8174
8175
0
set_jumps(found, LABEL());
8176
0
}
8177
8178
/* Emits the code that matches a back reference once at the current subject
position.

Arguments:
  common      compiler state
  cc          points to the reference opcode. For OP_REF / OP_REFI the group
              number is read from the opcode; for the other (duplicate name)
              opcodes the emitted code reads the start / end pair through
              TMP2, which is expected to have been set up beforehand (see
              compile_dnref_search)
  backtracks  list that receives the jumps taken on failure
  withchecks  when TRUE, tests for an unset group and for an empty reference
              are emitted
  emptyfail   when TRUE (together with withchecks), an empty reference is
              treated as a failure

Two strategies are emitted: with Unicode support, in UTF or UCP mode, a caseless
reference is compared character by character using the Unicode case data;
every other reference uses the shared casefulcmp / caselesscmp routines. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
8179
795k
{
8180
795k
DEFINE_COMPILER;
8181
795k
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8182
795k
int offset = 0;
8183
795k
struct sljit_jump *jump = NULL;
8184
795k
struct sljit_jump *partial;
8185
795k
struct sljit_jump *nopartial;
8186
795k
#if defined SUPPORT_UNICODE
8187
795k
struct sljit_label *loop;
8188
795k
struct sljit_label *caseless_loop;
8189
795k
struct sljit_jump *turkish_ascii_i = NULL;
8190
795k
struct sljit_jump *turkish_non_ascii_i = NULL;
8191
795k
jump_list *no_match = NULL;
8192
795k
/* Registers borrowed by the Unicode caseless loop; their values are saved in
LOCAL0..LOCAL2 and restored on every exit path. */
int source_reg = COUNT_MATCH;
8193
795k
int source_end_reg = ARGUMENTS;
8194
795k
int char1_reg = STACK_LIMIT;
8195
795k
PCRE2_UCHAR refi_flag = 0;
8196
8197
795k
/* The caseless opcodes keep their REFI_FLAG_* bits in the last code unit. */
if (*cc == OP_REFI || *cc == OP_DNREFI)
8198
342k
  refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];
8199
795k
#endif /* SUPPORT_UNICODE */
8200
8201
795k
/* TMP1 = start of the referenced text. */
if (ref)
8202
795k
  {
8203
795k
  offset = GET2(cc, 1) << 1;
8204
795k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8205
  /* OVECTOR(1) contains the "string begin - 1" constant. */
8206
795k
  if (withchecks && !common->unset_backref)
8207
525k
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8208
795k
  }
8209
0
else
8210
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8211
8212
795k
#if defined SUPPORT_UNICODE
8213
795k
if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))
8214
304k
  {
8215
  /* Update ref_update_local_size() when this changes. */
8216
304k
  SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
8217
8218
304k
  /* TMP2 = end of the referenced text. */
  if (ref)
8219
304k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8220
0
  else
8221
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8222
8223
304k
  if (withchecks && emptyfail)
8224
121k
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
8225
8226
304k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);
8227
304k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);
8228
304k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);
8229
8230
304k
  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
8231
304k
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
8232
8233
304k
  /* Main loop, one character per iteration. "jump" leaves when the whole
  reference has been consumed (success); "partial" leaves when the subject is
  exhausted first. */
  loop = LABEL();
8234
304k
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
8235
304k
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8236
8237
  /* Read original character. It must be a valid UTF character. */
8238
304k
  /* read_char() reads through STR_PTR, so STR_PTR is temporarily swapped
  with the reference pointer (the subject pointer is parked in TMP3). */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8239
304k
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
8240
8241
304k
  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
8242
8243
304k
  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
8244
304k
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8245
304k
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
8246
8247
  /* Read second character. */
8248
304k
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
8249
8250
304k
  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8251
8252
304k
  if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
8253
304k
        REFI_FLAG_TURKISH_CASING)
8254
0
    {
8255
0
    /* Turkish casing: divert when the reference character is 'I' / 'i'
    (0x49 / 0x69) or 0x130 / 0x131; handled after the generic lookup below. */
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);
8256
0
    turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);
8257
8258
0
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);
8259
0
    turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);
8260
0
    }
8261
8262
304k
  /* Keep the subject character in TMP3 across the getucd helper call. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
8263
8264
304k
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
8265
8266
304k
  /* TMP2 = TMP2 * 12 + address of ucd_records (4x + 8x); presumably TMP2 is
  the record index returned by the helper and 12 the record size - confirm
  against the ucd_record definition. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
8267
304k
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
8268
304k
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
8269
8270
304k
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
8271
8272
304k
  /* other_case is added to the subject character; if the sum equals the
  reference character the pair matches. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
8273
304k
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
8274
304k
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
8275
304k
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8276
8277
304k
  /* A zero caseset means there is nothing else to try; otherwise TMP2 is
  turned into a pointer into ucd_caseless_sets (32-bit entries). */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8278
304k
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
8279
304k
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
8280
8281
304k
  /* With the caseless restriction, a set whose first entry is below 128 is
  rejected. */
  if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)
8282
6.06k
    add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));
8283
8284
304k
  /* Scan the set: an entry equal to the reference character is a match, a
  smaller entry continues the scan, a larger one ends it without a match. */
  caseless_loop = LABEL();
8285
304k
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8286
304k
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
8287
304k
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
8288
304k
  JUMPTO(SLJIT_EQUAL, loop);
8289
304k
  JUMPTO(SLJIT_LESS, caseless_loop);
8290
8291
304k
  if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
8292
304k
        REFI_FLAG_TURKISH_CASING)
8293
0
    {
8294
0
    add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
8295
0
    JUMPHERE(turkish_ascii_i);
8296
8297
0
    /* Reference is 'i' (bit 5 set) or 'I': the expected subject character
    becomes 0x130 or 0x131 respectively. TMP1 still holds the subject
    character here. */
    OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
8298
0
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8299
0
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8300
0
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);
8301
0
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8302
8303
0
    add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
8304
0
    JUMPHERE(turkish_non_ascii_i);
8305
8306
0
    /* Reference is 0x130 or 0x131: the expected subject character becomes
    0x69 ('i') or 0x49 ('I') respectively. */
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8307
0
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8308
0
    OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
8309
0
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);
8310
0
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8311
0
    }
8312
8313
304k
  /* Failure exit: restore the borrowed registers and backtrack. In complete
  mode running out of subject is a plain failure as well. */
  set_jumps(no_match, LABEL());
8314
304k
  if (common->mode == PCRE2_JIT_COMPLETE)
8315
304k
    JUMPHERE(partial);
8316
8317
304k
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8318
304k
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8319
304k
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8320
304k
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8321
8322
304k
  if (common->mode != PCRE2_JIT_COMPLETE)
8323
0
    {
8324
0
    /* Partial matching modes: subject exhausted in the middle of the
    reference. */
    JUMPHERE(partial);
8325
0
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8326
0
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8327
0
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8328
8329
0
    check_partial(common, FALSE);
8330
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8331
0
    }
8332
8333
304k
  /* Success exit: the whole reference has been matched. */
  JUMPHERE(jump);
8334
304k
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8335
304k
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8336
304k
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8337
304k
  return;
8338
304k
  }
8339
490k
else
8340
490k
#endif /* SUPPORT_UNICODE */
8341
490k
  {
8342
490k
  /* TMP2 = length of the referenced text (end - start); the zero flag tells
  whether it is empty. */
  if (ref)
8343
490k
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
8344
0
  else
8345
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
8346
8347
490k
  if (withchecks)
8348
303k
    jump = JUMP(SLJIT_ZERO);
8349
8350
490k
  /* Advance STR_PTR past the candidate text; the comparison routines rewind
  it by TMP2 themselves. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
8351
490k
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
8352
490k
  if (common->mode == PCRE2_JIT_COMPLETE)
8353
490k
    add_jump(compiler, backtracks, partial);
8354
8355
490k
  /* The routine leaves TMP2 non-zero when the texts differ. */
  add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
8356
490k
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8357
8358
490k
  if (common->mode != PCRE2_JIT_COMPLETE)
8359
0
    {
8360
0
    /* Partial matching modes: when the reference is longer than the rest of
    the subject, compare only the available part and report through
    check_partial() if it matches. */
    nopartial = JUMP(SLJIT_JUMP);
8361
0
    JUMPHERE(partial);
8362
    /* TMP2 -= STR_END - STR_PTR */
8363
0
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
8364
0
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
8365
0
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
8366
0
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
8367
0
    add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
8368
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8369
0
    JUMPHERE(partial);
8370
0
    check_partial(common, FALSE);
8371
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8372
0
    JUMPHERE(nopartial);
8373
0
    }
8374
490k
  }
8375
8376
490k
/* Empty reference (only detected when withchecks is set): either a failure
or an immediate success, depending on emptyfail. */
if (jump != NULL)
8377
303k
  {
8378
303k
  if (emptyfail)
8379
21.0k
    add_jump(compiler, backtracks, jump);
8380
282k
  else
8381
282k
    JUMPHERE(jump);
8382
303k
  }
8383
490k
}
8384
8385
/* Emits the matching path of a back reference (OP_REF, OP_REFI, or one of
the other reference opcodes routed here) that is followed by a repeat opcode
in the range OP_CRSTAR ... OP_CRMINRANGE.

  common   JIT compiler state
  cc       address of the back reference opcode
  parent   backtrack node under which the new ref_iterator_backtrack is pushed

Returns the address of the item after the repeat opcode and its arguments.
NULL is the error value handed to PUSH_BACKTRACK.

A max_count of zero stands for "no upper limit": that is the value used for
the star and plus repeats, which loop without a counter check. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_SPTR ref_start = cc;
BOOL numbered = (*cc == OP_REF || *cc == OP_REFI);
BOOL lazy;
BOOL counted;
PCRE2_UCHAR repeat;
int counter_slot = LOCAL2;
int ovec_index = 0;
int min_count = 0;
int max_count = 0;
struct sljit_label *loop_top;
struct sljit_jump *empty_ref;
struct sljit_jump *zero_repeats_first = NULL;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* OP_REF / OP_REFI: the doubled 2-unit argument indexes the ovector pair.
Otherwise an extra IMM2 argument is stepped over. */
if (numbered)
  ovec_index = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;

if (*ref_start == OP_REFI || *ref_start == OP_DNREFI)
  {
  /* The caseless forms occupy one more code unit. */
  cc += 1;
#ifdef SUPPORT_UNICODE
  if (common->utf || common->ucp)
    counter_slot = LOCAL3;
#endif
  }

repeat = cc[1 + IMM2_SIZE];

SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
/* Update ref_update_local_size() when this changes. */
SLJIT_ASSERT(counter_slot + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);

/* The compile-time assert above makes the odd opcodes the minimizing ones. */
lazy = (repeat & 0x1) != 0;

switch(repeat)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  /* Both limits keep their initial value of zero. */
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min_count = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRQUERY:
  case OP_CRMINQUERY:
  max_count = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min_count = GET2(cc, 1 + IMM2_SIZE + 1);
  max_count = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* An iteration counter is only maintained when a limit above one exists. */
counted = (min_count > 1 || max_count > 1);

if (!lazy)
  {
  /* ---- Greedy repeat ---- */
  if (min_count == 0)
    {
    allocate_stack(common, 2);
    if (numbered)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* With a zero minimum, an invalid (unset) reference is treated exactly
    like an empty one, so a single check covers both. */
    if (numbered)
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index + 1));
    else
      {
      compile_dnref_search(common, ref_start, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_slot + SSIZE_OF(sw), TMP2, 0);
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (numbered)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);

    if (numbered)
      {
      if (!common->unset_backref)
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index + 1));
      }
    else
      {
      compile_dnref_search(common, ref_start, &backtrack->own_backtracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_slot + SSIZE_OF(sw), TMP2, 0);
      empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  if (counted)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_slot, SLJIT_IMM, 0);

  loop_top = LABEL();
  if (!numbered)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), counter_slot + SSIZE_OF(sw));
  compile_ref_matchingpath(common, ref_start, &backtrack->own_backtracks, FALSE, FALSE);

  if (counted)
    {
    /* Bump the iteration counter kept in the local slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), counter_slot);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_slot, TMP1, 0);
    if (min_count > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min_count, loop_top);
    if (max_count > 1)
      {
      struct sljit_jump *limit_reached = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max_count);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, loop_top);
      JUMPHERE(limit_reached);
      }
    }

  if (max_count == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, loop_top);
    }

  JUMPHERE(empty_ref);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* ---- Minimizing repeat ---- */
allocate_stack(common, numbered ? 2 : 3);
if (numbered)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (repeat != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min_count == 0)
  {
  /* With a zero minimum, an invalid (unset) reference is treated exactly
  like an empty one, so a single check covers both. */
  if (numbered)
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index + 1));
  else
    {
    compile_dnref_search(common, ref_start, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  zero_repeats_first = JUMP(SLJIT_JUMP);
  }
else
  {
  if (numbered)
    {
    if (!common->unset_backref)
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index + 1));
    }
  else
    {
    compile_dnref_search(common, ref_start, &backtrack->own_backtracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty_ref = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max_count > 0)
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max_count));

if (!numbered)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ref_start, &backtrack->own_backtracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min_count > 1)
  {
  /* Count the iteration and keep looping until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min_count, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max_count > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (zero_repeats_first != NULL)
  JUMPHERE(zero_repeats_first);
JUMPHERE(empty_ref);

count_match(common);
return cc;
}
8607
8608
/* Emits the matching path for a recursion item.

  common   JIT compiler state
  cc       address of the recursion opcode; GET(cc, 1) is the offset of the
           called group relative to common->start
  parent   backtrack node under which the new recurse_backtrack is pushed

Returns the address of the item that follows the opcode and any OP_CREF
items attached to it.

A target for which get_framesize() reports no_stack is compiled inline.
Every other target is reached through a fast call to a shared recurse_entry,
which is looked up in (or appended to) the common->entries list. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *rec = common->entries;
recurse_entry *last = NULL;
PCRE2_SPTR next_item = cc + 1 + LINK_SIZE;
PCRE2_SPTR target;
sljit_sw start = GET(cc, 1);
sljit_uw arg_size;
BOOL needs_control_head;

/* Step over the OP_CREF items that directly follow the opcode. */
while (*next_item == OP_CREF)
  next_item += 1 + IMM2_SIZE;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, next_item);

target = common->start + start;

/* Inlining simple patterns. */
if (get_framesize(common, target, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, target), bracketend(target) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return next_item;
  }

cc += 1 + LINK_SIZE;
arg_size = (sljit_uw)IN_UCHARS(next_item - cc);

/* Look for an existing entry with the same target and the same argument
bytes; "last" trails one node behind so a new entry can be appended. */
for (; rec != NULL; last = rec, rec = rec->next)
  {
  if (rec->start == start && rec->arg_size == arg_size
      && (arg_size == 0 || memcmp(cc, rec->arg_start, arg_size) == 0))
    break;
  }

if (rec == NULL)
  {
  rec = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return next_item;

  rec->next = NULL;
  rec->entry_label = NULL;
  rec->backtrack_label = NULL;
  rec->entry_calls = NULL;
  rec->backtrack_calls = NULL;
  rec->start = start;
  rec->arg_start = cc;
  rec->arg_size = arg_size;

  if (last == NULL)
    common->entries = rec;
  else
    last->next = rec;
  }

BACKTRACK_AS(recurse_backtrack)->entry = rec;

/* Call the entry directly when its label is already known; otherwise queue
the call on the entry's entry_calls list. */
if (rec->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, rec->entry_label);
else
  add_jump(compiler, &rec->entry_calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return next_item;
}
8678
8679
/* Run-time helper invoked from JIT code to perform a callout.

  arguments      the jit_arguments of the running match
  callout_block  partially filled callout block; on entry capture_top holds
                 the number of ovector pairs and offset_vector holds the
                 current subject pointer (both are overwritten here). The
                 converted offset vector is written directly behind the block
  jit_ovector    the JIT offset vector, holding subject pointers

Returns 0 when no callout function is registered, otherwise whatever the
callout function returns. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject_start;
PCRE2_SIZE *dst;
PCRE2_SPTR *src;
sljit_u32 pairs_left, pair_no;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject_start = arguments->begin;
dst = (PCRE2_SIZE*)(callout_block + 1);
pairs_left = callout_block->capture_top;

SLJIT_ASSERT(pairs_left >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Offsets in subject. The current position must be derived before
offset_vector is redirected to the converted vector below. */
callout_block->subject_length = arguments->end - subject_start;
callout_block->start_match = jit_ovector[0] - subject_start;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject_start;
callout_block->subject = subject_start;

/* Convert and copy the JIT offset vector to the ovector array. */
callout_block->capture_top = 1;
callout_block->offset_vector = dst;

/* The first pair is always reported as unset. */
dst[0] = PCRE2_UNSET;
dst[1] = PCRE2_UNSET;
dst += 2;
src = jit_ovector + 2;

/* Convert pointers to sizes. capture_top ends up one above the index of
the last pair whose start is set. */
for (pair_no = 2; --pairs_left != 0; pair_no++, dst += 2, src += 2)
  {
  dst[0] = (PCRE2_SIZE)(src[0] - subject_start);
  dst[1] = (PCRE2_SIZE)(src[1] - subject_start);

  if (dst[0] != PCRE2_UNSET)
    callout_block->capture_top = pair_no;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
8732
8733
/* Byte offset of the member "arg" inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
8735
8736
/* Emits the code for a callout item (OP_CALLOUT, or the string form whose
length is stored in the item itself).

  common   JIT compiler state
  cc       address of the callout opcode
  parent   backtrack node under which the new backtrack_common is pushed

Returns the address of the item after the callout. NULL is the error value
handed to PUSH_BACKTRACK.

The generated code builds a pcre2_callout_block, followed by room for the
converted offset vector, on the machine stack. It then calls do_callout_jit()
and acts on the result: a positive value backtracks, any other non-zero
value aborts the match, zero continues. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL numbered = (*cc == OP_CALLOUT);
unsigned int callout_length = numbered
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
/* PCRE2_SIZE fields are written with a move of matching width. */
sljit_s32 size_mov = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_sw number = numbered ? cc[1 + 2 * LINK_SIZE] : 0;
sljit_sw string_ptr = 0;
sljit_sw string_length = 0;
sljit_sw string_offset = 0;
sljit_s32 ovector_bytes = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); /* top_bracket is uint16 so maximum is 1MiB */
sljit_s32 block_words;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (!numbered)
  {
  /* String callout: the string fields come from the item itself. */
  string_ptr = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  string_length = (callout_length - (1 + 4*LINK_SIZE + 2));
  string_offset = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

/* Block plus offset vector, rounded up to whole machine words. */
block_words = (sizeof(pcre2_callout_block) + ovector_bytes + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, block_words);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, number);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* This pointer sized field temporarily stores an internal variable: the
current subject pointer, which do_callout_jit() reads back. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, string_ptr);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, string_length);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, string_offset);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. */
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
free_stack(common, block_words);

/* Check return value. */
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label == NULL)
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}
8810
8811
#undef CALLOUT_ARG_SIZE
8812
#undef CALLOUT_ARG_OFFSET
8813
8814
/* Emits the code that moves STR_PTR backwards at the start of a branch that
begins with OP_REVERSE (fixed distance) or OP_VREVERSE (distance between a
minimum and a maximum).

  common   JIT compiler state
  cc       address of the OP_REVERSE / OP_VREVERSE item
  parent   backtrack node of the enclosing branch; its top must be NULL

Returns the address of the item after the reverse opcode.

For OP_REVERSE a failure to move back is added to parent->own_backtracks.
For OP_VREVERSE a vreverse_backtrack is pushed and failures go to its own
list; the position after the minimum move is stored in STACK(3) and the
position after the full move in STACK(2). */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **on_fail;
unsigned int min_back, max_back;
BOOL variable;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_begin;
struct sljit_label *step;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, cc + 1 + 2 * IMM2_SIZE);

  on_fail = &backtrack->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_back < max_back);
  }
else
  {
  on_fail = &parent->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = min_back;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_back > 0);
  }

variable = (min_back < max_back);

/* TMP2 = start of the subject (jit_arguments.begin). */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* UTF mode: step back one character at a time, counting in TMP3. */
  if (min_back > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_back);
    step = LABEL();
    add_jump(compiler, on_fail, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, on_fail, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);
    }

  if (variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* The optional part stops early at the subject start instead of
    failing. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, max_back - min_back);
    step = LABEL();
    at_begin = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, on_fail, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);

    JUMPHERE(at_begin);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Fixed width code units: plain pointer arithmetic. */
  if (min_back > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_back));
    add_jump(compiler, on_fail, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Move back by the optional part, clamping at the subject start. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max_back - min_back));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

if (variable)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
8913
8914
/* Scans the items at the start of an assertion body.

  cc   address of the first item to inspect

Callouts, the word boundary and circumflex/dollar checks, and OP_ALT are
stepped over. Returns FALSE if OP_KET is reached after skipping only such
items, and TRUE as soon as any other opcode is seen. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  switch (*cc)
    {
    case OP_KET:
    return FALSE;

    case OP_CALLOUT_STR:
    /* A string callout stores its own length. */
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    /* Fixed length items: the length comes from the opcode table. */
    cc += PRIV(OP_lengths)[*cc];
    continue;

    default:
    return TRUE;
    }
  }
}
8945
8946
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
8947
2.14M
{
8948
2.14M
DEFINE_COMPILER;
8949
2.14M
int framesize;
8950
2.14M
int extrasize;
8951
2.14M
BOOL local_quit_available = FALSE;
8952
2.14M
BOOL needs_control_head;
8953
2.14M
BOOL end_block_size = 0;
8954
2.14M
BOOL has_vreverse;
8955
2.14M
int private_data_ptr;
8956
2.14M
backtrack_common altbacktrack;
8957
2.14M
PCRE2_SPTR ccbegin;
8958
2.14M
PCRE2_UCHAR opcode;
8959
2.14M
PCRE2_UCHAR bra = OP_BRA;
8960
2.14M
jump_list *tmp = NULL;
8961
2.14M
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
8962
2.14M
jump_list **found;
8963
/* Saving previous accept variables. */
8964
2.14M
BOOL save_local_quit_available = common->local_quit_available;
8965
2.14M
BOOL save_in_positive_assertion = common->in_positive_assertion;
8966
2.14M
sljit_s32 save_restore_end_ptr = common->restore_end_ptr;
8967
2.14M
then_trap_backtrack *save_then_trap = common->then_trap;
8968
2.14M
struct sljit_label *save_quit_label = common->quit_label;
8969
2.14M
struct sljit_label *save_accept_label = common->accept_label;
8970
2.14M
jump_list *save_quit = common->quit;
8971
2.14M
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
8972
2.14M
jump_list *save_accept = common->accept;
8973
2.14M
struct sljit_jump *jump;
8974
2.14M
struct sljit_jump *brajump = NULL;
8975
8976
/* Assert captures then. */
8977
2.14M
common->then_trap = NULL;
8978
8979
2.14M
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8980
127k
  {
8981
127k
  SLJIT_ASSERT(!conditional);
8982
127k
  bra = *cc;
8983
127k
  cc++;
8984
127k
  }
8985
8986
2.14M
private_data_ptr = PRIVATE_DATA(cc);
8987
2.14M
SLJIT_ASSERT(private_data_ptr != 0);
8988
2.14M
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8989
2.14M
backtrack->framesize = framesize;
8990
2.14M
backtrack->private_data_ptr = private_data_ptr;
8991
2.14M
opcode = *cc;
8992
2.14M
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
8993
2.14M
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
8994
2.14M
ccbegin = cc;
8995
2.14M
cc += GET(cc, 1);
8996
8997
2.14M
if (bra == OP_BRAMINZERO)
8998
36.9k
  {
8999
  /* This is a braminzero backtrack path. */
9000
36.9k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9001
36.9k
  free_stack(common, 1);
9002
36.9k
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9003
36.9k
  }
9004
9005
2.14M
if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
9006
485k
  end_block_size = 3;
9007
9008
2.14M
if (framesize < 0)
9009
1.20M
  {
9010
1.20M
  extrasize = 1;
9011
1.20M
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9012
24.5k
    extrasize = 0;
9013
9014
1.20M
  extrasize += end_block_size;
9015
9016
1.20M
  if (needs_control_head)
9017
70.9k
    extrasize++;
9018
9019
1.20M
  if (framesize == no_frame)
9020
988k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9021
9022
1.20M
  if (extrasize > 0)
9023
1.17M
    allocate_stack(common, extrasize);
9024
9025
1.20M
  if (needs_control_head)
9026
70.9k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9027
9028
1.20M
  if (extrasize > 0)
9029
1.17M
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9030
9031
1.20M
  if (needs_control_head)
9032
70.9k
    {
9033
70.9k
    SLJIT_ASSERT(extrasize == end_block_size + 2);
9034
70.9k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9035
70.9k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
9036
70.9k
    }
9037
1.20M
  }
9038
946k
else
9039
946k
  {
9040
946k
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;
9041
9042
946k
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
9043
946k
  allocate_stack(common, framesize + extrasize);
9044
9045
946k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9046
946k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9047
946k
  if (needs_control_head)
9048
84.3k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9049
946k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9050
9051
946k
  if (needs_control_head)
9052
84.3k
    {
9053
84.3k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
9054
84.3k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
9055
84.3k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9056
84.3k
    }
9057
861k
  else
9058
861k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
9059
9060
946k
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9061
946k
  }
9062
9063
2.14M
if (end_block_size > 0)
9064
485k
  {
9065
485k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
9066
485k
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
9067
485k
  }
9068
9069
2.14M
memset(&altbacktrack, 0, sizeof(backtrack_common));
9070
2.14M
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9071
1.46M
  {
9072
  /* Control verbs cannot escape from these asserts. */
9073
1.46M
  local_quit_available = TRUE;
9074
1.46M
  common->restore_end_ptr = 0;
9075
1.46M
  common->local_quit_available = TRUE;
9076
1.46M
  common->quit_label = NULL;
9077
1.46M
  common->quit = NULL;
9078
1.46M
  }
9079
9080
2.14M
common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9081
2.14M
common->positive_assertion_quit = NULL;
9082
9083
2.71M
while (1)
9084
2.71M
  {
9085
2.71M
  common->accept_label = NULL;
9086
2.71M
  common->accept = NULL;
9087
2.71M
  altbacktrack.top = NULL;
9088
2.71M
  altbacktrack.own_backtracks = NULL;
9089
9090
2.71M
  if (*ccbegin == OP_ALT && extrasize > 0)
9091
561k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9092
9093
2.71M
  altbacktrack.cc = ccbegin;
9094
2.71M
  ccbegin += 1 + LINK_SIZE;
9095
9096
2.71M
  has_vreverse = (*ccbegin == OP_VREVERSE);
9097
2.71M
  if (*ccbegin == OP_REVERSE || has_vreverse)
9098
1.21M
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);
9099
9100
2.71M
  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
9101
2.71M
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9102
0
    {
9103
0
    if (local_quit_available)
9104
0
      {
9105
0
      common->local_quit_available = save_local_quit_available;
9106
0
      common->quit_label = save_quit_label;
9107
0
      common->quit = save_quit;
9108
0
      }
9109
0
    common->in_positive_assertion = save_in_positive_assertion;
9110
0
    common->restore_end_ptr = save_restore_end_ptr;
9111
0
    common->then_trap = save_then_trap;
9112
0
    common->accept_label = save_accept_label;
9113
0
    common->positive_assertion_quit = save_positive_assertion_quit;
9114
0
    common->accept = save_accept;
9115
0
    return NULL;
9116
0
    }
9117
9118
2.71M
  if (has_vreverse)
9119
614k
    {
9120
614k
    SLJIT_ASSERT(altbacktrack.top != NULL);
9121
614k
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
9122
614k
    }
9123
9124
2.71M
  common->accept_label = LABEL();
9125
2.71M
  if (common->accept != NULL)
9126
23.3k
    set_jumps(common->accept, common->accept_label);
9127
9128
  /* Reset stack. */
9129
2.71M
  if (framesize < 0)
9130
1.63M
    {
9131
1.63M
    if (framesize == no_frame)
9132
1.42M
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9133
213k
    else if (extrasize > 0)
9134
190k
      free_stack(common, extrasize);
9135
9136
1.63M
    if (end_block_size > 0)
9137
784k
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9138
9139
1.63M
    if (needs_control_head)
9140
132k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9141
1.63M
    }
9142
1.07M
  else
9143
1.07M
    {
9144
1.07M
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9145
370k
      {
9146
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9147
370k
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9148
9149
370k
      if (end_block_size > 0)
9150
18.0k
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));
9151
9152
370k
      if (needs_control_head)
9153
105k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9154
370k
      }
9155
702k
    else
9156
702k
      {
9157
702k
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9158
9159
702k
      if (end_block_size > 0)
9160
11.8k
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));
9161
9162
702k
      if (needs_control_head)
9163
9.69k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9164
702k
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9165
702k
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9166
702k
      }
9167
1.07M
    }
9168
9169
2.71M
  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9170
1.87M
    {
9171
    /* We know that STR_PTR was stored on the top of the stack. */
9172
1.87M
    if (conditional)
9173
16.6k
      {
9174
16.6k
      if (extrasize > 0)
9175
16.2k
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
9176
16.6k
      }
9177
1.86M
    else if (bra == OP_BRAZERO)
9178
47.7k
      {
9179
47.7k
      if (framesize < 0)
9180
31.0k
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9181
16.6k
      else
9182
16.6k
        {
9183
16.6k
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9184
16.6k
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9185
16.6k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9186
16.6k
        }
9187
47.7k
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9188
47.7k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9189
47.7k
      }
9190
1.81M
    else if (framesize >= 0)
9191
685k
      {
9192
      /* For OP_BRA and OP_BRAMINZERO. */
9193
685k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9194
685k
      }
9195
1.87M
    }
9196
2.71M
  add_jump(compiler, found, JUMP(SLJIT_JUMP));
9197
9198
2.71M
  compile_backtrackingpath(common, altbacktrack.top);
9199
2.71M
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9200
0
    {
9201
0
    if (local_quit_available)
9202
0
      {
9203
0
      common->local_quit_available = save_local_quit_available;
9204
0
      common->quit_label = save_quit_label;
9205
0
      common->quit = save_quit;
9206
0
      }
9207
0
    common->in_positive_assertion = save_in_positive_assertion;
9208
0
    common->restore_end_ptr = save_restore_end_ptr;
9209
0
    common->then_trap = save_then_trap;
9210
0
    common->accept_label = save_accept_label;
9211
0
    common->positive_assertion_quit = save_positive_assertion_quit;
9212
0
    common->accept = save_accept;
9213
0
    return NULL;
9214
0
    }
9215
2.71M
  set_jumps(altbacktrack.own_backtracks, LABEL());
9216
9217
2.71M
  if (*cc != OP_ALT)
9218
2.14M
    break;
9219
9220
563k
  ccbegin = cc;
9221
563k
  cc += GET(cc, 1);
9222
563k
  }
9223
9224
2.14M
if (local_quit_available)
9225
1.46M
  {
9226
1.46M
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9227
  /* Makes the check less complicated below. */
9228
1.46M
  common->positive_assertion_quit = common->quit;
9229
1.46M
  }
9230
9231
/* None of them matched. */
9232
2.14M
if (common->positive_assertion_quit != NULL)
9233
243k
  {
9234
243k
  jump = JUMP(SLJIT_JUMP);
9235
243k
  set_jumps(common->positive_assertion_quit, LABEL());
9236
243k
  SLJIT_ASSERT(framesize != no_stack);
9237
243k
  if (framesize < 0)
9238
42.1k
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9239
200k
  else
9240
200k
    {
9241
200k
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9242
200k
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9243
200k
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9244
200k
    }
9245
243k
  JUMPHERE(jump);
9246
243k
  }
9247
9248
2.14M
if (end_block_size > 0)
9249
485k
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9250
9251
2.14M
if (needs_control_head)
9252
155k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));
9253
9254
2.14M
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9255
704k
  {
9256
  /* Assert is failed. */
9257
704k
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9258
64.0k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9259
9260
704k
  if (framesize < 0)
9261
414k
    {
9262
    /* The topmost item should be 0. */
9263
414k
    if (bra == OP_BRAZERO)
9264
34.4k
      {
9265
34.4k
      if (extrasize >= 2)
9266
11.3k
        free_stack(common, extrasize - 1);
9267
34.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9268
34.4k
      }
9269
380k
    else if (extrasize > 0)
9270
358k
      free_stack(common, extrasize);
9271
414k
    }
9272
289k
  else
9273
289k
    {
9274
289k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9275
    /* The topmost item should be 0. */
9276
289k
    if (bra == OP_BRAZERO)
9277
13.9k
      {
9278
13.9k
      free_stack(common, framesize + extrasize - 1);
9279
13.9k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9280
13.9k
      }
9281
275k
    else
9282
275k
      free_stack(common, framesize + extrasize);
9283
289k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9284
289k
    }
9285
704k
  jump = JUMP(SLJIT_JUMP);
9286
704k
  if (bra != OP_BRAZERO)
9287
656k
    add_jump(compiler, target, jump);
9288
9289
  /* Assert is successful. */
9290
704k
  set_jumps(tmp, LABEL());
9291
704k
  if (framesize < 0)
9292
414k
    {
9293
    /* We know that STR_PTR was stored on the top of the stack. */
9294
414k
    if (extrasize > 0)
9295
393k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9296
9297
    /* Keep the STR_PTR on the top of the stack. */
9298
414k
    if (bra == OP_BRAZERO)
9299
34.4k
      {
9300
      /* This allocation is always successful. */
9301
34.4k
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9302
34.4k
      if (extrasize >= 2)
9303
11.3k
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9304
34.4k
      }
9305
380k
    else if (bra == OP_BRAMINZERO)
9306
14.9k
      {
9307
14.9k
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9308
14.9k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9309
14.9k
      }
9310
414k
    }
9311
289k
  else
9312
289k
    {
9313
289k
    if (bra == OP_BRA)
9314
270k
      {
9315
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9316
270k
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9317
270k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9318
270k
      }
9319
18.9k
    else
9320
18.9k
      {
9321
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9322
18.9k
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));
9323
9324
18.9k
      if (extrasize == 2 + end_block_size)
9325
16.4k
        {
9326
16.4k
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9327
16.4k
        if (bra == OP_BRAMINZERO)
9328
4.37k
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9329
16.4k
        }
9330
2.48k
      else
9331
2.48k
        {
9332
2.48k
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
9333
2.48k
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9334
2.48k
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9335
2.48k
        }
9336
18.9k
      }
9337
289k
    }
9338
9339
704k
  if (bra == OP_BRAZERO)
9340
48.4k
    {
9341
48.4k
    backtrack->matchingpath = LABEL();
9342
48.4k
    SET_LABEL(jump, backtrack->matchingpath);
9343
48.4k
    }
9344
656k
  else if (bra == OP_BRAMINZERO)
9345
19.9k
    {
9346
19.9k
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9347
19.9k
    JUMPHERE(brajump);
9348
19.9k
    SLJIT_ASSERT(framesize != 0);
9349
19.9k
    if (framesize > 0)
9350
4.99k
      {
9351
4.99k
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9352
4.99k
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9353
4.99k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9354
4.99k
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9355
4.99k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9356
4.99k
      }
9357
19.9k
    set_jumps(backtrack->common.own_backtracks, LABEL());
9358
19.9k
    }
9359
704k
  }
9360
1.44M
else
9361
1.44M
  {
9362
  /* AssertNot is successful. */
9363
1.44M
  if (framesize < 0)
9364
786k
    {
9365
786k
    if (extrasize > 0)
9366
783k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9367
9368
786k
    if (bra != OP_BRA)
9369
38.1k
      {
9370
38.1k
      if (extrasize >= 2)
9371
1.04k
        free_stack(common, extrasize - 1);
9372
38.1k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9373
38.1k
      }
9374
748k
    else if (extrasize > 0)
9375
745k
      free_stack(common, extrasize);
9376
786k
    }
9377
656k
  else
9378
656k
    {
9379
656k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9380
656k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9381
    /* The topmost item should be 0. */
9382
656k
    if (bra != OP_BRA)
9383
20.6k
      {
9384
20.6k
      free_stack(common, framesize + extrasize - 1);
9385
20.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9386
20.6k
      }
9387
635k
    else
9388
635k
      free_stack(common, framesize + extrasize);
9389
656k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9390
656k
    }
9391
9392
1.44M
  if (bra == OP_BRAZERO)
9393
41.8k
    backtrack->matchingpath = LABEL();
9394
1.40M
  else if (bra == OP_BRAMINZERO)
9395
16.9k
    {
9396
16.9k
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9397
16.9k
    JUMPHERE(brajump);
9398
16.9k
    }
9399
9400
1.44M
  if (bra != OP_BRA)
9401
58.7k
    {
9402
58.7k
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
9403
58.7k
    set_jumps(backtrack->common.own_backtracks, LABEL());
9404
58.7k
    backtrack->common.own_backtracks = NULL;
9405
58.7k
    }
9406
1.44M
  }
9407
9408
2.14M
if (local_quit_available)
9409
1.46M
  {
9410
1.46M
  common->local_quit_available = save_local_quit_available;
9411
1.46M
  common->quit_label = save_quit_label;
9412
1.46M
  common->quit = save_quit;
9413
1.46M
  }
9414
9415
2.14M
common->in_positive_assertion = save_in_positive_assertion;
9416
2.14M
common->restore_end_ptr = save_restore_end_ptr;
9417
2.14M
common->then_trap = save_then_trap;
9418
2.14M
common->accept_label = save_accept_label;
9419
2.14M
common->positive_assertion_quit = save_positive_assertion_quit;
9420
2.14M
common->accept = save_accept;
9421
2.14M
return cc + 1 + LINK_SIZE;
9422
2.14M
}
9423
9424
/* Emits the code that unwinds the stack bookkeeping of a bracket using the
given framesize / private_data_ptr (the name suggests OP_ONCE blocks - confirm
against the callers).

Arguments:
  common             compiler state
  ket                closing opcode (OP_KET, OP_KETRMAX or OP_KETRMIN are tested)
  framesize          negative when no frame was saved (no_frame is one such
                     value), otherwise the size of the saved frame
  private_data_ptr   offset of the private data slot (relative to SLJIT_SP)
  has_alternatives   TRUE if the bracket has alternatives
  needs_control_head TRUE if control_head_ptr must be reloaded from the stack

TMP1 is used as a temporary for the control head value. TMP2 is loaded when
ket is OP_KETRMAX. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
9425
215k
{
9426
215k
DEFINE_COMPILER;
9427
215k
int stacksize;
9428
9429
215k
if (framesize < 0)
9430
154k
  {
9431
154k
  /* With no_frame, STACK_TOP is reloaded from the private data slot.
  Otherwise the entries counted below are released with free_stack(). */
  if (framesize == no_frame)
9432
130k
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9433
24.6k
  else
9434
24.6k
    {
9435
24.6k
    /* One entry for the control head, one more when the bracket is
    repeated or has alternatives. */
    stacksize = needs_control_head ? 1 : 0;
9436
24.6k
    if (ket != OP_KET || has_alternatives)
9437
12.4k
      stacksize++;
9438
9439
24.6k
    if (stacksize > 0)
9440
12.4k
      free_stack(common, stacksize);
9441
24.6k
    }
9442
9443
154k
  /* Fetch the saved control head into TMP1; its slot is one entry deeper
  when the extra (repeat / alternative) entry is present. */
  if (needs_control_head)
9444
9.57k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
9445
9446
  /* TMP2, which is set here, is used by OP_KETRMAX below. */
9447
154k
  if (ket == OP_KETRMAX)
9448
69.3k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9449
85.6k
  else if (ket == OP_KETRMIN)
9450
31.2k
    {
9451
    /* Move the STR_PTR to the private_data_ptr. */
9452
31.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9453
31.2k
    }
9454
154k
  }
9455
60.0k
else
9456
60.0k
  {
9457
60.0k
  /* A frame was saved: STACK_TOP is recomputed from the private data slot,
  stepping back over the frame plus one or two extra entries. */
  stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
9458
60.0k
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
9459
60.0k
  if (needs_control_head)
9460
14.2k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9461
9462
60.0k
  if (ket == OP_KETRMAX)
9463
18.8k
    {
9464
    /* TMP2, which is set here, is used by OP_KETRMAX below. */
9465
18.8k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9466
18.8k
    }
9467
60.0k
  }
9468
215k
/* Store the control head value fetched into TMP1 above. */
if (needs_control_head)
9469
23.8k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9470
215k
}
9471
9472
/* Emits the code that saves the previous capture state on the stack and
records the new values for a capturing bracket.

Arguments:
  common            compiler state
  stacksize         index of the first free stack slot to write to
  offset            ovector index of the capture start; offset >> 1 is used
                    as the bracket number
  private_data_ptr  offset of the private data slot whose value becomes the
                    new OVECTOR(offset)

Returns:            stacksize advanced by the number of slots written
                    (0 to 3)

TMP1 and TMP2 are used as temporaries. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
9473
3.06M
{
9474
3.06M
DEFINE_COMPILER;
9475
9476
3.06M
/* When capture_last is tracked, save its old value on the stack and
replace it with the number of this bracket. */
if (common->capture_last_ptr != 0)
9477
411k
  {
9478
411k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9479
411k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
9480
411k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9481
411k
  stacksize++;
9482
411k
  }
9483
3.06M
/* For brackets that are not optimized, save the old ovector pair on the
stack, then set the pair to (value of the private data slot, STR_PTR). */
if (!is_optimized_cbracket(common, offset >> 1))
9484
1.04M
  {
9485
1.04M
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9486
1.04M
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9487
1.04M
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9488
1.04M
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9489
1.04M
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9490
1.04M
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9491
1.04M
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9492
1.04M
  stacksize += 2;
9493
1.04M
  }
9494
3.06M
return stacksize;
9495
3.06M
}
9496
9497
/* Helper called from the generated code (see match_script_run_common).
Calls PRIV(script_run) on [ptr, endptr) with FALSE as the last argument
(presumably the "utf" flag - confirm against PRIV(script_run)).

Returns:  endptr if the check succeeds, NULL otherwise */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
9498
0
{
9499
0
  if (PRIV(script_run)(ptr, endptr, FALSE))
9500
0
    return endptr;
9501
0
  return NULL;
9502
0
}
9503
9504
#ifdef SUPPORT_UNICODE
9505
9506
/* Same as do_script_run, but passes TRUE as the last argument of
PRIV(script_run) (presumably the "utf" flag - confirm against
PRIV(script_run)). Selected by match_script_run_common when common->utf is
set.

Returns:  endptr if the check succeeds, NULL otherwise */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
9507
0
{
9508
0
  if (PRIV(script_run)(ptr, endptr, TRUE))
9509
0
    return endptr;
9510
0
  return NULL;
9511
0
}
9512
9513
#endif /* SUPPORT_UNICODE */
9514
9515
/* Emits a call to do_script_run / do_script_run_utf for the range that
starts at the value stored in the private data slot and ends at STR_PTR.

Arguments:
  common            compiler state
  private_data_ptr  offset of the private data slot holding the start pointer
  parent            backtrack whose jump list receives the failure jump

On failure (a zero return value) a jump is added to
parent->top->simple_backtracks when parent->top is not NULL, otherwise to
parent->own_backtracks. STR_PTR is overwritten with the returned value. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
9516
753k
{
9517
753k
DEFINE_COMPILER;
9518
9519
753k
/* The call below relies on TMP1 / STR_PTR being R0 / R1 (presumably the
registers carrying the two call arguments - confirm against sljit). */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9520
9521
753k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9522
753k
#ifdef SUPPORT_UNICODE
9523
753k
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9524
753k
  common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
9525
#else
9526
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
9527
#endif
9528
9529
753k
/* The helpers return endptr on success and NULL on failure. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9530
753k
add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9531
753k
}
9532
9533
/*
9534
  Handling bracketed expressions is probably the most complex part.
9535
9536
  Stack layout naming characters:
9537
    S - Push the current STR_PTR
9538
    0 - Push a 0 (NULL)
9539
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
9540
        before the next alternative. Not pushed if there are no alternatives.
9541
    M - Any values pushed by the current alternative. Can be empty, or anything.
9542
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
9543
    L - Push the previous local (pointed by localptr) to the stack
9544
   () - optional values stored on the stack
9545
  ()* - optional, can be stored multiple times
9546
9547
  The following list shows the regular expression templates, their PCRE byte codes
9548
  and stack layout supported by pcre-sljit.
9549
9550
  (?:)                     OP_BRA     | OP_KET                A M
9551
  ()                       OP_CBRA    | OP_KET                C M
9552
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
9553
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
9554
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
9555
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
9556
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
9557
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
9558
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
9559
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
9560
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
9561
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
9562
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
9563
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
9564
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
9565
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
9566
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
9567
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
9568
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
9569
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
9570
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
9571
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
9572
9573
9574
  Stack layout naming characters:
9575
    A - Push the alternative index (starting from 0) on the stack.
9576
        Not pushed if there are no alternatives.
9577
    M - Any values pushed by the current alternative. Can be empty, or anything.
9578
9579
  The next list shows the possible content of a bracket:
9580
  (|)     OP_*BRA    | OP_ALT ...         M A
9581
  (?()|)  OP_*COND   | OP_ALT             M A
9582
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
9583
                                          Or nothing, if trace is unnecessary
9584
*/
9585
9586
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9587
7.39M
{
9588
7.39M
DEFINE_COMPILER;
9589
7.39M
backtrack_common *backtrack;
9590
7.39M
PCRE2_UCHAR opcode;
9591
7.39M
int private_data_ptr = 0;
9592
7.39M
int offset = 0;
9593
7.39M
int i, stacksize;
9594
7.39M
int repeat_ptr = 0, repeat_length = 0;
9595
7.39M
int repeat_type = 0, repeat_count = 0;
9596
7.39M
PCRE2_SPTR ccbegin;
9597
7.39M
PCRE2_SPTR matchingpath;
9598
7.39M
PCRE2_SPTR slot;
9599
7.39M
PCRE2_UCHAR bra = OP_BRA;
9600
7.39M
PCRE2_UCHAR ket;
9601
7.39M
assert_backtrack *assert;
9602
7.39M
BOOL has_alternatives;
9603
7.39M
BOOL needs_control_head = FALSE;
9604
7.39M
BOOL has_vreverse = FALSE;
9605
7.39M
struct sljit_jump *jump;
9606
7.39M
struct sljit_jump *skip;
9607
7.39M
jump_list *jumplist;
9608
7.39M
struct sljit_label *rmax_label = NULL;
9609
7.39M
struct sljit_jump *braminzero = NULL;
9610
9611
7.39M
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
9612
9613
7.39M
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9614
1.57M
  {
9615
1.57M
  bra = *cc;
9616
1.57M
  cc++;
9617
1.57M
  opcode = *cc;
9618
1.57M
  }
9619
9620
7.39M
opcode = *cc;
9621
7.39M
ccbegin = cc;
9622
7.39M
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
9623
7.39M
ket = *matchingpath;
9624
7.39M
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
9625
100k
  {
9626
100k
  repeat_ptr = PRIVATE_DATA(matchingpath);
9627
100k
  repeat_length = PRIVATE_DATA(matchingpath + 1);
9628
100k
  repeat_type = PRIVATE_DATA(matchingpath + 2);
9629
100k
  repeat_count = PRIVATE_DATA(matchingpath + 3);
9630
100k
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
9631
100k
  if (repeat_type == OP_UPTO)
9632
61.0k
    ket = OP_KETRMAX;
9633
100k
  if (repeat_type == OP_MINUPTO)
9634
26.3k
    ket = OP_KETRMIN;
9635
100k
  }
9636
9637
7.39M
matchingpath = ccbegin + 1 + LINK_SIZE;
9638
7.39M
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
9639
7.39M
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
9640
7.39M
cc += GET(cc, 1);
9641
9642
7.39M
has_alternatives = *cc == OP_ALT;
9643
7.39M
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
9644
55.4k
  {
9645
55.4k
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
9646
55.4k
    compile_time_checks_must_be_grouped_together);
9647
55.4k
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
9648
55.4k
  }
9649
9650
7.39M
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9651
0
  opcode = OP_SCOND;
9652
9653
7.39M
if (opcode == OP_CBRA || opcode == OP_SCBRA)
9654
2.88M
  {
9655
  /* Capturing brackets has a pre-allocated space. */
9656
2.88M
  offset = GET2(ccbegin, 1 + LINK_SIZE);
9657
2.88M
  if (!is_optimized_cbracket(common, offset))
9658
1.02M
    {
9659
1.02M
    private_data_ptr = OVECTOR_PRIV(offset);
9660
1.02M
    offset <<= 1;
9661
1.02M
    }
9662
1.85M
  else
9663
1.85M
    {
9664
1.85M
    offset <<= 1;
9665
1.85M
    private_data_ptr = OVECTOR(offset);
9666
1.85M
    }
9667
2.88M
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9668
2.88M
  matchingpath += IMM2_SIZE;
9669
2.88M
  }
9670
4.51M
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE
9671
4.51M
         || opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9672
3.10M
  {
9673
  /* Other brackets simply allocate the next entry. */
9674
3.10M
  private_data_ptr = PRIVATE_DATA(ccbegin);
9675
3.10M
  SLJIT_ASSERT(private_data_ptr != 0);
9676
3.10M
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9677
3.10M
  if (opcode == OP_ONCE)
9678
138k
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
9679
3.10M
  }
9680
9681
/* Instructions before the first alternative. */
9682
7.39M
stacksize = 0;
9683
7.39M
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9684
648k
  stacksize++;
9685
7.39M
if (bra == OP_BRAZERO)
9686
1.04M
  stacksize++;
9687
9688
7.39M
if (stacksize > 0)
9689
1.38M
  allocate_stack(common, stacksize);
9690
9691
7.39M
stacksize = 0;
9692
7.39M
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9693
648k
  {
9694
648k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9695
648k
  stacksize++;
9696
648k
  }
9697
9698
7.39M
if (bra == OP_BRAZERO)
9699
1.04M
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9700
9701
7.39M
if (bra == OP_BRAMINZERO)
9702
531k
  {
9703
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
9704
531k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9705
531k
  if (ket != OP_KETRMIN)
9706
376k
    {
9707
376k
    free_stack(common, 1);
9708
376k
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9709
376k
    }
9710
154k
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
9711
125k
    {
9712
125k
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9713
125k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9714
    /* Nothing stored during the first run. */
9715
125k
    skip = JUMP(SLJIT_JUMP);
9716
125k
    JUMPHERE(jump);
9717
    /* Checking zero-length iteration. */
9718
125k
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9719
122k
      {
9720
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
9721
122k
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9722
122k
      }
9723
3.11k
    else
9724
3.11k
      {
9725
      /* Except when the whole stack frame must be saved. */
9726
3.11k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9727
3.11k
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
9728
3.11k
      }
9729
125k
    JUMPHERE(skip);
9730
125k
    }
9731
29.3k
  else
9732
29.3k
    {
9733
29.3k
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9734
29.3k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9735
29.3k
    JUMPHERE(jump);
9736
29.3k
    }
9737
531k
  }
9738
9739
7.39M
if (repeat_type != 0)
9740
100k
  {
9741
100k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
9742
100k
  if (repeat_type == OP_EXACT)
9743
13.2k
    rmax_label = LABEL();
9744
100k
  }
9745
9746
7.39M
if (ket == OP_KETRMIN)
9747
243k
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9748
9749
7.39M
if (ket == OP_KETRMAX)
9750
559k
  {
9751
559k
  rmax_label = LABEL();
9752
559k
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
9753
16.1k
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
9754
559k
  }
9755
9756
/* Handling capturing brackets and alternatives. */
9757
7.39M
if (opcode == OP_ONCE)
9758
138k
  {
9759
138k
  stacksize = 0;
9760
138k
  if (needs_control_head)
9761
21.3k
    {
9762
21.3k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9763
21.3k
    stacksize++;
9764
21.3k
    }
9765
9766
138k
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9767
96.4k
    {
9768
    /* Neither capturing brackets nor recursions are found in the block. */
9769
96.4k
    if (ket == OP_KETRMIN)
9770
18.5k
      {
9771
18.5k
      stacksize += 2;
9772
18.5k
      if (!needs_control_head)
9773
17.9k
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9774
18.5k
      }
9775
77.8k
    else
9776
77.8k
      {
9777
77.8k
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9778
57.9k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9779
77.8k
      if (ket == OP_KETRMAX || has_alternatives)
9780
47.9k
        stacksize++;
9781
77.8k
      }
9782
9783
96.4k
    if (stacksize > 0)
9784
72.6k
      allocate_stack(common, stacksize);
9785
9786
96.4k
    stacksize = 0;
9787
96.4k
    if (needs_control_head)
9788
7.84k
      {
9789
7.84k
      stacksize++;
9790
7.84k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9791
7.84k
      }
9792
9793
96.4k
    if (ket == OP_KETRMIN)
9794
18.5k
      {
9795
18.5k
      if (needs_control_head)
9796
638
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9797
18.5k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9798
18.5k
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9799
13.8k
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
9800
18.5k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9801
18.5k
      }
9802
77.8k
    else if (ket == OP_KETRMAX || has_alternatives)
9803
47.9k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9804
96.4k
    }
9805
42.1k
  else
9806
42.1k
    {
9807
42.1k
    if (ket != OP_KET || has_alternatives)
9808
19.4k
      stacksize++;
9809
9810
42.1k
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
9811
42.1k
    allocate_stack(common, stacksize);
9812
9813
42.1k
    if (needs_control_head)
9814
13.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9815
9816
42.1k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9817
42.1k
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
9818
9819
42.1k
    stacksize = needs_control_head ? 1 : 0;
9820
42.1k
    if (ket != OP_KET || has_alternatives)
9821
19.4k
      {
9822
19.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9823
19.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9824
19.4k
      stacksize++;
9825
19.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9826
19.4k
      }
9827
22.7k
    else
9828
22.7k
      {
9829
22.7k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9830
22.7k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9831
22.7k
      }
9832
42.1k
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
9833
42.1k
    }
9834
138k
  }
9835
7.25M
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
9836
2.88M
  {
9837
  /* Saving the previous values. */
9838
2.88M
  if (is_optimized_cbracket(common, offset >> 1))
9839
1.85M
    {
9840
1.85M
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
9841
1.85M
    allocate_stack(common, 2);
9842
1.85M
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9843
1.85M
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9844
1.85M
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9845
1.85M
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9846
1.85M
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9847
1.85M
    }
9848
1.02M
  else
9849
1.02M
    {
9850
1.02M
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9851
1.02M
    allocate_stack(common, 1);
9852
1.02M
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9853
1.02M
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9854
1.02M
    }
9855
2.88M
  }
9856
4.37M
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
9857
99.1k
  {
9858
99.1k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9859
99.1k
  allocate_stack(common, 4);
9860
99.1k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9861
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9862
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9863
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9864
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9865
99.1k
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
9866
9867
99.1k
  has_vreverse = (*matchingpath == OP_VREVERSE);
9868
99.1k
  if (*matchingpath == OP_REVERSE || has_vreverse)
9869
94.6k
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9870
99.1k
  }
9871
4.27M
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9872
2.86M
  {
9873
  /* Saving the previous value. */
9874
2.86M
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9875
2.86M
  allocate_stack(common, 1);
9876
2.86M
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9877
2.86M
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9878
9879
2.86M
  if (*matchingpath == OP_REVERSE)
9880
29.0k
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9881
2.86M
  }
9882
1.40M
else if (opcode == OP_ASSERT_SCS)
9883
0
  {
9884
  /* Nested scs blocks will not update this variable. */
9885
0
  if (common->restore_end_ptr == 0)
9886
0
    common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
9887
9888
0
  if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))
9889
0
    {
9890
    /* Optimized case for a single capture reference. */
9891
0
    i = OVECTOR(GET2(matchingpath, 1) << 1);
9892
9893
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);
9894
9895
0
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9896
0
    matchingpath += 1 + IMM2_SIZE;
9897
9898
0
    allocate_stack(common, has_alternatives ? 3 : 2);
9899
9900
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9901
0
    OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9902
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9903
0
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));
9904
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9905
0
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
9906
0
    }
9907
0
  else
9908
0
    {
9909
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9910
0
    jumplist = NULL;
9911
9912
0
    while (TRUE)
9913
0
      {
9914
0
      if (*matchingpath == OP_CREF)
9915
0
        {
9916
0
        sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));
9917
0
        matchingpath += 1 + IMM2_SIZE;
9918
0
        }
9919
0
      else
9920
0
        {
9921
0
        SLJIT_ASSERT(*matchingpath == OP_DNCREF);
9922
9923
0
        i = GET2(matchingpath, 1 + IMM2_SIZE);
9924
0
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9925
9926
0
        while (i-- > 1)
9927
0
          {
9928
0
          sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9929
0
          add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9930
0
          slot += common->name_entry_size;
9931
0
          }
9932
9933
0
        sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9934
0
        matchingpath += 1 + 2 * IMM2_SIZE;
9935
0
        }
9936
9937
0
      if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)
9938
0
        break;
9939
9940
0
      add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9941
0
      }
9942
9943
0
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9944
0
      CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9945
9946
0
    set_jumps(jumplist, LABEL());
9947
9948
0
    allocate_stack(common, has_alternatives ? 3 : 2);
9949
9950
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9951
0
    OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9952
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9953
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);
9954
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9955
0
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9956
0
    }
9957
9958
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9959
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);
9960
9961
0
  if (has_alternatives)
9962
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
9963
0
  }
9964
1.40M
else if (has_alternatives)
9965
104k
  {
9966
  /* Pushing the starting string pointer. */
9967
104k
  allocate_stack(common, 1);
9968
104k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9969
104k
  }
9970
9971
/* Generating code for the first alternative. */
9972
7.39M
if (opcode == OP_COND || opcode == OP_SCOND)
9973
55.4k
  {
9974
55.4k
  if (*matchingpath == OP_CREF)
9975
8.30k
    {
9976
8.30k
    SLJIT_ASSERT(has_alternatives);
9977
8.30k
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9978
8.30k
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9979
8.30k
    matchingpath += 1 + IMM2_SIZE;
9980
8.30k
    }
9981
47.1k
  else if (*matchingpath == OP_DNCREF)
9982
14.5k
    {
9983
14.5k
    SLJIT_ASSERT(has_alternatives);
9984
9985
14.5k
    i = GET2(matchingpath, 1 + IMM2_SIZE);
9986
14.5k
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9987
14.5k
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9988
14.5k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9989
14.5k
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9990
14.5k
    slot += common->name_entry_size;
9991
14.5k
    i--;
9992
157k
    while (i-- > 0)
9993
142k
      {
9994
142k
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9995
142k
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
9996
142k
      slot += common->name_entry_size;
9997
142k
      }
9998
14.5k
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9999
14.5k
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));
10000
14.5k
    matchingpath += 1 + 2 * IMM2_SIZE;
10001
14.5k
    }
10002
32.6k
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10003
5.83k
    {
10004
    /* Never has other case. */
10005
5.83k
    BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;
10006
5.83k
    SLJIT_ASSERT(!has_alternatives);
10007
10008
5.83k
    if (*matchingpath == OP_TRUE)
10009
0
      {
10010
0
      stacksize = 1;
10011
0
      matchingpath++;
10012
0
      }
10013
5.83k
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10014
3.14k
      stacksize = 0;
10015
2.69k
    else if (*matchingpath == OP_RREF)
10016
2.69k
      {
10017
2.69k
      stacksize = GET2(matchingpath, 1);
10018
2.69k
      if (common->currententry == NULL)
10019
1.91k
        stacksize = 0;
10020
771
      else if (stacksize == RREF_ANY)
10021
420
        stacksize = 1;
10022
351
      else if (common->currententry->start == 0)
10023
248
        stacksize = stacksize == 0;
10024
103
      else
10025
103
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10026
10027
2.69k
      if (stacksize != 0)
10028
420
        matchingpath += 1 + IMM2_SIZE;
10029
2.69k
      }
10030
0
    else
10031
0
      {
10032
0
      if (common->currententry == NULL || common->currententry->start == 0)
10033
0
        stacksize = 0;
10034
0
      else
10035
0
        {
10036
0
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10037
0
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10038
0
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10039
0
        while (stacksize > 0)
10040
0
          {
10041
0
          if ((int)GET2(slot, 0) == i)
10042
0
            break;
10043
0
          slot += common->name_entry_size;
10044
0
          stacksize--;
10045
0
          }
10046
0
        }
10047
10048
0
      if (stacksize != 0)
10049
0
        matchingpath += 1 + 2 * IMM2_SIZE;
10050
0
      }
10051
10052
      /* The stacksize == 0 is a common "else" case. */
10053
5.83k
      if (stacksize == 0)
10054
5.41k
        {
10055
5.41k
        if (*cc == OP_ALT)
10056
772
          {
10057
772
          matchingpath = cc + 1 + LINK_SIZE;
10058
772
          cc += GET(cc, 1);
10059
772
          }
10060
4.64k
        else
10061
4.64k
          matchingpath = cc;
10062
5.41k
        }
10063
5.83k
    }
10064
26.7k
  else
10065
26.7k
    {
10066
26.7k
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10067
    /* Similar code as PUSH_BACKTRACK macro. */
10068
26.7k
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10069
26.7k
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10070
0
      return NULL;
10071
26.7k
    memset(assert, 0, sizeof(assert_backtrack));
10072
26.7k
    assert->common.cc = matchingpath;
10073
26.7k
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10074
26.7k
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10075
26.7k
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10076
0
      return NULL;
10077
26.7k
    }
10078
55.4k
  }
10079
10080
7.39M
compile_matchingpath(common, matchingpath, cc, backtrack);
10081
7.39M
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10082
0
  return NULL;
10083
10084
7.39M
switch (opcode)
10085
7.39M
  {
10086
128k
  case OP_ASSERTBACK_NA:
10087
128k
    if (has_vreverse)
10088
90.0k
      {
10089
90.0k
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
10090
90.0k
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
10091
90.0k
      }
10092
10093
128k
    if (PRIVATE_DATA(ccbegin + 1))
10094
99.1k
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10095
128k
    break;
10096
138k
  case OP_ONCE:
10097
138k
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10098
138k
    break;
10099
421k
  case OP_SCRIPT_RUN:
10100
421k
    match_script_run_common(common, private_data_ptr, backtrack);
10101
421k
    break;
10102
7.39M
  }
10103
10104
7.39M
stacksize = 0;
10105
7.39M
if (repeat_type == OP_MINUPTO)
10106
26.3k
  {
10107
  /* We need to preserve the counter. TMP2 will be used below. */
10108
26.3k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10109
26.3k
  stacksize++;
10110
26.3k
  }
10111
7.39M
if (ket != OP_KET || bra != OP_BRA)
10112
1.91M
  stacksize++;
10113
7.39M
if (offset != 0)
10114
2.88M
  {
10115
2.88M
  if (common->capture_last_ptr != 0)
10116
400k
    stacksize++;
10117
2.88M
  if (!is_optimized_cbracket(common, offset >> 1))
10118
1.02M
    stacksize += 2;
10119
2.88M
  }
10120
7.39M
if (has_alternatives && opcode != OP_ONCE)
10121
746k
  stacksize++;
10122
10123
7.39M
if (stacksize > 0)
10124
3.44M
  allocate_stack(common, stacksize);
10125
10126
7.39M
stacksize = 0;
10127
7.39M
if (repeat_type == OP_MINUPTO)
10128
26.3k
  {
10129
  /* TMP2 was set above. */
10130
26.3k
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10131
26.3k
  stacksize++;
10132
26.3k
  }
10133
10134
7.39M
if (ket != OP_KET || bra != OP_BRA)
10135
1.91M
  {
10136
1.91M
  if (ket != OP_KET)
10137
803k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10138
1.10M
  else
10139
1.10M
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10140
1.91M
  stacksize++;
10141
1.91M
  }
10142
10143
7.39M
if (offset != 0)
10144
2.88M
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10145
10146
/* Skip and count the other alternatives. */
10147
7.39M
i = 1;
10148
8.55M
while (*cc == OP_ALT)
10149
1.15M
  {
10150
1.15M
  cc += GET(cc, 1);
10151
1.15M
  i++;
10152
1.15M
  }
10153
10154
7.39M
if (has_alternatives)
10155
805k
  {
10156
805k
  if (opcode != OP_ONCE)
10157
746k
    {
10158
746k
    if (i <= 3)
10159
654k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10160
92.5k
    else
10161
92.5k
      BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10162
746k
    }
10163
805k
  if (ket != OP_KETRMAX)
10164
741k
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10165
805k
  }
10166
10167
/* Must be after the matchingpath label. */
10168
7.39M
if (offset != 0 && is_optimized_cbracket(common, offset >> 1))
10169
1.85M
  {
10170
1.85M
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10171
1.85M
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10172
1.85M
  }
10173
5.53M
else switch (opcode)
10174
5.53M
  {
10175
2.30M
  case OP_ASSERT_NA:
10176
2.30M
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10177
2.30M
    break;
10178
0
  case OP_ASSERT_SCS:
10179
0
    OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
10180
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10181
0
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10182
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
10183
10184
    /* Nested scs blocks will not update this variable. */
10185
0
    if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
10186
0
      common->restore_end_ptr = 0;
10187
0
    break;
10188
5.53M
  }
10189
10190
7.39M
if (ket == OP_KETRMAX)
10191
559k
  {
10192
559k
  if (repeat_type != 0)
10193
61.0k
    {
10194
61.0k
    if (has_alternatives)
10195
2.29k
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10196
61.0k
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10197
61.0k
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10198
    /* Drop STR_PTR for greedy plus quantifier. */
10199
61.0k
    if (opcode != OP_ONCE)
10200
60.6k
      free_stack(common, 1);
10201
61.0k
    }
10202
498k
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
10203
367k
    {
10204
367k
    if (has_alternatives)
10205
45.9k
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10206
10207
    /* Checking zero-length iteration. */
10208
367k
    if (opcode != OP_ONCE)
10209
320k
      {
10210
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
10211
320k
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10212
      /* Drop STR_PTR for greedy plus quantifier. */
10213
320k
      if (bra != OP_BRAZERO)
10214
122k
        free_stack(common, 1);
10215
320k
      }
10216
46.4k
    else
10217
      /* TMP2 must contain the starting STR_PTR. */
10218
46.4k
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10219
367k
    }
10220
131k
  else
10221
131k
    JUMPTO(SLJIT_JUMP, rmax_label);
10222
559k
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10223
559k
  }
10224
10225
7.39M
if (repeat_type == OP_EXACT)
10226
13.2k
  {
10227
13.2k
  count_match(common);
10228
13.2k
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10229
13.2k
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10230
13.2k
  }
10231
7.38M
else if (repeat_type == OP_UPTO)
10232
61.0k
  {
10233
  /* We need to preserve the counter. */
10234
61.0k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10235
61.0k
  allocate_stack(common, 1);
10236
61.0k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10237
61.0k
  }
10238
10239
7.39M
if (bra == OP_BRAZERO)
10240
1.04M
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10241
10242
7.39M
if (bra == OP_BRAMINZERO)
10243
531k
  {
10244
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10245
531k
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10246
531k
  if (braminzero != NULL)
10247
501k
    {
10248
501k
    JUMPHERE(braminzero);
10249
    /* We need to release the end pointer to perform the
10250
    backtrack for the zero-length iteration. When
10251
    framesize is < 0, OP_ONCE will do the release itself. */
10252
501k
    if (opcode == OP_ONCE)
10253
20.1k
      {
10254
20.1k
      int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10255
10256
20.1k
      SLJIT_ASSERT(framesize != 0);
10257
20.1k
      if (framesize > 0)
10258
3.12k
        {
10259
3.12k
        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10260
3.12k
        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10261
3.12k
        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10262
3.12k
        }
10263
20.1k
      }
10264
481k
    else if (ket == OP_KETRMIN)
10265
105k
      free_stack(common, 1);
10266
501k
    }
10267
  /* Continue to the normal backtrack. */
10268
531k
  }
10269
10270
7.39M
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
10271
2.11M
  count_match(common);
10272
10273
7.39M
cc += 1 + LINK_SIZE;
10274
10275
7.39M
if (opcode == OP_ONCE)
10276
138k
  {
10277
138k
  int data;
10278
138k
  int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10279
10280
138k
  SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
10281
  /* We temporarily encode the needs_control_head in the lowest bit.
10282
     The real value should be short enough for this operation to work
10283
     without triggering Undefined Behaviour. */
10284
138k
  data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
10285
138k
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
10286
138k
  }
10287
7.39M
return cc + repeat_length;
10288
7.39M
}
10289
10290
/* Emits the matching path for a repeated bracket of the BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common    shared compiler state
  cc        points to the bracket opcode (or to OP_BRAPOSZERO)
  parent    handed to PUSH_BACKTRACK (macro defined outside this block)

Returns the address just past the closing OP_KETRPOS and its link field,
or NULL when the sljit compiler reports an error.

Every alternative is compiled as a matching path that jumps back to the
start of the loop on success, immediately followed by its own
backtracking path. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10291
284k
{
10292
284k
DEFINE_COMPILER;
10293
284k
backtrack_common *backtrack;
10294
284k
PCRE2_UCHAR opcode;
10295
284k
int private_data_ptr;
10296
284k
int cbraprivptr = 0;
10297
284k
BOOL needs_control_head;
10298
284k
int framesize;
10299
284k
int stacksize;
10300
284k
int offset = 0;
10301
284k
BOOL zero = FALSE;
10302
284k
PCRE2_SPTR ccbegin = NULL;
10303
284k
int stack; /* Also contains the offset of control head. */
10304
284k
struct sljit_label *loop = NULL;
10305
284k
struct jump_list *emptymatch = NULL;
10306
10307
284k
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is remembered in 'zero' and skipped. */
10308
284k
if (*cc == OP_BRAPOSZERO)
10309
20.7k
  {
10310
20.7k
  zero = TRUE;
10311
20.7k
  cc++;
10312
20.7k
  }
10313
10314
284k
opcode = *cc;
10315
284k
private_data_ptr = PRIVATE_DATA(cc);
10316
284k
SLJIT_ASSERT(private_data_ptr != 0);
10317
284k
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
10318
284k
switch(opcode)
10319
284k
  {
10320
58.7k
  case OP_BRAPOS:
10321
163k
  case OP_SBRAPOS:
10322
163k
  ccbegin = cc + 1 + LINK_SIZE;
10323
163k
  break;
10324
10325
35.4k
  case OP_CBRAPOS:
10326
120k
  case OP_SCBRAPOS:
10327
120k
  offset = GET2(cc, 1 + LINK_SIZE);
10328
  /* This case cannot be optimized in the same way as
10329
  normal capturing brackets. */
10330
120k
  SLJIT_ASSERT(!is_optimized_cbracket(common, offset));
10331
120k
  cbraprivptr = OVECTOR_PRIV(offset);
10332
120k
  offset <<= 1;
10333
120k
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10334
120k
  break;
10335
10336
0
  default:
10337
0
  SLJIT_UNREACHABLE();
10338
0
  break;
10339
284k
  }
10340
/* A negative framesize takes the first branch below, which never calls
init_frame(); the other branch builds a frame with init_frame(). */
10341
284k
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10342
284k
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
10343
284k
if (framesize < 0)
10344
218k
  {
/* Slots allocated here, in order: the saved capture pair (plus the
capture_last value when capture_last_ptr is in use) or, for non-capturing
brackets, the current STR_PTR; then the control head if needed; then,
unless 'zero' is set, a flag in the last slot initialised to 1. */
10345
218k
  if (offset != 0)
10346
113k
    {
10347
113k
    stacksize = 2;
10348
113k
    if (common->capture_last_ptr != 0)
10349
8.76k
      stacksize++;
10350
113k
    }
10351
105k
  else
10352
105k
    stacksize = 1;
10353
10354
218k
  if (needs_control_head)
10355
1.55k
    stacksize++;
10356
218k
  if (!zero)
10357
199k
    stacksize++;
10358
10359
218k
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10360
218k
  allocate_stack(common, stacksize);
10361
218k
  if (framesize == no_frame)
10362
155k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10363
10364
218k
  stack = 0;
10365
218k
  if (offset != 0)
10366
113k
    {
10367
113k
    stack = 2;
10368
113k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10369
113k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10370
113k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10371
113k
    if (common->capture_last_ptr != 0)
10372
8.76k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10373
113k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10374
113k
    if (needs_control_head)
10375
403
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10376
113k
    if (common->capture_last_ptr != 0)
10377
8.76k
      {
10378
8.76k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10379
8.76k
      stack = 3;
10380
8.76k
      }
10381
113k
    }
10382
105k
  else
10383
105k
    {
10384
105k
    if (needs_control_head)
10385
1.14k
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10386
105k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10387
105k
    stack = 1;
10388
105k
    }
10389
/* The flag slot follows the control head slot: step over the control head
slot, write the flag, then step back and store the control head value that
was loaded into TMP2 above. 'stack' is left at the control head slot. */
10390
218k
  if (needs_control_head)
10391
1.55k
    stack++;
10392
218k
  if (!zero)
10393
199k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10394
218k
  if (needs_control_head)
10395
1.55k
    {
10396
1.55k
    stack--;
10397
1.55k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10398
1.55k
    }
10399
218k
  }
10400
65.6k
else
10401
65.6k
  {
/* Slots allocated here, in order: the flag (unless 'zero'), the control
head if needed, STR_PTR for non-capturing brackets, the previous content of
private_data_ptr; the rest is filled in by init_frame(). */
10402
65.6k
  stacksize = framesize + 1;
10403
65.6k
  if (!zero)
10404
64.4k
    stacksize++;
10405
65.6k
  if (needs_control_head)
10406
2.14k
    stacksize++;
10407
65.6k
  if (offset == 0)
10408
58.2k
    stacksize++;
10409
65.6k
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10410
10411
65.6k
  allocate_stack(common, stacksize);
10412
65.6k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10413
65.6k
  if (needs_control_head)
10414
2.14k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10415
65.6k
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10416
10417
65.6k
  stack = 0;
10418
65.6k
  if (!zero)
10419
64.4k
    {
10420
64.4k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10421
64.4k
    stack = 1;
10422
64.4k
    }
10423
65.6k
  if (needs_control_head)
10424
2.14k
    {
10425
2.14k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10426
2.14k
    stack++;
10427
2.14k
    }
10428
65.6k
  if (offset == 0)
10429
58.2k
    {
10430
58.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10431
58.2k
    stack++;
10432
58.2k
    }
10433
65.6k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10434
65.6k
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
/* When needs_control_head is set, this rewinds 'stack' to the slot the
control head was stored in; that slot is read back inside the loop. */
10435
65.6k
  stack -= 1 + (offset == 0);
10436
65.6k
  }
10437
10438
284k
if (offset != 0)
10439
120k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10440
/* Start of the repeat: each alternative jumps back here after it matches. */
10441
284k
loop = LABEL();
10442
361k
while (*cc != OP_KETRPOS)
10443
361k
  {
/* One pass per alternative: cc is advanced to the end of the current
alternative and the code between ccbegin and cc is compiled. */
10444
361k
  backtrack->top = NULL;
10445
361k
  backtrack->own_backtracks = NULL;
10446
361k
  cc += GET(cc, 1);
10447
10448
361k
  compile_matchingpath(common, ccbegin, cc, backtrack);
10449
361k
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10450
0
    return NULL;
10451
10452
361k
  if (framesize < 0)
10453
290k
    {
10454
290k
    if (framesize == no_frame)
10455
227k
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10456
10457
290k
    if (offset != 0)
10458
169k
      {
10459
169k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10460
169k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10461
169k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10462
169k
      if (common->capture_last_ptr != 0)
10463
8.86k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10464
169k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10465
169k
      }
10466
121k
    else
10467
121k
      {
10468
121k
      if (opcode == OP_SBRAPOS)
10469
91.7k
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10470
121k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10471
121k
      }
10472
10473
    /* Even if the match is empty, we need to reset the control head. */
10474
290k
    if (needs_control_head)
10475
1.68k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10476
/* For the S* opcodes, leave the loop when this iteration consumed nothing:
TMP1 holds the position that was saved before the iteration started. */
10477
290k
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10478
230k
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10479
/* An iteration succeeded: clear the flag that was initialised to 1. */
10480
290k
    if (!zero)
10481
261k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10482
290k
    }
10483
70.3k
  else
10484
70.3k
    {
10485
70.3k
    if (offset != 0)
10486
11.6k
      {
10487
11.6k
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10488
11.6k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10489
11.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10490
11.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10491
11.6k
      if (common->capture_last_ptr != 0)
10492
698
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10493
11.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10494
11.6k
      }
10495
58.7k
    else
10496
58.7k
      {
10497
58.7k
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10498
58.7k
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10499
58.7k
      if (opcode == OP_SBRAPOS)
10500
20.5k
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10501
58.7k
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10502
58.7k
      }
10503
10504
    /* Even if the match is empty, we need to reset the control head. */
10505
70.3k
    if (needs_control_head)
10506
3.60k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10507
10508
70.3k
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10509
24.9k
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10510
/* In this layout the flag lives in the first slot. */
10511
70.3k
    if (!zero)
10512
67.8k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10513
70.3k
    }
10514
/* Repeat; the backtracking path of this alternative is emitted right
after the jump. */
10515
361k
  JUMPTO(SLJIT_JUMP, loop);
10516
361k
  flush_stubs(common);
10517
10518
361k
  compile_backtrackingpath(common, backtrack->top);
10519
361k
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10520
0
    return NULL;
10521
361k
  set_jumps(backtrack->own_backtracks, LABEL());
10522
/* The alternative failed: reload STR_PTR from the location where the
position was stored after the last successful iteration (or initially). */
10523
361k
  if (framesize < 0)
10524
290k
    {
10525
290k
    if (offset != 0)
10526
169k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10527
121k
    else
10528
121k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10529
290k
    }
10530
70.3k
  else
10531
70.3k
    {
10532
70.3k
    if (offset != 0)
10533
11.6k
      {
10534
      /* Last alternative. */
10535
11.6k
      if (*cc == OP_KETRPOS)
10536
7.40k
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10537
11.6k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10538
11.6k
      }
10539
58.7k
    else
10540
58.7k
      {
10541
58.7k
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10542
58.7k
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10543
58.7k
      }
10544
70.3k
    }
10545
10546
361k
  if (*cc == OP_KETRPOS)
10547
284k
    break;
10548
76.8k
  ccbegin = cc + 1 + LINK_SIZE;
10549
76.8k
  }
10550
10551
/* We don't have to restore the control head in case of a failed match. */
10552
10553
284k
backtrack->own_backtracks = NULL;
/* Without OP_BRAPOSZERO, a flag that is still non-zero means no iteration
succeeded; a jump is then added to own_backtracks. */
10554
284k
if (!zero)
10555
263k
  {
10556
263k
  if (framesize < 0)
10557
199k
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
10558
64.4k
  else /* TMP2 is set to [private_data_ptr] above. */
10559
64.4k
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
10560
263k
  }
10561
10562
/* None of them matched. */
10563
284k
set_jumps(emptymatch, LABEL());
10564
284k
count_match(common);
10565
284k
return cc + 1 + LINK_SIZE;
10566
284k
}
10567
10568
/* Decodes the repeat item at cc into a normalized description.

Arguments:
  common    shared compiler state (used for next_opcode() and the utf flag)
  cc        points to the repeat opcode, or to a class opcode that is
            followed by its OP_CR* repeat opcode
  opcode    receives the normalized repeat opcode (OP_STAR based family,
            or OP_EXACT)
  type      receives the kind of the repeated item: OP_CHAR, OP_CHARI,
            OP_NOT, OP_NOTI, the character type opcode, or the class opcode
  max       written only for the UPTO family and for class ranges; in every
            other case the caller's value is left untouched
  exact     receives the mandatory repeat count (0 when there is none)
  end       receives the address that follows the whole repeated item

Returns the address of the repeated item's data: the character, the byte
after the character type opcode, or the byte after the class opcode. */
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
PCRE2_UCHAR rep = *cc;
int class_len;

*exact = 0;

/* Fold the five single-item repeat families onto the OP_STAR family. */
if (rep >= OP_STAR && rep <= OP_POSUPTO)
  *type = OP_CHAR;
else if (rep >= OP_STARI && rep <= OP_POSUPTOI)
  {
  *type = OP_CHARI;
  rep -= OP_STARI - OP_STAR;
  }
else if (rep >= OP_NOTSTAR && rep <= OP_NOTPOSUPTO)
  {
  *type = OP_NOT;
  rep -= OP_NOTSTAR - OP_STAR;
  }
else if (rep >= OP_NOTSTARI && rep <= OP_NOTPOSUPTOI)
  {
  *type = OP_NOTI;
  rep -= OP_NOTSTARI - OP_STAR;
  }
else if (rep >= OP_TYPESTAR && rep <= OP_TYPEPOSUPTO)
  {
  /* OP_END is a placeholder; the real type is read further below. */
  *type = OP_END;
  rep -= OP_TYPESTAR - OP_STAR;
  }
else
  {
  /* Character classes: the repeat opcode is stored after the class data. */
  SLJIT_ASSERT(rep == OP_CLASS || rep == OP_NCLASS || rep == OP_XCLASS || rep == OP_ECLASS);
  *type = rep;

  if (rep < OP_XCLASS)
    class_len = (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
  else
    class_len = GET(cc, 1);

  rep = cc[class_len];
  cc++;

  if (rep >= OP_CRSTAR && rep <= OP_CRMINQUERY)
    {
    rep -= OP_CRSTAR - OP_STAR;
    *end = cc + class_len;

    /* A plus is reported as one mandatory match followed by a star. */
    if (rep == OP_PLUS || rep == OP_MINPLUS)
      {
      *exact = 1;
      rep -= OP_PLUS - OP_STAR;
      }

    *opcode = rep;
    return cc;
    }

  if (rep >= OP_CRPOSSTAR && rep <= OP_CRPOSQUERY)
    {
    rep -= OP_CRPOSSTAR - OP_POSSTAR;
    *end = cc + class_len;

    if (rep == OP_POSPLUS)
      {
      *exact = 1;
      rep = OP_POSSTAR;
      }

    *opcode = rep;
    return cc;
    }

  SLJIT_ASSERT(rep == OP_CRRANGE || rep == OP_CRMINRANGE || rep == OP_CRPOSRANGE);
  *max = GET2(cc, (class_len + IMM2_SIZE));
  *exact = GET2(cc, class_len);
  *end = cc + class_len + 2 * IMM2_SIZE;

  if (*max == 0)
    {
    /* A stored maximum of zero: *max is left at zero. */
    SLJIT_ASSERT(*exact > 1);

    switch (rep)
      {
      case OP_CRRANGE:
      rep = OP_UPTO;
      break;

      case OP_CRPOSRANGE:
      rep = OP_POSUPTO;
      break;

      default:
      rep = OP_MINSTAR;
      break;
      }
    }
  else
    {
    /* From here on *max counts only the repeats beyond *exact. */
    *max -= *exact;

    if (*max == 0)
      rep = OP_EXACT;
    else
      {
      SLJIT_ASSERT(*exact > 0 || *max > 1);

      switch (rep)
        {
        case OP_CRRANGE:
        rep = OP_UPTO;
        break;

        case OP_CRPOSRANGE:
        rep = OP_POSUPTO;
        break;

        default:
        rep = (*max == 1) ? OP_MINQUERY : OP_MINUPTO;
        break;
        }
      }
    }

  *opcode = rep;
  return cc;
  }

/* Single character or character type: skip the repeat opcode and read
the count argument when the opcode has one. */
cc++;

switch (rep)
  {
  case OP_EXACT:
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_PLUS:
  case OP_MINPLUS:
  *exact = 1;
  rep -= OP_PLUS - OP_STAR;
  break;

  case OP_POSPLUS:
  *exact = 1;
  rep = OP_POSSTAR;
  break;

  case OP_UPTO:
  case OP_MINUPTO:
  case OP_POSUPTO:
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;
  }

*opcode = rep;

if (*type == OP_END)
  {
  /* Character type repeat: the type opcode follows the repeat opcode. */
  *type = *cc;
  *end = next_opcode(common, cc);
  return cc + 1;
  }

*end = cc + 1;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
10713
10714
/* Emits the matching path of a single character iterator.

   The iterator is decoded by get_iterator_parameters() into opcode, type,
   max, exact and end. An OP_EXACT opcode is emitted first as a counted loop;
   afterwards the (possibly rewritten) opcode is dispatched in the switch:
   OP_STAR / OP_UPTO, OP_QUERY, OP_MINSTAR / OP_MINQUERY, OP_MINUPTO,
   OP_EXACT, OP_POSSTAR, OP_POSUPTO and OP_POSQUERY.

   Arguments:
     common           compiler state
     cc               position of the iterator in the compiled pattern
     parent           backtrack record used by PUSH_BACKTRACK
     prev_backtracks  jump list receiving failures which happen before (or
                      without) an own backtrack record

   Returns the value of end (advanced past a following OP_CHAR / OP_CHARI
   when the charpos path consumes it). */
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)
10715
6.76M
{
10716
6.76M
DEFINE_COMPILER;
10717
6.76M
backtrack_common *backtrack = NULL;
10718
6.76M
PCRE2_SPTR begin = cc;
10719
6.76M
PCRE2_UCHAR opcode;
10720
6.76M
PCRE2_UCHAR type;
10721
6.76M
sljit_u32 max = 0, exact;
10722
6.76M
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
10723
6.76M
sljit_s32 early_fail_type;
10724
6.76M
BOOL charpos_enabled, use_tmp;
10725
6.76M
PCRE2_UCHAR charpos_char;
10726
6.76M
unsigned int charpos_othercasebit;
10727
6.76M
PCRE2_SPTR end;
10728
6.76M
jump_list *no_match = NULL;
10729
6.76M
jump_list *no_char1_match = NULL;
10730
6.76M
struct sljit_jump *jump = NULL;
10731
6.76M
struct sljit_label *label;
10732
6.76M
int private_data_ptr = PRIVATE_DATA(cc);
10733
6.76M
/* Without private data the two iterator slots live on the backtrack stack
   (STACK(0) / STACK(1)); otherwise they are two consecutive words starting
   at private_data_ptr in the SLJIT_SP based area. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
10734
6.76M
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
10735
6.76M
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
10736
6.76M
int tmp_base, tmp_offset;
10737
10738
6.76M
/* The private data of cc + 1 packs the early fail type into its low three
   bits and the early fail pointer into the remaining bits. */
early_fail_type = (early_fail_ptr & 0x7);
10739
6.76M
early_fail_ptr >>= 3;
10740
10741
/* During recursion, these optimizations are disabled. */
10742
6.76M
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
10743
5.95M
  {
10744
5.95M
  early_fail_ptr = 0;
10745
5.95M
  early_fail_type = type_skip;
10746
5.95M
  }
10747
10748
6.76M
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
10749
6.76M
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
10750
10751
6.76M
/* type_fail: take the previous backtrack path when STR_PTR is not greater
   than the value stored at early_fail_ptr. */
if (early_fail_type == type_fail)
10752
35.7k
  add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
10753
10754
6.76M
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
10755
10756
6.76M
/* Temporary used by the counted loops: the TMP3 register, or the LOCAL2
   slot when the type is OP_EXTUNI. */
if (type != OP_EXTUNI)
10757
6.38M
  {
10758
6.38M
  tmp_base = TMP3;
10759
6.38M
  tmp_offset = 0;
10760
6.38M
  }
10761
376k
else
10762
376k
  {
10763
376k
  tmp_base = SLJIT_MEM1(SLJIT_SP);
10764
376k
  tmp_offset = LOCAL2;
10765
376k
  }
10766
10767
6.76M
/* Fixed repeat count: a counted loop around the single character matcher. */
if (opcode == OP_EXACT)
10768
246k
  {
10769
246k
  SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);
10770
10771
246k
  /* In this branch the remaining subject length is compared against exact
     once, and the character matcher is called with FALSE as its last
     argument. NOTE(review): presumably that argument disables the per
     character end-of-subject check - confirm against
     compile_char1_matchingpath. */
  if (common->mode == PCRE2_JIT_COMPLETE
10772
246k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10773
246k
      && !common->utf
10774
246k
#endif
10775
246k
      && type != OP_ANYNL && type != OP_EXTUNI)
10776
135k
    {
10777
135k
    OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
10778
135k
    add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));
10779
10780
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
10781
    if (type == OP_ALLANY && !common->invalid_utf)
10782
#else
10783
135k
    if (type == OP_ALLANY)
10784
5.06k
#endif
10785
5.06k
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
10786
130k
    else
10787
130k
      {
10788
130k
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10789
130k
      label = LABEL();
10790
130k
      compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);
10791
130k
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10792
130k
      JUMPTO(SLJIT_NOT_ZERO, label);
10793
130k
      }
10794
135k
    }
10795
111k
  else
10796
111k
    {
10797
111k
    SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
10798
111k
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10799
111k
    label = LABEL();
10800
111k
    compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE);
10801
111k
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10802
111k
    JUMPTO(SLJIT_NOT_ZERO, label);
10803
111k
    }
10804
246k
  }
10805
10806
6.76M
if (early_fail_type == type_fail_range)
10807
43.2k
  {
10808
  /* Range end first, followed by range start. */
10809
43.2k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
10810
43.2k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
10811
43.2k
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
10812
43.2k
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
10813
43.2k
  add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
10814
10815
43.2k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10816
43.2k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
10817
43.2k
  }
10818
10819
6.76M
/* Opcodes below OP_EXACT get a char_iterator_backtrack record; the OP_EXACT
   and OP_POS* cases below assert that backtrack is still NULL. */
if (opcode < OP_EXACT)
10820
3.80M
  PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);
10821
10822
6.76M
switch(opcode)
10823
6.76M
  {
10824
1.77M
  case OP_STAR:
10825
2.04M
  case OP_UPTO:
10826
2.04M
  SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));
10827
2.04M
  max += exact;
10828
10829
2.04M
  /* OP_EXTUNI: one STR_PTR value is pushed onto the backtrack stack after
     every successful match; for OP_UPTO the remaining count is kept in
     LOCAL2. */
  if (type == OP_EXTUNI)
10830
212k
    {
10831
212k
    SLJIT_ASSERT(private_data_ptr == 0);
10832
212k
    SLJIT_ASSERT(early_fail_ptr == 0);
10833
10834
212k
    if (exact == 1)
10835
117k
      {
10836
117k
      SLJIT_ASSERT(opcode == OP_STAR);
10837
117k
      allocate_stack(common, 1);
10838
117k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10839
117k
      }
10840
95.4k
    else
10841
95.4k
      {
10842
      /* If OP_EXTUNI is present, it has a separate EXACT opcode. */
10843
95.4k
      SLJIT_ASSERT(exact == 0);
10844
10845
95.4k
      allocate_stack(common, 2);
10846
95.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10847
95.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
10848
95.4k
      }
10849
10850
212k
    if (opcode == OP_UPTO)
10851
33.5k
      {
10852
33.5k
      SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
10853
33.5k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);
10854
33.5k
      }
10855
10856
212k
    label = LABEL();
10857
212k
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
10858
212k
    if (opcode == OP_UPTO)
10859
33.5k
      {
10860
33.5k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
10861
33.5k
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10862
33.5k
      jump = JUMP(SLJIT_ZERO);
10863
33.5k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);
10864
33.5k
      }
10865
10866
    /* We cannot use TMP3 because of allocate_stack. */
10867
212k
    allocate_stack(common, 1);
10868
212k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10869
212k
    JUMPTO(SLJIT_JUMP, label);
10870
212k
    if (jump != NULL)
10871
33.5k
      JUMPHERE(jump);
10872
212k
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10873
212k
    break;
10874
212k
    }
10875
1.83M
#ifdef SUPPORT_UNICODE
10876
1.83M
  else if (type == OP_ALLANY && !common->invalid_utf)
10877
#else
10878
  else if (type == OP_ALLANY)
10879
#endif
10880
28.3k
    {
10881
28.3k
    /* OP_ALLANY with OP_STAR: STR_PTR is moved directly to STR_END; the
       start position is saved in offset1 and the end in offset0. */
    if (opcode == OP_STAR)
10882
21.8k
      {
10883
21.8k
      if (exact == 1)
10884
19.0k
        detect_partial_match(common, prev_backtracks);
10885
10886
21.8k
      if (private_data_ptr == 0)
10887
2.27k
        allocate_stack(common, 2);
10888
10889
21.8k
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
10890
21.8k
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10891
10892
21.8k
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
10893
21.8k
      process_partial_match(common);
10894
10895
21.8k
      if (early_fail_ptr != 0)
10896
421
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
10897
21.8k
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10898
21.8k
      break;
10899
21.8k
      }
10900
6.41k
#ifdef SUPPORT_UNICODE
10901
6.41k
    else if (!common->utf)
10902
#else
10903
    else
10904
#endif
10905
5.31k
      {
10906
      /* If OP_ALLANY is present, it has a separate EXACT opcode. */
10907
5.31k
      SLJIT_ASSERT(exact == 0);
10908
10909
5.31k
      if (private_data_ptr == 0)
10910
213
        allocate_stack(common, 2);
10911
10912
5.31k
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10913
5.31k
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
10914
10915
5.31k
      /* Clamp the advanced STR_PTR to STR_END. */
      if (common->mode == PCRE2_JIT_COMPLETE)
10916
5.31k
        {
10917
5.31k
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
10918
5.31k
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
10919
5.31k
        }
10920
0
      else
10921
0
        {
10922
0
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
10923
0
        process_partial_match(common);
10924
0
        JUMPHERE(jump);
10925
0
        }
10926
10927
5.31k
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10928
10929
5.31k
      if (early_fail_ptr != 0)
10930
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10931
5.31k
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10932
5.31k
      break;
10933
5.31k
      }
10934
28.3k
    }
10935
10936
1.80M
  /* Charpos path: when the iterator is not a literal character itself and
     is directly followed by OP_CHAR / OP_CHARI, the loop below searches for
     that following character while iterating and stores the positions where
     it was found. */
  charpos_enabled = FALSE;
10937
1.80M
  charpos_char = 0;
10938
1.80M
  charpos_othercasebit = 0;
10939
10940
1.80M
  SLJIT_ASSERT(tmp_base == TMP3);
10941
1.80M
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
10942
540k
    {
10943
540k
#ifdef SUPPORT_UNICODE
10944
540k
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
10945
#else
10946
    charpos_enabled = TRUE;
10947
#endif
10948
540k
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
10949
11.9k
      {
10950
11.9k
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
10951
11.9k
      if (charpos_othercasebit == 0)
10952
4.36k
        charpos_enabled = FALSE;
10953
11.9k
      }
10954
10955
540k
    if (charpos_enabled)
10956
534k
      {
10957
534k
      charpos_char = end[1];
10958
      /* Consume the OP_CHAR opcode. */
10959
534k
      end += 2;
10960
#if PCRE2_CODE_UNIT_WIDTH == 8
10961
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
10962
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10963
534k
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
10964
534k
      if ((charpos_othercasebit & 0x100) != 0)
10965
0
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
10966
534k
#endif
10967
534k
      if (charpos_othercasebit != 0)
10968
7.62k
        charpos_char |= charpos_othercasebit;
10969
10970
534k
      BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;
10971
534k
      BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;
10972
534k
      BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;
10973
10974
534k
      if (private_data_ptr == 0)
10975
24.5k
        allocate_stack(common, 2);
10976
10977
534k
      /* OP_STAR keeps the first found position in TMP3; OP_UPTO needs TMP3
         as the repeat counter, so COUNT_MATCH is saved into offset1 and
         borrowed instead. */
      use_tmp = (opcode == OP_STAR);
10978
10979
534k
      if (use_tmp)
10980
418k
        {
10981
418k
        OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10982
418k
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10983
418k
        }
10984
116k
      else
10985
116k
        {
10986
116k
        OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10987
116k
        OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);
10988
116k
        OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10989
116k
        OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));
10990
116k
        }
10991
10992
      /* Search the first instance of charpos_char. */
10993
534k
      if (exact > 0)
10994
350k
        detect_partial_match(common, &no_match);
10995
184k
      else
10996
184k
        jump = JUMP(SLJIT_JUMP);
10997
10998
534k
      label = LABEL();
10999
11000
534k
      if (opcode == OP_UPTO)
11001
116k
        {
11002
116k
        if (exact == max)
11003
6.24k
          OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11004
110k
        else
11005
110k
          {
11006
110k
          OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11007
110k
          add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11008
110k
          }
11009
116k
        }
11010
11011
534k
      compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11012
11013
534k
      if (early_fail_ptr != 0)
11014
11.6k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11015
11016
534k
      if (exact == 0)
11017
184k
        JUMPHERE(jump);
11018
11019
534k
      detect_partial_match(common, &no_match);
11020
11021
534k
      if (opcode == OP_UPTO && exact > 0)
11022
47.3k
        {
11023
47.3k
        if (exact == max)
11024
6.24k
          CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);
11025
41.1k
        else
11026
41.1k
          CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);
11027
47.3k
        }
11028
11029
534k
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11030
534k
      if (charpos_othercasebit != 0)
11031
7.62k
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11032
534k
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11033
11034
534k
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11035
534k
      if (use_tmp)
11036
418k
        {
11037
418k
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);
11038
418k
        SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);
11039
418k
        }
11040
116k
      else
11041
116k
        {
11042
116k
        OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);
11043
116k
        SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);
11044
116k
        }
11045
534k
      JUMPTO(SLJIT_JUMP, label);
11046
11047
534k
      set_jumps(no_match, LABEL());
11048
534k
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11049
534k
      if (use_tmp)
11050
418k
        OP1(SLJIT_MOV, base, offset1, TMP3, 0);
11051
116k
      else
11052
116k
        {
11053
116k
        /* Swap: offset1 receives the borrowed COUNT_MATCH value and
           COUNT_MATCH gets back the value saved in offset1. */
        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
11054
116k
        OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
11055
116k
        OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);
11056
116k
        }
11057
11058
534k
      /* offset0 still holds its initial 0 when no position was stored. */
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11059
11060
534k
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11061
534k
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11062
534k
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11063
534k
      break;
11064
534k
      }
11065
540k
    }
11066
11067
1.26M
  /* Generic OP_STAR / OP_UPTO path (no OP_EXTUNI, OP_ALLANY or charpos
     shortcut was taken above). */
  if (private_data_ptr == 0)
11068
98.1k
    allocate_stack(common, 2);
11069
11070
1.26M
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11071
1.26M
  use_tmp = (opcode == OP_STAR);
11072
11073
1.26M
  if (common->utf)
11074
327k
    {
11075
327k
    if (!use_tmp)
11076
44.3k
      OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
11077
11078
327k
    OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11079
327k
    }
11080
1.26M
#endif
11081
11082
1.26M
  if (opcode == OP_UPTO)
11083
114k
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);
11084
11085
1.26M
  if (opcode == OP_UPTO && exact > 0)
11086
17.9k
    {
11087
17.9k
    label = LABEL();
11088
17.9k
    detect_partial_match(common, &no_match);
11089
17.9k
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11090
17.9k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11091
17.9k
    if (common->utf)
11092
5.38k
      OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11093
17.9k
#endif
11094
11095
17.9k
    if (exact == max)
11096
773
      {
11097
773
      OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11098
773
      JUMPTO(SLJIT_NOT_ZERO, label);
11099
773
      }
11100
17.1k
    else
11101
17.1k
      {
11102
17.1k
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11103
17.1k
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11104
17.1k
      CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);
11105
17.1k
      }
11106
11107
17.9k
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11108
17.9k
    JUMPTO(SLJIT_JUMP, label);
11109
17.9k
    }
11110
1.24M
  else
11111
1.24M
    {
11112
1.24M
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11113
11114
1.24M
    detect_partial_match(common, &no_match);
11115
1.24M
    label = LABEL();
11116
1.24M
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11117
1.24M
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11118
1.24M
    if (common->utf)
11119
322k
      OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
11120
1.24M
#endif
11121
11122
1.24M
    if (opcode == OP_UPTO)
11123
96.0k
      {
11124
96.0k
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11125
96.0k
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11126
96.0k
      }
11127
11128
1.24M
    detect_partial_match_to(common, label);
11129
1.24M
    }
11130
11131
1.26M
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11132
1.26M
  if (common->utf)
11133
327k
    {
11134
327k
    /* In UTF mode STR_PTR is restored from the position saved after the
       last successful character (TMP3 or COUNT_MATCH). */
    set_jumps(no_char1_match, LABEL());
11135
327k
    set_jumps(no_match, LABEL());
11136
327k
    if (use_tmp)
11137
283k
      {
11138
283k
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11139
283k
      OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11140
283k
      }
11141
44.3k
    else
11142
44.3k
      {
11143
44.3k
      OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);
11144
44.3k
      OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);
11145
44.3k
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11146
44.3k
      }
11147
327k
    }
11148
940k
  else
11149
940k
#endif
11150
940k
    {
11151
940k
    /* The no_char1_match target steps STR_PTR back by one code unit; the
       fall-through path adds one first (when emitted) so the subtraction
       cancels out. */
    if (opcode != OP_UPTO || exact == 0)
11152
927k
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11153
940k
    set_jumps(no_char1_match, LABEL());
11154
11155
940k
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11156
940k
    set_jumps(no_match, LABEL());
11157
940k
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11158
940k
    }
11159
11160
1.26M
  if (opcode == OP_UPTO)
11161
114k
    {
11162
114k
    if (exact > 0)
11163
17.9k
      {
11164
17.9k
      if (max == exact)
11165
773
        jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);
11166
17.1k
      else
11167
17.1k
        jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11168
11169
17.9k
      add_jump(compiler, &backtrack->own_backtracks, jump);
11170
17.9k
      }
11171
114k
    }
11172
1.15M
  else if (exact == 1)
11173
757k
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));
11174
11175
1.26M
  if (early_fail_ptr != 0)
11176
23.4k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11177
11178
1.26M
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11179
1.26M
  break;
11180
11181
734k
  /* OP_QUERY: save STR_PTR in offset0, then try to match one character. */
  case OP_QUERY:
11182
734k
  SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11183
734k
  if (private_data_ptr == 0)
11184
71.4k
    allocate_stack(common, 1);
11185
734k
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11186
734k
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11187
734k
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11188
734k
  break;
11189
11190
556k
  /* OP_MINSTAR / OP_MINQUERY: only the mandatory (exact) matches are
     emitted here; STR_PTR is stored in offset0 afterwards. */
  case OP_MINSTAR:
11191
950k
  case OP_MINQUERY:
11192
950k
  SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));
11193
950k
  if (private_data_ptr == 0)
11194
74.2k
    allocate_stack(common, 1);
11195
11196
950k
  if (exact >= 1)
11197
369k
    {
11198
369k
    if (exact >= 2)
11199
27.6k
      {
11200
      /* Extuni has a separate exact opcode. */
11201
27.6k
      SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);
11202
27.6k
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11203
27.6k
      }
11204
11205
369k
    if (opcode == OP_MINQUERY)
11206
23.3k
      OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);
11207
11208
369k
    label = LABEL();
11209
369k
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11210
11211
369k
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11212
11213
369k
    if (exact >= 2)
11214
27.6k
      {
11215
27.6k
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11216
27.6k
      JUMPTO(SLJIT_NOT_ZERO, label);
11217
27.6k
      }
11218
11219
369k
    /* For OP_MINQUERY offset0 was initialised to -1 above, so the AND
       stores STR_PTR on the first pass through this code. */
    if (opcode == OP_MINQUERY)
11220
23.3k
      OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);
11221
346k
    else
11222
346k
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11223
369k
    }
11224
580k
  else
11225
580k
    {
11226
580k
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11227
580k
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11228
580k
    }
11229
11230
950k
  if (early_fail_ptr != 0)
11231
10.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11232
950k
  break;
11233
11234
82.6k
  /* OP_MINUPTO: offset1 is initialised to max + 1, offset0 receives STR_PTR
     after the mandatory matches. */
  case OP_MINUPTO:
11235
82.6k
  SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11236
82.6k
  if (private_data_ptr == 0)
11237
7.30k
    allocate_stack(common, 2);
11238
11239
82.6k
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11240
11241
82.6k
  if (exact == 0)
11242
62.2k
    {
11243
62.2k
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11244
62.2k
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11245
62.2k
    break;
11246
62.2k
    }
11247
11248
20.4k
  if (exact >= 2)
11249
16.5k
    {
11250
    /* Extuni has a separate exact opcode. */
11251
16.5k
    SLJIT_ASSERT(tmp_base == TMP3);
11252
16.5k
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11253
16.5k
    }
11254
11255
20.4k
  label = LABEL();
11256
20.4k
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11257
11258
20.4k
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11259
11260
20.4k
  if (exact >= 2)
11261
16.5k
    {
11262
16.5k
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11263
16.5k
    JUMPTO(SLJIT_NOT_ZERO, label);
11264
16.5k
    }
11265
11266
20.4k
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11267
20.4k
  break;
11268
11269
246k
  /* OP_EXACT: everything was already emitted before the switch. */
  case OP_EXACT:
11270
246k
  SLJIT_ASSERT(backtrack == NULL);
11271
246k
  break;
11272
11273
1.59M
  /* OP_POSSTAR: match as many characters as possible; no backtrack record
     exists, failures go to prev_backtracks. */
  case OP_POSSTAR:
11274
1.59M
  SLJIT_ASSERT(backtrack == NULL);
11275
1.59M
#if defined SUPPORT_UNICODE
11276
1.59M
  if (type == OP_ALLANY && !common->invalid_utf)
11277
#else
11278
  if (type == OP_ALLANY)
11279
#endif
11280
3.77k
    {
11281
3.77k
    if (exact == 1)
11282
1.41k
      detect_partial_match(common, prev_backtracks);
11283
11284
3.77k
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11285
3.77k
    process_partial_match(common);
11286
3.77k
    if (early_fail_ptr != 0)
11287
131
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11288
3.77k
    break;
11289
3.77k
    }
11290
11291
1.59M
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11292
1.59M
  if (common->utf)
11293
382k
    {
11294
382k
    SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11295
11296
382k
    /* When TMP3 is not the temporary, COUNT_MATCH is saved in LOCAL2 and
       used as the temporary register instead. */
    if (tmp_base != TMP3)
11297
6.14k
      {
11298
6.14k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11299
6.14k
      tmp_base = COUNT_MATCH;
11300
6.14k
      }
11301
11302
382k
    OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? SLJIT_IMM : STR_PTR, 0);
11303
382k
    detect_partial_match(common, &no_match);
11304
382k
    label = LABEL();
11305
382k
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11306
382k
    OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);
11307
382k
    detect_partial_match_to(common, label);
11308
11309
382k
    set_jumps(no_match, LABEL());
11310
382k
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);
11311
11312
382k
    if (tmp_base != TMP3)
11313
6.14k
      OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11314
11315
382k
    if (exact == 1)
11316
241k
      add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11317
11318
382k
    if (early_fail_ptr != 0)
11319
5.14k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11320
382k
    break;
11321
382k
    }
11322
1.20M
#endif
11323
11324
1.20M
  if (exact == 1)
11325
724k
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11326
11327
1.20M
  detect_partial_match(common, &no_match);
11328
1.20M
  label = LABEL();
11329
  /* Extuni never fails, so no_char1_match is not used in that case.
11330
     Anynl optionally reads an extra character on success. */
11331
1.20M
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11332
1.20M
  detect_partial_match_to(common, label);
11333
1.20M
  if (type != OP_EXTUNI)
11334
1.20M
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11335
11336
1.20M
  set_jumps(no_char1_match, LABEL());
11337
1.20M
  if (type != OP_EXTUNI)
11338
1.20M
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11339
11340
1.20M
  set_jumps(no_match, LABEL());
11341
11342
1.20M
  /* With one mandatory match, fail if STR_PTR did not move. */
  if (exact == 1)
11343
724k
    add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));
11344
11345
1.20M
  if (early_fail_ptr != 0)
11346
36.4k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11347
1.20M
  break;
11348
11349
179k
  /* OP_POSUPTO: counted loop without a backtrack record. The counter counts
     up from 0 when exact == max and down from max otherwise. */
  case OP_POSUPTO:
11350
179k
  SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11351
179k
  max += exact;
11352
11353
179k
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11354
179k
  if (type == OP_EXTUNI || common->utf)
11355
#else
11356
  if (type == OP_EXTUNI)
11357
#endif
11358
68.8k
    {
11359
68.8k
    SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
11360
11361
    /* Count match is not modified by compile_char1_matchingpath. */
11362
68.8k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11363
68.8k
    OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);
11364
11365
68.8k
    label = LABEL();
11366
    /* Extuni only modifies TMP3 on successful match. */
11367
68.8k
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11368
68.8k
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11369
11370
68.8k
    if (exact == max)
11371
1.19k
      {
11372
1.19k
      OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11373
1.19k
      JUMPTO(SLJIT_JUMP, label);
11374
1.19k
      }
11375
67.6k
    else
11376
67.6k
      {
11377
67.6k
      OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11378
67.6k
      JUMPTO(SLJIT_NOT_ZERO, label);
11379
67.6k
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11380
67.6k
      }
11381
11382
68.8k
    set_jumps(no_match, LABEL());
11383
11384
68.8k
    /* The flags of the comparison are set before COUNT_MATCH is restored
       and consumed by the conditional jump emitted after the restore. */
    if (exact > 0)
11385
7.87k
      {
11386
7.87k
      if (exact == max)
11387
1.19k
        OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);
11388
6.68k
      else
11389
6.68k
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);
11390
7.87k
      }
11391
11392
68.8k
    OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11393
11394
68.8k
    if (exact > 0)
11395
7.87k
      add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));
11396
68.8k
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11397
68.8k
    break;
11398
68.8k
    }
11399
11400
110k
  SLJIT_ASSERT(tmp_base == TMP3);
11401
11402
110k
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : max);
11403
11404
110k
  detect_partial_match(common, &no_match);
11405
110k
  label = LABEL();
11406
110k
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11407
11408
110k
  if (exact == max)
11409
2.72k
    OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11410
107k
  else
11411
107k
    {
11412
107k
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11413
107k
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11414
107k
    }
11415
110k
  detect_partial_match_to(common, label);
11416
110k
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11417
11418
110k
  set_jumps(no_char1_match, LABEL());
11419
110k
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11420
110k
  set_jumps(no_match, LABEL());
11421
11422
110k
  if (exact > 0)
11423
16.6k
    {
11424
16.6k
    if (exact == max)
11425
2.72k
      jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);
11426
13.8k
    else
11427
13.8k
      jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11428
11429
16.6k
    add_jump(compiler, prev_backtracks, jump);
11430
16.6k
    }
11431
110k
  break;
11432
11433
930k
  /* OP_POSQUERY: the temporary holds the position to continue from; it is
     only updated when the single character match succeeds. */
  case OP_POSQUERY:
11434
930k
  SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11435
930k
  SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11436
930k
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11437
930k
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11438
930k
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11439
930k
  set_jumps(no_match, LABEL());
11440
930k
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11441
930k
  break;
11442
11443
0
  default:
11444
0
  SLJIT_UNREACHABLE();
11445
0
  break;
11446
6.76M
  }
11447
11448
6.76M
count_match(common);
11449
6.76M
return end;
11450
6.76M
}
11451
11452
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

   OP_FAIL jumps unconditionally to the own backtrack list. The accept
   opcodes jump to the accept target: the common->accept jump list while
   common->accept_label is still NULL, the label itself otherwise. When the
   match might be empty, the PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART
   options are checked at runtime before accepting.

   Arguments:
     common   compiler state
     cc       position of the opcode in the compiled pattern
     parent   backtrack record used by PUSH_BACKTRACK

   Returns cc + 1. */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  const PCRE2_UCHAR verb = *cc;
  const PCRE2_SPTR after_verb = cc + 1;
  /* TRUE while the accept target must be collected as a pending jump. */
  const BOOL accept_pending = (common->accept_label == NULL);
  /* Register through which the jit_arguments structure is addressed. */
  const sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS;

  PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

  if (verb == OP_FAIL) {
    add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
    return after_verb;
  }

  /* End anchored top level accept: restart unless the subject is consumed. */
  if (verb == OP_ACCEPT && common->currententry == NULL
      && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) {
    add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
  }

  if (verb == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) {
    /* The notempty conditions do not need to be checked here. */
    if (accept_pending) {
      add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
    } else {
      JUMPTO(SLJIT_JUMP, common->accept_label);
    }
    return after_verb;
  }

  /* Accept right away when STR_PTR differs from the stored OVECTOR(0). */
  if (accept_pending) {
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
  } else {
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  }

  /* Load the runtime options into TMP2. */
  if (HAS_VIRTUAL_REGISTERS) {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  }
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options));

  /* PCRE2_NOTEMPTY set: backtrack. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));

  /* PCRE2_NOTEMPTY_ATSTART clear: accept. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  if (accept_pending) {
    add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
  } else {
    JUMPTO(SLJIT_ZERO, common->accept_label);
  }

  /* Accept when STR_PTR differs from the str field of the arguments,
     backtrack otherwise. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
  if (accept_pending) {
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
  } else {
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  }
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));

  return after_verb;
}
11507
11508
/* Emits the matching path of the opcode whose capture group number is
   stored at cc + 1: the current STR_PTR is written to the second ovector
   entry of the group and, when the group is not an optimized capturing
   bracket, the value saved in OVECTOR_PRIV is copied to its first entry.
   Nothing is emitted while common->currententry is set.

   Arguments:
     common   compiler state
     cc       position of the opcode in the compiled pattern

   Returns the position following the opcode and its 2-unit argument. */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
  DEFINE_COMPILER;
  const int group = GET2(cc, 1);
  const BOOL copy_start = !is_optimized_cbracket(common, group);
  const int first_entry = group << 1;
  const PCRE2_SPTR next = cc + 1 + IMM2_SIZE;

  /* Data will be discarded anyway... */
  if (common->currententry != NULL) {
    return next;
  }

  if (copy_start) {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));
  }

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(first_entry + 1), STR_PTR, 0);

  if (copy_start) {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(first_entry), TMP1, 0);
  }

  return next;
}
11526
11527
/* Emits the matching-path code for a control verb. compile_matchingpath
dispatches OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG,
OP_COMMIT and OP_COMMIT_ARG here.

A backtrack record is pushed for every verb. In addition:
  - OP_SKIP allocates one stack slot and stores the current STR_PTR in it;
  - OP_COMMIT_ARG, OP_PRUNE_ARG and OP_THEN_ARG store the address cc + 2
    (presumably the start of the verb's name argument) both into the
    common->mark_ptr slot and into jit_arguments.mark_ptr.

Returns the address of the item following the verb. The third argument of
PUSH_BACKTRACK is NULL; the macro is defined outside this section, so this is
presumably the value returned when the record cannot be created - confirm
against the macro definition. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11528
879k
{
11529
879k
DEFINE_COMPILER;
11530
879k
backtrack_common *backtrack;
11531
879k
PCRE2_UCHAR opcode = *cc;
11532
879k
PCRE2_SPTR ccend = cc + 1;
11533
11534
879k
/* The _ARG variants are followed by cc[1] plus two more code units. */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11535
879k
    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11536
86.4k
  ccend += 2 + cc[1];
11537
11538
879k
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11539
11540
879k
if (opcode == OP_SKIP)
11541
315k
  {
11542
315k
  allocate_stack(common, 1);
11543
315k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11544
315k
  return ccend;
11545
315k
  }
11546
11547
563k
/* OP_SKIP_ARG is deliberately absent here: it does not update the mark. */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11548
30.2k
  {
11549
30.2k
  if (HAS_VIRTUAL_REGISTERS)
11550
0
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11551
30.2k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11552
30.2k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11553
30.2k
  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11554
30.2k
  }
11555
11556
563k
return ccend;
11557
879k
}
11558
11559
/* One-element opcode buffer containing OP_THEN_TRAP. The then-trap backtrack
records created in compile_then_trap_matchingpath and at the end of
compile_matchingpath point their common.cc field at this buffer. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11560
11561
/* Emits the code that installs a "then trap" entry for the range cc..ccend.

A then_trap_backtrack record is pushed and remembered in common->then_trap.
Its start field is the offset of cc from common->start and its framesize is
whatever get_framesize() returns for the range.

The emitted code allocates 3 stack slots plus framesize more when framesize
is positive, points the control_head_ptr slot at the new entry and fills the
three topmost allocated slots with:
  STACK(size - 1): the start offset,
  STACK(size - 2): type_then_trap,
  STACK(size - 3): the previous control head value (held in TMP2).
When framesize is non-negative, init_frame() is called for the range. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11562
201k
{
11563
201k
DEFINE_COMPILER;
11564
201k
backtrack_common *backtrack;
11565
201k
BOOL needs_control_head;
11566
201k
int size;
11567
11568
201k
PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11569
201k
common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11570
201k
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11571
201k
BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11572
201k
BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11573
11574
201k
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11575
201k
/* Three slots for the trap entry itself, plus the frame (if any). */
size = 3 + (size < 0 ? 0 : size);
11576
11577
201k
/* Save the previous control head before the slot is overwritten. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11578
201k
allocate_stack(common, size);
11579
201k
if (size > 3)
11580
85.1k
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11581
116k
else
11582
116k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11583
201k
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11584
201k
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11585
201k
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11586
11587
201k
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11588
201k
if (size >= 0)
11589
85.1k
  init_frame(common, cc, ccend, size - 1, 0);
11590
201k
}
11591
11592
/* Emits the matching-path code for every item in the range cc..ccend.

The function walks the compiled pattern one item at a time and dispatches on
the opcode to the specialised compile_*_matchingpath helpers, each of which
returns the address of the next item. A few opcodes (OP_SET_SOM,
OP_BRAMINZERO, OP_MARK, OP_SKIPZERO) are handled inline.

ccend must point at OP_END or at an opcode in the OP_ALT..OP_KETRPOS range
(asserted on entry). If a helper returns NULL the function returns
immediately; presumably this signals a compilation failure - the helpers are
defined outside this section.

When common->has_then is set and then_offsets marks the starting offset, a
then-trap is installed before the loop ("tail item") and a second
then_trap_backtrack record is pushed after it ("head item"); the previous
common->then_trap value is restored afterwards.

Simple items pass a jump list for failed matches: parent->top's
simple_backtracks when parent->top exists, otherwise parent's own_backtracks. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11593
11.7M
{
11594
11.7M
DEFINE_COMPILER;
11595
11.7M
backtrack_common *backtrack;
11596
11.7M
BOOL has_then_trap = FALSE;
11597
11.7M
then_trap_backtrack *save_then_trap = NULL;
11598
11.7M
size_t op_len;
11599
11600
11.7M
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11601
11602
11.7M
if (common->has_then && common->then_offsets[cc - common->start] != 0)
11603
201k
  {
11604
201k
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11605
201k
  has_then_trap = TRUE;
11606
201k
  save_then_trap = common->then_trap;
11607
  /* Tail item on backtrack. */
11608
201k
  compile_then_trap_matchingpath(common, cc, ccend, parent);
11609
201k
  }
11610
11611
71.2M
while (cc < ccend)
11612
59.4M
  {
11613
59.4M
  switch(*cc)
11614
59.4M
    {
11615
145k
    /* Zero-width position tests. */
    case OP_SOD:
11616
178k
    case OP_SOM:
11617
279k
    case OP_NOT_WORD_BOUNDARY:
11618
356k
    case OP_WORD_BOUNDARY:
11619
460k
    case OP_EODN:
11620
634k
    case OP_EOD:
11621
985k
    case OP_DOLL:
11622
1.08M
    case OP_DOLLM:
11623
1.37M
    case OP_CIRC:
11624
1.44M
    case OP_CIRCM:
11625
1.49M
    case OP_NOT_UCP_WORD_BOUNDARY:
11626
1.52M
    case OP_UCP_WORD_BOUNDARY:
11627
1.52M
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11628
1.52M
    break;
11629
11630
98.0k
    /* Single character type items. */
    case OP_NOT_DIGIT:
11631
263k
    case OP_DIGIT:
11632
456k
    case OP_NOT_WHITESPACE:
11633
757k
    case OP_WHITESPACE:
11634
875k
    case OP_NOT_WORDCHAR:
11635
1.14M
    case OP_WORDCHAR:
11636
1.35M
    case OP_ANY:
11637
1.41M
    case OP_ALLANY:
11638
1.41M
    case OP_ANYBYTE:
11639
1.61M
    case OP_NOTPROP:
11640
2.26M
    case OP_PROP:
11641
2.42M
    case OP_ANYNL:
11642
2.56M
    case OP_NOT_HSPACE:
11643
2.71M
    case OP_HSPACE:
11644
2.78M
    case OP_NOT_VSPACE:
11645
2.79M
    case OP_VSPACE:
11646
2.98M
    case OP_EXTUNI:
11647
3.00M
    case OP_NOT:
11648
3.01M
    case OP_NOTI:
11649
3.01M
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11650
3.01M
    break;
11651
11652
24.2k
    /* The previous OVECTOR(0) value is saved on the stack and OVECTOR(0)
    is replaced by the current STR_PTR. */
    case OP_SET_SOM:
11653
24.2k
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11654
24.2k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11655
24.2k
    allocate_stack(common, 1);
11656
24.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11657
24.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11658
24.2k
    cc++;
11659
24.2k
    break;
11660
11661
14.0M
    /* Literal characters: the multi-character helper is used only in
    PCRE2_JIT_COMPLETE mode. */
    case OP_CHAR:
11662
26.0M
    case OP_CHARI:
11663
26.0M
    if (common->mode == PCRE2_JIT_COMPLETE)
11664
26.0M
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11665
0
    else
11666
0
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11667
26.0M
    break;
11668
11669
221k
    /* Repeated single items. */
    case OP_STAR:
11670
276k
    case OP_MINSTAR:
11671
569k
    case OP_PLUS:
11672
616k
    case OP_MINPLUS:
11673
1.00M
    case OP_QUERY:
11674
1.09M
    case OP_MINQUERY:
11675
1.11M
    case OP_UPTO:
11676
1.12M
    case OP_MINUPTO:
11677
1.20M
    case OP_EXACT:
11678
1.56M
    case OP_POSSTAR:
11679
2.10M
    case OP_POSPLUS:
11680
2.71M
    case OP_POSQUERY:
11681
2.78M
    case OP_POSUPTO:
11682
2.86M
    case OP_STARI:
11683
2.93M
    case OP_MINSTARI:
11684
3.05M
    case OP_PLUSI:
11685
3.15M
    case OP_MINPLUSI:
11686
3.26M
    case OP_QUERYI:
11687
3.44M
    case OP_MINQUERYI:
11688
3.45M
    case OP_UPTOI:
11689
3.47M
    case OP_MINUPTOI:
11690
3.50M
    case OP_EXACTI:
11691
3.64M
    case OP_POSSTARI:
11692
3.83M
    case OP_POSPLUSI:
11693
3.96M
    case OP_POSQUERYI:
11694
3.98M
    case OP_POSUPTOI:
11695
4.01M
    case OP_NOTSTAR:
11696
4.01M
    case OP_NOTMINSTAR:
11697
4.03M
    case OP_NOTPLUS:
11698
4.03M
    case OP_NOTMINPLUS:
11699
4.04M
    case OP_NOTQUERY:
11700
4.04M
    case OP_NOTMINQUERY:
11701
4.06M
    case OP_NOTUPTO:
11702
4.06M
    case OP_NOTMINUPTO:
11703
4.08M
    case OP_NOTEXACT:
11704
4.08M
    case OP_NOTPOSSTAR:
11705
4.09M
    case OP_NOTPOSPLUS:
11706
4.10M
    case OP_NOTPOSQUERY:
11707
4.11M
    case OP_NOTPOSUPTO:
11708
4.11M
    case OP_NOTSTARI:
11709
4.12M
    case OP_NOTMINSTARI:
11710
4.13M
    case OP_NOTPLUSI:
11711
4.13M
    case OP_NOTMINPLUSI:
11712
4.13M
    case OP_NOTQUERYI:
11713
4.14M
    case OP_NOTMINQUERYI:
11714
4.14M
    case OP_NOTUPTOI:
11715
4.15M
    case OP_NOTMINUPTOI:
11716
4.16M
    case OP_NOTEXACTI:
11717
4.16M
    case OP_NOTPOSSTARI:
11718
4.16M
    case OP_NOTPOSPLUSI:
11719
4.17M
    case OP_NOTPOSQUERYI:
11720
4.17M
    case OP_NOTPOSUPTOI:
11721
4.39M
    case OP_TYPESTAR:
11722
4.45M
    case OP_TYPEMINSTAR:
11723
4.95M
    case OP_TYPEPLUS:
11724
5.08M
    case OP_TYPEMINPLUS:
11725
5.25M
    case OP_TYPEQUERY:
11726
5.30M
    case OP_TYPEMINQUERY:
11727
5.45M
    case OP_TYPEUPTO:
11728
5.47M
    case OP_TYPEMINUPTO:
11729
5.57M
    case OP_TYPEEXACT:
11730
5.65M
    case OP_TYPEPOSSTAR:
11731
5.72M
    case OP_TYPEPOSPLUS:
11732
5.86M
    case OP_TYPEPOSQUERY:
11733
5.92M
    case OP_TYPEPOSUPTO:
11734
5.92M
    cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11735
5.92M
    break;
11736
11737
206k
    /* The code unit after the 32-byte class bitmap tells whether the
    class is followed by a repeat opcode. */
    case OP_CLASS:
11738
331k
    case OP_NCLASS:
11739
331k
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
11740
194k
      cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11741
137k
    else
11742
137k
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11743
331k
    break;
11744
11745
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11746
1.33M
    /* Here the item length is read from the item itself with GET(cc, 1). */
    case OP_XCLASS:
11747
1.47M
    case OP_ECLASS:
11748
1.47M
    op_len = GET(cc, 1);
11749
1.47M
    if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11750
637k
      cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11751
837k
    else
11752
837k
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
11753
1.47M
    break;
11754
0
#endif
11755
11756
452k
    /* References: a following repeat opcode selects the iterator helper. */
    case OP_REF:
11757
795k
    case OP_REFI:
11758
795k
    op_len = PRIV(OP_lengths)[*cc];
11759
795k
    if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11760
338k
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
11761
456k
    else
11762
456k
      {
11763
456k
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11764
456k
      cc += op_len;
11765
456k
      }
11766
795k
    break;
11767
11768
0
    /* Same as above, with compile_dnref_search() emitted first in the
    non-repeated case. */
    case OP_DNREF:
11769
0
    case OP_DNREFI:
11770
0
    op_len = PRIV(OP_lengths)[*cc];
11771
0
    if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
11772
0
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
11773
0
    else
11774
0
      {
11775
0
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
11776
0
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
11777
0
      cc += op_len;
11778
0
      }
11779
0
    break;
11780
11781
790k
    case OP_RECURSE:
11782
790k
    cc = compile_recurse_matchingpath(common, cc, parent);
11783
790k
    break;
11784
11785
8.47M
    case OP_CALLOUT:
11786
8.49M
    case OP_CALLOUT_STR:
11787
8.49M
    cc = compile_callout_matchingpath(common, cc, parent);
11788
8.49M
    break;
11789
11790
410k
    case OP_ASSERT:
11791
1.17M
    case OP_ASSERT_NOT:
11792
1.37M
    case OP_ASSERTBACK:
11793
1.99M
    case OP_ASSERTBACK_NOT:
11794
1.99M
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11795
1.99M
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11796
1.99M
    break;
11797
11798
568k
    /* The bracket itself is skipped here with bracketend(); only the
    saved state and the matchingpath label are emitted. Two stack slots
    are used when the bracket ends with OP_KETRMIN, one otherwise. */
    case OP_BRAMINZERO:
11799
568k
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
11800
568k
    cc = bracketend(cc + 1);
11801
568k
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
11802
413k
      {
11803
413k
      allocate_stack(common, 1);
11804
413k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11805
413k
      }
11806
154k
    else
11807
154k
      {
11808
154k
      allocate_stack(common, 2);
11809
154k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11810
154k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
11811
154k
      }
11812
568k
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
11813
568k
    count_match(common);
11814
568k
    break;
11815
11816
2.30M
    case OP_ASSERT_NA:
11817
2.43M
    case OP_ASSERTBACK_NA:
11818
2.43M
    case OP_ASSERT_SCS:
11819
2.52M
    case OP_ONCE:
11820
2.94M
    case OP_SCRIPT_RUN:
11821
3.27M
    case OP_BRA:
11822
5.61M
    case OP_CBRA:
11823
5.65M
    case OP_COND:
11824
5.70M
    case OP_SBRA:
11825
5.81M
    case OP_SCBRA:
11826
5.82M
    case OP_SCOND:
11827
5.82M
    cc = compile_bracket_matchingpath(common, cc, parent);
11828
5.82M
    break;
11829
11830
1.13M
    /* The opcode after OP_BRAZERO decides between the bracket helper and
    the assertion helper. */
    case OP_BRAZERO:
11831
1.13M
    if (cc[1] > OP_ASSERTBACK_NOT)
11832
1.04M
      cc = compile_bracket_matchingpath(common, cc, parent);
11833
90.2k
    else
11834
90.2k
      {
11835
90.2k
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
11836
90.2k
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
11837
90.2k
      }
11838
1.13M
    break;
11839
11840
1.13M
    case OP_BRAPOS:
11841
82.0k
    case OP_CBRAPOS:
11842
184k
    case OP_SBRAPOS:
11843
263k
    case OP_SCBRAPOS:
11844
284k
    case OP_BRAPOSZERO:
11845
284k
    cc = compile_bracketpos_matchingpath(common, cc, parent);
11846
284k
    break;
11847
11848
54.2k
    /* The previous mark is saved on the stack and cc + 2 becomes the new
    mark in both the mark_ptr slot and jit_arguments.mark_ptr. With
    has_skip_arg, five slots are allocated and a type_mark entry is also
    linked in as the new control head. */
    case OP_MARK:
11849
54.2k
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11850
54.2k
    SLJIT_ASSERT(common->mark_ptr != 0);
11851
54.2k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
11852
54.2k
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
11853
54.2k
    if (HAS_VIRTUAL_REGISTERS)
11854
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11855
54.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
11856
54.2k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11857
54.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11858
54.2k
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11859
54.2k
    if (common->has_skip_arg)
11860
25.6k
      {
11861
25.6k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11862
25.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11863
25.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
11864
25.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
11865
25.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
11866
25.6k
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11867
25.6k
      }
11868
54.2k
    cc += 1 + 2 + cc[1];
11869
54.2k
    break;
11870
11871
144k
    case OP_PRUNE:
11872
160k
    case OP_PRUNE_ARG:
11873
476k
    case OP_SKIP:
11874
532k
    case OP_SKIP_ARG:
11875
784k
    case OP_THEN:
11876
787k
    case OP_THEN_ARG:
11877
868k
    case OP_COMMIT:
11878
879k
    case OP_COMMIT_ARG:
11879
879k
    cc = compile_control_verb_matchingpath(common, cc, parent);
11880
879k
    break;
11881
11882
42.4k
    case OP_FAIL:
11883
195k
    case OP_ACCEPT:
11884
226k
    case OP_ASSERT_ACCEPT:
11885
226k
    cc = compile_fail_accept_matchingpath(common, cc, parent);
11886
226k
    break;
11887
11888
93.3k
    case OP_CLOSE:
11889
93.3k
    cc = compile_close_matchingpath(common, cc);
11890
93.3k
    break;
11891
11892
2.92k
    /* No code is emitted: the bracket after OP_SKIPZERO is skipped. */
    case OP_SKIPZERO:
11893
2.92k
    cc = bracketend(cc + 1);
11894
2.92k
    break;
11895
11896
0
    default:
11897
0
    SLJIT_UNREACHABLE();
11898
0
    return;
11899
59.4M
    }
11900
59.4M
  /* A helper returned NULL: stop emitting code for this range. */
  if (cc == NULL)
11901
0
    return;
11902
59.4M
  }
11903
11904
11.7M
if (has_then_trap)
11905
201k
  {
11906
  /* Head item on backtrack. */
11907
201k
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11908
201k
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11909
201k
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
11910
201k
  common->then_trap = save_then_trap;
11911
201k
  }
11912
11.7M
SLJIT_ASSERT(cc == ccend);
11913
11.7M
}
11914
11915
/* The helper macros used by the matching-path functions above are removed
here, before the backtracking-path helpers are defined. */
#undef PUSH_BACKTRACK
11916
#undef PUSH_BACKTRACK_NOVALUE
11917
#undef BACKTRACK_AS
11918
11919
/* Compiles the backtracking path of (current) and returns from the enclosing
function if the sljit compiler reports an error. The expansion uses the names
'common' and 'compiler' and a bare 'return;', so it can only be used inside
void functions that have both names in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
11920
8.59M
  do \
11921
8.59M
    { \
11922
8.59M
    compile_backtrackingpath(common, (current)); \
11923
8.59M
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
11924
8.59M
      return; \
11925
8.59M
    } \
11926
8.59M
  while (0)
11927
11928
16.2M
/* Casts the variable named 'current' in the enclosing scope to (type *). */
#define CURRENT_AS(type) ((type *)current)
11929
11930
/* Emits code that moves STR_PTR back by one extra code unit when the two code
units just before STR_PTR are CHAR_CR followed by CHAR_NL.

The check is skipped entirely when STR_PTR minus one code unit is less than or
equal to TMP2, so the caller is expected to have loaded the lower limit into
TMP2 beforehand (the callers in this file load it from the iterator's second
saved slot). TMP1 and SLJIT_TMP_DEST_REG are overwritten by the emitted code. */
static void compile_newline_move_back(compiler_common *common)
11931
181k
{
11932
181k
DEFINE_COMPILER;
11933
181k
struct sljit_jump *jump;
11934
11935
181k
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11936
181k
jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);
11937
/* All newlines are single byte, or their last byte
11938
is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. */
11939
181k
/* Combine the two preceding code units as (unit[-2] << 8) | unit[-1]. */
OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
11940
181k
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
11941
181k
OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);
11942
181k
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);
11943
181k
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL);
11944
181k
/* TMP1 receives the result of the equality test as a flag value, which
is then scaled to a byte count for wider code units. */
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
11945
181k
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11946
181k
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
11947
181k
#endif
11948
181k
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
11949
181k
JUMPHERE(jump);
11950
181k
}
11951
11952
/* Emits the backtracking-path code for a single-item iterator whose backtrack
record is 'current' (used as a char_iterator_backtrack).

The iterator's saved state lives in two slots. When PRIVATE_DATA(cc) is zero
they are STACK(0) and STACK(1) relative to STACK_TOP and are released with
free_stack() at the end of each case; otherwise they are the private data
words at private_data_ptr and private_data_ptr + sizeof(sljit_sw), relative to
SLJIT_SP.

get_iterator_parameters() supplies the opcode, character type, max and exact
values. Only OP_STAR, OP_UPTO, OP_QUERY, OP_MINSTAR, OP_MINUPTO and
OP_MINQUERY are handled; any other opcode is treated as unreachable. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11953
3.80M
{
11954
3.80M
DEFINE_COMPILER;
11955
3.80M
PCRE2_SPTR cc = current->cc;
11956
3.80M
PCRE2_UCHAR opcode;
11957
3.80M
PCRE2_UCHAR type;
11958
3.80M
sljit_u32 max = 0, exact;
11959
3.80M
struct sljit_label *label = NULL;
11960
3.80M
struct sljit_jump *jump = NULL;
11961
3.80M
jump_list *jumplist = NULL;
11962
3.80M
PCRE2_SPTR end;
11963
3.80M
int private_data_ptr = PRIVATE_DATA(cc);
11964
3.80M
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11965
3.80M
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11966
3.80M
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11967
11968
3.80M
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11969
11970
3.80M
switch(opcode)
11971
3.80M
  {
11972
1.77M
  case OP_STAR:
11973
2.04M
  case OP_UPTO:
11974
2.04M
  if (type == OP_EXTUNI)
11975
212k
    {
11976
212k
    /* A saved position is popped from the stack; a non-zero value is
    retried on the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
11977
212k
    set_jumps(current->own_backtracks, LABEL());
11978
212k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11979
212k
    free_stack(common, 1);
11980
212k
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11981
212k
    }
11982
1.83M
  else
11983
1.83M
    {
11984
1.83M
    if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled)
11985
534k
      {
11986
534k
      /* Step back one character at a time until the character at
      STR_PTR equals charpos.chr (after OR-ing in othercasebit, if
      any), or until STR_PTR reaches the limit stored in the second
      slot. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11987
534k
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
11988
11989
534k
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
11990
534k
      label = LABEL();
11991
534k
      if (type == OP_ANYNL)
11992
36.2k
        compile_newline_move_back(common);
11993
534k
      move_back(common, NULL, TRUE);
11994
11995
534k
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11996
534k
      if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0)
11997
7.62k
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit);
11998
534k
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11999
      /* The range beginning must match, no need to compare. */
12000
534k
      JUMPTO(SLJIT_JUMP, label);
12001
12002
534k
      set_jumps(current->own_backtracks, LABEL());
12003
534k
      current->own_backtracks = NULL;
12004
534k
      }
12005
1.29M
    else
12006
1.29M
      {
12007
1.29M
      /* Step back one character, store the new position in the first
      slot and retry the matching path; give up once STR_PTR is not
      above the limit stored in the second slot. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12008
12009
1.29M
      if (opcode == OP_STAR && exact == 1)
12010
776k
        {
12011
776k
        /* In this variant the limit is checked after moving back. */
        if (type == OP_ANYNL)
12012
84.9k
          {
12013
84.9k
          OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12014
84.9k
          compile_newline_move_back(common);
12015
84.9k
          }
12016
12017
776k
        move_back(common, NULL, TRUE);
12018
776k
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12019
776k
        }
12020
518k
      else
12021
518k
        {
12022
518k
        /* Here the limit is checked before moving back. */
        if (type == OP_ANYNL)
12023
60.7k
          {
12024
60.7k
          OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12025
60.7k
          jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12026
60.7k
          compile_newline_move_back(common);
12027
60.7k
          }
12028
457k
        else
12029
457k
          jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12030
12031
518k
        move_back(common, NULL, TRUE);
12032
518k
        }
12033
12034
1.29M
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12035
1.29M
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12036
12037
1.29M
      set_jumps(current->own_backtracks, LABEL());
12038
1.29M
      }
12039
12040
1.83M
    JUMPHERE(jump);
12041
1.83M
    if (private_data_ptr == 0)
12042
125k
      free_stack(common, 2);
12043
1.83M
    }
12044
2.04M
  break;
12045
12046
734k
  /* The saved position is reloaded and the slot cleared; a non-zero
  position is retried on the matching path. Jumps collected in
  own_backtracks go through the same reload-and-clear sequence but then
  always continue on the matching path. */
  case OP_QUERY:
12047
734k
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12048
734k
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12049
734k
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12050
734k
  jump = JUMP(SLJIT_JUMP);
12051
734k
  set_jumps(current->own_backtracks, LABEL());
12052
734k
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12053
734k
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12054
734k
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12055
734k
  JUMPHERE(jump);
12056
734k
  if (private_data_ptr == 0)
12057
71.4k
    free_stack(common, 1);
12058
734k
  break;
12059
12060
556k
  /* With exact == 0 one more character is matched here and the new
  position saved before returning to the matching path; a failure of that
  match lands after the jump. With exact > 1, TMP3 is set to 1 first. */
  case OP_MINSTAR:
12061
556k
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12062
556k
  if (exact == 0)
12063
210k
    {
12064
210k
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12065
210k
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12066
210k
    }
12067
346k
  else if (exact > 1)
12068
6.28k
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
12069
12070
556k
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12071
556k
  set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL());
12072
556k
  if (private_data_ptr == 0)
12073
41.8k
    free_stack(common, 1);
12074
556k
  break;
12075
12076
82.6k
  /* The second slot holds a counter that is decremented on each pass;
  reaching zero ends the iteration. */
  case OP_MINUPTO:
12077
82.6k
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12078
82.6k
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12079
82.6k
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12080
12081
82.6k
  if (exact == 0)
12082
62.2k
    {
12083
62.2k
    add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12084
12085
62.2k
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12086
62.2k
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12087
62.2k
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12088
62.2k
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12089
12090
62.2k
    set_jumps(jumplist, LABEL());
12091
62.2k
    }
12092
20.4k
  else
12093
20.4k
    {
12094
20.4k
    if (exact > 1)
12095
16.5k
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
12096
20.4k
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12097
20.4k
    JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12098
12099
20.4k
    set_jumps(current->own_backtracks, LABEL());
12100
20.4k
    }
12101
12102
82.6k
  if (private_data_ptr == 0)
12103
7.30k
    free_stack(common, 2);
12104
82.6k
  break;
12105
12106
393k
  /* The saved position is reloaded and the slot cleared, so a second
  backtrack into this item finds zero and falls through. */
  case OP_MINQUERY:
12107
393k
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12108
393k
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12109
12110
393k
  if (exact >= 1)
12111
23.3k
    {
12112
23.3k
    if (exact >= 2)
12113
21.3k
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
12114
23.3k
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12115
23.3k
    set_jumps(current->own_backtracks, LABEL());
12116
23.3k
    }
12117
370k
  else
12118
370k
    {
12119
370k
    jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12120
370k
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12121
370k
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12122
370k
    set_jumps(jumplist, LABEL());
12123
370k
    JUMPHERE(jump);
12124
370k
    }
12125
12126
393k
  if (private_data_ptr == 0)
12127
32.3k
    free_stack(common, 1);
12128
393k
  break;
12129
12130
0
  default:
12131
0
  SLJIT_UNREACHABLE();
12132
0
  break;
12133
3.80M
  }
12134
3.80M
}
12135
12136
/* Emits the backtracking-path code for a repeated back reference whose
backtrack record is 'current' (used as a ref_iterator_backtrack).

The repeat opcode is the code unit that follows the reference item. When its
lowest bit is clear (the "maximize" case) the saved STR_PTR is popped and a
non-zero value is retried on the matching path. Otherwise the saved STR_PTR
is only read; a non-zero value is retried, and on fall-through 2 stack slots
are released for OP_REF/OP_REFI and 3 for any other opcode. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12137
338k
{
12138
338k
DEFINE_COMPILER;
12139
338k
PCRE2_SPTR cc = current->cc;
12140
338k
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12141
338k
PCRE2_UCHAR type;
12142
12143
338k
type = cc[PRIV(OP_lengths)[*cc]];
12144
12145
338k
if ((type & 0x1) == 0)
12146
196k
  {
12147
  /* Maximize case. */
12148
196k
  set_jumps(current->own_backtracks, LABEL());
12149
196k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12150
196k
  free_stack(common, 1);
12151
196k
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12152
196k
  return;
12153
196k
  }
12154
12155
142k
/* Odd repeat opcodes: the slots stay allocated while the matching path is
retried and are released only on final failure. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12156
142k
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12157
142k
set_jumps(current->own_backtracks, LABEL());
12158
142k
free_stack(common, ref ? 2 : 3);
12159
142k
}
12160
12161
/* Emits the backtracking-path code for a recursion whose backtrack record is
'current' (used as a recurse_backtrack).

When the pattern was not inlined, a fast call to the entry's backtrack code
is emitted: directly to entry->backtrack_label if it is already set,
otherwise the jump is queued on entry->backtrack_calls. After the call, a
non-zero TMP1 continues on the recorded matching path. When the pattern was
inlined, the backtracking path of current->top is compiled in place instead.

In both cases the jumps collected in own_backtracks are bound to the label
emitted at the end. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12162
790k
{
12163
790k
DEFINE_COMPILER;
12164
790k
recurse_entry *entry;
12165
12166
790k
if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12167
776k
  {
12168
776k
  entry = CURRENT_AS(recurse_backtrack)->entry;
12169
776k
  if (entry->backtrack_label == NULL)
12170
478k
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12171
297k
  else
12172
297k
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12173
776k
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12174
776k
  }
12175
14.3k
else
12176
14.3k
  compile_backtrackingpath(common, current->top);
12177
12178
790k
set_jumps(current->own_backtracks, LABEL());
12179
790k
}
12180
12181
/* Emits the backtracking-path code for an assertion whose backtrack record is
'current' (used as an assert_backtrack). The item may be preceded by
OP_BRAZERO; OP_BRAMINZERO is not expected here (asserted).

With OP_BRAZERO, the STR_PTR value saved in STACK(0) is reloaded first. A
non-zero value means the matching path can be retried after clearing the
slot; how that happens depends on the frame size and on the assertion type:
  - framesize < 0: own_backtracks is bound here and, for OP_BRAZERO, the slot
    is cleared, a non-zero STR_PTR is retried, and the slot is freed.
  - framesize >= 0 with OP_BRAZERO and a negative assertion: same
    clear/retry/free sequence, without binding own_backtracks.
  - otherwise, for OP_ASSERT and OP_ASSERTBACK the frame is reverted through
    common->revertframes and the saved private data value restored before
    own_backtracks is bound; other assertion types only bind own_backtracks. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12182
2.08M
{
12183
2.08M
DEFINE_COMPILER;
12184
2.08M
PCRE2_SPTR cc = current->cc;
12185
2.08M
PCRE2_UCHAR bra = OP_BRA;
12186
2.08M
struct sljit_jump *brajump = NULL;
12187
12188
2.08M
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12189
2.08M
if (*cc == OP_BRAZERO)
12190
90.2k
  {
12191
90.2k
  bra = *cc;
12192
90.2k
  cc++;
12193
90.2k
  }
12194
12195
2.08M
if (bra == OP_BRAZERO)
12196
90.2k
  {
12197
90.2k
  SLJIT_ASSERT(current->own_backtracks == NULL);
12198
90.2k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12199
90.2k
  }
12200
12201
2.08M
if (CURRENT_AS(assert_backtrack)->framesize < 0)
12202
1.16M
  {
12203
1.16M
  set_jumps(current->own_backtracks, LABEL());
12204
12205
1.16M
  if (bra == OP_BRAZERO)
12206
61.8k
    {
12207
61.8k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12208
61.8k
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12209
61.8k
    free_stack(common, 1);
12210
61.8k
    }
12211
1.16M
  return;
12212
1.16M
  }
12213
12214
918k
if (bra == OP_BRAZERO)
12215
28.3k
  {
12216
28.3k
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12217
14.4k
    {
12218
14.4k
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12219
14.4k
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12220
14.4k
    free_stack(common, 1);
12221
14.4k
    return;
12222
14.4k
    }
12223
13.9k
  /* The slot is released now; it is taken back further down before the
  matching path is retried. */
  free_stack(common, 1);
12224
13.9k
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12225
13.9k
  }
12226
12227
903k
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12228
273k
  {
12229
273k
  /* Reset STACK_TOP from the private data slot, revert the frame, then
  drop the frame and restore the previous private data value. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12230
273k
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12231
273k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12232
273k
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12233
273k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12234
12235
273k
  set_jumps(current->own_backtracks, LABEL());
12236
273k
  }
12237
630k
else
12238
630k
  set_jumps(current->own_backtracks, LABEL());
12239
12240
903k
if (bra == OP_BRAZERO)
12241
13.9k
  {
12242
  /* We know there is enough place on the stack. */
12243
13.9k
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12244
13.9k
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12245
13.9k
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12246
13.9k
  JUMPHERE(brajump);
12247
13.9k
  }
12248
903k
}
12249
12250
/* Emits the backtracking path of a bracket (group).

   common   shared JIT compiler state.
   current  backtrack record of the bracket; current->cc points at the bracket
            opcode, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

   The emitted code is laid out in this order:
     1. decode the bracket (zero-repeat prefix, closing ket, bounded-repeat
        private data, capture offset, number of alternatives);
     2. reload the values kept on the stack (repeat counter, capture ovector
        entries, OP_ONCE frame) and select the alternative to continue with;
     3. the backtracking path of the items recorded in current->top;
     4. for every following OP_ALT: the matching path of that alternative, the
        state saved when it succeeds, and its own backtracking path;
     5. release of the remaining saved words and the loop handling for
        OP_KETRMAX, OP_KETRMIN and exact repeats.

   Returns early, leaving the output incomplete, when sljit reports a
   compiler error after an alternative has been compiled. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12251
7.39M
{
12252
7.39M
DEFINE_COMPILER;
12253
7.39M
int opcode, stacksize, alt_count, alt_max;
12254
7.39M
int offset = 0;
12255
7.39M
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12256
7.39M
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12257
7.39M
PCRE2_SPTR cc = current->cc;
12258
7.39M
PCRE2_SPTR ccbegin;
12259
7.39M
PCRE2_SPTR ccprev;
12260
7.39M
PCRE2_UCHAR bra = OP_BRA;
12261
7.39M
PCRE2_UCHAR ket;
12262
7.39M
const assert_backtrack *assert;
12263
7.39M
BOOL has_alternatives;
12264
7.39M
BOOL needs_control_head = FALSE;
12265
7.39M
BOOL has_vreverse;
12266
7.39M
struct sljit_jump *brazero = NULL;
12267
7.39M
struct sljit_jump *next_alt = NULL;
12268
7.39M
struct sljit_jump *once = NULL;
12269
7.39M
struct sljit_jump *cond = NULL;
12270
7.39M
struct sljit_label *rmin_label = NULL;
12271
7.39M
struct sljit_label *exact_label = NULL;
12272
7.39M
struct sljit_jump *mov_addr = NULL;
12273
12274
7.39M
/* Step over a zero-repeat prefix and remember which one it was in bra. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12275
1.57M
  {
12276
1.57M
  bra = *cc;
12277
1.57M
  cc++;
12278
1.57M
  }
12279
12280
7.39M
opcode = *cc;
12281
7.39M
/* ccbegin temporarily addresses the closing ket opcode (bracketend()
presumably returns the position just past the bracket - confirm). */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12282
7.39M
ket = *ccbegin;
12283
7.39M
/* A plain OP_KET that has private data carries a bounded repeat. OP_UPTO and
OP_MINUPTO repeats are treated as OP_KETRMAX and OP_KETRMIN from here on. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12284
100k
  {
12285
100k
  repeat_ptr = PRIVATE_DATA(ccbegin);
12286
100k
  repeat_type = PRIVATE_DATA(ccbegin + 2);
12287
100k
  repeat_count = PRIVATE_DATA(ccbegin + 3);
12288
100k
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12289
100k
  if (repeat_type == OP_UPTO)
12290
61.0k
    ket = OP_KETRMAX;
12291
100k
  if (repeat_type == OP_MINUPTO)
12292
26.3k
    ket = OP_KETRMIN;
12293
100k
  }
12294
7.39M
/* From here ccbegin is the bracket opcode, and cc is advanced by the
bracket's link value so that *cc can be tested for OP_ALT. */
ccbegin = cc;
12295
7.39M
cc += GET(cc, 1);
12296
7.39M
has_alternatives = *cc == OP_ALT;
12297
7.39M
/* For conditional groups has_alternatives is derived from the condition
(an assertion opcode or a non-NULL no_capture list), not from OP_ALT. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12298
55.4k
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;
12299
7.39M
/* offset is twice the capture number read from the bracket; it stays 0 for
non-capturing brackets. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12300
2.88M
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12301
7.39M
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12302
0
  opcode = OP_SCOND;
12303
12304
7.39M
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12305
12306
/* Decoding the needs_control_head in framesize. */
12307
7.39M
if (opcode == OP_ONCE)
12308
138k
  {
12309
138k
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12310
138k
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12311
138k
  }
12312
12313
7.39M
/* Bounded repeat compiled as a loop: read the word at the stack top into
TMP1, release it, and store it into repeat_ptr (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
12314
87.4k
  {
12315
  /* TMP1 is used in OP_KETRMIN below. */
12316
87.4k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12317
87.4k
  free_stack(common, 1);
12318
87.4k
  if (repeat_type == OP_UPTO)
12319
61.0k
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12320
26.3k
  else
12321
26.3k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12322
87.4k
  }
12323
12324
7.39M
if (ket == OP_KETRMAX)
12325
559k
  {
12326
559k
  if (bra == OP_BRAZERO)
12327
309k
    {
12328
309k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12329
309k
    free_stack(common, 1);
12330
309k
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12331
309k
    }
12332
559k
  }
12333
6.83M
else if (ket == OP_KETRMIN)
12334
243k
  {
12335
243k
  if (bra != OP_BRAMINZERO)
12336
89.3k
    {
12337
89.3k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12338
89.3k
    if (repeat_type != 0)
12339
26.3k
      {
12340
      /* TMP1 was set a few lines above. */
12341
26.3k
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12342
      /* Drop STR_PTR for non-greedy plus quantifier. */
12343
26.3k
      if (opcode != OP_ONCE)
12344
25.9k
        free_stack(common, 1);
12345
26.3k
      }
12346
62.9k
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12347
45.0k
      {
12348
      /* Checking zero-length iteration. */
12349
45.0k
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12350
42.3k
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12351
2.76k
      else
12352
2.76k
        {
12353
2.76k
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12354
2.76k
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12355
2.76k
        }
12356
      /* Drop STR_PTR for non-greedy plus quantifier. */
12357
45.0k
      if (opcode != OP_ONCE)
12358
41.1k
        free_stack(common, 1);
12359
45.0k
      }
12360
17.9k
    else
12361
17.9k
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12362
89.3k
    }
12363
243k
  /* Jump target of the OP_KETRMIN check emitted near the end of this
  function. */
  rmin_label = LABEL();
12364
243k
  if (repeat_type != 0)
12365
26.3k
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12366
243k
  }
12367
6.59M
else if (bra == OP_BRAZERO)
12368
731k
  {
12369
731k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12370
731k
  free_stack(common, 1);
12371
731k
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12372
731k
  }
12373
5.86M
else if (repeat_type == OP_EXACT)
12374
13.2k
  {
12375
13.2k
  /* The counter restarts at 1; exact_label is the target of the counting
  loop emitted near the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12376
13.2k
  exact_label = LABEL();
12377
13.2k
  }
12378
12379
7.39M
/* Capturing bracket: reload the saved ovector pair from the stack, and the
capture_last value as well when common->capture_last_ptr is in use. */
if (offset != 0)
12380
2.88M
  {
12381
2.88M
  if (common->capture_last_ptr != 0)
12382
400k
    {
12383
400k
    SLJIT_ASSERT(!is_optimized_cbracket(common, offset >> 1));
12384
400k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12385
400k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12386
400k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12387
400k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12388
400k
    free_stack(common, 3);
12389
400k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12390
400k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12391
400k
    }
12392
2.47M
  else if (!is_optimized_cbracket(common, offset >> 1))
12393
624k
    {
12394
624k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12395
624k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12396
624k
    free_stack(common, 2);
12397
624k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12398
624k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12399
624k
    }
12400
2.88M
  }
12401
4.51M
else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))
12402
0
  {
12403
0
  /* Exchange STR_END with the word stored next to private_data_ptr. */
  OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
12404
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12405
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
12406
12407
  /* Nested scs blocks will not update this variable. */
12408
0
  if (common->restore_end_ptr == 0)
12409
0
    common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
12410
0
  }
12411
12412
7.39M
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12413
138k
  {
12414
138k
  int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;
12415
12416
138k
  SLJIT_ASSERT(framesize != 0);
12417
138k
  if (framesize > 0)
12418
42.1k
    {
12419
42.1k
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12420
42.1k
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12421
42.1k
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
12422
42.1k
    }
12423
138k
  /* This jump is bound by JUMPHERE(once) in the OP_ONCE release code below. */
  once = JUMP(SLJIT_JUMP);
12424
138k
  }
12425
7.25M
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12426
55.4k
  {
12427
55.4k
  if (has_alternatives)
12428
49.6k
    {
12429
    /* Always exactly one alternative. */
12430
49.6k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12431
49.6k
    free_stack(common, 1);
12432
12433
49.6k
    alt_max = 2;
12434
49.6k
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12435
49.6k
    }
12436
55.4k
  }
12437
7.20M
else if (has_alternatives)
12438
697k
  {
12439
697k
  /* The word at the stack top selects where to continue. With more than three
  alternatives it is a code address used by an indirect jump (stored through
  SLJIT_MOV_ADDR below); otherwise it is a small index tested by compares. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12440
697k
  free_stack(common, 1);
12441
12442
697k
  if (alt_max > 3)
12443
92.5k
    {
12444
92.5k
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12445
12446
92.5k
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);
12447
92.5k
    sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());
12448
92.5k
    sljit_emit_op0(compiler, SLJIT_ENDBR);
12449
92.5k
    }
12450
604k
  else
12451
604k
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12452
697k
  }
12453
12454
7.39M
/* Backtracking path of the items recorded in current->top. */
COMPILE_BACKTRACKINGPATH(current->top);
12455
7.39M
if (current->own_backtracks)
12456
3.72M
  set_jumps(current->own_backtracks, LABEL());
12457
12458
7.39M
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12459
55.4k
  {
12460
  /* Conditional block always has at most one alternative. */
12461
55.4k
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12462
26.7k
    {
12463
26.7k
    SLJIT_ASSERT(has_alternatives);
12464
26.7k
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12465
26.7k
    SLJIT_ASSERT(assert->framesize != 0);
12466
26.7k
    if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12467
11.4k
      {
12468
11.4k
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12469
11.4k
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12470
11.4k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12471
11.4k
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12472
11.4k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12473
11.4k
      }
12474
26.7k
    /* cond is bound by JUMPHERE(cond) after the alternatives loop; the
    condfailed jumps are bound to the code emitted next. */
    cond = JUMP(SLJIT_JUMP);
12475
26.7k
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12476
26.7k
    }
12477
28.6k
  else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)
12478
22.8k
    {
12479
22.8k
    SLJIT_ASSERT(has_alternatives);
12480
22.8k
    cond = JUMP(SLJIT_JUMP);
12481
22.8k
    set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12482
22.8k
    }
12483
5.83k
  else
12484
5.83k
    SLJIT_ASSERT(!has_alternatives);
12485
55.4k
  }
12486
12487
7.39M
/* Emit every following alternative: its matching path, the words saved when
it succeeds, the jump to alternative_matchingpath, and finally its own
backtracking path. */
if (has_alternatives)
12488
805k
  {
12489
805k
  alt_count = 1;
12490
805k
  do
12491
1.19M
    {
12492
1.19M
    current->top = NULL;
12493
1.19M
    current->own_backtracks = NULL;
12494
1.19M
    current->simple_backtracks = NULL;
12495
    /* Conditional blocks always have an additional alternative, even if it is empty. */
12496
1.19M
    if (*cc == OP_ALT)
12497
1.15M
      {
12498
1.15M
      ccprev = cc + 1 + LINK_SIZE;
12499
1.15M
      cc += GET(cc, 1);
12500
12501
1.15M
      has_vreverse = FALSE;
12502
12503
1.15M
      /* Reload STR_PTR for the alternative; the location it was saved in
      depends on the bracket type. */
      switch (opcode)
12504
1.15M
        {
12505
0
        case OP_ASSERTBACK:
12506
67.5k
        case OP_ASSERTBACK_NA:
12507
67.5k
          SLJIT_ASSERT(private_data_ptr != 0);
12508
67.5k
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12509
12510
67.5k
          has_vreverse = (*ccprev == OP_VREVERSE);
12511
67.5k
          if (*ccprev == OP_REVERSE || has_vreverse)
12512
63.4k
            ccprev = compile_reverse_matchingpath(common, ccprev, current);
12513
67.5k
          break;
12514
0
        case OP_ASSERT_SCS:
12515
0
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12516
0
          break;
12517
76.3k
        case OP_ONCE:
12518
76.3k
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12519
76.3k
          break;
12520
7.15k
        case OP_COND:
12521
8.35k
        case OP_SCOND:
12522
8.35k
          break;
12523
1.00M
        default:
12524
1.00M
          if (private_data_ptr != 0)
12525
863k
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12526
141k
          else
12527
141k
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12528
1.00M
          break;
12529
1.15M
        }
12530
12531
1.15M
      compile_matchingpath(common, ccprev, cc, current);
12532
1.15M
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12533
0
        return;
12534
12535
1.15M
      switch (opcode)
12536
1.15M
        {
12537
67.5k
        case OP_ASSERTBACK_NA:
12538
67.5k
          if (has_vreverse)
12539
47.2k
            {
12540
47.2k
            SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
12541
47.2k
            add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
12542
47.2k
            }
12543
12544
67.5k
          if (PRIVATE_DATA(ccbegin + 1))
12545
66.7k
            OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12546
67.5k
          break;
12547
345k
        case OP_ASSERT_NA:
12548
345k
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12549
345k
          break;
12550
331k
        case OP_SCRIPT_RUN:
12551
331k
          match_script_run_common(common, private_data_ptr, current);
12552
331k
          break;
12553
1.15M
        }
12554
1.15M
      }
12555
12556
    /* Instructions after the current alternative is successfully matched. */
12557
    /* There is similar code in compile_bracket_matchingpath. */
12558
1.19M
    if (opcode == OP_ONCE)
12559
76.3k
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12560
12561
1.19M
    /* First pass: count the stack words that have to be allocated. */
    stacksize = 0;
12562
1.19M
    if (repeat_type == OP_MINUPTO)
12563
2.31k
      {
12564
      /* We need to preserve the counter. TMP2 will be used below. */
12565
2.31k
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12566
2.31k
      stacksize++;
12567
2.31k
      }
12568
1.19M
    if (ket != OP_KET || bra != OP_BRA)
12569
114k
      stacksize++;
12570
1.19M
    if (offset != 0)
12571
180k
      {
12572
180k
      if (common->capture_last_ptr != 0)
12573
11.4k
        stacksize++;
12574
180k
      if (!is_optimized_cbracket(common, offset >> 1))
12575
18.5k
        stacksize += 2;
12576
180k
      }
12577
1.19M
    if (opcode != OP_ONCE)
12578
1.12M
      stacksize++;
12579
12580
1.19M
    if (stacksize > 0)
12581
1.18M
      allocate_stack(common, stacksize);
12582
12583
1.19M
    /* Second pass: fill the allocated words; stacksize is now the index of the
    next word to write. */
    stacksize = 0;
12584
1.19M
    if (repeat_type == OP_MINUPTO)
12585
2.31k
      {
12586
      /* TMP2 was set above. */
12587
2.31k
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12588
2.31k
      stacksize++;
12589
2.31k
      }
12590
12591
1.19M
    if (ket != OP_KET || bra != OP_BRA)
12592
114k
      {
12593
114k
      if (ket != OP_KET)
12594
112k
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12595
1.47k
      else
12596
1.47k
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12597
114k
      stacksize++;
12598
114k
      }
12599
12600
1.19M
    if (offset != 0)
12601
180k
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12602
12603
1.19M
    /* Record which alternative matched: its index as an immediate, or (more
    than three alternatives) the address of the label set after the jump
    below. */
    if (opcode != OP_ONCE)
12604
1.12M
      {
12605
1.12M
      if (alt_max <= 3)
12606
742k
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12607
380k
      else
12608
380k
        mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12609
1.12M
      }
12610
12611
1.19M
    if (offset != 0 && ket == OP_KETRMAX && is_optimized_cbracket(common, offset >> 1))
12612
23.2k
      {
12613
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12614
23.2k
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12615
23.2k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12616
23.2k
      }
12617
12618
1.19M
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12619
12620
1.19M
    /* Entry point of this alternative's backtracking path: bind the pending
    compare jump (and emit the compare for the next alternative), or set the
    label whose address was stored above. */
    if (opcode != OP_ONCE)
12621
1.12M
      {
12622
1.12M
      if (alt_max <= 3)
12623
742k
        {
12624
742k
        JUMPHERE(next_alt);
12625
742k
        alt_count++;
12626
742k
        if (alt_count < alt_max)
12627
88.1k
          {
12628
88.1k
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12629
88.1k
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12630
88.1k
          }
12631
742k
        }
12632
380k
      else
12633
380k
        {
12634
380k
        sljit_set_label(mov_addr, LABEL());
12635
380k
        sljit_emit_op0(compiler, SLJIT_ENDBR);
12636
380k
        }
12637
1.12M
      }
12638
12639
1.19M
    COMPILE_BACKTRACKINGPATH(current->top);
12640
1.19M
    if (current->own_backtracks)
12641
697k
      set_jumps(current->own_backtracks, LABEL());
12642
1.19M
    SLJIT_ASSERT(!current->simple_backtracks);
12643
1.19M
    }
12644
1.19M
  while (*cc == OP_ALT);
12645
12646
805k
  if (cond != NULL)
12647
49.6k
    {
12648
49.6k
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12649
49.6k
    if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)
12650
8.40k
      {
12651
8.40k
      assert = CURRENT_AS(bracket_backtrack)->u.assert;
12652
8.40k
      SLJIT_ASSERT(assert->framesize != 0);
12653
8.40k
      if (assert->framesize > 0)
12654
5.12k
        {
12655
5.12k
        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12656
5.12k
        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12657
5.12k
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12658
5.12k
        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12659
5.12k
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12660
5.12k
        }
12661
8.40k
      }
12662
49.6k
    JUMPHERE(cond);
12663
49.6k
    }
12664
12665
  /* Free the STR_PTR. */
12666
805k
  if (private_data_ptr == 0)
12667
104k
    free_stack(common, 1);
12668
805k
  }
12669
12670
7.39M
/* Past the alternatives: write the remaining saved words back to their
locations and release them. */
if (offset != 0)
12671
2.88M
  {
12672
  /* Using both tmp registers is better for instruction scheduling. */
12673
2.88M
  if (is_optimized_cbracket(common, offset >> 1))
12674
1.85M
    {
12675
1.85M
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12676
1.85M
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12677
1.85M
    free_stack(common, 2);
12678
1.85M
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12679
1.85M
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12680
1.85M
    }
12681
1.02M
  else
12682
1.02M
    {
12683
1.02M
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12684
1.02M
    free_stack(common, 1);
12685
1.02M
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12686
1.02M
    }
12687
2.88M
  }
12688
4.51M
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
12689
99.1k
  {
12690
99.1k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12691
99.1k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12692
99.1k
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12693
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12694
99.1k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12695
99.1k
  free_stack(common, 4);
12696
99.1k
  }
12697
4.41M
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12698
2.86M
  {
12699
2.86M
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12700
2.86M
  free_stack(common, 1);
12701
2.86M
  }
12702
1.54M
else if (opcode == OP_ASSERT_SCS)
12703
0
  {
12704
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12705
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12706
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12707
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12708
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12709
0
  free_stack(common, has_alternatives ? 3 : 2);
12710
12711
0
  set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12712
12713
  /* Nested scs blocks will not update this variable. */
12714
0
  if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
12715
0
    common->restore_end_ptr = 0;
12716
0
  }
12717
1.54M
else if (opcode == OP_ONCE)
12718
138k
  {
12719
138k
  cc = ccbegin + GET(ccbegin, 1);
12720
138k
  stacksize = needs_control_head ? 1 : 0;
12721
12722
138k
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12723
42.1k
    {
12724
    /* Reset head and drop saved frame. */
12725
42.1k
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12726
42.1k
    }
12727
96.4k
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12728
47.9k
    {
12729
    /* The STR_PTR must be released. */
12730
47.9k
    stacksize++;
12731
47.9k
    }
12732
12733
138k
  if (stacksize > 0)
12734
96.9k
    free_stack(common, stacksize);
12735
12736
138k
  JUMPHERE(once);
12737
  /* Restore previous private_data_ptr */
12738
138k
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12739
42.1k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12740
96.4k
  else if (ket == OP_KETRMIN)
12741
18.5k
    {
12742
18.5k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12743
    /* See the comment below. */
12744
18.5k
    free_stack(common, 2);
12745
18.5k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12746
18.5k
    }
12747
138k
  }
12748
12749
7.39M
/* Loop handling: either jump back for another iteration or continue with
the code emitted after this function. */
if (repeat_type == OP_EXACT)
12750
13.2k
  {
12751
13.2k
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12752
13.2k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12753
13.2k
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12754
13.2k
  }
12755
7.38M
else if (ket == OP_KETRMAX)
12756
559k
  {
12757
559k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12758
559k
  if (bra != OP_BRAZERO)
12759
250k
    free_stack(common, 1);
12760
12761
559k
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12762
559k
  if (bra == OP_BRAZERO)
12763
309k
    {
12764
309k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12765
309k
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12766
309k
    JUMPHERE(brazero);
12767
309k
    free_stack(common, 1);
12768
309k
    }
12769
559k
  }
12770
6.82M
else if (ket == OP_KETRMIN)
12771
243k
  {
12772
243k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12773
12774
  /* OP_ONCE removes everything in case of a backtrack, so we don't
12775
  need to explicitly release the STR_PTR. The extra release would
12776
  badly affect the free_stack(2) above. */
12777
243k
  if (opcode != OP_ONCE)
12778
219k
    free_stack(common, 1);
12779
243k
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12780
243k
  if (opcode == OP_ONCE)
12781
24.4k
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12782
219k
  else if (bra == OP_BRAMINZERO)
12783
134k
    free_stack(common, 1);
12784
243k
  }
12785
6.57M
else if (bra == OP_BRAZERO)
12786
731k
  {
12787
731k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12788
731k
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12789
731k
  JUMPHERE(brazero);
12790
731k
  }
12791
7.39M
}
12792
12793
/* Emits the backtracking path of a bracket handled through a
   bracketpos_backtrack record (the OP_*BRAPOS opcodes, optionally preceded by
   OP_BRAPOSZERO).

   common   shared JIT compiler state.
   current  backtrack record of the bracket.

   When the record's framesize is negative, a capturing variant first reloads
   its ovector pair (and the capture_last value if that is tracked) from the
   stack, then stacksize words are released and the function returns.
   Otherwise STACK_TOP is reloaded from the private data slot, the shared
   revertframes routine is called, and the previous private data value is
   written back. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12794
284k
{
12795
284k
DEFINE_COMPILER;
12796
284k
int offset;
12797
284k
struct sljit_jump *jump;
12798
284k
PCRE2_SPTR cc;
12799
12800
/* No retry on backtrack, just drop everything. */
12801
284k
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12802
218k
  {
12803
218k
  cc = current->cc;
12804
12805
218k
  if (*cc == OP_BRAPOSZERO)
12806
19.5k
    cc++;
12807
12808
218k
  if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
12809
113k
    {
12810
113k
    /* offset is twice the capture number read from the bracket. The loads and
    stores below are interleaved, so TMP1 is reused for the capture_last
    value once the first ovector word has been stored. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
12811
113k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12812
113k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12813
113k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12814
113k
    if (common->capture_last_ptr != 0)
12815
8.76k
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12816
113k
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12817
113k
    if (common->capture_last_ptr != 0)
12818
8.76k
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12819
113k
    }
12820
218k
  set_jumps(current->own_backtracks, LABEL());
12821
218k
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12822
218k
  return;
12823
218k
  }
12824
12825
65.6k
/* A frame was saved: reload STACK_TOP from the private data slot and call
the shared revertframes routine. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12826
65.6k
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12827
65.6k
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12828
12829
65.6k
if (current->own_backtracks)
12830
64.4k
  {
12831
64.4k
  /* The fall-through path jumps over the release code, which is only
  reached through the own_backtracks jumps. */
  jump = JUMP(SLJIT_JUMP);
12832
64.4k
  set_jumps(current->own_backtracks, LABEL());
12833
  /* Drop the stack frame. */
12834
64.4k
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12835
64.4k
  JUMPHERE(jump);
12836
64.4k
  }
12837
65.6k
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12838
65.6k
}
12839
12840
/* Emits the backtracking path of an item handled through a
   braminzero_backtrack record. The backtracking path consists of compiling
   the item that follows the opcode at current->cc.

   common   shared JIT compiler state.
   current  backtrack record; its top / own_backtracks / simple_backtracks
            lists are cleared before the nested compilation.

   If the following opcode is greater than OP_ASSERTBACK_NOT the item is
   compiled as a bracket (matching path, then backtracking path); otherwise it
   is compiled as an assertion using a zero-initialised local
   assert_backtrack. Returns early when sljit reports a compiler error. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12841
568k
{
12842
568k
assert_backtrack backtrack;
12843
12844
568k
current->top = NULL;
12845
568k
current->own_backtracks = NULL;
12846
568k
current->simple_backtracks = NULL;
12847
568k
if (current->cc[1] > OP_ASSERTBACK_NOT)
12848
531k
  {
12849
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12850
531k
  compile_bracket_matchingpath(common, current->cc, current);
12851
531k
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))
12852
0
    return;
12853
531k
  compile_bracket_backtrackingpath(common, current->top);
12854
531k
  }
12855
36.9k
else
12856
36.9k
  {
12857
36.9k
  /* Only the cc and matchingpath fields of the temporary record are set; all
  other fields stay zero. */
  memset(&backtrack, 0, sizeof(backtrack));
12858
36.9k
  backtrack.common.cc = current->cc;
12859
36.9k
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12860
  /* Manual call of compile_assert_matchingpath. */
12861
36.9k
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12862
36.9k
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))
12863
0
    return;
12864
36.9k
  }
12865
568k
SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
12866
568k
}
12867
12868
/* Emits the backtracking path of a backtracking control verb; the verb is
   the opcode at current->cc.

   common   shared JIT compiler state.
   current  backtrack record of the verb.

   Cases, in the order they are tested:
     - OP_THEN / OP_THEN_ARG with an active then_trap: walk the chain that
       starts at control_head_ptr until the entry of that trap is found, then
       jump to the trap's quit list;
     - OP_THEN / OP_THEN_ARG without a trap, no local quit available and
       inside a positive assertion: jump to positive_assertion_quit;
     - local_quit_available: jump to the quit label / quit list;
     - OP_SKIP_ARG: call do_search_mark() and jump to reset_match when it
       returns a non-zero value;
     - otherwise: load STR_PTR (from the stack for OP_SKIP, zero for any
       other verb) and jump to reset_match. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12869
787k
{
12870
787k
DEFINE_COMPILER;
12871
787k
PCRE2_UCHAR opcode = *current->cc;
12872
787k
struct sljit_label *loop;
12873
787k
struct sljit_jump *jump;
12874
12875
787k
if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12876
255k
  {
12877
255k
  if (common->then_trap != NULL)
12878
206k
    {
12879
206k
    SLJIT_ASSERT(common->control_head_ptr != 0);
12880
12881
206k
    /* Search loop: STACK_TOP follows the link in word 0 of each entry until
    word 1 equals type_then_trap and word 2 equals the start value of the
    current trap. The first iteration enters at the comparisons. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12882
206k
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12883
206k
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12884
206k
    jump = JUMP(SLJIT_JUMP);
12885
12886
206k
    loop = LABEL();
12887
206k
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12888
206k
    JUMPHERE(jump);
12889
206k
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12890
206k
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12891
206k
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12892
206k
    return;
12893
206k
    }
12894
48.9k
  else if (!common->local_quit_available && common->in_positive_assertion)
12895
26.5k
    {
12896
26.5k
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12897
26.5k
    return;
12898
26.5k
    }
12899
255k
  }
12900
12901
555k
/* Reload STR_END from restore_end_ptr when that slot is set. OP_SKIP_ARG
does its own reload further below, after the do_search_mark() call. */
if (common->restore_end_ptr != 0 && opcode != OP_SKIP_ARG)
12902
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
12903
12904
555k
if (common->local_quit_available)
12905
362k
  {
12906
  /* Abort match with a fail. */
12907
362k
  if (common->quit_label == NULL)
12908
362k
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12909
0
  else
12910
0
    JUMPTO(SLJIT_JUMP, common->quit_label);
12911
362k
  return;
12912
362k
  }
12913
12914
192k
if (opcode == OP_SKIP_ARG)
12915
50.1k
  {
12916
50.1k
  /* Two-argument call: R0 holds the value of the control head slot, R1 the
  address current->cc + 2. The result is read back from R0. */
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12917
50.1k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12918
50.1k
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
12919
50.1k
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
12920
12921
50.1k
  if (common->restore_end_ptr == 0)
12922
50.1k
    {
12923
50.1k
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12924
50.1k
    add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
12925
50.1k
    return;
12926
50.1k
    }
12927
12928
0
  /* Same as above, but STR_END is reloaded before jumping to reset_match. A
  zero result skips both and falls through. */
  jump = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
12929
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12930
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
12931
0
  add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12932
0
  JUMPHERE(jump);
12933
0
  return;
12934
50.1k
  }
12935
12936
142k
if (opcode == OP_SKIP)
12937
70.6k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12938
71.4k
else
12939
71.4k
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
12940
142k
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12941
142k
}
12942
12943
/* Emits the backtracking path of an item handled through a
   vreverse_backtrack record.

   common   shared JIT compiler state.
   current  backtrack record of the item.

   The emitted code loads STR_PTR from stack word 2 and compares it with stack
   word 3. While STR_PTR is below that value, skip_valid_char() is applied,
   the new STR_PTR is stored back into word 2 and control returns to the
   record's matchingpath label. Once STR_PTR is greater than or equal to word
   3, control continues after this code, where the own_backtracks jumps are
   bound as well. */
static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12944
751k
{
12945
751k
DEFINE_COMPILER;
12946
751k
struct sljit_jump *jump;
12947
751k
struct sljit_label *label;
12948
12949
751k
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12950
751k
jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
12951
751k
skip_valid_char(common);
12952
751k
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
12953
751k
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
12954
12955
751k
/* Shared exit: target of the limit check above and of own_backtracks. */
label = LABEL();
12956
751k
sljit_set_label(jump, label);
12957
751k
set_jumps(current->own_backtracks, label);
12958
751k
}
12959
12960
/* Emits the backtracking path of a then_trap_backtrack record.

   common   shared JIT compiler state.
   current  backtrack record of the trap.

   If the record's then_trap field is set, no code is emitted: the field is
   only copied into common->then_trap. Otherwise two paths are emitted that
   both end by storing TMP1 into the control head slot:
     - the fall-through path loads TMP1 from the stack and releases
       3 + max(framesize, 0) words;
     - the path entered through the trap's quit list calls revertframes when
       framesize is positive, then loads TMP1 and releases 3 words. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12961
403k
{
12962
403k
DEFINE_COMPILER;
12963
403k
struct sljit_jump *jump;
12964
403k
int framesize;
12965
403k
int size;
12966
12967
403k
if (CURRENT_AS(then_trap_backtrack)->then_trap)
12968
201k
  {
12969
201k
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
12970
201k
  return;
12971
201k
  }
12972
12973
201k
/* size = 3 words plus the frame words when a frame exists. */
size = CURRENT_AS(then_trap_backtrack)->framesize;
12974
201k
size = 3 + (size < 0 ? 0 : size);
12975
12976
201k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
12977
201k
free_stack(common, size);
12978
201k
jump = JUMP(SLJIT_JUMP);
12979
12980
201k
/* Entry point for the jumps collected in the trap's quit list. */
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
12981
12982
201k
framesize = CURRENT_AS(then_trap_backtrack)->framesize;
12983
201k
SLJIT_ASSERT(framesize != 0);
12984
12985
/* STACK_TOP is set by THEN. */
12986
201k
if (framesize > 0)
12987
85.1k
  {
12988
85.1k
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12989
85.1k
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
12990
85.1k
  }
12991
201k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12992
201k
free_stack(common, 3);
12993
12994
201k
/* Both paths join here with the value for the control head slot in TMP1. */
JUMPHERE(jump);
12995
201k
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
12996
201k
}
12997
12998
/* Generates the backtracking code for a chain of backtrack records.

The chain is walked from 'current' through the 'prev' links. For each
record the pending simple_backtracks jumps (if any) are bound to a fresh
label, then the opcode the record points at (*current->cc) selects the
handler that emits its backtracking path.

common->then_trap is saved on entry and restored on exit, because the
OP_THEN_TRAP handler may overwrite it while the chain is processed. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one word and write it back to OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated character iterators. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Since classes have no backtracking path of their own, a class
    record found here was pushed by an iterator. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS: case OP_ECLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA: case OP_ASSERT_SCS:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode following BRAZERO decides which handler applies:
    opcodes up to OP_ASSERTBACK_NOT go to the assert handler, anything
    above it to the bracket handler. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slot entry: STACK(4) goes back to mark_ptr and STACK(0)
      goes back to control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* One slot entry: only mark_ptr is written back. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    if (common->restore_end_ptr != 0)
      {
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
      }

    if (!common->local_quit_available)
      {
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
      }

    /* Jump straight to the quit label when it already exists, otherwise
    queue the jump on the quit list. */
    if (common->quit_label != NULL)
      {
      JUMPTO(SLJIT_JUMP, common->quit_label);
      }
    else
      {
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
      }
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only the record's own backtracks need a target. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13200
13201
/* Generates the code of the recursion target described by
common->currententry.

Two entry points are created and bound to the jumps already collected in
the entry: entry_label (matching path) and backtrack_label (backtracking
path). Both start with SLJIT_FAST_ENTER and every exit is a
SLJIT_FAST_RETURN through TMP2, with TMP1 loaded with 1 after a match and
0 otherwise.

Each alternative of the group is compiled in turn. When the group has
more than one alternative, or contains an accept, a second stack word
records which alternative matched: groups with more than three
alternatives store a code address (SLJIT_MOV_ADDR) that the backtracking
entry reaches with an indirect jump, smaller groups store the alternative
index which is tested with compares. The value -1 in that word marks an
exit through accept.

Returns early, leaving the code incomplete, if the sljit compiler reports
an error after compiling a matching or backtracking path. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_index = 0;
int alt_total, header_size, frame_size, has_alt_slot;
backtrack_common altbacktrack;
jump_list *match_jumps = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_jump *alt_addr = NULL;
struct sljit_label *no_match;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_total = no_alternatives(cc);

/* Words kept in front of the private data: the return address and, with
several alternatives, the saved string pointer. */
header_size = (alt_total > 1) ? 2 : 1;
frame_size = private_data_size + header_size;

/* True when a second word is pushed after each alternative to identify it. */
has_alt_slot = (alt_total > 1) || ((recurse_flags & recurse_flag_accept_found) != 0);

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, frame_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, header_size, frame_size, recurse_flags);

/* This variable is saved and restored every time a recursive context is
entered or left. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  }

if (alt_total > 1)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);

for (;;)
  {
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Every alternative after the first restarts from the saved string pointer. */
  if (altbacktrack.cc != ccbegin)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  allocate_stack(common, has_alt_slot ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (has_alt_slot)
    {
    if (alt_total > 3)
      alt_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_index);
    }

  add_jump(compiler, &match_jumps, JUMP(SLJIT_JUMP));

  if (alt_index == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    if (recurse_flags & recurse_flag_accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(header_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, header_size, frame_size, recurse_flags);

    if (alt_total > 1)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_total > 3)
        {
        /* Dispatch through the stored address; the first alternative's
        backtracking code starts right after the indirect jump. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_label(alt_addr, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        {
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
        }
      }
    else
      {
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
      }
    }
  else if (alt_total > 3)
    {
    /* Address target of this alternative's backtracking code. */
    sljit_set_label(alt_addr, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    JUMPHERE(next_alt);
    if (alt_index + 1 < alt_total)
      {
      SLJIT_ASSERT(alt_index == 1 && alt_total == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_index++;

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

no_match = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, header_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1));
free_stack(common, frame_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Quit requests reload STACK_TOP from recursive_head_ptr and then
  leave through the no-match exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, header_size, frame_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, no_match);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Backtracking into a recursion that was left through accept. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, header_size, frame_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(header_size - 1));
  free_stack(common, frame_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  /* Push the two word trailer with -1 as the accept marker, then fall
  through into the common match exit. */
  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common exit of every successfully matched alternative. */
set_jumps(match_jumps, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, header_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(header_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13409
13410
/* These helper macros are not needed past this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Mask of the configuration bits that may be combined with the compile
mode passed to jit_compile(); jit_compile() clears these bits from its
'mode' argument before interpreting the rest as the mode. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13415
13416
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13417
68.0k
{
13418
68.0k
pcre2_real_code *re = (pcre2_real_code *)code;
13419
68.0k
struct sljit_compiler *compiler;
13420
68.0k
backtrack_common rootbacktrack;
13421
68.0k
compiler_common common_data;
13422
68.0k
compiler_common *common = &common_data;
13423
68.0k
const sljit_u8 *tables = re->tables;
13424
68.0k
void *allocator_data = &re->memctl;
13425
68.0k
int private_data_size;
13426
68.0k
PCRE2_SPTR ccend;
13427
68.0k
executable_functions *functions;
13428
68.0k
void *executable_func;
13429
68.0k
sljit_uw executable_size, private_data_length, total_length;
13430
68.0k
struct sljit_label *mainloop_label = NULL;
13431
68.0k
struct sljit_label *continue_match_label;
13432
68.0k
struct sljit_label *empty_match_found_label = NULL;
13433
68.0k
struct sljit_label *empty_match_backtrack_label = NULL;
13434
68.0k
struct sljit_label *reset_match_label;
13435
68.0k
struct sljit_label *quit_label;
13436
68.0k
struct sljit_jump *jump;
13437
68.0k
struct sljit_jump *minlength_check_failed = NULL;
13438
68.0k
struct sljit_jump *empty_match = NULL;
13439
68.0k
struct sljit_jump *end_anchor_failed = NULL;
13440
68.0k
jump_list *reqcu_not_found = NULL;
13441
13442
68.0k
SLJIT_ASSERT(tables);
13443
13444
#if HAS_VIRTUAL_REGISTERS == 1
13445
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
13446
#elif HAS_VIRTUAL_REGISTERS == 0
13447
68.0k
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
13448
#else
13449
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13450
#endif
13451
13452
68.0k
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13453
68.0k
memset(common, 0, sizeof(compiler_common));
13454
68.0k
common->re = re;
13455
68.0k
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13456
68.0k
rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
13457
13458
68.0k
#ifdef SUPPORT_UNICODE
13459
68.0k
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13460
68.0k
#endif /* SUPPORT_UNICODE */
13461
68.0k
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13462
13463
68.0k
common->start = rootbacktrack.cc;
13464
68.0k
common->read_only_data_head = NULL;
13465
68.0k
common->fcc = tables + fcc_offset;
13466
68.0k
common->lcc = (sljit_sw)(tables + lcc_offset);
13467
68.0k
common->mode = mode;
13468
68.0k
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13469
68.0k
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13470
68.0k
common->nltype = NLTYPE_FIXED;
13471
68.0k
switch(re->newline_convention)
13472
68.0k
  {
13473
9
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13474
68.0k
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13475
0
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13476
0
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13477
0
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13478
0
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13479
0
  default: return PCRE2_ERROR_INTERNAL;
13480
68.0k
  }
13481
68.0k
common->nlmax = READ_CHAR_MAX;
13482
68.0k
common->nlmin = 0;
13483
68.0k
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13484
68.0k
  common->bsr_nltype = NLTYPE_ANY;
13485
0
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13486
0
  common->bsr_nltype = NLTYPE_ANYCRLF;
13487
0
else
13488
0
  {
13489
#ifdef BSR_ANYCRLF
13490
  common->bsr_nltype = NLTYPE_ANYCRLF;
13491
#else
13492
0
  common->bsr_nltype = NLTYPE_ANY;
13493
0
#endif
13494
0
  }
13495
68.0k
common->bsr_nlmax = READ_CHAR_MAX;
13496
68.0k
common->bsr_nlmin = 0;
13497
68.0k
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13498
68.0k
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13499
68.0k
common->name_count = re->name_count;
13500
68.0k
common->name_entry_size = re->name_entry_size;
13501
68.0k
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13502
68.0k
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13503
68.0k
#ifdef SUPPORT_UNICODE
13504
/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */
13505
68.0k
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13506
68.0k
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13507
68.0k
if (common->utf)
13508
19.8k
  {
13509
19.8k
  if (common->nltype == NLTYPE_ANY)
13510
0
    common->nlmax = 0x2029;
13511
19.8k
  else if (common->nltype == NLTYPE_ANYCRLF)
13512
0
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13513
19.8k
  else
13514
19.8k
    {
13515
    /* We only care about the first newline character. */
13516
19.8k
    common->nlmax = common->newline & 0xff;
13517
19.8k
    }
13518
13519
19.8k
  if (common->nltype == NLTYPE_FIXED)
13520
19.8k
    common->nlmin = common->newline & 0xff;
13521
0
  else
13522
0
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13523
13524
19.8k
  if (common->bsr_nltype == NLTYPE_ANY)
13525
19.8k
    common->bsr_nlmax = 0x2029;
13526
0
  else
13527
0
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13528
19.8k
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13529
19.8k
  }
13530
48.1k
else
13531
48.1k
  common->invalid_utf = FALSE;
13532
68.0k
#endif /* SUPPORT_UNICODE */
13533
68.0k
ccend = bracketend(common->start);
13534
13535
/* Calculate the local space size on the stack. */
13536
68.0k
common->ovector_start = LOCAL0;
13537
/* Allocate space for temporary data structures. */
13538
68.0k
private_data_length = ccend - common->start;
13539
/* The chance of overflow is very low, but might happen on 32 bit. */
13540
68.0k
if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))
13541
0
  return PCRE2_ERROR_NOMEMORY;
13542
13543
68.0k
private_data_length *= sizeof(sljit_s32);
13544
/* Align to 32 bit. */
13545
68.0k
common->cbracket_bitset_length = ((re->top_bracket + 1) + (sljit_u32)7) & ~(sljit_u32)7;
13546
68.0k
total_length = common->cbracket_bitset_length << 1;
13547
68.0k
if (~(sljit_uw)0 - private_data_length < total_length)
13548
0
  return PCRE2_ERROR_NOMEMORY;
13549
13550
68.0k
total_length += private_data_length;
13551
68.0k
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);
13552
68.0k
if (!common->private_data_ptrs)
13553
0
  return PCRE2_ERROR_NOMEMORY;
13554
13555
68.0k
memset(common->private_data_ptrs, 0, private_data_length);
13556
68.0k
common->optimized_cbrackets = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;
13557
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13558
memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);
13559
#else
13560
68.0k
memset(common->optimized_cbrackets, 0xff, common->cbracket_bitset_length);
13561
68.0k
#endif
13562
68.0k
common->cbracket_bitset = common->optimized_cbrackets + common->cbracket_bitset_length;
13563
13564
68.0k
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13565
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13566
common->capture_last_ptr = common->ovector_start;
13567
common->ovector_start += sizeof(sljit_sw);
13568
#endif
13569
68.0k
if (!check_opcode_types(common, common->start, ccend))
13570
5.68k
  {
13571
5.68k
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13572
5.68k
  return PCRE2_ERROR_JIT_UNSUPPORTED;
13573
5.68k
  }
13574
13575
/* Checking flags and updating ovector_start. */
13576
62.3k
if (mode == PCRE2_JIT_COMPLETE &&
13577
62.3k
    (re->flags & PCRE2_LASTSET) != 0 &&
13578
62.3k
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13579
34.4k
  {
13580
34.4k
  common->req_char_ptr = common->ovector_start;
13581
34.4k
  common->ovector_start += sizeof(sljit_sw);
13582
34.4k
  }
13583
13584
62.3k
if (mode != PCRE2_JIT_COMPLETE)
13585
0
  {
13586
0
  common->start_used_ptr = common->ovector_start;
13587
0
  common->ovector_start += sizeof(sljit_sw);
13588
0
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
13589
0
    {
13590
0
    common->hit_start = common->ovector_start;
13591
0
    common->ovector_start += sizeof(sljit_sw);
13592
0
    }
13593
0
  }
13594
13595
62.3k
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13596
13.3k
  {
13597
13.3k
  common->match_end_ptr = common->ovector_start;
13598
13.3k
  common->ovector_start += sizeof(sljit_sw);
13599
13.3k
  }
13600
13601
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13602
common->control_head_ptr = 1;
13603
#endif
13604
13605
62.3k
if (common->control_head_ptr != 0)
13606
2.44k
  {
13607
2.44k
  common->control_head_ptr = common->ovector_start;
13608
2.44k
  common->ovector_start += sizeof(sljit_sw);
13609
2.44k
  }
13610
13611
62.3k
if (common->has_set_som)
13612
320
  {
13613
  /* Saving the real start pointer is necessary. */
13614
320
  common->start_ptr = common->ovector_start;
13615
320
  common->ovector_start += sizeof(sljit_sw);
13616
320
  }
13617
13618
/* Aligning ovector to even number of sljit words. */
13619
62.3k
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13620
25.1k
  common->ovector_start += sizeof(sljit_sw);
13621
13622
62.3k
if (common->start_ptr == 0)
13623
62.0k
  common->start_ptr = OVECTOR(0);
13624
13625
/* Capturing brackets cannot be optimized if callouts are allowed. */
13626
62.3k
if (common->capture_last_ptr != 0)
13627
9.25k
  memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);
13628
13629
62.3k
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13630
62.3k
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13631
62.3k
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13632
13633
62.3k
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
13634
62.3k
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
13635
62.3k
    !common->has_skip_in_assert_back)
13636
46.9k
  detect_early_fail(common, common->start, &private_data_size, 0, 0);
13637
13638
62.3k
set_private_data_ptrs(common, &private_data_size, ccend);
13639
13640
62.3k
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13641
13642
62.3k
if (private_data_size > 65536)
13643
8
  {
13644
8
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13645
8
  return PCRE2_ERROR_JIT_UNSUPPORTED;
13646
8
  }
13647
13648
62.3k
if (common->has_then)
13649
1.99k
  {
13650
1.99k
  total_length = ccend - common->start;
13651
1.99k
  common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);
13652
1.99k
  if (!common->then_offsets)
13653
0
    {
13654
0
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
13655
0
    return PCRE2_ERROR_NOMEMORY;
13656
0
    }
13657
1.99k
  memset(common->then_offsets, 0, total_length);
13658
1.99k
  set_then_offsets(common, common->start, NULL);
13659
1.99k
  }
13660
13661
62.3k
compiler = sljit_create_compiler(allocator_data);
13662
62.3k
if (!compiler)
13663
0
  {
13664
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13665
0
  if (common->has_then)
13666
0
    SLJIT_FREE(common->then_offsets, allocator_data);
13667
0
  return PCRE2_ERROR_NOMEMORY;
13668
0
  }
13669
62.3k
common->compiler = compiler;
13670
13671
/* Main pcre2_jit_exec entry. */
13672
62.3k
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13673
62.3k
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);
13674
13675
/* Register init. */
13676
62.3k
reset_ovector(common, (re->top_bracket + 1) * 2);
13677
62.3k
if (common->req_char_ptr != 0)
13678
34.4k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13679
13680
62.3k
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13681
62.3k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13682
62.3k
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13683
62.3k
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13684
62.3k
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13685
62.3k
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13686
62.3k
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13687
62.3k
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13688
62.3k
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13689
62.3k
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13690
13691
62.3k
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13692
12.6k
  reset_early_fail(common);
13693
13694
62.3k
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13695
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13696
62.3k
if (common->mark_ptr != 0)
13697
897
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13698
62.3k
if (common->control_head_ptr != 0)
13699
2.44k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13700
13701
/* Main part of the matching */
13702
62.3k
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13703
58.2k
  {
13704
58.2k
  mainloop_label = mainloop_entry(common);
13705
58.2k
  continue_match_label = LABEL();
13706
  /* Forward search if possible. */
13707
58.2k
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13708
47.1k
    {
13709
47.1k
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13710
19.5k
      ;
13711
27.5k
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
13712
2.46k
      fast_forward_first_char(common);
13713
25.0k
    else if ((re->flags & PCRE2_STARTLINE) != 0)
13714
403
      fast_forward_newline(common);
13715
24.6k
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13716
9.94k
      fast_forward_start_bits(common);
13717
47.1k
    }
13718
58.2k
  }
13719
4.11k
else
13720
4.11k
  continue_match_label = LABEL();
13721
13722
62.3k
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&
13723
62.3k
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13724
45.1k
  {
13725
45.1k
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13726
45.1k
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13727
45.1k
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13728
45.1k
  }
13729
62.3k
if (common->req_char_ptr != 0)
13730
34.4k
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13731
13732
/* Store the current STR_PTR in OVECTOR(0). */
13733
62.3k
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13734
/* Copy the limit of allowed recursions. */
13735
62.3k
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13736
62.3k
if (common->capture_last_ptr != 0)
13737
9.25k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13738
62.3k
if (common->fast_forward_bc_ptr != NULL)
13739
8.41k
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13740
13741
62.3k
if (common->start_ptr != OVECTOR(0))
13742
320
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13743
13744
/* Copy the beginning of the string. */
13745
62.3k
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13746
0
  {
13747
0
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13748
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13749
0
  JUMPHERE(jump);
13750
0
  }
13751
62.3k
else if (mode == PCRE2_JIT_PARTIAL_HARD)
13752
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13753
13754
62.3k
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13755
62.3k
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13756
0
  {
13757
0
  sljit_free_compiler(compiler);
13758
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13759
0
  if (common->has_then)
13760
0
    SLJIT_FREE(common->then_offsets, allocator_data);
13761
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13762
0
  return PCRE2_ERROR_NOMEMORY;
13763
0
  }
13764
13765
62.3k
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13766
8.25k
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13767
13768
62.3k
if (common->might_be_empty)
13769
17.6k
  {
13770
17.6k
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13771
17.6k
  empty_match_found_label = LABEL();
13772
17.6k
  }
13773
13774
62.3k
common->accept_label = LABEL();
13775
62.3k
if (common->accept != NULL)
13776
602
  set_jumps(common->accept, common->accept_label);
13777
13778
/* This means we have a match. Update the ovector. */
13779
62.3k
copy_ovector(common, re->top_bracket + 1);
13780
62.3k
common->quit_label = common->abort_label = LABEL();
13781
62.3k
if (common->quit != NULL)
13782
93
  set_jumps(common->quit, common->quit_label);
13783
62.3k
if (common->abort != NULL)
13784
11.1k
  set_jumps(common->abort, common->abort_label);
13785
62.3k
if (minlength_check_failed != NULL)
13786
45.1k
  SET_LABEL(minlength_check_failed, common->abort_label);
13787
13788
62.3k
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13789
62.3k
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13790
13791
62.3k
if (common->failed_match != NULL)
13792
32.3k
  {
13793
32.3k
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13794
32.3k
  set_jumps(common->failed_match, LABEL());
13795
32.3k
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13796
32.3k
  JUMPTO(SLJIT_JUMP, common->abort_label);
13797
32.3k
  }
13798
13799
62.3k
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13800
8.25k
  JUMPHERE(end_anchor_failed);
13801
13802
62.3k
if (mode != PCRE2_JIT_COMPLETE)
13803
0
  {
13804
0
  common->partialmatchlabel = LABEL();
13805
0
  set_jumps(common->partialmatch, common->partialmatchlabel);
13806
0
  return_with_partial_match(common, common->quit_label);
13807
0
  }
13808
13809
62.3k
if (common->might_be_empty)
13810
17.6k
  empty_match_backtrack_label = LABEL();
13811
62.3k
compile_backtrackingpath(common, rootbacktrack.top);
13812
62.3k
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13813
0
  {
13814
0
  sljit_free_compiler(compiler);
13815
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13816
0
  if (common->has_then)
13817
0
    SLJIT_FREE(common->then_offsets, allocator_data);
13818
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13819
0
  return PCRE2_ERROR_NOMEMORY;
13820
0
  }
13821
13822
62.3k
SLJIT_ASSERT(rootbacktrack.prev == NULL);
13823
62.3k
reset_match_label = LABEL();
13824
13825
62.3k
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13826
0
  {
13827
  /* Update hit_start only in the first time. */
13828
0
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13829
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13830
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13831
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13832
0
  JUMPHERE(jump);
13833
0
  }
13834
13835
/* Check we have remaining characters. */
13836
62.3k
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13837
10.5k
  {
13838
10.5k
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13839
10.5k
  }
13840
13841
62.3k
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13842
62.3k
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13843
13844
62.3k
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13845
58.2k
  {
13846
58.2k
  if (common->ff_newline_shortcut != NULL)
13847
403
    {
13848
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13849
403
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13850
344
      {
13851
344
      if (common->match_end_ptr != 0)
13852
20
        {
13853
20
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13854
20
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13855
20
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13856
20
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13857
20
        }
13858
324
      else
13859
324
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13860
344
      }
13861
403
    }
13862
57.8k
  else
13863
57.8k
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13864
58.2k
  }
13865
13866
/* No more remaining characters. */
13867
62.3k
if (reqcu_not_found != NULL)
13868
34.4k
  set_jumps(reqcu_not_found, LABEL());
13869
13870
62.3k
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13871
0
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13872
13873
62.3k
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13874
62.3k
JUMPTO(SLJIT_JUMP, common->quit_label);
13875
13876
62.3k
flush_stubs(common);
13877
13878
62.3k
if (common->might_be_empty)
13879
17.6k
  {
13880
17.6k
  JUMPHERE(empty_match);
13881
17.6k
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13882
17.6k
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13883
17.6k
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13884
17.6k
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13885
17.6k
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13886
17.6k
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
13887
17.6k
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13888
17.6k
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13889
17.6k
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13890
17.6k
  }
13891
13892
62.3k
common->fast_forward_bc_ptr = NULL;
13893
62.3k
common->early_fail_start_ptr = 0;
13894
62.3k
common->early_fail_end_ptr = 0;
13895
62.3k
common->currententry = common->entries;
13896
62.3k
common->local_quit_available = TRUE;
13897
62.3k
quit_label = common->quit_label;
13898
62.3k
SLJIT_ASSERT(common->restore_end_ptr == 0);
13899
13900
62.3k
if (common->currententry != NULL)
13901
6.78k
  {
13902
  /* A free bit for each private data. */
13903
6.78k
  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
13904
6.78k
  SLJIT_ASSERT(common->recurse_bitset_size > 0);
13905
6.78k
  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13906
13907
6.78k
  if (common->recurse_bitset != NULL)
13908
6.78k
    {
13909
6.78k
    do
13910
10.5k
      {
13911
      /* Might add new entries. */
13912
10.5k
      compile_recurse(common);
13913
10.5k
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13914
0
        break;
13915
10.5k
      flush_stubs(common);
13916
10.5k
      common->currententry = common->currententry->next;
13917
10.5k
      }
13918
10.5k
    while (common->currententry != NULL);
13919
13920
6.78k
    SLJIT_FREE(common->recurse_bitset, allocator_data);
13921
6.78k
    }
13922
13923
6.78k
  if (common->currententry != NULL)
13924
0
    {
13925
    /* The common->recurse_bitset has been freed. */
13926
0
    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
13927
13928
0
    sljit_free_compiler(compiler);
13929
0
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
13930
0
    if (common->has_then)
13931
0
      SLJIT_FREE(common->then_offsets, allocator_data);
13932
0
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13933
0
    return PCRE2_ERROR_NOMEMORY;
13934
0
    }
13935
6.78k
  }
13936
13937
62.3k
common->local_quit_available = FALSE;
13938
62.3k
common->quit_label = quit_label;
13939
62.3k
SLJIT_ASSERT(common->restore_end_ptr == 0);
13940
13941
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
13942
/* This is a (really) rare case. */
13943
62.3k
set_jumps(common->stackalloc, LABEL());
13944
/* RETURN_ADDR is not a saved register. */
13945
62.3k
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
13946
62.3k
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13947
13948
62.3k
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13949
13950
62.3k
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);
13951
62.3k
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13952
62.3k
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13953
62.3k
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13954
62.3k
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13955
13956
62.3k
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
13957
13958
62.3k
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13959
62.3k
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13960
62.3k
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13961
62.3k
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13962
62.3k
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
13963
62.3k
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13964
13965
/* Allocation failed. */
13966
62.3k
JUMPHERE(jump);
13967
/* We break the return address cache here, but this is a really rare case. */
13968
62.3k
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13969
62.3k
JUMPTO(SLJIT_JUMP, common->quit_label);
13970
13971
/* Call limit reached. */
13972
62.3k
set_jumps(common->calllimit, LABEL());
13973
62.3k
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13974
62.3k
JUMPTO(SLJIT_JUMP, common->quit_label);
13975
13976
62.3k
if (common->revertframes != NULL)
13977
3.80k
  {
13978
3.80k
  set_jumps(common->revertframes, LABEL());
13979
3.80k
  do_revertframes(common);
13980
3.80k
  }
13981
62.3k
if (common->wordboundary != NULL)
13982
2.93k
  {
13983
2.93k
  set_jumps(common->wordboundary, LABEL());
13984
2.93k
  check_wordboundary(common, FALSE);
13985
2.93k
  }
13986
62.3k
if (common->ucp_wordboundary != NULL)
13987
1.10k
  {
13988
1.10k
  set_jumps(common->ucp_wordboundary, LABEL());
13989
1.10k
  check_wordboundary(common, TRUE);
13990
1.10k
  }
13991
62.3k
if (common->anynewline != NULL)
13992
3.72k
  {
13993
3.72k
  set_jumps(common->anynewline, LABEL());
13994
3.72k
  check_anynewline(common);
13995
3.72k
  }
13996
62.3k
if (common->hspace != NULL)
13997
10.1k
  {
13998
10.1k
  set_jumps(common->hspace, LABEL());
13999
10.1k
  check_hspace(common);
14000
10.1k
  }
14001
62.3k
if (common->vspace != NULL)
14002
4.27k
  {
14003
4.27k
  set_jumps(common->vspace, LABEL());
14004
4.27k
  check_vspace(common);
14005
4.27k
  }
14006
62.3k
if (common->casefulcmp != NULL)
14007
2.62k
  {
14008
2.62k
  set_jumps(common->casefulcmp, LABEL());
14009
2.62k
  do_casefulcmp(common);
14010
2.62k
  }
14011
62.3k
if (common->caselesscmp != NULL)
14012
400
  {
14013
400
  set_jumps(common->caselesscmp, LABEL());
14014
400
  do_caselesscmp(common);
14015
400
  }
14016
62.3k
if (common->reset_match != NULL || common->restart_match != NULL)
14017
2.75k
  {
14018
2.75k
  if (common->restart_match != NULL)
14019
155
    {
14020
155
    set_jumps(common->restart_match, LABEL());
14021
155
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14022
155
    }
14023
14024
2.75k
  set_jumps(common->reset_match, LABEL());
14025
2.75k
  do_reset_match(common, (re->top_bracket + 1) * 2);
14026
  /* The value of restart_match is in TMP1. */
14027
2.75k
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14028
2.75k
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14029
2.75k
  JUMPTO(SLJIT_JUMP, reset_match_label);
14030
2.75k
  }
14031
62.3k
#ifdef SUPPORT_UNICODE
14032
#if PCRE2_CODE_UNIT_WIDTH == 8
14033
if (common->utfreadchar != NULL)
14034
  {
14035
  set_jumps(common->utfreadchar, LABEL());
14036
  do_utfreadchar(common);
14037
  }
14038
if (common->utfreadtype8 != NULL)
14039
  {
14040
  set_jumps(common->utfreadtype8, LABEL());
14041
  do_utfreadtype8(common);
14042
  }
14043
if (common->utfpeakcharback != NULL)
14044
  {
14045
  set_jumps(common->utfpeakcharback, LABEL());
14046
  do_utfpeakcharback(common);
14047
  }
14048
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14049
62.3k
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14050
62.3k
if (common->utfreadchar_invalid != NULL)
14051
0
  {
14052
0
  set_jumps(common->utfreadchar_invalid, LABEL());
14053
0
  do_utfreadchar_invalid(common);
14054
0
  }
14055
62.3k
if (common->utfreadnewline_invalid != NULL)
14056
0
  {
14057
0
  set_jumps(common->utfreadnewline_invalid, LABEL());
14058
0
  do_utfreadnewline_invalid(common);
14059
0
  }
14060
62.3k
if (common->utfmoveback_invalid)
14061
0
  {
14062
0
  set_jumps(common->utfmoveback_invalid, LABEL());
14063
0
  do_utfmoveback_invalid(common);
14064
0
  }
14065
62.3k
if (common->utfpeakcharback_invalid)
14066
0
  {
14067
0
  set_jumps(common->utfpeakcharback_invalid, LABEL());
14068
0
  do_utfpeakcharback_invalid(common);
14069
0
  }
14070
62.3k
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14071
62.3k
if (common->getucd != NULL)
14072
1.05k
  {
14073
1.05k
  set_jumps(common->getucd, LABEL());
14074
1.05k
  do_getucd(common);
14075
1.05k
  }
14076
62.3k
if (common->getucdtype != NULL)
14077
1.10k
  {
14078
1.10k
  set_jumps(common->getucdtype, LABEL());
14079
1.10k
  do_getucdtype(common);
14080
1.10k
  }
14081
62.3k
#endif /* SUPPORT_UNICODE */
14082
14083
62.3k
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14084
62.3k
if (common->has_then)
14085
1.99k
  SLJIT_FREE(common->then_offsets, allocator_data);
14086
14087
62.3k
executable_func = sljit_generate_code(compiler, 0, NULL);
14088
62.3k
executable_size = sljit_get_generated_code_size(compiler);
14089
62.3k
sljit_free_compiler(compiler);
14090
14091
62.3k
if (executable_func == NULL)
14092
0
  {
14093
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14094
0
  return PCRE2_ERROR_NOMEMORY;
14095
0
  }
14096
14097
/* Reuse the function descriptor if possible. */
14098
62.3k
if (re->executable_jit != NULL)
14099
0
  functions = (executable_functions *)re->executable_jit;
14100
62.3k
else
14101
62.3k
  {
14102
62.3k
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14103
62.3k
  if (functions == NULL)
14104
0
    {
14105
    /* This case is highly unlikely since we just recently
14106
    freed a lot of memory. Not impossible though. */
14107
0
    sljit_free_code(executable_func, NULL);
14108
0
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14109
0
    return PCRE2_ERROR_NOMEMORY;
14110
0
    }
14111
62.3k
  memset(functions, 0, sizeof(executable_functions));
14112
62.3k
  functions->top_bracket = re->top_bracket + 1;
14113
62.3k
  functions->limit_match = re->limit_match;
14114
62.3k
  re->executable_jit = functions;
14115
62.3k
  }
14116
14117
/* Turn mode into an index. */
14118
62.3k
if (mode == PCRE2_JIT_COMPLETE)
14119
62.3k
  mode = 0;
14120
0
else
14121
0
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14122
14123
62.3k
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14124
62.3k
functions->executable_funcs[mode] = executable_func;
14125
62.3k
functions->read_only_data_heads[mode] = common->read_only_data_head;
14126
62.3k
functions->executable_sizes[mode] = executable_size;
14127
62.3k
return 0;
14128
62.3k
}
14129
14130
#endif
14131
14132
/*************************************************
14133
*        JIT compile a Regular Expression        *
14134
*************************************************/
14135
14136
/* This function uses JIT to convert a previously-compiled pattern into machine
14137
code.
14138
14139
Arguments:
14140
  code          a compiled pattern
14141
  options       JIT option bits
14142
14143
Returns:        0: success or (*NOJIT) was used
14144
               <0: an error code
14145
*/
14146
14147
/* Option bits that pcre2_jit_compile() accepts for a compile request. Any
other bit makes it return PCRE2_ERROR_JIT_BADOPTION. PCRE2_JIT_TEST_ALLOC is
not part of this mask because it is handled on its own before the mask is
applied. */
#define PUBLIC_JIT_COMPILE_OPTIONS \
14148
68.0k
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14149
14150
/* Public entry point. Validates the option bits and then calls jit_compile()
once for every requested matching mode that does not have generated code yet.
Returns 0 on success (also when the pattern has PCRE2_NOJIT set), otherwise
the error code of the first check or compilation that failed. When built
without SUPPORT_JIT only the option checks run, and a request that passes them
ends with PCRE2_ERROR_JIT_BADOPTION. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14151
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14152
68.0k
{
14153
68.0k
pcre2_real_code *re = (pcre2_real_code *)code;
14154
68.0k
#ifdef SUPPORT_JIT
14155
68.0k
void *exec_memory;
14156
68.0k
executable_functions *functions;
14157
68.0k
/* -1: not probed yet, 1: the probe allocation succeeded, 0: it failed. */
static int executable_allocator_is_working = -1;
14158
14159
68.0k
if (executable_allocator_is_working == -1)
14160
1
  {
14161
  /* Checks whether the executable allocator is working. This check
14162
     might run multiple times in multi-threaded environments, but the
14163
     result should not be affected by it. */
14164
1
  exec_memory = SLJIT_MALLOC_EXEC(32, NULL);
14165
1
  if (exec_memory != NULL)
14166
1
    {
14167
1
    SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);
14168
1
    executable_allocator_is_working = 1;
14169
1
    }
14170
0
  else executable_allocator_is_working = 0;
14171
1
  }
14172
68.0k
#endif
14173
14174
68.0k
/* PCRE2_JIT_TEST_ALLOC is only valid on its own. Nothing is compiled: the
call just reports the outcome of the allocator probe above, or
PCRE2_ERROR_JIT_UNSUPPORTED when there is no JIT support. Note that this path
is taken before code is checked for NULL. */
if (options & PCRE2_JIT_TEST_ALLOC)
14175
0
  {
14176
0
  if (options != PCRE2_JIT_TEST_ALLOC)
14177
0
    return PCRE2_ERROR_JIT_BADOPTION;
14178
14179
0
#ifdef SUPPORT_JIT
14180
0
  return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY;
14181
#else
14182
  return PCRE2_ERROR_JIT_UNSUPPORTED;
14183
#endif
14184
0
  }
14185
14186
68.0k
if (code == NULL)
14187
0
  return PCRE2_ERROR_NULL;
14188
14189
68.0k
/* Reject every bit that is not listed in PUBLIC_JIT_COMPILE_OPTIONS. */
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14190
0
  return PCRE2_ERROR_JIT_BADOPTION;
14191
14192
/* Support for invalid UTF was first introduced in JIT, with the option
14193
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14194
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14195
preferred feature, with the earlier option deprecated. However, for backward
14196
compatibility, if the earlier option is set, it forces the new option so that
14197
if JIT matching falls back to the interpreter, there is still support for
14198
invalid UTF. However, if this function has already been successfully called
14199
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14200
non-invalid-supporting JIT code was compiled), give an error.
14201
14202
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14203
actions are needed:
14204
14205
  1. Remove the definition from pcre2.h.in and from the list in
14206
     PUBLIC_JIT_COMPILE_OPTIONS above.
14207
14208
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14209
14210
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14211
14212
  4. Delete the following short block of code. The setting of "re" and
14213
     "functions" can be moved into the JIT-only block below, but if that is
14214
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14215
     avoid compiler warnings.
14216
*/
14217
14218
68.0k
#ifdef SUPPORT_JIT
14219
68.0k
functions = (executable_functions *)re->executable_jit;
14220
68.0k
#endif
14221
14222
68.0k
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14223
0
  {
14224
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14225
0
    {
14226
0
#ifdef SUPPORT_JIT
14227
0
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14228
0
#endif
14229
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14230
0
    }
14231
0
  }
14232
14233
/* The above tests are run with and without JIT support. This means that
14234
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14235
interpreter support) even in the absence of JIT. But now, if there is no JIT
14236
support, give an error return. */
14237
14238
#ifndef SUPPORT_JIT
14239
return PCRE2_ERROR_JIT_BADOPTION;
14240
#else  /* SUPPORT_JIT */
14241
14242
/* There is JIT support. Do the necessary. */
14243
14244
68.0k
/* PCRE2_NOJIT is set in the pattern flags: report success without generating
any code. */
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14245
14246
68.0k
if (!executable_allocator_is_working)
14247
0
  return PCRE2_ERROR_NOMEMORY;
14248
14249
68.0k
/* Mirror the pattern's PCRE2_MATCH_INVALID_UTF setting into the option bits
that are handed to jit_compile(). */
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14250
0
  options |= PCRE2_JIT_INVALID_UTF;
14251
14252
68.0k
/* Each requested mode gets its own jit_compile() call with the other two mode
bits masked out. A mode is skipped when the descriptor read above already holds
code for it (slot 0: complete, slot 1: partial soft, slot 2: partial hard). The
first failing compilation ends the function with its error code. */
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14253
68.0k
    || functions->executable_funcs[0] == NULL)) {
14254
68.0k
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14255
68.0k
  int result = jit_compile(code, options & ~excluded_options);
14256
68.0k
  if (result != 0)
14257
5.69k
    return result;
14258
68.0k
  }
14259
14260
62.3k
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14261
0
    || functions->executable_funcs[1] == NULL)) {
14262
0
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14263
0
  int result = jit_compile(code, options & ~excluded_options);
14264
0
  if (result != 0)
14265
0
    return result;
14266
0
  }
14267
14268
62.3k
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14269
0
    || functions->executable_funcs[2] == NULL)) {
14270
0
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14271
0
  int result = jit_compile(code, options & ~excluded_options);
14272
0
  if (result != 0)
14273
0
    return result;
14274
0
  }
14275
14276
62.3k
return 0;
14277
14278
62.3k
#endif  /* SUPPORT_JIT */
14279
62.3k
}
14280
14281
/* JIT compiler uses an all-in-one approach. This improves security,
14282
   since the code generator functions are not exported. */
14283
14284
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14285
14286
#include "pcre2_jit_match_inc.h"
14287
#include "pcre2_jit_misc_inc.h"
14288
14289
/* End of pcre2_jit_compile.c */