Ruby 2.7.0p0 (2019-12-25 revision 647ee6f091eafcce70ffb75ddf7e121e192ab217)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
34 
35 extern OnigCaseFoldType
37 {
39 }
40 
41 extern int
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
71 swap_node(Node* a, Node* b)
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
96 distance_add(OnigDistance d1, OnigDistance d2)
97 {
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
107 distance_multiply(OnigDistance d, int m)
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
118 bitset_is_empty(BitSetRef bs)
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 bitset_on_num(BitSetRef bs)
130 {
131  int i, n;
132 
133  n = 0;
134  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135  if (BITSET_AT(bs, i)) n++;
136  }
137  return n;
138 }
139 #endif
140 
141 extern int
143 {
144  if (size <= 0) {
145  size = 0;
146  buf->p = NULL;
147  }
148  else {
149  buf->p = (UChar* )xmalloc(size);
150  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151  }
152 
153  buf->alloc = (unsigned int )size;
154  buf->used = 0;
155  return 0;
156 }
157 
158 
159 #ifdef USE_SUBEXP_CALL
160 
161 static int
162 unset_addr_list_init(UnsetAddrList* uslist, int size)
163 {
164  UnsetAddr* p;
165 
166  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
168  uslist->num = 0;
169  uslist->alloc = size;
170  uslist->us = p;
171  return 0;
172 }
173 
174 static void
175 unset_addr_list_end(UnsetAddrList* uslist)
176 {
177  if (IS_NOT_NULL(uslist->us))
178  xfree(uslist->us);
179 }
180 
181 static int
182 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183 {
184  UnsetAddr* p;
185  int size;
186 
187  if (uslist->num >= uslist->alloc) {
188  size = uslist->alloc * 2;
189  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
191  uslist->alloc = size;
192  uslist->us = p;
193  }
194 
195  uslist->us[uslist->num].offset = offset;
196  uslist->us[uslist->num].target = node;
197  uslist->num++;
198  return 0;
199 }
200 #endif /* USE_SUBEXP_CALL */
201 
202 
203 static int
204 add_opcode(regex_t* reg, int opcode)
205 {
206  BBUF_ADD1(reg, opcode);
207  return 0;
208 }
209 
210 #ifdef USE_COMBINATION_EXPLOSION_CHECK
211 static int
212 add_state_check_num(regex_t* reg, int num)
213 {
215 
217  return 0;
218 }
219 #endif
220 
221 static int
222 add_rel_addr(regex_t* reg, int addr)
223 {
224  RelAddrType ra = (RelAddrType )addr;
225 
226  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227  return 0;
228 }
229 
230 static int
231 add_abs_addr(regex_t* reg, int addr)
232 {
233  AbsAddrType ra = (AbsAddrType )addr;
234 
235  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236  return 0;
237 }
238 
239 static int
240 add_length(regex_t* reg, OnigDistance len)
241 {
242  LengthType l = (LengthType )len;
243 
244  BBUF_ADD(reg, &l, SIZE_LENGTH);
245  return 0;
246 }
247 
248 static int
249 add_mem_num(regex_t* reg, int num)
250 {
251  MemNumType n = (MemNumType )num;
252 
253  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254  return 0;
255 }
256 
257 #if 0
258 static int
259 add_pointer(regex_t* reg, void* addr)
260 {
261  PointerType ptr = (PointerType )addr;
262 
263  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264  return 0;
265 }
266 #endif
267 
268 static int
269 add_option(regex_t* reg, OnigOptionType option)
270 {
271  BBUF_ADD(reg, &option, SIZE_OPTION);
272  return 0;
273 }
274 
275 static int
276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277 {
278  int r;
279 
280  r = add_opcode(reg, opcode);
281  if (r) return r;
282  r = add_rel_addr(reg, addr);
283  return r;
284 }
285 
286 static int
287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288 {
289  BBUF_ADD(reg, bytes, len);
290  return 0;
291 }
292 
293 static int
294 add_bitset(regex_t* reg, BitSetRef bs)
295 {
296  BBUF_ADD(reg, bs, SIZE_BITSET);
297  return 0;
298 }
299 
300 static int
301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302 {
303  int r;
304 
305  r = add_opcode(reg, opcode);
306  if (r) return r;
307  r = add_option(reg, option);
308  return r;
309 }
310 
311 static int compile_length_tree(Node* node, regex_t* reg);
312 static int compile_tree(Node* node, regex_t* reg);
313 
314 
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318 
319 static int
320 select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321 {
322  int op;
323  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324 
325  if (ignore_case) {
326  switch (str_len) {
327  case 1: op = OP_EXACT1_IC; break;
328  default: op = OP_EXACTN_IC; break;
329  }
330  }
331  else {
332  switch (mb_len) {
333  case 1:
334  switch (str_len) {
335  case 1: op = OP_EXACT1; break;
336  case 2: op = OP_EXACT2; break;
337  case 3: op = OP_EXACT3; break;
338  case 4: op = OP_EXACT4; break;
339  case 5: op = OP_EXACT5; break;
340  default: op = OP_EXACTN; break;
341  }
342  break;
343 
344  case 2:
345  switch (str_len) {
346  case 1: op = OP_EXACTMB2N1; break;
347  case 2: op = OP_EXACTMB2N2; break;
348  case 3: op = OP_EXACTMB2N3; break;
349  default: op = OP_EXACTMB2N; break;
350  }
351  break;
352 
353  case 3:
354  op = OP_EXACTMB3N;
355  break;
356 
357  default:
358  op = OP_EXACTMBN;
359  break;
360  }
361  }
362  return op;
363 }
364 
365 static int
366 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367 {
368  int r;
369  int saved_num_null_check = reg->num_null_check;
370 
371  if (empty_info != 0) {
372  r = add_opcode(reg, OP_NULL_CHECK_START);
373  if (r) return r;
374  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375  if (r) return r;
376  reg->num_null_check++;
377  }
378 
379  r = compile_tree(node, reg);
380  if (r) return r;
381 
382  if (empty_info != 0) {
383  if (empty_info == NQ_TARGET_IS_EMPTY)
384  r = add_opcode(reg, OP_NULL_CHECK_END);
385  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389 
390  if (r) return r;
391  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392  }
393  return r;
394 }
395 
396 #ifdef USE_SUBEXP_CALL
397 static int
398 compile_call(CallNode* node, regex_t* reg)
399 {
400  int r;
401 
402  r = add_opcode(reg, OP_CALL);
403  if (r) return r;
404  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405  node->target);
406  if (r) return r;
407  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408  return r;
409 }
410 #endif
411 
412 static int
413 compile_tree_n_times(Node* node, int n, regex_t* reg)
414 {
415  int i, r;
416 
417  for (i = 0; i < n; i++) {
418  r = compile_tree(node, reg);
419  if (r) return r;
420  }
421  return 0;
422 }
423 
424 static int
425 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426  regex_t* reg ARG_UNUSED, int ignore_case)
427 {
428  int len;
429  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430 
431  len = SIZE_OPCODE;
432 
433  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
434  if (IS_NEED_STR_LEN_OP_EXACT(op))
435  len += SIZE_LENGTH;
436 
437  len += (int )byte_len;
438  return len;
439 }
440 
441 static int
442 add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443  regex_t* reg, int ignore_case)
444 {
445  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446  add_opcode(reg, op);
447 
448  if (op == OP_EXACTMBN)
449  add_length(reg, mb_len);
450 
451  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452  if (op == OP_EXACTN_IC)
453  add_length(reg, byte_len);
454  else
455  add_length(reg, byte_len / mb_len);
456  }
457 
458  add_bytes(reg, s, byte_len);
459  return 0;
460 }
461 
462 
463 static int
464 compile_length_string_node(Node* node, regex_t* reg)
465 {
466  int rlen, r, len, prev_len, blen, ambig;
467  OnigEncoding enc = reg->enc;
468  UChar *p, *prev;
469  StrNode* sn;
470 
471  sn = NSTR(node);
472  if (sn->end <= sn->s)
473  return 0;
474 
475  ambig = NSTRING_IS_AMBIG(node);
476 
477  p = prev = sn->s;
478  prev_len = enclen(enc, p, sn->end);
479  p += prev_len;
480  blen = prev_len;
481  rlen = 0;
482 
483  for (; p < sn->end; ) {
484  len = enclen(enc, p, sn->end);
485  if (len == prev_len || ambig) {
486  blen += len;
487  }
488  else {
489  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490  rlen += r;
491  prev = p;
492  blen = len;
493  prev_len = len;
494  }
495  p += len;
496  }
497  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498  rlen += r;
499  return rlen;
500 }
501 
502 static int
503 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504 {
505  if (sn->end <= sn->s)
506  return 0;
507 
508  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509 }
510 
511 static int
512 compile_string_node(Node* node, regex_t* reg)
513 {
514  int r, len, prev_len, blen, ambig;
515  OnigEncoding enc = reg->enc;
516  UChar *p, *prev, *end;
517  StrNode* sn;
518 
519  sn = NSTR(node);
520  if (sn->end <= sn->s)
521  return 0;
522 
523  end = sn->end;
524  ambig = NSTRING_IS_AMBIG(node);
525 
526  p = prev = sn->s;
527  prev_len = enclen(enc, p, end);
528  p += prev_len;
529  blen = prev_len;
530 
531  for (; p < end; ) {
532  len = enclen(enc, p, end);
533  if (len == prev_len || ambig) {
534  blen += len;
535  }
536  else {
537  r = add_compile_string(prev, prev_len, blen, reg, ambig);
538  if (r) return r;
539 
540  prev = p;
541  blen = len;
542  prev_len = len;
543  }
544 
545  p += len;
546  }
547  return add_compile_string(prev, prev_len, blen, reg, ambig);
548 }
549 
550 static int
551 compile_string_raw_node(StrNode* sn, regex_t* reg)
552 {
553  if (sn->end <= sn->s)
554  return 0;
555 
556  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
557 }
558 
559 static int
560 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
561 {
562 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
563  add_length(reg, mbuf->used);
564  return add_bytes(reg, mbuf->p, mbuf->used);
565 #else
566  int r, pad_size;
568 
569  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
570  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
571  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572 
573  r = add_bytes(reg, mbuf->p, mbuf->used);
574 
575  /* padding for return value from compile_length_cclass_node() to be fix. */
576  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
577  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
578  return r;
579 #endif
580 }
581 
582 static int
583 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
584 {
585  int len;
586 
587  if (IS_NULL(cc->mbuf)) {
589  }
590  else {
591  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
592  len = SIZE_OPCODE;
593  }
594  else {
596  }
597 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
598  len += SIZE_LENGTH + cc->mbuf->used;
599 #else
600  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
601 #endif
602  }
603 
604  return len;
605 }
606 
607 static int
608 compile_cclass_node(CClassNode* cc, regex_t* reg)
609 {
610  int r;
611 
612  if (IS_NULL(cc->mbuf)) {
613  if (IS_NCCLASS_NOT(cc))
614  add_opcode(reg, OP_CCLASS_NOT);
615  else
616  add_opcode(reg, OP_CCLASS);
617 
618  r = add_bitset(reg, cc->bs);
619  }
620  else {
621  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
622  if (IS_NCCLASS_NOT(cc))
623  add_opcode(reg, OP_CCLASS_MB_NOT);
624  else
625  add_opcode(reg, OP_CCLASS_MB);
626 
627  r = add_multi_byte_cclass(cc->mbuf, reg);
628  }
629  else {
630  if (IS_NCCLASS_NOT(cc))
631  add_opcode(reg, OP_CCLASS_MIX_NOT);
632  else
633  add_opcode(reg, OP_CCLASS_MIX);
634 
635  r = add_bitset(reg, cc->bs);
636  if (r) return r;
637  r = add_multi_byte_cclass(cc->mbuf, reg);
638  }
639  }
640 
641  return r;
642 }
643 
644 static int
645 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
646 {
647 #define REPEAT_RANGE_ALLOC 4
648 
649  OnigRepeatRange* p;
650 
651  if (reg->repeat_range_alloc == 0) {
654  reg->repeat_range = p;
656  }
657  else if (reg->repeat_range_alloc <= id) {
658  int n;
661  sizeof(OnigRepeatRange) * n);
663  reg->repeat_range = p;
664  reg->repeat_range_alloc = n;
665  }
666  else {
667  p = reg->repeat_range;
668  }
669 
670  p[id].lower = lower;
671  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
672  return 0;
673 }
674 
675 static int
676 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
677  regex_t* reg)
678 {
679  int r;
680  int num_repeat = reg->num_repeat;
681 
682  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
683  if (r) return r;
684  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
685  reg->num_repeat++;
686  if (r) return r;
687  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
688  if (r) return r;
689 
690  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
691  if (r) return r;
692 
693  r = compile_tree_empty_check(qn->target, reg, empty_info);
694  if (r) return r;
695 
696  if (
697 #ifdef USE_SUBEXP_CALL
698  reg->num_call > 0 ||
699 #endif
701  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
702  }
703  else {
704  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
705  }
706  if (r) return r;
707  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
708  return r;
709 }
710 
711 static int
712 is_anychar_star_quantifier(QtfrNode* qn)
713 {
714  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
715  NTYPE(qn->target) == NT_CANY)
716  return 1;
717  else
718  return 0;
719 }
720 
721 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
722 #define CKN_ON (ckn > 0)
723 
724 #ifdef USE_COMBINATION_EXPLOSION_CHECK
725 
726 static int
727 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
728 {
729  int len, mod_tlen, cklen;
730  int ckn;
731  int infinite = IS_REPEAT_INFINITE(qn->upper);
732  int empty_info = qn->target_empty_info;
733  int tlen = compile_length_tree(qn->target, reg);
734 
735  if (tlen < 0) return tlen;
736 
737  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
738 
739  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
740 
741  /* anychar repeat */
742  if (NTYPE(qn->target) == NT_CANY) {
743  if (qn->greedy && infinite) {
744  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
745  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
746  else
747  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
748  }
749  }
750 
751  if (empty_info != 0)
752  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
753  else
754  mod_tlen = tlen;
755 
756  if (infinite && qn->lower <= 1) {
757  if (qn->greedy) {
758  if (qn->lower == 1)
759  len = SIZE_OP_JUMP;
760  else
761  len = 0;
762 
763  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
764  }
765  else {
766  if (qn->lower == 0)
767  len = SIZE_OP_JUMP;
768  else
769  len = 0;
770 
771  len += mod_tlen + SIZE_OP_PUSH + cklen;
772  }
773  }
774  else if (qn->upper == 0) {
775  if (qn->is_referred != 0) /* /(?<n>..){0}/ */
776  len = SIZE_OP_JUMP + tlen;
777  else
778  len = 0;
779  }
780  else if (qn->upper == 1 && qn->greedy) {
781  if (qn->lower == 0) {
782  if (CKN_ON) {
783  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
784  }
785  else {
786  len = SIZE_OP_PUSH + tlen;
787  }
788  }
789  else {
790  len = tlen;
791  }
792  }
793  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
794  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
795  }
796  else {
798  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
799  if (CKN_ON)
800  len += SIZE_OP_STATE_CHECK;
801  }
802 
803  return len;
804 }
805 
806 static int
807 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
808 {
809  int r, mod_tlen;
810  int ckn;
811  int infinite = IS_REPEAT_INFINITE(qn->upper);
812  int empty_info = qn->target_empty_info;
813  int tlen = compile_length_tree(qn->target, reg);
814 
815  if (tlen < 0) return tlen;
816 
817  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
818 
819  if (is_anychar_star_quantifier(qn)) {
820  r = compile_tree_n_times(qn->target, qn->lower, reg);
821  if (r) return r;
822  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
823  if (IS_MULTILINE(reg->options))
824  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
825  else
826  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
827  if (r) return r;
828  if (CKN_ON) {
829  r = add_state_check_num(reg, ckn);
830  if (r) return r;
831  }
832 
833  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
834  }
835  else {
836  if (IS_MULTILINE(reg->options)) {
837  r = add_opcode(reg, (CKN_ON ?
839  : OP_ANYCHAR_ML_STAR));
840  }
841  else {
842  r = add_opcode(reg, (CKN_ON ?
844  : OP_ANYCHAR_STAR));
845  }
846  if (r) return r;
847  if (CKN_ON)
848  r = add_state_check_num(reg, ckn);
849 
850  return r;
851  }
852  }
853 
854  if (empty_info != 0)
855  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
856  else
857  mod_tlen = tlen;
858 
859  if (infinite && qn->lower <= 1) {
860  if (qn->greedy) {
861  if (qn->lower == 1) {
862  r = add_opcode_rel_addr(reg, OP_JUMP,
863  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
864  if (r) return r;
865  }
866 
867  if (CKN_ON) {
868  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
869  if (r) return r;
870  r = add_state_check_num(reg, ckn);
871  if (r) return r;
872  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
873  }
874  else {
875  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
876  }
877  if (r) return r;
878  r = compile_tree_empty_check(qn->target, reg, empty_info);
879  if (r) return r;
880  r = add_opcode_rel_addr(reg, OP_JUMP,
881  -(mod_tlen + (int )SIZE_OP_JUMP
882  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
883  }
884  else {
885  if (qn->lower == 0) {
886  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
887  if (r) return r;
888  }
889  r = compile_tree_empty_check(qn->target, reg, empty_info);
890  if (r) return r;
891  if (CKN_ON) {
892  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
893  if (r) return r;
894  r = add_state_check_num(reg, ckn);
895  if (r) return r;
896  r = add_rel_addr(reg,
897  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
898  }
899  else
900  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
901  }
902  }
903  else if (qn->upper == 0) {
904  if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
905  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
906  if (r) return r;
907  r = compile_tree(qn->target, reg);
908  }
909  else
910  r = 0;
911  }
912  else if (qn->upper == 1 && qn->greedy) {
913  if (qn->lower == 0) {
914  if (CKN_ON) {
915  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
916  if (r) return r;
917  r = add_state_check_num(reg, ckn);
918  if (r) return r;
919  r = add_rel_addr(reg, tlen);
920  }
921  else {
922  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
923  }
924  if (r) return r;
925  }
926 
927  r = compile_tree(qn->target, reg);
928  }
929  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
930  if (CKN_ON) {
931  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
932  if (r) return r;
933  r = add_state_check_num(reg, ckn);
934  if (r) return r;
935  r = add_rel_addr(reg, SIZE_OP_JUMP);
936  }
937  else {
938  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
939  }
940 
941  if (r) return r;
942  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
943  if (r) return r;
944  r = compile_tree(qn->target, reg);
945  }
946  else {
947  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
948  if (CKN_ON) {
949  if (r) return r;
950  r = add_opcode(reg, OP_STATE_CHECK);
951  if (r) return r;
952  r = add_state_check_num(reg, ckn);
953  }
954  }
955  return r;
956 }
957 
958 #else /* USE_COMBINATION_EXPLOSION_CHECK */
959 
960 static int
961 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
962 {
963  int len, mod_tlen;
964  int infinite = IS_REPEAT_INFINITE(qn->upper);
965  int empty_info = qn->target_empty_info;
966  int tlen = compile_length_tree(qn->target, reg);
967 
968  if (tlen < 0) return tlen;
969 
970  /* anychar repeat */
971  if (NTYPE(qn->target) == NT_CANY) {
972  if (qn->greedy && infinite) {
973  if (IS_NOT_NULL(qn->next_head_exact))
974  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
975  else
976  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
977  }
978  }
979 
980  if (empty_info != 0)
981  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
982  else
983  mod_tlen = tlen;
984 
985  if (infinite &&
986  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
987  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
988  len = SIZE_OP_JUMP;
989  }
990  else {
991  len = tlen * qn->lower;
992  }
993 
994  if (qn->greedy) {
995 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
996  if (IS_NOT_NULL(qn->head_exact))
998  else
999 #endif
1000  if (IS_NOT_NULL(qn->next_head_exact))
1001  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1002  else
1003  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1004  }
1005  else
1006  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1007  }
1008  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1009  len = SIZE_OP_JUMP + tlen;
1010  }
1011  else if (!infinite && qn->greedy &&
1012  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1014  len = tlen * qn->lower;
1015  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1016  }
1017  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1018  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1019  }
1020  else {
1022  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1023  }
1024 
1025  return len;
1026 }
1027 
1028 static int
1029 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1030 {
1031  int i, r, mod_tlen;
1032  int infinite = IS_REPEAT_INFINITE(qn->upper);
1033  int empty_info = qn->target_empty_info;
1034  int tlen = compile_length_tree(qn->target, reg);
1035 
1036  if (tlen < 0) return tlen;
1037 
1038  if (is_anychar_star_quantifier(qn)) {
1039  r = compile_tree_n_times(qn->target, qn->lower, reg);
1040  if (r) return r;
1041  if (IS_NOT_NULL(qn->next_head_exact)) {
1042  if (IS_MULTILINE(reg->options))
1043  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1044  else
1045  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1046  if (r) return r;
1047  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1048  }
1049  else {
1050  if (IS_MULTILINE(reg->options))
1051  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1052  else
1053  return add_opcode(reg, OP_ANYCHAR_STAR);
1054  }
1055  }
1056 
1057  if (empty_info != 0)
1058  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1059  else
1060  mod_tlen = tlen;
1061 
1062  if (infinite &&
1063  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1064  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1065  if (qn->greedy) {
1066 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1067  if (IS_NOT_NULL(qn->head_exact))
1068  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1069  else
1070 #endif
1071  if (IS_NOT_NULL(qn->next_head_exact))
1072  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1073  else
1074  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1075  }
1076  else {
1077  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1078  }
1079  if (r) return r;
1080  }
1081  else {
1082  r = compile_tree_n_times(qn->target, qn->lower, reg);
1083  if (r) return r;
1084  }
1085 
1086  if (qn->greedy) {
1087 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088  if (IS_NOT_NULL(qn->head_exact)) {
1089  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1090  mod_tlen + SIZE_OP_JUMP);
1091  if (r) return r;
1092  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1093  r = compile_tree_empty_check(qn->target, reg, empty_info);
1094  if (r) return r;
1095  r = add_opcode_rel_addr(reg, OP_JUMP,
1096  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1097  }
1098  else
1099 #endif
1100  if (IS_NOT_NULL(qn->next_head_exact)) {
1101  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1102  mod_tlen + SIZE_OP_JUMP);
1103  if (r) return r;
1104  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1105  r = compile_tree_empty_check(qn->target, reg, empty_info);
1106  if (r) return r;
1107  r = add_opcode_rel_addr(reg, OP_JUMP,
1108  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1109  }
1110  else {
1111  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1112  if (r) return r;
1113  r = compile_tree_empty_check(qn->target, reg, empty_info);
1114  if (r) return r;
1115  r = add_opcode_rel_addr(reg, OP_JUMP,
1116  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1117  }
1118  }
1119  else {
1120  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1121  if (r) return r;
1122  r = compile_tree_empty_check(qn->target, reg, empty_info);
1123  if (r) return r;
1124  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1125  }
1126  }
1127  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1128  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1129  if (r) return r;
1130  r = compile_tree(qn->target, reg);
1131  }
1132  else if (!infinite && qn->greedy &&
1133  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1135  int n = qn->upper - qn->lower;
1136 
1137  r = compile_tree_n_times(qn->target, qn->lower, reg);
1138  if (r) return r;
1139 
1140  for (i = 0; i < n; i++) {
1141  r = add_opcode_rel_addr(reg, OP_PUSH,
1142  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1143  if (r) return r;
1144  r = compile_tree(qn->target, reg);
1145  if (r) return r;
1146  }
1147  }
1148  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1149  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1150  if (r) return r;
1151  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1152  if (r) return r;
1153  r = compile_tree(qn->target, reg);
1154  }
1155  else {
1156  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1157  }
1158  return r;
1159 }
1160 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1161 
1162 static int
1163 compile_length_option_node(EncloseNode* node, regex_t* reg)
1164 {
1165  int tlen;
1166  OnigOptionType prev = reg->options;
1167 
1168  reg->options = node->option;
1169  tlen = compile_length_tree(node->target, reg);
1170  reg->options = prev;
1171 
1172  if (tlen < 0) return tlen;
1173 
1174  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1176  + tlen + SIZE_OP_SET_OPTION;
1177  }
1178  else
1179  return tlen;
1180 }
1181 
1182 static int
1183 compile_option_node(EncloseNode* node, regex_t* reg)
1184 {
1185  int r;
1186  OnigOptionType prev = reg->options;
1187 
1188  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1189  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1190  if (r) return r;
1191  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1192  if (r) return r;
1193  r = add_opcode(reg, OP_FAIL);
1194  if (r) return r;
1195  }
1196 
1197  reg->options = node->option;
1198  r = compile_tree(node->target, reg);
1199  reg->options = prev;
1200 
1201  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1202  if (r) return r;
1203  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1204  }
1205  return r;
1206 }
1207 
/*
 * compile_length_enclose_node
 *
 * Returns the number of bytecode bytes that compiling `node` will need, or a
 * negative value on error (a negative length from compile_length_tree() is
 * passed straight through; ONIGERR_* codes are returned for malformed trees).
 * ENCLOSE_OPTION nodes are delegated to compile_length_option_node().
 *
 * NOTE(review): this listing has dropped several source lines (the embedded
 * listing numbers skip, e.g. 1227->1230, 1254->1257, 1289->1291).  The
 * statements that first assign `len` in most branches are among the missing
 * lines, so this text is not a complete function body.
 */
1208 static int
1209 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1210 {
1211  int len;
1212  int tlen;
1213 
1214  if (node->type == ENCLOSE_OPTION)
1215  return compile_length_option_node(node, reg);
1216 
 /* tlen: length of the enclosed sub-pattern, 0 when there is no target. */
1217  if (node->target) {
1218  tlen = compile_length_tree(node->target, reg);
1219  if (tlen < 0) return tlen;
1220  }
1221  else
1222  tlen = 0;
1223 
1224  switch (node->type) {
1225  case ENCLOSE_MEMORY:
1226 #ifdef USE_SUBEXP_CALL
1227  if (IS_ENCLOSE_CALLED(node)) {
 /* NOTE(review): listing lines 1228-1229 (initial `len = ...`) are missing. */
1230  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1231  len += (IS_ENCLOSE_RECURSION(node)
1233  else
1234  len += (IS_ENCLOSE_RECURSION(node)
1236  }
1237  else if (IS_ENCLOSE_RECURSION(node)) {
1239  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1241  }
1242  else
1243 #endif
1244  {
 /* The start opcode size depends on the group's bt_mem_start bit;
    the two assignments themselves (lines 1246, 1248) are missing here. */
1245  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247  else
1249 
1250  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252  }
1253  break;
1254 
 /* NOTE(review): the `case` label and opening `if` for this branch
    (listing lines 1255-1256) are missing.  The visible arithmetic sizes
    qn->lower copies of the quantifier target plus one
    PUSH / target / POP / JUMP loop. */
1257  QtfrNode* qn = NQTFR(node->target);
1258  tlen = compile_length_tree(qn->target, reg);
1259  if (tlen < 0) return tlen;
1260 
1261  len = tlen * qn->lower
1262  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263  }
1264  else {
1266  }
1267  break;
1268 
1269  case ENCLOSE_CONDITION:
 /* The target must be an NT_ALT holding exactly two entries: the yes-node
    and the no-node.  One entry -> ONIGERR_PARSER_BUG, more than two ->
    ONIGERR_INVALID_CONDITION_PATTERN.  (Listing line 1270, which
    presumably initialises `len`, is missing -- TODO confirm.) */
1271  if (NTYPE(node->target) == NT_ALT) {
1272  Node* x = node->target;
1273 
1274  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275  if (tlen < 0) return tlen;
1276  len += tlen + SIZE_OP_JUMP;
1277  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278  x = NCDR(x);
1279  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280  if (tlen < 0) return tlen;
1281  len += tlen;
1282  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283  }
1284  else {
1285  return ONIGERR_PARSER_BUG;
1286  }
1287  break;
1288 
1289  case ENCLOSE_ABSENT:
 /* NOTE(review): the length computation (listing line 1290) is missing. */
1291  break;
1292 
1293  default:
1294  return ONIGERR_TYPE_BUG;
1295  break;
1296  }
1297 
1298  return len;
1299 }
1300 
/* Forward declaration: used by compile_anchor_node() before its definition. */
1301 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1302 
/*
 * compile_enclose_node
 *
 * Emits bytecode for an enclose node into `reg`.  Returns 0 on success, or a
 * non-zero error code from the add_* / compile_* helpers or an ONIGERR_*
 * constant.  ENCLOSE_OPTION nodes are delegated to compile_option_node().
 *
 * NOTE(review): several source lines are missing from this listing (numbers
 * skip at 1316->1318, 1321->1323, 1375->1379, ...), so some expressions
 * below are visibly truncated.
 */
1303 static int
1304 compile_enclose_node(EncloseNode* node, regex_t* reg)
1305 {
1306  int r, len;
1307 
1308  if (node->type == ENCLOSE_OPTION)
1309  return compile_option_node(node, reg);
1310 
1311  switch (node->type) {
1312  case ENCLOSE_MEMORY:
1313 #ifdef USE_SUBEXP_CALL
 /* A called group is emitted as: OP_CALL <abs addr>, OP_JUMP over the
    body, then the body itself terminated by OP_RETURN (see below). */
1314  if (IS_ENCLOSE_CALLED(node)) {
1315  r = add_opcode(reg, OP_CALL);
1316  if (r) return r;
 /* NOTE(review): listing line 1317 (presumably the assignment of
    node->call_addr) is missing -- TODO confirm. */
1318  node->state |= NST_ADDR_FIXED;
1319  r = add_abs_addr(reg, (int )node->call_addr);
1320  if (r) return r;
1321  len = compile_length_tree(node->target, reg);
 /* NOTE(review): listing line 1322 is missing; no check of `len < 0` is
    visible before `len` is extended and used as a jump offset. */
1323  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324  len += (IS_ENCLOSE_RECURSION(node)
1326  else
1327  len += (IS_ENCLOSE_RECURSION(node)
1329 
1330  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1331  if (r) return r;
1332  }
1333 #endif
 /* Group start marker: the PUSH variant is used when the group's
    bt_mem_start bit is set. */
1334  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1335  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1336  else
1337  r = add_opcode(reg, OP_MEMORY_START);
1338  if (r) return r;
1339  r = add_mem_num(reg, node->regnum);
1340  if (r) return r;
1341  r = compile_tree(node->target, reg);
1342  if (r) return r;
1343 #ifdef USE_SUBEXP_CALL
1344  if (IS_ENCLOSE_CALLED(node)) {
1345  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1346  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1348  else
1349  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1351 
1352  if (r) return r;
1353  r = add_mem_num(reg, node->regnum);
1354  if (r) return r;
1355  r = add_opcode(reg, OP_RETURN);
1356  }
1357  else if (IS_ENCLOSE_RECURSION(node)) {
1358  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1359  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1360  else
1361  r = add_opcode(reg, OP_MEMORY_END_REC);
1362  if (r) return r;
1363  r = add_mem_num(reg, node->regnum);
1364  }
1365  else
1366 #endif
1367  {
1368  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1369  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1370  else
1371  r = add_opcode(reg, OP_MEMORY_END);
1372  if (r) return r;
1373  r = add_mem_num(reg, node->regnum);
1374  }
1375  break;
1376 
 /* NOTE(review): the `case` label and opening `if` of this branch
    (listing lines 1377-1378) are missing.
    Visible layout of the first arm:
      <target repeated qn->lower times>
      PUSH  -> past the loop
      <target> POP
      JUMP  -> back to the PUSH */
1379  QtfrNode* qn = NQTFR(node->target);
1380  r = compile_tree_n_times(qn->target, qn->lower, reg);
1381  if (r) return r;
1382 
1383  len = compile_length_tree(qn->target, reg);
1384  if (len < 0) return len;
1385 
1386  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1387  if (r) return r;
1388  r = compile_tree(qn->target, reg);
1389  if (r) return r;
1390  r = add_opcode(reg, OP_POP);
1391  if (r) return r;
1392  r = add_opcode_rel_addr(reg, OP_JUMP,
1393  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1394  }
1395  else {
 /* General form: bracket the target with PUSH_STOP_BT / POP_STOP_BT. */
1396  r = add_opcode(reg, OP_PUSH_STOP_BT);
1397  if (r) return r;
1398  r = compile_tree(node->target, reg);
1399  if (r) return r;
1400  r = add_opcode(reg, OP_POP_STOP_BT);
1401  }
1402  break;
1403 
1404  case ENCLOSE_CONDITION:
 /* Layout: CONDITION <regnum> <rel addr past yes-node and its JUMP>
            <yes-node> JUMP <over no-node> <no-node> */
1405  r = add_opcode(reg, OP_CONDITION);
1406  if (r) return r;
1407  r = add_mem_num(reg, node->regnum);
1408  if (r) return r;
1409 
1410  if (NTYPE(node->target) == NT_ALT) {
1411  Node* x = node->target;
1412  int len2;
1413 
 /* First pass: measure both branches and validate that the
    alternation has exactly two entries. */
1414  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1415  if (len < 0) return len;
1416  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1417  x = NCDR(x);
1418  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1419  if (len2 < 0) return len2;
1420  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1421 
 /* Second pass: rewind to the yes-node and emit. */
1422  x = node->target;
1423  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1424  if (r) return r;
1425  r = compile_tree(NCAR(x), reg); /* yes-node */
1426  if (r) return r;
1427  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1428  if (r) return r;
1429  x = NCDR(x);
1430  r = compile_tree(NCAR(x), reg); /* no-node */
1431  }
1432  else {
1433  return ONIGERR_PARSER_BUG;
1434  }
1435  break;
1436 
1437  case ENCLOSE_ABSENT:
 /* Layout: PUSH_ABSENT_POS, ABSENT <rel addr past body and ABSENT_END>,
            <body>, ABSENT_END */
1438  len = compile_length_tree(node->target, reg);
1439  if (len < 0) return len;
1440 
1441  r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1442  if (r) return r;
1443  r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1444  if (r) return r;
1445  r = compile_tree(node->target, reg);
1446  if (r) return r;
1447  r = add_opcode(reg, OP_ABSENT_END);
1448  break;
1449 
1450  default:
1451  return ONIGERR_TYPE_BUG;
1452  break;
1453  }
1454 
1455  return r;
1456 }
1457 
/*
 * compile_length_anchor_node
 *
 * Returns the bytecode length for an anchor node, or a negative error code
 * propagated from compile_length_tree() on the anchor's target.  Anchor
 * types not listed in the switch cost a single opcode (SIZE_OPCODE).
 *
 * NOTE(review): the length expressions for ANCHOR_PREC_READ and
 * ANCHOR_PREC_READ_NOT, and the whole case header between listing lines
 * 1478 and 1481, are missing from this listing.
 */
1458 static int
1459 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1460 {
1461  int len;
1462  int tlen = 0;
1463 
 /* tlen stays 0 for anchors without a target. */
1464  if (node->target) {
1465  tlen = compile_length_tree(node->target, reg);
1466  if (tlen < 0) return tlen;
1467  }
1468 
1469  switch (node->type) {
1470  case ANCHOR_PREC_READ:
1472  break;
1473  case ANCHOR_PREC_READ_NOT:
1475  break;
1476  case ANCHOR_LOOK_BEHIND:
1477  len = SIZE_OP_LOOK_BEHIND + tlen;
1478  break;
1481  break;
1482 
1483  default:
1484  len = SIZE_OPCODE;
1485  break;
1486  }
1487 
1488  return len;
1489 }
1490 
/*
 * compile_anchor_node
 *
 * Emits bytecode for an anchor node.  Simple anchors map to one opcode;
 * word-boundary anchors pick the ASCII variant when node->ascii_range is set;
 * look-ahead / look-behind anchors wrap the compiled target in a pair of
 * opcodes.  Returns 0 on success, otherwise the first non-zero helper result
 * or ONIGERR_TYPE_BUG for an unknown anchor type.
 *
 * NOTE(review): listing lines 1549, 1559, 1564 and 1568 are missing, so the
 * look-behind branches below are incomplete as shown.
 */
1491 static int
1492 compile_anchor_node(AnchorNode* node, regex_t* reg)
1493 {
1494  int r, len;
1495 
1496  switch (node->type) {
1497  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1498  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1499  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1500  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1501  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1502  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1503 
1504  case ANCHOR_WORD_BOUND:
1505  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1506  else r = add_opcode(reg, OP_WORD_BOUND);
1507  break;
1508  case ANCHOR_NOT_WORD_BOUND:
1509  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1510  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1511  break;
1512 #ifdef USE_WORD_BEGIN_END
1513  case ANCHOR_WORD_BEGIN:
1514  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1515  else r = add_opcode(reg, OP_WORD_BEGIN);
1516  break;
1517  case ANCHOR_WORD_END:
1518  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1519  else r = add_opcode(reg, OP_WORD_END);
1520  break;
1521 #endif
1522  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1523 
 /* PUSH_POS <target> POP_POS */
1524  case ANCHOR_PREC_READ:
1525  r = add_opcode(reg, OP_PUSH_POS);
1526  if (r) return r;
1527  r = compile_tree(node->target, reg);
1528  if (r) return r;
1529  r = add_opcode(reg, OP_POP_POS);
1530  break;
1531 
 /* PUSH_POS_NOT <rel addr past target and FAIL_POS> <target> FAIL_POS */
1532  case ANCHOR_PREC_READ_NOT:
1533  len = compile_length_tree(node->target, reg);
1534  if (len < 0) return len;
1535  r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1536  if (r) return r;
1537  r = compile_tree(node->target, reg);
1538  if (r) return r;
1539  r = add_opcode(reg, OP_FAIL_POS);
1540  break;
1541 
 /* LOOK_BEHIND <char count> <target>.  The count is node->char_len when
    it is non-negative, otherwise it is computed from the target. */
1542  case ANCHOR_LOOK_BEHIND:
1543  {
1544  int n;
1545  r = add_opcode(reg, OP_LOOK_BEHIND);
1546  if (r) return r;
1547  if (node->char_len < 0) {
1548  r = get_char_length_tree(node->target, reg, &n);
 /* NOTE(review): listing line 1549 (handling of `r`) is missing. */
1550  }
1551  else
1552  n = node->char_len;
1553  r = add_length(reg, n);
1554  if (r) return r;
1555  r = compile_tree(node->target, reg);
1556  }
1557  break;
1558 
 /* NOTE(review): the `case` label (listing line 1559) is missing; the
    opcodes emitted below are the *_LOOK_BEHIND_NOT pair. */
1560  {
1561  int n;
1562  len = compile_length_tree(node->target, reg);
 /* NOTE(review): no `len < 0` check is visible before `len` is used
    (line 1564, the second line of the call below, is missing). */
1563  r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1565  if (r) return r;
1566  if (node->char_len < 0) {
1567  r = get_char_length_tree(node->target, reg, &n);
1569  }
1570  else
1571  n = node->char_len;
1572  r = add_length(reg, n);
1573  if (r) return r;
1574  r = compile_tree(node->target, reg);
1575  if (r) return r;
1576  r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1577  }
1578  break;
1579 
1580  default:
1581  return ONIGERR_TYPE_BUG;
1582  break;
1583  }
1584 
1585  return r;
1586 }
1587 
/*
 * compile_length_tree
 *
 * Computes the number of bytecode bytes compile_tree() will emit for `node`.
 * Returns that length (>= 0), or a negative error code: a negative result
 * from a child is returned immediately, and an unknown node type yields
 * ONIGERR_TYPE_BUG.
 *
 * NOTE(review): listing lines 1642 and 1649 are missing, so the NT_BREF
 * expressions are truncated as shown.
 */
1588 static int
1589 compile_length_tree(Node* node, regex_t* reg)
1590 {
1591  int len, type, r;
1592 
1593  type = NTYPE(node);
1594  switch (type) {
 /* List: sum of the element lengths. */
1595  case NT_LIST:
1596  len = 0;
1597  do {
1598  r = compile_length_tree(NCAR(node), reg);
1599  if (r < 0) return r;
1600  len += r;
1601  } while (IS_NOT_NULL(node = NCDR(node)));
1602  r = len;
1603  break;
1604 
 /* Alternation of n branches: sum of the branch lengths plus one
    PUSH + JUMP pair for each branch except the last. */
1605  case NT_ALT:
1606  {
1607  int n = 0;
1608  len = 0;
1609  do {
1610  r = compile_length_tree(NCAR(node), reg);
1611  if (r < 0) return r;
1612  len += r;
1613  n++;
1614  } while (IS_NOT_NULL(node = NCDR(node)));
1615  r = len;
1616  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1617  }
1618  break;
1619 
1620  case NT_STR:
1621  if (NSTRING_IS_RAW(node))
1622  r = compile_length_string_raw_node(NSTR(node), reg);
1623  else
1624  r = compile_length_string_node(node, reg);
1625  break;
1626 
1627  case NT_CCLASS:
1628  r = compile_length_cclass_node(NCCLASS(node), reg);
1629  break;
1630 
1631  case NT_CTYPE:
1632  case NT_CANY:
1633  r = SIZE_OPCODE;
1634  break;
1635 
1636  case NT_BREF:
1637  {
1638  BRefNode* br = NBREF(node);
1639 
1640 #ifdef USE_BACKREF_WITH_LEVEL
1641  if (IS_BACKREF_NEST_LEVEL(br)) {
1643  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1644  }
1645  else
1646 #endif
1647  if (br->back_num == 1) {
1648  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1650  }
1651  else {
 /* Multi-target backref: opcode, count, then one memnum per target. */
1652  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1653  }
1654  }
1655  break;
1656 
1657 #ifdef USE_SUBEXP_CALL
1658  case NT_CALL:
1659  r = SIZE_OP_CALL;
1660  break;
1661 #endif
1662 
1663  case NT_QTFR:
1664  r = compile_length_quantifier_node(NQTFR(node), reg);
1665  break;
1666 
1667  case NT_ENCLOSE:
1668  r = compile_length_enclose_node(NENCLOSE(node), reg);
1669  break;
1670 
1671  case NT_ANCHOR:
1672  r = compile_length_anchor_node(NANCHOR(node), reg);
1673  break;
1674 
1675  default:
1676  return ONIGERR_TYPE_BUG;
1677  break;
1678  }
1679 
1680  return r;
1681 }
1682 
/*
 * compile_tree
 *
 * Emits bytecode for the parse tree rooted at `node` into `reg`, dispatching
 * on the node type.  Returns 0 on success or the first non-zero result
 * reported by a helper.
 *
 * NOTE(review): an unrecognised node type falls into `default`, which only
 * logs under ONIG_DEBUG and leaves r == 0, whereas compile_length_tree()
 * returns ONIGERR_TYPE_BUG for the same situation.
 * NOTE(review): listing line 1703 is missing from this listing.
 */
1683 static int
1684 compile_tree(Node* node, regex_t* reg)
1685 {
1686  int n, type, len, pos, r = 0;
1687 
1688  type = NTYPE(node);
1689  switch (type) {
 /* List: compile elements in order, stopping at the first error. */
1690  case NT_LIST:
1691  do {
1692  r = compile_tree(NCAR(node), reg);
1693  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1694  break;
1695 
1696  case NT_ALT:
1697  {
 /* Pass 1: total size of the alternation, used to find the address
    just past it (`pos`).
    NOTE(review): the result of compile_length_tree() is added to
    `len` without a visible negative-value check. */
1698  Node* x = node;
1699  len = 0;
1700  do {
1701  len += compile_length_tree(NCAR(x), reg);
1702  if (NCDR(x) != NULL) {
1704  }
1705  } while (IS_NOT_NULL(x = NCDR(x)));
1706  pos = reg->used + len; /* goal position */
1707 
 /* Pass 2: for every branch except the last emit
      PUSH <past branch and its JUMP>, <branch>, JUMP <to pos>;
    the last branch is emitted bare. */
1708  do {
1709  len = compile_length_tree(NCAR(node), reg);
1710  if (IS_NOT_NULL(NCDR(node))) {
1711  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1712  if (r) break;
1713  }
1714  r = compile_tree(NCAR(node), reg);
1715  if (r) break;
1716  if (IS_NOT_NULL(NCDR(node))) {
 /* Relative distance from the end of this JUMP to the goal. */
1717  len = pos - (reg->used + SIZE_OP_JUMP);
1718  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1719  if (r) break;
1720  }
1721  } while (IS_NOT_NULL(node = NCDR(node)));
1722  }
1723  break;
1724 
1725  case NT_STR:
1726  if (NSTRING_IS_RAW(node))
1727  r = compile_string_raw_node(NSTR(node), reg);
1728  else
1729  r = compile_string_node(node, reg);
1730  break;
1731 
1732  case NT_CCLASS:
1733  r = compile_cclass_node(NCCLASS(node), reg);
1734  break;
1735 
 /* Only ONIGENC_CTYPE_WORD is handled here; the opcode is chosen from
    the ascii_range and not flags.  Any other ctype is a type bug. */
1736  case NT_CTYPE:
1737  {
1738  int op;
1739 
1740  switch (NCTYPE(node)->ctype) {
1741  case ONIGENC_CTYPE_WORD:
1742  if (NCTYPE(node)->ascii_range != 0) {
1743  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1744  else op = OP_ASCII_WORD;
1745  }
1746  else {
1747  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1748  else op = OP_WORD;
1749  }
1750  break;
1751  default:
1752  return ONIGERR_TYPE_BUG;
1753  break;
1754  }
1755  r = add_opcode(reg, op);
1756  }
1757  break;
1758 
1759  case NT_CANY:
1760  if (IS_MULTILINE(reg->options))
1761  r = add_opcode(reg, OP_ANYCHAR_ML);
1762  else
1763  r = add_opcode(reg, OP_ANYCHAR);
1764  break;
1765 
1766  case NT_BREF:
1767  {
1768  BRefNode* br = NBREF(node);
1769 
1770 #ifdef USE_BACKREF_WITH_LEVEL
 /* Level-qualified backref: opcode, ignore-case option bit, nest
    level, then the shared count + memnum list emitted at the
    add_bacref_mems label below. */
1771  if (IS_BACKREF_NEST_LEVEL(br)) {
1772  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1773  if (r) return r;
1774  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1775  if (r) return r;
1776  r = add_length(reg, br->nest_level);
1777  if (r) return r;
1778 
1779  goto add_bacref_mems;
1780  }
1781  else
1782 #endif
1783  if (br->back_num == 1) {
1784  n = br->back_static[0];
1785  if (IS_IGNORECASE(reg->options)) {
1786  r = add_opcode(reg, OP_BACKREFN_IC);
1787  if (r) return r;
1788  r = add_mem_num(reg, n);
1789  }
1790  else {
 /* Groups 1 and 2 have dedicated opcodes with no operand. */
1791  switch (n) {
1792  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1793  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1794  default:
1795  r = add_opcode(reg, OP_BACKREFN);
1796  if (r) return r;
1797  r = add_mem_num(reg, n);
1798  break;
1799  }
1800  }
1801  }
1802  else {
1803  int i;
1804  int* p;
1805 
1806  if (IS_IGNORECASE(reg->options)) {
1807  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1808  }
1809  else {
1810  r = add_opcode(reg, OP_BACKREF_MULTI);
1811  }
1812  if (r) return r;
1813 
1814 #ifdef USE_BACKREF_WITH_LEVEL
1815  add_bacref_mems:
1816 #endif
 /* Count followed by the group numbers, written from the last
    array entry down to the first. */
1817  r = add_length(reg, br->back_num);
1818  if (r) return r;
1819  p = BACKREFS_P(br);
1820  for (i = br->back_num - 1; i >= 0; i--) {
1821  r = add_mem_num(reg, p[i]);
1822  if (r) return r;
1823  }
1824  }
1825  }
1826  break;
1827 
1828 #ifdef USE_SUBEXP_CALL
1829  case NT_CALL:
1830  r = compile_call(NCALL(node), reg);
1831  break;
1832 #endif
1833 
1834  case NT_QTFR:
1835  r = compile_quantifier_node(NQTFR(node), reg);
1836  break;
1837 
1838  case NT_ENCLOSE:
1839  r = compile_enclose_node(NENCLOSE(node), reg);
1840  break;
1841 
1842  case NT_ANCHOR:
1843  r = compile_anchor_node(NANCHOR(node), reg);
1844  break;
1845 
1846  default:
1847 #ifdef ONIG_DEBUG
1848  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1849 #endif
1850  break;
1851  }
1852 
1853  return r;
1854 }
1855 
1856 #ifdef USE_NAMED_GROUP
1857 
1858 static int
1859 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1860 {
1861  int r = 0;
1862  Node* node = *plink;
1863 
1864  switch (NTYPE(node)) {
1865  case NT_LIST:
1866  case NT_ALT:
1867  do {
1868  r = noname_disable_map(&(NCAR(node)), map, counter);
1869  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1870  break;
1871 
1872  case NT_QTFR:
1873  {
1874  Node** ptarget = &(NQTFR(node)->target);
1875  Node* old = *ptarget;
1876  r = noname_disable_map(ptarget, map, counter);
1877  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1878  onig_reduce_nested_quantifier(node, *ptarget);
1879  }
1880  }
1881  break;
1882 
1883  case NT_ENCLOSE:
1884  {
1885  EncloseNode* en = NENCLOSE(node);
1886  if (en->type == ENCLOSE_MEMORY) {
1887  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1888  (*counter)++;
1889  map[en->regnum].new_val = *counter;
1890  en->regnum = *counter;
1891  }
1892  else if (en->regnum != 0) {
1893  *plink = en->target;
1894  en->target = NULL_NODE;
1895  onig_node_free(node);
1896  r = noname_disable_map(plink, map, counter);
1897  break;
1898  }
1899  }
1900  r = noname_disable_map(&(en->target), map, counter);
1901  }
1902  break;
1903 
1904  case NT_ANCHOR:
1905  if (NANCHOR(node)->target)
1906  r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1907  break;
1908 
1909  default:
1910  break;
1911  }
1912 
1913  return r;
1914 }
1915 
/*
 * renumber_node_backref
 *
 * Rewrites the group numbers stored in a back-reference node according to
 * `map`.  Entries whose mapped value is not positive are dropped, the
 * survivors are compacted to the front of the array, and bn->back_num is
 * set to the number kept.  Returns 0 on the visible path.
 *
 * NOTE(review): listing line 1924 -- the statement controlled by the
 * `if (! IS_BACKREF_NAME_REF(bn))` test -- is missing from this listing.
 */
1916 static int
1917 renumber_node_backref(Node* node, GroupNumRemap* map)
1918 {
1919  int i, pos, n, old_num;
1920  int *backs;
1921  BRefNode* bn = NBREF(node);
1922 
1923  if (! IS_BACKREF_NAME_REF(bn))
1925 
1926  old_num = bn->back_num;
 /* Use the inline array unless a dynamic one has been attached. */
1927  if (IS_NULL(bn->back_dynamic))
1928  backs = bn->back_static;
1929  else
1930  backs = bn->back_dynamic;
1931 
 /* In-place compaction: `pos` is the write cursor, `i` the read cursor. */
1932  for (i = 0, pos = 0; i < old_num; i++) {
1933  n = map[backs[i]].new_val;
1934  if (n > 0) {
1935  backs[pos] = n;
1936  pos++;
1937  }
1938  }
1939 
1940  bn->back_num = pos;
1941  return 0;
1942 }
1943 
1944 static int
1945 renumber_by_map(Node* node, GroupNumRemap* map)
1946 {
1947  int r = 0;
1948 
1949  switch (NTYPE(node)) {
1950  case NT_LIST:
1951  case NT_ALT:
1952  do {
1953  r = renumber_by_map(NCAR(node), map);
1954  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1955  break;
1956  case NT_QTFR:
1957  r = renumber_by_map(NQTFR(node)->target, map);
1958  break;
1959  case NT_ENCLOSE:
1960  {
1961  EncloseNode* en = NENCLOSE(node);
1962  if (en->type == ENCLOSE_CONDITION)
1963  en->regnum = map[en->regnum].new_val;
1964  r = renumber_by_map(en->target, map);
1965  }
1966  break;
1967 
1968  case NT_BREF:
1969  r = renumber_node_backref(node, map);
1970  break;
1971 
1972  case NT_ANCHOR:
1973  if (NANCHOR(node)->target)
1974  r = renumber_by_map(NANCHOR(node)->target, map);
1975  break;
1976 
1977  default:
1978  break;
1979  }
1980 
1981  return r;
1982 }
1983 
/*
 * numbered_ref_check
 *
 * Recursively visits the tree rooted at `node`, descending through lists,
 * alternations, quantifier targets, enclose targets and anchor targets.
 * Returns 0, or the first non-zero result found.
 *
 * NOTE(review): listing line 2005 -- the statement executed for a
 * back-reference that is not a name reference -- is missing from this
 * listing, so the value reported in that case is not visible here.
 */
1984 static int
1985 numbered_ref_check(Node* node)
1986 {
1987  int r = 0;
1988 
1989  switch (NTYPE(node)) {
1990  case NT_LIST:
1991  case NT_ALT:
1992  do {
1993  r = numbered_ref_check(NCAR(node));
1994  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1995  break;
1996  case NT_QTFR:
1997  r = numbered_ref_check(NQTFR(node)->target);
1998  break;
1999  case NT_ENCLOSE:
2000  r = numbered_ref_check(NENCLOSE(node)->target);
2001  break;
2002 
2003  case NT_BREF:
2004  if (! IS_BACKREF_NAME_REF(NBREF(node)))
2006  break;
2007 
2008  case NT_ANCHOR:
2009  if (NANCHOR(node)->target)
2010  r = numbered_ref_check(NANCHOR(node)->target);
2011  break;
2012 
2013  default:
2014  break;
2015  }
2016 
2017  return r;
2018 }
2019 
/*
 * disable_noname_group_capture
 *
 * Removes unnamed capture groups from the tree at *root and renumbers the
 * remaining ones:
 *   1. builds a remap table with env->num_mem + 1 entries via xalloca(),
 *   2. runs noname_disable_map() to drop / renumber groups,
 *   3. runs renumber_by_map() to fix references to those groups,
 *   4. rebuilds env->capture_history using the new numbers,
 *   5. sets env->num_mem and reg->num_mem to env->num_named,
 *   6. returns the result of onig_renumber_name_table().
 * A non-zero result from step 2 or 3 is returned immediately.
 *
 * NOTE(review): listing lines 2028 and 2041 are missing.  No check of the
 * xalloca() result is visible, and the body of the compaction loop at
 * 2039-2044 only shows `pos++`.
 */
2020 static int
2021 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2022 {
2023  int r, i, pos, counter;
2024  BitStatusType loc;
2025  GroupNumRemap* map;
2026 
2027  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
 /* Entries are indexed by old group number, starting at 1; 0 marks
    "no new number assigned". */
2029  for (i = 1; i <= env->num_mem; i++) {
2030  map[i].new_val = 0;
2031  }
2032  counter = 0;
2033  r = noname_disable_map(root, map, &counter);
2034  if (r != 0) return r;
2035 
2036  r = renumber_by_map(*root, map);
2037  if (r != 0) return r;
2038 
2039  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2040  if (map[i].new_val > 0) {
2042  pos++;
2043  }
2044  }
2045 
 /* Re-express the capture-history bit set in terms of new group numbers:
    snapshot the old bits, clear, then set the mapped bit for each old one. */
2046  loc = env->capture_history;
2047  BIT_STATUS_CLEAR(env->capture_history);
2048  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2049  if (BIT_STATUS_AT(loc, i)) {
2050  BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2051  }
2052  }
2053 
2054  env->num_mem = env->num_named;
2055  reg->num_mem = env->num_named;
2056 
2057  return onig_renumber_name_table(reg, map);
2058 }
2059 #endif /* USE_NAMED_GROUP */
2060 
2061 #ifdef USE_SUBEXP_CALL
2062 static int
2063 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2064 {
2065  int i, offset;
2066  EncloseNode* en;
2067  AbsAddrType addr;
2068 
2069  for (i = 0; i < uslist->num; i++) {
2070  en = NENCLOSE(uslist->us[i].target);
2071  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2072  addr = en->call_addr;
2073  offset = uslist->us[i].offset;
2074 
2075  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2076  }
2077  return 0;
2078 }
2079 #endif
2080 
2081 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
/*
 * quantifiers_memory_node_info
 *
 * Classifies the sub-tree rooted at `node`.  The result starts at 0 and is
 * raised to NQ_TARGET_IS_EMPTY_MEM when an ENCLOSE_MEMORY node is found, or
 * NQ_TARGET_IS_EMPTY_REC when a recursive call node is found.  For lists and
 * alternations the largest value over the children is kept.
 *
 * NOTE(review): listing line 2127 (between `case ENCLOSE_OPTION:` and
 * `case ENCLOSE_CONDITION:`) is missing from this listing.
 */
2082 static int
2083 quantifiers_memory_node_info(Node* node)
2084 {
2085  int r = 0;
2086 
2087  switch (NTYPE(node)) {
2088  case NT_LIST:
2089  case NT_ALT:
2090  {
2091  int v;
 /* Keep the maximum child value; stop early on a negative one. */
2092  do {
2093  v = quantifiers_memory_node_info(NCAR(node));
2094  if (v > r) r = v;
2095  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2096  }
2097  break;
2098 
2099 # ifdef USE_SUBEXP_CALL
2100  case NT_CALL:
2101  if (IS_CALL_RECURSION(NCALL(node))) {
2102  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2103  }
2104  else
2105  r = quantifiers_memory_node_info(NCALL(node)->target);
2106  break;
2107 # endif
2108 
2109  case NT_QTFR:
2110  {
2111  QtfrNode* qn = NQTFR(node);
 /* A quantifier with upper == 0 is not descended into. */
2112  if (qn->upper != 0) {
2113  r = quantifiers_memory_node_info(qn->target);
2114  }
2115  }
2116  break;
2117 
2118  case NT_ENCLOSE:
2119  {
2120  EncloseNode* en = NENCLOSE(node);
2121  switch (en->type) {
2122  case ENCLOSE_MEMORY:
2123  return NQ_TARGET_IS_EMPTY_MEM;
2124  break;
2125 
2126  case ENCLOSE_OPTION:
2128  case ENCLOSE_CONDITION:
2129  case ENCLOSE_ABSENT:
2130  r = quantifiers_memory_node_info(en->target);
2131  break;
2132  default:
2133  break;
2134  }
2135  }
2136  break;
2137 
 /* Leaf and anchor nodes leave r at 0. */
2138  case NT_BREF:
2139  case NT_STR:
2140  case NT_CTYPE:
2141  case NT_CCLASS:
2142  case NT_CANY:
2143  case NT_ANCHOR:
2144  default:
2145  break;
2146  }
2147 
2148  return r;
2149 }
2150 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2151 
/*
 * get_min_match_length
 *
 * Stores in *min a lower bound on the length matched by `node` (byte
 * distance for strings: sn->end - sn->s).  *min is reset to 0 on entry, so
 * node types with no explicit rule (anchors, absent groups, unknown types)
 * report 0.  Returns 0 on success, ONIGERR_INVALID_BACKREF when a
 * back-reference number exceeds env->num_mem, or a non-zero result from a
 * nested call.
 *
 * NOTE(review): listing lines 2253, 2255, 2258 and 2265 are missing from
 * this listing.
 */
2152 static int
2153 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2154 {
2155  OnigDistance tmin;
2156  int r = 0;
2157 
2158  *min = 0;
2159  switch (NTYPE(node)) {
 /* Back-reference: the smallest minimum over all referenced groups.
    A back-reference flagged NST_RECURSION keeps the default of 0. */
2160  case NT_BREF:
2161  {
2162  int i;
2163  int* backs;
2164  Node** nodes = SCANENV_MEM_NODES(env);
2165  BRefNode* br = NBREF(node);
2166  if (br->state & NST_RECURSION) break;
2167 
2168  backs = BACKREFS_P(br);
2169  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2170  r = get_min_match_length(nodes[backs[0]], min, env);
2171  if (r != 0) break;
2172  for (i = 1; i < br->back_num; i++) {
2173  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2174  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2175  if (r != 0) break;
2176  if (*min > tmin) *min = tmin;
2177  }
2178  }
2179  break;
2180 
2181 #ifdef USE_SUBEXP_CALL
 /* Recursive call: use the callee's cached min_len when it is marked
    fixed, otherwise leave 0.  Non-recursive call: measure the callee. */
2182  case NT_CALL:
2183  if (IS_CALL_RECURSION(NCALL(node))) {
2184  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2185  if (IS_ENCLOSE_MIN_FIXED(en))
2186  *min = en->min_len;
2187  }
2188  else
2189  r = get_min_match_length(NCALL(node)->target, min, env);
2190  break;
2191 #endif
2192 
 /* List: sum of the element minimums. */
2193  case NT_LIST:
2194  do {
2195  r = get_min_match_length(NCAR(node), &tmin, env);
2196  if (r == 0) *min += tmin;
2197  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2198  break;
2199 
 /* Alternation: smallest branch minimum (the first branch seeds *min). */
2200  case NT_ALT:
2201  {
2202  Node *x, *y;
2203  y = node;
2204  do {
2205  x = NCAR(y);
2206  r = get_min_match_length(x, &tmin, env);
2207  if (r != 0) break;
2208  if (y == node) *min = tmin;
2209  else if (*min > tmin) *min = tmin;
2210  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2211  }
2212  break;
2213 
2214  case NT_STR:
2215  {
2216  StrNode* sn = NSTR(node);
2217  *min = sn->end - sn->s;
2218  }
2219  break;
2220 
2221  case NT_CTYPE:
2222  *min = 1;
2223  break;
2224 
2225  case NT_CCLASS:
2226  case NT_CANY:
2227  *min = 1;
2228  break;
2229 
 /* Quantifier: target minimum times the lower bound; 0 when lower == 0. */
2230  case NT_QTFR:
2231  {
2232  QtfrNode* qn = NQTFR(node);
2233 
2234  if (qn->lower > 0) {
2235  r = get_min_match_length(qn->target, min, env);
2236  if (r == 0)
2237  *min = distance_multiply(*min, qn->lower);
2238  }
2239  }
2240  break;
2241 
2242  case NT_ENCLOSE:
2243  {
2244  EncloseNode* en = NENCLOSE(node);
2245  switch (en->type) {
 /* Capture group: reuse the cached value when marked fixed; a group
    currently carrying MARK1 is treated as recursive and reports 0;
    otherwise compute from the target and store it in en->min_len. */
2246  case ENCLOSE_MEMORY:
2247  if (IS_ENCLOSE_MIN_FIXED(en))
2248  *min = en->min_len;
2249  else {
2250  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2251  *min = 0; /* recursive */
2252  else {
2254  r = get_min_match_length(en->target, min, env);
2256  if (r == 0) {
2257  en->min_len = *min;
2259  }
2260  }
2261  }
2262  break;
2263 
2264  case ENCLOSE_OPTION:
2266  case ENCLOSE_CONDITION:
2267  r = get_min_match_length(en->target, min, env);
2268  break;
2269 
2270  case ENCLOSE_ABSENT:
2271  break;
2272  }
2273  }
2274  break;
2275 
2276  case NT_ANCHOR:
2277  default:
2278  break;
2279  }
2280 
2281  return r;
2282 }
2283 
/*
 * get_max_match_length
 *
 * Stores in *max an upper bound on the length matched by `node`, using
 * ONIG_INFINITE_DISTANCE where no finite bound is derived (recursion,
 * unbounded quantifiers).  *max is reset to 0 on entry, so anchors, absent
 * groups and unknown node types report 0.  Returns 0 on success,
 * ONIGERR_INVALID_BACKREF for an out-of-range back-reference number, or a
 * non-zero result from a nested call.
 *
 * NOTE(review): listing lines 2379, 2381, 2384 and 2391 are missing from
 * this listing.
 */
2284 static int
2285 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2286 {
2287  OnigDistance tmax;
2288  int r = 0;
2289 
2290  *max = 0;
2291  switch (NTYPE(node)) {
 /* List: sum of element maximums, combined with distance_add(). */
2292  case NT_LIST:
2293  do {
2294  r = get_max_match_length(NCAR(node), &tmax, env);
2295  if (r == 0)
2296  *max = distance_add(*max, tmax);
2297  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2298  break;
2299 
 /* Alternation: largest branch maximum. */
2300  case NT_ALT:
2301  do {
2302  r = get_max_match_length(NCAR(node), &tmax, env);
2303  if (r == 0 && *max < tmax) *max = tmax;
2304  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2305  break;
2306 
2307  case NT_STR:
2308  {
2309  StrNode* sn = NSTR(node);
2310  *max = sn->end - sn->s;
2311  }
2312  break;
2313 
 /* Single-character nodes use the encoding-dependent value of
    ONIGENC_MBC_MAXLEN_DIST(env->enc). */
2314  case NT_CTYPE:
2315  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2316  break;
2317 
2318  case NT_CCLASS:
2319  case NT_CANY:
2320  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2321  break;
2322 
 /* Back-reference: largest maximum over the referenced groups;
    unbounded when flagged NST_RECURSION. */
2323  case NT_BREF:
2324  {
2325  int i;
2326  int* backs;
2327  Node** nodes = SCANENV_MEM_NODES(env);
2328  BRefNode* br = NBREF(node);
2329  if (br->state & NST_RECURSION) {
2330  *max = ONIG_INFINITE_DISTANCE;
2331  break;
2332  }
2333  backs = BACKREFS_P(br);
2334  for (i = 0; i < br->back_num; i++) {
2335  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2336  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2337  if (r != 0) break;
2338  if (*max < tmax) *max = tmax;
2339  }
2340  }
2341  break;
2342 
2343 #ifdef USE_SUBEXP_CALL
2344  case NT_CALL:
2345  if (! IS_CALL_RECURSION(NCALL(node)))
2346  r = get_max_match_length(NCALL(node)->target, max, env);
2347  else
2348  *max = ONIG_INFINITE_DISTANCE;
2349  break;
2350 #endif
2351 
 /* Quantifier: 0 when upper == 0 or the target maximum is 0; otherwise
    target maximum times upper, or unbounded for an infinite upper bound. */
2352  case NT_QTFR:
2353  {
2354  QtfrNode* qn = NQTFR(node);
2355 
2356  if (qn->upper != 0) {
2357  r = get_max_match_length(qn->target, max, env);
2358  if (r == 0 && *max != 0) {
2359  if (! IS_REPEAT_INFINITE(qn->upper))
2360  *max = distance_multiply(*max, qn->upper);
2361  else
2362  *max = ONIG_INFINITE_DISTANCE;
2363  }
2364  }
2365  }
2366  break;
2367 
2368  case NT_ENCLOSE:
2369  {
2370  EncloseNode* en = NENCLOSE(node);
2371  switch (en->type) {
 /* Capture group: reuse the cached value when marked fixed; a group
    currently carrying MARK1 is treated as unbounded; otherwise compute
    from the target and store it in en->max_len. */
2372  case ENCLOSE_MEMORY:
2373  if (IS_ENCLOSE_MAX_FIXED(en))
2374  *max = en->max_len;
2375  else {
2376  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2377  *max = ONIG_INFINITE_DISTANCE;
2378  else {
2380  r = get_max_match_length(en->target, max, env);
2382  if (r == 0) {
2383  en->max_len = *max;
2385  }
2386  }
2387  }
2388  break;
2389 
2390  case ENCLOSE_OPTION:
2392  case ENCLOSE_CONDITION:
2393  r = get_max_match_length(en->target, max, env);
2394  break;
2395 
2396  case ENCLOSE_ABSENT:
2397  break;
2398  }
2399  }
2400  break;
2401 
2402  case NT_ANCHOR:
2403  default:
2404  break;
2405  }
2406 
2407  return r;
2408 }
2409 
/* Negative status codes returned by get_char_length_tree1() when the node
   has no single fixed character length. */
2410 #define GET_CHAR_LEN_VARLEN -1
2411 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2412 
2413 /* fixed size pattern node only */
/*
 * get_char_length_tree1
 *
 * Computes the length of `node` in characters (not bytes) when that length
 * is fixed, and stores it in *len.  Returns 0 on success, or
 * GET_CHAR_LEN_VARLEN when the length varies (quantifier with
 * lower != upper, recursive call, alternation branches of different length
 * below the top level, unsupported node type).  `level` is the nesting depth
 * of the caller; it is incremented on entry, so the outermost node is
 * processed at level 1.
 *
 * NOTE(review): listing lines 2447, 2511 and 2517 are missing.  Line 2447 is
 * the statement taken for a variable-length alternation at level 1
 * (presumably the GET_CHAR_LEN_TOP_ALT_VARLEN result -- TODO confirm).
 */
2414 static int
2415 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2416 {
2417  int tlen;
2418  int r = 0;
2419 
2420  level++;
2421  *len = 0;
2422  switch (NTYPE(node)) {
 /* List: sum of the element lengths. */
2423  case NT_LIST:
2424  do {
2425  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2426  if (r == 0)
2427  *len = (int )distance_add(*len, tlen);
2428  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2429  break;
2430 
 /* Alternation: fixed only when every branch has the first branch's
    length. */
2431  case NT_ALT:
2432  {
2433  int tlen2;
2434  int varlen = 0;
2435 
2436  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2437  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2438  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2439  if (r == 0) {
2440  if (tlen != tlen2)
2441  varlen = 1;
2442  }
2443  }
2444  if (r == 0) {
2445  if (varlen != 0) {
2446  if (level == 1)
2448  else
2449  r = GET_CHAR_LEN_VARLEN;
2450  }
2451  else
2452  *len = tlen;
2453  }
2454  }
2455  break;
2456 
 /* String: count characters by stepping enclen() bytes at a time. */
2457  case NT_STR:
2458  {
2459  StrNode* sn = NSTR(node);
2460  UChar *s = sn->s;
2461  while (s < sn->end) {
2462  s += enclen(reg->enc, s, sn->end);
2463  (*len)++;
2464  }
2465  }
2466  break;
2467 
 /* Quantifier: fixed only for an exact repeat count {n,n}. */
2468  case NT_QTFR:
2469  {
2470  QtfrNode* qn = NQTFR(node);
2471  if (qn->lower == qn->upper) {
2472  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2473  if (r == 0)
2474  *len = (int )distance_multiply(tlen, qn->lower);
2475  }
2476  else
2477  r = GET_CHAR_LEN_VARLEN;
2478  }
2479  break;
2480 
2481 #ifdef USE_SUBEXP_CALL
2482  case NT_CALL:
2483  if (! IS_CALL_RECURSION(NCALL(node)))
2484  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2485  else
2486  r = GET_CHAR_LEN_VARLEN;
2487  break;
2488 #endif
2489 
2490  case NT_CTYPE:
2491  *len = 1;
2492  break;
2493 
2494  case NT_CCLASS:
2495  case NT_CANY:
2496  *len = 1;
2497  break;
2498 
2499  case NT_ENCLOSE:
2500  {
2501  EncloseNode* en = NENCLOSE(node);
2502  switch (en->type) {
2503  case ENCLOSE_MEMORY:
 /* With USE_SUBEXP_CALL the group's char_len is cached in the node.
    Without it this label has no body of its own and falls through
    to the shared case below. */
2504 #ifdef USE_SUBEXP_CALL
2505  if (IS_ENCLOSE_CLEN_FIXED(en))
2506  *len = en->char_len;
2507  else {
2508  r = get_char_length_tree1(en->target, reg, len, level);
2509  if (r == 0) {
2510  en->char_len = *len;
2512  }
2513  }
2514  break;
2515 #endif
2516  case ENCLOSE_OPTION:
2518  case ENCLOSE_CONDITION:
2519  r = get_char_length_tree1(en->target, reg, len, level);
2520  break;
2521  case ENCLOSE_ABSENT:
2522  default:
2523  break;
2524  }
2525  }
2526  break;
2527 
 /* Anchors contribute no characters: *len stays 0. */
2528  case NT_ANCHOR:
2529  break;
2530 
2531  default:
2532  r = GET_CHAR_LEN_VARLEN;
2533  break;
2534  }
2535 
2536  return r;
2537 }
2538 
2539 static int
2540 get_char_length_tree(Node* node, regex_t* reg, int* len)
2541 {
2542  return get_char_length_tree1(node, reg, len, 0);
2543 }
2544 
2545 /* x is not included y ==> 1 : 0 */
/*
 * is_not_included
 *
 * Returns 1 when the visible rules below establish that `x` and `y` exclude
 * each other, and 0 otherwise (including every combination that has no
 * rule).  Only NT_CTYPE, NT_CCLASS and NT_STR nodes are examined.
 *
 * The comparison is order-independent: when the pair arrives in an order
 * that has no rule (ctype vs. cclass/str, cclass vs. str), the `swap` label
 * exchanges x and y and jumps back to `retry`, so each unordered pair is
 * implemented once.
 */
2546 static int
2547 is_not_included(Node* x, Node* y, regex_t* reg)
2548 {
2549  int i;
2550  OnigDistance len;
2551  OnigCodePoint code;
2552  UChar *p;
2553  int ytype;
2554 
2555  retry:
2556  ytype = NTYPE(y);
2557  switch (NTYPE(x)) {
2558  case NT_CTYPE:
2559  {
2560  switch (ytype) {
 /* Two ctype nodes exclude each other when they are the same
    ctype with the same ascii_range but opposite `not` flags. */
2561  case NT_CTYPE:
2562  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2563  NCTYPE(y)->not != NCTYPE(x)->not &&
2564  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2565  return 1;
2566  else
2567  return 0;
2568  break;
2569 
2570  case NT_CCLASS:
 /* Shared by all "wrong order" pairs: exchange and re-dispatch. */
2571  swap:
2572  {
2573  Node* tmp;
2574  tmp = x; x = y; y = tmp;
2575  goto retry;
2576  }
2577  break;
2578 
2579  case NT_STR:
2580  goto swap;
2581  break;
2582 
2583  default:
2584  break;
2585  }
2586  }
2587  break;
2588 
2589  case NT_CCLASS:
2590  {
2591  CClassNode* xc = NCCLASS(x);
2592  switch (ytype) {
2593  case NT_CTYPE:
2594  switch (NCTYPE(y)->ctype) {
2595  case ONIGENC_CTYPE_WORD:
2596  if (NCTYPE(y)->not == 0) {
 /* y is a word ctype: exclusive only when x is a non-negated,
    single-byte-only class whose set bits contain no word
    character. */
2597  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2598  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2599  if (BITSET_AT(xc->bs, i)) {
2600  if (NCTYPE(y)->ascii_range) {
2601  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2602  }
2603  else {
2604  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2605  }
2606  }
2607  }
2608  return 1;
2609  }
2610  return 0;
2611  }
2612  else {
 /* y is a negated word ctype: x must have no multibyte part,
    and no non-word byte may be a member of x (taking x's own
    negation into account). */
2613  if (IS_NOT_NULL(xc->mbuf)) return 0;
2614  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2615  int is_word;
2616  if (NCTYPE(y)->ascii_range)
2617  is_word = IS_CODE_SB_WORD(reg->enc, i);
2618  else
2619  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2620  if (! is_word) {
2621  if (!IS_NCCLASS_NOT(xc)) {
2622  if (BITSET_AT(xc->bs, i))
2623  return 0;
2624  }
2625  else {
2626  if (! BITSET_AT(xc->bs, i))
2627  return 0;
2628  }
2629  }
2630  }
2631  return 1;
2632  }
2633  break;
2634 
2635  default:
2636  break;
2637  }
2638  break;
2639 
2640  case NT_CCLASS:
2641  {
2642  int v;
2643  CClassNode* yc = NCCLASS(y);
2644 
 /* Any single byte that is a member of both classes means they
    overlap. */
2645  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2646  v = BITSET_AT(xc->bs, i);
2647  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2648  (v == 0 && IS_NCCLASS_NOT(xc))) {
2649  v = BITSET_AT(yc->bs, i);
2650  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2651  (v == 0 && IS_NCCLASS_NOT(yc)))
2652  return 0;
2653  }
2654  }
 /* No single-byte overlap: exclusive when at least one class is
    non-negated and has no multibyte buffer. */
2655  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2656  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2657  return 1;
2658  return 0;
2659  }
2660  break;
2661 
2662  case NT_STR:
2663  goto swap;
2664  break;
2665 
2666  default:
2667  break;
2668  }
2669  }
2670  break;
2671 
2672  case NT_STR:
2673  {
2674  StrNode* xs = NSTR(x);
 /* An empty string falls through to the final `return 0`. */
2675  if (NSTRING_LEN(x) == 0)
2676  break;
2677 
2678  switch (ytype) {
2679  case NT_CTYPE:
2680  switch (NCTYPE(y)->ctype) {
 /* Test the character at the start of the string against the word
    ctype; a word character is excluded exactly when y is negated. */
2681  case ONIGENC_CTYPE_WORD:
2682  if (NCTYPE(y)->ascii_range) {
2683  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2684  return NCTYPE(y)->not;
2685  else
2686  return !(NCTYPE(y)->not);
2687  }
2688  else {
2689  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2690  return NCTYPE(y)->not;
2691  else
2692  return !(NCTYPE(y)->not);
2693  }
2694  break;
2695  default:
2696  break;
2697  }
2698  break;
2699 
2700  case NT_CCLASS:
2701  {
2702  CClassNode* cc = NCCLASS(y);
2703 
 /* NOTE(review): the end pointer passed here is
    xs->s + ONIGENC_MBC_MAXLEN(reg->enc), not xs->end -- confirm
    the string buffer is always at least that long. */
2704  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2705  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2706  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2707  }
2708  break;
2709 
2710  case NT_STR:
2711  {
2712  UChar *q;
2713  StrNode* ys = NSTR(y);
 /* Compare the two strings bytewise over the shorter length;
    any differing byte makes them exclusive. */
2714  len = NSTRING_LEN(x);
2715  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2716  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2717  /* tiny version */
2718  return 0;
2719  }
2720  else {
2721  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2722  if (*p != *q) return 1;
2723  }
2724  }
2725  }
2726  break;
2727 
2728  default:
2729  break;
2730  }
2731  }
2732  break;
2733 
2734  default:
2735  break;
2736  }
2737 
2738  return 0;
2739 }
2740 
/*
 * Return the node that supplies the leading value matched by `node`, or
 * NULL_NODE when no such node is found.
 *
 * node:  subtree to inspect.
 * exact: when non-zero, NT_CTYPE / NT_CCLASS nodes are not returned, and a
 *        string node is not returned if it is not raw while
 *        IS_IGNORECASE(reg->options) holds.
 * reg:   regex being compiled.  reg->options is temporarily replaced while
 *        an ENCLOSE_OPTION group is inspected and is restored afterwards.
 */
2741 static Node*
2742 get_head_value_node(Node* node, int exact, regex_t* reg)
2743 {
2744  Node* n = NULL_NODE;
2745 
2746  switch (NTYPE(node)) {
 /* These node types never yield a head value: n stays NULL_NODE. */
2747  case NT_BREF:
2748  case NT_ALT:
2749  case NT_CANY:
2750 #ifdef USE_SUBEXP_CALL
2751  case NT_CALL:
2752 #endif
2753  break;
2754 
2755  case NT_CTYPE:
2756  case NT_CCLASS:
2757  if (exact == 0) {
2758  n = node;
2759  }
2760  break;
2761 
2762  case NT_LIST:
 /* Only the first element of the list is considered. */
2763  n = get_head_value_node(NCAR(node), exact, reg);
2764  break;
2765 
2766  case NT_STR:
2767  {
2768  StrNode* sn = NSTR(node);
2769 
 /* An empty string has no head value. */
2770  if (sn->end <= sn->s)
2771  break;
2772 
 /* Deliberately empty branch: leave n as NULL_NODE in this case. */
2773  if (exact != 0 &&
2774  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2775  }
2776  else {
2777  n = node;
2778  }
2779  }
2780  break;
2781 
2782  case NT_QTFR:
2783  {
2784  QtfrNode* qn = NQTFR(node);
 /* A quantifier that may match zero times has no guaranteed head. */
2785  if (qn->lower > 0) {
2786 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2787  if (IS_NOT_NULL(qn->head_exact))
2788  n = qn->head_exact;
2789  else
2790 #endif
2791  n = get_head_value_node(qn->target, exact, reg);
2792  }
2793  }
2794  break;
2795 
2796  case NT_ENCLOSE:
2797  {
2798  EncloseNode* en = NENCLOSE(node);
2799  switch (en->type) {
2800  case ENCLOSE_OPTION:
2801  {
 /* Inspect the target under the group's own options, then restore. */
2802  OnigOptionType options = reg->options;
2803 
2804  reg->options = NENCLOSE(node)->option;
2805  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2806  reg->options = options;
2807  }
2808  break;
2809 
2810  case ENCLOSE_MEMORY:
 /* NOTE(review): the listing's embedded numbering jumps from 2810 to 2812
    here; a line (possibly another case label) may have been dropped in
    extraction -- confirm against the upstream file. */
2812  case ENCLOSE_CONDITION:
2813  n = get_head_value_node(en->target, exact, reg);
2814  break;
2815 
2816  case ENCLOSE_ABSENT:
2817  break;
2818  }
2819  }
2820  break;
2821 
2822  case NT_ANCHOR:
 /* Only positive look-ahead is followed; other anchors yield NULL_NODE. */
2823  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2824  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2825  break;
2826 
2827  default:
2828  break;
2829  }
2830 
2831  return n;
2832 }
2833 
2834 static int
2835 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2836 {
2837  int type, r = 0;
2838 
2839  type = NTYPE(node);
2840  if ((NTYPE2BIT(type) & type_mask) == 0)
2841  return 1;
2842 
2843  switch (type) {
2844  case NT_LIST:
2845  case NT_ALT:
2846  do {
2847  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2848  anchor_mask);
2849  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2850  break;
2851 
2852  case NT_QTFR:
2853  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2854  anchor_mask);
2855  break;
2856 
2857  case NT_ENCLOSE:
2858  {
2859  EncloseNode* en = NENCLOSE(node);
2860  if ((en->type & enclose_mask) == 0)
2861  return 1;
2862 
2863  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2864  }
2865  break;
2866 
2867  case NT_ANCHOR:
2868  type = NANCHOR(node)->type;
2869  if ((type & anchor_mask) == 0)
2870  return 1;
2871 
2872  if (NANCHOR(node)->target)
2873  r = check_type_tree(NANCHOR(node)->target,
2874  type_mask, enclose_mask, anchor_mask);
2875  break;
2876 
2877  default:
2878  break;
2879  }
2880  return r;
2881 }
2882 
2883 #ifdef USE_SUBEXP_CALL
2884 
/* Result codes of subexp_inf_recursive_check(): a recursion was met after
   some input had to be consumed (EXIST) or while still at the head (INFINITE). */
2885 # define RECURSION_EXIST 1
2886 # define RECURSION_INFINITE 2
2887 
/*
 * Inspect `node` for recursion back into an enclosure that is currently
 * being examined.
 *
 * head: non-zero while no list element with a non-zero minimum match
 *       length has been passed yet (see the NT_LIST branch).
 *
 * Returns 0, RECURSION_EXIST or RECURSION_INFINITE; a negative value from a
 * nested call, or a non-zero value from get_min_match_length(), is returned
 * unchanged.
 */
2888 static int
2889 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2890 {
2891  int type;
2892  int r = 0;
2893 
2894  type = NTYPE(node);
2895  switch (type) {
2896  case NT_LIST:
2897  {
2898  Node *x;
2899  OnigDistance min;
2900  int ret;
2901 
2902  x = node;
2903  do {
2904  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2905  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
 /* In a sequence the results of the elements are OR-ed together. */
2906  r |= ret;
2907  if (head) {
 /* Once an element must consume input, later elements are no longer at the head. */
2908  ret = get_min_match_length(NCAR(x), &min, env);
2909  if (ret != 0) return ret;
2910  if (min != 0) head = 0;
2911  }
2912  } while (IS_NOT_NULL(x = NCDR(x)));
2913  }
2914  break;
2915 
2916  case NT_ALT:
2917  {
2918  int ret;
 /* Start from RECURSION_EXIST and AND each branch in: the result keeps
    the bit only if every alternative reports it. */
2919  r = RECURSION_EXIST;
2920  do {
2921  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2922  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2923  r &= ret;
2924  } while (IS_NOT_NULL(node = NCDR(node)));
2925  }
2926  break;
2927 
2928  case NT_QTFR:
2929  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2930  if (r == RECURSION_EXIST) {
 /* A quantifier that may repeat zero times does not force the recursion. */
2931  if (NQTFR(node)->lower == 0) r = 0;
2932  }
2933  break;
2934 
2935  case NT_ANCHOR:
2936  {
2937  AnchorNode* an = NANCHOR(node);
2938  switch (an->type) {
2939  case ANCHOR_PREC_READ:
2940  case ANCHOR_PREC_READ_NOT:
2941  case ANCHOR_LOOK_BEHIND:
 /* NOTE(review): the listing skips line 2942 here; another case label may
    have been dropped in extraction -- confirm against upstream. */
2943  r = subexp_inf_recursive_check(an->target, env, head);
2944  break;
2945  }
2946  }
2947  break;
2948 
2949  case NT_CALL:
2950  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2951  break;
2952 
2953  case NT_ENCLOSE:
2954  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2955  return 0;
2956  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2957  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2958  else {
 /* NOTE(review): the listing skips lines 2959 and 2961 around the call
    below; statements (presumably the ones that set/clear the mark tested
    above) appear to be missing -- confirm against upstream. */
2960  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962  }
2963  break;
2964 
2965  default:
2966  break;
2967  }
2968 
2969  return r;
2970 }
2971 
/*
 * Traverse the tree and run subexp_inf_recursive_check() on the target of
 * every enclosure flagged by IS_ENCLOSE_RECURSION().
 *
 * Returns 0 when nothing is reported, ONIGERR_NEVER_ENDING_RECURSION when
 * the check yields a positive result, or the first non-zero value produced
 * by a nested traversal.
 */
2972 static int
2973 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2974 {
2975  int type;
2976  int r = 0;
2977 
2978  type = NTYPE(node);
2979  switch (type) {
2980  case NT_LIST:
2981  case NT_ALT:
 /* Visit elements in order and stop at the first non-zero result. */
2982  do {
2983  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2984  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2985  break;
2986 
2987  case NT_QTFR:
2988  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2989  break;
2990 
2991  case NT_ANCHOR:
2992  {
2993  AnchorNode* an = NANCHOR(node);
2994  switch (an->type) {
2995  case ANCHOR_PREC_READ:
2996  case ANCHOR_PREC_READ_NOT:
2997  case ANCHOR_LOOK_BEHIND:
 /* NOTE(review): the listing skips line 2998 here; another case label may
    have been dropped in extraction -- confirm against upstream. */
2999  r = subexp_inf_recursive_check_trav(an->target, env);
3000  break;
3001  }
3002  }
3003  break;
3004 
3005  case NT_ENCLOSE:
3006  {
3007  EncloseNode* en = NENCLOSE(node);
3008 
3009  if (IS_ENCLOSE_RECURSION(en)) {
 /* NOTE(review): the listing skips lines 3010 and 3013 around the two
    statements below; statements appear to be missing -- confirm upstream. */
 /* The check starts with head = 1. */
3011  r = subexp_inf_recursive_check(en->target, env, 1);
 /* NOTE(review): only r > 0 is acted on; a negative r is overwritten by
    the assignment after this block. */
3012  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014  }
3015  r = subexp_inf_recursive_check_trav(en->target, env);
3016  }
3017 
3018  break;
3019 
3020  default:
3021  break;
3022  }
3023 
3024  return r;
3025 }
3026 
/*
 * Report whether the subtree reaches an enclosure that carries mark 1
 * (IS_ENCLOSE_MARK1).  Call nodes whose target reports recursion are
 * flagged with SET_CALL_RECURSION().
 *
 * Returns non-zero when such an enclosure is reached, 0 otherwise.
 */
3027 static int
3028 subexp_recursive_check(Node* node)
3029 {
3030  int r = 0;
3031 
3032  switch (NTYPE(node)) {
3033  case NT_LIST:
3034  case NT_ALT:
 /* Every element is visited; results are OR-ed (no early exit). */
3035  do {
3036  r |= subexp_recursive_check(NCAR(node));
3037  } while (IS_NOT_NULL(node = NCDR(node)));
3038  break;
3039 
3040  case NT_QTFR:
3041  r = subexp_recursive_check(NQTFR(node)->target);
3042  break;
3043 
3044  case NT_ANCHOR:
3045  {
3046  AnchorNode* an = NANCHOR(node);
3047  switch (an->type) {
3048  case ANCHOR_PREC_READ:
3049  case ANCHOR_PREC_READ_NOT:
3050  case ANCHOR_LOOK_BEHIND:
 /* NOTE(review): the listing skips line 3051 here; another case label may
    have been dropped in extraction -- confirm against upstream. */
3052  r = subexp_recursive_check(an->target);
3053  break;
3054  }
3055  }
3056  break;
3057 
3058  case NT_CALL:
3059  r = subexp_recursive_check(NCALL(node)->target);
3060  if (r != 0) SET_CALL_RECURSION(node);
3061  break;
3062 
3063  case NT_ENCLOSE:
3064  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3065  return 0;
3066  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3067  return 1; /* recursion */
3068  else {
 /* NOTE(review): the listing skips lines 3069 and 3071 around the call
    below; statements (presumably the ones that set/clear the mark tested
    above) appear to be missing -- confirm against upstream. */
3070  r = subexp_recursive_check(NENCLOSE(node)->target);
3072  }
3073  break;
3074 
3075  default:
3076  break;
3077  }
3078 
3079  return r;
3080 }
3081 
3082 
/*
 * Traverse the tree; for each enclosure that is called (IS_ENCLOSE_CALLED)
 * but not yet flagged as recursive, run subexp_recursive_check() on its
 * target and set NST_RECURSION on the enclosure when that reports recursion.
 *
 * Returns FOUND_CALLED_NODE when a called enclosure exists in the subtree,
 * a negative value propagated from a list/alternative element, or 0.
 */
3083 static int
3084 subexp_recursive_check_trav(Node* node, ScanEnv* env)
3085 {
3086 # define FOUND_CALLED_NODE 1
3087 
3088  int type;
3089  int r = 0;
3090 
3091  type = NTYPE(node);
3092  switch (type) {
3093  case NT_LIST:
3094  case NT_ALT:
3095  {
3096  int ret;
3097  do {
3098  ret = subexp_recursive_check_trav(NCAR(node), env);
3099  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3100  else if (ret < 0) return ret;
3101  } while (IS_NOT_NULL(node = NCDR(node)));
3102  }
3103  break;
3104 
3105  case NT_QTFR:
3106  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
 /* A {0} quantifier whose target contains a called group is marked as
    referred. */
3107  if (NQTFR(node)->upper == 0) {
3108  if (r == FOUND_CALLED_NODE)
3109  NQTFR(node)->is_referred = 1;
3110  }
3111  break;
3112 
3113  case NT_ANCHOR:
3114  {
3115  AnchorNode* an = NANCHOR(node);
3116  switch (an->type) {
3117  case ANCHOR_PREC_READ:
3118  case ANCHOR_PREC_READ_NOT:
3119  case ANCHOR_LOOK_BEHIND:
 /* NOTE(review): the listing skips line 3120 here; another case label may
    have been dropped in extraction -- confirm against upstream. */
3121  r = subexp_recursive_check_trav(an->target, env);
3122  break;
3123  }
3124  }
3125  break;
3126 
3127  case NT_ENCLOSE:
3128  {
3129  EncloseNode* en = NENCLOSE(node);
3130 
3131  if (! IS_ENCLOSE_RECURSION(en)) {
3132  if (IS_ENCLOSE_CALLED(en)) {
 /* NOTE(review): the listing skips lines 3133 and 3136 around the two
    statements below; statements appear to be missing -- confirm upstream. */
3134  r = subexp_recursive_check(en->target);
3135  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137  }
3138  }
 /* The value computed above is replaced by the traversal result. */
3139  r = subexp_recursive_check_trav(en->target, env);
3140  if (IS_ENCLOSE_CALLED(en))
3141  r |= FOUND_CALLED_NODE;
3142  }
3143  break;
3144 
3145  default:
3146  break;
3147  }
3148 
3149  return r;
3150 }
3151 
/*
 * Resolve every NT_CALL node in the tree to the enclosure it refers to.
 *
 * For a call node, cn->target is taken from SCANENV_MEM_NODES(env) at index
 * cn->group_num; the group's bit is turned on in env->bt_mem_start and
 * env->unset_addr_list is copied to the call node.  A call given by name is
 * first mapped to a group number with onig_name_to_group_numbers().
 *
 * Returns 0 on success or the first non-zero value from a nested call.
 *
 * NOTE(review): this listing has lost several lines inside the NT_CALL
 * branch (the embedded numbering has gaps), so the bodies of the error
 * checks are not visible here.  Each gap is marked below; confirm against
 * the upstream file before relying on this text.
 */
3152 static int
3153 setup_subexp_call(Node* node, ScanEnv* env)
3154 {
3155  int type;
3156  int r = 0;
3157 
3158  type = NTYPE(node);
3159  switch (type) {
3160  case NT_LIST:
3161  do {
3162  r = setup_subexp_call(NCAR(node), env);
3163  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3164  break;
3165 
3166  case NT_ALT:
3167  do {
3168  r = setup_subexp_call(NCAR(node), env);
3169  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3170  break;
3171 
3172  case NT_QTFR:
3173  r = setup_subexp_call(NQTFR(node)->target, env);
3174  break;
3175  case NT_ENCLOSE:
3176  r = setup_subexp_call(NENCLOSE(node)->target, env);
3177  break;
3178 
3179  case NT_CALL:
3180  {
3181  CallNode* cn = NCALL(node);
3182  Node** nodes = SCANENV_MEM_NODES(env);
3183 
 /* Call by group number. */
3184  if (cn->group_num != 0) {
3185  int gnum = cn->group_num;
3186 
3187 # ifdef USE_NAMED_GROUP
3188  if (env->num_named > 0 &&
 /* NOTE(review): listing skips lines 3189-3191 (rest of the condition and
    the body). */
3192  }
3193 # endif
 /* Group number beyond the number of defined groups. */
3194  if (gnum > env->num_mem) {
 /* NOTE(review): listing skips lines 3195-3197 (body of this check). */
3198  }
3199 
3200 # ifdef USE_NAMED_GROUP
3201  set_call_attr:
3202 # endif
3203  cn->target = nodes[cn->group_num];
3204  if (IS_NULL(cn->target)) {
 /* NOTE(review): listing skips lines 3205-3207 (body of this check). */
3208  }
 /* NOTE(review): listing skips line 3209 here. */
3210  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3211  cn->unset_addr_list = env->unset_addr_list;
3212  }
3213 # ifdef USE_NAMED_GROUP
3214 # ifdef USE_PERL_SUBEXP_CALL
 /* group_num == 0 with an empty name: resolve via nodes[0]. */
3215  else if (cn->name == cn->name_end) {
3216  goto set_call_attr;
3217  }
3218 # endif
 /* Call by name. */
3219  else {
3220  int *refs;
3221 
3222  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3223  &refs);
3224  if (n <= 0) {
 /* NOTE(review): listing skips lines 3225-3227 (body of this check). */
3228  }
3229  else if (n > 1 &&
 /* NOTE(review): listing skips lines 3230-3233 (rest of the condition and
    the body). */
3234  }
3235  else {
 /* Use the first group number returned for the name. */
3236  cn->group_num = refs[0];
3237  goto set_call_attr;
3238  }
3239  }
3240 # endif
3241  }
3242  break;
3243 
3244  case NT_ANCHOR:
3245  {
3246  AnchorNode* an = NANCHOR(node);
3247 
3248  switch (an->type) {
3249  case ANCHOR_PREC_READ:
3250  case ANCHOR_PREC_READ_NOT:
3251  case ANCHOR_LOOK_BEHIND:
 /* NOTE(review): listing skips line 3252 here; another case label may have
    been dropped. */
3253  r = setup_subexp_call(an->target, env);
3254  break;
3255  }
3256  }
3257  break;
3258 
3259  default:
3260  break;
3261  }
3262 
3263  return r;
3264 }
3265 #endif
3266 
3267 /* divide different length alternatives in look-behind.
3268  (?<=A|B) ==> (?<=A)|(?<=B)
3269  (?<!A|B) ==> (?<!A)(?<!B)
3270 */
3271 static int
3272 divide_look_behind_alternatives(Node* node)
3273 {
3274  Node *head, *np, *insert_node;
3275  AnchorNode* an = NANCHOR(node);
3276  int anc_type = an->type;
3277 
3278  head = an->target;
3279  np = NCAR(head);
3280  swap_node(node, head);
3281  NCAR(node) = head;
3282  NANCHOR(head)->target = np;
3283 
3284  np = node;
3285  while ((np = NCDR(np)) != NULL_NODE) {
3286  insert_node = onig_node_new_anchor(anc_type);
3287  CHECK_NULL_RETURN_MEMERR(insert_node);
3288  NANCHOR(insert_node)->target = NCAR(np);
3289  NCAR(np) = insert_node;
3290  }
3291 
3292  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3293  np = node;
3294  do {
3295  SET_NTYPE(np, NT_LIST); /* alt -> list */
3296  } while ((np = NCDR(np)) != NULL_NODE);
3297  }
3298  return 0;
3299 }
3300 
/*
 * Compute the character length of a look-behind target with
 * get_char_length_tree() and store it in the anchor's char_len.
 *
 * Returns the status from get_char_length_tree(), or from
 * divide_look_behind_alternatives() when that path is taken.
 *
 * NOTE(review): this listing has lost lines 3311, 3313 and 3316, so the
 * handling of GET_CHAR_LEN_VARLEN and the condition guarding the split of
 * alternatives are not visible; the code below is syntactically incomplete
 * as listed.  Confirm against the upstream file.
 */
3301 static int
3302 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3303 {
3304  int r, len;
3305  AnchorNode* an = NANCHOR(node);
3306 
3307  r = get_char_length_tree(an->target, reg, &len);
 /* Fixed length: remember it on the anchor. */
3308  if (r == 0)
3309  an->char_len = len;
3310  else if (r == GET_CHAR_LEN_VARLEN)
 /* NOTE(review): listing skips line 3311 (statement for this branch). */
3312  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
 /* NOTE(review): listing skips line 3313 (the `if` paired with the `else`
    below). */
3314  r = divide_look_behind_alternatives(node);
3315  else
 /* NOTE(review): listing skips line 3316 (statement for this `else`). */
3317  }
3318 
3319  return r;
3320 }
3321 
/*
 * Use the node that follows `node` in a list (`next_node`) to optimise
 * `node` when it is a greedy quantifier without an upper bound.
 *
 * - With USE_QTFR_PEEK_NEXT, the head value of next_node (exact mode) is
 *   recorded in qn->next_head_exact unless its first byte is '\0'.
 * - When qn->lower <= 1 and the target is a simple node type, the head
 *   values of the target and of next_node are compared with
 *   is_not_included(); on success the quantifier is wrapped in an enclosure
 *   (see the "automatic possessification" comment below).
 * - A memory enclosure is looked through: its target is processed instead.
 *
 * The only return visible in this listing is 0.
 *
 * NOTE(review): lines 3348-3350 are missing from this listing (the creation
 * of `en`, and possibly an early error return); confirm against upstream.
 */
3322 static int
3323 next_setup(Node* node, Node* next_node, regex_t* reg)
3324 {
3325  int type;
3326 
3327  retry:
3328  type = NTYPE(node);
3329  if (type == NT_QTFR) {
3330  QtfrNode* qn = NQTFR(node);
3331  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3332 #ifdef USE_QTFR_PEEK_NEXT
3333  Node* n = get_head_value_node(next_node, 1, reg);
3334  /* '\0': for UTF-16BE etc... */
3335  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3336  qn->next_head_exact = n;
3337  }
3338 #endif
3339  /* automatic possessification a*b ==> (?>a*)b */
3340  if (qn->lower <= 1) {
3341  int ttype = NTYPE(qn->target);
3342  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3343  Node *x, *y;
3344  x = get_head_value_node(qn->target, 0, reg);
3345  if (IS_NOT_NULL(x)) {
3346  y = get_head_value_node(next_node, 0, reg);
3347  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
 /* NOTE(review): listing skips lines 3348-3350 here; `en` used below is
    declared/created in the missing lines. */
3351  swap_node(node, en);
3352  NENCLOSE(node)->target = en;
3353  }
3354  }
3355  }
3356  }
3357  }
3358  }
3359  else if (type == NT_ENCLOSE) {
3360  EncloseNode* en = NENCLOSE(node);
 /* Look through a capture group and retry on its target. */
3361  if (en->type == ENCLOSE_MEMORY) {
3362  node = en->target;
3363  goto retry;
3364  }
3365  }
3366  return 0;
3367 }
3368 
3369 
/*
 * Replace the contents of string node `node` with its case-folded form.
 *
 * Each character is folded with ONIGENC_MBC_CASE_FOLD() into a temporary
 * buffer that starts at twice the byte length of the string and is doubled
 * whenever it fills up.  The result is stored with onig_node_str_set() and
 * the temporary buffer is freed.
 *
 * Returns the status of onig_node_str_set(), or ONIGERR_MEMORY when the
 * buffer cannot be grown.
 *
 * NOTE(review): lines 3373 and 3382 are missing from this listing; `p`,
 * `end` and `buf` used below are declared in a missing line, and any check
 * of the xmalloc() result is not visible.  Confirm against upstream.
 */
3370 static int
3371 update_string_node_case_fold(regex_t* reg, Node *node)
3372 {
 /* NOTE(review): listing skips line 3373 here. */
3374  UChar *sbuf, *ebuf, *sp;
3375  int r, i, len;
3376  OnigDistance sbuf_size;
3377  StrNode* sn = NSTR(node);
3378 
3379  end = sn->end;
3380  sbuf_size = (end - sn->s) * 2;
3381  sbuf = (UChar* )xmalloc(sbuf_size);
 /* NOTE(review): listing skips line 3382 here. */
3383  ebuf = sbuf + sbuf_size;
3384 
3385  sp = sbuf;
3386  p = sn->s;
3387  while (p < end) {
 /* &p is passed so the macro can advance the read position. */
3388  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3389  for (i = 0; i < len; i++) {
3390  if (sp >= ebuf) {
 /* Buffer full: double it.  This inner `p` shadows the outer read pointer. */
3391  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3392  if (IS_NULL(p)) {
3393  xfree(sbuf);
3394  return ONIGERR_MEMORY;
3395  }
3396  sbuf = p;
 /* Re-base the write pointer on the new buffer (old size == bytes written). */
3397  sp = sbuf + sbuf_size;
3398  sbuf_size *= 2;
3399  ebuf = sbuf + sbuf_size;
3400  }
3401 
3402  *sp++ = buf[i];
3403  }
3404  }
3405 
3406  r = onig_node_str_set(node, sbuf, sp);
3407 
3408  xfree(sbuf);
3409  return r;
3410 }
3411 
/*
 * Create a new string node for the bytes [s, end), case-fold it with
 * update_string_node_case_fold(), mark it ambiguous and store it in *rnode.
 *
 * Returns 0 on success, ONIGERR_MEMORY when the node cannot be allocated,
 * or the folding error (the new node is freed in that case and *rnode is
 * left untouched).
 */
3412 static int
3413 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3414  regex_t* reg)
3415 {
3416  int r;
3417  Node *node;
3418 
3419  node = onig_node_new_str(s, end);
3420  if (IS_NULL(node)) return ONIGERR_MEMORY;
3421 
3422  r = update_string_node_case_fold(reg, node);
3423  if (r != 0) {
3424  onig_node_free(node);
3425  return r;
3426  }
3427 
3428  NSTRING_SET_AMBIG(node);
 /* NOTE(review): the listing skips line 3429 here; a statement may have
    been dropped in extraction -- confirm against upstream. */
3430  *rnode = node;
3431  return 0;
3432 }
3433 
3434 static int
3435 is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3436  int slen)
3437 {
3438  int i;
3439 
3440  for (i = 0; i < item_num; i++) {
3441  if (items[i].byte_len != slen) {
3442  return 1;
3443  }
3444  if (items[i].code_len != 1) {
3445  return 1;
3446  }
3447  }
3448  return 0;
3449 }
3450 
/*
 * Build an alternation for one character of a case-insensitive string.
 *
 * The first branch is the original bytes [p, p + slen); one further branch
 * is added per entry of `items`, holding the item's code points encoded
 * with ONIGENC_CODE_TO_MBC().  Items whose byte_len differs from `slen`
 * go into a separate outer alternation (var_anode); when such an item ends
 * before `end`, the rest of the string is appended to that branch via
 * expand_case_fold_make_rem_string().
 *
 * The root of the result is stored in *rnode.
 *
 * Returns 1 when at least one item has a byte_len different from `slen`,
 * 0 otherwise, and ONIGERR_MEMORY on any failure.
 *
 * NOTE(review): lines 3458, 3478 and 3507 are missing from this listing
 * (the declaration of `buf` and the assignments of `anode` and `an` are not
 * visible).  Confirm against upstream.
 */
3451 static int
3452 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3453  UChar *p, int slen, UChar *end,
3454  regex_t* reg, Node **rnode)
3455 {
3456  int r, i, j, len, varlen;
3457  Node *anode, *var_anode, *snode, *xnode, *an;
 /* NOTE(review): listing skips line 3458 here. */
3459 
3460  *rnode = var_anode = NULL_NODE;
3461 
 /* Does any item cover a different number of source bytes? */
3462  varlen = 0;
3463  for (i = 0; i < item_num; i++) {
3464  if (items[i].byte_len != slen) {
3465  varlen = 1;
3466  break;
3467  }
3468  }
3469 
3470  if (varlen != 0) {
3471  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3472  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3473 
3474  xnode = onig_node_new_list(NULL, NULL);
3475  if (IS_NULL(xnode)) goto mem_err;
3476  NCAR(var_anode) = xnode;
3477 
 /* NOTE(review): listing skips line 3478 here (assignment of `anode`). */
3479  if (IS_NULL(anode)) goto mem_err;
3480  NCAR(xnode) = anode;
3481  }
3482  else {
3483  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3484  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3485  }
3486 
 /* First branch: the character exactly as written. */
3487  snode = onig_node_new_str(p, p + slen);
3488  if (IS_NULL(snode)) goto mem_err;
3489 
3490  NCAR(anode) = snode;
3491 
3492  for (i = 0; i < item_num; i++) {
3493  snode = onig_node_new_str(NULL, NULL);
3494  if (IS_NULL(snode)) goto mem_err;
3495 
3496  for (j = 0; j < items[i].code_len; j++) {
3497  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3498  if (len < 0) {
 /* NOTE(review): r is set here, but both error labels return
    ONIGERR_MEMORY, so this value is not returned. */
3499  r = len;
3500  goto mem_err2;
3501  }
3502 
3503  r = onig_node_str_cat(snode, buf, buf + len);
3504  if (r != 0) goto mem_err2;
3505  }
3506 
 /* NOTE(review): listing skips line 3507 here (assignment of `an`). */
3508  if (IS_NULL(an)) {
3509  goto mem_err2;
3510  }
3511 
3512  if (items[i].byte_len != slen) {
3513  Node *rem;
3514  UChar *q = p + items[i].byte_len;
3515 
3516  if (q < end) {
 /* The item covers fewer bytes than remain: append the folded remainder. */
3517  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3518  if (r != 0) {
3519  onig_node_free(an);
3520  goto mem_err2;
3521  }
3522 
3523  xnode = onig_node_list_add(NULL_NODE, snode);
3524  if (IS_NULL(xnode)) {
3525  onig_node_free(an);
3526  onig_node_free(rem);
3527  goto mem_err2;
3528  }
3529  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3530  onig_node_free(an);
3531  onig_node_free(xnode);
3532  onig_node_free(rem);
 /* snode was placed in xnode above, so the label that frees snode is skipped. */
3533  goto mem_err;
3534  }
3535 
3536  NCAR(an) = xnode;
3537  }
3538  else {
3539  NCAR(an) = snode;
3540  }
3541 
 /* Chain onto the variable-length alternation. */
3542  NCDR(var_anode) = an;
3543  var_anode = an;
3544  }
3545  else {
 /* Chain onto the same-length alternation. */
3546  NCAR(an) = snode;
3547  NCDR(anode) = an;
3548  anode = an;
3549  }
3550  }
3551 
3552  return varlen;
3553 
3554  mem_err2:
3555  onig_node_free(snode);
3556 
3557  mem_err:
3558  onig_node_free(*rnode);
3559 
3560  return ONIGERR_MEMORY;
3561 }
3562 
/*
 * Expand a string node for case-insensitive matching.
 *
 * The string is scanned character by character.  Characters with no
 * case-fold items, or whose items all have the same shape as the character
 * (is_case_fold_variable_len() == 0), are collected into plain string nodes
 * that are case-folded and marked ambiguous.  Other characters are expanded
 * into alternations by expand_case_fold_string_alt().  Expansion stops once
 * the product of alternatives exceeds THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION;
 * the rest of the string is then added as one folded string node.
 * On success the resulting tree is swapped into `node`.
 *
 * Returns 0 on success (also when the node is already ambiguous or empty),
 * otherwise an error code.
 *
 * NOTE(review): lines 3572 and 3586 are missing from this listing (the
 * declaration of `items` and the start of the call that assigns `n`).
 * Confirm against upstream.
 */
3563 static int
3564 expand_case_fold_string(Node* node, regex_t* reg)
3565 {
3566 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3567 
3568  int r, n, len, alt_num;
3569  int varlen = 0;
3570  UChar *start, *end, *p;
3571  Node *top_root, *root, *snode, *prev_node;
 /* NOTE(review): listing skips line 3572 here. */
3573  StrNode* sn = NSTR(node);
3574 
3575  if (NSTRING_IS_AMBIG(node)) return 0;
3576 
3577  start = sn->s;
3578  end = sn->end;
3579  if (start >= end) return 0;
3580 
3581  r = 0;
3582  top_root = root = prev_node = snode = NULL_NODE;
3583  alt_num = 1;
3584  p = start;
3585  while (p < end) {
 /* NOTE(review): listing skips line 3586 here (start of the call whose
    result is stored in `n`). */
3587  p, end, items);
3588  if (n < 0) {
3589  r = n;
3590  goto err;
3591  }
3592 
3593  len = enclen(reg->enc, p, end);
3594 
3595  varlen = is_case_fold_variable_len(n, items, len);
3596  if (n == 0 || varlen == 0) {
 /* Plain character: append it to the current string node, creating the
    node (and, if needed, a list to hold it) first. */
3597  if (IS_NULL(snode)) {
3598  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3599  onig_node_free(top_root);
3600  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3601  if (IS_NULL(root)) {
3602  onig_node_free(prev_node);
3603  goto mem_err;
3604  }
3605  }
3606 
3607  prev_node = snode = onig_node_new_str(NULL, NULL);
3608  if (IS_NULL(snode)) goto mem_err;
3609  if (IS_NOT_NULL(root)) {
3610  if (IS_NULL(onig_node_list_add(root, snode))) {
3611  onig_node_free(snode);
3612  goto mem_err;
3613  }
3614  }
3615  }
3616 
3617  r = onig_node_str_cat(snode, p, p + len);
3618  if (r != 0) goto err;
3619  }
3620  else {
 /* Character needs an alternation; give up expanding once the number of
    combinations passes the threshold. */
3621  alt_num *= (n + 1);
3622  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3623 
3624  if (IS_NOT_NULL(snode)) {
 /* NOTE(review): a non-zero r from this call is not acted on; r is
    reassigned further down. */
3625  r = update_string_node_case_fold(reg, snode);
3626  if (r == 0) {
3627  NSTRING_SET_AMBIG(snode);
3628  }
3629  }
3630  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3631  onig_node_free(top_root);
3632  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3633  if (IS_NULL(root)) {
3634  onig_node_free(prev_node);
3635  goto mem_err;
3636  }
3637  }
3638 
3639  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3640  if (r < 0) goto mem_err;
3641  if (r == 1) {
 /* Variable-length expansion: following nodes are attached inside the
    first branch of the new alternation. */
3642  if (IS_NULL(root)) {
3643  top_root = prev_node;
3644  }
3645  else {
3646  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3647  onig_node_free(prev_node);
3648  goto mem_err;
3649  }
3650  }
3651 
3652  root = NCAR(prev_node);
3653  }
3654  else { /* r == 0 */
3655  if (IS_NOT_NULL(root)) {
3656  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3657  onig_node_free(prev_node);
3658  goto mem_err;
3659  }
3660  }
3661  }
3662 
 /* The next plain character starts a fresh string node. */
3663  snode = NULL_NODE;
3664  }
3665 
3666  p += len;
3667  }
3668  if (IS_NOT_NULL(snode)) {
 /* NOTE(review): a non-zero r from this call is not checked before the
    function continues and may end up returning 0 -- verify intent. */
3669  r = update_string_node_case_fold(reg, snode);
3670  if (r == 0) {
3671  NSTRING_SET_AMBIG(snode);
3672  }
3673  }
3674 
 /* Loop left early (threshold reached): add the rest as one folded string. */
3675  if (p < end) {
3676  Node *srem;
3677 
3678  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3679  if (r != 0) goto mem_err;
3680 
3681  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3682  onig_node_free(top_root);
3683  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3684  if (IS_NULL(root)) {
3685  onig_node_free(srem);
3686  onig_node_free(prev_node);
3687  goto mem_err;
3688  }
3689  }
3690 
3691  if (IS_NULL(root)) {
3692  prev_node = srem;
3693  }
3694  else {
3695  if (IS_NULL(onig_node_list_add(root, srem))) {
3696  onig_node_free(srem);
3697  goto mem_err;
3698  }
3699  }
3700  }
3701 
3702  /* ending */
 /* Swap the built tree into `node`, then free the other node, which is
    presumably left holding the original string. */
3703  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3704  swap_node(node, top_root);
3705  onig_node_free(top_root);
3706  return 0;
3707 
3708  mem_err:
3709  r = ONIGERR_MEMORY;
3710 
3711  err:
3712  onig_node_free(top_root);
3713  return r;
3714 }
3715 
3716 
#ifdef USE_COMBINATION_EXPLOSION_CHECK

/* A finite repeat whose (upper - lower) span reaches this value is "big". */
# define CEC_THRES_NUM_BIG_REPEAT         512
/* Span value used for a repeat without an upper bound. */
# define CEC_INFINITE_NUM          0x7fffffff

/* State bits carried through setup_comb_exp_check(). */
# define CEC_IN_INFINITE_REPEAT    (1<<0)  /* inside a repeat with no upper bound */
# define CEC_IN_FINITE_REPEAT      (1<<1)  /* inside a bounded repeat with upper > 1 */
# define CEC_CONT_BIG_REPEAT       (1<<2)  /* a big repeat was seen earlier */

/*
 * Assign combination-explosion check numbers to quantifiers.
 *
 * node:  subtree to process.
 * state: CEC_* bits describing the context the subtree appears in.
 * env:   scan environment; num_comb_exp_check, comb_exp_max_regnum,
 *        curr_max_regnum and has_recursion are updated here.
 *
 * A quantifier gets a fresh check number when it sits inside an unbounded
 * repeat and is itself variable, or when it is a big repeat following
 * another big repeat.  Quantifiers inside a bounded repeat get -1.
 *
 * Returns the state for whatever follows the subtree: the incoming state
 * for leaf nodes, with CEC_CONT_BIG_REPEAT added after a big repeat.  List
 * traversal stops when a nested call returns a negative value.
 */
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int type;
  int r = state;

  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    /* Each element receives the state produced by the previous one. */
    do {
      r = setup_comb_exp_check(NCAR(node), r, env);
    } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ALT:
    {
      int ret;
      /* Every branch starts from the same incoming state; the resulting
         bits are merged. */
      do {
        ret = setup_comb_exp_check(NCAR(node), state, env);
        r |= ret;
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_QTFR:
    {
      int child_state = state;
      int add_state = 0;
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;
      int var_num;

      if (! IS_REPEAT_INFINITE(qn->upper)) {
        if (qn->upper > 1) {
          /* {0,1}, {1,1} are allowed */
          child_state |= CEC_IN_FINITE_REPEAT;

          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
          if (env->backrefed_mem == 0) {
            if (NTYPE(qn->target) == NT_ENCLOSE) {
              EncloseNode* en = NENCLOSE(qn->target);
              if (en->type == ENCLOSE_MEMORY) {
                if (NTYPE(en->target) == NT_QTFR) {
                  QtfrNode* q = NQTFR(en->target);
                  if (IS_REPEAT_INFINITE(q->upper)
                      && q->greedy == qn->greedy) {
                    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
                    /* Reduced to a single iteration: no longer a finite repeat. */
                    if (qn->upper == 1)
                      child_state = state;
                  }
                }
              }
            }
          }
        }
      }

      if (state & CEC_IN_FINITE_REPEAT) {
        qn->comb_exp_check_num = -1;
      }
      else {
        if (IS_REPEAT_INFINITE(qn->upper)) {
          var_num = CEC_INFINITE_NUM;
          child_state |= CEC_IN_INFINITE_REPEAT;
        }
        else {
          var_num = qn->upper - qn->lower;
        }

        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
          add_state |= CEC_CONT_BIG_REPEAT;

        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
          /* Number the quantifier only once. */
          if (qn->comb_exp_check_num == 0) {
            env->num_comb_exp_check++;
            qn->comb_exp_check_num = env->num_comb_exp_check;
            if (env->curr_max_regnum > env->comb_exp_max_regnum)
              env->comb_exp_max_regnum = env->curr_max_regnum;
          }
        }
      }

      r = setup_comb_exp_check(target, child_state, env);
      r |= add_state;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      switch (en->type) {
      case ENCLOSE_MEMORY:
        {
          /* Track the highest capture group number seen so far. */
          if (env->curr_max_regnum < en->regnum)
            env->curr_max_regnum = en->regnum;

          r = setup_comb_exp_check(en->target, state, env);
        }
        break;

      default:
        r = setup_comb_exp_check(en->target, state, env);
        break;
      }
    }
    break;

# ifdef USE_SUBEXP_CALL
  case NT_CALL:
    /* Recursive calls are only recorded, not followed. */
    if (IS_CALL_RECURSION(NCALL(node)))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
    break;
# endif

  default:
    break;
  }

  return r;
}
#endif
3856 
/* Context flags passed down through setup_tree() in its `state` argument. */
3857 #define IN_ALT (1<<0) /* set while processing the branches of an alternation */
3858 #define IN_NOT (1<<1) /* set inside a negative look-ahead target */
3859 #define IN_REPEAT (1<<2) /* set inside a quantifier target */
3860 #define IN_VAR_REPEAT (1<<3) /* set inside a quantifier whose lower != upper */
3861 #define IN_CALL (1<<4) /* set inside a capture group that is called */
3862 #define IN_RECCALL (1<<5) /* set inside a capture group flagged as recursive */
3863 
3864 /* setup_tree does the following work.
3865  1. check empty loop. (set qn->target_empty_info)
3866  2. expand ignore-case in char class.
3867  3. set memory status bit flags. (reg->mem_stats)
3868  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3869  5. find invalid patterns in look-behind.
3870  6. expand repeated string.
3871  */
3872 static int
3873 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3874 {
3875  int type;
3876  int r = 0;
3877 
3878 restart:
3879  type = NTYPE(node);
3880  switch (type) {
3881  case NT_LIST:
3882  {
3883  Node* prev = NULL_NODE;
3884  do {
3885  r = setup_tree(NCAR(node), reg, state, env);
3886  if (IS_NOT_NULL(prev) && r == 0) {
3887  r = next_setup(prev, NCAR(node), reg);
3888  }
3889  prev = NCAR(node);
3890  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3891  }
3892  break;
3893 
3894  case NT_ALT:
3895  do {
3896  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3897  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3898  break;
3899 
3900  case NT_CCLASS:
3901  break;
3902 
3903  case NT_STR:
3904  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3905  r = expand_case_fold_string(node, reg);
3906  }
3907  break;
3908 
3909  case NT_CTYPE:
3910  case NT_CANY:
3911  break;
3912 
3913 #ifdef USE_SUBEXP_CALL
3914  case NT_CALL:
3915  break;
3916 #endif
3917 
3918  case NT_BREF:
3919  {
3920  int i;
3921  int* p;
3922  Node** nodes = SCANENV_MEM_NODES(env);
3923  BRefNode* br = NBREF(node);
3924  p = BACKREFS_P(br);
3925  for (i = 0; i < br->back_num; i++) {
3926  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3927  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3928  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3929 #ifdef USE_BACKREF_WITH_LEVEL
3930  if (IS_BACKREF_NEST_LEVEL(br)) {
3931  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3932  }
3933 #endif
3935  }
3936  }
3937  break;
3938 
3939  case NT_QTFR:
3940  {
3941  OnigDistance d;
3942  QtfrNode* qn = NQTFR(node);
3943  Node* target = qn->target;
3944 
3945  if ((state & IN_REPEAT) != 0) {
3946  qn->state |= NST_IN_REPEAT;
3947  }
3948 
3949  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3950  r = get_min_match_length(target, &d, env);
3951  if (r) break;
3952  if (d == 0) {
3954 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3955  r = quantifiers_memory_node_info(target);
3956  if (r < 0) break;
3957  if (r > 0) {
3958  qn->target_empty_info = r;
3959  }
3960 #endif
3961 #if 0
3962  r = get_max_match_length(target, &d, env);
3963  if (r == 0 && d == 0) {
3964  /* ()* ==> ()?, ()+ ==> () */
3965  qn->upper = 1;
3966  if (qn->lower > 1) qn->lower = 1;
3967  if (NTYPE(target) == NT_STR) {
3968  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3969  }
3970  }
3971 #endif
3972  }
3973  }
3974 
3975  state |= IN_REPEAT;
3976  if (qn->lower != qn->upper)
3977  state |= IN_VAR_REPEAT;
3978  r = setup_tree(target, reg, state, env);
3979  if (r) break;
3980 
3981  /* expand string */
3982 #define EXPAND_STRING_MAX_LENGTH 100
3983  if (NTYPE(target) == NT_STR) {
3984  if (qn->lower > 1) {
3985  int i, n = qn->lower;
3986  OnigDistance len = NSTRING_LEN(target);
3987  StrNode* sn = NSTR(target);
3988  Node* np;
3989 
3990  np = onig_node_new_str(sn->s, sn->end);
3991  if (IS_NULL(np)) return ONIGERR_MEMORY;
3992  NSTR(np)->flag = sn->flag;
3993 
3994  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3995  r = onig_node_str_cat(np, sn->s, sn->end);
3996  if (r) {
3997  onig_node_free(np);
3998  return r;
3999  }
4000  }
4001  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4002  Node *np1, *np2;
4003 
4004  qn->lower -= i;
4005  if (! IS_REPEAT_INFINITE(qn->upper))
4006  qn->upper -= i;
4007 
4008  np1 = onig_node_new_list(np, NULL);
4009  if (IS_NULL(np1)) {
4010  onig_node_free(np);
4011  return ONIGERR_MEMORY;
4012  }
4013  swap_node(np1, node);
4014  np2 = onig_node_list_add(node, np1);
4015  if (IS_NULL(np2)) {
4016  onig_node_free(np1);
4017  return ONIGERR_MEMORY;
4018  }
4019  }
4020  else {
4021  swap_node(np, node);
4022  onig_node_free(np);
4023  }
4024  break; /* break case NT_QTFR: */
4025  }
4026  }
4027 
4028 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4029  if (qn->greedy && (qn->target_empty_info != 0)) {
4030  if (NTYPE(target) == NT_QTFR) {
4031  QtfrNode* tqn = NQTFR(target);
4032  if (IS_NOT_NULL(tqn->head_exact)) {
4033  qn->head_exact = tqn->head_exact;
4034  tqn->head_exact = NULL;
4035  }
4036  }
4037  else {
4038  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4039  }
4040  }
4041 #endif
4042  }
4043  break;
4044 
4045  case NT_ENCLOSE:
4046  {
4047  EncloseNode* en = NENCLOSE(node);
4048 
4049  switch (en->type) {
4050  case ENCLOSE_OPTION:
4051  {
4052  OnigOptionType options = reg->options;
4053  reg->options = NENCLOSE(node)->option;
4054  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4055  reg->options = options;
4056  }
4057  break;
4058 
4059  case ENCLOSE_MEMORY:
4060  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4061  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4062  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4063  }
4064  if (IS_ENCLOSE_CALLED(en))
4065  state |= IN_CALL;
4066  if (IS_ENCLOSE_RECURSION(en))
4067  state |= IN_RECCALL;
4068  else if ((state & IN_RECCALL) != 0)
4069  SET_CALL_RECURSION(node);
4070  r = setup_tree(en->target, reg, state, env);
4071  break;
4072 
4074  {
4075  Node* target = en->target;
4076  r = setup_tree(target, reg, state, env);
4077  if (NTYPE(target) == NT_QTFR) {
4078  QtfrNode* tqn = NQTFR(target);
4079  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4080  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4081  int qtype = NTYPE(tqn->target);
4082  if (IS_NODE_TYPE_SIMPLE(qtype))
4084  }
4085  }
4086  }
4087  break;
4088 
4089  case ENCLOSE_CONDITION:
4090 #ifdef USE_NAMED_GROUP
4091  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4092  env->num_named > 0 &&
4096  }
4097 #endif
4098  if (NENCLOSE(node)->regnum > env->num_mem)
4099  return ONIGERR_INVALID_BACKREF;
4100  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4101  break;
4102 
4103  case ENCLOSE_ABSENT:
4104  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4105  break;
4106  }
4107  }
4108  break;
4109 
4110  case NT_ANCHOR:
4111  {
4112  AnchorNode* an = NANCHOR(node);
4113 
4114  switch (an->type) {
4115  case ANCHOR_PREC_READ:
4116  r = setup_tree(an->target, reg, state, env);
4117  break;
4118  case ANCHOR_PREC_READ_NOT:
4119  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4120  break;
4121 
4122 /* allowed node types in look-behind */
4123 #define ALLOWED_TYPE_IN_LB \
4124  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4125  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4126 
4127 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4128 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4129 
4130 #define ALLOWED_ANCHOR_IN_LB \
4131 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4132  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4133  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4134  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4135 #define ALLOWED_ANCHOR_IN_LB_NOT \
4136 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4137  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4138  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4139  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4140 
4141  case ANCHOR_LOOK_BEHIND:
4142  {
4143  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4145  if (r < 0) return r;
4146  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4147  if (NTYPE(node) != NT_ANCHOR) goto restart;
4148  r = setup_tree(an->target, reg, state, env);
4149  if (r != 0) return r;
4150  r = setup_look_behind(node, reg, env);
4151  }
4152  break;
4153 
4155  {
4156  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4158  if (r < 0) return r;
4159  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4160  if (NTYPE(node) != NT_ANCHOR) goto restart;
4161  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4162  if (r != 0) return r;
4163  r = setup_look_behind(node, reg, env);
4164  }
4165  break;
4166  }
4167  }
4168  break;
4169 
4170  default:
4171  break;
4172  }
4173 
4174  return r;
4175 }
4176 
4177 #ifndef USE_SUNDAY_QUICK_SEARCH
4178 /* set skip map for Boyer-Moore search */
/*
 * Fill the skip table for the exact string [s, end) (this variant is compiled
 * when USE_SUNDAY_QUICK_SEARCH is not defined).
 *
 * When the string is shorter than ONIG_CHAR_TABLE_SIZE the byte-sized table
 * `skip` is filled; otherwise an int table is allocated into *int_skip (if it
 * is still NULL) and filled instead.  Every entry defaults to `len`; each byte
 * of every character except those reached at i >= len - 1 gets
 * `len - 1 - i - j`.
 *
 * Returns 0 on success, 1 when a case-fold alternative does not have the same
 * byte length as the original character, ONIGERR_MEMORY when the int table
 * cannot be allocated, and ONIGERR_TYPE_BUG on the long-string path when
 * OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE.
 */
4179 static int
4180 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4181  UChar skip[], int** int_skip, int ignore_case)
4182 {
4183  OnigDistance i, len;
4184  int clen, flen, n, j, k;
/* NOTE(review): listing lines 4185-4186 are absent here; `p`, `buf` and
 * `items`, which are used below, are presumably declared there -- confirm
 * against the complete file. */
4187  OnigEncoding enc = reg->enc;
4188 
4189  len = end - s;
4190  if (len < ONIG_CHAR_TABLE_SIZE) {
4191  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4192 
4193  n = 0;
/* `len` is an OnigDistance; NOTE(review): if that type is unsigned,
 * `len - 1` wraps for an empty string -- confirm callers never pass one. */
4194  for (i = 0; i < len - 1; i += clen) {
4195  p = s + i;
/* NOTE(review): listing line 4197 (the head of the statement that ends with
 * "p, end, items);") is absent; presumably it stores a case-fold lookup
 * count into `n` -- confirm. */
4196  if (ignore_case)
4198  p, end, items);
4199  clen = enclen(enc, p, end);
/* clamp a character that would run past the end of the string */
4200  if (p + clen > end)
4201  clen = (int )(end - p);
4202 
/* encode every fold alternative; all must be single code points of the same
 * byte length as the original character */
4203  for (j = 0; j < n; j++) {
4204  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4205  return 1; /* different length isn't supported. */
4206  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4207  if (flen != clen)
4208  return 1; /* different length isn't supported. */
4209  }
/* record the distance for each byte of the character and of its alternatives */
4210  for (j = 0; j < clen; j++) {
4211  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4212  for (k = 0; k < n; k++) {
4213  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4214  }
4215  }
4216  }
4217  }
4218  else {
4219 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4220  /* This should not happen. */
4221  return ONIGERR_TYPE_BUG;
4222 # else
/* long string: distances no longer fit in a byte, use the int table */
4223  if (IS_NULL(*int_skip)) {
4224  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4225  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4226  }
4227  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4228 
4229  n = 0;
4230  for (i = 0; i < len - 1; i += clen) {
4231  p = s + i;
/* NOTE(review): listing line 4233 (head of the statement below) is absent. */
4232  if (ignore_case)
4234  p, end, items);
4235  clen = enclen(enc, p, end);
4236  if (p + clen > end)
4237  clen = (int )(end - p);
4238 
4239  for (j = 0; j < n; j++) {
4240  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4241  return 1; /* different length isn't supported. */
4242  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4243  if (flen != clen)
4244  return 1; /* different length isn't supported. */
4245  }
4246  for (j = 0; j < clen; j++) {
4247  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4248  for (k = 0; k < n; k++) {
4249  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4250  }
4251  }
4252  }
4253 # endif
4254  }
4255  return 0;
4256 }
4257 
4258 #else /* USE_SUNDAY_QUICK_SEARCH */
4259 
4260 /* set skip map for Sunday's quick search */
/*
 * Fill the skip table for the exact string [s, end) (this variant is compiled
 * when USE_SUNDAY_QUICK_SEARCH is defined).
 *
 * Same structure as the variant in the other preprocessor branch, with two
 * differences visible here: every entry defaults to `len + 1`, and the loop
 * covers the whole string (i < len), storing `len - i - j` for each byte.
 *
 * Returns 0 on success, 1 when a case-fold alternative does not have the same
 * byte length as the original character, ONIGERR_MEMORY when the int table
 * cannot be allocated, and ONIGERR_TYPE_BUG on the long-string path when
 * OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE.
 */
4261 static int
4262 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4263  UChar skip[], int** int_skip, int ignore_case)
4264 {
4265  OnigDistance i, len;
4266  int clen, flen, n, j, k;
/* NOTE(review): listing lines 4267-4268 are absent here; `p`, `buf` and
 * `items`, which are used below, are presumably declared there -- confirm
 * against the complete file. */
4269  OnigEncoding enc = reg->enc;
4270 
4271  len = end - s;
4272  if (len < ONIG_CHAR_TABLE_SIZE) {
4273  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4274 
4275  n = 0;
4276  for (i = 0; i < len; i += clen) {
4277  p = s + i;
/* NOTE(review): listing line 4279 (the head of the statement that ends with
 * "p, end, items);") is absent; presumably it stores a case-fold lookup
 * count into `n` -- confirm. */
4278  if (ignore_case)
4280  p, end, items);
4281  clen = enclen(enc, p, end);
/* clamp a character that would run past the end of the string */
4282  if (p + clen > end)
4283  clen = (int )(end - p);
4284 
/* encode every fold alternative; all must be single code points of the same
 * byte length as the original character */
4285  for (j = 0; j < n; j++) {
4286  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4287  return 1; /* different length isn't supported. */
4288  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4289  if (flen != clen)
4290  return 1; /* different length isn't supported. */
4291  }
/* record the distance for each byte of the character and of its alternatives */
4292  for (j = 0; j < clen; j++) {
4293  skip[s[i + j]] = (UChar )(len - i - j);
4294  for (k = 0; k < n; k++) {
4295  skip[buf[k][j]] = (UChar )(len - i - j);
4296  }
4297  }
4298  }
4299  }
4300  else {
4301 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4302  /* This should not happen. */
4303  return ONIGERR_TYPE_BUG;
4304 # else
/* long string: distances no longer fit in a byte, use the int table */
4305  if (IS_NULL(*int_skip)) {
4306  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4307  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4308  }
4309  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4310 
4311  n = 0;
4312  for (i = 0; i < len; i += clen) {
4313  p = s + i;
/* NOTE(review): listing line 4315 (head of the statement below) is absent. */
4314  if (ignore_case)
4316  p, end, items);
4317  clen = enclen(enc, p, end);
4318  if (p + clen > end)
4319  clen = (int )(end - p);
4320 
4321  for (j = 0; j < n; j++) {
4322  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4323  return 1; /* different length isn't supported. */
4324  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4325  if (flen != clen)
4326  return 1; /* different length isn't supported. */
4327  }
4328  for (j = 0; j < clen; j++) {
4329  (*int_skip)[s[i + j]] = (int )(len - i - j);
4330  for (k = 0; k < n; k++) {
4331  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4332  }
4333  }
4334  }
4335 # endif
4336  }
4337  return 0;
4338 }
4339 #endif /* USE_SUNDAY_QUICK_SEARCH */
4340 
4341 typedef struct {
4342  OnigDistance min; /* min byte length */
4343  OnigDistance max; /* max byte length */
4344 } MinMaxLen;
4345 
4346 typedef struct {
4352 } OptEnv;
4353 
/*
 * Anchor bit sets collected for a sub-pattern.
 *
 * NOTE(review): the member list was missing from this listing.  It has been
 * reconstructed from clear_opt_anc_info()/add_opt_anc_info(), which store
 * `int` anchor bits into exactly these two fields -- confirm against the
 * complete source.
 */
typedef struct {
  int left_anchor;   /* anchor bits for which is_left_anchor() returns 1 */
  int right_anchor;  /* anchor bits for which is_left_anchor() returns 0 */
} OptAncInfo;
4358 
4359 typedef struct {
4360  MinMaxLen mmd; /* info position */
4362 
4364  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4365  int len;
4367 } OptExactInfo;
4368 
4369 typedef struct {
4370  MinMaxLen mmd; /* info position */
4372 
4373  int value; /* weighted value */
4375 } OptMapInfo;
4376 
4377 typedef struct {
4379 
4381  OptExactInfo exb; /* boundary */
4382  OptExactInfo exm; /* middle */
4383  OptExactInfo expr; /* prec read (?=...) */
4384 
4385  OptMapInfo map; /* boundary */
4386 } NodeOptInfo;
4387 
4388 
4389 static int
4390 map_position_value(OnigEncoding enc, int i)
4391 {
4392  static const short int ByteValTable[] = {
4393  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4394  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4395  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4396  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4397  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4398  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4399  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4400  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4401  };
4402 
4403  if (i < numberof(ByteValTable)) {
4404  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4405  return 20;
4406  else
4407  return (int )ByteValTable[i];
4408  }
4409  else
4410  return 4; /* Take it easy. */
4411 }
4412 
4413 static int
4414 distance_value(MinMaxLen* mm)
4415 {
4416  /* 1000 / (min-max-dist + 1) */
4417  static const short int dist_vals[] = {
4418  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4419  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4420  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4421  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4422  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4423  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4424  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4425  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4426  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4427  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4428  };
4429 
4430  OnigDistance d;
4431 
4432  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4433 
4434  d = mm->max - mm->min;
4435  if (d < numberof(dist_vals))
4436  /* return dist_vals[d] * 16 / (mm->min + 12); */
4437  return (int )dist_vals[d];
4438  else
4439  return 1;
4440 }
4441 
4442 static int
4443 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4444 {
4445  if (v2 <= 0) return -1;
4446  if (v1 <= 0) return 1;
4447 
4448  v1 *= distance_value(d1);
4449  v2 *= distance_value(d2);
4450 
4451  if (v2 > v1) return 1;
4452  if (v2 < v1) return -1;
4453 
4454  if (d2->min < d1->min) return 1;
4455  if (d2->min > d1->min) return -1;
4456  return 0;
4457 }
4458 
4459 static int
4460 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4461 {
4462  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4463 }
4464 
4465 
4466 static void
4467 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4468 {
4469  mml->min = min;
4470  mml->max = max;
4471 }
4472 
4473 static void
4474 clear_mml(MinMaxLen* mml)
4475 {
4476  mml->min = mml->max = 0;
4477 }
4478 
4479 static void
4480 copy_mml(MinMaxLen* to, MinMaxLen* from)
4481 {
4482  to->min = from->min;
4483  to->max = from->max;
4484 }
4485 
4486 static void
4487 add_mml(MinMaxLen* to, MinMaxLen* from)
4488 {
4489  to->min = distance_add(to->min, from->min);
4490  to->max = distance_add(to->max, from->max);
4491 }
4492 
#if 0
/* Disabled: shift both bounds of `to` by the fixed length `len`. */
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
  OnigDistance lo = distance_add(to->min, len);
  OnigDistance hi = distance_add(to->max, len);

  to->min = lo;
  to->max = hi;
}
#endif
4501 
4502 static void
4503 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4504 {
4505  if (to->min > from->min) to->min = from->min;
4506  if (to->max < from->max) to->max = from->max;
4507 }
4508 
4509 static void
4510 copy_opt_env(OptEnv* to, OptEnv* from)
4511 {
4512  *to = *from;
4513 }
4514 
4515 static void
4516 clear_opt_anc_info(OptAncInfo* anc)
4517 {
4518  anc->left_anchor = 0;
4519  anc->right_anchor = 0;
4520 }
4521 
4522 static void
4523 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4524 {
4525  *to = *from;
4526 }
4527 
/*
 * Compute into `to` the anchor information of `left` followed by `right`.
 *
 * The left anchors of `left` always apply; those of `right` are added only
 * when `left_len` is 0.  Likewise the right anchors of `right` always apply
 * and those of `left` are added when `right_len` is 0.
 * `to` is cleared first, so it must not alias `left` or `right`.
 */
4528 static void
4529 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4530  OnigDistance left_len, OnigDistance right_len)
4531 {
4532  clear_opt_anc_info(to);
4533 
4534  to->left_anchor = left->left_anchor;
4535  if (left_len == 0) {
4536  to->left_anchor |= right->left_anchor;
4537  }
4538 
4539  to->right_anchor = right->right_anchor;
4540  if (right_len == 0) {
4541  to->right_anchor |= left->right_anchor;
4542  }
4543  else {
/* NOTE(review): listing line 4544 (the body of this else branch) is absent
 * from this listing; as shown the branch is empty -- confirm against the
 * complete file. */
4545  }
4546 }
4547 
4548 static int
4549 is_left_anchor(int anc)
4550 {
4551  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4552  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4553  anc == ANCHOR_PREC_READ_NOT)
4554  return 0;
4555 
4556  return 1;
4557 }
4558 
4559 static int
4560 is_set_opt_anc_info(OptAncInfo* to, int anc)
4561 {
4562  if ((to->left_anchor & anc) != 0) return 1;
4563 
4564  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4565 }
4566 
4567 static void
4568 add_opt_anc_info(OptAncInfo* to, int anc)
4569 {
4570  if (is_left_anchor(anc))
4571  to->left_anchor |= anc;
4572  else
4573  to->right_anchor |= anc;
4574 }
4575 
4576 static void
4577 remove_opt_anc_info(OptAncInfo* to, int anc)
4578 {
4579  if (is_left_anchor(anc))
4580  to->left_anchor &= ~anc;
4581  else
4582  to->right_anchor &= ~anc;
4583 }
4584 
4585 static void
4586 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4587 {
4588  to->left_anchor &= add->left_anchor;
4589  to->right_anchor &= add->right_anchor;
4590 }
4591 
4592 static int
4593 is_full_opt_exact_info(OptExactInfo* ex)
4594 {
4595  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4596 }
4597 
4598 static void
4599 clear_opt_exact_info(OptExactInfo* ex)
4600 {
4601  clear_mml(&ex->mmd);
4602  clear_opt_anc_info(&ex->anc);
4603  ex->reach_end = 0;
4604  ex->ignore_case = -1; /* unset */
4605  ex->len = 0;
4606  ex->s[0] = '\0';
4607 }
4608 
4609 static void
4610 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4611 {
4612  *to = *from;
4613 }
4614 
4615 static void
4616 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4617 {
4618  int i, j, len;
4619  UChar *p, *end;
4620  OptAncInfo tanc;
4621 
4622  if (to->ignore_case < 0)
4623  to->ignore_case = add->ignore_case;
4624  else if (to->ignore_case != add->ignore_case)
4625  return ; /* avoid */
4626 
4627  p = add->s;
4628  end = p + add->len;
4629  for (i = to->len; p < end; ) {
4630  len = enclen(enc, p, end);
4631  if (i + len > OPT_EXACT_MAXLEN) break;
4632  for (j = 0; j < len && p < end; j++)
4633  to->s[i++] = *p++;
4634  }
4635 
4636  to->len = i;
4637  to->reach_end = (p == end ? add->reach_end : 0);
4638 
4639  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4640  if (! to->reach_end) tanc.right_anchor = 0;
4641  copy_opt_anc_info(&to->anc, &tanc);
4642 }
4643 
4644 static void
4645 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4646  int raw ARG_UNUSED, OnigEncoding enc)
4647 {
4648  int i, j, len;
4649  UChar *p;
4650 
4651  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4652  len = enclen(enc, p, end);
4653  if (i + len > OPT_EXACT_MAXLEN) break;
4654  for (j = 0; j < len && p < end; j++)
4655  to->s[i++] = *p++;
4656  }
4657 
4658  to->len = i;
4659 }
4660 
4661 static void
4662 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4663 {
4664  int i, j, len;
4665 
4666  if (add->len == 0 || to->len == 0) {
4667  clear_opt_exact_info(to);
4668  return ;
4669  }
4670 
4671  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4672  clear_opt_exact_info(to);
4673  return ;
4674  }
4675 
4676  for (i = 0; i < to->len && i < add->len; ) {
4677  if (to->s[i] != add->s[i]) break;
4678  len = enclen(env->enc, to->s + i, to->s + to->len);
4679 
4680  for (j = 1; j < len; j++) {
4681  if (to->s[i+j] != add->s[i+j]) break;
4682  }
4683  if (j < len) break;
4684  i += len;
4685  }
4686 
4687  if (! add->reach_end || i < add->len || i < to->len) {
4688  to->reach_end = 0;
4689  }
4690  to->len = i;
4691  if (to->ignore_case < 0)
4692  to->ignore_case = add->ignore_case;
4693  else if (add->ignore_case >= 0)
4694  to->ignore_case |= add->ignore_case;
4695 
4696  alt_merge_opt_anc_info(&to->anc, &add->anc);
4697  if (! to->reach_end) to->anc.right_anchor = 0;
4698 }
4699 
4700 static void
4701 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4702 {
4703  int v1, v2;
4704 
4705  v1 = now->len;
4706  v2 = alt->len;
4707 
4708  if (v2 == 0) {
4709  return ;
4710  }
4711  else if (v1 == 0) {
4712  copy_opt_exact_info(now, alt);
4713  return ;
4714  }
4715  else if (v1 <= 2 && v2 <= 2) {
4716  /* ByteValTable[x] is big value --> low price */
4717  v2 = map_position_value(enc, now->s[0]);
4718  v1 = map_position_value(enc, alt->s[0]);
4719 
4720  if (now->len > 1) v1 += 5;
4721  if (alt->len > 1) v2 += 5;
4722  }
4723 
4724  if (now->ignore_case <= 0) v1 *= 2;
4725  if (alt->ignore_case <= 0) v2 *= 2;
4726 
4727  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4728  copy_opt_exact_info(now, alt);
4729 }
4730 
4731 static void
4732 clear_opt_map_info(OptMapInfo* map)
4733 {
4734  static const OptMapInfo clean_info = {
4735  {0, 0}, {0, 0}, 0,
4736  {
4737  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4738  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4739  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4740  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4741  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4742  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4743  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4744  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4745  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4746  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4747  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4748  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4749  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4750  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4751  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4752  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4753  }
4754  };
4755 
4756  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4757 }
4758 
4759 static void
4760 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4761 {
4762  *to = *from;
4763 }
4764 
4765 static void
4766 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4767 {
4768  if (map->map[c] == 0) {
4769  map->map[c] = 1;
4770  map->value += map_position_value(enc, c);
4771  }
4772 }
4773 
/*
 * Mark the first byte of the character at `p`, and the first byte of each of
 * its case-fold alternatives, in `map`.
 *
 * Multi-character folds are disabled for the lookup.  Returns the (negative)
 * lookup result when the case-fold lookup fails, 0 otherwise.
 */
4774 static int
4775 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4776  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4777 {
/* NOTE(review): listing lines 4778-4779 are absent here; `items` and `buf`,
 * which are used below, are presumably declared there -- confirm against the
 * complete file. */
4780  int i, n;
4781 
/* the character itself */
4782  add_char_opt_map_info(map, p[0], enc);
4783 
4784  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4785  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4786  if (n < 0) return n;
4787 
/* encode each alternative and mark its leading byte; the encoded length
 * returned by ONIGENC_CODE_TO_MBC is not used */
4788  for (i = 0; i < n; i++) {
4789  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4790  add_char_opt_map_info(map, buf[0], enc);
4791  }
4792 
4793  return 0;
4794 }
4795 
4796 static void
4797 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4798 {
4799  const int z = 1<<15; /* 32768: something big value */
4800 
4801  int v1, v2;
4802 
4803  if (alt->value == 0) return ;
4804  if (now->value == 0) {
4805  copy_opt_map_info(now, alt);
4806  return ;
4807  }
4808 
4809  v1 = z / now->value;
4810  v2 = z / alt->value;
4811  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4812  copy_opt_map_info(now, alt);
4813 }
4814 
4815 static int
4816 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4817 {
4818 #define COMP_EM_BASE 20
4819  int ve, vm;
4820 
4821  if (m->value <= 0) return -1;
4822 
4823  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4824  vm = COMP_EM_BASE * 5 * 2 / m->value;
4825  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4826 }
4827 
4828 static void
4829 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4830 {
4831  int i, val;
4832 
4833  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4834  if (to->value == 0) return ;
4835  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4836  clear_opt_map_info(to);
4837  return ;
4838  }
4839 
4840  alt_merge_mml(&to->mmd, &add->mmd);
4841 
4842  val = 0;
4843  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4844  if (add->map[i])
4845  to->map[i] = 1;
4846 
4847  if (to->map[i])
4848  val += map_position_value(enc, i);
4849  }
4850  to->value = val;
4851 
4852  alt_merge_opt_anc_info(&to->anc, &add->anc);
4853 }
4854 
4855 static void
4856 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4857 {
4858  copy_mml(&(opt->exb.mmd), mmd);
4859  copy_mml(&(opt->expr.mmd), mmd);
4860  copy_mml(&(opt->map.mmd), mmd);
4861 }
4862 
4863 static void
4864 clear_node_opt_info(NodeOptInfo* opt)
4865 {
4866  clear_mml(&opt->len);
4867  clear_opt_anc_info(&opt->anc);
4868  clear_opt_exact_info(&opt->exb);
4869  clear_opt_exact_info(&opt->exm);
4870  clear_opt_exact_info(&opt->expr);
4871  clear_opt_map_info(&opt->map);
4872 }
4873 
4874 static void
4875 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4876 {
4877  *to = *from;
4878 }
4879 
/*
 * Fold the information of the next list element (`add`) into the information
 * accumulated so far for the elements to its left (`to`).
 *
 * Note that `add` is modified as well: its boundary exact string may receive
 * anchors from `to` and is cleared once it has been appended to one of the
 * exact strings of `to`; its map may receive left anchors from `to`.
 * The statement order matters because later steps read fields updated by
 * earlier ones.
 */
4880 static void
4881 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4882 {
4883  int exb_reach, exm_reach;
4884  OptAncInfo tanc;
4885 
/* anchors of the concatenation, computed into a temporary and copied back */
4886  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4887  copy_opt_anc_info(&to->anc, &tanc);
4888 
/* everything so far has zero width: the anchors also apply to add's
 * boundary exact string */
4889  if (add->exb.len > 0 && to->len.max == 0) {
4890  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4891  to->len.max, add->len.max);
4892  copy_opt_anc_info(&add->exb.anc, &tanc);
4893  }
4894 
/* same idea for a map located at position 0 */
4895  if (add->map.value > 0 && to->len.max == 0) {
4896  if (add->map.mmd.max == 0)
4897  add->map.anc.left_anchor |= to->anc.left_anchor;
4898  }
4899 
/* remember the reach_end flags before they are reset below */
4900  exb_reach = to->exb.reach_end;
4901  exm_reach = to->exm.reach_end;
4902 
4903  if (add->len.max != 0)
4904  to->exb.reach_end = to->exm.reach_end = 0;
4905 
/* if an exact string of `to` extended to its end, add's boundary string
 * continues it; the appended string is then cleared in `add` */
4906  if (add->exb.len > 0) {
4907  if (exb_reach) {
4908  concat_opt_exact_info(&to->exb, &add->exb, enc);
4909  clear_opt_exact_info(&add->exb);
4910  }
4911  else if (exm_reach) {
4912  concat_opt_exact_info(&to->exm, &add->exb, enc);
4913  clear_opt_exact_info(&add->exb);
4914  }
4915  }
/* whatever remains in `add` competes for the "middle" slot */
4916  select_opt_exact_info(enc, &to->exm, &add->exb);
4917  select_opt_exact_info(enc, &to->exm, &add->exm);
4918 
/* a pending prec-read string is cut to the maximum length of `add` and then
 * offered to the boundary slot (position 0) or the middle slot */
4919  if (to->expr.len > 0) {
4920  if (add->len.max > 0) {
4921  if (to->expr.len > (int )add->len.max)
4922  to->expr.len = (int )add->len.max;
4923 
4924  if (to->expr.mmd.max == 0)
4925  select_opt_exact_info(enc, &to->exb, &to->expr);
4926  else
4927  select_opt_exact_info(enc, &to->exm, &to->expr);
4928  }
4929  }
4930  else if (add->expr.len > 0) {
4931  copy_opt_exact_info(&to->expr, &add->expr);
4932  }
4933 
4934  select_opt_map_info(&to->map, &add->map);
4935 
/* finally extend the accumulated length */
4936  add_mml(&to->len, &add->len);
4937 }
4938 
4939 static void
4940 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4941 {
4942  alt_merge_opt_anc_info (&to->anc, &add->anc);
4943  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4944  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4945  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4946  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4947 
4948  alt_merge_mml(&to->len, &add->len);
4949 }
4950 
4951 
/* NOTE(review): not referenced in the visible part of this listing.
 * Presumably the limit that en->opt_count is checked against in the
 * ENCLOSE_MEMORY case of optimize_node_left() (that line is absent from the
 * listing) -- TODO confirm. */
4952 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4953 
/*
 * Collect optimization information for `node` into `opt`.
 *
 * `opt` is cleared and its position fields are set from env->mmd before the
 * node is inspected.  Depending on the node type the function fills in the
 * byte-length range (opt->len), anchors, exact strings and the first-byte
 * map, recursing into child nodes where needed.
 *
 * Returns 0 on success, the error code of a failing helper or recursive call,
 * or ONIGERR_TYPE_BUG for a node type that is not handled.
 */
4954 static int
4955 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4956 {
4957  int type;
4958  int r = 0;
4959 
4960  clear_node_opt_info(opt);
4961  set_bound_node_opt_info(opt, &env->mmd);
4962 
4963  type = NTYPE(node);
4964  switch (type) {
/* sequence: optimize each element with a private copy of the environment
 * whose position advances by the element lengths, then concatenate */
4965  case NT_LIST:
4966  {
4967  OptEnv nenv;
4968  NodeOptInfo nopt;
4969  Node* nd = node;
4970 
4971  copy_opt_env(&nenv, env);
4972  do {
4973  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4974  if (r == 0) {
4975  add_mml(&nenv.mmd, &nopt.len);
4976  concat_left_node_opt_info(env->enc, opt, &nopt);
4977  }
4978  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4979  }
4980  break;
4981 
/* alternation: the first branch is copied, later branches are merged in */
4982  case NT_ALT:
4983  {
4984  NodeOptInfo nopt;
4985  Node* nd = node;
4986 
4987  do {
4988  r = optimize_node_left(NCAR(nd), &nopt, env);
4989  if (r == 0) {
4990  if (nd == node) copy_node_opt_info(opt, &nopt);
4991  else alt_merge_node_opt_info(opt, &nopt, env);
4992  }
4993  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4994  }
4995  break;
4996 
/* literal string: becomes the boundary exact string and seeds the map */
4997  case NT_STR:
4998  {
4999  StrNode* sn = NSTR(node);
5000  OnigDistance slen = sn->end - sn->s;
5001  int is_raw = NSTRING_IS_RAW(node);
5002 
5003  if (! NSTRING_IS_AMBIG(node)) {
5004  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5005  is_raw, env->enc);
5006  opt->exb.ignore_case = 0;
5007  if (slen > 0) {
5008  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5009  }
5010  set_mml(&opt->len, slen, slen);
5011  }
5012  else {
5013  OnigDistance max;
5014 
/* flagged string: no exact/map info is taken, only a length estimate of
 * (max char length distance) * (number of characters) */
5015  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5016  int n = onigenc_strlen(env->enc, sn->s, sn->end);
5017  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
5018  }
5019  else {
5020  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5021  is_raw, env->enc);
5022  opt->exb.ignore_case = 1;
5023 
5024  if (slen > 0) {
5025  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5026  env->enc, env->case_fold_flag);
5027  if (r != 0) break;
5028  }
5029 
5030  max = slen;
5031  }
5032 
5033  set_mml(&opt->len, slen, max);
5034  }
5035 
/* the whole string fitted into the exact buffer */
5036  if ((OnigDistance )opt->exb.len == slen)
5037  opt->exb.reach_end = 1;
5038  }
5039  break;
5040 
5041  case NT_CCLASS:
5042  {
5043  int i, z;
5044  CClassNode* cc = NCCLASS(node);
5045 
5046  /* no need to check ignore case. (set in setup_tree()) */
5047 
/* multibyte ranges or a negated class: only a length range is recorded */
5048  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5049  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
/* NOTE(review): listing line 5050 is absent here; `max`, used on the next
 * line, is presumably declared there -- confirm against the complete file. */
5051 
5052  set_mml(&opt->len, min, max);
5053  }
5054  else {
/* single-byte class: every member byte goes into the map */
5055  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5056  z = BITSET_AT(cc->bs, i);
5057  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5058  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5059  }
5060  }
5061  set_mml(&opt->len, 1, 1);
5062  }
5063  }
5064  break;
5065 
5066  case NT_CTYPE:
5067  {
5068  int i, min, max;
5069  int maxcode;
5070 
5071  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5072 
/* only when that distance is 1 can the map be filled byte by byte */
5073  if (max == 1) {
5074  min = 1;
5075 
5076  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5077  switch (NCTYPE(node)->ctype) {
5078  case ONIGENC_CTYPE_WORD:
5079  if (NCTYPE(node)->not != 0) {
5080  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5081  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5082  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5083  }
5084  }
5085  }
5086  else {
5087  for (i = 0; i < maxcode; i++) {
5088  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5089  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5090  }
5091  }
5092  }
5093  break;
5094  }
5095  }
5096  else {
5097  min = ONIGENC_MBC_MINLEN(env->enc);
5098  }
5099  set_mml(&opt->len, min, max);
5100  }
5101  break;
5102 
5103  case NT_CANY:
5104  {
5105  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
/* NOTE(review): listing line 5106 is absent here; `max`, used on the next
 * line, is presumably declared there -- confirm against the complete file. */
5107  set_mml(&opt->len, min, max);
5108  }
5109  break;
5110 
5111  case NT_ANCHOR:
5112  switch (NANCHOR(node)->type) {
/* these anchor types are simply recorded */
5113  case ANCHOR_BEGIN_BUF:
5114  case ANCHOR_BEGIN_POSITION:
5115  case ANCHOR_BEGIN_LINE:
5116  case ANCHOR_END_BUF:
5117  case ANCHOR_SEMI_END_BUF:
5118  case ANCHOR_END_LINE:
5119  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5120  case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5121  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5122  break;
5123 
/* look-ahead: its exact string is kept separately in opt->expr (never with
 * reach_end) and its map, if any, is taken over */
5124  case ANCHOR_PREC_READ:
5125  {
5126  NodeOptInfo nopt;
5127 
5128  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5129  if (r == 0) {
5130  if (nopt.exb.len > 0)
5131  copy_opt_exact_info(&opt->expr, &nopt.exb);
5132  else if (nopt.exm.len > 0)
5133  copy_opt_exact_info(&opt->expr, &nopt.exm);
5134 
5135  opt->expr.reach_end = 0;
5136 
5137  if (nopt.map.value > 0)
5138  copy_opt_map_info(&opt->map, &nopt.map);
5139  }
5140  }
5141  break;
5142 
/* NOTE(review): listing line 5143 is absent here; presumably a case label
 * for an anchor type that needs no processing -- confirm. */
5144  break;
5145  }
5146  break;
5147 
/* back reference: length range is the union of the ranges of all referenced
 * groups; a recursive reference is unbounded */
5148  case NT_BREF:
5149  {
5150  int i;
5151  int* backs;
5152  OnigDistance min, max, tmin, tmax;
5153  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5154  BRefNode* br = NBREF(node);
5155 
5156  if (br->state & NST_RECURSION) {
5157  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5158  break;
5159  }
5160  backs = BACKREFS_P(br);
5161  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5162  if (r != 0) break;
5163  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5164  if (r != 0) break;
/* a break inside this loop only leaves the loop; r is re-checked after it */
5165  for (i = 1; i < br->back_num; i++) {
5166  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5167  if (r != 0) break;
5168  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5169  if (r != 0) break;
5170  if (min > tmin) min = tmin;
5171  if (max < tmax) max = tmax;
5172  }
5173  if (r == 0) set_mml(&opt->len, min, max);
5174  }
5175  break;
5176 
5177 #ifdef USE_SUBEXP_CALL
/* subexpression call: recursive calls are unbounded, others are optimized
 * under the options of the called group */
5178  case NT_CALL:
5179  if (IS_CALL_RECURSION(NCALL(node)))
5180  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5181  else {
5182  OnigOptionType save = env->options;
5183  env->options = NENCLOSE(NCALL(node)->target)->option;
5184  r = optimize_node_left(NCALL(node)->target, opt, env);
5185  env->options = save;
5186  }
5187  break;
5188 #endif
5189 
5190  case NT_QTFR:
5191  {
5192  int i;
5193  OnigDistance min, max;
5194  NodeOptInfo nopt;
5195  QtfrNode* qn = NQTFR(node);
5196 
5197  r = optimize_node_left(qn->target, &nopt, env);
5198  if (r) break;
5199 
/* unbounded repeat: only the implicit-anchor case is recorded (greedy
 * any-char repeat at position 0) */
5200  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5201  if (env->mmd.max == 0 &&
5202  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5203  if (IS_MULTILINE(env->options))
5204  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5205  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5206  else
5207  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5208  }
5209  }
5210  else {
/* bounded repeat with at least one mandatory iteration: start from the
 * target's info and repeat its exact string up to `lower` times */
5211  if (qn->lower > 0) {
5212  copy_node_opt_info(opt, &nopt);
5213  if (nopt.exb.len > 0) {
5214  if (nopt.exb.reach_end) {
5215  for (i = 2; i <= qn->lower &&
5216  ! is_full_opt_exact_info(&opt->exb); i++) {
5217  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5218  }
5219  if (i < qn->lower) {
5220  opt->exb.reach_end = 0;
5221  }
5222  }
5223  }
5224 
5225  if (qn->lower != qn->upper) {
5226  opt->exb.reach_end = 0;
5227  opt->exm.reach_end = 0;
5228  }
5229  if (qn->lower > 1)
5230  opt->exm.reach_end = 0;
5231  }
5232  }
5233 
/* length range of the repetition */
5234  min = distance_multiply(nopt.len.min, qn->lower);
5235  if (IS_REPEAT_INFINITE(qn->upper))
5236  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5237  else
5238  max = distance_multiply(nopt.len.max, qn->upper);
5239 
5240  set_mml(&opt->len, min, max);
5241  }
5242  break;
5243 
5244  case NT_ENCLOSE:
5245  {
5246  EncloseNode* en = NENCLOSE(node);
5247 
5248  switch (en->type) {
/* option group: optimize the target under the group's options */
5249  case ENCLOSE_OPTION:
5250  {
5251  OnigOptionType save = env->options;
5252 
5253  env->options = en->option;
5254  r = optimize_node_left(en->target, opt, env);
5255  env->options = save;
5256  }
5257  break;
5258 
5259  case ENCLOSE_MEMORY:
5260 #ifdef USE_SUBEXP_CALL
5261  en->opt_count++;
/* NOTE(review): listing line 5262 is absent here; presumably the `if` that
 * opens the block below (the following `else` and closing brace require
 * one) -- confirm its condition against the complete file. */
5263  OnigDistance min, max;
5264 
5265  min = 0;
5266  max = ONIG_INFINITE_DISTANCE;
5267  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5268  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5269  set_mml(&opt->len, min, max);
5270  }
5271  else
5272 #endif
5273  {
5274  r = optimize_node_left(en->target, opt, env);
5275 
/* the implicit any-char-star anchor is dropped for a group that is the
 * target of a back reference */
5276  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5277  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5278  remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5279  }
5280  }
5281  break;
5282 
/* NOTE(review): listing line 5283 is absent here; presumably another case
 * label sharing the code below -- confirm. */
5284  case ENCLOSE_CONDITION:
5285  r = optimize_node_left(en->target, opt, env);
5286  break;
5287 
5288  case ENCLOSE_ABSENT:
5289  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5290  break;
5291  }
5292  }
5293  break;
5294 
5295  default:
5296 #ifdef ONIG_DEBUG
5297  fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5298  NTYPE(node));
5299 #endif
5300  r = ONIGERR_TYPE_BUG;
5301  break;
5302  }
5303 
5304  return r;
5305 }
5306 
/*
 * Install the exact-string search hint described by `e` into `reg`.
 *
 * Copies e->len bytes of e->s into a freshly xmalloc'ed reg->exact, sets
 * reg->exact_end, tries to build a skip table with set_bm_skip() when the
 * string is long enough, and records e->mmd in reg->dmin / reg->dmax.
 * Does nothing when e->len is 0.  Every return visible here yields 0.
 *
 * NOTE(review): the embedded listing numbers skip 5316, 5321, 5329, 5332,
 * 5336, 5345, 5348 and 5352, so this copy is missing statements: whatever
 * followed the xmalloc() call, the right-hand side of `allow_reverse =`,
 * the arms of both `reg->optimize = (allow_reverse != 0` conditionals and
 * the bodies of four else branches.  Restore them from the upstream file
 * before building.
 */
5307 static int
5308 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5309 {
5310  int r;
5311  int allow_reverse;
5312 
5313  if (e->len == 0) return 0;
5314 
5315  reg->exact = (UChar* )xmalloc(e->len);
/* NOTE(review): listing line 5316 is missing here; no NULL check on the
 * allocation is visible before the copy below -- confirm against upstream. */
5317  xmemcpy(reg->exact, e->s, e->len);
5318  reg->exact_end = reg->exact + e->len;
5319 
5320  allow_reverse =
5322 
/* Case-insensitive string: the last set_bm_skip() argument is 1. */
5323  if (e->ignore_case > 0) {
5324  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5325  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5326  reg->map, &(reg->int_map), 1);
5327  if (r == 0) {
5328  reg->optimize = (allow_reverse != 0
5330  }
5331  else {
5333  }
5334  }
5335  else {
5337  }
5338  }
/* Case-sensitive string: same shape, last set_bm_skip() argument is 0. */
5339  else {
5340  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5341  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5342  reg->map, &(reg->int_map), 0);
5343  if (r == 0) {
5344  reg->optimize = (allow_reverse != 0
5346  }
5347  else {
5349  }
5350  }
5351  else {
5353  }
5354  }
5355 
5356  reg->dmin = e->mmd.min;
5357  reg->dmax = e->mmd.max;
5358 
/* threshold_len = dmin plus the byte length of the exact string; left
 * untouched when dmin is ONIG_INFINITE_DISTANCE. */
5359  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5360  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5361  }
5362 
5363  return 0;
5364 }
5365 
5366 static void
5367 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5368 {
5369  int i;
5370 
5371  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5372  reg->map[i] = m->map[i];
5373 
5374  reg->optimize = ONIG_OPTIMIZE_MAP;
5375  reg->dmin = m->mmd.min;
5376  reg->dmax = m->mmd.max;
5377 
5378  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5379  reg->threshold_len = (int )(reg->dmin + 1);
5380  }
5381 }
5382 
5383 static void
5384 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5385 {
5386  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5387  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5388 }
5389 
5390 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5391 static void print_optimize_info(FILE* f, regex_t* reg);
5392 #endif
5393 
/*
 * Derive search hints for `reg` from the parse tree rooted at `node`.
 *
 * Builds an OptEnv from the regex (encoding, options, case-fold flag) and
 * `scan_env`, runs optimize_node_left() over the tree, then installs the
 * result: anchors into reg->anchor, and either an exact-string hint
 * (set_optimize_exact_info) or a character-map hint
 * (set_optimize_map_info), each followed by set_sub_anchor().
 *
 * Returns the optimize_node_left() error when it is non-zero; otherwise
 * the value of set_optimize_exact_info() if that path ran, else 0.
 *
 * NOTE(review): the embedded listing numbers skip 5412, 5413, 5415, 5418,
 * 5419, 5443 and 5445, so the anchor-mask expression, the statements
 * around the ANCHOR_ANYCHAR_STAR_ML clearing, and the body of the final
 * else branch are incomplete in this copy.  Restore from upstream.
 */
5394 static int
5395 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5396 {
5397 
5398  int r;
5399  NodeOptInfo opt;
5400  OptEnv env;
5401 
5402  env.enc = reg->enc;
5403  env.options = reg->options;
5404  env.case_fold_flag = reg->case_fold_flag;
5405  env.scan_env = scan_env;
5406  clear_mml(&env.mmd);
5407 
5408  r = optimize_node_left(node, &opt, &env);
5409  if (r) return r;
5410 
5411  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5414 
5416  reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5417 
5420 
/* For end-anchored patterns, remember the min/max match length. */
5421  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5422  reg->anchor_dmin = opt.len.min;
5423  reg->anchor_dmax = opt.len.max;
5424  }
5425 
/* select_opt_exact_info() leaves the preferred candidate in opt.exb; the
 * map hint is used instead when comp_opt_exact_or_map_info() returns > 0. */
5426  if (opt.exb.len > 0 || opt.exm.len > 0) {
5427  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5428  if (opt.map.value > 0 &&
5429  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5430  goto set_map;
5431  }
5432  else {
5433  r = set_optimize_exact_info(reg, &opt.exb);
5434  set_sub_anchor(reg, &opt.exb.anc);
5435  }
5436  }
5437  else if (opt.map.value > 0) {
5438  set_map:
5439  set_optimize_map_info(reg, &opt.map);
5440  set_sub_anchor(reg, &opt.map.anc);
5441  }
5442  else {
5444  if (opt.len.max == 0)
5446  }
5447 
5448 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5449  print_optimize_info(stderr, reg);
5450 #endif
5451  return r;
5452 }
5453 
/*
 * Reset the search-hint fields of `reg`: anchor, anchor_dmin, anchor_dmax,
 * sub_anchor, exact_end and threshold_len are zeroed, and reg->exact is
 * freed and set to NULL when it is non-NULL.
 *
 * NOTE(review): listing number 5457 is missing between the opening brace
 * and `reg->anchor = 0;`, so one statement was dropped from this copy --
 * restore it from upstream.
 */
5454 static void
5455 clear_optimize_info(regex_t* reg)
5456 {
5458  reg->anchor = 0;
5459  reg->anchor_dmin = 0;
5460  reg->anchor_dmax = 0;
5461  reg->sub_anchor = 0;
5462  reg->exact_end = (UChar* )NULL;
5463  reg->threshold_len = 0;
5464  if (IS_NOT_NULL(reg->exact)) {
5465  xfree(reg->exact);
5466  reg->exact = (UChar* )NULL;
5467  }
5468 }
5469 
5470 #ifdef ONIG_DEBUG
5471 
5472 static void print_enc_string(FILE* fp, OnigEncoding enc,
5473  const UChar *s, const UChar *end)
5474 {
5475  fprintf(fp, "\nPATTERN: /");
5476 
5477  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5478  const UChar *p;
5479  OnigCodePoint code;
5480 
5481  p = s;
5482  while (p < end) {
5483  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5484  if (code >= 0x80) {
5485  fprintf(fp, " 0x%04x ", (int )code);
5486  }
5487  else {
5488  fputc((int )code, fp);
5489  }
5490 
5491  p += enclen(enc, p, end);
5492  }
5493  }
5494  else {
5495  while (s < end) {
5496  fputc((int )*s, fp);
5497  s++;
5498  }
5499  }
5500 
5501  fprintf(fp, "/ (%s)\n", enc->name);
5502 }
5503 #endif /* ONIG_DEBUG */
5504 
5505 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5506 static void
5507 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5508 {
5509  if (a == ONIG_INFINITE_DISTANCE)
5510  fputs("inf", f);
5511  else
5512  fprintf(f, "(%"PRIuPTR")", a);
5513 
5514  fputs("-", f);
5515 
5516  if (b == ONIG_INFINITE_DISTANCE)
5517  fputs("inf", f);
5518  else
5519  fprintf(f, "(%"PRIuPTR")", b);
5520 }
5521 
5522 static void
5523 print_anchor(FILE* f, int anchor)
5524 {
5525  int q = 0;
5526 
5527  fprintf(f, "[");
5528 
5529  if (anchor & ANCHOR_BEGIN_BUF) {
5530  fprintf(f, "begin-buf");
5531  q = 1;
5532  }
5533  if (anchor & ANCHOR_BEGIN_LINE) {
5534  if (q) fprintf(f, ", ");
5535  q = 1;
5536  fprintf(f, "begin-line");
5537  }
5538  if (anchor & ANCHOR_BEGIN_POSITION) {
5539  if (q) fprintf(f, ", ");
5540  q = 1;
5541  fprintf(f, "begin-pos");
5542  }
5543  if (anchor & ANCHOR_END_BUF) {
5544  if (q) fprintf(f, ", ");
5545  q = 1;
5546  fprintf(f, "end-buf");
5547  }
5548  if (anchor & ANCHOR_SEMI_END_BUF) {
5549  if (q) fprintf(f, ", ");
5550  q = 1;
5551  fprintf(f, "semi-end-buf");
5552  }
5553  if (anchor & ANCHOR_END_LINE) {
5554  if (q) fprintf(f, ", ");
5555  q = 1;
5556  fprintf(f, "end-line");
5557  }
5558  if (anchor & ANCHOR_ANYCHAR_STAR) {
5559  if (q) fprintf(f, ", ");
5560  q = 1;
5561  fprintf(f, "anychar-star");
5562  }
5563  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5564  if (q) fprintf(f, ", ");
5565  fprintf(f, "anychar-star-ml");
5566  }
5567 
5568  fprintf(f, "]");
5569 }
5570 
/*
 * Debug helper: dump the search hints stored in `reg` to `f` -- the
 * optimize mode name, the anchor set (plus the anchor distance range when
 * an ANCHOR_END_BUF_MASK bit is set), the sub-anchor set when an optimize
 * mode is active, and then either the exact string or the populated map
 * entries.
 *
 * NOTE(review): reg->optimize indexes the 8-entry on[] table without a
 * range check.
 * NOTE(review): listing number 5613 is missing, so the condition that
 * starts with `ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&` is incomplete in
 * this copy -- restore from upstream.
 */
5571 static void
5572 print_optimize_info(FILE* f, regex_t* reg)
5573 {
5574  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5575  "EXACT_IC", "MAP",
5576  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5577 
5578  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5579  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5580  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5581  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5582  fprintf(f, "\n");
5583 
5584  if (reg->optimize) {
5585  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5586  fprintf(f, "\n");
5587  }
5588  fprintf(f, "\n");
5589 
5590  if (reg->exact) {
5591  UChar *p;
5592  fprintf(f, "exact: [");
5593  for (p = reg->exact; p < reg->exact_end; p++) {
5594  fputc(*p, f);
5595  }
5596  fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5597  }
5598  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5599  int c, i, n = 0;
5600 
/* First pass counts the non-zero map entries, second pass prints them. */
5601  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5602  if (reg->map[i]) n++;
5603 
5604  fprintf(f, "map: n=%d\n", n);
5605  if (n > 0) {
5606  c = 0;
5607  fputc('[', f);
5608  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5609  if (reg->map[i] != 0) {
5610  if (c > 0) fputs(", ", f);
5611  c++;
5612  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5614  fputc(i, f);
5615  else
5616  fprintf(f, "%d", i);
5617  }
5618  }
5619  fprintf(f, "]\n");
5620  }
5621  }
5622 }
5623 #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5624 
5625 
/*
 * Release what a regex owns without freeing the regex_t itself: reg->p,
 * reg->exact, reg->int_map and reg->repeat_range are xfree'd when
 * non-NULL, reg->chain is released with onig_free(), and onig_names_free()
 * is called when USE_NAMED_GROUP is defined.  A NULL `reg` is a no-op.
 * The freed members are not reset to NULL here.
 *
 * NOTE(review): listing numbers 5627 and 5633 are missing.  5627 held the
 * function name and parameter list (callers in this file use
 * `onig_free_body(reg)`); 5633 sits between the int_map and repeat_range
 * frees -- presumably the reg->int_map_backward free, confirm against
 * upstream.
 */
5626 extern void
5628 {
5629  if (IS_NOT_NULL(reg)) {
5630  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5631  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5632  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5634  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5635  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5636 
5637 #ifdef USE_NAMED_GROUP
5638  onig_names_free(reg);
5639 #endif
5640  }
5641 }
5642 
/*
 * Destroy a heap-allocated regex: release its members with
 * onig_free_body() and then xfree the regex_t itself.  NULL is a no-op.
 *
 * NOTE(review): listing number 5644 (function name and parameter list) is
 * missing from this copy; callers in this file use `onig_free(...)`.
 */
5643 extern void
5645 {
5646  if (IS_NOT_NULL(reg)) {
5647  onig_free_body(reg);
5648  xfree(reg);
5649  }
5650 }
5651 
5652 #ifdef RUBY
/*
 * Estimate the number of bytes held by a regex: sizeof(regex_t) plus the
 * code buffer (reg->alloc), the exact string, each int map
 * (ONIG_CHAR_TABLE_SIZE ints), the repeat-range array, and, recursively,
 * reg->chain.  Returns 0 for a NULL `reg`.
 *
 * NOTE(review): listing number 5654 (function name and parameter list) is
 * missing from this copy; the recursive call below names it onig_memsize.
 */
5653 size_t
5655 {
5656  size_t size = sizeof(regex_t);
5657  if (IS_NULL(reg)) return 0;
5658  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5659  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5660  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5661  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5662  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5663  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5664 
5665  return size;
5666 }
5667 
/*
 * Estimate the number of bytes held by a region object: sizeof(*regs)
 * plus regs->allocated entries of both the beg and end arrays.  Returns 0
 * for a NULL `regs`.  (sizeof(*regs) in the initializer does not
 * dereference the pointer, so it is safe ahead of the NULL check.)
 *
 * NOTE(review): listing number 5669 (function name and parameter list) is
 * missing from this copy; the name is not visible anywhere in this chunk
 * -- presumably onig_region_memsize, confirm against upstream.
 */
5668 size_t
5670 {
5671  size_t size = sizeof(*regs);
5672  if (IS_NULL(regs)) return 0;
5673  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5674  return size;
5675 }
5676 #endif
5677 
/*
 * Move the regex `from` into `to`: release what `to` currently owns with
 * onig_free_body(), copy the whole regex_t over it, then free the `from`
 * shell with xfree().  `from` must not be used afterwards.  Both
 * arguments are expanded more than once, so pass side-effect-free
 * expressions.
 */
5678 #define REGEX_TRANSFER(to,from) do {\
5679  onig_free_body(to);\
5680  xmemcpy(to, from, sizeof(regex_t));\
5681  xfree(from);\
5682 } while (0)
5683 
/* Function form of REGEX_TRANSFER; compiled out by the `#if 0` guard. */
5684 #if 0
5685 extern void
5686 onig_transfer(regex_t* to, regex_t* from)
5687 {
5688  REGEX_TRANSFER(to, from);
5689 }
5690 #endif
5691 
5692 #ifdef ONIG_DEBUG_COMPILE
5693 static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5694 #endif
5695 #ifdef ONIG_DEBUG_PARSE_TREE
5696 static void print_tree(FILE* f, Node* node);
5697 #endif
5698 
#ifdef RUBY
/*
 * RUBY build only: compile `pattern` into `reg` without source-location
 * information.  Forwards to onig_compile_ruby() with a NULL source file
 * and line 0 and returns its result unchanged.
 */
extern int
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
             OnigErrorInfo* einfo)
{
  const char* no_sourcefile = NULL;
  const int no_sourceline = 0;

  return onig_compile_ruby(reg, pattern, pattern_end, einfo,
                           no_sourcefile, no_sourceline);
}
#endif
5707 
/*
 * Compile the pattern [pattern, pattern_end) into the already initialised
 * regex `reg`.  Under RUBY the function is named onig_compile_ruby() and
 * also takes the source file/line, which are stored in the scan
 * environment; otherwise it is onig_compile().
 *
 * Stages, in order: allocate or reset the code buffer, parse into a tree
 * (onig_parse_make_tree), resolve named/numbered groups and subexpression
 * calls, setup_tree(), record back-tracking memory masks, optional
 * combination-explosion setup, derive search hints
 * (set_optimize_info_from_tree), compile_tree(), append OP_END.
 *
 * Returns 0 on success or the first non-zero status.  On the error paths,
 * when `einfo` is non-NULL and the scan environment recorded an error
 * span, einfo->enc / par / par_end are filled in; einfo->par is cleared
 * on entry.
 *
 * NOTE(review): the embedded listing numbers skip 5770, 5771, 5810, 5866,
 * 5869 and 5871, so the named-group condition, the IS_FIND_CONDITION
 * branch body and the stack_pop_level assignments near the end are
 * incomplete in this copy.  Restore from upstream before building.
 */
5708 #ifdef RUBY
5709 extern int
5710 onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5711  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5712 #else
5713 extern int
5714 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5715  OnigErrorInfo* einfo)
5716 #endif
5717 {
5718 #define COMPILE_INIT_SIZE 20
5719 
5720  int r;
5721  OnigDistance init_size;
5722  Node* root;
5723  ScanEnv scan_env = {0};
5724 #ifdef USE_SUBEXP_CALL
5725  UnsetAddrList uslist;
5726 #endif
5727 
5728  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5729 
5730 #ifdef RUBY
5731  scan_env.sourcefile = sourcefile;
5732  scan_env.sourceline = sourceline;
5733 #endif
5734 
5735 #ifdef ONIG_DEBUG
5736  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5737 #endif
5738 
/* First compile: size the code buffer at twice the pattern length, with
 * COMPILE_INIT_SIZE as the fallback for an empty pattern.  Recompile:
 * keep the buffer and just rewind it. */
5739  if (reg->alloc == 0) {
5740  init_size = (pattern_end - pattern) * 2;
5741  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5742  r = BBUF_INIT(reg, init_size);
5743  if (r != 0) goto end;
5744  }
5745  else
5746  reg->used = 0;
5747 
5748  reg->num_mem = 0;
5749  reg->num_repeat = 0;
5750  reg->num_null_check = 0;
5751  reg->repeat_range_alloc = 0;
5752  reg->repeat_range = (OnigRepeatRange* )NULL;
5753 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5754  reg->num_comb_exp_check = 0;
5755 #endif
5756 
5757  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5758  if (r != 0) goto err;
5759 
5760 #ifdef ONIG_DEBUG_PARSE_TREE
5761 # if 0
5762  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5763  print_tree(stderr, root);
5764 # endif
5765 #endif
5766 
5767 #ifdef USE_NAMED_GROUP
5768  /* mixed use named group and no-named group */
5769  if (scan_env.num_named > 0 &&
5772  if (scan_env.num_named != scan_env.num_mem)
5773  r = disable_noname_group_capture(&root, reg, &scan_env);
5774  else
5775  r = numbered_ref_check(root);
5776 
5777  if (r != 0) goto err;
5778  }
5779 #endif
5780 
/* From here on, failures must go through err_unset so that uslist is
 * released when subexpression calls are present. */
5781 #ifdef USE_SUBEXP_CALL
5782  if (scan_env.num_call > 0) {
5783  r = unset_addr_list_init(&uslist, scan_env.num_call);
5784  if (r != 0) goto err;
5785  scan_env.unset_addr_list = &uslist;
5786  r = setup_subexp_call(root, &scan_env);
5787  if (r != 0) goto err_unset;
5788  r = subexp_recursive_check_trav(root, &scan_env);
5789  if (r < 0) goto err_unset;
5790  r = subexp_inf_recursive_check_trav(root, &scan_env);
5791  if (r != 0) goto err_unset;
5792 
5793  reg->num_call = scan_env.num_call;
5794  }
5795  else
5796  reg->num_call = 0;
5797 #endif
5798 
5799  r = setup_tree(root, reg, 0, &scan_env);
5800  if (r != 0) goto err_unset;
5801 
5802 #ifdef ONIG_DEBUG_PARSE_TREE
5803  print_tree(stderr, root);
5804 #endif
5805 
5806  reg->capture_history = scan_env.capture_history;
5807  reg->bt_mem_start = scan_env.bt_mem_start;
5808  reg->bt_mem_start |= reg->capture_history;
5809  if (IS_FIND_CONDITION(reg->options))
5811  else {
5812  reg->bt_mem_end = scan_env.bt_mem_end;
5813  reg->bt_mem_end |= reg->capture_history;
5814  }
5815 
5816 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5817  if (scan_env.backrefed_mem == 0
5818 # ifdef USE_SUBEXP_CALL
5819  || scan_env.num_call == 0
5820 # endif
5821  ) {
5822  setup_comb_exp_check(root, 0, &scan_env);
5823 # ifdef USE_SUBEXP_CALL
5824  if (scan_env.has_recursion != 0) {
5825  scan_env.num_comb_exp_check = 0;
5826  }
5827  else
5828 # endif
5829  if (scan_env.comb_exp_max_regnum > 0) {
5830  int i;
5831  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5832  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5833  scan_env.num_comb_exp_check = 0;
5834  break;
5835  }
5836  }
5837  }
5838  }
5839 
5840  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5841 #endif
5842 
5843  clear_optimize_info(reg);
5844 #ifndef ONIG_DONT_OPTIMIZE
5845  r = set_optimize_info_from_tree(root, reg, &scan_env);
5846  if (r != 0) goto err_unset;
5847 #endif
5848 
/* Freed and NULLed here so that the err path below does not free it a
 * second time. */
5849  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5850  xfree(scan_env.mem_nodes_dynamic);
5851  scan_env.mem_nodes_dynamic = (Node** )NULL;
5852  }
5853 
5854  r = compile_tree(root, reg);
5855  if (r == 0) {
/* NOTE(review): when scan_env.num_call > 0 the add_opcode() status is
 * overwritten by unset_addr_list_fix() without having been checked. */
5856  r = add_opcode(reg, OP_END);
5857 #ifdef USE_SUBEXP_CALL
5858  if (scan_env.num_call > 0) {
5859  r = unset_addr_list_fix(&uslist, reg);
5860  unset_addr_list_end(&uslist);
5861  if (r) goto err;
5862  }
5863 #endif
5864 
5865  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5867  else {
5868  if (reg->bt_mem_start != 0)
5870  else
5872  }
5873  }
5874 #ifdef USE_SUBEXP_CALL
5875  else if (scan_env.num_call > 0) {
5876  unset_addr_list_end(&uslist);
5877  }
5878 #endif
5879  onig_node_free(root);
5880 
5881 #ifdef ONIG_DEBUG_COMPILE
5882 # ifdef USE_NAMED_GROUP
5883  onig_print_names(stderr, reg);
5884 # endif
5885  print_compiled_byte_code_list(stderr, reg);
5886 #endif
5887 
5888  end:
5889  return r;
5890 
/* Error exits: err_unset additionally releases uslist, then falls through
 * to err, which reports the error span and frees the tree. */
5891  err_unset:
5892 #ifdef USE_SUBEXP_CALL
5893  if (scan_env.num_call > 0) {
5894  unset_addr_list_end(&uslist);
5895  }
5896 #endif
5897  err:
5898  if (IS_NOT_NULL(scan_env.error)) {
5899  if (IS_NOT_NULL(einfo)) {
5900  einfo->enc = scan_env.enc;
5901  einfo->par = scan_env.error;
5902  einfo->par_end = scan_env.error_end;
5903  }
5904  }
5905 
5906  onig_node_free(root);
5907  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5908  xfree(scan_env.mem_nodes_dynamic);
5909  return r;
5910 }
5911 
5912 static int onig_inited = 0;
5913 
/*
 * Initialise the caller-provided regex `reg` before compilation: calls
 * onig_init() if the library has not been initialised, validates the
 * arguments, merges syntax->options into `option`, stores the encoding,
 * options, syntax and case-fold flag, and sets every owned pointer
 * (exact, int_map, int_map_backward, chain, p, name_table) to NULL with
 * alloc/used at 0.
 *
 * Returns 0 on success and ONIGERR_INVALID_ARGUMENT for a NULL `reg`.
 * Every non-zero return happens before any member of `reg` is assigned,
 * so after a failure the structure's contents are whatever they were on
 * entry.
 *
 * NOTE(review): listing numbers 5915, 5926 and 5928-5930 are missing.
 * 5915 held the function name and first parameters (callers in this file
 * use `onig_reg_init(reg, option, case_fold_flag, enc, syntax)`); the
 * others held the body of the ONIGENC_IS_UNDEF check and an option check
 * whose closing brace is still visible.  Restore from upstream.
 */
5914 extern int
5916  OnigCaseFoldType case_fold_flag,
5917  OnigEncoding enc, const OnigSyntaxType* syntax)
5918 {
5919  if (! onig_inited)
5920  onig_init();
5921 
5922  if (IS_NULL(reg))
5923  return ONIGERR_INVALID_ARGUMENT;
5924 
5925  if (ONIGENC_IS_UNDEF(enc))
5927 
5931  }
5932 
/* ONIG_OPTION_NEGATE_SINGLELINE: take the syntax defaults but force
 * ONIG_OPTION_SINGLELINE off. */
5933  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5934  option |= syntax->options;
5935  option &= ~ONIG_OPTION_SINGLELINE;
5936  }
5937  else
5938  option |= syntax->options;
5939 
5940  (reg)->enc = enc;
5941  (reg)->options = option;
5942  (reg)->syntax = syntax;
5943  (reg)->optimize = 0;
5944  (reg)->exact = (UChar* )NULL;
5945  (reg)->int_map = (int* )NULL;
5946  (reg)->int_map_backward = (int* )NULL;
5947  (reg)->chain = (regex_t* )NULL;
5948 
5949  (reg)->p = (UChar* )NULL;
5950  (reg)->alloc = 0;
5951  (reg)->used = 0;
5952  (reg)->name_table = (void* )NULL;
5953 
5954  (reg)->case_fold_flag = case_fold_flag;
5955  return 0;
5956 }
5957 
5958 extern int
5959 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5960  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5961  const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5962 {
5963  int r;
5964 
5965  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5966  if (r) return r;
5967 
5968  r = onig_compile(reg, pattern, pattern_end, einfo);
5969  return r;
5970 }
5971 
5972 extern int
5973 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5974  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5975  OnigErrorInfo* einfo)
5976 {
5977  int r;
5978 
5979  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5980  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5981 
5982  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5983  if (r) goto err;
5984 
5985  r = onig_compile(*reg, pattern, pattern_end, einfo);
5986  if (r) {
5987  err:
5988  onig_free(*reg);
5989  *reg = NULL;
5990  }
5991  return r;
5992 }
5993 
/*
 * Library initialisation entry point: forwards to onig_init() and returns
 * its result.
 *
 * NOTE(review): listing number 5995 (function name and parameter list) is
 * missing from this copy and the name is not visible elsewhere in this
 * chunk -- presumably onig_initialize, confirm against upstream.
 */
5994 extern int
5996 {
5997  return onig_init();
5998 }
5999 
/*
 * One-time library initialisation.  Returns 0 immediately when
 * onig_inited is already set; otherwise sets it, calls onigenc_init(),
 * enables the optional debug facilities selected at build time, and
 * returns 0.  The flag is a plain static int with no locking visible
 * here.
 *
 * NOTE(review): listing number 6001 (function name and parameter list) is
 * missing from this copy; callers in this file use `onig_init()`.
 */
6000 extern int
6002 {
6003  if (onig_inited != 0)
6004  return 0;
6005 
6006  onig_inited = 1;
6007 
6008 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6009  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6010 #endif
6011 
6012  onigenc_init();
6013  /* onigenc_set_default_caseconv_table((UChar* )0); */
6014 
6015 #ifdef ONIG_DEBUG_STATISTICS
6016  onig_statistics_init();
6017 #endif
6018 
6019  return 0;
6020 }
6021 
6022 
6023 static OnigEndCallListItemType* EndCallTop;
6024 
/*
 * Register `func` to be run by exec_end_call_list().  A new list item is
 * allocated and pushed at the head of EndCallTop, so callbacks run in
 * reverse order of registration.  If the allocation fails the function
 * returns without registering anything and without reporting the
 * failure.
 *
 * NOTE(review): listing number 6027 is missing -- the declaration of
 * `item` was dropped from this copy; restore from upstream.
 */
6025 extern void onig_add_end_call(void (*func)(void))
6026 {
6028 
6029  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6030  if (item == 0) return ;
6031 
6032  item->next = EndCallTop;
6033  item->func = func;
6034 
6035  EndCallTop = item;
6036 }
6037 
/*
 * Run and discard every registered end-call: for each item, starting at
 * EndCallTop, call its function, unlink the item and xfree it.
 * EndCallTop is 0 when the loop finishes.
 *
 * NOTE(review): listing number 6041 is missing -- the declaration of
 * `prev` was dropped from this copy; restore from upstream.
 */
6038 static void
6039 exec_end_call_list(void)
6040 {
6042  void (*func)(void);
6043 
6044  while (EndCallTop != 0) {
6045  func = EndCallTop->func;
6046  (*func)();
6047 
/* The callback has already run; advance the head before freeing the item. */
6048  prev = EndCallTop;
6049  EndCallTop = EndCallTop->next;
6050  xfree(prev);
6051  }
6052 }
6053 
/*
 * Library shutdown: runs the registered end-calls via
 * exec_end_call_list(), emits the optional debug reports selected at
 * build time, clears onig_inited so that onig_init() will run again, and
 * returns 0.
 *
 * NOTE(review): listing number 6055 (function name and parameter list) is
 * missing from this copy and the name is not visible elsewhere in this
 * chunk -- presumably onig_end, confirm against upstream.
 */
6054 extern int
6056 {
6057  exec_end_call_list();
6058 
6059 #ifdef ONIG_DEBUG_STATISTICS
6060  onig_print_statistics(stderr);
6061 #endif
6062 
6063 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6064  _CrtDumpMemoryLeaks();
6065 #endif
6066 
6067  onig_inited = 0;
6068 
6069  return 0;
6070 }
6071 
/*
 * Test whether `code` falls inside one of the code-point ranges stored at
 * `p`.  The buffer is read as a count `n` followed by n pairs of
 * OnigCodePoint; data[2*i] is used as the lower bound and data[2*i + 1]
 * as the upper bound of range i.  A binary search finds the first range
 * whose upper bound is >= code -- this presumes the ranges are sorted,
 * TODO confirm with the code that builds them -- and the result is 1 when
 * that range's lower bound is <= code, else 0.
 *
 * NOTE(review): listing number 6073 (function name and parameter list) is
 * missing from this copy; a caller in this file uses
 * `onig_is_in_code_range(cc->mbuf->p, code)`.
 */
6072 extern int
6074 {
6075  OnigCodePoint n, *data;
6076  OnigCodePoint low, high, x;
6077 
6078  GET_CODE_POINT(n, p);
6079  data = (OnigCodePoint* )p;
/* step over the leading count */
6080  data++;
6081 
6082  for (low = 0, high = n; low < high; ) {
6083  x = (low + high) >> 1;
6084  if (code > data[x * 2 + 1])
6085  low = x + 1;
6086  else
6087  high = x;
6088  }
6089 
6090  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6091 }
6092 
/*
 * Test whether `code` is a member of the character class `cc`.
 *
 * When `elen` is greater than 1 or `code` is at least SINGLE_BYTE_SIZE,
 * membership is looked up in cc->mbuf with onig_is_in_code_range(), and a
 * NULL mbuf means "not found".  Otherwise the cc->bs bitset is consulted.
 * The result is inverted when IS_NCCLASS_NOT(cc) holds.  Returns non-zero
 * for a match and 0 otherwise.
 *
 * NOTE(review): listing number 6094 (function name and parameter list) is
 * missing from this copy; a caller in this file uses
 * `onig_is_code_in_cc_len(len, code, cc)`.
 */
6093 extern int
6095 {
6096  int found;
6097 
6098  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6099  if (IS_NULL(cc->mbuf)) {
6100  found = 0;
6101  }
6102  else {
6103  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6104  }
6105  }
6106  else {
6107  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6108  }
6109 
6110  if (IS_NCCLASS_NOT(cc))
6111  return !found;
6112  else
6113  return found;
6114 }
6115 
/*
 * Test whether `code` is a member of the character class `cc` under the
 * encoding `enc`.  Works out the encoded length of `code` and delegates
 * to onig_is_code_in_cc_len(), returning its result.
 *
 * NOTE(review): listing number 6117 (function name and parameter list) is
 * missing from this copy and the name is not visible elsewhere in this
 * chunk -- presumably onig_is_code_in_cc, confirm against upstream.
 */
6116 extern int
6118 {
6119  int len;
6120 
/* For encodings whose minimum character length exceeds one byte a fixed
 * length of 2 is passed instead of the real encoded length. */
6121  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6122  len = 2;
6123  }
6124  else {
6125  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6126  }
6127  return onig_is_code_in_cc_len(len, code, cc);
6128 }
6129 
6130 
6131 #ifdef ONIG_DEBUG
6132 
6133 /* arguments type */
6134 # define ARG_SPECIAL -1
6135 # define ARG_NON 0
6136 # define ARG_RELADDR 1
6137 # define ARG_ABSADDR 2
6138 # define ARG_LENGTH 3
6139 # define ARG_MEMNUM 4
6140 # define ARG_OPTION 5
6141 # define ARG_STATE_CHECK 6
6142 
/*
 * Debug table mapping each opcode to its printable name and to the kind
 * of inline operand that follows it in the byte code (one of the ARG_*
 * constants).  ARG_SPECIAL marks opcodes whose operands are decoded by
 * hand in onig_print_compiled_byte_code().  The table ends with a
 * sentinel entry whose opcode is -1; op2name() and op2arg_type() scan it
 * linearly up to that sentinel.
 *
 * NOTE(review): listing number 6241 is missing -- the opening of the
 * entry whose name is "state-check-anychar-ml*" was dropped from this
 * copy, leaving that initializer malformed.  Restore from upstream.
 */
6143 OnigOpInfoType OnigOpInfo[] = {
6144  { OP_FINISH, "finish", ARG_NON },
6145  { OP_END, "end", ARG_NON },
6146  { OP_EXACT1, "exact1", ARG_SPECIAL },
6147  { OP_EXACT2, "exact2", ARG_SPECIAL },
6148  { OP_EXACT3, "exact3", ARG_SPECIAL },
6149  { OP_EXACT4, "exact4", ARG_SPECIAL },
6150  { OP_EXACT5, "exact5", ARG_SPECIAL },
6151  { OP_EXACTN, "exactn", ARG_SPECIAL },
6152  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6153  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6154  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6155  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6156  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6157  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6158  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6159  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6160  { OP_CCLASS, "cclass", ARG_SPECIAL },
6161  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6162  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6163  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6164  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6165  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6166  { OP_ANYCHAR, "anychar", ARG_NON },
6167  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6168  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6169  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6170  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6171  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6172  { OP_WORD, "word", ARG_NON },
6173  { OP_NOT_WORD, "not-word", ARG_NON },
6174  { OP_WORD_BOUND, "word-bound", ARG_NON },
6175  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6176  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6177  { OP_WORD_END, "word-end", ARG_NON },
6178  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6179  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6180  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6181  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6182  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6183  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6184  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6185  { OP_END_BUF, "end-buf", ARG_NON },
6186  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6187  { OP_END_LINE, "end-line", ARG_NON },
6188  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6189  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6190  { OP_BACKREF1, "backref1", ARG_NON },
6191  { OP_BACKREF2, "backref2", ARG_NON },
6192  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6193  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6194  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6195  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6196  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6197  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6198  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6199  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6200  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6201  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6202  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6203  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6204  { OP_SET_OPTION, "set-option", ARG_OPTION },
6205  { OP_KEEP, "keep", ARG_NON },
6206  { OP_FAIL, "fail", ARG_NON },
6207  { OP_JUMP, "jump", ARG_RELADDR },
6208  { OP_PUSH, "push", ARG_RELADDR },
6209  { OP_POP, "pop", ARG_NON },
6210  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6211  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6212  { OP_REPEAT, "repeat", ARG_SPECIAL },
6213  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6214  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6215  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6216  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6217  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6218  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6219  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6220  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6221  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6222  { OP_PUSH_POS, "push-pos", ARG_NON },
6223  { OP_POP_POS, "pop-pos", ARG_NON },
6224  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6225  { OP_FAIL_POS, "fail-pos", ARG_NON },
6226  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6227  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6228  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6229  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6230  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6231  { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6232  { OP_ABSENT, "absent", ARG_RELADDR },
6233  { OP_ABSENT_END, "absent-end", ARG_NON },
6234  { OP_CALL, "call", ARG_ABSADDR },
6235  { OP_RETURN, "return", ARG_NON },
6236  { OP_CONDITION, "condition", ARG_SPECIAL },
6237  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6238  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6239  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6240  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6242  "state-check-anychar-ml*", ARG_STATE_CHECK },
6243  { -1, "", ARG_NON }
6244 };
6245 
6246 static const char*
6247 op2name(int opcode)
6248 {
6249  int i;
6250 
6251  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6252  if (opcode == OnigOpInfo[i].opcode)
6253  return OnigOpInfo[i].name;
6254  }
6255  return "";
6256 }
6257 
6258 static int
6259 op2arg_type(int opcode)
6260 {
6261  int i;
6262 
6263  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6264  if (opcode == OnigOpInfo[i].opcode)
6265  return OnigOpInfo[i].arg_type;
6266  }
6267  return ARG_SPECIAL;
6268 }
6269 
6270 # ifdef ONIG_DEBUG_PARSE_TREE
/*
 * Write `indent` space characters to `f`.  Writes nothing when `indent`
 * is zero or negative.
 */
static void
Indent(FILE* f, int indent)
{
  int remaining;

  for (remaining = indent; remaining > 0; remaining--)
    putc(' ', f);
}
6277 # endif /* ONIG_DEBUG_PARSE_TREE */
6278 
6279 static void
6280 p_string(FILE* f, ptrdiff_t len, UChar* s)
6281 {
6282  fputs(":", f);
6283  while (len-- > 0) { fputc(*s++, f); }
6284 }
6285 
6286 static void
6287 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6288 {
6289  int x = len * mb_len;
6290 
6291  fprintf(f, ":%d:", len);
6292  while (x-- > 0) { fputc(*s++, f); }
6293 }
6294 
/*
 * Debug helper: print the single byte-code instruction at `bp` to `f` as
 * "[name<operands>]" and, when `nextp` is non-NULL, store the address of
 * the following instruction in *nextp.
 *
 * The opcode's operand kind comes from op2arg_type().  Simple kinds are
 * decoded in the first switch; ARG_SPECIAL opcodes are decoded one by one
 * in the second switch.  `bpend` and `enc` are used only to measure the
 * character after OP_EXACT1_IC.  An opcode that neither switch handles is
 * reported on stderr, not on `f`.
 *
 * NOTE(review): the embedded listing numbers skip 6341, 6349, 6350, 6491,
 * 6505, 6512 and 6514, so several `case` labels and the pointer advances
 * after the two StateCheckNumType reads are missing from this copy.  As
 * printed, the affected code is unreachable or leaves `bp` short.
 * Restore from upstream before building.
 */
6295 extern void
6296 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6297  OnigEncoding enc)
6298 {
6299  int i, n, arg_type;
6300  RelAddrType addr;
6301  LengthType len;
6302  MemNumType mem;
6303  StateCheckNumType scn;
6304  OnigCodePoint code;
6305  UChar *q;
6306 
6307  fprintf(f, "[%s", op2name(*bp));
6308  arg_type = op2arg_type(*bp);
6309  if (arg_type != ARG_SPECIAL) {
6310  bp++;
6311  switch (arg_type) {
6312  case ARG_NON:
6313  break;
6314  case ARG_RELADDR:
6315  GET_RELADDR_INC(addr, bp);
6316  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6317  break;
6318  case ARG_ABSADDR:
6319  GET_ABSADDR_INC(addr, bp);
6320  fprintf(f, ":(%d)", addr);
6321  break;
6322  case ARG_LENGTH:
6323  GET_LENGTH_INC(len, bp);
6324  fprintf(f, ":%d", len);
6325  break;
6326  case ARG_MEMNUM:
6327  mem = *((MemNumType* )bp);
6328  bp += SIZE_MEMNUM;
6329  fprintf(f, ":%d", mem);
6330  break;
6331  case ARG_OPTION:
6332  {
6333  OnigOptionType option = *((OnigOptionType* )bp);
6334  bp += SIZE_OPTION;
6335  fprintf(f, ":%d", option);
6336  }
6337  break;
6338 
6339  case ARG_STATE_CHECK:
6340  scn = *((StateCheckNumType* )bp);
/* NOTE(review): listing line 6341 is missing; no advance of bp is
 * visible after this read. */
6342  fprintf(f, ":%d", scn);
6343  break;
6344  }
6345  }
6346  else {
6347  switch (*bp++) {
6348  case OP_EXACT1:
/* NOTE(review): listing lines 6349-6350 are missing here. */
6351  p_string(f, 1, bp++); break;
6352  case OP_EXACT2:
6353  p_string(f, 2, bp); bp += 2; break;
6354  case OP_EXACT3:
6355  p_string(f, 3, bp); bp += 3; break;
6356  case OP_EXACT4:
6357  p_string(f, 4, bp); bp += 4; break;
6358  case OP_EXACT5:
6359  p_string(f, 5, bp); bp += 5; break;
6360  case OP_EXACTN:
6361  GET_LENGTH_INC(len, bp);
6362  p_len_string(f, len, 1, bp);
6363  bp += len;
6364  break;
6365 
6366  case OP_EXACTMB2N1:
6367  p_string(f, 2, bp); bp += 2; break;
6368  case OP_EXACTMB2N2:
6369  p_string(f, 4, bp); bp += 4; break;
6370  case OP_EXACTMB2N3:
6371  p_string(f, 6, bp); bp += 6; break;
6372  case OP_EXACTMB2N:
6373  GET_LENGTH_INC(len, bp);
6374  p_len_string(f, len, 2, bp);
6375  bp += len * 2;
6376  break;
6377  case OP_EXACTMB3N:
6378  GET_LENGTH_INC(len, bp);
6379  p_len_string(f, len, 3, bp);
6380  bp += len * 3;
6381  break;
6382  case OP_EXACTMBN:
6383  {
6384  int mb_len;
6385 
6386  GET_LENGTH_INC(mb_len, bp);
6387  GET_LENGTH_INC(len, bp);
6388  fprintf(f, ":%d:%d:", mb_len, len);
6389  n = len * mb_len;
6390  while (n-- > 0) { fputc(*bp++, f); }
6391  }
6392  break;
6393 
6394  case OP_EXACT1_IC:
6395  len = enclen(enc, bp, bpend);
6396  p_string(f, len, bp);
6397  bp += len;
6398  break;
6399  case OP_EXACTN_IC:
6400  GET_LENGTH_INC(len, bp);
6401  p_len_string(f, len, 1, bp);
6402  bp += len;
6403  break;
6404 
6405  case OP_CCLASS:
6406  n = bitset_on_num((BitSetRef )bp);
6407  bp += SIZE_BITSET;
6408  fprintf(f, ":%d", n);
6409  break;
6410 
6411  case OP_CCLASS_NOT:
6412  n = bitset_on_num((BitSetRef )bp);
6413  bp += SIZE_BITSET;
6414  fprintf(f, ":%d", n);
6415  break;
6416 
6417  case OP_CCLASS_MB:
6418  case OP_CCLASS_MB_NOT:
6419  GET_LENGTH_INC(len, bp);
6420  q = bp;
6421 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6422  ALIGNMENT_RIGHT(q);
6423 # endif
6424  GET_CODE_POINT(code, q);
6425  bp += len;
6426  fprintf(f, ":%d:%d", (int )code, len);
6427  break;
6428 
6429  case OP_CCLASS_MIX:
6430  case OP_CCLASS_MIX_NOT:
6431  n = bitset_on_num((BitSetRef )bp);
6432  bp += SIZE_BITSET;
6433  GET_LENGTH_INC(len, bp);
6434  q = bp;
6435 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6436  ALIGNMENT_RIGHT(q);
6437 # endif
6438  GET_CODE_POINT(code, q);
6439  bp += len;
6440  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6441  break;
6442 
6443  case OP_BACKREFN_IC:
6444  mem = *((MemNumType* )bp);
6445  bp += SIZE_MEMNUM;
6446  fprintf(f, ":%d", mem);
6447  break;
6448 
6449  case OP_BACKREF_MULTI_IC:
6450  case OP_BACKREF_MULTI:
6451  fputs(" ", f);
6452  GET_LENGTH_INC(len, bp);
6453  for (i = 0; i < len; i++) {
6454  GET_MEMNUM_INC(mem, bp);
6455  if (i > 0) fputs(", ", f);
6456  fprintf(f, "%d", mem);
6457  }
6458  break;
6459 
6460  case OP_BACKREF_WITH_LEVEL:
6461  {
6462  OnigOptionType option;
6463  LengthType level;
6464 
6465  GET_OPTION_INC(option, bp);
6466  fprintf(f, ":%d", option);
6467  GET_LENGTH_INC(level, bp);
6468  fprintf(f, ":%d", level);
6469 
6470  fputs(" ", f);
6471  GET_LENGTH_INC(len, bp);
6472  for (i = 0; i < len; i++) {
6473  GET_MEMNUM_INC(mem, bp);
6474  if (i > 0) fputs(", ", f);
6475  fprintf(f, "%d", mem);
6476  }
6477  }
6478  break;
6479 
6480  case OP_REPEAT:
6481  case OP_REPEAT_NG:
6482  {
6483  mem = *((MemNumType* )bp);
6484  bp += SIZE_MEMNUM;
6485  addr = *((RelAddrType* )bp);
6486  bp += SIZE_RELADDR;
6487  fprintf(f, ":%d:%d", mem, addr);
6488  }
6489  break;
6490 
/* NOTE(review): listing line 6491 is missing here. */
6492  case OP_PUSH_IF_PEEK_NEXT:
6493  addr = *((RelAddrType* )bp);
6494  bp += SIZE_RELADDR;
6495  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6496  p_string(f, 1, bp);
6497  bp += 1;
6498  break;
6499 
6500  case OP_LOOK_BEHIND:
6501  GET_LENGTH_INC(len, bp);
6502  fprintf(f, ":%d", len);
6503  break;
6504 
/* NOTE(review): listing line 6505 is missing; without its case label the
 * three statements below are unreachable as printed. */
6506  GET_RELADDR_INC(addr, bp);
6507  GET_LENGTH_INC(len, bp);
6508  fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6509  break;
6510 
6511  case OP_STATE_CHECK_PUSH:
/* NOTE(review): listing lines 6512 and 6514 are missing; as printed, scn
 * and addr are both read from the same bp. */
6513  scn = *((StateCheckNumType* )bp);
6515  addr = *((RelAddrType* )bp);
6516  bp += SIZE_RELADDR;
6517  fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6518  break;
6519 
6520  case OP_CONDITION:
6521  GET_MEMNUM_INC(mem, bp);
6522  GET_RELADDR_INC(addr, bp);
6523  fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6524  break;
6525 
6526  default:
6527  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6528  bp[-1]);
6529  }
6530  }
6531  fputs("]", f);
6532  if (nextp) *nextp = bp;
6533 }
6534 
6535 # ifdef ONIG_DEBUG_COMPILE
6536 static void
6537 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6538 {
6539  int ncode;
6540  UChar* bp = reg->p;
6541  UChar* end = reg->p + reg->used;
6542 
6543  fprintf(f, "code length: %d", reg->used);
6544 
6545  ncode = -1;
6546  while (bp < end) {
6547  ncode++;
6548  if (ncode % 5 == 0)
6549  fprintf(f, "\n%ld:", bp - reg->p);
6550  else
6551  fprintf(f, " %ld:", bp - reg->p);
6552  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6553  }
6554 
6555  fprintf(f, "\n");
6556 }
6557 # endif /* ONIG_DEBUG_COMPILE */
6558 
6559 # ifdef ONIG_DEBUG_PARSE_TREE
6560 static void
6561 print_indent_tree(FILE* f, Node* node, int indent)
6562 {
6563  int i, type, container_p = 0;
6564  int add = 3;
6565  UChar* p;
6566 
6567  Indent(f, indent);
6568  if (IS_NULL(node)) {
6569  fprintf(f, "ERROR: null node!!!\n");
6570  exit (0);
6571  }
6572 
6573  type = NTYPE(node);
6574  switch (type) {
6575  case NT_LIST:
6576  case NT_ALT:
6577  if (NTYPE(node) == NT_LIST)
6578  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6579  else
6580  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6581 
6582  print_indent_tree(f, NCAR(node), indent + add);
6583  while (IS_NOT_NULL(node = NCDR(node))) {
6584  if (NTYPE(node) != type) {
6585  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6586  exit(0);
6587  }
6588  print_indent_tree(f, NCAR(node), indent + add);
6589  }
6590  break;
6591 
6592  case NT_STR:
6593  fprintf(f, "<string%s:%"PRIxPTR">",
6594  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6595  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6596  if (*p >= 0x20 && *p < 0x7f)
6597  fputc(*p, f);
6598  else {
6599  fprintf(f, " 0x%02x", *p);
6600  }
6601  }
6602  break;
6603 
6604  case NT_CCLASS:
6605  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6606  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6607  if (NCCLASS(node)->mbuf) {
6608  BBuf* bbuf = NCCLASS(node)->mbuf;
6609  OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6610  OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6611  fprintf(f, "%d", *data++);
6612  for (; data < end; data+=2) {
6613  fprintf(f, ",");
6614  fprintf(f, "%04x-%04x", data[0], data[1]);
6615  }
6616  }
6617  break;
6618 
6619  case NT_CTYPE:
6620  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6621  switch (NCTYPE(node)->ctype) {
6622  case ONIGENC_CTYPE_WORD:
6623  if (NCTYPE(node)->not != 0)
6624  fputs("not word", f);
6625  else
6626  fputs("word", f);
6627  break;
6628 
6629  default:
6630  fprintf(f, "ERROR: undefined ctype.\n");
6631  exit(0);
6632  }
6633  break;
6634 
6635  case NT_CANY:
6636  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6637  break;
6638 
6639  case NT_ANCHOR:
6640  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6641  switch (NANCHOR(node)->type) {
6642  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6643  case ANCHOR_END_BUF: fputs("end buf", f); break;
6644  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6645  case ANCHOR_END_LINE: fputs("end line", f); break;
6646  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6647  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6648 
6649  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6650  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6651 # ifdef USE_WORD_BEGIN_END
6652  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6653  case ANCHOR_WORD_END: fputs("word end", f); break;
6654 # endif
6655  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6656  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6657  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6658  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6659  case ANCHOR_KEEP: fputs("keep",f); break;
6660 
6661  default:
6662  fprintf(f, "ERROR: undefined anchor type.\n");
6663  break;
6664  }
6665  break;
6666 
6667  case NT_BREF:
6668  {
6669  int* p;
6670  BRefNode* br = NBREF(node);
6671  p = BACKREFS_P(br);
6672  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6673  for (i = 0; i < br->back_num; i++) {
6674  if (i > 0) fputs(", ", f);
6675  fprintf(f, "%d", p[i]);
6676  }
6677  }
6678  break;
6679 
6680 # ifdef USE_SUBEXP_CALL
6681  case NT_CALL:
6682  {
6683  CallNode* cn = NCALL(node);
6684  fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6685  p_string(f, cn->name_end - cn->name, cn->name);
6686  }
6687  break;
6688 # endif
6689 
6690  case NT_QTFR:
6691  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6692  NQTFR(node)->lower, NQTFR(node)->upper,
6693  (NQTFR(node)->greedy ? "" : "?"));
6694  print_indent_tree(f, NQTFR(node)->target, indent + add);
6695  break;
6696 
6697  case NT_ENCLOSE:
6698  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6699  switch (NENCLOSE(node)->type) {
6700  case ENCLOSE_OPTION:
6701  fprintf(f, "option:%d", NENCLOSE(node)->option);
6702  break;
6703  case ENCLOSE_MEMORY:
6704  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6705  break;
6707  fprintf(f, "stop-bt");
6708  break;
6709  case ENCLOSE_CONDITION:
6710  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6711  break;
6712  case ENCLOSE_ABSENT:
6713  fprintf(f, "absent");
6714  break;
6715 
6716  default:
6717  break;
6718  }
6719  fprintf(f, "\n");
6720  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6721  break;
6722 
6723  default:
6724  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6725  break;
6726  }
6727 
6728  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6729  type != NT_ENCLOSE)
6730  fprintf(f, "\n");
6731 
6732  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6733 
6734  fflush(f);
6735 }
6736 
6737 static void
6738 print_tree(FILE* f, Node* node)
6739 {
6740  print_indent_tree(f, node, 0);
6741 }
6742 # endif /* ONIG_DEBUG_PARSE_TREE */
6743 #endif /* ONIG_DEBUG */
re_registers::allocated
int allocated
Definition: onigmo.h:717
OP_PUSH_STOP_BT
@ OP_PUSH_STOP_BT
Definition: regint.h:642
ANCHOR_BEGIN_LINE
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
OP_CCLASS
@ OP_CCLASS
Definition: regint.h:569
CallNode::target
struct _Node * target
Definition: regparse.h:229
SIZE_OP_ABSENT_END
#define SIZE_OP_ABSENT_END
Definition: regint.h:739
CHECK_NULL_RETURN_MEMERR
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
_Node
Definition: regparse.h:265
NodeOptInfo::len
MinMaxLen len
Definition: regcomp.c:4378
re_pattern_buffer::dmax
OnigDistance dmax
Definition: onigmo.h:794
re_pattern_buffer::exact
unsigned char * exact
Definition: onigmo.h:788
IS_ENCLOSE_CALLED
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:147
OP_MEMORY_END_REC
@ OP_MEMORY_END_REC
Definition: regint.h:617
PointerType
void * PointerType
Definition: regint.h:673
ONIG_INFINITE_DISTANCE
#define ONIG_INFINITE_DISTANCE
Definition: onigmo.h:85
NodeOptInfo::exb
OptExactInfo exb
Definition: regcomp.c:4381
NST_MAX_FIXED
#define NST_MAX_FIXED
Definition: regparse.h:129
MinMaxLen::min
OnigDistance min
Definition: regcomp.c:4342
EncloseNode::min_len
OnigDistance min_len
Definition: regparse.h:204
re_pattern_buffer::alloc
unsigned int alloc
Definition: onigmo.h:759
re_registers::end
OnigPosition * end
Definition: onigmo.h:720
_BBuf::p
UChar * p
Definition: regint.h:442
OP_ABSENT_END
@ OP_ABSENT_END
Definition: regint.h:649
OP_NULL_CHECK_END_MEMST
@ OP_NULL_CHECK_END_MEMST
Definition: regint.h:635
void
void
Definition: rb_mjit_min_header-2.7.0.h:13273
EncloseNode::call_addr
AbsAddrType call_addr
Definition: regparse.h:201
TRUE
#define TRUE
Definition: nkf.h:175
re_pattern_buffer::threshold_len
int threshold_len
Definition: onigmo.h:783
SIZE_ABSADDR
#define SIZE_ABSADDR
Definition: regint.h:677
BRefNode::back_num
int back_num
Definition: regparse.h:238
OP_END_LINE
@ OP_END_LINE
Definition: regint.h:600
NT_LIST
#define NT_LIST
Definition: regparse.h:46
OP_BEGIN_BUF
@ OP_BEGIN_BUF
Definition: regint.h:597
re_pattern_buffer::repeat_range_alloc
int repeat_range_alloc
Definition: onigmo.h:770
QtfrNode
Definition: regparse.h:179
SIZE_OP_MEMORY_END_PUSH_REC
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:724
OP_ASCII_WORD_BEGIN
@ OP_ASCII_WORD_BEGIN
Definition: regint.h:594
ONIGENC_IS_UNDEF
#define ONIGENC_IS_UNDEF(enc)
Definition: onigmo.h:317
SIZE_OP_PUSH
#define SIZE_OP_PUSH
Definition: regint.h:708
BitStatusType
unsigned int BitStatusType
Definition: regint.h:352
OP_MEMORY_END
@ OP_MEMORY_END
Definition: regint.h:616
ONIGENC_MBC_CASE_FOLD
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: onigmo.h:332
OP_PUSH_POS
@ OP_PUSH_POS
Definition: regint.h:638
PRIuPTR
#define PRIuPTR
Definition: ffitest.h:134
re_pattern_buffer::int_map
int * int_map
Definition: onigmo.h:791
OP_STATE_CHECK_ANYCHAR_ML_STAR
@ OP_STATE_CHECK_ANYCHAR_ML_STAR
Definition: regint.h:660
StrNode::s
UChar * s
Definition: regparse.h:172
OP_ANYCHAR_STAR
@ OP_ANYCHAR_STAR
Definition: regint.h:578
OP_CCLASS_MB
@ OP_CCLASS_MB
Definition: regint.h:570
ScanEnv::capture_history
BitStatusType capture_history
Definition: regparse.h:295
NT_ENCLOSE
#define NT_ENCLOSE
Definition: regparse.h:44
onig_reduce_nested_quantifier
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
ONIGERR_INVALID_ARGUMENT
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
id
const int id
Definition: nkf.c:209
onig_memsize
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5654
OptExactInfo::ignore_case
int ignore_case
Definition: regcomp.c:4364
env
#define env
NCAR
#define NCAR(node)
Definition: regparse.h:86
OP_BEGIN_LINE
@ OP_BEGIN_LINE
Definition: regint.h:599
ANCHOR_WORD_BEGIN
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
CClassNode
Definition: regint.h:804
NodeOptInfo::exm
OptExactInfo exm
Definition: regcomp.c:4382
OP_NOT_ASCII_WORD
@ OP_NOT_ASCII_WORD
Definition: regint.h:591
QUANTIFIER_EXPAND_LIMIT_SIZE
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:721
OP_REPEAT_INC_NG
@ OP_REPEAT_INC_NG
Definition: regint.h:630
ONIG_OPTION_IGNORECASE
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
IS_NCCLASS_NOT
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
onig_compile_ruby
int onig_compile_ruby(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5710
SIZE_OP_MEMORY_END
#define SIZE_OP_MEMORY_END
Definition: regint.h:725
ScanEnv::num_mem
int num_mem
Definition: regparse.h:308
NT_QTFR
#define NT_QTFR
Definition: regparse.h:43
UnsetAddr::offset
int offset
Definition: regparse.h:213
OP_EXACTMB2N2
@ OP_EXACTMB2N2
Definition: regint.h:560
GET_CHAR_LEN_VARLEN
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2410
re_pattern_buffer::case_fold_flag
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
onig_renumber_name_table
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
OP_NOT_WORD_BOUND
@ OP_NOT_WORD_BOUND
Definition: regint.h:586
ALLOWED_ANCHOR_IN_LB_NOT
#define ALLOWED_ANCHOR_IN_LB_NOT
ONIGERR_UNDEFINED_NAME_REFERENCE
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
ONIGENC_GET_CASE_FOLD_CODES_BY_STR
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: onigmo.h:340
ANCHOR_LOOK_BEHIND
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
onig_initialize
int onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
Definition: regcomp.c:5995
SET_NTYPE
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
onig_is_in_code_range
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6073
ONIGENC_MBC_CASE_FOLD_MAXLEN
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: onigmo.h:290
OptMapInfo::map
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4374
CClassNode::bs
BitSet bs
Definition: regint.h:807
OptEnv::case_fold_flag
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4350
OptExactInfo::s
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4366
onigenc_strlen
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
SIZE_OP_POP
#define SIZE_OP_POP
Definition: regint.h:709
bp
#define bp()
Definition: internal.h:1445
THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
BBUF_GET_OFFSET_POS
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:494
QtfrNode::target_empty_info
int target_empty_info
Definition: regparse.h:186
NT_CCLASS
#define NT_CCLASS
Definition: regparse.h:39
EncloseNode::state
int state
Definition: regparse.h:197
OP_FAIL_LOOK_BEHIND_NOT
@ OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:646
ONIGENC_MBC_MINLEN
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
ONIGENC_CASE_FOLD_MIN
#define ONIGENC_CASE_FOLD_MIN
Definition: onigmo.h:130
NST_IN_REPEAT
#define NST_IN_REPEAT
Definition: regparse.h:140
GET_MEMNUM_INC
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:690
OP_ANYCHAR_ML_STAR
@ OP_ANYCHAR_ML_STAR
Definition: regint.h:579
NT_CANY
#define NT_CANY
Definition: regparse.h:41
re_pattern_buffer::enc
OnigEncoding enc
Definition: onigmo.h:776
BRefNode::back_static
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:239
BBUF_GET_ADD_ADDRESS
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:493
IS_SYNTAX_BV
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
StrNode::capa
int capa
Definition: regparse.h:175
ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
OP_EXACTMBN
@ OP_EXACTMBN
Definition: regint.h:564
ONIGERR_UNDEFINED_GROUP_REFERENCE
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: onigmo.h:683
QtfrNode::next_head_exact
struct _Node * next_head_exact
Definition: regparse.h:188
OP_WORD
@ OP_WORD
Definition: regint.h:583
OP_NOT_ASCII_WORD_BOUND
@ OP_NOT_ASCII_WORD_BOUND
Definition: regint.h:593
re_pattern_buffer::repeat_range
OnigRepeatRange * repeat_range
Definition: onigmo.h:774
re_pattern_buffer::chain
struct re_pattern_buffer * chain
Definition: onigmo.h:797
OP_END
@ OP_END
Definition: regint.h:551
OnigDistance
size_t OnigDistance
Definition: onigmo.h:82
ONIGERR_NEVER_ENDING_RECURSION
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: onigmo.h:686
PRIxPTR
#define PRIxPTR
Definition: ruby.h:148
OnigCaseFoldCodeItem::code_len
int code_len
Definition: onigmo.h:145
OP_PUSH
@ OP_PUSH
Definition: regint.h:623
OP_RETURN
@ OP_RETURN
Definition: regint.h:652
OptExactInfo::len
int len
Definition: regcomp.c:4365
NST_STOP_BT_SIMPLE_REPEAT
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:134
onigenc_init
ONIG_EXTERN int onigenc_init(void)
Definition: regenc.c:36
SIZE_OP_SET_OPTION
#define SIZE_OP_SET_OPTION
Definition: regint.h:718
EncloseNode::regnum
int regnum
Definition: regparse.h:199
OP_EXACT2
@ OP_EXACT2
Definition: regint.h:554
xmemcpy
#define xmemcpy
Definition: regint.h:202
REPEAT_RANGE_ALLOC
#define REPEAT_RANGE_ALLOC
OnigCodePoint
unsigned int OnigCodePoint
Definition: onigmo.h:80
int
__inline__ int
Definition: rb_mjit_min_header-2.7.0.h:2839
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
NSTRING_IS_AMBIG
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:115
NULL_NODE
#define NULL_NODE
Definition: regparse.h:283
ARG_UNUSED
#define ARG_UNUSED
Definition: nkf.h:181
SIZE_OP_PUSH_IF_PEEK_NEXT
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:711
BBUF_ADD
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:491
BRefNode::state
int state
Definition: regparse.h:237
old
VALUE ID VALUE old
Definition: rb_mjit_min_header-2.7.0.h:16133
ANCHOR_PREC_READ_NOT
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
onig_get_default_case_fold_flag
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
OP_CCLASS_NOT
@ OP_CCLASS_NOT
Definition: regint.h:572
NSTRING_IS_RAW
#define NSTRING_IS_RAW(node)
Definition: regparse.h:114
SET_ENCLOSE_STATUS
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
BIT_STATUS_ON_AT_SIMPLE
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
OP_ANYCHAR
@ OP_ANYCHAR
Definition: regint.h:576
NT_CTYPE
#define NT_CTYPE
Definition: regparse.h:40
OP_REPEAT
@ OP_REPEAT
Definition: regint.h:627
IS_CALL_RECURSION
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:161
OP_EXACT3
@ OP_EXACT3
Definition: regint.h:555
ScanEnv::enc
OnigEncoding enc
Definition: regparse.h:293
RECURSION_INFINITE
#define RECURSION_INFINITE
Definition: regcomp.c:2886
ONIGENC_CODE_TO_MBC
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
OP_BACKREF1
@ OP_BACKREF1
Definition: regint.h:604
OnigRepeatRange::upper
int upper
Definition: onigmo.h:746
onig_add_end_call
void onig_add_end_call(void(*func)(void))
Definition: regcomp.c:6025
ANCHOR_ANYCHAR_STAR
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:543
OnigEndCallListItem
Definition: regint.h:879
re_pattern_buffer::sub_anchor
int sub_anchor
Definition: onigmo.h:787
ONIGERR_INVALID_COMBINATION_OF_OPTIONS
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: onigmo.h:693
OP_POP_POS
@ OP_POP_POS
Definition: regint.h:639
ANCHOR_ANYCHAR_STAR_MASK
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:91
OP_BACKREF_MULTI_IC
@ OP_BACKREF_MULTI_IC
Definition: regint.h:609
COMPILE_INIT_SIZE
#define COMPILE_INIT_SIZE
ScanEnv::error
UChar * error
Definition: regparse.h:301
ONIGERR_INVALID_BACKREF
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
onig_node_str_set
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
ptr
struct RIMemo * ptr
Definition: debug.c:74
OP_CONDITION
@ OP_CONDITION
Definition: regint.h:654
OP_MEMORY_END_PUSH_REC
@ OP_MEMORY_END_PUSH_REC
Definition: regint.h:615
OP_PUSH_ABSENT_POS
@ OP_PUSH_ABSENT_POS
Definition: regint.h:647
NBREF
#define NBREF(node)
Definition: regparse.h:79
onig_free_body
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5627
COMP_EM_BASE
#define COMP_EM_BASE
ALLOWED_ANCHOR_IN_LB
#define ALLOWED_ANCHOR_IN_LB
DISABLE_CASE_FOLD_MULTI_CHAR
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:405
CKN_ON
#define CKN_ON
Definition: regcomp.c:722
CallNode::group_num
int group_num
Definition: regparse.h:226
NT_STR
#define NT_STR
Definition: regparse.h:38
BBUF_INIT
#define BBUF_INIT(buf, size)
Definition: regint.h:447
AnchorNode::char_len
int char_len
Definition: regparse.h:248
NANCHOR
#define NANCHOR(node)
Definition: regparse.h:82
OP_MEMORY_END_PUSH
@ OP_MEMORY_END_PUSH
Definition: regint.h:614
OP_PUSH_OR_JUMP_EXACT1
@ OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:625
QtfrNode::head_exact
struct _Node * head_exact
Definition: regparse.h:187
SIZE_OP_FAIL
#define SIZE_OP_FAIL
Definition: regint.h:720
GET_OPTION_INC
#define GET_OPTION_INC(option, p)
Definition: regint.h:692
re_pattern_buffer::bt_mem_start
unsigned int bt_mem_start
Definition: onigmo.h:767
IS_QUANTIFIER_IN_REPEAT
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:165
OP_PUSH_IF_PEEK_NEXT
@ OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:626
BIT_STATUS_CLEAR
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
NULL
#define NULL
Definition: _sdbm.c:101
exit
void exit(int __status) __attribute__((__noreturn__))
OP_REPEAT_NG
@ OP_REPEAT_NG
Definition: regint.h:628
OP_EXACTN
@ OP_EXACTN
Definition: regint.h:558
IS_ENCLOSE_RECURSION
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:149
ONIGENC_MBC_MAXLEN
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
OptEnv
Definition: regcomp.c:4346
SIZE_OP_NULL_CHECK_START
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:729
IS_BACKREF_NEST_LEVEL
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:164
re_pattern_buffer::stack_pop_level
int stack_pop_level
Definition: onigmo.h:769
OP_BACKREF_MULTI
@ OP_BACKREF_MULTI
Definition: regint.h:608
ALLOWED_ENCLOSE_IN_LB_NOT
#define ALLOWED_ENCLOSE_IN_LB_NOT
OP_NULL_CHECK_END_MEMST_PUSH
@ OP_NULL_CHECK_END_MEMST_PUSH
Definition: regint.h:636
onig_bbuf_init
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:142
NST_MARK1
#define NST_MARK1
Definition: regparse.h:131
SIZE_MEMNUM
#define SIZE_MEMNUM
Definition: regint.h:679
_BBuf
Definition: regint.h:441
onig_init
int onig_init(void)
Definition: regcomp.c:6001
MinMaxLen
Definition: regcomp.c:4341
re_pattern_buffer::num_comb_exp_check
int num_comb_exp_check
Definition: onigmo.h:764
ENCLOSE_OPTION
#define ENCLOSE_OPTION
Definition: regparse.h:95
enclen
#define enclen(enc, p, e)
Definition: regenc.h:93
NENCLOSE
#define NENCLOSE(node)
Definition: regparse.h:81
OP_STATE_CHECK
@ OP_STATE_CHECK
Definition: regint.h:658
SIZE_STATE_CHECK_NUM
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:680
re_pattern_buffer::int_map_backward
int * int_map_backward
Definition: onigmo.h:792
ScanEnv::bt_mem_end
BitStatusType bt_mem_end
Definition: regparse.h:297
BIT_STATUS_ON_ALL
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:356
BITSET_SIZE
#define BITSET_SIZE
Definition: regint.h:415
SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:706
ALLOWED_ENCLOSE_IN_LB
#define ALLOWED_ENCLOSE_IN_LB
NQ_TARGET_IS_EMPTY_REC
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:125
SIZE_OP_MEMORY_END_REC
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:726
onig_node_new_enclose
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:349
add
#define add(x, y)
Definition: date_strftime.c:23
STACK_POP_LEVEL_FREE
#define STACK_POP_LEVEL_FREE
Definition: regint.h:337
OP_BACKREFN_IC
@ OP_BACKREFN_IC
Definition: regint.h:607
v
int VALUE v
Definition: rb_mjit_min_header-2.7.0.h:12332
ANCHOR_WORD_END
#define ANCHOR_WORD_END
Definition: regint.h:537
SIZE_OP_FAIL_POS
#define SIZE_OP_FAIL_POS
Definition: regint.h:717
re_pattern_buffer::num_call
int num_call
Definition: onigmo.h:765
EncloseNode
Definition: regparse.h:195
re_pattern_buffer::options
OnigOptionType options
Definition: onigmo.h:772
RECURSION_EXIST
#define RECURSION_EXIST
Definition: regcomp.c:2885
OnigRepeatRange
Definition: onigmo.h:744
IS_CODE_SB_WORD
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:876
cc
const struct rb_call_cache * cc
Definition: rb_mjit_min_header-2.7.0.h:13228
OP_SET_OPTION_PUSH
@ OP_SET_OPTION_PUSH
Definition: regint.h:663
ANCHOR_NOT_WORD_BOUND
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
SIZE_OP_MEMORY_END_PUSH
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:723
EncloseNode::target
struct _Node * target
Definition: regparse.h:202
ScanEnv::backrefed_mem
BitStatusType backrefed_mem
Definition: regparse.h:298
re_pattern_buffer::p
unsigned char * p
Definition: onigmo.h:757
ENCLOSE_STOP_BACKTRACK
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
OptEnv::options
OnigOptionType options
Definition: regcomp.c:4349
BIT_STATUS_AT
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:357
SIZE_OP_MEMORY_START
#define SIZE_OP_MEMORY_START
Definition: regint.h:721
ONIG_OPTIMIZE_EXACT_BM_IC
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:348
OP_BACKREF_WITH_LEVEL
@ OP_BACKREF_WITH_LEVEL
Definition: regint.h:610
OP_MEMORY_START_PUSH
@ OP_MEMORY_START_PUSH
Definition: regint.h:613
CallNode::unset_addr_list
UnsetAddrList * unset_addr_list
Definition: regparse.h:230
onig_parse_make_tree
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6611
if
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
AnchorNode::type
int type
Definition: regparse.h:246
GET_ALIGNMENT_PAD_SIZE
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:323
UnsetAddr::target
struct _Node * target
Definition: regparse.h:214
SIZE_OP_ANYCHAR_STAR
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:705
re_pattern_buffer::anchor_dmax
OnigDistance anchor_dmax
Definition: onigmo.h:786
CClassNode::mbuf
BBuf * mbuf
Definition: regint.h:808
IN_ALT
#define IN_ALT
Definition: regcomp.c:3857
re_pattern_buffer::optimize
int optimize
Definition: onigmo.h:782
IS_REPEAT_INFINITE
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
OptEnv::mmd
MinMaxLen mmd
Definition: regcomp.c:4347
ENCLOSE_MEMORY
#define ENCLOSE_MEMORY
Definition: regparse.h:94
IS_NODE_TYPE_SIMPLE
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:65
OP_EXACT4
@ OP_EXACT4
Definition: regint.h:556
ONIG_OPTIMIZE_NONE
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:342
IS_ENCLOSE_MARK1
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:150
NodeOptInfo::anc
OptAncInfo anc
Definition: regcomp.c:4380
OnigSyntaxType
Definition: onigmo.h:479
OnigEndCallListItem::func
void(* func)(void)
Definition: regint.h:881
MinMaxLen::max
OnigDistance max
Definition: regcomp.c:4343
onig_reg_init
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5915
NST_MEM_BACKREFED
#define NST_MEM_BACKREFED
Definition: regparse.h:133
QtfrNode::lower
int lower
Definition: regparse.h:183
OptMapInfo::mmd
MinMaxLen mmd
Definition: regcomp.c:4370
BITSET_AT
#define BITSET_AT(bs, pos)
Definition: regint.h:435
ONIGENC_IS_ALLOWED_REVERSE_MATCH
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: onigmo.h:334
BACKREFS_P
#define BACKREFS_P(br)
Definition: regparse.h:119
fprintf
int fprintf(FILE *__restrict, const char *__restrict,...) __attribute__((__format__(__printf__
NST_MARK2
#define NST_MARK2
Definition: regparse.h:132
re_pattern_buffer::map
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: onigmo.h:790
GET_CODE_POINT
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
OP_CCLASS_MIX_NOT
@ OP_CCLASS_MIX_NOT
Definition: regint.h:574
GET_CHAR_LEN_TOP_ALT_VARLEN
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2411
IS_FIND_CONDITION
#define IS_FIND_CONDITION(option)
Definition: regint.h:387
ScanEnv::unset_addr_list
UnsetAddrList * unset_addr_list
Definition: regparse.h:305
StrNode::buf
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:176
IN_REPEAT
#define IN_REPEAT
Definition: regcomp.c:3859
USE_SUBEXP_CALL
#define USE_SUBEXP_CALL
Definition: regint.h:70
OP_PUSH_LOOK_BEHIND_NOT
@ OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:645
MAX_NODE_OPT_INFO_REF_COUNT
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4952
GroupNumRemap
Definition: regparse.h:335
NSTRING_SET_AMBIG
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
SIZE_OP_LOOK_BEHIND
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:731
EncloseNode::max_len
OnigDistance max_len
Definition: regparse.h:205
EncloseNode::opt_count
int opt_count
Definition: regparse.h:207
SIZE_OP_RETURN
#define SIZE_OP_RETURN
Definition: regint.h:735
BBUF_WRITE
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:477
SINGLE_BYTE_SIZE
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
FOUND_CALLED_NODE
#define FOUND_CALLED_NODE
SIZE_OP_REPEAT_INC
#define SIZE_OP_REPEAT_INC
Definition: regint.h:712
ONIG_OPTIMIZE_MAP
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:347
OnigErrorInfo::enc
OnigEncoding enc
Definition: onigmo.h:739
re_pattern_buffer::num_repeat
int num_repeat
Definition: onigmo.h:762
OP_FINISH
@ OP_FINISH
Definition: regint.h:550
onig_node_new_anchor
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
SIZE_OP_CONDITION
#define SIZE_OP_CONDITION
Definition: regint.h:736
CLEAR_ENCLOSE_STATUS
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:145
IS_MULTILINE
#define IS_MULTILINE(option)
Definition: regint.h:382
NT_ALT
#define NT_ALT
Definition: regparse.h:47
ONIGENC_IS_CODE_PRINT
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: onigmo.h:378
ONIG_OPTIMIZE_EXACT_IC
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:346
i
uint32_t i
Definition: rb_mjit_min_header-2.7.0.h:5464
IS_ENCLOSE_CLEN_FIXED
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:154
onig_new_without_alloc
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5959
QtfrNode::upper
int upper
Definition: regparse.h:184
SIZE_RELADDR
#define SIZE_RELADDR
Definition: regint.h:676
ALIGNMENT_RIGHT
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:329
ONIGENC_CODE_TO_MBCLEN
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
OP_CALL
@ OP_CALL
Definition: regint.h:651
SIZE_OP_PUSH_LOOK_BEHIND_NOT
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:732
ScanEnv::num_named
int num_named
Definition: regparse.h:310
onig_node_new_list
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
onig_node_str_cat
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
RelAddrType
int RelAddrType
Definition: regint.h:667
OP_EXACT1
@ OP_EXACT1
Definition: regint.h:553
OP_JUMP
@ OP_JUMP
Definition: regint.h:622
ANCHOR_END_BUF_MASK
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:92
NQTFR
#define NQTFR(node)
Definition: regparse.h:80
NodeOptInfo::map
OptMapInfo map
Definition: regcomp.c:4385
OnigEndCallListItem::next
struct OnigEndCallListItem * next
Definition: regint.h:880
SIZE_OP_NULL_CHECK_END
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:730
_BBuf::used
unsigned int used
Definition: regint.h:443
OP_POP_STOP_BT
@ OP_POP_STOP_BT
Definition: regint.h:643
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:598
OnigDefaultCaseFoldFlag
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
BIT_STATUS_ON_AT
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:360
ONIGENC_MBC_MAXLEN_DIST
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: onigmo.h:363
OP_REPEAT_INC
@ OP_REPEAT_INC
Definition: regint.h:629
StrNode::flag
unsigned int flag
Definition: regparse.h:174
ANCHOR_END_BUF
#define ANCHOR_END_BUF
Definition: regint.h:530
onig_region_memsize
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5669
OP_ANYCHAR_ML
@ OP_ANYCHAR_ML
Definition: regint.h:577
stderr
#define stderr
Definition: rb_mjit_min_header-2.7.0.h:1485
fflush
int fflush(FILE *)
OP_EXACTMB2N3
@ OP_EXACTMB2N3
Definition: regint.h:561
OP_ANYCHAR_STAR_PEEK_NEXT
@ OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:580
GET_LENGTH_INC
#define GET_LENGTH_INC(len, p)
Definition: regint.h:689
IS_ENCLOSE_ADDR_FIXED
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:148
NSTRING_IS_DONT_GET_OPT_INFO
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:116
onig_compile
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo)
Definition: regcomp.c:5701
UnsetAddrList::us
UnsetAddr * us
Definition: regparse.h:220
SCANENV_MEM_NODES
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
SET_CALL_RECURSION
#define SET_CALL_RECURSION(node)
Definition: regparse.h:160
ALLOWED_TYPE_IN_LB
#define ALLOWED_TYPE_IN_LB
OP_CCLASS_MB_NOT
@ OP_CCLASS_MB_NOT
Definition: regint.h:573
ENCLOSE_ABSENT
#define ENCLOSE_ABSENT
Definition: regparse.h:98
OnigOptionType
unsigned int OnigOptionType
Definition: onigmo.h:445
OP_EXACTMB2N1
@ OP_EXACTMB2N1
Definition: regint.h:559
ANCHOR_PREC_READ
#define ANCHOR_PREC_READ
Definition: regint.h:538
ENCLOSE_CONDITION
#define ENCLOSE_CONDITION
Definition: regparse.h:97
LengthType
int LengthType
Definition: regint.h:669
NCCLASS
#define NCCLASS(node)
Definition: regparse.h:77
OP_NOT_WORD
@ OP_NOT_WORD
Definition: regint.h:584
onig_transfer
void onig_transfer(regex_t *to, regex_t *from)
OP_EXACTMB3N
@ OP_EXACTMB3N
Definition: regint.h:563
OP_ASCII_WORD_BOUND
@ OP_ASCII_WORD_BOUND
Definition: regint.h:592
size
int size
Definition: encoding.c:58
BRefNode::nest_level
int nest_level
Definition: regparse.h:241
NCALL
#define NCALL(node)
Definition: regparse.h:84
StateCheckNumType
short int StateCheckNumType
Definition: regint.h:672
OnigErrorInfo::par_end
OnigUChar * par_end
Definition: onigmo.h:741
ScanEnv::mem_nodes_dynamic
Node ** mem_nodes_dynamic
Definition: regparse.h:314
ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: onigmo.h:637
IS_ENCLOSE_MARK2
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:151
ANCHOR_SEMI_END_BUF
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
GET_RELADDR_INC
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:687
re_pattern_buffer::bt_mem_end
unsigned int bt_mem_end
Definition: onigmo.h:768
OnigEncoding
const typedef OnigEncodingType * OnigEncoding
Definition: onigmo.h:182
NT_BREF
#define NT_BREF
Definition: regparse.h:42
EXPAND_STRING_MAX_LENGTH
#define EXPAND_STRING_MAX_LENGTH
OP_SEMI_END_BUF
@ OP_SEMI_END_BUF
Definition: regint.h:601
onig_node_new_alt
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
ONIGERR_PARSER_BUG
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
IS_IGNORECASE
#define IS_IGNORECASE(option)
Definition: regint.h:383
SIZE_BITSET
#define SIZE_BITSET
Definition: regint.h:425
ONIGENC_IS_MBC_WORD
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: onigmo.h:322
CallNode
Definition: regparse.h:223
OptExactInfo::reach_end
int reach_end
Definition: regcomp.c:4363
re_pattern_buffer::anchor
int anchor
Definition: onigmo.h:784
onig_name_to_group_numbers
ONIG_EXTERN int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, int **nums)
OP_ASCII_WORD
@ OP_ASCII_WORD
Definition: regint.h:590
ANCHOR_BEGIN_POSITION
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
re_pattern_buffer::num_null_check
int num_null_check
Definition: onigmo.h:763
OptEnv::enc
OnigEncoding enc
Definition: regcomp.c:4348
OnigRepeatRange::lower
int lower
Definition: onigmo.h:745
NST_RECURSION
#define NST_RECURSION
Definition: regparse.h:135
STACK_POP_LEVEL_ALL
#define STACK_POP_LEVEL_ALL
Definition: regint.h:339
IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:155
OP_BEGIN_POSITION
@ OP_BEGIN_POSITION
Definition: regint.h:602
OptExactInfo::anc
OptAncInfo anc
Definition: regcomp.c:4361
BBUF_ADD1
#define BBUF_ADD1(buf, byte)
Definition: regint.h:492
re_pattern_buffer::dmin
OnigDistance dmin
Definition: onigmo.h:793
CallNode::name_end
UChar * name_end
Definition: regparse.h:228
OP_EXACT1_IC
@ OP_EXACT1_IC
Definition: regint.h:566
OP_BACKREFN
@ OP_BACKREFN
Definition: regint.h:606
re_pattern_buffer::used
unsigned int used
Definition: onigmo.h:758
ANCHOR_END_LINE
#define ANCHOR_END_LINE
Definition: regint.h:532
ONIG_OPTION_DONT_CAPTURE_GROUP
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
onig_is_code_in_cc
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6117
onig_names_free
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
ONIG_MAX_CAPTURE_HISTORY_GROUP
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: onigmo.h:700
SIZE_OP_MEMORY_START_PUSH
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:722
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
n
const char size_t n
Definition: rb_mjit_min_header-2.7.0.h:5456
re_pattern_buffer
Definition: onigmo.h:755
QtfrNode::state
int state
Definition: regparse.h:181
ONIGENC_IS_MBC_ASCII_WORD
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: onigmo.h:324
UChar
#define UChar
Definition: onigmo.h:76
OnigSyntaxType::options
OnigOptionType options
Definition: onigmo.h:483
ONIGENC_IS_CODE_WORD
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
OptMapInfo::value
int value
Definition: regcomp.c:4373
f
#define f
NQ_TARGET_IS_EMPTY_MEM
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:124
NQ_TARGET_IS_EMPTY
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:123
BitSetRef
Bits * BitSetRef
Definition: regint.h:423
OP_WORD_END
@ OP_WORD_END
Definition: regint.h:588
OptAncInfo::left_anchor
int left_anchor
Definition: regcomp.c:4355
xmalloc
#define xmalloc
Definition: defines.h:211
xrealloc
#define xrealloc
Definition: defines.h:214
regparse.h
SIZE_OP_POP_STOP_BT
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:728
OP_EXACT5
@ OP_EXACT5
Definition: regint.h:557
ANCHOR_KEEP
#define ANCHOR_KEEP
Definition: regint.h:546
OP_REPEAT_INC_SG
@ OP_REPEAT_INC_SG
Definition: regint.h:631
ScanEnv::sourcefile
const char * sourcefile
Definition: regparse.h:324
ONIGERR_TYPE_BUG
#define ONIGERR_TYPE_BUG
Definition: onigmo.h:630
OP_FAIL_POS
@ OP_FAIL_POS
Definition: regint.h:641
NodeOptInfo::expr
OptExactInfo expr
Definition: regcomp.c:4383
EncloseNode::option
OnigOptionType option
Definition: regparse.h:200
ScanEnv::error_end
UChar * error_end
Definition: regparse.h:302
SIZE_OP_FAIL_LOOK_BEHIND_NOT
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:733
UnsetAddrList
Definition: regparse.h:217
UnsetAddrList::alloc
int alloc
Definition: regparse.h:219
OptAncInfo::right_anchor
int right_anchor
Definition: regcomp.c:4356
OP_ASCII_WORD_END
@ OP_ASCII_WORD_END
Definition: regint.h:595
ONIGERR_MEMORY
#define ONIGERR_MEMORY
Definition: onigmo.h:629
OP_ABSENT
@ OP_ABSENT
Definition: regint.h:648
regex_t
OnigRegexType regex_t
Definition: onigmo.h:803
SIZE_OP_PUSH_ABSENT_POS
#define SIZE_OP_PUSH_ABSENT_POS
Definition: regint.h:737
OptExactInfo::mmd
MinMaxLen mmd
Definition: regcomp.c:4360
PRIdPTR
#define PRIdPTR
Definition: ruby.h:144
IS_BACKREF_NAME_REF
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:163
SIZE_LENGTH
#define SIZE_LENGTH
Definition: regint.h:678
OnigErrorInfo::par
OnigUChar * par
Definition: onigmo.h:740
AbsAddrType
int AbsAddrType
Definition: regint.h:668
MemNumType
short int MemNumType
Definition: regint.h:671
OnigCaseFoldCodeItem::byte_len
int byte_len
Definition: onigmo.h:144
IS_NEED_STR_LEN_OP_EXACT
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:315
OPT_EXACT_MAXLEN
#define OPT_EXACT_MAXLEN
Definition: regint.h:90
OP_PUSH_POS_NOT
@ OP_PUSH_POS_NOT
Definition: regint.h:640
re_pattern_buffer::anchor_dmin
OnigDistance anchor_dmin
Definition: onigmo.h:785
OP_CCLASS_MIX
@ OP_CCLASS_MIX
Definition: regint.h:571
EncloseNode::char_len
int char_len
Definition: regparse.h:206
OptEnv::scan_env
ScanEnv * scan_env
Definition: regcomp.c:4351
IS_ENCLOSE_MAX_FIXED
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:153
IS_ENCLOSE_NAME_REF
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:158
OptMapInfo
Definition: regcomp.c:4369
AnchorNode
Definition: regparse.h:244
AnchorNode::ascii_range
int ascii_range
Definition: regparse.h:249
re_registers::beg
OnigPosition * beg
Definition: onigmo.h:719
OptAncInfo
Definition: regcomp.c:4354
ONIG_IS_OPTION_ON
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
QtfrNode::target
struct _Node * target
Definition: regparse.h:182
re_registers
Definition: onigmo.h:716
OP_STATE_CHECK_PUSH
@ OP_STATE_CHECK_PUSH
Definition: regint.h:656
SIZE_OPTION
#define SIZE_OPTION
Definition: regint.h:682
ONIGENC_CASE_FOLD_DEFAULT
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: onigmo.h:131
err
int err
Definition: win32.c:135
OP_SET_OPTION
@ OP_SET_OPTION
Definition: regint.h:664
ANCHOR_LOOK_BEHIND_NOT
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
fputc
int fputc(int, FILE *)
ONIGENC_CTYPE_WORD
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
ANCHOR_WORD_BOUND
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
IN_NOT
#define IN_NOT
Definition: regcomp.c:3858
ScanEnv
Definition: regparse.h:290
ONIGERR_INVALID_CONDITION_PATTERN
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
xfree
#define xfree
Definition: defines.h:216
STACK_POP_LEVEL_MEM_START
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:338
OP_EXACTN_IC
@ OP_EXACTN_IC
Definition: regint.h:567
ONIG_CHAR_TABLE_SIZE
#define ONIG_CHAR_TABLE_SIZE
Definition: onigmo.h:753
NSTRING_SET_DONT_GET_OPT_INFO
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:112
onig_is_code_in_cc_len
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6094
OP_BACKREF2
@ OP_BACKREF2
Definition: regint.h:605
onig_end
int onig_end(void)
Definition: regcomp.c:6055
OP_WORD_BEGIN
@ OP_WORD_BEGIN
Definition: regint.h:587
BRefNode
Definition: regparse.h:235
EncloseNode::type
int type
Definition: regparse.h:198
IS_ENCLOSE_MIN_FIXED
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:152
SIZE_OP_PUSH_POS
#define SIZE_OP_PUSH_POS
Definition: regint.h:714
ANCHOR_BEGIN_BUF
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
OP_FAIL
@ OP_FAIL
Definition: regint.h:621
CallNode::name
UChar * name
Definition: regparse.h:227
NTYPE2BIT
#define NTYPE2BIT(type)
Definition: regparse.h:51
len
uint8_t len
Definition: escape.c:17
CHECK_NULL_RETURN
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
OP_STATE_CHECK_ANYCHAR_STAR
@ OP_STATE_CHECK_ANYCHAR_STAR
Definition: regint.h:659
ScanEnv::syntax
const OnigSyntaxType * syntax
Definition: regparse.h:294
ScanEnv::num_call
int num_call
Definition: regparse.h:307
OptMapInfo::anc
OptAncInfo anc
Definition: regcomp.c:4371
ONIG_OPTIMIZE_EXACT
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:343
intptr_t
int intptr_t
Definition: win32.h:90
ONIGENC_MBC_TO_CODE
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
ANCHOR_ANYCHAR_STAR_ML
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:544
OP_KEEP
@ OP_KEEP
Definition: regint.h:619
OP_ANYCHAR_ML_STAR_PEEK_NEXT
@ OP_ANYCHAR_ML_STAR_PEEK_NEXT
Definition: regint.h:581
QtfrNode::is_referred
int is_referred
Definition: regparse.h:189
re_pattern_buffer::num_mem
int num_mem
Definition: onigmo.h:761
OP_LOOK_BEHIND
@ OP_LOOK_BEHIND
Definition: regint.h:644
OptExactInfo
Definition: regcomp.c:4359
SIZE_POINTER
#define SIZE_POINTER
Definition: regint.h:684
IS_NOT_NULL
#define IS_NOT_NULL(p)
Definition: regint.h:299
onig_scan_env_set_error_string
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6638
onig_node_new_str
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
ONIG_OPTION_NEGATE_SINGLELINE
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: onigmo.h:458
SIZE_OP_JUMP
#define SIZE_OP_JUMP
Definition: regint.h:707
SIZE_OP_PUSH_POS_NOT
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:715
NCTYPE
#define NCTYPE(node)
Definition: regparse.h:78
QtfrNode::greedy
int greedy
Definition: regparse.h:185
ONIG_OPTION_SINGLELINE
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
IS_ENCLOSE_NAMED_GROUP
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:157
OP_NULL_CHECK_END
@ OP_NULL_CHECK_END
Definition: regint.h:634
onig_free
void onig_free(regex_t *reg)
Definition: regcomp.c:5644
StrNode
Definition: regparse.h:170
fputs
int fputs(const char *__restrict, FILE *__restrict)
REGEX_TRANSFER
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5678
ONIG_OPTION_CAPTURE_GROUP
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
WORD_ALIGNMENT_SIZE
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:321
onig_node_list_add
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
IN_CALL
#define IN_CALL
Definition: regcomp.c:3861
OP_END_BUF
@ OP_END_BUF
Definition: regint.h:598
OP_STATE_CHECK_PUSH_OR_JUMP
@ OP_STATE_CHECK_PUSH_OR_JUMP
Definition: regint.h:657
re_pattern_buffer::exact_end
unsigned char * exact_end
Definition: onigmo.h:789
ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: onigmo.h:135
AnchorNode::target
struct _Node * target
Definition: regparse.h:247
SIZE_OPCODE
#define SIZE_OPCODE
Definition: regint.h:675
OnigErrorInfo
Definition: onigmo.h:738
OP_WORD_BOUND
@ OP_WORD_BOUND
Definition: regint.h:585
onig_node_free
void onig_node_free(Node *node)
Definition: regparse.c:1062
ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: onigmo.h:675
ScanEnv::sourceline
int sourceline
Definition: regparse.h:325
NST_ADDR_FIXED
#define NST_ADDR_FIXED
Definition: regparse.h:137
OnigCaseFoldCodeItem
Definition: onigmo.h:143
ONIGERR_INVALID_LOOK_BEHIND_PATTERN
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: onigmo.h:662
SIZE_OP_CALL
#define SIZE_OP_CALL
Definition: regint.h:734
IN_VAR_REPEAT
#define IN_VAR_REPEAT
Definition: regcomp.c:3860
NST_MIN_FIXED
#define NST_MIN_FIXED
Definition: regparse.h:128
NST_CLEN_FIXED
#define NST_CLEN_FIXED
Definition: regparse.h:130
numberof
#define numberof(array)
Definition: etc.c:618
onig_set_default_case_fold_flag
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
ONIG_OPTIMIZE_EXACT_BM_NOT_REV
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:345
NCDR
#define NCDR(node)
Definition: regparse.h:87
SIZE_OP_PUSH_OR_JUMP_EXACT1
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:710
GroupNumRemap::new_val
int new_val
Definition: regparse.h:336
OnigCaseFoldType
unsigned int OnigCaseFoldType
Definition: onigmo.h:95
SIZE_OP_POP_POS
#define SIZE_OP_POP_POS
Definition: regint.h:716
UnsetAddr
Definition: regparse.h:212
UnsetAddrList::num
int num
Definition: regparse.h:218
NSTR
#define NSTR(node)
Definition: regparse.h:76
ptrdiff_t
long int ptrdiff_t
Definition: rb_mjit_min_header-2.7.0.h:802
OP_POP
@ OP_POP
Definition: regint.h:624
ONIG_OPTIMIZE_EXACT_BM
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:344
SIZE_OP_ABSENT
#define SIZE_OP_ABSENT
Definition: regint.h:738
NT_CALL
#define NT_CALL
Definition: regparse.h:48
IS_NULL
#define IS_NULL(p)
Definition: regint.h:298
putc
int putc(int, FILE *)
re_pattern_buffer::capture_history
unsigned int capture_history
Definition: onigmo.h:766
ScanEnv::bt_mem_start
BitStatusType bt_mem_start
Definition: regparse.h:296
OP_MEMORY_START
@ OP_MEMORY_START
Definition: regint.h:612
ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:685
NodeOptInfo
Definition: regcomp.c:4377
OP_REPEAT_INC_NG_SG
@ OP_REPEAT_INC_NG_SG
Definition: regint.h:632
GET_ABSADDR_INC
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:688
StrNode::end
UChar * end
Definition: regparse.h:173
ruby::backward::cxxanyargs::type
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
IN_RECCALL
#define IN_RECCALL
Definition: regcomp.c:3862
NT_ANCHOR
#define NT_ANCHOR
Definition: regparse.h:45
NST_CALLED
#define NST_CALLED
Definition: regparse.h:136
xalloca
#define xalloca
Definition: regint.h:213
BRefNode::back_dynamic
int * back_dynamic
Definition: regparse.h:240
SIZE_OP_SET_OPTION_PUSH
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:719
__sFILE
Definition: vsnprintf.c:169
NTYPE
#define NTYPE(node)
Definition: regparse.h:69
d1
#define d1
SIZE_OP_PUSH_STOP_BT
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:727
IS_DYNAMIC_OPTION
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:403
OP_NULL_CHECK_START
@ OP_NULL_CHECK_START
Definition: regint.h:633
OP_EXACTMB2N
@ OP_EXACTMB2N
Definition: regint.h:562
onig_new
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5973
NSTRING_LEN
#define NSTRING_LEN(node)
Definition: regparse.h:108