Ruby 2.7.0p0 (2019-12-25 revision 647ee6f091eafcce70ffb75ddf7e121e192ab217)
regparse.c
Go to the documentation of this file.
1 /**********************************************************************
2  regparse.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 #include <stdarg.h>
33 
34 #define WARN_BUFSIZE 256
35 
36 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
37 
38 
55 #ifndef RUBY
57 #endif
63  , ( SYN_GNU_REGEX_BV |
74  ,
75  {
76  (OnigCodePoint )'\\' /* esc */
77  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
78  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
79  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
80  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
81  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
82  }
83 };
84 
86 
87 extern void onig_null_warn(const char* s ARG_UNUSED) { }
88 
89 #ifdef DEFAULT_WARN_FUNCTION
90 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
91 #else
92 static OnigWarnFunc onig_warn = onig_null_warn;
93 #endif
94 
95 #ifdef DEFAULT_VERB_WARN_FUNCTION
96 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
97 #else
98 static OnigWarnFunc onig_verb_warn = onig_null_warn;
99 #endif
100 
102 {
103  onig_warn = f;
104 }
105 
107 {
108  onig_verb_warn = f;
109 }
110 
111 static void CC_DUP_WARN(ScanEnv *env, OnigCodePoint from, OnigCodePoint to);
112 
113 
114 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
115 
116 extern unsigned int
118 {
119  return ParseDepthLimit;
120 }
121 
122 extern int
123 onig_set_parse_depth_limit(unsigned int depth)
124 {
125  if (depth == 0)
126  ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
127  else
128  ParseDepthLimit = depth;
129  return 0;
130 }
131 
132 
133 static void
134 bbuf_free(BBuf* bbuf)
135 {
136  if (IS_NOT_NULL(bbuf)) {
137  if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
138  xfree(bbuf);
139  }
140 }
141 
142 static int
143 bbuf_clone(BBuf** rto, BBuf* from)
144 {
145  int r;
146  BBuf *to;
147 
148  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
150  r = BBUF_INIT(to, from->alloc);
151  if (r != 0) return r;
152  to->used = from->used;
153  xmemcpy(to->p, from->p, from->used);
154  return 0;
155 }
156 
157 #define BACKREF_REL_TO_ABS(rel_no, env) \
158  ((env)->num_mem + 1 + (rel_no))
159 
/* Clear the bits `f` in `v` when `negative` is non-zero, otherwise set them.
 * The whole expansion is parenthesized so it stays one operand when used
 * inside a larger expression. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
161 
162 #define MBCODE_START_POS(enc) \
163  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
164 
165 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
167 
168 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170  r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
171  if (r) return r;\
172  }\
173 } while (0)
174 
175 
176 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env, pos, pos); \
178  BS_ROOM(bs, pos) |= BS_BIT(pos); \
179 } while (0)
180 
181 #define BITSET_IS_EMPTY(bs,empty) do {\
182  int i;\
183  empty = 1;\
184  for (i = 0; i < BITSET_SIZE; i++) {\
185  if ((bs)[i] != 0) {\
186  empty = 0; break;\
187  }\
188  }\
189 } while (0)
190 
191 static void
192 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
193 {
194  int i;
195  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
197  }
198 }
199 
200 #if 0
201 static void
202 bitset_set_all(BitSetRef bs)
203 {
204  int i;
205  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
206 }
207 #endif
208 
209 static void
210 bitset_invert(BitSetRef bs)
211 {
212  int i;
213  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
214 }
215 
216 static void
217 bitset_invert_to(BitSetRef from, BitSetRef to)
218 {
219  int i;
220  for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
221 }
222 
223 static void
224 bitset_and(BitSetRef dest, BitSetRef bs)
225 {
226  int i;
227  for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
228 }
229 
230 static void
231 bitset_or(BitSetRef dest, BitSetRef bs)
232 {
233  int i;
234  for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
235 }
236 
237 static void
238 bitset_copy(BitSetRef dest, BitSetRef bs)
239 {
240  int i;
241  for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
242 }
243 
244 #if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
245 extern int
246 onig_strncmp(const UChar* s1, const UChar* s2, int n)
247 {
248  int x;
249 
250  while (n-- > 0) {
251  x = *s2++ - *s1++;
252  if (x) return x;
253  }
254  return 0;
255 }
256 #endif
257 
258 extern void
259 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
260 {
261  ptrdiff_t len = end - src;
262  if (len > 0) {
263  xmemcpy(dest, src, len);
264  dest[len] = (UChar )0;
265  }
266 }
267 
268 #ifdef USE_NAMED_GROUP
269 static UChar*
270 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
271 {
272  ptrdiff_t slen;
273  int term_len, i;
274  UChar *r;
275 
276  slen = end - s;
277  term_len = ONIGENC_MBC_MINLEN(enc);
278 
279  r = (UChar* )xmalloc(slen + term_len);
281  xmemcpy(r, s, slen);
282 
283  for (i = 0; i < term_len; i++)
284  r[slen + i] = (UChar )0;
285 
286  return r;
287 }
288 #endif
289 
290 /* scan pattern methods */
291 #define PEND_VALUE 0
292 
293 #ifdef __GNUC__
294 /* get rid of Wunused-but-set-variable and Wuninitialized */
295 # define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
296 #else
297 # define PFETCH_READY UChar* pfetch_prev
298 #endif
299 #define PEND (p < end ? 0 : 1)
300 #define PUNFETCH p = pfetch_prev
301 #define PINC do { \
302  pfetch_prev = p; \
303  p += enclen(enc, p, end); \
304 } while (0)
305 #define PFETCH(c) do { \
306  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
307  pfetch_prev = p; \
308  p += enclen(enc, p, end); \
309 } while (0)
310 
311 #define PINC_S do { \
312  p += enclen(enc, p, end); \
313 } while (0)
314 #define PFETCH_S(c) do { \
315  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316  p += enclen(enc, p, end); \
317 } while (0)
318 
319 #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* True when the next code point (or PEND_VALUE at end of input) equals c.
 * The argument is parenthesized so the cast applies to the whole expression. */
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )(c))
321 
322 static UChar*
323 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
324  size_t capa)
325 {
326  UChar* r;
327 
328  if (dest)
329  r = (UChar* )xrealloc(dest, capa + 1);
330  else
331  r = (UChar* )xmalloc(capa + 1);
332 
334  onig_strcpy(r + (dest_end - dest), src, src_end);
335  return r;
336 }
337 
338 /* dest on static area */
339 static UChar*
340 strcat_capa_from_static(UChar* dest, UChar* dest_end,
341  const UChar* src, const UChar* src_end, size_t capa)
342 {
343  UChar* r;
344 
345  r = (UChar* )xmalloc(capa + 1);
347  onig_strcpy(r, dest, dest_end);
348  onig_strcpy(r + (dest_end - dest), src, src_end);
349  return r;
350 }
351 
352 
353 #ifdef USE_ST_LIBRARY
354 
355 # ifdef RUBY
356 # include "ruby/st.h"
357 # else
358 # include "st.h"
359 # endif
360 
361 typedef struct {
362  const UChar* s;
363  const UChar* end;
365 
366 static int
367 str_end_cmp(st_data_t xp, st_data_t yp)
368 {
369  const st_str_end_key *x, *y;
370  const UChar *p, *q;
371  int c;
372 
373  x = (const st_str_end_key *)xp;
374  y = (const st_str_end_key *)yp;
375  if ((x->end - x->s) != (y->end - y->s))
376  return 1;
377 
378  p = x->s;
379  q = y->s;
380  while (p < x->end) {
381  c = (int )*p - (int )*q;
382  if (c != 0) return c;
383 
384  p++; q++;
385  }
386 
387  return 0;
388 }
389 
390 static st_index_t
391 str_end_hash(st_data_t xp)
392 {
393  const st_str_end_key *x = (const st_str_end_key *)xp;
394  const UChar *p;
395  st_index_t val = 0;
396 
397  p = x->s;
398  while (p < x->end) {
399  val = val * 997 + (int )*p++;
400  }
401 
402  return val + (val >> 5);
403 }
404 
405 extern hash_table_type*
407 {
408  static const struct st_hash_type hashType = {
409  str_end_cmp,
410  str_end_hash,
411  };
412 
413  return (hash_table_type* )
414  onig_st_init_table_with_size(&hashType, size);
415 }
416 
417 extern int
419  const UChar* end_key, hash_data_type *value)
420 {
422 
423  key.s = (UChar* )str_key;
424  key.end = (UChar* )end_key;
425 
426  return onig_st_lookup(table, (st_data_t )(&key), value);
427 }
428 
429 extern int
431  const UChar* end_key, hash_data_type value)
432 {
434  int result;
435 
437  key->s = (UChar* )str_key;
438  key->end = (UChar* )end_key;
439  result = onig_st_insert(table, (st_data_t )key, value);
440  if (result) {
441  xfree(key);
442  }
443  return result;
444 }
445 
446 #endif /* USE_ST_LIBRARY */
447 
448 
449 #ifdef USE_NAMED_GROUP
450 
451 # define INIT_NAME_BACKREFS_ALLOC_NUM 8
452 
453 typedef struct {
455  size_t name_len; /* byte length */
456  int back_num; /* number of backrefs */
459  int* back_refs;
460 } NameEntry;
461 
462 # ifdef USE_ST_LIBRARY
463 
465 typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
466 
467 # ifdef ONIG_DEBUG
468 static int
469 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
470 {
471  int i;
472  FILE* fp = (FILE* )arg;
473 
474  fprintf(fp, "%s: ", e->name);
475  if (e->back_num == 0)
476  fputs("-", fp);
477  else if (e->back_num == 1)
478  fprintf(fp, "%d", e->back_ref1);
479  else {
480  for (i = 0; i < e->back_num; i++) {
481  if (i > 0) fprintf(fp, ", ");
482  fprintf(fp, "%d", e->back_refs[i]);
483  }
484  }
485  fputs("\n", fp);
486  return ST_CONTINUE;
487 }
488 
489 extern int
490 onig_print_names(FILE* fp, regex_t* reg)
491 {
492  NameTable* t = (NameTable* )reg->name_table;
493 
494  if (IS_NOT_NULL(t)) {
495  fprintf(fp, "name table\n");
496  onig_st_foreach(t, (st_foreach_callback_func *)i_print_name_entry, (HashDataType )fp);
497  fputs("\n", fp);
498  }
499  return 0;
500 }
501 # endif /* ONIG_DEBUG */
502 
503 static int
504 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
505 {
506  xfree(e->name);
507  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
508  xfree(key);
509  xfree(e);
510  return ST_DELETE;
511 }
512 
513 static int
514 names_clear(regex_t* reg)
515 {
516  NameTable* t = (NameTable* )reg->name_table;
517 
518  if (IS_NOT_NULL(t)) {
519  onig_st_foreach(t, (st_foreach_callback_func *)i_free_name_entry, 0);
520  }
521  return 0;
522 }
523 
524 extern int
526 {
527  int r;
528  NameTable* t;
529 
530  r = names_clear(reg);
531  if (r) return r;
532 
533  t = (NameTable* )reg->name_table;
534  if (IS_NOT_NULL(t)) onig_st_free_table(t);
535  reg->name_table = (void* )NULL;
536  return 0;
537 }
538 
539 static NameEntry*
540 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
541 {
542  NameEntry* e;
543  NameTable* t = (NameTable* )reg->name_table;
544 
545  e = (NameEntry* )NULL;
546  if (IS_NOT_NULL(t)) {
547  onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
548  }
549  return e;
550 }
551 
552 typedef struct {
553  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
555  void* arg;
556  int ret;
558 } INamesArg;
559 
560 static int
561 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
562 {
563  int r = (*(arg->func))(e->name,
564  e->name + e->name_len,
565  e->back_num,
566  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
567  arg->reg, arg->arg);
568  if (r != 0) {
569  arg->ret = r;
570  return ST_STOP;
571  }
572  return ST_CONTINUE;
573 }
574 
575 extern int
577  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
578 {
579  INamesArg narg;
580  NameTable* t = (NameTable* )reg->name_table;
581 
582  narg.ret = 0;
583  if (IS_NOT_NULL(t)) {
584  narg.func = func;
585  narg.reg = reg;
586  narg.arg = arg;
587  narg.enc = reg->enc; /* should be pattern encoding. */
588  onig_st_foreach(t, (st_foreach_callback_func *)i_names, (HashDataType )&narg);
589  }
590  return narg.ret;
591 }
592 
593 static int
594 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
595 {
596  int i;
597 
598  if (e->back_num > 1) {
599  for (i = 0; i < e->back_num; i++) {
600  e->back_refs[i] = map[e->back_refs[i]].new_val;
601  }
602  }
603  else if (e->back_num == 1) {
604  e->back_ref1 = map[e->back_ref1].new_val;
605  }
606 
607  return ST_CONTINUE;
608 }
609 
610 extern int
612 {
613  NameTable* t = (NameTable* )reg->name_table;
614 
615  if (IS_NOT_NULL(t)) {
616  onig_st_foreach(t, (st_foreach_callback_func *)i_renumber_name, (HashDataType )map);
617  }
618  return 0;
619 }
620 
621 
622 extern int
624 {
625  NameTable* t = (NameTable* )reg->name_table;
626 
627  if (IS_NOT_NULL(t))
628  return (int )t->num_entries;
629  else
630  return 0;
631 }
632 
633 # else /* USE_ST_LIBRARY */
634 
635 # define INIT_NAMES_ALLOC_NUM 8
636 
637 typedef struct {
638  NameEntry* e;
639  int num;
640  int alloc;
641 } NameTable;
642 
643 # ifdef ONIG_DEBUG
644 extern int
645 onig_print_names(FILE* fp, regex_t* reg)
646 {
647  int i, j;
648  NameEntry* e;
649  NameTable* t = (NameTable* )reg->name_table;
650 
651  if (IS_NOT_NULL(t) && t->num > 0) {
652  fprintf(fp, "name table\n");
653  for (i = 0; i < t->num; i++) {
654  e = &(t->e[i]);
655  fprintf(fp, "%s: ", e->name);
656  if (e->back_num == 0) {
657  fputs("-", fp);
658  }
659  else if (e->back_num == 1) {
660  fprintf(fp, "%d", e->back_ref1);
661  }
662  else {
663  for (j = 0; j < e->back_num; j++) {
664  if (j > 0) fprintf(fp, ", ");
665  fprintf(fp, "%d", e->back_refs[j]);
666  }
667  }
668  fputs("\n", fp);
669  }
670  fputs("\n", fp);
671  }
672  return 0;
673 }
674 # endif
675 
676 static int
677 names_clear(regex_t* reg)
678 {
679  int i;
680  NameEntry* e;
681  NameTable* t = (NameTable* )reg->name_table;
682 
683  if (IS_NOT_NULL(t)) {
684  for (i = 0; i < t->num; i++) {
685  e = &(t->e[i]);
686  if (IS_NOT_NULL(e->name)) {
687  xfree(e->name);
688  e->name = NULL;
689  e->name_len = 0;
690  e->back_num = 0;
691  e->back_alloc = 0;
692  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
693  e->back_refs = (int* )NULL;
694  }
695  }
696  if (IS_NOT_NULL(t->e)) {
697  xfree(t->e);
698  t->e = NULL;
699  }
700  t->num = 0;
701  }
702  return 0;
703 }
704 
705 extern int
707 {
708  int r;
709  NameTable* t;
710 
711  r = names_clear(reg);
712  if (r) return r;
713 
714  t = (NameTable* )reg->name_table;
715  if (IS_NOT_NULL(t)) xfree(t);
716  reg->name_table = NULL;
717  return 0;
718 }
719 
720 static NameEntry*
721 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
722 {
723  int i, len;
724  NameEntry* e;
725  NameTable* t = (NameTable* )reg->name_table;
726 
727  if (IS_NOT_NULL(t)) {
728  len = name_end - name;
729  for (i = 0; i < t->num; i++) {
730  e = &(t->e[i]);
731  if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
732  return e;
733  }
734  }
735  return (NameEntry* )NULL;
736 }
737 
738 extern int
740  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
741 {
742  int i, r;
743  NameEntry* e;
744  NameTable* t = (NameTable* )reg->name_table;
745 
746  if (IS_NOT_NULL(t)) {
747  for (i = 0; i < t->num; i++) {
748  e = &(t->e[i]);
749  r = (*func)(e->name, e->name + e->name_len, e->back_num,
750  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
751  reg, arg);
752  if (r != 0) return r;
753  }
754  }
755  return 0;
756 }
757 
758 extern int
759 onig_number_of_names(const regex_t* reg)
760 {
761  NameTable* t = (NameTable* )reg->name_table;
762 
763  if (IS_NOT_NULL(t))
764  return t->num;
765  else
766  return 0;
767 }
768 
769 # endif /* else USE_ST_LIBRARY */
770 
771 static int
772 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
773 {
774  int alloc;
775  NameEntry* e;
776  NameTable* t = (NameTable* )reg->name_table;
777 
778  if (name_end - name <= 0)
780 
781  e = name_find(reg, name, name_end);
782  if (IS_NULL(e)) {
783 # ifdef USE_ST_LIBRARY
784  if (IS_NULL(t)) {
786  reg->name_table = (void* )t;
787  }
788  e = (NameEntry* )xmalloc(sizeof(NameEntry));
790 
791  e->name = strdup_with_null(reg->enc, name, name_end);
792  if (IS_NULL(e->name)) {
793  xfree(e);
794  return ONIGERR_MEMORY;
795  }
796  onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
797  (HashDataType )e);
798 
799  e->name_len = name_end - name;
800  e->back_num = 0;
801  e->back_alloc = 0;
802  e->back_refs = (int* )NULL;
803 
804 # else
805 
806  if (IS_NULL(t)) {
807  alloc = INIT_NAMES_ALLOC_NUM;
808  t = (NameTable* )xmalloc(sizeof(NameTable));
810  t->e = NULL;
811  t->alloc = 0;
812  t->num = 0;
813 
814  t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
815  if (IS_NULL(t->e)) {
816  xfree(t);
817  return ONIGERR_MEMORY;
818  }
819  t->alloc = alloc;
820  reg->name_table = t;
821  goto clear;
822  }
823  else if (t->num == t->alloc) {
824  int i;
825  NameEntry* p;
826 
827  alloc = t->alloc * 2;
828  p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
830  t->e = p;
831  t->alloc = alloc;
832 
833  clear:
834  for (i = t->num; i < t->alloc; i++) {
835  t->e[i].name = NULL;
836  t->e[i].name_len = 0;
837  t->e[i].back_num = 0;
838  t->e[i].back_alloc = 0;
839  t->e[i].back_refs = (int* )NULL;
840  }
841  }
842  e = &(t->e[t->num]);
843  t->num++;
844  e->name = strdup_with_null(reg->enc, name, name_end);
845  if (IS_NULL(e->name)) return ONIGERR_MEMORY;
846  e->name_len = name_end - name;
847 # endif
848  }
849 
850  if (e->back_num >= 1 &&
853  name, name_end);
855  }
856 
857  e->back_num++;
858  if (e->back_num == 1) {
859  e->back_ref1 = backref;
860  }
861  else {
862  if (e->back_num == 2) {
864  e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
866  e->back_alloc = alloc;
867  e->back_refs[0] = e->back_ref1;
868  e->back_refs[1] = backref;
869  }
870  else {
871  if (e->back_num > e->back_alloc) {
872  int* p;
873  alloc = e->back_alloc * 2;
874  p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
876  e->back_refs = p;
877  e->back_alloc = alloc;
878  }
879  e->back_refs[e->back_num - 1] = backref;
880  }
881  }
882 
883  return 0;
884 }
885 
886 extern int
888  const UChar* name_end, int** nums)
889 {
890  NameEntry* e = name_find(reg, name, name_end);
891 
893 
894  switch (e->back_num) {
895  case 0:
896  *nums = 0;
897  break;
898  case 1:
899  *nums = &(e->back_ref1);
900  break;
901  default:
902  *nums = e->back_refs;
903  break;
904  }
905  return e->back_num;
906 }
907 
908 extern int
910  const UChar* name_end, const OnigRegion *region)
911 {
912  int i, n, *nums;
913 
914  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
915  if (n < 0)
916  return n;
917  else if (n == 0)
918  return ONIGERR_PARSER_BUG;
919  else if (n == 1)
920  return nums[0];
921  else {
922  if (IS_NOT_NULL(region)) {
923  for (i = n - 1; i >= 0; i--) {
924  if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
925  return nums[i];
926  }
927  }
928  return nums[n - 1];
929  }
930 }
931 
932 #else /* USE_NAMED_GROUP */
933 
934 extern int
936  const UChar* name_end, int** nums)
937 {
938  return ONIG_NO_SUPPORT_CONFIG;
939 }
940 
941 extern int
943  const UChar* name_end, const OnigRegion* region)
944 {
945  return ONIG_NO_SUPPORT_CONFIG;
946 }
947 
948 extern int
950  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
951 {
952  return ONIG_NO_SUPPORT_CONFIG;
953 }
954 
955 extern int
956 onig_number_of_names(const regex_t* reg)
957 {
958  return 0;
959 }
960 #endif /* else USE_NAMED_GROUP */
961 
962 extern int
964 {
966  return 0;
967 
968 #ifdef USE_NAMED_GROUP
969  if (onig_number_of_names(reg) > 0 &&
972  return 0;
973  }
974 #endif
975 
976  return 1;
977 }
978 
979 
980 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
981 
982 static void
983 scan_env_clear(ScanEnv* env)
984 {
985  int i;
986 
987  BIT_STATUS_CLEAR(env->capture_history);
988  BIT_STATUS_CLEAR(env->bt_mem_start);
989  BIT_STATUS_CLEAR(env->bt_mem_end);
990  BIT_STATUS_CLEAR(env->backrefed_mem);
991  env->error = (UChar* )NULL;
992  env->error_end = (UChar* )NULL;
993  env->num_call = 0;
994  env->num_mem = 0;
995 #ifdef USE_NAMED_GROUP
996  env->num_named = 0;
997 #endif
998  env->mem_alloc = 0;
999  env->mem_nodes_dynamic = (Node** )NULL;
1000 
1001  for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
1002  env->mem_nodes_static[i] = NULL_NODE;
1003 
1004 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1005  env->num_comb_exp_check = 0;
1006  env->comb_exp_max_regnum = 0;
1007  env->curr_max_regnum = 0;
1008  env->has_recursion = 0;
1009 #endif
1010  env->parse_depth = 0;
1011  env->warnings_flag = 0;
1012 }
1013 
1014 static int
1015 scan_env_add_mem_entry(ScanEnv* env)
1016 {
1017  int i, need, alloc;
1018  Node** p;
1019 
1020  need = env->num_mem + 1;
1021  if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
1023  if (need >= SCANENV_MEMNODES_SIZE) {
1024  if (env->mem_alloc <= need) {
1025  if (IS_NULL(env->mem_nodes_dynamic)) {
1027  p = (Node** )xmalloc(sizeof(Node*) * alloc);
1029  xmemcpy(p, env->mem_nodes_static,
1030  sizeof(Node*) * SCANENV_MEMNODES_SIZE);
1031  }
1032  else {
1033  alloc = env->mem_alloc * 2;
1034  p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
1036  }
1037 
1038  for (i = env->num_mem + 1; i < alloc; i++)
1039  p[i] = NULL_NODE;
1040 
1041  env->mem_nodes_dynamic = p;
1042  env->mem_alloc = alloc;
1043  }
1044  }
1045 
1046  env->num_mem++;
1047  return env->num_mem;
1048 }
1049 
1050 static int
1051 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1052 {
1053  if (env->num_mem >= num)
1054  SCANENV_MEM_NODES(env)[num] = node;
1055  else
1056  return ONIGERR_PARSER_BUG;
1057  return 0;
1058 }
1059 
1060 
1061 extern void
1063 {
1064  start:
1065  if (IS_NULL(node)) return ;
1066 
1067  switch (NTYPE(node)) {
1068  case NT_STR:
1069  if (NSTR(node)->capa != 0 &&
1070  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1071  xfree(NSTR(node)->s);
1072  }
1073  break;
1074 
1075  case NT_LIST:
1076  case NT_ALT:
1077  onig_node_free(NCAR(node));
1078  {
1079  Node* next_node = NCDR(node);
1080 
1081  xfree(node);
1082  node = next_node;
1083  goto start;
1084  }
1085  break;
1086 
1087  case NT_CCLASS:
1088  {
1089  CClassNode* cc = NCCLASS(node);
1090 
1091  if (cc->mbuf)
1092  bbuf_free(cc->mbuf);
1093  }
1094  break;
1095 
1096  case NT_QTFR:
1097  if (NQTFR(node)->target)
1098  onig_node_free(NQTFR(node)->target);
1099  break;
1100 
1101  case NT_ENCLOSE:
1102  if (NENCLOSE(node)->target)
1103  onig_node_free(NENCLOSE(node)->target);
1104  break;
1105 
1106  case NT_BREF:
1107  if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1108  xfree(NBREF(node)->back_dynamic);
1109  break;
1110 
1111  case NT_ANCHOR:
1112  if (NANCHOR(node)->target)
1113  onig_node_free(NANCHOR(node)->target);
1114  break;
1115  }
1116 
1117  xfree(node);
1118 }
1119 
1120 static Node*
1121 node_new(void)
1122 {
1123  Node* node;
1124 
1125  node = (Node* )xmalloc(sizeof(Node));
1126  /* xmemset(node, 0, sizeof(Node)); */
1127  return node;
1128 }
1129 
1130 static void
1131 initialize_cclass(CClassNode* cc)
1132 {
1133  BITSET_CLEAR(cc->bs);
1134  /* cc->base.flags = 0; */
1135  cc->flags = 0;
1136  cc->mbuf = NULL;
1137 }
1138 
1139 static Node*
1140 node_new_cclass(void)
1141 {
1142  Node* node = node_new();
1143  CHECK_NULL_RETURN(node);
1144 
1145  SET_NTYPE(node, NT_CCLASS);
1146  initialize_cclass(NCCLASS(node));
1147  return node;
1148 }
1149 
1150 static Node*
1151 node_new_ctype(int type, int not, int ascii_range)
1152 {
1153  Node* node = node_new();
1154  CHECK_NULL_RETURN(node);
1155 
1156  SET_NTYPE(node, NT_CTYPE);
1157  NCTYPE(node)->ctype = type;
1158  NCTYPE(node)->not = not;
1159  NCTYPE(node)->ascii_range = ascii_range;
1160  return node;
1161 }
1162 
1163 static Node*
1164 node_new_anychar(void)
1165 {
1166  Node* node = node_new();
1167  CHECK_NULL_RETURN(node);
1168 
1169  SET_NTYPE(node, NT_CANY);
1170  return node;
1171 }
1172 
1173 static Node*
1174 node_new_list(Node* left, Node* right)
1175 {
1176  Node* node = node_new();
1177  CHECK_NULL_RETURN(node);
1178 
1179  SET_NTYPE(node, NT_LIST);
1180  NCAR(node) = left;
1181  NCDR(node) = right;
1182  return node;
1183 }
1184 
1185 extern Node*
1187 {
1188  return node_new_list(left, right);
1189 }
1190 
1191 extern Node*
1193 {
1194  Node *n;
1195 
1196  n = onig_node_new_list(x, NULL);
1197  if (IS_NULL(n)) return NULL_NODE;
1198 
1199  if (IS_NOT_NULL(list)) {
1200  while (IS_NOT_NULL(NCDR(list)))
1201  list = NCDR(list);
1202 
1203  NCDR(list) = n;
1204  }
1205 
1206  return n;
1207 }
1208 
1209 extern Node*
1211 {
1212  Node* node = node_new();
1213  CHECK_NULL_RETURN(node);
1214 
1215  SET_NTYPE(node, NT_ALT);
1216  NCAR(node) = left;
1217  NCDR(node) = right;
1218  return node;
1219 }
1220 
1221 extern Node*
1223 {
1224  Node* node = node_new();
1225  CHECK_NULL_RETURN(node);
1226 
1227  SET_NTYPE(node, NT_ANCHOR);
1228  NANCHOR(node)->type = type;
1229  NANCHOR(node)->target = NULL;
1230  NANCHOR(node)->char_len = -1;
1231  NANCHOR(node)->ascii_range = 0;
1232  return node;
1233 }
1234 
1235 static Node*
1236 node_new_backref(int back_num, int* backrefs, int by_name,
1238  int exist_level, int nest_level,
1239 #endif
1240  ScanEnv* env)
1241 {
1242  int i;
1243  Node* node = node_new();
1244 
1245  CHECK_NULL_RETURN(node);
1246 
1247  SET_NTYPE(node, NT_BREF);
1248  NBREF(node)->state = 0;
1249  NBREF(node)->back_num = back_num;
1250  NBREF(node)->back_dynamic = (int* )NULL;
1251  if (by_name != 0)
1252  NBREF(node)->state |= NST_NAME_REF;
1253 
1254 #ifdef USE_BACKREF_WITH_LEVEL
1255  if (exist_level != 0) {
1256  NBREF(node)->state |= NST_NEST_LEVEL;
1257  NBREF(node)->nest_level = nest_level;
1258  }
1259 #endif
1260 
1261  for (i = 0; i < back_num; i++) {
1262  if (backrefs[i] <= env->num_mem &&
1263  IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1264  NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
1265  break;
1266  }
1267  }
1268 
1269  if (back_num <= NODE_BACKREFS_SIZE) {
1270  for (i = 0; i < back_num; i++)
1271  NBREF(node)->back_static[i] = backrefs[i];
1272  }
1273  else {
1274  int* p = (int* )xmalloc(sizeof(int) * back_num);
1275  if (IS_NULL(p)) {
1276  onig_node_free(node);
1277  return NULL;
1278  }
1279  NBREF(node)->back_dynamic = p;
1280  for (i = 0; i < back_num; i++)
1281  p[i] = backrefs[i];
1282  }
1283  return node;
1284 }
1285 
1286 #ifdef USE_SUBEXP_CALL
1287 static Node*
1288 node_new_call(UChar* name, UChar* name_end, int gnum)
1289 {
1290  Node* node = node_new();
1291  CHECK_NULL_RETURN(node);
1292 
1293  SET_NTYPE(node, NT_CALL);
1294  NCALL(node)->state = 0;
1295  NCALL(node)->target = NULL_NODE;
1296  NCALL(node)->name = name;
1297  NCALL(node)->name_end = name_end;
1298  NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
1299  return node;
1300 }
1301 #endif
1302 
1303 static Node*
1304 node_new_quantifier(int lower, int upper, int by_number)
1305 {
1306  Node* node = node_new();
1307  CHECK_NULL_RETURN(node);
1308 
1309  SET_NTYPE(node, NT_QTFR);
1310  NQTFR(node)->state = 0;
1311  NQTFR(node)->target = NULL;
1312  NQTFR(node)->lower = lower;
1313  NQTFR(node)->upper = upper;
1314  NQTFR(node)->greedy = 1;
1315  NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1316  NQTFR(node)->head_exact = NULL_NODE;
1317  NQTFR(node)->next_head_exact = NULL_NODE;
1318  NQTFR(node)->is_referred = 0;
1319  if (by_number != 0)
1320  NQTFR(node)->state |= NST_BY_NUMBER;
1321 
1322 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1323  NQTFR(node)->comb_exp_check_num = 0;
1324 #endif
1325 
1326  return node;
1327 }
1328 
1329 static Node*
1330 node_new_enclose(int type)
1331 {
1332  Node* node = node_new();
1333  CHECK_NULL_RETURN(node);
1334 
1335  SET_NTYPE(node, NT_ENCLOSE);
1336  NENCLOSE(node)->type = type;
1337  NENCLOSE(node)->state = 0;
1338  NENCLOSE(node)->regnum = 0;
1339  NENCLOSE(node)->option = 0;
1340  NENCLOSE(node)->target = NULL;
1341  NENCLOSE(node)->call_addr = -1;
1342  NENCLOSE(node)->opt_count = 0;
1343  return node;
1344 }
1345 
1346 extern Node*
1348 {
1349  return node_new_enclose(type);
1350 }
1351 
1352 static Node*
1353 node_new_enclose_memory(OnigOptionType option, int is_named)
1354 {
1355  Node* node = node_new_enclose(ENCLOSE_MEMORY);
1356  CHECK_NULL_RETURN(node);
1357  if (is_named != 0)
1359 
1360 #ifdef USE_SUBEXP_CALL
1361  NENCLOSE(node)->option = option;
1362 #endif
1363  return node;
1364 }
1365 
1366 static Node*
1367 node_new_option(OnigOptionType option)
1368 {
1369  Node* node = node_new_enclose(ENCLOSE_OPTION);
1370  CHECK_NULL_RETURN(node);
1371  NENCLOSE(node)->option = option;
1372  return node;
1373 }
1374 
1375 extern int
1376 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1377 {
1378  ptrdiff_t addlen = end - s;
1379 
1380  if (addlen > 0) {
1381  ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
1382 
1383  if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1384  UChar* p;
1385  ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
1386 
1387  if (capa <= NSTR(node)->capa) {
1388  onig_strcpy(NSTR(node)->s + len, s, end);
1389  }
1390  else {
1391  if (NSTR(node)->s == NSTR(node)->buf)
1392  p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1393  s, end, capa);
1394  else
1395  p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1396 
1398  NSTR(node)->s = p;
1399  NSTR(node)->capa = (int )capa;
1400  }
1401  }
1402  else {
1403  onig_strcpy(NSTR(node)->s + len, s, end);
1404  }
1405  NSTR(node)->end = NSTR(node)->s + len + addlen;
1406  }
1407 
1408  return 0;
1409 }
1410 
1411 extern int
1412 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1413 {
1414  onig_node_str_clear(node);
1415  return onig_node_str_cat(node, s, end);
1416 }
1417 
1418 static int
1419 node_str_cat_char(Node* node, UChar c)
1420 {
1421  UChar s[1];
1422 
1423  s[0] = c;
1424  return onig_node_str_cat(node, s, s + 1);
1425 }
1426 
1427 static int
1428 node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
1429 {
1431  int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
1432  if (num < 0) return num;
1433  return onig_node_str_cat(node, buf, buf + num);
1434 }
1435 
#if 0
/* Compiled out.  Retags `node` as NT_STR with an empty string that
 * points at the node's inline buffer, and stores `flag` on it. */
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
  SET_NTYPE(node, NT_STR);

  NSTR(node)->s    = NSTR(node)->buf;
  NSTR(node)->end  = NSTR(node)->buf;
  NSTR(node)->capa = 0;
  NSTR(node)->flag = flag;
}
#endif
1447 
1448 extern void
1450 {
1451  if (NSTR(node)->capa != 0 &&
1452  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1453  xfree(NSTR(node)->s);
1454  }
1455 
1456  NSTR(node)->capa = 0;
1457  NSTR(node)->flag = 0;
1458  NSTR(node)->s = NSTR(node)->buf;
1459  NSTR(node)->end = NSTR(node)->buf;
1460 }
1461 
1462 static Node*
1463 node_new_str(const UChar* s, const UChar* end)
1464 {
1465  Node* node = node_new();
1466  CHECK_NULL_RETURN(node);
1467 
1468  SET_NTYPE(node, NT_STR);
1469  NSTR(node)->capa = 0;
1470  NSTR(node)->flag = 0;
1471  NSTR(node)->s = NSTR(node)->buf;
1472  NSTR(node)->end = NSTR(node)->buf;
1473  if (onig_node_str_cat(node, s, end)) {
1474  onig_node_free(node);
1475  return NULL;
1476  }
1477  return node;
1478 }
1479 
1480 extern Node*
1481 onig_node_new_str(const UChar* s, const UChar* end)
1482 {
1483  return node_new_str(s, end);
1484 }
1485 
1486 static Node*
1487 node_new_str_raw(UChar* s, UChar* end)
1488 {
1489  Node* node = node_new_str(s, end);
1490  if (IS_NOT_NULL(node))
1491  NSTRING_SET_RAW(node);
1492  return node;
1493 }
1494 
1495 static Node*
1496 node_new_empty(void)
1497 {
1498  return node_new_str(NULL, NULL);
1499 }
1500 
1501 static Node*
1502 node_new_str_raw_char(UChar c)
1503 {
1504  UChar p[1];
1505 
1506  p[0] = c;
1507  return node_new_str_raw(p, p + 1);
1508 }
1509 
1510 static Node*
1511 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1512 {
1513  const UChar *p;
1514  Node* n = NULL_NODE;
1515 
1516  if (sn->end > sn->s) {
1517  p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
1518  if (p && p > sn->s) { /* can be split. */
1519  n = node_new_str(p, sn->end);
1520  if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
1521  NSTRING_SET_RAW(n);
1522  sn->end = (UChar* )p;
1523  }
1524  }
1525  return n;
1526 }
1527 
1528 static int
1529 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1530 {
1531  if (sn->end > sn->s) {
1532  return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
1533  }
1534  return 0;
1535 }
1536 
1537 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
1538 static int
1539 node_str_head_pad(StrNode* sn, int num, UChar val)
1540 {
1542  int i, len;
1543 
1544  len = sn->end - sn->s;
1545  onig_strcpy(buf, sn->s, sn->end);
1546  onig_strcpy(&(sn->s[num]), buf, buf + len);
1547  sn->end += num;
1548 
1549  for (i = 0; i < num; i++) {
1550  sn->s[i] = val;
1551  }
1552 }
1553 #endif
1554 
1555 extern int
1557 {
1558  unsigned int num, val;
1559  OnigCodePoint c;
1560  UChar* p = *src;
1561  PFETCH_READY;
1562 
1563  num = 0;
1564  while (!PEND) {
1565  PFETCH(c);
1566  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1567  val = (unsigned int )DIGITVAL(c);
1568  if ((INT_MAX_LIMIT - val) / 10UL < num)
1569  return -1; /* overflow */
1570 
1571  num = num * 10 + val;
1572  }
1573  else {
1574  PUNFETCH;
1575  break;
1576  }
1577  }
1578  *src = p;
1579  return num;
1580 }
1581 
1582 static int
1583 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
1584  int maxlen, OnigEncoding enc)
1585 {
1586  OnigCodePoint c;
1587  unsigned int num, val;
1588  int restlen;
1589  UChar* p = *src;
1590  PFETCH_READY;
1591 
1592  restlen = maxlen - minlen;
1593  num = 0;
1594  while (!PEND && maxlen-- != 0) {
1595  PFETCH(c);
1596  if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1597  val = (unsigned int )XDIGITVAL(enc,c);
1598  if ((INT_MAX_LIMIT - val) / 16UL < num)
1599  return -1; /* overflow */
1600 
1601  num = (num << 4) + XDIGITVAL(enc,c);
1602  }
1603  else {
1604  PUNFETCH;
1605  maxlen++;
1606  break;
1607  }
1608  }
1609  if (maxlen > restlen)
1610  return -2; /* not enough digits */
1611  *src = p;
1612  return num;
1613 }
1614 
1615 static int
1616 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1617  OnigEncoding enc)
1618 {
1619  OnigCodePoint c;
1620  unsigned int num, val;
1621  UChar* p = *src;
1622  PFETCH_READY;
1623 
1624  num = 0;
1625  while (!PEND && maxlen-- != 0) {
1626  PFETCH(c);
1627  if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1628  val = ODIGITVAL(c);
1629  if ((INT_MAX_LIMIT - val) / 8UL < num)
1630  return -1; /* overflow */
1631 
1632  num = (num << 3) + val;
1633  }
1634  else {
1635  PUNFETCH;
1636  break;
1637  }
1638  }
1639  *src = p;
1640  return num;
1641 }
1642 
1643 
/* Write SIZE_CODE_POINT bytes taken from the object `code` into `bbuf`
 * at offset `pos`.  `code` must be an lvalue: its address is taken. */
#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
  BBUF_WRITE((bbuf), (pos), &(code), SIZE_CODE_POINT)
1646 
1647 /* data format:
1648  [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1649  (all data size is OnigCodePoint)
1650  */
1651 static int
1652 new_code_range(BBuf** pbuf)
1653 {
1654 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1655  int r;
1656  OnigCodePoint n;
1657  BBuf* bbuf;
1658 
1659  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1660  CHECK_NULL_RETURN_MEMERR(*pbuf);
1662  if (r) return r;
1663 
1664  n = 0;
1665  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1666  return 0;
1667 }
1668 
1669 static int
1670 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
1671  int checkdup)
1672 {
1673  int r, inc_n, pos;
1674  OnigCodePoint low, high, bound, x;
1675  OnigCodePoint n, *data;
1676  BBuf* bbuf;
1677 
1678  if (from > to) {
1679  n = from; from = to; to = n;
1680  }
1681 
1682  if (IS_NULL(*pbuf)) {
1683  r = new_code_range(pbuf);
1684  if (r) return r;
1685  bbuf = *pbuf;
1686  n = 0;
1687  }
1688  else {
1689  bbuf = *pbuf;
1690  GET_CODE_POINT(n, bbuf->p);
1691  }
1692  data = (OnigCodePoint* )(bbuf->p);
1693  data++;
1694 
1695  bound = (from == 0) ? 0 : n;
1696  for (low = 0; low < bound; ) {
1697  x = (low + bound) >> 1;
1698  if (from - 1 > data[x*2 + 1])
1699  low = x + 1;
1700  else
1701  bound = x;
1702  }
1703 
1704  high = (to == ONIG_LAST_CODE_POINT) ? n : low;
1705  for (bound = n; high < bound; ) {
1706  x = (high + bound) >> 1;
1707  if (to + 1 >= data[x*2])
1708  high = x + 1;
1709  else
1710  bound = x;
1711  }
1712  /* data[(low-1)*2+1] << from <= data[low*2]
1713  * data[(high-1)*2+1] <= to << data[high*2]
1714  */
1715 
1716  inc_n = low + 1 - high;
1717  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1719 
1720  if (inc_n != 1) {
1721  if (checkdup && from <= data[low*2+1]
1722  && (data[low*2] <= from || data[low*2+1] <= to))
1723  CC_DUP_WARN(env, from, to);
1724  if (from > data[low*2])
1725  from = data[low*2];
1726  if (to < data[(high - 1)*2 + 1])
1727  to = data[(high - 1)*2 + 1];
1728  }
1729 
1730  if (inc_n != 0) {
1731  int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1732  int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1733 
1734  if (inc_n > 0) {
1735  if (high < n) {
1736  int size = (n - high) * 2 * SIZE_CODE_POINT;
1737  BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1738  }
1739  }
1740  else {
1741  BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1742  }
1743  }
1744 
1745  pos = SIZE_CODE_POINT * (1 + low * 2);
1746  BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1747  BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1748  BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1749  n += inc_n;
1750  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1751 
1752  return 0;
1753 }
1754 
1755 static int
1756 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1757 {
1758  return add_code_range_to_buf0(pbuf, env, from, to, 1);
1759 }
1760 
1761 static int
1762 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
1763 {
1764  if (from > to) {
1766  return 0;
1767  else
1769  }
1770 
1771  return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
1772 }
1773 
1774 static int
1775 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1776 {
1777  return add_code_range0(pbuf, env, from, to, 1);
1778 }
1779 
1780 static int
1781 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
1782 {
1783  int r, i, n;
1784  OnigCodePoint pre, from, *data, to = 0;
1785 
1786  *pbuf = (BBuf* )NULL;
1787  if (IS_NULL(bbuf)) {
1788  set_all:
1789  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1790  }
1791 
1792  data = (OnigCodePoint* )(bbuf->p);
1793  GET_CODE_POINT(n, data);
1794  data++;
1795  if (n <= 0) goto set_all;
1796 
1797  r = 0;
1798  pre = MBCODE_START_POS(enc);
1799  for (i = 0; i < n; i++) {
1800  from = data[i*2];
1801  to = data[i*2+1];
1802  if (pre <= from - 1) {
1803  r = add_code_range_to_buf(pbuf, env, pre, from - 1);
1804  if (r != 0) return r;
1805  }
1806  if (to == ONIG_LAST_CODE_POINT) break;
1807  pre = to + 1;
1808  }
1809  if (to < ONIG_LAST_CODE_POINT) {
1810  r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
1811  }
1812  return r;
1813 }
1814 
/* Exchange two (range buffer, negation flag) pairs in place.
 * All four arguments must be assignable lvalues. */
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *swap_buf; \
  int swap_not; \
  swap_buf = bbuf1; bbuf1 = bbuf2; bbuf2 = swap_buf; \
  swap_not = not1; not1 = not2; not2 = swap_not; \
} while (0)
1821 
1822 static int
1823 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1824  BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1825 {
1826  int r;
1827  OnigCodePoint i, n1, *data1;
1828  OnigCodePoint from, to;
1829 
1830  *pbuf = (BBuf* )NULL;
1831  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1832  if (not1 != 0 || not2 != 0)
1833  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1834  return 0;
1835  }
1836 
1837  r = 0;
1838  if (IS_NULL(bbuf2))
1839  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1840 
1841  if (IS_NULL(bbuf1)) {
1842  if (not1 != 0) {
1843  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1844  }
1845  else {
1846  if (not2 == 0) {
1847  return bbuf_clone(pbuf, bbuf2);
1848  }
1849  else {
1850  return not_code_range_buf(enc, bbuf2, pbuf, env);
1851  }
1852  }
1853  }
1854 
1855  if (not1 != 0)
1856  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1857 
1858  data1 = (OnigCodePoint* )(bbuf1->p);
1859  GET_CODE_POINT(n1, data1);
1860  data1++;
1861 
1862  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1863  r = bbuf_clone(pbuf, bbuf2);
1864  }
1865  else if (not1 == 0) { /* 1 OR (not 2) */
1866  r = not_code_range_buf(enc, bbuf2, pbuf, env);
1867  }
1868  if (r != 0) return r;
1869 
1870  for (i = 0; i < n1; i++) {
1871  from = data1[i*2];
1872  to = data1[i*2+1];
1873  r = add_code_range_to_buf(pbuf, env, from, to);
1874  if (r != 0) return r;
1875  }
1876  return 0;
1877 }
1878 
1879 static int
1880 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
1881  OnigCodePoint* data, int n)
1882 {
1883  int i, r;
1884  OnigCodePoint from2, to2;
1885 
1886  for (i = 0; i < n; i++) {
1887  from2 = data[i*2];
1888  to2 = data[i*2+1];
1889  if (from2 < from1) {
1890  if (to2 < from1) continue;
1891  else {
1892  from1 = to2 + 1;
1893  }
1894  }
1895  else if (from2 <= to1) {
1896  if (to2 < to1) {
1897  if (from1 <= from2 - 1) {
1898  r = add_code_range_to_buf(pbuf, env, from1, from2-1);
1899  if (r != 0) return r;
1900  }
1901  from1 = to2 + 1;
1902  }
1903  else {
1904  to1 = from2 - 1;
1905  }
1906  }
1907  else {
1908  from1 = from2;
1909  }
1910  if (from1 > to1) break;
1911  }
1912  if (from1 <= to1) {
1913  r = add_code_range_to_buf(pbuf, env, from1, to1);
1914  if (r != 0) return r;
1915  }
1916  return 0;
1917 }
1918 
1919 static int
1920 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1921 {
1922  int r;
1923  OnigCodePoint i, j, n1, n2, *data1, *data2;
1924  OnigCodePoint from, to, from1, to1, from2, to2;
1925 
1926  *pbuf = (BBuf* )NULL;
1927  if (IS_NULL(bbuf1)) {
1928  if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1929  return bbuf_clone(pbuf, bbuf2);
1930  return 0;
1931  }
1932  else if (IS_NULL(bbuf2)) {
1933  if (not2 != 0)
1934  return bbuf_clone(pbuf, bbuf1);
1935  return 0;
1936  }
1937 
1938  if (not1 != 0)
1939  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1940 
1941  data1 = (OnigCodePoint* )(bbuf1->p);
1942  data2 = (OnigCodePoint* )(bbuf2->p);
1943  GET_CODE_POINT(n1, data1);
1944  GET_CODE_POINT(n2, data2);
1945  data1++;
1946  data2++;
1947 
1948  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1949  for (i = 0; i < n1; i++) {
1950  from1 = data1[i*2];
1951  to1 = data1[i*2+1];
1952  for (j = 0; j < n2; j++) {
1953  from2 = data2[j*2];
1954  to2 = data2[j*2+1];
1955  if (from2 > to1) break;
1956  if (to2 < from1) continue;
1957  from = MAX(from1, from2);
1958  to = MIN(to1, to2);
1959  r = add_code_range_to_buf(pbuf, env, from, to);
1960  if (r != 0) return r;
1961  }
1962  }
1963  }
1964  else if (not1 == 0) { /* 1 AND (not 2) */
1965  for (i = 0; i < n1; i++) {
1966  from1 = data1[i*2];
1967  to1 = data1[i*2+1];
1968  r = and_code_range1(pbuf, env, from1, to1, data2, n2);
1969  if (r != 0) return r;
1970  }
1971  }
1972 
1973  return 0;
1974 }
1975 
1976 static int
1977 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
1978 {
1979  OnigEncoding enc = env->enc;
1980  int r, not1, not2;
1981  BBuf *buf1, *buf2, *pbuf = 0;
1982  BitSetRef bsr1, bsr2;
1983  BitSet bs1, bs2;
1984 
1985  not1 = IS_NCCLASS_NOT(dest);
1986  bsr1 = dest->bs;
1987  buf1 = dest->mbuf;
1988  not2 = IS_NCCLASS_NOT(cc);
1989  bsr2 = cc->bs;
1990  buf2 = cc->mbuf;
1991 
1992  if (not1 != 0) {
1993  bitset_invert_to(bsr1, bs1);
1994  bsr1 = bs1;
1995  }
1996  if (not2 != 0) {
1997  bitset_invert_to(bsr2, bs2);
1998  bsr2 = bs2;
1999  }
2000  bitset_and(bsr1, bsr2);
2001  if (bsr1 != dest->bs) {
2002  bitset_copy(dest->bs, bsr1);
2003  bsr1 = dest->bs;
2004  }
2005  if (not1 != 0) {
2006  bitset_invert(dest->bs);
2007  }
2008 
2009  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2010  if (not1 != 0 && not2 != 0) {
2011  r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
2012  }
2013  else {
2014  r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
2015  if (r == 0 && not1 != 0) {
2016  BBuf *tbuf = 0;
2017  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2018  bbuf_free(pbuf);
2019  pbuf = tbuf;
2020  }
2021  }
2022  if (r != 0) {
2023  bbuf_free(pbuf);
2024  return r;
2025  }
2026 
2027  dest->mbuf = pbuf;
2028  bbuf_free(buf1);
2029  return r;
2030  }
2031  return 0;
2032 }
2033 
2034 static int
2035 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
2036 {
2037  OnigEncoding enc = env->enc;
2038  int r, not1, not2;
2039  BBuf *buf1, *buf2, *pbuf = 0;
2040  BitSetRef bsr1, bsr2;
2041  BitSet bs1, bs2;
2042 
2043  not1 = IS_NCCLASS_NOT(dest);
2044  bsr1 = dest->bs;
2045  buf1 = dest->mbuf;
2046  not2 = IS_NCCLASS_NOT(cc);
2047  bsr2 = cc->bs;
2048  buf2 = cc->mbuf;
2049 
2050  if (not1 != 0) {
2051  bitset_invert_to(bsr1, bs1);
2052  bsr1 = bs1;
2053  }
2054  if (not2 != 0) {
2055  bitset_invert_to(bsr2, bs2);
2056  bsr2 = bs2;
2057  }
2058  bitset_or(bsr1, bsr2);
2059  if (bsr1 != dest->bs) {
2060  bitset_copy(dest->bs, bsr1);
2061  bsr1 = dest->bs;
2062  }
2063  if (not1 != 0) {
2064  bitset_invert(dest->bs);
2065  }
2066 
2067  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2068  if (not1 != 0 && not2 != 0) {
2069  r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
2070  }
2071  else {
2072  r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
2073  if (r == 0 && not1 != 0) {
2074  BBuf *tbuf = 0;
2075  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2076  bbuf_free(pbuf);
2077  pbuf = tbuf;
2078  }
2079  }
2080  if (r != 0) {
2081  bbuf_free(pbuf);
2082  return r;
2083  }
2084 
2085  dest->mbuf = pbuf;
2086  bbuf_free(buf1);
2087  return r;
2088  }
2089  else
2090  return 0;
2091 }
2092 
2093 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
2094 
2095 static OnigCodePoint
2096 conv_backslash_value(OnigCodePoint c, ScanEnv* env)
2097 {
2099  switch (c) {
2100  case 'n': return '\n';
2101  case 't': return '\t';
2102  case 'r': return '\r';
2103  case 'f': return '\f';
2104  case 'a': return '\007';
2105  case 'b': return '\010';
2106  case 'e': return '\033';
2107  case 'v':
2109  return '\v';
2110  break;
2111 
2112  default:
2113  if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
2114  UNKNOWN_ESC_WARN(env, c);
2115  break;
2116  }
2117  }
2118  return c;
2119 }
2120 
2121 #ifdef USE_NO_INVALID_QUANTIFIER
2122 # define is_invalid_quantifier_target(node) 0
2123 #else
2124 static int
2126 {
2127  switch (NTYPE(node)) {
2128  case NT_ANCHOR:
2129  return 1;
2130  break;
2131 
2132  case NT_ENCLOSE:
2133  /* allow enclosed elements */
2134  /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2135  break;
2136 
2137  case NT_LIST:
2138  do {
2139  if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2140  } while (IS_NOT_NULL(node = NCDR(node)));
2141  return 0;
2142  break;
2143 
2144  case NT_ALT:
2145  do {
2146  if (is_invalid_quantifier_target(NCAR(node))) return 1;
2147  } while (IS_NOT_NULL(node = NCDR(node)));
2148  break;
2149 
2150  default:
2151  break;
2152  }
2153  return 0;
2154 }
2155 #endif
2156 
2157 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2158 static int
2159 popular_quantifier_num(QtfrNode* q)
2160 {
2161  if (q->greedy) {
2162  if (q->lower == 0) {
2163  if (q->upper == 1) return 0;
2164  else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2165  }
2166  else if (q->lower == 1) {
2167  if (IS_REPEAT_INFINITE(q->upper)) return 2;
2168  }
2169  }
2170  else {
2171  if (q->lower == 0) {
2172  if (q->upper == 1) return 3;
2173  else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2174  }
2175  else if (q->lower == 1) {
2176  if (IS_REPEAT_INFINITE(q->upper)) return 5;
2177  }
2178  }
2179  return -1;
2180 }
2181 
2182 
/* Actions used when a quantifier is applied directly to another quantifier
 * (values of ReduceTypeTable). */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A,        /* to '*' */
  RQ_AQ,       /* to '*?' */
  RQ_QQ,       /* to '??' */
  RQ_P_QQ,     /* to '+)??' */
  RQ_PQ_Q      /* to '+?)?' */
};
2192 
2193 static enum ReduceType const ReduceTypeTable[6][6] = {
2194 /* '?', '*', '+', '??', '*?', '+?' p / c */
2195  {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
2196  {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
2197  {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
2198  {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
2199  {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
2200  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
2201 };
2202 
2203 extern void
2205 {
2206  int pnum, cnum;
2207  QtfrNode *p, *c;
2208 
2209  p = NQTFR(pnode);
2210  c = NQTFR(cnode);
2211  pnum = popular_quantifier_num(p);
2212  cnum = popular_quantifier_num(c);
2213  if (pnum < 0 || cnum < 0) return ;
2214 
2215  switch (ReduceTypeTable[cnum][pnum]) {
2216  case RQ_DEL:
2217  *pnode = *cnode;
2218  break;
2219  case RQ_A:
2220  p->target = c->target;
2221  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
2222  break;
2223  case RQ_AQ:
2224  p->target = c->target;
2225  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
2226  break;
2227  case RQ_QQ:
2228  p->target = c->target;
2229  p->lower = 0; p->upper = 1; p->greedy = 0;
2230  break;
2231  case RQ_P_QQ:
2232  p->target = cnode;
2233  p->lower = 0; p->upper = 1; p->greedy = 0;
2234  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
2235  return ;
2236  break;
2237  case RQ_PQ_Q:
2238  p->target = cnode;
2239  p->lower = 0; p->upper = 1; p->greedy = 1;
2240  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
2241  return ;
2242  break;
2243  case RQ_ASIS:
2244  p->target = cnode;
2245  return ;
2246  break;
2247  }
2248 
2249  c->target = NULL_NODE;
2250  onig_node_free(cnode);
2251 }
2252 
2253 
2255  TK_EOT = 0, /* end of token */
2267  TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
2273  TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
2277  /* in cc */
2281  TK_CC_AND, /* && */
2283 };
2284 
2285 typedef struct {
2287  int escaped;
2288  int base; /* is number: 8, 16 (used in [....]) */
2290  union {
2292  int c;
2294  struct {
2295  int subtype;
2297  } anchor;
2298  struct {
2299  int lower;
2300  int upper;
2301  int greedy;
2303  } repeat;
2304  struct {
2305  int num;
2306  int ref1;
2307  int* refs;
2308  int by_name;
2309 #ifdef USE_BACKREF_WITH_LEVEL
2311  int level; /* \k<name+n> */
2312 #endif
2313  } backref;
2314  struct {
2317  int gnum;
2318  int rel;
2319  } call;
2320  struct {
2321  int ctype;
2322  int not;
2323  } prop;
2324  } u;
2325 } OnigToken;
2326 
2327 
2328 static int
2329 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2330 {
2331  int low, up, syn_allow, non_low = 0;
2332  int r = 0;
2333  OnigCodePoint c;
2334  OnigEncoding enc = env->enc;
2335  UChar* p = *src;
2336  PFETCH_READY;
2337 
2338  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
2339 
2340  if (PEND) {
2341  if (syn_allow)
2342  return 1; /* "....{" : OK! */
2343  else
2344  return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
2345  }
2346 
2347  if (! syn_allow) {
2348  c = PPEEK;
2349  if (c == ')' || c == '(' || c == '|') {
2351  }
2352  }
2353 
2354  low = onig_scan_unsigned_number(&p, end, env->enc);
2355  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2356  if (low > ONIG_MAX_REPEAT_NUM)
2358 
2359  if (p == *src) { /* can't read low */
2361  /* allow {,n} as {0,n} */
2362  low = 0;
2363  non_low = 1;
2364  }
2365  else
2366  goto invalid;
2367  }
2368 
2369  if (PEND) goto invalid;
2370  PFETCH(c);
2371  if (c == ',') {
2372  UChar* prev = p;
2373  up = onig_scan_unsigned_number(&p, end, env->enc);
2374  if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2375  if (up > ONIG_MAX_REPEAT_NUM)
2377 
2378  if (p == prev) {
2379  if (non_low != 0)
2380  goto invalid;
2381  up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
2382  }
2383  }
2384  else {
2385  if (non_low != 0)
2386  goto invalid;
2387 
2388  PUNFETCH;
2389  up = low; /* {n} : exact n times */
2390  r = 2; /* fixed */
2391  }
2392 
2393  if (PEND) goto invalid;
2394  PFETCH(c);
2396  if (c != MC_ESC(env->syntax)) goto invalid;
2397  if (PEND) goto invalid;
2398  PFETCH(c);
2399  }
2400  if (c != '}') goto invalid;
2401 
2402  if (!IS_REPEAT_INFINITE(up) && low > up) {
2404  }
2405 
2406  tok->type = TK_INTERVAL;
2407  tok->u.repeat.lower = low;
2408  tok->u.repeat.upper = up;
2409  *src = p;
2410  return r; /* 0: normal {n,m}, 2: fixed {n} */
2411 
2412  invalid:
2413  if (syn_allow)
2414  return 1; /* OK */
2415  else
2417 }
2418 
2419 /* \M-, \C-, \c, or \... */
2420 static int
2421 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
2422 {
2423  int v;
2424  OnigCodePoint c;
2425  OnigEncoding enc = env->enc;
2426  UChar* p = *src;
2427 
2428  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2429 
2430  PFETCH_S(c);
2431  switch (c) {
2432  case 'M':
2434  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2435  PFETCH_S(c);
2436  if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2437  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2438  PFETCH_S(c);
2439  if (c == MC_ESC(env->syntax)) {
2440  v = fetch_escaped_value(&p, end, env, &c);
2441  if (v < 0) return v;
2442  }
2443  c = ((c & 0xff) | 0x80);
2444  }
2445  else
2446  goto backslash;
2447  break;
2448 
2449  case 'C':
2452  PFETCH_S(c);
2453  if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2454  goto control;
2455  }
2456  else
2457  goto backslash;
2458 
2459  case 'c':
2460  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
2461  control:
2463  PFETCH_S(c);
2464  if (c == '?') {
2465  c = 0177;
2466  }
2467  else {
2468  if (c == MC_ESC(env->syntax)) {
2469  v = fetch_escaped_value(&p, end, env, &c);
2470  if (v < 0) return v;
2471  }
2472  c &= 0x9f;
2473  }
2474  break;
2475  }
2476  /* fall through */
2477 
2478  default:
2479  {
2480  backslash:
2481  c = conv_backslash_value(c, env);
2482  }
2483  break;
2484  }
2485 
2486  *src = p;
2487  *val = c;
2488  return 0;
2489 }
2490 
2491 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2492 
2493 static OnigCodePoint
2494 get_name_end_code_point(OnigCodePoint start)
2495 {
2496  switch (start) {
2497  case '<': return (OnigCodePoint )'>'; break;
2498  case '\'': return (OnigCodePoint )'\''; break;
2499  case '(': return (OnigCodePoint )')'; break;
2500  case '{': return (OnigCodePoint )'}'; break;
2501  default:
2502  break;
2503  }
2504 
2505  return (OnigCodePoint )0;
2506 }
2507 
2508 #ifdef USE_NAMED_GROUP
/* Which code points may appear in a group name: every code point when
 * built with RUBY defined, otherwise only word characters. */
# ifndef RUBY
#  define ONIGENC_IS_CODE_NAME(enc, c)  ONIGENC_IS_CODE_WORD(enc, c)
# else
#  define ONIGENC_IS_CODE_NAME(enc, c)  TRUE
# endif
2514 
2515 # ifdef USE_BACKREF_WITH_LEVEL
2516 /*
2517  \k<name+n>, \k<name-n>
2518  \k<num+n>, \k<num-n>
2519  \k<-num+n>, \k<-num-n>
2520 */
2521 static int
2522 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2523  UChar** rname_end, ScanEnv* env,
2524  int* rback_num, int* rlevel)
2525 {
2526  int r, sign, is_num, exist_level;
2527  OnigCodePoint end_code;
2528  OnigCodePoint c = 0;
2529  OnigEncoding enc = env->enc;
2530  UChar *name_end;
2531  UChar *pnum_head;
2532  UChar *p = *src;
2533  PFETCH_READY;
2534 
2535  *rback_num = 0;
2536  is_num = exist_level = 0;
2537  sign = 1;
2538  pnum_head = *src;
2539 
2540  end_code = get_name_end_code_point(start_code);
2541 
2542  name_end = end;
2543  r = 0;
2544  if (PEND) {
2545  return ONIGERR_EMPTY_GROUP_NAME;
2546  }
2547  else {
2548  PFETCH(c);
2549  if (c == end_code)
2550  return ONIGERR_EMPTY_GROUP_NAME;
2551 
2552  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2553  is_num = 1;
2554  }
2555  else if (c == '-') {
2556  is_num = 2;
2557  sign = -1;
2558  pnum_head = p;
2559  }
2560  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2562  }
2563  }
2564 
2565  while (!PEND) {
2566  name_end = p;
2567  PFETCH(c);
2568  if (c == end_code || c == ')' || c == '+' || c == '-') {
2569  if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2570  break;
2571  }
2572 
2573  if (is_num != 0) {
2574  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2575  is_num = 1;
2576  }
2577  else {
2579  is_num = 0;
2580  }
2581  }
2582  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2584  }
2585  }
2586 
2587  if (r == 0 && c != end_code) {
2588  if (c == '+' || c == '-') {
2589  int level;
2590  int flag = (c == '-' ? -1 : 1);
2591 
2592  if (PEND) {
2594  goto end;
2595  }
2596  PFETCH(c);
2597  if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2598  PUNFETCH;
2599  level = onig_scan_unsigned_number(&p, end, enc);
2600  if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2601  *rlevel = (level * flag);
2602  exist_level = 1;
2603 
2604  if (!PEND) {
2605  PFETCH(c);
2606  if (c == end_code)
2607  goto end;
2608  }
2609  }
2610 
2611  err:
2613  name_end = end;
2614  }
2615 
2616  end:
2617  if (r == 0) {
2618  if (is_num != 0) {
2619  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2620  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2621  else if (*rback_num == 0) goto err;
2622 
2623  *rback_num *= sign;
2624  }
2625 
2626  *rname_end = name_end;
2627  *src = p;
2628  return (exist_level ? 1 : 0);
2629  }
2630  else {
2631  onig_scan_env_set_error_string(env, r, *src, name_end);
2632  return r;
2633  }
2634 }
2635 # endif /* USE_BACKREF_WITH_LEVEL */
2636 
2637 /*
2638  ref: 0 -> define name (don't allow number name)
2639  1 -> reference name (allow number name)
2640 */
2641 static int
2642 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2643  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2644 {
2645  int r, is_num, sign;
2646  OnigCodePoint end_code;
2647  OnigCodePoint c = 0;
2648  OnigEncoding enc = env->enc;
2649  UChar *name_end;
2650  UChar *pnum_head;
2651  UChar *p = *src;
2652 
2653  *rback_num = 0;
2654 
2655  end_code = get_name_end_code_point(start_code);
2656 
2657  name_end = end;
2658  pnum_head = *src;
2659  r = 0;
2660  is_num = 0;
2661  sign = 1;
2662  if (PEND) {
2663  return ONIGERR_EMPTY_GROUP_NAME;
2664  }
2665  else {
2666  PFETCH_S(c);
2667  if (c == end_code)
2668  return ONIGERR_EMPTY_GROUP_NAME;
2669 
2670  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2671  if (ref == 1)
2672  is_num = 1;
2673  else {
2675  is_num = 0;
2676  }
2677  }
2678  else if (c == '-') {
2679  if (ref == 1) {
2680  is_num = 2;
2681  sign = -1;
2682  pnum_head = p;
2683  }
2684  else {
2686  is_num = 0;
2687  }
2688  }
2689  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2691  }
2692  }
2693 
2694  if (r == 0) {
2695  while (!PEND) {
2696  name_end = p;
2697  PFETCH_S(c);
2698  if (c == end_code || c == ')') {
2699  if (is_num == 2) {
2701  goto teardown;
2702  }
2703  break;
2704  }
2705 
2706  if (is_num != 0) {
2707  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2708  is_num = 1;
2709  }
2710  else {
2711  if (!ONIGENC_IS_CODE_WORD(enc, c))
2713  else
2715  goto teardown;
2716  }
2717  }
2718  else {
2719  if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2721  goto teardown;
2722  }
2723  }
2724  }
2725 
2726  if (c != end_code) {
2728  name_end = end;
2729  goto err;
2730  }
2731 
2732  if (is_num != 0) {
2733  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2734  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2735  else if (*rback_num == 0) {
2737  goto err;
2738  }
2739 
2740  *rback_num *= sign;
2741  }
2742 
2743  *rname_end = name_end;
2744  *src = p;
2745  return 0;
2746  }
2747  else {
2748 teardown:
2749  while (!PEND) {
2750  name_end = p;
2751  PFETCH_S(c);
2752  if (c == end_code || c == ')')
2753  break;
2754  }
2755  if (PEND)
2756  name_end = end;
2757 
2758  err:
2759  onig_scan_env_set_error_string(env, r, *src, name_end);
2760  return r;
2761  }
2762 }
2763 #else
2764 static int
2765 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2766  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2767 {
2768  int r, is_num, sign;
2769  OnigCodePoint end_code;
2770  OnigCodePoint c = 0;
2771  UChar *name_end;
2772  OnigEncoding enc = env->enc;
2773  UChar *pnum_head;
2774  UChar *p = *src;
2775  PFETCH_READY;
2776 
2777  *rback_num = 0;
2778 
2779  end_code = get_name_end_code_point(start_code);
2780 
2781  *rname_end = name_end = end;
2782  r = 0;
2783  pnum_head = *src;
2784  is_num = 0;
2785  sign = 1;
2786 
2787  if (PEND) {
2788  return ONIGERR_EMPTY_GROUP_NAME;
2789  }
2790  else {
2791  PFETCH(c);
2792  if (c == end_code)
2793  return ONIGERR_EMPTY_GROUP_NAME;
2794 
2795  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2796  is_num = 1;
2797  }
2798  else if (c == '-') {
2799  is_num = 2;
2800  sign = -1;
2801  pnum_head = p;
2802  }
2803  else {
2805  }
2806  }
2807 
2808  while (!PEND) {
2809  name_end = p;
2810 
2811  PFETCH(c);
2812  if (c == end_code || c == ')') break;
2813  if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2815  }
2816  if (r == 0 && c != end_code) {
2818  name_end = end;
2819  }
2820 
2821  if (r == 0) {
2822  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2823  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2824  else if (*rback_num == 0) {
2826  goto err;
2827  }
2828  *rback_num *= sign;
2829 
2830  *rname_end = name_end;
2831  *src = p;
2832  return 0;
2833  }
2834  else {
2835  err:
2836  onig_scan_env_set_error_string(env, r, *src, name_end);
2837  return r;
2838  }
2839 }
2840 #endif /* USE_NAMED_GROUP */
2841 
2842 
2843 static void
2844 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
2845 {
2846  va_list args;
2848  va_start(args, fmt);
2850  env->pattern, env->pattern_end,
2851  (const UChar *)fmt, args);
2852  va_end(args);
2853 #ifdef RUBY
2854  if (env->sourcefile == NULL)
2855  rb_warn("%s", (char *)buf);
2856  else
2857  rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
2858 #else
2859  (*onig_warn)((char* )buf);
2860 #endif
2861 }
2862 
2863 static void
2864 CC_ESC_WARN(ScanEnv *env, UChar *c)
2865 {
2866  if (onig_warn == onig_null_warn) return ;
2867 
2870  onig_syntax_warn(env, "character class has '%s' without escape", c);
2871  }
2872 }
2873 
2874 static void
2875 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2876 {
2877  if (onig_warn == onig_null_warn) return ;
2878 
2880  onig_syntax_warn(env, "regular expression has '%s' without escape", c);
2881  }
2882 }
2883 
/* Fallback when no RTEST macro is available: treat every value as true,
 * so the RTEST(ruby_verbose) checks below never suppress a warning. */
#if !defined(RTEST)
# define RTEST(v)  1
#endif
2887 
/*
 * Warn about a duplicated range in a character class.
 *
 * NOTE(review): the declarator line (listing line 2889) is missing from this
 * listing, so the function name and parameter list are not visible here. The
 * body uses `env`, `from` and `to`; the name is presumably CC_DUP_WARN, by
 * analogy with the ONIG_SYN_WARN_CC_DUP flag -- confirm against the original.
 *
 * Returns without warning when the warning callback is onig_null_warn or
 * RTEST(ruby_verbose) is false. The warning is issued only when the syntax
 * has ONIG_SYN_WARN_CC_DUP set and that bit is not yet set in
 * env->warnings_flag.
 */
2888 static void
2890 {
2891  if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2892 
2893  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) &&
2894  !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
2895 #ifdef WARN_ALL_CC_DUP
 /* This variant reports the range and does not set the flag. */
2896  onig_syntax_warn(env, "character class has duplicated range: %04x-%04x", from, to);
2897 #else
 /* Set the flag before warning so the check above suppresses later repeats. */
2898  env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
2899  onig_syntax_warn(env, "character class has duplicated range");
2900 #endif
2901  }
2902 }
2903 
/*
 * Warn that the escape sequence backslash + `c` is unknown and is ignored.
 * `c` is formatted with %c. No warning is issued when the warning callback is
 * onig_null_warn or RTEST(ruby_verbose) is false.
 */
2904 static void
2905 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
2906 {
2907  if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2908  onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
2909 }
2910 
/*
 * Search [from, to) for the first occurrence of the code point sequence
 * s[0..n-1], decoding characters with `enc`.
 *
 * s, n   - code point sequence to look for and its length.
 * from   - start of the region to search.
 * to     - end (exclusive) of the region to search.
 * next   - if not NULL, receives the position just past the match.
 * enc    - encoding used to decode and step over characters.
 *
 * Returns a pointer to the first character of the match, or NULL_UCHARP when
 * the sequence does not occur. A match must lie completely before `to`: the
 * inner loop stops comparing as soon as q reaches `to`.
 */
2911 static UChar*
2912 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2913  UChar **next, OnigEncoding enc)
2914 {
2915  int i;
2916  OnigCodePoint x;
2917  UChar *q;
2918  UChar *p = from;
2919 
2920  while (p < to) {
2921  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2922  q = p + enclen(enc, p, to);
2923  if (x == s[0]) {
 /* First code point matched; compare the rest starting at q. */
2924  for (i = 1; i < n && q < to; i++) {
2925  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2926  if (x != s[i]) break;
2927  q += enclen(enc, q, to);
2928  }
 /* i reaches n only when every code point of s matched. */
2929  if (i >= n) {
2930  if (IS_NOT_NULL(next))
2931  *next = q;
2932  return p;
2933  }
2934  }
 /* On a failed partial match q has advanced past the compared characters,
    so the search resumes after them rather than at the next character. */
2935  p = q;
2936  }
2937  return NULL_UCHARP;
2938 }
2939 
/*
 * Check whether the code point sequence s[0..n-1] occurs in [from, to) before
 * an unescaped `bad` code point is reached.
 *
 * s, n   - code point sequence to look for and its length.
 * from   - start of the region to scan.
 * to     - end (exclusive) of the region to scan.
 * bad    - code point that aborts the scan when met outside an escape.
 * enc    - encoding used to decode and step over characters.
 * syn    - syntax whose escape character (MC_ESC(syn)) is honoured.
 *
 * Returns 1 when the sequence is found, 0 when `bad` is met first or the end
 * of the region is reached. The character following the escape character is
 * skipped without being examined.
 */
2940 static int
2941 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2942  OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
2943 {
2944  int i, in_esc;
2945  OnigCodePoint x;
2946  UChar *q;
2947  UChar *p = from;
2948 
2949  in_esc = 0;
2950  while (p < to) {
2951  if (in_esc) {
 /* Previous character was the escape: skip this one unexamined. */
2952  in_esc = 0;
2953  p += enclen(enc, p, to);
2954  }
2955  else {
2956  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2957  q = p + enclen(enc, p, to);
2958  if (x == s[0]) {
2959  for (i = 1; i < n && q < to; i++) {
2960  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2961  if (x != s[i]) break;
2962  q += enclen(enc, q, to);
2963  }
2964  if (i >= n) return 1;
 /* Partial match failed: advance by one character only (q may have moved). */
2965  p += enclen(enc, p, to);
2966  }
2967  else {
 /* NOTE(review): x was already decoded from p above and has not been
    modified on this path, so this re-decode looks redundant. */
2968  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2969  if (x == bad) return 0;
2970  else if (x == MC_ESC(syn)) in_esc = 1;
2971  p = q;
2972  }
2973  }
2974  }
2975  return 0;
2976 }
2977 
/*
 * Read the next token while scanning inside a character class.
 *
 * tok  - token to fill in (type, base, escaped flag and the union payload).
 * src  - in/out scan position; updated to the position after the token on
 *        the paths that reach the `end` label.
 * end  - end of the pattern.
 * env  - scan environment supplying the syntax and encoding.
 *
 * Returns tok->type, or an ONIGERR_* code on the error paths (those return
 * directly without updating *src). At end of input the token type is TK_EOT.
 *
 * NOTE(review): several condition lines are missing from this listing
 * (e.g. listing lines 3007, 3065, 3070, 3088, 3095, 3125, 3142, 3149, 3167,
 * 3208, 3217); comments below only describe what the visible lines show.
 */
2978 static int
2979 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2980 {
2981  int num;
2982  OnigCodePoint c, c2;
2983  const OnigSyntaxType* syn = env->syntax;
2984  OnigEncoding enc = env->enc;
2985  UChar* prev;
2986  UChar* p = *src;
2987  PFETCH_READY;
2988 
2989  if (PEND) {
2990  tok->type = TK_EOT;
2991  return tok->type;
2992  }
2993 
 /* Default: the fetched code point is an ordinary, unescaped character. */
2994  PFETCH(c);
2995  tok->type = TK_CHAR;
2996  tok->base = 0;
2997  tok->u.c = c;
2998  tok->escaped = 0;
2999 
3000  if (c == ']') {
3001  tok->type = TK_CC_CLOSE;
3002  }
3003  else if (c == '-') {
3004  tok->type = TK_CC_RANGE;
3005  }
3006  else if (c == MC_ESC(syn)) {
 /* The condition guarding this `goto end` is missing from the listing. */
3008  goto end;
3009 
3010  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3011 
3012  PFETCH(c);
3013  tok->escaped = 1;
3014  tok->u.c = c;
3015  switch (c) {
 /* Character-type escapes: upper-case letters set the `not` flag. */
3016  case 'w':
3017  tok->type = TK_CHAR_TYPE;
3018  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3019  tok->u.prop.not = 0;
3020  break;
3021  case 'W':
3022  tok->type = TK_CHAR_TYPE;
3023  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3024  tok->u.prop.not = 1;
3025  break;
3026  case 'd':
3027  tok->type = TK_CHAR_TYPE;
3028  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3029  tok->u.prop.not = 0;
3030  break;
3031  case 'D':
3032  tok->type = TK_CHAR_TYPE;
3033  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3034  tok->u.prop.not = 1;
3035  break;
3036  case 's':
3037  tok->type = TK_CHAR_TYPE;
3038  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3039  tok->u.prop.not = 0;
3040  break;
3041  case 'S':
3042  tok->type = TK_CHAR_TYPE;
3043  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3044  tok->u.prop.not = 1;
3045  break;
 /* \h and \H are only recognised when the syntax enables them. */
3046  case 'h':
3047  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3048  tok->type = TK_CHAR_TYPE;
3049  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3050  tok->u.prop.not = 0;
3051  break;
3052  case 'H':
3053  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3054  tok->type = TK_CHAR_TYPE;
3055  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3056  tok->u.prop.not = 1;
3057  break;
3058 
 /* \p{...} / \P{...}: a leading '^' after the brace inverts the `not` flag. */
3059  case 'p':
3060  case 'P':
3061  if (PEND) break;
3062 
3063  c2 = PPEEK;
3064  if (c2 == '{' &&
3066  PINC;
3067  tok->type = TK_CHAR_PROPERTY;
3068  tok->u.prop.not = (c == 'P' ? 1 : 0);
3069 
3071  PFETCH(c2);
3072  if (c2 == '^') {
3073  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3074  }
3075  else
3076  PUNFETCH;
3077  }
3078  }
3079  else {
3080  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3081  }
3082  break;
3083 
 /* \x: brace form yields TK_CODE_POINT, two-digit form yields TK_RAW_BYTE. */
3084  case 'x':
3085  if (PEND) break;
3086 
3087  prev = p;
3089  PINC;
3090  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3091  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3092  if (!PEND) {
3093  c2 = PPEEK;
3094  if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3096  }
3097 
3098  if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3099  PINC;
3100  tok->type = TK_CODE_POINT;
3101  tok->base = 16;
3102  tok->u.code = (OnigCodePoint )num;
3103  }
3104  else {
3105  /* can't read nothing or invalid format */
3106  p = prev;
3107  }
3108  }
3109  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3110  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3111  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3112  if (p == prev) { /* can't read nothing. */
3113  num = 0; /* but, it's not error */
3114  }
3115  tok->type = TK_RAW_BYTE;
3116  tok->base = 16;
3117  tok->u.c = num;
3118  }
3119  break;
3120 
 /* \u: exactly four hex digits are requested from the scanner (4, 4). */
3121  case 'u':
3122  if (PEND) break;
3123 
3124  prev = p;
3126  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3127  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3128  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3129  if (p == prev) { /* can't read nothing. */
3130  num = 0; /* but, it's not error */
3131  }
3132  tok->type = TK_CODE_POINT;
3133  tok->base = 16;
3134  tok->u.code = (OnigCodePoint )num;
3135  }
3136  break;
3137 
 /* \o: brace-delimited octal code point. */
3138  case 'o':
3139  if (PEND) break;
3140 
3141  prev = p;
3143  PINC;
3144  num = scan_unsigned_octal_number(&p, end, 11, enc);
3145  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3146  if (!PEND) {
3147  c2 = PPEEK;
3148  if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
3150  }
3151 
3152  if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3153  PINC;
3154  tok->type = TK_CODE_POINT;
3155  tok->base = 8;
3156  tok->u.code = (OnigCodePoint )num;
3157  }
3158  else {
3159  /* can't read nothing or invalid format */
3160  p = prev;
3161  }
3162  }
3163  break;
3164 
 /* Up to three octal digits form a raw byte; values above 0xff are rejected. */
3165  case '0':
3166  case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3168  PUNFETCH;
3169  prev = p;
3170  num = scan_unsigned_octal_number(&p, end, 3, enc);
3171  if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3172  if (p == prev) { /* can't read nothing. */
3173  num = 0; /* but, it's not error */
3174  }
3175  tok->type = TK_RAW_BYTE;
3176  tok->base = 8;
3177  tok->u.c = num;
3178  }
3179  break;
3180 
 /* Any other escape: if fetch_escaped_value yields a different code point
    than the literal character, the token becomes TK_CODE_POINT. */
3181  default:
3182  PUNFETCH;
3183  num = fetch_escaped_value(&p, end, env, &c2);
3184  if (num < 0) return num;
3185  if ((OnigCodePoint )tok->u.c != c2) {
3186  tok->u.code = (OnigCodePoint )c2;
3187  tok->type = TK_CODE_POINT;
3188  }
3189  break;
3190  }
3191  }
3192  else if (c == '[') {
 /* "[:" starts a POSIX bracket only if ":]" occurs before an unescaped ']'. */
3193  if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3194  OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3195  tok->backp = p; /* point at '[' is read */
3196  PINC;
3197  if (str_exist_check_with_esc(send, 2, p, end,
3198  (OnigCodePoint )']', enc, syn)) {
3199  tok->type = TK_POSIX_BRACKET_OPEN;
3200  }
3201  else {
3202  PUNFETCH;
3203  goto cc_in_cc;
3204  }
3205  }
3206  else {
3207  cc_in_cc:
3209  tok->type = TK_CC_CC_OPEN;
3210  }
3211  else {
3212  CC_ESC_WARN(env, (UChar* )"[");
3213  }
3214  }
3215  }
3216  else if (c == '&') {
 /* "&&" becomes TK_CC_AND; the first half of the condition is missing. */
3218  !PEND && (PPEEK_IS('&'))) {
3219  PINC;
3220  tok->type = TK_CC_AND;
3221  }
3222  }
3223 
3224  end:
3225  *src = p;
3226  return tok->type;
3227 }
3228 
3229 #ifdef USE_NAMED_GROUP
/*
 * Parse the name or number of a back reference and fill `tok` as TK_BACKREF.
 *
 * c    - code point passed through to fetch_name_with_level (the visible
 *        callers pass '<', '\'', '{' or '(').
 * tok  - token to fill in.
 * src  - in/out scan position; updated only on success.
 * end  - end of the pattern.
 * env  - scan environment (syntax, group count, group nodes, regex).
 *
 * Returns 0 on success, the negative result of the name fetch, or
 * ONIGERR_INVALID_BACKREF. A non-zero back_num is treated as a numbered
 * reference (negative values are converted with BACKREF_REL_TO_ABS);
 * back_num == 0 means the reference is looked up by name.
 *
 * NOTE(review): listing lines 3262, 3275, 3277 and 3279 are missing, so the
 * guard around the numbered-reference check and part of the undefined-name
 * error path are not visible here.
 */
3230 static int
3231 fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
3232  UChar* end, ScanEnv* env)
3233 {
3234  int r, num;
3235  const OnigSyntaxType* syn = env->syntax;
3236  UChar* prev;
3237  UChar* p = *src;
3238  UChar* name_end;
3239  int* backs;
3240  int back_num;
3241 
3242  prev = p;
3243 
3244 # ifdef USE_BACKREF_WITH_LEVEL
3245  name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3246  r = fetch_name_with_level(c, &p, end, &name_end,
3247  env, &back_num, &tok->u.backref.level);
3248  if (r == 1) tok->u.backref.exist_level = 1;
3249  else tok->u.backref.exist_level = 0;
3250 # else
 /* NOTE(review): this call passes one argument fewer than the other
    fetch_name calls visible in this file (no leading code point argument);
    it looks like it would not compile in this configuration -- confirm. */
3251  r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3252 # endif
3253  if (r < 0) return r;
3254 
3255  if (back_num != 0) {
 /* Numbered reference; a relative (negative) number must resolve to > 0. */
3256  if (back_num < 0) {
3257  back_num = BACKREF_REL_TO_ABS(back_num, env);
3258  if (back_num <= 0)
3259  return ONIGERR_INVALID_BACKREF;
3260  }
3261 
3263  if (back_num > env->num_mem ||
3264  IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3265  return ONIGERR_INVALID_BACKREF;
3266  }
3267  tok->type = TK_BACKREF;
3268  tok->u.backref.by_name = 0;
3269  tok->u.backref.num = 1;
3270  tok->u.backref.ref1 = back_num;
3271  }
3272  else {
 /* Named reference: resolve the name [prev, name_end) to group numbers. */
3273  num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3274  if (num <= 0) {
3276  ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3278  }
3280  int i;
3281  for (i = 0; i < num; i++) {
3282  if (backs[i] > env->num_mem ||
3283  IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3284  return ONIGERR_INVALID_BACKREF;
3285  }
3286  }
3287 
3288  tok->type = TK_BACKREF;
3289  tok->u.backref.by_name = 1;
 /* With a single group, or when the syntax asks for the left-most named
    group, only backs[0] is stored; otherwise the whole array is referenced. */
3290  if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) {
3291  tok->u.backref.num = 1;
3292  tok->u.backref.ref1 = backs[0];
3293  }
3294  else {
3295  tok->u.backref.num = num;
3296  tok->u.backref.refs = backs;
3297  }
3298  }
3299  *src = p;
3300  return 0;
3301 }
3302 #endif
3303 
/*
 * Read the next token of the pattern outside a character class.
 *
 * tok  - token to fill in (type, base, backp, escaped flag, union payload).
 * src  - in/out scan position; updated to the position after the token when
 *        the function returns through the end of the body.
 * end  - end of the pattern.
 * env  - scan environment supplying encoding, syntax and options.
 *
 * Returns tok->type, or an ONIGERR_* code on the error paths (those return
 * directly without updating *src). At end of input the token type is TK_EOT.
 * The default token type is TK_STRING; the switch arms upgrade it when the
 * character (escaped or not) is a recognised operator for the syntax.
 *
 * NOTE(review): many condition lines are missing from this listing (for
 * example listing lines 3336, 3358, 3366, 3368, 3387, 3442, 3449, 3519, 3525,
 * 3530, 3538, 3544, 3573, 3590, 3597, 3621, 3623, 3648, 3666, 3683, 3694,
 * 3736, 3744, 3749, 3764, 3770-3771, 3776, 3806, 3870, 3885, 3905, 3937,
 * 3964, 3971, 4008, 4015). Comments below describe only the visible lines.
 */
3304 static int
3305 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3306 {
3307  int r, num;
3308  OnigCodePoint c;
3309  OnigEncoding enc = env->enc;
3310  const OnigSyntaxType* syn = env->syntax;
3311  UChar* prev;
3312  UChar* p = *src;
3313  PFETCH_READY;
3314 
 /* Re-entered via `goto start` after skipped comments and whitespace. */
3315  start:
3316  if (PEND) {
3317  tok->type = TK_EOT;
3318  return tok->type;
3319  }
3320 
3321  tok->type = TK_STRING;
3322  tok->base = 0;
3323  tok->backp = p;
3324 
3325  PFETCH(c);
 /* ---- escaped character ---- */
3326  if (IS_MC_ESC_CODE(c, syn)) {
3327  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3328 
3329  tok->backp = p;
3330  PFETCH(c);
3331 
3332  tok->u.c = c;
3333  tok->escaped = 1;
3334  switch (c) {
3335  case '*':
3337  tok->type = TK_OP_REPEAT;
3338  tok->u.repeat.lower = 0;
3339  tok->u.repeat.upper = REPEAT_INFINITE;
3340  goto greedy_check;
3341  break;
3342 
3343  case '+':
3344  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3345  tok->type = TK_OP_REPEAT;
3346  tok->u.repeat.lower = 1;
3347  tok->u.repeat.upper = REPEAT_INFINITE;
3348  goto greedy_check;
3349  break;
3350 
3351  case '?':
3352  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
3353  tok->type = TK_OP_REPEAT;
3354  tok->u.repeat.lower = 0;
3355  tok->u.repeat.upper = 1;
 /* Shared tail for all quantifiers (also reached from the unescaped arms):
    a following '?' clears greedy, a following '+' sets possessive. */
3356  greedy_check:
3357  if (!PEND && PPEEK_IS('?') &&
3359  PFETCH(c);
3360  tok->u.repeat.greedy = 0;
3361  tok->u.repeat.possessive = 0;
3362  }
3363  else {
3364  possessive_check:
3365  if (!PEND && PPEEK_IS('+') &&
3367  tok->type != TK_INTERVAL) ||
3369  tok->type == TK_INTERVAL))) {
3370  PFETCH(c);
3371  tok->u.repeat.greedy = 1;
3372  tok->u.repeat.possessive = 1;
3373  }
3374  else {
3375  tok->u.repeat.greedy = 1;
3376  tok->u.repeat.possessive = 0;
3377  }
3378  }
3379  break;
3380 
3381  case '{':
3382  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
3383  r = fetch_range_quantifier(&p, end, tok, env);
3384  if (r < 0) return r; /* error */
3385  if (r == 0) goto greedy_check;
3386  else if (r == 2) { /* {n} */
3388  goto possessive_check;
3389 
3390  goto greedy_check;
3391  }
3392  /* r == 1 : normal char */
3393  break;
3394 
3395  case '|':
3396  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3397  tok->type = TK_ALT;
3398  break;
3399 
3400  case '(':
3401  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3402  tok->type = TK_SUBEXP_OPEN;
3403  break;
3404 
3405  case ')':
3406  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3407  tok->type = TK_SUBEXP_CLOSE;
3408  break;
3409 
 /* Character-type escapes: upper-case letters set the `not` flag. */
3410  case 'w':
3411  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3412  tok->type = TK_CHAR_TYPE;
3413  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3414  tok->u.prop.not = 0;
3415  break;
3416 
3417  case 'W':
3418  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3419  tok->type = TK_CHAR_TYPE;
3420  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3421  tok->u.prop.not = 1;
3422  break;
3423 
 /* Word-boundary anchors: ascii_range is set only when the option asks for
    the ASCII range and does not ask for all-range word bounds. */
3424  case 'b':
3425  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3426  tok->type = TK_ANCHOR;
3427  tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
3428  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3429  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3430  break;
3431 
3432  case 'B':
3433  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3434  tok->type = TK_ANCHOR;
3435  tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
3436  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3437  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3438  break;
3439 
3440 #ifdef USE_WORD_BEGIN_END
3441  case '<':
3443  tok->type = TK_ANCHOR;
3444  tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
3445  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3446  break;
3447 
3448  case '>':
3450  tok->type = TK_ANCHOR;
3451  tok->u.anchor.subtype = ANCHOR_WORD_END;
3452  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3453  break;
3454 #endif
3455 
3456  case 's':
3457  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3458  tok->type = TK_CHAR_TYPE;
3459  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3460  tok->u.prop.not = 0;
3461  break;
3462 
3463  case 'S':
3464  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3465  tok->type = TK_CHAR_TYPE;
3466  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3467  tok->u.prop.not = 1;
3468  break;
3469 
3470  case 'd':
3471  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3472  tok->type = TK_CHAR_TYPE;
3473  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3474  tok->u.prop.not = 0;
3475  break;
3476 
3477  case 'D':
3478  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3479  tok->type = TK_CHAR_TYPE;
3480  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3481  tok->u.prop.not = 1;
3482  break;
3483 
3484  case 'h':
3485  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3486  tok->type = TK_CHAR_TYPE;
3487  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3488  tok->u.prop.not = 0;
3489  break;
3490 
3491  case 'H':
3492  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3493  tok->type = TK_CHAR_TYPE;
3494  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3495  tok->u.prop.not = 1;
3496  break;
3497 
 /* Buffer anchors; the begin_buf / end_buf labels are shared with the
    back-quote and single-quote escapes further down. */
3498  case 'A':
3499  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3500  begin_buf:
3501  tok->type = TK_ANCHOR;
3502  tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
3503  break;
3504 
3505  case 'Z':
3506  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3507  tok->type = TK_ANCHOR;
3508  tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
3509  break;
3510 
3511  case 'z':
3512  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3513  end_buf:
3514  tok->type = TK_ANCHOR;
3515  tok->u.anchor.subtype = ANCHOR_END_BUF;
3516  break;
3517 
3518  case 'G':
3520  tok->type = TK_ANCHOR;
3521  tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
3522  break;
3523 
3524  case '`':
3526  goto begin_buf;
3527  break;
3528 
3529  case '\'':
3531  goto end_buf;
3532  break;
3533 
 /* \x: brace form yields TK_CODE_POINT, two-digit form yields TK_RAW_BYTE. */
3534  case 'x':
3535  if (PEND) break;
3536 
3537  prev = p;
3539  PINC;
3540  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3541  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3542  if (!PEND) {
3543  if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3545  }
3546 
3547  if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3548  PINC;
3549  tok->type = TK_CODE_POINT;
3550  tok->u.code = (OnigCodePoint )num;
3551  }
3552  else {
3553  /* can't read nothing or invalid format */
3554  p = prev;
3555  }
3556  }
3557  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3558  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3559  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3560  if (p == prev) { /* can't read nothing. */
3561  num = 0; /* but, it's not error */
3562  }
3563  tok->type = TK_RAW_BYTE;
3564  tok->base = 16;
3565  tok->u.c = num;
3566  }
3567  break;
3568 
3569  case 'u':
3570  if (PEND) break;
3571 
3572  prev = p;
3574  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3575  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3576  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3577  if (p == prev) { /* can't read nothing. */
3578  num = 0; /* but, it's not error */
3579  }
3580  tok->type = TK_CODE_POINT;
3581  tok->base = 16;
3582  tok->u.code = (OnigCodePoint )num;
3583  }
3584  break;
3585 
3586  case 'o':
3587  if (PEND) break;
3588 
3589  prev = p;
3591  PINC;
3592  num = scan_unsigned_octal_number(&p, end, 11, enc);
3593  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3594  if (!PEND) {
 /* This local `c` shadows the function-level `c` inside this block. */
3595  OnigCodePoint c = PPEEK;
3596  if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')
3598  }
3599 
3600  if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3601  PINC;
3602  tok->type = TK_CODE_POINT;
3603  tok->u.code = (OnigCodePoint )num;
3604  }
3605  else {
3606  /* can't read nothing or invalid format */
3607  p = prev;
3608  }
3609  }
3610  break;
3611 
 /* Decimal digits: try a numbered back reference first; on failure 8 and 9
    are ordinary characters and the rest fall through to the octal case. */
3612  case '1': case '2': case '3': case '4':
3613  case '5': case '6': case '7': case '8': case '9':
3614  PUNFETCH;
3615  prev = p;
3616  num = onig_scan_unsigned_number(&p, end, enc);
3617  if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3618  goto skip_backref;
3619  }
3620 
3622  (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3624  if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3625  return ONIGERR_INVALID_BACKREF;
3626  }
3627 
3628  tok->type = TK_BACKREF;
3629  tok->u.backref.num = 1;
3630  tok->u.backref.ref1 = num;
3631  tok->u.backref.by_name = 0;
3632 #ifdef USE_BACKREF_WITH_LEVEL
3633  tok->u.backref.exist_level = 0;
3634 #endif
3635  break;
3636  }
3637 
3638  skip_backref:
3639  if (c == '8' || c == '9') {
3640  /* normal char */
3641  p = prev; PINC;
3642  break;
3643  }
3644 
3645  p = prev;
3646  /* fall through */
3647  case '0':
3649  prev = p;
 /* After a leading '0' at most two more octal digits are read, else three. */
3650  num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3651  if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3652  if (p == prev) { /* can't read nothing. */
3653  num = 0; /* but, it's not error */
3654  }
3655  tok->type = TK_RAW_BYTE;
3656  tok->base = 8;
3657  tok->u.c = num;
3658  }
3659  else if (c != '0') {
3660  PINC;
3661  }
3662  break;
3663 
3664 #ifdef USE_NAMED_GROUP
 /* \k<name> / \k'name': named or numbered back reference. */
3665  case 'k':
3667  PFETCH(c);
3668  if (c == '<' || c == '\'') {
3669  r = fetch_named_backref_token(c, tok, &p, end, env);
3670  if (r < 0) return r;
3671  }
3672  else {
3673  PUNFETCH;
3674  onig_syntax_warn(env, "invalid back reference");
3675  }
3676  }
3677  break;
3678 #endif
3679 
3680 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3681  case 'g':
3682 # ifdef USE_NAMED_GROUP
 /* \g{name}: handled as a back reference. */
3684  PFETCH(c);
3685  if (c == '{') {
3686  r = fetch_named_backref_token(c, tok, &p, end, env);
3687  if (r < 0) return r;
3688  }
3689  else
3690  PUNFETCH;
3691  }
3692 # endif
3693 # ifdef USE_SUBEXP_CALL
 /* \g<name> / \g'name': subexpression call; "0" calls the whole pattern
    (gnum 0) and a leading '+' sets the `rel` flag. */
3695  PFETCH(c);
3696  if (c == '<' || c == '\'') {
3697  int gnum = -1, rel = 0;
3698  UChar* name_end;
3699  OnigCodePoint cnext;
3700 
3701  cnext = PPEEK;
3702  if (cnext == '0') {
3703  PINC;
3704  if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */
3705  PINC;
3706  name_end = p;
3707  gnum = 0;
3708  }
3709  }
3710  else if (cnext == '+') {
3711  PINC;
3712  rel = 1;
3713  }
3714  prev = p;
3715  if (gnum < 0) {
3716  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3717  if (r < 0) return r;
3718  }
3719 
3720  tok->type = TK_CALL;
3721  tok->u.call.name = prev;
3722  tok->u.call.name_end = name_end;
3723  tok->u.call.gnum = gnum;
3724  tok->u.call.rel = rel;
3725  }
3726  else {
3727  onig_syntax_warn(env, "invalid subexp call");
3728  PUNFETCH;
3729  }
3730  }
3731 # endif
3732  break;
3733 #endif
3734 
3735  case 'Q':
3737  tok->type = TK_QUOTE_OPEN;
3738  }
3739  break;
3740 
 /* \p{...} / \P{...}: a leading '^' after the brace inverts the `not` flag. */
3741  case 'p':
3742  case 'P':
3743  if (PPEEK_IS('{') &&
3745  PINC;
3746  tok->type = TK_CHAR_PROPERTY;
3747  tok->u.prop.not = (c == 'P' ? 1 : 0);
3748 
3750  PFETCH(c);
3751  if (c == '^') {
3752  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3753  }
3754  else
3755  PUNFETCH;
3756  }
3757  }
3758  else {
3759  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3760  }
3761  break;
3762 
3763  case 'R':
3765  tok->type = TK_LINEBREAK;
3766  }
3767  break;
3768 
 /* The body of this arm (listing lines 3770-3771) is missing from the listing. */
3769  case 'X':
3772  }
3773  break;
3774 
3775  case 'K':
3777  tok->type = TK_KEEP;
3778  }
3779  break;
3780 
 /* Any other escape: a differing code point from fetch_escaped_value makes
    the token TK_CODE_POINT; otherwise it stays a string character and p is
    reset to just past the character at tok->backp. */
3781  default:
3782  {
3783  OnigCodePoint c2;
3784 
3785  PUNFETCH;
3786  num = fetch_escaped_value(&p, end, env, &c2);
3787  if (num < 0) return num;
3788  /* set_raw: */
3789  if ((OnigCodePoint )tok->u.c != c2) {
3790  tok->type = TK_CODE_POINT;
3791  tok->u.code = (OnigCodePoint )c2;
3792  }
3793  else { /* string */
3794  p = tok->backp + enclen(enc, tok->backp, end);
3795  }
3796  }
3797  break;
3798  }
3799  }
 /* ---- unescaped character ---- */
3800  else {
3801  tok->u.c = c;
3802  tok->escaped = 0;
3803 
3804 #ifdef USE_VARIABLE_META_CHARS
 /* Syntax-defined meta characters jump to the matching arm of the switch. */
3805  if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3807  if (c == MC_ANYCHAR(syn))
3808  goto any_char;
3809  else if (c == MC_ANYTIME(syn))
3810  goto anytime;
3811  else if (c == MC_ZERO_OR_ONE_TIME(syn))
3812  goto zero_or_one_time;
3813  else if (c == MC_ONE_OR_MORE_TIME(syn))
3814  goto one_or_more_time;
3815  else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3816  tok->type = TK_ANYCHAR_ANYTIME;
3817  goto out;
3818  }
3819  }
3820 #endif
3821 
3822  switch (c) {
3823  case '.':
3824  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3825 #ifdef USE_VARIABLE_META_CHARS
3826  any_char:
3827 #endif
3828  tok->type = TK_ANYCHAR;
3829  break;
3830 
3831  case '*':
3832  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
3833 #ifdef USE_VARIABLE_META_CHARS
3834  anytime:
3835 #endif
3836  tok->type = TK_OP_REPEAT;
3837  tok->u.repeat.lower = 0;
3838  tok->u.repeat.upper = REPEAT_INFINITE;
3839  goto greedy_check;
3840  break;
3841 
3842  case '+':
3843  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3844 #ifdef USE_VARIABLE_META_CHARS
3845  one_or_more_time:
3846 #endif
3847  tok->type = TK_OP_REPEAT;
3848  tok->u.repeat.lower = 1;
3849  tok->u.repeat.upper = REPEAT_INFINITE;
3850  goto greedy_check;
3851  break;
3852 
3853  case '?':
3854  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3855 #ifdef USE_VARIABLE_META_CHARS
3856  zero_or_one_time:
3857 #endif
3858  tok->type = TK_OP_REPEAT;
3859  tok->u.repeat.lower = 0;
3860  tok->u.repeat.upper = 1;
3861  goto greedy_check;
3862  break;
3863 
3864  case '{':
3865  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3866  r = fetch_range_quantifier(&p, end, tok, env);
3867  if (r < 0) return r; /* error */
3868  if (r == 0) goto greedy_check;
3869  else if (r == 2) { /* {n} */
3871  goto possessive_check;
3872 
3873  goto greedy_check;
3874  }
3875  /* r == 1 : normal char */
3876  break;
3877 
3878  case '|':
3879  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3880  tok->type = TK_ALT;
3881  break;
3882 
3883  case '(':
3884  if (PPEEK_IS('?') &&
3886  PINC;
 /* "(?#...)": skip the comment up to the first unescaped ')' and restart. */
3887  if (PPEEK_IS('#')) {
3888  PFETCH(c);
3889  while (1) {
3890  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
3891  PFETCH(c);
3892  if (c == MC_ESC(syn)) {
3893  if (!PEND) PFETCH(c);
3894  }
3895  else {
3896  if (c == ')') break;
3897  }
3898  }
3899  goto start;
3900  }
3901 #ifdef USE_PERL_SUBEXP_CALL
3902  /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
3903  c = PPEEK;
3904  if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
3906  /* (?&name), (?n), (?R), (?0) */
3907  int gnum;
3908  UChar *name;
3909  UChar *name_end;
3910 
3911  if (c == 'R' || c == '0') {
3912  PINC; /* skip 'R' / '0' */
3913  if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
3914  PINC; /* skip ')' */
3915  name_end = name = p;
3916  gnum = 0;
3917  }
3918  else {
3919  int numref = 1;
3920  if (c == '&') { /* (?&name) */
3921  PINC;
3922  numref = 0; /* don't allow number name */
3923  }
3924  name = p;
3925  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
3926  if (r < 0) return r;
3927  }
3928 
3929  tok->type = TK_CALL;
3930  tok->u.call.name = name;
3931  tok->u.call.name_end = name_end;
3932  tok->u.call.gnum = gnum;
3933  tok->u.call.rel = 0;
3934  break;
3935  }
3936  else if ((c == '-' || c == '+') &&
3938  /* (?+n), (?-n) */
3939  int gnum;
3940  UChar *name;
3941  UChar *name_end;
3942  OnigCodePoint cnext;
3943  PFETCH_READY;
3944 
3945  PINC; /* skip '-' / '+' */
3946  cnext = PPEEK;
3947  if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
 /* For '-', step back so the sign is passed to fetch_name with the digits. */
3948  if (c == '-') PUNFETCH;
3949  name = p;
3950  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
3951  if (r < 0) return r;
3952 
3953  tok->type = TK_CALL;
3954  tok->u.call.name = name;
3955  tok->u.call.name_end = name_end;
3956  tok->u.call.gnum = gnum;
3957  tok->u.call.rel = 1;
3958  break;
3959  }
3960  }
3961 #endif /* USE_PERL_SUBEXP_CALL */
3962 #ifdef USE_CAPITAL_P_NAMED_GROUP
3963  if (PPEEK_IS('P') &&
3965  int gnum;
3966  UChar *name;
3967  UChar *name_end;
3968  PFETCH_READY;
3969 
3970  PINC; /* skip 'P' */
3972  PFETCH(c);
3973  if (c == '=') { /* (?P=name): backref */
3974  r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
3975  if (r < 0) return r;
3976  break;
3977  }
3978  else if (c == '>') { /* (?P>name): subexp call */
3979  name = p;
3980  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
3981  if (r < 0) return r;
3982 
3983  tok->type = TK_CALL;
3984  tok->u.call.name = name;
3985  tok->u.call.name_end = name_end;
3986  tok->u.call.gnum = gnum;
3987  tok->u.call.rel = 0;
3988  break;
3989  }
3990  }
3991 #endif /* USE_CAPITAL_P_NAMED_GROUP */
3992  PUNFETCH;
3993  }
3994 
3995  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3996  tok->type = TK_SUBEXP_OPEN;
3997  break;
3998 
3999  case ')':
4000  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
4001  tok->type = TK_SUBEXP_CLOSE;
4002  break;
4003 
 /* Line anchors: the subtype depends on IS_SINGLELINE(env->option); the
    alternatives are on lines missing from the listing. */
4004  case '^':
4005  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4006  tok->type = TK_ANCHOR;
4007  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4009  break;
4010 
4011  case '$':
4012  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4013  tok->type = TK_ANCHOR;
4014  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4016  break;
4017 
4018  case '[':
4019  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
4020  tok->type = TK_CC_OPEN;
4021  break;
4022 
4023  case ']':
4024  if (*src > env->pattern) /* /].../ is allowed. */
4025  CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
4026  break;
4027 
 /* In extended mode '#' starts a comment running to the next newline. */
4028  case '#':
4029  if (IS_EXTEND(env->option)) {
4030  while (!PEND) {
4031  PFETCH(c);
4032  if (ONIGENC_IS_CODE_NEWLINE(enc, c))
4033  break;
4034  }
4035  goto start;
4036  break;
4037  }
4038  break;
4039 
 /* In extended mode whitespace is skipped. */
4040  case ' ': case '\t': case '\n': case '\r': case '\f':
4041  if (IS_EXTEND(env->option))
4042  goto start;
4043  break;
4044 
4045  default:
4046  /* string */
4047  break;
4048  }
4049  }
4050 
4051 #ifdef USE_VARIABLE_META_CHARS
4052  out:
4053 #endif
4054  *src = p;
4055  return tok->type;
4056 }
4057 
/*
 * Add the code points described by the range table `mbr` (or, when `not` is
 * non-zero, the code points outside those ranges) to the character class.
 *
 * cc      - character class to extend; code points below sb_out are recorded
 *           in the bitset cc->bs, the rest in cc->mbuf.
 * ctype   - unused here (ARG_UNUSED).
 * not     - 0 to add the ranges themselves, non-zero to add their complement.
 * env     - scan environment, passed to add_code_range_to_buf.
 * sb_out  - first code point that is no longer stored in the bitset.
 * mbr     - range table read through the ONIGENC_CODE_RANGE_* macros.
 *
 * Returns 0 on success or the non-zero result of add_code_range_to_buf.
 */
4058 static int
4059 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
4060  ScanEnv* env,
4061  OnigCodePoint sb_out, const OnigCodePoint mbr[])
4062 {
4063  int i, r;
4064  OnigCodePoint j;
4065 
4066  int n = ONIGENC_CODE_RANGE_NUM(mbr);
4067 
4068  if (not == 0) {
 /* Walk the ranges setting bits until a code point reaches sb_out. */
4069  for (i = 0; i < n; i++) {
4070  for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
4071  j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
4072  if (j >= sb_out) {
 /* Range i straddles sb_out: add its remainder [j, TO] to mbuf and
    advance i so the loop at sb_end starts with the next range. When
    j is still FROM, i is left alone and sb_end adds range i whole. */
4073  if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4074  r = add_code_range_to_buf(&(cc->mbuf), env, j,
4075  ONIGENC_CODE_RANGE_TO(mbr, i));
4076  if (r != 0) return r;
4077  i++;
4078  }
4079 
4080  goto sb_end;
4081  }
4082  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4083  }
4084  }
4085 
 /* Remaining ranges lie at or above sb_out and go to mbuf unchanged. */
4086  sb_end:
4087  for ( ; i < n; i++) {
4088  r = add_code_range_to_buf(&(cc->mbuf), env,
4089  ONIGENC_CODE_RANGE_FROM(mbr, i),
4090  ONIGENC_CODE_RANGE_TO(mbr, i));
4091  if (r != 0) return r;
4092  }
4093  }
4094  else {
4095  OnigCodePoint prev = 0;
4096 
 /* Complement, bitset part: set the bits in the gaps between ranges,
    stopping once a gap reaches sb_out. */
4097  for (i = 0; i < n; i++) {
4098  for (j = prev;
4099  j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
4100  if (j >= sb_out) {
4101  goto sb_end2;
4102  }
4103  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4104  }
4105  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4106  }
 /* All ranges end below sb_out: fill the tail of the bitset. */
4107  for (j = prev; j < sb_out; j++) {
4108  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4109  }
4110 
 /* Complement, mbuf part: add the gaps from sb_out upward, ending with the
    tail up to 0x7fffffff. */
4111  sb_end2:
4112  prev = sb_out;
4113 
4114  for (i = 0; i < n; i++) {
4115  if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4116  r = add_code_range_to_buf(&(cc->mbuf), env, prev,
4117  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
4118  if (r != 0) return r;
4119  }
4120  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4121  }
4122  if (prev < 0x7fffffff) {
4123  r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
4124  if (r != 0) return r;
4125  }
4126  }
4127 
4128  return 0;
4129 }
4130 
/*
 * Add all characters of character type `ctype` (or its complement when `not`
 * is non-zero) to the character class `cc`.
 *
 * cc           - character class to extend.
 * ctype        - one of the ONIGENC_CTYPE_* values.
 * not          - non-zero to add the characters that are NOT of this type.
 * ascii_range  - non-zero to restrict the type to code points below 0x80.
 * env          - scan environment supplying the encoding.
 *
 * If the encoding provides a code range table for the type
 * (ONIGENC_GET_CTYPE_CODE_RANGE returns 0), that table is used. Any other
 * result except ONIG_NO_SUPPORT_CONFIG is returned as is. With
 * ONIG_NO_SUPPORT_CONFIG the single-byte code points are tested one by one.
 *
 * Returns 0 on success, an error code from the helpers, or
 * ONIGERR_PARSER_BUG for a ctype the switch does not handle.
 */
4131 static int
4132 add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
4133 {
4134  int maxcode;
4135  int c, r;
4136  const OnigCodePoint *ranges;
4137  OnigCodePoint sb_out;
4138  OnigEncoding enc = env->enc;
4139 
4140  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
4141  if (r == 0) {
4142  if (ascii_range) {
 /* Build the class in a scratch node, adjust it for the ASCII range, then
    OR it into cc.
    NOTE(review): ccwork.mbuf is released only inside the `r == 0` branch
    below; if add_ctype_to_cc_by_range fails after allocating ccwork.mbuf
    the buffer does not appear to be freed here -- confirm. */
4143  CClassNode ccwork;
4144  initialize_cclass(&ccwork);
4145  r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
4146  ranges);
4147  if (r == 0) {
4148  if (not) {
 /* Negated type: additionally include 0x80..ONIG_LAST_CODE_POINT. */
4149  r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
4150  }
4151  else {
 /* Positive type: intersect with a class holding 0x00..0x7F. That class
    uses mbuf when the encoding's minimum character length exceeds one
    byte, and the bitset otherwise. */
4152  CClassNode ccascii;
4153  initialize_cclass(&ccascii);
4154  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
4155  r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
4156  }
4157  else {
4158  bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
4159  r = 0;
4160  }
4161  if (r == 0) {
4162  r = and_cclass(&ccwork, &ccascii, env);
4163  }
4164  if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
4165  }
4166  if (r == 0) {
4167  r = or_cclass(cc, &ccwork, env);
4168  }
4169  if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
4170  }
4171  }
4172  else {
4173  r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
4174  }
4175  return r;
4176  }
4177  else if (r != ONIG_NO_SUPPORT_CONFIG) {
4178  return r;
4179  }
4180 
 /* No range table: test single-byte code points individually. maxcode is the
    upper limit (exclusive) for positive matches in the GRAPH/PRINT/WORD arms. */
4181  maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4182  r = 0;
4183  switch (ctype) {
 /* These types do not consult ascii_range/maxcode in this fallback. */
4184  case ONIGENC_CTYPE_ALPHA:
4185  case ONIGENC_CTYPE_BLANK:
4186  case ONIGENC_CTYPE_CNTRL:
4187  case ONIGENC_CTYPE_DIGIT:
4188  case ONIGENC_CTYPE_LOWER:
4189  case ONIGENC_CTYPE_PUNCT:
4190  case ONIGENC_CTYPE_SPACE:
4191  case ONIGENC_CTYPE_UPPER:
4192  case ONIGENC_CTYPE_XDIGIT:
4193  case ONIGENC_CTYPE_ASCII:
4194  case ONIGENC_CTYPE_ALNUM:
4195  if (not != 0) {
4196  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4197  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4198  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4199  }
4200  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4201  }
4202  else {
4203  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4204  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4205  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4206  }
4207  }
4208  break;
4209 
 /* GRAPH/PRINT: multi-byte ranges are added to the positive class unless
    ascii_range is set, and to the negated class only when it is set. */
4210  case ONIGENC_CTYPE_GRAPH:
4211  case ONIGENC_CTYPE_PRINT:
4212  if (not != 0) {
4213  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4214  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
4215  || c >= maxcode)
4216  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4217  }
4218  if (ascii_range)
4219  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4220  }
4221  else {
4222  for (c = 0; c < maxcode; c++) {
4223  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4224  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4225  }
4226  if (! ascii_range)
4227  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4228  }
4229  break;
4230 
 /* WORD follows the same multi-byte rule as GRAPH/PRINT; the negated loop
    also skips code points for which ONIGENC_CODE_TO_MBCLEN is not positive. */
4231  case ONIGENC_CTYPE_WORD:
4232  if (not == 0) {
4233  for (c = 0; c < maxcode; c++) {
4234  if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
4235  }
4236  if (! ascii_range)
4237  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4238  }
4239  else {
4240  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4241  if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
4242  && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
4243  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4244  }
4245  if (ascii_range)
4246  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4247  }
4248  break;
4249 
4250  default:
4251  return ONIGERR_PARSER_BUG;
4252  break;
4253  }
4254 
4255  return r;
4256 }
4257 
/*
 * Try to read a POSIX bracket name at *src: an optional '^' (negation), one of
 * the names in the PBS table, then ":]".  On a match the corresponding ctype
 * is added to cc with add_ctype_to_cc(); it is added to asc_cc as well when
 * asc_cc is non-NULL, the ctype is neither WORD nor ASCII, and ascii_range is
 * off.
 *
 * Returns 0 when a bracket was consumed (*src is moved past the ":]"),
 * 1 when the text is not a POSIX bracket (not an error), or the non-zero
 * value returned by add_ctype_to_cc().
 *
 * NOTE(review): the embedded listing numbers skip 4266-4279 (the PBS entries),
 * 4305 and 4336, so those statements are not visible in this listing.
 */
4258 static int
4259 parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
4260  UChar** src, UChar* end, ScanEnv* env)
4261 {
4262 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4263 #define POSIX_BRACKET_NAME_MIN_LEN 4
4264 
4265  static const PosixBracketEntryType PBS[] = {
4280  };
4281 
4282  const PosixBracketEntryType *pb;
4283  int not, i, r;
4284  int ascii_range;
4285  OnigCodePoint c;
4286  OnigEncoding enc = env->enc;
4287  UChar *p = *src;
4288 
 /* a leading '^' selects the negated form of the bracket */
4289  if (PPEEK_IS('^')) {
4290  PINC_S;
4291  not = 1;
4292  }
4293  else
4294  not = 0;
4295 
 /* too short to hold a name of minimum length plus 3 more characters */
4296  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
4297  goto not_posix_bracket;
4298 
4299  ascii_range = IS_ASCII_RANGE(env->option) &&
4300  ! IS_POSIX_BRACKET_ALL_RANGE(env->option);
4301  for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
4302  if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
4303  p = (UChar* )onigenc_step(enc, p, end, pb->len);
 /* NOTE(review): the statement controlled by this `if` (listing line 4305)
    is missing from this listing -- confirm against the real source. */
4304  if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
4306 
4307  r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
4308  if (r != 0) return r;
4309 
4310  if (IS_NOT_NULL(asc_cc)) {
4311  if (pb->ctype != ONIGENC_CTYPE_WORD &&
4312  pb->ctype != ONIGENC_CTYPE_ASCII &&
4313  !ascii_range)
4314  r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
4315  if (r != 0) return r;
4316  }
4317 
 /* step over the two characters of ":]" */
4318  PINC_S; PINC_S;
4319  *src = p;
4320  return 0;
4321  }
4322  }
4323 
4324  not_posix_bracket:
 /* Look ahead (at most POSIX_BRACKET_CHECK_LIMIT_LENGTH characters) for a
    ':' or ']' following the unrecognised text. */
4325  c = 0;
4326  i = 0;
4327  while (!PEND && ((c = PPEEK) != ':') && c != ']') {
4328  PINC_S;
4329  if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
4330  }
4331  if (c == ':' && ! PEND) {
4332  PINC_S;
4333  if (! PEND) {
4334  PFETCH_S(c);
 /* NOTE(review): the statement for the ":]" case (listing line 4336) is
    missing from this listing. */
4335  if (c == ']')
4337  }
4338  }
4339 
4340  return 1; /* 1: is not POSIX bracket, but no error. */
4341 }
4342 
/*
 * Scan a property name that starts at *src and ends at the next '}', and
 * convert it to a ctype with ONIGENC_PROPERTY_NAME_TO_CTYPE().
 *
 * On success *src is moved just past the '}' and the ctype (>= 0) is returned.
 * If the conversion yields a negative value, or one of '(' ')' '{' '|' is met
 * first, the loop is left and r is returned without updating *src.
 *
 * NOTE(review): r starts at 0 and nothing visible here changes it when the
 * input ends before a '}' is found, so that path appears to return 0.
 * Listing lines 4365 and 4370 are missing from this listing -- confirm.
 */
4343 static int
4344 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
4345 {
4346  int r;
4347  OnigCodePoint c;
4348  OnigEncoding enc = env->enc;
4349  UChar *prev, *start, *p = *src;
4350 
4351  r = 0;
4352  start = prev = p;
4353 
4354  while (!PEND) {
 /* prev marks the start of the character fetched next, i.e. the end of the
    name when that character turns out to be '}' */
4355  prev = p;
4356  PFETCH_S(c);
4357  if (c == '}') {
4358  r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4359  if (r < 0) break;
4360 
4361  *src = p;
4362  return r;
4363  }
4364  else if (c == '(' || c == ')' || c == '{' || c == '|') {
4366  break;
4367  }
4368  }
4369 
4371  return r;
4372 }
4373 
/* Forward declaration: cclass_case_fold() is called by parse_char_property()
   before its definition further down in this file. */
4374 static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
4375 
/*
 * Build a character-class node in *np for a character-property token.
 *
 * The property name is read with fetch_char_property_to_ctype(); the ctype is
 * added to a new class node, the class is marked NOT when tok->u.prop.not is
 * set, and under IS_IGNORECASE the class is case-folded with
 * cclass_case_fold() (skipped for ONIGENC_CTYPE_ASCII).
 *
 * Returns the negative value from fetch_char_property_to_ctype(), the non-zero
 * value from add_ctype_to_cc(), or the result of cclass_case_fold() / 0.
 *
 * NOTE(review): listing line 4387 (directly after node_new_cclass()) is
 * missing from this listing; presumably a NULL check -- confirm.
 */
4376 static int
4377 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4378  ScanEnv* env)
4379 {
4380  int r, ctype;
4381  CClassNode* cc;
4382 
4383  ctype = fetch_char_property_to_ctype(src, end, env);
4384  if (ctype < 0) return ctype;
4385 
4386  *np = node_new_cclass();
4388  cc = NCCLASS(*np);
4389  r = add_ctype_to_cc(cc, ctype, 0, 0, env);
4390  if (r != 0) return r;
4391  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4392 
4393  if (IS_IGNORECASE(env->option)) {
 /* the same class is passed as both cc and asc_cc */
4394  if (ctype != ONIGENC_CTYPE_ASCII)
4395  r = cclass_case_fold(np, cc, cc, env);
4396  }
4397  return r;
4398 }
4399 
4400 
/*
 * State of the character-class parser (used by next_state_class(),
 * next_state_val() and parse_char_class()).  Enumerators referenced in this
 * file: CCS_VALUE, CCS_RANGE, CCS_COMPLETE, CCS_START.
 *
 * NOTE(review): the enumerator lists (listing lines 4402-4405 and 4408-4411)
 * are missing from this listing.  The second "};" below presumably closes
 * enum CCVALTYPE (CCV_SB, CCV_CODE_POINT, CCV_CLASS are used in this file) --
 * confirm.
 */
4401 enum CCSTATE {
4406 };
4407 
4412 };
4413 
/*
 * State transition used after a whole class (POSIX bracket, char type or
 * property) has been added.
 *
 * If a single value is still pending (*state == CCS_VALUE and *type is not
 * CCV_CLASS) it is flushed first: a CCV_SB value sets one bit in cc->bs, a
 * CCV_CODE_POINT value is added to cc->mbuf; the same is done for asc_cc when
 * it is non-NULL.  Afterwards *state becomes CCS_VALUE and *type CCV_CLASS.
 *
 * Returns 0, or the negative value returned by add_code_range() /
 * add_code_range0().
 *
 * NOTE(review): the statement controlled by `if (*state == CCS_RANGE)`
 * (listing line 4422) is missing from this listing.
 */
4414 static int
4415 next_state_class(CClassNode* cc, CClassNode* asc_cc,
4416  OnigCodePoint* vs, enum CCVALTYPE* type,
4417  enum CCSTATE* state, ScanEnv* env)
4418 {
4419  int r;
4420 
4421  if (*state == CCS_RANGE)
4423 
4424  if (*state == CCS_VALUE && *type != CCV_CLASS) {
4425  if (*type == CCV_SB) {
4426  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4427  if (IS_NOT_NULL(asc_cc))
4428  BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
4429  }
4430  else if (*type == CCV_CODE_POINT) {
4431  r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4432  if (r < 0) return r;
4433  if (IS_NOT_NULL(asc_cc)) {
4434  r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
4435  if (r < 0) return r;
4436  }
4437  }
4438  }
4439 
4440  *state = CCS_VALUE;
4441  *type = CCV_CLASS;
4442  return 0;
4443 }
4444 
/*
 * State transition used when a single value `to` (of type `intype`) has been
 * read inside a character class.
 *
 *   CCS_VALUE     : the pending value *from is added to cc (bit for CCV_SB,
 *                   code range for CCV_CODE_POINT), and to asc_cc if non-NULL.
 *   CCS_RANGE     : the range *from..to is added; the state becomes
 *                   CCS_COMPLETE.
 *   CCS_COMPLETE,
 *   CCS_START     : the state becomes CCS_VALUE.
 *
 * In every non-error case *from, *from_israw and *type are then overwritten
 * with to, to_israw and intype, so the new value becomes the pending one.
 *
 * Returns 0, or a negative value from add_code_range() / add_code_range0().
 *
 * NOTE(review): the embedded listing numbers skip 4475, 4478, 4481, 4498 and
 * 4501; the statements handling out-of-range values and `*from > to` are
 * therefore missing from this listing.
 */
4445 static int
4446 next_state_val(CClassNode* cc, CClassNode* asc_cc,
4447  OnigCodePoint *from, OnigCodePoint to,
4448  int* from_israw, int to_israw,
4449  enum CCVALTYPE intype, enum CCVALTYPE* type,
4450  enum CCSTATE* state, ScanEnv* env)
4451 {
4452  int r;
4453 
4454  switch (*state) {
4455  case CCS_VALUE:
4456  if (*type == CCV_SB) {
4457  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from));
4458  if (IS_NOT_NULL(asc_cc))
4459  BITSET_SET_BIT(asc_cc->bs, (int )(*from));
4460  }
4461  else if (*type == CCV_CODE_POINT) {
4462  r = add_code_range(&(cc->mbuf), env, *from, *from);
4463  if (r < 0) return r;
4464  if (IS_NOT_NULL(asc_cc)) {
4465  r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0);
4466  if (r < 0) return r;
4467  }
4468  }
4469  break;
4470 
4471  case CCS_RANGE:
 /* both ends of the range have the same value type */
4472  if (intype == *type) {
4473  if (intype == CCV_SB) {
4474  if (*from > 0xff || to > 0xff)
4476 
4477  if (*from > to) {
4479  goto ccs_range_end;
4480  else
4482  }
4483  bitset_set_range(env, cc->bs, (int )*from, (int )to);
4484  if (IS_NOT_NULL(asc_cc))
4485  bitset_set_range(env, asc_cc->bs, (int )*from, (int )to);
4486  }
4487  else {
4488  r = add_code_range(&(cc->mbuf), env, *from, to);
4489  if (r < 0) return r;
4490  if (IS_NOT_NULL(asc_cc)) {
4491  r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0);
4492  if (r < 0) return r;
4493  }
4494  }
4495  }
 /* mixed value types: the part up to 0xff goes into the bitset and the whole
    range is also handed to add_code_range() */
4496  else {
4497  if (*from > to) {
4499  goto ccs_range_end;
4500  else
4502  }
4503  bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4504  r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
4505  if (r < 0) return r;
4506  if (IS_NOT_NULL(asc_cc)) {
4507  bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4508  r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0);
4509  if (r < 0) return r;
4510  }
4511  }
4512  ccs_range_end:
4513  *state = CCS_COMPLETE;
4514  break;
4515 
4516  case CCS_COMPLETE:
4517  case CCS_START:
4518  *state = CCS_VALUE;
4519  break;
4520 
4521  default:
4522  break;
4523  }
4524 
 /* remember the new value as the pending one */
4525  *from_israw = to_israw;
4526  *from = to;
4527  *type = intype;
4528  return 0;
4529 }
4530 
4531 static int
4532 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4533  ScanEnv* env)
4534 {
4535  int in_esc;
4536  OnigCodePoint code;
4537  OnigEncoding enc = env->enc;
4538  UChar* p = from;
4539 
4540  in_esc = 0;
4541  while (! PEND) {
4542  if (ignore_escaped && in_esc) {
4543  in_esc = 0;
4544  }
4545  else {
4546  PFETCH_S(code);
4547  if (code == c) return 1;
4548  if (code == MC_ESC(env->syntax)) in_esc = 1;
4549  }
4550  }
4551  return 0;
4552 }
4553 
/*
 * Parse the contents of a bracketed character class starting at *src.
 *
 * A class node is created in *np.  When IS_IGNORECASE(env->option) holds, a
 * second class node is created in *asc_np and receives the same single values
 * and ranges, plus a subset of the char types; otherwise *asc_np stays
 * NULL_NODE.
 *
 * Tokens are read with fetch_token_in_cc() until TK_CC_CLOSE.  Single values
 * go through next_state_val(), whole classes through next_state_class().
 * A nested '[' recurses into this function and the result is merged with
 * or_cclass().  "&&" keeps the left operand in prev_cc / asc_prev_cc, collects
 * the right operand in work_cc / asc_work_cc and combines them with
 * and_cclass().
 *
 * On success the position after the class is stored in *src,
 * env->parse_depth is decremented and 0 is returned.  On failure a non-zero
 * code is returned; the `err` label frees the mbuf of cc / asc_cc only when
 * they no longer point at the classes owned by *np / *asc_np.
 *
 * NOTE(review): the embedded listing numbers skip 4574, 4596, 4636-4637, 4653,
 * 4659, 4768, 4802, 4806, 4867, 4904, 4909, 4914, 4924 and 4926, so several
 * statements (mostly error assignments and checks) are missing from this
 * listing.
 * NOTE(review): env->parse_depth is incremented on entry but, as far as this
 * listing shows, only decremented on the success path; also the TK_CHAR_TYPE
 * and TK_CHAR_PROPERTY branches use `return r`/`return ctype` instead of
 * `goto err` -- confirm this is intended.
 */
4554 static int
4555 parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
4556  ScanEnv* env)
4557 {
4558  int r, neg, len, fetched, and_start;
4559  OnigCodePoint v, vs;
4560  UChar *p;
4561  Node* node;
4562  Node* asc_node;
4563  CClassNode *cc, *prev_cc;
4564  CClassNode *asc_cc, *asc_prev_cc;
4565  CClassNode work_cc, asc_work_cc;
4566 
4567  enum CCSTATE state;
4568  enum CCVALTYPE val_type, in_type;
4569  int val_israw, in_israw;
4570 
4571  *np = *asc_np = NULL_NODE;
4572  env->parse_depth++;
4573  if (env->parse_depth > ParseDepthLimit)
4575  prev_cc = asc_prev_cc = (CClassNode* )NULL;
4576  r = fetch_token_in_cc(tok, src, end, env);
 /* an unescaped leading '^' negates the whole class */
4577  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4578  neg = 1;
4579  r = fetch_token_in_cc(tok, src, end, env);
4580  }
4581  else {
4582  neg = 0;
4583  }
4584 
4585  if (r < 0) return r;
 /* ']' as the very first token: empty class unless another ']' follows */
4586  if (r == TK_CC_CLOSE) {
4587  if (! code_exist_check((OnigCodePoint )']',
4588  *src, env->pattern_end, 1, env))
4589  return ONIGERR_EMPTY_CHAR_CLASS;
4590 
4591  CC_ESC_WARN(env, (UChar* )"]");
4592  r = tok->type = TK_CHAR; /* allow []...] */
4593  }
4594 
4595  *np = node = node_new_cclass();
4597  cc = NCCLASS(node);
4598 
4599  if (IS_IGNORECASE(env->option)) {
4600  *asc_np = asc_node = node_new_cclass();
4601  CHECK_NULL_RETURN_MEMERR(asc_node);
4602  asc_cc = NCCLASS(asc_node);
4603  }
4604  else {
4605  asc_node = NULL_NODE;
4606  asc_cc = NULL;
4607  }
4608 
4609  and_start = 0;
4610  state = CCS_START;
4611  p = *src;
 /* main loop: r holds the type of the current token */
4612  while (r != TK_CC_CLOSE) {
 /* fetched != 0 means the next token has already been read into tok */
4613  fetched = 0;
4614  switch (r) {
4615  case TK_CHAR:
4616  if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
4617  (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
4618  in_type = CCV_CODE_POINT;
4619  }
4620  else if (len < 0) {
4621  r = len;
4622  goto err;
4623  }
4624  else {
4625  sb_char:
4626  in_type = CCV_SB;
4627  }
4628  v = (OnigCodePoint )tok->u.c;
4629  in_israw = 0;
4630  goto val_entry2;
4631  break;
4632 
4633  case TK_RAW_BYTE:
4634  /* tok->base != 0 : octal or hexadec. */
4635  if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4638  UChar* psave = p;
4639  int i, base = tok->base;
4640 
 /* collect following raw bytes of the same base into buf to form one
    multibyte character */
4641  buf[0] = (UChar )tok->u.c;
4642  for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4643  r = fetch_token_in_cc(tok, &p, end, env);
4644  if (r < 0) goto err;
4645  if (r != TK_RAW_BYTE || tok->base != base) {
4646  fetched = 1;
4647  break;
4648  }
4649  buf[i] = (UChar )tok->u.c;
4650  }
4651 
4652  if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4654  goto err;
4655  }
4656 
4657  len = enclen(env->enc, buf, buf + i);
4658  if (i < len) {
4660  goto err;
4661  }
4662  else if (i > len) { /* fetch back */
4663  p = psave;
4664  for (i = 1; i < len; i++) {
4665  (void)fetch_token_in_cc(tok, &p, end, env);
4666  /* no need to check the return value (already checked above) */
4667  }
4668  fetched = 0;
4669  }
4670 
4671  if (i == 1) {
4672  v = (OnigCodePoint )buf[0];
4673  goto raw_single;
4674  }
4675  else {
4676  v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4677  in_type = CCV_CODE_POINT;
4678  }
4679  }
4680  else {
4681  v = (OnigCodePoint )tok->u.c;
4682  raw_single:
4683  in_type = CCV_SB;
4684  }
4685  in_israw = 1;
4686  goto val_entry2;
4687  break;
4688 
4689  case TK_CODE_POINT:
4690  v = tok->u.code;
4691  in_israw = 1;
 /* val_entry: derive in_type from the encoded length of v;
    val_entry2: in_type has already been decided */
4692  val_entry:
4693  len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4694  if (len < 0) {
4695  r = len;
4696  goto err;
4697  }
4698  in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4699  val_entry2:
4700  r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4701  &state, env);
4702  if (r != 0) goto err;
4703  break;
4704 
4705  case TK_POSIX_BRACKET_OPEN:
4706  r = parse_posix_bracket(cc, asc_cc, &p, end, env);
4707  if (r < 0) goto err;
4708  if (r == 1) { /* is not POSIX bracket */
4709  CC_ESC_WARN(env, (UChar* )"[");
4710  p = tok->backp;
4711  v = (OnigCodePoint )tok->u.c;
4712  in_israw = 0;
4713  goto val_entry;
4714  }
4715  goto next_class;
4716  break;
4717 
4718  case TK_CHAR_TYPE:
4719  r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
4720  IS_ASCII_RANGE(env->option), env);
4721  if (r != 0) return r;
4722  if (IS_NOT_NULL(asc_cc)) {
4723  if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
4724  r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
4725  IS_ASCII_RANGE(env->option), env);
4726  if (r != 0) return r;
4727  }
4728 
4729  next_class:
4730  r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
4731  if (r != 0) goto err;
4732  break;
4733 
4734  case TK_CHAR_PROPERTY:
4735  {
4736  int ctype;
4737 
4738  ctype = fetch_char_property_to_ctype(&p, end, env);
4739  if (ctype < 0) return ctype;
4740  r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
4741  if (r != 0) return r;
4742  if (IS_NOT_NULL(asc_cc)) {
4743  if (ctype != ONIGENC_CTYPE_ASCII)
4744  r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
4745  if (r != 0) return r;
4746  }
4747  goto next_class;
4748  }
4749  break;
4750 
 /* '-' : its meaning depends on the current state */
4751  case TK_CC_RANGE:
4752  if (state == CCS_VALUE) {
4753  r = fetch_token_in_cc(tok, &p, end, env);
4754  if (r < 0) goto err;
4755  fetched = 1;
4756  if (r == TK_CC_CLOSE) { /* allow [x-] */
4757  range_end_val:
4758  v = (OnigCodePoint )'-';
4759  in_israw = 0;
4760  goto val_entry;
4761  }
4762  else if (r == TK_CC_AND) {
4763  CC_ESC_WARN(env, (UChar* )"-");
4764  goto range_end_val;
4765  }
4766 
4767  if (val_type == CCV_CLASS) {
4769  goto err;
4770  }
4771 
4772  state = CCS_RANGE;
4773  }
4774  else if (state == CCS_START) {
4775  /* [-xa] is allowed */
4776  v = (OnigCodePoint )tok->u.c;
4777  in_israw = 0;
4778 
4779  r = fetch_token_in_cc(tok, &p, end, env);
4780  if (r < 0) goto err;
4781  fetched = 1;
4782  /* [--x] or [a&&-x] is warned. */
4783  if (r == TK_CC_RANGE || and_start != 0)
4784  CC_ESC_WARN(env, (UChar* )"-");
4785 
4786  goto val_entry;
4787  }
4788  else if (state == CCS_RANGE) {
4789  CC_ESC_WARN(env, (UChar* )"-");
4790  goto sb_char; /* [!--x] is allowed */
4791  }
4792  else { /* CCS_COMPLETE */
4793  r = fetch_token_in_cc(tok, &p, end, env);
4794  if (r < 0) goto err;
4795  fetched = 1;
4796  if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4797  else if (r == TK_CC_AND) {
4798  CC_ESC_WARN(env, (UChar* )"-");
4799  goto range_end_val;
4800  }
4801 
4803  CC_ESC_WARN(env, (UChar* )"-");
4804  goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
4805  }
4807  goto err;
4808  }
4809  break;
4810 
4811  case TK_CC_CC_OPEN: /* [ */
4812  {
4813  Node *anode, *aasc_node;
4814  CClassNode* acc;
4815 
 /* nested class: parse recursively, OR the result into the current class,
    then free the temporary nodes */
4816  r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
4817  if (r == 0) {
4818  acc = NCCLASS(anode);
4819  r = or_cclass(cc, acc, env);
4820  }
4821  if (r == 0 && IS_NOT_NULL(aasc_node)) {
4822  acc = NCCLASS(aasc_node);
4823  r = or_cclass(asc_cc, acc, env);
4824  }
4825  onig_node_free(anode);
4826  onig_node_free(aasc_node);
4827  if (r != 0) goto err;
4828  }
4829  break;
4830 
4831  case TK_CC_AND: /* && */
4832  {
 /* flush a pending single value before starting the next operand */
4833  if (state == CCS_VALUE) {
4834  r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4835  &val_type, &state, env);
4836  if (r != 0) goto err;
4837  }
4838  /* initialize local variables */
4839  and_start = 1;
4840  state = CCS_START;
4841 
4842  if (IS_NOT_NULL(prev_cc)) {
4843  r = and_cclass(prev_cc, cc, env);
4844  if (r != 0) goto err;
4845  bbuf_free(cc->mbuf);
4846  if (IS_NOT_NULL(asc_cc)) {
4847  r = and_cclass(asc_prev_cc, asc_cc, env);
4848  if (r != 0) goto err;
4849  bbuf_free(asc_cc->mbuf);
4850  }
4851  }
4852  else {
 /* first "&&": keep the left operand and continue in the work class */
4853  prev_cc = cc;
4854  cc = &work_cc;
4855  if (IS_NOT_NULL(asc_cc)) {
4856  asc_prev_cc = asc_cc;
4857  asc_cc = &asc_work_cc;
4858  }
4859  }
4860  initialize_cclass(cc);
4861  if (IS_NOT_NULL(asc_cc))
4862  initialize_cclass(asc_cc);
4863  }
4864  break;
4865 
4866  case TK_EOT:
4868  goto err;
4869  break;
4870  default:
4871  r = ONIGERR_PARSER_BUG;
4872  goto err;
4873  break;
4874  }
4875 
4876  if (fetched)
4877  r = tok->type;
4878  else {
4879  r = fetch_token_in_cc(tok, &p, end, env);
4880  if (r < 0) goto err;
4881  }
4882  }
4883 
 /* flush the last pending single value */
4884  if (state == CCS_VALUE) {
4885  r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4886  &val_type, &state, env);
4887  if (r != 0) goto err;
4888  }
4889 
 /* finish a pending "&&": intersect and switch back to the node-owned class */
4890  if (IS_NOT_NULL(prev_cc)) {
4891  r = and_cclass(prev_cc, cc, env);
4892  if (r != 0) goto err;
4893  bbuf_free(cc->mbuf);
4894  cc = prev_cc;
4895  if (IS_NOT_NULL(asc_cc)) {
4896  r = and_cclass(asc_prev_cc, asc_cc, env);
4897  if (r != 0) goto err;
4898  bbuf_free(asc_cc->mbuf);
4899  asc_cc = asc_prev_cc;
4900  }
4901  }
4902 
4903  if (neg != 0) {
4905  if (IS_NOT_NULL(asc_cc))
4906  NCCLASS_SET_NOT(asc_cc);
4907  }
4908  else {
4910  if (IS_NOT_NULL(asc_cc))
4911  NCCLASS_CLEAR_NOT(asc_cc);
4912  }
4913  if (IS_NCCLASS_NOT(cc) &&
4915  int is_empty;
4916 
4917  is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4918  if (is_empty != 0)
4919  BITSET_IS_EMPTY(cc->bs, is_empty);
4920 
4921  if (is_empty == 0) {
4922 #define NEWLINE_CODE 0x0a
4923 
4925  if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4927  else {
4928  r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4929  if (r < 0) goto err;
4930  }
4931  }
4932  }
4933  }
4934  *src = p;
4935  env->parse_depth--;
4936  return 0;
4937 
4938  err:
 /* free only the buffers of the stack-allocated work classes; the classes
    that belong to *np / *asc_np are left to whoever frees those nodes */
4939  if (cc != NCCLASS(*np))
4940  bbuf_free(cc->mbuf);
4941  if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
4942  bbuf_free(asc_cc->mbuf);
4943  return r;
4944 }
4945 
/* Forward declaration: parse_subexp() is called by parse_enclose() below,
   before its definition. */
4946 static int parse_subexp(Node** top, OnigToken* tok, int term,
4947  UChar** src, UChar* end, ScanEnv* env);
4948 
/*
 * Parse a parenthesised construct starting at *src and build its node in *np.
 *
 * When the text starts with '?', the following character selects the form
 * (the case labels below): ':' plain grouping, '=' / '!' / '<' look-around or
 * named group, '>' stop-backtrack, '~' absent operator, '\'' and 'P' named
 * groups, '@' capture history, '(' conditional, and the option letters
 * (optionally after '^').  Otherwise a numbered memory (capture) node is
 * created.
 *
 * Return values visible in this listing:
 *   1  - a plain group was parsed (`group` label); *np is the sub-expression
 *   2  - option-only form; *np is an option node without target
 *   0  - any other success; *np is the enclosing node with its target set
 *   <0 - error from a helper; ONIGERR_MEMORY through the `err` label
 * On every success path *src is moved past the parsed text.
 *
 * NOTE(review): the embedded listing numbers are not contiguous inside this
 * function (first gap at 4967, many more afterwards); a number of conditions,
 * node constructions and error returns are therefore missing from this
 * listing and the control flow as printed is incomplete.
 */
4949 static int
4950 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4951  ScanEnv* env)
4952 {
4953  int r = 0, num;
4954  Node *target, *work1 = NULL, *work2 = NULL;
4955  OnigOptionType option;
4956  OnigCodePoint c;
4957  OnigEncoding enc = env->enc;
4958 
4959 #ifdef USE_NAMED_GROUP
4960  int list_capture;
4961 #endif
4962 
4963  UChar* p = *src;
4964  PFETCH_READY;
4965 
4966  *np = NULL;
4968 
4969  option = env->option;
4970  if (PPEEK_IS('?') &&
4972  PINC;
4973  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4974 
4975  PFETCH(c);
4976  switch (c) {
4977  case ':': /* (?:...) grouping only */
4978  group:
4979  r = fetch_token(tok, &p, end, env);
4980  if (r < 0) return r;
4981  r = parse_subexp(np, tok, term, &p, end, env);
4982  if (r < 0) return r;
4983  *src = p;
4984  return 1; /* group */
4985  break;
4986 
4987  case '=':
4989  break;
4990  case '!': /* preceding read */
4992  break;
4993  case '>': /* (?>...) stop backtrack */
4994  *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4995  break;
4996  case '~': /* (?~...) absent operator */
4998  *np = node_new_enclose(ENCLOSE_ABSENT);
4999  }
5000  else {
5002  }
5003  break;
5004 
5005 #ifdef USE_NAMED_GROUP
5006  case '\'':
5008  goto named_group1;
5009  }
5010  else
5012  break;
5013 
5014 # ifdef USE_CAPITAL_P_NAMED_GROUP
5015  case 'P': /* (?P<name>...) */
5016  if (!PEND &&
5018  PFETCH(c);
5019  if (c == '<') goto named_group1;
5020  }
5022  break;
5023 # endif
5024 #endif
5025 
5026  case '<': /* look behind (?<=...), (?<!...) */
5028  PFETCH(c);
5029  if (c == '=')
5031  else if (c == '!')
5033 #ifdef USE_NAMED_GROUP
5034  else { /* (?<name>...) */
5036  UChar *name;
5037  UChar *name_end;
5038 
5039  PUNFETCH;
5040  c = '<';
5041 
5042  named_group1:
5043  list_capture = 0;
5044 
5045 # ifdef USE_CAPTURE_HISTORY
5046  named_group2:
5047 # endif
 /* read the group name, allocate a memory slot for it and register the
    name -> number mapping with name_add() */
5048  name = p;
5049  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
5050  if (r < 0) return r;
5051 
5052  num = scan_env_add_mem_entry(env);
5053  if (num < 0) return num;
5054  if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
5056 
5057  r = name_add(env->reg, name, name_end, num, env);
5058  if (r != 0) return r;
5059  *np = node_new_enclose_memory(env->option, 1);
5061  NENCLOSE(*np)->regnum = num;
5062  if (list_capture != 0)
5063  BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5064  env->num_named++;
5065  }
5066  else {
5068  }
5069  }
5070 #else
5071  else {
5073  }
5074 #endif
5075  break;
5076 
5077 #ifdef USE_CAPTURE_HISTORY
5078  case '@':
5080 # ifdef USE_NAMED_GROUP
5081  if (!PEND &&
5083  PFETCH(c);
5084  if (c == '<' || c == '\'') {
5085  list_capture = 1;
5086  goto named_group2; /* (?@<name>...) */
5087  }
5088  PUNFETCH;
5089  }
5090 # endif
5091  *np = node_new_enclose_memory(env->option, 0);
5093  num = scan_env_add_mem_entry(env);
5094  if (num < 0) return num;
5095  if (num >= (int )BIT_STATUS_BITS_NUM)
5097 
5098  NENCLOSE(*np)->regnum = num;
5099  BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5100  }
5101  else {
5103  }
5104  break;
5105 #endif /* USE_CAPTURE_HISTORY */
5106 
5107  case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
5108  if (!PEND &&
5110  UChar *name = NULL;
5111  UChar *name_end;
5112  PFETCH(c);
5113  if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */
5114  PUNFETCH;
5115  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
5116  if (r < 0) return r;
5117 #if 0
5118  /* Relative number is not currently supported. (same as Perl) */
5119  if (num < 0) {
5120  num = BACKREF_REL_TO_ABS(num, env);
5121  if (num <= 0)
5122  return ONIGERR_INVALID_BACKREF;
5123  }
5124 #endif
5126  if (num > env->num_mem ||
5127  IS_NULL(SCANENV_MEM_NODES(env)[num]))
5128  return ONIGERR_INVALID_BACKREF;
5129  }
5130  }
5131 #ifdef USE_NAMED_GROUP
5132  else if (c == '<' || c == '\'') { /* (<name>), ('name') */
5133  name = p;
5134  r = fetch_named_backref_token(c, tok, &p, end, env);
5135  if (r < 0) return r;
5136  if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
5137  PINC;
5138 
5140  num = tok->u.backref.ref1;
5141  }
5142  else {
5143  /* FIXME:
5144  * Use left most named group for now. This is the same as Perl.
5145  * However this should use the same strategy as normal back-
5146  * references on Ruby syntax; search right to left. */
5147  int len = tok->u.backref.num;
5148  num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;
5149  }
5150  }
5151 #endif
5152  else
5154  *np = node_new_enclose(ENCLOSE_CONDITION);
5156  NENCLOSE(*np)->regnum = num;
 /* a non-NULL name means the condition was written as a named reference */
5157  if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
5158  }
5159  else
5161  break;
5162 
5163 #if 0
5164  case '|': /* branch reset: (?|...) */
5166  /* TODO */
5167  }
5168  else
5170  break;
5171 #endif
5172 
5173  case '^': /* loads default options */
5174  if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
5175  /* d-imsx */
5176  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5177  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5178  ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
5179  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5180  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5181  PFETCH(c);
5182  }
5183 #if 0
5184  else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5185  /* d-imx */
5186  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5189  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5190  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5191  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5192  PFETCH(c);
5193  }
5194 #endif
5195  else {
5197  }
5198  /* fall through */
5199 #ifdef USE_POSIXLINE_OPTION
5200  case 'p':
5201 #endif
5202  case '-': case 'i': case 'm': case 's': case 'x':
5203  case 'a': case 'd': case 'l': case 'u':
5204  {
 /* neg becomes 1 once '-' has been seen; it is passed as the third
    argument of ONOFF() for the letters that follow */
5205  int neg = 0;
5206 
 /* consume option letters one at a time until ')' or ':' */
5207  while (1) {
5208  switch (c) {
5209  case ':':
5210  case ')':
5211  break;
5212 
5213  case '-': neg = 1; break;
5214  case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
5215  case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
5216  case 's':
5217  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
5218  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5219  }
5220  else
5222  break;
5223 
5224  case 'm':
5225  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
5226  ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
5227  }
5228  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5229  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5230  }
5231  else
5233  break;
5234 #ifdef USE_POSIXLINE_OPTION
5235  case 'p':
5237  break;
5238 #endif
5239 
5240  case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
5241  if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
5243  (neg == 0)) {
5244  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5247  }
5248  else
5250  break;
5251 
5252  case 'u':
5253  if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
5255  (neg == 0)) {
5256  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5259  }
5260  else
5262  break;
5263 
5264  case 'd':
5265  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) &&
5266  (neg == 0)) {
5267  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5268  }
5269  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
5270  (neg == 0)) {
5271  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5274  }
5275  else
5277  break;
5278 
5279  case 'l':
5280  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
5281  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5282  }
5283  else
5285  break;
5286 
5287  default:
5289  }
5290 
5291  if (c == ')') {
5292  *np = node_new_option(option);
5294  *src = p;
5295  return 2; /* option only */
5296  }
5297  else if (c == ':') {
 /* options apply only while the enclosed sub-expression is parsed;
    env->option is restored afterwards */
5298  OnigOptionType prev = env->option;
5299 
5300  env->option = option;
5301  r = fetch_token(tok, &p, end, env);
5302  if (r < 0) {
5303  env->option = prev;
5304  return r;
5305  }
5306  r = parse_subexp(&target, tok, term, &p, end, env);
5307  env->option = prev;
5308  if (r < 0) return r;
5309  *np = node_new_option(option);
5311  NENCLOSE(*np)->target = target;
5312  *src = p;
5313  return 0;
5314  }
5315 
5316  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5317  PFETCH(c);
5318  }
5319  }
5320  break;
5321 
5322  default:
5324  }
5325  }
5326  else {
5328  goto group;
5329 
5330  *np = node_new_enclose_memory(env->option, 0);
5332  num = scan_env_add_mem_entry(env);
5333  if (num < 0) return num;
5334  NENCLOSE(*np)->regnum = num;
5335  }
5336 
 /* common tail: parse the enclosed sub-expression and attach it to *np */
5338  r = fetch_token(tok, &p, end, env);
5339  if (r < 0) return r;
5340  r = parse_subexp(&target, tok, term, &p, end, env);
5341  if (r < 0) {
5342  onig_node_free(target);
5343  return r;
5344  }
5345 
5346  if (NTYPE(*np) == NT_ANCHOR)
5347  NANCHOR(*np)->target = target;
5348  else {
5349  NENCLOSE(*np)->target = target;
5350  if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
5351  /* Don't move this to previous of parse_subexp() */
5352  r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
5353  if (r != 0) return r;
5354  }
5355  else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
5356  if (NTYPE(target) != NT_ALT) {
5357  /* convert (?(cond)yes) to (?(cond)yes|empty) */
5358  work1 = node_new_empty();
5359  if (IS_NULL(work1)) goto err;
5360  work2 = onig_node_new_alt(work1, NULL_NODE);
5361  if (IS_NULL(work2)) goto err;
5362  work1 = onig_node_new_alt(target, work2);
5363  if (IS_NULL(work1)) goto err;
5364  NENCLOSE(*np)->target = work1;
5365  }
5366  }
5367  }
5368 
5369  *src = p;
5370  return 0;
5371 
5372  err:
5373  onig_node_free(work1);
5374  onig_node_free(work2);
5375  onig_node_free(*np);
5376  *np = NULL;
5377  return ONIGERR_MEMORY;
5378 }
5379 
/* Text of each "popular" quantifier; set_quantifier() indexes this table with
   the value returned by popular_quantifier_num() when building warnings. */
5380 static const char* const PopularQStr[] = {
5381  "?", "*", "+", "??", "*?", "+?"
5382 };
5383 
/* Text of the quantifier that a nested pair was reduced to; set_quantifier()
   indexes this table with a ReduceTypeTable[][] entry in its warning. */
5384 static const char* const ReduceQStr[] = {
5385  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
5386 };
5387 
/*
 * Attach `target` to the quantifier node `qnode`.
 *
 * Returns:
 *   1 - the quantifier is exactly {1,1}; nothing is attached
 *   2 - target is a string node, `group` is 0 and the string could be split:
 *       only the split-off last character becomes the quantifier's target
 *   0 - otherwise; target was attached, or (through q_exit) a nested
 *       quantifier pair was handed to onig_reduce_nested_quantifier()
 *
 * NOTE(review): listing line 5421 (the rest of the condition opened by
 * `if (nestq_num >= 0 && targetq_num >= 0 &&`) is missing from this listing.
 */
5388 static int
5389 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
5390 {
5391  QtfrNode* qn;
5392 
5393  qn = NQTFR(qnode);
5394  if (qn->lower == 1 && qn->upper == 1) {
5395  return 1;
5396  }
5397 
5398  switch (NTYPE(target)) {
5399  case NT_STR:
5400  if (! group) {
5401  StrNode* sn = NSTR(target);
5402  if (str_node_can_be_split(sn, env->enc)) {
5403  Node* n = str_node_split_last_char(sn, env->enc);
5404  if (IS_NOT_NULL(n)) {
5405  qn->target = n;
5406  return 2;
5407  }
5408  }
5409  }
5410  break;
5411 
5412  case NT_QTFR:
5413  { /* check redundant double repeat. */
5414  /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
5415  QtfrNode* qnt = NQTFR(target);
5416  int nestq_num = popular_quantifier_num(qn);
5417  int targetq_num = popular_quantifier_num(qnt);
5418 
5419 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5420  if (nestq_num >= 0 && targetq_num >= 0 &&
5422  switch (ReduceTypeTable[targetq_num][nestq_num]) {
5423  case RQ_ASIS:
5424  break;
5425 
5426  case RQ_DEL:
5427  if (onig_warn != onig_null_warn) {
5428  onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
5429  PopularQStr[targetq_num]);
5430  }
5431  goto warn_exit;
5432  break;
5433 
5434  default:
5435  if (onig_warn != onig_null_warn) {
5436  onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5437  PopularQStr[targetq_num], PopularQStr[nestq_num],
5438  ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5439  }
5440  goto warn_exit;
5441  break;
5442  }
5443  }
5444 
5445  warn_exit:
5446 #endif
5447  if (targetq_num >= 0) {
 /* both quantifiers are "popular" ones: merge them into one */
5448  if (nestq_num >= 0) {
5449  onig_reduce_nested_quantifier(qnode, target);
5450  goto q_exit;
5451  }
5452  else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
5453  /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
5454  if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
5455  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
5456  }
5457  }
5458  }
5459  }
5460  break;
5461 
5462  default:
5463  break;
5464  }
5465 
5466  qn->target = target;
5467  q_exit:
5468  return 0;
5469 }
5470 
5471 
/*
 * Only compiled when CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS is not
 * defined.
 *
 * If `cc` carries the NOT flag, its bitset is inverted in place and, for
 * encodings that are not single-byte, cc->mbuf is replaced by the buffer that
 * not_code_range_buf() produces (the old buffer is freed).
 * Returns 0, or the non-zero value returned by not_code_range_buf().
 *
 * NOTE(review): listing line 5490 is missing from this listing; given the
 * function name it presumably clears the NOT flag -- confirm.
 */
5472 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5473 static int
5474 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
5475 {
5476  BBuf *tbuf;
5477  int r;
5478 
5479  if (IS_NCCLASS_NOT(cc)) {
5480  bitset_invert(cc->bs);
5481 
5482  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
5483  r = not_code_range_buf(enc, cc->mbuf, &tbuf);
5484  if (r != 0) return r;
5485 
5486  bbuf_free(cc->mbuf);
5487  cc->mbuf = tbuf;
5488  }
5489 
5491  }
5492 
5493  return 0;
5494 }
5495 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5496 
/*
 * NOTE(review): only the opening line of this typedef is present in this
 * listing (listing lines 5498-5503 are missing).  Presumably this is
 * IApplyCaseFoldArg, the argument bundle handed to i_apply_case_fold(); the
 * members used in this file are env, cc, asc_cc, alt_root and ptail --
 * confirm.
 */
5497 typedef struct {
5504 
/*
 * Callback handed to ONIGENC_APPLY_ALL_CASE_FOLD() by cclass_case_fold().
 * Called once per case-fold mapping from -> to[0 .. to_len-1]; `arg` is an
 * IApplyCaseFoldArg*.
 *
 * to_len == 1: when `from` is matched by cc (membership combined with the NOT
 *   flag when CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS is defined) and
 *   add_flag is set, *to is added to cc -- to cc->mbuf if the encoding's
 *   minimum length is > 1 or *to >= SINGLE_BYTE_SIZE, else to the bitset.
 * to_len  > 1: when `from` is in cc, the folded characters are encoded into
 *   one string node (marked ambiguous) and appended as a new alternative at
 *   *iarg->ptail.
 *
 * add_flag is 0 when there is no asc_cc, 1 when from and *to are both ASCII
 * or both non-ASCII, and otherwise reflects whether `from` is matched by
 * asc_cc.
 *
 * Returns 0, or a negative value from a helper.
 *
 * NOTE(review): listing lines 5571 (presumably the declaration of `buf`) and
 * 5575 (presumably the #if/#ifdef matching the #endif below) are missing from
 * this listing.
 */
5505 static int
5506 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
5507  int to_len, void* arg)
5508 {
5509  IApplyCaseFoldArg* iarg;
5510  ScanEnv* env;
5511  CClassNode* cc;
5512  CClassNode* asc_cc;
5513  BitSetRef bs;
5514  int add_flag, r;
5515 
5516  iarg = (IApplyCaseFoldArg* )arg;
5517  env = iarg->env;
5518  cc = iarg->cc;
5519  asc_cc = iarg->asc_cc;
5520  bs = cc->bs;
5521 
5522  if (IS_NULL(asc_cc)) {
5523  add_flag = 0;
5524  }
5525  else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
5526  add_flag = 1;
5527  }
5528  else {
5529  add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
5530  if (IS_NCCLASS_NOT(asc_cc))
5531  add_flag = !add_flag;
5532  }
5533 
5534  if (to_len == 1) {
5535  int is_in = onig_is_code_in_cc(env->enc, from, cc);
5536 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5537  if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
5538  (is_in == 0 && IS_NCCLASS_NOT(cc))) {
5539  if (add_flag) {
5540  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5541  r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5542  if (r < 0) return r;
5543  }
5544  else {
5545  BITSET_SET_BIT(bs, *to);
5546  }
5547  }
5548  }
5549 #else
5550  if (is_in != 0) {
5551  if (add_flag) {
5552  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5553  if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
5554  r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5555  if (r < 0) return r;
5556  }
5557  else {
5558  if (IS_NCCLASS_NOT(cc)) {
5559  BITSET_CLEAR_BIT(bs, *to);
5560  }
5561  else {
5562  BITSET_SET_BIT(bs, *to);
5563  }
5564  }
5565  }
5566  }
5567 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5568  }
5569  else {
5570  int r, i, len;
5572  Node *snode = NULL_NODE;
5573 
5574  if (onig_is_code_in_cc(env->enc, from, cc)
5576  && !IS_NCCLASS_NOT(cc)
5577 #endif
5578  ) {
 /* encode each folded code point and concatenate them into one string node */
5579  for (i = 0; i < to_len; i++) {
5580  len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5581  if (i == 0) {
5582  snode = onig_node_new_str(buf, buf + len);
5583  CHECK_NULL_RETURN_MEMERR(snode);
5584 
5585  /* char-class expanded multi-char only
5586  compare with string folded at match time. */
5587  NSTRING_SET_AMBIG(snode);
5588  }
5589  else {
5590  r = onig_node_str_cat(snode, buf, buf + len);
5591  if (r < 0) {
5592  onig_node_free(snode);
5593  return r;
5594  }
5595  }
5596  }
5597 
 /* append the string as a new alternative and move the tail pointer on */
5598  *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5599  CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
5600  iarg->ptail = &(NCDR((*(iarg->ptail))));
5601  }
5602  }
5603 
5604  return 0;
5605 }
5606 
5607 static int
5608 cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
5609 {
5610  int r;
5611  IApplyCaseFoldArg iarg;
5612 
5613  iarg.env = env;
5614  iarg.cc = cc;
5615  iarg.asc_cc = asc_cc;
5616  iarg.alt_root = NULL_NODE;
5617  iarg.ptail = &(iarg.alt_root);
5618 
5619  r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
5620  i_apply_case_fold, &iarg);
5621  if (r != 0) {
5622  onig_node_free(iarg.alt_root);
5623  return r;
5624  }
5625  if (IS_NOT_NULL(iarg.alt_root)) {
5626  Node* work = onig_node_new_alt(*np, iarg.alt_root);
5627  if (IS_NULL(work)) {
5628  onig_node_free(iarg.alt_root);
5629  return ONIGERR_MEMORY;
5630  }
5631  *np = work;
5632  }
5633  return r;
5634 }
5635 
5636 static int
5637 node_linebreak(Node** np, ScanEnv* env)
5638 {
5639  /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
5640  Node* left = NULL;
5641  Node* right = NULL;
5642  Node* target1 = NULL;
5643  Node* target2 = NULL;
5644  CClassNode* cc;
5645  int num1, num2, r;
5647 
5648  /* \x0D\x0A */
5649  num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5650  if (num1 < 0) return num1;
5651  num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5652  if (num2 < 0) return num2;
5653  left = node_new_str_raw(buf, buf + num1 + num2);
5654  if (IS_NULL(left)) goto err;
5655 
5656  /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
5657  right = node_new_cclass();
5658  if (IS_NULL(right)) goto err;
5659  cc = NCCLASS(right);
5660  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5661  r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
5662  if (r != 0) goto err;
5663  }
5664  else {
5665  bitset_set_range(env, cc->bs, 0x0A, 0x0D);
5666  }
5667 
5668  /* TODO: move this block to enc/unicode.c */
5669  if (ONIGENC_IS_UNICODE(env->enc)) {
5670  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5671  r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
5672  if (r != 0) goto err;
5673  r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
5674  if (r != 0) goto err;
5675  }
5676 
5677  /* ...|... */
5678  target1 = onig_node_new_alt(right, NULL_NODE);
5679  if (IS_NULL(target1)) goto err;
5680  right = NULL;
5681  target2 = onig_node_new_alt(left, target1);
5682  if (IS_NULL(target2)) goto err;
5683  left = NULL;
5684  target1 = NULL;
5685 
5686  /* (?>...) */
5687  *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5688  if (IS_NULL(*np)) goto err;
5689  NENCLOSE(*np)->target = target2;
5690  return ONIG_NORMAL;
5691 
5692  err:
5693  onig_node_free(left);
5694  onig_node_free(right);
5695  onig_node_free(target1);
5696  onig_node_free(target2);
5697  return ONIGERR_MEMORY;
5698 }
5699 
5700 static int
5701 propname2ctype(ScanEnv* env, const char* propname)
5702 {
5703  UChar* name = (UChar* )propname;
5704  UChar* name_end = name + strlen(propname);
5705  int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,
5706  name, name_end);
5707  if (ctype < 0) {
5708  onig_scan_env_set_error_string(env, ctype, name, name_end);
5709  }
5710  return ctype;
5711 }
5712 
5713 static int
5714 add_property_to_cc(CClassNode* cc, const char* propname, int not, ScanEnv* env)
5715 {
5716  int ctype = propname2ctype(env, propname);
5717  if (ctype < 0) return ctype;
5718  return add_ctype_to_cc(cc, ctype, not, 0, env);
5719 }
5720 
5721 /*
5722  * helper methods for node_extended_grapheme_cluster (/\X/)
5723  */
5724 static int
5725 create_property_node(Node **np, ScanEnv* env, const char* propname)
5726 {
5727  int r;
5728  CClassNode* cc;
5729 
5730  *np = node_new_cclass();
5731  if (IS_NULL(*np)) return ONIGERR_MEMORY;
5732  cc = NCCLASS(*np);
5733  r = add_property_to_cc(cc, propname, 0, env);
5734  if (r != 0)
5735  onig_node_free(*np);
5736  return r;
5737 }
5738 
5739 static int
5740 quantify_node(Node **np, int lower, int upper)
5741 {
5742  Node* tmp = node_new_quantifier(lower, upper, 0);
5743  if (IS_NULL(tmp)) return ONIGERR_MEMORY;
5744  NQTFR(tmp)->target = *np;
5745  *np = tmp;
5746  return 0;
5747 }
5748 
5749 static int
5750 quantify_property_node(Node **np, ScanEnv* env, const char* propname, char repetitions)
5751 {
5752  int r;
5753  int lower = 0;
5754  int upper = REPEAT_INFINITE;
5755 
5756  r = create_property_node(np, env, propname);
5757  if (r != 0) return r;
5758  switch (repetitions) {
5759  case '?': upper = 1; break;
5760  case '+': lower = 1; break;
5761  case '*': break;
5762  case '2': lower = upper = 2; break;
5763  default : return ONIGERR_PARSER_BUG;
5764  }
5765  return quantify_node(np, lower, upper);
5766 }
5767 
5768 #define LIST 0
5769 #define ALT 1
5770 
5771 /* IMPORTANT: Make sure node_array ends with NULL_NODE */
5772 static int
5773 create_node_from_array(int kind, Node **np, Node **node_array)
5774 {
5775  Node* tmp = NULL_NODE;
5776  int i = 0;
5777 
5778  while (node_array[i] != NULL_NODE) i++;
5779  while (--i >= 0) {
5780  *np = kind==LIST ? node_new_list(node_array[i], tmp)
5781  : onig_node_new_alt(node_array[i], tmp);
5782  if (IS_NULL(*np)) {
5783  while (i >= 0) {
5784  onig_node_free(node_array[i]);
5785  node_array[i--] = NULL_NODE;
5786  }
5787  onig_node_free(tmp);
5788  return ONIGERR_MEMORY;
5789  }
5790  else
5791  node_array[i] = NULL_NODE;
5792  tmp = *np;
5793  }
5794  return 0;
5795 }
5796 
5797 #define R_ERR(call) r=(call);if(r!=0)goto err
5798 
5799 /* Memory layout for common node array:
5800  * The main purpose is to be able to easily free all leftover nodes
5801  * after an error. As a side effect, we share some memory.
5802  *
5803  * The layout is as shown below (each line corresponds to one call of
5804  * create_node_from_array()). Because create_node_from_array sets all
5805  * nodes of the source to NULL_NODE, we can overlap the target array
5806  * as long as we do not override the actual target location.
5807  *
5808  * Target Array name Index
5809  *
5810  * node_array 0 1 2 3 4 5 6 7 8 9 A B C D E F
5811  * top_alts alts[5] 0 1 2 3 4*
5812  * alts+1 list[4] 0 1 2 3*
5813  * list+1 core_alts[7] 0 1 2 3 4 5 6*
5814  * core_alts+0 H_list[4] 0 1 2 3*
5815  * H_list+1 H_alt2[4] 0 1 2 3*
5816  * H_alt2+1 H_list2[3] 0 1 2*
5817  * core_alts+4 XP_list[4] 0 1 2 3*
5818  * XP_list+1 Ex_list[4] 0 1 2 3*
5819  */
5820 #define NODE_COMMON_SIZE 16
5821 
5822 static int
5823 node_extended_grapheme_cluster(Node** np, ScanEnv* env)
5824 {
5825  Node* tmp = NULL;
5826  Node* np1 = NULL;
5827  Node* top_alt = NULL;
5828  int r = 0;
5829  int num1;
5830  int i;
5831  int any_target_position;
5833  OnigOptionType option;
5834  /* node_common is function-global so that we can free all nodes
5835  * in case of error. Unused slots are set to NULL_NODE at all times. */
5836  Node *node_common[NODE_COMMON_SIZE];
5837  Node **alts = node_common+0; /* size: 5 */
5838 
5839  for (i=0; i<NODE_COMMON_SIZE; i++)
5840  node_common[i] = NULL_NODE;
5841 
5842  /* CRLF, common for both Unicode and non-Unicode */
5843  /* \x0D\x0A */
5844  r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5845  if (r < 0) goto err;
5846  num1 = r;
5847  r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5848  if (r < 0) goto err;
5849  alts[0] = node_new_str_raw(buf, buf + num1 + r);
5850  if (IS_NULL(alts[0])) goto err;
5851 
5852 #ifdef USE_UNICODE_PROPERTIES
5853  if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5854  CClassNode* cc;
5855 
5856  if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err;
5857  /* Unicode 11.0.0
5858  * CRLF (already done)
5859  * | [Control CR LF]
5860  * | precore* core postcore*
5861  * | . (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */
5862 
5863  /* [Control CR LF] (CR and LF are not in the spec, but this is a conformed fix) */
5864  alts[1] = node_new_cclass();
5865  if (IS_NULL(alts[1])) goto err;
5866  cc = NCCLASS(alts[1]);
5867  R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5868  if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5869  R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
5870  R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
5871  }
5872  else {
5873  BITSET_SET_BIT(cc->bs, 0x0a);
5874  BITSET_SET_BIT(cc->bs, 0x0d);
5875  }
5876 
5877  /* precore* core postcore* */
5878  {
5879  Node **list = alts + 3; /* size: 4 */
5880 
5881  /* precore*; precore := Prepend */
5882  R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*'));
5883 
5884  /* core := hangul-syllable
5885  * | ri-sequence
5886  * | xpicto-sequence
5887  * | [^Control CR LF] */
5888  {
5889  Node **core_alts = list + 2; /* size: 7 */
5890 
5891  /* hangul-syllable :=
5892  * L* (V+ | LV V* | LVT) T*
5893  * | L+
5894  * | T+ */
5895  /* hangul-syllable is an alternative (would be called H_alt)
5896  * inside an alternative, but we flatten it into core_alts */
5897 
5898  /* L* (V+ | LV V* | LVT) T* */
5899  {
5900  Node **H_list = core_alts + 1; /* size: 4 */
5901  R_ERR(quantify_property_node(H_list+0, env, "Grapheme_Cluster_Break=L", '*'));
5902 
5903  /* V+ | LV V* | LVT */
5904  {
5905  Node **H_alt2 = H_list + 2; /* size: 4 */
5906  R_ERR(quantify_property_node(H_alt2+0, env, "Grapheme_Cluster_Break=V", '+'));
5907 
5908  /* LV V* */
5909  {
5910  Node **H_list2 = H_alt2 + 2; /* size: 3 */
5911 
5912  R_ERR(create_property_node(H_list2+0, env, "Grapheme_Cluster_Break=LV"));
5913  R_ERR(quantify_property_node(H_list2+1, env, "Grapheme_Cluster_Break=V", '*'));
5914  R_ERR(create_node_from_array(LIST, H_alt2+1, H_list2));
5915  }
5916 
5917  R_ERR(create_property_node(H_alt2+2, env, "Grapheme_Cluster_Break=LVT"));
5918  R_ERR(create_node_from_array(ALT, H_list+1, H_alt2));
5919  }
5920 
5921  R_ERR(quantify_property_node(H_list+2, env, "Grapheme_Cluster_Break=T", '*'));
5922  R_ERR(create_node_from_array(LIST, core_alts+0, H_list));
5923  }
5924 
5925  R_ERR(quantify_property_node(core_alts+1, env, "Grapheme_Cluster_Break=L", '+'));
5926  R_ERR(quantify_property_node(core_alts+2, env, "Grapheme_Cluster_Break=T", '+'));
5927  /* end of hangul-syllable */
5928 
5929  /* ri-sequence := RI RI */
5930  R_ERR(quantify_property_node(core_alts+3, env, "Regional_Indicator", '2'));
5931 
5932  /* xpicto-sequence := \p{Extended_Pictographic} (Extend* ZWJ \p{Extended_Pictographic})* */
5933  {
5934  Node **XP_list = core_alts + 5; /* size: 3 */
5935  R_ERR(create_property_node(XP_list+0, env, "Extended_Pictographic"));
5936 
5937  /* (Extend* ZWJ \p{Extended_Pictographic})* */
5938  {
5939  Node **Ex_list = XP_list + 2; /* size: 4 */
5940  /* assert(Ex_list+4 == node_common+NODE_COMMON_SIZE); */
5941  R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*'));
5942 
5943  /* ZWJ (ZERO WIDTH JOINER) */
5944  r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
5945  if (r < 0) goto err;
5946  Ex_list[1] = node_new_str_raw(buf, buf + r);
5947  if (IS_NULL(Ex_list[1])) goto err;
5948 
5949  R_ERR(create_property_node(Ex_list+2, env, "Extended_Pictographic"));
5950  R_ERR(create_node_from_array(LIST, XP_list+1, Ex_list));
5951  }
5952  R_ERR(quantify_node(XP_list+1, 0, REPEAT_INFINITE)); /* TODO: Check about node freeing */
5953 
5954  R_ERR(create_node_from_array(LIST, core_alts+4, XP_list));
5955  }
5956 
5957  /* [^Control CR LF] */
5958  core_alts[5] = node_new_cclass();
5959  if (IS_NULL(core_alts[5])) goto err;
5960  cc = NCCLASS(core_alts[5]);
5961  if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5962  BBuf *inverted_buf = NULL;
5963 
5964  /* TODO: fix false warning */
5965  const int dup_not_warned = env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;
5966  env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
5967 
5968  /* Start with a positive buffer and invert at the end.
5969  * Otherwise, adding single-character ranges work the wrong way. */
5970  R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5971  R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
5972  R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
5973  R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));
5974  cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */
5975 
5976  env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */
5977  }
5978  else {
5979  R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 1, env));
5980  BITSET_CLEAR_BIT(cc->bs, 0x0a);
5981  BITSET_CLEAR_BIT(cc->bs, 0x0d);
5982  }
5983 
5984  R_ERR(create_node_from_array(ALT, list+1, core_alts));
5985  }
5986 
5987  /* postcore*; postcore = [Extend ZWJ SpacingMark] */
5988  R_ERR(create_property_node(list+2, env, "Grapheme_Cluster_Break=Extend"));
5989  cc = NCCLASS(list[2]);
5990  R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=SpacingMark", 0, env));
5991  R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D));
5992  R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE));
5993 
5994  R_ERR(create_node_from_array(LIST, alts+2, list));
5995  }
5996 
5997  any_target_position = 3;
5998  }
5999  else
6000 #endif /* USE_UNICODE_PROPERTIES */
6001  {
6002  any_target_position = 1;
6003  }
6004 
6005  /* PerlSyntax: (?s:.), RubySyntax: (?m:.), common for both Unicode and non-Unicode */
6006  /* Not in Unicode spec (UAX #29), but added to catch invalid stuff,
6007  * because this is Ruby spec for String#grapheme_clusters. */
6008  np1 = node_new_anychar();
6009  if (IS_NULL(np1)) goto err;
6010 
6011  option = env->option;
6012  ONOFF(option, ONIG_OPTION_MULTILINE, 0);
6013  tmp = node_new_option(option);
6014  if (IS_NULL(tmp)) goto err;
6015  NENCLOSE(tmp)->target = np1;
6016  alts[any_target_position] = tmp;
6017  np1 = NULL;
6018 
6019  R_ERR(create_node_from_array(ALT, &top_alt, alts));
6020 
6021  /* (?>): For efficiency, because there is no text piece
6022  * that is not in a grapheme cluster, and there is only one way
6023  * to split a string into grapheme clusters. */
6024  tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6025  if (IS_NULL(tmp)) goto err;
6026  NENCLOSE(tmp)->target = top_alt;
6027  np1 = tmp;
6028 
6029 #ifdef USE_UNICODE_PROPERTIES
6030  if (ONIGENC_IS_UNICODE(env->enc)) {
6031  /* Don't ignore case. */
6032  option = env->option;
6033  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
6034  *np = node_new_option(option);
6035  if (IS_NULL(*np)) goto err;
6036  NENCLOSE(*np)->target = np1;
6037  }
6038  else
6039 #endif
6040  {
6041  *np = np1;
6042  }
6043  return ONIG_NORMAL;
6044 
6045  err:
6046  onig_node_free(np1);
6047  for (i=0; i<NODE_COMMON_SIZE; i++)
6048  onig_node_free(node_common[i]);
6049  return (r == 0) ? ONIGERR_MEMORY : r;
6050 }
6051 #undef R_ERR
6052 
/*
 * Count the bits set in the low 32 bits of `bits`.
 *
 * Adjacent bit fields are summed pairwise in five rounds (field widths
 * 1, 2, 4, 8 and 16), so the final value is the total number of set bits.
 */
static int
countbits(unsigned int bits)
{
  static const unsigned int field_mask[5] = {
    0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
  };
  int round;

  for (round = 0; round < 5; round++) {
    const unsigned int low  = bits & field_mask[round];
    const unsigned int high = (bits >> (1 << round)) & field_mask[round];
    bits = low + high;
  }
  return bits;
}
6062 
6063 static int
6064 is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
6065 {
6066  const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
6067  OnigCodePoint c = not_found;
6068  int i;
6069  BBuf *bbuf = cc->mbuf;
6070 
6071  if (IS_NCCLASS_NOT(cc)) return 0;
6072 
6073  /* check bbuf */
6074  if (IS_NOT_NULL(bbuf)) {
6075  OnigCodePoint n, *data;
6076  GET_CODE_POINT(n, bbuf->p);
6077  data = (OnigCodePoint* )(bbuf->p) + 1;
6078  if ((n == 1) && (data[0] == data[1])) {
6079  /* only one char found in the bbuf, save the code point. */
6080  c = data[0];
6081  if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
6082  /* skip if c is included in the bitset */
6083  c = not_found;
6084  }
6085  }
6086  else {
6087  return 0; /* the bbuf contains multiple chars */
6088  }
6089  }
6090 
6091  /* check bitset */
6092  for (i = 0; i < BITSET_SIZE; i++) {
6093  Bits b1 = cc->bs[i];
6094  if (b1 != 0) {
6095  if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6096  c = BITS_IN_ROOM * i + countbits(b1 - 1);
6097  } else {
6098  return 0; /* the character class contains multiple chars */
6099  }
6100  }
6101  }
6102 
6103  if (c != not_found) {
6104  *code = c;
6105  return 1;
6106  }
6107 
6108  /* the character class contains no char. */
6109  return 0;
6110 }
6111 
6112 
/*
 * Parse one expression element that starts at the current token `tok`,
 * store the resulting node in *np, and then attach any quantifier tokens
 * that follow it.
 *
 *   term - token type that terminates the enclosing (sub)expression
 *   src  - in/out cursor into the pattern; `end` is the end of the pattern
 *
 * Returns a negative code on error. Otherwise the return value is either
 * tok->type (the end_of_token and option-only paths) or the result of the
 * last fetch_token() call, i.e. the token that follows this element.
 *
 * NOTE(review): the embedded listing numbers in this block have gaps (for
 * example right after several `*np = node_new_...()` calls and inside the
 * TK_SUBEXP_CLOSE, TK_KEEP, TK_OP_REPEAT and quantifier sections), so some
 * original lines are missing from this view -- consult the original file
 * before changing the logic.
 */
6113 static int
6114 parse_exp(Node** np, OnigToken* tok, int term,
6115  UChar** src, UChar* end, ScanEnv* env)
6116 {
6117  int r, len, group = 0;
6118  Node* qn;
6119  Node** targetp;
6120 
6121  *np = NULL;
6122  if (tok->type == (enum TokenSyms )term)
6123  goto end_of_token;
6124 
6125  switch (tok->type) {
6126  case TK_ALT:
6127  case TK_EOT:
6128  end_of_token:
 /* nothing to parse: hand back an empty node and the terminating token */
6129  *np = node_new_empty();
6130  return tok->type;
6131  break;
6132 
6133  case TK_SUBEXP_OPEN:
6134  r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
6135  if (r < 0) return r;
 /* r == 1 sets the `group` flag that is later passed to set_quantifier() */
6136  if (r == 1) group = 1;
6137  else if (r == 2) { /* option only */
6138  Node* target;
6139  OnigOptionType prev = env->option;
6140 
 /* parse the rest of the subexpression under the new option, then restore */
6141  env->option = NENCLOSE(*np)->option;
6142  r = fetch_token(tok, src, end, env);
6143  if (r < 0) {
6144  env->option = prev;
6145  return r;
6146  }
6147  r = parse_subexp(&target, tok, term, src, end, env);
6148  env->option = prev;
6149  if (r < 0) {
6150  onig_node_free(target);
6151  return r;
6152  }
6153  NENCLOSE(*np)->target = target;
6154  return tok->type;
6155  }
6156  break;
6157 
6158  case TK_SUBEXP_CLOSE:
6161 
6162  if (tok->escaped) goto tk_raw_byte;
6163  else goto tk_byte;
6164  break;
6165 
6166  case TK_LINEBREAK:
6167  r = node_linebreak(np, env);
6168  if (r < 0) return r;
6169  break;
6170 
6172  r = node_extended_grapheme_cluster(np, env);
6173  if (r < 0) return r;
6174  break;
6175 
6176  case TK_KEEP:
6179  break;
6180 
6181  case TK_STRING:
6182  tk_byte:
6183  {
6184  *np = node_new_str(tok->backp, *src);
6186 
 /* keep appending following string (and code point) tokens to the same node */
6187  string_loop:
6188  while (1) {
6189  r = fetch_token(tok, src, end, env);
6190  if (r < 0) return r;
6191  if (r == TK_STRING) {
6192  r = onig_node_str_cat(*np, tok->backp, *src);
6193  }
6194 #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6195  else if (r == TK_CODE_POINT) {
6196  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6197  }
6198 #endif
6199  else {
6200  break;
6201  }
6202  if (r < 0) return r;
6203  }
6204 
 /* the next token is already in r: jump straight to the quantifier check */
6205  string_end:
6206  targetp = np;
6207  goto repeat;
6208  }
6209  break;
6210 
6211  case TK_RAW_BYTE:
6212  tk_raw_byte:
6213  {
6214  *np = node_new_str_raw_char((UChar )tok->u.c);
6216  len = 1;
 /* collect raw bytes until they form one complete character of the encoding */
6217  while (1) {
6218  if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
6219  if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
6220  r = fetch_token(tok, src, end, env);
6221  NSTRING_CLEAR_RAW(*np);
6222  goto string_end;
6223  }
6224  }
6225 
6226  r = fetch_token(tok, src, end, env);
6227  if (r < 0) return r;
6228  if (r != TK_RAW_BYTE) {
6229  /* Don't use this, it is wrong for little endian encodings. */
6230 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6231  int rem;
6232  if (len < ONIGENC_MBC_MINLEN(env->enc)) {
6233  rem = ONIGENC_MBC_MINLEN(env->enc) - len;
6234  (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
6235  if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
6236  NSTRING_CLEAR_RAW(*np);
6237  goto string_end;
6238  }
6239  }
6240 #endif
6242  }
6243 
6244  r = node_str_cat_char(*np, (UChar )tok->u.c);
6245  if (r < 0) return r;
6246 
6247  len++;
6248  }
6249  }
6250  break;
6251 
6252  case TK_CODE_POINT:
6253  {
6254  *np = node_new_empty();
6256  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6257  if (r != 0) return r;
6258 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6259  NSTRING_SET_RAW(*np);
6260 #else
6261  goto string_loop;
6262 #endif
6263  }
6264  break;
6265 
6266  case TK_QUOTE_OPEN:
6267  {
6268  OnigCodePoint end_op[2];
6269  UChar *qstart, *qend, *nextp;
6270 
 /* the quoted run ends at <escape char>E, or at the end of the pattern */
6271  end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
6272  end_op[1] = (OnigCodePoint )'E';
6273  qstart = *src;
6274  qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
6275  if (IS_NULL(qend)) {
6276  nextp = qend = end;
6277  }
6278  *np = node_new_str(qstart, qend);
6280  *src = nextp;
6281  }
6282  break;
6283 
6284  case TK_CHAR_TYPE:
6285  {
6286  switch (tok->u.prop.ctype) {
6287  case ONIGENC_CTYPE_WORD:
6288  *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
6289  IS_ASCII_RANGE(env->option));
6291  break;
6292 
6293  case ONIGENC_CTYPE_SPACE:
6294  case ONIGENC_CTYPE_DIGIT:
6295  case ONIGENC_CTYPE_XDIGIT:
6296  {
6297  CClassNode* cc;
6298 
 /* built as a positive class; negation is applied through the NOT flag */
6299  *np = node_new_cclass();
6301  cc = NCCLASS(*np);
6302  r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
6303  IS_ASCII_RANGE(env->option), env);
6304  if (r != 0) return r;
6305  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6306  }
6307  break;
6308 
6309  default:
6310  return ONIGERR_PARSER_BUG;
6311  break;
6312  }
6313  }
6314  break;
6315 
6316  case TK_CHAR_PROPERTY:
6317  r = parse_char_property(np, tok, src, end, env);
6318  if (r != 0) return r;
6319  break;
6320 
6321  case TK_CC_OPEN:
6322  {
6323  Node *asc_node;
6324  CClassNode* cc;
6325  OnigCodePoint code;
6326 
6327  r = parse_char_class(np, &asc_node, tok, src, end, env);
6328  if (r != 0) {
6329  onig_node_free(asc_node);
6330  return r;
6331  }
6332 
6333  cc = NCCLASS(*np);
 /* a class holding exactly one code point is replaced by a string node */
6334  if (is_onechar_cclass(cc, &code)) {
6335  onig_node_free(*np);
6336  onig_node_free(asc_node);
6337  *np = node_new_empty();
6339  r = node_str_cat_codepoint(*np, env->enc, code);
6340  if (r != 0) return r;
6341  goto string_loop;
6342  }
6343  if (IS_IGNORECASE(env->option)) {
6344  r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
6345  if (r != 0) {
6346  onig_node_free(asc_node);
6347  return r;
6348  }
6349  }
6350  onig_node_free(asc_node);
6351  }
6352  break;
6353 
6354  case TK_ANYCHAR:
6355  *np = node_new_anychar();
6357  break;
6358 
6359  case TK_ANYCHAR_ANYTIME:
 /* an any-char node wrapped in a {0,infinite} quantifier */
6360  *np = node_new_anychar();
6362  qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
6364  NQTFR(qn)->target = *np;
6365  *np = qn;
6366  break;
6367 
6368  case TK_BACKREF:
6369  len = tok->u.backref.num;
6370  *np = node_new_backref(len,
6371  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
6372  tok->u.backref.by_name,
6374  tok->u.backref.exist_level,
6375  tok->u.backref.level,
6376 #endif
6377  env);
6379  break;
6380 
6381 #ifdef USE_SUBEXP_CALL
6382  case TK_CALL:
6383  {
6384  int gnum = tok->u.call.gnum;
6385 
 /* negative or relative group numbers are converted to absolute ones */
6386  if (gnum < 0 || tok->u.call.rel != 0) {
6387  if (gnum > 0) gnum--;
6388  gnum = BACKREF_REL_TO_ABS(gnum, env);
6389  if (gnum <= 0)
6390  return ONIGERR_INVALID_BACKREF;
6391  }
6392  *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
6394  env->num_call++;
6395  }
6396  break;
6397 #endif
6398 
6399  case TK_ANCHOR:
6400  *np = onig_node_new_anchor(tok->u.anchor.subtype);
6402  NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
6403  break;
6404 
6405  case TK_OP_REPEAT:
6406  case TK_INTERVAL:
6410  else
6411  *np = node_new_empty();
6412  }
6413  else {
6414  goto tk_byte;
6415  }
6416  break;
6417 
6418  default:
6419  return ONIGERR_PARSER_BUG;
6420  break;
6421  }
6422 
6423  {
6424  targetp = np;
6425 
6426  re_entry:
6427  r = fetch_token(tok, src, end, env);
6428  if (r < 0) return r;
6429 
 /* r holds the type of the token after the element; loop while it is a quantifier */
6430  repeat:
6431  if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
6432  if (is_invalid_quantifier_target(*targetp))
6434 
6435  qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
6436  (r == TK_INTERVAL ? 1 : 0));
6438  NQTFR(qn)->greedy = tok->u.repeat.greedy;
6439  r = set_quantifier(qn, *targetp, group, env);
6440  if (r < 0) {
6441  onig_node_free(qn);
6442  return r;
6443  }
6444 
 /* a possessive quantifier is wrapped in a stop-backtrack enclosure */
6445  if (tok->u.repeat.possessive != 0) {
6446  Node* en;
6447  en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6448  if (IS_NULL(en)) {
6449  onig_node_free(qn);
6450  return ONIGERR_MEMORY;
6451  }
6452  NENCLOSE(en)->target = qn;
6453  qn = en;
6454  }
6455 
 /* set_quantifier() result: 0 = qn replaces the target, 1 = qn is discarded,
    2 = the target is kept and qn is appended after it in a new list */
6456  if (r == 0) {
6457  *targetp = qn;
6458  }
6459  else if (r == 1) {
6460  onig_node_free(qn);
6461  }
6462  else if (r == 2) { /* split case: /abc+/ */
6463  Node *tmp;
6464 
6465  *targetp = node_new_list(*targetp, NULL);
6466  if (IS_NULL(*targetp)) {
6467  onig_node_free(qn);
6468  return ONIGERR_MEMORY;
6469  }
6470  tmp = NCDR(*targetp) = node_new_list(qn, NULL);
6471  if (IS_NULL(tmp)) {
6472  onig_node_free(qn);
6473  return ONIGERR_MEMORY;
6474  }
 /* further quantifiers now apply to the quantified tail element */
6475  targetp = &(NCAR(tmp));
6476  }
6477  goto re_entry;
6478  }
6479  }
6480 
6481  return r;
6482 }
6483 
6484 static int
6485 parse_branch(Node** top, OnigToken* tok, int term,
6486  UChar** src, UChar* end, ScanEnv* env)
6487 {
6488  int r;
6489  Node *node, **headp;
6490 
6491  *top = NULL;
6492  r = parse_exp(&node, tok, term, src, end, env);
6493  if (r < 0) {
6494  onig_node_free(node);
6495  return r;
6496  }
6497 
6498  if (r == TK_EOT || r == term || r == TK_ALT) {
6499  *top = node;
6500  }
6501  else {
6502  *top = node_new_list(node, NULL);
6503  headp = &(NCDR(*top));
6504  while (r != TK_EOT && r != term && r != TK_ALT) {
6505  r = parse_exp(&node, tok, term, src, end, env);
6506  if (r < 0) {
6507  onig_node_free(node);
6508  return r;
6509  }
6510 
6511  if (NTYPE(node) == NT_LIST) {
6512  *headp = node;
6513  while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
6514  headp = &(NCDR(node));
6515  }
6516  else {
6517  *headp = node_new_list(node, NULL);
6518  headp = &(NCDR(*headp));
6519  }
6520  }
6521  }
6522 
6523  return r;
6524 }
6525 
6526 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
6527 static int
6528 parse_subexp(Node** top, OnigToken* tok, int term,
6529  UChar** src, UChar* end, ScanEnv* env)
6530 {
6531  int r;
6532  Node *node, **headp;
6533 
6534  *top = NULL;
6535  env->parse_depth++;
6536  if (env->parse_depth > ParseDepthLimit)
6538  r = parse_branch(&node, tok, term, src, end, env);
6539  if (r < 0) {
6540  onig_node_free(node);
6541  return r;
6542  }
6543 
6544  if (r == term) {
6545  *top = node;
6546  }
6547  else if (r == TK_ALT) {
6548  *top = onig_node_new_alt(node, NULL);
6549  headp = &(NCDR(*top));
6550  while (r == TK_ALT) {
6551  r = fetch_token(tok, src, end, env);
6552  if (r < 0) return r;
6553  r = parse_branch(&node, tok, term, src, end, env);
6554  if (r < 0) {
6555  onig_node_free(node);
6556  return r;
6557  }
6558 
6559  *headp = onig_node_new_alt(node, NULL);
6560  headp = &(NCDR(*headp));
6561  }
6562 
6563  if (tok->type != (enum TokenSyms )term)
6564  goto err;
6565  }
6566  else {
6567  onig_node_free(node);
6568  err:
6569  if (term == TK_SUBEXP_CLOSE)
6571  else
6572  return ONIGERR_PARSER_BUG;
6573  }
6574 
6575  env->parse_depth--;
6576  return r;
6577 }
6578 
6579 static int
6580 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
6581 {
6582  int r;
6583  OnigToken tok;
6584 
6585  r = fetch_token(&tok, src, end, env);
6586  if (r < 0) return r;
6587  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
6588  if (r < 0) return r;
6589 
6590 #ifdef USE_SUBEXP_CALL
6591  if (env->num_call > 0) {
6592  /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
6593  const int num = 0;
6594  Node* np;
6595  np = node_new_enclose_memory(env->option, 0);
6597  NENCLOSE(np)->regnum = num;
6598  NENCLOSE(np)->target = *top;
6599  r = scan_env_set_mem_node(env, num, np);
6600  if (r != 0) {
6601  onig_node_free(np);
6602  return r;
6603  }
6604  *top = np;
6605  }
6606 #endif
6607  return 0;
6608 }
6609 
6610 extern int
6611 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
6612  regex_t* reg, ScanEnv* env)
6613 {
6614  int r;
6615  UChar* p;
6616 
6617 #ifdef USE_NAMED_GROUP
6618  names_clear(reg);
6619 #endif
6620 
6621  scan_env_clear(env);
6622  env->option = reg->options;
6623  env->case_fold_flag = reg->case_fold_flag;
6624  env->enc = reg->enc;
6625  env->syntax = reg->syntax;
6626  env->pattern = (UChar* )pattern;
6627  env->pattern_end = (UChar* )end;
6628  env->reg = reg;
6629 
6630  *root = NULL;
6631  p = (UChar* )pattern;
6632  r = parse_regexp(root, &p, (UChar* )end, env);
6633  reg->num_mem = env->num_mem;
6634  return r;
6635 }
6636 
/*
 * Record the text span [arg, arg_end) in the scan environment as the
 * argument of the current error (env->error / env->error_end).
 *
 * NOTE(review): the line holding the function name and its leading
 * parameters is missing from this listing (the embedded numbering skips
 * it). The call in propname2ctype() passes (env, ctype, name, name_end), so
 * this is presumably onig_scan_env_set_error_string(env, ecode, arg,
 * arg_end) -- confirm against the original file.
 */
6637 extern void
6639  UChar* arg, UChar* arg_end)
6640 {
6641  env->error = arg;
6642  env->error_end = arg_end;
6643 }
OnigDefaultSyntax
const OnigSyntaxType * OnigDefaultSyntax
Definition: regparse.c:85
ONIGENC_CODE_RANGE_TO
#define ONIGENC_CODE_RANGE_TO(range, i)
Definition: onigmo.h:141
ONIGENC_CTYPE_XDIGIT
#define ONIGENC_CTYPE_XDIGIT
Definition: onigmo.h:305
ANCHOR_BEGIN_LINE
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
Definition: onigmo.h:678
CHECK_NULL_RETURN_MEMERR
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
_Node
Definition: regparse.h:265
NSTRING_SET_RAW
#define NSTRING_SET_RAW(node)
Definition: regparse.h:109
ONIG_SYN_OP2_OPTION_PERL
#define ONIG_SYN_OP2_OPTION_PERL
Definition: onigmo.h:554
IS_SYNTAX_OP2
#define IS_SYNTAX_OP2(syn, opm)
Definition: regparse.h:331
ONIGENC_GET_CTYPE_CODE_RANGE
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
Definition: onigmo.h:403
BBUF_MOVE_LEFT_REDUCE
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
Definition: regint.h:509
OnigToken::ref1
int ref1
Definition: regparse.c:2306
_BBuf::p
UChar * p
Definition: regint.h:442
ONIGERR_INVALID_GROUP_NAME
#define ONIGERR_INVALID_GROUP_NAME
Definition: onigmo.h:680
st_str_end_key
Definition: regparse.c:361
ReduceType
ReduceType
Definition: regparse.c:2183
ONIGENC_CTYPE_ALPHA
#define ONIGENC_CTYPE_ALPHA
Definition: onigmo.h:295
void
void
Definition: rb_mjit_min_header-2.7.0.h:13273
MC_ONE_OR_MORE_TIME
#define MC_ONE_OR_MORE_TIME(syn)
Definition: regint.h:752
ONIG_SYN_OP_ESC_LPAREN_SUBEXP
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
Definition: onigmo.h:532
TK_RAW_BYTE
@ TK_RAW_BYTE
Definition: regparse.c:2256
ONIG_SYN_WARN_CC_DUP
#define ONIG_SYN_WARN_CC_DUP
Definition: onigmo.h:609
NT_LIST
#define NT_LIST
Definition: regparse.h:46
NODE_COMMON_SIZE
#define NODE_COMMON_SIZE
Definition: regparse.c:5820
QtfrNode
Definition: regparse.h:179
TK_LINEBREAK
@ TK_LINEBREAK
Definition: regparse.c:2274
ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
Definition: onigmo.h:602
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
Definition: onigmo.h:559
StrNode::s
UChar * s
Definition: regparse.h:172
ONIGENC_CODE_RANGE_FROM
#define ONIGENC_CODE_RANGE_FROM(range, i)
Definition: onigmo.h:140
ONIG_SYN_OP2_QMARK_TILDE_ABSENT
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT
Definition: onigmo.h:583
TK_INTERVAL
@ TK_INTERVAL
Definition: regparse.c:2266
OnigToken
Definition: regparse.c:2285
NT_ENCLOSE
#define NT_ENCLOSE
Definition: regparse.h:44
onig_name_to_backref_number
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, const OnigRegion *region)
Definition: regparse.c:909
onig_reduce_nested_quantifier
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
st_table::num_entries
st_index_t num_entries
Definition: st.h:86
env
#define env
NCAR
#define NCAR(node)
Definition: regparse.h:86
BITSET_SET_BIT
#define BITSET_SET_BIT(bs, pos)
Definition: regint.h:436
ANCHOR_WORD_BEGIN
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
CClassNode
Definition: regint.h:804
ONIG_SYN_STRICT_CHECK_BACKREF
#define ONIG_SYN_STRICT_CHECK_BACKREF
Definition: onigmo.h:593
OnigToken::s
UChar * s
Definition: regparse.c:2291
MC_ESC
#define MC_ESC(syn)
Definition: regint.h:748
ONIGENC_CTYPE_LOWER
#define ONIGENC_CTYPE_LOWER
Definition: onigmo.h:300
ONIG_SYN_OP2_CCLASS_SET_OP
#define ONIG_SYN_OP2_CCLASS_SET_OP
Definition: onigmo.h:558
ONIG_OPTION_IGNORECASE
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
IS_NCCLASS_NOT
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
rb_warn
void rb_warn(const char *fmt,...)
Definition: error.c:313
TK_ANYCHAR_ANYTIME
@ TK_ANYCHAR_ANYTIME
Definition: regparse.c:2267
ONIGENC_IS_ASCII_CODE
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
NT_QTFR
#define NT_QTFR
Definition: regparse.h:43
re_pattern_buffer::case_fold_flag
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
onig_renumber_name_table
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
ONIGERR_UNDEFINED_NAME_REFERENCE
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
ANCHOR_LOOK_BEHIND
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
OnigSyntaxRuby
const OnigSyntaxType OnigSyntaxRuby
Definition: regparse.c:39
ONIG_SYN_OP_ESC_BRACE_INTERVAL
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
Definition: onigmo.h:528
ST_STOP
@ ST_STOP
Definition: st.h:99
SET_NTYPE
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
Definition: onigmo.h:590
ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
Definition: onigmo.h:653
CClassNode::bs
BitSet bs
Definition: regint.h:807
onigenc_strlen
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
OnigToken::ctype
int ctype
Definition: regparse.c:2321
RQ_AQ
@ RQ_AQ
Definition: regparse.c:2187
NT_CCLASS
#define NT_CCLASS
Definition: regparse.h:39
ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
Definition: onigmo.h:671
PINC_S
#define PINC_S
Definition: regparse.c:311
ONIGENC_MBC_MINLEN
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
ONIGERR_END_PATTERN_AT_META
#define ONIGERR_END_PATTERN_AT_META
Definition: onigmo.h:647
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
Definition: onigmo.h:592
onig_node_conv_to_str_node
void onig_node_conv_to_str_node(Node *node, int raw)
NT_CANY
#define NT_CANY
Definition: regparse.h:41
ONIG_MAX_CAPTURE_GROUP_NUM
#define ONIG_MAX_CAPTURE_GROUP_NUM
Definition: onigmo.h:438
re_pattern_buffer::enc
OnigEncoding enc
Definition: onigmo.h:776
OnigToken::base
int base
Definition: regparse.c:2288
onig_node_str_clear
void onig_node_str_clear(Node *node)
Definition: regparse.c:1449
IS_POSIX_BRACKET_ALL_RANGE
#define IS_POSIX_BRACKET_ALL_RANGE(option)
Definition: regint.h:394
IS_SYNTAX_BV
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
MC_ANYCHAR_ANYTIME
#define MC_ANYCHAR_ANYTIME(syn)
Definition: regint.h:753
ONIGENC_CTYPE_PUNCT
#define ONIGENC_CTYPE_PUNCT
Definition: onigmo.h:302
ONIG_SYN_OP_ESC_X_HEX2
#define ONIG_SYN_OP_ESC_X_HEX2
Definition: onigmo.h:548
R_ERR
#define R_ERR(call)
Definition: regparse.c:5797
PFETCH
#define PFETCH(c)
Definition: regparse.c:305
ruby_verbose
#define ruby_verbose
Definition: ruby.h:1925
ONIGENC_IS_CODE_CTYPE
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
Definition: onigmo.h:372
MC_ANYCHAR
#define MC_ANYCHAR(syn)
Definition: regint.h:749
CCV_SB
@ CCV_SB
Definition: regparse.c:4409
ONIGENC_APPLY_ALL_CASE_FOLD
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
Definition: onigmo.h:338
OnigToken::subtype
int subtype
Definition: regparse.c:2295
ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
Definition: onigmo.h:651
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
Definition: onigmo.h:568
ONIGERR_INVALID_POSIX_BRACKET_TYPE
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
Definition: onigmo.h:661
OnigToken::level
int level
Definition: regparse.c:2311
PosixBracketEntryType
Definition: regenc.h:118
ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
Definition: onigmo.h:667
OnigWarnFunc
void(* OnigWarnFunc)(const char *s)
Definition: onigmo.h:749
USE_BACKREF_WITH_LEVEL
#define USE_BACKREF_WITH_LEVEL
Definition: regint.h:73
xmemcpy
#define xmemcpy
Definition: regint.h:202
NULL_UCHARP
#define NULL_UCHARP
Definition: regint.h:302
CCV_CLASS
@ CCV_CLASS
Definition: regparse.c:4411
OnigCodePoint
unsigned int OnigCodePoint
Definition: onigmo.h:80
ONIG_SYN_OP2_OPTION_RUBY
#define ONIG_SYN_OP2_OPTION_RUBY
Definition: onigmo.h:555
int
__inline__ int
Definition: rb_mjit_min_header-2.7.0.h:2839
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
NULL_NODE
#define NULL_NODE
Definition: regparse.h:283
ARG_UNUSED
#define ARG_UNUSED
Definition: nkf.h:181
ONIG_SYN_OP_LPAREN_SUBEXP
#define ONIG_SYN_OP_LPAREN_SUBEXP
Definition: onigmo.h:531
PFETCH_S
#define PFETCH_S(c)
Definition: regparse.c:314
ANCHOR_PREC_READ_NOT
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
ONIGENC_IS_CODE_XDIGIT
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
Definition: onigmo.h:398
BIT_STATUS_ON_AT_SIMPLE
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
SET_ENCLOSE_STATUS
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
Definition: onigmo.h:574
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
Definition: onigmo.h:603
IS_ASCII_RANGE
#define IS_ASCII_RANGE(option)
Definition: regint.h:393
NT_CTYPE
#define NT_CTYPE
Definition: regparse.h:40
OnigToken::greedy
int greedy
Definition: regparse.c:2301
ONIG_SYN_OP2_ESC_H_XDIGIT
#define ONIG_SYN_OP2_ESC_H_XDIGIT
Definition: onigmo.h:571
TK_ALT
@ TK_ALT
Definition: regparse.c:2268
ONIGENC_CODE_TO_MBC
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
TK_CODE_POINT
@ TK_CODE_POINT
Definition: regparse.c:2259
ONIGERR_META_CODE_SYNTAX
#define ONIGERR_META_CODE_SYNTAX
Definition: onigmo.h:649
onig_null_warn
void onig_null_warn(const char *s ARG_UNUSED)
Definition: regparse.c:87
ONIG_LAST_CODE_POINT
#define ONIG_LAST_CODE_POINT
Definition: regint.h:304
ONIG_SYN_OP_QMARK_NON_GREEDY
#define ONIG_SYN_OP_QMARK_NON_GREEDY
Definition: onigmo.h:544
ONIG_MAX_REPEAT_NUM
#define ONIG_MAX_REPEAT_NUM
Definition: onigmo.h:440
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
Definition: onigmo.h:564
OnigToken::not
int not
Definition: regparse.c:2322
ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
Definition: onigmo.h:562
ONIGENC_IS_CODE_NAME
#define ONIGENC_IS_CODE_NAME(enc, c)
Definition: regparse.c:2510
ONIGERR_INVALID_BACKREF
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
onig_node_str_set
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
TK_CHAR
@ TK_CHAR
Definition: regparse.c:2257
IApplyCaseFoldArg::asc_cc
CClassNode * asc_cc
Definition: regparse.c:5500
NBREF
#define NBREF(node)
Definition: regparse.h:79
ONIG_SYN_OP_QMARK_ZERO_ONE
#define ONIG_SYN_OP_QMARK_ZERO_ONE
Definition: onigmo.h:525
CCS_START
@ CCS_START
Definition: regparse.c:4405
POSIX_BRACKET_CHECK_LIMIT_LENGTH
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
ONIG_SYN_OP2_QMARK_GROUP_EFFECT
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
Definition: onigmo.h:553
NT_STR
#define NT_STR
Definition: regparse.h:38
BBUF_INIT
#define BBUF_INIT(buf, size)
Definition: regint.h:447
NANCHOR
#define NANCHOR(node)
Definition: regparse.h:82
ONIG_SYN_OP_ESC_S_WHITE_SPACE
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
Definition: onigmo.h:540
ONIGENC_CTYPE_DIGIT
#define ONIGENC_CTYPE_DIGIT
Definition: onigmo.h:298
INIT_NAME_BACKREFS_ALLOC_NUM
#define INIT_NAME_BACKREFS_ALLOC_NUM
Definition: regparse.c:451
ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
Definition: onigmo.h:604
st.h
IS_SYNTAX_OP
#define IS_SYNTAX_OP(syn, opm)
Definition: regparse.h:330
BIT_STATUS_CLEAR
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
NULL
#define NULL
Definition: _sdbm.c:101
WARN_BUFSIZE
#define WARN_BUFSIZE
Definition: regparse.c:34
ONIGENC_MBC_MAXLEN
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
ST_DELETE
@ ST_DELETE
Definition: st.h:99
NameEntry
Definition: regparse.c:453
IApplyCaseFoldArg::alt_root
Node * alt_root
Definition: regparse.c:5501
DIGITVAL
#define DIGITVAL(code)
Definition: regint.h:375
OnigToken::backp
UChar * backp
Definition: regparse.c:2289
TK_CHAR_TYPE
@ TK_CHAR_TYPE
Definition: regparse.c:2261
_BBuf
Definition: regint.h:441
ONIGENC_CTYPE_GRAPH
#define ONIGENC_CTYPE_GRAPH
Definition: onigmo.h:299
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
Definition: onigmo.h:596
ENCLOSE_OPTION
#define ENCLOSE_OPTION
Definition: regparse.h:95
enclen
#define enclen(enc, p, e)
Definition: regenc.h:93
strlen
size_t strlen(const char *)
OnigToken::refs
int * refs
Definition: regparse.c:2307
RQ_ASIS
@ RQ_ASIS
Definition: regparse.c:2184
NENCLOSE
#define NENCLOSE(node)
Definition: regparse.h:81
onig_set_warn_func
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
ONIG_ENCODING_ASCII
#define ONIG_ENCODING_ASCII
Definition: onigmo.h:225
onig_get_parse_depth_limit
unsigned int onig_get_parse_depth_limit(void)
Definition: regparse.c:117
ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
Definition: onigmo.h:672
BITSET_SIZE
#define BITSET_SIZE
Definition: regint.h:415
TokenSyms
TokenSyms
Definition: regparse.c:2254
onig_node_new_enclose
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
BITSET_SET_BIT_CHKDUP
#define BITSET_SET_BIT_CHKDUP(bs, pos)
Definition: regparse.c:176
ANCHOR_WORD_END
#define ANCHOR_WORD_END
Definition: regint.h:537
v
int VALUE v
Definition: rb_mjit_min_header-2.7.0.h:12332
OnigToken::gnum
int gnum
Definition: regparse.c:2317
re_pattern_buffer::options
OnigOptionType options
Definition: onigmo.h:772
CCS_COMPLETE
@ CCS_COMPLETE
Definition: regparse.c:4404
ONIG_SYN_OP2_QMARK_SUBEXP_CALL
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
Definition: onigmo.h:579
cc
const struct rb_call_cache * cc
Definition: rb_mjit_min_header-2.7.0.h:13228
ANCHOR_NOT_WORD_BOUND
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
ODIGITVAL
#define ODIGITVAL(code)
Definition: regint.h:376
onig_st_init_strend_table_with_size
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
Definition: regparse.c:406
CCS_VALUE
@ CCS_VALUE
Definition: regparse.c:4402
OnigToken::name
UChar * name
Definition: regparse.c:2315
INamesArg::reg
regex_t * reg
Definition: regparse.c:554
ENCLOSE_STOP_BACKTRACK
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
PPEEK
#define PPEEK
Definition: regparse.c:319
ONIG_SYN_OP_ESC_B_WORD_BOUND
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
Definition: onigmo.h:539
onig_parse_make_tree
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6611
onigenc_with_ascii_strncmp
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:860
if
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
ONIG_SYNTAX_RUBY
#define ONIG_SYNTAX_RUBY
Definition: onigmo.h:511
onig_strcpy
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
Definition: regparse.c:259
CClassNode::mbuf
BBuf * mbuf
Definition: regint.h:808
NST_NAME_REF
#define NST_NAME_REF
Definition: regparse.h:139
POSIX_BRACKET_NAME_MIN_LEN
#define POSIX_BRACKET_NAME_MIN_LEN
IS_REPEAT_INFINITE
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
ENCLOSE_MEMORY
#define ENCLOSE_MEMORY
Definition: regparse.h:94
ONIG_REGION_NOTPOS
#define ONIG_REGION_NOTPOS
Definition: onigmo.h:734
TK_BACKREF
@ TK_BACKREF
Definition: regparse.c:2262
MBCODE_START_POS
#define MBCODE_START_POS(enc)
Definition: regparse.c:162
XDIGITVAL
#define XDIGITVAL(enc, code)
Definition: regint.h:377
ONIG_SYN_OP_ESC_VBAR_ALT
#define ONIG_SYN_OP_ESC_VBAR_ALT
Definition: onigmo.h:530
INIT_MULTI_BYTE_RANGE_SIZE
#define INIT_MULTI_BYTE_RANGE_SIZE
BBUF_WRITE_CODE_POINT
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
Definition: regparse.c:1644
CCV_CODE_POINT
@ CCV_CODE_POINT
Definition: regparse.c:4410
OnigSyntaxType
Definition: onigmo.h:479
NSTRING_CLEAR_RAW
#define NSTRING_CLEAR_RAW(node)
Definition: regparse.h:110
onig_strncmp
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
TK_POSIX_BRACKET_OPEN
@ TK_POSIX_BRACKET_OPEN
Definition: regparse.c:2280
st_str_end_key::end
const UChar * end
Definition: regparse.c:363
QtfrNode::lower
int lower
Definition: regparse.h:183
ONIG_SYN_ALLOW_INVALID_INTERVAL
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
Definition: onigmo.h:591
BITSET_AT
#define BITSET_AT(bs, pos)
Definition: regint.h:435
onig_scan_unsigned_number
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
Definition: regparse.c:1556
ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
Definition: onigmo.h:468
fprintf
int fprintf(FILE *__restrict, const char *__restrict,...) __attribute__((__format__(__printf__
ONIGERR_TOO_MANY_CAPTURE_GROUPS
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS
Definition: onigmo.h:676
ONIG_SYN_OP_DOT_ANYCHAR
#define ONIG_SYN_OP_DOT_ANYCHAR
Definition: onigmo.h:520
GET_CODE_POINT
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
NST_NAMED_GROUP
#define NST_NAMED_GROUP
Definition: regparse.h:138
PUNFETCH
#define PUNFETCH
Definition: regparse.c:300
ONIG_SYN_OP_ESC_CONTROL_CHARS
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
Definition: onigmo.h:545
GroupNumRemap
Definition: regparse.h:335
ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
Definition: onigmo.h:567
NSTRING_SET_AMBIG
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
SYN_GNU_REGEX_BV
#define SYN_GNU_REGEX_BV
Definition: regint.h:780
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
Definition: onigmo.h:561
OnigToken::by_name
int by_name
Definition: regparse.c:2308
SINGLE_BYTE_SIZE
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
PosixBracketEntryType::name
const UChar name[6]
Definition: regenc.h:120
onig_node_new_anchor
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
Definition: onigmo.h:534
st_foreach_callback_func
int st_foreach_callback_func(st_data_t, st_data_t, st_data_t)
Definition: st.h:137
NT_ALT
#define NT_ALT
Definition: regparse.h:47
RQ_DEL
@ RQ_DEL
Definition: regparse.c:2185
TK_CC_OPEN
@ TK_CC_OPEN
Definition: regparse.c:2271
i
uint32_t i
Definition: rb_mjit_min_header-2.7.0.h:5464
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
Definition: onigmo.h:560
QtfrNode::upper
int upper
Definition: regparse.h:184
MC_ZERO_OR_ONE_TIME
#define MC_ZERO_OR_ONE_TIME(syn)
Definition: regint.h:751
NST_BY_NUMBER
#define NST_BY_NUMBER
Definition: regparse.h:142
ONIGENC_CTYPE_CNTRL
#define ONIGENC_CTYPE_CNTRL
Definition: onigmo.h:297
ONIGENC_CODE_TO_MBCLEN
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
BBUF_MOVE_RIGHT
#define BBUF_MOVE_RIGHT(buf, from, to, n)
Definition: regint.h:497
onig_node_new_list
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
onig_node_str_cat
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
INamesArg::enc
OnigEncoding enc
Definition: regparse.c:557
NameEntry::back_alloc
int back_alloc
Definition: regparse.c:457
NQTFR
#define NQTFR(node)
Definition: regparse.h:80
ONIGERR_EMPTY_CHAR_CLASS
#define ONIGERR_EMPTY_CHAR_CLASS
Definition: onigmo.h:644
ONIG_SYN_OP_LINE_ANCHOR
#define ONIG_SYN_OP_LINE_ANCHOR
Definition: onigmo.h:542
_BBuf::used
unsigned int used
Definition: regint.h:443
OnigToken::code
OnigCodePoint code
Definition: regparse.c:2293
onig_st_insert_strend
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
Definition: regparse.c:430
PINC
#define PINC
Definition: regparse.c:301
st_index_t
st_data_t st_index_t
Definition: st.h:50
NameEntry::name_len
size_t name_len
Definition: regparse.c:455
st_hash_type
Definition: st.h:61
bad
#define bad(x)
Definition: _sdbm.c:123
ONIGENC_CTYPE_UPPER
#define ONIGENC_CTYPE_UPPER
Definition: onigmo.h:304
CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
Definition: regparse.c:36
va_end
#define va_end(v)
Definition: rb_mjit_min_header-2.7.0.h:3979
StrNode::flag
unsigned int flag
Definition: regparse.h:174
NEWLINE_CODE
#define NEWLINE_CODE
ANCHOR_END_BUF
#define ANCHOR_END_BUF
Definition: regint.h:530
ONIG_SYN_OP_ESC_OCTAL3
#define ONIG_SYN_OP_ESC_OCTAL3
Definition: onigmo.h:547
IS_EXTEND
#define IS_EXTEND(option)
Definition: regint.h:384
onig_st_lookup_strend
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
Definition: regparse.c:418
ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
Definition: onigmo.h:580
TK_ANYCHAR
@ TK_ANYCHAR
Definition: regparse.c:2260
term
const char term
Definition: id.c:37
ONIGERR_END_PATTERN_AT_LEFT_BRACE
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
Definition: onigmo.h:642
IApplyCaseFoldArg::cc
CClassNode * cc
Definition: regparse.c:5499
TK_CALL
@ TK_CALL
Definition: regparse.c:2263
PosixBracketEntryType::ctype
int ctype
Definition: regenc.h:121
IApplyCaseFoldArg
Definition: regparse.c:5497
BITSET_CLEAR
#define BITSET_CLEAR(bs)
Definition: regint.h:427
SCANENV_MEM_NODES
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
ONIG_OPTION_MULTILINE
#define ONIG_OPTION_MULTILINE
Definition: onigmo.h:453
ENCLOSE_ABSENT
#define ENCLOSE_ABSENT
Definition: regparse.h:98
ONIGERR_END_PATTERN_AT_ESCAPE
#define ONIGERR_END_PATTERN_AT_ESCAPE
Definition: onigmo.h:646
ONIGERR_CONTROL_CODE_SYNTAX
#define ONIGERR_CONTROL_CODE_SYNTAX
Definition: onigmo.h:650
OnigOptionType
unsigned int OnigOptionType
Definition: onigmo.h:445
ANCHOR_PREC_READ
#define ANCHOR_PREC_READ
Definition: regint.h:538
Bits
unsigned char Bits
Definition: regint.h:420
BITSET_CLEAR_BIT
#define BITSET_CLEAR_BIT(bs, pos)
Definition: regint.h:437
INamesArg
Definition: regparse.c:552
NameTable
st_table NameTable
Definition: regparse.c:464
ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
Definition: onigmo.h:687
ENCLOSE_CONDITION
#define ENCLOSE_CONDITION
Definition: regparse.h:97
TK_EXTENDED_GRAPHEME_CLUSTER
@ TK_EXTENDED_GRAPHEME_CLUSTER
Definition: regparse.c:2275
ONIGENC_IS_CODE_DIGIT
#define ONIGENC_IS_CODE_DIGIT(enc, code)
Definition: onigmo.h:396
RQ_A
@ RQ_A
Definition: regparse.c:2186
BBUF_ENSURE_SIZE
#define BBUF_ENSURE_SIZE(buf, size)
Definition: regint.h:465
ONIGENC_CTYPE_PRINT
#define ONIGENC_CTYPE_PRINT
Definition: onigmo.h:301
TK_EOT
@ TK_EOT
Definition: regparse.c:2255
NCCLASS
#define NCCLASS(node)
Definition: regparse.h:77
MIN
#define MIN(a, b)
Definition: ffi.c:30
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
Definition: onigmo.h:582
INamesArg::arg
void * arg
Definition: regparse.c:555
ADD_ALL_MULTI_BYTE_RANGE
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
Definition: regparse.c:168
IS_SINGLELINE
#define IS_SINGLELINE(option)
Definition: regint.h:381
OnigToken::escaped
int escaped
Definition: regparse.c:2287
size
int size
Definition: encoding.c:58
re_pattern_buffer::name_table
void * name_table
Definition: onigmo.h:778
NCALL
#define NCALL(node)
Definition: regparse.h:84
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
Definition: onigmo.h:577
FALSE
#define FALSE
Definition: nkf.h:174
ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
Definition: onigmo.h:669
onig_set_parse_depth_limit
int onig_set_parse_depth_limit(unsigned int depth)
Definition: regparse.c:123
ONIGERR_PARSE_DEPTH_LIMIT_OVER
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER
Definition: onigmo.h:636
ANCHOR_SEMI_END_BUF
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
onig_vsnprintf_with_pattern
void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar *pat, UChar *pat_end, const UChar *fmt, va_list args)
Definition: regerror.c:314
ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
Definition: onigmo.h:657
tok
#define tok(p)
Definition: ripper.c:13096
TK_CC_RANGE
@ TK_CC_RANGE
Definition: regparse.c:2279
OnigEncoding
const typedef OnigEncodingType * OnigEncoding
Definition: onigmo.h:182
list
struct rb_encoding_entry * list
Definition: encoding.c:56
ONIG_INEFFECTIVE_META_CHAR
#define ONIG_INEFFECTIVE_META_CHAR
Definition: onigmo.h:619
NT_BREF
#define NT_BREF
Definition: regparse.h:42
ONIGERR_END_PATTERN_AT_CONTROL
#define ONIGERR_END_PATTERN_AT_CONTROL
Definition: onigmo.h:648
onig_node_new_alt
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
OnigToken::lower
int lower
Definition: regparse.c:2299
ONIG_NO_SUPPORT_CONFIG
#define ONIG_NO_SUPPORT_CONFIG
Definition: onigmo.h:626
ONIGERR_PARSER_BUG
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
ONIG_OPTION_ASCII_RANGE
#define ONIG_OPTION_ASCII_RANGE
Definition: onigmo.h:467
re_pattern_buffer::syntax
const OnigSyntaxType * syntax
Definition: onigmo.h:777
OnigToken::num
int num
Definition: regparse.c:2305
ONIG_MAX_MULTI_BYTE_RANGES_NUM
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
Definition: onigmo.h:441
IS_IGNORECASE
#define IS_IGNORECASE(option)
Definition: regint.h:383
ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: onigmo.h:691
ONIGERR_MULTIPLEX_DEFINED_NAME
#define ONIGERR_MULTIPLEX_DEFINED_NAME
Definition: onigmo.h:684
NODE_STR_BUF_SIZE
#define NODE_STR_BUF_SIZE
Definition: regparse.h:101
key
key
Definition: openssl_missing.h:181
ONIGENC_IS_UNICODE
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
ONIG_OPTION_WORD_BOUND_ALL_RANGE
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
Definition: onigmo.h:469
ANCHOR_BEGIN_POSITION
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
IS_WORD_BOUND_ALL_RANGE
#define IS_WORD_BOUND_ALL_RANGE(option)
Definition: regint.h:395
NST_RECURSION
#define NST_RECURSION
Definition: regparse.h:135
ONIG_SYN_OP_ESC_D_DIGIT
#define ONIG_SYN_OP_ESC_D_DIGIT
Definition: onigmo.h:541
ONIG_SYN_OP_DECIMAL_BACKREF
#define ONIG_SYN_OP_DECIMAL_BACKREF
Definition: onigmo.h:535
src
__inline__ const void *__restrict src
Definition: rb_mjit_min_header-2.7.0.h:2836
fmt
const VALUE int int int int int int VALUE char * fmt
Definition: rb_mjit_min_header-2.7.0.h:6462
NameEntry::name
UChar * name
Definition: regparse.c:454
INamesArg::ret
int ret
Definition: regparse.c:556
PosixBracketEntryType::len
short int len
Definition: regenc.h:119
ONIG_SYN_OP_PLUS_ONE_INF
#define ONIG_SYN_OP_PLUS_ONE_INF
Definition: onigmo.h:523
ANCHOR_END_LINE
#define ANCHOR_END_LINE
Definition: regint.h:532
ONIG_OPTION_DONT_CAPTURE_GROUP
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
PPEEK_IS
#define PPEEK_IS(c)
Definition: regparse.c:320
ONIGENC_CTYPE_SPACE
#define ONIGENC_CTYPE_SPACE
Definition: onigmo.h:303
ONIGERR_INVALID_REPEAT_RANGE_PATTERN
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
Definition: onigmo.h:663
onig_is_code_in_cc
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6117
onig_names_free
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
neg
#define neg(x)
Definition: time.c:141
OnigToken::possessive
int possessive
Definition: regparse.c:2302
TK_CHAR_PROPERTY
@ TK_CHAR_PROPERTY
Definition: regparse.c:2273
INIT_SCANENV_MEMNODES_ALLOC_SIZE
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
Definition: regparse.c:980
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
n
const char size_t n
Definition: rb_mjit_min_header-2.7.0.h:5456
TK_CC_CLOSE
@ TK_CC_CLOSE
Definition: regparse.c:2278
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
Definition: onigmo.h:608
IApplyCaseFoldArg::ptail
Node ** ptail
Definition: regparse.c:5502
REPEAT_INFINITE
#define REPEAT_INFINITE
Definition: regint.h:408
re_pattern_buffer
Definition: onigmo.h:755
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
Definition: onigmo.h:607
TK_SUBEXP_OPEN
@ TK_SUBEXP_OPEN
Definition: regparse.c:2269
ONIG_SYN_OP_POSIX_BRACKET
#define ONIG_SYN_OP_POSIX_BRACKET
Definition: onigmo.h:543
UChar
#define UChar
Definition: onigmo.h:76
arg
VALUE arg
Definition: rb_mjit_min_header-2.7.0.h:5601
ONIG_SYN_OP_ESC_C_CONTROL
#define ONIG_SYN_OP_ESC_C_CONTROL
Definition: onigmo.h:546
ONIGENC_IS_CODE_WORD
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
ONIG_MAX_BACKREF_NUM
#define ONIG_MAX_BACKREF_NUM
Definition: onigmo.h:439
f
#define f
ONIGERR_INVALID_CODE_POINT_VALUE
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: onigmo.h:689
ONIGERR_TOO_SHORT_DIGITS
#define ONIGERR_TOO_SHORT_DIGITS
Definition: onigmo.h:677
BitSetRef
Bits * BitSetRef
Definition: regint.h:423
ST_CONTINUE
@ ST_CONTINUE
Definition: st.h:99
OnigToken::upper
int upper
Definition: regparse.c:2300
xmalloc
#define xmalloc
Definition: defines.h:211
xrealloc
#define xrealloc
Definition: defines.h:214
ONIGENC_CTYPE_ALNUM
#define ONIGENC_CTYPE_ALNUM
Definition: onigmo.h:307
_BBuf::alloc
unsigned int alloc
Definition: regint.h:444
DEFAULT_PARSE_DEPTH_LIMIT
#define DEFAULT_PARSE_DEPTH_LIMIT
Definition: regint.h:88
regparse.h
st_data_t
unsigned long st_data_t
Definition: rb_mjit_min_header-2.7.0.h:5363
ANCHOR_KEEP
#define ANCHOR_KEEP
Definition: regint.h:546
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
Definition: onigmo.h:597
ONIG_SYN_OP_ESC_W_WORD
#define ONIG_SYN_OP_ESC_W_WORD
Definition: onigmo.h:537
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
Definition: onigmo.h:569
POSIX_BRACKET_ENTRY_INIT
#define POSIX_BRACKET_ENTRY_INIT(name, ctype)
Definition: regenc.h:124
ONIGENC_PROPERTY_NAME_TO_CTYPE
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
Definition: onigmo.h:369
ONIG_NORMAL
#define ONIG_NORMAL
Definition: onigmo.h:624
OnigToken::exist_level
int exist_level
Definition: regparse.c:2310
ONIGERR_TOO_BIG_NUMBER
#define ONIGERR_TOO_BIG_NUMBER
Definition: onigmo.h:666
ONIG_SYN_OP_VARIABLE_META_CHARACTERS
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
Definition: onigmo.h:519
ONIGERR_MEMORY
#define ONIGERR_MEMORY
Definition: onigmo.h:629
call
return cc call
Definition: rb_mjit_min_header-2.7.0.h:13249
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
Definition: onigmo.h:563
onigenc_get_prev_char_head
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
ONIG_SYN_OP_BRACE_INTERVAL
#define ONIG_SYN_OP_BRACE_INTERVAL
Definition: onigmo.h:527
hash_table_type
void hash_table_type
Definition: regint.h:919
CCS_RANGE
@ CCS_RANGE
Definition: regparse.c:4403
NODE_STR_MARGIN
#define NODE_STR_MARGIN
Definition: regparse.h:100
TK_STRING
@ TK_STRING
Definition: regparse.c:2258
re_registers::beg
OnigPosition * beg
Definition: onigmo.h:719
ONIG_IS_OPTION_ON
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
ONIGERR_UNDEFINED_GROUP_OPTION
#define ONIGERR_UNDEFINED_GROUP_OPTION
Definition: onigmo.h:660
ONIGERR_INVALID_CHAR_IN_GROUP_NAME
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
Definition: onigmo.h:681
QtfrNode::target
struct _Node * target
Definition: regparse.h:182
re_registers
Definition: onigmo.h:716
ONIGENC_IS_SINGLEBYTE
#define ONIGENC_IS_SINGLEBYTE(enc)
Definition: onigmo.h:318
err
int err
Definition: win32.c:135
ANCHOR_LOOK_BEHIND_NOT
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
ONIG_SYN_OP_BRACKET_CC
#define ONIG_SYN_OP_BRACKET_CC
Definition: onigmo.h:536
LIST
#define LIST
Definition: regparse.c:5768
ONIGENC_CTYPE_WORD
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
ANCHOR_WORD_BOUND
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
ScanEnv
Definition: regparse.h:290
ONIGERR_INVALID_CONDITION_PATTERN
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
NODE_BACKREFS_SIZE
#define NODE_BACKREFS_SIZE
Definition: regparse.h:102
xfree
#define xfree
Definition: defines.h:216
s2
const char * s2
Definition: rb_mjit_min_header-2.7.0.h:5454
ONIGERR_END_PATTERN_IN_GROUP
#define ONIGERR_END_PATTERN_IN_GROUP
Definition: onigmo.h:659
OnigToken::rel
int rel
Definition: regparse.c:2318
ONIGERR_EMPTY_GROUP_NAME
#define ONIGERR_EMPTY_GROUP_NAME
Definition: onigmo.h:679
onig_noname_group_capture_is_active
int onig_noname_group_capture_is_active(const regex_t *reg)
Definition: regparse.c:963
ONIG_SYN_OP_VBAR_ALT
#define ONIG_SYN_OP_VBAR_ALT
Definition: onigmo.h:529
ALT
#define ALT
Definition: regparse.c:5769
ONIG_SYN_OP_ESC_O_BRACE_OCTAL
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
Definition: onigmo.h:550
OnigToken::ascii_range
int ascii_range
Definition: regparse.c:2296
PEND
#define PEND
Definition: regparse.c:299
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
Definition: onigmo.h:538
TK_KEEP
@ TK_KEEP
Definition: regparse.c:2276
ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
Definition: onigmo.h:658
TK_CC_CC_OPEN
@ TK_CC_CC_OPEN
Definition: regparse.c:2282
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
Definition: onigmo.h:573
rb_compile_warn
void rb_compile_warn(const char *file, int line, const char *fmt,...)
Definition: error.c:270
ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
Definition: onigmo.h:668
RQ_PQ_Q
@ RQ_PQ_Q
Definition: regparse.c:2190
ANCHOR_BEGIN_BUF
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
TK_QUOTE_OPEN
@ TK_QUOTE_OPEN
Definition: regparse.c:2272
CCSTATE
CCSTATE
Definition: regparse.c:4401
onig_foreach_name
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:576
BACKREF_REL_TO_ABS
#define BACKREF_REL_TO_ABS(rel_no, env)
Definition: regparse.c:157
len
uint8_t len
Definition: escape.c:17
CHECK_NULL_RETURN
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
Definition: onigmo.h:588
ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
Definition: onigmo.h:654
IS_MC_ESC_CODE
#define IS_MC_ESC_CODE(code, syn)
Definition: regint.h:755
ONIGENC_MBC_TO_CODE
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
re_pattern_buffer::num_mem
int num_mem
Definition: onigmo.h:761
ONIG_SYN_OP_ESC_X_BRACE_HEX8
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
Definition: onigmo.h:549
ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
Definition: onigmo.h:552
SYN_GNU_REGEX_OP
#define SYN_GNU_REGEX_OP
Definition: regint.h:767
IS_NOT_NULL
#define IS_NOT_NULL(p)
Definition: regint.h:299
RQ_QQ
@ RQ_QQ
Definition: regparse.c:2188
ONIG_SYN_OP_ESC_PLUS_ONE_INF
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
Definition: onigmo.h:524
SIZE_CODE_POINT
#define SIZE_CODE_POINT
Definition: regint.h:683
OnigToken::name_end
UChar * name_end
Definition: regparse.c:2316
NST_NEST_LEVEL
#define NST_NEST_LEVEL
Definition: regparse.h:141
is_invalid_quantifier_target
#define is_invalid_quantifier_target(node)
Definition: regparse.c:2122
onig_scan_env_set_error_string
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6638
onig_node_new_str
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
va_start
#define va_start(v, l)
Definition: rb_mjit_min_header-2.7.0.h:3978
ONIG_SYN_OP_ASTERISK_ZERO_INF
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
Definition: onigmo.h:521
onig_name_to_group_numbers
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:887
BITSET_IS_EMPTY
#define BITSET_IS_EMPTY(bs, empty)
Definition: regparse.c:181
ONIGENC_CODE_RANGE_NUM
#define ONIGENC_CODE_RANGE_NUM(range)
Definition: onigmo.h:139
top
unsigned int top
Definition: nkf.c:4323
INamesArg::func
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
Definition: regparse.c:553
SCANENV_MEMNODES_SIZE
#define SCANENV_MEMNODES_SIZE
Definition: regparse.h:285
NSTR_RAW
#define NSTR_RAW
Definition: regparse.h:104
RQ_P_QQ
@ RQ_P_QQ
Definition: regparse.c:2189
CCVALTYPE
CCVALTYPE
Definition: regparse.c:4408
va_list
__gnuc_va_list va_list
Definition: rb_mjit_min_header-2.7.0.h:836
HashDataType
st_data_t HashDataType
Definition: regparse.c:465
NCTYPE
#define NCTYPE(node)
Definition: regparse.h:78
hash_data_type
st_data_t hash_data_type
Definition: regint.h:925
QtfrNode::greedy
int greedy
Definition: regparse.h:185
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
Definition: onigmo.h:589
OnigToken::c
int c
Definition: regparse.c:2292
ONIG_OPTION_SINGLELINE
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
Definition: onigmo.h:578
ONIG_SYN_OP2_ESC_V_VTAB
#define ONIG_SYN_OP2_ESC_V_VTAB
Definition: onigmo.h:565
NameEntry::back_ref1
int back_ref1
Definition: regparse.c:458
TK_ANCHOR
@ TK_ANCHOR
Definition: regparse.c:2264
ONIGERR_PREMATURE_END_OF_CHAR_CLASS
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
Definition: onigmo.h:645
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
Definition: onigmo.h:581
StrNode
Definition: regparse.h:170
BITS_IN_ROOM
#define BITS_IN_ROOM
Definition: regint.h:414
fputs
int fputs(const char *__restrict, FILE *__restrict)
ONOFF
#define ONOFF(v, f, negative)
Definition: regparse.c:160
ONIG_OPTION_CAPTURE_GROUP
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
onig_node_list_add
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
ONIGERR_INVALID_CHAR_PROPERTY_NAME
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
Definition: onigmo.h:688
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
Definition: onigmo.h:526
IApplyCaseFoldArg::env
ScanEnv * env
Definition: regparse.c:5498
onig_node_free
void onig_node_free(Node *node)
Definition: regparse.c:1062
BitSet
Bits BitSet[BITSET_SIZE]
Definition: regint.h:422
ONIGENC_CTYPE_BLANK
#define ONIGENC_CTYPE_BLANK
Definition: onigmo.h:296
SET_ALL_MULTI_BYTE_RANGE
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
Definition: regparse.c:165
TK_OP_REPEAT
@ TK_OP_REPEAT
Definition: regparse.c:2265
MC_ANYTIME
#define MC_ANYTIME(syn)
Definition: regint.h:750
numberof
#define numberof(array)
Definition: etc.c:618
TK_SUBEXP_CLOSE
@ TK_SUBEXP_CLOSE
Definition: regparse.c:2270
NCDR
#define NCDR(node)
Definition: regparse.h:87
NameEntry::back_num
int back_num
Definition: regparse.c:456
GroupNumRemap::new_val
int new_val
Definition: regparse.h:336
NCCLASS_SET_NOT
#define NCCLASS_SET_NOT(nd)
Definition: regint.h:794
st_table
Definition: st.h:79
ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
Definition: onigmo.h:599
NSTR
#define NSTR(node)
Definition: regparse.h:76
NameEntry::back_refs
int * back_refs
Definition: regparse.c:459
INT_MAX_LIMIT
#define INT_MAX_LIMIT
Definition: regint.h:373
ptrdiff_t
long int ptrdiff_t
Definition: rb_mjit_min_header-2.7.0.h:802
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
Definition: onigmo.h:557
onigenc_step
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
Definition: onigmo.h:533
NT_CALL
#define NT_CALL
Definition: regparse.h:48
st_str_end_key::s
const UChar * s
Definition: regparse.c:362
IS_NULL
#define IS_NULL(p)
Definition: regint.h:298
onig_set_verb_warn_func
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:106
SWAP_BBUF_NOT
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
Definition: regparse.c:1815
onig_number_of_names
int onig_number_of_names(const regex_t *reg)
Definition: regparse.c:623
TK_CC_AND
@ TK_CC_AND
Definition: regparse.c:2281
RTEST
#define RTEST(v)
Definition: ruby.h:481
StrNode::end
UChar * end
Definition: regparse.h:173
ONIGENC_CTYPE_ASCII
#define ONIGENC_CTYPE_ASCII
Definition: onigmo.h:308
ruby::backward::cxxanyargs::type
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
Definition: onigmo.h:522
NT_ANCHOR
#define NT_ANCHOR
Definition: regparse.h:45
PFETCH_READY
#define PFETCH_READY
Definition: regparse.c:297
NTYPE
#define NTYPE(node)
Definition: regparse.h:69
__sFILE
Definition: vsnprintf.c:169
ONIGENC_IS_CODE_NEWLINE
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
Definition: onigmo.h:374
ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
Definition: onigmo.h:655
ONIG_OPTION_EXTEND
#define ONIG_OPTION_EXTEND
Definition: onigmo.h:452
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
Definition: onigmo.h:556
NCCLASS_CLEAR_NOT
#define NCCLASS_CLEAR_NOT(nd)
Definition: regint.h:795
NQ_TARGET_ISNOT_EMPTY
#define NQ_TARGET_ISNOT_EMPTY
Definition: regparse.h:122
MAX
#define MAX(a, b)
Definition: regint.h:296
ONIG_SYN_OP2_ESC_U_HEX4
#define ONIG_SYN_OP2_ESC_U_HEX4
Definition: onigmo.h:566
name
const char * name
Definition: nkf.c:208
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
Definition: onigmo.h:605
BIT_STATUS_BITS_NUM
#define BIT_STATUS_BITS_NUM
Definition: regint.h:354