/* Size constant for warning output; presumably the byte capacity of the
 * buffer a warning message is formatted into -- the use site is not visible
 * in this view, confirm before changing. */
34 #define WARN_BUFSIZE 256
/* Build-time switch tested later with #ifndef / #ifdef; it appears to select
 * how case folding is combined with negated character classes -- confirm
 * against the guarded code. */
36 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
89 #ifdef DEFAULT_WARN_FUNCTION
95 #ifdef DEFAULT_VERB_WARN_FUNCTION
119 return ParseDepthLimit;
128 ParseDepthLimit = depth;
134 bbuf_free(
BBuf* bbuf)
151 if (r != 0)
return r;
/* BACKREF_REL_TO_ABS(rel_no, env): evaluates to (env)->num_mem + 1 + (rel_no).
 * Presumably rel_no is a negative, relative group number so that -1 maps to
 * the most recently numbered group -- TODO confirm against the callers.
 * Both arguments are evaluated exactly once. */
157 #define BACKREF_REL_TO_ABS(rel_no, env) \
158 ((env)->num_mem + 1 + (rel_no))
/*
 * ONOFF(v, f, negative): when `negative` is non-zero, clear the bits of mask
 * f in v; otherwise set them.  v must be a modifiable lvalue.  The value of
 * the expansion is the updated v.
 *
 * The whole conditional is wrapped in parentheses so the macro keeps its
 * meaning when it appears inside a larger expression (for example
 * `1 + ONOFF(v, f, 0)`), where an unparenthesized ?: would otherwise absorb
 * the surrounding operands into its condition.
 */
#define ONOFF(v,f,negative) ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
/* MBCODE_START_POS(enc): evaluates to code point 0 when
 * ONIGENC_MBC_MINLEN(enc) is greater than 1, otherwise to 0x80; the result is
 * cast to OnigCodePoint.  Presumably this is the first code point that is
 * stored in the multi-byte range buffer rather than the bitset -- confirm. */
162 #define MBCODE_START_POS(enc) \
163 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
/* SET_ALL_MULTI_BYTE_RANGE(enc, pbuf): expands to a call of
 * add_code_range_to_buf() that adds the range
 * [MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT] to pbuf; the value of the
 * expansion is that call's return value.
 * NOTE: `env` is NOT a macro parameter -- a variable named env must be in
 * scope wherever this macro is expanded. */
165 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
168 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
176 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env, pos, pos); \
178 BS_ROOM(bs, pos) |= BS_BIT(pos); \
181 #define BITSET_IS_EMPTY(bs,empty) do {\
184 for (i = 0; i < BITSET_SIZE; i++) {\
244 #if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
268 #ifdef USE_NAMED_GROUP
283 for (
i = 0;
i < term_len;
i++)
295 # define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
297 # define PFETCH_READY UChar* pfetch_prev
/* PEND: 1 when the caller's cursor p is no longer below end (p < end is
 * false), otherwise 0.  Relies on locals named p and end at the use site. */
299 #define PEND (p < end ? 0 : 1)
/* PUNFETCH: assigns the caller's pfetch_prev back to p.  Presumably
 * pfetch_prev was recorded by the preceding fetch so this steps back one
 * character -- the recording site is not visible here.  The expansion is a
 * bare assignment expression with no parentheses and no trailing semicolon. */
300 #define PUNFETCH p = pfetch_prev
303 p += enclen(enc, p, end); \
305 #define PFETCH(c) do { \
306 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
308 p += enclen(enc, p, end); \
311 #define PINC_S do { \
312 p += enclen(enc, p, end); \
314 #define PFETCH_S(c) do { \
315 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316 p += enclen(enc, p, end); \
/* PPEEK: code point at the caller's cursor p without modifying p.  When
 * p < end it is ONIGENC_MBC_TO_CODE(enc, p, end); otherwise it is PEND_VALUE.
 * Relies on enc, p and end being in scope at the use site. */
319 #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/*
 * PPEEK_IS(c): non-zero when the code point reported by PPEEK equals c,
 * compared as OnigCodePoint.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument is converted as a whole.  Without the parentheses the cast binds
 * only to the first operand, and e.g. PPEEK_IS(flag ? 'a' : 'b') would parse
 * as (PPEEK == (OnigCodePoint)flag) ? 'a' : 'b', which is always non-zero.
 */
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )(c))
340 strcat_capa_from_static(
UChar* dest,
UChar* dest_end,
353 #ifdef USE_ST_LIBRARY
375 if ((x->
end - x->
s) != (y->
end - y->
s))
381 c = (
int )*p - (
int )*q;
382 if (c != 0)
return c;
399 val = val * 997 + (
int )*p++;
402 return val + (val >> 5);
414 onig_st_init_table_with_size(&hashType,
size);
426 return onig_st_lookup(table, (
st_data_t )(&
key), value);
449 #ifdef USE_NAMED_GROUP
451 # define INIT_NAME_BACKREFS_ALLOC_NUM 8
462 # ifdef USE_ST_LIBRARY
530 r = names_clear(reg);
563 int r = (*(
arg->func))(e->
name,
635 # define INIT_NAMES_ALLOC_NUM 8
653 for (
i = 0;
i < t->num;
i++) {
684 for (
i = 0;
i < t->num;
i++) {
711 r = names_clear(reg);
729 for (
i = 0;
i < t->num;
i++) {
747 for (
i = 0;
i < t->num;
i++) {
752 if (r != 0)
return r;
781 e = name_find(reg,
name, name_end);
783 # ifdef USE_ST_LIBRARY
791 e->
name = strdup_with_null(reg->
enc,
name, name_end);
807 alloc = INIT_NAMES_ALLOC_NUM;
823 else if (t->num == t->alloc) {
827 alloc = t->alloc * 2;
834 for (
i = t->num; i < t->alloc;
i++) {
836 t->e[
i].name_len = 0;
837 t->e[
i].back_num = 0;
838 t->e[
i].back_alloc = 0;
839 t->e[
i].back_refs = (
int* )
NULL;
844 e->
name = strdup_with_null(reg->
enc,
name, name_end);
888 const UChar* name_end,
int** nums)
923 for (
i =
n - 1;
i >= 0;
i--) {
936 const UChar* name_end,
int** nums)
968 #ifdef USE_NAMED_GROUP
980 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
995 #ifdef USE_NAMED_GROUP
1004 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1005 env->num_comb_exp_check = 0;
1006 env->comb_exp_max_regnum = 0;
1007 env->curr_max_regnum = 0;
1008 env->has_recursion = 0;
1010 env->parse_depth = 0;
1011 env->warnings_flag = 0;
1020 need =
env->num_mem + 1;
1024 if (
env->mem_alloc <= need) {
1033 alloc =
env->mem_alloc * 2;
1038 for (
i =
env->num_mem + 1;
i < alloc;
i++)
1041 env->mem_nodes_dynamic = p;
1042 env->mem_alloc = alloc;
1047 return env->num_mem;
1053 if (
env->num_mem >= num)
1067 switch (
NTYPE(node)) {
1069 if (
NSTR(node)->capa != 0 &&
1092 bbuf_free(
cc->mbuf);
1097 if (
NQTFR(node)->target)
1140 node_new_cclass(
void)
1142 Node* node = node_new();
1146 initialize_cclass(
NCCLASS(node));
1151 node_new_ctype(
int type,
int not,
int ascii_range)
1153 Node* node = node_new();
1159 NCTYPE(node)->ascii_range = ascii_range;
1164 node_new_anychar(
void)
1166 Node* node = node_new();
1174 node_new_list(
Node* left,
Node* right)
1176 Node* node = node_new();
1188 return node_new_list(left, right);
1212 Node* node = node_new();
1224 Node* node = node_new();
1231 NANCHOR(node)->ascii_range = 0;
1236 node_new_backref(
int back_num,
int* backrefs,
int by_name,
1238 int exist_level,
int nest_level,
1243 Node* node = node_new();
1248 NBREF(node)->state = 0;
1249 NBREF(node)->back_num = back_num;
1250 NBREF(node)->back_dynamic = (
int* )
NULL;
1254 #ifdef USE_BACKREF_WITH_LEVEL
1255 if (exist_level != 0) {
1257 NBREF(node)->nest_level = nest_level;
1261 for (
i = 0;
i < back_num;
i++) {
1262 if (backrefs[
i] <=
env->num_mem &&
1270 for (
i = 0;
i < back_num;
i++)
1271 NBREF(node)->back_static[
i] = backrefs[
i];
1274 int* p = (
int* )
xmalloc(
sizeof(
int) * back_num);
1279 NBREF(node)->back_dynamic = p;
1280 for (
i = 0;
i < back_num;
i++)
1286 #ifdef USE_SUBEXP_CALL
1290 Node* node = node_new();
1294 NCALL(node)->state = 0;
1297 NCALL(node)->name_end = name_end;
1298 NCALL(node)->group_num = gnum;
1304 node_new_quantifier(
int lower,
int upper,
int by_number)
1306 Node* node = node_new();
1310 NQTFR(node)->state = 0;
1312 NQTFR(node)->lower = lower;
1313 NQTFR(node)->upper = upper;
1314 NQTFR(node)->greedy = 1;
1318 NQTFR(node)->is_referred = 0;
1322 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1323 NQTFR(node)->comb_exp_check_num = 0;
1330 node_new_enclose(
int type)
1332 Node* node = node_new();
1349 return node_new_enclose(
type);
1360 #ifdef USE_SUBEXP_CALL
1387 if (capa <=
NSTR(node)->capa) {
1392 p = strcat_capa_from_static(
NSTR(node)->s,
NSTR(node)->end,
1395 p = strcat_capa(
NSTR(node)->s,
NSTR(node)->end, s, end, capa);
1399 NSTR(node)->capa = (
int )capa;
1432 if (num < 0)
return num;
1441 NSTR(node)->flag = flag;
1442 NSTR(node)->capa = 0;
1451 if (
NSTR(node)->capa != 0 &&
1456 NSTR(node)->capa = 0;
1457 NSTR(node)->flag = 0;
1463 node_new_str(
const UChar* s,
const UChar* end)
1465 Node* node = node_new();
1469 NSTR(node)->capa = 0;
1470 NSTR(node)->flag = 0;
1483 return node_new_str(s, end);
1489 Node* node = node_new_str(s, end);
1496 node_new_empty(
void)
1502 node_new_str_raw_char(
UChar c)
1507 return node_new_str_raw(p, p + 1);
1516 if (sn->
end > sn->
s) {
1518 if (p && p > sn->
s) {
1519 n = node_new_str(p, sn->
end);
1531 if (sn->
end > sn->
s) {
1532 return ((
enclen(enc, sn->
s, sn->
end) < sn->
end - sn->
s) ? 1 : 0);
1537 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
1549 for (
i = 0;
i < num;
i++) {
1558 unsigned int num, val;
1571 num = num * 10 + val;
1583 scan_unsigned_hexadecimal_number(
UChar**
src,
UChar* end,
int minlen,
1587 unsigned int num, val;
1592 restlen = maxlen - minlen;
1594 while (!
PEND && maxlen-- != 0) {
1609 if (maxlen > restlen)
1616 scan_unsigned_octal_number(
UChar**
src,
UChar* end,
int maxlen,
1620 unsigned int num, val;
1625 while (!
PEND && maxlen-- != 0) {
1632 num = (num << 3) + val;
/* BBUF_WRITE_CODE_POINT(bbuf, pos, code): forwards to
 * BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT), i.e. writes
 * SIZE_CODE_POINT bytes taken from the object `code` at position pos.
 * Because the address of `code` is taken, `code` must be an lvalue (a
 * variable, not a literal or arbitrary expression). */
1644 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1645 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1652 new_code_range(
BBuf** pbuf)
1654 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1679 n = from; from = to; to =
n;
1683 r = new_code_range(pbuf);
1695 bound = (from == 0) ? 0 :
n;
1696 for (low = 0; low < bound; ) {
1697 x = (low + bound) >> 1;
1698 if (from - 1 > data[x*2 + 1])
1705 for (bound =
n; high < bound; ) {
1706 x = (high + bound) >> 1;
1707 if (to + 1 >= data[x*2])
1716 inc_n = low + 1 - high;
1721 if (checkdup && from <= data[low*2+1]
1722 && (data[low*2] <= from || data[low*2+1] <= to))
1723 CC_DUP_WARN(
env, from, to);
1724 if (from > data[low*2])
1726 if (to < data[(high - 1)*2 + 1])
1727 to = data[(high - 1)*2 + 1];
1758 return add_code_range_to_buf0(pbuf,
env, from, to, 1);
1771 return add_code_range_to_buf0(pbuf,
env, from, to, checkdup);
1777 return add_code_range0(pbuf,
env, from, to, 1);
1795 if (
n <= 0)
goto set_all;
1799 for (
i = 0;
i <
n;
i++) {
1802 if (pre <= from - 1) {
1803 r = add_code_range_to_buf(pbuf,
env, pre, from - 1);
1804 if (r != 0)
return r;
1815 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1818 tnot = not1; not1 = not2; not2 = tnot; \
1819 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1832 if (not1 != 0 || not2 != 0)
1847 return bbuf_clone(pbuf, bbuf2);
1850 return not_code_range_buf(enc, bbuf2, pbuf,
env);
1862 if (not2 == 0 && not1 == 0) {
1863 r = bbuf_clone(pbuf, bbuf2);
1865 else if (not1 == 0) {
1866 r = not_code_range_buf(enc, bbuf2, pbuf,
env);
1868 if (r != 0)
return r;
1870 for (
i = 0;
i < n1;
i++) {
1873 r = add_code_range_to_buf(pbuf,
env, from, to);
1874 if (r != 0)
return r;
1886 for (
i = 0;
i <
n;
i++) {
1889 if (from2 < from1) {
1890 if (to2 < from1)
continue;
1895 else if (from2 <= to1) {
1897 if (from1 <= from2 - 1) {
1898 r = add_code_range_to_buf(pbuf,
env, from1, from2-1);
1899 if (r != 0)
return r;
1910 if (from1 > to1)
break;
1913 r = add_code_range_to_buf(pbuf,
env, from1, to1);
1914 if (r != 0)
return r;
1929 return bbuf_clone(pbuf, bbuf2);
1934 return bbuf_clone(pbuf, bbuf1);
1948 if (not2 == 0 && not1 == 0) {
1949 for (
i = 0;
i < n1;
i++) {
1952 for (j = 0; j < n2; j++) {
1955 if (from2 > to1)
break;
1956 if (to2 < from1)
continue;
1957 from =
MAX(from1, from2);
1959 r = add_code_range_to_buf(pbuf,
env, from, to);
1960 if (r != 0)
return r;
1964 else if (not1 == 0) {
1965 for (
i = 0;
i < n1;
i++) {
1968 r = and_code_range1(pbuf,
env, from1, to1, data2, n2);
1969 if (r != 0)
return r;
1981 BBuf *buf1, *buf2, *pbuf = 0;
1993 bitset_invert_to(bsr1, bs1);
1997 bitset_invert_to(bsr2, bs2);
2000 bitset_and(bsr1, bsr2);
2001 if (bsr1 != dest->
bs) {
2002 bitset_copy(dest->
bs, bsr1);
2006 bitset_invert(dest->
bs);
2010 if (not1 != 0 && not2 != 0) {
2011 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf,
env);
2014 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf,
env);
2015 if (r == 0 && not1 != 0) {
2017 r = not_code_range_buf(enc, pbuf, &tbuf,
env);
2039 BBuf *buf1, *buf2, *pbuf = 0;
2051 bitset_invert_to(bsr1, bs1);
2055 bitset_invert_to(bsr2, bs2);
2058 bitset_or(bsr1, bsr2);
2059 if (bsr1 != dest->
bs) {
2060 bitset_copy(dest->
bs, bsr1);
2064 bitset_invert(dest->
bs);
2068 if (not1 != 0 && not2 != 0) {
2069 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf,
env);
2072 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf,
env);
2073 if (r == 0 && not1 != 0) {
2075 r = not_code_range_buf(enc, pbuf, &tbuf,
env);
2093 static void UNKNOWN_ESC_WARN(
ScanEnv *
env,
int c);
2100 case 'n':
return '\n';
2101 case 't':
return '\t';
2102 case 'r':
return '\r';
2103 case 'f':
return '\f';
2104 case 'a':
return '\007';
2105 case 'b':
return '\010';
2106 case 'e':
return '\033';
2113 if ((
'a' <= c && c <=
'z') || (
'A' <= c && c <=
'Z'))
2114 UNKNOWN_ESC_WARN(
env, c);
2121 #ifdef USE_NO_INVALID_QUANTIFIER
2122 # define is_invalid_quantifier_target(node) 0
2127 switch (
NTYPE(node)) {
2159 popular_quantifier_num(
QtfrNode* q)
2162 if (q->
lower == 0) {
2163 if (q->
upper == 1)
return 0;
2166 else if (q->
lower == 1) {
2171 if (q->
lower == 0) {
2172 if (q->
upper == 1)
return 3;
2175 else if (q->
lower == 1) {
2193 static enum ReduceType const ReduceTypeTable[6][6] = {
2211 pnum = popular_quantifier_num(p);
2212 cnum = popular_quantifier_num(c);
2213 if (pnum < 0 || cnum < 0) return ;
2215 switch (ReduceTypeTable[cnum][pnum]) {
2309 #ifdef USE_BACKREF_WITH_LEVEL
2331 int low, up, syn_allow, non_low = 0;
2349 if (c ==
')' || c ==
'(' || c ==
'|') {
2369 if (
PEND)
goto invalid;
2393 if (
PEND)
goto invalid;
2396 if (c !=
MC_ESC(
env->syntax))
goto invalid;
2397 if (
PEND)
goto invalid;
2400 if (c !=
'}')
goto invalid;
2407 tok->u.repeat.lower = low;
2408 tok->u.repeat.upper = up;
2440 v = fetch_escaped_value(&p, end,
env, &c);
2441 if (
v < 0)
return v;
2443 c = ((c & 0xff) | 0x80);
2469 v = fetch_escaped_value(&p, end,
env, &c);
2470 if (
v < 0)
return v;
2481 c = conv_backslash_value(c,
env);
2508 #ifdef USE_NAMED_GROUP
2510 # define ONIGENC_IS_CODE_NAME(enc, c) TRUE
2512 # define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
2515 # ifdef USE_BACKREF_WITH_LEVEL
2524 int* rback_num,
int* rlevel)
2526 int r, sign, is_num, exist_level;
2536 is_num = exist_level = 0;
2540 end_code = get_name_end_code_point(start_code);
2555 else if (c ==
'-') {
2568 if (c == end_code || c ==
')' || c ==
'+' || c ==
'-') {
2587 if (r == 0 && c != end_code) {
2588 if (c ==
'+' || c ==
'-') {
2590 int flag = (c ==
'-' ? -1 : 1);
2601 *rlevel = (level * flag);
2621 else if (*rback_num == 0)
goto err;
2626 *rname_end = name_end;
2628 return (exist_level ? 1 : 0);
2645 int r, is_num, sign;
2655 end_code = get_name_end_code_point(start_code);
2678 else if (c ==
'-') {
2698 if (c == end_code || c ==
')') {
2726 if (c != end_code) {
2735 else if (*rback_num == 0) {
2743 *rname_end = name_end;
2752 if (c == end_code || c ==
')')
2768 int r, is_num, sign;
2779 end_code = get_name_end_code_point(start_code);
2781 *rname_end = name_end = end;
2798 else if (c ==
'-') {
2812 if (c == end_code || c ==
')')
break;
2816 if (r == 0 && c != end_code) {
2824 else if (*rback_num == 0) {
2830 *rname_end = name_end;
2850 env->pattern,
env->pattern_end,
2859 (*onig_warn)((
char* )
buf);
2870 onig_syntax_warn(
env,
"character class has '%s' without escape", c);
2880 onig_syntax_warn(
env,
"regular expression has '%s' without escape", c);
2895 #ifdef WARN_ALL_CC_DUP
2896 onig_syntax_warn(
env,
"character class has duplicated range: %04x-%04x", from, to);
2899 onig_syntax_warn(
env,
"character class has duplicated range");
2908 onig_syntax_warn(
env,
"Unknown escape \\%c is ignored", c);
2922 q = p +
enclen(enc, p, to);
2924 for (
i = 1;
i <
n && q < to;
i++) {
2926 if (x != s[
i])
break;
2957 q = p +
enclen(enc, p, to);
2959 for (
i = 1;
i <
n && q < to;
i++) {
2961 if (x != s[
i])
break;
2964 if (
i >=
n)
return 1;
2969 if (x ==
bad)
return 0;
2970 else if (x ==
MC_ESC(syn)) in_esc = 1;
3003 else if (c ==
'-') {
3006 else if (c ==
MC_ESC(syn)) {
3019 tok->u.prop.not = 0;
3024 tok->u.prop.not = 1;
3029 tok->u.prop.not = 0;
3034 tok->u.prop.not = 1;
3039 tok->u.prop.not = 0;
3044 tok->u.prop.not = 1;
3050 tok->u.prop.not = 0;
3056 tok->u.prop.not = 1;
3068 tok->u.prop.not = (c ==
'P' ? 1 : 0);
3073 tok->u.prop.not = (
tok->u.prop.not == 0 ? 1 : 0);
3080 onig_syntax_warn(
env,
"invalid Unicode Property \\%c", c);
3090 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3110 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3126 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3144 num = scan_unsigned_octal_number(&p, end, 11, enc);
3166 case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
3170 num = scan_unsigned_octal_number(&p, end, 3, enc);
3183 num = fetch_escaped_value(&p, end,
env, &c2);
3184 if (num < 0)
return num;
3192 else if (c ==
'[') {
3197 if (str_exist_check_with_esc(send, 2, p, end,
3216 else if (c ==
'&') {
3229 #ifdef USE_NAMED_GROUP
3244 # ifdef USE_BACKREF_WITH_LEVEL
3246 r = fetch_name_with_level(c, &p, end, &name_end,
3247 env, &back_num, &
tok->u.backref.level);
3248 if (r == 1)
tok->u.backref.exist_level = 1;
3249 else tok->u.backref.exist_level = 0;
3251 r = fetch_name(&p, end, &name_end,
env, &back_num, 1);
3253 if (r < 0)
return r;
3255 if (back_num != 0) {
3263 if (back_num >
env->num_mem ||
3268 tok->u.backref.by_name = 0;
3269 tok->u.backref.num = 1;
3270 tok->u.backref.ref1 = back_num;
3281 for (
i = 0;
i < num;
i++) {
3282 if (backs[
i] >
env->num_mem ||
3289 tok->u.backref.by_name = 1;
3291 tok->u.backref.num = 1;
3292 tok->u.backref.ref1 = backs[0];
3295 tok->u.backref.num = num;
3296 tok->u.backref.refs = backs;
3338 tok->u.repeat.lower = 0;
3346 tok->u.repeat.lower = 1;
3354 tok->u.repeat.lower = 0;
3355 tok->u.repeat.upper = 1;
3360 tok->u.repeat.greedy = 0;
3361 tok->u.repeat.possessive = 0;
3371 tok->u.repeat.greedy = 1;
3372 tok->u.repeat.possessive = 1;
3375 tok->u.repeat.greedy = 1;
3376 tok->u.repeat.possessive = 0;
3383 r = fetch_range_quantifier(&p, end,
tok,
env);
3384 if (r < 0)
return r;
3385 if (r == 0)
goto greedy_check;
3388 goto possessive_check;
3414 tok->u.prop.not = 0;
3421 tok->u.prop.not = 1;
3440 #ifdef USE_WORD_BEGIN_END
3460 tok->u.prop.not = 0;
3467 tok->u.prop.not = 1;
3474 tok->u.prop.not = 0;
3481 tok->u.prop.not = 1;
3488 tok->u.prop.not = 0;
3495 tok->u.prop.not = 1;
3540 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3558 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3574 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3592 num = scan_unsigned_octal_number(&p, end, 11, enc);
3612 case '1':
case '2':
case '3':
case '4':
3613 case '5':
case '6':
case '7':
case '8':
case '9':
3622 (num <= env->num_mem || num <= 9)) {
3629 tok->u.backref.num = 1;
3630 tok->u.backref.ref1 = num;
3631 tok->u.backref.by_name = 0;
3632 #ifdef USE_BACKREF_WITH_LEVEL
3633 tok->u.backref.exist_level = 0;
3639 if (c ==
'8' || c ==
'9') {
3650 num = scan_unsigned_octal_number(&p, end, (c ==
'0' ? 2:3), enc);
3659 else if (c !=
'0') {
3664 #ifdef USE_NAMED_GROUP
3668 if (c ==
'<' || c ==
'\'') {
3669 r = fetch_named_backref_token(c,
tok, &p, end,
env);
3670 if (r < 0)
return r;
3674 onig_syntax_warn(
env,
"invalid back reference");
3680 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3682 # ifdef USE_NAMED_GROUP
3686 r = fetch_named_backref_token(c,
tok, &p, end,
env);
3687 if (r < 0)
return r;
3693 # ifdef USE_SUBEXP_CALL
3696 if (c ==
'<' || c ==
'\'') {
3697 int gnum = -1, rel = 0;
3704 if (
PPEEK_IS(get_name_end_code_point(c))) {
3710 else if (cnext ==
'+') {
3717 if (r < 0)
return r;
3721 tok->u.call.name = prev;
3722 tok->u.call.name_end = name_end;
3723 tok->u.call.gnum = gnum;
3724 tok->u.call.rel = rel;
3727 onig_syntax_warn(
env,
"invalid subexp call");
3747 tok->u.prop.not = (c ==
'P' ? 1 : 0);
3752 tok->u.prop.not = (
tok->u.prop.not == 0 ? 1 : 0);
3759 onig_syntax_warn(
env,
"invalid Unicode Property \\%c", c);
3786 num = fetch_escaped_value(&p, end,
env, &c2);
3787 if (num < 0)
return num;
3804 #ifdef USE_VARIABLE_META_CHARS
3812 goto zero_or_one_time;
3814 goto one_or_more_time;
3825 #ifdef USE_VARIABLE_META_CHARS
3833 #ifdef USE_VARIABLE_META_CHARS
3837 tok->u.repeat.lower = 0;
3844 #ifdef USE_VARIABLE_META_CHARS
3848 tok->u.repeat.lower = 1;
3855 #ifdef USE_VARIABLE_META_CHARS
3859 tok->u.repeat.lower = 0;
3860 tok->u.repeat.upper = 1;
3866 r = fetch_range_quantifier(&p, end,
tok,
env);
3867 if (r < 0)
return r;
3868 if (r == 0)
goto greedy_check;
3871 goto possessive_check;
3896 if (c ==
')')
break;
3901 #ifdef USE_PERL_SUBEXP_CALL
3911 if (c ==
'R' || c ==
'0') {
3915 name_end =
name = p;
3925 r = fetch_name((
OnigCodePoint )
'(', &p, end, &name_end,
env, &gnum, numref);
3926 if (r < 0)
return r;
3931 tok->u.call.name_end = name_end;
3932 tok->u.call.gnum = gnum;
3933 tok->u.call.rel = 0;
3936 else if ((c ==
'-' || c ==
'+') &&
3951 if (r < 0)
return r;
3955 tok->u.call.name_end = name_end;
3956 tok->u.call.gnum = gnum;
3957 tok->u.call.rel = 1;
3962 #ifdef USE_CAPITAL_P_NAMED_GROUP
3975 if (r < 0)
return r;
3978 else if (c ==
'>') {
3981 if (r < 0)
return r;
3985 tok->u.call.name_end = name_end;
3986 tok->u.call.gnum = gnum;
3987 tok->u.call.rel = 0;
4025 CLOSE_BRACKET_WITHOUT_ESC_WARN(
env, (
UChar* )
"]");
4040 case ' ':
case '\t':
case '\n':
case '\r':
case '\f':
4051 #ifdef USE_VARIABLE_META_CHARS
4069 for (
i = 0;
i <
n;
i++) {
4074 r = add_code_range_to_buf(&(
cc->mbuf),
env, j,
4076 if (r != 0)
return r;
4087 for ( ;
i <
n;
i++) {
4088 r = add_code_range_to_buf(&(
cc->mbuf),
env,
4091 if (r != 0)
return r;
4097 for (
i = 0;
i <
n;
i++) {
4107 for (j = prev; j < sb_out; j++) {
4114 for (
i = 0;
i <
n;
i++) {
4116 r = add_code_range_to_buf(&(
cc->mbuf),
env, prev,
4118 if (r != 0)
return r;
4122 if (prev < 0x7fffffff) {
4123 r = add_code_range_to_buf(&(
cc->mbuf),
env, prev, 0x7fffffff);
4124 if (r != 0)
return r;
4144 initialize_cclass(&ccwork);
4145 r = add_ctype_to_cc_by_range(&ccwork, ctype, not,
env, sb_out,
4153 initialize_cclass(&ccascii);
4155 r = add_code_range(&(ccascii.
mbuf),
env, 0x00, 0x7F);
4158 bitset_set_range(
env, ccascii.
bs, 0x00, 0x7F);
4162 r = and_cclass(&ccwork, &ccascii,
env);
4167 r = or_cclass(
cc, &ccwork,
env);
4173 r = add_ctype_to_cc_by_range(
cc, ctype, not,
env, sb_out, ranges);
4222 for (c = 0; c < maxcode; c++) {
4233 for (c = 0; c < maxcode; c++) {
4262 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4263 #define POSIX_BRACKET_NAME_MIN_LEN 4
4297 goto not_posix_bracket;
4301 for (pb = PBS; pb < PBS +
numberof(PBS); pb++) {
4307 r = add_ctype_to_cc(
cc, pb->
ctype, not, ascii_range,
env);
4308 if (r != 0)
return r;
4314 r = add_ctype_to_cc(asc_cc, pb->
ctype, not, ascii_range,
env);
4315 if (r != 0)
return r;
4327 while (!
PEND && ((c =
PPEEK) !=
':') && c !=
']') {
4331 if (c ==
':' && !
PEND) {
4364 else if (c ==
'(' || c ==
')' || c ==
'{' || c ==
'|') {
4383 ctype = fetch_char_property_to_ctype(
src, end,
env);
4384 if (ctype < 0)
return ctype;
4386 *np = node_new_cclass();
4389 r = add_ctype_to_cc(
cc, ctype, 0, 0,
env);
4390 if (r != 0)
return r;
4395 r = cclass_case_fold(np,
cc,
cc,
env);
4431 r = add_code_range(&(
cc->mbuf),
env, *vs, *vs);
4432 if (r < 0)
return r;
4434 r = add_code_range0(&(asc_cc->
mbuf),
env, *vs, *vs, 0);
4435 if (r < 0)
return r;
4448 int* from_israw,
int to_israw,
4462 r = add_code_range(&(
cc->mbuf),
env, *from, *from);
4463 if (r < 0)
return r;
4465 r = add_code_range0(&(asc_cc->
mbuf),
env, *from, *from, 0);
4466 if (r < 0)
return r;
4472 if (intype == *
type) {
4474 if (*from > 0xff || to > 0xff)
4483 bitset_set_range(
env,
cc->bs, (
int )*from, (
int )to);
4485 bitset_set_range(
env, asc_cc->
bs, (
int )*from, (
int )to);
4488 r = add_code_range(&(
cc->mbuf),
env, *from, to);
4489 if (r < 0)
return r;
4491 r = add_code_range0(&(asc_cc->
mbuf),
env, *from, to, 0);
4492 if (r < 0)
return r;
4503 bitset_set_range(
env,
cc->bs, (
int )*from, (
int )(to < 0xff ? to : 0xff));
4505 if (r < 0)
return r;
4507 bitset_set_range(
env, asc_cc->
bs, (
int )*from, (
int )(to < 0xff ? to : 0xff));
4509 if (r < 0)
return r;
4525 *from_israw = to_israw;
4542 if (ignore_escaped && in_esc) {
4547 if (code == c)
return 1;
4548 if (code ==
MC_ESC(
env->syntax)) in_esc = 1;
4558 int r,
neg,
len, fetched, and_start;
4569 int val_israw, in_israw;
4573 if (
env->parse_depth > ParseDepthLimit)
4576 r = fetch_token_in_cc(
tok,
src, end,
env);
4579 r = fetch_token_in_cc(
tok,
src, end,
env);
4585 if (r < 0)
return r;
4595 *np = node = node_new_cclass();
4600 *asc_np = asc_node = node_new_cclass();
4639 int i, base =
tok->base;
4643 r = fetch_token_in_cc(
tok, &p, end,
env);
4644 if (r < 0)
goto err;
4664 for (
i = 1;
i <
len;
i++) {
4700 r = next_state_val(
cc, asc_cc, &vs,
v, &val_israw, in_israw, in_type, &val_type,
4702 if (r != 0)
goto err;
4706 r = parse_posix_bracket(
cc, asc_cc, &p, end,
env);
4707 if (r < 0)
goto err;
4719 r = add_ctype_to_cc(
cc,
tok->u.prop.ctype,
tok->u.prop.not,
4721 if (r != 0)
return r;
4724 r = add_ctype_to_cc(asc_cc,
tok->u.prop.ctype,
tok->u.prop.not,
4726 if (r != 0)
return r;
4730 r = next_state_class(
cc, asc_cc, &vs, &val_type, &state,
env);
4731 if (r != 0)
goto err;
4738 ctype = fetch_char_property_to_ctype(&p, end,
env);
4739 if (ctype < 0)
return ctype;
4740 r = add_ctype_to_cc(
cc, ctype,
tok->u.prop.not, 0,
env);
4741 if (r != 0)
return r;
4744 r = add_ctype_to_cc(asc_cc, ctype,
tok->u.prop.not, 0,
env);
4745 if (r != 0)
return r;
4753 r = fetch_token_in_cc(
tok, &p, end,
env);
4754 if (r < 0)
goto err;
4779 r = fetch_token_in_cc(
tok, &p, end,
env);
4780 if (r < 0)
goto err;
4793 r = fetch_token_in_cc(
tok, &p, end,
env);
4794 if (r < 0)
goto err;
4813 Node *anode, *aasc_node;
4816 r = parse_char_class(&anode, &aasc_node,
tok, &p, end,
env);
4819 r = or_cclass(
cc, acc,
env);
4823 r = or_cclass(asc_cc, acc,
env);
4827 if (r != 0)
goto err;
4834 r = next_state_val(
cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4835 &val_type, &state,
env);
4836 if (r != 0)
goto err;
4843 r = and_cclass(prev_cc,
cc,
env);
4844 if (r != 0)
goto err;
4845 bbuf_free(
cc->mbuf);
4847 r = and_cclass(asc_prev_cc, asc_cc,
env);
4848 if (r != 0)
goto err;
4849 bbuf_free(asc_cc->
mbuf);
4856 asc_prev_cc = asc_cc;
4857 asc_cc = &asc_work_cc;
4860 initialize_cclass(
cc);
4862 initialize_cclass(asc_cc);
4879 r = fetch_token_in_cc(
tok, &p, end,
env);
4880 if (r < 0)
goto err;
4885 r = next_state_val(
cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4886 &val_type, &state,
env);
4887 if (r != 0)
goto err;
4891 r = and_cclass(prev_cc,
cc,
env);
4892 if (r != 0)
goto err;
4893 bbuf_free(
cc->mbuf);
4896 r = and_cclass(asc_prev_cc, asc_cc,
env);
4897 if (r != 0)
goto err;
4898 bbuf_free(asc_cc->
mbuf);
4899 asc_cc = asc_prev_cc;
4921 if (is_empty == 0) {
4922 #define NEWLINE_CODE 0x0a
4929 if (r < 0)
goto err;
4940 bbuf_free(
cc->mbuf);
4942 bbuf_free(asc_cc->
mbuf);
4959 #ifdef USE_NAMED_GROUP
4969 option =
env->option;
4979 r = fetch_token(
tok, &p, end,
env);
4980 if (r < 0)
return r;
4981 r = parse_subexp(np,
tok,
term, &p, end,
env);
4982 if (r < 0)
return r;
5005 #ifdef USE_NAMED_GROUP
5014 # ifdef USE_CAPITAL_P_NAMED_GROUP
5019 if (c ==
'<')
goto named_group1;
5033 #ifdef USE_NAMED_GROUP
5045 # ifdef USE_CAPTURE_HISTORY
5050 if (r < 0)
return r;
5052 num = scan_env_add_mem_entry(
env);
5053 if (num < 0)
return num;
5057 r = name_add(
env->reg,
name, name_end, num,
env);
5058 if (r != 0)
return r;
5059 *np = node_new_enclose_memory(
env->option, 1);
5062 if (list_capture != 0)
5077 #ifdef USE_CAPTURE_HISTORY
5080 # ifdef USE_NAMED_GROUP
5084 if (c ==
'<' || c ==
'\'') {
5091 *np = node_new_enclose_memory(
env->option, 0);
5093 num = scan_env_add_mem_entry(
env);
5094 if (num < 0)
return num;
5116 if (r < 0)
return r;
5126 if (num >
env->num_mem ||
5131 #ifdef USE_NAMED_GROUP
5132 else if (c ==
'<' || c ==
'\'') {
5134 r = fetch_named_backref_token(c,
tok, &p, end,
env);
5135 if (r < 0)
return r;
5140 num =
tok->u.backref.ref1;
5147 int len =
tok->u.backref.num;
5148 num =
len > 1 ?
tok->u.backref.refs[0] :
tok->u.backref.ref1;
5199 #ifdef USE_POSIXLINE_OPTION
5202 case '-':
case 'i':
case 'm':
case 's':
case 'x':
5203 case 'a':
case 'd':
case 'l':
case 'u':
5213 case '-':
neg = 1;
break;
5234 #ifdef USE_POSIXLINE_OPTION
5292 *np = node_new_option(option);
5297 else if (c ==
':') {
5300 env->option = option;
5301 r = fetch_token(
tok, &p, end,
env);
5306 r = parse_subexp(&target,
tok,
term, &p, end,
env);
5308 if (r < 0)
return r;
5309 *np = node_new_option(option);
5330 *np = node_new_enclose_memory(
env->option, 0);
5332 num = scan_env_add_mem_entry(
env);
5333 if (num < 0)
return num;
5338 r = fetch_token(
tok, &p, end,
env);
5339 if (r < 0)
return r;
5340 r = parse_subexp(&target,
tok,
term, &p, end,
env);
5347 NANCHOR(*np)->target = target;
5352 r = scan_env_set_mem_node(
env,
NENCLOSE(*np)->regnum, *np);
5353 if (r != 0)
return r;
5358 work1 = node_new_empty();
5380 static const char*
const PopularQStr[] = {
5381 "?",
"*",
"+",
"??",
"*?",
"+?"
5384 static const char*
const ReduceQStr[] = {
5385 "",
"",
"*",
"*?",
"??",
"+ and ??",
"+? and ?"
5398 switch (
NTYPE(target)) {
5402 if (str_node_can_be_split(sn,
env->enc)) {
5403 Node*
n = str_node_split_last_char(sn,
env->enc);
5416 int nestq_num = popular_quantifier_num(qn);
5417 int targetq_num = popular_quantifier_num(qnt);
5419 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5420 if (nestq_num >= 0 && targetq_num >= 0 &&
5422 switch (ReduceTypeTable[targetq_num][nestq_num]) {
5428 onig_syntax_warn(
env,
"regular expression has redundant nested repeat operator '%s'",
5429 PopularQStr[targetq_num]);
5436 onig_syntax_warn(
env,
"nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5437 PopularQStr[targetq_num], PopularQStr[nestq_num],
5438 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5447 if (targetq_num >= 0) {
5448 if (nestq_num >= 0) {
5452 else if (targetq_num == 1 || targetq_num == 2) {
5472 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5480 bitset_invert(
cc->bs);
5483 r = not_code_range_buf(enc,
cc->mbuf, &tbuf);
5484 if (r != 0)
return r;
5486 bbuf_free(
cc->mbuf);
5507 int to_len,
void*
arg)
5531 add_flag = !add_flag;
5536 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5541 r = add_code_range0(&(
cc->mbuf),
env, *to, *to, 0);
5542 if (r < 0)
return r;
5554 r = add_code_range0(&(
cc->mbuf),
env, *to, *to, 0);
5555 if (r < 0)
return r;
5579 for (
i = 0;
i < to_len;
i++) {
5620 i_apply_case_fold, &iarg);
5650 if (num1 < 0)
return num1;
5652 if (num2 < 0)
return num2;
5653 left = node_new_str_raw(
buf,
buf + num1 + num2);
5657 right = node_new_cclass();
5661 r = add_code_range(&(
cc->mbuf),
env, 0x0A, 0x0D);
5662 if (r != 0)
goto err;
5665 bitset_set_range(
env,
cc->bs, 0x0A, 0x0D);
5671 r = add_code_range(&(
cc->mbuf),
env, 0x85, 0x85);
5672 if (r != 0)
goto err;
5673 r = add_code_range(&(
cc->mbuf),
env, 0x2028, 0x2029);
5674 if (r != 0)
goto err;
5701 propname2ctype(
ScanEnv*
env,
const char* propname)
5716 int ctype = propname2ctype(
env, propname);
5717 if (ctype < 0)
return ctype;
5718 return add_ctype_to_cc(
cc, ctype, not, 0,
env);
5725 create_property_node(
Node **np,
ScanEnv*
env,
const char* propname)
5730 *np = node_new_cclass();
5733 r = add_property_to_cc(
cc, propname, 0,
env);
5740 quantify_node(
Node **np,
int lower,
int upper)
5742 Node* tmp = node_new_quantifier(lower, upper, 0);
5744 NQTFR(tmp)->target = *np;
5750 quantify_property_node(
Node **np,
ScanEnv*
env,
const char* propname,
char repetitions)
5756 r = create_property_node(np,
env, propname);
5757 if (r != 0)
return r;
5758 switch (repetitions) {
5759 case '?': upper = 1;
break;
5760 case '+': lower = 1;
break;
5762 case '2': lower = upper = 2;
break;
5765 return quantify_node(np, lower, upper);
5773 create_node_from_array(
int kind,
Node **np,
Node **node_array)
5780 *np = kind==
LIST ? node_new_list(node_array[
i], tmp)
/*
 * R_ERR(call): evaluate `call`, store its result in the enclosing function's
 * local `r`, and jump to that function's `err` label when the result is
 * non-zero.  Requires a variable `r` and a label `err:` at the use site.
 *
 * Wrapped in do { } while (0) so the expansion is exactly one statement: the
 * error check can no longer escape an unbraced `if`, and the macro composes
 * with `else`.  Always write it with a trailing semicolon.
 */
#define R_ERR(call) do { r = (call); if (r != 0) goto err; } while (0)
5820 #define NODE_COMMON_SIZE 16
5831 int any_target_position;
5837 Node **alts = node_common+0;
5845 if (r < 0)
goto err;
5848 if (r < 0)
goto err;
5849 alts[0] = node_new_str_raw(
buf,
buf + num1 + r);
5852 #ifdef USE_UNICODE_PROPERTIES
5856 if (propname2ctype(
env,
"Grapheme_Cluster_Break=Extend") < 0)
goto err;
5864 alts[1] = node_new_cclass();
5867 R_ERR(add_property_to_cc(
cc,
"Grapheme_Cluster_Break=Control", 0,
env));
5869 R_ERR(add_code_range(&(
cc->mbuf),
env, 0x000A, 0x000A));
5870 R_ERR(add_code_range(&(
cc->mbuf),
env, 0x000D, 0x000D));
5882 R_ERR(quantify_property_node(
list+0,
env,
"Grapheme_Cluster_Break=Prepend",
'*'));
5900 Node **H_list = core_alts + 1;
5901 R_ERR(quantify_property_node(H_list+0,
env,
"Grapheme_Cluster_Break=L",
'*'));
5905 Node **H_alt2 = H_list + 2;
5906 R_ERR(quantify_property_node(H_alt2+0,
env,
"Grapheme_Cluster_Break=V",
'+'));
5910 Node **H_list2 = H_alt2 + 2;
5912 R_ERR(create_property_node(H_list2+0,
env,
"Grapheme_Cluster_Break=LV"));
5913 R_ERR(quantify_property_node(H_list2+1,
env,
"Grapheme_Cluster_Break=V",
'*'));
5914 R_ERR(create_node_from_array(
LIST, H_alt2+1, H_list2));
5917 R_ERR(create_property_node(H_alt2+2,
env,
"Grapheme_Cluster_Break=LVT"));
5918 R_ERR(create_node_from_array(
ALT, H_list+1, H_alt2));
5921 R_ERR(quantify_property_node(H_list+2,
env,
"Grapheme_Cluster_Break=T",
'*'));
5922 R_ERR(create_node_from_array(
LIST, core_alts+0, H_list));
5925 R_ERR(quantify_property_node(core_alts+1,
env,
"Grapheme_Cluster_Break=L",
'+'));
5926 R_ERR(quantify_property_node(core_alts+2,
env,
"Grapheme_Cluster_Break=T",
'+'));
5930 R_ERR(quantify_property_node(core_alts+3,
env,
"Regional_Indicator",
'2'));
5934 Node **XP_list = core_alts + 5;
5935 R_ERR(create_property_node(XP_list+0,
env,
"Extended_Pictographic"));
5939 Node **Ex_list = XP_list + 2;
5941 R_ERR(quantify_property_node(Ex_list+0,
env,
"Grapheme_Cluster_Break=Extend",
'*'));
5945 if (r < 0)
goto err;
5946 Ex_list[1] = node_new_str_raw(
buf,
buf + r);
5949 R_ERR(create_property_node(Ex_list+2,
env,
"Extended_Pictographic"));
5950 R_ERR(create_node_from_array(
LIST, XP_list+1, Ex_list));
5954 R_ERR(create_node_from_array(
LIST, core_alts+4, XP_list));
5958 core_alts[5] = node_new_cclass();
5970 R_ERR(add_property_to_cc(
cc,
"Grapheme_Cluster_Break=Control", 0,
env));
5971 R_ERR(add_code_range(&(
cc->mbuf),
env, 0x000A, 0x000A));
5972 R_ERR(add_code_range(&(
cc->mbuf),
env, 0x000D, 0x000D));
5973 R_ERR(not_code_range_buf(
env->enc,
cc->mbuf, &inverted_buf,
env));
5974 cc->mbuf = inverted_buf;
5976 env->warnings_flag &= dup_not_warned;
5979 R_ERR(add_property_to_cc(
cc,
"Grapheme_Cluster_Break=Control", 1,
env));
5988 R_ERR(create_property_node(
list+2,
env,
"Grapheme_Cluster_Break=Extend"));
5990 R_ERR(add_property_to_cc(
cc,
"Grapheme_Cluster_Break=SpacingMark", 0,
env));
5991 R_ERR(add_code_range(&(
cc->mbuf),
env, 0x200D, 0x200D));
5997 any_target_position = 3;
6002 any_target_position = 1;
6008 np1 = node_new_anychar();
6011 option =
env->option;
6013 tmp = node_new_option(option);
6016 alts[any_target_position] = tmp;
6019 R_ERR(create_node_from_array(
ALT, &top_alt, alts));
6029 #ifdef USE_UNICODE_PROPERTIES
6032 option =
env->option;
6034 *np = node_new_option(option);
6054 countbits(
unsigned int bits)
6056 bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555);
6057 bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
6058 bits = (bits & 0x0f0f0f0f) + ((bits >> 4) & 0x0f0f0f0f);
6059 bits = (bits & 0x00ff00ff) + ((bits >> 8) & 0x00ff00ff);
6060 return (bits & 0x0000ffff) + ((bits >>16) & 0x0000ffff);
6078 if ((
n == 1) && (data[0] == data[1])) {
6095 if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6103 if (c != not_found) {
6117 int r,
len, group = 0;
6125 switch (
tok->type) {
6129 *np = node_new_empty();
6135 if (r < 0)
return r;
6136 if (r == 1) group = 1;
6162 if (
tok->escaped)
goto tk_raw_byte;
6167 r = node_linebreak(np,
env);
6168 if (r < 0)
return r;
6172 r = node_extended_grapheme_cluster(np,
env);
6173 if (r < 0)
return r;
6184 *np = node_new_str(
tok->backp, *
src);
6190 if (r < 0)
return r;
6194 #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6196 r = node_str_cat_codepoint(*np,
env->enc,
tok->u.code);
6202 if (r < 0)
return r;
6214 *np = node_new_str_raw_char((
UChar )
tok->u.c);
6227 if (r < 0)
return r;
6230 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6244 r = node_str_cat_char(*np, (
UChar )
tok->u.c);
6245 if (r < 0)
return r;
6254 *np = node_new_empty();
6256 r = node_str_cat_codepoint(*np,
env->enc,
tok->u.code);
6257 if (r != 0)
return r;
6258 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6269 UChar *qstart, *qend, *nextp;
6274 qend = find_str_position(end_op, 2, qstart, end, &nextp,
env->enc);
6278 *np = node_new_str(qstart, qend);
6286 switch (
tok->u.prop.ctype) {
6288 *np = node_new_ctype(
tok->u.prop.ctype,
tok->u.prop.not,
6299 *np = node_new_cclass();
6302 r = add_ctype_to_cc(
cc,
tok->u.prop.ctype, 0,
6304 if (r != 0)
return r;
6317 r = parse_char_property(np,
tok,
src, end,
env);
6318 if (r != 0)
return r;
6327 r = parse_char_class(np, &asc_node,
tok,
src, end,
env);
6334 if (is_onechar_cclass(
cc, &code)) {
6337 *np = node_new_empty();
6339 r = node_str_cat_codepoint(*np,
env->enc, code);
6340 if (r != 0)
return r;
6355 *np = node_new_anychar();
6360 *np = node_new_anychar();
6364 NQTFR(qn)->target = *np;
6369 len =
tok->u.backref.num;
6370 *np = node_new_backref(
len,
6371 (
len > 1 ?
tok->u.backref.refs : &(
tok->u.backref.ref1)),
6372 tok->u.backref.by_name,
6374 tok->u.backref.exist_level,
6375 tok->u.backref.level,
6381 #ifdef USE_SUBEXP_CALL
6384 int gnum =
tok->u.call.gnum;
6386 if (gnum < 0 || tok->u.call.rel != 0) {
6387 if (gnum > 0) gnum--;
6392 *np = node_new_call(
tok->u.call.name,
tok->u.call.name_end, gnum);
6402 NANCHOR(*np)->ascii_range =
tok->u.anchor.ascii_range;
6411 *np = node_new_empty();
6428 if (r < 0)
return r;
6435 qn = node_new_quantifier(
tok->u.repeat.lower,
tok->u.repeat.upper,
6438 NQTFR(qn)->greedy =
tok->u.repeat.greedy;
6439 r = set_quantifier(qn, *targetp, group,
env);
6445 if (
tok->u.repeat.possessive != 0) {
6465 *targetp = node_new_list(*targetp,
NULL);
6470 tmp =
NCDR(*targetp) = node_new_list(qn,
NULL);
6475 targetp = &(
NCAR(tmp));
6489 Node *node, **headp;
6502 *
top = node_new_list(node,
NULL);
6514 headp = &(
NCDR(node));
6517 *headp = node_new_list(node,
NULL);
6518 headp = &(
NCDR(*headp));
6532 Node *node, **headp;
6536 if (
env->parse_depth > ParseDepthLimit)
6552 if (r < 0)
return r;
6560 headp = &(
NCDR(*headp));
6586 if (r < 0)
return r;
6588 if (r < 0)
return r;
6590 #ifdef USE_SUBEXP_CALL
6591 if (
env->num_call > 0) {
6595 np = node_new_enclose_memory(
env->option, 0);
6599 r = scan_env_set_mem_node(
env, num, np);
6617 #ifdef USE_NAMED_GROUP
6621 scan_env_clear(
env);
6631 p = (
UChar* )pattern;
6632 r = parse_regexp(root, &p, (
UChar* )end,
env);
6642 env->error_end = arg_end;