49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
74 c = *a; *a = *b; *b = c;
109 if (m == 0)
return 0;
122 if (bs[
i] != 0)
return 0;
159 #ifdef USE_SUBEXP_CALL
204 add_opcode(
regex_t* reg,
int opcode)
210 #ifdef USE_COMBINATION_EXPLOSION_CHECK
212 add_state_check_num(
regex_t* reg,
int num)
222 add_rel_addr(
regex_t* reg,
int addr)
231 add_abs_addr(
regex_t* reg,
int addr)
249 add_mem_num(
regex_t* reg,
int num)
259 add_pointer(
regex_t* reg,
void* addr)
276 add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
280 r = add_opcode(reg, opcode);
282 r = add_rel_addr(reg, addr);
305 r = add_opcode(reg, opcode);
307 r = add_option(reg, option);
/* Forward declaration of compile_length_tree(), which takes a Node* and the
 * regex_t* and returns an int.  Call sites in this file store the result in a
 * length variable and return it unchanged when it is negative, i.e. they
 * treat a negative result as an error code to propagate. */
311 static int compile_length_tree(
Node* node,
regex_t* reg);
/* Non-zero when `op` equals one of OP_EXACTN, OP_EXACTMB2N, OP_EXACTMB3N,
 * OP_EXACTMBN or OP_EXACTN_IC.  The argument is expanded once per comparison,
 * so it should be free of side effects.
 * NOTE(review): judging by the name these are the exact-string opcodes that
 * carry an explicit string-length operand -- confirm against the emitter. */
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
320 select_str_opcode(
int mb_len,
OnigDistance byte_len,
int ignore_case)
323 OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
366 compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
371 if (empty_info != 0) {
379 r = compile_tree(node, reg);
382 if (empty_info != 0) {
391 r = add_mem_num(reg, saved_num_null_check);
396 #ifdef USE_SUBEXP_CALL
407 r = add_abs_addr(reg, 0 );
417 for (
i = 0;
i <
n;
i++) {
418 r = compile_tree(node, reg);
429 int op = select_str_opcode(mb_len, byte_len, ignore_case);
445 int op = select_str_opcode(mb_len, byte_len, ignore_case);
449 add_length(reg, mb_len);
453 add_length(reg, byte_len);
455 add_length(reg, byte_len / mb_len);
458 add_bytes(reg, s, byte_len);
464 compile_length_string_node(
Node* node,
regex_t* reg)
466 int rlen, r,
len, prev_len, blen, ambig;
472 if (sn->
end <= sn->
s)
483 for (; p < sn->
end; ) {
485 if (
len == prev_len || ambig) {
489 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
497 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
505 if (sn->
end <= sn->
s)
508 return add_compile_string_length(sn->
s, 1 , sn->
end - sn->
s, reg, 0);
514 int r,
len, prev_len, blen, ambig;
520 if (sn->
end <= sn->
s)
533 if (
len == prev_len || ambig) {
537 r = add_compile_string(prev, prev_len, blen, reg, ambig);
547 return add_compile_string(prev, prev_len, blen, reg, ambig);
553 if (sn->
end <= sn->
s)
556 return add_compile_string(sn->
s, 1 , sn->
end - sn->
s, reg, 0);
562 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
563 add_length(reg, mbuf->
used);
564 return add_bytes(reg, mbuf->
p, mbuf->
used);
571 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
573 r = add_bytes(reg, mbuf->
p, mbuf->
used);
577 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
597 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 r = add_bitset(reg,
cc->bs);
627 r = add_multi_byte_cclass(
cc->mbuf, reg);
635 r = add_bitset(reg,
cc->bs);
637 r = add_multi_byte_cclass(
cc->mbuf, reg);
645 entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
647 #define REPEAT_RANGE_ALLOC 4
676 compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
684 r = add_mem_num(reg, num_repeat);
690 r = entry_repeat_range(reg, num_repeat, qn->
lower, qn->
upper);
693 r = compile_tree_empty_check(qn->
target, reg, empty_info);
707 r = add_mem_num(reg, num_repeat);
712 is_anychar_star_quantifier(
QtfrNode* qn)
/* NOTE(review): presumably a size limit that decides whether a quantifier is
 * expanded inline; its uses are not visible in this excerpt -- confirm. */
721 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
/* True when the variable `ckn` at the point of use is positive.  The macro
 * takes no argument, so it only compiles where an identifier named `ckn` is
 * in scope. */
722 #define CKN_ON (ckn > 0)
724 #ifdef USE_COMBINATION_EXPLOSION_CHECK
729 int len, mod_tlen, cklen;
733 int tlen = compile_length_tree(qn->
target, reg);
735 if (tlen < 0)
return tlen;
743 if (qn->
greedy && infinite) {
756 if (infinite && qn->
lower <= 1) {
774 else if (qn->
upper == 0) {
781 if (qn->
lower == 0) {
783 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
800 len += SIZE_OP_STATE_CHECK;
813 int tlen = compile_length_tree(qn->
target, reg);
815 if (tlen < 0)
return tlen;
819 if (is_anychar_star_quantifier(qn)) {
820 r = compile_tree_n_times(qn->
target, qn->
lower, reg);
829 r = add_state_check_num(reg, ckn);
837 r = add_opcode(reg, (
CKN_ON ?
842 r = add_opcode(reg, (
CKN_ON ?
848 r = add_state_check_num(reg, ckn);
859 if (infinite && qn->
lower <= 1) {
861 if (qn->
lower == 1) {
862 r = add_opcode_rel_addr(reg,
OP_JUMP,
870 r = add_state_check_num(reg, ckn);
878 r = compile_tree_empty_check(qn->
target, reg, empty_info);
880 r = add_opcode_rel_addr(reg,
OP_JUMP,
885 if (qn->
lower == 0) {
886 r = add_opcode_rel_addr(reg,
OP_JUMP, mod_tlen);
889 r = compile_tree_empty_check(qn->
target, reg, empty_info);
894 r = add_state_check_num(reg, ckn);
896 r = add_rel_addr(reg,
897 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
903 else if (qn->
upper == 0) {
905 r = add_opcode_rel_addr(reg,
OP_JUMP, tlen);
907 r = compile_tree(qn->
target, reg);
913 if (qn->
lower == 0) {
917 r = add_state_check_num(reg, ckn);
919 r = add_rel_addr(reg, tlen);
922 r = add_opcode_rel_addr(reg,
OP_PUSH, tlen);
927 r = compile_tree(qn->
target, reg);
933 r = add_state_check_num(reg, ckn);
942 r = add_opcode_rel_addr(reg,
OP_JUMP, tlen);
944 r = compile_tree(qn->
target, reg);
947 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
952 r = add_state_check_num(reg, ckn);
966 int tlen = compile_length_tree(qn->
target, reg);
968 if (tlen < 0)
return tlen;
972 if (qn->
greedy && infinite) {
995 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1011 else if (!infinite && qn->
greedy &&
1034 int tlen = compile_length_tree(qn->
target, reg);
1036 if (tlen < 0)
return tlen;
1038 if (is_anychar_star_quantifier(qn)) {
1039 r = compile_tree_n_times(qn->
target, qn->
lower, reg);
1057 if (empty_info != 0)
1066 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1082 r = compile_tree_n_times(qn->
target, qn->
lower, reg);
1087 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1093 r = compile_tree_empty_check(qn->
target, reg, empty_info);
1095 r = add_opcode_rel_addr(reg,
OP_JUMP,
1105 r = compile_tree_empty_check(qn->
target, reg, empty_info);
1107 r = add_opcode_rel_addr(reg,
OP_JUMP,
1113 r = compile_tree_empty_check(qn->
target, reg, empty_info);
1115 r = add_opcode_rel_addr(reg,
OP_JUMP,
1120 r = add_opcode_rel_addr(reg,
OP_JUMP, mod_tlen);
1122 r = compile_tree_empty_check(qn->
target, reg, empty_info);
1128 r = add_opcode_rel_addr(reg,
OP_JUMP, tlen);
1130 r = compile_tree(qn->
target, reg);
1132 else if (!infinite && qn->
greedy &&
1137 r = compile_tree_n_times(qn->
target, qn->
lower, reg);
1140 for (
i = 0;
i <
n;
i++) {
1141 r = add_opcode_rel_addr(reg,
OP_PUSH,
1144 r = compile_tree(qn->
target, reg);
1151 r = add_opcode_rel_addr(reg,
OP_JUMP, tlen);
1153 r = compile_tree(qn->
target, reg);
1156 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1169 tlen = compile_length_tree(node->
target, reg);
1172 if (tlen < 0)
return tlen;
1198 r = compile_tree(node->
target, reg);
1215 return compile_length_option_node(node, reg);
1218 tlen = compile_length_tree(node->
target, reg);
1219 if (tlen < 0)
return tlen;
1224 switch (node->
type) {
1226 #ifdef USE_SUBEXP_CALL
1258 tlen = compile_length_tree(qn->
target, reg);
1259 if (tlen < 0)
return tlen;
1274 tlen = compile_length_tree(
NCAR(x), reg);
1275 if (tlen < 0)
return tlen;
1279 tlen = compile_length_tree(
NCAR(x), reg);
1280 if (tlen < 0)
return tlen;
/* Forward declaration of get_char_length_tree().  Call sites in this file
 * pass a target node, the regex_t*, and the address of an int, and keep the
 * int return value in `r`.
 * NOTE(review): presumably `len` receives the character length on success and
 * the return value is a status code -- confirm against the definition. */
1301 static int get_char_length_tree(
Node* node,
regex_t* reg,
int*
len);
1309 return compile_option_node(node, reg);
1311 switch (node->
type) {
1313 #ifdef USE_SUBEXP_CALL
1319 r = add_abs_addr(reg, (
int )node->
call_addr);
1321 len = compile_length_tree(node->
target, reg);
1339 r = add_mem_num(reg, node->
regnum);
1341 r = compile_tree(node->
target, reg);
1343 #ifdef USE_SUBEXP_CALL
1353 r = add_mem_num(reg, node->
regnum);
1363 r = add_mem_num(reg, node->
regnum);
1373 r = add_mem_num(reg, node->
regnum);
1380 r = compile_tree_n_times(qn->
target, qn->
lower, reg);
1383 len = compile_length_tree(qn->
target, reg);
1388 r = compile_tree(qn->
target, reg);
1390 r = add_opcode(reg,
OP_POP);
1392 r = add_opcode_rel_addr(reg,
OP_JUMP,
1398 r = compile_tree(node->
target, reg);
1407 r = add_mem_num(reg, node->
regnum);
1414 len = compile_length_tree(
NCAR(x), reg);
1418 len2 = compile_length_tree(
NCAR(x), reg);
1419 if (len2 < 0)
return len2;
1425 r = compile_tree(
NCAR(x), reg);
1427 r = add_opcode_rel_addr(reg,
OP_JUMP, len2);
1430 r = compile_tree(
NCAR(x), reg);
1438 len = compile_length_tree(node->
target, reg);
1445 r = compile_tree(node->
target, reg);
1465 tlen = compile_length_tree(node->
target, reg);
1466 if (tlen < 0)
return tlen;
1469 switch (node->
type) {
1496 switch (node->
type) {
1512 #ifdef USE_WORD_BEGIN_END
1527 r = compile_tree(node->
target, reg);
1533 len = compile_length_tree(node->
target, reg);
1537 r = compile_tree(node->
target, reg);
1548 r = get_char_length_tree(node->
target, reg, &
n);
1553 r = add_length(reg,
n);
1555 r = compile_tree(node->
target, reg);
1562 len = compile_length_tree(node->
target, reg);
1567 r = get_char_length_tree(node->
target, reg, &
n);
1572 r = add_length(reg,
n);
1574 r = compile_tree(node->
target, reg);
1598 r = compile_length_tree(
NCAR(node), reg);
1599 if (r < 0)
return r;
1610 r = compile_length_tree(
NCAR(node), reg);
1611 if (r < 0)
return r;
1622 r = compile_length_string_raw_node(
NSTR(node), reg);
1624 r = compile_length_string_node(node, reg);
1628 r = compile_length_cclass_node(
NCCLASS(node), reg);
1640 #ifdef USE_BACKREF_WITH_LEVEL
1657 #ifdef USE_SUBEXP_CALL
1664 r = compile_length_quantifier_node(
NQTFR(node), reg);
1668 r = compile_length_enclose_node(
NENCLOSE(node), reg);
1672 r = compile_length_anchor_node(
NANCHOR(node), reg);
1692 r = compile_tree(
NCAR(node), reg);
1701 len += compile_length_tree(
NCAR(x), reg);
1709 len = compile_length_tree(
NCAR(node), reg);
1714 r = compile_tree(
NCAR(node), reg);
1727 r = compile_string_raw_node(
NSTR(node), reg);
1729 r = compile_string_node(node, reg);
1733 r = compile_cclass_node(
NCCLASS(node), reg);
1740 switch (
NCTYPE(node)->ctype) {
1742 if (
NCTYPE(node)->ascii_range != 0) {
1755 r = add_opcode(reg, op);
1770 #ifdef USE_BACKREF_WITH_LEVEL
1779 goto add_bacref_mems;
1788 r = add_mem_num(reg,
n);
1797 r = add_mem_num(reg,
n);
1814 #ifdef USE_BACKREF_WITH_LEVEL
1821 r = add_mem_num(reg, p[
i]);
1828 #ifdef USE_SUBEXP_CALL
1830 r = compile_call(
NCALL(node), reg);
1835 r = compile_quantifier_node(
NQTFR(node), reg);
1839 r = compile_enclose_node(
NENCLOSE(node), reg);
1843 r = compile_anchor_node(
NANCHOR(node), reg);
1856 #ifdef USE_NAMED_GROUP
1862 Node* node = *plink;
1864 switch (
NTYPE(node)) {
1868 r = noname_disable_map(&(
NCAR(node)), map, counter);
1874 Node** ptarget = &(
NQTFR(node)->target);
1876 r = noname_disable_map(ptarget, map, counter);
1892 else if (en->
regnum != 0) {
1896 r = noname_disable_map(plink, map, counter);
1900 r = noname_disable_map(&(en->
target), map, counter);
1906 r = noname_disable_map(&(
NANCHOR(node)->target), map, counter);
1919 int i, pos,
n, old_num;
1932 for (
i = 0, pos = 0;
i < old_num;
i++) {
1949 switch (
NTYPE(node)) {
1953 r = renumber_by_map(
NCAR(node), map);
1957 r = renumber_by_map(
NQTFR(node)->target, map);
1964 r = renumber_by_map(en->
target, map);
1969 r = renumber_node_backref(node, map);
1974 r = renumber_by_map(
NANCHOR(node)->target, map);
1985 numbered_ref_check(
Node* node)
1989 switch (
NTYPE(node)) {
1993 r = numbered_ref_check(
NCAR(node));
1997 r = numbered_ref_check(
NQTFR(node)->target);
2000 r = numbered_ref_check(
NENCLOSE(node)->target);
2010 r = numbered_ref_check(
NANCHOR(node)->target);
2023 int r,
i, pos, counter;
2029 for (
i = 1;
i <=
env->num_mem;
i++) {
2033 r = noname_disable_map(root, map, &counter);
2034 if (r != 0)
return r;
2036 r = renumber_by_map(*root, map);
2037 if (r != 0)
return r;
2039 for (
i = 1, pos = 1;
i <=
env->num_mem;
i++) {
2040 if (map[
i].new_val > 0) {
2046 loc =
env->capture_history;
2054 env->num_mem =
env->num_named;
2061 #ifdef USE_SUBEXP_CALL
2069 for (
i = 0;
i < uslist->
num;
i++) {
2081 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2083 quantifiers_memory_node_info(
Node* node)
2087 switch (
NTYPE(node)) {
2093 v = quantifiers_memory_node_info(
NCAR(node));
2099 # ifdef USE_SUBEXP_CALL
2105 r = quantifiers_memory_node_info(
NCALL(node)->target);
2112 if (qn->
upper != 0) {
2113 r = quantifiers_memory_node_info(qn->
target);
2130 r = quantifiers_memory_node_info(en->
target);
2159 switch (
NTYPE(node)) {
2170 r = get_min_match_length(nodes[backs[0]], min,
env);
2174 r = get_min_match_length(nodes[backs[
i]], &tmin,
env);
2176 if (*min > tmin) *min = tmin;
2181 #ifdef USE_SUBEXP_CALL
2189 r = get_min_match_length(
NCALL(node)->target, min,
env);
2195 r = get_min_match_length(
NCAR(node), &tmin,
env);
2196 if (r == 0) *min += tmin;
2206 r = get_min_match_length(x, &tmin,
env);
2208 if (y == node) *min = tmin;
2209 else if (*min > tmin) *min = tmin;
2217 *min = sn->
end - sn->
s;
2234 if (qn->
lower > 0) {
2235 r = get_min_match_length(qn->
target, min,
env);
2237 *min = distance_multiply(*min, qn->
lower);
2254 r = get_min_match_length(en->
target, min,
env);
2267 r = get_min_match_length(en->
target, min,
env);
2291 switch (
NTYPE(node)) {
2294 r = get_max_match_length(
NCAR(node), &tmax,
env);
2296 *max = distance_add(*max, tmax);
2302 r = get_max_match_length(
NCAR(node), &tmax,
env);
2303 if (r == 0 && *max < tmax) *max = tmax;
2310 *max = sn->
end - sn->
s;
2336 r = get_max_match_length(nodes[backs[
i]], &tmax,
env);
2338 if (*max < tmax) *max = tmax;
2343 #ifdef USE_SUBEXP_CALL
2346 r = get_max_match_length(
NCALL(node)->target, max,
env);
2356 if (qn->
upper != 0) {
2357 r = get_max_match_length(qn->
target, max,
env);
2358 if (r == 0 && *max != 0) {
2360 *max = distance_multiply(*max, qn->
upper);
2380 r = get_max_match_length(en->
target, max,
env);
2393 r = get_max_match_length(en->
target, max,
env);
/* Two distinct negative constants.
 * NOTE(review): presumably status codes returned by get_char_length_tree1()
 * to report a variable-length sub-pattern (and the top-level-alternative
 * variant of that case) -- confirm against the function body.
 * The expansions are bare negative literals without parentheses. */
2410 #define GET_CHAR_LEN_VARLEN -1
2411 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2415 get_char_length_tree1(
Node* node,
regex_t* reg,
int*
len,
int level)
2422 switch (
NTYPE(node)) {
2425 r = get_char_length_tree1(
NCAR(node), reg, &tlen, level);
2436 r = get_char_length_tree1(
NCAR(node), reg, &tlen, level);
2438 r = get_char_length_tree1(
NCAR(node), reg, &tlen2, level);
2461 while (s < sn->
end) {
2472 r = get_char_length_tree1(qn->
target, reg, &tlen, level);
2481 #ifdef USE_SUBEXP_CALL
2484 r = get_char_length_tree1(
NCALL(node)->target, reg,
len, level);
2504 #ifdef USE_SUBEXP_CALL
2508 r = get_char_length_tree1(en->
target, reg,
len, level);
2519 r = get_char_length_tree1(en->
target, reg,
len, level);
2542 return get_char_length_tree1(node, reg,
len, 0);
2574 tmp = x; x = y; y = tmp;
2594 switch (
NCTYPE(y)->ctype) {
2596 if (
NCTYPE(y)->not == 0) {
2600 if (
NCTYPE(y)->ascii_range) {
2616 if (
NCTYPE(y)->ascii_range)
2680 switch (
NCTYPE(y)->ctype) {
2682 if (
NCTYPE(y)->ascii_range) {
2686 return !(
NCTYPE(y)->not);
2692 return !(
NCTYPE(y)->not);
2722 if (*p != *q)
return 1;
2742 get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2746 switch (
NTYPE(node)) {
2750 #ifdef USE_SUBEXP_CALL
2763 n = get_head_value_node(
NCAR(node), exact, reg);
2770 if (sn->
end <= sn->
s)
2785 if (qn->
lower > 0) {
2786 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2791 n = get_head_value_node(qn->
target, exact, reg);
2805 n = get_head_value_node(
NENCLOSE(node)->target, exact, reg);
2813 n = get_head_value_node(en->
target, exact, reg);
2824 n = get_head_value_node(
NANCHOR(node)->target, exact, reg);
2835 check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2847 r = check_type_tree(
NCAR(node), type_mask, enclose_mask,
2853 r = check_type_tree(
NQTFR(node)->target, type_mask, enclose_mask,
2860 if ((en->
type & enclose_mask) == 0)
2863 r = check_type_tree(en->
target, type_mask, enclose_mask, anchor_mask);
2869 if ((
type & anchor_mask) == 0)
2873 r = check_type_tree(
NANCHOR(node)->target,
2874 type_mask, enclose_mask, anchor_mask);
2883 #ifdef USE_SUBEXP_CALL
/* Two distinct single-bit values (1 and 2).
 * NOTE(review): presumably result flags of the subexp recursion checks that
 * follow, combinable with bitwise OR -- confirm against those functions. */
2885 # define RECURSION_EXIST 1
2886 # define RECURSION_INFINITE 2
2904 ret = subexp_inf_recursive_check(
NCAR(x),
env, head);
2908 ret = get_min_match_length(
NCAR(x), &min,
env);
2909 if (ret != 0)
return ret;
2910 if (min != 0) head = 0;
2921 ret = subexp_inf_recursive_check(
NCAR(node),
env, head);
2929 r = subexp_inf_recursive_check(
NQTFR(node)->target,
env, head);
2931 if (
NQTFR(node)->lower == 0) r = 0;
2943 r = subexp_inf_recursive_check(an->
target,
env, head);
2950 r = subexp_inf_recursive_check(
NCALL(node)->target,
env, head);
2960 r = subexp_inf_recursive_check(
NENCLOSE(node)->target,
env, head);
2983 r = subexp_inf_recursive_check_trav(
NCAR(node),
env);
2988 r = subexp_inf_recursive_check_trav(
NQTFR(node)->target,
env);
2999 r = subexp_inf_recursive_check_trav(an->
target,
env);
3011 r = subexp_inf_recursive_check(en->
target,
env, 1);
3015 r = subexp_inf_recursive_check_trav(en->
target,
env);
3028 subexp_recursive_check(
Node* node)
3032 switch (
NTYPE(node)) {
3036 r |= subexp_recursive_check(
NCAR(node));
3041 r = subexp_recursive_check(
NQTFR(node)->target);
3052 r = subexp_recursive_check(an->
target);
3059 r = subexp_recursive_check(
NCALL(node)->target);
3070 r = subexp_recursive_check(
NENCLOSE(node)->target);
3086 # define FOUND_CALLED_NODE 1
3098 ret = subexp_recursive_check_trav(
NCAR(node),
env);
3100 else if (ret < 0)
return ret;
3106 r = subexp_recursive_check_trav(
NQTFR(node)->target,
env);
3107 if (
NQTFR(node)->upper == 0) {
3109 NQTFR(node)->is_referred = 1;
3121 r = subexp_recursive_check_trav(an->
target,
env);
3134 r = subexp_recursive_check(en->
target);
3139 r = subexp_recursive_check_trav(en->
target,
env);
3162 r = setup_subexp_call(
NCAR(node),
env);
3168 r = setup_subexp_call(
NCAR(node),
env);
3173 r = setup_subexp_call(
NQTFR(node)->target,
env);
3176 r = setup_subexp_call(
NENCLOSE(node)->target,
env);
3187 # ifdef USE_NAMED_GROUP
3188 if (
env->num_named > 0 &&
3194 if (gnum >
env->num_mem) {
3200 # ifdef USE_NAMED_GROUP
3213 # ifdef USE_NAMED_GROUP
3214 # ifdef USE_PERL_SUBEXP_CALL
3272 divide_look_behind_alternatives(
Node* node)
3274 Node *head, *np, *insert_node;
3276 int anc_type = an->
type;
3280 swap_node(node, head);
3289 NCAR(np) = insert_node;
3307 r = get_char_length_tree(an->
target, reg, &
len);
3314 r = divide_look_behind_alternatives(node);
3332 #ifdef USE_QTFR_PEEK_NEXT
3333 Node*
n = get_head_value_node(next_node, 1, reg);
3340 if (qn->
lower <= 1) {
3344 x = get_head_value_node(qn->
target, 0, reg);
3346 y = get_head_value_node(next_node, 0, reg);
3347 if (
IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3351 swap_node(node, en);
3371 update_string_node_case_fold(
regex_t* reg,
Node *node)
3374 UChar *sbuf, *ebuf, *sp;
3380 sbuf_size = (
end - sn->
s) * 2;
3383 ebuf = sbuf + sbuf_size;
3389 for (
i = 0;
i <
len;
i++) {
3397 sp = sbuf + sbuf_size;
3399 ebuf = sbuf + sbuf_size;
3422 r = update_string_node_case_fold(reg, node);
3440 for (
i = 0;
i < item_num;
i++) {
3441 if (items[
i].byte_len != slen) {
3444 if (items[
i].code_len != 1) {
3456 int r,
i, j,
len, varlen;
3457 Node *anode, *var_anode, *snode, *xnode, *an;
3463 for (
i = 0;
i < item_num;
i++) {
3464 if (items[
i].byte_len != slen) {
3475 if (
IS_NULL(xnode))
goto mem_err;
3476 NCAR(var_anode) = xnode;
3479 if (
IS_NULL(anode))
goto mem_err;
3480 NCAR(xnode) = anode;
3488 if (
IS_NULL(snode))
goto mem_err;
3490 NCAR(anode) = snode;
3492 for (
i = 0;
i < item_num;
i++) {
3494 if (
IS_NULL(snode))
goto mem_err;
3496 for (j = 0; j < items[
i].
code_len; j++) {
3504 if (r != 0)
goto mem_err2;
3512 if (items[
i].byte_len != slen) {
3517 r = expand_case_fold_make_rem_string(&rem, q,
end, reg);
3542 NCDR(var_anode) = an;
3566 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3568 int r,
n,
len, alt_num;
3571 Node *top_root, *root, *snode, *prev_node;
3579 if (start >=
end)
return 0;
3582 top_root = root = prev_node = snode =
NULL_NODE;
3595 varlen = is_case_fold_variable_len(
n, items,
len);
3596 if (
n == 0 || varlen == 0) {
3608 if (
IS_NULL(snode))
goto mem_err;
3618 if (r != 0)
goto err;
3625 r = update_string_node_case_fold(reg, snode);
3639 r = expand_case_fold_string_alt(
n, items, p,
len,
end, reg, &prev_node);
3640 if (r < 0)
goto mem_err;
3643 top_root = prev_node;
3652 root =
NCAR(prev_node);
3669 r = update_string_node_case_fold(reg, snode);
3678 r = expand_case_fold_make_rem_string(&srem, p,
end, reg);
3679 if (r != 0)
goto mem_err;
3703 top_root = (
IS_NOT_NULL(top_root) ? top_root : prev_node);
3704 swap_node(node, top_root);
3717 #ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Constants for the combination-explosion check code that follows (only
 * compiled when USE_COMBINATION_EXPLOSION_CHECK is defined). */
/* When var_num is at or above this threshold, CEC_CONT_BIG_REPEAT is OR-ed
 * into add_state. */
3719 # define CEC_THRES_NUM_BIG_REPEAT 512
/* Value assigned to var_num in the branch that also sets
 * CEC_IN_INFINITE_REPEAT on child_state. */
3720 # define CEC_INFINITE_NUM 0x7fffffff
/* Bit flags tested on `state` and OR-ed into child_state / add_state. */
3722 # define CEC_IN_INFINITE_REPEAT (1<<0)
3723 # define CEC_IN_FINITE_REPEAT (1<<1)
3724 # define CEC_CONT_BIG_REPEAT (1<<2)
3738 r = setup_comb_exp_check(
NCAR(node), r,
env);
3748 ret = setup_comb_exp_check(
NCAR(node), state,
env);
3756 int child_state = state;
3763 if (qn->
upper > 1) {
3765 child_state |= CEC_IN_FINITE_REPEAT;
3768 if (
env->backrefed_mem == 0) {
3778 child_state = state;
3787 if (state & CEC_IN_FINITE_REPEAT) {
3788 qn->comb_exp_check_num = -1;
3792 var_num = CEC_INFINITE_NUM;
3793 child_state |= CEC_IN_INFINITE_REPEAT;
3799 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3800 add_state |= CEC_CONT_BIG_REPEAT;
3802 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3803 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3804 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3805 if (qn->comb_exp_check_num == 0) {
3806 env->num_comb_exp_check++;
3807 qn->comb_exp_check_num =
env->num_comb_exp_check;
3808 if (
env->curr_max_regnum >
env->comb_exp_max_regnum)
3809 env->comb_exp_max_regnum =
env->curr_max_regnum;
3814 r = setup_comb_exp_check(target, child_state,
env);
3829 r = setup_comb_exp_check(en->
target, state,
env);
3834 r = setup_comb_exp_check(en->
target, state,
env);
3840 # ifdef USE_SUBEXP_CALL
3843 env->has_recursion = 1;
3845 r = setup_comb_exp_check(
NCALL(node)->target, state,
env);
/* Six distinct single-bit flags (bits 0 through 5).
 * NOTE(review): presumably OR-ed into the int `state` argument that
 * setup_tree() passes down while walking the tree, recording the enclosing
 * context (alternation, negation, repeat, call) -- confirm against
 * setup_tree(), whose flag handling is not visible in this excerpt. */
3857 #define IN_ALT (1<<0)
3858 #define IN_NOT (1<<1)
3859 #define IN_REPEAT (1<<2)
3860 #define IN_VAR_REPEAT (1<<3)
3861 #define IN_CALL (1<<4)
3862 #define IN_RECCALL (1<<5)
3885 r = setup_tree(
NCAR(node), reg, state,
env);
3887 r = next_setup(prev,
NCAR(node), reg);
3905 r = expand_case_fold_string(node, reg);
3913 #ifdef USE_SUBEXP_CALL
3929 #ifdef USE_BACKREF_WITH_LEVEL
3950 r = get_min_match_length(target, &d,
env);
3954 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3955 r = quantifiers_memory_node_info(target);
3962 r = get_max_match_length(target, &d,
env);
3963 if (r == 0 && d == 0) {
3978 r = setup_tree(target, reg, state,
env);
3982 #define EXPAND_STRING_MAX_LENGTH 100
3984 if (qn->
lower > 1) {
4013 swap_node(np1, node);
4021 swap_node(np, node);
4028 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4054 r = setup_tree(
NENCLOSE(node)->target, reg, state,
env);
4070 r = setup_tree(en->
target, reg, state,
env);
4076 r = setup_tree(target, reg, state,
env);
4090 #ifdef USE_NAMED_GROUP
4092 env->num_named > 0 &&
4100 r = setup_tree(
NENCLOSE(node)->target, reg, state,
env);
4104 r = setup_tree(
NENCLOSE(node)->target, reg, state,
env);
4116 r = setup_tree(an->
target, reg, state,
env);
/* Bitwise OR of the BIT_NT_* node-type bits listed below.
 * NOTE(review): "LB" presumably stands for look-behind, making this the set
 * of node types accepted inside a look-behind target -- confirm at the call
 * site, which is not visible in this excerpt. */
4123 #define ALLOWED_TYPE_IN_LB \
4124 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4125 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
/* Enclose-type masks: the plain variant includes ENCLOSE_MEMORY and
 * ENCLOSE_OPTION, the _NOT variant only ENCLOSE_OPTION. */
4127 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4128 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
/* Anchor-type mask: bitwise OR of the ANCHOR_* values listed below. */
4130 #define ALLOWED_ANCHOR_IN_LB \
4131 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4132 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4133 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4134 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
/* Anchor-type mask for the _NOT case; as written it lists exactly the same
 * ANCHOR_* values as ALLOWED_ANCHOR_IN_LB. */
4135 #define ALLOWED_ANCHOR_IN_LB_NOT \
4136 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4137 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4138 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4139 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4145 if (r < 0)
return r;
4148 r = setup_tree(an->
target, reg, state,
env);
4149 if (r != 0)
return r;
4150 r = setup_look_behind(node, reg,
env);
4158 if (r < 0)
return r;
4162 if (r != 0)
return r;
4163 r = setup_look_behind(node, reg,
env);
4177 #ifndef USE_SUNDAY_QUICK_SEARCH
4181 UChar skip[],
int** int_skip,
int ignore_case)
4184 int clen, flen,
n, j, k;
4194 for (
i = 0;
i <
len - 1;
i += clen) {
4203 for (j = 0; j <
n; j++) {
4204 if ((items[j].code_len != 1) || (items[j].
byte_len != clen))
4210 for (j = 0; j < clen; j++) {
4212 for (k = 0; k <
n; k++) {
4219 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4230 for (
i = 0;
i <
len - 1;
i += clen) {
4239 for (j = 0; j <
n; j++) {
4240 if ((items[j].code_len != 1) || (items[j].
byte_len != clen))
4246 for (j = 0; j < clen; j++) {
4247 (*int_skip)[s[
i + j]] = (
int )(
len - 1 -
i - j);
4248 for (k = 0; k <
n; k++) {
4249 (*int_skip)[
buf[k][j]] = (
int )(
len - 1 -
i - j);
4263 UChar skip[],
int** int_skip,
int ignore_case)
4266 int clen, flen,
n, j, k;
4276 for (
i = 0;
i <
len;
i += clen) {
4285 for (j = 0; j <
n; j++) {
4286 if ((items[j].code_len != 1) || (items[j].
byte_len != clen))
4292 for (j = 0; j < clen; j++) {
4294 for (k = 0; k <
n; k++) {
4301 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4312 for (
i = 0;
i <
len;
i += clen) {
4321 for (j = 0; j <
n; j++) {
4322 if ((items[j].code_len != 1) || (items[j].
byte_len != clen))
4328 for (j = 0; j < clen; j++) {
4329 (*int_skip)[s[
i + j]] = (
int )(
len -
i - j);
4330 for (k = 0; k <
n; k++) {
4331 (*int_skip)[
buf[k][j]] = (
int )(
len -
i - j);
4392 static const short int ByteValTable[] = {
4393 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4394 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4395 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4396 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4397 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4398 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4399 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4400 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4407 return (
int )ByteValTable[
i];
4417 static const short int dist_vals[] = {
4418 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4419 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4420 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4421 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4422 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4423 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4424 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4425 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4426 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4427 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4437 return (
int )dist_vals[d];
4445 if (v2 <= 0)
return -1;
4446 if (v1 <= 0)
return 1;
4448 v1 *= distance_value(
d1);
4449 v2 *= distance_value(d2);
4451 if (v2 > v1)
return 1;
4452 if (v2 < v1)
return -1;
4454 if (d2->
min <
d1->min)
return 1;
4455 if (d2->
min >
d1->min)
return -1;
4489 to->
min = distance_add(to->
min, from->
min);
4490 to->
max = distance_add(to->
max, from->
max);
4532 clear_opt_anc_info(to);
4535 if (left_len == 0) {
4540 if (right_len == 0) {
4549 is_left_anchor(
int anc)
4570 if (is_left_anchor(anc))
4579 if (is_left_anchor(anc))
4601 clear_mml(&ex->
mmd);
4602 clear_opt_anc_info(&ex->
anc);
4629 for (
i = to->
len; p < end; ) {
4632 for (j = 0; j <
len && p < end; j++)
4639 concat_opt_anc_info(&tanc, &to->
anc, &
add->anc, 1, 1);
4641 copy_opt_anc_info(&to->
anc, &tanc);
4654 for (j = 0; j <
len && p < end; j++)
4666 if (
add->len == 0 || to->
len == 0) {
4667 clear_opt_exact_info(to);
4671 if (! is_equal_mml(&to->
mmd, &
add->mmd)) {
4672 clear_opt_exact_info(to);
4676 for (
i = 0;
i < to->
len &&
i <
add->len; ) {
4677 if (to->
s[
i] !=
add->s[
i])
break;
4680 for (j = 1; j <
len; j++) {
4681 if (to->
s[
i+j] !=
add->s[
i+j])
break;
4687 if (!
add->reach_end || i < add->
len || i < to->
len) {
4693 else if (
add->ignore_case >= 0)
4696 alt_merge_opt_anc_info(&to->
anc, &
add->anc);
4712 copy_opt_exact_info(now, alt);
4715 else if (v1 <= 2 && v2 <= 2) {
4717 v2 = map_position_value(enc, now->
s[0]);
4718 v1 = map_position_value(enc, alt->
s[0]);
4720 if (now->
len > 1) v1 += 5;
4721 if (alt->
len > 1) v2 += 5;
4727 if (comp_distance_value(&now->
mmd, &alt->
mmd, v1, v2) > 0)
4728 copy_opt_exact_info(now, alt);
4737 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4745 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4746 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4747 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4748 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4749 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4750 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4751 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4752 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4768 if (map->
map[c] == 0) {
4770 map->
value += map_position_value(enc, c);
4782 add_char_opt_map_info(map, p[0], enc);
4786 if (
n < 0)
return n;
4788 for (
i = 0;
i <
n;
i++) {
4790 add_char_opt_map_info(map,
buf[0], enc);
4799 const int z = 1<<15;
4803 if (alt->
value == 0) return ;
4804 if (now->
value == 0) {
4805 copy_opt_map_info(now, alt);
4809 v1 = z / now->
value;
4810 v2 = z / alt->
value;
4811 if (comp_distance_value(&now->
mmd, &alt->
mmd, v1, v2) > 0)
4812 copy_opt_map_info(now, alt);
4818 #define COMP_EM_BASE 20
4821 if (m->
value <= 0)
return -1;
4825 return comp_distance_value(&e->
mmd, &m->
mmd, ve, vm);
4834 if (to->
value == 0) return ;
4836 clear_opt_map_info(to);
4840 alt_merge_mml(&to->
mmd, &
add->mmd);
4848 val += map_position_value(enc,
i);
4852 alt_merge_opt_anc_info(&to->
anc, &
add->anc);
4858 copy_mml(&(opt->
exb.
mmd), mmd);
4859 copy_mml(&(opt->
expr.
mmd), mmd);
4860 copy_mml(&(opt->
map.
mmd), mmd);
4866 clear_mml(&opt->
len);
4867 clear_opt_anc_info(&opt->
anc);
4868 clear_opt_exact_info(&opt->
exb);
4869 clear_opt_exact_info(&opt->
exm);
4870 clear_opt_exact_info(&opt->
expr);
4871 clear_opt_map_info(&opt->
map);
4883 int exb_reach, exm_reach;
4886 concat_opt_anc_info(&tanc, &to->
anc, &
add->anc, to->
len.
max,
add->len.max);
4887 copy_opt_anc_info(&to->
anc, &tanc);
4889 if (
add->exb.len > 0 && to->
len.
max == 0) {
4890 concat_opt_anc_info(&tanc, &to->
anc, &
add->exb.anc,
4892 copy_opt_anc_info(&
add->exb.anc, &tanc);
4895 if (
add->map.value > 0 && to->
len.
max == 0) {
4896 if (
add->map.mmd.max == 0)
4903 if (
add->len.max != 0)
4906 if (
add->exb.len > 0) {
4908 concat_opt_exact_info(&to->
exb, &
add->exb, enc);
4909 clear_opt_exact_info(&
add->exb);
4911 else if (exm_reach) {
4912 concat_opt_exact_info(&to->
exm, &
add->exb, enc);
4913 clear_opt_exact_info(&
add->exb);
4916 select_opt_exact_info(enc, &to->
exm, &
add->exb);
4917 select_opt_exact_info(enc, &to->
exm, &
add->exm);
4920 if (
add->len.max > 0) {
4925 select_opt_exact_info(enc, &to->
exb, &to->
expr);
4927 select_opt_exact_info(enc, &to->
exm, &to->
expr);
4930 else if (
add->expr.len > 0) {
4931 copy_opt_exact_info(&to->
expr, &
add->expr);
4934 select_opt_map_info(&to->
map, &
add->map);
4936 add_mml(&to->
len, &
add->len);
4942 alt_merge_opt_anc_info (&to->
anc, &
add->anc);
4943 alt_merge_opt_exact_info(&to->
exb, &
add->exb,
env);
4944 alt_merge_opt_exact_info(&to->
exm, &
add->exm,
env);
4945 alt_merge_opt_exact_info(&to->
expr, &
add->expr,
env);
4946 alt_merge_opt_map_info(
env->enc, &to->
map, &
add->map);
4948 alt_merge_mml(&to->
len, &
add->len);
4952 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4960 clear_node_opt_info(opt);
4961 set_bound_node_opt_info(opt, &
env->mmd);
4971 copy_opt_env(&nenv,
env);
4973 r = optimize_node_left(
NCAR(nd), &nopt, &nenv);
4975 add_mml(&nenv.
mmd, &nopt.
len);
4976 concat_left_node_opt_info(
env->enc, opt, &nopt);
4988 r = optimize_node_left(
NCAR(nd), &nopt,
env);
4990 if (nd == node) copy_node_opt_info(opt, &nopt);
4991 else alt_merge_node_opt_info(opt, &nopt,
env);
5004 concat_opt_exact_info_str(&opt->
exb, sn->
s, sn->
end,
5008 add_char_opt_map_info(&opt->
map, *(sn->
s),
env->enc);
5010 set_mml(&opt->
len, slen, slen);
5020 concat_opt_exact_info_str(&opt->
exb, sn->
s, sn->
end,
5025 r = add_char_amb_opt_map_info(&opt->
map, sn->
s, sn->
end,
5026 env->enc,
env->case_fold_flag);
5033 set_mml(&opt->
len, slen, max);
5052 set_mml(&opt->
len, min, max);
5061 set_mml(&opt->
len, 1, 1);
5077 switch (
NCTYPE(node)->ctype) {
5079 if (
NCTYPE(node)->not != 0) {
5087 for (
i = 0;
i < maxcode;
i++) {
5099 set_mml(&opt->
len, min, max);
5107 set_mml(&opt->
len, min, max);
5128 r = optimize_node_left(
NANCHOR(node)->target, &nopt,
env);
5131 copy_opt_exact_info(&opt->
expr, &nopt.
exb);
5132 else if (nopt.
exm.
len > 0)
5133 copy_opt_exact_info(&opt->
expr, &nopt.
exm);
5138 copy_opt_map_info(&opt->
map, &nopt.
map);
5161 r = get_min_match_length(nodes[backs[0]], &min,
env->scan_env);
5163 r = get_max_match_length(nodes[backs[0]], &max,
env->scan_env);
5166 r = get_min_match_length(nodes[backs[
i]], &tmin,
env->scan_env);
5168 r = get_max_match_length(nodes[backs[
i]], &tmax,
env->scan_env);
5170 if (min > tmin) min = tmin;
5171 if (max < tmax) max = tmax;
5173 if (r == 0) set_mml(&opt->
len, min, max);
5177 #ifdef USE_SUBEXP_CALL
5184 r = optimize_node_left(
NCALL(node)->target, opt,
env);
5185 env->options = save;
5197 r = optimize_node_left(qn->
target, &nopt,
env);
5201 if (
env->mmd.max == 0 &&
5211 if (qn->
lower > 0) {
5212 copy_node_opt_info(opt, &nopt);
5216 ! is_full_opt_exact_info(&opt->
exb);
i++) {
5217 concat_opt_exact_info(&opt->
exb, &nopt.
exb,
env->enc);
5219 if (i < qn->lower) {
5240 set_mml(&opt->
len, min, max);
5254 r = optimize_node_left(en->
target, opt,
env);
5255 env->options = save;
5260 #ifdef USE_SUBEXP_CALL
5269 set_mml(&opt->
len, min, max);
5274 r = optimize_node_left(en->
target, opt,
env);
5285 r = optimize_node_left(en->
target, opt,
env);
5297 fprintf(
stderr,
"optimize_node_left: undefined node type %d\n",
5313 if (e->
len == 0)
return 0;
5324 if (e->
len >= 3 || (e->
len >= 2 && allow_reverse)) {
5328 reg->
optimize = (allow_reverse != 0
5340 if (e->
len >= 3 || (e->
len >= 2 && allow_reverse)) {
5344 reg->
optimize = (allow_reverse != 0
5390 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
/* Forward declaration of the debug helper print_optimize_info(); it is only
 * declared when ONIG_DEBUG_COMPILE or ONIG_DEBUG_MATCH is defined.  The call
 * visible later in this file passes stderr and the regex_t*. */
5391 static void print_optimize_info(
FILE*
f,
regex_t* reg);
5405 env.scan_env = scan_env;
5406 clear_mml(&
env.mmd);
5408 r = optimize_node_left(node, &opt, &
env);
5427 select_opt_exact_info(reg->
enc, &opt.
exb, &opt.
exm);
5429 comp_opt_exact_or_map_info(&opt.
exb, &opt.
map) > 0) {
5433 r = set_optimize_exact_info(reg, &opt.
exb);
5434 set_sub_anchor(reg, &opt.
exb.
anc);
5439 set_optimize_map_info(reg, &opt.
map);
5440 set_sub_anchor(reg, &opt.
map.
anc);
5448 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5449 print_optimize_info(
stderr, reg);
5455 clear_optimize_info(
regex_t* reg)
5485 fprintf(fp,
" 0x%04x ", (
int )code);
5488 fputc((
int )code, fp);
5491 p +=
enclen(enc, p, end);
5496 fputc((
int )*s, fp);
5501 fprintf(fp,
"/ (%s)\n", enc->name);
5505 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5523 print_anchor(
FILE*
f,
int anchor)
5574 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5576 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5593 for (p = reg->
exact; p < reg->exact_end; p++) {
5602 if (reg->
map[
i])
n++;
5609 if (reg->
map[
i] != 0) {
5610 if (c > 0)
fputs(
", ",
f);
5637 #ifdef USE_NAMED_GROUP
5671 size_t size =
sizeof(*regs);
5678 #define REGEX_TRANSFER(to,from) do {\
5679 onig_free_body(to);\
5680 xmemcpy(to, from, sizeof(regex_t));\
5692 #ifdef ONIG_DEBUG_COMPILE
5693 static void print_compiled_byte_code_list(
FILE*
f,
regex_t* reg);
5695 #ifdef ONIG_DEBUG_PARSE_TREE
5696 static void print_tree(
FILE*
f,
Node* node);
5711 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5718 #define COMPILE_INIT_SIZE 20
5724 #ifdef USE_SUBEXP_CALL
5736 print_enc_string(
stderr, reg->
enc, pattern, pattern_end);
5739 if (reg->
alloc == 0) {
5740 init_size = (pattern_end - pattern) * 2;
5743 if (r != 0)
goto end;
5753 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5758 if (r != 0)
goto err;
5760 #ifdef ONIG_DEBUG_PARSE_TREE
5763 print_tree(
stderr, root);
5767 #ifdef USE_NAMED_GROUP
5773 r = disable_noname_group_capture(&root, reg, &scan_env);
5775 r = numbered_ref_check(root);
5777 if (r != 0)
goto err;
5781 #ifdef USE_SUBEXP_CALL
5783 r = unset_addr_list_init(&uslist, scan_env.
num_call);
5784 if (r != 0)
goto err;
5786 r = setup_subexp_call(root, &scan_env);
5787 if (r != 0)
goto err_unset;
5788 r = subexp_recursive_check_trav(root, &scan_env);
5789 if (r < 0)
goto err_unset;
5790 r = subexp_inf_recursive_check_trav(root, &scan_env);
5791 if (r != 0)
goto err_unset;
5799 r = setup_tree(root, reg, 0, &scan_env);
5800 if (r != 0)
goto err_unset;
5802 #ifdef ONIG_DEBUG_PARSE_TREE
5803 print_tree(
stderr, root);
5816 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5822 setup_comb_exp_check(root, 0, &scan_env);
5823 # ifdef USE_SUBEXP_CALL
5824 if (scan_env.has_recursion != 0) {
5825 scan_env.num_comb_exp_check = 0;
5829 if (scan_env.comb_exp_max_regnum > 0) {
5831 for (
i = 1;
i <= scan_env.comb_exp_max_regnum;
i++) {
5833 scan_env.num_comb_exp_check = 0;
5843 clear_optimize_info(reg);
5844 #ifndef ONIG_DONT_OPTIMIZE
5845 r = set_optimize_info_from_tree(root, reg, &scan_env);
5846 if (r != 0)
goto err_unset;
5854 r = compile_tree(root, reg);
5856 r = add_opcode(reg,
OP_END);
5857 #ifdef USE_SUBEXP_CALL
5859 r = unset_addr_list_fix(&uslist, reg);
5860 unset_addr_list_end(&uslist);
5874 #ifdef USE_SUBEXP_CALL
5876 unset_addr_list_end(&uslist);
5881 #ifdef ONIG_DEBUG_COMPILE
5882 # ifdef USE_NAMED_GROUP
5883 onig_print_names(
stderr, reg);
5885 print_compiled_byte_code_list(
stderr, reg);
5892 #ifdef USE_SUBEXP_CALL
5894 unset_addr_list_end(&uslist);
5900 einfo->
enc = scan_env.
enc;
5912 static int onig_inited = 0;
5941 (reg)->options = option;
5942 (reg)->syntax = syntax;
5943 (reg)->optimize = 0;
5945 (reg)->int_map = (
int* )
NULL;
5946 (reg)->int_map_backward = (
int* )
NULL;
5952 (reg)->name_table = (
void* )
NULL;
5954 (reg)->case_fold_flag = case_fold_flag;
6003 if (onig_inited != 0)
6008 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6009 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6015 #ifdef ONIG_DEBUG_STATISTICS
6016 onig_statistics_init();
6030 if (item == 0) return ;
6032 item->
next = EndCallTop;
6039 exec_end_call_list(
void)
6044 while (EndCallTop != 0) {
6045 func = EndCallTop->
func;
6049 EndCallTop = EndCallTop->
next;
6057 exec_end_call_list();
6059 #ifdef ONIG_DEBUG_STATISTICS
6060 onig_print_statistics(
stderr);
6063 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6064 _CrtDumpMemoryLeaks();
6082 for (low = 0, high =
n; low < high; ) {
6083 x = (low + high) >> 1;
6084 if (code > data[x * 2 + 1])
6090 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6134 # define ARG_SPECIAL -1
6136 # define ARG_RELADDR 1
6137 # define ARG_ABSADDR 2
6138 # define ARG_LENGTH 3
6139 # define ARG_MEMNUM 4
6140 # define ARG_OPTION 5
6141 # define ARG_STATE_CHECK 6
6143 OnigOpInfoType OnigOpInfo[] = {
6145 {
OP_END,
"end", ARG_NON },
6207 {
OP_JUMP,
"jump", ARG_RELADDR },
6208 {
OP_PUSH,
"push", ARG_RELADDR },
6209 {
OP_POP,
"pop", ARG_NON },
6234 {
OP_CALL,
"call", ARG_ABSADDR },
6242 "state-check-anychar-ml*", ARG_STATE_CHECK },
6251 for (
i = 0; OnigOpInfo[
i].opcode >= 0;
i++) {
6252 if (opcode == OnigOpInfo[
i].opcode)
6253 return OnigOpInfo[
i].name;
6259 op2arg_type(
int opcode)
6263 for (
i = 0; OnigOpInfo[
i].opcode >= 0;
i++) {
6264 if (opcode == OnigOpInfo[
i].opcode)
6265 return OnigOpInfo[
i].arg_type;
6270 # ifdef ONIG_DEBUG_PARSE_TREE
6272 Indent(
FILE*
f,
int indent)
6275 for (
i = 0;
i < indent;
i++)
putc(
' ',
f);
6289 int x =
len * mb_len;
6292 while (x-- > 0) {
fputc(*s++,
f); }
6308 arg_type = op2arg_type(*
bp);
6309 if (arg_type != ARG_SPECIAL) {
6316 fprintf(
f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6339 case ARG_STATE_CHECK:
6351 p_string(
f, 1,
bp++);
break;
6353 p_string(
f, 2,
bp);
bp += 2;
break;
6355 p_string(
f, 3,
bp);
bp += 3;
break;
6357 p_string(
f, 4,
bp);
bp += 4;
break;
6359 p_string(
f, 5,
bp);
bp += 5;
break;
6362 p_len_string(
f,
len, 1,
bp);
6367 p_string(
f, 2,
bp);
bp += 2;
break;
6369 p_string(
f, 4,
bp);
bp += 4;
break;
6371 p_string(
f, 6,
bp);
bp += 6;
break;
6374 p_len_string(
f,
len, 2,
bp);
6379 p_len_string(
f,
len, 3,
bp);
6401 p_len_string(
f,
len, 1,
bp);
6421 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6435 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6453 for (
i = 0;
i <
len;
i++) {
6472 for (
i = 0;
i <
len;
i++) {
6495 fprintf(
f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6508 fprintf(
f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6517 fprintf(
f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6523 fprintf(
f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
6527 fprintf(
stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6532 if (nextp) *nextp =
bp;
6535 # ifdef ONIG_DEBUG_COMPILE
6552 onig_print_compiled_byte_code(
f,
bp, end, &
bp, reg->
enc);
6559 # ifdef ONIG_DEBUG_PARSE_TREE
6561 print_indent_tree(
FILE*
f,
Node* node,
int indent)
6563 int i,
type, container_p = 0;
6569 fprintf(
f,
"ERROR: null node!!!\n");
6582 print_indent_tree(
f,
NCAR(node), indent +
add);
6585 fprintf(
f,
"ERROR: list/alt right is not a cons. %d\n",
NTYPE(node));
6588 print_indent_tree(
f,
NCAR(node), indent +
add);
6595 for (p =
NSTR(node)->s; p <
NSTR(node)->end; p++) {
6596 if (*p >= 0x20 && *p < 0x7f)
6612 for (; data < end; data+=2) {
6614 fprintf(
f,
"%04x-%04x", data[0], data[1]);
6621 switch (
NCTYPE(node)->ctype) {
6623 if (
NCTYPE(node)->not != 0)
6630 fprintf(
f,
"ERROR: undefined ctype.\n");
6651 # ifdef USE_WORD_BEGIN_END
6662 fprintf(
f,
"ERROR: undefined anchor type.\n");
6680 # ifdef USE_SUBEXP_CALL
6693 (
NQTFR(node)->greedy ?
"" :
"?"));
6694 print_indent_tree(
f,
NQTFR(node)->target, indent +
add);
6720 print_indent_tree(
f,
NENCLOSE(node)->target, indent +
add);
6724 fprintf(
f,
"print_indent_tree: undefined node type %d\n",
NTYPE(node));
6732 if (container_p) print_indent_tree(
f,
NANCHOR(node)->target, indent +
add);
6740 print_indent_tree(
f, node, 0);