/*
 * Compile-time switch guarding the sym_newline / sym_universal / sym_crlf /
 * sym_cr / sym_lf symbols and the code that compares option values against
 * them.  Every guard in this file uses #ifdef, so only the *presence* of this
 * definition matters: changing the value to 0 does not disable anything; the
 * definition itself has to be removed.
 */
18 #define ENABLE_ECONV_NEWLINE_OPTION 1
21 static VALUE rb_eUndefinedConversionError;
22 static VALUE rb_eInvalidByteSequenceError;
23 static VALUE rb_eConverterNotFoundError;
27 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
28 static VALUE sym_xml, sym_text, sym_attr;
29 static VALUE sym_universal_newline;
30 static VALUE sym_crlf_newline;
31 static VALUE sym_cr_newline;
32 #ifdef ENABLE_ECONV_NEWLINE_OPTION
33 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
35 static VALUE sym_partial_input;
37 static VALUE sym_invalid_byte_sequence;
38 static VALUE sym_undefined_conversion;
39 static VALUE sym_destination_buffer_full;
40 static VALUE sym_source_buffer_empty;
41 static VALUE sym_finished;
42 static VALUE sym_after_output;
43 static VALUE sym_incomplete_input;
45 static unsigned char *
46 allocate_converted_string(
const char *sname,
const char *dname,
47 const unsigned char *
str,
size_t len,
48 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
84 #define TRANSCODING_READBUF(tc) \
85 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
88 #define TRANSCODING_WRITEBUF(tc) \
89 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
90 (tc)->writebuf.ary : \
/*
 * Size in bytes to use for tc's write buffer: sizeof the embedded
 * writebuf.ary array when the transcoder's max_output fits in it, otherwise
 * max_output itself (converted to size_t).  The selection condition is the
 * same one TRANSCODING_WRITEBUF uses to pick the buffer.
 * Note: `tc` is evaluated more than once, so pass a side-effect-free
 * expression.
 */
92 #define TRANSCODING_WRITEBUF_SIZE(tc) \
93 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
94 sizeof((tc)->writebuf.ary) : \
95 (size_t)(tc)->transcoder->max_output)
/*
 * Size of union rb_transcoding_state_t, cast to int so it can be compared
 * directly with a transcoder's state_size.  Code in this file tests
 * `TRANSCODING_STATE_EMBED_MAX < tr->state_size` to detect a state that is
 * larger than the union.
 */
96 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
97 #define TRANSCODING_STATE(tc) \
98 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
/*
 * True when the (sname, dname) pair denotes a decorator, i.e. when the source
 * name is the empty string.  Only the first character of sname is inspected;
 * dname is accepted for symmetry with the other helpers but is not used.
 * (rb_econv_decorate_at registers decorators with "" as the source name.)
 */
154 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
166 make_transcoder_entry(
const char *sname,
const char *dname)
178 entry->
sname = sname;
179 entry->
dname = dname;
189 get_transcoder_entry(
const char *sname,
const char *dname)
207 const char *
const sname =
tr->src_encoding;
208 const char *
const dname =
tr->dst_encoding;
212 entry = make_transcoder_entry(sname, dname);
222 declare_transcoder(
const char *sname,
const char *dname,
const char *lib)
226 entry = make_transcoder_entry(sname, dname);
230 static const char transcoder_lib_prefix[] =
"enc/trans/";
238 declare_transcoder(enc1, enc2, lib);
/*
 * Non-zero when STRCASECMP reports the two encoding-name strings as equal
 * (returns 0).
 * NOTE(review): STRCASECMP is defined outside this view; presumably an
 * ASCII case-insensitive strcmp -- confirm.
 */
241 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
258 const char *dname = (
const char *)
key;
277 transcode_search_path(
const char *sname,
const char *dname,
278 void (*callback)(
const char *sname,
const char *dname,
int depth,
void *
arg),
335 const char *enc = dname;
343 enc = (
const char *)val;
351 callback((
const char *)val, enc, --depth,
arg);
352 enc = (
const char *)val;
368 const char *
const lib = entry->
lib;
370 const size_t total_len =
sizeof(transcoder_lib_prefix) - 1 +
len;
374 memcpy(
path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
388 get_replacement_character(
const char *encname,
size_t *len_ret,
const char **repl_encname_ptr)
392 *repl_encname_ptr =
"UTF-8";
393 return "\xEF\xBF\xBD";
397 *repl_encname_ptr =
"US-ASCII";
406 static const unsigned char *
408 const unsigned char *in_start,
409 const unsigned char *inchar_start,
410 const unsigned char *in_p,
411 size_t *char_len_ptr)
413 const unsigned char *
ptr;
414 if (inchar_start - in_start < tc->recognized_len) {
416 inchar_start,
unsigned char, in_p - inchar_start);
427 transcode_restartable0(
const unsigned char **in_pos,
unsigned char **out_pos,
428 const unsigned char *in_stop,
unsigned char *out_stop,
433 int unitlen =
tr->input_unit_length;
436 const unsigned char *inchar_start;
437 const unsigned char *in_p;
439 unsigned char *out_p;
441 in_p = inchar_start = *in_pos;
445 #define SUSPEND(ret, num) \
447 tc->resume_position = (num); \
448 if (0 < in_p - inchar_start) \
449 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
450 inchar_start, unsigned char, in_p - inchar_start); \
453 tc->recognized_len += in_p - inchar_start; \
454 if (readagain_len) { \
455 tc->recognized_len -= readagain_len; \
456 tc->readagain_len = readagain_len; \
459 resume_label ## num:; \
461 #define SUSPEND_OBUF(num) \
463 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
466 #define SUSPEND_AFTER_OUTPUT(num) \
467 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
468 SUSPEND(econv_after_output, num); \
471 #define next_table (tc->next_table) /* shorthand for the field of the same name on the in-scope `tc` */
472 #define next_info (tc->next_info) /* shorthand for tc->next_info */
473 #define next_byte (tc->next_byte) /* shorthand for tc->next_byte */
474 #define writebuf_len (tc->writebuf_len) /* shorthand for tc->writebuf_len */
475 #define writebuf_off (tc->writebuf_off) /* shorthand for tc->writebuf_off */
479 case 1:
goto resume_label1;
480 case 2:
goto resume_label2;
481 case 3:
goto resume_label3;
482 case 4:
goto resume_label4;
483 case 5:
goto resume_label5;
484 case 6:
goto resume_label6;
485 case 7:
goto resume_label7;
486 case 8:
goto resume_label8;
487 case 9:
goto resume_label9;
488 case 10:
goto resume_label10;
489 case 11:
goto resume_label11;
490 case 12:
goto resume_label12;
491 case 13:
goto resume_label13;
492 case 14:
goto resume_label14;
493 case 15:
goto resume_label15;
494 case 16:
goto resume_label16;
495 case 17:
goto resume_label17;
496 case 18:
goto resume_label18;
497 case 19:
goto resume_label19;
498 case 20:
goto resume_label20;
499 case 21:
goto resume_label21;
500 case 22:
goto resume_label22;
501 case 23:
goto resume_label23;
502 case 24:
goto resume_label24;
503 case 25:
goto resume_label25;
504 case 26:
goto resume_label26;
505 case 27:
goto resume_label27;
506 case 28:
goto resume_label28;
507 case 29:
goto resume_label29;
508 case 30:
goto resume_label30;
509 case 31:
goto resume_label31;
510 case 32:
goto resume_label32;
511 case 33:
goto resume_label33;
512 case 34:
goto resume_label34;
522 if (in_stop <= in_p) {
/*
 * Helpers for reading the transcoder's lookup tables.  They expand in terms
 * of the in-scope `tr` (for byte_array / word_array) and `next_table`
 * (itself a macro for tc->next_table), so they are only usable where both
 * are visible.
 * NOTE(review): INFO2WORDINDEX, BYTE_LOOKUP_BASE and BYTE_LOOKUP_INFO are
 * defined outside this view; their exact encoding is not verified here.
 */
529 #define BYTE_ADDR(index) (tr->byte_array + (index)) /* pointer to entry `index` of tr->byte_array */
530 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index)) /* pointer into tr->word_array at INFO2WORDINDEX(index) */
531 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table))) /* byte-array address given by BYTE_LOOKUP_BASE of the current table */
532 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table))) /* word-array address given by BYTE_LOOKUP_INFO of the current table */
533 #define BL_MIN_BYTE (BL_BASE[0]) /* first byte at BL_BASE */
534 #define BL_MAX_BYTE (BL_BASE[1]) /* second byte at BL_BASE */
535 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE]) /* entry for `byte`; entries start at BL_BASE[2] and are indexed relative to BL_MIN_BYTE (no range check here) */
536 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))]) /* BL_INFO element selected by BL_OFFSET(byte) */
549 const unsigned char *p = inchar_start;
562 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
563 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
565 while (in_p >= in_stop) {
611 const unsigned char *char_start;
613 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
619 if (
tr->max_output <= out_stop - out_p)
635 const unsigned char *char_start;
638 if (
tr->max_output <= out_stop - out_p) {
639 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
641 char_start, (
size_t)char_len,
642 out_p, out_stop - out_p);
645 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
647 char_start, (
size_t)char_len,
659 const unsigned char *char_start;
662 if (
tr->max_output <= out_stop - out_p) {
663 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
666 out_p, out_stop - out_p);
669 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
700 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
701 readagain_len = invalid_len - discard_len;
725 if (
tr->finish_func) {
727 if (
tr->max_output <= out_stop - out_p) {
729 out_p, out_stop - out_p);
752 transcode_restartable(
const unsigned char **in_pos,
unsigned char **out_pos,
753 const unsigned char *in_stop,
unsigned char *out_stop,
759 const unsigned char *readagain_pos = readagain_buf;
760 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
766 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|
ECONV_PARTIAL_INPUT);
769 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
774 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
785 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
787 if (
tr->state_init_func) {
806 const unsigned char **input_ptr,
const unsigned char *input_stop,
807 unsigned char **output_ptr,
unsigned char *output_stop,
810 return transcode_restartable(
811 input_ptr, output_ptr,
812 input_stop, output_stop,
820 if (
tr->state_fini_func) {
823 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
838 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
851 rb_econv_alloc(
int n_hint)
905 ec->
elems[
i].
tc = rb_transcoding_open_by_transcoder(
tr, 0);
933 for (
i = 0;
i <
n;
i++) {
940 ec = rb_econv_alloc(
n);
942 for (
i = 0;
i <
n;
i++) {
944 ret = rb_econv_add_transcoder_at(ec,
tr, ec->
num_trans);
960 trans_open_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
967 toarg->
entries[depth] = get_transcoder_entry(sname, dname);
971 rb_econv_open0(
const char *sname,
const char *dname,
int ecflags)
982 if (*sname ==
'\0' && *dname ==
'\0') {
990 toarg.num_additional = 0;
991 num_trans = transcode_search_path(sname, dname, trans_open_i, (
void *)&toarg);
999 ec = rb_econv_open_by_transcoder_entries(num_trans,
entries);
1004 ec->
flags = ecflags;
/*
 * NOTE(review): presumably the capacity of the decorator-name arrays that
 * decorator_names() fills from ecflags; the array declarations are not
 * visible in this view -- confirm before changing the value.
 */
1011 #define MAX_ECFLAGS_DECORATORS 32
1014 decorator_names(
int ecflags,
const char **decorators_ret)
1035 decorators_ret[num_decorators++] =
"xml_text_escape";
1037 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1039 decorators_ret[num_decorators++] =
"xml_attr_quote";
1042 decorators_ret[num_decorators++] =
"crlf_newline";
1044 decorators_ret[num_decorators++] =
"cr_newline";
1046 decorators_ret[num_decorators++] =
"universal_newline";
1048 return num_decorators;
1059 num_decorators = decorator_names(ecflags, decorators);
1060 if (num_decorators == -1)
1067 for (
i = 0;
i < num_decorators;
i++)
1080 const unsigned char **input_ptr,
const unsigned char *input_stop,
1081 unsigned char **output_ptr,
unsigned char *output_stop,
1088 const unsigned char **ipp, *is, *iold;
1089 unsigned char **opp, *os, *oold;
1135 te->
last_result = res = rb_transcoding_convert(te->
tc, ipp, is, opp, os,
f);
1136 if (iold != *ipp || oold != *opp)
1161 const unsigned char **input_ptr,
const unsigned char *input_stop,
1162 unsigned char **output_ptr,
unsigned char *output_stop,
1164 int *result_position_ptr)
1167 int needreport_index;
1170 unsigned char empty_buf;
1171 unsigned char *empty_ptr = &empty_buf;
1174 input_ptr = (
const unsigned char **)&empty_ptr;
1175 input_stop = empty_ptr;
1179 output_ptr = &empty_ptr;
1180 output_stop = empty_ptr;
1194 goto found_needreport;
1201 rb_bug(
"unexpected transcode last result");
1211 res = rb_trans_conv(ec,
NULL,
NULL, output_ptr, output_stop,
1213 result_position_ptr);
1225 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1226 sweep_start = needreport_index + 1;
1227 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1238 if (result_position_ptr)
1239 *result_position_ptr =
i;
1243 if (result_position_ptr)
1244 *result_position_ptr = -1;
1250 const unsigned char **input_ptr,
const unsigned char *input_stop,
1251 unsigned char **output_ptr,
unsigned char *output_stop,
1255 int result_position;
1263 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1264 len = output_stop - *output_ptr;
1266 *output_ptr = output_stop;
1280 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1281 len = output_stop - *output_ptr;
1284 len = input_stop - *input_ptr;
1287 *(*output_ptr)++ = *(*input_ptr)++;
1294 if (*input_ptr != input_stop)
1306 if (data_start != data_end) {
1308 if (output_stop - *output_ptr < data_end - data_start) {
1309 len = output_stop - *output_ptr;
1311 *output_ptr = output_stop;
1316 len = data_end - data_start;
1336 *input_ptr != input_stop) {
1337 input_stop = *input_ptr;
1338 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1344 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1349 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1370 static int output_replacement_character(
rb_econv_t *ec);
1376 unsigned char utfbuf[1024];
1377 const unsigned char *utf;
1379 int utf_allocated = 0;
1380 char charef_buf[16];
1381 const unsigned char *p;
1390 utfbuf,
sizeof(utfbuf),
1398 if (utf_len % 4 != 0)
1402 while (4 <= utf_len) {
1408 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1430 const unsigned char **input_ptr,
const unsigned char *input_stop,
1431 unsigned char **output_ptr,
unsigned char *output_stop,
1436 unsigned char empty_buf;
1437 unsigned char *empty_ptr = &empty_buf;
1442 input_ptr = (
const unsigned char **)&empty_ptr;
1443 input_stop = empty_ptr;
1447 output_ptr = &empty_ptr;
1448 output_stop = empty_ptr;
1452 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1460 if (output_replacement_character(ec) == 0)
1471 if (output_replacement_character(ec) == 0)
1476 if (output_hex_charref(ec) == 0)
1497 return tr->src_encoding;
1498 return tr->dst_encoding;
1501 static unsigned char *
1502 allocate_converted_string(
const char *sname,
const char *dname,
1503 const unsigned char *
str,
size_t len,
1504 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1505 size_t *dst_len_ptr)
1507 unsigned char *dst_str;
1514 const unsigned char *sp;
1518 dst_bufsize = caller_dst_bufsize;
1528 dst_str = caller_dst_buf;
1530 dst_str =
xmalloc(dst_bufsize);
1533 dp = dst_str+dst_len;
1535 dst_len =
dp - dst_str;
1541 if (dst_str == caller_dst_buf) {
1544 memcpy(tmp, dst_str, dst_bufsize/2);
1548 dst_str =
xrealloc(dst_str, dst_bufsize);
1550 dp = dst_str+dst_len;
1552 dst_len =
dp - dst_str;
1558 *dst_len_ptr = dst_len;
1562 if (dst_str != caller_dst_buf)
1571 const unsigned char *
str,
size_t len,
const char *str_encoding)
1574 unsigned char insert_buf[4096];
1575 const unsigned char *insert_str =
NULL;
1578 int last_trans_index;
1581 unsigned char **buf_start_p;
1582 unsigned char **data_start_p;
1583 unsigned char **data_end_p;
1584 unsigned char **buf_end_p;
1598 insert_str = allocate_converted_string(str_encoding, insert_encoding,
1599 str,
len, insert_buf,
sizeof(insert_buf), &insert_len);
1600 if (insert_str ==
NULL)
1615 tc = ec->
elems[last_trans_index].
tc;
1617 if (need < insert_len)
1619 if (last_trans_index == 0) {
1639 tc = ec->
elems[last_trans_index].
tc;
1642 if (*buf_start_p ==
NULL) {
1645 *data_start_p =
buf;
1647 *buf_end_p =
buf+need;
1649 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1650 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1651 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1652 *data_start_p = *buf_start_p;
1653 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1655 size_t s = (*data_end_p - *buf_start_p) + need;
1659 *data_start_p =
buf;
1660 *data_end_p =
buf + (*data_end_p - *buf_start_p);
1662 *buf_end_p =
buf + s;
1666 memcpy(*data_end_p, insert_str, insert_len);
1667 *data_end_p += insert_len;
1674 if (insert_str !=
str && insert_str != insert_buf)
1675 xfree((
void*)insert_str);
1679 if (insert_str !=
str && insert_str != insert_buf)
1680 xfree((
void*)insert_str);
1693 rb_transcoding_close(ec->
elems[
i].
tc);
1729 #if SIZEOF_SIZE_T > SIZEOF_INT
1760 tr = load_transcoder_entry(entry);
1792 return data.ascii_compat_name;
1798 unsigned const char *sp, *se;
1799 unsigned char *ds, *
dp, *de;
1817 unsigned long new_capa = (
unsigned long)dlen +
len + max_output;
1823 sp = (
const unsigned char *)ss;
1829 len -= (
const char *)sp - ss;
1830 ss = (
const char *)sp;
1866 rb_econv_add_converter(
rb_econv_t *ec,
const char *sname,
const char *dname,
int n)
1874 entry = get_transcoder_entry(sname, dname);
1878 tr = load_transcoder_entry(entry);
1881 return rb_econv_add_transcoder_at(ec,
tr,
n);
1885 rb_econv_decorate_at(
rb_econv_t *ec,
const char *decorator_name,
int n)
1887 return rb_econv_add_converter(ec,
"", decorator_name,
n);
1896 return rb_econv_decorate_at(ec, decorator_name, 0);
1902 return rb_econv_decorate_at(ec, decorator_name, 1);
1904 return rb_econv_decorate_at(ec, decorator_name, 0);
1913 return rb_econv_decorate_at(ec, decorator_name, 0);
1919 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans-1);
1921 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans);
1927 const char *dname = 0;
1931 dname =
"universal_newline";
1934 dname =
"crlf_newline";
1937 dname =
"cr_newline";
1946 for (
i=0;
i < num_trans;
i++) {
1948 rb_transcoding_close(ec->
elems[
i].
tc);
1961 econv_description(
const char *sname,
const char *dname,
int ecflags,
VALUE mesg)
1963 int has_description = 0;
1968 if (*sname !=
'\0' || *dname !=
'\0') {
1971 else if (*dname ==
'\0')
1975 has_description = 1;
1982 const char *pre =
"";
1983 if (has_description)
2009 has_description = 1;
2011 if (!has_description) {
2023 econv_description(sname, dname, ecflags, mesg);
2048 else if (readagain_len) {
2085 const char *start, *end;
2109 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2133 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2135 unsigned char **out_start_ptr,
2136 unsigned char **out_pos,
2137 unsigned char **out_stop_ptr)
2139 size_t len = (*out_pos - *out_start_ptr);
2140 size_t new_len = (
len + max_output) * 2;
2141 *out_start_ptr = resize_destination(destination,
len, new_len);
2142 *out_pos = *out_start_ptr +
len;
2143 *out_stop_ptr = *out_start_ptr + new_len;
2151 const unsigned char *replacement;
2152 const char *repl_enc;
2153 const char *ins_enc;
2165 replacement = (
const unsigned char *)get_replacement_character(ins_enc, &
len, &repl_enc);
2168 replacement = (
unsigned char *)
"?";
2182 const unsigned char *
str,
size_t len,
const char *encname)
2184 unsigned char *str2;
2186 const char *encname2;
2197 str2 = allocate_converted_string(encname, encname2,
str,
len,
NULL, 0, &len2);
2217 if (make_replacement(ec) == -1)
/*
 * Alias for rb_hash_aref.
 * NOTE(review): presumably one of the candidates assigned to the
 * `fallback_func` pointer that transcode_loop calls as
 * (*fallback_func)(fallback, rep); the assignment is not visible here.
 */
2228 #define hash_fallback rb_hash_aref
2249 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2250 const unsigned char *in_stop,
unsigned char *out_stop,
2252 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2253 const char *src_encoding,
2254 const char *dst_encoding,
2261 unsigned char *out_start = *out_pos;
2297 rep = (*fallback_func)(fallback, rep);
2302 if ((
int)ret == -1) {
2312 exc = make_econv_exception(ec);
2318 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2328 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2329 const unsigned char *in_stop,
unsigned char *out_stop,
2331 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2332 const char *src_encoding,
2333 const char *dst_encoding,
2340 unsigned char *out_start = *out_pos;
2341 const unsigned char *
ptr;
2355 unsigned char input_byte;
2356 const unsigned char *p = &input_byte;
2359 if (
ptr < in_stop) {
2370 if (&input_byte != p)
2371 ptr += p - &input_byte;
2376 exc = make_econv_exception(ec);
2382 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2403 static unsigned char *
2404 str_transcoding_resize(
VALUE destination,
size_t len,
size_t new_len)
2411 econv_opts(
VALUE opt,
int ecflags)
2418 else if (
v==sym_replace) {
2428 else if (
v==sym_replace) {
2445 else if (
v==sym_attr) {
2456 #ifdef ENABLE_ECONV_NEWLINE_OPTION
2460 if (
v == sym_universal) {
2463 else if (
v == sym_crlf) {
2466 else if (
v == sym_cr) {
2469 else if (
v == sym_lf) {
2483 int setflags = 0, newlineflag = 0;
2488 newlineflag |= !
NIL_P(
v);
2493 newlineflag |= !
NIL_P(
v);
2498 newlineflag |= !
NIL_P(
v);
2502 ecflags |= setflags;
2515 if (
NIL_P(opthash)) {
2519 ecflags = econv_opts(opthash, ecflags);
2547 if (!
NIL_P(newhash))
2566 if (
NIL_P(opthash)) {
2571 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2575 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2579 if (!
NIL_P(replacement)) {
2625 const char *sname, *dname;
2626 int sencidx, dencidx;
2628 dencidx = enc_arg(arg1, &dname, &denc);
2636 sencidx = enc_arg(arg2, &sname, &senc);
2653 unsigned char *
buf, *
bp, *sp;
2654 const unsigned char *fromp;
2656 const char *sname, *dname;
2658 int explicitly_invalid_replace =
TRUE;
2665 if (!ecflags)
return -1;
2669 explicitly_invalid_replace =
FALSE;
2677 dencidx = str_transcode_enc_args(
str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2683 if (senc && senc == denc) {
2686 if (!
NIL_P(ecopts)) {
2694 return NIL_P(arg2) ? -1 : dencidx;
2702 return NIL_P(arg2) ? -1 : dencidx;
2718 transcode_loop(&fromp, &
bp, (sp+slen), (
bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2719 if (fromp != sp+slen) {
2748 return str_transcode0(
argc,
argv,
self, ecflags, ecopts);
2752 str_encode_associate(
VALUE str,
int encidx)
2792 encidx = str_transcode(
argc,
argv, &newstr);
2794 if (encidx < 0)
return str;
2795 if (newstr ==
str) {
2800 return str_encode_associate(
str, encidx);
2865 int encidx = str_transcode(
argc,
argv, &newstr);
2866 return encoded_dup(newstr,
str, encidx);
2875 int encidx = str_transcode0(
argc,
argv, &newstr, ecflags, ecopts);
2876 return encoded_dup(newstr,
str, encidx);
2883 if (newstr ==
str) {
2891 return str_encode_associate(newstr, encidx);
2900 econv_free(
void *
ptr)
2907 econv_memsize(
const void *
ptr)
2914 {
NULL, econv_free, econv_memsize,},
2925 make_dummy_encoding(
const char *
name)
2935 make_encoding(
const char *
name)
2940 enc = make_dummy_encoding(
name);
2945 make_encobj(
const char *
name)
2971 const char *arg_name, *result_name;
2974 enc_arg(&
arg, &arg_name, &arg_enc);
2978 if (result_name ==
NULL)
2981 result_enc = make_encoding(result_name);
2989 const char **sname_p,
const char **dname_p,
2994 VALUE opt, flags_v, ecopts;
2996 const char *sname, *dname;
3002 if (!
NIL_P(flags_v)) {
3009 else if (!
NIL_P(opt)) {
3042 *ecflags_p = ecflags;
3047 decorate_convpath(
VALUE convpath,
int ecflags)
3054 num_decorators = decorator_names(ecflags, decorators);
3055 if (num_decorators == -1)
3079 for (
i = 0;
i < num_decorators;
i++)
3086 search_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3091 if (*ary_p ==
Qnil) {
3132 VALUE snamev, dnamev;
3133 const char *sname, *dname;
3139 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3142 transcode_search_path(sname, dname, search_convpath_i, &convpath);
3144 if (
NIL_P(convpath)) {
3151 if (decorate_convpath(convpath, ecflags) == -1) {
3170 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3172 return RTEST(convpath);
3182 rb_econv_init_by_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3190 ret = rb_econv_add_converter(a->
ec, sname, dname, a->
index);
3197 rb_econv_init_by_convpath(
VALUE self,
VALUE convpath,
3198 const char **sname_p,
const char **dname_p,
3206 const char *sname, *dname;
3212 VALUE snamev, dnamev;
3219 enc_arg(&snamev, &sname, &senc);
3221 enc_arg(&dnamev, &dname, &denc);
3242 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &
arg);
3243 if (
ret == -1 ||
arg.ret == -1) {
3244 VALUE msg =
rb_sprintf(
"adding conversion failed: %s to %s", sname, dname);
3382 VALUE snamev, dnamev;
3383 const char *sname, *dname;
3394 ec = rb_econv_init_by_convpath(
self, convpath, &sname, &dname, &senc, &denc);
3399 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3412 senc = make_dummy_encoding(sname);
3414 denc = make_dummy_encoding(dname);
3438 econv_inspect(
VALUE self)
3445 return rb_sprintf(
"#<%s: uninitialized>", cname);
3451 econv_description(sname, dname,
ec->
flags,
str);
3458 check_econv(
VALUE self)
3476 econv_source_encoding(
VALUE self)
3491 econv_destination_encoding(
VALUE self)
3522 econv_convpath(
VALUE self)
3692 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3695 const unsigned char *ip, *is;
3696 unsigned char *op, *os;
3697 long output_byteoffset, output_bytesize;
3698 unsigned long output_byteend;
3703 if (
NIL_P(output_byteoffset_v))
3704 output_byteoffset = 0;
3706 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3708 if (
NIL_P(output_bytesize_v))
3709 output_bytesize = 0;
3711 output_bytesize =
NUM2LONG(output_bytesize_v);
3713 if (!
NIL_P(flags_v)) {
3719 else if (!
NIL_P(opt)) {
3738 if (
NIL_P(output_bytesize_v)) {
3746 if (
NIL_P(output_byteoffset_v))
3749 if (output_byteoffset < 0)
3755 if (output_bytesize < 0)
3758 output_byteend = (
unsigned long)output_byteoffset +
3759 (
unsigned long)output_bytesize;
3761 if (output_byteend < (
unsigned long)output_byteoffset ||
3776 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3777 os = op + output_bytesize;
3786 if (
LONG_MAX / 2 < output_bytesize)
3788 output_bytesize *= 2;
3789 output_byteoffset_v =
Qnil;
3797 return econv_result_to_symbol(res);
3835 econv_convert(
VALUE self,
VALUE source_string)
3853 ret = econv_primitive_convert(ac, av,
self);
3855 if (
ret == sym_invalid_byte_sequence ||
3856 ret == sym_undefined_conversion ||
3857 ret == sym_incomplete_input) {
3862 if (
ret == sym_finished) {
3866 if (
ret != sym_source_buffer_empty) {
3867 rb_bug(
"unexpected result of econv_primitive_convert");
3885 econv_finish(
VALUE self)
3901 ret = econv_primitive_convert(ac, av,
self);
3903 if (
ret == sym_invalid_byte_sequence ||
3904 ret == sym_undefined_conversion ||
3905 ret == sym_incomplete_input) {
3910 if (
ret != sym_finished) {
3911 rb_bug(
"unexpected result of econv_primitive_convert");
3993 econv_primitive_errinfo(
VALUE self)
4051 econv_insert_output(
VALUE self,
VALUE string)
4053 const char *insert_enc;
4109 if (putbackable <
n)
4144 econv_last_error(
VALUE self)
4149 exc = make_econv_exception(
ec);
4168 econv_get_replacement(
VALUE self)
4174 ret = make_replacement(
ec);
4176 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4211 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4220 return make_econv_exception(
ec);
4228 exc = make_econv_exception(
ec);
4241 ecerr_source_encoding_name(
VALUE self)
4267 ecerr_source_encoding(
VALUE self)
4279 ecerr_destination_encoding_name(
VALUE self)
4291 ecerr_destination_encoding(
VALUE self)
4312 ecerr_error_char(
VALUE self)
4333 ecerr_error_bytes(
VALUE self)
4345 ecerr_readagain_bytes(
VALUE self)
4375 ecerr_incomplete_input(
VALUE self)
4416 sym_invalid_byte_sequence =
ID2SYM(
rb_intern(
"invalid_byte_sequence"));
4418 sym_destination_buffer_full =
ID2SYM(
rb_intern(
"destination_buffer_full"));
4428 #ifdef ENABLE_ECONV_NEWLINE_OPTION
4552 rb_define_method(rb_eUndefinedConversionError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4553 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4554 rb_define_method(rb_eUndefinedConversionError,
"source_encoding", ecerr_source_encoding, 0);
4555 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding", ecerr_destination_encoding, 0);
4556 rb_define_method(rb_eUndefinedConversionError,
"error_char", ecerr_error_char, 0);
4558 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4559 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4560 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding", ecerr_source_encoding, 0);
4561 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding", ecerr_destination_encoding, 0);
4562 rb_define_method(rb_eInvalidByteSequenceError,
"error_bytes", ecerr_error_bytes, 0);
4563 rb_define_method(rb_eInvalidByteSequenceError,
"readagain_bytes", ecerr_readagain_bytes, 0);
4564 rb_define_method(rb_eInvalidByteSequenceError,
"incomplete_input?", ecerr_incomplete_input, 0);