/*
 * Feature switch for the newline option.  This file tests the name with
 * #ifdef (not #if), so only the fact that it is defined matters; the
 * value 1 is not examined by those tests.  The guarded regions declare
 * and use sym_newline, sym_universal, sym_crlf, sym_cr and sym_lf.
 */
17 #define ENABLE_ECONV_NEWLINE_OPTION 1
/*
 * File-local handles to the conversion exception classes.
 * rb_eUndefinedConversionError is passed to rb_raise and, together with
 * rb_eInvalidByteSequenceError, receives the ecerr_* accessor methods
 * through rb_define_method.
 * NOTE(review): rb_eConverterNotFoundError is presumably raised when no
 * conversion path exists -- confirm at its use sites.
 */
20 static VALUE rb_eUndefinedConversionError;
21 static VALUE rb_eInvalidByteSequenceError;
22 static VALUE rb_eConverterNotFoundError;
/*
 * Cached Symbol objects used while interpreting conversion options.
 * sym_replace and sym_attr are compared directly (==) against option
 * values in the option parsing code.
 * NOTE(review): the remaining names presumably follow the same pattern
 * (option keys or option values) -- confirm against econv_opts and the
 * option-hash preparation code.
 */
26 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback, sym_aref;
27 static VALUE sym_xml, sym_text, sym_attr;
28 static VALUE sym_universal_newline;
29 static VALUE sym_crlf_newline;
30 static VALUE sym_cr_newline;
31 #ifdef ENABLE_ECONV_NEWLINE_OPTION
32 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
/* NOTE(review): presumably the option key for partial input -- confirm. */
34 static VALUE sym_partial_input;
/*
 * Cached Symbols naming conversion results.  econv_convert and
 * econv_finish compare the value returned by econv_primitive_convert
 * against sym_invalid_byte_sequence, sym_undefined_conversion,
 * sym_incomplete_input, sym_finished and sym_source_buffer_empty.
 * The init code creates them with ID2SYM(rb_intern(...)), using the
 * variable name without the sym_ prefix as the symbol text.
 */
36 static VALUE sym_invalid_byte_sequence;
37 static VALUE sym_undefined_conversion;
38 static VALUE sym_destination_buffer_full;
39 static VALUE sym_source_buffer_empty;
40 static VALUE sym_finished;
41 static VALUE sym_after_output;
42 static VALUE sym_incomplete_input;
44 static unsigned char *
45 allocate_converted_string(
const char *sname,
const char *dname,
46 const unsigned char *
str,
size_t len,
47 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
83 #define TRANSCODING_READBUF(tc) \
84 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
87 #define TRANSCODING_WRITEBUF(tc) \
88 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
89 (tc)->writebuf.ary : \
/*
 * Capacity in bytes of the write buffer belonging to transcoding tc.
 * When the transcoder max_output fits in the embedded array
 * writebuf.ary, the result is sizeof that array; otherwise it is
 * max_output converted to size_t.
 * The argument tc is evaluated more than once, so it must be free of
 * side effects.
 */
91 #define TRANSCODING_WRITEBUF_SIZE(tc) \
92 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
93 sizeof((tc)->writebuf.ary) : \
94 (size_t)(tc)->transcoder->max_output)
/*
 * Size of union rb_transcoding_state_t, cast to int so it can be
 * compared with the int-typed tr->state_size.  Code in this file takes
 * a separate branch when TRANSCODING_STATE_EMBED_MAX < tr->state_size.
 * NOTE(review): that branch presumably manages out-of-line state
 * storage -- confirm at the comparison sites.
 */
95 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
96 #define TRANSCODING_STATE(tc) \
97 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
/*
 * True when sname is the empty string (its first char is NUL).
 * dname is accepted for symmetry but is not evaluated.
 * rb_econv_decorate_at registers decorators by passing an empty source
 * name to rb_econv_add_converter, which matches this test.
 */
153 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
165 make_transcoder_entry(
const char *sname,
const char *dname)
177 entry->
sname = sname;
178 entry->
dname = dname;
188 get_transcoder_entry(
const char *sname,
const char *dname)
206 const char *
const sname =
tr->src_encoding;
207 const char *
const dname =
tr->dst_encoding;
211 entry = make_transcoder_entry(sname, dname);
221 declare_transcoder(
const char *sname,
const char *dname,
const char *lib)
225 entry = make_transcoder_entry(sname, dname);
/*
 * Prefix prepended to a transcoder library name when building its
 * path.  Callers use sizeof(transcoder_lib_prefix) - 1, i.e. the
 * length without the terminating NUL, both to size the path and as
 * the memcpy length.
 */
229 static const char transcoder_lib_prefix[] =
"enc/trans/";
237 declare_transcoder(enc1, enc2, lib);
/*
 * Nonzero when STRCASECMP reports the two encoding names as equal
 * (returns 0).
 * NOTE(review): STRCASECMP is defined outside this file; presumably a
 * case-insensitive comparison -- confirm.
 */
240 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
257 const char *dname = (
const char *)
key;
276 transcode_search_path(
const char *sname,
const char *dname,
277 void (*callback)(
const char *sname,
const char *dname,
int depth,
void *
arg),
334 const char *enc = dname;
342 enc = (
const char *)val;
350 callback((
const char *)val, enc, --depth,
arg);
351 enc = (
const char *)val;
367 const char *
const lib = entry->
lib;
369 const size_t total_len =
sizeof(transcoder_lib_prefix) - 1 +
len;
373 memcpy(
path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
387 get_replacement_character(
const char *encname,
size_t *len_ret,
const char **repl_encname_ptr)
391 *repl_encname_ptr =
"UTF-8";
392 return "\xEF\xBF\xBD";
396 *repl_encname_ptr =
"US-ASCII";
405 static const unsigned char *
407 const unsigned char *in_start,
408 const unsigned char *inchar_start,
409 const unsigned char *in_p,
410 size_t *char_len_ptr)
412 const unsigned char *
ptr;
413 if (inchar_start - in_start < tc->recognized_len) {
415 inchar_start,
unsigned char, in_p - inchar_start);
426 transcode_restartable0(
const unsigned char **in_pos,
unsigned char **out_pos,
427 const unsigned char *in_stop,
unsigned char *out_stop,
432 int unitlen =
tr->input_unit_length;
435 const unsigned char *inchar_start;
436 const unsigned char *in_p;
438 unsigned char *out_p;
440 in_p = inchar_start = *in_pos;
444 #define SUSPEND(ret, num) \
446 tc->resume_position = (num); \
447 if (0 < in_p - inchar_start) \
448 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
449 inchar_start, unsigned char, in_p - inchar_start); \
452 tc->recognized_len += in_p - inchar_start; \
453 if (readagain_len) { \
454 tc->recognized_len -= readagain_len; \
455 tc->readagain_len = readagain_len; \
458 resume_label ## num:; \
460 #define SUSPEND_OBUF(num) \
462 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
465 #define SUSPEND_AFTER_OUTPUT(num) \
466 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
467 SUSPEND(econv_after_output, num); \
/*
 * Shorthands that let the surrounding code name fields of tc as if
 * they were plain variables.  Each expands to the tc member of the
 * same name, so reads and writes go straight to the transcoding
 * object.
 */
470 #define next_table (tc->next_table)
471 #define next_info (tc->next_info)
472 #define next_byte (tc->next_byte)
473 #define writebuf_len (tc->writebuf_len)
474 #define writebuf_off (tc->writebuf_off)
478 case 1:
goto resume_label1;
479 case 2:
goto resume_label2;
480 case 3:
goto resume_label3;
481 case 4:
goto resume_label4;
482 case 5:
goto resume_label5;
483 case 6:
goto resume_label6;
484 case 7:
goto resume_label7;
485 case 8:
goto resume_label8;
486 case 9:
goto resume_label9;
487 case 10:
goto resume_label10;
488 case 11:
goto resume_label11;
489 case 12:
goto resume_label12;
490 case 13:
goto resume_label13;
491 case 14:
goto resume_label14;
492 case 15:
goto resume_label15;
493 case 16:
goto resume_label16;
494 case 17:
goto resume_label17;
495 case 18:
goto resume_label18;
496 case 19:
goto resume_label19;
497 case 20:
goto resume_label20;
498 case 21:
goto resume_label21;
499 case 22:
goto resume_label22;
500 case 23:
goto resume_label23;
501 case 24:
goto resume_label24;
502 case 25:
goto resume_label25;
503 case 26:
goto resume_label26;
504 case 27:
goto resume_label27;
505 case 28:
goto resume_label28;
506 case 29:
goto resume_label29;
507 case 30:
goto resume_label30;
508 case 31:
goto resume_label31;
509 case 32:
goto resume_label32;
510 case 33:
goto resume_label33;
511 case 34:
goto resume_label34;
521 if (in_stop <= in_p) {
/*
 * Accessors for the lookup tables of transcoder tr.  They depend on tr
 * and on next_table (a shorthand for tc->next_table) being in scope.
 *
 * BYTE_ADDR(index)  address inside tr->byte_array at the given index
 * WORD_ADDR(index)  address inside tr->word_array at INFO2WORDINDEX(index)
 * BL_BASE           byte table reached through BYTE_LOOKUP_BASE of the
 *                   word entry for next_table
 * BL_INFO           word table reached through BYTE_LOOKUP_INFO of the
 *                   word entry for next_table
 * BL_MIN_BYTE       BL_BASE[0]
 * BL_MAX_BYTE       BL_BASE[1]
 * BL_OFFSET(byte)   BL_BASE entry at 2 + (byte - BL_MIN_BYTE); entries
 *                   after the two header bytes are indexed relative to
 *                   BL_MIN_BYTE
 * BL_ACTION(byte)   BL_INFO entry selected by BL_OFFSET(byte)
 *
 * NOTE(review): BL_OFFSET does no range check itself; callers
 * presumably compare byte with BL_MIN_BYTE and BL_MAX_BYTE first --
 * confirm at the use site.
 */
528 #define BYTE_ADDR(index) (tr->byte_array + (index))
529 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
530 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
531 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
532 #define BL_MIN_BYTE (BL_BASE[0])
533 #define BL_MAX_BYTE (BL_BASE[1])
534 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
535 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
548 const unsigned char *p = inchar_start;
561 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
562 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
564 while (in_p >= in_stop) {
610 const unsigned char *char_start;
612 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
618 if (
tr->max_output <= out_stop - out_p)
634 const unsigned char *char_start;
637 if (
tr->max_output <= out_stop - out_p) {
638 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
640 char_start, (
size_t)char_len,
641 out_p, out_stop - out_p);
644 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
646 char_start, (
size_t)char_len,
658 const unsigned char *char_start;
661 if (
tr->max_output <= out_stop - out_p) {
662 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
665 out_p, out_stop - out_p);
668 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
699 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
700 readagain_len = invalid_len - discard_len;
724 if (
tr->finish_func) {
726 if (
tr->max_output <= out_stop - out_p) {
728 out_p, out_stop - out_p);
751 transcode_restartable(
const unsigned char **in_pos,
unsigned char **out_pos,
752 const unsigned char *in_stop,
unsigned char *out_stop,
758 const unsigned char *readagain_pos = readagain_buf;
759 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
765 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|
ECONV_PARTIAL_INPUT);
768 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
773 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
784 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
786 if (
tr->state_init_func) {
805 const unsigned char **input_ptr,
const unsigned char *input_stop,
806 unsigned char **output_ptr,
unsigned char *output_stop,
809 return transcode_restartable(
810 input_ptr, output_ptr,
811 input_stop, output_stop,
819 if (
tr->state_fini_func) {
822 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
837 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
850 rb_econv_alloc(
int n_hint)
904 ec->
elems[
i].
tc = rb_transcoding_open_by_transcoder(
tr, 0);
932 for (
i = 0;
i <
n;
i++) {
939 ec = rb_econv_alloc(
n);
941 for (
i = 0;
i <
n;
i++) {
943 ret = rb_econv_add_transcoder_at(ec,
tr, ec->
num_trans);
959 trans_open_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
966 toarg->
entries[depth] = get_transcoder_entry(sname, dname);
970 rb_econv_open0(
const char *sname,
const char *dname,
int ecflags)
981 if (*sname ==
'\0' && *dname ==
'\0') {
989 toarg.num_additional = 0;
990 num_trans = transcode_search_path(sname, dname, trans_open_i, (
void *)&toarg);
998 ec = rb_econv_open_by_transcoder_entries(num_trans,
entries);
1003 ec->
flags = ecflags;
/*
 * NOTE(review): presumably the capacity of the decorators arrays that
 * decorator_names fills through decorators_ret -- confirm against the
 * array declarations in its callers.
 */
1010 #define MAX_ECFLAGS_DECORATORS 32
1013 decorator_names(
int ecflags,
const char **decorators_ret)
1034 decorators_ret[num_decorators++] =
"xml_text_escape";
1036 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1038 decorators_ret[num_decorators++] =
"xml_attr_quote";
1041 decorators_ret[num_decorators++] =
"crlf_newline";
1043 decorators_ret[num_decorators++] =
"cr_newline";
1045 decorators_ret[num_decorators++] =
"universal_newline";
1047 return num_decorators;
1058 num_decorators = decorator_names(ecflags, decorators);
1059 if (num_decorators == -1)
1066 for (
i = 0;
i < num_decorators;
i++)
1079 const unsigned char **input_ptr,
const unsigned char *input_stop,
1080 unsigned char **output_ptr,
unsigned char *output_stop,
1087 const unsigned char **ipp, *is, *iold;
1088 unsigned char **opp, *os, *oold;
1134 te->
last_result = res = rb_transcoding_convert(te->
tc, ipp, is, opp, os,
f);
1135 if (iold != *ipp || oold != *opp)
1160 const unsigned char **input_ptr,
const unsigned char *input_stop,
1161 unsigned char **output_ptr,
unsigned char *output_stop,
1163 int *result_position_ptr)
1166 int needreport_index;
1169 unsigned char empty_buf;
1170 unsigned char *empty_ptr = &empty_buf;
1173 input_ptr = (
const unsigned char **)&empty_ptr;
1174 input_stop = empty_ptr;
1178 output_ptr = &empty_ptr;
1179 output_stop = empty_ptr;
1193 goto found_needreport;
1200 rb_bug(
"unexpected transcode last result");
1210 res = rb_trans_conv(ec,
NULL,
NULL, output_ptr, output_stop,
1212 result_position_ptr);
1224 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1225 sweep_start = needreport_index + 1;
1226 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1237 if (result_position_ptr)
1238 *result_position_ptr =
i;
1242 if (result_position_ptr)
1243 *result_position_ptr = -1;
1249 const unsigned char **input_ptr,
const unsigned char *input_stop,
1250 unsigned char **output_ptr,
unsigned char *output_stop,
1254 int result_position;
1262 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1263 len = output_stop - *output_ptr;
1265 *output_ptr = output_stop;
1279 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1280 len = output_stop - *output_ptr;
1283 len = input_stop - *input_ptr;
1286 *(*output_ptr)++ = *(*input_ptr)++;
1293 if (*input_ptr != input_stop)
1305 if (data_start != data_end) {
1307 if (output_stop - *output_ptr < data_end - data_start) {
1308 len = output_stop - *output_ptr;
1310 *output_ptr = output_stop;
1315 len = data_end - data_start;
1335 *input_ptr != input_stop) {
1336 input_stop = *input_ptr;
1337 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1343 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1348 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
/*
 * Forward declaration; the definition is not next to this prototype.
 * Callers in this file compare the int result with 0.
 */
1369 static int output_replacement_character(
rb_econv_t *ec);
1375 unsigned char utfbuf[1024];
1376 const unsigned char *utf;
1378 int utf_allocated = 0;
1379 char charef_buf[16];
1380 const unsigned char *p;
1389 utfbuf,
sizeof(utfbuf),
1397 if (utf_len % 4 != 0)
1401 while (4 <= utf_len) {
1407 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1429 const unsigned char **input_ptr,
const unsigned char *input_stop,
1430 unsigned char **output_ptr,
unsigned char *output_stop,
1435 unsigned char empty_buf;
1436 unsigned char *empty_ptr = &empty_buf;
1441 input_ptr = (
const unsigned char **)&empty_ptr;
1442 input_stop = empty_ptr;
1446 output_ptr = &empty_ptr;
1447 output_stop = empty_ptr;
1451 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1459 if (output_replacement_character(ec) == 0)
1470 if (output_replacement_character(ec) == 0)
1475 if (output_hex_charref(ec) == 0)
1496 return tr->src_encoding;
1497 return tr->dst_encoding;
1500 static unsigned char *
1501 allocate_converted_string(
const char *sname,
const char *dname,
1502 const unsigned char *
str,
size_t len,
1503 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1504 size_t *dst_len_ptr)
1506 unsigned char *dst_str;
1513 const unsigned char *sp;
1517 dst_bufsize = caller_dst_bufsize;
1527 dst_str = caller_dst_buf;
1529 dst_str =
xmalloc(dst_bufsize);
1532 dp = dst_str+dst_len;
1534 dst_len =
dp - dst_str;
1540 if (dst_str == caller_dst_buf) {
1543 memcpy(tmp, dst_str, dst_bufsize/2);
1547 dst_str =
xrealloc(dst_str, dst_bufsize);
1549 dp = dst_str+dst_len;
1551 dst_len =
dp - dst_str;
1557 *dst_len_ptr = dst_len;
1561 if (dst_str != caller_dst_buf)
1570 const unsigned char *
str,
size_t len,
const char *str_encoding)
1573 unsigned char insert_buf[4096];
1574 const unsigned char *insert_str =
NULL;
1577 int last_trans_index;
1580 unsigned char **buf_start_p;
1581 unsigned char **data_start_p;
1582 unsigned char **data_end_p;
1583 unsigned char **buf_end_p;
1597 insert_str = allocate_converted_string(str_encoding, insert_encoding,
1598 str,
len, insert_buf,
sizeof(insert_buf), &insert_len);
1599 if (insert_str ==
NULL)
1614 tc = ec->
elems[last_trans_index].
tc;
1616 if (need < insert_len)
1618 if (last_trans_index == 0) {
1638 tc = ec->
elems[last_trans_index].
tc;
1641 if (*buf_start_p ==
NULL) {
1644 *data_start_p =
buf;
1646 *buf_end_p =
buf+need;
1648 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1649 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1650 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1651 *data_start_p = *buf_start_p;
1652 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1654 size_t s = (*data_end_p - *buf_start_p) + need;
1658 *data_start_p =
buf;
1659 *data_end_p =
buf + (*data_end_p - *buf_start_p);
1661 *buf_end_p =
buf + s;
1665 memcpy(*data_end_p, insert_str, insert_len);
1666 *data_end_p += insert_len;
1673 if (insert_str !=
str && insert_str != insert_buf)
1674 xfree((
void*)insert_str);
1678 if (insert_str !=
str && insert_str != insert_buf)
1679 xfree((
void*)insert_str);
1692 rb_transcoding_close(ec->
elems[
i].
tc);
1728 #if SIZEOF_SIZE_T > SIZEOF_INT
1759 tr = load_transcoder_entry(entry);
1791 return data.ascii_compat_name;
1797 unsigned const char *sp, *se;
1798 unsigned char *ds, *
dp, *de;
1816 unsigned long new_capa = (
unsigned long)dlen +
len + max_output;
1822 sp = (
const unsigned char *)ss;
1828 len -= (
const char *)sp - ss;
1829 ss = (
const char *)sp;
1865 rb_econv_add_converter(
rb_econv_t *ec,
const char *sname,
const char *dname,
int n)
1873 entry = get_transcoder_entry(sname, dname);
1877 tr = load_transcoder_entry(entry);
1880 return rb_econv_add_transcoder_at(ec,
tr,
n);
1884 rb_econv_decorate_at(
rb_econv_t *ec,
const char *decorator_name,
int n)
1886 return rb_econv_add_converter(ec,
"", decorator_name,
n);
1895 return rb_econv_decorate_at(ec, decorator_name, 0);
1901 return rb_econv_decorate_at(ec, decorator_name, 1);
1903 return rb_econv_decorate_at(ec, decorator_name, 0);
1912 return rb_econv_decorate_at(ec, decorator_name, 0);
1918 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans-1);
1920 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans);
1926 const char *dname = 0;
1930 dname =
"universal_newline";
1933 dname =
"crlf_newline";
1936 dname =
"cr_newline";
1945 for (
i=0;
i < num_trans;
i++) {
1947 rb_transcoding_close(ec->
elems[
i].
tc);
1960 econv_description(
const char *sname,
const char *dname,
int ecflags,
VALUE mesg)
1962 int has_description = 0;
1967 if (*sname !=
'\0' || *dname !=
'\0') {
1970 else if (*dname ==
'\0')
1974 has_description = 1;
1981 const char *pre =
"";
1982 if (has_description)
2008 has_description = 1;
2010 if (!has_description) {
2022 econv_description(sname, dname, ecflags, mesg);
2047 else if (readagain_len) {
2084 const char *start, *end;
2108 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2132 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2134 unsigned char **out_start_ptr,
2135 unsigned char **out_pos,
2136 unsigned char **out_stop_ptr)
2138 size_t len = (*out_pos - *out_start_ptr);
2139 size_t new_len = (
len + max_output) * 2;
2140 *out_start_ptr = resize_destination(destination,
len, new_len);
2141 *out_pos = *out_start_ptr +
len;
2142 *out_stop_ptr = *out_start_ptr + new_len;
2150 const unsigned char *replacement;
2151 const char *repl_enc;
2152 const char *ins_enc;
2164 replacement = (
const unsigned char *)get_replacement_character(ins_enc, &
len, &repl_enc);
2167 replacement = (
unsigned char *)
"?";
2181 const unsigned char *
str,
size_t len,
const char *encname)
2183 unsigned char *str2;
2185 const char *encname2;
2196 str2 = allocate_converted_string(encname, encname2,
str,
len,
NULL, 0, &len2);
2216 if (make_replacement(ec) == -1)
/*
 * Alias for rb_hash_aref.
 * NOTE(review): presumably selected as fallback_func when the fallback
 * option is a Hash, so that (*fallback_func)(fallback, rep) performs a
 * hash lookup -- confirm where fallback_func is assigned.
 */
2227 #define hash_fallback rb_hash_aref
2248 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2249 const unsigned char *in_stop,
unsigned char *out_stop,
2251 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2252 const char *src_encoding,
2253 const char *dst_encoding,
2260 unsigned char *out_start = *out_pos;
2296 rep = (*fallback_func)(fallback, rep);
2301 if ((
int)ret == -1) {
2311 exc = make_econv_exception(ec);
2317 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2327 transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2328 const unsigned char *in_stop,
unsigned char *out_stop,
2330 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2331 const char *src_encoding,
2332 const char *dst_encoding,
2339 unsigned char *out_start = *out_pos;
2340 const unsigned char *
ptr;
2354 unsigned char input_byte;
2355 const unsigned char *p = &input_byte;
2358 if (
ptr < in_stop) {
2369 if (&input_byte != p)
2370 ptr += p - &input_byte;
2375 exc = make_econv_exception(ec);
2381 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2402 static unsigned char *
2403 str_transcoding_resize(
VALUE destination,
size_t len,
size_t new_len)
2410 econv_opts(
VALUE opt,
int ecflags)
2417 else if (
v==sym_replace) {
2427 else if (
v==sym_replace) {
2444 else if (
v==sym_attr) {
2455 #ifdef ENABLE_ECONV_NEWLINE_OPTION
2459 if (
v == sym_universal) {
2462 else if (
v == sym_crlf) {
2465 else if (
v == sym_cr) {
2468 else if (
v == sym_lf) {
2482 int setflags = 0, newlineflag = 0;
2487 newlineflag |= !
NIL_P(
v);
2492 newlineflag |= !
NIL_P(
v);
2497 newlineflag |= !
NIL_P(
v);
2501 ecflags |= setflags;
2514 if (
NIL_P(opthash)) {
2518 ecflags = econv_opts(opthash, ecflags);
2546 if (!
NIL_P(newhash))
2565 if (
NIL_P(opthash)) {
2570 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2574 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2578 if (!
NIL_P(replacement)) {
2624 const char *sname, *dname;
2625 int sencidx, dencidx;
2627 dencidx = enc_arg(arg1, &dname, &denc);
2635 sencidx = enc_arg(arg2, &sname, &senc);
2652 unsigned char *
buf, *
bp, *sp;
2653 const unsigned char *fromp;
2655 const char *sname, *dname;
2657 int explicitly_invalid_replace =
TRUE;
2664 if (!ecflags)
return -1;
2668 explicitly_invalid_replace =
FALSE;
2676 dencidx = str_transcode_enc_args(
str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2682 if (senc && senc == denc) {
2685 if (!
NIL_P(ecopts)) {
2693 return NIL_P(arg2) ? -1 : dencidx;
2701 return NIL_P(arg2) ? -1 : dencidx;
2717 transcode_loop(&fromp, &
bp, (sp+slen), (
bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2718 if (fromp != sp+slen) {
2747 return str_transcode0(
argc,
argv,
self, ecflags, ecopts);
2751 str_encode_associate(
VALUE str,
int encidx)
2791 encidx = str_transcode(
argc,
argv, &newstr);
2793 if (encidx < 0)
return str;
2794 if (newstr ==
str) {
2799 return str_encode_associate(
str, encidx);
2864 int encidx = str_transcode(
argc,
argv, &newstr);
2865 return encoded_dup(newstr,
str, encidx);
2874 int encidx = str_transcode0(
argc,
argv, &newstr, ecflags, ecopts);
2875 return encoded_dup(newstr,
str, encidx);
2882 if (newstr ==
str) {
2890 return str_encode_associate(newstr, encidx);
2899 econv_free(
void *
ptr)
2906 econv_memsize(
const void *
ptr)
2913 {
NULL, econv_free, econv_memsize,},
2924 make_dummy_encoding(
const char *
name)
2934 make_encoding(
const char *
name)
2939 enc = make_dummy_encoding(
name);
2944 make_encobj(
const char *
name)
2970 const char *arg_name, *result_name;
2973 enc_arg(&
arg, &arg_name, &arg_enc);
2977 if (result_name ==
NULL)
2980 result_enc = make_encoding(result_name);
2988 const char **sname_p,
const char **dname_p,
2993 VALUE opt, flags_v, ecopts;
2995 const char *sname, *dname;
3001 if (!
NIL_P(flags_v)) {
3008 else if (!
NIL_P(opt)) {
3041 *ecflags_p = ecflags;
3046 decorate_convpath(
VALUE convpath,
int ecflags)
3053 num_decorators = decorator_names(ecflags, decorators);
3054 if (num_decorators == -1)
3078 for (
i = 0;
i < num_decorators;
i++)
3085 search_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3090 if (*ary_p ==
Qnil) {
3131 VALUE snamev, dnamev;
3132 const char *sname, *dname;
3138 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3141 transcode_search_path(sname, dname, search_convpath_i, &convpath);
3143 if (
NIL_P(convpath)) {
3150 if (decorate_convpath(convpath, ecflags) == -1) {
3169 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3171 return RTEST(convpath);
3181 rb_econv_init_by_convpath_i(
const char *sname,
const char *dname,
int depth,
void *
arg)
3189 ret = rb_econv_add_converter(a->
ec, sname, dname, a->
index);
3196 rb_econv_init_by_convpath(
VALUE self,
VALUE convpath,
3197 const char **sname_p,
const char **dname_p,
3205 const char *sname, *dname;
3211 VALUE snamev, dnamev;
3218 enc_arg(&snamev, &sname, &senc);
3220 enc_arg(&dnamev, &dname, &denc);
3241 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &
arg);
3242 if (
ret == -1 ||
arg.ret == -1) {
3243 VALUE msg =
rb_sprintf(
"adding conversion failed: %s to %s", sname, dname);
3381 VALUE snamev, dnamev;
3382 const char *sname, *dname;
3393 ec = rb_econv_init_by_convpath(
self, convpath, &sname, &dname, &senc, &denc);
3398 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3411 senc = make_dummy_encoding(sname);
3413 denc = make_dummy_encoding(dname);
3437 econv_inspect(
VALUE self)
3444 return rb_sprintf(
"#<%s: uninitialized>", cname);
3450 econv_description(sname, dname,
ec->
flags,
str);
3457 check_econv(
VALUE self)
3475 econv_source_encoding(
VALUE self)
3490 econv_destination_encoding(
VALUE self)
3521 econv_convpath(
VALUE self)
3691 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3694 const unsigned char *ip, *is;
3695 unsigned char *op, *os;
3696 long output_byteoffset, output_bytesize;
3697 unsigned long output_byteend;
3702 if (
NIL_P(output_byteoffset_v))
3703 output_byteoffset = 0;
3705 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3707 if (
NIL_P(output_bytesize_v))
3708 output_bytesize = 0;
3710 output_bytesize =
NUM2LONG(output_bytesize_v);
3712 if (!
NIL_P(flags_v)) {
3718 else if (!
NIL_P(opt)) {
3737 if (
NIL_P(output_bytesize_v)) {
3745 if (
NIL_P(output_byteoffset_v))
3748 if (output_byteoffset < 0)
3754 if (output_bytesize < 0)
3757 output_byteend = (
unsigned long)output_byteoffset +
3758 (
unsigned long)output_bytesize;
3760 if (output_byteend < (
unsigned long)output_byteoffset ||
3775 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3776 os = op + output_bytesize;
3785 if (
LONG_MAX / 2 < output_bytesize)
3787 output_bytesize *= 2;
3788 output_byteoffset_v =
Qnil;
3796 return econv_result_to_symbol(res);
3834 econv_convert(
VALUE self,
VALUE source_string)
3852 ret = econv_primitive_convert(ac, av,
self);
3854 if (
ret == sym_invalid_byte_sequence ||
3855 ret == sym_undefined_conversion ||
3856 ret == sym_incomplete_input) {
3861 if (
ret == sym_finished) {
3865 if (
ret != sym_source_buffer_empty) {
3866 rb_bug(
"unexpected result of econv_primitive_convert");
3884 econv_finish(
VALUE self)
3900 ret = econv_primitive_convert(ac, av,
self);
3902 if (
ret == sym_invalid_byte_sequence ||
3903 ret == sym_undefined_conversion ||
3904 ret == sym_incomplete_input) {
3909 if (
ret != sym_finished) {
3910 rb_bug(
"unexpected result of econv_primitive_convert");
3992 econv_primitive_errinfo(
VALUE self)
4050 econv_insert_output(
VALUE self,
VALUE string)
4052 const char *insert_enc;
4108 if (putbackable <
n)
4143 econv_last_error(
VALUE self)
4148 exc = make_econv_exception(
ec);
4167 econv_get_replacement(
VALUE self)
4173 ret = make_replacement(
ec);
4175 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4210 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4219 return make_econv_exception(
ec);
4227 exc = make_econv_exception(
ec);
4240 ecerr_source_encoding_name(
VALUE self)
4266 ecerr_source_encoding(
VALUE self)
4278 ecerr_destination_encoding_name(
VALUE self)
4290 ecerr_destination_encoding(
VALUE self)
4311 ecerr_error_char(
VALUE self)
4332 ecerr_error_bytes(
VALUE self)
4344 ecerr_readagain_bytes(
VALUE self)
4374 ecerr_incomplete_input(
VALUE self)
4416 sym_invalid_byte_sequence =
ID2SYM(
rb_intern(
"invalid_byte_sequence"));
4418 sym_destination_buffer_full =
ID2SYM(
rb_intern(
"destination_buffer_full"));
4428 #ifdef ENABLE_ECONV_NEWLINE_OPTION
4552 rb_define_method(rb_eUndefinedConversionError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4553 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4554 rb_define_method(rb_eUndefinedConversionError,
"source_encoding", ecerr_source_encoding, 0);
4555 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding", ecerr_destination_encoding, 0);
4556 rb_define_method(rb_eUndefinedConversionError,
"error_char", ecerr_error_char, 0);
4558 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4559 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4560 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding", ecerr_source_encoding, 0);
4561 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding", ecerr_destination_encoding, 0);
4562 rb_define_method(rb_eInvalidByteSequenceError,
"error_bytes", ecerr_error_bytes, 0);
4563 rb_define_method(rb_eInvalidByteSequenceError,
"readagain_bytes", ecerr_readagain_bytes, 0);
4564 rb_define_method(rb_eInvalidByteSequenceError,
"incomplete_input?", ecerr_incomplete_input, 0);