Ruby 2.7.0p0 (2019-12-25 revision 647ee6f091eafcce70ffb75ddf7e121e192ab217)
transcode.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  transcode.c -
4 
5  $Author$
6  created at: Tue Oct 30 16:10:22 JST 2007
7 
8  Copyright (C) 2007 Martin Duerst
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "internal.h"
14 #include "transcode_data.h"
15 #include <ctype.h>
16 
17 #define ENABLE_ECONV_NEWLINE_OPTION 1
18 
19 /* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */
20 static VALUE rb_eUndefinedConversionError;
21 static VALUE rb_eInvalidByteSequenceError;
22 static VALUE rb_eConverterNotFoundError;
23 
25 
26 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback, sym_aref;
27 static VALUE sym_xml, sym_text, sym_attr;
28 static VALUE sym_universal_newline;
29 static VALUE sym_crlf_newline;
30 static VALUE sym_cr_newline;
31 #ifdef ENABLE_ECONV_NEWLINE_OPTION
32 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
33 #endif
34 static VALUE sym_partial_input;
35 
36 static VALUE sym_invalid_byte_sequence;
37 static VALUE sym_undefined_conversion;
38 static VALUE sym_destination_buffer_full;
39 static VALUE sym_source_buffer_empty;
40 static VALUE sym_finished;
41 static VALUE sym_after_output;
42 static VALUE sym_incomplete_input;
43 
44 static unsigned char *
45 allocate_converted_string(const char *sname, const char *dname,
46  const unsigned char *str, size_t len,
47  unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
48  size_t *dst_len_ptr);
49 
50 /* dynamic structure, one per conversion (similar to iconv_t) */
51 /* may carry conversion state (e.g. for iso-2022-jp) */
/*
 * NOTE(review): this listing omits several members that the code in this
 * file uses (tc->transcoder, tc->resume_position, tc->next_info,
 * tc->writebuf_len, tc->writebuf_off) as well as the line that closes the
 * typedef.  Only the surviving members are annotated here.
 */
52 typedef struct rb_transcoding {
54 
55  int flags;
56 
/* lookup position saved across suspensions; transcode_restartable0 aliases
 * these two members through its next_table / next_byte macros */
58  unsigned int next_table;
60  unsigned char next_byte;
61  unsigned int output_index;
62 
63  ssize_t recognized_len; /* already interpreted */
64  ssize_t readagain_len; /* not yet interpreted */
/* input bytes of the character in progress; TRANSCODING_READBUF picks ary
 * or ptr depending on the transcoder's max_input */
65  union {
66  unsigned char ary[8]; /* max_input <= sizeof(ary) */
67  unsigned char *ptr; /* length: max_input */
68  } readbuf; /* recognized_len + readagain_len used */
69 
/* staging area for output that did not fit in the caller's buffer;
 * TRANSCODING_WRITEBUF picks ary or ptr depending on max_output */
72  union {
73  unsigned char ary[8]; /* max_output <= sizeof(ary) */
74  unsigned char *ptr; /* length: max_output */
75  } writebuf;
76 
/* per-conversion state handed to the transcoder's callbacks; embedded in
 * ary when it fits, otherwise allocated separately and reached via ptr */
77  union rb_transcoding_state_t { /* opaque data for stateful encoding */
78  void *ptr;
79  char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
81  } state;
/* Read buffer of tc: the embedded array when the transcoder's max_input
 * fits in it, otherwise the separately allocated pointer. */
83 #define TRANSCODING_READBUF(tc) \
84  ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
85  (tc)->readbuf.ary : \
86  (tc)->readbuf.ptr)
/* Write buffer of tc, selected the same way using max_output. */
87 #define TRANSCODING_WRITEBUF(tc) \
88  ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
89  (tc)->writebuf.ary : \
90  (tc)->writebuf.ptr)
/* Capacity in bytes of the buffer TRANSCODING_WRITEBUF selects: the size of
 * the embedded array, or max_output when the pointer variant is in use. */
91 #define TRANSCODING_WRITEBUF_SIZE(tc) \
92  ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
93  sizeof((tc)->writebuf.ary) : \
94  (size_t)(tc)->transcoder->max_output)
/* Largest state_size that still fits inside the embedded state union. */
95 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
/* State area of tc: embedded bytes when state_size fits in the union,
 * otherwise the separately allocated block. */
96 #define TRANSCODING_STATE(tc) \
97  ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
98  (tc)->state.ary : \
99  (tc)->state.ptr)
100 
/*
 * One element of a converter chain (used below as rb_econv_elem_t).
 * NOTE(review): the listing drops the members accessed elsewhere as
 * te->tc and te->last_result, and the closing line of the typedef.
 * The four pointers describe this element's output buffer: the allocated
 * range [out_buf_start, out_buf_end) and the pending data
 * [out_data_start, out_data_end) inside it.
 */
101 typedef struct {
103  unsigned char *out_buf_start;
104  unsigned char *out_data_start;
105  unsigned char *out_data_end;
106  unsigned char *out_buf_end;
109 
/*
 * Converter object: a chain of transcoding elements plus buffering and
 * error-reporting state.
 * NOTE(review): many members are missing from this listing (for example
 * destination_encoding_name, replacement_len, elems, num_allocated,
 * num_trans, num_finished, last_tc and parts of last_error, all of which
 * are assigned elsewhere in this file).
 */
110 struct rb_econv_t {
111  int flags;
112  int started; /* bool */
113 
114  const char *source_encoding_name;
116 
117  const unsigned char *replacement_str;
119  const char *replacement_enc;
120 
/* converter-level input buffer: allocated range and pending data range */
121  unsigned char *in_buf_start;
122  unsigned char *in_data_start;
123  unsigned char *in_data_end;
124  unsigned char *in_buf_end;
126  int replacement_allocated; /* bool */
131 
132  /* last error */
133  struct {
136  const char *source_encoding;
137  const char *destination_encoding;
138  const unsigned char *error_bytes_start;
141  } last_error;
142 
143  /* The following fields are only for Encoding::Converter.
144  * rb_econv_open set them NULL. */
147 };
148 
149 /*
150  * Dispatch data and logic
151  */
152 
/* True when the source name is the empty string, which is how this file
 * marks a decorator.  The dname argument is accepted but not examined. */
153 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
154 
/*
 * Registry record for one source/destination pair (used below as
 * transcoder_entry_t).
 * NOTE(review): the listing drops the `transcoder` member that
 * make_transcoder_entry initialises, and the closing line of the typedef.
 */
155 typedef struct {
156  const char *sname;
157  const char *dname;
158  const char *lib; /* null means no need to load a library */
161 
162 static st_table *transcoder_table;
163 
/*
 * Return the registry entry for sname -> dname, creating it if necessary.
 * transcoder_table maps a source name to a second-level table keyed by
 * destination name.  A freshly created entry has lib and transcoder set to
 * NULL.
 * NOTE(review): the lines that create the second-level table and allocate
 * `entry` are missing from this listing.
 */
164 static transcoder_entry_t *
165 make_transcoder_entry(const char *sname, const char *dname)
166 {
167  st_data_t val;
168  st_table *table2;
169 
170  if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
172  st_add_direct(transcoder_table, (st_data_t)sname, val);
173  }
174  table2 = (st_table *)val;
175  if (!st_lookup(table2, (st_data_t)dname, &val)) {
177  entry->sname = sname;
178  entry->dname = dname;
179  entry->lib = NULL;
180  entry->transcoder = NULL;
181  val = (st_data_t)entry;
182  st_add_direct(table2, (st_data_t)dname, val);
183  }
184  return (transcoder_entry_t *)val;
185 }
186 
187 static transcoder_entry_t *
188 get_transcoder_entry(const char *sname, const char *dname)
189 {
190  st_data_t val;
191  st_table *table2;
192 
193  if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
194  return NULL;
195  }
196  table2 = (st_table *)val;
197  if (!st_lookup(table2, (st_data_t)dname, &val)) {
198  return NULL;
199  }
200  return (transcoder_entry_t *)val;
201 }
202 
/*
 * Attach the transcoder `tr` to the registry entry for its
 * src_encoding -> dst_encoding pair, creating the entry if needed.
 * Raises ArgumentError when that pair already has a transcoder.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; the error message suggests this is the public
 * registration entry point.
 */
203 void
205 {
206  const char *const sname = tr->src_encoding;
207  const char *const dname = tr->dst_encoding;
208 
209  transcoder_entry_t *entry;
210 
211  entry = make_transcoder_entry(sname, dname);
212  if (entry->transcoder) {
213  rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
214  sname, dname);
215  }
216 
217  entry->transcoder = tr;
218 }
219 
220 static void
221 declare_transcoder(const char *sname, const char *dname, const char *lib)
222 {
223  transcoder_entry_t *entry;
224 
225  entry = make_transcoder_entry(sname, dname);
226  entry->lib = lib;
227 }
228 
229 static const char transcoder_lib_prefix[] = "enc/trans/";
230 
231 void
232 rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
233 {
234  if (!lib) {
235  rb_raise(rb_eArgError, "invalid library name - (null)");
236  }
237  declare_transcoder(enc1, enc2, lib);
238 }
239 
/* True when STRCASECMP reports the two encoding names as equal.
 * NOTE(review): presumably a case-insensitive comparison; STRCASECMP is
 * defined outside this listing. */
240 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
241 
/* Node of the work queue used by transcode_search_path.
 * NOTE(review): the `next` link that the search code follows and the closing
 * line of the typedef are missing from this listing. */
242 typedef struct search_path_queue_tag {
244  const char *enc;
246 
/* Working state of transcode_search_path (used below as search_path_bfs_t).
 * NOTE(review): the visited, queue and queue_last_ptr members referenced by
 * the search code are missing from this listing, as is the closing line. */
247 typedef struct {
251  const char *base_enc;
253 
/*
 * st_foreach callback used by transcode_search_path.  `key` is a destination
 * name reachable in one step from bfs->base_enc.
 *
 * A name already present in bfs->visited is skipped.  Otherwise a queue node
 * for it is appended at *bfs->queue_last_ptr and the name is recorded in
 * bfs->visited with bfs->base_enc as its predecessor.  Always returns
 * ST_CONTINUE so the whole table is walked.
 * NOTE(review): the declarations of `bfs` and `q` and the allocation of `q`
 * are on lines missing from this listing.
 */
254 static int
255 transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
256 {
257  const char *dname = (const char *)key;
260 
261  if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
262  return ST_CONTINUE;
263  }
264 
266  q->enc = dname;
267  q->next = NULL;
268  *bfs->queue_last_ptr = q;
269  bfs->queue_last_ptr = &q->next;
270 
271  st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
272  return ST_CONTINUE;
273 }
274 
/*
 * Search the registry for a chain of transcoders leading from sname to
 * dname, using a queue of names still to expand and a `visited` table that
 * maps each reached name to the name it was reached from.
 *
 * Returns -1 when the names are equal (per encoding_equal) or when no chain
 * exists; otherwise returns the number of steps.  On success `callback` is
 * invoked once per step as callback(from, to, depth, arg), starting with the
 * last step (depth == steps - 1) and ending with the first (depth == 0).
 * NOTE(review): the declaration and allocation of `q` and the creation and
 * seeding of bfs.visited are on lines missing from this listing.
 */
275 static int
276 transcode_search_path(const char *sname, const char *dname,
277  void (*callback)(const char *sname, const char *dname, int depth, void *arg),
278  void *arg)
279 {
280  search_path_bfs_t bfs;
282  st_data_t val;
283  st_table *table2;
284  int found;
285  int pathlen = -1;
286 
287  if (encoding_equal(sname, dname))
288  return -1;
289 
291  q->enc = sname;
292  q->next = NULL;
293  bfs.queue_last_ptr = &q->next;
294  bfs.queue = q;
295 
298 
299  while (bfs.queue) {
/* pop the head; when the queue becomes empty, new nodes go to bfs.queue */
300  q = bfs.queue;
301  bfs.queue = q->next;
302  if (!bfs.queue)
303  bfs.queue_last_ptr = &bfs.queue;
304 
/* names with no outgoing conversions are dead ends */
305  if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
306  xfree(q);
307  continue;
308  }
309  table2 = (st_table *)val;
310 
/* direct conversion to dname exists: record the last hop and stop */
311  if (st_lookup(table2, (st_data_t)dname, &val)) {
312  st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
313  xfree(q);
314  found = 1;
315  goto cleanup;
316  }
317 
/* otherwise enqueue every not-yet-visited destination of q->enc */
318  bfs.base_enc = q->enc;
319  st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
320  bfs.base_enc = NULL;
321 
322  xfree(q);
323  }
324  found = 0;
325 
326  cleanup:
/* release whatever is still queued */
327  while (bfs.queue) {
328  q = bfs.queue;
329  bfs.queue = q->next;
330  xfree(q);
331  }
332 
333  if (found) {
334  const char *enc = dname;
335  int depth;
336  pathlen = 0;
/* first walk: count the hops by following predecessors from dname until
 * a name whose recorded predecessor is NULL */
337  while (1) {
338  st_lookup(bfs.visited, (st_data_t)enc, &val);
339  if (!val)
340  break;
341  pathlen++;
342  enc = (const char *)val;
343  }
344  depth = pathlen;
345  enc = dname;
/* second walk: report each hop, last hop first */
346  while (1) {
347  st_lookup(bfs.visited, (st_data_t)enc, &val);
348  if (!val)
349  break;
350  callback((const char *)val, enc, --depth, arg);
351  enc = (const char *)val;
352  }
353  }
354 
355  st_free_table(bfs.visited);
356 
357  return pathlen; /* is -1 if not found */
358 }
359 
360 static const rb_transcoder *
361 load_transcoder_entry(transcoder_entry_t *entry)
362 {
363  if (entry->transcoder)
364  return entry->transcoder;
365 
366  if (entry->lib) {
367  const char *const lib = entry->lib;
368  const size_t len = strlen(lib);
369  const size_t total_len = sizeof(transcoder_lib_prefix) - 1 + len;
370  const VALUE fn = rb_str_new(0, total_len);
371  char *const path = RSTRING_PTR(fn);
372 
373  memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
374  memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len);
375  rb_str_set_len(fn, total_len);
376  OBJ_FREEZE(fn);
377  rb_require_string(fn);
378  }
379 
380  if (entry->transcoder)
381  return entry->transcoder;
382 
383  return NULL;
384 }
385 
/*
 * Pick the default replacement string for the encoding `encname`.
 *
 * For "UTF-8" (compared with encoding_equal) the result is the three bytes
 * EF BF BD, reported as encoded in "UTF-8".  For every other name it is "?",
 * reported as "US-ASCII".  The byte length is stored in *len_ret and the
 * encoding name of the replacement in *repl_encname_ptr.
 */
static const char*
get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
{
    const int is_utf8 = encoding_equal(encname, "UTF-8");

    *len_ret = is_utf8 ? 3 : 1;
    *repl_encname_ptr = is_utf8 ? "UTF-8" : "US-ASCII";
    return is_utf8 ? "\xEF\xBF\xBD" : "?";
}
400 
401 /*
402  * Transcoding engine logic
403  */
404 
/*
 * Locate the first byte of the character currently being converted and
 * store its total length (bytes recognised earlier plus bytes consumed in
 * this call) in *char_len_ptr.
 *
 * When fewer than tc->recognized_len bytes precede inchar_start in the
 * current input, the character began in an earlier call, so the returned
 * pointer is the transcoding's read buffer.  Otherwise the whole character
 * lies in the input and the pointer is inchar_start - recognized_len.
 * NOTE(review): the first line of the statement inside the `if` is missing
 * from this listing; its surviving arguments suggest it copies the bytes
 * from inchar_start up to in_p into the read buffer.
 */
405 static const unsigned char *
406 transcode_char_start(rb_transcoding *tc,
407  const unsigned char *in_start,
408  const unsigned char *inchar_start,
409  const unsigned char *in_p,
410  size_t *char_len_ptr)
411 {
412  const unsigned char *ptr;
413  if (inchar_start - in_start < tc->recognized_len) {
415  inchar_start, unsigned char, in_p - inchar_start);
416  ptr = TRANSCODING_READBUF(tc);
417  }
418  else {
419  ptr = inchar_start - tc->recognized_len;
420  }
421  *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
422  return ptr;
423 }
424 
/*
 * Core conversion loop for one transcoding.  Reads bytes from *in_pos up to
 * in_stop, walks the transcoder's lookup tables and writes converted bytes
 * from *out_pos up to out_stop.
 *
 * The function is resumable: whenever it has to hand control back (output
 * full, input exhausted, error, ...) the SUSPEND macro stores a resume
 * number in tc->resume_position and returns; the next call jumps straight
 * to the matching resume_labelN through the switch below.  Statement order
 * inside the loop therefore matters.
 * NOTE(review): numerous lines of this function are missing from the
 * listing (several SUSPEND calls, table-lookup lines, write-buffer
 * arguments and the final return), so only the surviving code is
 * described.
 */
425 static rb_econv_result_t
426 transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
427  const unsigned char *in_stop, unsigned char *out_stop,
428  rb_transcoding *tc,
429  const int opt)
430 {
431  const rb_transcoder *tr = tc->transcoder;
432  int unitlen = tr->input_unit_length;
433  ssize_t readagain_len = 0;
434 
435  const unsigned char *inchar_start;
436  const unsigned char *in_p;
437 
438  unsigned char *out_p;
439 
440  in_p = inchar_start = *in_pos;
441 
442  out_p = *out_pos;
443 
/* SUSPEND(ret, num): remember resume point `num`, save the bytes consumed
 * so far for the current character into the read buffer, publish in_p and
 * out_p to the caller, fold the consumed bytes into recognized_len (moving
 * readagain_len of them to tc->readagain_len), and return `ret`.  The label
 * resume_label<num> is where the next call continues. */
444 #define SUSPEND(ret, num) \
445  do { \
446  tc->resume_position = (num); \
447  if (0 < in_p - inchar_start) \
448  MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
449  inchar_start, unsigned char, in_p - inchar_start); \
450  *in_pos = in_p; \
451  *out_pos = out_p; \
452  tc->recognized_len += in_p - inchar_start; \
453  if (readagain_len) { \
454  tc->recognized_len -= readagain_len; \
455  tc->readagain_len = readagain_len; \
456  } \
457  return (ret); \
458  resume_label ## num:; \
459  } while (0)
/* SUSPEND_OBUF(num): suspend with econv_destination_buffer_full for as long
 * as there is no room for at least one output byte. */
460 #define SUSPEND_OBUF(num) \
461  do { \
462  while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
463  } while (0)
464 
/* SUSPEND_AFTER_OUTPUT(num): when ECONV_AFTER_OUTPUT is requested and this
 * call has produced output, suspend with econv_after_output. */
465 #define SUSPEND_AFTER_OUTPUT(num) \
466  if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
467  SUSPEND(econv_after_output, num); \
468  }
469 
/* these values must survive a suspension, so they live in tc */
470 #define next_table (tc->next_table)
471 #define next_info (tc->next_info)
472 #define next_byte (tc->next_byte)
473 #define writebuf_len (tc->writebuf_len)
474 #define writebuf_off (tc->writebuf_off)
475 
/* re-entry dispatch: continue where the previous call suspended */
476  switch (tc->resume_position) {
477  case 0: break;
478  case 1: goto resume_label1;
479  case 2: goto resume_label2;
480  case 3: goto resume_label3;
481  case 4: goto resume_label4;
482  case 5: goto resume_label5;
483  case 6: goto resume_label6;
484  case 7: goto resume_label7;
485  case 8: goto resume_label8;
486  case 9: goto resume_label9;
487  case 10: goto resume_label10;
488  case 11: goto resume_label11;
489  case 12: goto resume_label12;
490  case 13: goto resume_label13;
491  case 14: goto resume_label14;
492  case 15: goto resume_label15;
493  case 16: goto resume_label16;
494  case 17: goto resume_label17;
495  case 18: goto resume_label18;
496  case 19: goto resume_label19;
497  case 20: goto resume_label20;
498  case 21: goto resume_label21;
499  case 22: goto resume_label22;
500  case 23: goto resume_label23;
501  case 24: goto resume_label24;
502  case 25: goto resume_label25;
503  case 26: goto resume_label26;
504  case 27: goto resume_label27;
505  case 28: goto resume_label28;
506  case 29: goto resume_label29;
507  case 30: goto resume_label30;
508  case 31: goto resume_label31;
509  case 32: goto resume_label32;
510  case 33: goto resume_label33;
511  case 34: goto resume_label34;
512  }
513 
/* one iteration per input character: start at the root of the lookup tree */
514  while (1) {
515  inchar_start = in_p;
516  tc->recognized_len = 0;
517  next_table = tr->conv_tree_start;
518 
520 
/* no more input: finish unless the caller promised more to come */
521  if (in_stop <= in_p) {
522  if (!(opt & ECONV_PARTIAL_INPUT))
523  break;
525  continue;
526  }
527 
/* accessors for the byte-lookup node selected by next_table */
528 #define BYTE_ADDR(index) (tr->byte_array + (index))
529 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
530 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
531 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
532 #define BL_MIN_BYTE (BL_BASE[0])
533 #define BL_MAX_BYTE (BL_BASE[1])
534 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
535 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
536 
537  next_byte = (unsigned char)*in_p++;
538  follow_byte:
540  next_info = INVALID;
541  else {
543  }
544  follow_info:
/* the low five bits of next_info select the action */
545  switch (next_info & 0x1F) {
/* NOMAP: pass the input bytes of this character through unchanged */
546  case NOMAP:
547  {
548  const unsigned char *p = inchar_start;
549  writebuf_off = 0;
550  while (p < in_p) {
551  TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
552  }
554  writebuf_off = 0;
555  while (writebuf_off < writebuf_len) {
556  SUSPEND_OBUF(3);
557  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
558  }
559  }
560  continue;
/* these values mean next_info is itself the next table: fetch another
 * input byte and descend */
561  case 0x00: case 0x04: case 0x08: case 0x0C:
562  case 0x10: case 0x14: case 0x18: case 0x1C:
564  while (in_p >= in_stop) {
565  if (!(opt & ECONV_PARTIAL_INPUT))
566  goto incomplete;
568  }
569  next_byte = (unsigned char)*in_p++;
570  next_table = (unsigned int)next_info;
571  goto follow_byte;
572  case ZERObt: /* drop input */
573  continue;
/* fixed-length outputs encoded in next_info; each byte has its own resume
 * point because the output buffer can fill up between bytes */
574  case ONEbt:
575  SUSPEND_OBUF(9); *out_p++ = getBT1(next_info);
576  continue;
577  case TWObt:
578  SUSPEND_OBUF(10); *out_p++ = getBT1(next_info);
579  SUSPEND_OBUF(21); *out_p++ = getBT2(next_info);
580  continue;
581  case THREEbt:
582  SUSPEND_OBUF(11); *out_p++ = getBT1(next_info);
583  SUSPEND_OBUF(15); *out_p++ = getBT2(next_info);
584  SUSPEND_OBUF(16); *out_p++ = getBT3(next_info);
585  continue;
586  case FOURbt:
587  SUSPEND_OBUF(12); *out_p++ = getBT0(next_info);
588  SUSPEND_OBUF(17); *out_p++ = getBT1(next_info);
589  SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
590  SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
591  continue;
592  case GB4bt:
593  SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
594  SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
595  SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
596  SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
597  continue;
598  case STR1:
599  tc->output_index = 0;
602  tc->output_index++;
603  }
604  continue;
/* FUNii / FUNsi: the transcoder computes a new action word, which is then
 * dispatched again */
605  case FUNii:
606  next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
607  goto follow_info;
608  case FUNsi:
609  {
610  const unsigned char *char_start;
611  size_t char_len;
612  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
613  next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
614  goto follow_info;
615  }
/* FUNio / FUNso / FUNsio: the transcoder produces the output bytes.  When
 * the caller's buffer has room for max_output bytes it writes there
 * directly; otherwise the result is staged and copied out byte by byte. */
616  case FUNio:
617  SUSPEND_OBUF(13);
618  if (tr->max_output <= out_stop - out_p)
619  out_p += tr->func_io(TRANSCODING_STATE(tc),
620  next_info, out_p, out_stop - out_p);
621  else {
622  writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
623  next_info,
625  writebuf_off = 0;
626  while (writebuf_off < writebuf_len) {
627  SUSPEND_OBUF(20);
628  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
629  }
630  }
631  break;
632  case FUNso:
633  {
634  const unsigned char *char_start;
635  size_t char_len;
636  SUSPEND_OBUF(14);
637  if (tr->max_output <= out_stop - out_p) {
638  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
639  out_p += tr->func_so(TRANSCODING_STATE(tc),
640  char_start, (size_t)char_len,
641  out_p, out_stop - out_p);
642  }
643  else {
644  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
645  writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
646  char_start, (size_t)char_len,
648  writebuf_off = 0;
649  while (writebuf_off < writebuf_len) {
650  SUSPEND_OBUF(22);
651  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
652  }
653  }
654  break;
655  }
656  case FUNsio:
657  {
658  const unsigned char *char_start;
659  size_t char_len;
660  SUSPEND_OBUF(33);
661  if (tr->max_output <= out_stop - out_p) {
662  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
663  out_p += tr->func_sio(TRANSCODING_STATE(tc),
664  char_start, (size_t)char_len, next_info,
665  out_p, out_stop - out_p);
666  }
667  else {
668  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
669  writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
670  char_start, (size_t)char_len, next_info,
672  writebuf_off = 0;
673  while (writebuf_off < writebuf_len) {
674  SUSPEND_OBUF(34);
675  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
676  }
677  }
678  break;
679  }
/* INVALID: decide how many bytes belong to the bad sequence.  Within the
 * first input unit the sequence is extended towards unitlen bytes (or to
 * the end of input); beyond it, whole units are discarded and the
 * remainder is scheduled to be read again. */
680  case INVALID:
681  if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
682  if (tc->recognized_len + (in_p - inchar_start) < unitlen)
684  while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
685  in_p = in_stop;
687  }
688  if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
689  in_p = in_stop;
690  }
691  else {
692  in_p = inchar_start + (unitlen - tc->recognized_len);
693  }
694  }
695  else {
696  ssize_t invalid_len; /* including the last byte which causes invalid */
697  ssize_t discard_len;
698  invalid_len = tc->recognized_len + (in_p - inchar_start);
699  discard_len = ((invalid_len - 1) / unitlen) * unitlen;
700  readagain_len = invalid_len - discard_len;
701  }
702  goto invalid;
703  case UNDEF:
704  goto undef;
705  default:
706  rb_raise(rb_eRuntimeError, "unknown transcoding instruction");
707  }
708  continue;
709 
710  invalid:
712  continue;
713 
714  incomplete:
716  continue;
717 
718  undef:
720  continue;
721  }
722 
723  /* cleanup */
/* input is exhausted: let the transcoder emit any trailing bytes, using the
 * same direct-or-staged output scheme as the FUN* actions */
724  if (tr->finish_func) {
725  SUSPEND_OBUF(4);
726  if (tr->max_output <= out_stop - out_p) {
727  out_p += tr->finish_func(TRANSCODING_STATE(tc),
728  out_p, out_stop - out_p);
729  }
730  else {
731  writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
733  writebuf_off = 0;
734  while (writebuf_off < writebuf_len) {
735  SUSPEND_OBUF(23);
736  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
737  }
738  }
739  }
740  while (1)
742 #undef SUSPEND
743 #undef next_table
744 #undef next_info
745 #undef next_byte
746 #undef writebuf_len
747 #undef writebuf_off
748 }
749 
/*
 * Wrapper around transcode_restartable0 that first replays any bytes a
 * previous call scheduled for re-reading (tc->readagain_len).
 *
 * Those bytes are copied from the read buffer into a stack buffer and fed
 * through transcode_restartable0 with ECONV_PARTIAL_INPUT added.  If that
 * run ends with anything other than econv_source_buffer_empty, the bytes it
 * did not consume are counted back into tc->readagain_len and the result is
 * returned.  Otherwise conversion continues with the caller's real input.
 * NOTE(review): the first line of the statement that puts the unconsumed
 * bytes back is missing from this listing.
 */
750 static rb_econv_result_t
751 transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
752  const unsigned char *in_stop, unsigned char *out_stop,
753  rb_transcoding *tc,
754  const int opt)
755 {
756  if (tc->readagain_len) {
757  unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
758  const unsigned char *readagain_pos = readagain_buf;
759  const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
760  rb_econv_result_t res;
761 
762  MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
763  unsigned char, tc->readagain_len);
764  tc->readagain_len = 0;
765  res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
766  if (res != econv_source_buffer_empty) {
768  readagain_pos, unsigned char, readagain_stop - readagain_pos);
769  tc->readagain_len += readagain_stop - readagain_pos;
770  return res;
771  }
772  }
773  return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
774 }
775 
776 static rb_transcoding *
777 rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
778 {
779  rb_transcoding *tc;
780 
781  tc = ALLOC(rb_transcoding);
782  tc->transcoder = tr;
783  tc->flags = flags;
784  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
785  tc->state.ptr = xmalloc(tr->state_size);
786  if (tr->state_init_func) {
787  (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
788  }
789  tc->resume_position = 0;
790  tc->recognized_len = 0;
791  tc->readagain_len = 0;
792  tc->writebuf_len = 0;
793  tc->writebuf_off = 0;
794  if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
795  tc->readbuf.ptr = xmalloc(tr->max_input);
796  }
797  if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
798  tc->writebuf.ptr = xmalloc(tr->max_output);
799  }
800  return tc;
801 }
802 
803 static rb_econv_result_t
804 rb_transcoding_convert(rb_transcoding *tc,
805  const unsigned char **input_ptr, const unsigned char *input_stop,
806  unsigned char **output_ptr, unsigned char *output_stop,
807  int flags)
808 {
809  return transcode_restartable(
810  input_ptr, output_ptr,
811  input_stop, output_stop,
812  tc, flags);
813 }
814 
815 static void
816 rb_transcoding_close(rb_transcoding *tc)
817 {
818  const rb_transcoder *tr = tc->transcoder;
819  if (tr->state_fini_func) {
820  (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
821  }
822  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
823  xfree(tc->state.ptr);
824  if ((int)sizeof(tc->readbuf.ary) < tr->max_input)
825  xfree(tc->readbuf.ptr);
826  if ((int)sizeof(tc->writebuf.ary) < tr->max_output)
827  xfree(tc->writebuf.ptr);
828  xfree(tc);
829 }
830 
831 static size_t
832 rb_transcoding_memsize(rb_transcoding *tc)
833 {
834  size_t size = sizeof(rb_transcoding);
835  const rb_transcoder *tr = tc->transcoder;
836 
837  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
838  size += tr->state_size;
839  }
840  if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
841  size += tr->max_input;
842  }
843  if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
844  size += tr->max_output;
845  }
846  return size;
847 }
848 
/*
 * Allocate an empty converter with room for n_hint elements (at least one;
 * non-positive hints are treated as 1).  Every visible field is reset to
 * zero/NULL and no transcoders are attached yet.
 * NOTE(review): several initialisers, including the allocation of the
 * element array, are on lines missing from this listing.
 */
849 static rb_econv_t *
850 rb_econv_alloc(int n_hint)
851 {
852  rb_econv_t *ec;
853 
854  if (n_hint <= 0)
855  n_hint = 1;
856 
857  ec = ALLOC(rb_econv_t);
858  ec->flags = 0;
861  ec->started = 0;
862  ec->replacement_str = NULL;
863  ec->replacement_len = 0;
864  ec->replacement_enc = NULL;
865  ec->replacement_allocated = 0;
866  ec->in_buf_start = NULL;
867  ec->in_data_start = NULL;
868  ec->in_data_end = NULL;
869  ec->in_buf_end = NULL;
870  ec->num_allocated = n_hint;
871  ec->num_trans = 0;
873  ec->num_finished = 0;
874  ec->last_tc = NULL;
876  ec->last_error.error_tc = NULL;
880  ec->last_error.error_bytes_len = 0;
881  ec->last_error.readagain_len = 0;
882  ec->source_encoding = NULL;
884  return ec;
885 }
886 
/*
 * Insert a new element for transcoder `tr` at index i of ec->elems,
 * shifting the elements from i onwards up by one.
 *
 * The element gets a fresh transcoding instance and an empty 4096-byte
 * output buffer.  When the element array is full its capacity is doubled
 * first.  If `tr` is not a decorator, ec->last_tc is updated to the
 * transcoding of the last non-decorator element at index >= i.
 * Every visible path returns 0.
 * NOTE(review): the line that resizes ec->elems and one line of the element
 * initialisation are missing from this listing.
 */
887 static int
888 rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
889 {
890  int n, j;
891  int bufsize = 4096;
892  unsigned char *p;
893 
894  if (ec->num_trans == ec->num_allocated) {
895  n = ec->num_allocated * 2;
897  ec->num_allocated = n;
898  }
899 
900  p = xmalloc(bufsize);
901 
/* open a slot at index i */
902  MEMMOVE(ec->elems+i+1, ec->elems+i, rb_econv_elem_t, ec->num_trans-i);
903 
904  ec->elems[i].tc = rb_transcoding_open_by_transcoder(tr, 0);
905  ec->elems[i].out_buf_start = p;
906  ec->elems[i].out_buf_end = p + bufsize;
907  ec->elems[i].out_data_start = p;
908  ec->elems[i].out_data_end = p;
910 
911  ec->num_trans++;
912 
/* scan backwards from the end, stopping at index i */
913  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
914  for (j = ec->num_trans-1; i <= j; j--) {
915  rb_transcoding *tc = ec->elems[j].tc;
916  const rb_transcoder *tr2 = tc->transcoder;
917  if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
918  ec->last_tc = tc;
919  break;
920  }
921  }
922 
923  return 0;
924 }
925 
926 static rb_econv_t *
927 rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
928 {
929  rb_econv_t *ec;
930  int i, ret;
931 
932  for (i = 0; i < n; i++) {
933  const rb_transcoder *tr;
934  tr = load_transcoder_entry(entries[i]);
935  if (!tr)
936  return NULL;
937  }
938 
939  ec = rb_econv_alloc(n);
940 
941  for (i = 0; i < n; i++) {
942  const rb_transcoder *tr = load_transcoder_entry(entries[i]);
943  ret = rb_econv_add_transcoder_at(ec, tr, ec->num_trans);
944  if (ret == -1) {
945  rb_econv_close(ec);
946  return NULL;
947  }
948  }
949 
950  return ec;
951 }
952 
/* Argument block passed to trans_open_i through transcode_search_path.
 * NOTE(review): its members (entries and num_additional, as used by
 * trans_open_i and rb_econv_open0) are on lines missing from this listing. */
953 struct trans_open_t {
956 };
957 
958 static void
959 trans_open_i(const char *sname, const char *dname, int depth, void *arg)
960 {
961  struct trans_open_t *toarg = arg;
962 
963  if (!toarg->entries) {
964  toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
965  }
966  toarg->entries[depth] = get_transcoder_entry(sname, dname);
967 }
968 
/*
 * Open a converter from sname to dname without decorators.
 *
 * When both names are empty the converter has no transcoders.  Otherwise
 * the registry is searched for a conversion path; NULL is returned when no
 * path exists or when the converter cannot be built from it.  On success
 * ec->flags is set to ecflags and the two encoding names are recorded.
 * NOTE(review): the declaration of `entries` is on a line missing from this
 * listing.
 */
969 static rb_econv_t *
970 rb_econv_open0(const char *sname, const char *dname, int ecflags)
971 {
973  int num_trans;
974  rb_econv_t *ec;
975 
976  /* Just check if sname and dname are defined */
977  /* (This check is needed?) */
978  if (*sname) rb_enc_find_index(sname);
979  if (*dname) rb_enc_find_index(dname);
980 
981  if (*sname == '\0' && *dname == '\0') {
982  num_trans = 0;
983  entries = NULL;
984  sname = dname = "";
985  }
986  else {
/* trans_open_i fills toarg.entries with one entry per step of the path */
987  struct trans_open_t toarg;
988  toarg.entries = NULL;
989  toarg.num_additional = 0;
990  num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg);
991  entries = toarg.entries;
992  if (num_trans < 0) {
993  xfree(entries);
994  return NULL;
995  }
996  }
997 
998  ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
999  xfree(entries);
1000  if (!ec)
1001  return NULL;
1002 
1003  ec->flags = ecflags;
1004  ec->source_encoding_name = sname;
1005  ec->destination_encoding_name = dname;
1006 
1007  return ec;
1008 }
1009 
1010 #define MAX_ECFLAGS_DECORATORS 32
1011 
/*
 * Translate the decorator bits of ecflags into decorator names.
 *
 * Names are written to decorators_ret in a fixed order: the XML decorators
 * first, then the newline decorators.  Returns the number of names written,
 * or -1 when the flag combination is rejected.
 * NOTE(review): the accepted `case` labels of the newline switch and the
 * second half of the XML conflict test are on lines missing from this
 * listing.
 */
1012 static int
1013 decorator_names(int ecflags, const char **decorators_ret)
1014 {
1015  int num_decorators;
1016 
1017  switch (ecflags & ECONV_NEWLINE_DECORATOR_MASK) {
1021  case 0:
1022  break;
1023  default:
1024  return -1;
1025  }
1026 
1027  if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
1029  return -1;
1030 
1031  num_decorators = 0;
1032 
1033  if (ecflags & ECONV_XML_TEXT_DECORATOR)
1034  decorators_ret[num_decorators++] = "xml_text_escape";
1035  if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
1036  decorators_ret[num_decorators++] = "xml_attr_content_escape";
1037  if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
1038  decorators_ret[num_decorators++] = "xml_attr_quote";
1039 
1040  if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
1041  decorators_ret[num_decorators++] = "crlf_newline";
1042  if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
1043  decorators_ret[num_decorators++] = "cr_newline";
1044  if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
1045  decorators_ret[num_decorators++] = "universal_newline";
1046 
1047  return num_decorators;
1048 }
1049 
1050 rb_econv_t *
1051 rb_econv_open(const char *sname, const char *dname, int ecflags)
1052 {
1053  rb_econv_t *ec;
1054  int num_decorators;
1055  const char *decorators[MAX_ECFLAGS_DECORATORS];
1056  int i;
1057 
1058  num_decorators = decorator_names(ecflags, decorators);
1059  if (num_decorators == -1)
1060  return NULL;
1061 
1062  ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
1063  if (!ec)
1064  return NULL;
1065 
1066  for (i = 0; i < num_decorators; i++)
1067  if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
1068  rb_econv_close(ec);
1069  return NULL;
1070  }
1071 
1072  ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1073 
1074  return ec;
1075 }
1076 
/*
 * Push data through the converter chain, starting at element `start`,
 * repeating full passes for as long as any element consumed input or
 * produced output.
 *
 * Element 0 reads from the caller's input; every other element reads the
 * pending output of its predecessor.  The last element writes to the
 * caller's output; every other element writes to its own buffer, which is
 * compacted to the buffer start first.
 *
 * Returns the index of an element whose result has to be reported to the
 * caller, or -1 when a complete pass makes no progress.
 * NOTE(review): one statement after the num_finished test and several
 * `case` labels of the result switch are on lines missing from this
 * listing.
 */
1077 static int
1078 trans_sweep(rb_econv_t *ec,
1079  const unsigned char **input_ptr, const unsigned char *input_stop,
1080  unsigned char **output_ptr, unsigned char *output_stop,
1081  int flags,
1082  int start)
1083 {
1084  int try;
1085  int i, f;
1086 
1087  const unsigned char **ipp, *is, *iold;
1088  unsigned char **opp, *os, *oold;
1089  rb_econv_result_t res;
1090 
1091  try = 1;
1092  while (try) {
1093  try = 0;
1094  for (i = start; i < ec->num_trans; i++) {
1095  rb_econv_elem_t *te = &ec->elems[i];
1096 
/* choose the input side for element i */
1097  if (i == 0) {
1098  ipp = input_ptr;
1099  is = input_stop;
1100  }
1101  else {
1102  rb_econv_elem_t *prev_te = &ec->elems[i-1];
1103  ipp = (const unsigned char **)&prev_te->out_data_start;
1104  is = prev_te->out_data_end;
1105  }
1106 
/* choose the output side for element i */
1107  if (i == ec->num_trans-1) {
1108  opp = output_ptr;
1109  os = output_stop;
1110  }
1111  else {
/* slide pending data to the front so the free space is contiguous */
1112  if (te->out_buf_start != te->out_data_start) {
1113  ssize_t len = te->out_data_end - te->out_data_start;
1114  ssize_t off = te->out_data_start - te->out_buf_start;
1115  MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
1116  te->out_data_start = te->out_buf_start;
1117  te->out_data_end -= off;
1118  }
1119  opp = &te->out_data_end;
1120  os = te->out_buf_end;
1121  }
1122 
1123  f = flags;
1124  if (ec->num_finished != i)
/* ECONV_AFTER_OUTPUT is honoured by element 0 only, and only once: later
 * passes start at element 1 with the flag cleared */
1126  if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
1127  start = 1;
1128  flags &= ~ECONV_AFTER_OUTPUT;
1129  }
1130  if (i != 0)
1131  f &= ~ECONV_AFTER_OUTPUT;
1132  iold = *ipp;
1133  oold = *opp;
1134  te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
/* any movement of either pointer counts as progress */
1135  if (iold != *ipp || oold != *opp)
1136  try = 1;
1137 
1138  switch (res) {
1142  case econv_after_output:
1143  return i;
1144 
1147  break;
1148 
1149  case econv_finished:
1150  ec->num_finished = i+1;
1151  break;
1152  }
1153  }
1154  }
1155  return -1;
1156 }
1157 
/*
 * Drive the whole converter chain once and report a single result.
 *
 * NULL input_ptr or output_ptr is replaced by an empty dummy range.  The
 * function decides where the sweep should start from the elements'
 * previous results, calls trans_sweep repeatedly until it either reports
 * nothing (-1) or reports the last element, and then scans the elements
 * from last to first for a result to return.  When result_position_ptr is
 * non-NULL it receives the index of the reporting element, or -1.
 * NOTE(review): many lines are missing from this listing, including
 * several `case` labels, the condition guarding the recursive call and the
 * final return statement.
 */
1158 static rb_econv_result_t
1159 rb_trans_conv(rb_econv_t *ec,
1160  const unsigned char **input_ptr, const unsigned char *input_stop,
1161  unsigned char **output_ptr, unsigned char *output_stop,
1162  int flags,
1163  int *result_position_ptr)
1164 {
1165  int i;
1166  int needreport_index;
1167  int sweep_start;
1168 
1169  unsigned char empty_buf;
1170  unsigned char *empty_ptr = &empty_buf;
1171 
/* substitute a zero-length range for a missing input or output */
1172  if (!input_ptr) {
1173  input_ptr = (const unsigned char **)&empty_ptr;
1174  input_stop = empty_ptr;
1175  }
1176 
1177  if (!output_ptr) {
1178  output_ptr = &empty_ptr;
1179  output_stop = empty_ptr;
1180  }
1181 
1182  if (ec->elems[0].last_result == econv_after_output)
1184 
/* look, from the last element backwards, for one that already reported;
 * the sweep then resumes just after it */
1185  for (i = ec->num_trans-1; 0 <= i; i--) {
1186  switch (ec->elems[i].last_result) {
1190  case econv_after_output:
1191  case econv_finished:
1192  sweep_start = i+1;
1193  goto found_needreport;
1194 
1197  break;
1198 
1199  default:
1200  rb_bug("unexpected transcode last result");
1201  }
1202  }
1203 
1204  /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */
1205 
1207  (flags & ECONV_AFTER_OUTPUT)) {
1208  rb_econv_result_t res;
1209 
/* run once without new input; an empty-source result is reported to the
 * caller as econv_after_output */
1210  res = rb_trans_conv(ec, NULL, NULL, output_ptr, output_stop,
1212  result_position_ptr);
1213 
1214  if (res == econv_source_buffer_empty)
1215  return econv_after_output;
1216  return res;
1217  }
1218 
1219  sweep_start = 0;
1220 
1221  found_needreport:
1222 
/* keep sweeping from just past the reporting element until nothing is
 * reported or the last element reports */
1223  do {
1224  needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1225  sweep_start = needreport_index + 1;
1226  } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
1227 
1228  for (i = ec->num_trans-1; 0 <= i; i--) {
1230  rb_econv_result_t res = ec->elems[i].last_result;
1231  if (res == econv_invalid_byte_sequence ||
1232  res == econv_incomplete_input ||
1233  res == econv_undefined_conversion ||
1234  res == econv_after_output) {
1236  }
1237  if (result_position_ptr)
1238  *result_position_ptr = i;
1239  return res;
1240  }
1241  }
1242  if (result_position_ptr)
1243  *result_position_ptr = -1;
1245 }
1246 
/*
 * Performs one conversion call on ec and records the outcome in
 * ec->last_error.
 *
 * - ec->last_error is zeroed first.
 * - With no transcoders (num_trans == 0) bytes are copied verbatim: first
 *   any bytes pending in ec's own in-buffer, then the caller's input.
 * - Otherwise, bytes pending in the last element's out-buffer are flushed to
 *   the caller's output, then bytes pending in ec's in-buffer are converted,
 *   and finally the caller's input is passed to rb_trans_conv().
 *
 * On econv_invalid_byte_sequence, econv_incomplete_input or
 * econv_undefined_conversion, the transcoding at result_position is recorded
 * in ec->last_error together with its recognized_len and readagain_len.
 *
 * NOTE(review): some lines are missing from this listing (gaps in the
 * embedded line numbers), among them the assignments of res before several
 * "goto gotresult" statements and part of the last_error bookkeeping.
 */
1247 static rb_econv_result_t
1248 rb_econv_convert0(rb_econv_t *ec,
1249  const unsigned char **input_ptr, const unsigned char *input_stop,
1250  unsigned char **output_ptr, unsigned char *output_stop,
1251  int flags)
1252 {
1253  rb_econv_result_t res;
1254  int result_position;
1255  int has_output = 0;
1256 
1257  memset(&ec->last_error, 0, sizeof(ec->last_error));
1258 
  /* No transcoder at all: plain byte copy. */
1259  if (ec->num_trans == 0) {
1260  size_t len;
  /* Bytes pending in ec's in-buffer go out before the caller's input. */
1261  if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
1262  if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
  /* Output is too small: copy what fits and keep the rest pending. */
1263  len = output_stop - *output_ptr;
1264  memcpy(*output_ptr, ec->in_data_start, len);
1265  *output_ptr = output_stop;
1266  ec->in_data_start += len;
1268  goto gotresult;
1269  }
1270  len = ec->in_data_end - ec->in_data_start;
1271  memcpy(*output_ptr, ec->in_data_start, len);
1272  *output_ptr += len;
  /* Everything was copied: reset the in-buffer to empty. */
1273  ec->in_data_start = ec->in_data_end = ec->in_buf_start;
1274  if (flags & ECONV_AFTER_OUTPUT) {
1275  res = econv_after_output;
1276  goto gotresult;
1277  }
1278  }
  /* len = min(free output space, remaining input). */
1279  if (output_stop - *output_ptr < input_stop - *input_ptr) {
1280  len = output_stop - *output_ptr;
1281  }
1282  else {
1283  len = input_stop - *input_ptr;
1284  }
  /* With ECONV_AFTER_OUTPUT only a single byte is copied per call. */
1285  if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
1286  *(*output_ptr)++ = *(*input_ptr)++;
1287  res = econv_after_output;
1288  goto gotresult;
1289  }
1290  memcpy(*output_ptr, *input_ptr, len);
1291  *output_ptr += len;
1292  *input_ptr += len;
1293  if (*input_ptr != input_stop)
1295  else if (flags & ECONV_PARTIAL_INPUT)
1297  else
1298  res = econv_finished;
1299  goto gotresult;
1300  }
1301 
  /* Flush bytes still pending in the last element's out-buffer. */
1302  if (ec->elems[ec->num_trans-1].out_data_start) {
1303  unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
1304  unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
1305  if (data_start != data_end) {
1306  size_t len;
1307  if (output_stop - *output_ptr < data_end - data_start) {
1308  len = output_stop - *output_ptr;
1309  memcpy(*output_ptr, data_start, len);
1310  *output_ptr = output_stop;
1311  ec->elems[ec->num_trans-1].out_data_start += len;
1313  goto gotresult;
1314  }
1315  len = data_end - data_start;
1316  memcpy(*output_ptr, data_start, len);
1317  *output_ptr += len;
1318  ec->elems[ec->num_trans-1].out_data_start =
1319  ec->elems[ec->num_trans-1].out_data_end =
1320  ec->elems[ec->num_trans-1].out_buf_start;
1321  has_output = 1;
1322  }
1323  }
1324 
  /* Convert bytes pending in ec's in-buffer before touching the caller's
   * input; ECONV_AFTER_OUTPUT is cleared and ECONV_PARTIAL_INPUT is set for
   * this call. Any result other than "source buffer empty" is final. */
1325  if (ec->in_buf_start &&
1326  ec->in_data_start != ec->in_data_end) {
1327  res = rb_trans_conv(ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
1328  (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
1329  if (res != econv_source_buffer_empty)
1330  goto gotresult;
1331  }
1332 
  /* Output was already produced above and the caller asked for
   * ECONV_AFTER_OUTPUT: call with an empty input range (input_stop is
   * moved to *input_ptr) and report econv_after_output instead of
   * econv_source_buffer_empty. */
1333  if (has_output &&
1334  (flags & ECONV_AFTER_OUTPUT) &&
1335  *input_ptr != input_stop) {
1336  input_stop = *input_ptr;
1337  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1338  if (res == econv_source_buffer_empty)
1339  res = econv_after_output;
1340  }
1341  else if ((flags & ECONV_AFTER_OUTPUT) ||
1342  ec->num_trans == 1) {
1343  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1344  }
1345  else {
  /* Several transcoders and no ECONV_AFTER_OUTPUT requested: set the
   * flag internally and repeat while the result is econv_after_output. */
1346  flags |= ECONV_AFTER_OUTPUT;
1347  do {
1348  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1349  } while (res == econv_after_output);
1350  }
1351 
1352  gotresult:
1353  ec->last_error.result = res;
1354  if (res == econv_invalid_byte_sequence ||
1355  res == econv_incomplete_input ||
1356  res == econv_undefined_conversion) {
1357  rb_transcoding *error_tc = ec->elems[result_position].tc;
1358  ec->last_error.error_tc = error_tc;
1362  ec->last_error.error_bytes_len = error_tc->recognized_len;
1363  ec->last_error.readagain_len = error_tc->readagain_len;
1364  }
1365 
1366  return res;
1367 }
1368 
1369 static int output_replacement_character(rb_econv_t *ec);
1370 
/*
 * Inserts the character(s) recorded in ec->last_error into the converter's
 * output as hexadecimal character references of the form "&#x<HEX>;".
 *
 * The error bytes are used directly when the recorded source encoding is
 * UTF-32BE; otherwise they are first converted to UTF-32BE with
 * allocate_converted_string(), using the on-stack utfbuf when it is large
 * enough.
 *
 * Returns 0 on success and -1 on failure (conversion failed, the UTF-32BE
 * length is not a multiple of 4, or rb_econv_insert_output() failed).
 *
 * NOTE(review): one argument line of the allocate_converted_string() call is
 * missing from this listing.
 */
1371 static int
1372 output_hex_charref(rb_econv_t *ec)
1373 {
1374  int ret;
1375  unsigned char utfbuf[1024];
1376  const unsigned char *utf;
1377  size_t utf_len;
1378  int utf_allocated = 0;
1379  char charef_buf[16];
1380  const unsigned char *p;
1381 
1382  if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
1383  utf = ec->last_error.error_bytes_start;
1384  utf_len = ec->last_error.error_bytes_len;
1385  }
1386  else {
1387  utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE",
1389  utfbuf, sizeof(utfbuf),
1390  &utf_len);
1391  if (!utf)
1392  return -1;
  /* Remember whether the result must be released with xfree() later. */
1393  if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
1394  utf_allocated = 1;
1395  }
1396 
1397  if (utf_len % 4 != 0)
1398  goto fail;
1399 
1400  p = utf;
  /* Each 4-byte big-endian unit is assembled into one value and emitted
   * as its own character reference. */
1401  while (4 <= utf_len) {
1402  unsigned int u = 0;
1403  u += p[0] << 24;
1404  u += p[1] << 16;
1405  u += p[2] << 8;
1406  u += p[3];
1407  snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
1408 
1409  ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII");
1410  if (ret == -1)
1411  goto fail;
1412 
1413  p += 4;
1414  utf_len -= 4;
1415  }
1416 
1417  if (utf_allocated)
1418  xfree((void *)utf);
1419  return 0;
1420 
1421  fail:
1422  if (utf_allocated)
1423  xfree((void *)utf);
1424  return -1;
1425 }
1426 
/*
 * NOTE(review): the return-type and name lines of this definition are
 * missing from this listing; presumably this is rb_econv_convert(), whose
 * calls elsewhere in the file take (ec, &in, in_stop, &out, out_stop, flags)
 * -- confirm against the original file.
 *
 * Marks ec as started, substitutes an empty range (backed by a local dummy
 * byte) for a NULL input_ptr or output_ptr, and calls rb_econv_convert0().
 * When that reports an error for which ec->flags selects a replacement
 * strategy, the replacement is inserted into the output and the conversion
 * is resumed; otherwise the result is returned as is.
 */
1429  const unsigned char **input_ptr, const unsigned char *input_stop,
1430  unsigned char **output_ptr, unsigned char *output_stop,
1431  int flags)
1432 {
1433  rb_econv_result_t ret;
1434 
1435  unsigned char empty_buf;
1436  unsigned char *empty_ptr = &empty_buf;
1437 
1438  ec->started = 1;
1439 
1440  if (!input_ptr) {
1441  input_ptr = (const unsigned char **)&empty_ptr;
1442  input_stop = empty_ptr;
1443  }
1444 
1445  if (!output_ptr) {
1446  output_ptr = &empty_ptr;
1447  output_stop = empty_ptr;
1448  }
1449 
1450  resume:
1451  ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1452 
1453  if (ret == econv_invalid_byte_sequence ||
1454  ret == econv_incomplete_input) {
1455  /* deal with invalid byte sequence */
1456  /* todo: add more alternative behaviors */
1457  switch (ec->flags & ECONV_INVALID_MASK) {
1458  case ECONV_INVALID_REPLACE:
  /* Resume only if the replacement could be inserted; otherwise
   * fall out of the switch and return the error. */
1459  if (output_replacement_character(ec) == 0)
1460  goto resume;
1461  }
1462  }
1463 
1464  if (ret == econv_undefined_conversion) {
1465  /* valid character in source encoding
1466  * but no related character(s) in destination encoding */
1467  /* todo: add more alternative behaviors */
1468  switch (ec->flags & ECONV_UNDEF_MASK) {
1469  case ECONV_UNDEF_REPLACE:
1470  if (output_replacement_character(ec) == 0)
1471  goto resume;
1472  break;
1473 
  /* NOTE(review): the case label for this branch is missing from the
   * listing; the branch emits a hexadecimal character reference. */
1475  if (output_hex_charref(ec) == 0)
1476  goto resume;
1477  break;
1478  }
1479  }
1480 
1481  return ret;
1482 }
1483 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; presumably this is
 * rb_econv_encoding_to_insert_output(rb_econv_t *ec), which is called with
 * ec elsewhere in the file -- confirm.
 *
 * Returns the name of the encoding in which bytes to be inserted into ec's
 * output are expected: "" when ec has no last transcoding; the source
 * encoding of the last transcoder when its asciicompat_type is
 * asciicompat_encoder; its destination encoding otherwise.
 */
1484 const char *
1486 {
1487  rb_transcoding *tc = ec->last_tc;
1488  const rb_transcoder *tr;
1489 
1490  if (tc == NULL)
1491  return "";
1492 
1493  tr = tc->transcoder;
1494 
1495  if (tr->asciicompat_type == asciicompat_encoder)
1496  return tr->src_encoding;
1497  return tr->dst_encoding;
1498 }
1499 
1500 static unsigned char *
1501 allocate_converted_string(const char *sname, const char *dname,
1502  const unsigned char *str, size_t len,
1503  unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
1504  size_t *dst_len_ptr)
1505 {
1506  unsigned char *dst_str;
1507  size_t dst_len;
1508  size_t dst_bufsize;
1509 
1510  rb_econv_t *ec;
1511  rb_econv_result_t res;
1512 
1513  const unsigned char *sp;
1514  unsigned char *dp;
1515 
1516  if (caller_dst_buf)
1517  dst_bufsize = caller_dst_bufsize;
1518  else if (len == 0)
1519  dst_bufsize = 1;
1520  else
1521  dst_bufsize = len;
1522 
1523  ec = rb_econv_open(sname, dname, 0);
1524  if (ec == NULL)
1525  return NULL;
1526  if (caller_dst_buf)
1527  dst_str = caller_dst_buf;
1528  else
1529  dst_str = xmalloc(dst_bufsize);
1530  dst_len = 0;
1531  sp = str;
1532  dp = dst_str+dst_len;
1533  res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
1534  dst_len = dp - dst_str;
1535  while (res == econv_destination_buffer_full) {
1536  if (SIZE_MAX/2 < dst_bufsize) {
1537  goto fail;
1538  }
1539  dst_bufsize *= 2;
1540  if (dst_str == caller_dst_buf) {
1541  unsigned char *tmp;
1542  tmp = xmalloc(dst_bufsize);
1543  memcpy(tmp, dst_str, dst_bufsize/2);
1544  dst_str = tmp;
1545  }
1546  else {
1547  dst_str = xrealloc(dst_str, dst_bufsize);
1548  }
1549  dp = dst_str+dst_len;
1550  res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
1551  dst_len = dp - dst_str;
1552  }
1553  if (res != econv_finished) {
1554  goto fail;
1555  }
1556  rb_econv_close(ec);
1557  *dst_len_ptr = dst_len;
1558  return dst_str;
1559 
1560  fail:
1561  if (dst_str != caller_dst_buf)
1562  xfree(dst_str);
1563  rb_econv_close(ec);
1564  return NULL;
1565 }
1566 
1567 /* result: 0:success -1:failure */
/*
 * NOTE(review): the line carrying this function's name and first parameter
 * is missing from this listing; presumably this is
 * rb_econv_insert_output(rb_econv_t *ec, ...), which is called with
 * (ec, bytes, length, encoding name) elsewhere in the file -- confirm.
 *
 * Inserts the len bytes at str (encoded in str_encoding) into the data
 * pending inside ec. The bytes are first converted to the encoding returned
 * by rb_econv_encoding_to_insert_output() when that differs from
 * str_encoding. A len of 0 succeeds immediately.
 *
 * The bytes are appended to one of ec's internal buffers, chosen as follows:
 *  - no transcoders: ec's in-buffer;
 *  - last transcoder has asciicompat_type == asciicompat_encoder: the buffer
 *    feeding that transcoder (ec's in-buffer when it is the only one, else
 *    the previous element's out-buffer), followed by that transcoding's
 *    read-again bytes;
 *  - otherwise: the last element's out-buffer.
 */
1568 int
1570  const unsigned char *str, size_t len, const char *str_encoding)
1571 {
1572  const char *insert_encoding = rb_econv_encoding_to_insert_output(ec);
1573  unsigned char insert_buf[4096];
1574  const unsigned char *insert_str = NULL;
1575  size_t insert_len;
1576 
1577  int last_trans_index;
1578  rb_transcoding *tc;
1579 
1580  unsigned char **buf_start_p;
1581  unsigned char **data_start_p;
1582  unsigned char **data_end_p;
1583  unsigned char **buf_end_p;
1584 
1585  size_t need;
1586 
1587  ec->started = 1;
1588 
1589  if (len == 0)
1590  return 0;
1591 
1592  if (encoding_equal(insert_encoding, str_encoding)) {
1593  insert_str = str;
1594  insert_len = len;
1595  }
1596  else {
  /* Convert into the on-stack insert_buf when it fits; a larger result
   * is heap-allocated and released before returning (see below). */
1597  insert_str = allocate_converted_string(str_encoding, insert_encoding,
1598  str, len, insert_buf, sizeof(insert_buf), &insert_len);
1599  if (insert_str == NULL)
1600  return -1;
1601  }
1602 
1603  need = insert_len;
1604 
1605  last_trans_index = ec->num_trans-1;
1606  if (ec->num_trans == 0) {
1607  tc = NULL;
1608  buf_start_p = &ec->in_buf_start;
1609  data_start_p = &ec->in_data_start;
1610  data_end_p = &ec->in_data_end;
1611  buf_end_p = &ec->in_buf_end;
1612  }
1613  else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
1614  tc = ec->elems[last_trans_index].tc;
  /* The read-again bytes are appended after the inserted ones, so room
   * for them is needed too; "need < insert_len" detects wrap-around. */
1615  need += tc->readagain_len;
1616  if (need < insert_len)
1617  goto fail;
1618  if (last_trans_index == 0) {
1619  buf_start_p = &ec->in_buf_start;
1620  data_start_p = &ec->in_data_start;
1621  data_end_p = &ec->in_data_end;
1622  buf_end_p = &ec->in_buf_end;
1623  }
1624  else {
1625  rb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
1626  buf_start_p = &ee->out_buf_start;
1627  data_start_p = &ee->out_data_start;
1628  data_end_p = &ee->out_data_end;
1629  buf_end_p = &ee->out_buf_end;
1630  }
1631  }
1632  else {
1633  rb_econv_elem_t *ee = &ec->elems[last_trans_index];
1634  buf_start_p = &ee->out_buf_start;
1635  data_start_p = &ee->out_data_start;
1636  data_end_p = &ee->out_data_end;
1637  buf_end_p = &ee->out_buf_end;
1638  tc = ec->elems[last_trans_index].tc;
1639  }
1640 
  /* Make sure the chosen buffer has at least `need` free bytes after its
   * data: allocate it if absent, else compact the data to the front and,
   * if still too small, grow it with xrealloc(). */
1641  if (*buf_start_p == NULL) {
1642  unsigned char *buf = xmalloc(need);
1643  *buf_start_p = buf;
1644  *data_start_p = buf;
1645  *data_end_p = buf;
1646  *buf_end_p = buf+need;
1647  }
1648  else if ((size_t)(*buf_end_p - *data_end_p) < need) {
1649  MEMMOVE(*buf_start_p, *data_start_p, unsigned char, *data_end_p - *data_start_p);
1650  *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1651  *data_start_p = *buf_start_p;
1652  if ((size_t)(*buf_end_p - *data_end_p) < need) {
1653  unsigned char *buf;
1654  size_t s = (*data_end_p - *buf_start_p) + need;
  /* s < need means the addition wrapped around. */
1655  if (s < need)
1656  goto fail;
1657  buf = xrealloc(*buf_start_p, s);
1658  *data_start_p = buf;
1659  *data_end_p = buf + (*data_end_p - *buf_start_p);
1660  *buf_start_p = buf;
1661  *buf_end_p = buf + s;
1662  }
1663  }
1664 
1665  memcpy(*data_end_p, insert_str, insert_len);
1666  *data_end_p += insert_len;
1667  if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
  /* Move the transcoding's read-again bytes behind the inserted bytes
   * and mark them as consumed. */
1668  memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
1669  *data_end_p += tc->readagain_len;
1670  tc->readagain_len = 0;
1671  }
1672 
  /* Release the converted copy only when it was heap-allocated. */
1673  if (insert_str != str && insert_str != insert_buf)
1674  xfree((void*)insert_str);
1675  return 0;
1676 
1677  fail:
1678  if (insert_str != str && insert_str != insert_buf)
1679  xfree((void*)insert_str);
1680  return -1;
1681 }
1682 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; presumably this is
 * rb_econv_close(rb_econv_t *ec) -- confirm.
 *
 * Releases ec and everything the code below shows it owning: the
 * replacement string (only when replacement_allocated is set), every
 * element's transcoding and out-buffer, ec's in-buffer, the element array
 * and ec itself.
 */
1683 void
1685 {
1686  int i;
1687 
1688  if (ec->replacement_allocated) {
1689  xfree((void *)ec->replacement_str);
1690  }
1691  for (i = 0; i < ec->num_trans; i++) {
1692  rb_transcoding_close(ec->elems[i].tc);
1693  if (ec->elems[i].out_buf_start)
1694  xfree(ec->elems[i].out_buf_start);
1695  }
1696  xfree(ec->in_buf_start);
1697  xfree(ec->elems);
1698  xfree(ec);
1699 }
1700 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; the body reads a converter through `ec`.
 *
 * Returns a byte count for ec: sizeof(rb_econv_t), plus the replacement
 * string length when replacement_allocated is set, plus for every element
 * the value of rb_transcoding_memsize() and the capacity of its out-buffer,
 * plus the capacity of ec's in-buffer and the element array
 * (num_allocated entries).
 */
1701 size_t
1703 {
1704  size_t size = sizeof(rb_econv_t);
1705  int i;
1706 
1707  if (ec->replacement_allocated) {
1708  size += ec->replacement_len;
1709  }
1710  for (i = 0; i < ec->num_trans; i++) {
1711  size += rb_transcoding_memsize(ec->elems[i].tc);
1712 
1713  if (ec->elems[i].out_buf_start) {
1714  size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start;
1715  }
1716  }
1717  size += ec->in_buf_end - ec->in_buf_start;
1718  size += sizeof(rb_econv_elem_t) * ec->num_allocated;
1719 
1720  return size;
1721 }
1722 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; the body reads a converter through `ec`.
 *
 * Returns the readagain_len of the first transcoding of ec as an int, or 0
 * when ec has no transcoders. Where size_t is wider than int, a value above
 * INT_MAX is reported as INT_MAX.
 */
1723 int
1725 {
1726  if (ec->num_trans == 0)
1727  return 0;
1728 #if SIZEOF_SIZE_T > SIZEOF_INT
1729  if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
1730 #endif
1731  return (int)ec->elems[0].tc->readagain_len;
1732 }
1733 
1734 void
1735 rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
1736 {
1737  rb_transcoding *tc;
1738  if (ec->num_trans == 0 || n == 0)
1739  return;
1740  tc = ec->elems[0].tc;
1741  memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n);
1742  tc->readagain_len -= n;
1743 }
1744 
  /* NOTE(review): the opening line of this struct is missing from this
   * listing; presumably it is struct asciicompat_encoding_t, the type that
   * asciicompat_encoding_i() casts its argument to -- confirm. */
  /* Result slot: filled in by asciicompat_encoding_i() with a transcoder's
   * dst_encoding. */
1746  const char *ascii_compat_name;
  /* NOTE(review): presumably the encoding name being looked up; the line
   * that would assign it is not visible in this listing. */
1747  const char *ascii_incompat_name;
1748 };
1749 
1750 static int
1751 asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
1752 {
1753  struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
1754  transcoder_entry_t *entry = (transcoder_entry_t *)val;
1755  const rb_transcoder *tr;
1756 
1757  if (DECORATOR_P(entry->sname, entry->dname))
1758  return ST_CONTINUE;
1759  tr = load_transcoder_entry(entry);
1760  if (tr && tr->asciicompat_type == asciicompat_decoder) {
1761  data->ascii_compat_name = tr->dst_encoding;
1762  return ST_STOP;
1763  }
1764  return ST_CONTINUE;
1765 }
1766 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; the body uses a parameter named
 * ascii_incompat_name.
 *
 * Looks ascii_incompat_name up in transcoder_table and, when the table found
 * for it holds exactly one entry, runs asciicompat_encoding_i() over that
 * table and returns the name it stored. Returns NULL when the name is not in
 * transcoder_table, when the inner table does not have exactly one entry, or
 * when the callback stored nothing.
 */
1767 const char *
1769 {
1770  st_data_t v;
1771  st_table *table2;
1772  struct asciicompat_encoding_t data;
1773 
1774  if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
1775  return NULL;
1776  table2 = (st_table *)v;
1777 
1778  /*
1779  * Assumption:
1780  * There is at most one transcoder for
1781  * converting from ASCII incompatible encoding.
1782  *
1783  * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
1784  */
1785  if (table2->num_entries != 1)
1786  return NULL;
1787 
1789  data.ascii_compat_name = NULL;
1790  st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
1791  return data.ascii_compat_name;
1792 }
1793 
/*
 * Converts the len bytes at ss with ec and appends the result to the Ruby
 * string dst, which is returned. When dst is nil a new string buffer with
 * capacity len is created first.
 *
 * Before each conversion call, dst's capacity is raised if it has fewer than
 * len + max_output free bytes, where max_output comes from the last
 * transcoder (1 when there is none). Raises ArgumentError ("too long
 * string") when the required capacity exceeds LONG_MAX. The conversion is
 * repeated while it reports econv_destination_buffer_full.
 *
 * NOTE(review): two lines are missing from this listing: the statement
 * guarded by "if (ec->destination_encoding)" and one line at the end of the
 * loop body.
 */
1794 VALUE
1795 rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
1796 {
1797  unsigned const char *sp, *se;
1798  unsigned char *ds, *dp, *de;
1799  rb_econv_result_t res;
1800  int max_output;
1801 
1802  if (NIL_P(dst)) {
1803  dst = rb_str_buf_new(len);
1804  if (ec->destination_encoding)
1806  }
1807 
1808  if (ec->last_tc)
1809  max_output = ec->last_tc->transcoder->max_output;
1810  else
1811  max_output = 1;
1812 
1813  do {
1814  long dlen = RSTRING_LEN(dst);
1815  if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
1816  unsigned long new_capa = (unsigned long)dlen + len + max_output;
1817  if (LONG_MAX < new_capa)
1818  rb_raise(rb_eArgError, "too long string");
  /* Resize to the new capacity, then restore the logical length so
   * only the capacity has changed. */
1819  rb_str_resize(dst, new_capa);
1820  rb_str_set_len(dst, dlen);
1821  }
1822  sp = (const unsigned char *)ss;
1823  se = sp + len;
1824  ds = (unsigned char *)RSTRING_PTR(dst);
1825  de = ds + rb_str_capacity(dst);
  /* ds and dp both point at the current end of dst's content. */
1826  dp = ds += dlen;
1827  res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
  /* Advance past the consumed input and extend dst by what was written. */
1828  len -= (const char *)sp - ss;
1829  ss = (const char *)sp;
1830  rb_str_set_len(dst, dlen + (dp - ds));
1832  } while (res == econv_destination_buffer_full);
1833 
1834  return dst;
1835 }
1836 
/*
 * Converts len bytes of the Ruby string src, starting at byte offset off,
 * with ec and appends the result to dst via rb_econv_append(). Returns the
 * destination string returned by rb_econv_append().
 *
 * NOTE(review): one line at the start of the body is missing from this
 * listing.
 */
1837 VALUE
1838 rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
1839 {
1841  dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags);
  /* Keep src referenced while its raw pointer is in use above. */
1842  RB_GC_GUARD(src);
1843  return dst;
1844 }
1845 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; the body uses ec, src, dst and flags.
 *
 * Converts the whole of src (offset 0, RSTRING_LEN(src) bytes) and appends
 * the result to dst by delegating to rb_econv_substr_append().
 */
1846 VALUE
1848 {
1849  return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), dst, flags);
1850 }
1851 
1852 VALUE
1853 rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
1854 {
1855  return rb_econv_substr_append(ec, src, byteoff, bytesize, Qnil, flags);
1856 }
1857 
/*
 * NOTE(review): the line carrying this function's name and parameter list is
 * missing from this listing; the body uses ec, src and flags.
 *
 * Converts the whole of src (offset 0, RSTRING_LEN(src) bytes) by delegating
 * to rb_econv_substr_append() with a nil destination.
 */
1858 VALUE
1860 {
1861  return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags);
1862 }
1863 
1864 static int
1865 rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
1866 {
1867  transcoder_entry_t *entry;
1868  const rb_transcoder *tr;
1869 
1870  if (ec->started != 0)
1871  return -1;
1872 
1873  entry = get_transcoder_entry(sname, dname);
1874  if (!entry)
1875  return -1;
1876 
1877  tr = load_transcoder_entry(entry);
1878  if (!tr) return -1;
1879 
1880  return rb_econv_add_transcoder_at(ec, tr, n);
1881 }
1882 
1883 static int
1884 rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
1885 {
1886  return rb_econv_add_converter(ec, "", decorator_name, n);
1887 }
1888 
1889 int
1890 rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
1891 {
1892  const rb_transcoder *tr;
1893 
1894  if (ec->num_trans == 0)
1895  return rb_econv_decorate_at(ec, decorator_name, 0);
1896 
1897  tr = ec->elems[0].tc->transcoder;
1898 
1899  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
1900  tr->asciicompat_type == asciicompat_decoder)
1901  return rb_econv_decorate_at(ec, decorator_name, 1);
1902 
1903  return rb_econv_decorate_at(ec, decorator_name, 0);
1904 }
1905 
1906 int
1907 rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
1908 {
1909  const rb_transcoder *tr;
1910 
1911  if (ec->num_trans == 0)
1912  return rb_econv_decorate_at(ec, decorator_name, 0);
1913 
1914  tr = ec->elems[ec->num_trans-1].tc->transcoder;
1915 
1916  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
1917  tr->asciicompat_type == asciicompat_encoder)
1918  return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1);
1919 
1920  return rb_econv_decorate_at(ec, decorator_name, ec->num_trans);
1921 }
1922 
/*
 * NOTE(review): the line carrying this function's name and parameter list,
 * the three case labels of the switch and the last statement of the body
 * are missing from this listing; the body operates on a converter `ec`.
 *
 * Depending on the newline-decorator bits of ec->flags, picks one of the
 * decorator names "universal_newline", "crlf_newline" or "cr_newline" and
 * removes every element using that decorator's transcoder from ec's chain,
 * closing its transcoding and freeing its out-buffer. The remaining
 * elements are compacted towards the front.
 */
1923 void
1925 {
1926  const char *dname = 0;
1927 
1928  switch (ec->flags & ECONV_NEWLINE_DECORATOR_MASK) {
1930  dname = "universal_newline";
1931  break;
1933  dname = "crlf_newline";
1934  break;
1936  dname = "cr_newline";
1937  break;
1938  }
1939 
1940  if (dname) {
1941  const rb_transcoder *transcoder = get_transcoder_entry("", dname)->transcoder;
  /* Iterate over the original count; ec->num_trans shrinks as elements
   * are removed, and j is the write index for the elements kept. */
1942  int num_trans = ec->num_trans;
1943  int i, j = 0;
1944 
1945  for (i=0; i < num_trans; i++) {
1946  if (transcoder == ec->elems[i].tc->transcoder) {
1947  rb_transcoding_close(ec->elems[i].tc);
1948  xfree(ec->elems[i].out_buf_start);
1949  ec->num_trans--;
1950  }
1951  else
1952  ec->elems[j++] = ec->elems[i];
1953  }
1954  }
1955 
1957 }
1958 
/*
 * Appends a human-readable description of a conversion to mesg and returns
 * it; a new empty string is created when mesg is nil.
 *
 * The description is "<sname> to <dname>" (or just the non-empty one of the
 * two names), optionally followed by " with " and a comma-separated list of
 * the decorators selected in ecflags. When neither part applies, the text
 * "no-conversion" is appended.
 *
 * NOTE(review): part of the flag-mask condition that guards the decorator
 * list is missing from this listing.
 */
1959 static VALUE
1960 econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
1961 {
1962  int has_description = 0;
1963 
1964  if (NIL_P(mesg))
1965  mesg = rb_str_new(NULL, 0);
1966 
1967  if (*sname != '\0' || *dname != '\0') {
1968  if (*sname == '\0')
1969  rb_str_cat2(mesg, dname);
1970  else if (*dname == '\0')
1971  rb_str_cat2(mesg, sname);
1972  else
1973  rb_str_catf(mesg, "%s to %s", sname, dname);
1974  has_description = 1;
1975  }
1976 
1977  if (ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
  /* pre is "" before the first decorator name and "," afterwards, which
   * yields a comma-separated list. */
1981  const char *pre = "";
1982  if (has_description)
1983  rb_str_cat2(mesg, " with ");
1984  if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
1985  rb_str_cat2(mesg, pre); pre = ",";
1986  rb_str_cat2(mesg, "universal_newline");
1987  }
1988  if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) {
1989  rb_str_cat2(mesg, pre); pre = ",";
1990  rb_str_cat2(mesg, "crlf_newline");
1991  }
1992  if (ecflags & ECONV_CR_NEWLINE_DECORATOR) {
1993  rb_str_cat2(mesg, pre); pre = ",";
1994  rb_str_cat2(mesg, "cr_newline");
1995  }
1996  if (ecflags & ECONV_XML_TEXT_DECORATOR) {
1997  rb_str_cat2(mesg, pre); pre = ",";
1998  rb_str_cat2(mesg, "xml_text");
1999  }
2000  if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) {
2001  rb_str_cat2(mesg, pre); pre = ",";
2002  rb_str_cat2(mesg, "xml_attr_content");
2003  }
2004  if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) {
2005  rb_str_cat2(mesg, pre); pre = ",";
2006  rb_str_cat2(mesg, "xml_attr_quote");
2007  }
2008  has_description = 1;
2009  }
2010  if (!has_description) {
2011  rb_str_cat2(mesg, "no-conversion");
2012  }
2013 
2014  return mesg;
2015 }
2016 
2017 VALUE
2018 rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
2019 {
2020  VALUE mesg, exc;
2021  mesg = rb_str_new_cstr("code converter not found (");
2022  econv_description(sname, dname, ecflags, mesg);
2023  rb_str_cat2(mesg, ")");
2024  exc = rb_exc_new3(rb_eConverterNotFoundError, mesg);
2025  return exc;
2026 }
2027 
/*
 * Builds (but does not raise) an exception object describing the error
 * recorded in ec->last_error, or returns Qnil when neither visible branch
 * applies.
 *
 * Two branches are visible:
 *  - one creates an rb_eInvalidByteSequenceError whose message quotes the
 *    dumped error bytes (and, when present, the read-again bytes) and sets
 *    the error_bytes, readagain_bytes and incomplete_input ivars;
 *  - one creates an rb_eUndefinedConversionError; for a UTF-8 source the
 *    message may show the character as "U+XXXX" instead of dumped bytes, and
 *    the error_char ivar is set.
 * Both branches then set the source/destination encoding name ivars and,
 * when an encoding index is available, the encoding object ivars (set_encs).
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (gaps in the embedded line numbers), including the conditions that select
 * each branch, several rb_sprintf() arguments and the assignments to idx.
 */
2028 static VALUE
2029 make_econv_exception(rb_econv_t *ec)
2030 {
2031  VALUE mesg, exc;
2034  const char *err = (const char *)ec->last_error.error_bytes_start;
2035  size_t error_len = ec->last_error.error_bytes_len;
2036  VALUE bytes = rb_str_new(err, error_len);
2037  VALUE dumped = rb_str_dump(bytes);
2038  size_t readagain_len = ec->last_error.readagain_len;
2039  VALUE bytes2 = Qnil;
2040  VALUE dumped2;
2041  int idx;
2043  mesg = rb_sprintf("incomplete %s on %s",
2044  StringValueCStr(dumped),
2046  }
2047  else if (readagain_len) {
  /* The read-again bytes directly follow the error bytes in memory. */
2048  bytes2 = rb_str_new(err+error_len, readagain_len);
2049  dumped2 = rb_str_dump(bytes2);
2050  mesg = rb_sprintf("%s followed by %s on %s",
2051  StringValueCStr(dumped),
2052  StringValueCStr(dumped2),
2054  }
2055  else {
2056  mesg = rb_sprintf("%s on %s",
2057  StringValueCStr(dumped),
2059  }
2060 
2061  exc = rb_exc_new3(rb_eInvalidByteSequenceError, mesg);
2062  rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
2063  rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
2064  rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
2065 
  /* Shared tail: the undefined-conversion branch below jumps here. */
2066  set_encs:
2067  rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding));
2068  rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding));
2070  if (0 <= idx)
2071  rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
2073  if (0 <= idx)
2074  rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
2075  return exc;
2076  }
2078  VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start,
2080  VALUE dumped = Qnil;
2081  int idx;
  /* For a UTF-8 source, try to show the character as a code point. */
2082  if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
2083  rb_encoding *utf8 = rb_utf8_encoding();
2084  const char *start, *end;
2085  int n;
2086  start = (const char *)ec->last_error.error_bytes_start;
2087  end = start + ec->last_error.error_bytes_len;
2088  n = rb_enc_precise_mbclen(start, end, utf8);
2089  if (MBCLEN_CHARFOUND_P(n) &&
2091  unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
2092  dumped = rb_sprintf("U+%04X", cc);
2093  }
2094  }
  /* Fall back to the dumped raw bytes. */
2095  if (dumped == Qnil)
2096  dumped = rb_str_dump(bytes);
2098  ec->source_encoding_name) == 0 &&
2100  ec->destination_encoding_name) == 0) {
2101  mesg = rb_sprintf("%s from %s to %s",
2102  StringValueCStr(dumped),
2105  }
2106  else {
2107  int i;
2108  mesg = rb_sprintf("%s to %s in conversion from %s",
2109  StringValueCStr(dumped),
2111  ec->source_encoding_name);
  /* List the destination of every non-decorator transcoder in the
   * chain. */
2112  for (i = 0; i < ec->num_trans; i++) {
2113  const rb_transcoder *tr = ec->elems[i].tc->transcoder;
2114  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
2115  rb_str_catf(mesg, " to %s",
2116  ec->elems[i].tc->transcoder->dst_encoding);
2117  }
2118  }
2119  exc = rb_exc_new3(rb_eUndefinedConversionError, mesg);
2121  if (0 <= idx)
2122  rb_enc_associate_index(bytes, idx);
2123  rb_ivar_set(exc, rb_intern("error_char"), bytes);
2124  goto set_encs;
2125  }
2126  return Qnil;
2127 }
2128 
2129 static void
2130 more_output_buffer(
2131  VALUE destination,
2132  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2133  int max_output,
2134  unsigned char **out_start_ptr,
2135  unsigned char **out_pos,
2136  unsigned char **out_stop_ptr)
2137 {
2138  size_t len = (*out_pos - *out_start_ptr);
2139  size_t new_len = (len + max_output) * 2;
2140  *out_start_ptr = resize_destination(destination, len, new_len);
2141  *out_pos = *out_start_ptr + len;
2142  *out_stop_ptr = *out_start_ptr + new_len;
2143 }
2144 
2145 static int
2146 make_replacement(rb_econv_t *ec)
2147 {
2148  rb_transcoding *tc;
2149  const rb_transcoder *tr;
2150  const unsigned char *replacement;
2151  const char *repl_enc;
2152  const char *ins_enc;
2153  size_t len;
2154 
2155  if (ec->replacement_str)
2156  return 0;
2157 
2158  ins_enc = rb_econv_encoding_to_insert_output(ec);
2159 
2160  tc = ec->last_tc;
2161  if (*ins_enc) {
2162  tr = tc->transcoder;
2163  rb_enc_find(tr->dst_encoding);
2164  replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
2165  }
2166  else {
2167  replacement = (unsigned char *)"?";
2168  len = 1;
2169  repl_enc = "";
2170  }
2171 
2172  ec->replacement_str = replacement;
2173  ec->replacement_len = len;
2174  ec->replacement_enc = repl_enc;
2175  ec->replacement_allocated = 0;
2176  return 0;
2177 }
2178 
/*
 * NOTE(review): the line carrying this function's name and first parameter
 * is missing from this listing; the body operates on a converter `ec`.
 *
 * Installs the len bytes at str (encoded in encname) as ec's replacement
 * string. The bytes are copied as they are when the encoding for inserted
 * output is empty or equal to encname; otherwise they are converted to that
 * encoding with allocate_converted_string(). A previously allocated
 * replacement string is freed. The new string is owned by ec
 * (replacement_allocated = 1).
 *
 * Returns 0 on success and -1 when the conversion fails.
 */
2179 int
2181  const unsigned char *str, size_t len, const char *encname)
2182 {
2183  unsigned char *str2;
2184  size_t len2;
2185  const char *encname2;
2186 
2187  encname2 = rb_econv_encoding_to_insert_output(ec);
2188 
2189  if (!*encname2 || encoding_equal(encname, encname2)) {
2190  str2 = xmalloc(len);
2191  MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */
2192  len2 = len;
2193  encname2 = encname;
2194  }
2195  else {
  /* No caller buffer is passed (NULL, 0), so the result is always
   * heap-allocated and can be owned by ec. */
2196  str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2);
2197  if (!str2)
2198  return -1;
2199  }
2200 
2201  if (ec->replacement_allocated) {
2202  xfree((void *)ec->replacement_str);
2203  }
2204  ec->replacement_allocated = 1;
2205  ec->replacement_str = str2;
2206  ec->replacement_len = len2;
2207  ec->replacement_enc = encname2;
2208  return 0;
2209 }
2210 
/*
 * Makes sure ec has a replacement string (make_replacement()) and reports
 * whether it could be output: returns 0 on success and -1 when either step
 * fails.
 *
 * NOTE(review): the line that assigns ret is missing from this listing;
 * presumably it inserts the replacement string into ec's output -- confirm.
 */
2211 static int
2212 output_replacement_character(rb_econv_t *ec)
2213 {
2214  int ret;
2215 
2216  if (make_replacement(ec) == -1)
2217  return -1;
2218 
2220  if (ret == -1)
2221  return -1;
2222 
2223  return 0;
2224 }
2225 
2226 #if 1
2227 #define hash_fallback rb_hash_aref
2228 
2229 static VALUE
2230 proc_fallback(VALUE fallback, VALUE c)
2231 {
2232  return rb_proc_call(fallback, rb_ary_new4(1, &c));
2233 }
2234 
2235 static VALUE
2236 method_fallback(VALUE fallback, VALUE c)
2237 {
2238  return rb_method_call(1, &c, fallback);
2239 }
2240 
2241 static VALUE
2242 aref_fallback(VALUE fallback, VALUE c)
2243 {
2244  return rb_funcall3(fallback, sym_aref, 1, &c);
2245 }
2246 
/*
 * Converts the bytes in [*in_pos, in_stop) from src_encoding to dst_encoding
 * into the buffer [*out_pos, out_stop), growing the destination through
 * resize_destination() whenever it fills up.
 *
 * A converter is opened with rb_econv_open_opts(); the exception built by
 * rb_econv_open_exc() is raised when that fails. When ecopts is a hash, its
 * sym_fallback entry selects how undefined conversions are handled: the
 * fallback object is called with the offending character and, if it returns
 * a usable value, that value (converted to a string) is inserted into the
 * output and the conversion resumes.
 *
 * Raises the exception from make_econv_exception() on an invalid byte
 * sequence, incomplete input or an undefined conversion that was not
 * handled; raises ArgumentError ("too big fallback string") when the
 * fallback result cannot be inserted. The converter is closed before the
 * conversion-error exception is raised and on normal return.
 *
 * NOTE(review): three lines are missing from this listing: the body of the
 * T_HASH fallback branch and two arguments of the rb_enc_str_new() call.
 */
2247 static void
2248 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
2249  const unsigned char *in_stop, unsigned char *out_stop,
2250  VALUE destination,
2251  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2252  const char *src_encoding,
2253  const char *dst_encoding,
2254  int ecflags,
2255  VALUE ecopts)
2256 {
2257  rb_econv_t *ec;
2258  rb_transcoding *last_tc;
2259  rb_econv_result_t ret;
2260  unsigned char *out_start = *out_pos;
2261  int max_output;
2262  VALUE exc;
2263  VALUE fallback = Qnil;
2264  VALUE (*fallback_func)(VALUE, VALUE) = 0;
2265 
2266  ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
2267  if (!ec)
2268  rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
2269 
  /* Pick the adapter matching the kind of fallback object. */
2270  if (!NIL_P(ecopts) && RB_TYPE_P(ecopts, T_HASH)) {
2271  fallback = rb_hash_aref(ecopts, sym_fallback);
2272  if (RB_TYPE_P(fallback, T_HASH)) {
2274  }
2275  else if (rb_obj_is_proc(fallback)) {
2276  fallback_func = proc_fallback;
2277  }
2278  else if (rb_obj_is_method(fallback)) {
2279  fallback_func = method_fallback;
2280  }
2281  else {
2282  fallback_func = aref_fallback;
2283  }
2284  }
2285  last_tc = ec->last_tc;
2286  max_output = last_tc ? last_tc->transcoder->max_output : 1;
2287 
2288  resume:
2289  ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
2290 
2291  if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
2292  VALUE rep = rb_enc_str_new(
2293  (const char *)ec->last_error.error_bytes_start,
2296  rep = (*fallback_func)(fallback, rep);
  /* A Qundef or nil result means "no replacement": fall through to the
   * error handling below. */
2297  if (rep != Qundef && !NIL_P(rep)) {
2298  StringValue(rep);
2299  ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
2300  RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
2301  if ((int)ret == -1) {
2302  rb_raise(rb_eArgError, "too big fallback string");
2303  }
2304  goto resume;
2305  }
2306  }
2307 
2308  if (ret == econv_invalid_byte_sequence ||
2309  ret == econv_incomplete_input ||
2310  ret == econv_undefined_conversion) {
  /* Build the exception first (it reads ec), then release ec. */
2311  exc = make_econv_exception(ec);
2312  rb_econv_close(ec);
2313  rb_exc_raise(exc);
2314  }
2315 
2316  if (ret == econv_destination_buffer_full) {
2317  more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2318  goto resume;
2319  }
2320 
2321  rb_econv_close(ec);
2322  return;
2323 }
2324 #else
2325 /* sample transcode_loop implementation in byte-by-byte stream style */
/*
 * Alternative to the transcode_loop() above; it sits in the #else branch of
 * "#if 1" and is therefore not compiled.
 *
 * Feeds the input to the converter one byte at a time with
 * ECONV_PARTIAL_INPUT, and with no input and no flags once the input is
 * exhausted, until the converter reports econv_finished. On return *in_pos
 * is set to in_stop.
 *
 * NOTE(review): several lines are missing from this listing: the statement
 * that gives ret its initial value and the case labels of the switch.
 */
2326 static void
2327 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
2328  const unsigned char *in_stop, unsigned char *out_stop,
2329  VALUE destination,
2330  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2331  const char *src_encoding,
2332  const char *dst_encoding,
2333  int ecflags,
2334  VALUE ecopts)
2335 {
2336  rb_econv_t *ec;
2337  rb_transcoding *last_tc;
2338  rb_econv_result_t ret;
2339  unsigned char *out_start = *out_pos;
2340  const unsigned char *ptr;
2341  int max_output;
2342  VALUE exc;
2343 
2344  ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
2345  if (!ec)
2346  rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
2347 
2348  last_tc = ec->last_tc;
2349  max_output = last_tc ? last_tc->transcoder->max_output : 1;
2350 
2352  ptr = *in_pos;
2353  while (ret != econv_finished) {
2354  unsigned char input_byte;
2355  const unsigned char *p = &input_byte;
2356 
2357  if (ret == econv_source_buffer_empty) {
2358  if (ptr < in_stop) {
  /* Hand over exactly one input byte. */
2359  input_byte = *ptr;
2360  ret = rb_econv_convert(ec, &p, p+1, out_pos, out_stop, ECONV_PARTIAL_INPUT);
2361  }
2362  else {
  /* Input exhausted: no input and no ECONV_PARTIAL_INPUT. */
2363  ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, 0);
2364  }
2365  }
2366  else {
2367  ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, ECONV_PARTIAL_INPUT);
2368  }
  /* Advance ptr by however far the converter moved p. */
2369  if (&input_byte != p)
2370  ptr += p - &input_byte;
2371  switch (ret) {
2375  exc = make_econv_exception(ec);
2376  rb_econv_close(ec);
2377  rb_exc_raise(exc);
2378  break;
2379 
2381  more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2382  break;
2383 
2385  break;
2386 
2387  case econv_finished:
2388  break;
2389  }
2390  }
2391  rb_econv_close(ec);
2392  *in_pos = in_stop;
2393  return;
2394 }
2395 #endif
2396 
2397 
2398 /*
2399  * String-specific code
2400  */
2401 
2402 static unsigned char *
2403 str_transcoding_resize(VALUE destination, size_t len, size_t new_len)
2404 {
2405  rb_str_resize(destination, new_len);
2406  return (unsigned char *)RSTRING_PTR(destination);
2407 }
2408 
2409 static int
2410 econv_opts(VALUE opt, int ecflags)
2411 {
2412  VALUE v;
2413 
2414  v = rb_hash_aref(opt, sym_invalid);
2415  if (NIL_P(v)) {
2416  }
2417  else if (v==sym_replace) {
2418  ecflags |= ECONV_INVALID_REPLACE;
2419  }
2420  else {
2421  rb_raise(rb_eArgError, "unknown value for invalid character option");
2422  }
2423 
2424  v = rb_hash_aref(opt, sym_undef);
2425  if (NIL_P(v)) {
2426  }
2427  else if (v==sym_replace) {
2428  ecflags |= ECONV_UNDEF_REPLACE;
2429  }
2430  else {
2431  rb_raise(rb_eArgError, "unknown value for undefined character option");
2432  }
2433 
2434  v = rb_hash_aref(opt, sym_replace);
2435  if (!NIL_P(v) && !(ecflags & ECONV_INVALID_REPLACE)) {
2436  ecflags |= ECONV_UNDEF_REPLACE;
2437  }
2438 
2439  v = rb_hash_aref(opt, sym_xml);
2440  if (!NIL_P(v)) {
2441  if (v==sym_text) {
2443  }
2444  else if (v==sym_attr) {
2446  }
2447  else if (RB_TYPE_P(v, T_SYMBOL)) {
2448  rb_raise(rb_eArgError, "unexpected value for xml option: %"PRIsVALUE, rb_sym2str(v));
2449  }
2450  else {
2451  rb_raise(rb_eArgError, "unexpected value for xml option");
2452  }
2453  }
2454 
2455 #ifdef ENABLE_ECONV_NEWLINE_OPTION
2456  v = rb_hash_aref(opt, sym_newline);
2457  if (!NIL_P(v)) {
2458  ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2459  if (v == sym_universal) {
2461  }
2462  else if (v == sym_crlf) {
2463  ecflags |= ECONV_CRLF_NEWLINE_DECORATOR;
2464  }
2465  else if (v == sym_cr) {
2466  ecflags |= ECONV_CR_NEWLINE_DECORATOR;
2467  }
2468  else if (v == sym_lf) {
2469  /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
2470  }
2471  else if (SYMBOL_P(v)) {
2472  rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
2473  rb_sym2str(v));
2474  }
2475  else {
2476  rb_raise(rb_eArgError, "unexpected value for newline option");
2477  }
2478  }
2479  else
2480 #endif
2481  {
2482  int setflags = 0, newlineflag = 0;
2483 
2484  v = rb_hash_aref(opt, sym_universal_newline);
2485  if (RTEST(v))
2487  newlineflag |= !NIL_P(v);
2488 
2489  v = rb_hash_aref(opt, sym_crlf_newline);
2490  if (RTEST(v))
2491  setflags |= ECONV_CRLF_NEWLINE_DECORATOR;
2492  newlineflag |= !NIL_P(v);
2493 
2494  v = rb_hash_aref(opt, sym_cr_newline);
2495  if (RTEST(v))
2496  setflags |= ECONV_CR_NEWLINE_DECORATOR;
2497  newlineflag |= !NIL_P(v);
2498 
2499  if (newlineflag) {
2500  ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2501  ecflags |= setflags;
2502  }
2503  }
2504 
2505  return ecflags;
2506 }
2507 
2508 int
2509 rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
2510 {
2511  VALUE newhash = Qnil;
2512  VALUE v;
2513 
2514  if (NIL_P(opthash)) {
2515  *opts = Qnil;
2516  return ecflags;
2517  }
2518  ecflags = econv_opts(opthash, ecflags);
2519 
2520  v = rb_hash_aref(opthash, sym_replace);
2521  if (!NIL_P(v)) {
2522  StringValue(v);
2524  VALUE dumped = rb_str_dump(v);
2525  rb_raise(rb_eArgError, "replacement string is broken: %s as %s",
2526  StringValueCStr(dumped),
2528  }
2529  v = rb_str_new_frozen(v);
2530  newhash = rb_hash_new();
2531  rb_hash_aset(newhash, sym_replace, v);
2532  }
2533 
2534  v = rb_hash_aref(opthash, sym_fallback);
2535  if (!NIL_P(v)) {
2537  if (NIL_P(h)
2538  ? (rb_obj_is_proc(v) || rb_obj_is_method(v) || rb_respond_to(v, sym_aref))
2539  : (v = h, 1)) {
2540  if (NIL_P(newhash))
2541  newhash = rb_hash_new();
2542  rb_hash_aset(newhash, sym_fallback, v);
2543  }
2544  }
2545 
2546  if (!NIL_P(newhash))
2547  rb_hash_freeze(newhash);
2548  *opts = newhash;
2549 
2550  return ecflags;
2551 }
2552 
2553 int
2555 {
2556  return rb_econv_prepare_options(opthash, opts, 0);
2557 }
2558 
2559 rb_econv_t *
2560 rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
2561 {
2562  rb_econv_t *ec;
2563  VALUE replacement;
2564 
2565  if (NIL_P(opthash)) {
2566  replacement = Qnil;
2567  }
2568  else {
2569  if (!RB_TYPE_P(opthash, T_HASH) || !OBJ_FROZEN(opthash))
2570  rb_bug("rb_econv_open_opts called with invalid opthash");
2571  replacement = rb_hash_aref(opthash, sym_replace);
2572  }
2573 
2574  ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
2575  if (!ec)
2576  return ec;
2577 
2578  if (!NIL_P(replacement)) {
2579  int ret;
2580  rb_encoding *enc = rb_enc_get(replacement);
2581 
2582  ret = rb_econv_set_replacement(ec,
2583  (const unsigned char *)RSTRING_PTR(replacement),
2584  RSTRING_LEN(replacement),
2585  rb_enc_name(enc));
2586  if (ret == -1) {
2587  rb_econv_close(ec);
2588  return NULL;
2589  }
2590  }
2591  return ec;
2592 }
2593 
2594 static int
2595 enc_arg(VALUE *arg, const char **name_p, rb_encoding **enc_p)
2596 {
2597  rb_encoding *enc;
2598  const char *n;
2599  int encidx;
2600  VALUE encval;
2601 
2602  if (((encidx = rb_to_encoding_index(encval = *arg)) < 0) ||
2603  !(enc = rb_enc_from_index(encidx))) {
2604  enc = NULL;
2605  encidx = 0;
2606  n = StringValueCStr(*arg);
2607  }
2608  else {
2609  n = rb_enc_name(enc);
2610  }
2611 
2612  *name_p = n;
2613  *enc_p = enc;
2614 
2615  return encidx;
2616 }
2617 
2618 static int
2619 str_transcode_enc_args(VALUE str, VALUE *arg1, VALUE *arg2,
2620  const char **sname_p, rb_encoding **senc_p,
2621  const char **dname_p, rb_encoding **denc_p)
2622 {
2623  rb_encoding *senc, *denc;
2624  const char *sname, *dname;
2625  int sencidx, dencidx;
2626 
2627  dencidx = enc_arg(arg1, &dname, &denc);
2628 
2629  if (NIL_P(*arg2)) {
2630  sencidx = rb_enc_get_index(str);
2631  senc = rb_enc_from_index(sencidx);
2632  sname = rb_enc_name(senc);
2633  }
2634  else {
2635  sencidx = enc_arg(arg2, &sname, &senc);
2636  }
2637 
2638  *sname_p = sname;
2639  *senc_p = senc;
2640  *dname_p = dname;
2641  *denc_p = denc;
2642  return dencidx;
2643 }
2644 
2645 static int
2646 str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
2647 {
2648  VALUE dest;
2649  VALUE str = *self;
2650  VALUE arg1, arg2;
2651  long blen, slen;
2652  unsigned char *buf, *bp, *sp;
2653  const unsigned char *fromp;
2654  rb_encoding *senc, *denc;
2655  const char *sname, *dname;
2656  int dencidx;
2657  int explicitly_invalid_replace = TRUE;
2658 
2659  rb_check_arity(argc, 0, 2);
2660 
2661  if (argc == 0) {
2662  arg1 = rb_enc_default_internal();
2663  if (NIL_P(arg1)) {
2664  if (!ecflags) return -1;
2665  arg1 = rb_obj_encoding(str);
2666  }
2667  if (!(ecflags & ECONV_INVALID_MASK)) {
2668  explicitly_invalid_replace = FALSE;
2669  }
2671  }
2672  else {
2673  arg1 = argv[0];
2674  }
2675  arg2 = argc<=1 ? Qnil : argv[1];
2676  dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2677 
2678  if ((ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
2682  if (senc && senc == denc) {
2683  if ((ecflags & ECONV_INVALID_MASK) && explicitly_invalid_replace) {
2684  VALUE rep = Qnil;
2685  if (!NIL_P(ecopts)) {
2686  rep = rb_hash_aref(ecopts, sym_replace);
2687  }
2688  dest = rb_enc_str_scrub(senc, str, rep);
2689  if (NIL_P(dest)) dest = str;
2690  *self = dest;
2691  return dencidx;
2692  }
2693  return NIL_P(arg2) ? -1 : dencidx;
2694  }
2695  if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
2697  return dencidx;
2698  }
2699  }
2700  if (encoding_equal(sname, dname)) {
2701  return NIL_P(arg2) ? -1 : dencidx;
2702  }
2703  }
2704  else {
2705  if (encoding_equal(sname, dname)) {
2706  sname = "";
2707  dname = "";
2708  }
2709  }
2710 
2711  fromp = sp = (unsigned char *)RSTRING_PTR(str);
2712  slen = RSTRING_LEN(str);
2713  blen = slen + 30; /* len + margin */
2714  dest = rb_str_tmp_new(blen);
2715  bp = (unsigned char *)RSTRING_PTR(dest);
2716 
2717  transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2718  if (fromp != sp+slen) {
2719  rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
2720  }
2721  buf = (unsigned char *)RSTRING_PTR(dest);
2722  *bp = '\0';
2723  rb_str_set_len(dest, bp - buf);
2724 
2725  /* set encoding */
2726  if (!denc) {
2727  dencidx = rb_define_dummy_encoding(dname);
2728  RB_GC_GUARD(arg1);
2729  RB_GC_GUARD(arg2);
2730  }
2731  *self = dest;
2732 
2733  return dencidx;
2734 }
2735 
2736 static int
2737 str_transcode(int argc, VALUE *argv, VALUE *self)
2738 {
2739  VALUE opt;
2740  int ecflags = 0;
2741  VALUE ecopts = Qnil;
2742 
2743  argc = rb_scan_args(argc, argv, "02:", NULL, NULL, &opt);
2744  if (!NIL_P(opt)) {
2745  ecflags = rb_econv_prepare_opts(opt, &ecopts);
2746  }
2747  return str_transcode0(argc, argv, self, ecflags, ecopts);
2748 }
2749 
2750 static inline VALUE
2751 str_encode_associate(VALUE str, int encidx)
2752 {
2753  int cr = 0;
2754 
2755  rb_enc_associate_index(str, encidx);
2756 
2757  /* transcoded string never be broken. */
2758  if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
2760  }
2761  else {
2762  cr = ENC_CODERANGE_VALID;
2763  }
2764  ENC_CODERANGE_SET(str, cr);
2765  return str;
2766 }
2767 
2768 /*
2769  * call-seq:
2770  * str.encode!(encoding [, options] ) -> str
2771  * str.encode!(dst_encoding, src_encoding [, options] ) -> str
2772  *
2773  * The first form transcodes the contents of <i>str</i> from
2774  * str.encoding to +encoding+.
2775  * The second form transcodes the contents of <i>str</i> from
2776  * src_encoding to dst_encoding.
2777  * The options Hash gives details for conversion. See String#encode
2778  * for details.
2779  * Returns the string even if no changes were made.
2780  */
2781 
2782 static VALUE
2783 str_encode_bang(int argc, VALUE *argv, VALUE str)
2784 {
2785  VALUE newstr;
2786  int encidx;
2787 
2789 
2790  newstr = str;
2791  encidx = str_transcode(argc, argv, &newstr);
2792 
2793  if (encidx < 0) return str;
2794  if (newstr == str) {
2795  rb_enc_associate_index(str, encidx);
2796  return str;
2797  }
2798  rb_str_shared_replace(str, newstr);
2799  return str_encode_associate(str, encidx);
2800 }
2801 
2802 static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);
2803 
2804 /*
2805  * call-seq:
2806  * str.encode(encoding [, options] ) -> str
2807  * str.encode(dst_encoding, src_encoding [, options] ) -> str
2808  * str.encode([options]) -> str
2809  *
2810  * The first form returns a copy of +str+ transcoded
2811  * to encoding +encoding+.
2812  * The second form returns a copy of +str+ transcoded
2813  * from src_encoding to dst_encoding.
2814  * The last form returns a copy of +str+ transcoded to
2815  * <tt>Encoding.default_internal</tt>.
2816  *
2817  * By default, the first and second form raise
2818  * Encoding::UndefinedConversionError for characters that are
2819  * undefined in the destination encoding, and
2820  * Encoding::InvalidByteSequenceError for invalid byte sequences
2821  * in the source encoding. The last form by default does not raise
2822  * exceptions but uses replacement strings.
2823  *
2824  * The +options+ Hash gives details for conversion and can have the following
2825  * keys:
2826  *
2827  * :invalid ::
2828  * If the value is +:replace+, #encode replaces invalid byte sequences in
2829  * +str+ with the replacement character. The default is to raise the
2830  * Encoding::InvalidByteSequenceError exception
2831  * :undef ::
2832  * If the value is +:replace+, #encode replaces characters which are
2833  * undefined in the destination encoding with the replacement character.
2834  * The default is to raise the Encoding::UndefinedConversionError.
2835  * :replace ::
2836  * Sets the replacement string to the given value. The default replacement
2837  * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
2838  * :fallback ::
2839  * Sets the replacement string by the given object for undefined
2840  * character. The object should be a Hash, a Proc, a Method, or an
2841  * object which has [] method.
2842  * Its key is an undefined character encoded in the source encoding
2843  * of current transcoder. Its value can be any encoding until it
2844  * can be converted into the destination encoding of the transcoder.
2845  * :xml ::
2846  * The value must be +:text+ or +:attr+.
2847  * If the value is +:text+ #encode replaces undefined characters with their
2848  * (upper-case hexadecimal) numeric character references. '&', '<', and '>'
2849  * are converted to "&amp;", "&lt;", and "&gt;", respectively.
2850  * If the value is +:attr+, #encode also quotes the replacement result
2851  * (using '"'), and replaces '"' with "&quot;".
2852  * :cr_newline ::
2853  * Replaces LF ("\n") with CR ("\r") if value is true.
2854  * :crlf_newline ::
2855  * Replaces LF ("\n") with CRLF ("\r\n") if value is true.
2856  * :universal_newline ::
2857  * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
2858  */
2859 
2860 static VALUE
2861 str_encode(int argc, VALUE *argv, VALUE str)
2862 {
2863  VALUE newstr = str;
2864  int encidx = str_transcode(argc, argv, &newstr);
2865  return encoded_dup(newstr, str, encidx);
2866 }
2867 
2868 VALUE
2869 rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
2870 {
2871  int argc = 1;
2872  VALUE *argv = &to;
2873  VALUE newstr = str;
2874  int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts);
2875  return encoded_dup(newstr, str, encidx);
2876 }
2877 
2878 static VALUE
2879 encoded_dup(VALUE newstr, VALUE str, int encidx)
2880 {
2881  if (encidx < 0) return rb_str_dup(str);
2882  if (newstr == str) {
2883  newstr = rb_str_dup(str);
2884  rb_enc_associate_index(newstr, encidx);
2885  return newstr;
2886  }
2887  else {
2888  RBASIC_SET_CLASS(newstr, rb_obj_class(str));
2889  }
2890  return str_encode_associate(newstr, encidx);
2891 }
2892 
2893 /*
2894  * Document-class: Encoding::Converter
2895  *
2896  * Encoding conversion class.
2897  */
2898 static void
2899 econv_free(void *ptr)
2900 {
2901  rb_econv_t *ec = ptr;
2902  rb_econv_close(ec);
2903 }
2904 
2905 static size_t
2906 econv_memsize(const void *ptr)
2907 {
2908  return sizeof(rb_econv_t);
2909 }
2910 
2911 static const rb_data_type_t econv_data_type = {
2912  "econv",
2913  {NULL, econv_free, econv_memsize,},
2915 };
2916 
2917 static VALUE
2918 econv_s_allocate(VALUE klass)
2919 {
2920  return TypedData_Wrap_Struct(klass, &econv_data_type, NULL);
2921 }
2922 
2923 static rb_encoding *
2924 make_dummy_encoding(const char *name)
2925 {
2926  rb_encoding *enc;
2927  int idx;
2929  enc = rb_enc_from_index(idx);
2930  return enc;
2931 }
2932 
2933 static rb_encoding *
2934 make_encoding(const char *name)
2935 {
2936  rb_encoding *enc;
2937  enc = rb_enc_find(name);
2938  if (!enc)
2939  enc = make_dummy_encoding(name);
2940  return enc;
2941 }
2942 
2943 static VALUE
2944 make_encobj(const char *name)
2945 {
2946  return rb_enc_from_encoding(make_encoding(name));
2947 }
2948 
2949 /*
2950  * call-seq:
2951  * Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
2952  * Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
2953  *
2954  * Returns the corresponding ASCII compatible encoding.
2955  *
2956  * Returns nil if the argument is an ASCII compatible encoding.
2957  *
2958  * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
2959  * can represent exactly the same characters as the given ASCII incompatible encoding.
2960  * So, no conversion undefined error occurs when converting between the two encodings.
2961  *
2962  * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
2963  * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
2964  * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
2965  *
2966  */
2967 static VALUE
2968 econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
2969 {
2970  const char *arg_name, *result_name;
2971  rb_encoding *arg_enc, *result_enc;
2972 
2973  enc_arg(&arg, &arg_name, &arg_enc);
2974 
2975  result_name = rb_econv_asciicompat_encoding(arg_name);
2976 
2977  if (result_name == NULL)
2978  return Qnil;
2979 
2980  result_enc = make_encoding(result_name);
2981 
2982  return rb_enc_from_encoding(result_enc);
2983 }
2984 
2985 static void
2986 econv_args(int argc, VALUE *argv,
2987  VALUE *snamev_p, VALUE *dnamev_p,
2988  const char **sname_p, const char **dname_p,
2989  rb_encoding **senc_p, rb_encoding **denc_p,
2990  int *ecflags_p,
2991  VALUE *ecopts_p)
2992 {
2993  VALUE opt, flags_v, ecopts;
2994  int sidx, didx;
2995  const char *sname, *dname;
2996  rb_encoding *senc, *denc;
2997  int ecflags;
2998 
2999  argc = rb_scan_args(argc, argv, "21:", snamev_p, dnamev_p, &flags_v, &opt);
3000 
3001  if (!NIL_P(flags_v)) {
3002  if (!NIL_P(opt)) {
3003  rb_error_arity(argc + 1, 2, 3);
3004  }
3005  ecflags = NUM2INT(rb_to_int(flags_v));
3006  ecopts = Qnil;
3007  }
3008  else if (!NIL_P(opt)) {
3009  ecflags = rb_econv_prepare_opts(opt, &ecopts);
3010  }
3011  else {
3012  ecflags = 0;
3013  ecopts = Qnil;
3014  }
3015 
3016  senc = NULL;
3017  sidx = rb_to_encoding_index(*snamev_p);
3018  if (0 <= sidx) {
3019  senc = rb_enc_from_index(sidx);
3020  }
3021  else {
3022  StringValue(*snamev_p);
3023  }
3024 
3025  denc = NULL;
3026  didx = rb_to_encoding_index(*dnamev_p);
3027  if (0 <= didx) {
3028  denc = rb_enc_from_index(didx);
3029  }
3030  else {
3031  StringValue(*dnamev_p);
3032  }
3033 
3034  sname = senc ? rb_enc_name(senc) : StringValueCStr(*snamev_p);
3035  dname = denc ? rb_enc_name(denc) : StringValueCStr(*dnamev_p);
3036 
3037  *sname_p = sname;
3038  *dname_p = dname;
3039  *senc_p = senc;
3040  *denc_p = denc;
3041  *ecflags_p = ecflags;
3042  *ecopts_p = ecopts;
3043 }
3044 
3045 static int
3046 decorate_convpath(VALUE convpath, int ecflags)
3047 {
3048  int num_decorators;
3049  const char *decorators[MAX_ECFLAGS_DECORATORS];
3050  int i;
3051  int n, len;
3052 
3053  num_decorators = decorator_names(ecflags, decorators);
3054  if (num_decorators == -1)
3055  return -1;
3056 
3057  len = n = RARRAY_LENINT(convpath);
3058  if (n != 0) {
3059  VALUE pair = RARRAY_AREF(convpath, n-1);
3060  if (RB_TYPE_P(pair, T_ARRAY)) {
3061  const char *sname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 0)));
3062  const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
3063  transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
3064  const rb_transcoder *tr = load_transcoder_entry(entry);
3065  if (!tr)
3066  return -1;
3067  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
3068  tr->asciicompat_type == asciicompat_encoder) {
3069  n--;
3070  rb_ary_store(convpath, len + num_decorators - 1, pair);
3071  }
3072  }
3073  else {
3074  rb_ary_store(convpath, len + num_decorators - 1, pair);
3075  }
3076  }
3077 
3078  for (i = 0; i < num_decorators; i++)
3079  rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
3080 
3081  return 0;
3082 }
3083 
3084 static void
3085 search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
3086 {
3087  VALUE *ary_p = arg;
3088  VALUE v;
3089 
3090  if (*ary_p == Qnil) {
3091  *ary_p = rb_ary_new();
3092  }
3093 
3094  if (DECORATOR_P(sname, dname)) {
3095  v = rb_str_new_cstr(dname);
3096  }
3097  else {
3098  v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
3099  }
3100  rb_ary_store(*ary_p, depth, v);
3101 }
3102 
3103 /*
3104  * call-seq:
3105  * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
3106  * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
3107  *
3108  * Returns a conversion path.
3109  *
3110  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
3111  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3112  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
3113  *
3114  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
3115  * or
3116  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", newline: :universal)
3117  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3118  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
3119  * # "universal_newline"]
3120  *
3121  * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
3122  * or
3123  * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", newline: :universal)
3124  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3125  * # "universal_newline",
3126  * # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
3127  */
3128 static VALUE
3129 econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
3130 {
3131  VALUE snamev, dnamev;
3132  const char *sname, *dname;
3133  rb_encoding *senc, *denc;
3134  int ecflags;
3135  VALUE ecopts;
3136  VALUE convpath;
3137 
3138  econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3139 
3140  convpath = Qnil;
3141  transcode_search_path(sname, dname, search_convpath_i, &convpath);
3142 
3143  if (NIL_P(convpath)) {
3144  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3145  RB_GC_GUARD(snamev);
3146  RB_GC_GUARD(dnamev);
3147  rb_exc_raise(exc);
3148  }
3149 
3150  if (decorate_convpath(convpath, ecflags) == -1) {
3151  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3152  RB_GC_GUARD(snamev);
3153  RB_GC_GUARD(dnamev);
3154  rb_exc_raise(exc);
3155  }
3156 
3157  return convpath;
3158 }
3159 
3160 /*
3161  * Check the existence of a conversion path.
3162  * Returns the number of converters in the conversion path.
3163  * result: >=0:success -1:failure
3164  */
3165 int
3166 rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding)
3167 {
3168  VALUE convpath = Qnil;
3169  transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3170  &convpath);
3171  return RTEST(convpath);
3172 }
3173 
3176  int index;
3177  int ret;
3178 };
3179 
3180 static void
3181 rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
3182 {
3184  int ret;
3185 
3186  if (a->ret == -1)
3187  return;
3188 
3189  ret = rb_econv_add_converter(a->ec, sname, dname, a->index);
3190 
3191  a->ret = ret;
3192  return;
3193 }
3194 
3195 static rb_econv_t *
3196 rb_econv_init_by_convpath(VALUE self, VALUE convpath,
3197  const char **sname_p, const char **dname_p,
3198  rb_encoding **senc_p, rb_encoding**denc_p)
3199 {
3200  rb_econv_t *ec;
3201  long i;
3202  int ret, first=1;
3203  VALUE elt;
3204  rb_encoding *senc = 0, *denc = 0;
3205  const char *sname, *dname;
3206 
3207  ec = rb_econv_alloc(RARRAY_LENINT(convpath));
3208  DATA_PTR(self) = ec;
3209 
3210  for (i = 0; i < RARRAY_LEN(convpath); i++) {
3211  VALUE snamev, dnamev;
3212  VALUE pair;
3213  elt = rb_ary_entry(convpath, i);
3214  if (!NIL_P(pair = rb_check_array_type(elt))) {
3215  if (RARRAY_LEN(pair) != 2)
3216  rb_raise(rb_eArgError, "not a 2-element array in convpath");
3217  snamev = rb_ary_entry(pair, 0);
3218  enc_arg(&snamev, &sname, &senc);
3219  dnamev = rb_ary_entry(pair, 1);
3220  enc_arg(&dnamev, &dname, &denc);
3221  }
3222  else {
3223  sname = "";
3224  dname = StringValueCStr(elt);
3225  }
3226  if (DECORATOR_P(sname, dname)) {
3227  ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
3228  if (ret == -1) {
3229  VALUE msg = rb_sprintf("decoration failed: %s", dname);
3230  RB_GC_GUARD(snamev);
3231  RB_GC_GUARD(dnamev);
3233  }
3234  }
3235  else {
3236  int j = ec->num_trans;
3238  arg.ec = ec;
3239  arg.index = ec->num_trans;
3240  arg.ret = 0;
3241  ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
3242  if (ret == -1 || arg.ret == -1) {
3243  VALUE msg = rb_sprintf("adding conversion failed: %s to %s", sname, dname);
3244  RB_GC_GUARD(snamev);
3245  RB_GC_GUARD(dnamev);
3247  }
3248  if (first) {
3249  first = 0;
3250  *senc_p = senc;
3251  *sname_p = ec->elems[j].tc->transcoder->src_encoding;
3252  }
3253  *denc_p = denc;
3254  *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
3255  }
3256  }
3257 
3258  if (first) {
3259  *senc_p = NULL;
3260  *denc_p = NULL;
3261  *sname_p = "";
3262  *dname_p = "";
3263  }
3264 
3265  ec->source_encoding_name = *sname_p;
3266  ec->destination_encoding_name = *dname_p;
3267 
3268  return ec;
3269 }
3270 
3271 /*
3272  * call-seq:
3273  * Encoding::Converter.new(source_encoding, destination_encoding)
3274  * Encoding::Converter.new(source_encoding, destination_encoding, opt)
3275  * Encoding::Converter.new(convpath)
3276  *
3277  * possible options elements:
3278  * hash form:
3279  * :invalid => nil # raise error on invalid byte sequence (default)
3280  * :invalid => :replace # replace invalid byte sequence
3281  * :undef => nil # raise error on undefined conversion (default)
3282  * :undef => :replace # replace undefined conversion
3283  * :replace => string # replacement string ("?" or "\uFFFD" if not specified)
3284  * :newline => :universal # decorator for converting CRLF and CR to LF
3285  * :newline => :crlf # decorator for converting LF to CRLF
3286  * :newline => :cr # decorator for converting LF to CR
3287  * :universal_newline => true # decorator for converting CRLF and CR to LF
3288  * :crlf_newline => true # decorator for converting LF to CRLF
3289  * :cr_newline => true # decorator for converting LF to CR
3290  * :xml => :text # escape as XML CharData.
3291  * :xml => :attr # escape as XML AttValue
3292  * integer form:
3293  * Encoding::Converter::INVALID_REPLACE
3294  * Encoding::Converter::UNDEF_REPLACE
3295  * Encoding::Converter::UNDEF_HEX_CHARREF
3296  * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
3297  * Encoding::Converter::CRLF_NEWLINE_DECORATOR
3298  * Encoding::Converter::CR_NEWLINE_DECORATOR
3299  * Encoding::Converter::XML_TEXT_DECORATOR
3300  * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
3301  * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
3302  *
3303  * Encoding::Converter.new creates an instance of Encoding::Converter.
3304  *
3305  * Source_encoding and destination_encoding should be a string or
3306  * Encoding object.
3307  *
3308  * opt should be nil, a hash or an integer.
3309  *
3310  * convpath should be an array.
3311  * convpath may contain
3312  * - two-element arrays which contain encodings or encoding names, or
3313  * - strings representing decorator names.
3314  *
3315  * Encoding::Converter.new optionally takes an option.
3316  * The option should be a hash or an integer.
3317  * The option hash can contain :invalid => nil, etc.
3318  * The option integer should be logical-or of constants such as
3319  * Encoding::Converter::INVALID_REPLACE, etc.
3320  *
3321  * [:invalid => nil]
3322  * Raise error on invalid byte sequence. This is a default behavior.
3323  * [:invalid => :replace]
3324  * Replace invalid byte sequence by replacement string.
3325  * [:undef => nil]
3326  * Raise an error if a character in source_encoding is not defined in destination_encoding.
3327  * This is a default behavior.
3328  * [:undef => :replace]
3329  * Replace undefined character in destination_encoding with replacement string.
3330  * [:replace => string]
3331  * Specify the replacement string.
3332  * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others.
3333  * [:universal_newline => true]
3334  * Convert CRLF and CR to LF.
3335  * [:crlf_newline => true]
3336  * Convert LF to CRLF.
3337  * [:cr_newline => true]
3338  * Convert LF to CR.
3339  * [:xml => :text]
3340  * Escape as XML CharData.
3341  * This form can be used as an HTML 4.0 #PCDATA.
3342  * - '&' -> '&amp;'
3343  * - '<' -> '&lt;'
3344  * - '>' -> '&gt;'
3345  * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
3346  * [:xml => :attr]
3347  * Escape as XML AttValue.
3348  * The converted result is quoted as "...".
3349  * This form can be used as an HTML 4.0 attribute value.
3350  * - '&' -> '&amp;'
3351  * - '<' -> '&lt;'
3352  * - '>' -> '&gt;'
3353  * - '"' -> '&quot;'
3354  * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
3355  *
3356  * Examples:
3357  * # UTF-16BE to UTF-8
3358  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
3359  *
3360  * # Usually, decorators such as newline conversion are inserted last.
3361  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
3362  * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
3363  * # "universal_newline"]
3364  *
3365  * # But, if the last encoding is ASCII incompatible,
3366  * # decorators are inserted before the last conversion.
3367  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
3368  * p ec.convpath #=> ["crlf_newline",
3369  * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
3370  *
3371  * # Conversion path can be specified directly.
3372  * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
3373  * p ec.convpath #=> ["universal_newline",
3374  * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
3375  * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
3376  */
3377 static VALUE
3378 econv_init(int argc, VALUE *argv, VALUE self)
3379 {
3380  VALUE ecopts;
3381  VALUE snamev, dnamev;
3382  const char *sname, *dname;
3383  rb_encoding *senc, *denc;
3384  rb_econv_t *ec;
3385  int ecflags;
3386  VALUE convpath;
3387 
3388  if (rb_check_typeddata(self, &econv_data_type)) {
3389  rb_raise(rb_eTypeError, "already initialized");
3390  }
3391 
3392  if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) {
3393  ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
3394  ecflags = 0;
3395  ecopts = Qnil;
3396  }
3397  else {
3398  econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3399  ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
3400  }
3401 
3402  if (!ec) {
3403  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3404  RB_GC_GUARD(snamev);
3405  RB_GC_GUARD(dnamev);
3406  rb_exc_raise(exc);
3407  }
3408 
3409  if (!DECORATOR_P(sname, dname)) {
3410  if (!senc)
3411  senc = make_dummy_encoding(sname);
3412  if (!denc)
3413  denc = make_dummy_encoding(dname);
3414  RB_GC_GUARD(snamev);
3415  RB_GC_GUARD(dnamev);
3416  }
3417 
3418  ec->source_encoding = senc;
3419  ec->destination_encoding = denc;
3420 
3421  DATA_PTR(self) = ec;
3422 
3423  return self;
3424 }
3425 
3426 /*
3427  * call-seq:
3428  * ec.inspect -> string
3429  *
3430  * Returns a printable version of <i>ec</i>
3431  *
3432  * ec = Encoding::Converter.new("iso-8859-1", "utf-8")
3433  * puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
3434  *
3435  */
3436 static VALUE
3437 econv_inspect(VALUE self)
3438 {
3439  const char *cname = rb_obj_classname(self);
3440  rb_econv_t *ec;
3441 
3442  TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
3443  if (!ec)
3444  return rb_sprintf("#<%s: uninitialized>", cname);
3445  else {
3446  const char *sname = ec->source_encoding_name;
3447  const char *dname = ec->destination_encoding_name;
3448  VALUE str;
3449  str = rb_sprintf("#<%s: ", cname);
3450  econv_description(sname, dname, ec->flags, str);
3451  rb_str_cat2(str, ">");
3452  return str;
3453  }
3454 }
3455 
3456 static rb_econv_t *
3457 check_econv(VALUE self)
3458 {
3459  rb_econv_t *ec;
3460 
3461  TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
3462  if (!ec) {
3463  rb_raise(rb_eTypeError, "uninitialized encoding converter");
3464  }
3465  return ec;
3466 }
3467 
3468 /*
3469  * call-seq:
3470  * ec.source_encoding -> encoding
3471  *
3472  * Returns the source encoding as an Encoding object.
3473  */
3474 static VALUE
3475 econv_source_encoding(VALUE self)
3476 {
3477  rb_econv_t *ec = check_econv(self);
3478  if (!ec->source_encoding)
3479  return Qnil;
3481 }
3482 
3483 /*
3484  * call-seq:
3485  * ec.destination_encoding -> encoding
3486  *
3487  * Returns the destination encoding as an Encoding object.
3488  */
3489 static VALUE
3490 econv_destination_encoding(VALUE self)
3491 {
3492  rb_econv_t *ec = check_econv(self);
3493  if (!ec->destination_encoding)
3494  return Qnil;
3496 }
3497 
3498 /*
3499  * call-seq:
3500  * ec.convpath -> ary
3501  *
3502  * Returns the conversion path of ec.
3503  *
3504  * The result is an array of conversions.
3505  *
3506  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
3507  * p ec.convpath
3508  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3509  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
3510  * # "crlf_newline"]
3511  *
3512  * Each element of the array is a pair of encodings or a string.
3513  * A pair means an encoding conversion.
3514  * A string means a decorator.
3515  *
3516  * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means
3517  * a converter from ISO-8859-1 to UTF-8.
3518  * "crlf_newline" means newline converter from LF to CRLF.
3519  */
3520 static VALUE
3521 econv_convpath(VALUE self)
3522 {
3523  rb_econv_t *ec = check_econv(self);
3524  VALUE result;
3525  int i;
3526 
3527  result = rb_ary_new();
3528  for (i = 0; i < ec->num_trans; i++) {
3529  const rb_transcoder *tr = ec->elems[i].tc->transcoder;
3530  VALUE v;
3531  if (DECORATOR_P(tr->src_encoding, tr->dst_encoding))
3532  v = rb_str_new_cstr(tr->dst_encoding);
3533  else
3534  v = rb_assoc_new(make_encobj(tr->src_encoding), make_encobj(tr->dst_encoding));
3535  rb_ary_push(result, v);
3536  }
3537  return result;
3538 }
3539 
3540 /*
3541  * call-seq:
3542  * ec == other -> true or false
3543  */
3544 static VALUE
3545 econv_equal(VALUE self, VALUE other)
3546 {
3547  rb_econv_t *ec1 = check_econv(self);
3548  rb_econv_t *ec2;
3549  int i;
3550 
3551  if (!rb_typeddata_is_kind_of(other, &econv_data_type)) {
3552  return Qnil;
3553  }
3554  ec2 = DATA_PTR(other);
3555  if (!ec2) return Qfalse;
3556  if (ec1->source_encoding_name != ec2->source_encoding_name &&
3558  return Qfalse;
3561  return Qfalse;
3562  if (ec1->flags != ec2->flags) return Qfalse;
3563  if (ec1->replacement_enc != ec2->replacement_enc &&
3565  return Qfalse;
3566  if (ec1->replacement_len != ec2->replacement_len) return Qfalse;
3567  if (ec1->replacement_str != ec2->replacement_str &&
3569  return Qfalse;
3570 
3571  if (ec1->num_trans != ec2->num_trans) return Qfalse;
3572  for (i = 0; i < ec1->num_trans; i++) {
3573  if (ec1->elems[i].tc->transcoder != ec2->elems[i].tc->transcoder)
3574  return Qfalse;
3575  }
3576  return Qtrue;
3577 }
3578 
3579 static VALUE
3580 econv_result_to_symbol(rb_econv_result_t res)
3581 {
3582  switch (res) {
3583  case econv_invalid_byte_sequence: return sym_invalid_byte_sequence;
3584  case econv_incomplete_input: return sym_incomplete_input;
3585  case econv_undefined_conversion: return sym_undefined_conversion;
3586  case econv_destination_buffer_full: return sym_destination_buffer_full;
3587  case econv_source_buffer_empty: return sym_source_buffer_empty;
3588  case econv_finished: return sym_finished;
3589  case econv_after_output: return sym_after_output;
3590  default: return INT2NUM(res); /* should not be reached */
3591  }
3592 }
3593 
3594 /*
3595  * call-seq:
3596  * ec.primitive_convert(source_buffer, destination_buffer) -> symbol
3597  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
3598  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
3599  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
3600  *
3601  * possible opt elements:
3602  * hash form:
3603  * :partial_input => true # source buffer may be part of larger source
3604  * :after_output => true # stop conversion after output before input
3605  * integer form:
3606  * Encoding::Converter::PARTIAL_INPUT
3607  * Encoding::Converter::AFTER_OUTPUT
3608  *
3609  * possible results:
3610  * :invalid_byte_sequence
3611  * :incomplete_input
3612  * :undefined_conversion
3613  * :after_output
3614  * :destination_buffer_full
3615  * :source_buffer_empty
3616  * :finished
3617  *
3618  * primitive_convert converts source_buffer into destination_buffer.
3619  *
3620  * source_buffer should be a string or nil.
3621  * nil means an empty string.
3622  *
3623  * destination_buffer should be a string.
3624  *
3625  * destination_byteoffset should be an integer or nil.
3626  * nil means the end of destination_buffer.
3627  * If it is omitted, nil is assumed.
3628  *
3629  * destination_bytesize should be an integer or nil.
3630  * nil means unlimited.
3631  * If it is omitted, nil is assumed.
3632  *
3633  * opt should be nil, a hash or an integer.
3634  * nil means no flags.
3635  * If it is omitted, nil is assumed.
3636  *
3637  * primitive_convert converts the content of source_buffer from the beginning
3638  * and stores the result into destination_buffer.
3639  *
3640  * destination_byteoffset and destination_bytesize specify the region which
3641  * the converted result is stored.
3642  * destination_byteoffset specifies the start position in destination_buffer in bytes.
3643  * If destination_byteoffset is nil,
3644  * destination_buffer.bytesize is used for appending the result.
3645  * destination_bytesize specifies maximum number of bytes.
3646  * If destination_bytesize is nil,
3647  * destination size is unlimited.
3648  * After conversion, destination_buffer is resized to
3649  * destination_byteoffset + actually produced number of bytes.
3650  * Also destination_buffer's encoding is set to destination_encoding.
3651  *
3652  * primitive_convert drops the converted part of source_buffer.
3653  * the dropped part is converted in destination_buffer or
3654  * buffered in Encoding::Converter object.
3655  *
3656  * primitive_convert stops conversion when one of the following conditions is met.
3657  * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
3658  * +primitive_errinfo+ and +last_error+ methods returns the detail of the error.
3659  * - unexpected end of source buffer (:incomplete_input)
3660  * this occur only when :partial_input is not specified.
3661  * +primitive_errinfo+ and +last_error+ methods returns the detail of the error.
3662  * - character not representable in output encoding (:undefined_conversion)
3663  * +primitive_errinfo+ and +last_error+ methods returns the detail of the error.
3664  * - after some output is generated, before input is done (:after_output)
3665  * this occur only when :after_output is specified.
3666  * - destination buffer is full (:destination_buffer_full)
3667  * this occur only when destination_bytesize is non-nil.
3668  * - source buffer is empty (:source_buffer_empty)
3669  * this occur only when :partial_input is specified.
3670  * - conversion is finished (:finished)
3671  *
3672  * example:
3673  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
3674  * ret = ec.primitive_convert(src="pi", dst="", nil, 100)
3675  * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
3676  *
3677  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
3678  * ret = ec.primitive_convert(src="pi", dst="", nil, 1)
3679  * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
3680  * ret = ec.primitive_convert(src, dst="", nil, 1)
3681  * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
3682  * ret = ec.primitive_convert(src, dst="", nil, 1)
3683  * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
3684  * ret = ec.primitive_convert(src, dst="", nil, 1)
3685  * p [ret, src, dst] #=> [:finished, "", "i"]
3686  *
3687  */
3688 static VALUE
3689 econv_primitive_convert(int argc, VALUE *argv, VALUE self)
3690 {
3691  VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3692  rb_econv_t *ec = check_econv(self);
3693  rb_econv_result_t res;
3694  const unsigned char *ip, *is;
3695  unsigned char *op, *os;
3696  long output_byteoffset, output_bytesize;
3697  unsigned long output_byteend;
3698  int flags;
3699 
3700  argc = rb_scan_args(argc, argv, "23:", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v, &opt);
3701 
3702  if (NIL_P(output_byteoffset_v))
3703  output_byteoffset = 0; /* dummy */
3704  else
3705  output_byteoffset = NUM2LONG(output_byteoffset_v);
3706 
3707  if (NIL_P(output_bytesize_v))
3708  output_bytesize = 0; /* dummy */
3709  else
3710  output_bytesize = NUM2LONG(output_bytesize_v);
3711 
3712  if (!NIL_P(flags_v)) {
3713  if (!NIL_P(opt)) {
3714  rb_error_arity(argc + 1, 2, 5);
3715  }
3716  flags = NUM2INT(rb_to_int(flags_v));
3717  }
3718  else if (!NIL_P(opt)) {
3719  VALUE v;
3720  flags = 0;
3721  v = rb_hash_aref(opt, sym_partial_input);
3722  if (RTEST(v))
3723  flags |= ECONV_PARTIAL_INPUT;
3724  v = rb_hash_aref(opt, sym_after_output);
3725  if (RTEST(v))
3726  flags |= ECONV_AFTER_OUTPUT;
3727  }
3728  else {
3729  flags = 0;
3730  }
3731 
3732  StringValue(output);
3733  if (!NIL_P(input))
3734  StringValue(input);
3735  rb_str_modify(output);
3736 
3737  if (NIL_P(output_bytesize_v)) {
3738  output_bytesize = RSTRING_EMBED_LEN_MAX;
3739  if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
3740  output_bytesize = RSTRING_LEN(input);
3741  }
3742 
3743  retry:
3744 
3745  if (NIL_P(output_byteoffset_v))
3746  output_byteoffset = RSTRING_LEN(output);
3747 
3748  if (output_byteoffset < 0)
3749  rb_raise(rb_eArgError, "negative output_byteoffset");
3750 
3751  if (RSTRING_LEN(output) < output_byteoffset)
3752  rb_raise(rb_eArgError, "output_byteoffset too big");
3753 
3754  if (output_bytesize < 0)
3755  rb_raise(rb_eArgError, "negative output_bytesize");
3756 
3757  output_byteend = (unsigned long)output_byteoffset +
3758  (unsigned long)output_bytesize;
3759 
3760  if (output_byteend < (unsigned long)output_byteoffset ||
3761  LONG_MAX < output_byteend)
3762  rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");
3763 
3764  if (rb_str_capacity(output) < output_byteend)
3765  rb_str_resize(output, output_byteend);
3766 
3767  if (NIL_P(input)) {
3768  ip = is = NULL;
3769  }
3770  else {
3771  ip = (const unsigned char *)RSTRING_PTR(input);
3772  is = ip + RSTRING_LEN(input);
3773  }
3774 
3775  op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
3776  os = op + output_bytesize;
3777 
3778  res = rb_econv_convert(ec, &ip, is, &op, os, flags);
3779  rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
3780  if (!NIL_P(input)) {
3781  rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
3782  }
3783 
3784  if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) {
3785  if (LONG_MAX / 2 < output_bytesize)
3786  rb_raise(rb_eArgError, "too long conversion result");
3787  output_bytesize *= 2;
3788  output_byteoffset_v = Qnil;
3789  goto retry;
3790  }
3791 
3792  if (ec->destination_encoding) {
3794  }
3795 
3796  return econv_result_to_symbol(res);
3797 }
3798 
3799 /*
3800  * call-seq:
3801  * ec.convert(source_string) -> destination_string
3802  *
3803  * Convert source_string and return destination_string.
3804  *
3805  * source_string is assumed as a part of source.
3806  * i.e. :partial_input=>true is specified internally.
3807  * finish method should be used last.
3808  *
3809  * ec = Encoding::Converter.new("utf-8", "euc-jp")
3810  * puts ec.convert("\u3042").dump #=> "\xA4\xA2"
3811  * puts ec.finish.dump #=> ""
3812  *
3813  * ec = Encoding::Converter.new("euc-jp", "utf-8")
3814  * puts ec.convert("\xA4").dump #=> ""
3815  * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
3816  * puts ec.finish.dump #=> ""
3817  *
3818  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
3819  * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
3820  * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
3821  * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
3822  * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
3823  *
3824  * If a conversion error occurs,
3825  * Encoding::UndefinedConversionError or
3826  * Encoding::InvalidByteSequenceError is raised.
3827  * Encoding::Converter#convert doesn't supply methods to recover or restart
3828  * from these exceptions.
3829  * When you want to handle these conversion errors,
3830  * use Encoding::Converter#primitive_convert.
3831  *
3832  */
3833 static VALUE
3834 econv_convert(VALUE self, VALUE source_string)
3835 {
3836  VALUE ret, dst;
3837  VALUE av[5];
3838  int ac;
3839  rb_econv_t *ec = check_econv(self);
3840 
3841  StringValue(source_string);
3842 
3843  dst = rb_str_new(NULL, 0);
3844 
3845  av[0] = rb_str_dup(source_string);
3846  av[1] = dst;
3847  av[2] = Qnil;
3848  av[3] = Qnil;
3849  av[4] = INT2NUM(ECONV_PARTIAL_INPUT);
3850  ac = 5;
3851 
3852  ret = econv_primitive_convert(ac, av, self);
3853 
3854  if (ret == sym_invalid_byte_sequence ||
3855  ret == sym_undefined_conversion ||
3856  ret == sym_incomplete_input) {
3857  VALUE exc = make_econv_exception(ec);
3858  rb_exc_raise(exc);
3859  }
3860 
3861  if (ret == sym_finished) {
3862  rb_raise(rb_eArgError, "converter already finished");
3863  }
3864 
3865  if (ret != sym_source_buffer_empty) {
3866  rb_bug("unexpected result of econv_primitive_convert");
3867  }
3868 
3869  return dst;
3870 }
3871 
3872 /*
3873  * call-seq:
3874  * ec.finish -> string
3875  *
3876  * Finishes the converter.
3877  * It returns the last part of the converted string.
3878  *
3879  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
3880  * p ec.convert("\u3042") #=> "\e$B$\""
3881  * p ec.finish #=> "\e(B"
3882  */
3883 static VALUE
3884 econv_finish(VALUE self)
3885 {
3886  VALUE ret, dst;
3887  VALUE av[5];
3888  int ac;
3889  rb_econv_t *ec = check_econv(self);
3890 
3891  dst = rb_str_new(NULL, 0);
3892 
3893  av[0] = Qnil;
3894  av[1] = dst;
3895  av[2] = Qnil;
3896  av[3] = Qnil;
3897  av[4] = INT2FIX(0);
3898  ac = 5;
3899 
3900  ret = econv_primitive_convert(ac, av, self);
3901 
3902  if (ret == sym_invalid_byte_sequence ||
3903  ret == sym_undefined_conversion ||
3904  ret == sym_incomplete_input) {
3905  VALUE exc = make_econv_exception(ec);
3906  rb_exc_raise(exc);
3907  }
3908 
3909  if (ret != sym_finished) {
3910  rb_bug("unexpected result of econv_primitive_convert");
3911  }
3912 
3913  return dst;
3914 }
3915 
3916 /*
3917  * call-seq:
3918  * ec.primitive_errinfo -> array
3919  *
3920  * primitive_errinfo returns important information regarding the last error
3921  * as a 5-element array:
3922  *
3923  * [result, enc1, enc2, error_bytes, readagain_bytes]
3924  *
3925  * result is the last result of primitive_convert.
3926  *
3927  * Other elements are only meaningful when result is
3928  * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
3929  *
3930  * enc1 and enc2 indicate a conversion step as a pair of strings.
3931  * For example, a converter from EUC-JP to ISO-8859-1 converts
3932  * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1.
3933  * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
3934  *
3935  * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
3936  * error_bytes is discarded portion.
3937  * readagain_bytes is buffered portion which is read again on next conversion.
3938  *
3939  * Example:
3940  *
3941  * # \xff is invalid as EUC-JP.
3942  * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
3943  * ec.primitive_convert(src="\xff", dst="", nil, 10)
3944  * p ec.primitive_errinfo
3945  * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""]
3946  *
3947  * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
3948  * # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
3949  * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
3950  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3951  * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
3952  * p ec.primitive_errinfo
3953  * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
3954  *
3955  * # partial character is invalid
3956  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3957  * ec.primitive_convert(src="\xa4", dst="", nil, 10)
3958  * p ec.primitive_errinfo
3959  * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
3960  *
3961  * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
3962  * # partial characters.
3963  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3964  * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
3965  * p ec.primitive_errinfo
3966  * #=> [:source_buffer_empty, nil, nil, nil, nil]
3967  *
3968  * # \xd8\x00\x00@ is invalid as UTF-16BE because
3969  * # no low surrogate after high surrogate (\xd8\x00).
3970  * # It is detected by 3rd byte (\00) which is part of next character.
3971  * # So the high surrogate (\xd8\x00) is discarded and
3972  * # the 3rd byte is read again later.
3973  * # Since the byte is buffered in ec, it is dropped from src.
3974  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
3975  * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
3976  * p ec.primitive_errinfo
3977  * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
3978  * p src
3979  * #=> "@"
3980  *
3981  * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
3982  * # The problem is detected by 4th byte.
3983  * ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
3984  * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
3985  * p ec.primitive_errinfo
3986  * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
3987  * p src
3988  * #=> ""
3989  *
3990  */
3991 static VALUE
3992 econv_primitive_errinfo(VALUE self)
3993 {
3994  rb_econv_t *ec = check_econv(self);
3995 
3996  VALUE ary;
3997 
3998  ary = rb_ary_new2(5);
3999 
4000  rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result));
4001  rb_ary_store(ary, 4, Qnil);
4002 
4005 
4008 
4012  }
4013 
4014  return ary;
4015 }
4016 
4017 /*
4018  * call-seq:
4019  * ec.insert_output(string) -> nil
4020  *
4021  * Inserts string into the encoding converter.
4022  * The string will be converted to the destination encoding and
4023  * output on later conversions.
4024  *
4025  * If the destination encoding is stateful,
4026  * string is converted according to the state and the state is updated.
4027  *
4028  * This method should be used only when a conversion error occurs.
4029  *
4030  * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
4031  * src = "HIRAGANA LETTER A is \u{3042}."
4032  * dst = ""
4033  * p ec.primitive_convert(src, dst) #=> :undefined_conversion
4034  * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
4035  * ec.insert_output("<err>")
4036  * p ec.primitive_convert(src, dst) #=> :finished
4037  * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
4038  *
4039  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
4040  * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
4041  * dst = ""
4042  * p ec.primitive_convert(src, dst) #=> :undefined_conversion
4043  * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
4044  * ec.insert_output "?" # state change required to output "?".
4045  * p ec.primitive_convert(src, dst) #=> :finished
4046  * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
4047  *
4048  */
4049 static VALUE
4050 econv_insert_output(VALUE self, VALUE string)
4051 {
4052  const char *insert_enc;
4053 
4054  int ret;
4055 
4056  rb_econv_t *ec = check_econv(self);
4057 
4058  StringValue(string);
4059  insert_enc = rb_econv_encoding_to_insert_output(ec);
4060  string = rb_str_encode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0, Qnil);
4061 
4062  ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
4063  if (ret == -1) {
4064  rb_raise(rb_eArgError, "too big string");
4065  }
4066 
4067  return Qnil;
4068 }
4069 
4070 /*
4071  * call-seq:
4072  * ec.putback -> string
4073  * ec.putback(max_numbytes) -> string
4074  *
4075  * Put back the bytes which will be converted.
4076  *
4077  * The bytes are caused by invalid_byte_sequence error.
4078  * When invalid_byte_sequence error, some bytes are discarded and
4079  * some bytes are buffered to be converted later.
4080  * The latter bytes can be put back.
4081  * It can be observed by
4082  * Encoding::InvalidByteSequenceError#readagain_bytes and
4083  * Encoding::Converter#primitive_errinfo.
4084  *
4085  * ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
4086  * src = "\x00\xd8\x61\x00"
4087  * dst = ""
4088  * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
4089  * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
4090  * p ec.putback #=> "a\x00"
4091  * p ec.putback #=> "" # no more bytes to put back
4092  *
4093  */
4094 static VALUE
4095 econv_putback(int argc, VALUE *argv, VALUE self)
4096 {
4097  rb_econv_t *ec = check_econv(self);
4098  int n;
4099  int putbackable;
4100  VALUE str, max;
4101 
4102  if (!rb_check_arity(argc, 0, 1) || NIL_P(max = argv[0])) {
4104  }
4105  else {
4106  n = NUM2INT(max);
4107  putbackable = rb_econv_putbackable(ec);
4108  if (putbackable < n)
4109  n = putbackable;
4110  }
4111 
4112  str = rb_str_new(NULL, n);
4113  rb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
4114 
4115  if (ec->source_encoding) {
4117  }
4118 
4119  return str;
4120 }
4121 
4122 /*
4123  * call-seq:
4124  * ec.last_error -> exception or nil
4125  *
4126  * Returns an exception object for the last conversion.
4127  * Returns nil if the last conversion did not produce an error.
4128  *
4129  * "error" means that
4130  * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
4131  * Encoding::Converter#convert and
4132  * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
4133  * Encoding::Converter#primitive_convert.
4134  *
4135  * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
4136  * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
4137  * p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
4138  * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
4139  * p ec.last_error #=> nil
4140  *
4141  */
4142 static VALUE
4143 econv_last_error(VALUE self)
4144 {
4145  rb_econv_t *ec = check_econv(self);
4146  VALUE exc;
4147 
4148  exc = make_econv_exception(ec);
4149  if (NIL_P(exc))
4150  return Qnil;
4151  return exc;
4152 }
4153 
4154 /*
4155  * call-seq:
4156  * ec.replacement -> string
4157  *
4158  * Returns the replacement string.
4159  *
4160  * ec = Encoding::Converter.new("euc-jp", "us-ascii")
4161  * p ec.replacement #=> "?"
4162  *
4163  * ec = Encoding::Converter.new("euc-jp", "utf-8")
4164  * p ec.replacement #=> "\uFFFD"
4165  */
4166 static VALUE
4167 econv_get_replacement(VALUE self)
4168 {
4169  rb_econv_t *ec = check_econv(self);
4170  int ret;
4171  rb_encoding *enc;
4172 
4173  ret = make_replacement(ec);
4174  if (ret == -1) {
4175  rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
4176  }
4177 
4178  enc = rb_enc_find(ec->replacement_enc);
4179  return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
4180 }
4181 
4182 /*
4183  * call-seq:
4184  * ec.replacement = string
4185  *
4186  * Sets the replacement string.
4187  *
4188  * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
4189  * ec.replacement = "<undef>"
4190  * p ec.convert("a \u3042 b") #=> "a <undef> b"
4191  */
4192 static VALUE
4193 econv_set_replacement(VALUE self, VALUE arg)
4194 {
4195  rb_econv_t *ec = check_econv(self);
4196  VALUE string = arg;
4197  int ret;
4198  rb_encoding *enc;
4199 
4200  StringValue(string);
4201  enc = rb_enc_get(string);
4202 
4204  (const unsigned char *)RSTRING_PTR(string),
4205  RSTRING_LEN(string),
4206  rb_enc_name(enc));
4207 
4208  if (ret == -1) {
4209  /* xxx: rb_eInvalidByteSequenceError? */
4210  rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
4211  }
4212 
4213  return arg;
4214 }
4215 
4216 VALUE
4218 {
4219  return make_econv_exception(ec);
4220 }
4221 
4222 void
4224 {
4225  VALUE exc;
4226 
4227  exc = make_econv_exception(ec);
4228  if (NIL_P(exc))
4229  return;
4230  rb_exc_raise(exc);
4231 }
4232 
4233 /*
4234  * call-seq:
4235  * ecerr.source_encoding_name -> string
4236  *
4237  * Returns the source encoding name as a string.
4238  */
4239 static VALUE
4240 ecerr_source_encoding_name(VALUE self)
4241 {
4242  return rb_attr_get(self, rb_intern("source_encoding_name"));
4243 }
4244 
4245 /*
4246  * call-seq:
4247  * ecerr.source_encoding -> encoding
4248  *
4249  * Returns the source encoding as an encoding object.
4250  *
4251  * Note that the result may not be equal to the source encoding of
4252  * the encoding converter if the conversion has multiple steps.
4253  *
4254  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
4255  * begin
4256  * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
4257  * rescue Encoding::UndefinedConversionError
4258  * p $!.source_encoding #=> #<Encoding:UTF-8>
4259  * p $!.destination_encoding #=> #<Encoding:EUC-JP>
4260  * p $!.source_encoding_name #=> "UTF-8"
4261  * p $!.destination_encoding_name #=> "EUC-JP"
4262  * end
4263  *
4264  */
4265 static VALUE
4266 ecerr_source_encoding(VALUE self)
4267 {
4268  return rb_attr_get(self, rb_intern("source_encoding"));
4269 }
4270 
4271 /*
4272  * call-seq:
4273  * ecerr.destination_encoding_name -> string
4274  *
4275  * Returns the destination encoding name as a string.
4276  */
4277 static VALUE
4278 ecerr_destination_encoding_name(VALUE self)
4279 {
4280  return rb_attr_get(self, rb_intern("destination_encoding_name"));
4281 }
4282 
4283 /*
4284  * call-seq:
4285  * ecerr.destination_encoding -> encoding
4286  *
4287  * Returns the destination encoding as an encoding object.
4288  */
4289 static VALUE
4290 ecerr_destination_encoding(VALUE self)
4291 {
4292  return rb_attr_get(self, rb_intern("destination_encoding"));
4293 }
4294 
4295 /*
4296  * call-seq:
4297  * ecerr.error_char -> string
4298  *
4299  * Returns the one-character string which cause Encoding::UndefinedConversionError.
4300  *
4301  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
4302  * begin
4303  * ec.convert("\xa0")
4304  * rescue Encoding::UndefinedConversionError
4305  * puts $!.error_char.dump #=> "\xC2\xA0"
4306  * p $!.error_char.encoding #=> #<Encoding:UTF-8>
4307  * end
4308  *
4309  */
4310 static VALUE
4311 ecerr_error_char(VALUE self)
4312 {
4313  return rb_attr_get(self, rb_intern("error_char"));
4314 }
4315 
4316 /*
4317  * call-seq:
4318  * ecerr.error_bytes -> string
4319  *
4320  * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
4321  *
4322  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
4323  * begin
4324  * ec.convert("abc\xA1\xFFdef")
4325  * rescue Encoding::InvalidByteSequenceError
4326  * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
4327  * puts $!.error_bytes.dump #=> "\xA1"
4328  * puts $!.readagain_bytes.dump #=> "\xFF"
4329  * end
4330  */
4331 static VALUE
4332 ecerr_error_bytes(VALUE self)
4333 {
4334  return rb_attr_get(self, rb_intern("error_bytes"));
4335 }
4336 
4337 /*
4338  * call-seq:
4339  * ecerr.readagain_bytes -> string
4340  *
4341  * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
4342  */
4343 static VALUE
4344 ecerr_readagain_bytes(VALUE self)
4345 {
4346  return rb_attr_get(self, rb_intern("readagain_bytes"));
4347 }
4348 
4349 /*
4350  * call-seq:
4351  * ecerr.incomplete_input? -> true or false
4352  *
4353  * Returns true if the invalid byte sequence error is caused by
4354  * premature end of string.
4355  *
4356  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
4357  *
4358  * begin
4359  * ec.convert("abc\xA1z")
4360  * rescue Encoding::InvalidByteSequenceError
4361  * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
4362  * p $!.incomplete_input? #=> false
4363  * end
4364  *
4365  * begin
4366  * ec.convert("abc\xA1")
4367  * ec.finish
4368  * rescue Encoding::InvalidByteSequenceError
4369  * p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
4370  * p $!.incomplete_input? #=> true
4371  * end
4372  */
4373 static VALUE
4374 ecerr_incomplete_input(VALUE self)
4375 {
4376  return rb_attr_get(self, rb_intern("incomplete_input"));
4377 }
4378 
4379 /*
4380  * Document-class: Encoding::UndefinedConversionError
4381  *
4382  * Raised by Encoding and String methods when a transcoding operation
4383  * fails.
4384  */
4385 
4386 /*
4387  * Document-class: Encoding::InvalidByteSequenceError
4388  *
4389  * Raised by Encoding and String methods when the string being
4390  * transcoded contains a byte invalid for either the source or
4391  * target encoding.
4392  */
4393 
4394 /*
4395  * Document-class: Encoding::ConverterNotFoundError
4396  *
4397  * Raised by transcoding methods when a named encoding does not
4398  * correspond with a known converter.
4399  */
4400 
4401 #undef rb_intern
4402 void
4404 {
4405  transcoder_table = st_init_strcasetable();
4406 
4407  sym_invalid = ID2SYM(rb_intern("invalid"));
4408  sym_undef = ID2SYM(rb_intern("undef"));
4409  sym_replace = ID2SYM(rb_intern("replace"));
4410  sym_fallback = ID2SYM(rb_intern("fallback"));
4411  sym_aref = ID2SYM(rb_intern("[]"));
4412  sym_xml = ID2SYM(rb_intern("xml"));
4413  sym_text = ID2SYM(rb_intern("text"));
4414  sym_attr = ID2SYM(rb_intern("attr"));
4415 
4416  sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence"));
4417  sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion"));
4418  sym_destination_buffer_full = ID2SYM(rb_intern("destination_buffer_full"));
4419  sym_source_buffer_empty = ID2SYM(rb_intern("source_buffer_empty"));
4420  sym_finished = ID2SYM(rb_intern("finished"));
4421  sym_after_output = ID2SYM(rb_intern("after_output"));
4422  sym_incomplete_input = ID2SYM(rb_intern("incomplete_input"));
4423  sym_universal_newline = ID2SYM(rb_intern("universal_newline"));
4424  sym_crlf_newline = ID2SYM(rb_intern("crlf_newline"));
4425  sym_cr_newline = ID2SYM(rb_intern("cr_newline"));
4426  sym_partial_input = ID2SYM(rb_intern("partial_input"));
4427 
4428 #ifdef ENABLE_ECONV_NEWLINE_OPTION
4429  sym_newline = ID2SYM(rb_intern("newline"));
4430  sym_universal = ID2SYM(rb_intern("universal"));
4431  sym_crlf = ID2SYM(rb_intern("crlf"));
4432  sym_cr = ID2SYM(rb_intern("cr"));
4433  sym_lf = ID2SYM(rb_intern("lf"));
4434 #endif
4435 
4436  InitVM(transcode);
4437 }
4438 
4439 void
4441 {
4442  rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
4443  rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
4444  rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
4445 
4446  rb_define_method(rb_cString, "encode", str_encode, -1);
4447  rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
4448 
4450  rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
4451  rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
4452  rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
4453  rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
4454  rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
4455  rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);
4456  rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
4457  rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
4458  rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
4459  rb_define_method(rb_cEncodingConverter, "convert", econv_convert, 1);
4460  rb_define_method(rb_cEncodingConverter, "finish", econv_finish, 0);
4461  rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
4462  rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
4463  rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
4464  rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
4465  rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
4466  rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
4467  rb_define_method(rb_cEncodingConverter, "==", econv_equal, 1);
4468 
4469  /* Document-const: INVALID_MASK
4470  *
4471  * Mask for invalid byte sequences
4472  */
4474 
4475  /* Document-const: INVALID_REPLACE
4476  *
4477  * Replace invalid byte sequences
4478  */
4480 
4481  /* Document-const: UNDEF_MASK
4482  *
4483  * Mask for a valid character in the source encoding but no related
4484  * character(s) in destination encoding.
4485  */
4487 
4488  /* Document-const: UNDEF_REPLACE
4489  *
4490  * Replace byte sequences that are undefined in the destination encoding.
4491  */
4493 
4494  /* Document-const: UNDEF_HEX_CHARREF
4495  *
4496  * Replace byte sequences that are undefined in the destination encoding
4497  * with an XML hexadecimal character reference. This is valid for XML
4498  * conversion.
4499  */
4501 
4502  /* Document-const: PARTIAL_INPUT
4503  *
4504  * Indicates the source may be part of a larger string. See
4505  * primitive_convert for an example.
4506  */
4508 
4509  /* Document-const: AFTER_OUTPUT
4510  *
4511  * Stop converting after some output is complete but before all of the
4512  * input was consumed. See primitive_convert for an example.
4513  */
4515 
4516  /* Document-const: UNIVERSAL_NEWLINE_DECORATOR
4517  *
4518  * Decorator for converting CRLF and CR to LF
4519  */
4521 
4522  /* Document-const: CRLF_NEWLINE_DECORATOR
4523  *
4524  * Decorator for converting LF to CRLF
4525  */
4527 
4528  /* Document-const: CR_NEWLINE_DECORATOR
4529  *
4530  * Decorator for converting LF to CR
4531  */
4533 
4534  /* Document-const: XML_TEXT_DECORATOR
4535  *
4536  * Escape as XML CharData
4537  */
4539 
4540  /* Document-const: XML_ATTR_CONTENT_DECORATOR
4541  *
4542  * Escape as XML AttValue
4543  */
4545 
4546  /* Document-const: XML_ATTR_QUOTE_DECORATOR
4547  *
4548  * Escape as XML AttValue
4549  */
4551 
4552  rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0);
4553  rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
4554  rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0);
4555  rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0);
4556  rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0);
4557 
4558  rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0);
4559  rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
4560  rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0);
4561  rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0);
4562  rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0);
4563  rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0);
4564  rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0);
4565 
4566  Init_newline();
4567 }
rb_econv_putback
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
Definition: transcode.c:1735
econv_source_buffer_empty
@ econv_source_buffer_empty
Definition: encoding.h:301
TRANSCODING_WRITEBUF_SIZE
#define TRANSCODING_WRITEBUF_SIZE(tc)
Definition: transcode.c:91
TRUE
#define TRUE
Definition: nkf.h:175
FOURbt
#define FOURbt
Definition: transcode_data.h:31
rb_transcoding::writebuf_len
ssize_t writebuf_len
Definition: transcode.c:71
rb_econv_init_by_convpath_t
Definition: transcode.c:3174
rb_method_call
VALUE rb_method_call(int, const VALUE *, VALUE)
Definition: proc.c:2261
rb_assoc_new
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Definition: array.c:896
rb_econv_elem_t::out_data_end
unsigned char * out_data_end
Definition: transcode.c:105
rb_str_new2
#define rb_str_new2
Definition: intern.h:903
rb_enc_name
#define rb_enc_name(enc)
Definition: encoding.h:177
rb_enc_mbc_to_codepoint
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:208
rb_cData
RUBY_EXTERN VALUE rb_cData
Definition: ruby.h:2018
rb_econv_t::replacement_allocated
int replacement_allocated
Definition: transcode.c:126
rb_transcoding::rb_transcoding_state_t
Definition: transcode.c:77
LONG_MAX
#define LONG_MAX
Definition: ruby.h:220
BL_ACTION
#define BL_ACTION(byte)
rb_exc_new_str
VALUE rb_exc_new_str(VALUE etype, VALUE str)
Definition: error.c:972
rb_econv_t::num_finished
int num_finished
Definition: transcode.c:129
st_table::num_entries
st_index_t num_entries
Definition: st.h:86
rb_exc_new3
#define rb_exc_new3
Definition: intern.h:293
rb_hash_new
VALUE rb_hash_new(void)
Definition: hash.c:1501
rb_econv_open
rb_econv_t * rb_econv_open(const char *sname, const char *dname, int ecflags)
Definition: transcode.c:1051
path
VALUE path
Definition: rb_mjit_min_header-2.7.0.h:7351
rb_transcoding::writebuf_off
ssize_t writebuf_off
Definition: transcode.c:70
ENC_CODERANGE_VALID
#define ENC_CODERANGE_VALID
Definition: encoding.h:105
rb_str_buf_new
VALUE rb_str_buf_new(long)
Definition: string.c:1315
memset
void * memset(void *, int, size_t)
size_t
long unsigned int size_t
Definition: rb_mjit_min_header-2.7.0.h:666
ST_STOP
@ ST_STOP
Definition: st.h:99
rb_scan_args
#define rb_scan_args(argc, argvp, fmt,...)
Definition: rb_mjit_min_header-2.7.0.h:6372
ECONV_XML_ATTR_CONTENT_DECORATOR
#define ECONV_XML_ATTR_CONTENT_DECORATOR
Definition: encoding.h:406
InitVM_transcode
void InitVM_transcode(void)
Definition: transcode.c:4440
INT2FIX
#define INT2FIX(i)
Definition: ruby.h:263
transcoder_entry_t::dname
const char * dname
Definition: transcode.c:157
snprintf
int snprintf(char *__restrict, size_t, const char *__restrict,...) __attribute__((__format__(__printf__
bp
#define bp()
Definition: internal.h:1445
ECONV_CRLF_NEWLINE_DECORATOR
#define ECONV_CRLF_NEWLINE_DECORATOR
Definition: encoding.h:403
RSTRING_PTR
#define RSTRING_PTR(str)
Definition: ruby.h:1009
SIZE_MAX
#define SIZE_MAX
Definition: ruby.h:307
NUM2LONG
#define NUM2LONG(x)
Definition: ruby.h:679
rb_attr_get
VALUE rb_attr_get(VALUE, ID)
Definition: variable.c:1084
tr
Definition: string.c:6989
rb_hash_aref
VALUE rb_hash_aref(VALUE hash, VALUE key)
Definition: hash.c:1964
rb_econv_str_append
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
Definition: transcode.c:1847
VALUE
unsigned long VALUE
Definition: ruby.h:102
BL_MIN_BYTE
#define BL_MIN_BYTE
rb_obj_encoding
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:1004
rb_eArgError
VALUE rb_eArgError
Definition: error.c:923
encoding.h
RSTRING_EMBED_LEN_MAX
@ RSTRING_EMBED_LEN_MAX
Definition: ruby.h:982
rb_intern
#define rb_intern(str)
rb_ary_store
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:1079
ECONV_UNDEF_HEX_CHARREF
#define ECONV_UNDEF_HEX_CHARREF
Definition: encoding.h:397
rb_cEncodingConverter
VALUE rb_cEncodingConverter
Definition: transcode.c:24
search_path_queue_tag::enc
const char * enc
Definition: transcode.c:244
rb_econv_make_exception
VALUE rb_econv_make_exception(rb_econv_t *ec)
Definition: transcode.c:4217
RB_TYPE_P
#define RB_TYPE_P(obj, type)
Definition: ruby.h:560
rb_transcoding::rb_transcoding_state_t::dummy_for_alignment
double dummy_for_alignment
Definition: transcode.c:80
rb_enc_get
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
st_add_direct
void st_add_direct(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1251
rb_enc_asciicompat
#define rb_enc_asciicompat(enc)
Definition: encoding.h:245
ECONV_UNDEF_REPLACE
#define ECONV_UNDEF_REPLACE
Definition: encoding.h:396
rb_enc_precise_mbclen
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1032
rb_econv_t::started
int started
Definition: transcode.c:112
ECONV_CR_NEWLINE_DECORATOR
#define ECONV_CR_NEWLINE_DECORATOR
Definition: encoding.h:404
rb_transcoder::src_encoding
const char * src_encoding
Definition: transcode_data.h:99
int
__inline__ int
Definition: rb_mjit_min_header-2.7.0.h:2839
search_path_queue_tag::next
struct search_path_queue_tag * next
Definition: transcode.c:243
getBT3
#define getBT3(a)
Definition: transcode_data.h:73
rb_econv_t::replacement_len
size_t replacement_len
Definition: transcode.c:118
getBT2
#define getBT2(a)
Definition: transcode_data.h:72
getGB4bt0
#define getGB4bt0(a)
Definition: transcode_data.h:76
rb_transcoding::readbuf
union rb_transcoding::@166 readbuf
rb_declare_transcoder
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
Definition: transcode.c:232
rb_econv_t::source_encoding_name
const char * source_encoding_name
Definition: transcode.c:114
StringValue
use StringValue() instead")))
SUSPEND_OBUF
#define SUSPEND_OBUF(num)
rb_str_dup
VALUE rb_str_dup(VALUE)
Definition: string.c:1516
rb_str_cat2
#define rb_str_cat2
Definition: intern.h:912
Qundef
#define Qundef
Definition: ruby.h:470
asciicompat_decoder
@ asciicompat_decoder
Definition: transcode_data.h:90
rb_define_singleton_method
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1755
rb_econv_t
Definition: transcode.c:110
rb_str_modify
void rb_str_modify(VALUE)
Definition: string.c:2114
econv_after_output
@ econv_after_output
Definition: encoding.h:303
rb_define_method
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1551
ENC_CODERANGE_SET
#define ENC_CODERANGE_SET(obj, cr)
Definition: encoding.h:110
INT2NUM
#define INT2NUM(x)
Definition: ruby.h:1609
ptr
struct RIMemo * ptr
Definition: debug.c:74
rb_enc_default_internal
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1521
STR1_LENGTH
#define STR1_LENGTH(byte_addr)
Definition: transcode_data.h:43
Qfalse
#define Qfalse
Definition: ruby.h:467
transcoder_entry_t
Definition: transcode.c:155
rb_transcoding::rb_transcoding_state_t::ptr
void * ptr
Definition: transcode.c:78
STR1_BYTEINDEX
#define STR1_BYTEINDEX(w)
Definition: transcode_data.h:44
ssize_t
_ssize_t ssize_t
Definition: rb_mjit_min_header-2.7.0.h:1329
trans_open_t::entries
transcoder_entry_t ** entries
Definition: transcode.c:954
dp
#define dp(v)
Definition: vm_debug.h:21
writebuf_off
#define writebuf_off
INVALID
#define INVALID
Definition: transcode_data.h:32
rb_transcoding::rb_transcoding_state_t::ary
char ary[sizeof(double) > sizeof(void *) ? sizeof(double) :sizeof(void *)]
Definition: transcode.c:79
ONEbt
#define ONEbt
Definition: transcode_data.h:28
NULL
#define NULL
Definition: _sdbm.c:101
rb_transcoding::readagain_len
ssize_t readagain_len
Definition: transcode.c:64
rb_str_encode
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2869
rb_econv_t::result
rb_econv_result_t result
Definition: transcode.c:134
PRIsVALUE
#define PRIsVALUE
Definition: ruby.h:166
RBASIC_SET_CLASS
#define RBASIC_SET_CLASS(obj, cls)
Definition: internal.h:1983
rb_enc_from_encoding
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:116
rb_econv_prepare_opts
int rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
Definition: transcode.c:2554
ID2SYM
#define ID2SYM(x)
Definition: ruby.h:414
strlen
size_t strlen(const char *)
OBJ_FREEZE
#define OBJ_FREEZE(x)
Definition: ruby.h:1377
rb_econv_elem_t
Definition: transcode.c:101
T_SYMBOL
#define T_SYMBOL
Definition: ruby.h:540
getGB4bt1
#define getGB4bt1(a)
Definition: transcode_data.h:77
FUNso
#define FUNso
Definition: transcode_data.h:38
rb_econv_t::last_tc
struct rb_transcoding * last_tc
Definition: transcode.c:130
rb_eEncodingError
VALUE rb_eEncodingError
Definition: error.c:928
rb_respond_to
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:2190
rb_transcoding::resume_position
int resume_position
Definition: transcode.c:57
rb_check_arity
#define rb_check_arity
Definition: intern.h:347
rb_econv_init_by_convpath_t::ec
rb_econv_t * ec
Definition: transcode.c:3175
InitVM
#define InitVM(ext)
Definition: ruby.h:2329
RARRAY_LENINT
#define RARRAY_LENINT(ary)
Definition: ruby.h:1071
v
int VALUE v
Definition: rb_mjit_min_header-2.7.0.h:12332
rb_str_capacity
size_t rb_str_capacity(VALUE str)
Definition: string.c:712
ALLOC_N
#define ALLOC_N(type, n)
Definition: ruby.h:1663
rb_econv_asciicompat_encoding
const char * rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
Definition: transcode.c:1768
rb_str_resize
VALUE rb_str_resize(VALUE, long)
Definition: string.c:2709
DECORATOR_P
#define DECORATOR_P(sname, dname)
Definition: transcode.c:153
cc
const struct rb_call_cache * cc
Definition: rb_mjit_min_header-2.7.0.h:13228
rb_econv_close
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1684
rb_require_string
VALUE rb_require_string(VALUE)
Definition: load.c:1102
rb_raise
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2669
transcoder_entry_t::lib
const char * lib
Definition: transcode.c:158
rb_ary_entry
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1512
rb_econv_convert
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
Definition: transcode.c:1428
rb_econv_t::replacement_str
const unsigned char * replacement_str
Definition: transcode.c:117
rb_econv_t::error_tc
struct rb_transcoding * error_tc
Definition: transcode.c:135
search_path_bfs_t::base_enc
const char * base_enc
Definition: transcode.c:251
rb_obj_class
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
writebuf_len
#define writebuf_len
rb_obj_is_proc
VALUE rb_obj_is_proc(VALUE)
Definition: proc.c:152
rb_enc_get_index
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:779
memcpy
void * memcpy(void *__restrict, const void *__restrict, size_t)
rb_str_dump
VALUE rb_str_dump(VALUE)
Definition: string.c:6042
double
double
Definition: rb_mjit_min_header-2.7.0.h:5923
rb_str_drop_bytes
VALUE rb_str_drop_bytes(VALUE, long)
Definition: string.c:4573
DATA_PTR
#define DATA_PTR(dta)
Definition: ruby.h:1175
MBCLEN_CHARFOUND_LEN
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:192
rb_econv_t::last_error
struct rb_econv_t::@168 last_error
rb_encoding
const typedef OnigEncodingType rb_encoding
Definition: encoding.h:115
rb_transcoding::next_byte
unsigned char next_byte
Definition: transcode.c:60
rb_check_frozen
#define rb_check_frozen(obj)
Definition: intern.h:319
FUNio
#define FUNio
Definition: transcode_data.h:37
rb_transcoder::max_output
int max_output
Definition: transcode_data.h:109
rb_transcoder
Definition: transcode_data.h:98
rb_enc_from_index
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:609
getGB4bt3
#define getGB4bt3(a)
Definition: transcode_data.h:79
search_path_queue_tag
Definition: transcode.c:242
rb_econv_putbackable
int rb_econv_putbackable(rb_econv_t *ec)
Definition: transcode.c:1724
rb_define_dummy_encoding
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:462
FUNsi
#define FUNsi
Definition: transcode_data.h:36
rb_econv_elem_t::tc
struct rb_transcoding * tc
Definition: transcode.c:102
rb_econv_prepare_options
int rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
Definition: transcode.c:2509
asciicompat_encoding_t::ascii_compat_name
const char * ascii_compat_name
Definition: transcode.c:1746
i
uint32_t i
Definition: rb_mjit_min_header-2.7.0.h:5464
ECONV_XML_TEXT_DECORATOR
#define ECONV_XML_TEXT_DECORATOR
Definition: encoding.h:405
st_data_t
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
rb_econv_has_convpath_p
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
Definition: transcode.c:3166
INT_MAX
#define INT_MAX
Definition: rb_mjit_min_header-2.7.0.h:4052
trans_open_t
Definition: transcode.c:953
long
#define long
Definition: rb_mjit_min_header-2.7.0.h:2880
ECONV_UNIVERSAL_NEWLINE_DECORATOR
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
Definition: encoding.h:402
search_path_queue_t
struct search_path_queue_tag search_path_queue_t
rb_econv_t::error_bytes_len
size_t error_bytes_len
Definition: transcode.c:139
econv_incomplete_input
@ econv_incomplete_input
Definition: encoding.h:304
asciicompat_encoding_t
Definition: transcode.c:1745
rb_cEncoding
VALUE rb_cEncoding
Definition: encoding.c:46
rb_ary_push
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1195
rb_econv_substr_convert
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
Definition: transcode.c:1853
TWObt
#define TWObt
Definition: transcode_data.h:29
rb_enc_str_scrub
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
Definition: string.c:10255
rb_str_shared_replace
void rb_str_shared_replace(VALUE, VALUE)
Definition: string.c:1391
rb_econv_t::in_buf_end
unsigned char * in_buf_end
Definition: transcode.c:124
FUNsio
#define FUNsio
Definition: transcode_data.h:41
TypedData_Wrap_Struct
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1231
ECONV_INVALID_MASK
#define ECONV_INVALID_MASK
Definition: encoding.h:393
econv_invalid_byte_sequence
@ econv_invalid_byte_sequence
Definition: encoding.h:298
ECONV_XML_ATTR_QUOTE_DECORATOR
#define ECONV_XML_ATTR_QUOTE_DECORATOR
Definition: encoding.h:408
rb_econv_open_opts
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
Definition: transcode.c:2560
rb_eTypeError
VALUE rb_eTypeError
Definition: error.c:922
rb_transcoding::output_index
unsigned int output_index
Definition: transcode.c:61
rb_econv_t::destination_encoding_name
const char * destination_encoding_name
Definition: transcode.c:115
ALLOC
#define ALLOC(type)
Definition: ruby.h:1664
entries
struct iseq_catch_table_entry entries[]
Definition: rb_mjit_min_header-2.7.0.h:10827
rb_eRuntimeError
VALUE rb_eRuntimeError
Definition: error.c:920
SUSPEND_AFTER_OUTPUT
#define SUSPEND_AFTER_OUTPUT(num)
input
unsigned int input
Definition: nkf.c:4325
rb_econv_elem_t::last_result
rb_econv_result_t last_result
Definition: transcode.c:107
st_init_strcasetable
st_table * st_init_strcasetable(void)
Definition: st.c:683
ALLOCA_N
#define ALLOCA_N(type, n)
Definition: ruby.h:1684
ECONV_AFTER_OUTPUT
#define ECONV_AFTER_OUTPUT
Definition: encoding.h:416
rb_econv_t::error_bytes_start
const unsigned char * error_bytes_start
Definition: transcode.c:138
RARRAY_AREF
#define RARRAY_AREF(a, i)
Definition: ruby.h:1101
size
int size
Definition: encoding.c:58
rb_str_set_len
void rb_str_set_len(VALUE, long)
Definition: string.c:2692
rb_econv_t::num_trans
int num_trans
Definition: transcode.c:128
FALSE
#define FALSE
Definition: nkf.h:174
ECONV_NEWLINE_DECORATOR_MASK
#define ECONV_NEWLINE_DECORATOR_MASK
Definition: encoding.h:399
rb_to_int
VALUE rb_to_int(VALUE)
Converts val into Integer.
Definition: object.c:3021
rb_econv_t::in_buf_start
unsigned char * in_buf_start
Definition: transcode.c:121
rb_econv_append
VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
Definition: transcode.c:1795
rb_econv_open_exc
VALUE rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
Definition: transcode.c:2018
rb_str_new_frozen
VALUE rb_str_new_frozen(VALUE)
Definition: string.c:1203
memcmp
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
rb_str_new_cstr
#define rb_str_new_cstr(str)
Definition: rb_mjit_min_header-2.7.0.h:6117
rb_register_transcoder
void rb_register_transcoder(const rb_transcoder *tr)
Definition: transcode.c:204
Init_newline
void Init_newline(void)
rb_error_arity
MJIT_STATIC void rb_error_arity(int argc, int min, int max)
Definition: vm_insnhelper.c:387
ECONV_ERROR_HANDLER_MASK
#define ECONV_ERROR_HANDLER_MASK
Definition: encoding.h:392
getBT1
#define getBT1(a)
Definition: transcode_data.h:71
MAX_ECFLAGS_DECORATORS
#define MAX_ECFLAGS_DECORATORS
Definition: transcode.c:1010
rb_econv_elem_t::out_data_start
unsigned char * out_data_start
Definition: transcode.c:104
StringValueCStr
#define StringValueCStr(v)
Definition: ruby.h:604
ENC_CODERANGE_BROKEN
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:106
rb_check_array_type
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:909
rb_econv_decorate_at_last
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
Definition: transcode.c:1907
key
key
Definition: openssl_missing.h:181
T_HASH
#define T_HASH
Definition: ruby.h:531
rb_econv_t::destination_encoding
rb_encoding * destination_encoding
Definition: transcode.c:146
TRANSCODING_WRITEBUF
#define TRANSCODING_WRITEBUF(tc)
Definition: transcode.c:87
rb_to_encoding_index
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:197
rb_transcoding
Definition: transcode.c:52
rb_econv_binmode
void rb_econv_binmode(rb_econv_t *ec)
Definition: transcode.c:1924
rb_typeddata_is_kind_of
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:872
next_byte
#define next_byte
src
__inline__ const void *__restrict src
Definition: rb_mjit_min_header-2.7.0.h:2836
ECONV_UNDEF_MASK
#define ECONV_UNDEF_MASK
Definition: encoding.h:395
strcmp
int strcmp(const char *, const char *)
MBCLEN_CHARFOUND_P
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:191
THREEbt
#define THREEbt
Definition: transcode_data.h:30
RARRAY_LEN
#define RARRAY_LEN(a)
Definition: ruby.h:1070
st_foreach
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
Definition: st.c:1718
getBT0
#define getBT0(a)
Definition: transcode_data.h:74
char
#define char
Definition: rb_mjit_min_header-2.7.0.h:2876
rb_ary_new4
#define rb_ary_new4
Definition: intern.h:105
rb_check_hash_type
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:1825
rb_ary_new2
#define rb_ary_new2
Definition: intern.h:103
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
n
const char size_t n
Definition: rb_mjit_min_header-2.7.0.h:5456
rb_exc_raise
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition: eval.c:667
TypedData_Get_Struct
#define TypedData_Get_Struct(obj, type, data_type, sval)
Definition: ruby.h:1252
rb_enc_str_coderange
int rb_enc_str_coderange(VALUE)
Definition: string.c:657
rb_bug
void rb_bug(const char *fmt,...)
Definition: error.c:634
internal.h
rb_to_encoding
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:245
T_ARRAY
#define T_ARRAY
Definition: ruby.h:530
arg
VALUE arg
Definition: rb_mjit_min_header-2.7.0.h:5601
rb_econv_t::in_data_end
unsigned char * in_data_end
Definition: transcode.c:123
argv
char ** argv
Definition: ruby.c:223
f
#define f
rb_econv_t::source_encoding
rb_encoding * source_encoding
Definition: transcode.c:145
next_table
#define next_table
ST_CONTINUE
@ ST_CONTINUE
Definition: st.h:99
PRIdPTRDIFF
#define PRIdPTRDIFF
Definition: ruby.h:190
rb_econv_t::replacement_enc
const char * replacement_enc
Definition: transcode.c:119
BYTE_ADDR
#define BYTE_ADDR(index)
rb_econv_substr_append
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
Definition: transcode.c:1838
xmalloc
#define xmalloc
Definition: defines.h:211
xrealloc
#define xrealloc
Definition: defines.h:214
rb_transcoding::recognized_len
ssize_t recognized_len
Definition: transcode.c:63
rb_sprintf
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1197
rb_enc_find
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:728
klass
VALUE klass
Definition: rb_mjit_min_header-2.7.0.h:13254
st_data_t
unsigned long st_data_t
Definition: rb_mjit_min_header-2.7.0.h:5363
rb_utf8_encoding
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1328
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
next_info
#define next_info
getGB4bt2
#define getGB4bt2(a)
Definition: transcode_data.h:78
rb_enc_find_index
int rb_enc_find_index(const char *name)
Definition: encoding.c:693
fallback_func
VALUE(* fallback_func)(VALUE obj, VALUE name)
Definition: variable.c:127
rb_transcoding::ptr
unsigned char * ptr
Definition: transcode.c:67
RUBY_TYPED_FREE_IMMEDIATELY
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1207
rb_transcoding::state
union rb_transcoding::rb_transcoding_state_t state
ENC_CODERANGE_7BIT
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
MEMCPY
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1753
rb_transcoding
struct rb_transcoding rb_transcoding
transcode_data.h
econv_finished
@ econv_finished
Definition: encoding.h:302
rb_econv_init_by_convpath_t::index
int index
Definition: transcode.c:3176
rb_hash_aset
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:2779
rb_cString
RUBY_EXTERN VALUE rb_cString
Definition: ruby.h:2044
rb_econv_t::flags
int flags
Definition: transcode.c:111
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
ZERObt
#define ZERObt
Definition: transcode_data.h:34
fail
#define fail()
Definition: date_strptime.c:123
rb_econv_t::readagain_len
size_t readagain_len
Definition: transcode.c:140
argc
int argc
Definition: ruby.c:222
rb_econv_encoding_to_insert_output
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
Definition: transcode.c:1485
rb_funcall3
#define rb_funcall3
Definition: ruby.h:1896
rb_econv_str_convert
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
Definition: transcode.c:1859
rb_obj_classname
const char * rb_obj_classname(VALUE)
Definition: variable.c:289
UNDEF
#define UNDEF
Definition: transcode_data.h:33
econv_undefined_conversion
@ econv_undefined_conversion
Definition: encoding.h:299
rb_econv_t::num_allocated
int num_allocated
Definition: transcode.c:127
REALLOC_N
#define REALLOC_N(var, type, n)
Definition: ruby.h:1667
rb_define_const
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2880
err
int err
Definition: win32.c:135
rb_econv_t::in_data_start
unsigned char * in_data_start
Definition: transcode.c:122
rb_data_type_struct
Definition: ruby.h:1148
xfree
#define xfree
Definition: defines.h:216
rb_econv_init_by_convpath_t::ret
int ret
Definition: transcode.c:3177
econv_destination_buffer_full
@ econv_destination_buffer_full
Definition: encoding.h:300
search_path_bfs_t::queue
search_path_queue_t * queue
Definition: transcode.c:249
BL_MAX_BYTE
#define BL_MAX_BYTE
rb_str_new
#define rb_str_new(str, len)
Definition: rb_mjit_min_header-2.7.0.h:6116
transcoder_entry_t::transcoder
const rb_transcoder * transcoder
Definition: transcode.c:159
rb_check_typeddata
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:889
Qtrue
#define Qtrue
Definition: ruby.h:468
rb_str_catf
VALUE rb_str_catf(VALUE str, const char *format,...)
Definition: sprintf.c:1237
rb_obj_is_method
VALUE rb_obj_is_method(VALUE)
Definition: proc.c:1459
rb_econv_decorate_at_first
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
Definition: transcode.c:1890
SUSPEND
#define SUSPEND(ret, num)
OBJ_FROZEN
#define OBJ_FROZEN(x)
Definition: ruby.h:1375
len
uint8_t len
Definition: escape.c:17
FUNii
#define FUNii
Definition: transcode_data.h:35
SYMBOL_P
#define SYMBOL_P(x)
Definition: ruby.h:413
encoding_equal
#define encoding_equal(enc1, enc2)
Definition: transcode.c:240
rb_econv_elem_t::out_buf_start
unsigned char * out_buf_start
Definition: transcode.c:103
rb_transcoder::asciicompat_type
rb_transcoder_asciicompat_type_t asciicompat_type
Definition: transcode_data.h:110
MEMMOVE
#define MEMMOVE(p1, p2, type, n)
Definition: ruby.h:1754
rb_transcoding::next_info
VALUE next_info
Definition: transcode.c:59
rb_ivar_set
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
trans_open_t::num_additional
int num_additional
Definition: transcode.c:955
TRANSCODING_READBUF
#define TRANSCODING_READBUF(tc)
Definition: transcode.c:83
search_path_bfs_t::queue_last_ptr
search_path_queue_t ** queue_last_ptr
Definition: transcode.c:250
rb_transcoding::writebuf
union rb_transcoding::@167 writebuf
rb_transcoding::flags
int flags
Definition: transcode.c:55
rb_define_class_under
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition: class.c:698
rb_sym2str
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
rb_econv_memsize
size_t rb_econv_memsize(rb_econv_t *ec)
Definition: transcode.c:1702
rb_econv_t::elems
rb_econv_elem_t * elems
Definition: transcode.c:125
NOMAP
#define NOMAP
Definition: transcode_data.h:27
rb_econv_elem_t::out_buf_end
unsigned char * out_buf_end
Definition: transcode.c:106
rb_econv_check_error
void rb_econv_check_error(rb_econv_t *ec)
Definition: transcode.c:4223
ECONV_INVALID_REPLACE
#define ECONV_INVALID_REPLACE
Definition: encoding.h:394
rb_ary_new
VALUE rb_ary_new(void)
Definition: array.c:723
Init_transcode
void Init_transcode(void)
Definition: transcode.c:4403
NUM2INT
#define NUM2INT(x)
Definition: ruby.h:715
Qnil
#define Qnil
Definition: ruby.h:469
rb_econv_set_replacement
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
Definition: transcode.c:2180
exc
const rb_iseq_t const VALUE exc
Definition: rb_mjit_min_header-2.7.0.h:13504
h
size_t st_index_t h
Definition: rb_mjit_min_header-2.7.0.h:5462
st_lookup
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1101
search_path_bfs_t
Definition: transcode.c:247
GB4bt
#define GB4bt
Definition: transcode_data.h:40
asciicompat_encoding_t::ascii_incompat_name
const char * ascii_incompat_name
Definition: transcode.c:1747
STR1
#define STR1
Definition: transcode_data.h:39
rb_proc_call
VALUE rb_proc_call(VALUE, VALUE)
Definition: proc.c:966
search_path_bfs_t::visited
st_table * visited
Definition: transcode.c:248
RB_GC_GUARD
#define RB_GC_GUARD(v)
Definition: ruby.h:585
rb_transcoding::ary
unsigned char ary[8]
Definition: transcode.c:66
rb_str_coderange_scan_restartable
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
Definition: string.c:567
rb_econv_t::destination_encoding
const char * destination_encoding
Definition: transcode.c:137
rb_str_tmp_new
VALUE rb_str_tmp_new(long)
Definition: string.c:1343
rb_hash_freeze
VALUE rb_hash_freeze(VALUE hash)
Definition: hash.c:87
asciicompat_encoder
@ asciicompat_encoder
Definition: transcode_data.h:91
RSTRING_LEN
#define RSTRING_LEN(str)
Definition: ruby.h:1005
st_free_table
void st_free_table(st_table *tab)
Definition: st.c:709
st_table
Definition: st.h:79
rb_econv_t::source_encoding
const char * source_encoding
Definition: transcode.c:136
transcoder_entry_t::sname
const char * sname
Definition: transcode.c:156
rb_transcoding::next_table
unsigned int next_table
Definition: transcode.c:58
rb_enc_str_new
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
Definition: string.c:796
rb_enc_associate
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:866
rb_define_alloc_func
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
RTEST
#define RTEST(v)
Definition: ruby.h:481
rb_econv_insert_output
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
Definition: transcode.c:1569
ECONV_PARTIAL_INPUT
#define ECONV_PARTIAL_INPUT
Definition: encoding.h:415
rb_econv_result_t
rb_econv_result_t
Definition: encoding.h:297
rb_transcoding::transcoder
const rb_transcoder * transcoder
Definition: transcode.c:53
hash_fallback
#define hash_fallback
Definition: transcode.c:2227
rb_enc_associate_index
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:838
rb_transcoder::dst_encoding
const char * dst_encoding
Definition: transcode_data.h:100
RSTRING_END
#define RSTRING_END(str)
Definition: ruby.h:1013
TRANSCODING_STATE
#define TRANSCODING_STATE(tc)
Definition: transcode.c:96
rb_econv_t
struct rb_econv_t rb_econv_t
Definition: encoding.h:307
name
const char * name
Definition: nkf.c:208