Ruby 2.7.1p83 (2020-03-31 revision a0c7c23c9cec0d0ffcba012279cd652d28ad5bf3)
transcode.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  transcode.c -
4 
5  $Author$
6  created at: Tue Oct 30 16:10:22 JST 2007
7 
8  Copyright (C) 2007 Martin Duerst
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "internal.h"
14 #include "transcode_data.h"
15 #include <ctype.h>
16 #include "id.h"
17 
18 #define ENABLE_ECONV_NEWLINE_OPTION 1
19 
20 /* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */
21 static VALUE rb_eUndefinedConversionError;
22 static VALUE rb_eInvalidByteSequenceError;
23 static VALUE rb_eConverterNotFoundError;
24 
26 
27 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
28 static VALUE sym_xml, sym_text, sym_attr;
29 static VALUE sym_universal_newline;
30 static VALUE sym_crlf_newline;
31 static VALUE sym_cr_newline;
32 #ifdef ENABLE_ECONV_NEWLINE_OPTION
33 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
34 #endif
35 static VALUE sym_partial_input;
36 
37 static VALUE sym_invalid_byte_sequence;
38 static VALUE sym_undefined_conversion;
39 static VALUE sym_destination_buffer_full;
40 static VALUE sym_source_buffer_empty;
41 static VALUE sym_finished;
42 static VALUE sym_after_output;
43 static VALUE sym_incomplete_input;
44 
45 static unsigned char *
46 allocate_converted_string(const char *sname, const char *dname,
47  const unsigned char *str, size_t len,
48  unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
49  size_t *dst_len_ptr);
50 
51 /* dynamic structure, one per conversion (similar to iconv_t) */
52 /* may carry conversion state (e.g. for iso-2022-jp) */
53 typedef struct rb_transcoding {
55 
56  int flags;
57 
59  unsigned int next_table;
61  unsigned char next_byte;
62  unsigned int output_index;
63 
64  ssize_t recognized_len; /* already interpreted */
65  ssize_t readagain_len; /* not yet interpreted */
66  union {
67  unsigned char ary[8]; /* max_input <= sizeof(ary) */
68  unsigned char *ptr; /* length: max_input */
69  } readbuf; /* recognized_len + readagain_len used */
70 
73  union {
74  unsigned char ary[8]; /* max_output <= sizeof(ary) */
75  unsigned char *ptr; /* length: max_output */
76  } writebuf;
77 
78  union rb_transcoding_state_t { /* opaque data for stateful encoding */
79  void *ptr;
80  char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
82  } state;
/* Input scratch buffer of a transcoding: the embedded array when the
 * transcoder's max_input fits in it, otherwise the separately held pointer. */
84 #define TRANSCODING_READBUF(tc) \
85  ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
86  (tc)->readbuf.ary : \
87  (tc)->readbuf.ptr)
/* Output scratch buffer, chosen the same way using max_output. */
88 #define TRANSCODING_WRITEBUF(tc) \
89  ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
90  (tc)->writebuf.ary : \
91  (tc)->writebuf.ptr)
/* Capacity of the buffer selected by TRANSCODING_WRITEBUF: the size of the
 * embedded array, or max_output when the pointer variant is in use. */
92 #define TRANSCODING_WRITEBUF_SIZE(tc) \
93  ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
94  sizeof((tc)->writebuf.ary) : \
95  (size_t)(tc)->transcoder->max_output)
/* Largest state_size that can live inside the state union itself. */
96 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
/* Address of the transcoder's private state: the embedded bytes when
 * state_size fits in the union, otherwise the pointer stored in it. */
97 #define TRANSCODING_STATE(tc) \
98  ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
99  (tc)->state.ary : \
100  (tc)->state.ptr)
101 
102 typedef struct {
104  unsigned char *out_buf_start;
105  unsigned char *out_data_start;
106  unsigned char *out_data_end;
107  unsigned char *out_buf_end;
110 
/*
 * State of one encoding converter.
 * NOTE(review): other code in this file uses members that are not visible
 * here (e.g. elems, num_trans, num_allocated, replacement_len); the listing
 * appears to have dropped those lines -- confirm against the original source.
 */
111 struct rb_econv_t {
112  int flags;
113  int started; /* bool */
114 
115  const char *source_encoding_name;
117 
118  const unsigned char *replacement_str;
120  const char *replacement_enc;
121 
 /* buffered input: in_buf_* delimit the buffer, in_data_* the pending bytes */
122  unsigned char *in_buf_start;
123  unsigned char *in_data_start;
124  unsigned char *in_data_end;
125  unsigned char *in_buf_end;
127  int replacement_allocated; /* bool */
132 
133  /* last error */
134  struct {
137  const char *source_encoding;
138  const char *destination_encoding;
139  const unsigned char *error_bytes_start;
142  } last_error;
143 
144  /* The following fields are only for Encoding::Converter.
145  * rb_econv_open set them NULL. */
148 };
149 
150 /*
151  * Dispatch data and logic
152  */
153 
/* True when the source encoding name is the empty string; dname is accepted
 * but not examined. */
154 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
155 
156 typedef struct {
157  const char *sname;
158  const char *dname;
159  const char *lib; /* null means no need to load a library */
162 
163 static st_table *transcoder_table;
164 
/*
 * Return the entry for the (sname, dname) pair, creating it if needed.
 *
 * transcoder_table maps a source name to a second-level table, which maps a
 * destination name to the entry. A newly created entry has lib and
 * transcoder set to NULL.
 *
 * NOTE(review): this listing lacks the lines that produce the new
 * second-level table value and that declare/allocate `entry`; confirm
 * against the original source.
 */
165 static transcoder_entry_t *
166 make_transcoder_entry(const char *sname, const char *dname)
167 {
168  st_data_t val;
169  st_table *table2;
170 
 /* first level: source encoding name -> table of destinations */
171  if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
173  st_add_direct(transcoder_table, (st_data_t)sname, val);
174  }
175  table2 = (st_table *)val;
 /* second level: destination encoding name -> entry */
176  if (!st_lookup(table2, (st_data_t)dname, &val)) {
178  entry->sname = sname;
179  entry->dname = dname;
180  entry->lib = NULL;
181  entry->transcoder = NULL;
182  val = (st_data_t)entry;
183  st_add_direct(table2, (st_data_t)dname, val);
184  }
185  return (transcoder_entry_t *)val;
186 }
187 
188 static transcoder_entry_t *
189 get_transcoder_entry(const char *sname, const char *dname)
190 {
191  st_data_t val;
192  st_table *table2;
193 
194  if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
195  return NULL;
196  }
197  table2 = (st_table *)val;
198  if (!st_lookup(table2, (st_data_t)dname, &val)) {
199  return NULL;
200  }
201  return (transcoder_entry_t *)val;
202 }
203 
/*
 * Attach the transcoder `tr` to the entry for its source/destination
 * encoding pair, creating the entry if necessary. Raises ArgumentError when
 * the entry already has a transcoder.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; `tr` is presumably the sole parameter.
 */
204 void
206 {
207  const char *const sname = tr->src_encoding;
208  const char *const dname = tr->dst_encoding;
209 
210  transcoder_entry_t *entry;
211 
212  entry = make_transcoder_entry(sname, dname);
213  if (entry->transcoder) {
214  rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
215  sname, dname);
216  }
217 
218  entry->transcoder = tr;
219 }
220 
221 static void
222 declare_transcoder(const char *sname, const char *dname, const char *lib)
223 {
224  transcoder_entry_t *entry;
225 
226  entry = make_transcoder_entry(sname, dname);
227  entry->lib = lib;
228 }
229 
230 static const char transcoder_lib_prefix[] = "enc/trans/";
231 
232 void
233 rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
234 {
235  if (!lib) {
236  rb_raise(rb_eArgError, "invalid library name - (null)");
237  }
238  declare_transcoder(enc1, enc2, lib);
239 }
240 
/* Encoding names compare equal when STRCASECMP reports no difference. */
241 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
242 
243 typedef struct search_path_queue_tag {
245  const char *enc;
247 
248 typedef struct {
252  const char *base_enc;
254 
/*
 * st_foreach callback used by transcode_search_path(): `key` is a
 * destination encoding name reachable from bfs->base_enc.
 *
 * Names already present in bfs->visited are ignored. Otherwise the name is
 * appended to the search queue and recorded in visited with base_enc as its
 * value. Always returns ST_CONTINUE.
 *
 * NOTE(review): the declarations of `bfs` and `q` and the allocation of `q`
 * are missing from this listing.
 */
255 static int
256 transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
257 {
258  const char *dname = (const char *)key;
261 
262  if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
263  return ST_CONTINUE;
264  }
265 
267  q->enc = dname;
268  q->next = NULL;
 /* append q at the tail of the queue */
269  *bfs->queue_last_ptr = q;
270  bfs->queue_last_ptr = &q->next;
271 
272  st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
273  return ST_CONTINUE;
274 }
275 
/*
 * Search transcoder_table for a chain of conversions from sname to dname,
 * processing encodings from a FIFO queue.
 *
 * When a chain is found, `callback` is invoked once per step, starting with
 * the step that ends at dname and walking back through bfs.visited; `depth`
 * counts down from pathlen-1.
 *
 * Returns the number of steps, or -1 when the names are equal (per
 * encoding_equal) or no chain is found.
 *
 * NOTE(review): the declaration/allocation of `q` and the setup of
 * bfs.visited are missing from this listing.
 */
276 static int
277 transcode_search_path(const char *sname, const char *dname,
278  void (*callback)(const char *sname, const char *dname, int depth, void *arg),
279  void *arg)
280 {
281  search_path_bfs_t bfs;
283  st_data_t val;
284  st_table *table2;
285  int found;
286  int pathlen = -1;
287 
288  if (encoding_equal(sname, dname))
289  return -1;
290 
292  q->enc = sname;
293  q->next = NULL;
294  bfs.queue_last_ptr = &q->next;
295  bfs.queue = q;
296 
299 
300  while (bfs.queue) {
 /* pop the head of the queue */
301  q = bfs.queue;
302  bfs.queue = q->next;
303  if (!bfs.queue)
304  bfs.queue_last_ptr = &bfs.queue;
305 
306  if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
307  xfree(q);
308  continue;
309  }
310  table2 = (st_table *)val;
311 
 /* direct conversion from q->enc to dname exists: done */
312  if (st_lookup(table2, (st_data_t)dname, &val)) {
313  st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
314  xfree(q);
315  found = 1;
316  goto cleanup;
317  }
318 
 /* otherwise enqueue every not-yet-visited destination of q->enc */
319  bfs.base_enc = q->enc;
320  st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
321  bfs.base_enc = NULL;
322 
323  xfree(q);
324  }
325  found = 0;
326 
327  cleanup:
 /* free whatever is still queued */
328  while (bfs.queue) {
329  q = bfs.queue;
330  bfs.queue = q->next;
331  xfree(q);
332  }
333 
334  if (found) {
335  const char *enc = dname;
336  int depth;
337  pathlen = 0;
 /* first pass: count the steps by following visited back from dname */
338  while (1) {
339  st_lookup(bfs.visited, (st_data_t)enc, &val);
340  if (!val)
341  break;
342  pathlen++;
343  enc = (const char *)val;
344  }
345  depth = pathlen;
346  enc = dname;
 /* second pass: report each step, last step first */
347  while (1) {
348  st_lookup(bfs.visited, (st_data_t)enc, &val);
349  if (!val)
350  break;
351  callback((const char *)val, enc, --depth, arg);
352  enc = (const char *)val;
353  }
354  }
355 
356  st_free_table(bfs.visited);
357 
358  return pathlen; /* is -1 if not found */
359 }
360 
361 static const rb_transcoder *
362 load_transcoder_entry(transcoder_entry_t *entry)
363 {
364  if (entry->transcoder)
365  return entry->transcoder;
366 
367  if (entry->lib) {
368  const char *const lib = entry->lib;
369  const size_t len = strlen(lib);
370  const size_t total_len = sizeof(transcoder_lib_prefix) - 1 + len;
371  const VALUE fn = rb_str_new(0, total_len);
372  char *const path = RSTRING_PTR(fn);
373 
374  memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
375  memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len);
376  rb_str_set_len(fn, total_len);
377  OBJ_FREEZE(fn);
378  rb_require_string(fn);
379  }
380 
381  if (entry->transcoder)
382  return entry->transcoder;
383 
384  return NULL;
385 }
386 
/*
 * Choose the default replacement byte sequence for `encname`.
 *
 * For UTF-8 the three bytes EF BF BD are returned with encoding name
 * "UTF-8"; for every other encoding "?" is returned with encoding name
 * "US-ASCII". The byte length is stored in *len_ret and the replacement's
 * encoding name in *repl_encname_ptr.
 */
static const char*
get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
{
    const int is_utf8 = encoding_equal(encname, "UTF-8");

    *len_ret = is_utf8 ? 3 : 1;
    *repl_encname_ptr = is_utf8 ? "UTF-8" : "US-ASCII";
    return is_utf8 ? "\xEF\xBF\xBD" : "?";
}
401 
402 /*
403  * Transcoding engine logic
404  */
405 
/*
 * Return a pointer to the start of the character currently being processed
 * and store its length (recognized bytes plus bytes between inchar_start
 * and in_p) in *char_len_ptr.
 *
 * When fewer than tc->recognized_len bytes precede inchar_start in the
 * current input, the result points into the transcoding's read buffer;
 * otherwise it points into the input itself.
 *
 * NOTE(review): the first line of the copy into the read buffer is missing
 * from this listing.
 */
406 static const unsigned char *
407 transcode_char_start(rb_transcoding *tc,
408  const unsigned char *in_start,
409  const unsigned char *inchar_start,
410  const unsigned char *in_p,
411  size_t *char_len_ptr)
412 {
413  const unsigned char *ptr;
414  if (inchar_start - in_start < tc->recognized_len) {
416  inchar_start, unsigned char, in_p - inchar_start);
417  ptr = TRANSCODING_READBUF(tc);
418  }
419  else {
420  ptr = inchar_start - tc->recognized_len;
421  }
422  *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
423  return ptr;
424 }
425 
/*
 * Resumable conversion loop for a single transcoding.
 *
 * Consumes input from *in_pos up to in_stop and emits output from *out_pos
 * up to out_stop, following the transcoder's lookup tables one input byte
 * at a time. Whenever it has to return early it goes through SUSPEND, which
 * stores a resume position in tc; the next call jumps straight back to the
 * matching resume_label through the switch on tc->resume_position.
 *
 * NOTE(review): this listing is missing a number of lines (for example the
 * statements following the invalid:/incomplete:/undef: labels and the final
 * return); consult the original source before relying on details.
 */
426 static rb_econv_result_t
427 transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
428  const unsigned char *in_stop, unsigned char *out_stop,
429  rb_transcoding *tc,
430  const int opt)
431 {
432  const rb_transcoder *tr = tc->transcoder;
433  int unitlen = tr->input_unit_length;
434  ssize_t readagain_len = 0;
435 
436  const unsigned char *inchar_start;
437  const unsigned char *in_p;
438 
439  unsigned char *out_p;
440 
441  in_p = inchar_start = *in_pos;
442 
443  out_p = *out_pos;
444 
 /* Return `ret` to the caller so that the next call resumes at
  * resume_label<num>: the bytes of the current character seen so far are
  * saved in the read buffer and in_p/out_p are published via in_pos/out_pos. */
445 #define SUSPEND(ret, num) \
446  do { \
447  tc->resume_position = (num); \
448  if (0 < in_p - inchar_start) \
449  MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
450  inchar_start, unsigned char, in_p - inchar_start); \
451  *in_pos = in_p; \
452  *out_pos = out_p; \
453  tc->recognized_len += in_p - inchar_start; \
454  if (readagain_len) { \
455  tc->recognized_len -= readagain_len; \
456  tc->readagain_len = readagain_len; \
457  } \
458  return (ret); \
459  resume_label ## num:; \
460  } while (0)
 /* Suspend with econv_destination_buffer_full until at least one byte of
  * output space is available. */
461 #define SUSPEND_OBUF(num) \
462  do { \
463  while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
464  } while (0)
465 
 /* With ECONV_AFTER_OUTPUT set, suspend once this call has produced output. */
466 #define SUSPEND_AFTER_OUTPUT(num) \
467  if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
468  SUSPEND(econv_after_output, num); \
469  }
470 
 /* these values must survive a suspend, so they live in tc */
471 #define next_table (tc->next_table)
472 #define next_info (tc->next_info)
473 #define next_byte (tc->next_byte)
474 #define writebuf_len (tc->writebuf_len)
475 #define writebuf_off (tc->writebuf_off)
476 
 /* re-enter at the point where the previous call suspended */
477  switch (tc->resume_position) {
478  case 0: break;
479  case 1: goto resume_label1;
480  case 2: goto resume_label2;
481  case 3: goto resume_label3;
482  case 4: goto resume_label4;
483  case 5: goto resume_label5;
484  case 6: goto resume_label6;
485  case 7: goto resume_label7;
486  case 8: goto resume_label8;
487  case 9: goto resume_label9;
488  case 10: goto resume_label10;
489  case 11: goto resume_label11;
490  case 12: goto resume_label12;
491  case 13: goto resume_label13;
492  case 14: goto resume_label14;
493  case 15: goto resume_label15;
494  case 16: goto resume_label16;
495  case 17: goto resume_label17;
496  case 18: goto resume_label18;
497  case 19: goto resume_label19;
498  case 20: goto resume_label20;
499  case 21: goto resume_label21;
500  case 22: goto resume_label22;
501  case 23: goto resume_label23;
502  case 24: goto resume_label24;
503  case 25: goto resume_label25;
504  case 26: goto resume_label26;
505  case 27: goto resume_label27;
506  case 28: goto resume_label28;
507  case 29: goto resume_label29;
508  case 30: goto resume_label30;
509  case 31: goto resume_label31;
510  case 32: goto resume_label32;
511  case 33: goto resume_label33;
512  case 34: goto resume_label34;
513  }
514 
 /* one iteration per input character */
515  while (1) {
516  inchar_start = in_p;
517  tc->recognized_len = 0;
518  next_table = tr->conv_tree_start;
519 
521 
522  if (in_stop <= in_p) {
523  if (!(opt & ECONV_PARTIAL_INPUT))
524  break;
526  continue;
527  }
528 
529 #define BYTE_ADDR(index) (tr->byte_array + (index))
530 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
531 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
532 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
533 #define BL_MIN_BYTE (BL_BASE[0])
534 #define BL_MAX_BYTE (BL_BASE[1])
535 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
536 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
537 
538  next_byte = (unsigned char)*in_p++;
539  follow_byte:
541  next_info = INVALID;
542  else {
544  }
545  follow_info:
546  switch (next_info & 0x1F) {
547  case NOMAP:
548  {
 /* copy the input character to the output unchanged, via the write buffer */
549  const unsigned char *p = inchar_start;
550  writebuf_off = 0;
551  while (p < in_p) {
552  TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
553  }
555  writebuf_off = 0;
556  while (writebuf_off < writebuf_len) {
557  SUSPEND_OBUF(3);
558  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
559  }
560  }
561  continue;
562  case 0x00: case 0x04: case 0x08: case 0x0C:
563  case 0x10: case 0x14: case 0x18: case 0x1C:
 /* next_info names another lookup table: fetch one more input byte */
565  while (in_p >= in_stop) {
566  if (!(opt & ECONV_PARTIAL_INPUT))
567  goto incomplete;
569  }
570  next_byte = (unsigned char)*in_p++;
571  next_table = (unsigned int)next_info;
572  goto follow_byte;
573  case ZERObt: /* drop input */
574  continue;
575  case ONEbt:
576  SUSPEND_OBUF(9); *out_p++ = getBT1(next_info);
577  continue;
578  case TWObt:
579  SUSPEND_OBUF(10); *out_p++ = getBT1(next_info);
580  SUSPEND_OBUF(21); *out_p++ = getBT2(next_info);
581  continue;
582  case THREEbt:
583  SUSPEND_OBUF(11); *out_p++ = getBT1(next_info);
584  SUSPEND_OBUF(15); *out_p++ = getBT2(next_info);
585  SUSPEND_OBUF(16); *out_p++ = getBT3(next_info);
586  continue;
587  case FOURbt:
588  SUSPEND_OBUF(12); *out_p++ = getBT0(next_info);
589  SUSPEND_OBUF(17); *out_p++ = getBT1(next_info);
590  SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
591  SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
592  continue;
593  case GB4bt:
594  SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
595  SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
596  SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
597  SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
598  continue;
599  case STR1:
600  tc->output_index = 0;
603  tc->output_index++;
604  }
605  continue;
606  case FUNii:
 /* the transcoder's function maps next_info to a new next_info */
607  next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
608  goto follow_info;
609  case FUNsi:
610  {
611  const unsigned char *char_start;
612  size_t char_len;
613  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
614  next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
615  goto follow_info;
616  }
617  case FUNio:
618  SUSPEND_OBUF(13);
 /* write straight to the output when max_output bytes fit, otherwise
  * go through the write buffer and drain it byte by byte */
619  if (tr->max_output <= out_stop - out_p)
620  out_p += tr->func_io(TRANSCODING_STATE(tc),
621  next_info, out_p, out_stop - out_p);
622  else {
623  writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
624  next_info,
626  writebuf_off = 0;
627  while (writebuf_off < writebuf_len) {
628  SUSPEND_OBUF(20);
629  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
630  }
631  }
632  break;
633  case FUNso:
634  {
635  const unsigned char *char_start;
636  size_t char_len;
637  SUSPEND_OBUF(14);
638  if (tr->max_output <= out_stop - out_p) {
639  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
640  out_p += tr->func_so(TRANSCODING_STATE(tc),
641  char_start, (size_t)char_len,
642  out_p, out_stop - out_p);
643  }
644  else {
645  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
646  writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
647  char_start, (size_t)char_len,
649  writebuf_off = 0;
650  while (writebuf_off < writebuf_len) {
651  SUSPEND_OBUF(22);
652  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
653  }
654  }
655  break;
656  }
657  case FUNsio:
658  {
659  const unsigned char *char_start;
660  size_t char_len;
661  SUSPEND_OBUF(33);
662  if (tr->max_output <= out_stop - out_p) {
663  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
664  out_p += tr->func_sio(TRANSCODING_STATE(tc),
665  char_start, (size_t)char_len, next_info,
666  out_p, out_stop - out_p);
667  }
668  else {
669  char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
670  writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
671  char_start, (size_t)char_len, next_info,
673  writebuf_off = 0;
674  while (writebuf_off < writebuf_len) {
675  SUSPEND_OBUF(34);
676  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
677  }
678  }
679  break;
680  }
681  case INVALID:
 /* decide how many bytes belong to the invalid sequence, in multiples
  * of the transcoder's input unit length */
682  if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
683  if (tc->recognized_len + (in_p - inchar_start) < unitlen)
685  while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
686  in_p = in_stop;
688  }
689  if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
690  in_p = in_stop;
691  }
692  else {
693  in_p = inchar_start + (unitlen - tc->recognized_len);
694  }
695  }
696  else {
697  ssize_t invalid_len; /* including the last byte which causes invalid */
698  ssize_t discard_len;
699  invalid_len = tc->recognized_len + (in_p - inchar_start);
700  discard_len = ((invalid_len - 1) / unitlen) * unitlen;
701  readagain_len = invalid_len - discard_len;
702  }
703  goto invalid;
704  case UNDEF:
705  goto undef;
706  default:
707  rb_raise(rb_eRuntimeError, "unknown transcoding instruction");
708  }
709  continue;
710 
711  invalid:
713  continue;
714 
715  incomplete:
717  continue;
718 
719  undef:
721  continue;
722  }
723 
724  /* cleanup */
725  if (tr->finish_func) {
726  SUSPEND_OBUF(4);
727  if (tr->max_output <= out_stop - out_p) {
728  out_p += tr->finish_func(TRANSCODING_STATE(tc),
729  out_p, out_stop - out_p);
730  }
731  else {
732  writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
734  writebuf_off = 0;
735  while (writebuf_off < writebuf_len) {
736  SUSPEND_OBUF(23);
737  *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
738  }
739  }
740  }
741  while (1)
743 #undef SUSPEND
744 #undef next_table
745 #undef next_info
746 #undef next_byte
747 #undef writebuf_len
748 #undef writebuf_off
749 }
750 
/*
 * Wrapper around transcode_restartable0() that first replays any bytes the
 * previous call marked as "read again".
 *
 * Those bytes are copied out of the read buffer into a stack buffer and
 * converted with ECONV_PARTIAL_INPUT forced on. Unless that replay ends
 * with econv_source_buffer_empty, its result is returned immediately and
 * the bytes it did not consume are counted in readagain_len again.
 * Otherwise the caller's input is converted as usual.
 *
 * NOTE(review): the first line of the copy-back statement inside the `if`
 * is missing from this listing.
 */
751 static rb_econv_result_t
752 transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
753  const unsigned char *in_stop, unsigned char *out_stop,
754  rb_transcoding *tc,
755  const int opt)
756 {
757  if (tc->readagain_len) {
758  unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
759  const unsigned char *readagain_pos = readagain_buf;
760  const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
761  rb_econv_result_t res;
762 
763  MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
764  unsigned char, tc->readagain_len);
765  tc->readagain_len = 0;
766  res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
767  if (res != econv_source_buffer_empty) {
769  readagain_pos, unsigned char, readagain_stop - readagain_pos);
770  tc->readagain_len += readagain_stop - readagain_pos;
771  return res;
772  }
773  }
774  return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
775 }
776 
777 static rb_transcoding *
778 rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
779 {
780  rb_transcoding *tc;
781 
782  tc = ALLOC(rb_transcoding);
783  tc->transcoder = tr;
784  tc->flags = flags;
785  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
786  tc->state.ptr = xmalloc(tr->state_size);
787  if (tr->state_init_func) {
788  (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
789  }
790  tc->resume_position = 0;
791  tc->recognized_len = 0;
792  tc->readagain_len = 0;
793  tc->writebuf_len = 0;
794  tc->writebuf_off = 0;
795  if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
796  tc->readbuf.ptr = xmalloc(tr->max_input);
797  }
798  if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
799  tc->writebuf.ptr = xmalloc(tr->max_output);
800  }
801  return tc;
802 }
803 
804 static rb_econv_result_t
805 rb_transcoding_convert(rb_transcoding *tc,
806  const unsigned char **input_ptr, const unsigned char *input_stop,
807  unsigned char **output_ptr, unsigned char *output_stop,
808  int flags)
809 {
810  return transcode_restartable(
811  input_ptr, output_ptr,
812  input_stop, output_stop,
813  tc, flags);
814 }
815 
816 static void
817 rb_transcoding_close(rb_transcoding *tc)
818 {
819  const rb_transcoder *tr = tc->transcoder;
820  if (tr->state_fini_func) {
821  (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
822  }
823  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
824  xfree(tc->state.ptr);
825  if ((int)sizeof(tc->readbuf.ary) < tr->max_input)
826  xfree(tc->readbuf.ptr);
827  if ((int)sizeof(tc->writebuf.ary) < tr->max_output)
828  xfree(tc->writebuf.ptr);
829  xfree(tc);
830 }
831 
832 static size_t
833 rb_transcoding_memsize(rb_transcoding *tc)
834 {
835  size_t size = sizeof(rb_transcoding);
836  const rb_transcoder *tr = tc->transcoder;
837 
838  if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
839  size += tr->state_size;
840  }
841  if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
842  size += tr->max_input;
843  }
844  if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
845  size += tr->max_output;
846  }
847  return size;
848 }
849 
/*
 * Allocate a converter with every visible field cleared and room recorded
 * for n_hint transcoders (at least 1).
 *
 * NOTE(review): some initialization lines are missing from this listing.
 */
850 static rb_econv_t *
851 rb_econv_alloc(int n_hint)
852 {
853  rb_econv_t *ec;
854 
 /* non-positive hints are treated as 1 */
855  if (n_hint <= 0)
856  n_hint = 1;
857 
858  ec = ALLOC(rb_econv_t);
859  ec->flags = 0;
862  ec->started = 0;
863  ec->replacement_str = NULL;
864  ec->replacement_len = 0;
865  ec->replacement_enc = NULL;
866  ec->replacement_allocated = 0;
867  ec->in_buf_start = NULL;
868  ec->in_data_start = NULL;
869  ec->in_data_end = NULL;
870  ec->in_buf_end = NULL;
871  ec->num_allocated = n_hint;
872  ec->num_trans = 0;
874  ec->num_finished = 0;
875  ec->last_tc = NULL;
877  ec->last_error.error_tc = NULL;
881  ec->last_error.error_bytes_len = 0;
882  ec->last_error.readagain_len = 0;
883  ec->source_encoding = NULL;
885  return ec;
886 }
887 
/*
 * Insert a new transcoding for `tr` at index i of ec->elems, moving the
 * elements from i onwards up by one. The new element gets its own
 * 4096-byte output buffer, initially empty. Returns 0.
 *
 * NOTE(review): the line that grows ec->elems when it is full, and one
 * element initialization line, are missing from this listing.
 */
888 static int
889 rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
890 {
891  int n, j;
892  int bufsize = 4096;
893  unsigned char *p;
894 
 /* element array is full: double the recorded capacity */
895  if (ec->num_trans == ec->num_allocated) {
896  n = ec->num_allocated * 2;
898  ec->num_allocated = n;
899  }
900 
901  p = xmalloc(bufsize);
902 
903  MEMMOVE(ec->elems+i+1, ec->elems+i, rb_econv_elem_t, ec->num_trans-i);
904 
905  ec->elems[i].tc = rb_transcoding_open_by_transcoder(tr, 0);
906  ec->elems[i].out_buf_start = p;
907  ec->elems[i].out_buf_end = p + bufsize;
908  ec->elems[i].out_data_start = p;
909  ec->elems[i].out_data_end = p;
911 
912  ec->num_trans++;
913 
 /* when a non-decorator was added, last_tc becomes the last non-decorator
  * found scanning from the end of the array down to index i */
914  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
915  for (j = ec->num_trans-1; i <= j; j--) {
916  rb_transcoding *tc = ec->elems[j].tc;
917  const rb_transcoder *tr2 = tc->transcoder;
918  if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
919  ec->last_tc = tc;
920  break;
921  }
922  }
923 
924  return 0;
925 }
926 
927 static rb_econv_t *
928 rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
929 {
930  rb_econv_t *ec;
931  int i, ret;
932 
933  for (i = 0; i < n; i++) {
934  const rb_transcoder *tr;
935  tr = load_transcoder_entry(entries[i]);
936  if (!tr)
937  return NULL;
938  }
939 
940  ec = rb_econv_alloc(n);
941 
942  for (i = 0; i < n; i++) {
943  const rb_transcoder *tr = load_transcoder_entry(entries[i]);
944  ret = rb_econv_add_transcoder_at(ec, tr, ec->num_trans);
945  if (ret == -1) {
946  rb_econv_close(ec);
947  return NULL;
948  }
949  }
950 
951  return ec;
952 }
953 
954 struct trans_open_t {
957 };
958 
959 static void
960 trans_open_i(const char *sname, const char *dname, int depth, void *arg)
961 {
962  struct trans_open_t *toarg = arg;
963 
964  if (!toarg->entries) {
965  toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
966  }
967  toarg->entries[depth] = get_transcoder_entry(sname, dname);
968 }
969 
/*
 * Open a converter from sname to dname without decorators.
 *
 * When both names are empty the converter has no transcoders. Otherwise
 * the conversion path is searched with transcode_search_path() and a
 * converter is built from the entries found. Returns NULL when no path
 * exists or the converter cannot be built. On success the converter's
 * flags and source/destination encoding names are set from the arguments.
 *
 * NOTE(review): the declaration of `entries` is missing from this listing.
 */
970 static rb_econv_t *
971 rb_econv_open0(const char *sname, const char *dname, int ecflags)
972 {
974  int num_trans;
975  rb_econv_t *ec;
976 
977  /* Just check if sname and dname are defined */
978  /* (Is this check needed?) */
979  if (*sname) rb_enc_find_index(sname);
980  if (*dname) rb_enc_find_index(dname);
981 
982  if (*sname == '\0' && *dname == '\0') {
983  num_trans = 0;
984  entries = NULL;
985  sname = dname = "";
986  }
987  else {
988  struct trans_open_t toarg;
989  toarg.entries = NULL;
990  toarg.num_additional = 0;
991  num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg);
992  entries = toarg.entries;
993  if (num_trans < 0) {
994  xfree(entries);
995  return NULL;
996  }
997  }
998 
999  ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
 /* the entries array is only needed while building the converter */
1000  xfree(entries);
1001  if (!ec)
1002  return NULL;
1003 
1004  ec->flags = ecflags;
1005  ec->source_encoding_name = sname;
1006  ec->destination_encoding_name = dname;
1007 
1008  return ec;
1009 }
1010 
/* Capacity of the decorator-name array that rb_econv_open() hands to
 * decorator_names(). */
1011 #define MAX_ECFLAGS_DECORATORS 32
1012 
/*
 * Translate the decorator bits of `ecflags` into decorator names.
 *
 * Names are stored in decorators_ret in this order: the XML decorators
 * (text escape, attribute content escape, attribute quote), then the
 * newline decorators (crlf, cr, universal). Returns the number of names
 * stored, or -1 for a flag combination that is rejected.
 *
 * NOTE(review): the accepted case labels of the newline switch and the
 * second half of the XML condition are missing from this listing.
 */
1013 static int
1014 decorator_names(int ecflags, const char **decorators_ret)
1015 {
1016  int num_decorators;
1017 
1018  switch (ecflags & ECONV_NEWLINE_DECORATOR_MASK) {
1022  case 0:
1023  break;
1024  default:
1025  return -1;
1026  }
1027 
1028  if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
1030  return -1;
1031 
1032  num_decorators = 0;
1033 
1034  if (ecflags & ECONV_XML_TEXT_DECORATOR)
1035  decorators_ret[num_decorators++] = "xml_text_escape";
1036  if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
1037  decorators_ret[num_decorators++] = "xml_attr_content_escape";
1038  if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
1039  decorators_ret[num_decorators++] = "xml_attr_quote";
1040 
1041  if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
1042  decorators_ret[num_decorators++] = "crlf_newline";
1043  if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
1044  decorators_ret[num_decorators++] = "cr_newline";
1045  if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
1046  decorators_ret[num_decorators++] = "universal_newline";
1047 
1048  return num_decorators;
1049 }
1050 
1051 rb_econv_t *
1052 rb_econv_open(const char *sname, const char *dname, int ecflags)
1053 {
1054  rb_econv_t *ec;
1055  int num_decorators;
1056  const char *decorators[MAX_ECFLAGS_DECORATORS];
1057  int i;
1058 
1059  num_decorators = decorator_names(ecflags, decorators);
1060  if (num_decorators == -1)
1061  return NULL;
1062 
1063  ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
1064  if (!ec)
1065  return NULL;
1066 
1067  for (i = 0; i < num_decorators; i++)
1068  if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
1069  rb_econv_close(ec);
1070  return NULL;
1071  }
1072 
1073  ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1074 
1075  return ec;
1076 }
1077 
/*
 * Run the transcoders of `ec` from index `start` to the end, repeating the
 * pass for as long as some element consumed input or produced output.
 *
 * Element 0 reads from the caller's input; every other element reads from
 * the output buffer of the element before it. The last element writes to
 * the caller's output; every other element writes to its own buffer.
 *
 * Returns the index of the element whose result has to be reported, or -1
 * when the sweep ends without one.
 *
 * NOTE(review): several case labels of the result switch and one flag
 * adjustment line are missing from this listing.
 */
1078 static int
1079 trans_sweep(rb_econv_t *ec,
1080  const unsigned char **input_ptr, const unsigned char *input_stop,
1081  unsigned char **output_ptr, unsigned char *output_stop,
1082  int flags,
1083  int start)
1084 {
1085  int try;
1086  int i, f;
1087 
1088  const unsigned char **ipp, *is, *iold;
1089  unsigned char **opp, *os, *oold;
1090  rb_econv_result_t res;
1091 
1092  try = 1;
1093  while (try) {
1094  try = 0;
1095  for (i = start; i < ec->num_trans; i++) {
1096  rb_econv_elem_t *te = &ec->elems[i];
1097 
 /* choose the input side for element i */
1098  if (i == 0) {
1099  ipp = input_ptr;
1100  is = input_stop;
1101  }
1102  else {
1103  rb_econv_elem_t *prev_te = &ec->elems[i-1];
1104  ipp = (const unsigned char **)&prev_te->out_data_start;
1105  is = prev_te->out_data_end;
1106  }
1107 
 /* choose the output side for element i */
1108  if (i == ec->num_trans-1) {
1109  opp = output_ptr;
1110  os = output_stop;
1111  }
1112  else {
 /* move pending data to the front of the buffer to maximize free space */
1113  if (te->out_buf_start != te->out_data_start) {
1114  ssize_t len = te->out_data_end - te->out_data_start;
1115  ssize_t off = te->out_data_start - te->out_buf_start;
1116  MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
1117  te->out_data_start = te->out_buf_start;
1118  te->out_data_end -= off;
1119  }
1120  opp = &te->out_data_end;
1121  os = te->out_buf_end;
1122  }
1123 
1124  f = flags;
1125  if (ec->num_finished != i)
1127  if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
1128  start = 1;
1129  flags &= ~ECONV_AFTER_OUTPUT;
1130  }
1131  if (i != 0)
1132  f &= ~ECONV_AFTER_OUTPUT;
1133  iold = *ipp;
1134  oold = *opp;
1135  te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
 /* any progress means another pass is worthwhile */
1136  if (iold != *ipp || oold != *opp)
1137  try = 1;
1138 
1139  switch (res) {
1143  case econv_after_output:
1144  return i;
1145 
1148  break;
1149 
1150  case econv_finished:
1151  ec->num_finished = i+1;
1152  break;
1153  }
1154  }
1155  }
1156  return -1;
1157 }
1158 
/*
 * Drive the whole transcoder chain of `ec` once and report the result.
 *
 * NULL input_ptr or output_ptr is replaced by an empty dummy buffer. The
 * chain is swept with trans_sweep() until no element needs reporting or the
 * last element does. Afterwards the elements are examined from last to
 * first; for the result that is returned, the element index is stored in
 * *result_position_ptr when that pointer is non-NULL (-1 when no element
 * result is returned).
 *
 * NOTE(review): a number of lines (case labels, part of a condition, and
 * the final return) are missing from this listing.
 */
1159 static rb_econv_result_t
1160 rb_trans_conv(rb_econv_t *ec,
1161  const unsigned char **input_ptr, const unsigned char *input_stop,
1162  unsigned char **output_ptr, unsigned char *output_stop,
1163  int flags,
1164  int *result_position_ptr)
1165 {
1166  int i;
1167  int needreport_index;
1168  int sweep_start;
1169 
1170  unsigned char empty_buf;
1171  unsigned char *empty_ptr = &empty_buf;
1172 
 /* substitute a zero-length buffer for missing input/output */
1173  if (!input_ptr) {
1174  input_ptr = (const unsigned char **)&empty_ptr;
1175  input_stop = empty_ptr;
1176  }
1177 
1178  if (!output_ptr) {
1179  output_ptr = &empty_ptr;
1180  output_stop = empty_ptr;
1181  }
1182 
1183  if (ec->elems[0].last_result == econv_after_output)
1185 
1186  for (i = ec->num_trans-1; 0 <= i; i--) {
1187  switch (ec->elems[i].last_result) {
1191  case econv_after_output:
1192  case econv_finished:
1193  sweep_start = i+1;
1194  goto found_needreport;
1195 
1198  break;
1199 
1200  default:
1201  rb_bug("unexpected transcode last result");
1202  }
1203  }
1204 
1205  /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */
1206 
1208  (flags & ECONV_AFTER_OUTPUT)) {
1209  rb_econv_result_t res;
1210 
1211  res = rb_trans_conv(ec, NULL, NULL, output_ptr, output_stop,
1213  result_position_ptr);
1214 
1215  if (res == econv_source_buffer_empty)
1216  return econv_after_output;
1217  return res;
1218  }
1219 
1220  sweep_start = 0;
1221 
1222  found_needreport:
1223 
 /* keep sweeping from just past the element that needed reporting */
1224  do {
1225  needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1226  sweep_start = needreport_index + 1;
1227  } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
1228 
1229  for (i = ec->num_trans-1; 0 <= i; i--) {
1231  rb_econv_result_t res = ec->elems[i].last_result;
1232  if (res == econv_invalid_byte_sequence ||
1233  res == econv_incomplete_input ||
1234  res == econv_undefined_conversion ||
1235  res == econv_after_output) {
1237  }
1238  if (result_position_ptr)
1239  *result_position_ptr = i;
1240  return res;
1241  }
1242  }
1243  if (result_position_ptr)
1244  *result_position_ptr = -1;
1246 }
1247 
/*
 * One conversion step for ec.  Clears ec->last_error, performs the
 * conversion and stores the outcome in ec->last_error.result; for the
 * error results it also records the failing transcoding and its
 * recognized/readagain lengths.
 *
 * NOTE(review): the embedded listing numbers skip lines 1268, 1295, 1297,
 * 1313 and 1360-1362, so a few statements (apparently assignments to res
 * and to last_error fields) are not visible here -- confirm upstream.
 */
1248 static rb_econv_result_t
1249 rb_econv_convert0(rb_econv_t *ec,
1250  const unsigned char **input_ptr, const unsigned char *input_stop,
1251  unsigned char **output_ptr, unsigned char *output_stop,
1252  int flags)
1253 {
1254  rb_econv_result_t res;
1255  int result_position;
1256  int has_output = 0;
1257 
1258  memset(&ec->last_error, 0, sizeof(ec->last_error));
1259 
 /* no transcoders at all: bytes are copied straight through */
1260  if (ec->num_trans == 0) {
1261  size_t len;
 /* first drain data already held in ec's input buffer */
1262  if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
1263  if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
1264  len = output_stop - *output_ptr;
1265  memcpy(*output_ptr, ec->in_data_start, len);
1266  *output_ptr = output_stop;
1267  ec->in_data_start += len;
1269  goto gotresult;
1270  }
1271  len = ec->in_data_end - ec->in_data_start;
1272  memcpy(*output_ptr, ec->in_data_start, len);
1273  *output_ptr += len;
1274  ec->in_data_start = ec->in_data_end = ec->in_buf_start;
1275  if (flags & ECONV_AFTER_OUTPUT) {
1276  res = econv_after_output;
1277  goto gotresult;
1278  }
1279  }
 /* len = min(free output space, remaining caller input) */
1280  if (output_stop - *output_ptr < input_stop - *input_ptr) {
1281  len = output_stop - *output_ptr;
1282  }
1283  else {
1284  len = input_stop - *input_ptr;
1285  }
 /* with ECONV_AFTER_OUTPUT only a single byte is copied per call */
1286  if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
1287  *(*output_ptr)++ = *(*input_ptr)++;
1288  res = econv_after_output;
1289  goto gotresult;
1290  }
1291  memcpy(*output_ptr, *input_ptr, len);
1292  *output_ptr += len;
1293  *input_ptr += len;
1294  if (*input_ptr != input_stop)
1296  else if (flags & ECONV_PARTIAL_INPUT)
1298  else
1299  res = econv_finished;
1300  goto gotresult;
1301  }
1302 
 /* flush output still pending in the last element's buffer */
1303  if (ec->elems[ec->num_trans-1].out_data_start) {
1304  unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
1305  unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
1306  if (data_start != data_end) {
1307  size_t len;
1308  if (output_stop - *output_ptr < data_end - data_start) {
1309  len = output_stop - *output_ptr;
1310  memcpy(*output_ptr, data_start, len);
1311  *output_ptr = output_stop;
1312  ec->elems[ec->num_trans-1].out_data_start += len;
1314  goto gotresult;
1315  }
1316  len = data_end - data_start;
1317  memcpy(*output_ptr, data_start, len);
1318  *output_ptr += len;
1319  ec->elems[ec->num_trans-1].out_data_start =
1320  ec->elems[ec->num_trans-1].out_data_end =
1321  ec->elems[ec->num_trans-1].out_buf_start;
1322  has_output = 1;
1323  }
1324  }
1325 
 /* data held in ec's own input buffer is converted before the caller's input */
1326  if (ec->in_buf_start &&
1327  ec->in_data_start != ec->in_data_end) {
1328  res = rb_trans_conv(ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
1329  (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
1330  if (res != econv_source_buffer_empty)
1331  goto gotresult;
1332  }
1333 
1334  if (has_output &&
1335  (flags & ECONV_AFTER_OUTPUT) &&
1336  *input_ptr != input_stop) {
 /* output was already produced above: run with an empty input range */
1337  input_stop = *input_ptr;
1338  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1339  if (res == econv_source_buffer_empty)
1340  res = econv_after_output;
1341  }
1342  else if ((flags & ECONV_AFTER_OUTPUT) ||
1343  ec->num_trans == 1) {
1344  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1345  }
1346  else {
 /* force ECONV_AFTER_OUTPUT and repeat while that is the reported result */
1347  flags |= ECONV_AFTER_OUTPUT;
1348  do {
1349  res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1350  } while (res == econv_after_output);
1351  }
1352 
1353  gotresult:
1354  ec->last_error.result = res;
1355  if (res == econv_invalid_byte_sequence ||
1356  res == econv_incomplete_input ||
1357  res == econv_undefined_conversion) {
1358  rb_transcoding *error_tc = ec->elems[result_position].tc;
1359  ec->last_error.error_tc = error_tc;
1363  ec->last_error.error_bytes_len = error_tc->recognized_len;
1364  ec->last_error.readagain_len = error_tc->readagain_len;
1365  }
1366 
1367  return res;
1368 }
1369 
1370 static int output_replacement_character(rb_econv_t *ec);
1371 
/*
 * Inserts a hexadecimal character reference ("&#x...;") into ec's output
 * for every 4-byte big-endian unit of the bytes recorded in ec->last_error.
 * When the error's source encoding is not UTF-32BE the bytes are first
 * converted with allocate_converted_string().
 * Returns 0 on success, -1 on failure.
 *
 * NOTE(review): listing line 1389 (part of the allocate_converted_string
 * argument list) is not visible here.
 */
1372 static int
1373 output_hex_charref(rb_econv_t *ec)
1374 {
1375  int ret;
1376  unsigned char utfbuf[1024];
1377  const unsigned char *utf;
1378  size_t utf_len;
1379  int utf_allocated = 0;
1380  char charef_buf[16];
1381  const unsigned char *p;
1382 
1383  if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
1384  utf = ec->last_error.error_bytes_start;
1385  utf_len = ec->last_error.error_bytes_len;
1386  }
1387  else {
1388  utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE",
1390  utfbuf, sizeof(utfbuf),
1391  &utf_len);
1392  if (!utf)
1393  return -1;
 /* remember whether the result is neither the stack buffer nor the error bytes, so it must be freed */
1394  if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
1395  utf_allocated = 1;
1396  }
1397 
 /* the data must consist of whole 4-byte units */
1398  if (utf_len % 4 != 0)
1399  goto fail;
1400 
1401  p = utf;
1402  while (4 <= utf_len) {
 /* assemble one big-endian 32-bit value */
1403  unsigned int u = 0;
1404  u += p[0] << 24;
1405  u += p[1] << 16;
1406  u += p[2] << 8;
1407  u += p[3];
1408  snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
1409 
1410  ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII");
1411  if (ret == -1)
1412  goto fail;
1413 
1414  p += 4;
1415  utf_len -= 4;
1416  }
1417 
1418  if (utf_allocated)
1419  xfree((void *)utf);
1420  return 0;
1421 
1422  fail:
1423  if (utf_allocated)
1424  xfree((void *)utf);
1425  return -1;
1426 }
1427 
1430  const unsigned char **input_ptr, const unsigned char *input_stop,
1431  unsigned char **output_ptr, unsigned char *output_stop,
1432  int flags)
1433 {
 /*
  * Wrapper around rb_econv_convert0() that applies the replacement
  * behaviours selected in ec->flags and retries the conversion after a
  * replacement was inserted successfully; otherwise the result of
  * rb_econv_convert0() is returned unchanged.
  *
  * NOTE(review): the listing lines holding the return type and function
  * name (1428-1429) and one case label (1475) are not visible here.
  */
1434  rb_econv_result_t ret;
1435 
1436  unsigned char empty_buf;
1437  unsigned char *empty_ptr = &empty_buf;
1438 
1439  ec->started = 1;
1440 
 /* NULL input/output pointers are replaced by zero-length ranges */
1441  if (!input_ptr) {
1442  input_ptr = (const unsigned char **)&empty_ptr;
1443  input_stop = empty_ptr;
1444  }
1445 
1446  if (!output_ptr) {
1447  output_ptr = &empty_ptr;
1448  output_stop = empty_ptr;
1449  }
1450 
1451  resume:
1452  ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1453 
1454  if (ret == econv_invalid_byte_sequence ||
1455  ret == econv_incomplete_input) {
1456  /* deal with invalid byte sequence */
1457  /* todo: add more alternative behaviors */
1458  switch (ec->flags & ECONV_INVALID_MASK) {
1459  case ECONV_INVALID_REPLACE:
1460  if (output_replacement_character(ec) == 0)
1461  goto resume;
1462  }
1463  }
1464 
1465  if (ret == econv_undefined_conversion) {
1466  /* valid character in source encoding
1467  * but no related character(s) in destination encoding */
1468  /* todo: add more alternative behaviors */
1469  switch (ec->flags & ECONV_UNDEF_MASK) {
1470  case ECONV_UNDEF_REPLACE:
1471  if (output_replacement_character(ec) == 0)
1472  goto resume;
1473  break;
1474 
1476  if (output_hex_charref(ec) == 0)
1477  goto resume;
1478  break;
1479  }
1480  }
1481 
1482  return ret;
1483 }
1484 
/*
 * Returns an encoding name taken from ec->last_tc's transcoder: its
 * src_encoding when the transcoder is an asciicompat_encoder, otherwise
 * its dst_encoding.  Returns "" when ec has no last_tc.
 *
 * NOTE(review): the listing line with the function name and parameter
 * list (1486) is not visible here.
 */
1485 const char *
1487 {
1488  rb_transcoding *tc = ec->last_tc;
1489  const rb_transcoder *tr;
1490 
1491  if (tc == NULL)
1492  return "";
1493 
1494  tr = tc->transcoder;
1495 
1496  if (tr->asciicompat_type == asciicompat_encoder)
1497  return tr->src_encoding;
1498  return tr->dst_encoding;
1499 }
1500 
1501 static unsigned char *
1502 allocate_converted_string(const char *sname, const char *dname,
1503  const unsigned char *str, size_t len,
1504  unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
1505  size_t *dst_len_ptr)
1506 {
1507  unsigned char *dst_str;
1508  size_t dst_len;
1509  size_t dst_bufsize;
1510 
1511  rb_econv_t *ec;
1512  rb_econv_result_t res;
1513 
1514  const unsigned char *sp;
1515  unsigned char *dp;
1516 
1517  if (caller_dst_buf)
1518  dst_bufsize = caller_dst_bufsize;
1519  else if (len == 0)
1520  dst_bufsize = 1;
1521  else
1522  dst_bufsize = len;
1523 
1524  ec = rb_econv_open(sname, dname, 0);
1525  if (ec == NULL)
1526  return NULL;
1527  if (caller_dst_buf)
1528  dst_str = caller_dst_buf;
1529  else
1530  dst_str = xmalloc(dst_bufsize);
1531  dst_len = 0;
1532  sp = str;
1533  dp = dst_str+dst_len;
1534  res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
1535  dst_len = dp - dst_str;
1536  while (res == econv_destination_buffer_full) {
1537  if (SIZE_MAX/2 < dst_bufsize) {
1538  goto fail;
1539  }
1540  dst_bufsize *= 2;
1541  if (dst_str == caller_dst_buf) {
1542  unsigned char *tmp;
1543  tmp = xmalloc(dst_bufsize);
1544  memcpy(tmp, dst_str, dst_bufsize/2);
1545  dst_str = tmp;
1546  }
1547  else {
1548  dst_str = xrealloc(dst_str, dst_bufsize);
1549  }
1550  dp = dst_str+dst_len;
1551  res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
1552  dst_len = dp - dst_str;
1553  }
1554  if (res != econv_finished) {
1555  goto fail;
1556  }
1557  rb_econv_close(ec);
1558  *dst_len_ptr = dst_len;
1559  return dst_str;
1560 
1561  fail:
1562  if (dst_str != caller_dst_buf)
1563  xfree(dst_str);
1564  rb_econv_close(ec);
1565  return NULL;
1566 }
1567 
1568 /* result: 0:success -1:failure */
/*
 * Appends the len bytes of str (given in str_encoding) to one of ec's
 * internal buffers.  The bytes are first converted to the encoding
 * returned by rb_econv_encoding_to_insert_output() when that differs from
 * str_encoding.  A zero len returns 0 immediately.
 *
 * NOTE(review): the listing line with the function name (1570) is not
 * visible here.
 */
1569 int
1571  const unsigned char *str, size_t len, const char *str_encoding)
1572 {
1573  const char *insert_encoding = rb_econv_encoding_to_insert_output(ec);
1574  unsigned char insert_buf[4096];
1575  const unsigned char *insert_str = NULL;
1576  size_t insert_len;
1577 
1578  int last_trans_index;
1579  rb_transcoding *tc;
1580 
1581  unsigned char **buf_start_p;
1582  unsigned char **data_start_p;
1583  unsigned char **data_end_p;
1584  unsigned char **buf_end_p;
1585 
1586  size_t need;
1587 
1588  ec->started = 1;
1589 
1590  if (len == 0)
1591  return 0;
1592 
 /* use str as is when the encodings match, otherwise convert it */
1593  if (encoding_equal(insert_encoding, str_encoding)) {
1594  insert_str = str;
1595  insert_len = len;
1596  }
1597  else {
1598  insert_str = allocate_converted_string(str_encoding, insert_encoding,
1599  str, len, insert_buf, sizeof(insert_buf), &insert_len);
1600  if (insert_str == NULL)
1601  return -1;
1602  }
1603 
1604  need = insert_len;
1605 
 /* choose the buffer that receives the inserted bytes */
1606  last_trans_index = ec->num_trans-1;
1607  if (ec->num_trans == 0) {
 /* no transcoder: ec's own input buffer */
1608  tc = NULL;
1609  buf_start_p = &ec->in_buf_start;
1610  data_start_p = &ec->in_data_start;
1611  data_end_p = &ec->in_data_end;
1612  buf_end_p = &ec->in_buf_end;
1613  }
1614  else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
 /* last transcoder is an asciicompat encoder: insert in front of it and
  * reserve room for its readagain bytes as well */
1615  tc = ec->elems[last_trans_index].tc;
1616  need += tc->readagain_len;
 /* size_t wrap-around check */
1617  if (need < insert_len)
1618  goto fail;
1619  if (last_trans_index == 0) {
1620  buf_start_p = &ec->in_buf_start;
1621  data_start_p = &ec->in_data_start;
1622  data_end_p = &ec->in_data_end;
1623  buf_end_p = &ec->in_buf_end;
1624  }
1625  else {
1626  rb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
1627  buf_start_p = &ee->out_buf_start;
1628  data_start_p = &ee->out_data_start;
1629  data_end_p = &ee->out_data_end;
1630  buf_end_p = &ee->out_buf_end;
1631  }
1632  }
1633  else {
 /* otherwise: the output buffer of the last element */
1634  rb_econv_elem_t *ee = &ec->elems[last_trans_index];
1635  buf_start_p = &ee->out_buf_start;
1636  data_start_p = &ee->out_data_start;
1637  data_end_p = &ee->out_data_end;
1638  buf_end_p = &ee->out_buf_end;
1639  tc = ec->elems[last_trans_index].tc;
1640  }
1641 
1642  if (*buf_start_p == NULL) {
 /* buffer not allocated yet */
1643  unsigned char *buf = xmalloc(need);
1644  *buf_start_p = buf;
1645  *data_start_p = buf;
1646  *data_end_p = buf;
1647  *buf_end_p = buf+need;
1648  }
1649  else if ((size_t)(*buf_end_p - *data_end_p) < need) {
 /* not enough room at the tail: move the data to the buffer start first */
1650  MEMMOVE(*buf_start_p, *data_start_p, unsigned char, *data_end_p - *data_start_p);
1651  *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1652  *data_start_p = *buf_start_p;
1653  if ((size_t)(*buf_end_p - *data_end_p) < need) {
 /* still too small: grow to current data size plus need */
1654  unsigned char *buf;
1655  size_t s = (*data_end_p - *buf_start_p) + need;
1656  if (s < need)
1657  goto fail;
1658  buf = xrealloc(*buf_start_p, s);
1659  *data_start_p = buf;
1660  *data_end_p = buf + (*data_end_p - *buf_start_p);
1661  *buf_start_p = buf;
1662  *buf_end_p = buf + s;
1663  }
1664  }
1665 
1666  memcpy(*data_end_p, insert_str, insert_len);
1667  *data_end_p += insert_len;
1668  if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
 /* re-queue the encoder's readagain bytes after the inserted bytes */
1669  memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
1670  *data_end_p += tc->readagain_len;
1671  tc->readagain_len = 0;
1672  }
1673 
 /* release the converted copy only when it is a heap block */
1674  if (insert_str != str && insert_str != insert_buf)
1675  xfree((void*)insert_str);
1676  return 0;
1677 
1678  fail:
1679  if (insert_str != str && insert_str != insert_buf)
1680  xfree((void*)insert_str);
1681  return -1;
1682 }
1683 
/*
 * Releases ec: the replacement string when ec allocated it, every
 * element's transcoding and output buffer, ec's input buffer, the element
 * array and ec itself.
 *
 * NOTE(review): the listing line with the function name (1685) is not
 * visible here.
 */
1684 void
1686 {
1687  int i;
1688 
1689  if (ec->replacement_allocated) {
1690  xfree((void *)ec->replacement_str);
1691  }
1692  for (i = 0; i < ec->num_trans; i++) {
1693  rb_transcoding_close(ec->elems[i].tc);
1694  if (ec->elems[i].out_buf_start)
1695  xfree(ec->elems[i].out_buf_start);
1696  }
1697  xfree(ec->in_buf_start);
1698  xfree(ec->elems);
1699  xfree(ec);
1700 }
1701 
/*
 * Returns a byte count for ec: sizeof(rb_econv_t), the replacement string
 * when ec allocated it, each element's transcoding size and output buffer,
 * ec's input buffer, and the element array (num_allocated entries).
 *
 * NOTE(review): the listing line with the function name (1703) is not
 * visible here.
 */
1702 size_t
1704 {
1705  size_t size = sizeof(rb_econv_t);
1706  int i;
1707 
1708  if (ec->replacement_allocated) {
1709  size += ec->replacement_len;
1710  }
1711  for (i = 0; i < ec->num_trans; i++) {
1712  size += rb_transcoding_memsize(ec->elems[i].tc);
1713 
1714  if (ec->elems[i].out_buf_start) {
1715  size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start;
1716  }
1717  }
1718  size += ec->in_buf_end - ec->in_buf_start;
1719  size += sizeof(rb_econv_elem_t) * ec->num_allocated;
1720 
1721  return size;
1722 }
1723 
/*
 * Returns the readagain_len of the first transcoding as an int (clamped
 * to INT_MAX where size_t is wider than int), or 0 when ec has no
 * transcoders.
 *
 * NOTE(review): the listing line with the function name (1725) is not
 * visible here.
 */
1724 int
1726 {
1727  if (ec->num_trans == 0)
1728  return 0;
1729 #if SIZEOF_SIZE_T > SIZEOF_INT
1730  if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
1731 #endif
1732  return (int)ec->elems[0].tc->readagain_len;
1733 }
1734 
1735 void
1736 rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
1737 {
1738  rb_transcoding *tc;
1739  if (ec->num_trans == 0 || n == 0)
1740  return;
1741  tc = ec->elems[0].tc;
1742  memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n);
1743  tc->readagain_len -= n;
1744 }
1745 
 /* NOTE(review): the listing line opening this struct (1746) is not visible here. */
 /* result slot, filled in by asciicompat_encoding_i() */
1747  const char *ascii_compat_name;
 /* name of the ASCII-incompatible encoding being looked up -- presumably set by the caller; confirm */
1748  const char *ascii_incompat_name;
1749 };
1750 
1751 static int
1752 asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
1753 {
1754  struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
1755  transcoder_entry_t *entry = (transcoder_entry_t *)val;
1756  const rb_transcoder *tr;
1757 
1758  if (DECORATOR_P(entry->sname, entry->dname))
1759  return ST_CONTINUE;
1760  tr = load_transcoder_entry(entry);
1761  if (tr && tr->asciicompat_type == asciicompat_decoder) {
1762  data->ascii_compat_name = tr->dst_encoding;
1763  return ST_STOP;
1764  }
1765  return ST_CONTINUE;
1766 }
1767 
/*
 * Looks up ascii_incompat_name in transcoder_table and, when exactly one
 * entry is registered for it, returns the name found by
 * asciicompat_encoding_i(); otherwise returns NULL.
 *
 * NOTE(review): the listing lines 1769 (function name) and 1789 are not
 * visible here.
 */
1768 const char *
1770 {
1771  st_data_t v;
1772  st_table *table2;
1773  struct asciicompat_encoding_t data;
1774 
1775  if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
1776  return NULL;
1777  table2 = (st_table *)v;
1778 
1779  /*
1780  * Assumption:
1781  * There is at most one transcoder for
1782  * converting from ASCII incompatible encoding.
1783  *
1784  * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
1785  */
1786  if (table2->num_entries != 1)
1787  return NULL;
1788 
1790  data.ascii_compat_name = NULL;
1791  st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
1792  return data.ascii_compat_name;
1793 }
1794 
/*
 * Converts the len bytes at ss with ec and appends the result to the
 * string dst, growing dst and retrying while the converter reports
 * econv_destination_buffer_full.  A nil dst is replaced by a new string
 * buffer.  Returns dst.
 *
 * NOTE(review): listing lines 1806 and 1832 are not visible here.
 */
1795 VALUE
1796 rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
1797 {
1798  unsigned const char *sp, *se;
1799  unsigned char *ds, *dp, *de;
1800  rb_econv_result_t res;
1801  int max_output;
1802 
1803  if (NIL_P(dst)) {
1804  dst = rb_str_buf_new(len);
1805  if (ec->destination_encoding)
1807  }
1808 
 /* max_output of the last transcoder, or 1 when there is none */
1809  if (ec->last_tc)
1810  max_output = ec->last_tc->transcoder->max_output;
1811  else
1812  max_output = 1;
1813 
1814  do {
1815  long dlen = RSTRING_LEN(dst);
 /* make sure at least len + max_output bytes are free behind the current content */
1816  if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
1817  unsigned long new_capa = (unsigned long)dlen + len + max_output;
1818  if (LONG_MAX < new_capa)
1819  rb_raise(rb_eArgError, "too long string");
1820  rb_str_resize(dst, new_capa);
1821  rb_str_set_len(dst, dlen);
1822  }
1823  sp = (const unsigned char *)ss;
1824  se = sp + len;
1825  ds = (unsigned char *)RSTRING_PTR(dst);
1826  de = ds + rb_str_capacity(dst);
 /* ds and dp both start at the end of the existing content */
1827  dp = ds += dlen;
1828  res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
 /* advance ss/len past the consumed input */
1829  len -= (const char *)sp - ss;
1830  ss = (const char *)sp;
1831  rb_str_set_len(dst, dlen + (dp - ds));
1833  } while (res == econv_destination_buffer_full);
1834 
1835  return dst;
1836 }
1837 
/*
 * Converts len bytes of src starting at byte offset off and appends the
 * result to dst via rb_econv_append(); returns the resulting string.
 *
 * NOTE(review): listing line 1841 is not visible here.
 */
1838 VALUE
1839 rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
1840 {
1842  dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags);
1843  RB_GC_GUARD(src);
1844  return dst;
1845 }
1846 
/*
 * Converts the whole of src and appends it to dst by delegating to
 * rb_econv_substr_append() with offset 0 and RSTRING_LEN(src).
 *
 * NOTE(review): the listing line with the function name (1848) is not
 * visible here.
 */
1847 VALUE
1849 {
1850  return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), dst, flags);
1851 }
1852 
1853 VALUE
1854 rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
1855 {
1856  return rb_econv_substr_append(ec, src, byteoff, bytesize, Qnil, flags);
1857 }
1858 
/*
 * Converts the whole of src into a new string by delegating to
 * rb_econv_substr_append() with offset 0, RSTRING_LEN(src) and a nil
 * destination.
 *
 * NOTE(review): the listing line with the function name (1860) is not
 * visible here.
 */
1859 VALUE
1861 {
1862  return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags);
1863 }
1864 
1865 static int
1866 rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
1867 {
1868  transcoder_entry_t *entry;
1869  const rb_transcoder *tr;
1870 
1871  if (ec->started != 0)
1872  return -1;
1873 
1874  entry = get_transcoder_entry(sname, dname);
1875  if (!entry)
1876  return -1;
1877 
1878  tr = load_transcoder_entry(entry);
1879  if (!tr) return -1;
1880 
1881  return rb_econv_add_transcoder_at(ec, tr, n);
1882 }
1883 
1884 static int
1885 rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
1886 {
1887  return rb_econv_add_converter(ec, "", decorator_name, n);
1888 }
1889 
1890 int
1891 rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
1892 {
1893  const rb_transcoder *tr;
1894 
1895  if (ec->num_trans == 0)
1896  return rb_econv_decorate_at(ec, decorator_name, 0);
1897 
1898  tr = ec->elems[0].tc->transcoder;
1899 
1900  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
1901  tr->asciicompat_type == asciicompat_decoder)
1902  return rb_econv_decorate_at(ec, decorator_name, 1);
1903 
1904  return rb_econv_decorate_at(ec, decorator_name, 0);
1905 }
1906 
1907 int
1908 rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
1909 {
1910  const rb_transcoder *tr;
1911 
1912  if (ec->num_trans == 0)
1913  return rb_econv_decorate_at(ec, decorator_name, 0);
1914 
1915  tr = ec->elems[ec->num_trans-1].tc->transcoder;
1916 
1917  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
1918  tr->asciicompat_type == asciicompat_encoder)
1919  return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1);
1920 
1921  return rb_econv_decorate_at(ec, decorator_name, ec->num_trans);
1922 }
1923 
/*
 * Removes from ec every element whose transcoder is the newline decorator
 * selected by ec->flags, closing its transcoding and freeing its output
 * buffer, and compacts the remaining elements.
 *
 * NOTE(review): the listing lines with the function name (1925), the
 * three case labels (1930, 1933, 1936) and line 1957 are not visible here.
 */
1924 void
1926 {
1927  const char *dname = 0;
1928 
1929  switch (ec->flags & ECONV_NEWLINE_DECORATOR_MASK) {
1931  dname = "universal_newline";
1932  break;
1934  dname = "crlf_newline";
1935  break;
1937  dname = "cr_newline";
1938  break;
1939  }
1940 
1941  if (dname) {
1942  const rb_transcoder *transcoder = get_transcoder_entry("", dname)->transcoder;
1943  int num_trans = ec->num_trans;
1944  int i, j = 0;
1945 
 /* i reads every original element, j is the write index for kept elements */
1946  for (i=0; i < num_trans; i++) {
1947  if (transcoder == ec->elems[i].tc->transcoder) {
1948  rb_transcoding_close(ec->elems[i].tc);
1949  xfree(ec->elems[i].out_buf_start);
1950  ec->num_trans--;
1951  }
1952  else
1953  ec->elems[j++] = ec->elems[i];
1954  }
1955  }
1956 
1958 }
1959 
/*
 * Appends a human-readable description of a conversion to mesg (a new
 * empty string is created when mesg is nil) and returns it.  The text is
 * "<sname> to <dname>" (or just the non-empty name), followed by the
 * comma-separated names of the decorators set in ecflags, or
 * "no-conversion" when there is nothing to describe.
 *
 * NOTE(review): listing lines 1979-1981 (the rest of the decorator mask
 * condition) are not visible here.
 */
1960 static VALUE
1961 econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
1962 {
1963  int has_description = 0;
1964 
1965  if (NIL_P(mesg))
1966  mesg = rb_str_new(NULL, 0);
1967 
1968  if (*sname != '\0' || *dname != '\0') {
1969  if (*sname == '\0')
1970  rb_str_cat2(mesg, dname);
1971  else if (*dname == '\0')
1972  rb_str_cat2(mesg, sname);
1973  else
1974  rb_str_catf(mesg, "%s to %s", sname, dname);
1975  has_description = 1;
1976  }
1977 
1978  if (ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
 /* pre is "" before the first decorator name and "," afterwards */
1982  const char *pre = "";
1983  if (has_description)
1984  rb_str_cat2(mesg, " with ");
1985  if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
1986  rb_str_cat2(mesg, pre); pre = ",";
1987  rb_str_cat2(mesg, "universal_newline");
1988  }
1989  if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) {
1990  rb_str_cat2(mesg, pre); pre = ",";
1991  rb_str_cat2(mesg, "crlf_newline");
1992  }
1993  if (ecflags & ECONV_CR_NEWLINE_DECORATOR) {
1994  rb_str_cat2(mesg, pre); pre = ",";
1995  rb_str_cat2(mesg, "cr_newline");
1996  }
1997  if (ecflags & ECONV_XML_TEXT_DECORATOR) {
1998  rb_str_cat2(mesg, pre); pre = ",";
1999  rb_str_cat2(mesg, "xml_text");
2000  }
2001  if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) {
2002  rb_str_cat2(mesg, pre); pre = ",";
2003  rb_str_cat2(mesg, "xml_attr_content");
2004  }
2005  if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) {
2006  rb_str_cat2(mesg, pre); pre = ",";
2007  rb_str_cat2(mesg, "xml_attr_quote");
2008  }
2009  has_description = 1;
2010  }
2011  if (!has_description) {
2012  rb_str_cat2(mesg, "no-conversion");
2013  }
2014 
2015  return mesg;
2016 }
2017 
2018 VALUE
2019 rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
2020 {
2021  VALUE mesg, exc;
2022  mesg = rb_str_new_cstr("code converter not found (");
2023  econv_description(sname, dname, ecflags, mesg);
2024  rb_str_cat2(mesg, ")");
2025  exc = rb_exc_new3(rb_eConverterNotFoundError, mesg);
2026  return exc;
2027 }
2028 
/*
 * Builds an exception object describing ec->last_error: an
 * InvalidByteSequenceError (with error_bytes, readagain_bytes and
 * incomplete_input ivars) or an UndefinedConversionError (with an
 * error_char ivar).  Both get the source/destination encoding ivars set
 * under the set_encs label.  Returns Qnil when neither branch applies.
 *
 * NOTE(review): many listing lines are not visible in this function
 * (e.g. 2033-2034, 2043, 2046, 2054, 2059, 2070, 2073, 2078, 2080, 2091,
 * 2098, 2100, 2104-2105, 2111, 2121), including the conditions that open
 * the two main branches -- confirm against upstream.
 */
2029 static VALUE
2030 make_econv_exception(rb_econv_t *ec)
2031 {
2032  VALUE mesg, exc;
2035  const char *err = (const char *)ec->last_error.error_bytes_start;
2036  size_t error_len = ec->last_error.error_bytes_len;
2037  VALUE bytes = rb_str_new(err, error_len);
2038  VALUE dumped = rb_str_dump(bytes);
2039  size_t readagain_len = ec->last_error.readagain_len;
2040  VALUE bytes2 = Qnil;
2041  VALUE dumped2;
2042  int idx;
2044  mesg = rb_sprintf("incomplete %s on %s",
2045  StringValueCStr(dumped),
2047  }
2048  else if (readagain_len) {
 /* the readagain bytes directly follow the error bytes */
2049  bytes2 = rb_str_new(err+error_len, readagain_len);
2050  dumped2 = rb_str_dump(bytes2);
2051  mesg = rb_sprintf("%s followed by %s on %s",
2052  StringValueCStr(dumped),
2053  StringValueCStr(dumped2),
2055  }
2056  else {
2057  mesg = rb_sprintf("%s on %s",
2058  StringValueCStr(dumped),
2060  }
2061 
2062  exc = rb_exc_new3(rb_eInvalidByteSequenceError, mesg);
2063  rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
2064  rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
2065  rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
2066 
 /* shared tail: also reached via goto from the undefined-conversion branch below */
2067  set_encs:
2068  rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding));
2069  rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding));
2071  if (0 <= idx)
2072  rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
2074  if (0 <= idx)
2075  rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
2076  return exc;
2077  }
2079  VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start,
2081  VALUE dumped = Qnil;
2082  int idx;
 /* for a UTF-8 source, try to show the character as U+XXXX instead of dumped bytes */
2083  if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
2084  rb_encoding *utf8 = rb_utf8_encoding();
2085  const char *start, *end;
2086  int n;
2087  start = (const char *)ec->last_error.error_bytes_start;
2088  end = start + ec->last_error.error_bytes_len;
2089  n = rb_enc_precise_mbclen(start, end, utf8);
2090  if (MBCLEN_CHARFOUND_P(n) &&
2092  unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
2093  dumped = rb_sprintf("U+%04X", cc);
2094  }
2095  }
2096  if (dumped == Qnil)
2097  dumped = rb_str_dump(bytes);
2099  ec->source_encoding_name) == 0 &&
2101  ec->destination_encoding_name) == 0) {
2102  mesg = rb_sprintf("%s from %s to %s",
2103  StringValueCStr(dumped),
2106  }
2107  else {
2108  int i;
2109  mesg = rb_sprintf("%s to %s in conversion from %s",
2110  StringValueCStr(dumped),
2112  ec->source_encoding_name);
 /* list the destination of every non-decorator step of the chain */
2113  for (i = 0; i < ec->num_trans; i++) {
2114  const rb_transcoder *tr = ec->elems[i].tc->transcoder;
2115  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
2116  rb_str_catf(mesg, " to %s",
2117  ec->elems[i].tc->transcoder->dst_encoding);
2118  }
2119  }
2120  exc = rb_exc_new3(rb_eUndefinedConversionError, mesg);
2122  if (0 <= idx)
2123  rb_enc_associate_index(bytes, idx);
2124  rb_ivar_set(exc, rb_intern("error_char"), bytes);
2125  goto set_encs;
2126  }
2127  return Qnil;
2128 }
2129 
2130 static void
2131 more_output_buffer(
2132  VALUE destination,
2133  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2134  int max_output,
2135  unsigned char **out_start_ptr,
2136  unsigned char **out_pos,
2137  unsigned char **out_stop_ptr)
2138 {
2139  size_t len = (*out_pos - *out_start_ptr);
2140  size_t new_len = (len + max_output) * 2;
2141  *out_start_ptr = resize_destination(destination, len, new_len);
2142  *out_pos = *out_start_ptr + len;
2143  *out_stop_ptr = *out_start_ptr + new_len;
2144 }
2145 
2146 static int
2147 make_replacement(rb_econv_t *ec)
2148 {
2149  rb_transcoding *tc;
2150  const rb_transcoder *tr;
2151  const unsigned char *replacement;
2152  const char *repl_enc;
2153  const char *ins_enc;
2154  size_t len;
2155 
2156  if (ec->replacement_str)
2157  return 0;
2158 
2159  ins_enc = rb_econv_encoding_to_insert_output(ec);
2160 
2161  tc = ec->last_tc;
2162  if (*ins_enc) {
2163  tr = tc->transcoder;
2164  rb_enc_find(tr->dst_encoding);
2165  replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
2166  }
2167  else {
2168  replacement = (unsigned char *)"?";
2169  len = 1;
2170  repl_enc = "";
2171  }
2172 
2173  ec->replacement_str = replacement;
2174  ec->replacement_len = len;
2175  ec->replacement_enc = repl_enc;
2176  ec->replacement_allocated = 0;
2177  return 0;
2178 }
2179 
/*
 * Installs a heap copy of str (len bytes, in encoding encname) as ec's
 * replacement string.  The bytes are converted to the encoding returned by
 * rb_econv_encoding_to_insert_output() unless that name is empty or equal
 * to encname.  A previously allocated replacement is freed.
 * Returns 0 on success, -1 when the conversion fails.
 *
 * NOTE(review): the listing line with the function name (2181) is not
 * visible here.
 */
2180 int
2182  const unsigned char *str, size_t len, const char *encname)
2183 {
2184  unsigned char *str2;
2185  size_t len2;
2186  const char *encname2;
2187 
2188  encname2 = rb_econv_encoding_to_insert_output(ec);
2189 
2190  if (!*encname2 || encoding_equal(encname, encname2)) {
 /* no conversion needed: keep a plain copy under the caller's encoding name */
2191  str2 = xmalloc(len);
2192  MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */
2193  len2 = len;
2194  encname2 = encname;
2195  }
2196  else {
2197  str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2);
2198  if (!str2)
2199  return -1;
2200  }
2201 
2202  if (ec->replacement_allocated) {
2203  xfree((void *)ec->replacement_str);
2204  }
2205  ec->replacement_allocated = 1;
2206  ec->replacement_str = str2;
2207  ec->replacement_len = len2;
2208  ec->replacement_enc = encname2;
2209  return 0;
2210 }
2211 
/*
 * Makes sure ec has a replacement string (make_replacement) and returns 0
 * on success, -1 on failure.
 *
 * NOTE(review): listing line 2220, which assigns ret, is not visible
 * here; presumably it inserts the replacement into ec's output -- confirm.
 */
2212 static int
2213 output_replacement_character(rb_econv_t *ec)
2214 {
2215  int ret;
2216 
2217  if (make_replacement(ec) == -1)
2218  return -1;
2219 
2221  if (ret == -1)
2222  return -1;
2223 
2224  return 0;
2225 }
2226 
2227 #if 1
2228 #define hash_fallback rb_hash_aref
2229 
2230 static VALUE
2231 proc_fallback(VALUE fallback, VALUE c)
2232 {
2233  return rb_proc_call(fallback, rb_ary_new4(1, &c));
2234 }
2235 
2236 static VALUE
2237 method_fallback(VALUE fallback, VALUE c)
2238 {
2239  return rb_method_call(1, &c, fallback);
2240 }
2241 
2242 static VALUE
2243 aref_fallback(VALUE fallback, VALUE c)
2244 {
2245  return rb_funcallv_public(fallback, idAREF, 1, &c);
2246 }
2247 
/*
 * Converts [*in_pos, in_stop) from src_encoding to dst_encoding into the
 * buffer at *out_pos, growing the destination through resize_destination
 * whenever the converter reports a full buffer.
 *
 * Raises the exception built by rb_econv_open_exc() when no converter can
 * be opened, and the one built by make_econv_exception() on an invalid,
 * incomplete or undefined conversion.  When ecopts is a hash with a
 * fallback entry, an undefined conversion is first offered to that
 * fallback and its result is inserted into the output.
 *
 * NOTE(review): listing lines 2274 and 2295-2296 are not visible here.
 */
2248 static void
2249 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
2250  const unsigned char *in_stop, unsigned char *out_stop,
2251  VALUE destination,
2252  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2253  const char *src_encoding,
2254  const char *dst_encoding,
2255  int ecflags,
2256  VALUE ecopts)
2257 {
2258  rb_econv_t *ec;
2259  rb_transcoding *last_tc;
2260  rb_econv_result_t ret;
2261  unsigned char *out_start = *out_pos;
2262  int max_output;
2263  VALUE exc;
2264  VALUE fallback = Qnil;
2265  VALUE (*fallback_func)(VALUE, VALUE) = 0;
2266 
2267  ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
2268  if (!ec)
2269  rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
2270 
 /* pick the calling convention for the fallback object by its type */
2271  if (!NIL_P(ecopts) && RB_TYPE_P(ecopts, T_HASH)) {
2272  fallback = rb_hash_aref(ecopts, sym_fallback);
2273  if (RB_TYPE_P(fallback, T_HASH)) {
2275  }
2276  else if (rb_obj_is_proc(fallback)) {
2277  fallback_func = proc_fallback;
2278  }
2279  else if (rb_obj_is_method(fallback)) {
2280  fallback_func = method_fallback;
2281  }
2282  else {
2283  fallback_func = aref_fallback;
2284  }
2285  }
2286  last_tc = ec->last_tc;
2287  max_output = last_tc ? last_tc->transcoder->max_output : 1;
2288 
2289  resume:
2290  ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
2291 
2292  if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
2293  VALUE rep = rb_enc_str_new(
2294  (const char *)ec->last_error.error_bytes_start,
2297  rep = (*fallback_func)(fallback, rep);
 /* a usable fallback result is inserted into the output and conversion resumes */
2298  if (rep != Qundef && !NIL_P(rep)) {
2299  StringValue(rep);
2300  ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
2301  RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
2302  if ((int)ret == -1) {
2303  rb_raise(rb_eArgError, "too big fallback string");
2304  }
2305  goto resume;
2306  }
2307  }
2308 
 /* conversion errors: build the exception first, then close ec, then raise */
2309  if (ret == econv_invalid_byte_sequence ||
2310  ret == econv_incomplete_input ||
2311  ret == econv_undefined_conversion) {
2312  exc = make_econv_exception(ec);
2313  rb_econv_close(ec);
2314  rb_exc_raise(exc);
2315  }
2316 
2317  if (ret == econv_destination_buffer_full) {
2318  more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2319  goto resume;
2320  }
2321 
2322  rb_econv_close(ec);
2323  return;
2324 }
2325 #else
2326 /* sample transcode_loop implementation in byte-by-byte stream style */
/*
 * This variant sits in the #else branch of an "#if 1" and is therefore
 * not compiled.  It feeds the converter one input byte at a time.
 *
 * NOTE(review): listing lines 2352, 2373-2375, 2381 and 2385 (an
 * initialisation of ret and several case labels) are not visible here.
 */
2327 static void
2328 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
2329  const unsigned char *in_stop, unsigned char *out_stop,
2330  VALUE destination,
2331  unsigned char *(*resize_destination)(VALUE, size_t, size_t),
2332  const char *src_encoding,
2333  const char *dst_encoding,
2334  int ecflags,
2335  VALUE ecopts)
2336 {
2337  rb_econv_t *ec;
2338  rb_transcoding *last_tc;
2339  rb_econv_result_t ret;
2340  unsigned char *out_start = *out_pos;
2341  const unsigned char *ptr;
2342  int max_output;
2343  VALUE exc;
2344 
2345  ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
2346  if (!ec)
2347  rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
2348 
2349  last_tc = ec->last_tc;
2350  max_output = last_tc ? last_tc->transcoder->max_output : 1;
2351 
2353  ptr = *in_pos;
2354  while (ret != econv_finished) {
2355  unsigned char input_byte;
2356  const unsigned char *p = &input_byte;
2357 
2358  if (ret == econv_source_buffer_empty) {
2359  if (ptr < in_stop) {
 /* hand over exactly one byte as partial input */
2360  input_byte = *ptr;
2361  ret = rb_econv_convert(ec, &p, p+1, out_pos, out_stop, ECONV_PARTIAL_INPUT);
2362  }
2363  else {
 /* input exhausted: convert with no input and without ECONV_PARTIAL_INPUT */
2364  ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, 0);
2365  }
2366  }
2367  else {
2368  ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, ECONV_PARTIAL_INPUT);
2369  }
 /* advance ptr by however much of input_byte was consumed */
2370  if (&input_byte != p)
2371  ptr += p - &input_byte;
2372  switch (ret) {
2376  exc = make_econv_exception(ec);
2377  rb_econv_close(ec);
2378  rb_exc_raise(exc);
2379  break;
2380 
2382  more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2383  break;
2384 
2386  break;
2387 
2388  case econv_finished:
2389  break;
2390  }
2391  }
2392  rb_econv_close(ec);
2393  *in_pos = in_stop;
2394  return;
2395 }
2396 #endif
2397 
2398 
2399 /*
2400  * String-specific code
2401  */
2402 
2403 static unsigned char *
2404 str_transcoding_resize(VALUE destination, size_t len, size_t new_len)
2405 {
2406  rb_str_resize(destination, new_len);
2407  return (unsigned char *)RSTRING_PTR(destination);
2408 }
2409 
2410 static int
2411 econv_opts(VALUE opt, int ecflags)
2412 {
2413  VALUE v;
2414 
2415  v = rb_hash_aref(opt, sym_invalid);
2416  if (NIL_P(v)) {
2417  }
2418  else if (v==sym_replace) {
2419  ecflags |= ECONV_INVALID_REPLACE;
2420  }
2421  else {
2422  rb_raise(rb_eArgError, "unknown value for invalid character option");
2423  }
2424 
2425  v = rb_hash_aref(opt, sym_undef);
2426  if (NIL_P(v)) {
2427  }
2428  else if (v==sym_replace) {
2429  ecflags |= ECONV_UNDEF_REPLACE;
2430  }
2431  else {
2432  rb_raise(rb_eArgError, "unknown value for undefined character option");
2433  }
2434 
2435  v = rb_hash_aref(opt, sym_replace);
2436  if (!NIL_P(v) && !(ecflags & ECONV_INVALID_REPLACE)) {
2437  ecflags |= ECONV_UNDEF_REPLACE;
2438  }
2439 
2440  v = rb_hash_aref(opt, sym_xml);
2441  if (!NIL_P(v)) {
2442  if (v==sym_text) {
2444  }
2445  else if (v==sym_attr) {
2447  }
2448  else if (RB_TYPE_P(v, T_SYMBOL)) {
2449  rb_raise(rb_eArgError, "unexpected value for xml option: %"PRIsVALUE, rb_sym2str(v));
2450  }
2451  else {
2452  rb_raise(rb_eArgError, "unexpected value for xml option");
2453  }
2454  }
2455 
2456 #ifdef ENABLE_ECONV_NEWLINE_OPTION
2457  v = rb_hash_aref(opt, sym_newline);
2458  if (!NIL_P(v)) {
2459  ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2460  if (v == sym_universal) {
2462  }
2463  else if (v == sym_crlf) {
2464  ecflags |= ECONV_CRLF_NEWLINE_DECORATOR;
2465  }
2466  else if (v == sym_cr) {
2467  ecflags |= ECONV_CR_NEWLINE_DECORATOR;
2468  }
2469  else if (v == sym_lf) {
2470  /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
2471  }
2472  else if (SYMBOL_P(v)) {
2473  rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
2474  rb_sym2str(v));
2475  }
2476  else {
2477  rb_raise(rb_eArgError, "unexpected value for newline option");
2478  }
2479  }
2480  else
2481 #endif
2482  {
2483  int setflags = 0, newlineflag = 0;
2484 
2485  v = rb_hash_aref(opt, sym_universal_newline);
2486  if (RTEST(v))
2488  newlineflag |= !NIL_P(v);
2489 
2490  v = rb_hash_aref(opt, sym_crlf_newline);
2491  if (RTEST(v))
2492  setflags |= ECONV_CRLF_NEWLINE_DECORATOR;
2493  newlineflag |= !NIL_P(v);
2494 
2495  v = rb_hash_aref(opt, sym_cr_newline);
2496  if (RTEST(v))
2497  setflags |= ECONV_CR_NEWLINE_DECORATOR;
2498  newlineflag |= !NIL_P(v);
2499 
2500  if (newlineflag) {
2501  ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2502  ecflags |= setflags;
2503  }
2504  }
2505 
2506  return ecflags;
2507 }
2508 
2509 int
2510 rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
2511 {
2512  VALUE newhash = Qnil;
2513  VALUE v;
2514 
2515  if (NIL_P(opthash)) {
2516  *opts = Qnil;
2517  return ecflags;
2518  }
2519  ecflags = econv_opts(opthash, ecflags);
2520 
2521  v = rb_hash_aref(opthash, sym_replace);
2522  if (!NIL_P(v)) {
2523  StringValue(v);
2525  VALUE dumped = rb_str_dump(v);
2526  rb_raise(rb_eArgError, "replacement string is broken: %s as %s",
2527  StringValueCStr(dumped),
2529  }
2530  v = rb_str_new_frozen(v);
2531  newhash = rb_hash_new();
2532  rb_hash_aset(newhash, sym_replace, v);
2533  }
2534 
2535  v = rb_hash_aref(opthash, sym_fallback);
2536  if (!NIL_P(v)) {
2538  if (NIL_P(h)
2540  : (v = h, 1)) {
2541  if (NIL_P(newhash))
2542  newhash = rb_hash_new();
2543  rb_hash_aset(newhash, sym_fallback, v);
2544  }
2545  }
2546 
2547  if (!NIL_P(newhash))
2548  rb_hash_freeze(newhash);
2549  *opts = newhash;
2550 
2551  return ecflags;
2552 }
2553 
2554 int
2556 {
2557  return rb_econv_prepare_options(opthash, opts, 0);
2558 }
2559 
2560 rb_econv_t *
2561 rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
2562 {
2563  rb_econv_t *ec;
2564  VALUE replacement;
2565 
2566  if (NIL_P(opthash)) {
2567  replacement = Qnil;
2568  }
2569  else {
2570  if (!RB_TYPE_P(opthash, T_HASH) || !OBJ_FROZEN(opthash))
2571  rb_bug("rb_econv_open_opts called with invalid opthash");
2572  replacement = rb_hash_aref(opthash, sym_replace);
2573  }
2574 
2575  ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
2576  if (!ec)
2577  return ec;
2578 
2579  if (!NIL_P(replacement)) {
2580  int ret;
2581  rb_encoding *enc = rb_enc_get(replacement);
2582 
2583  ret = rb_econv_set_replacement(ec,
2584  (const unsigned char *)RSTRING_PTR(replacement),
2585  RSTRING_LEN(replacement),
2586  rb_enc_name(enc));
2587  if (ret == -1) {
2588  rb_econv_close(ec);
2589  return NULL;
2590  }
2591  }
2592  return ec;
2593 }
2594 
2595 static int
2596 enc_arg(VALUE *arg, const char **name_p, rb_encoding **enc_p)
2597 {
2598  rb_encoding *enc;
2599  const char *n;
2600  int encidx;
2601  VALUE encval;
2602 
2603  if (((encidx = rb_to_encoding_index(encval = *arg)) < 0) ||
2604  !(enc = rb_enc_from_index(encidx))) {
2605  enc = NULL;
2606  encidx = 0;
2607  n = StringValueCStr(*arg);
2608  }
2609  else {
2610  n = rb_enc_name(enc);
2611  }
2612 
2613  *name_p = n;
2614  *enc_p = enc;
2615 
2616  return encidx;
2617 }
2618 
2619 static int
2620 str_transcode_enc_args(VALUE str, VALUE *arg1, VALUE *arg2,
2621  const char **sname_p, rb_encoding **senc_p,
2622  const char **dname_p, rb_encoding **denc_p)
2623 {
2624  rb_encoding *senc, *denc;
2625  const char *sname, *dname;
2626  int sencidx, dencidx;
2627 
2628  dencidx = enc_arg(arg1, &dname, &denc);
2629 
2630  if (NIL_P(*arg2)) {
2631  sencidx = rb_enc_get_index(str);
2632  senc = rb_enc_from_index(sencidx);
2633  sname = rb_enc_name(senc);
2634  }
2635  else {
2636  sencidx = enc_arg(arg2, &sname, &senc);
2637  }
2638 
2639  *sname_p = sname;
2640  *senc_p = senc;
2641  *dname_p = dname;
2642  *denc_p = denc;
2643  return dencidx;
2644 }
2645 
2646 static int
2647 str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
2648 {
2649  VALUE dest;
2650  VALUE str = *self;
2651  VALUE arg1, arg2;
2652  long blen, slen;
2653  unsigned char *buf, *bp, *sp;
2654  const unsigned char *fromp;
2655  rb_encoding *senc, *denc;
2656  const char *sname, *dname;
2657  int dencidx;
2658  int explicitly_invalid_replace = TRUE;
2659 
2660  rb_check_arity(argc, 0, 2);
2661 
2662  if (argc == 0) {
2663  arg1 = rb_enc_default_internal();
2664  if (NIL_P(arg1)) {
2665  if (!ecflags) return -1;
2666  arg1 = rb_obj_encoding(str);
2667  }
2668  if (!(ecflags & ECONV_INVALID_MASK)) {
2669  explicitly_invalid_replace = FALSE;
2670  }
2672  }
2673  else {
2674  arg1 = argv[0];
2675  }
2676  arg2 = argc<=1 ? Qnil : argv[1];
2677  dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2678 
2679  if ((ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
2683  if (senc && senc == denc) {
2684  if ((ecflags & ECONV_INVALID_MASK) && explicitly_invalid_replace) {
2685  VALUE rep = Qnil;
2686  if (!NIL_P(ecopts)) {
2687  rep = rb_hash_aref(ecopts, sym_replace);
2688  }
2689  dest = rb_enc_str_scrub(senc, str, rep);
2690  if (NIL_P(dest)) dest = str;
2691  *self = dest;
2692  return dencidx;
2693  }
2694  return NIL_P(arg2) ? -1 : dencidx;
2695  }
2696  if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
2698  return dencidx;
2699  }
2700  }
2701  if (encoding_equal(sname, dname)) {
2702  return NIL_P(arg2) ? -1 : dencidx;
2703  }
2704  }
2705  else {
2706  if (encoding_equal(sname, dname)) {
2707  sname = "";
2708  dname = "";
2709  }
2710  }
2711 
2712  fromp = sp = (unsigned char *)RSTRING_PTR(str);
2713  slen = RSTRING_LEN(str);
2714  blen = slen + 30; /* len + margin */
2715  dest = rb_str_tmp_new(blen);
2716  bp = (unsigned char *)RSTRING_PTR(dest);
2717 
2718  transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2719  if (fromp != sp+slen) {
2720  rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
2721  }
2722  buf = (unsigned char *)RSTRING_PTR(dest);
2723  *bp = '\0';
2724  rb_str_set_len(dest, bp - buf);
2725 
2726  /* set encoding */
2727  if (!denc) {
2728  dencidx = rb_define_dummy_encoding(dname);
2729  RB_GC_GUARD(arg1);
2730  RB_GC_GUARD(arg2);
2731  }
2732  *self = dest;
2733 
2734  return dencidx;
2735 }
2736 
2737 static int
2738 str_transcode(int argc, VALUE *argv, VALUE *self)
2739 {
2740  VALUE opt;
2741  int ecflags = 0;
2742  VALUE ecopts = Qnil;
2743 
2744  argc = rb_scan_args(argc, argv, "02:", NULL, NULL, &opt);
2745  if (!NIL_P(opt)) {
2746  ecflags = rb_econv_prepare_opts(opt, &ecopts);
2747  }
2748  return str_transcode0(argc, argv, self, ecflags, ecopts);
2749 }
2750 
/*
 * Tag str with the encoding at index encidx, record a code range on it,
 * and return str.
 */
2751 static inline VALUE
2752 str_encode_associate(VALUE str, int encidx)
2753 {
2754  int cr = 0;
2755 
2756  rb_enc_associate_index(str, encidx);
2757 
2758  /* A transcoded string is never broken. */
2759  if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
/* NOTE(review): the embedded listing numbering skips a line here, so a
 * statement for the ASCII-compatible case may have been lost in
 * extraction; as shown, cr stays 0 on this path -- confirm against
 * upstream. */
2761  }
2762  else {
/* non-ASCII-compatible result: mark the contents as valid */
2763  cr = ENC_CODERANGE_VALID;
2764  }
2765  ENC_CODERANGE_SET(str, cr);
2766  return str;
2767 }
2768 
2769 /*
2770  * call-seq:
2771  * str.encode!(encoding [, options] ) -> str
2772  * str.encode!(dst_encoding, src_encoding [, options] ) -> str
2773  *
2774  * The first form transcodes the contents of <i>str</i> from
2775  * str.encoding to +encoding+.
2776  * The second form transcodes the contents of <i>str</i> from
2777  * src_encoding to dst_encoding.
2778  * The options Hash gives details for conversion. See String#encode
2779  * for details.
2780  * Returns the string even if no changes were made.
2781  */
2782 
/* Implementation of String#encode!: transcodes the receiver in place and
 * returns it. */
2783 static VALUE
2784 str_encode_bang(int argc, VALUE *argv, VALUE str)
2785 {
2786  VALUE newstr;
2787  int encidx;
2788 
/* NOTE(review): the embedded listing numbering skips a line here; a
 * statement may have been lost in extraction -- confirm against upstream. */
2790 
2791  newstr = str;
2792  encidx = str_transcode(argc, argv, &newstr);
2793 
/* a negative index means str_transcode had nothing to do */
2794  if (encidx < 0) return str;
/* same object back: only the encoding tag needs updating */
2795  if (newstr == str) {
2796  rb_enc_associate_index(str, encidx);
2797  return str;
2798  }
/* a new string was produced: move its contents into the receiver */
2799  rb_str_shared_replace(str, newstr);
2800  return str_encode_associate(str, encidx);
2801 }
2802 
2803 static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);
2804 
2805 /*
2806  * call-seq:
2807  * str.encode(encoding [, options] ) -> str
2808  * str.encode(dst_encoding, src_encoding [, options] ) -> str
2809  * str.encode([options]) -> str
2810  *
2811  * The first form returns a copy of +str+ transcoded
2812  * to encoding +encoding+.
2813  * The second form returns a copy of +str+ transcoded
2814  * from src_encoding to dst_encoding.
2815  * The last form returns a copy of +str+ transcoded to
2816  * <tt>Encoding.default_internal</tt>.
2817  *
2818  * By default, the first and second form raise
2819  * Encoding::UndefinedConversionError for characters that are
2820  * undefined in the destination encoding, and
2821  * Encoding::InvalidByteSequenceError for invalid byte sequences
2822  * in the source encoding. The last form by default does not raise
2823  * exceptions but uses replacement strings.
2824  *
2825  * The +options+ Hash gives details for conversion and can have the following
2826  * keys:
2827  *
2828  * :invalid ::
2829  * If the value is +:replace+, #encode replaces invalid byte sequences in
2830  * +str+ with the replacement character. The default is to raise the
2831  * Encoding::InvalidByteSequenceError exception
2832  * :undef ::
2833  * If the value is +:replace+, #encode replaces characters which are
2834  * undefined in the destination encoding with the replacement character.
2835  * The default is to raise the Encoding::UndefinedConversionError.
2836  * :replace ::
2837  * Sets the replacement string to the given value. The default replacement
2838  * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
2839  * :fallback ::
2840  * Sets the replacement string by the given object for undefined
2841  * character. The object should be a Hash, a Proc, a Method, or an
2842  * object which has [] method.
2843  * Its key is an undefined character encoded in the source encoding
2844  * of current transcoder. Its value can be in any encoding as long as it
2845  * can be converted into the destination encoding of the transcoder.
2846  * :xml ::
2847  * The value must be +:text+ or +:attr+.
2848  * If the value is +:text+ #encode replaces undefined characters with their
2849  * (upper-case hexadecimal) numeric character references. '&', '<', and '>'
2850  * are converted to "&amp;", "&lt;", and "&gt;", respectively.
2851  * If the value is +:attr+, #encode also quotes the replacement result
2852  * (using '"'), and replaces '"' with "&quot;".
2853  * :cr_newline ::
2854  * Replaces LF ("\n") with CR ("\r") if value is true.
2855  * :crlf_newline ::
2856  * Replaces LF ("\n") with CRLF ("\r\n") if value is true.
2857  * :universal_newline ::
2858  * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
2859  */
2860 
2861 static VALUE
2862 str_encode(int argc, VALUE *argv, VALUE str)
2863 {
2864  VALUE newstr = str;
2865  int encidx = str_transcode(argc, argv, &newstr);
2866  return encoded_dup(newstr, str, encidx);
2867 }
2868 
2869 VALUE
2870 rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
2871 {
2872  int argc = 1;
2873  VALUE *argv = &to;
2874  VALUE newstr = str;
2875  int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts);
2876  return encoded_dup(newstr, str, encidx);
2877 }
2878 
2879 static VALUE
2880 encoded_dup(VALUE newstr, VALUE str, int encidx)
2881 {
2882  if (encidx < 0) return rb_str_dup(str);
2883  if (newstr == str) {
2884  newstr = rb_str_dup(str);
2885  rb_enc_associate_index(newstr, encidx);
2886  return newstr;
2887  }
2888  else {
2889  RBASIC_SET_CLASS(newstr, rb_obj_class(str));
2890  }
2891  return str_encode_associate(newstr, encidx);
2892 }
2893 
2894 /*
2895  * Document-class: Encoding::Converter
2896  *
2897  * Encoding conversion class.
2898  */
2899 static void
2900 econv_free(void *ptr)
2901 {
2902  rb_econv_t *ec = ptr;
2903  rb_econv_close(ec);
2904 }
2905 
2906 static size_t
2907 econv_memsize(const void *ptr)
2908 {
2909  return sizeof(rb_econv_t);
2910 }
2911 
/* Typed-data descriptor used to wrap an rb_econv_t in a Ruby object. */
2912 static const rb_data_type_t econv_data_type = {
2913  "econv",
/* callbacks in order: first slot NULL, then econv_free and econv_memsize */
2914  {NULL, econv_free, econv_memsize,},
/* NOTE(review): the embedded listing numbering skips a line here; trailing
 * initializer fields may have been lost in extraction -- confirm against
 * upstream. */
2916 };
2917 
2918 static VALUE
2919 econv_s_allocate(VALUE klass)
2920 {
2921  return TypedData_Wrap_Struct(klass, &econv_data_type, NULL);
2922 }
2923 
2924 static rb_encoding *
2925 make_dummy_encoding(const char *name)
2926 {
2927  rb_encoding *enc;
2928  int idx;
2930  enc = rb_enc_from_index(idx);
2931  return enc;
2932 }
2933 
2934 static rb_encoding *
2935 make_encoding(const char *name)
2936 {
2937  rb_encoding *enc;
2938  enc = rb_enc_find(name);
2939  if (!enc)
2940  enc = make_dummy_encoding(name);
2941  return enc;
2942 }
2943 
2944 static VALUE
2945 make_encobj(const char *name)
2946 {
2947  return rb_enc_from_encoding(make_encoding(name));
2948 }
2949 
2950 /*
2951  * call-seq:
2952  * Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
2953  * Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
2954  *
2955  * Returns the corresponding ASCII compatible encoding.
2956  *
2957  * Returns nil if the argument is an ASCII compatible encoding.
2958  *
2959  * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
2960  * can represent exactly the same characters as the given ASCII incompatible encoding.
2961  * So, no conversion undefined error occurs when converting between the two encodings.
2962  *
2963  * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
2964  * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
2965  * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
2966  *
2967  */
2968 static VALUE
2969 econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
2970 {
2971  const char *arg_name, *result_name;
2972  rb_encoding *arg_enc, *result_enc;
2973 
2974  enc_arg(&arg, &arg_name, &arg_enc);
2975 
2976  result_name = rb_econv_asciicompat_encoding(arg_name);
2977 
2978  if (result_name == NULL)
2979  return Qnil;
2980 
2981  result_enc = make_encoding(result_name);
2982 
2983  return rb_enc_from_encoding(result_enc);
2984 }
2985 
2986 static void
2987 econv_args(int argc, VALUE *argv,
2988  VALUE *snamev_p, VALUE *dnamev_p,
2989  const char **sname_p, const char **dname_p,
2990  rb_encoding **senc_p, rb_encoding **denc_p,
2991  int *ecflags_p,
2992  VALUE *ecopts_p)
2993 {
2994  VALUE opt, flags_v, ecopts;
2995  int sidx, didx;
2996  const char *sname, *dname;
2997  rb_encoding *senc, *denc;
2998  int ecflags;
2999 
3000  argc = rb_scan_args(argc, argv, "21:", snamev_p, dnamev_p, &flags_v, &opt);
3001 
3002  if (!NIL_P(flags_v)) {
3003  if (!NIL_P(opt)) {
3004  rb_error_arity(argc + 1, 2, 3);
3005  }
3006  ecflags = NUM2INT(rb_to_int(flags_v));
3007  ecopts = Qnil;
3008  }
3009  else if (!NIL_P(opt)) {
3010  ecflags = rb_econv_prepare_opts(opt, &ecopts);
3011  }
3012  else {
3013  ecflags = 0;
3014  ecopts = Qnil;
3015  }
3016 
3017  senc = NULL;
3018  sidx = rb_to_encoding_index(*snamev_p);
3019  if (0 <= sidx) {
3020  senc = rb_enc_from_index(sidx);
3021  }
3022  else {
3023  StringValue(*snamev_p);
3024  }
3025 
3026  denc = NULL;
3027  didx = rb_to_encoding_index(*dnamev_p);
3028  if (0 <= didx) {
3029  denc = rb_enc_from_index(didx);
3030  }
3031  else {
3032  StringValue(*dnamev_p);
3033  }
3034 
3035  sname = senc ? rb_enc_name(senc) : StringValueCStr(*snamev_p);
3036  dname = denc ? rb_enc_name(denc) : StringValueCStr(*dnamev_p);
3037 
3038  *sname_p = sname;
3039  *dname_p = dname;
3040  *senc_p = senc;
3041  *denc_p = denc;
3042  *ecflags_p = ecflags;
3043  *ecopts_p = ecopts;
3044 }
3045 
3046 static int
3047 decorate_convpath(VALUE convpath, int ecflags)
3048 {
3049  int num_decorators;
3050  const char *decorators[MAX_ECFLAGS_DECORATORS];
3051  int i;
3052  int n, len;
3053 
3054  num_decorators = decorator_names(ecflags, decorators);
3055  if (num_decorators == -1)
3056  return -1;
3057 
3058  len = n = RARRAY_LENINT(convpath);
3059  if (n != 0) {
3060  VALUE pair = RARRAY_AREF(convpath, n-1);
3061  if (RB_TYPE_P(pair, T_ARRAY)) {
3062  const char *sname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 0)));
3063  const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
3064  transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
3065  const rb_transcoder *tr = load_transcoder_entry(entry);
3066  if (!tr)
3067  return -1;
3068  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
3069  tr->asciicompat_type == asciicompat_encoder) {
3070  n--;
3071  rb_ary_store(convpath, len + num_decorators - 1, pair);
3072  }
3073  }
3074  else {
3075  rb_ary_store(convpath, len + num_decorators - 1, pair);
3076  }
3077  }
3078 
3079  for (i = 0; i < num_decorators; i++)
3080  rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
3081 
3082  return 0;
3083 }
3084 
3085 static void
3086 search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
3087 {
3088  VALUE *ary_p = arg;
3089  VALUE v;
3090 
3091  if (*ary_p == Qnil) {
3092  *ary_p = rb_ary_new();
3093  }
3094 
3095  if (DECORATOR_P(sname, dname)) {
3096  v = rb_str_new_cstr(dname);
3097  }
3098  else {
3099  v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
3100  }
3101  rb_ary_store(*ary_p, depth, v);
3102 }
3103 
3104 /*
3105  * call-seq:
3106  * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
3107  * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
3108  *
3109  * Returns a conversion path.
3110  *
3111  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
3112  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3113  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
3114  *
3115  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
3116  * or
3117  * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", newline: :universal)
3118  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3119  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
3120  * # "universal_newline"]
3121  *
3122  * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
3123  * or
3124  * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", newline: :universal)
3125  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3126  * # "universal_newline",
3127  * # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
3128  */
3129 static VALUE
3130 econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
3131 {
3132  VALUE snamev, dnamev;
3133  const char *sname, *dname;
3134  rb_encoding *senc, *denc;
3135  int ecflags;
3136  VALUE ecopts;
3137  VALUE convpath;
3138 
3139  econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3140 
3141  convpath = Qnil;
3142  transcode_search_path(sname, dname, search_convpath_i, &convpath);
3143 
3144  if (NIL_P(convpath)) {
3145  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3146  RB_GC_GUARD(snamev);
3147  RB_GC_GUARD(dnamev);
3148  rb_exc_raise(exc);
3149  }
3150 
3151  if (decorate_convpath(convpath, ecflags) == -1) {
3152  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3153  RB_GC_GUARD(snamev);
3154  RB_GC_GUARD(dnamev);
3155  rb_exc_raise(exc);
3156  }
3157 
3158  return convpath;
3159 }
3160 
3161 /*
3162  * Check the existence of a conversion path.
3163  * Returns the number of converters in the conversion path.
3164  * result: >=0:success -1:failure
3165  */
3166 int
3167 rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding)
3168 {
3169  VALUE convpath = Qnil;
3170  transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3171  &convpath);
3172  return RTEST(convpath);
3173 }
3174 
3177  int index;
3178  int ret;
3179 };
3180 
3181 static void
3182 rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
3183 {
3185  int ret;
3186 
3187  if (a->ret == -1)
3188  return;
3189 
3190  ret = rb_econv_add_converter(a->ec, sname, dname, a->index);
3191 
3192  a->ret = ret;
3193  return;
3194 }
3195 
3196 static rb_econv_t *
3197 rb_econv_init_by_convpath(VALUE self, VALUE convpath,
3198  const char **sname_p, const char **dname_p,
3199  rb_encoding **senc_p, rb_encoding**denc_p)
3200 {
3201  rb_econv_t *ec;
3202  long i;
3203  int ret, first=1;
3204  VALUE elt;
3205  rb_encoding *senc = 0, *denc = 0;
3206  const char *sname, *dname;
3207 
3208  ec = rb_econv_alloc(RARRAY_LENINT(convpath));
3209  DATA_PTR(self) = ec;
3210 
3211  for (i = 0; i < RARRAY_LEN(convpath); i++) {
3212  VALUE snamev, dnamev;
3213  VALUE pair;
3214  elt = rb_ary_entry(convpath, i);
3215  if (!NIL_P(pair = rb_check_array_type(elt))) {
3216  if (RARRAY_LEN(pair) != 2)
3217  rb_raise(rb_eArgError, "not a 2-element array in convpath");
3218  snamev = rb_ary_entry(pair, 0);
3219  enc_arg(&snamev, &sname, &senc);
3220  dnamev = rb_ary_entry(pair, 1);
3221  enc_arg(&dnamev, &dname, &denc);
3222  }
3223  else {
3224  sname = "";
3225  dname = StringValueCStr(elt);
3226  }
3227  if (DECORATOR_P(sname, dname)) {
3228  ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
3229  if (ret == -1) {
3230  VALUE msg = rb_sprintf("decoration failed: %s", dname);
3231  RB_GC_GUARD(snamev);
3232  RB_GC_GUARD(dnamev);
3234  }
3235  }
3236  else {
3237  int j = ec->num_trans;
3239  arg.ec = ec;
3240  arg.index = ec->num_trans;
3241  arg.ret = 0;
3242  ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
3243  if (ret == -1 || arg.ret == -1) {
3244  VALUE msg = rb_sprintf("adding conversion failed: %s to %s", sname, dname);
3245  RB_GC_GUARD(snamev);
3246  RB_GC_GUARD(dnamev);
3248  }
3249  if (first) {
3250  first = 0;
3251  *senc_p = senc;
3252  *sname_p = ec->elems[j].tc->transcoder->src_encoding;
3253  }
3254  *denc_p = denc;
3255  *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
3256  }
3257  }
3258 
3259  if (first) {
3260  *senc_p = NULL;
3261  *denc_p = NULL;
3262  *sname_p = "";
3263  *dname_p = "";
3264  }
3265 
3266  ec->source_encoding_name = *sname_p;
3267  ec->destination_encoding_name = *dname_p;
3268 
3269  return ec;
3270 }
3271 
3272 /*
3273  * call-seq:
3274  * Encoding::Converter.new(source_encoding, destination_encoding)
3275  * Encoding::Converter.new(source_encoding, destination_encoding, opt)
3276  * Encoding::Converter.new(convpath)
3277  *
3278  * possible options elements:
3279  * hash form:
3280  * :invalid => nil # raise error on invalid byte sequence (default)
3281  * :invalid => :replace # replace invalid byte sequence
3282  * :undef => nil # raise error on undefined conversion (default)
3283  * :undef => :replace # replace undefined conversion
3284  * :replace => string # replacement string ("?" or "\uFFFD" if not specified)
3285  * :newline => :universal # decorator for converting CRLF and CR to LF
3286  * :newline => :crlf # decorator for converting LF to CRLF
3287  * :newline => :cr # decorator for converting LF to CR
3288  * :universal_newline => true # decorator for converting CRLF and CR to LF
3289  * :crlf_newline => true # decorator for converting LF to CRLF
3290  * :cr_newline => true # decorator for converting LF to CR
3291  * :xml => :text # escape as XML CharData.
3292  * :xml => :attr # escape as XML AttValue
3293  * integer form:
3294  * Encoding::Converter::INVALID_REPLACE
3295  * Encoding::Converter::UNDEF_REPLACE
3296  * Encoding::Converter::UNDEF_HEX_CHARREF
3297  * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
3298  * Encoding::Converter::CRLF_NEWLINE_DECORATOR
3299  * Encoding::Converter::CR_NEWLINE_DECORATOR
3300  * Encoding::Converter::XML_TEXT_DECORATOR
3301  * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
3302  * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
3303  *
3304  * Encoding::Converter.new creates an instance of Encoding::Converter.
3305  *
3306  * Source_encoding and destination_encoding should be a string or
3307  * Encoding object.
3308  *
3309  * opt should be nil, a hash or an integer.
3310  *
3311  * convpath should be an array.
3312  * convpath may contain
3313  * - two-element arrays which contain encodings or encoding names, or
3314  * - strings representing decorator names.
3315  *
3316  * Encoding::Converter.new optionally takes an option.
3317  * The option should be a hash or an integer.
3318  * The option hash can contain :invalid => nil, etc.
3319  * The option integer should be logical-or of constants such as
3320  * Encoding::Converter::INVALID_REPLACE, etc.
3321  *
3322  * [:invalid => nil]
3323  * Raise error on invalid byte sequence. This is a default behavior.
3324  * [:invalid => :replace]
3325  * Replace invalid byte sequence by replacement string.
3326  * [:undef => nil]
3327  * Raise an error if a character in source_encoding is not defined in destination_encoding.
3328  * This is a default behavior.
3329  * [:undef => :replace]
3330  * Replace undefined character in destination_encoding with replacement string.
3331  * [:replace => string]
3332  * Specify the replacement string.
3333  * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others.
3334  * [:universal_newline => true]
3335  * Convert CRLF and CR to LF.
3336  * [:crlf_newline => true]
3337  * Convert LF to CRLF.
3338  * [:cr_newline => true]
3339  * Convert LF to CR.
3340  * [:xml => :text]
3341  * Escape as XML CharData.
3342  * This form can be used as an HTML 4.0 #PCDATA.
3343  * - '&' -> '&amp;'
3344  * - '<' -> '&lt;'
3345  * - '>' -> '&gt;'
3346  * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
3347  * [:xml => :attr]
3348  * Escape as XML AttValue.
3349  * The converted result is quoted as "...".
3350  * This form can be used as an HTML 4.0 attribute value.
3351  * - '&' -> '&amp;'
3352  * - '<' -> '&lt;'
3353  * - '>' -> '&gt;'
3354  * - '"' -> '&quot;'
3355  * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
3356  *
3357  * Examples:
3358  * # UTF-16BE to UTF-8
3359  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
3360  *
3361  * # Usually, decorators such as newline conversion are inserted last.
3362  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
3363  * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
3364  * # "universal_newline"]
3365  *
3366  * # But, if the last encoding is ASCII incompatible,
3367  * # decorators are inserted before the last conversion.
3368  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
3369  * p ec.convpath #=> ["crlf_newline",
3370  * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
3371  *
3372  * # Conversion path can be specified directly.
3373  * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
3374  * p ec.convpath #=> ["universal_newline",
3375  * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
3376  * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
3377  */
3378 static VALUE
3379 econv_init(int argc, VALUE *argv, VALUE self)
3380 {
3381  VALUE ecopts;
3382  VALUE snamev, dnamev;
3383  const char *sname, *dname;
3384  rb_encoding *senc, *denc;
3385  rb_econv_t *ec;
3386  int ecflags;
3387  VALUE convpath;
3388 
3389  if (rb_check_typeddata(self, &econv_data_type)) {
3390  rb_raise(rb_eTypeError, "already initialized");
3391  }
3392 
3393  if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) {
3394  ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
3395  ecflags = 0;
3396  ecopts = Qnil;
3397  }
3398  else {
3399  econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3400  ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
3401  }
3402 
3403  if (!ec) {
3404  VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
3405  RB_GC_GUARD(snamev);
3406  RB_GC_GUARD(dnamev);
3407  rb_exc_raise(exc);
3408  }
3409 
3410  if (!DECORATOR_P(sname, dname)) {
3411  if (!senc)
3412  senc = make_dummy_encoding(sname);
3413  if (!denc)
3414  denc = make_dummy_encoding(dname);
3415  RB_GC_GUARD(snamev);
3416  RB_GC_GUARD(dnamev);
3417  }
3418 
3419  ec->source_encoding = senc;
3420  ec->destination_encoding = denc;
3421 
3422  DATA_PTR(self) = ec;
3423 
3424  return self;
3425 }
3426 
3427 /*
3428  * call-seq:
3429  * ec.inspect -> string
3430  *
3431  * Returns a printable version of <i>ec</i>
3432  *
3433  * ec = Encoding::Converter.new("iso-8859-1", "utf-8")
3434  * puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
3435  *
3436  */
3437 static VALUE
3438 econv_inspect(VALUE self)
3439 {
3440  const char *cname = rb_obj_classname(self);
3441  rb_econv_t *ec;
3442 
3443  TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
3444  if (!ec)
3445  return rb_sprintf("#<%s: uninitialized>", cname);
3446  else {
3447  const char *sname = ec->source_encoding_name;
3448  const char *dname = ec->destination_encoding_name;
3449  VALUE str;
3450  str = rb_sprintf("#<%s: ", cname);
3451  econv_description(sname, dname, ec->flags, str);
3452  rb_str_cat2(str, ">");
3453  return str;
3454  }
3455 }
3456 
3457 static rb_econv_t *
3458 check_econv(VALUE self)
3459 {
3460  rb_econv_t *ec;
3461 
3462  TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
3463  if (!ec) {
3464  rb_raise(rb_eTypeError, "uninitialized encoding converter");
3465  }
3466  return ec;
3467 }
3468 
3469 /*
3470  * call-seq:
3471  * ec.source_encoding -> encoding
3472  *
3473  * Returns the source encoding as an Encoding object.
3474  */
3475 static VALUE
3476 econv_source_encoding(VALUE self)
3477 {
3478  rb_econv_t *ec = check_econv(self);
3479  if (!ec->source_encoding)
3480  return Qnil;
3482 }
3483 
3484 /*
3485  * call-seq:
3486  * ec.destination_encoding -> encoding
3487  *
3488  * Returns the destination encoding as an Encoding object.
3489  */
3490 static VALUE
3491 econv_destination_encoding(VALUE self)
3492 {
3493  rb_econv_t *ec = check_econv(self);
3494  if (!ec->destination_encoding)
3495  return Qnil;
3497 }
3498 
3499 /*
3500  * call-seq:
3501  * ec.convpath -> ary
3502  *
3503  * Returns the conversion path of ec.
3504  *
3505  * The result is an array of conversions.
3506  *
3507  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
3508  * p ec.convpath
3509  * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
3510  * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
3511  * # "crlf_newline"]
3512  *
3513  * Each element of the array is a pair of encodings or a string.
3514  * A pair means an encoding conversion.
3515  * A string means a decorator.
3516  *
3517  * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means
3518  * a converter from ISO-8859-1 to UTF-8.
3519  * "crlf_newline" means newline converter from LF to CRLF.
3520  */
3521 static VALUE
3522 econv_convpath(VALUE self)
3523 {
3524  rb_econv_t *ec = check_econv(self);
3525  VALUE result;
3526  int i;
3527 
3528  result = rb_ary_new();
3529  for (i = 0; i < ec->num_trans; i++) {
3530  const rb_transcoder *tr = ec->elems[i].tc->transcoder;
3531  VALUE v;
3532  if (DECORATOR_P(tr->src_encoding, tr->dst_encoding))
3533  v = rb_str_new_cstr(tr->dst_encoding);
3534  else
3535  v = rb_assoc_new(make_encobj(tr->src_encoding), make_encobj(tr->dst_encoding));
3536  rb_ary_push(result, v);
3537  }
3538  return result;
3539 }
3540 
3541 /*
3542  * call-seq:
3543  * ec == other -> true or false
3544  */
3545 static VALUE
3546 econv_equal(VALUE self, VALUE other)
3547 {
3548  rb_econv_t *ec1 = check_econv(self);
3549  rb_econv_t *ec2;
3550  int i;
3551 
3552  if (!rb_typeddata_is_kind_of(other, &econv_data_type)) {
3553  return Qnil;
3554  }
3555  ec2 = DATA_PTR(other);
3556  if (!ec2) return Qfalse;
3557  if (ec1->source_encoding_name != ec2->source_encoding_name &&
3559  return Qfalse;
3562  return Qfalse;
3563  if (ec1->flags != ec2->flags) return Qfalse;
3564  if (ec1->replacement_enc != ec2->replacement_enc &&
3566  return Qfalse;
3567  if (ec1->replacement_len != ec2->replacement_len) return Qfalse;
3568  if (ec1->replacement_str != ec2->replacement_str &&
3570  return Qfalse;
3571 
3572  if (ec1->num_trans != ec2->num_trans) return Qfalse;
3573  for (i = 0; i < ec1->num_trans; i++) {
3574  if (ec1->elems[i].tc->transcoder != ec2->elems[i].tc->transcoder)
3575  return Qfalse;
3576  }
3577  return Qtrue;
3578 }
3579 
3580 static VALUE
3581 econv_result_to_symbol(rb_econv_result_t res)
3582 {
3583  switch (res) {
3584  case econv_invalid_byte_sequence: return sym_invalid_byte_sequence;
3585  case econv_incomplete_input: return sym_incomplete_input;
3586  case econv_undefined_conversion: return sym_undefined_conversion;
3587  case econv_destination_buffer_full: return sym_destination_buffer_full;
3588  case econv_source_buffer_empty: return sym_source_buffer_empty;
3589  case econv_finished: return sym_finished;
3590  case econv_after_output: return sym_after_output;
3591  default: return INT2NUM(res); /* should not be reached */
3592  }
3593 }
3594 
3595 /*
3596  * call-seq:
3597  * ec.primitive_convert(source_buffer, destination_buffer) -> symbol
3598  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
3599  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
3600  * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
3601  *
3602  * possible opt elements:
3603  * hash form:
3604  * :partial_input => true # source buffer may be part of larger source
3605  * :after_output => true # stop conversion after output before input
3606  * integer form:
3607  * Encoding::Converter::PARTIAL_INPUT
3608  * Encoding::Converter::AFTER_OUTPUT
3609  *
3610  * possible results:
3611  * :invalid_byte_sequence
3612  * :incomplete_input
3613  * :undefined_conversion
3614  * :after_output
3615  * :destination_buffer_full
3616  * :source_buffer_empty
3617  * :finished
3618  *
3619  * primitive_convert converts source_buffer into destination_buffer.
3620  *
3621  * source_buffer should be a string or nil.
3622  * nil means an empty string.
3623  *
3624  * destination_buffer should be a string.
3625  *
3626  * destination_byteoffset should be an integer or nil.
3627  * nil means the end of destination_buffer.
3628  * If it is omitted, nil is assumed.
3629  *
3630  * destination_bytesize should be an integer or nil.
3631  * nil means unlimited.
3632  * If it is omitted, nil is assumed.
3633  *
3634  * opt should be nil, a hash or an integer.
3635  * nil means no flags.
3636  * If it is omitted, nil is assumed.
3637  *
3638  * primitive_convert converts the content of source_buffer from the beginning
3639  * and stores the result into destination_buffer.
3640  *
3641  * destination_byteoffset and destination_bytesize specify the region which
3642  * the converted result is stored.
3643  * destination_byteoffset specifies the start position in destination_buffer in bytes.
3644  * If destination_byteoffset is nil,
3645  * destination_buffer.bytesize is used for appending the result.
3646  * destination_bytesize specifies maximum number of bytes.
3647  * If destination_bytesize is nil,
3648  * destination size is unlimited.
3649  * After conversion, destination_buffer is resized to
3650  * destination_byteoffset + actually produced number of bytes.
3651  * Also destination_buffer's encoding is set to destination_encoding.
3652  *
3653  * primitive_convert drops the converted part of source_buffer.
3654  * the dropped part is converted in destination_buffer or
3655  * buffered in Encoding::Converter object.
3656  *
3657  * primitive_convert stops conversion when one of the following conditions is met.
3658  * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
3659  * +primitive_errinfo+ and +last_error+ methods return the details of the error.
3660  * - unexpected end of source buffer (:incomplete_input)
3661  * this occurs only when :partial_input is not specified.
3662  * +primitive_errinfo+ and +last_error+ methods return the details of the error.
3663  * - character not representable in output encoding (:undefined_conversion)
3664  * +primitive_errinfo+ and +last_error+ methods return the details of the error.
3665  * - after some output is generated, before input is done (:after_output)
3666  * this occurs only when :after_output is specified.
3667  * - destination buffer is full (:destination_buffer_full)
3668  * this occurs only when destination_bytesize is non-nil.
3669  * - source buffer is empty (:source_buffer_empty)
3670  * this occurs only when :partial_input is specified.
3671  * - conversion is finished (:finished)
3672  *
3673  * example:
3674  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
3675  * ret = ec.primitive_convert(src="pi", dst="", nil, 100)
3676  * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
3677  *
3678  * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
3679  * ret = ec.primitive_convert(src="pi", dst="", nil, 1)
3680  * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
3681  * ret = ec.primitive_convert(src, dst="", nil, 1)
3682  * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
3683  * ret = ec.primitive_convert(src, dst="", nil, 1)
3684  * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
3685  * ret = ec.primitive_convert(src, dst="", nil, 1)
3686  * p [ret, src, dst] #=> [:finished, "", "i"]
3687  *
3688  */
3689 static VALUE
3690 econv_primitive_convert(int argc, VALUE *argv, VALUE self)
3691 {
3692  VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3693  rb_econv_t *ec = check_econv(self);
3694  rb_econv_result_t res;
3695  const unsigned char *ip, *is;
3696  unsigned char *op, *os;
3697  long output_byteoffset, output_bytesize;
3698  unsigned long output_byteend;
3699  int flags;
3700 
3701  argc = rb_scan_args(argc, argv, "23:", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v, &opt);
3702 
3703  if (NIL_P(output_byteoffset_v))
3704  output_byteoffset = 0; /* dummy */
3705  else
3706  output_byteoffset = NUM2LONG(output_byteoffset_v);
3707 
3708  if (NIL_P(output_bytesize_v))
3709  output_bytesize = 0; /* dummy */
3710  else
3711  output_bytesize = NUM2LONG(output_bytesize_v);
3712 
3713  if (!NIL_P(flags_v)) {
3714  if (!NIL_P(opt)) {
3715  rb_error_arity(argc + 1, 2, 5);
3716  }
3717  flags = NUM2INT(rb_to_int(flags_v));
3718  }
3719  else if (!NIL_P(opt)) {
3720  VALUE v;
3721  flags = 0;
3722  v = rb_hash_aref(opt, sym_partial_input);
3723  if (RTEST(v))
3724  flags |= ECONV_PARTIAL_INPUT;
3725  v = rb_hash_aref(opt, sym_after_output);
3726  if (RTEST(v))
3727  flags |= ECONV_AFTER_OUTPUT;
3728  }
3729  else {
3730  flags = 0;
3731  }
3732 
3733  StringValue(output);
3734  if (!NIL_P(input))
3735  StringValue(input);
3736  rb_str_modify(output);
3737 
3738  if (NIL_P(output_bytesize_v)) {
3739  output_bytesize = RSTRING_EMBED_LEN_MAX;
3740  if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
3741  output_bytesize = RSTRING_LEN(input);
3742  }
3743 
3744  retry:
3745 
3746  if (NIL_P(output_byteoffset_v))
3747  output_byteoffset = RSTRING_LEN(output);
3748 
3749  if (output_byteoffset < 0)
3750  rb_raise(rb_eArgError, "negative output_byteoffset");
3751 
3752  if (RSTRING_LEN(output) < output_byteoffset)
3753  rb_raise(rb_eArgError, "output_byteoffset too big");
3754 
3755  if (output_bytesize < 0)
3756  rb_raise(rb_eArgError, "negative output_bytesize");
3757 
3758  output_byteend = (unsigned long)output_byteoffset +
3759  (unsigned long)output_bytesize;
3760 
3761  if (output_byteend < (unsigned long)output_byteoffset ||
3762  LONG_MAX < output_byteend)
3763  rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");
3764 
3765  if (rb_str_capacity(output) < output_byteend)
3766  rb_str_resize(output, output_byteend);
3767 
3768  if (NIL_P(input)) {
3769  ip = is = NULL;
3770  }
3771  else {
3772  ip = (const unsigned char *)RSTRING_PTR(input);
3773  is = ip + RSTRING_LEN(input);
3774  }
3775 
3776  op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
3777  os = op + output_bytesize;
3778 
3779  res = rb_econv_convert(ec, &ip, is, &op, os, flags);
3780  rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
3781  if (!NIL_P(input)) {
3782  rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
3783  }
3784 
3785  if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) {
3786  if (LONG_MAX / 2 < output_bytesize)
3787  rb_raise(rb_eArgError, "too long conversion result");
3788  output_bytesize *= 2;
3789  output_byteoffset_v = Qnil;
3790  goto retry;
3791  }
3792 
3793  if (ec->destination_encoding) {
3795  }
3796 
3797  return econv_result_to_symbol(res);
3798 }
3799 
3800 /*
3801  * call-seq:
3802  * ec.convert(source_string) -> destination_string
3803  *
3804  * Convert source_string and return destination_string.
3805  *
3806  * source_string is assumed as a part of source.
3807  * i.e. :partial_input=>true is specified internally.
3808  * finish method should be used last.
3809  *
3810  * ec = Encoding::Converter.new("utf-8", "euc-jp")
3811  * puts ec.convert("\u3042").dump #=> "\xA4\xA2"
3812  * puts ec.finish.dump #=> ""
3813  *
3814  * ec = Encoding::Converter.new("euc-jp", "utf-8")
3815  * puts ec.convert("\xA4").dump #=> ""
3816  * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
3817  * puts ec.finish.dump #=> ""
3818  *
3819  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
3820  * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
3821  * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
3822  * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
3823  * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
3824  *
3825  * If a conversion error occurs,
3826  * Encoding::UndefinedConversionError or
3827  * Encoding::InvalidByteSequenceError is raised.
3828  * Encoding::Converter#convert doesn't supply methods to recover or restart
3829  * from these exceptions.
3830  * When you want to handle these conversion errors,
3831  * use Encoding::Converter#primitive_convert.
3832  *
3833  */
3834 static VALUE
3835 econv_convert(VALUE self, VALUE source_string)
3836 {
3837  VALUE ret, dst;
3838  VALUE av[5];
3839  int ac;
3840  rb_econv_t *ec = check_econv(self);
3841 
3842  StringValue(source_string);
3843 
3844  dst = rb_str_new(NULL, 0);
3845 
3846  av[0] = rb_str_dup(source_string);
3847  av[1] = dst;
3848  av[2] = Qnil;
3849  av[3] = Qnil;
3850  av[4] = INT2NUM(ECONV_PARTIAL_INPUT);
3851  ac = 5;
3852 
3853  ret = econv_primitive_convert(ac, av, self);
3854 
3855  if (ret == sym_invalid_byte_sequence ||
3856  ret == sym_undefined_conversion ||
3857  ret == sym_incomplete_input) {
3858  VALUE exc = make_econv_exception(ec);
3859  rb_exc_raise(exc);
3860  }
3861 
3862  if (ret == sym_finished) {
3863  rb_raise(rb_eArgError, "converter already finished");
3864  }
3865 
3866  if (ret != sym_source_buffer_empty) {
3867  rb_bug("unexpected result of econv_primitive_convert");
3868  }
3869 
3870  return dst;
3871 }
3872 
3873 /*
3874  * call-seq:
3875  * ec.finish -> string
3876  *
3877  * Finishes the converter.
3878  * It returns the last part of the converted string.
3879  *
3880  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
3881  * p ec.convert("\u3042") #=> "\e$B$\""
3882  * p ec.finish #=> "\e(B"
3883  */
3884 static VALUE
3885 econv_finish(VALUE self)
3886 {
3887  VALUE ret, dst;
3888  VALUE av[5];
3889  int ac;
3890  rb_econv_t *ec = check_econv(self);
3891 
3892  dst = rb_str_new(NULL, 0);
3893 
3894  av[0] = Qnil;
3895  av[1] = dst;
3896  av[2] = Qnil;
3897  av[3] = Qnil;
3898  av[4] = INT2FIX(0);
3899  ac = 5;
3900 
3901  ret = econv_primitive_convert(ac, av, self);
3902 
3903  if (ret == sym_invalid_byte_sequence ||
3904  ret == sym_undefined_conversion ||
3905  ret == sym_incomplete_input) {
3906  VALUE exc = make_econv_exception(ec);
3907  rb_exc_raise(exc);
3908  }
3909 
3910  if (ret != sym_finished) {
3911  rb_bug("unexpected result of econv_primitive_convert");
3912  }
3913 
3914  return dst;
3915 }
3916 
3917 /*
3918  * call-seq:
3919  * ec.primitive_errinfo -> array
3920  *
3921  * primitive_errinfo returns important information regarding the last error
3922  * as a 5-element array:
3923  *
3924  * [result, enc1, enc2, error_bytes, readagain_bytes]
3925  *
3926  * result is the last result of primitive_convert.
3927  *
3928  * Other elements are only meaningful when result is
3929  * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
3930  *
3931  * enc1 and enc2 indicate a conversion step as a pair of strings.
3932  * For example, a converter from EUC-JP to ISO-8859-1 converts
3933  * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1.
3934  * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
3935  *
3936  * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
3937  * error_bytes is discarded portion.
3938  * readagain_bytes is buffered portion which is read again on next conversion.
3939  *
3940  * Example:
3941  *
3942  * # \xff is invalid as EUC-JP.
3943  * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
3944  * ec.primitive_convert(src="\xff", dst="", nil, 10)
3945  * p ec.primitive_errinfo
3946  * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""]
3947  *
3948  * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
3949  * # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
3950  * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
3951  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3952  * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
3953  * p ec.primitive_errinfo
3954  * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
3955  *
3956  * # partial character is invalid
3957  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3958  * ec.primitive_convert(src="\xa4", dst="", nil, 10)
3959  * p ec.primitive_errinfo
3960  * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
3961  *
3962  * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
3963  * # partial characters.
3964  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
3965  * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
3966  * p ec.primitive_errinfo
3967  * #=> [:source_buffer_empty, nil, nil, nil, nil]
3968  *
3969  * # \xd8\x00\x00@ is invalid as UTF-16BE because
3970  * # no low surrogate after high surrogate (\xd8\x00).
3971  * # It is detected by 3rd byte (\00) which is part of next character.
3972  * # So the high surrogate (\xd8\x00) is discarded and
3973  * # the 3rd byte is read again later.
3974  * # Since the byte is buffered in ec, it is dropped from src.
3975  * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
3976  * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
3977  * p ec.primitive_errinfo
3978  * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
3979  * p src
3980  * #=> "@"
3981  *
3982  * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
3983  * # The problem is detected by 4th byte.
3984  * ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
3985  * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
3986  * p ec.primitive_errinfo
3987  * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
3988  * p src
3989  * #=> ""
3990  *
3991  */
3992 static VALUE
3993 econv_primitive_errinfo(VALUE self)
3994 {
3995  rb_econv_t *ec = check_econv(self);
3996 
3997  VALUE ary;
3998 
3999  ary = rb_ary_new2(5);
4000 
4001  rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result));
4002  rb_ary_store(ary, 4, Qnil);
4003 
4006 
4009 
4013  }
4014 
4015  return ary;
4016 }
4017 
4018 /*
4019  * call-seq:
4020  * ec.insert_output(string) -> nil
4021  *
4022  * Inserts string into the encoding converter.
4023  * The string will be converted to the destination encoding and
4024  * output on later conversions.
4025  *
4026  * If the destination encoding is stateful,
4027  * string is converted according to the state and the state is updated.
4028  *
4029  * This method should be used only when a conversion error occurs.
4030  *
4031  * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
4032  * src = "HIRAGANA LETTER A is \u{3042}."
4033  * dst = ""
4034  * p ec.primitive_convert(src, dst) #=> :undefined_conversion
4035  * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
4036  * ec.insert_output("<err>")
4037  * p ec.primitive_convert(src, dst) #=> :finished
4038  * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
4039  *
4040  * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
4041  * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
4042  * dst = ""
4043  * p ec.primitive_convert(src, dst) #=> :undefined_conversion
4044  * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
4045  * ec.insert_output "?" # state change required to output "?".
4046  * p ec.primitive_convert(src, dst) #=> :finished
4047  * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
4048  *
4049  */
4050 static VALUE
4051 econv_insert_output(VALUE self, VALUE string)
4052 {
4053  const char *insert_enc;
4054 
4055  int ret;
4056 
4057  rb_econv_t *ec = check_econv(self);
4058 
4059  StringValue(string);
4060  insert_enc = rb_econv_encoding_to_insert_output(ec);
4061  string = rb_str_encode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0, Qnil);
4062 
4063  ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
4064  if (ret == -1) {
4065  rb_raise(rb_eArgError, "too big string");
4066  }
4067 
4068  return Qnil;
4069 }
4070 
4071 /*
4072  * call-seq:
4073  * ec.putback -> string
4074  * ec.putback(max_numbytes) -> string
4075  *
4076  * Put back the bytes which will be converted.
4077  *
4078  * The bytes are caused by invalid_byte_sequence error.
4079  * When invalid_byte_sequence error, some bytes are discarded and
4080  * some bytes are buffered to be converted later.
4081  * The latter bytes can be put back.
4082  * It can be observed by
4083  * Encoding::InvalidByteSequenceError#readagain_bytes and
4084  * Encoding::Converter#primitive_errinfo.
4085  *
4086  * ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
4087  * src = "\x00\xd8\x61\x00"
4088  * dst = ""
4089  * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
4090  * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
4091  * p ec.putback #=> "a\x00"
4092  * p ec.putback #=> "" # no more bytes to put back
4093  *
4094  */
4095 static VALUE
4096 econv_putback(int argc, VALUE *argv, VALUE self)
4097 {
4098  rb_econv_t *ec = check_econv(self);
4099  int n;
4100  int putbackable;
4101  VALUE str, max;
4102 
4103  if (!rb_check_arity(argc, 0, 1) || NIL_P(max = argv[0])) {
4105  }
4106  else {
4107  n = NUM2INT(max);
4108  putbackable = rb_econv_putbackable(ec);
4109  if (putbackable < n)
4110  n = putbackable;
4111  }
4112 
4113  str = rb_str_new(NULL, n);
4114  rb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
4115 
4116  if (ec->source_encoding) {
4118  }
4119 
4120  return str;
4121 }
4122 
4123 /*
4124  * call-seq:
4125  * ec.last_error -> exception or nil
4126  *
4127  * Returns an exception object for the last conversion.
4128  * Returns nil if the last conversion did not produce an error.
4129  *
4130  * "error" means that
4131  * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
4132  * Encoding::Converter#convert and
4133  * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
4134  * Encoding::Converter#primitive_convert.
4135  *
4136  * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
4137  * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
4138  * p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
4139  * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
4140  * p ec.last_error #=> nil
4141  *
4142  */
4143 static VALUE
4144 econv_last_error(VALUE self)
4145 {
4146  rb_econv_t *ec = check_econv(self);
4147  VALUE exc;
4148 
4149  exc = make_econv_exception(ec);
4150  if (NIL_P(exc))
4151  return Qnil;
4152  return exc;
4153 }
4154 
4155 /*
4156  * call-seq:
4157  * ec.replacement -> string
4158  *
4159  * Returns the replacement string.
4160  *
4161  * ec = Encoding::Converter.new("euc-jp", "us-ascii")
4162  * p ec.replacement #=> "?"
4163  *
4164  * ec = Encoding::Converter.new("euc-jp", "utf-8")
4165  * p ec.replacement #=> "\uFFFD"
4166  */
4167 static VALUE
4168 econv_get_replacement(VALUE self)
4169 {
4170  rb_econv_t *ec = check_econv(self);
4171  int ret;
4172  rb_encoding *enc;
4173 
4174  ret = make_replacement(ec);
4175  if (ret == -1) {
4176  rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
4177  }
4178 
4179  enc = rb_enc_find(ec->replacement_enc);
4180  return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
4181 }
4182 
4183 /*
4184  * call-seq:
4185  * ec.replacement = string
4186  *
4187  * Sets the replacement string.
4188  *
4189  * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
4190  * ec.replacement = "<undef>"
4191  * p ec.convert("a \u3042 b") #=> "a <undef> b"
4192  */
4193 static VALUE
4194 econv_set_replacement(VALUE self, VALUE arg)
4195 {
4196  rb_econv_t *ec = check_econv(self);
4197  VALUE string = arg;
4198  int ret;
4199  rb_encoding *enc;
4200 
4201  StringValue(string);
4202  enc = rb_enc_get(string);
4203 
4205  (const unsigned char *)RSTRING_PTR(string),
4206  RSTRING_LEN(string),
4207  rb_enc_name(enc));
4208 
4209  if (ret == -1) {
4210  /* xxx: rb_eInvalidByteSequenceError? */
4211  rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
4212  }
4213 
4214  return arg;
4215 }
4216 
4217 VALUE
4219 {
4220  return make_econv_exception(ec);
4221 }
4222 
4223 void
4225 {
4226  VALUE exc;
4227 
4228  exc = make_econv_exception(ec);
4229  if (NIL_P(exc))
4230  return;
4231  rb_exc_raise(exc);
4232 }
4233 
4234 /*
4235  * call-seq:
4236  * ecerr.source_encoding_name -> string
4237  *
4238  * Returns the source encoding name as a string.
4239  */
4240 static VALUE
4241 ecerr_source_encoding_name(VALUE self)
4242 {
4243  return rb_attr_get(self, rb_intern("source_encoding_name"));
4244 }
4245 
4246 /*
4247  * call-seq:
4248  * ecerr.source_encoding -> encoding
4249  *
4250  * Returns the source encoding as an encoding object.
4251  *
4252  * Note that the result may not be equal to the source encoding of
4253  * the encoding converter if the conversion has multiple steps.
4254  *
4255  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
4256  * begin
4257  * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
4258  * rescue Encoding::UndefinedConversionError
4259  * p $!.source_encoding #=> #<Encoding:UTF-8>
4260  * p $!.destination_encoding #=> #<Encoding:EUC-JP>
4261  * p $!.source_encoding_name #=> "UTF-8"
4262  * p $!.destination_encoding_name #=> "EUC-JP"
4263  * end
4264  *
4265  */
4266 static VALUE
4267 ecerr_source_encoding(VALUE self)
4268 {
4269  return rb_attr_get(self, rb_intern("source_encoding"));
4270 }
4271 
4272 /*
4273  * call-seq:
4274  * ecerr.destination_encoding_name -> string
4275  *
4276  * Returns the destination encoding name as a string.
4277  */
4278 static VALUE
4279 ecerr_destination_encoding_name(VALUE self)
4280 {
4281  return rb_attr_get(self, rb_intern("destination_encoding_name"));
4282 }
4283 
4284 /*
4285  * call-seq:
4286  * ecerr.destination_encoding -> encoding
4287  *
4288  * Returns the destination encoding as an encoding object.
4289  */
4290 static VALUE
4291 ecerr_destination_encoding(VALUE self)
4292 {
4293  return rb_attr_get(self, rb_intern("destination_encoding"));
4294 }
4295 
4296 /*
4297  * call-seq:
4298  * ecerr.error_char -> string
4299  *
4300  * Returns the one-character string which cause Encoding::UndefinedConversionError.
4301  *
4302  * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
4303  * begin
4304  * ec.convert("\xa0")
4305  * rescue Encoding::UndefinedConversionError
4306  * puts $!.error_char.dump #=> "\xC2\xA0"
4307  * p $!.error_char.encoding #=> #<Encoding:UTF-8>
4308  * end
4309  *
4310  */
4311 static VALUE
4312 ecerr_error_char(VALUE self)
4313 {
4314  return rb_attr_get(self, rb_intern("error_char"));
4315 }
4316 
4317 /*
4318  * call-seq:
4319  * ecerr.error_bytes -> string
4320  *
4321  * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
4322  *
4323  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
4324  * begin
4325  * ec.convert("abc\xA1\xFFdef")
4326  * rescue Encoding::InvalidByteSequenceError
4327  * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
4328  * puts $!.error_bytes.dump #=> "\xA1"
4329  * puts $!.readagain_bytes.dump #=> "\xFF"
4330  * end
4331  */
4332 static VALUE
4333 ecerr_error_bytes(VALUE self)
4334 {
4335  return rb_attr_get(self, rb_intern("error_bytes"));
4336 }
4337 
4338 /*
4339  * call-seq:
4340  * ecerr.readagain_bytes -> string
4341  *
4342  * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
4343  */
4344 static VALUE
4345 ecerr_readagain_bytes(VALUE self)
4346 {
4347  return rb_attr_get(self, rb_intern("readagain_bytes"));
4348 }
4349 
4350 /*
4351  * call-seq:
4352  * ecerr.incomplete_input? -> true or false
4353  *
4354  * Returns true if the invalid byte sequence error is caused by
4355  * premature end of string.
4356  *
4357  * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
4358  *
4359  * begin
4360  * ec.convert("abc\xA1z")
4361  * rescue Encoding::InvalidByteSequenceError
4362  * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
4363  * p $!.incomplete_input? #=> false
4364  * end
4365  *
4366  * begin
4367  * ec.convert("abc\xA1")
4368  * ec.finish
4369  * rescue Encoding::InvalidByteSequenceError
4370  * p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
4371  * p $!.incomplete_input? #=> true
4372  * end
4373  */
4374 static VALUE
4375 ecerr_incomplete_input(VALUE self)
4376 {
4377  return rb_attr_get(self, rb_intern("incomplete_input"));
4378 }
4379 
4380 /*
4381  * Document-class: Encoding::UndefinedConversionError
4382  *
4383  * Raised by Encoding and String methods when a transcoding operation
4384  * fails.
4385  */
4386 
4387 /*
4388  * Document-class: Encoding::InvalidByteSequenceError
4389  *
4390  * Raised by Encoding and String methods when the string being
4391  * transcoded contains a byte invalid for either the source or
4392  * target encoding.
4393  */
4394 
4395 /*
4396  * Document-class: Encoding::ConverterNotFoundError
4397  *
4398  * Raised by transcoding methods when a named encoding does not
4399  * correspond with a known converter.
4400  */
4401 
4402 #undef rb_intern
4403 void
4405 {
4406  transcoder_table = st_init_strcasetable();
4407 
4408  sym_invalid = ID2SYM(rb_intern("invalid"));
4409  sym_undef = ID2SYM(rb_intern("undef"));
4410  sym_replace = ID2SYM(rb_intern("replace"));
4411  sym_fallback = ID2SYM(rb_intern("fallback"));
4412  sym_xml = ID2SYM(rb_intern("xml"));
4413  sym_text = ID2SYM(rb_intern("text"));
4414  sym_attr = ID2SYM(rb_intern("attr"));
4415 
4416  sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence"));
4417  sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion"));
4418  sym_destination_buffer_full = ID2SYM(rb_intern("destination_buffer_full"));
4419  sym_source_buffer_empty = ID2SYM(rb_intern("source_buffer_empty"));
4420  sym_finished = ID2SYM(rb_intern("finished"));
4421  sym_after_output = ID2SYM(rb_intern("after_output"));
4422  sym_incomplete_input = ID2SYM(rb_intern("incomplete_input"));
4423  sym_universal_newline = ID2SYM(rb_intern("universal_newline"));
4424  sym_crlf_newline = ID2SYM(rb_intern("crlf_newline"));
4425  sym_cr_newline = ID2SYM(rb_intern("cr_newline"));
4426  sym_partial_input = ID2SYM(rb_intern("partial_input"));
4427 
4428 #ifdef ENABLE_ECONV_NEWLINE_OPTION
4429  sym_newline = ID2SYM(rb_intern("newline"));
4430  sym_universal = ID2SYM(rb_intern("universal"));
4431  sym_crlf = ID2SYM(rb_intern("crlf"));
4432  sym_cr = ID2SYM(rb_intern("cr"));
4433  sym_lf = ID2SYM(rb_intern("lf"));
4434 #endif
4435 
4436  InitVM(transcode);
4437 }
4438 
/*
 * InitVM_transcode: registers the Ruby-visible classes and methods
 * implemented in this file.
 *
 * NOTE(review): the declarator line (listing line 4440) is absent from this
 * extract; the name is taken from the cross-reference entry
 * "void InitVM_transcode(void)  Definition: transcode.c:4440".
 *
 * Visible work, in order:
 *   - define three error classes under rb_cEncoding, each a subclass of
 *     rb_eEncodingError;
 *   - add "encode" and "encode!" to rb_cString;
 *   - attach the allocator, singleton methods and instance methods to
 *     rb_cEncodingConverter;
 *   - carry the RDoc "Document-const" comments for the converter constants;
 *   - add accessor methods to the two conversion-error classes;
 *   - call Init_newline().
 *
 * In every rb_define_method / rb_define_singleton_method call the final
 * integer is that function's `argc` parameter.
 */
4439 void
4441 {
    /* Error classes, all nested under rb_cEncoding. */
4442  rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
4443  rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
4444  rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
4445 
    /* String-level entry points. */
4446  rb_define_method(rb_cString, "encode", str_encode, -1);
4447  rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
4448 
    /* NOTE(review): listing line 4449 is missing from this extract;
     * presumably it is where rb_cEncodingConverter gets its value --
     * confirm against the original file. */
4450  rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
4451  rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
4452  rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
4453  rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
4454  rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
4455  rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);
4456  rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
4457  rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
4458  rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
4459  rb_define_method(rb_cEncodingConverter, "convert", econv_convert, 1);
4460  rb_define_method(rb_cEncodingConverter, "finish", econv_finish, 0);
4461  rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
4462  rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
4463  rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
4464  rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
4465  rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
4466  rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
4467  rb_define_method(rb_cEncodingConverter, "==", econv_equal, 1);
4468 
    /* NOTE(review): after each Document-const comment below, the listing
     * numbering skips one line (4473, 4479, 4486, ...). The statement each
     * comment documents is therefore not visible in this extract. */
4469  /* Document-const: INVALID_MASK
4470  *
4471  * Mask for invalid byte sequences
4472  */
4474 
4475  /* Document-const: INVALID_REPLACE
4476  *
4477  * Replace invalid byte sequences
4478  */
4480 
4481  /* Document-const: UNDEF_MASK
4482  *
4483  * Mask for a valid character in the source encoding but no related
4484  * character(s) in destination encoding.
4485  */
4487 
4488  /* Document-const: UNDEF_REPLACE
4489  *
4490  * Replace byte sequences that are undefined in the destination encoding.
4491  */
4493 
4494  /* Document-const: UNDEF_HEX_CHARREF
4495  *
4496  * Replace byte sequences that are undefined in the destination encoding
4497  * with an XML hexadecimal character reference. This is valid for XML
4498  * conversion.
4499  */
4501 
4502  /* Document-const: PARTIAL_INPUT
4503  *
4504  * Indicates the source may be part of a larger string. See
4505  * primitive_convert for an example.
4506  */
4508 
4509  /* Document-const: AFTER_OUTPUT
4510  *
4511  * Stop converting after some output is complete but before all of the
4512  * input was consumed. See primitive_convert for an example.
4513  */
4515 
4516  /* Document-const: UNIVERSAL_NEWLINE_DECORATOR
4517  *
4518  * Decorator for converting CRLF and CR to LF
4519  */
4521 
4522  /* Document-const: CRLF_NEWLINE_DECORATOR
4523  *
4524  * Decorator for converting LF to CRLF
4525  */
4527 
4528  /* Document-const: CR_NEWLINE_DECORATOR
4529  *
4530  * Decorator for converting LF to CR
4531  */
4533 
4534  /* Document-const: XML_TEXT_DECORATOR
4535  *
4536  * Escape as XML CharData
4537  */
4539 
4540  /* Document-const: XML_ATTR_CONTENT_DECORATOR
4541  *
4542  * Escape as XML AttValue
4543  */
4545 
4546  /* Document-const: XML_ATTR_QUOTE_DECORATOR
4547  *
4548  * Escape as XML AttValue
4549  */
4551 
    /* Accessors on UndefinedConversionError. */
4552  rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0);
4553  rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
4554  rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0);
4555  rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0);
4556  rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0);
4557 
    /* Accessors on InvalidByteSequenceError; the four encoding accessors
     * reuse the same C functions as above. */
4558  rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0);
4559  rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
4560  rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0);
4561  rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0);
4562  rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0);
4563  rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0);
4564  rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0);
4565 
    /* Declared as void Init_newline(void); its definition is not in this
     * file's listing. */
4566  Init_newline();
4567 }
strcmp
int strcmp(const char *, const char *)
rb_econv_putback
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
Definition: transcode.c:1736
i
uint32_t i
Definition: rb_mjit_min_header-2.7.1.h:5464
econv_source_buffer_empty
@ econv_source_buffer_empty
Definition: encoding.h:301
TRANSCODING_WRITEBUF_SIZE
#define TRANSCODING_WRITEBUF_SIZE(tc)
Definition: transcode.c:92
TRUE
#define TRUE
Definition: nkf.h:175
FOURbt
#define FOURbt
Definition: transcode_data.h:31
rb_transcoding::writebuf_len
ssize_t writebuf_len
Definition: transcode.c:72
long
#define long
Definition: rb_mjit_min_header-2.7.1.h:2880
rb_econv_init_by_convpath_t
Definition: transcode.c:3175
rb_method_call
VALUE rb_method_call(int, const VALUE *, VALUE)
Definition: proc.c:2261
rb_assoc_new
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Definition: array.c:896
rb_econv_elem_t::out_data_end
unsigned char * out_data_end
Definition: transcode.c:106
rb_str_new2
#define rb_str_new2
Definition: intern.h:903
double
double
Definition: rb_mjit_min_header-2.7.1.h:5923
rb_enc_name
#define rb_enc_name(enc)
Definition: encoding.h:177
rb_enc_mbc_to_codepoint
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:208
rb_cData
RUBY_EXTERN VALUE rb_cData
Definition: ruby.h:2018
rb_econv_t::replacement_allocated
int replacement_allocated
Definition: transcode.c:127
rb_transcoding::rb_transcoding_state_t
Definition: transcode.c:78
klass
VALUE klass
Definition: rb_mjit_min_header-2.7.1.h:13259
LONG_MAX
#define LONG_MAX
Definition: ruby.h:220
BL_ACTION
#define BL_ACTION(byte)
rb_exc_new_str
VALUE rb_exc_new_str(VALUE etype, VALUE str)
Definition: error.c:972
st_data_t
unsigned long st_data_t
Definition: rb_mjit_min_header-2.7.1.h:5363
rb_econv_t::num_finished
int num_finished
Definition: transcode.c:130
st_table::num_entries
st_index_t num_entries
Definition: st.h:86
rb_exc_new3
#define rb_exc_new3
Definition: intern.h:293
rb_hash_new
VALUE rb_hash_new(void)
Definition: hash.c:1523
rb_econv_open
rb_econv_t * rb_econv_open(const char *sname, const char *dname, int ecflags)
Definition: transcode.c:1052
rb_transcoding::writebuf_off
ssize_t writebuf_off
Definition: transcode.c:71
ENC_CODERANGE_VALID
#define ENC_CODERANGE_VALID
Definition: encoding.h:105
rb_str_buf_new
VALUE rb_str_buf_new(long)
Definition: string.c:1315
entries
struct iseq_catch_table_entry entries[]
Definition: rb_mjit_min_header-2.7.1.h:10832
ST_STOP
@ ST_STOP
Definition: st.h:99
ECONV_XML_ATTR_CONTENT_DECORATOR
#define ECONV_XML_ATTR_CONTENT_DECORATOR
Definition: encoding.h:406
InitVM_transcode
void InitVM_transcode(void)
Definition: transcode.c:4440
INT2FIX
#define INT2FIX(i)
Definition: ruby.h:263
n
const char size_t n
Definition: rb_mjit_min_header-2.7.1.h:5456
transcoder_entry_t::dname
const char * dname
Definition: transcode.c:158
bp
#define bp()
Definition: internal.h:1445
ECONV_CRLF_NEWLINE_DECORATOR
#define ECONV_CRLF_NEWLINE_DECORATOR
Definition: encoding.h:403
RSTRING_PTR
#define RSTRING_PTR(str)
Definition: ruby.h:1009
SIZE_MAX
#define SIZE_MAX
Definition: ruby.h:307
NUM2LONG
#define NUM2LONG(x)
Definition: ruby.h:679
rb_attr_get
VALUE rb_attr_get(VALUE, ID)
Definition: variable.c:1084
tr
Definition: string.c:6989
rb_str_new_cstr
#define rb_str_new_cstr(str)
Definition: rb_mjit_min_header-2.7.1.h:6117
rb_hash_aref
VALUE rb_hash_aref(VALUE hash, VALUE key)
Definition: hash.c:2032
rb_econv_str_append
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
Definition: transcode.c:1848
VALUE
unsigned long VALUE
Definition: ruby.h:102
rb_funcallv_public
VALUE rb_funcallv_public(VALUE, ID, int, const VALUE *)
Calls a method.
Definition: vm_eval.c:980
BL_MIN_BYTE
#define BL_MIN_BYTE
rb_obj_encoding
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:1004
rb_eArgError
VALUE rb_eArgError
Definition: error.c:923
encoding.h
RSTRING_EMBED_LEN_MAX
@ RSTRING_EMBED_LEN_MAX
Definition: ruby.h:982
rb_intern
#define rb_intern(str)
rb_ary_store
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:1079
ECONV_UNDEF_HEX_CHARREF
#define ECONV_UNDEF_HEX_CHARREF
Definition: encoding.h:397
rb_cEncodingConverter
VALUE rb_cEncodingConverter
Definition: transcode.c:25
search_path_queue_tag::enc
const char * enc
Definition: transcode.c:245
rb_econv_make_exception
VALUE rb_econv_make_exception(rb_econv_t *ec)
Definition: transcode.c:4218
RB_TYPE_P
#define RB_TYPE_P(obj, type)
Definition: ruby.h:560
rb_transcoding::rb_transcoding_state_t::dummy_for_alignment
double dummy_for_alignment
Definition: transcode.c:81
rb_enc_get
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
st_add_direct
void st_add_direct(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1251
rb_enc_asciicompat
#define rb_enc_asciicompat(enc)
Definition: encoding.h:245
ECONV_UNDEF_REPLACE
#define ECONV_UNDEF_REPLACE
Definition: encoding.h:396
rb_enc_precise_mbclen
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1032
rb_econv_t::started
int started
Definition: transcode.c:113
ECONV_CR_NEWLINE_DECORATOR
#define ECONV_CR_NEWLINE_DECORATOR
Definition: encoding.h:404
rb_transcoder::src_encoding
const char * src_encoding
Definition: transcode_data.h:99
search_path_queue_tag::next
struct search_path_queue_tag * next
Definition: transcode.c:244
getBT3
#define getBT3(a)
Definition: transcode_data.h:73
id.h
rb_econv_t::replacement_len
size_t replacement_len
Definition: transcode.c:119
getBT2
#define getBT2(a)
Definition: transcode_data.h:72
getGB4bt0
#define getGB4bt0(a)
Definition: transcode_data.h:76
rb_transcoding::readbuf
union rb_transcoding::@166 readbuf
rb_declare_transcoder
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
Definition: transcode.c:233
rb_econv_t::source_encoding_name
const char * source_encoding_name
Definition: transcode.c:115
arg
VALUE arg
Definition: rb_mjit_min_header-2.7.1.h:5601
SUSPEND_OBUF
#define SUSPEND_OBUF(num)
rb_str_dup
VALUE rb_str_dup(VALUE)
Definition: string.c:1516
rb_str_cat2
#define rb_str_cat2
Definition: intern.h:912
Qundef
#define Qundef
Definition: ruby.h:470
asciicompat_decoder
@ asciicompat_decoder
Definition: transcode_data.h:90
rb_define_singleton_method
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1755
rb_econv_t
Definition: transcode.c:111
rb_str_modify
void rb_str_modify(VALUE)
Definition: string.c:2114
econv_after_output
@ econv_after_output
Definition: encoding.h:303
rb_define_method
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1551
ENC_CODERANGE_SET
#define ENC_CODERANGE_SET(obj, cr)
Definition: encoding.h:110
INT2NUM
#define INT2NUM(x)
Definition: ruby.h:1609
ptr
struct RIMemo * ptr
Definition: debug.c:74
rb_enc_default_internal
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1521
rb_str_new
#define rb_str_new(str, len)
Definition: rb_mjit_min_header-2.7.1.h:6116
STR1_LENGTH
#define STR1_LENGTH(byte_addr)
Definition: transcode_data.h:43
Qfalse
#define Qfalse
Definition: ruby.h:467
transcoder_entry_t
Definition: transcode.c:156
rb_transcoding::rb_transcoding_state_t::ptr
void * ptr
Definition: transcode.c:79
STR1_BYTEINDEX
#define STR1_BYTEINDEX(w)
Definition: transcode_data.h:44
trans_open_t::entries
transcoder_entry_t ** entries
Definition: transcode.c:955
dp
#define dp(v)
Definition: vm_debug.h:21
writebuf_off
#define writebuf_off
INVALID
#define INVALID
Definition: transcode_data.h:32
rb_transcoding::rb_transcoding_state_t::ary
char ary[sizeof(double) > sizeof(void *) ? sizeof(double) :sizeof(void *)]
Definition: transcode.c:80
ONEbt
#define ONEbt
Definition: transcode_data.h:28
NULL
#define NULL
Definition: _sdbm.c:101
char
#define char
Definition: rb_mjit_min_header-2.7.1.h:2876
rb_transcoding::readagain_len
ssize_t readagain_len
Definition: transcode.c:65
rb_str_encode
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2870
rb_econv_t::result
rb_econv_result_t result
Definition: transcode.c:135
PRIsVALUE
#define PRIsVALUE
Definition: ruby.h:166
RBASIC_SET_CLASS
#define RBASIC_SET_CLASS(obj, cls)
Definition: internal.h:1988
rb_enc_from_encoding
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:116
rb_econv_prepare_opts
int rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
Definition: transcode.c:2555
ID2SYM
#define ID2SYM(x)
Definition: ruby.h:414
strlen
size_t strlen(const char *)
OBJ_FREEZE
#define OBJ_FREEZE(x)
Definition: ruby.h:1377
rb_econv_elem_t
Definition: transcode.c:102
T_SYMBOL
#define T_SYMBOL
Definition: ruby.h:540
getGB4bt1
#define getGB4bt1(a)
Definition: transcode_data.h:77
FUNso
#define FUNso
Definition: transcode_data.h:38
rb_econv_t::last_tc
struct rb_transcoding * last_tc
Definition: transcode.c:131
rb_eEncodingError
VALUE rb_eEncodingError
Definition: error.c:928
rb_respond_to
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:2190
rb_transcoding::resume_position
int resume_position
Definition: transcode.c:58
rb_check_arity
#define rb_check_arity
Definition: intern.h:347
rb_econv_init_by_convpath_t::ec
rb_econv_t * ec
Definition: transcode.c:3176
InitVM
#define InitVM(ext)
Definition: ruby.h:2329
RARRAY_LENINT
#define RARRAY_LENINT(ary)
Definition: ruby.h:1071
rb_str_capacity
size_t rb_str_capacity(VALUE str)
Definition: string.c:712
ALLOC_N
#define ALLOC_N(type, n)
Definition: ruby.h:1663
rb_econv_asciicompat_encoding
const char * rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
Definition: transcode.c:1769
rb_str_resize
VALUE rb_str_resize(VALUE, long)
Definition: string.c:2709
DECORATOR_P
#define DECORATOR_P(sname, dname)
Definition: transcode.c:154
exc
const rb_iseq_t const VALUE exc
Definition: rb_mjit_min_header-2.7.1.h:13509
rb_econv_close
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1685
rb_require_string
VALUE rb_require_string(VALUE)
Definition: load.c:1101
rb_raise
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2669
transcoder_entry_t::lib
const char * lib
Definition: transcode.c:159
rb_ary_entry
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1512
rb_econv_convert
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
Definition: transcode.c:1429
rb_econv_t::replacement_str
const unsigned char * replacement_str
Definition: transcode.c:118
rb_econv_t::error_tc
struct rb_transcoding * error_tc
Definition: transcode.c:136
search_path_bfs_t::base_enc
const char * base_enc
Definition: transcode.c:252
rb_obj_class
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
writebuf_len
#define writebuf_len
rb_obj_is_proc
VALUE rb_obj_is_proc(VALUE)
Definition: proc.c:152
rb_enc_get_index
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:779
rb_str_dump
VALUE rb_str_dump(VALUE)
Definition: string.c:6042
rb_str_drop_bytes
VALUE rb_str_drop_bytes(VALUE, long)
Definition: string.c:4573
DATA_PTR
#define DATA_PTR(dta)
Definition: ruby.h:1175
MBCLEN_CHARFOUND_LEN
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:192
rb_econv_t::last_error
struct rb_econv_t::@168 last_error
rb_encoding
const typedef OnigEncodingType rb_encoding
Definition: encoding.h:115
rb_transcoding::next_byte
unsigned char next_byte
Definition: transcode.c:61
rb_check_frozen
#define rb_check_frozen(obj)
Definition: intern.h:319
idAREF
@ idAREF
Definition: id.h:105
FUNio
#define FUNio
Definition: transcode_data.h:37
rb_transcoder::max_output
int max_output
Definition: transcode_data.h:109
rb_transcoder
Definition: transcode_data.h:98
rb_enc_from_index
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:609
getGB4bt3
#define getGB4bt3(a)
Definition: transcode_data.h:79
search_path_queue_tag
Definition: transcode.c:243
rb_econv_putbackable
int rb_econv_putbackable(rb_econv_t *ec)
Definition: transcode.c:1725
rb_define_dummy_encoding
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:462
FUNsi
#define FUNsi
Definition: transcode_data.h:36
rb_econv_elem_t::tc
struct rb_transcoding * tc
Definition: transcode.c:103
rb_econv_prepare_options
int rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
Definition: transcode.c:2510
asciicompat_encoding_t::ascii_compat_name
const char * ascii_compat_name
Definition: transcode.c:1747
h
size_t st_index_t h
Definition: rb_mjit_min_header-2.7.1.h:5462
ECONV_XML_TEXT_DECORATOR
#define ECONV_XML_TEXT_DECORATOR
Definition: encoding.h:405
st_data_t
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
rb_econv_has_convpath_p
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
Definition: transcode.c:3167
trans_open_t
Definition: transcode.c:954
ECONV_UNIVERSAL_NEWLINE_DECORATOR
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
Definition: encoding.h:402
search_path_queue_t
struct search_path_queue_tag search_path_queue_t
rb_econv_t::error_bytes_len
size_t error_bytes_len
Definition: transcode.c:140
econv_incomplete_input
@ econv_incomplete_input
Definition: encoding.h:304
asciicompat_encoding_t
Definition: transcode.c:1746
rb_cEncoding
VALUE rb_cEncoding
Definition: encoding.c:46
rb_ary_push
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1195
rb_econv_substr_convert
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
Definition: transcode.c:1854
TWObt
#define TWObt
Definition: transcode_data.h:29
rb_enc_str_scrub
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
Definition: string.c:10255
rb_str_shared_replace
void rb_str_shared_replace(VALUE, VALUE)
Definition: string.c:1391
rb_econv_t::in_buf_end
unsigned char * in_buf_end
Definition: transcode.c:125
FUNsio
#define FUNsio
Definition: transcode_data.h:41
TypedData_Wrap_Struct
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1231
ECONV_INVALID_MASK
#define ECONV_INVALID_MASK
Definition: encoding.h:393
econv_invalid_byte_sequence
@ econv_invalid_byte_sequence
Definition: encoding.h:298
ECONV_XML_ATTR_QUOTE_DECORATOR
#define ECONV_XML_ATTR_QUOTE_DECORATOR
Definition: encoding.h:408
rb_econv_open_opts
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
Definition: transcode.c:2561
rb_eTypeError
VALUE rb_eTypeError
Definition: error.c:922
rb_transcoding::output_index
unsigned int output_index
Definition: transcode.c:62
rb_econv_t::destination_encoding_name
const char * destination_encoding_name
Definition: transcode.c:116
ALLOC
#define ALLOC(type)
Definition: ruby.h:1664
rb_eRuntimeError
VALUE rb_eRuntimeError
Definition: error.c:920
SUSPEND_AFTER_OUTPUT
#define SUSPEND_AFTER_OUTPUT(num)
input
unsigned int input
Definition: nkf.c:4325
rb_econv_elem_t::last_result
rb_econv_result_t last_result
Definition: transcode.c:108
size_t
long unsigned int size_t
Definition: rb_mjit_min_header-2.7.1.h:666
st_init_strcasetable
st_table * st_init_strcasetable(void)
Definition: st.c:683
ALLOCA_N
#define ALLOCA_N(type, n)
Definition: ruby.h:1684
ECONV_AFTER_OUTPUT
#define ECONV_AFTER_OUTPUT
Definition: encoding.h:416
rb_econv_t::error_bytes_start
const unsigned char * error_bytes_start
Definition: transcode.c:139
RARRAY_AREF
#define RARRAY_AREF(a, i)
Definition: ruby.h:1101
size
int size
Definition: encoding.c:58
rb_str_set_len
void rb_str_set_len(VALUE, long)
Definition: string.c:2692
rb_econv_t::num_trans
int num_trans
Definition: transcode.c:129
FALSE
#define FALSE
Definition: nkf.h:174
ECONV_NEWLINE_DECORATOR_MASK
#define ECONV_NEWLINE_DECORATOR_MASK
Definition: encoding.h:399
rb_to_int
VALUE rb_to_int(VALUE)
Converts val into Integer.
Definition: object.c:3021
rb_econv_t::in_buf_start
unsigned char * in_buf_start
Definition: transcode.c:122
rb_econv_append
VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
Definition: transcode.c:1796
rb_econv_open_exc
VALUE rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
Definition: transcode.c:2019
rb_str_new_frozen
VALUE rb_str_new_frozen(VALUE)
Definition: string.c:1203
memcmp
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
rb_register_transcoder
void rb_register_transcoder(const rb_transcoder *tr)
Definition: transcode.c:205
Init_newline
void Init_newline(void)
rb_error_arity
MJIT_STATIC void rb_error_arity(int argc, int min, int max)
Definition: vm_insnhelper.c:387
ECONV_ERROR_HANDLER_MASK
#define ECONV_ERROR_HANDLER_MASK
Definition: encoding.h:392
getBT1
#define getBT1(a)
Definition: transcode_data.h:71
MAX_ECFLAGS_DECORATORS
#define MAX_ECFLAGS_DECORATORS
Definition: transcode.c:1011
rb_econv_elem_t::out_data_start
unsigned char * out_data_start
Definition: transcode.c:105
StringValueCStr
#define StringValueCStr(v)
Definition: ruby.h:604
ENC_CODERANGE_BROKEN
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:106
rb_check_array_type
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:909
rb_econv_decorate_at_last
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
Definition: transcode.c:1908
key
key
Definition: openssl_missing.h:181
T_HASH
#define T_HASH
Definition: ruby.h:531
rb_econv_t::destination_encoding
rb_encoding * destination_encoding
Definition: transcode.c:147
path
VALUE path
Definition: rb_mjit_min_header-2.7.1.h:7353
TRANSCODING_WRITEBUF
#define TRANSCODING_WRITEBUF(tc)
Definition: transcode.c:88
rb_to_encoding_index
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:197
rb_transcoding
Definition: transcode.c:53
rb_econv_binmode
void rb_econv_binmode(rb_econv_t *ec)
Definition: transcode.c:1925
rb_typeddata_is_kind_of
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:872
next_byte
#define next_byte
ECONV_UNDEF_MASK
#define ECONV_UNDEF_MASK
Definition: encoding.h:395
MBCLEN_CHARFOUND_P
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:191
THREEbt
#define THREEbt
Definition: transcode_data.h:30
RARRAY_LEN
#define RARRAY_LEN(a)
Definition: ruby.h:1070
st_foreach
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
Definition: st.c:1718
getBT0
#define getBT0(a)
Definition: transcode_data.h:74
rb_scan_args
#define rb_scan_args(argc, argvp, fmt,...)
Definition: rb_mjit_min_header-2.7.1.h:6372
rb_ary_new4
#define rb_ary_new4
Definition: intern.h:105
rb_check_hash_type
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:1847
rb_ary_new2
#define rb_ary_new2
Definition: intern.h:103
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
rb_exc_raise
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition: eval.c:668
TypedData_Get_Struct
#define TypedData_Get_Struct(obj, type, data_type, sval)
Definition: ruby.h:1252
rb_enc_str_coderange
int rb_enc_str_coderange(VALUE)
Definition: string.c:657
rb_bug
void rb_bug(const char *fmt,...)
Definition: error.c:634
StringValue
use StringValue() instead")))
internal.h
rb_to_encoding
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:245
T_ARRAY
#define T_ARRAY
Definition: ruby.h:530
rb_econv_t::in_data_end
unsigned char * in_data_end
Definition: transcode.c:124
argv
char ** argv
Definition: ruby.c:223
f
#define f
rb_econv_t::source_encoding
rb_encoding * source_encoding
Definition: transcode.c:146
next_table
#define next_table
ST_CONTINUE
@ ST_CONTINUE
Definition: st.h:99
PRIdPTRDIFF
#define PRIdPTRDIFF
Definition: ruby.h:190
rb_econv_t::replacement_enc
const char * replacement_enc
Definition: transcode.c:120
BYTE_ADDR
#define BYTE_ADDR(index)
rb_econv_substr_append
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
Definition: transcode.c:1839
xmalloc
#define xmalloc
Definition: defines.h:211
xrealloc
#define xrealloc
Definition: defines.h:214
rb_transcoding::recognized_len
ssize_t recognized_len
Definition: transcode.c:64
rb_sprintf
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1197
rb_enc_find
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:728
rb_utf8_encoding
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1328
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
cc
const struct rb_call_cache * cc
Definition: rb_mjit_min_header-2.7.1.h:13233
next_info
#define next_info
ssize_t
_ssize_t ssize_t
Definition: rb_mjit_min_header-2.7.1.h:1329
getGB4bt2
#define getGB4bt2(a)
Definition: transcode_data.h:78
rb_enc_find_index
int rb_enc_find_index(const char *name)
Definition: encoding.c:693
fallback_func
VALUE(* fallback_func)(VALUE obj, VALUE name)
Definition: variable.c:127
rb_transcoding::ptr
unsigned char * ptr
Definition: transcode.c:68
RUBY_TYPED_FREE_IMMEDIATELY
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1207
rb_transcoding::state
union rb_transcoding::rb_transcoding_state_t state
memset
void * memset(void *, int, size_t)
ENC_CODERANGE_7BIT
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
MEMCPY
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1753
rb_transcoding
struct rb_transcoding rb_transcoding
transcode_data.h
econv_finished
@ econv_finished
Definition: encoding.h:302
rb_econv_init_by_convpath_t::index
int index
Definition: transcode.c:3177
rb_hash_aset
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:2847
int
__inline__ int
Definition: rb_mjit_min_header-2.7.1.h:2839
rb_cString
RUBY_EXTERN VALUE rb_cString
Definition: ruby.h:2044
rb_econv_t::flags
int flags
Definition: transcode.c:112
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
memcpy
void * memcpy(void *__restrict, const void *__restrict, size_t)
ZERObt
#define ZERObt
Definition: transcode_data.h:34
snprintf
int snprintf(char *__restrict, size_t, const char *__restrict,...) __attribute__((__format__(__printf__
fail
#define fail()
Definition: date_strptime.c:123
rb_econv_t::readagain_len
size_t readagain_len
Definition: transcode.c:141
argc
int argc
Definition: ruby.c:222
rb_econv_encoding_to_insert_output
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
Definition: transcode.c:1486
rb_econv_str_convert
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
Definition: transcode.c:1860
rb_obj_classname
const char * rb_obj_classname(VALUE)
Definition: variable.c:289
UNDEF
#define UNDEF
Definition: transcode_data.h:33
econv_undefined_conversion
@ econv_undefined_conversion
Definition: encoding.h:299
rb_econv_t::num_allocated
int num_allocated
Definition: transcode.c:128
REALLOC_N
#define REALLOC_N(var, type, n)
Definition: ruby.h:1667
rb_define_const
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2880
err
int err
Definition: win32.c:135
rb_econv_t::in_data_start
unsigned char * in_data_start
Definition: transcode.c:123
rb_data_type_struct
Definition: ruby.h:1148
xfree
#define xfree
Definition: defines.h:216
rb_econv_init_by_convpath_t::ret
int ret
Definition: transcode.c:3178
v
int VALUE v
Definition: rb_mjit_min_header-2.7.1.h:12337
econv_destination_buffer_full
@ econv_destination_buffer_full
Definition: encoding.h:300
search_path_bfs_t::queue
search_path_queue_t * queue
Definition: transcode.c:250
BL_MAX_BYTE
#define BL_MAX_BYTE
transcoder_entry_t::transcoder
const rb_transcoder * transcoder
Definition: transcode.c:160
rb_check_typeddata
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:889
Qtrue
#define Qtrue
Definition: ruby.h:468
rb_str_catf
VALUE rb_str_catf(VALUE str, const char *format,...)
Definition: sprintf.c:1237
rb_obj_is_method
VALUE rb_obj_is_method(VALUE)
Definition: proc.c:1459
rb_econv_decorate_at_first
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
Definition: transcode.c:1891
SUSPEND
#define SUSPEND(ret, num)
OBJ_FROZEN
#define OBJ_FROZEN(x)
Definition: ruby.h:1375
len
uint8_t len
Definition: escape.c:17
FUNii
#define FUNii
Definition: transcode_data.h:35
SYMBOL_P
#define SYMBOL_P(x)
Definition: ruby.h:413
encoding_equal
#define encoding_equal(enc1, enc2)
Definition: transcode.c:241
rb_econv_elem_t::out_buf_start
unsigned char * out_buf_start
Definition: transcode.c:104
rb_transcoder::asciicompat_type
rb_transcoder_asciicompat_type_t asciicompat_type
Definition: transcode_data.h:110
MEMMOVE
#define MEMMOVE(p1, p2, type, n)
Definition: ruby.h:1754
rb_transcoding::next_info
VALUE next_info
Definition: transcode.c:60
rb_ivar_set
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
trans_open_t::num_additional
int num_additional
Definition: transcode.c:956
TRANSCODING_READBUF
#define TRANSCODING_READBUF(tc)
Definition: transcode.c:84
search_path_bfs_t::queue_last_ptr
search_path_queue_t ** queue_last_ptr
Definition: transcode.c:251
rb_transcoding::writebuf
union rb_transcoding::@167 writebuf
rb_transcoding::flags
int flags
Definition: transcode.c:56
rb_define_class_under
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition: class.c:698
rb_sym2str
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
rb_econv_memsize
size_t rb_econv_memsize(rb_econv_t *ec)
Definition: transcode.c:1703
rb_econv_t::elems
rb_econv_elem_t * elems
Definition: transcode.c:126
NOMAP
#define NOMAP
Definition: transcode_data.h:27
rb_econv_elem_t::out_buf_end
unsigned char * out_buf_end
Definition: transcode.c:107
rb_econv_check_error
void rb_econv_check_error(rb_econv_t *ec)
Definition: transcode.c:4224
ECONV_INVALID_REPLACE
#define ECONV_INVALID_REPLACE
Definition: encoding.h:394
rb_ary_new
VALUE rb_ary_new(void)
Definition: array.c:723
Init_transcode
void Init_transcode(void)
Definition: transcode.c:4404
NUM2INT
#define NUM2INT(x)
Definition: ruby.h:715
Qnil
#define Qnil
Definition: ruby.h:469
rb_econv_set_replacement
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
Definition: transcode.c:2181
st_lookup
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1101
search_path_bfs_t
Definition: transcode.c:248
GB4bt
#define GB4bt
Definition: transcode_data.h:40
asciicompat_encoding_t::ascii_incompat_name
const char * ascii_incompat_name
Definition: transcode.c:1748
STR1
#define STR1
Definition: transcode_data.h:39
rb_proc_call
VALUE rb_proc_call(VALUE, VALUE)
Definition: proc.c:966
search_path_bfs_t::visited
st_table * visited
Definition: transcode.c:249
RB_GC_GUARD
#define RB_GC_GUARD(v)
Definition: ruby.h:585
rb_transcoding::ary
unsigned char ary[8]
Definition: transcode.c:67
rb_str_coderange_scan_restartable
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
Definition: string.c:567
rb_econv_t::destination_encoding
const char * destination_encoding
Definition: transcode.c:138
rb_str_tmp_new
VALUE rb_str_tmp_new(long)
Definition: string.c:1343
rb_hash_freeze
VALUE rb_hash_freeze(VALUE hash)
Definition: hash.c:87
asciicompat_encoder
@ asciicompat_encoder
Definition: transcode_data.h:91
RSTRING_LEN
#define RSTRING_LEN(str)
Definition: ruby.h:1005
st_free_table
void st_free_table(st_table *tab)
Definition: st.c:709
st_table
Definition: st.h:79
rb_econv_t::source_encoding
const char * source_encoding
Definition: transcode.c:137
INT_MAX
#define INT_MAX
Definition: rb_mjit_min_header-2.7.1.h:4052
transcoder_entry_t::sname
const char * sname
Definition: transcode.c:157
rb_transcoding::next_table
unsigned int next_table
Definition: transcode.c:59
rb_enc_str_new
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
Definition: string.c:796
rb_enc_associate
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:866
rb_define_alloc_func
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
RTEST
#define RTEST(v)
Definition: ruby.h:481
rb_econv_insert_output
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
Definition: transcode.c:1570
ECONV_PARTIAL_INPUT
#define ECONV_PARTIAL_INPUT
Definition: encoding.h:415
rb_econv_result_t
rb_econv_result_t
Definition: encoding.h:297
rb_transcoding::transcoder
const rb_transcoder * transcoder
Definition: transcode.c:54
hash_fallback
#define hash_fallback
Definition: transcode.c:2228
rb_enc_associate_index
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:838
rb_transcoder::dst_encoding
const char * dst_encoding
Definition: transcode_data.h:100
RSTRING_END
#define RSTRING_END(str)
Definition: ruby.h:1013
TRANSCODING_STATE
#define TRANSCODING_STATE(tc)
Definition: transcode.c:97
rb_econv_t
struct rb_econv_t rb_econv_t
Definition: encoding.h:307
src
__inline__ const void *__restrict src
Definition: rb_mjit_min_header-2.7.1.h:2836
name
const char * name
Definition: nkf.c:208