Ruby  2.7.0p0(2019-12-25revision647ee6f091eafcce70ffb75ddf7e121e192ab217)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author$
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "internal.h"
14 #include "encindex.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #include "ruby/util.h"
18 
19 #include "ruby_assert.h"
20 #ifndef ENC_DEBUG
21 #define ENC_DEBUG 0
22 #endif
23 #define ENC_ASSERT(expr) RUBY_ASSERT_WHEN(ENC_DEBUG, expr)
24 #define MUST_STRING(str) (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)
25 
26 #undef rb_ascii8bit_encindex
27 #undef rb_utf8_encindex
28 #undef rb_usascii_encindex
29 
31 
32 #if defined __GNUC__ && __GNUC__ >= 4
33 #pragma GCC visibility push(default)
34 int rb_enc_register(const char *name, rb_encoding *encoding);
35 void rb_enc_set_base(const char *name, const char *orig);
36 int rb_enc_set_dummy(int index);
37 void rb_encdb_declare(const char *name);
38 int rb_encdb_replicate(const char *name, const char *orig);
39 int rb_encdb_dummy(const char *name);
40 int rb_encdb_alias(const char *alias, const char *orig);
41 void rb_encdb_set_unicode(int index);
42 #pragma GCC visibility pop
43 #endif
44 
45 static ID id_encoding;
47 static VALUE rb_encoding_list;
48 
50  const char *name;
53 };
54 
55 static struct {
57  int count;
58  int size;
60 } enc_table;
61 
/* Bit 24 of ruby_encoding_index is the "dummy" marker (tested by ENC_DUMMY_P, set by ENC_SET_DUMMY). */
62 #define ENC_DUMMY_FLAG (1<<24)
/* Low 24 bits of ruby_encoding_index: the part ENC_TO_ENCINDEX keeps as the table index. */
63 #define ENC_INDEX_MASK (~(~0U<<24))
64 
/* Table index of enc with every bit above the low 24 masked off. */
65 #define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
/* Non-zero (the raw flag bit, not 1) when enc carries the dummy marker. */
66 #define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
/* Set the dummy marker on enc in place. */
67 #define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
68 
/* Alias for ENCINDEX_BUILTIN_MAX. */
69 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
/* Sentinel index value; lookup helpers in this file are documented as returning an index or this. */
70 #define UNSPECIFIED_ENCODING INT_MAX
71 
/* Longest accepted encoding name, in bytes, not counting the terminating NUL. */
72 #define ENCODING_NAMELEN_MAX 63
/* True when name is non-NULL and no longer than ENCODING_NAMELEN_MAX. Evaluates name twice. */
73 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
74 
/* True when rb_enc_mbmaxlen(enc) is zero; this file treats that as "registered but not loaded yet". */
75 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
76 
77 static int load_encoding(const char *name);
78 
79 static const rb_data_type_t encoding_data_type = {
80  "encoding",
81  {0, 0, 0,},
83 };
84 
85 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
86 #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
87 
88 int
90 {
91  return is_data_encoding(obj);
92 }
93 
94 static VALUE
95 enc_new(rb_encoding *encoding)
96 {
97  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding);
98 }
99 
100 static VALUE
101 rb_enc_from_encoding_index(int idx)
102 {
103  VALUE list, enc;
104 
105  if (!(list = rb_encoding_list)) {
106  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
107  }
108  enc = rb_ary_entry(list, idx);
109  if (NIL_P(enc)) {
110  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
111  }
112  return enc;
113 }
114 
115 VALUE
117 {
118  int idx;
119  if (!encoding) return Qnil;
120  idx = ENC_TO_ENCINDEX(encoding);
121  return rb_enc_from_encoding_index(idx);
122 }
123 
124 int
126 {
127  return enc ? ENC_TO_ENCINDEX(enc) : 0;
128 }
129 
130 int
132 {
133  return ENC_DUMMY_P(enc) != 0;
134 }
135 
136 static int enc_autoload(rb_encoding *);
137 
138 static int
139 check_encoding(rb_encoding *enc)
140 {
141  int index = rb_enc_to_index(enc);
142  if (rb_enc_from_index(index) != enc)
143  return -1;
144  if (enc_autoload_p(enc)) {
145  index = enc_autoload(enc);
146  }
147  return index;
148 }
149 
150 static int
151 enc_check_encoding(VALUE obj)
152 {
153  if (!is_obj_encoding(obj)) {
154  return -1;
155  }
156  return check_encoding(RDATA(obj)->data);
157 }
158 
159 NORETURN(static void not_encoding(VALUE enc));
160 static void
161 not_encoding(VALUE enc)
162 {
163  rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Encoding)",
164  rb_obj_class(enc));
165 }
166 
167 static rb_encoding *
168 must_encoding(VALUE enc)
169 {
170  int index = enc_check_encoding(enc);
171  if (index < 0) {
172  not_encoding(enc);
173  }
174  return DATA_PTR(enc);
175 }
176 
177 static rb_encoding *
178 must_encindex(int index)
179 {
181  if (!enc) {
182  rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
183  index);
184  }
185  if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
186  rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
188  }
189  if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
190  rb_loaderror("failed to load encoding (%s)",
191  rb_enc_name(enc));
192  }
193  return enc;
194 }
195 
196 int
198 {
199  int idx;
200 
201  idx = enc_check_encoding(enc);
202  if (idx >= 0) {
203  return idx;
204  }
205  else if (NIL_P(enc = rb_check_string_type(enc))) {
206  return -1;
207  }
209  return -1;
210  }
212 }
213 
214 /* Returns encoding index or UNSPECIFIED_ENCODING */
215 static int
216 str_find_encindex(VALUE enc)
217 {
218  int idx;
219 
220  StringValue(enc);
222  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
223  }
225  return idx;
226 }
227 
228 static int
229 str_to_encindex(VALUE enc)
230 {
231  int idx = str_find_encindex(enc);
232  if (idx < 0) {
233  rb_raise(rb_eArgError, "unknown encoding name - %"PRIsVALUE, enc);
234  }
235  return idx;
236 }
237 
238 static rb_encoding *
239 str_to_encoding(VALUE enc)
240 {
241  return rb_enc_from_index(str_to_encindex(enc));
242 }
243 
244 rb_encoding *
246 {
247  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
248  return str_to_encoding(enc);
249 }
250 
251 rb_encoding *
253 {
254  int idx;
255  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
256  idx = str_find_encindex(enc);
257  if (idx < 0) return NULL;
258  return rb_enc_from_index(idx);
259 }
260 
261 static int
262 enc_table_expand(int newsize)
263 {
264  struct rb_encoding_entry *ent;
265  int count = newsize;
266 
267  if (enc_table.size >= newsize) return newsize;
268  newsize = (newsize + 7) / 8 * 8;
269  ent = REALLOC_N(enc_table.list, struct rb_encoding_entry, newsize);
270  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
271  enc_table.list = ent;
272  enc_table.size = newsize;
273  return count;
274 }
275 
276 static int
277 enc_register_at(int index, const char *name, rb_encoding *base_encoding)
278 {
279  struct rb_encoding_entry *ent = &enc_table.list[index];
280  rb_raw_encoding *encoding;
281  VALUE list;
282 
283  if (!valid_encoding_name_p(name)) return -1;
284  if (!ent->name) {
285  ent->name = name = strdup(name);
286  }
287  else if (STRCASECMP(name, ent->name)) {
288  return -1;
289  }
290  encoding = (rb_raw_encoding *)ent->enc;
291  if (!encoding) {
292  encoding = xmalloc(sizeof(rb_encoding));
293  }
294  if (base_encoding) {
295  *encoding = *base_encoding;
296  }
297  else {
298  memset(encoding, 0, sizeof(*ent->enc));
299  }
300  encoding->name = name;
301  encoding->ruby_encoding_index = index;
302  ent->enc = encoding;
303  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
304  list = rb_encoding_list;
305  if (list && NIL_P(rb_ary_entry(list, index))) {
306  /* initialize encoding data */
307  rb_ary_store(list, index, enc_new(encoding));
308  }
309  return index;
310 }
311 
312 static int
313 enc_register(const char *name, rb_encoding *encoding)
314 {
315  int index = enc_table.count;
316 
317  if ((index = enc_table_expand(index + 1)) < 0) return -1;
318  enc_table.count = index;
319  return enc_register_at(index - 1, name, encoding);
320 }
321 
322 static void set_encoding_const(const char *, rb_encoding *);
323 int rb_enc_registered(const char *name);
324 
325 int
326 rb_enc_register(const char *name, rb_encoding *encoding)
327 {
329 
330  if (index >= 0) {
332  if (STRCASECMP(name, rb_enc_name(oldenc))) {
333  index = enc_register(name, encoding);
334  }
335  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
336  enc_register_at(index, name, encoding);
337  }
338  else {
339  rb_raise(rb_eArgError, "encoding %s is already registered", name);
340  }
341  }
342  else {
343  index = enc_register(name, encoding);
344  set_encoding_const(name, rb_enc_from_index(index));
345  }
346  return index;
347 }
348 
349 void
350 rb_encdb_declare(const char *name)
351 {
352  int idx = rb_enc_registered(name);
353  if (idx < 0) {
354  idx = enc_register(name, 0);
355  }
356  set_encoding_const(name, rb_enc_from_index(idx));
357 }
358 
359 static void
360 enc_check_duplication(const char *name)
361 {
362  if (rb_enc_registered(name) >= 0) {
363  rb_raise(rb_eArgError, "encoding %s is already registered", name);
364  }
365 }
366 
367 static rb_encoding*
368 set_base_encoding(int index, rb_encoding *base)
369 {
370  rb_encoding *enc = enc_table.list[index].enc;
371 
372  enc_table.list[index].base = base;
374  return enc;
375 }
376 
377 /* for encdb.h
378  * Set base encoding for encodings which are not replicas
379  * but not in their own files.
380  */
381 void
382 rb_enc_set_base(const char *name, const char *orig)
383 {
384  int idx = rb_enc_registered(name);
385  int origidx = rb_enc_registered(orig);
386  set_base_encoding(idx, rb_enc_from_index(origidx));
387 }
388 
389 /* for encdb.h
390  * Set encoding dummy.
391  */
392 int
394 {
395  rb_encoding *enc = enc_table.list[index].enc;
396 
398  return index;
399 }
400 
401 int
402 rb_enc_replicate(const char *name, rb_encoding *encoding)
403 {
404  int idx;
405 
406  enc_check_duplication(name);
407  idx = enc_register(name, encoding);
408  set_base_encoding(idx, encoding);
409  set_encoding_const(name, rb_enc_from_index(idx));
410  return idx;
411 }
412 
413 /*
414  * call-seq:
415  * enc.replicate(name) -> encoding
416  *
417  * Returns a replicated encoding of _enc_ whose name is _name_.
418  * The new encoding should have the same byte structure as _enc_.
419  * If _name_ is used by another encoding, raises ArgumentError.
420  *
421  */
422 static VALUE
423 enc_replicate(VALUE encoding, VALUE name)
424 {
425  return rb_enc_from_encoding_index(
427  rb_to_encoding(encoding)));
428 }
429 
430 static int
431 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
432 {
433  if (idx < 0) {
434  idx = enc_register(name, origenc);
435  }
436  else {
437  idx = enc_register_at(idx, name, origenc);
438  }
439  if (idx >= 0) {
440  set_base_encoding(idx, origenc);
441  set_encoding_const(name, rb_enc_from_index(idx));
442  }
443  else {
444  rb_raise(rb_eArgError, "failed to replicate encoding");
445  }
446  return idx;
447 }
448 
/*
 * Replicate the encoding named orig under the name name.
 * orig is registered with no base contents first if it is not known yet.
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int base_idx = rb_enc_registered(orig);
    const int own_idx = rb_enc_registered(name);

    if (base_idx < 0) base_idx = enc_register(orig, 0);
    return enc_replicate_with_index(name, rb_enc_from_index(base_idx), own_idx);
}
460 
461 int
463 {
465  rb_encoding *enc = enc_table.list[index].enc;
466 
468  return index;
469 }
470 
471 int
472 rb_encdb_dummy(const char *name)
473 {
474  int index = enc_replicate_with_index(name, rb_ascii8bit_encoding(),
476  rb_encoding *enc = enc_table.list[index].enc;
477 
479  return index;
480 }
481 
482 /*
483  * call-seq:
484  * enc.dummy? -> true or false
485  *
486  * Returns true for dummy encodings.
487  * A dummy encoding is an encoding for which character handling is not properly
488  * implemented.
489  * It is used for stateful encodings.
490  *
491  * Encoding::ISO_2022_JP.dummy? #=> true
492  * Encoding::UTF_8.dummy? #=> false
493  *
494  */
495 static VALUE
496 enc_dummy_p(VALUE enc)
497 {
498  return ENC_DUMMY_P(must_encoding(enc)) ? Qtrue : Qfalse;
499 }
500 
501 /*
502  * call-seq:
503  * enc.ascii_compatible? -> true or false
504  *
505  * Returns whether the encoding is ASCII-compatible or not.
506  *
507  * Encoding::UTF_8.ascii_compatible? #=> true
508  * Encoding::UTF_16BE.ascii_compatible? #=> false
509  *
510  */
511 static VALUE
512 enc_ascii_compatible_p(VALUE enc)
513 {
514  return rb_enc_asciicompat(must_encoding(enc)) ? Qtrue : Qfalse;
515 }
516 
517 /*
518  * Returns non-zero when the encoding is Unicode series other than UTF-7 else 0.
519  */
520 int
522 {
523  return ONIGENC_IS_UNICODE(enc);
524 }
525 
526 static st_data_t
527 enc_dup_name(st_data_t name)
528 {
529  return (st_data_t)strdup((const char *)name);
530 }
531 
532 /*
533  * Returns copied alias name when the key is added for st_table,
534  * else returns NULL.
535  */
536 static int
537 enc_alias_internal(const char *alias, int idx)
538 {
539  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
540  enc_dup_name);
541 }
542 
/*
 * Add alias for the encoding at idx.
 * Returns -1 for an invalid alias name, idx otherwise.  The constant for
 * the alias is defined only when enc_alias_internal() returns zero.
 */
static int
enc_alias(const char *alias, int idx)
{
    if (!valid_encoding_name_p(alias)) return -1;

    if (enc_alias_internal(alias, idx) == 0) {
        set_encoding_const(alias, rb_enc_from_index(idx));
    }
    return idx;
}
551 
/*
 * Define alias as another name for the encoding called orig.
 * Raises ArgumentError if alias is already registered; returns -1 when
 * orig cannot be found, otherwise the result of enc_alias().
 */
int
rb_enc_alias(const char *alias, const char *orig)
{
    int target;

    enc_check_duplication(alias);
    target = rb_enc_find_index(orig);
    return (target < 0) ? -1 : enc_alias(alias, target);
}
563 
/*
 * Define alias for the encoding called orig, registering orig with no
 * base contents first when it is not in the table yet.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int target = rb_enc_registered(orig);

    if (target < 0) target = enc_register(orig, 0);
    return enc_alias(alias, target);
}
574 
575 void
577 {
579 }
580 
581 void
583 {
584  enc_table_expand(ENCODING_COUNT + 1);
585  if (!enc_table.names) {
586  enc_table.names = st_init_strcasetable();
587  }
588 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
591  ENC_REGISTER(US_ASCII);
592 #undef ENC_REGISTER
593 #define ENCDB_REGISTER(name, enc) enc_register_at(ENCINDEX_##enc, name, NULL)
594  ENCDB_REGISTER("UTF-16BE", UTF_16BE);
595  ENCDB_REGISTER("UTF-16LE", UTF_16LE);
596  ENCDB_REGISTER("UTF-32BE", UTF_32BE);
597  ENCDB_REGISTER("UTF-32LE", UTF_32LE);
598  ENCDB_REGISTER("UTF-16", UTF_16);
599  ENCDB_REGISTER("UTF-32", UTF_32);
600  ENCDB_REGISTER("UTF8-MAC", UTF8_MAC);
601 
602  ENCDB_REGISTER("EUC-JP", EUC_JP);
603  ENCDB_REGISTER("Windows-31J", Windows_31J);
604 #undef ENCDB_REGISTER
605  enc_table.count = ENCINDEX_BUILTIN_MAX;
606 }
607 
608 rb_encoding *
610 {
611  if (UNLIKELY(index < 0 || enc_table.count <= (index &= ENC_INDEX_MASK))) {
612  return 0;
613  }
614  return enc_table.list[index].enc;
615 }
616 
617 rb_encoding *
619 {
620  return must_encindex(index);
621 }
622 
623 int
625 {
626  st_data_t idx = 0;
627 
628  if (!name) return -1;
629  if (!enc_table.list) return -1;
630  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
631  return (int)idx;
632  }
633  return -1;
634 }
635 
636 static int
637 load_encoding(const char *name)
638 {
639  VALUE enclib = rb_sprintf("enc/%s.so", name);
642  VALUE errinfo;
643  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
644  int loaded;
645  int idx;
646 
647  while (s < e) {
648  if (!ISALNUM(*s)) *s = '_';
649  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
650  ++s;
651  }
652  enclib = rb_fstring(enclib);
654  ruby_debug = Qfalse;
655  errinfo = rb_errinfo();
656  loaded = rb_require_internal(enclib);
658  ruby_debug = debug;
659  rb_set_errinfo(errinfo);
660  if (loaded < 0 || 1 < loaded) return -1;
661  if ((idx = rb_enc_registered(name)) < 0) return -1;
662  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
663  return idx;
664 }
665 
/*
 * Finish loading an encoding that is registered but not usable yet.
 * When the table entry records a base encoding, the base is loaded first
 * (recursively) and its contents are copied over enc; otherwise the
 * encoding is loaded by name through load_encoding().
 * Returns the encoding's table index, or -1 on failure.
 */
666 static int
667 enc_autoload(rb_encoding *enc)
668 {
669  int i;
670  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
671 
672  if (base) {
 /* Scan the table for the entry whose enc is base; fail with -1 once i reaches enc_table.count. */
673  i = 0;
674  do {
675  if (i >= enc_table.count) return -1;
676  } while (enc_table.list[i].enc != base && (++i, 1));
677  if (enc_autoload_p(base)) {
678  if (enc_autoload(base) < 0) return -1;
679  }
 /* Keep the full ruby_encoding_index (flag bits included): enc_register_at()
  * stores the bare index into it, so the saved value is written back afterwards. */
680  i = enc->ruby_encoding_index;
681  enc_register_at(i & ENC_INDEX_MASK, rb_enc_name(enc), base);
682  ((rb_raw_encoding *)enc)->ruby_encoding_index = i;
 /* The return value is the plain table index without flag bits. */
683  i &= ENC_INDEX_MASK;
684  }
685  else {
 /* No base recorded: load by name and return load_encoding()'s result. */
686  i = load_encoding(rb_enc_name(enc));
687  }
688  return i;
689 }
690 
691 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
692 int
694 {
695  int i = rb_enc_registered(name);
696  rb_encoding *enc;
697 
698  if (i < 0) {
699  i = load_encoding(name);
700  }
701  else if (!(enc = rb_enc_from_index(i))) {
702  if (i != UNSPECIFIED_ENCODING) {
703  rb_raise(rb_eArgError, "encoding %s is not registered", name);
704  }
705  }
706  else if (enc_autoload_p(enc)) {
707  if (enc_autoload(enc) < 0) {
708  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
709  name);
710  return 0;
711  }
712  }
713  return i;
714 }
715 
716 int
717 rb_enc_find_index2(const char *name, long len)
718 {
719  char buf[ENCODING_NAMELEN_MAX+1];
720 
721  if (len > ENCODING_NAMELEN_MAX) return -1;
722  memcpy(buf, name, len);
723  buf[len] = '\0';
724  return rb_enc_find_index(buf);
725 }
726 
727 rb_encoding *
728 rb_enc_find(const char *name)
729 {
730  int idx = rb_enc_find_index(name);
731  if (idx < 0) idx = 0;
732  return rb_enc_from_index(idx);
733 }
734 
735 static inline int
736 enc_capable(VALUE obj)
737 {
738  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
739  switch (BUILTIN_TYPE(obj)) {
740  case T_STRING:
741  case T_REGEXP:
742  case T_FILE:
743  case T_SYMBOL:
744  return TRUE;
745  case T_DATA:
746  if (is_data_encoding(obj)) return TRUE;
747  default:
748  return FALSE;
749  }
750 }
751 
752 int
754 {
755  return enc_capable(obj);
756 }
757 
758 ID
760 {
761  CONST_ID(id_encoding, "encoding");
762  return id_encoding;
763 }
764 
765 static int
766 enc_get_index_str(VALUE str)
767 {
768  int i = ENCODING_GET_INLINED(str);
769  if (i == ENCODING_INLINE_MAX) {
770  VALUE iv;
771 
772  iv = rb_ivar_get(str, rb_id_encoding());
773  i = NUM2INT(iv);
774  }
775  return i;
776 }
777 
778 int
780 {
781  int i = -1;
782  VALUE tmp;
783 
784  if (SPECIAL_CONST_P(obj)) {
785  if (!SYMBOL_P(obj)) return -1;
786  obj = rb_sym2str(obj);
787  }
788  switch (BUILTIN_TYPE(obj)) {
789  case T_STRING:
790  case T_SYMBOL:
791  case T_REGEXP:
792  i = enc_get_index_str(obj);
793  break;
794  case T_FILE:
795  tmp = rb_funcallv(obj, rb_intern("internal_encoding"), 0, 0);
796  if (NIL_P(tmp)) {
797  tmp = rb_funcallv(obj, rb_intern("external_encoding"), 0, 0);
798  }
799  if (is_obj_encoding(tmp)) {
800  i = enc_check_encoding(tmp);
801  }
802  break;
803  case T_DATA:
804  if (is_data_encoding(obj)) {
805  i = enc_check_encoding(obj);
806  }
807  break;
808  default:
809  break;
810  }
811  return i;
812 }
813 
814 static void
815 enc_set_index(VALUE obj, int idx)
816 {
817  if (!enc_capable(obj)) {
818  rb_raise(rb_eArgError, "cannot set encoding on non-encoding capable object");
819  }
820 
821  if (idx < ENCODING_INLINE_MAX) {
823  return;
824  }
827 }
828 
829 void
831 {
833  must_encindex(idx);
834  enc_set_index(obj, idx);
835 }
836 
837 VALUE
839 {
840  rb_encoding *enc;
841  int oldidx, oldtermlen, termlen;
842 
843 /* enc_check_capable(obj);*/
845  oldidx = rb_enc_get_index(obj);
846  if (oldidx == idx)
847  return obj;
848  if (SPECIAL_CONST_P(obj)) {
849  rb_raise(rb_eArgError, "cannot set encoding");
850  }
851  enc = must_encindex(idx);
855  }
856  termlen = rb_enc_mbminlen(enc);
857  oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
858  if (oldtermlen != termlen && RB_TYPE_P(obj, T_STRING)) {
859  rb_str_change_terminator_length(obj, oldtermlen, termlen);
860  }
861  enc_set_index(obj, idx);
862  return obj;
863 }
864 
865 VALUE
867 {
869 }
870 
873 {
875 }
876 
877 static rb_encoding* enc_compatible_str(VALUE str1, VALUE str2);
878 
881 {
882  rb_encoding *enc = enc_compatible_str(MUST_STRING(str1), MUST_STRING(str2));
883  if (!enc)
884  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
885  rb_enc_name(rb_enc_get(str1)),
886  rb_enc_name(rb_enc_get(str2)));
887  return enc;
888 }
889 
892 {
893  rb_encoding *enc = rb_enc_compatible(str1, str2);
894  if (!enc)
895  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
896  rb_enc_name(rb_enc_get(str1)),
897  rb_enc_name(rb_enc_get(str2)));
898  return enc;
899 }
900 
/*
 * Decide which encoding two objects with different encoding indexes have
 * in common.  idx1/idx2 are the encoding indexes of str1/str2.
 * Returns the chosen encoding, or 0 when none of the rules below applies.
 */
901 static rb_encoding*
902 enc_compatible_latter(VALUE str1, VALUE str2, int idx1, int idx2)
903 {
904  int isstr1, isstr2;
905  rb_encoding *enc1 = rb_enc_from_index(idx1);
906  rb_encoding *enc2 = rb_enc_from_index(idx2);
907 
 /* An empty String on the right-hand side: the left encoding is returned. */
908  isstr2 = RB_TYPE_P(str2, T_STRING);
909  if (isstr2 && RSTRING_LEN(str2) == 0)
910  return enc1;
 /* An empty String on the left: enc1 only if it is ASCII-compatible and str2 is ASCII-only, else enc2. */
911  isstr1 = RB_TYPE_P(str1, T_STRING);
912  if (isstr1 && RSTRING_LEN(str1) == 0)
913  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
 /* Every remaining rule requires both encodings to be ASCII-compatible. */
914  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
915  return 0;
916  }
917 
918  /* non-String objects tagged US-ASCII: the other side's encoding is returned */
919  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
920  return enc1;
921  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
922  return enc2;
923 
 /* Swap the operands (values, indexes and flags) when str1 is not a String. */
924  if (!isstr1) {
925  VALUE tmp = str1;
926  int idx0 = idx1;
927  str1 = str2;
928  str2 = tmp;
929  idx1 = idx2;
930  idx2 = idx0;
931  idx0 = isstr1;
932  isstr1 = isstr2;
933  isstr2 = idx0;
934  }
 /* NOTE(review): enc1/enc2 are not swapped along with the operands above — confirm this is intended. */
935  if (isstr1) {
936  int cr1, cr2;
937 
938  cr1 = rb_enc_str_coderange(str1);
939  if (isstr2) {
940  cr2 = rb_enc_str_coderange(str2);
941  if (cr1 != cr2) {
942  /* may need to handle ENC_CODERANGE_BROKEN */
943  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
944  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
945  }
 /* Reached with equal code ranges too: both 7-bit yields enc1. */
946  if (cr2 == ENC_CODERANGE_7BIT) {
947  return enc1;
948  }
949  }
950  if (cr1 == ENC_CODERANGE_7BIT)
951  return enc2;
952  }
953  return 0;
954 }
955 
956 static rb_encoding*
957 enc_compatible_str(VALUE str1, VALUE str2)
958 {
959  int idx1 = enc_get_index_str(str1);
960  int idx2 = enc_get_index_str(str2);
961 
962  if (idx1 < 0 || idx2 < 0)
963  return 0;
964 
965  if (idx1 == idx2) {
966  return rb_enc_from_index(idx1);
967  }
968  else {
969  return enc_compatible_latter(str1, str2, idx1, idx2);
970  }
971 }
972 
975 {
976  int idx1 = rb_enc_get_index(str1);
977  int idx2 = rb_enc_get_index(str2);
978 
979  if (idx1 < 0 || idx2 < 0)
980  return 0;
981 
982  if (idx1 == idx2) {
983  return rb_enc_from_index(idx1);
984  }
985 
986  return enc_compatible_latter(str1, str2, idx1, idx2);
987 }
988 
989 void
991 {
993 }
994 
995 
996 /*
997  * call-seq:
998  * obj.encoding -> encoding
999  *
1000  * Returns the Encoding object that represents the encoding of obj.
1001  */
1002 
1003 VALUE
1005 {
1006  int idx = rb_enc_get_index(obj);
1007  if (idx < 0) {
1008  rb_raise(rb_eTypeError, "unknown encoding");
1009  }
1010  return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
1011 }
1012 
1013 int
1014 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
1015 {
1016  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1017 }
1018 
1019 int
1020 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
1021 {
1022  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1023  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
1024  return MBCLEN_CHARFOUND_LEN(n);
1025  else {
1026  int min = rb_enc_mbminlen(enc);
1027  return min <= e-p ? min : (int)(e-p);
1028  }
1029 }
1030 
1031 int
1032 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
1033 {
1034  int n;
1035  if (e <= p)
1038  if (e-p < n)
1039  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
1040  return n;
1041 }
1042 
1043 int
1044 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
1045 {
1046  unsigned int c;
1047  int l;
1048  if (e <= p)
1049  return -1;
1050  if (rb_enc_asciicompat(enc)) {
1051  c = (unsigned char)*p;
1052  if (!ISASCII(c))
1053  return -1;
1054  if (len) *len = 1;
1055  return c;
1056  }
1057  l = rb_enc_precise_mbclen(p, e, enc);
1058  if (!MBCLEN_CHARFOUND_P(l))
1059  return -1;
1060  c = rb_enc_mbc_to_codepoint(p, e, enc);
1061  if (!rb_enc_isascii(c, enc))
1062  return -1;
1063  if (len) *len = l;
1064  return c;
1065 }
1066 
1067 unsigned int
1068 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
1069 {
1070  int r;
1071  if (e <= p)
1072  rb_raise(rb_eArgError, "empty string");
1073  r = rb_enc_precise_mbclen(p, e, enc);
1074  if (!MBCLEN_CHARFOUND_P(r)) {
1075  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
1076  }
1077  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
1078  return rb_enc_mbc_to_codepoint(p, e, enc);
1079 }
1080 
1081 #undef rb_enc_codepoint
1082 unsigned int
1083 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
1084 {
1085  return rb_enc_codepoint_len(p, e, 0, enc);
1086 }
1087 
1088 int
1090 {
1091  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
1092  if (n == 0) {
1093  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
1094  }
1095  return n;
1096 }
1097 
1098 #undef rb_enc_code_to_mbclen
1099 int
1101 {
1102  return ONIGENC_CODE_TO_MBCLEN(enc, code);
1103 }
1104 
1105 int
1107 {
1109 }
1110 
1111 int
1113 {
1115 }
1116 
1117 /*
1118  * call-seq:
1119  * enc.inspect -> string
1120  *
1121  * Returns a string which represents the encoding for programmers.
1122  *
1123  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1124  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1125  */
1126 static VALUE
1127 enc_inspect(VALUE self)
1128 {
1129  rb_encoding *enc;
1130 
1131  if (!is_data_encoding(self)) {
1132  not_encoding(self);
1133  }
1134  if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
1135  rb_raise(rb_eTypeError, "broken Encoding");
1136  }
1138  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
1139  rb_enc_name(enc),
1140  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
1141  enc_autoload_p(enc) ? " (autoload)" : "");
1142 }
1143 
1144 /*
1145  * call-seq:
1146  * enc.name -> string
1147  * enc.to_s -> string
1148  *
1149  * Returns the name of the encoding.
1150  *
1151  * Encoding::UTF_8.name #=> "UTF-8"
1152  */
1153 static VALUE
1154 enc_name(VALUE self)
1155 {
1157 }
1158 
1159 static int
1160 enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
1161 {
1162  VALUE *arg = (VALUE *)args;
1163 
1164  if ((int)idx == (int)arg[0]) {
1165  VALUE str = rb_fstring_cstr((char *)name);
1166  rb_ary_push(arg[1], str);
1167  }
1168  return ST_CONTINUE;
1169 }
1170 
1171 /*
1172  * call-seq:
1173  * enc.names -> array
1174  *
1175  * Returns the list of name and aliases of the encoding.
1176  *
1177  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK"]
1178  */
1179 static VALUE
1180 enc_names(VALUE self)
1181 {
1182  VALUE args[2];
1183 
1184  args[0] = (VALUE)rb_to_encoding_index(self);
1185  args[1] = rb_ary_new2(0);
1186  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1187  return args[1];
1188 }
1189 
1190 /*
1191  * call-seq:
1192  * Encoding.list -> [enc1, enc2, ...]
1193  *
1194  * Returns the list of loaded encodings.
1195  *
1196  * Encoding.list
1197  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1198  * #<Encoding:ISO-2022-JP (dummy)>]
1199  *
1200  * Encoding.find("US-ASCII")
1201  * #=> #<Encoding:US-ASCII>
1202  *
1203  * Encoding.list
1204  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1205  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1206  *
1207  */
1208 static VALUE
1209 enc_list(VALUE klass)
1210 {
1211  VALUE ary = rb_ary_new2(0);
1212  rb_ary_replace(ary, rb_encoding_list);
1213  return ary;
1214 }
1215 
1216 /*
1217  * call-seq:
1218  * Encoding.find(string) -> enc
1219  *
1220  * Searches for the encoding with the specified <i>name</i>.
1221  * <i>name</i> should be a string.
1222  *
1223  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1224  *
1225  * Names which this method accepts are encoding names and aliases,
1226  * including the following special aliases
1227  *
1228  * "external":: default external encoding
1229  * "internal":: default internal encoding
1230  * "locale":: locale encoding
1231  * "filesystem":: filesystem encoding
1232  *
1233  * An ArgumentError is raised when there is no encoding named <i>name</i>.
1234  * Only <code>Encoding.find("internal")</code> however returns nil
1235  * when no encoding named "internal", in other words, when Ruby has no
1236  * default internal encoding.
1237  */
1238 static VALUE
1239 enc_find(VALUE klass, VALUE enc)
1240 {
1241  int idx;
1242  if (is_obj_encoding(enc))
1243  return enc;
1244  idx = str_to_encindex(enc);
1245  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1246  return rb_enc_from_encoding_index(idx);
1247 }
1248 
1249 /*
1250  * call-seq:
1251  * Encoding.compatible?(obj1, obj2) -> enc or nil
1252  *
1253  * Checks the compatibility of two objects.
1254  *
1255  * If the objects are both strings they are compatible when they are
1256  * concatenatable. The encoding of the concatenated string will be returned
1257  * if they are compatible, nil if they are not.
1258  *
1259  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1260  * #=> #<Encoding:ISO-8859-1>
1261  *
1262  * Encoding.compatible?(
1263  * "\xa1".force_encoding("iso-8859-1"),
1264  * "\xa1\xa1".force_encoding("euc-jp"))
1265  * #=> nil
1266  *
1267  * If the objects are non-strings their encodings are compatible when they
1268  * have an encoding and:
1269  * * Either encoding is US-ASCII compatible
1270  * * One of the encodings is a 7-bit encoding
1271  *
1272  */
1273 static VALUE
1274 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1275 {
1276  rb_encoding *enc;
1277 
1278  if (!enc_capable(str1)) return Qnil;
1279  if (!enc_capable(str2)) return Qnil;
1280  enc = rb_enc_compatible(str1, str2);
1281  if (!enc) return Qnil;
1282  return rb_enc_from_encoding(enc);
1283 }
1284 
1285 /* :nodoc: */
1286 static VALUE
1287 enc_s_alloc(VALUE klass)
1288 {
1289  rb_undefined_alloc(klass);
1290  return Qnil;
1291 }
1292 
1293 /* :nodoc: */
1294 static VALUE
1295 enc_dump(int argc, VALUE *argv, VALUE self)
1296 {
1297  rb_check_arity(argc, 0, 1);
1298  return enc_name(self);
1299 }
1300 
1301 /* :nodoc: */
1302 static VALUE
1303 enc_load(VALUE klass, VALUE str)
1304 {
1305  return str;
1306 }
1307 
1308 /* :nodoc: */
1309 static VALUE
1310 enc_m_loader(VALUE klass, VALUE str)
1311 {
1312  return enc_find(klass, str);
1313 }
1314 
1315 rb_encoding *
1317 {
1318  return enc_table.list[ENCINDEX_ASCII].enc;
1319 }
1320 
1321 int
1323 {
1324  return ENCINDEX_ASCII;
1325 }
1326 
1327 rb_encoding *
1329 {
1330  return enc_table.list[ENCINDEX_UTF_8].enc;
1331 }
1332 
1333 int
1335 {
1336  return ENCINDEX_UTF_8;
1337 }
1338 
1339 rb_encoding *
1341 {
1342  return enc_table.list[ENCINDEX_US_ASCII].enc;
1343 }
1344 
1345 int
1347 {
1348  return ENCINDEX_US_ASCII;
1349 }
1350 
1351 int rb_locale_charmap_index(void);
1352 
1353 int
1355 {
1356  int idx = rb_locale_charmap_index();
1357 
1358  if (idx < 0) idx = ENCINDEX_ASCII;
1359 
1360  if (rb_enc_registered("locale") < 0) {
1361 # if defined _WIN32
1362  void Init_w32_codepage(void);
1364 # endif
1365  enc_alias_internal("locale", idx);
1366  }
1367 
1368  return idx;
1369 }
1370 
1371 rb_encoding *
1373 {
1375 }
1376 
1377 int
1379 {
1380  int idx = rb_enc_registered("filesystem");
1381  if (idx < 0)
1382  idx = ENCINDEX_ASCII;
1383  return idx;
1384 }
1385 
1386 rb_encoding *
1388 {
1390 }
1391 
1393  int index; /* -2 => not yet set, -1 => nil */
1395 };
1396 
1397 static struct default_encoding default_external = {0};
1398 
1399 static int
1400 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1401 {
1402  int overridden = FALSE;
1403 
1404  if (def->index != -2)
1405  /* Already set */
1406  overridden = TRUE;
1407 
1408  if (NIL_P(encoding)) {
1409  def->index = -1;
1410  def->enc = 0;
1411  st_insert(enc_table.names, (st_data_t)strdup(name),
1413  }
1414  else {
1415  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1416  def->enc = 0;
1417  enc_alias_internal(name, def->index);
1418  }
1419 
1420  if (def == &default_external)
1421  enc_alias_internal("filesystem", Init_enc_set_filesystem_encoding());
1422 
1423  return overridden;
1424 }
1425 
1426 rb_encoding *
1428 {
1429  if (default_external.enc) return default_external.enc;
1430 
1431  if (default_external.index >= 0) {
1432  default_external.enc = rb_enc_from_index(default_external.index);
1433  return default_external.enc;
1434  }
1435  else {
1436  return rb_locale_encoding();
1437  }
1438 }
1439 
1440 VALUE
1442 {
1444 }
1445 
1446 /*
1447  * call-seq:
1448  * Encoding.default_external -> enc
1449  *
1450  * Returns default external encoding.
1451  *
1452  * The default external encoding is used by default for strings created from
1453  * the following locations:
1454  *
1455  * * CSV
1456  * * File data read from disk
1457  * * SDBM
1458  * * StringIO
1459  * * Zlib::GzipReader
1460  * * Zlib::GzipWriter
1461  * * String#inspect
1462  * * Regexp#inspect
1463  *
1464  * While strings created from these locations will have this encoding, the
1465  * encoding may not be valid. Be sure to check String#valid_encoding?.
1466  *
1467  * File data written to disk will be transcoded to the default external
1468  * encoding when written.
1469  *
1470  * The default external encoding is initialized by the locale or -E option.
1471  */
1472 static VALUE
1473 get_default_external(VALUE klass)
1474 {
1475  return rb_enc_default_external();
1476 }
1477 
1478 void
1480 {
1481  if (NIL_P(encoding)) {
1482  rb_raise(rb_eArgError, "default external can not be nil");
1483  }
1484  enc_set_default_encoding(&default_external, encoding,
1485  "external");
1486 }
1487 
1488 /*
1489  * call-seq:
1490  * Encoding.default_external = enc
1491  *
1492  * Sets default external encoding. You should not set
1493  * Encoding::default_external in ruby code as strings created before changing
1494  * the value may have a different encoding from strings created after the value
1495  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1496  * the correct default_external.
1497  *
1498  * See Encoding::default_external for information on how the default external
1499  * encoding is used.
1500  */
1501 static VALUE
1502 set_default_external(VALUE klass, VALUE encoding)
1503 {
1504  rb_warning("setting Encoding.default_external");
1505  rb_enc_set_default_external(encoding);
1506  return encoding;
1507 }
1508 
1509 static struct default_encoding default_internal = {-2};
1510 
1511 rb_encoding *
1513 {
1514  if (!default_internal.enc && default_internal.index >= 0) {
1515  default_internal.enc = rb_enc_from_index(default_internal.index);
1516  }
1517  return default_internal.enc; /* can be NULL */
1518 }
1519 
1520 VALUE
1522 {
1523  /* Note: These functions cope with default_internal not being set */
1525 }
1526 
1527 /*
1528  * call-seq:
1529  * Encoding.default_internal -> enc
1530  *
1531  * Returns default internal encoding. Strings will be transcoded to the
1532  * default internal encoding in the following places if the default internal
1533  * encoding is not nil:
1534  *
1535  * * CSV
1536  * * Etc.sysconfdir and Etc.systmpdir
1537  * * File data read from disk
1538  * * File names from Dir
1539  * * Integer#chr
1540  * * String#inspect and Regexp#inspect
1541  * * Strings returned from Readline
1542  * * Strings returned from SDBM
1543  * * Time#zone
1544  * * Values from ENV
1545  * * Values in ARGV including $PROGRAM_NAME
1546  *
1547  * Additionally String#encode and String#encode! use the default internal
1548  * encoding if no encoding is given.
1549  *
1550  * The script encoding (__ENCODING__), not default_internal, is used as the
1551  * encoding of created strings.
1552  *
1553  * Encoding::default_internal is initialized by the source file's
1554  * internal_encoding or -E option.
1555  */
1556 static VALUE
1557 get_default_internal(VALUE klass)
1558 {
1559  return rb_enc_default_internal();
1560 }
1561 
1562 void
1564 {
1565  enc_set_default_encoding(&default_internal, encoding,
1566  "internal");
1567 }
1568 
1569 /*
1570  * call-seq:
1571  * Encoding.default_internal = enc or nil
1572  *
1573  * Sets default internal encoding or removes default internal encoding when
1574  * passed nil. You should not set Encoding::default_internal in ruby code as
1575  * strings created before changing the value may have a different encoding
1576  * from strings created after the change. Instead you should use
1577  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1578  *
1579  * See Encoding::default_internal for information on how the default internal
1580  * encoding is used.
1581  */
1582 static VALUE
1583 set_default_internal(VALUE klass, VALUE encoding)
1584 {
1585  rb_warning("setting Encoding.default_internal");
1586  rb_enc_set_default_internal(encoding);
1587  return encoding;
1588 }
1589 
/*
 * Defines constant(s) under rb_cEncoding, all bound to the Encoding object
 * for +enc+, with names derived from the encoding name +name+.
 *
 * - A name starting with a digit defines nothing.
 * - A name that starts with an uppercase letter and otherwise consists only
 *   of alphanumerics and '_' is defined verbatim.
 * - Any other name is copied; in the copy a leading lowercase letter is
 *   upcased and every non-alphanumeric character becomes '_'.  That copy is
 *   defined only if the name contains an uppercase letter.
 * - If the name contains a lowercase letter, an all-uppercase variant is
 *   defined as well.
 *
 * A name longer than ENCODING_NAMELEN_MAX defines nothing.
 */
1590 static void
1591 set_encoding_const(const char *name, rb_encoding *enc)
1592 {
1593  VALUE encoding = rb_enc_from_encoding(enc);
1594  char *s = (char *)name;
1595  int haslower = 0, hasupper = 0, valid = 0;
1596 
1597  if (ISDIGIT(*s)) return;
/* Phase 1: scan the longest prefix that is usable verbatim as a constant name. */
1598  if (ISUPPER(*s)) {
1599  hasupper = 1;
1600  while (*++s && (ISALNUM(*s) || *s == '_')) {
1601  if (ISLOWER(*s)) haslower = 1;
1602  }
1603  }
/* The scan reached the terminator: the whole name is usable as-is. */
1604  if (!*s) {
1605  if (s - name > ENCODING_NAMELEN_MAX) return;
1606  valid = 1;
1607  rb_define_const(rb_cEncoding, name, encoding);
1608  }
/* Phase 2: derive sanitized and/or all-uppercase variants from a writable copy. */
1609  if (!valid || haslower) {
1610  size_t len = s - name;
1611  if (len > ENCODING_NAMELEN_MAX) return;
/* Keep scanning from where phase 1 stopped until both flags are known or the name ends. */
1612  if (!haslower || !hasupper) {
1613  do {
1614  if (ISLOWER(*s)) haslower = 1;
1615  if (ISUPPER(*s)) hasupper = 1;
1616  } while (*++s && (!haslower || !hasupper));
1617  len = s - name;
1618  }
1619  len += strlen(s);
/* len++ makes room for the terminating NUL in the copy below. */
1620  if (len++ > ENCODING_NAMELEN_MAX) return;
1621  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1622  name = s;
1623  if (!valid) {
1624  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1625  for (; *s; ++s) {
1626  if (!ISALNUM(*s)) *s = '_';
1627  }
1628  if (hasupper) {
1629  rb_define_const(rb_cEncoding, name, encoding);
1630  }
1631  }
/* Upcase the copy in place and define the all-uppercase variant. */
1632  if (haslower) {
1633  for (s = (char *)name; *s; ++s) {
1634  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1635  }
1636  rb_define_const(rb_cEncoding, name, encoding);
1637  }
1638  }
1639 }
1640 
1641 static int
1642 rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
1643 {
1644  VALUE ary = (VALUE)arg;
1645  VALUE str = rb_fstring_cstr((char *)name);
1646  rb_ary_push(ary, str);
1647  return ST_CONTINUE;
1648 }
1649 
1650 /*
1651  * call-seq:
1652  * Encoding.name_list -> ["enc1", "enc2", ...]
1653  *
1654  * Returns the list of available encoding names.
1655  *
1656  * Encoding.name_list
1657  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1658  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1659  * "Windows-31J",
1660  * "BINARY", "CP932", "eucJP"]
1661  *
1662  */
1663 
1664 static VALUE
1665 rb_enc_name_list(VALUE klass)
1666 {
1667  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1668  st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
1669  return ary;
1670 }
1671 
1672 static int
1673 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
1674 {
1675  VALUE *p = (VALUE *)arg;
1676  VALUE aliases = p[0], ary = p[1];
1677  int idx = (int)orig;
1678  VALUE key, str = rb_ary_entry(ary, idx);
1679 
1680  if (NIL_P(str)) {
1682 
1683  if (!enc) return ST_CONTINUE;
1684  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1685  return ST_CONTINUE;
1686  }
1688  rb_ary_store(ary, idx, str);
1689  }
1690  key = rb_fstring_cstr((char *)name);
1691  rb_hash_aset(aliases, key, str);
1692  return ST_CONTINUE;
1693 }
1694 
1695 /*
1696  * call-seq:
1697  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1698  *
1699  * Returns the hash of available encoding alias and original encoding name.
1700  *
1701  * Encoding.aliases
1702  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1703  * "SJIS"=>"Windows-31J", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1704  *
1705  */
1706 
1707 static VALUE
1708 rb_enc_aliases(VALUE klass)
1709 {
1710  VALUE aliases[2];
1711  aliases[0] = rb_hash_new();
1712  aliases[1] = rb_ary_new();
1713  st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1714  return aliases[0];
1715 }
1716 
1717 /*
1718  * An Encoding instance represents a character encoding usable in Ruby. It is
1719  * defined as a constant under the Encoding namespace. It has a name and
1720  * optionally, aliases:
1721  *
1722  * Encoding::ISO_8859_1.name
1723  * #=> "ISO-8859-1"
1724  *
1725  * Encoding::ISO_8859_1.names
1726  * #=> ["ISO-8859-1", "ISO8859-1"]
1727  *
1728  * Ruby methods dealing with encodings return or accept Encoding instances as
1729  * arguments (when a method accepts an Encoding instance as an argument, it
1730  * can be passed an Encoding name or alias instead).
1731  *
1732  * "some string".encoding
1733  * #=> #<Encoding:UTF-8>
1734  *
1735  * string = "some string".encode(Encoding::ISO_8859_1)
1736  * #=> "some string"
1737  * string.encoding
1738  * #=> #<Encoding:ISO-8859-1>
1739  *
1740  * "some string".encode "ISO-8859-1"
1741  * #=> "some string"
1742  *
1743  * Encoding::ASCII_8BIT is a special encoding that is usually used for
1744  * a byte string, not a character string. But as the name insists, its
1745  * characters in the range of ASCII are considered as ASCII
1746  * characters. This is useful when you use ASCII-8BIT characters with
1747  * other ASCII compatible characters.
1748  *
1749  * == Changing an encoding
1750  *
1751  * The associated Encoding of a String can be changed in two different ways.
1752  *
1753  * First, it is possible to set the Encoding of a string to a new Encoding
1754  * without changing the internal byte representation of the string, with
1755  * String#force_encoding. This is how you can tell Ruby the correct encoding
1756  * of a string.
1757  *
1758  * string
1759  * #=> "R\xC3\xA9sum\xC3\xA9"
1760  * string.encoding
1761  * #=> #<Encoding:ISO-8859-1>
1762  * string.force_encoding(Encoding::UTF_8)
1763  * #=> "R\u00E9sum\u00E9"
1764  *
1765  * Second, it is possible to transcode a string, i.e. translate its internal
1766  * byte representation to another encoding. Its associated encoding is also
1767  * set to the other encoding. See String#encode for the various forms of
1768  * transcoding, and the Encoding::Converter class for additional control over
1769  * the transcoding process.
1770  *
1771  * string
1772  * #=> "R\u00E9sum\u00E9"
1773  * string.encoding
1774  * #=> #<Encoding:UTF-8>
1775  * string = string.encode!(Encoding::ISO_8859_1)
1776  * #=> "R\xE9sum\xE9"
1777  * string.encoding
1778  * #=> #<Encoding:ISO-8859-1>
1779  *
1780  * == Script encoding
1781  *
1782  * All Ruby script code has an associated Encoding which any String literal
1783  * created in the source code will be associated to.
1784  *
1785  * The default script encoding is Encoding::UTF_8 after v2.0, but it
1786  * can be changed by a magic comment on the first line of the source
1787  * code file (or second line, if there is a shebang line on the
1788  * first). The comment must contain the word <code>coding</code> or
1789  * <code>encoding</code>, followed by a colon, space and the Encoding
1790  * name or alias:
1791  *
1792  * # encoding: UTF-8
1793  *
1794  * "some string".encoding
1795  * #=> #<Encoding:UTF-8>
1796  *
1797  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1798  * which the keyword is written:
1799  *
1800  * # encoding: ISO-8859-1
1801  *
1802  * __ENCODING__
1803  * #=> #<Encoding:ISO-8859-1>
1804  *
1805  * <code>ruby -K</code> will change the default locale encoding, but this is
1806  * not recommended. Ruby source files should declare their script encoding by a
1807  * magic comment even when they only depend on US-ASCII strings or regular
1808  * expressions.
1809  *
1810  * == Locale encoding
1811  *
1812  * The default encoding of the environment. Usually derived from locale.
1813  *
1814  * see Encoding.locale_charmap, Encoding.find('locale')
1815  *
1816  * == Filesystem encoding
1817  *
1818  * The default encoding of strings from the filesystem of the environment.
1819  * This is used for strings of file names or paths.
1820  *
1821  * see Encoding.find('filesystem')
1822  *
1823  * == External encoding
1824  *
1825  * Each IO object has an external encoding which indicates the encoding that
1826  * Ruby will use to read its data. By default Ruby sets the external encoding
1827  * of an IO object to the default external encoding. The default external
1828  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1829  * Encoding.default_external returns the current value of the external
1830  * encoding.
1831  *
1832  * ENV["LANG"]
1833  * #=> "UTF-8"
1834  * Encoding.default_external
1835  * #=> #<Encoding:UTF-8>
1836  *
1837  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1838  * #<Encoding:ISO-8859-1>
1839  *
1840  * $ LANG=C ruby -e 'p Encoding.default_external'
1841  * #<Encoding:US-ASCII>
1842  *
1843  * The default external encoding may also be set through
1844  * Encoding.default_external=, but you should not do this as strings created
1845  * before and after the change will have inconsistent encodings. Instead use
1846  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1847  *
1848  * When you know that the actual encoding of the data of an IO object is not
1849  * the default external encoding, you can reset its external encoding with
1850  * IO#set_encoding or set it at IO object creation (see IO.new options).
1851  *
1852  * == Internal encoding
1853  *
1854  * To process the data of an IO object which has an encoding different
1855  * from its external encoding, you can set its internal encoding. Ruby will use
1856  * this internal encoding to transcode the data when it is read from the IO
1857  * object.
1858  *
1859  * Conversely, when data is written to the IO object it is transcoded from the
1860  * internal encoding to the external encoding of the IO object.
1861  *
1862  * The internal encoding of an IO object can be set with
1863  * IO#set_encoding or at IO object creation (see IO.new options).
1864  *
1865  * The internal encoding is optional and when not set, the Ruby default
1866  * internal encoding is used. If not explicitly set this default internal
1867  * encoding is +nil+ meaning that by default, no transcoding occurs.
1868  *
1869  * The default internal encoding can be set with the interpreter option
1870  * <code>-E</code>. Encoding.default_internal returns the current internal
1871  * encoding.
1872  *
1873  * $ ruby -e 'p Encoding.default_internal'
1874  * nil
1875  *
1876  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1877  * Encoding.default_internal]"
1878  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1879  *
1880  * The default internal encoding may also be set through
1881  * Encoding.default_internal=, but you should not do this as strings created
1882  * before and after the change will have inconsistent encodings. Instead use
1883  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1884  *
1885  * == IO encoding example
1886  *
1887  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1888  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1889  *
1890  * string = "R\u00E9sum\u00E9"
1891  *
1892  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1893  * io.write(string)
1894  * end
1895  *
1896  * puts "raw text:"
1897  * p File.binread("transcoded.txt")
1898  * puts
1899  *
1900  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1901  * puts "transcoded text:"
1902  * p io.read
1903  * end
1904  *
1905  * While writing the file, the internal encoding is not specified as it is
1906  * only necessary for reading. While reading the file both the internal and
1907  * external encoding must be specified to obtain the correct result.
1908  *
1909  * $ ruby t.rb
1910  * raw text:
1911  * "R\xE9sum\xE9"
1912  *
1913  * transcoded text:
1914  * "R\u00E9sum\u00E9"
1915  *
1916  */
1917 
1918 void
1920 {
1921 #undef rb_intern
1922 #define rb_intern(str) rb_intern_const(str)
1923  VALUE list;
1924  int i;
1925 
1926  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
1927  rb_define_alloc_func(rb_cEncoding, enc_s_alloc);
1929  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1930  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1931  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1932  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1933  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1934  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1935  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1936  rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0);
1937  rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0);
1938  rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0);
1939  rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1);
1940  rb_define_singleton_method(rb_cEncoding, "compatible?", enc_compatible_p, 2);
1941 
1942  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1943  rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
1944 
1945  rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
1946  rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
1947  rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
1948  rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
1949  rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); /* in localeinit.c */
1950 
1951  list = rb_ary_new2(enc_table.count);
1953  rb_encoding_list = list;
1955 
1956  for (i = 0; i < enc_table.count; ++i) {
1957  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1958  }
1959 
1961 }
1962 
/* Initializes the encoding table by delegating to rb_enc_init(). */
void
Init_encodings(void)
{
    rb_enc_init();
}
1968 
1969 /* locale insensitive ctype functions */
1970 
1971 void
1973 {
1974  st_foreach(enc_table.names, func, arg);
1975 }
rb_enc_alias
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:553
ASCII
@ ASCII
Definition: nkf.c:87
EUC_JP
@ EUC_JP
Definition: nkf.c:99
UNLIKELY
#define UNLIKELY(x)
Definition: ffi_common.h:126
ISASCII
#define ISASCII(c)
Definition: ruby.h:2304
ID
unsigned long ID
Definition: ruby.h:103
rb_enc_codepoint
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1083
rb_define_class
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:649
rb_fstring
VALUE rb_fstring(VALUE)
Definition: string.c:312
rb_enc_isascii
#define rb_enc_isascii(c, enc)
Definition: encoding.h:230
TRUE
#define TRUE
Definition: nkf.h:175
rb_enc_unicode_p
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:521
rb_enc_name
#define rb_enc_name(enc)
Definition: encoding.h:177
rb_filesystem_encoding
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1387
rb_enc_mbc_to_codepoint
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:208
ONIGENC_ASCII_CODE_TO_UPPER_CASE
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: regenc.h:218
Init_enc_set_filesystem_encoding
int Init_enc_set_filesystem_encoding(void)
Definition: localeinit.c:119
rb_encdb_declare
void rb_encdb_declare(const char *name)
Definition: encoding.c:350
ENCINDEX_UTF_8
#define ENCINDEX_UTF_8
Definition: encindex.h:43
ENCODING_SET_INLINED
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:59
rb_hash_new
VALUE rb_hash_new(void)
Definition: hash.c:1501
rb_enc_mbclen
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1020
rb_gc_register_mark_object
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:7063
rb_warn
void rb_warn(const char *fmt,...)
Definition: error.c:313
memset
void * memset(void *, int, size_t)
obj2
VALUE obj2
Definition: rb_mjit_min_header-2.7.0.h:7583
ONIGENC_IS_ASCII_CODE
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
ISDIGIT
#define ISDIGIT(c)
Definition: ruby.h:2312
rb_warning
void rb_warning(const char *fmt,...)
Definition: error.c:334
regenc.h
RBASIC_CLEAR_CLASS
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:1981
ENCDB_REGISTER
#define ENCDB_REGISTER(name, enc)
ENC_INDEX_MASK
#define ENC_INDEX_MASK
Definition: encoding.c:63
RSTRING_PTR
#define RSTRING_PTR(str)
Definition: ruby.h:1009
rb_marshal_define_compat
void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE(*dumper)(VALUE), VALUE(*loader)(VALUE, VALUE))
Definition: marshal.c:133
UTF_32
@ UTF_32
Definition: nkf.c:117
rb_utf8_encindex
int rb_utf8_encindex(void)
Definition: encoding.c:1334
rb_locale_encoding
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1372
rb_default_external_encoding
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1427
rb_enc_sprintf
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1178
VALUE
unsigned long VALUE
Definition: ruby.h:102
index
int index
Definition: rb_mjit_min_header-2.7.0.h:11246
enc_autoload_p
#define enc_autoload_p(enc)
Definition: encoding.c:75
rb_obj_encoding
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:1004
rb_eArgError
VALUE rb_eArgError
Definition: error.c:923
MUST_STRING
#define MUST_STRING(str)
Definition: encoding.c:24
encoding.h
rb_ary_replace
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3811
ruby_verbose
#define ruby_verbose
Definition: ruby.h:1925
rb_ary_store
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:1079
RB_TYPE_P
#define RB_TYPE_P(obj, type)
Definition: ruby.h:560
rb_enc_get
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
rb_enc_asciicompat
#define rb_enc_asciicompat(enc)
Definition: encoding.h:245
UTF_16
@ UTF_16
Definition: nkf.c:112
rb_enc_check
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:891
rb_enc_precise_mbclen
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1032
int
__inline__ int
Definition: rb_mjit_min_header-2.7.0.h:2839
rb_enc_set_default_internal
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1563
alias
const char * alias
Definition: nkf.c:1159
StringValue
use StringValue() instead")))
rb_check_string_type
VALUE rb_check_string_type(VALUE)
Definition: string.c:2314
rb_define_singleton_method
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1755
default_encoding::enc
rb_encoding * enc
Definition: encoding.c:1394
rb_enc_fast_mbclen
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1014
UTF_8
@ UTF_8
Definition: nkf.c:108
rb_define_method
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1551
rb_enc_dummy_p
int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.c:131
INT2NUM
#define INT2NUM(x)
Definition: ruby.h:1609
rb_enc_default_internal
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1521
T_DATA
#define T_DATA
Definition: ruby.h:538
is_data_encoding
#define is_data_encoding(obj)
Definition: encoding.c:85
rb_loaderror
void rb_loaderror(const char *fmt,...)
Definition: error.c:2688
Qfalse
#define Qfalse
Definition: ruby.h:467
ENC_CODERANGE_CLEAR
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:111
rb_encdb_alias
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:565
default_encoding::index
int index
Definition: encoding.c:1393
UTF_32BE
@ UTF_32BE
Definition: nkf.c:118
st_insert2
int st_insert2(st_table *tab, st_data_t key, st_data_t value, st_data_t(*func)(st_data_t))
Definition: st.c:1263
NORETURN
NORETURN(static void not_encoding(VALUE enc))
SPECIAL_CONST_P
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1313
NULL
#define NULL
Definition: _sdbm.c:101
rb_encdb_dummy
int rb_encdb_dummy(const char *name)
Definition: encoding.c:472
rb_enc_replicate
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:402
rb_require_internal
int rb_require_internal(VALUE fname)
Definition: load.c:1067
rb_raw_encoding
OnigEncodingType rb_raw_encoding
Definition: encoding.c:30
PRIsVALUE
#define PRIsVALUE
Definition: ruby.h:166
rb_enc_from_encoding
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:116
st_insert
int st_insert(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1171
strlen
size_t strlen(const char *)
T_SYMBOL
#define T_SYMBOL
Definition: ruby.h:540
rb_ascii8bit_encindex
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1322
rb_encoding_entry::base
rb_encoding * base
Definition: encoding.c:52
OnigEncodingTypeST::name
const char * name
Definition: onigmo.h:162
rb_find_encoding
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:252
ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: onigmo.h:352
rb_eEncodingError
VALUE rb_eEncodingError
Definition: error.c:928
ONIGENC_ASCII_CODE_TO_LOWER_CASE
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
Definition: regenc.h:217
rb_undef_method
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1575
rb_check_arity
#define rb_check_arity
Definition: intern.h:347
debug
#define debug(lvl, x...)
Definition: ffi.c:52
rb_funcallv
#define rb_funcallv(recv, mid, argc, argv)
Definition: rb_mjit_min_header-2.7.0.h:7899
ENCODING_INLINE_MAX
#define ENCODING_INLINE_MAX
Definition: encoding.h:40
rb_raise
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2669
rb_id_encoding
ID rb_id_encoding(void)
Definition: encoding.c:759
rb_ary_entry
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1512
rb_ivar_get
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1070
T_FILE
#define T_FILE
Definition: ruby.h:534
if
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
rb_enc_set_dummy
int rb_enc_set_dummy(int index)
Definition: encoding.c:393
obj
const VALUE VALUE obj
Definition: rb_mjit_min_header-2.7.0.h:5742
rb_obj_class
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
rb_fstring_cstr
#define rb_fstring_cstr(str)
Definition: rb_mjit_min_header-2.7.0.h:7718
rb_enc_get_index
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:779
memcpy
void * memcpy(void *__restrict, const void *__restrict, size_t)
rb_ascii8bit_encoding
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1316
DATA_PTR
#define DATA_PTR(dta)
Definition: ruby.h:1175
rb_enc_registered
int rb_enc_registered(const char *name)
Definition: encoding.c:624
MBCLEN_CHARFOUND_LEN
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:192
rb_enc_find_index2
int rb_enc_find_index2(const char *name, long len)
Definition: encoding.c:717
rb_encoding
const typedef OnigEncodingType rb_encoding
Definition: encoding.h:115
rb_check_frozen
#define rb_check_frozen(obj)
Definition: intern.h:319
ISUPPER
#define ISUPPER(c)
Definition: ruby.h:2308
UTF_16LE
@ UTF_16LE
Definition: nkf.c:115
ENCINDEX_BUILTIN_MAX
#define ENCINDEX_BUILTIN_MAX
Definition: encindex.h:54
ENCODING_NAMELEN_MAX
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:72
rb_enc_from_index
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:609
UNSPECIFIED_ENCODING
#define UNSPECIFIED_ENCODING
Definition: encoding.c:70
rb_define_dummy_encoding
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:462
i
uint32_t i
Definition: rb_mjit_min_header-2.7.0.h:5464
OnigEncodingTypeST
Definition: onigmo.h:160
ONIGENC_CODE_TO_MBCLEN
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
rb_enc_code_to_mbclen
int rb_enc_code_to_mbclen(int code, rb_encoding *enc)
Definition: encoding.c:1100
T_REGEXP
#define T_REGEXP
Definition: ruby.h:529
ISALNUM
#define ISALNUM(c)
Definition: ruby.h:2310
rb_cEncoding
VALUE rb_cEncoding
Definition: encoding.c:46
rb_errinfo
VALUE rb_errinfo(void)
The current exception in the current thread.
Definition: eval.c:1881
rb_ary_push
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1195
rb_enc_check_str
rb_encoding * rb_enc_check_str(VALUE str1, VALUE str2)
Definition: encoding.c:880
rb_enc_mbminlen
#define rb_enc_mbminlen(enc)
Definition: encoding.h:180
rb_enc_to_index
int rb_enc_to_index(rb_encoding *enc)
Definition: encoding.c:125
TypedData_Wrap_Struct
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1231
rb_eTypeError
VALUE rb_eTypeError
Definition: error.c:922
Init_w32_codepage
void Init_w32_codepage(void)
Definition: file.c:722
rb_enc_init
void rb_enc_init(void)
Definition: encoding.c:582
rb_enc_get_from_index
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:618
st_init_strcasetable
st_table * st_init_strcasetable(void)
Definition: st.c:683
Init_Encoding
void Init_Encoding(void)
Definition: encoding.c:1919
rb_encoding_entry
Definition: encoding.c:49
ALLOCA_N
#define ALLOCA_N(type, n)
Definition: ruby.h:1684
rb_enc_set_index
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:830
rb_enc_copy
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:990
size
int size
Definition: encoding.c:58
rb_enc_compatible
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:974
default_encoding
Definition: encoding.c:1392
rb_enc_foreach_name
void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
Definition: encoding.c:1972
FALSE
#define FALSE
Definition: nkf.h:174
ISLOWER
#define ISLOWER(c)
Definition: ruby.h:2309
valid_encoding_name_p
#define valid_encoding_name_p(name)
Definition: encoding.c:73
list
struct rb_encoding_entry * list
Definition: encoding.c:56
rb_locale_charmap_index
int rb_locale_charmap_index(void)
Definition: localeinit.c:109
Init_encodings
void Init_encodings(void)
Definition: encoding.c:1964
rb_default_internal_encoding
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1512
rb_encoding_entry::name
const char * name
Definition: encoding.c:50
CONST_ID
#define CONST_ID(var, str)
Definition: ruby.h:1841
StringValueCStr
#define StringValueCStr(v)
Definition: ruby.h:604
key
key
Definition: openssl_missing.h:181
ONIGENC_IS_UNICODE
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
UTF_16BE
@ UTF_16BE
Definition: nkf.c:113
rb_to_encoding_index
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:197
rb_locale_charmap
VALUE rb_locale_charmap(VALUE klass)
Definition: localeinit.c:91
rb_enc_register
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:326
CLASS_OF
#define CLASS_OF(v)
Definition: ruby.h:484
rb_enc_default_external
VALUE rb_enc_default_external(void)
Definition: encoding.c:1441
MBCLEN_CHARFOUND_P
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:191
is_obj_encoding
#define is_obj_encoding(obj)
Definition: encoding.c:86
st_foreach
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
Definition: st.c:1718
char
#define char
Definition: rb_mjit_min_header-2.7.0.h:2876
rb_cObject
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:2010
rb_ary_new2
#define rb_ary_new2
Definition: intern.h:103
rb_intern
#define rb_intern(str)
n
const char size_t n
Definition: rb_mjit_min_header-2.7.0.h:5456
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
rb_usascii_encindex
int rb_usascii_encindex(void)
Definition: encoding.c:1346
rb_enc_str_coderange
int rb_enc_str_coderange(VALUE)
Definition: string.c:657
rb_bug
void rb_bug(const char *fmt,...)
Definition: error.c:634
internal.h
rb_to_encoding
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:245
UChar
#define UChar
Definition: onigmo.h:76
arg
VALUE arg
Definition: rb_mjit_min_header-2.7.0.h:5601
argv
char ** argv
Definition: ruby.c:223
rb_set_errinfo
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
Definition: eval.c:1895
ONIGENC_PRECISE_MBC_ENC_LEN
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:356
ST_CONTINUE
@ ST_CONTINUE
Definition: st.h:99
ENC_TO_ENCINDEX
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.c:65
strdup
char * strdup(const char *) __attribute__((__malloc__)) __attribute__((__warn_unused_result__))
xmalloc
#define xmalloc
Definition: defines.h:211
rb_enc_set_default_external
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1479
rb_sprintf
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1197
names
st_table * names
Definition: encoding.c:59
rb_enc_find
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:728
klass
VALUE klass
Definition: rb_mjit_min_header-2.7.0.h:13254
st_data_t
unsigned long st_data_t
Definition: rb_mjit_min_header-2.7.0.h:5363
rb_utf8_encoding
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1328
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
ENC_REGISTER
#define ENC_REGISTER(enc)
ENCODING_COUNT
#define ENCODING_COUNT
Definition: encoding.c:69
rb_enc_find_index
int rb_enc_find_index(const char *name)
Definition: encoding.c:693
STRCASECMP
#define STRCASECMP(s1, s2)
Definition: ruby.h:2323
RUBY_TYPED_FREE_IMMEDIATELY
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1207
ENC_CODERANGE_7BIT
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
MEMCPY
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1753
rb_locale_encindex
int rb_locale_encindex(void)
Definition: encoding.c:1354
rb_enc_ascget
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:1044
rb_hash_aset
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:2779
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
RDATA
#define RDATA(obj)
Definition: ruby.h:1274
ONIGENC_FLAG_UNICODE
#define ONIGENC_FLAG_UNICODE
Definition: onigmo.h:313
argc
int argc
Definition: ruby.c:222
ENC_SET_DUMMY
#define ENC_SET_DUMMY(enc)
Definition: encoding.c:67
ENCODING_GET_INLINED
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:61
REALLOC_N
#define REALLOC_N(var, type, n)
Definition: ruby.h:1667
ONIGENC_MBC_ENC_LEN
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:361
rb_define_const
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2880
ruby_debug
#define ruby_debug
Definition: ruby.h:1926
rb_enc_toupper
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1106
encindex.h
rb_data_type_struct
Definition: ruby.h:1148
BUILTIN_TYPE
#define BUILTIN_TYPE(x)
Definition: ruby.h:551
rb_filesystem_encindex
int rb_filesystem_encindex(void)
Definition: encoding.c:1378
count
int count
Definition: encoding.c:57
ENCINDEX_US_ASCII
#define ENCINDEX_US_ASCII
Definition: encindex.h:44
Qtrue
#define Qtrue
Definition: ruby.h:468
ENC_DUMMY_P
#define ENC_DUMMY_P(enc)
Definition: encoding.c:66
len
uint8_t len
Definition: escape.c:17
SYMBOL_P
#define SYMBOL_P(x)
Definition: ruby.h:413
rb_enc_set_base
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:382
rb_eEncCompatError
VALUE rb_eEncCompatError
Definition: error.c:929
rb_str_change_terminator_length
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
Definition: string.c:2230
TOLOWER
#define TOLOWER(c)
Definition: ruby.h:2319
rb_ivar_set
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
rb_enc_codelen
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1089
T_STRING
#define T_STRING
Definition: ruby.h:528
rb_data_is_encoding
int rb_data_is_encoding(VALUE obj)
Definition: encoding.c:89
rb_sym2str
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
UTF8_MAC
@ UTF8_MAC
Definition: nkf.c:111
ENCINDEX_ASCII
#define ENCINDEX_ASCII
Definition: encindex.h:42
rb_enc_str_asciionly_p
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:678
rb_ary_new
VALUE rb_ary_new(void)
Definition: array.c:723
rb_enc_tolower
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1112
NUM2INT
#define NUM2INT(x)
Definition: ruby.h:715
Qnil
#define Qnil
Definition: ruby.h:469
ENC_CODERANGE_ASCIIONLY
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:109
st_lookup
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1101
util.h
OnigEncodingTypeST::ruby_encoding_index
int ruby_encoding_index
Definition: onigmo.h:178
UTF_32LE
@ UTF_32LE
Definition: nkf.c:120
RSTRING_LEN
#define RSTRING_LEN(str)
Definition: ruby.h:1005
st_table
Definition: st.h:79
ruby_assert.h
rb_enc_codepoint_len
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:1068
rb_encoding_entry::enc
rb_encoding * enc
Definition: encoding.c:51
rb_enc_associate
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:866
rb_define_alloc_func
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
rb_enc_capable
int rb_enc_capable(VALUE obj)
Definition: encoding.c:753
rb_encdb_set_unicode
void rb_encdb_set_unicode(int index)
Definition: encoding.c:576
rb_enc_associate_index
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:838
rb_usascii_encoding
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1340
rb_encdb_replicate
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:450
RSTRING_END
#define RSTRING_END(str)
Definition: ruby.h:1013
verbose
verbose(int level, const char *format,...)
Definition: mjit_worker.c:303
name
const char * name
Definition: nkf.c:208