Ruby 2.7.1p83 (2020-03-31 revision a0c7c23c9cec0d0ffcba012279cd652d28ad5bf3)
pack.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  pack.c -
4 
5  $Author$
6  created at: Thu Feb 10 15:17:05 JST 1994
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "internal.h"
14 #include <sys/types.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <float.h>
18 #include "builtin.h"
19 
/*
 * natstr lists the directive letters that accept the native-size
 * modifiers '_' and '!'; endstr lists the letters that accept the
 * explicit byte-order modifiers '<' and '>'.
 *
 * natstr is deliberately gated on HAVE_TRUE_LONG_LONG rather than on
 * HAVE_LONG_LONG or LONG_LONG.  Consequently q! and Q! always mean the
 * standard long long type, and raise ArgumentError on platforms that
 * have no long long type, even when the platform provides an
 * implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
#ifdef HAVE_TRUE_LONG_LONG
static const char natstr[] = "sSiIlLqQjJ";
/* Deliberately spelled "long long" instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
static const char natstr[] = "sSiIlLjJ";
# define NATINT_LEN_Q 8
#endif
static const char endstr[] = "sSiIlLqQjJ";

/*
 * NATINT_PACK is defined when a native short/long/long long differs in
 * size from the fixed 2/4/8 bytes used without the '!' modifier.
 */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
45 
/*
 * BIGENDIAN_P() evaluates to non-zero on a big-endian host.  It is a
 * compile-time constant unless DYNAMIC_ENDIAN is defined, in which case
 * the byte order is probed once at run time and cached.
 */
#if defined(DYNAMIC_ENDIAN)
/* for universal binary of NEXTSTEP and MacOS X */
/* useless since autoconf 2.63? */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        const char *first_byte;

        /* The flag itself doubles as the probe value: once it holds 1,
         * its lowest-addressed byte is zero only on a big-endian host. */
        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] == 0);
    }
    return cached;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
67 
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * Without NATINT_PACK the native type already has the fixed width, so
 * sizeof(type) is used unconditionally.  With NATINT_PACK the local
 * variable `natint` (set by the '_' / '!' modifiers) selects between the
 * native sizeof(type) and the fixed width `len`.
 */
#ifndef NATINT_PACK
# define NATINT_LEN(type,len) ((int)sizeof(type))
#else
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#endif
73 
/*
 * Overlay of a float with a same-sized unsigned integer and a raw byte
 * buffer.  The float conversion macros byte-swap through `u` and the
 * pack/unpack code copies bytes through `buf`.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/* Same overlay for double precision values. */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/* Byte-swap primitives for the integer view of a float / double. */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Host <-> network (big-endian) order: swap only on little-endian hosts.
 * Host <-> VAX (little-endian) order: swap only on big-endian hosts.
 */
/* single precision */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
/* double precision */
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))

/*
 * FLOAT_CONVWITH(x) declares a FLOAT_SWAPPER named x (the trailing
 * semicolon is part of the macro); the remaining macros convert the
 * value held in x in place through its integer member.
 */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* Double precision counterparts of the macros above. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))

/* Largest integer width, in bytes, that the integer directives may use. */
#define MAX_INTEGER_PACK_SIZE 8
109 
110 static const char toofew[] = "too few arguments";
111 
112 static void encodes(VALUE,const char*,long,int,int);
113 static void qpencode(VALUE,VALUE,long);
114 
115 static unsigned long utf8_to_uv(const char*,long*);
116 
117 static ID id_associated;
118 
119 static void
120 str_associate(VALUE str, VALUE add)
121 {
122  /* assert(NIL_P(rb_attr_get(str, id_associated))); */
123  rb_ivar_set(str, id_associated, add);
124 }
125 
126 static VALUE
127 str_associated(VALUE str)
128 {
129  return rb_ivar_lookup(str, id_associated, Qfalse);
130 }
131 
132 static void
133 unknown_directive(const char *mode, char type, VALUE fmt)
134 {
135  VALUE f;
136  char unknown[5];
137 
138  if (ISPRINT(type)) {
139  unknown[0] = type;
140  unknown[1] = '\0';
141  }
142  else {
143  snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
144  }
146  if (f != fmt) {
147  fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
148  }
149  rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
150  mode, unknown, fmt);
151 }
152 
153 static float
154 VALUE_to_float(VALUE obj)
155 {
156  VALUE v = rb_to_float(obj);
157  double d = RFLOAT_VALUE(v);
158 
159  if (isnan(d)) {
160  return NAN;
161  }
162  else if (d < -FLT_MAX) {
163  return -INFINITY;
164  }
165  else if (d <= FLT_MAX) {
166  return d;
167  }
168  else {
169  return INFINITY;
170  }
171 }
172 
173 static VALUE
174 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
175 {
176  static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
177  static const char spc10[] = " ";
178  const char *p, *pend;
179  VALUE res, from, associates = 0;
180  char type;
181  long len, idx, plen;
182  const char *ptr;
183  int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
184 #ifdef NATINT_PACK
185  int natint; /* native integer */
186 #endif
187  int integer_size, bigendian_p;
188 
189  StringValue(fmt);
190  p = RSTRING_PTR(fmt);
191  pend = p + RSTRING_LEN(fmt);
192 
193  if (NIL_P(buffer)) {
194  res = rb_str_buf_new(0);
195  }
196  else {
197  if (!RB_TYPE_P(buffer, T_STRING))
198  rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
199  res = buffer;
200  }
201 
202  idx = 0;
203 
204 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
205 #define MORE_ITEM (idx < RARRAY_LEN(ary))
206 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
207 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
208 
209  while (p < pend) {
210  int explicit_endian = 0;
211  if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
212  rb_raise(rb_eRuntimeError, "format string modified");
213  }
214  type = *p++; /* get data type */
215 #ifdef NATINT_PACK
216  natint = 0;
217 #endif
218 
219  if (ISSPACE(type)) continue;
220  if (type == '#') {
221  while ((p < pend) && (*p != '\n')) {
222  p++;
223  }
224  continue;
225  }
226 
227  {
228  modifiers:
229  switch (*p) {
230  case '_':
231  case '!':
232  if (strchr(natstr, type)) {
233 #ifdef NATINT_PACK
234  natint = 1;
235 #endif
236  p++;
237  }
238  else {
239  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
240  }
241  goto modifiers;
242 
243  case '<':
244  case '>':
245  if (!strchr(endstr, type)) {
246  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
247  }
248  if (explicit_endian) {
249  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
250  }
251  explicit_endian = *p++;
252  goto modifiers;
253  }
254  }
255 
256  if (*p == '*') { /* set data length */
257  len = strchr("@Xxu", type) ? 0
258  : strchr("PMm", type) ? 1
259  : RARRAY_LEN(ary) - idx;
260  p++;
261  }
262  else if (ISDIGIT(*p)) {
263  errno = 0;
264  len = STRTOUL(p, (char**)&p, 10);
265  if (errno) {
266  rb_raise(rb_eRangeError, "pack length too big");
267  }
268  }
269  else {
270  len = 1;
271  }
272 
273  switch (type) {
274  case 'U':
275  /* if encoding is US-ASCII, upgrade to UTF-8 */
276  if (enc_info == 1) enc_info = 2;
277  break;
278  case 'm': case 'M': case 'u':
279  /* keep US-ASCII (do nothing) */
280  break;
281  default:
282  /* fall back to BINARY */
283  enc_info = 0;
284  break;
285  }
286  switch (type) {
287  case 'A': case 'a': case 'Z':
288  case 'B': case 'b':
289  case 'H': case 'h':
290  from = NEXTFROM;
291  if (NIL_P(from)) {
292  ptr = "";
293  plen = 0;
294  }
295  else {
296  StringValue(from);
297  ptr = RSTRING_PTR(from);
298  plen = RSTRING_LEN(from);
299  }
300 
301  if (p[-1] == '*')
302  len = plen;
303 
304  switch (type) {
305  case 'a': /* arbitrary binary string (null padded) */
306  case 'A': /* arbitrary binary string (ASCII space padded) */
307  case 'Z': /* null terminated string */
308  if (plen >= len) {
309  rb_str_buf_cat(res, ptr, len);
310  if (p[-1] == '*' && type == 'Z')
311  rb_str_buf_cat(res, nul10, 1);
312  }
313  else {
314  rb_str_buf_cat(res, ptr, plen);
315  len -= plen;
316  while (len >= 10) {
317  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
318  len -= 10;
319  }
320  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
321  }
322  break;
323 
324 #define castchar(from) (char)((from) & 0xff)
325 
326  case 'b': /* bit string (ascending) */
327  {
328  int byte = 0;
329  long i, j = 0;
330 
331  if (len > plen) {
332  j = (len - plen + 1)/2;
333  len = plen;
334  }
335  for (i=0; i++ < len; ptr++) {
336  if (*ptr & 1)
337  byte |= 128;
338  if (i & 7)
339  byte >>= 1;
340  else {
341  char c = castchar(byte);
342  rb_str_buf_cat(res, &c, 1);
343  byte = 0;
344  }
345  }
346  if (len & 7) {
347  char c;
348  byte >>= 7 - (len & 7);
349  c = castchar(byte);
350  rb_str_buf_cat(res, &c, 1);
351  }
352  len = j;
353  goto grow;
354  }
355  break;
356 
357  case 'B': /* bit string (descending) */
358  {
359  int byte = 0;
360  long i, j = 0;
361 
362  if (len > plen) {
363  j = (len - plen + 1)/2;
364  len = plen;
365  }
366  for (i=0; i++ < len; ptr++) {
367  byte |= *ptr & 1;
368  if (i & 7)
369  byte <<= 1;
370  else {
371  char c = castchar(byte);
372  rb_str_buf_cat(res, &c, 1);
373  byte = 0;
374  }
375  }
376  if (len & 7) {
377  char c;
378  byte <<= 7 - (len & 7);
379  c = castchar(byte);
380  rb_str_buf_cat(res, &c, 1);
381  }
382  len = j;
383  goto grow;
384  }
385  break;
386 
387  case 'h': /* hex string (low nibble first) */
388  {
389  int byte = 0;
390  long i, j = 0;
391 
392  if (len > plen) {
393  j = (len + 1) / 2 - (plen + 1) / 2;
394  len = plen;
395  }
396  for (i=0; i++ < len; ptr++) {
397  if (ISALPHA(*ptr))
398  byte |= (((*ptr & 15) + 9) & 15) << 4;
399  else
400  byte |= (*ptr & 15) << 4;
401  if (i & 1)
402  byte >>= 4;
403  else {
404  char c = castchar(byte);
405  rb_str_buf_cat(res, &c, 1);
406  byte = 0;
407  }
408  }
409  if (len & 1) {
410  char c = castchar(byte);
411  rb_str_buf_cat(res, &c, 1);
412  }
413  len = j;
414  goto grow;
415  }
416  break;
417 
418  case 'H': /* hex string (high nibble first) */
419  {
420  int byte = 0;
421  long i, j = 0;
422 
423  if (len > plen) {
424  j = (len + 1) / 2 - (plen + 1) / 2;
425  len = plen;
426  }
427  for (i=0; i++ < len; ptr++) {
428  if (ISALPHA(*ptr))
429  byte |= ((*ptr & 15) + 9) & 15;
430  else
431  byte |= *ptr & 15;
432  if (i & 1)
433  byte <<= 4;
434  else {
435  char c = castchar(byte);
436  rb_str_buf_cat(res, &c, 1);
437  byte = 0;
438  }
439  }
440  if (len & 1) {
441  char c = castchar(byte);
442  rb_str_buf_cat(res, &c, 1);
443  }
444  len = j;
445  goto grow;
446  }
447  break;
448  }
449  break;
450 
451  case 'c': /* signed char */
452  case 'C': /* unsigned char */
453  integer_size = 1;
454  bigendian_p = BIGENDIAN_P(); /* not effective */
455  goto pack_integer;
456 
457  case 's': /* s for int16_t, s! for signed short */
458  integer_size = NATINT_LEN(short, 2);
459  bigendian_p = BIGENDIAN_P();
460  goto pack_integer;
461 
462  case 'S': /* S for uint16_t, S! for unsigned short */
463  integer_size = NATINT_LEN(short, 2);
464  bigendian_p = BIGENDIAN_P();
465  goto pack_integer;
466 
467  case 'i': /* i and i! for signed int */
468  integer_size = (int)sizeof(int);
469  bigendian_p = BIGENDIAN_P();
470  goto pack_integer;
471 
472  case 'I': /* I and I! for unsigned int */
473  integer_size = (int)sizeof(int);
474  bigendian_p = BIGENDIAN_P();
475  goto pack_integer;
476 
477  case 'l': /* l for int32_t, l! for signed long */
478  integer_size = NATINT_LEN(long, 4);
479  bigendian_p = BIGENDIAN_P();
480  goto pack_integer;
481 
482  case 'L': /* L for uint32_t, L! for unsigned long */
483  integer_size = NATINT_LEN(long, 4);
484  bigendian_p = BIGENDIAN_P();
485  goto pack_integer;
486 
487  case 'q': /* q for int64_t, q! for signed long long */
488  integer_size = NATINT_LEN_Q;
489  bigendian_p = BIGENDIAN_P();
490  goto pack_integer;
491 
492  case 'Q': /* Q for uint64_t, Q! for unsigned long long */
493  integer_size = NATINT_LEN_Q;
494  bigendian_p = BIGENDIAN_P();
495  goto pack_integer;
496 
497  case 'j': /* j for intptr_t */
498  integer_size = sizeof(intptr_t);
499  bigendian_p = BIGENDIAN_P();
500  goto pack_integer;
501 
502  case 'J': /* J for uintptr_t */
503  integer_size = sizeof(uintptr_t);
504  bigendian_p = BIGENDIAN_P();
505  goto pack_integer;
506 
507  case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
508  integer_size = 2;
509  bigendian_p = 1;
510  goto pack_integer;
511 
512  case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
513  integer_size = 4;
514  bigendian_p = 1;
515  goto pack_integer;
516 
517  case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
518  integer_size = 2;
519  bigendian_p = 0;
520  goto pack_integer;
521 
522  case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
523  integer_size = 4;
524  bigendian_p = 0;
525  goto pack_integer;
526 
527  pack_integer:
528  if (explicit_endian) {
529  bigendian_p = explicit_endian == '>';
530  }
531  if (integer_size > MAX_INTEGER_PACK_SIZE)
532  rb_bug("unexpected intger size for pack: %d", integer_size);
533  while (len-- > 0) {
534  char intbuf[MAX_INTEGER_PACK_SIZE];
535 
536  from = NEXTFROM;
537  rb_integer_pack(from, intbuf, integer_size, 1, 0,
540  rb_str_buf_cat(res, intbuf, integer_size);
541  }
542  break;
543 
544  case 'f': /* single precision float in native format */
545  case 'F': /* ditto */
546  while (len-- > 0) {
547  float f;
548 
549  from = NEXTFROM;
550  f = VALUE_to_float(from);
551  rb_str_buf_cat(res, (char*)&f, sizeof(float));
552  }
553  break;
554 
555  case 'e': /* single precision float in VAX byte-order */
556  while (len-- > 0) {
557  FLOAT_CONVWITH(tmp);
558 
559  from = NEXTFROM;
560  tmp.f = VALUE_to_float(from);
561  HTOVF(tmp);
562  rb_str_buf_cat(res, tmp.buf, sizeof(float));
563  }
564  break;
565 
566  case 'E': /* double precision float in VAX byte-order */
567  while (len-- > 0) {
568  DOUBLE_CONVWITH(tmp);
569  from = NEXTFROM;
570  tmp.d = RFLOAT_VALUE(rb_to_float(from));
571  HTOVD(tmp);
572  rb_str_buf_cat(res, tmp.buf, sizeof(double));
573  }
574  break;
575 
576  case 'd': /* double precision float in native format */
577  case 'D': /* ditto */
578  while (len-- > 0) {
579  double d;
580 
581  from = NEXTFROM;
582  d = RFLOAT_VALUE(rb_to_float(from));
583  rb_str_buf_cat(res, (char*)&d, sizeof(double));
584  }
585  break;
586 
587  case 'g': /* single precision float in network byte-order */
588  while (len-- > 0) {
589  FLOAT_CONVWITH(tmp);
590  from = NEXTFROM;
591  tmp.f = VALUE_to_float(from);
592  HTONF(tmp);
593  rb_str_buf_cat(res, tmp.buf, sizeof(float));
594  }
595  break;
596 
597  case 'G': /* double precision float in network byte-order */
598  while (len-- > 0) {
599  DOUBLE_CONVWITH(tmp);
600 
601  from = NEXTFROM;
602  tmp.d = RFLOAT_VALUE(rb_to_float(from));
603  HTOND(tmp);
604  rb_str_buf_cat(res, tmp.buf, sizeof(double));
605  }
606  break;
607 
608  case 'x': /* null byte */
609  grow:
610  while (len >= 10) {
611  rb_str_buf_cat(res, nul10, 10);
612  len -= 10;
613  }
614  rb_str_buf_cat(res, nul10, len);
615  break;
616 
617  case 'X': /* back up byte */
618  shrink:
619  plen = RSTRING_LEN(res);
620  if (plen < len)
621  rb_raise(rb_eArgError, "X outside of string");
622  rb_str_set_len(res, plen - len);
623  break;
624 
625  case '@': /* null fill to absolute position */
626  len -= RSTRING_LEN(res);
627  if (len > 0) goto grow;
628  len = -len;
629  if (len > 0) goto shrink;
630  break;
631 
632  case '%':
633  rb_raise(rb_eArgError, "%% is not supported");
634  break;
635 
636  case 'U': /* Unicode character */
637  while (len-- > 0) {
638  SIGNED_VALUE l;
639  char buf[8];
640  int le;
641 
642  from = NEXTFROM;
643  from = rb_to_int(from);
644  l = NUM2LONG(from);
645  if (l < 0) {
646  rb_raise(rb_eRangeError, "pack(U): value out of range");
647  }
648  le = rb_uv_to_utf8(buf, l);
649  rb_str_buf_cat(res, (char*)buf, le);
650  }
651  break;
652 
653  case 'u': /* uuencoded string */
654  case 'm': /* base64 encoded string */
655  from = NEXTFROM;
656  StringValue(from);
657  ptr = RSTRING_PTR(from);
658  plen = RSTRING_LEN(from);
659 
660  if (len == 0 && type == 'm') {
661  encodes(res, ptr, plen, type, 0);
662  ptr += plen;
663  break;
664  }
665  if (len <= 2)
666  len = 45;
667  else if (len > 63 && type == 'u')
668  len = 63;
669  else
670  len = len / 3 * 3;
671  while (plen > 0) {
672  long todo;
673 
674  if (plen > len)
675  todo = len;
676  else
677  todo = plen;
678  encodes(res, ptr, todo, type, 1);
679  plen -= todo;
680  ptr += todo;
681  }
682  break;
683 
684  case 'M': /* quoted-printable encoded string */
685  from = rb_obj_as_string(NEXTFROM);
686  if (len <= 1)
687  len = 72;
688  qpencode(res, from, len);
689  break;
690 
691  case 'P': /* pointer to packed byte string */
692  from = THISFROM;
693  if (!NIL_P(from)) {
694  StringValue(from);
695  if (RSTRING_LEN(from) < len) {
696  rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
697  RSTRING_LEN(from), len);
698  }
699  }
700  len = 1;
701  /* FALL THROUGH */
702  case 'p': /* pointer to string */
703  while (len-- > 0) {
704  char *t;
705  from = NEXTFROM;
706  if (NIL_P(from)) {
707  t = 0;
708  }
709  else {
710  t = StringValuePtr(from);
711  }
712  if (!associates) {
713  associates = rb_ary_new();
714  }
715  rb_ary_push(associates, from);
716  rb_str_buf_cat(res, (char*)&t, sizeof(char*));
717  }
718  break;
719 
720  case 'w': /* BER compressed integer */
721  while (len-- > 0) {
722  VALUE buf = rb_str_new(0, 0);
723  size_t numbytes;
724  int sign;
725  char *cp;
726 
727  from = NEXTFROM;
728  from = rb_to_int(from);
729  numbytes = rb_absint_numwords(from, 7, NULL);
730  if (numbytes == 0)
731  numbytes = 1;
732  buf = rb_str_new(NULL, numbytes);
733 
735 
736  if (sign < 0)
737  rb_raise(rb_eArgError, "can't compress negative numbers");
738  if (sign == 2)
739  rb_bug("buffer size problem?");
740 
741  cp = RSTRING_PTR(buf);
742  while (1 < numbytes) {
743  *cp |= 0x80;
744  cp++;
745  numbytes--;
746  }
747 
749  }
750  break;
751 
752  default: {
753  unknown_directive("pack", type, fmt);
754  break;
755  }
756  }
757  }
758 
759  if (associates) {
760  str_associate(res, associates);
761  }
762  switch (enc_info) {
763  case 1:
765  break;
766  case 2:
768  break;
769  default:
770  /* do nothing, keep ASCII-8BIT */
771  break;
772  }
773  return res;
774 }
775 
/* 64-character alphabet for the 'u' (uuencode) directive, 16 per row. */
static const char uu_table[] =
    "`!\"#$%&'()*+,-./"
    "0123456789:;<=>?"
    "@ABCDEFGHIJKLMNO"
    "PQRSTUVWXYZ[\\]^_";

/* 64-character alphabet for the 'm' (base64) directive. */
static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
780 
781 static void
782 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
783 {
784  enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
785  char buff[buff_size + 1]; /* +1 for tail_lf */
786  long i = 0;
787  const char *const trans = type == 'u' ? uu_table : b64_table;
788  char padding;
789  const unsigned char *s = (const unsigned char *)s0;
790 
791  if (type == 'u') {
792  buff[i++] = (char)len + ' ';
793  padding = '`';
794  }
795  else {
796  padding = '=';
797  }
798  while (len >= input_unit) {
799  while (len >= input_unit && buff_size-i >= encoded_unit) {
800  buff[i++] = trans[077 & (*s >> 2)];
801  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
802  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
803  buff[i++] = trans[077 & s[2]];
804  s += input_unit;
805  len -= input_unit;
806  }
807  if (buff_size-i < encoded_unit) {
808  rb_str_buf_cat(str, buff, i);
809  i = 0;
810  }
811  }
812 
813  if (len == 2) {
814  buff[i++] = trans[077 & (*s >> 2)];
815  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
816  buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
817  buff[i++] = padding;
818  }
819  else if (len == 1) {
820  buff[i++] = trans[077 & (*s >> 2)];
821  buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
822  buff[i++] = padding;
823  buff[i++] = padding;
824  }
825  if (tail_lf) buff[i++] = '\n';
826  rb_str_buf_cat(str, buff, i);
827  if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
828 }
829 
830 static const char hex_table[] = "0123456789ABCDEF";
831 
832 static void
833 qpencode(VALUE str, VALUE from, long len)
834 {
835  char buff[1024];
836  long i = 0, n = 0, prev = EOF;
837  unsigned char *s = (unsigned char*)RSTRING_PTR(from);
838  unsigned char *send = s + RSTRING_LEN(from);
839 
840  while (s < send) {
841  if ((*s > 126) ||
842  (*s < 32 && *s != '\n' && *s != '\t') ||
843  (*s == '=')) {
844  buff[i++] = '=';
845  buff[i++] = hex_table[*s >> 4];
846  buff[i++] = hex_table[*s & 0x0f];
847  n += 3;
848  prev = EOF;
849  }
850  else if (*s == '\n') {
851  if (prev == ' ' || prev == '\t') {
852  buff[i++] = '=';
853  buff[i++] = *s;
854  }
855  buff[i++] = *s;
856  n = 0;
857  prev = *s;
858  }
859  else {
860  buff[i++] = *s;
861  n++;
862  prev = *s;
863  }
864  if (n > len) {
865  buff[i++] = '=';
866  buff[i++] = '\n';
867  n = 0;
868  prev = '\n';
869  }
870  if (i > 1024 - 5) {
871  rb_str_buf_cat(str, buff, i);
872  i = 0;
873  }
874  s++;
875  }
876  if (n > 0) {
877  buff[i++] = '=';
878  buff[i++] = '\n';
879  }
880  if (i > 0) {
881  rb_str_buf_cat(str, buff, i);
882  }
883 }
884 
885 static inline int
886 hex2num(char c)
887 {
888  int n;
889  n = ruby_digit36_to_number_table[(unsigned char)c];
890  if (16 <= n)
891  n = -1;
892  return n;
893 }
894 
/*
 * Clamps `len` to the number of whole `sz`-byte items left between `s`
 * and `send`.  Unless `star` is set, the number of items that could not
 * be satisfied is left in `tmp_len`; otherwise `tmp_len` is 0.
 * Uses the caller's locals s, send, len, star and tmp_len.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
    const long pack_avail_ = (long)((send-s)/(sz)); \
    tmp_len = 0; \
    if (len > pack_avail_) { \
        if (!star) { \
            tmp_len = len - pack_avail_; \
        } \
        len = pack_avail_; \
    } \
} while (0)

/*
 * In UNPACK_ARRAY mode, extends `ary` by `tmp_len` slots by storing nil
 * at the new last index.
 */
#define PACK_ITEM_ADJUST() do { \
    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)

/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 */
#if !(defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150)
# define AVOID_CC_BUG
#else
# define AVOID_CC_BUG volatile
#endif

/* unpack mode */
#define UNPACK_ARRAY 0  /* results are pushed onto an array */
#define UNPACK_BLOCK 1  /* each result is yielded */
#define UNPACK_1 2      /* the first result is returned directly */
924 
925 static VALUE
926 pack_unpack_internal(VALUE str, VALUE fmt, int mode)
927 {
928 #define hexdigits ruby_hexdigits
929  char *s, *send;
930  char *p, *pend;
931  VALUE ary;
932  char type;
933  long len;
934  AVOID_CC_BUG long tmp_len;
935  int star;
936 #ifdef NATINT_PACK
937  int natint; /* native integer */
938 #endif
939  int signed_p, integer_size, bigendian_p;
940 #define UNPACK_PUSH(item) do {\
941  VALUE item_val = (item);\
942  if ((mode) == UNPACK_BLOCK) {\
943  rb_yield(item_val);\
944  }\
945  else if ((mode) == UNPACK_ARRAY) {\
946  rb_ary_push(ary, item_val);\
947  }\
948  else /* if ((mode) == UNPACK_1) { */ {\
949  return item_val; \
950  }\
951  } while (0)
952 
953  StringValue(str);
954  StringValue(fmt);
955  s = RSTRING_PTR(str);
956  send = s + RSTRING_LEN(str);
957  p = RSTRING_PTR(fmt);
958  pend = p + RSTRING_LEN(fmt);
959 
960  ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
961  while (p < pend) {
962  int explicit_endian = 0;
963  type = *p++;
964 #ifdef NATINT_PACK
965  natint = 0;
966 #endif
967 
968  if (ISSPACE(type)) continue;
969  if (type == '#') {
970  while ((p < pend) && (*p != '\n')) {
971  p++;
972  }
973  continue;
974  }
975 
976  star = 0;
977  {
978  modifiers:
979  switch (*p) {
980  case '_':
981  case '!':
982 
983  if (strchr(natstr, type)) {
984 #ifdef NATINT_PACK
985  natint = 1;
986 #endif
987  p++;
988  }
989  else {
990  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
991  }
992  goto modifiers;
993 
994  case '<':
995  case '>':
996  if (!strchr(endstr, type)) {
997  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
998  }
999  if (explicit_endian) {
1000  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1001  }
1002  explicit_endian = *p++;
1003  goto modifiers;
1004  }
1005  }
1006 
1007  if (p >= pend)
1008  len = 1;
1009  else if (*p == '*') {
1010  star = 1;
1011  len = send - s;
1012  p++;
1013  }
1014  else if (ISDIGIT(*p)) {
1015  errno = 0;
1016  len = STRTOUL(p, (char**)&p, 10);
1017  if (len < 0 || errno) {
1018  rb_raise(rb_eRangeError, "pack length too big");
1019  }
1020  }
1021  else {
1022  len = (type != '@');
1023  }
1024 
1025  switch (type) {
1026  case '%':
1027  rb_raise(rb_eArgError, "%% is not supported");
1028  break;
1029 
1030  case 'A':
1031  if (len > send - s) len = send - s;
1032  {
1033  long end = len;
1034  char *t = s + len - 1;
1035 
1036  while (t >= s) {
1037  if (*t != ' ' && *t != '\0') break;
1038  t--; len--;
1039  }
1040  UNPACK_PUSH(rb_str_new(s, len));
1041  s += end;
1042  }
1043  break;
1044 
1045  case 'Z':
1046  {
1047  char *t = s;
1048 
1049  if (len > send-s) len = send-s;
1050  while (t < s+len && *t) t++;
1051  UNPACK_PUSH(rb_str_new(s, t-s));
1052  if (t < send) t++;
1053  s = star ? t : s+len;
1054  }
1055  break;
1056 
1057  case 'a':
1058  if (len > send - s) len = send - s;
1059  UNPACK_PUSH(rb_str_new(s, len));
1060  s += len;
1061  break;
1062 
1063  case 'b':
1064  {
1065  VALUE bitstr;
1066  char *t;
1067  int bits;
1068  long i;
1069 
1070  if (p[-1] == '*' || len > (send - s) * 8)
1071  len = (send - s) * 8;
1072  bits = 0;
1073  bitstr = rb_usascii_str_new(0, len);
1074  t = RSTRING_PTR(bitstr);
1075  for (i=0; i<len; i++) {
1076  if (i & 7) bits >>= 1;
1077  else bits = (unsigned char)*s++;
1078  *t++ = (bits & 1) ? '1' : '0';
1079  }
1080  UNPACK_PUSH(bitstr);
1081  }
1082  break;
1083 
1084  case 'B':
1085  {
1086  VALUE bitstr;
1087  char *t;
1088  int bits;
1089  long i;
1090 
1091  if (p[-1] == '*' || len > (send - s) * 8)
1092  len = (send - s) * 8;
1093  bits = 0;
1094  bitstr = rb_usascii_str_new(0, len);
1095  t = RSTRING_PTR(bitstr);
1096  for (i=0; i<len; i++) {
1097  if (i & 7) bits <<= 1;
1098  else bits = (unsigned char)*s++;
1099  *t++ = (bits & 128) ? '1' : '0';
1100  }
1101  UNPACK_PUSH(bitstr);
1102  }
1103  break;
1104 
1105  case 'h':
1106  {
1107  VALUE bitstr;
1108  char *t;
1109  int bits;
1110  long i;
1111 
1112  if (p[-1] == '*' || len > (send - s) * 2)
1113  len = (send - s) * 2;
1114  bits = 0;
1115  bitstr = rb_usascii_str_new(0, len);
1116  t = RSTRING_PTR(bitstr);
1117  for (i=0; i<len; i++) {
1118  if (i & 1)
1119  bits >>= 4;
1120  else
1121  bits = (unsigned char)*s++;
1122  *t++ = hexdigits[bits & 15];
1123  }
1124  UNPACK_PUSH(bitstr);
1125  }
1126  break;
1127 
1128  case 'H':
1129  {
1130  VALUE bitstr;
1131  char *t;
1132  int bits;
1133  long i;
1134 
1135  if (p[-1] == '*' || len > (send - s) * 2)
1136  len = (send - s) * 2;
1137  bits = 0;
1138  bitstr = rb_usascii_str_new(0, len);
1139  t = RSTRING_PTR(bitstr);
1140  for (i=0; i<len; i++) {
1141  if (i & 1)
1142  bits <<= 4;
1143  else
1144  bits = (unsigned char)*s++;
1145  *t++ = hexdigits[(bits >> 4) & 15];
1146  }
1147  UNPACK_PUSH(bitstr);
1148  }
1149  break;
1150 
1151  case 'c':
1152  signed_p = 1;
1153  integer_size = 1;
1154  bigendian_p = BIGENDIAN_P(); /* not effective */
1155  goto unpack_integer;
1156 
1157  case 'C':
1158  signed_p = 0;
1159  integer_size = 1;
1160  bigendian_p = BIGENDIAN_P(); /* not effective */
1161  goto unpack_integer;
1162 
1163  case 's':
1164  signed_p = 1;
1165  integer_size = NATINT_LEN(short, 2);
1166  bigendian_p = BIGENDIAN_P();
1167  goto unpack_integer;
1168 
1169  case 'S':
1170  signed_p = 0;
1171  integer_size = NATINT_LEN(short, 2);
1172  bigendian_p = BIGENDIAN_P();
1173  goto unpack_integer;
1174 
1175  case 'i':
1176  signed_p = 1;
1177  integer_size = (int)sizeof(int);
1178  bigendian_p = BIGENDIAN_P();
1179  goto unpack_integer;
1180 
1181  case 'I':
1182  signed_p = 0;
1183  integer_size = (int)sizeof(int);
1184  bigendian_p = BIGENDIAN_P();
1185  goto unpack_integer;
1186 
1187  case 'l':
1188  signed_p = 1;
1189  integer_size = NATINT_LEN(long, 4);
1190  bigendian_p = BIGENDIAN_P();
1191  goto unpack_integer;
1192 
1193  case 'L':
1194  signed_p = 0;
1195  integer_size = NATINT_LEN(long, 4);
1196  bigendian_p = BIGENDIAN_P();
1197  goto unpack_integer;
1198 
1199  case 'q':
1200  signed_p = 1;
1201  integer_size = NATINT_LEN_Q;
1202  bigendian_p = BIGENDIAN_P();
1203  goto unpack_integer;
1204 
1205  case 'Q':
1206  signed_p = 0;
1207  integer_size = NATINT_LEN_Q;
1208  bigendian_p = BIGENDIAN_P();
1209  goto unpack_integer;
1210 
1211  case 'j':
1212  signed_p = 1;
1213  integer_size = sizeof(intptr_t);
1214  bigendian_p = BIGENDIAN_P();
1215  goto unpack_integer;
1216 
1217  case 'J':
1218  signed_p = 0;
1219  integer_size = sizeof(uintptr_t);
1220  bigendian_p = BIGENDIAN_P();
1221  goto unpack_integer;
1222 
1223  case 'n':
1224  signed_p = 0;
1225  integer_size = 2;
1226  bigendian_p = 1;
1227  goto unpack_integer;
1228 
1229  case 'N':
1230  signed_p = 0;
1231  integer_size = 4;
1232  bigendian_p = 1;
1233  goto unpack_integer;
1234 
1235  case 'v':
1236  signed_p = 0;
1237  integer_size = 2;
1238  bigendian_p = 0;
1239  goto unpack_integer;
1240 
1241  case 'V':
1242  signed_p = 0;
1243  integer_size = 4;
1244  bigendian_p = 0;
1245  goto unpack_integer;
1246 
1247  unpack_integer:
1248  if (explicit_endian) {
1249  bigendian_p = explicit_endian == '>';
1250  }
1251  PACK_LENGTH_ADJUST_SIZE(integer_size);
1252  while (len-- > 0) {
1253  int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1254  VALUE val;
1255  if (signed_p)
1256  flags |= INTEGER_PACK_2COMP;
1257  val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1258  UNPACK_PUSH(val);
1259  s += integer_size;
1260  }
1261  PACK_ITEM_ADJUST();
1262  break;
1263 
1264  case 'f':
1265  case 'F':
1266  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1267  while (len-- > 0) {
1268  float tmp;
1269  memcpy(&tmp, s, sizeof(float));
1270  s += sizeof(float);
1271  UNPACK_PUSH(DBL2NUM((double)tmp));
1272  }
1273  PACK_ITEM_ADJUST();
1274  break;
1275 
1276  case 'e':
1277  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1278  while (len-- > 0) {
1279  FLOAT_CONVWITH(tmp);
1280  memcpy(tmp.buf, s, sizeof(float));
1281  s += sizeof(float);
1282  VTOHF(tmp);
1283  UNPACK_PUSH(DBL2NUM(tmp.f));
1284  }
1285  PACK_ITEM_ADJUST();
1286  break;
1287 
1288  case 'E':
1289  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1290  while (len-- > 0) {
1291  DOUBLE_CONVWITH(tmp);
1292  memcpy(tmp.buf, s, sizeof(double));
1293  s += sizeof(double);
1294  VTOHD(tmp);
1295  UNPACK_PUSH(DBL2NUM(tmp.d));
1296  }
1297  PACK_ITEM_ADJUST();
1298  break;
1299 
1300  case 'D':
1301  case 'd':
1302  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1303  while (len-- > 0) {
1304  double tmp;
1305  memcpy(&tmp, s, sizeof(double));
1306  s += sizeof(double);
1307  UNPACK_PUSH(DBL2NUM(tmp));
1308  }
1309  PACK_ITEM_ADJUST();
1310  break;
1311 
1312  case 'g':
1313  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1314  while (len-- > 0) {
1315  FLOAT_CONVWITH(tmp);
1316  memcpy(tmp.buf, s, sizeof(float));
1317  s += sizeof(float);
1318  NTOHF(tmp);
1319  UNPACK_PUSH(DBL2NUM(tmp.f));
1320  }
1321  PACK_ITEM_ADJUST();
1322  break;
1323 
1324  case 'G':
1325  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1326  while (len-- > 0) {
1327  DOUBLE_CONVWITH(tmp);
1328  memcpy(tmp.buf, s, sizeof(double));
1329  s += sizeof(double);
1330  NTOHD(tmp);
1331  UNPACK_PUSH(DBL2NUM(tmp.d));
1332  }
1333  PACK_ITEM_ADJUST();
1334  break;
1335 
1336  case 'U':
1337  if (len > send - s) len = send - s;
1338  while (len > 0 && s < send) {
1339  long alen = send - s;
1340  unsigned long l;
1341 
1342  l = utf8_to_uv(s, &alen);
1343  s += alen; len--;
1344  UNPACK_PUSH(ULONG2NUM(l));
1345  }
1346  break;
1347 
1348  case 'u':
1349  {
1350  VALUE buf = rb_str_new(0, (send - s)*3/4);
1351  char *ptr = RSTRING_PTR(buf);
1352  long total = 0;
1353 
1354  while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1355  long a,b,c,d;
1356  char hunk[3];
1357 
1358  len = ((unsigned char)*s++ - ' ') & 077;
1359 
1360  total += len;
1361  if (total > RSTRING_LEN(buf)) {
1362  len -= total - RSTRING_LEN(buf);
1363  total = RSTRING_LEN(buf);
1364  }
1365 
1366  while (len > 0) {
1367  long mlen = len > 3 ? 3 : len;
1368 
1369  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1370  a = ((unsigned char)*s++ - ' ') & 077;
1371  else
1372  a = 0;
1373  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1374  b = ((unsigned char)*s++ - ' ') & 077;
1375  else
1376  b = 0;
1377  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1378  c = ((unsigned char)*s++ - ' ') & 077;
1379  else
1380  c = 0;
1381  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1382  d = ((unsigned char)*s++ - ' ') & 077;
1383  else
1384  d = 0;
1385  hunk[0] = (char)(a << 2 | b >> 4);
1386  hunk[1] = (char)(b << 4 | c >> 2);
1387  hunk[2] = (char)(c << 6 | d);
1388  memcpy(ptr, hunk, mlen);
1389  ptr += mlen;
1390  len -= mlen;
1391  }
1392  if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1393  s++; /* possible checksum byte */
1394  if (s < send && *s == '\r') s++;
1395  if (s < send && *s == '\n') s++;
1396  }
1397 
1398  rb_str_set_len(buf, total);
1399  UNPACK_PUSH(buf);
1400  }
1401  break;
1402 
1403  case 'm':
1404  {
1405  VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1406  char *ptr = RSTRING_PTR(buf);
1407  int a = -1,b = -1,c = 0,d = 0;
1408  static signed char b64_xtable[256];
1409 
1410  if (b64_xtable['/'] <= 0) {
1411  int i;
1412 
1413  for (i = 0; i < 256; i++) {
1414  b64_xtable[i] = -1;
1415  }
1416  for (i = 0; i < 64; i++) {
1417  b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1418  }
1419  }
1420  if (len == 0) {
1421  while (s < send) {
1422  a = b = c = d = -1;
1423  a = b64_xtable[(unsigned char)*s++];
1424  if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1425  b = b64_xtable[(unsigned char)*s++];
1426  if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1427  if (*s == '=') {
1428  if (s + 2 == send && *(s + 1) == '=') break;
1429  rb_raise(rb_eArgError, "invalid base64");
1430  }
1431  c = b64_xtable[(unsigned char)*s++];
1432  if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1433  if (s + 1 == send && *s == '=') break;
1434  d = b64_xtable[(unsigned char)*s++];
1435  if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1436  *ptr++ = castchar(a << 2 | b >> 4);
1437  *ptr++ = castchar(b << 4 | c >> 2);
1438  *ptr++ = castchar(c << 6 | d);
1439  }
1440  if (c == -1) {
1441  *ptr++ = castchar(a << 2 | b >> 4);
1442  if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1443  }
1444  else if (d == -1) {
1445  *ptr++ = castchar(a << 2 | b >> 4);
1446  *ptr++ = castchar(b << 4 | c >> 2);
1447  if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1448  }
1449  }
1450  else {
1451  while (s < send) {
1452  a = b = c = d = -1;
1453  while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1454  if (s >= send) break;
1455  s++;
1456  while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1457  if (s >= send) break;
1458  s++;
1459  while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1460  if (*s == '=' || s >= send) break;
1461  s++;
1462  while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1463  if (*s == '=' || s >= send) break;
1464  s++;
1465  *ptr++ = castchar(a << 2 | b >> 4);
1466  *ptr++ = castchar(b << 4 | c >> 2);
1467  *ptr++ = castchar(c << 6 | d);
1468  a = -1;
1469  }
1470  if (a != -1 && b != -1) {
1471  if (c == -1)
1472  *ptr++ = castchar(a << 2 | b >> 4);
1473  else {
1474  *ptr++ = castchar(a << 2 | b >> 4);
1475  *ptr++ = castchar(b << 4 | c >> 2);
1476  }
1477  }
1478  }
1480  UNPACK_PUSH(buf);
1481  }
1482  break;
1483 
1484  case 'M':
1485  {
1486  VALUE buf = rb_str_new(0, send - s);
1487  char *ptr = RSTRING_PTR(buf), *ss = s;
1488  int csum = 0;
1489  int c1, c2;
1490 
1491  while (s < send) {
1492  if (*s == '=') {
1493  if (++s == send) break;
1494  if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1495  s++;
1496  if (*s != '\n') {
1497  if ((c1 = hex2num(*s)) == -1) break;
1498  if (++s == send) break;
1499  if ((c2 = hex2num(*s)) == -1) break;
1500  csum |= *ptr++ = castchar(c1 << 4 | c2);
1501  }
1502  }
1503  else {
1504  csum |= *ptr++ = *s;
1505  }
1506  s++;
1507  ss = s;
1508  }
1510  rb_str_buf_cat(buf, ss, send-ss);
1513  UNPACK_PUSH(buf);
1514  }
1515  break;
1516 
1517  case '@':
1518  if (len > RSTRING_LEN(str))
1519  rb_raise(rb_eArgError, "@ outside of string");
1520  s = RSTRING_PTR(str) + len;
1521  break;
1522 
1523  case 'X':
1524  if (len > s - RSTRING_PTR(str))
1525  rb_raise(rb_eArgError, "X outside of string");
1526  s -= len;
1527  break;
1528 
1529  case 'x':
1530  if (len > send - s)
1531  rb_raise(rb_eArgError, "x outside of string");
1532  s += len;
1533  break;
1534 
1535  case 'P':
1536  if (sizeof(char *) <= (size_t)(send - s)) {
1537  VALUE tmp = Qnil;
1538  char *t;
1539 
1540  memcpy(&t, s, sizeof(char *));
1541  s += sizeof(char *);
1542 
1543  if (t) {
1544  VALUE a;
1545  const VALUE *p, *pend;
1546 
1547  if (!(a = str_associated(str))) {
1548  rb_raise(rb_eArgError, "no associated pointer");
1549  }
1550  p = RARRAY_CONST_PTR(a);
1551  pend = p + RARRAY_LEN(a);
1552  while (p < pend) {
1553  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1554  if (len < RSTRING_LEN(*p)) {
1555  tmp = rb_str_new(t, len);
1556  str_associate(tmp, a);
1557  }
1558  else {
1559  tmp = *p;
1560  }
1561  break;
1562  }
1563  p++;
1564  }
1565  if (p == pend) {
1566  rb_raise(rb_eArgError, "non associated pointer");
1567  }
1568  }
1569  UNPACK_PUSH(tmp);
1570  }
1571  break;
1572 
1573  case 'p':
1574  if (len > (long)((send - s) / sizeof(char *)))
1575  len = (send - s) / sizeof(char *);
1576  while (len-- > 0) {
1577  if ((size_t)(send - s) < sizeof(char *))
1578  break;
1579  else {
1580  VALUE tmp = Qnil;
1581  char *t;
1582 
1583  memcpy(&t, s, sizeof(char *));
1584  s += sizeof(char *);
1585 
1586  if (t) {
1587  VALUE a;
1588  const VALUE *p, *pend;
1589 
1590  if (!(a = str_associated(str))) {
1591  rb_raise(rb_eArgError, "no associated pointer");
1592  }
1593  p = RARRAY_CONST_PTR(a);
1594  pend = p + RARRAY_LEN(a);
1595  while (p < pend) {
1596  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1597  tmp = *p;
1598  break;
1599  }
1600  p++;
1601  }
1602  if (p == pend) {
1603  rb_raise(rb_eArgError, "non associated pointer");
1604  }
1605  }
1606  UNPACK_PUSH(tmp);
1607  }
1608  }
1609  break;
1610 
1611  case 'w':
1612  {
1613  char *s0 = s;
1614  while (len > 0 && s < send) {
1615  if (*s & 0x80) {
1616  s++;
1617  }
1618  else {
1619  s++;
1621  len--;
1622  s0 = s;
1623  }
1624  }
1625  }
1626  break;
1627 
1628  default:
1629  unknown_directive("unpack", type, fmt);
1630  break;
1631  }
1632  }
1633 
1634  return ary;
1635 }
1636 
1637 static VALUE
1638 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1639 {
1640  int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1641  return pack_unpack_internal(str, fmt, mode);
1642 }
1643 
1644 static VALUE
1645 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1646 {
1647  return pack_unpack_internal(str, fmt, UNPACK_1);
1648 }
1649 
1650 int
1651 rb_uv_to_utf8(char buf[6], unsigned long uv)
1652 {
1653  if (uv <= 0x7f) {
1654  buf[0] = (char)uv;
1655  return 1;
1656  }
1657  if (uv <= 0x7ff) {
1658  buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1659  buf[1] = castchar((uv&0x3f)|0x80);
1660  return 2;
1661  }
1662  if (uv <= 0xffff) {
1663  buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1664  buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1665  buf[2] = castchar((uv&0x3f)|0x80);
1666  return 3;
1667  }
1668  if (uv <= 0x1fffff) {
1669  buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1670  buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1671  buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1672  buf[3] = castchar((uv&0x3f)|0x80);
1673  return 4;
1674  }
1675  if (uv <= 0x3ffffff) {
1676  buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1677  buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1678  buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1679  buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1680  buf[4] = castchar((uv&0x3f)|0x80);
1681  return 5;
1682  }
1683  if (uv <= 0x7fffffff) {
1684  buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1685  buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1686  buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1687  buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1688  buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1689  buf[5] = castchar((uv&0x3f)|0x80);
1690  return 6;
1691  }
1692  rb_raise(rb_eRangeError, "pack(U): value out of range");
1693 
1695 }
1696 
/*
 * Smallest code point that legitimately needs a sequence of a given length.
 * Entry [k] belongs to a (k + 1)-byte sequence; utf8_to_uv() looks up
 * utf8_limits[length - 1] and rejects any decoded value below it as a
 * redundant (overlong) encoding.  The last entry is one past the largest
 * value that fits in six bytes.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte  */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1706 
1707 static unsigned long
1708 utf8_to_uv(const char *p, long *lenp)
1709 {
1710  int c = *p++ & 0xff;
1711  unsigned long uv = c;
1712  long n;
1713 
1714  if (!(uv & 0x80)) {
1715  *lenp = 1;
1716  return uv;
1717  }
1718  if (!(uv & 0x40)) {
1719  *lenp = 1;
1720  rb_raise(rb_eArgError, "malformed UTF-8 character");
1721  }
1722 
1723  if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1724  else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1725  else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1726  else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1727  else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1728  else {
1729  *lenp = 1;
1730  rb_raise(rb_eArgError, "malformed UTF-8 character");
1731  }
1732  if (n > *lenp) {
1733  rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1734  n, *lenp);
1735  }
1736  *lenp = n--;
1737  if (n != 0) {
1738  while (n--) {
1739  c = *p++ & 0xff;
1740  if ((c & 0xc0) != 0x80) {
1741  *lenp -= n + 1;
1742  rb_raise(rb_eArgError, "malformed UTF-8 character");
1743  }
1744  else {
1745  c &= 0x3f;
1746  uv = uv << 6 | c;
1747  }
1748  }
1749  }
1750  n = *lenp - 1;
1751  if (uv < utf8_limits[n]) {
1752  rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1753  }
1754  return uv;
1755 }
1756 
1757 #include "pack.rbinc"
1758 
1759 void
1761 {
1762  load_pack();
1763 
1764  id_associated = rb_make_internal_id();
1765 }
DOUBLE_SWAPPER
Definition: pack.c:79
UNPACK_ARRAY
#define UNPACK_ARRAY
Definition: pack.c:921
HTOND
#define HTOND(x)
Definition: pack.c:103
i
uint32_t i
Definition: rb_mjit_min_header-2.7.1.h:5425
ISASCII
#define ISASCII(c)
Definition: ruby.h:2304
ID
unsigned long ID
Definition: ruby.h:103
FLOAT_SWAPPER
Definition: pack.c:74
obj
const VALUE VALUE obj
Definition: rb_mjit_min_header-2.7.1.h:5703
FLOAT_SWAPPER::u
uint32_t u
Definition: pack.c:76
double
double
Definition: rb_mjit_min_header-2.7.1.h:5884
ENC_CODERANGE_VALID
#define ENC_CODERANGE_VALID
Definition: encoding.h:105
rb_str_buf_new
VALUE rb_str_buf_new(long)
Definition: string.c:1315
rb_block_given_p
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:898
ISDIGIT
#define ISDIGIT(c)
Definition: ruby.h:2312
rb_warning
void rb_warning(const char *fmt,...)
Definition: error.c:334
hexdigits
#define hexdigits
NATINT_LEN_Q
#define NATINT_LEN_Q
Definition: pack.c:39
rb_make_internal_id
ID rb_make_internal_id(void)
Definition: symbol.c:810
n
const char size_t n
Definition: rb_mjit_min_header-2.7.1.h:5417
strchr
char * strchr(char *, char)
RSTRING_PTR
#define RSTRING_PTR(str)
Definition: ruby.h:1009
NUM2LONG
#define NUM2LONG(x)
Definition: ruby.h:679
INFINITY
#define INFINITY
Definition: missing.h:149
rb_utf8_encindex
int rb_utf8_encindex(void)
Definition: encoding.c:1334
VALUE
unsigned long VALUE
Definition: ruby.h:102
rb_obj_as_string
VALUE rb_obj_as_string(VALUE)
Definition: string.c:1440
rb_eArgError
VALUE rb_eArgError
Definition: error.c:923
encoding.h
RB_TYPE_P
#define RB_TYPE_P(obj, type)
Definition: ruby.h:560
fmt
const VALUE int int int int int int VALUE char * fmt
Definition: rb_mjit_min_header-2.7.1.h:6423
SIGNED_VALUE
#define SIGNED_VALUE
Definition: ruby.h:104
INTEGER_PACK_LITTLE_ENDIAN
#define INTEGER_PACK_LITTLE_ENDIAN
Definition: intern.h:162
uint64_t
unsigned long long uint64_t
Definition: sha2.h:102
rb_uv_to_utf8
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Definition: pack.c:1651
HTONF
#define HTONF(x)
Definition: pack.c:97
ISALPHA
#define ISALPHA(c)
Definition: ruby.h:2311
VTOHD
#define VTOHD(x)
Definition: pack.c:106
ptr
struct RIMemo * ptr
Definition: debug.c:74
rb_str_new
#define rb_str_new(str, len)
Definition: rb_mjit_min_header-2.7.1.h:6077
NAN
#define NAN
Definition: missing.h:156
Qfalse
#define Qfalse
Definition: ruby.h:467
uintptr_t
unsigned int uintptr_t
Definition: win32.h:106
DBL2NUM
#define DBL2NUM(dbl)
Definition: ruby.h:967
rb_ivar_lookup
VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef)
Definition: variable.c:1035
NULL
#define NULL
Definition: _sdbm.c:101
char
#define char
Definition: rb_mjit_min_header-2.7.1.h:2844
uint32_t
unsigned int uint32_t
Definition: sha2.h:101
PRIsVALUE
#define PRIsVALUE
Definition: ruby.h:166
DOUBLE_CONVWITH
#define DOUBLE_CONVWITH(x)
Definition: pack.c:102
rb_ascii8bit_encindex
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1322
INTEGER_PACK_BIG_ENDIAN
#define INTEGER_PACK_BIG_ENDIAN
Definition: intern.h:165
add
#define add(x, y)
Definition: date_strftime.c:23
DOUBLE_SWAPPER::u
uint64_t u
Definition: pack.c:81
THISFROM
#define THISFROM
rb_raise
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2669
rb_usascii_str_new
#define rb_usascii_str_new(str, len)
Definition: rb_mjit_min_header-2.7.1.h:6079
rb_eRangeError
VALUE rb_eRangeError
Definition: error.c:926
rb_to_float
VALUE rb_to_float(VALUE)
Converts a Numeric object into Float.
Definition: object.c:3542
VTOHF
#define VTOHF(x)
Definition: pack.c:100
rb_integer_pack
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3547
ULONG2NUM
#define ULONG2NUM(x)
Definition: ruby.h:1645
ruby_digit36_to_number_table
const RUBY_EXTERN signed char ruby_digit36_to_number_table[]
Definition: escape.c:6
UNPACK_1
#define UNPACK_1
Definition: pack.c:923
NTOHF
#define NTOHF(x)
Definition: pack.c:99
castchar
#define castchar(from)
UNPACK_BLOCK
#define UNPACK_BLOCK
Definition: pack.c:922
Init_pack
void Init_pack(void)
Definition: pack.c:1760
rb_ary_push
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1195
NEXTFROM
#define NEXTFROM
PACK_ITEM_ADJUST
#define PACK_ITEM_ADJUST()
Definition: pack.c:905
isnan
#define isnan(x)
Definition: win32.h:369
rb_eTypeError
VALUE rb_eTypeError
Definition: error.c:922
rb_integer_unpack
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3633
NTOHD
#define NTOHD(x)
Definition: pack.c:105
ENCODING_CODERANGE_SET
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:113
rb_eRuntimeError
VALUE rb_eRuntimeError
Definition: error.c:920
UNPACK_PUSH
#define UNPACK_PUSH(item)
PACK_LENGTH_ADJUST_SIZE
#define PACK_LENGTH_ADJUST_SIZE(sz)
Definition: pack.c:895
rb_enc_set_index
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:830
StringValuePtr
#define StringValuePtr(v)
Definition: ruby.h:603
rb_str_set_len
void rb_str_set_len(VALUE, long)
Definition: string.c:2692
AVOID_CC_BUG
#define AVOID_CC_BUG
Definition: pack.c:917
STRTOUL
#define STRTOUL(str, endptr, base)
Definition: ruby.h:2327
rb_to_int
VALUE rb_to_int(VALUE)
Converts val into Integer.
Definition: object.c:3021
ISSPACE
#define ISSPACE(c)
Definition: ruby.h:2307
le
#define le(x, y)
Definition: time.c:85
EOF
#define EOF
Definition: vsnprintf.c:203
RARRAY_CONST_PTR
#define RARRAY_CONST_PTR(a)
Definition: ruby.h:1072
RARRAY_LEN
#define RARRAY_LEN(a)
Definition: ruby.h:1070
HTOVF
#define HTOVF(x)
Definition: pack.c:98
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
rb_usascii_encindex
int rb_usascii_encindex(void)
Definition: encoding.c:1346
ISPRINT
#define ISPRINT(c)
Definition: ruby.h:2305
rb_bug
void rb_bug(const char *fmt,...)
Definition: error.c:634
StringValue
use StringValue() instead")))
internal.h
f
#define f
rb_absint_numwords
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Definition: bignum.c:3382
rb_str_subseq
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:2474
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
ENC_CODERANGE_7BIT
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
int
__inline__ int
Definition: rb_mjit_min_header-2.7.1.h:2807
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
memcpy
void * memcpy(void *__restrict, const void *__restrict, size_t)
snprintf
int snprintf(char *__restrict, size_t, const char *__restrict,...) __attribute__((__format__(__printf__
FLOAT_SWAPPER::f
float f
Definition: pack.c:75
BIGENDIAN_P
#define BIGENDIAN_P()
Definition: pack.c:65
rb_obj_classname
const char * rb_obj_classname(VALUE)
Definition: variable.c:289
HTOVD
#define HTOVD(x)
Definition: pack.c:104
RFLOAT_VALUE
#define RFLOAT_VALUE(v)
Definition: ruby.h:966
v
int VALUE v
Definition: rb_mjit_min_header-2.7.1.h:12257
FLOAT_CONVWITH
#define FLOAT_CONVWITH(x)
Definition: pack.c:96
NATINT_LEN
#define NATINT_LEN(type, len)
Definition: pack.c:69
errno
int errno
len
uint8_t len
Definition: escape.c:17
intptr_t
int intptr_t
Definition: win32.h:90
INTEGER_PACK_2COMP
#define INTEGER_PACK_2COMP
Definition: intern.h:156
rb_ivar_set
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
T_STRING
#define T_STRING
Definition: ruby.h:528
DOUBLE_SWAPPER::d
double d
Definition: pack.c:80
MAX_INTEGER_PACK_SIZE
#define MAX_INTEGER_PACK_SIZE
Definition: pack.c:108
rb_ary_new
VALUE rb_ary_new(void)
Definition: array.c:723
builtin.h
Qnil
#define Qnil
Definition: ruby.h:469
rb_str_buf_cat
#define rb_str_buf_cat
Definition: intern.h:910
UNREACHABLE_RETURN
#define UNREACHABLE_RETURN(val)
Definition: ruby.h:59
RSTRING_LEN
#define RSTRING_LEN(str)
Definition: ruby.h:1005
ruby::backward::cxxanyargs::type
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
rb_str_quote_unprintable
VALUE rb_str_quote_unprintable(VALUE)
Definition: string.c:10714
rb_execution_context_struct
Definition: vm_core.h:843