pack.c

Go to the documentation of this file.
00001 /**********************************************************************
00002 
00003   pack.c -
00004 
00005   $Author: yugui $
00006   created at: Thu Feb 10 15:17:05 JST 1994
00007 
00008   Copyright (C) 1993-2007 Yukihiro Matsumoto
00009 
00010 **********************************************************************/
00011 
00012 #include "ruby/ruby.h"
00013 #include "ruby/encoding.h"
00014 #include <sys/types.h>
00015 #include <ctype.h>
00016 #include <errno.h>
00017 
/*
 * GCC_VERSION_SINCE(major, minor, patchlevel)
 *
 * Evaluates to non-zero when the translation unit is being compiled by GCC
 * (with the Intel compiler explicitly excluded) whose version is at least
 * major.minor.patchlevel, and to 0 otherwise.
 *
 * The compiler test lives in a real #if rather than inside the macro body:
 * a `defined` operator that appears as the result of macro expansion in an
 * #if expression has undefined behavior, and it also made the macro unusable
 * in ordinary C expressions.
 */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define GCC_VERSION_SINCE(major, minor, patchlevel) \
  ((__GNUC__ > (major)) ||  \
   (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
   (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))
#else
# define GCC_VERSION_SINCE(major, minor, patchlevel) 0
#endif
00023 
00024 #define SIZE16 2
00025 #define SIZE32 4
00026 
00027 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
00028 # define NATINT_PACK
00029 #endif
00030 
00031 #ifdef DYNAMIC_ENDIAN
00032  /* for universal binary of NEXTSTEP and MacOS X */
00033  /* useless since autoconf 2.63? */
00034  static int
00035  is_bigendian(void)
00036  {
00037      static int init = 0;
00038      static int endian_value;
00039      char *p;
00040 
00041      if (init) return endian_value;
00042      init = 1;
00043      p = (char*)&init;
00044      return endian_value = p[0]?0:1;
00045  }
00046 # define BIGENDIAN_P() (is_bigendian())
00047 #elif defined(WORDS_BIGENDIAN)
00048 # define BIGENDIAN_P() 1
00049 #else
00050 # define BIGENDIAN_P() 0
00051 #endif
00052 
00053 #ifdef NATINT_PACK
00054 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
00055 #else
00056 # define NATINT_LEN(type,len) ((int)sizeof(type))
00057 #endif
00058 
00059 #if SIZEOF_LONG == 8
00060 # define INT64toNUM(x) LONG2NUM(x)
00061 # define UINT64toNUM(x) ULONG2NUM(x)
00062 #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
00063 # define INT64toNUM(x) LL2NUM(x)
00064 # define UINT64toNUM(x) ULL2NUM(x)
00065 #endif
00066 
/*
 * define_swapx(x, xtype)
 *
 * Expands to the definition of a static function named swap<x> that takes a
 * value of type xtype and returns a value of the same type whose bytes are
 * in the reverse order.
 *
 * The bytes are copied directly between the argument and the result through
 * unsigned char pointers, so no heap allocation is needed and the function
 * cannot fail.
 */
#define define_swapx(x, xtype)                          \
static xtype                                            \
TOKEN_PASTE(swap,x)(xtype z)                            \
{                                                       \
    xtype r;                                            \
    const unsigned char *s = (const unsigned char *)&z; \
    unsigned char *t = (unsigned char *)&r;             \
    size_t i;                                           \
                                                        \
    for (i = 0; i < sizeof(xtype); i++) {               \
        t[sizeof(xtype)-i-1] = s[i];                    \
    }                                                   \
    return r;                                           \
}
00088 
00089 #if GCC_VERSION_SINCE(4,3,0)
00090 # define swap32(x) __builtin_bswap32(x)
00091 # define swap64(x) __builtin_bswap64(x)
00092 #endif
00093 
/*
 * Portable fallbacks, used only when swap16/swap32 have not already been
 * defined. Both macros evaluate their argument more than once, so the
 * argument must not have side effects.
 */
#ifndef swap16
/* Exchange the two low-order bytes of x. */
# define swap16(x)      ((((x)&0xFF)<<8) | (((x)>>8)&0xFF))
#endif

#ifndef swap32
/*
 * Reverse the four low-order bytes of x.
 * The masks are unsigned so that a low byte >= 0x80 is never shifted into
 * the sign bit of a signed int, which would be undefined behavior.
 */
# define swap32(x)      ((((x)&0xFFU)<<24)       \
                        |(((x)>>24)&0xFFU)       \
                        |(((x)&0x0000FF00U)<<8)  \
                        |(((x)&0x00FF0000U)>>8)  )
#endif
00104 
00105 #ifndef swap64
00106 # ifdef HAVE_INT64_T
00107 #  define byte_in_64bit(n) ((uint64_t)0xff << (n))
00108 #  define swap64(x)       ((((x)&byte_in_64bit(0))<<56)         \
00109                            |(((x)>>56)&0xFF)                    \
00110                            |(((x)&byte_in_64bit(8))<<40)        \
00111                            |(((x)&byte_in_64bit(48))>>40)       \
00112                            |(((x)&byte_in_64bit(16))<<24)       \
00113                            |(((x)&byte_in_64bit(40))>>24)       \
00114                            |(((x)&byte_in_64bit(24))<<8)        \
00115                            |(((x)&byte_in_64bit(32))>>8))
00116 # endif
00117 #endif
00118 
00119 #if SIZEOF_SHORT == 2
00120 # define swaps(x)       swap16(x)
00121 #elif SIZEOF_SHORT == 4
00122 # define swaps(x)       swap32(x)
00123 #else
00124   define_swapx(s,short)
00125 #endif
00126 
00127 #if SIZEOF_INT == 2
00128 # define swapi(x)       swap16(x)
00129 #elif SIZEOF_INT == 4
00130 # define swapi(x)       swap32(x)
00131 #else
00132   define_swapx(i,int)
00133 #endif
00134 
00135 #if SIZEOF_LONG == 4
00136 # define swapl(x)       swap32(x)
00137 #elif SIZEOF_LONG == 8
00138 # define swapl(x)        swap64(x)
00139 #else
00140   define_swapx(l,long)
00141 #endif
00142 
00143 #ifdef HAVE_LONG_LONG
00144 # if SIZEOF_LONG_LONG == 8
00145 #  define swapll(x)        swap64(x)
00146 # else
00147    define_swapx(ll,LONG_LONG)
00148 # endif
00149 #endif
00150 
00151 #if SIZEOF_FLOAT == 4
00152 # ifdef HAVE_UINT32_T
00153 #  define swapf(x)      swap32(x)
00154 #  define FLOAT_SWAPPER uint32_t
00155 # else  /* SIZEOF_FLOAT == 4 but undivide by known size of int */
00156    define_swapx(f,float)
00157 # endif
00158 #else   /* SIZEOF_FLOAT != 4 */
00159   define_swapx(f,float)
00160 #endif  /* #if SIZEOF_FLOAT == 4 */
00161 
00162 #if SIZEOF_DOUBLE == 8
00163 # ifdef HAVE_UINT64_T   /* SIZEOF_DOUBLE == 8 == SIZEOF_UINT64_T */
00164 #  define swapd(x)      swap64(x)
00165 #  define DOUBLE_SWAPPER        uint64_t
00166 # else
00167 #  if SIZEOF_LONG == 4  /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */
00168     static double
00169     swapd(const double d)
00170     {
00171         double dtmp = d;
00172         unsigned long utmp[2];
00173         unsigned long utmp0;
00174 
00175         utmp[0] = 0; utmp[1] = 0;
00176         memcpy(utmp,&dtmp,sizeof(double));
00177         utmp0 = utmp[0];
00178         utmp[0] = swapl(utmp[1]);
00179         utmp[1] = swapl(utmp0);
00180         memcpy(&dtmp,utmp,sizeof(double));
00181         return dtmp;
00182     }
00183 #  elif SIZEOF_SHORT == 4       /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */
00184     static double
00185     swapd(const double d)
00186     {
00187         double dtmp = d;
00188         unsigned short utmp[2];
00189         unsigned short utmp0;
00190 
00191         utmp[0] = 0; utmp[1] = 0;
00192         memcpy(utmp,&dtmp,sizeof(double));
00193         utmp0 = utmp[0];
00194         utmp[0] = swaps(utmp[1]);
00195         utmp[1] = swaps(utmp0);
00196         memcpy(&dtmp,utmp,sizeof(double));
00197         return dtmp;
00198     }
00199 #  else /* SIZEOF_DOUBLE == 8 but undivide by known size of int */
00200     define_swapx(d, double)
00201 #  endif
00202 # endif /* #if SIZEOF_LONG == 8 */
00203 #else   /* SIZEOF_DOUBLE != 8 */
00204   define_swapx(d, double)
00205 #endif  /* #if SIZEOF_DOUBLE == 8 */
00206 
00207 #undef define_swapx
00208 
00209 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
00210 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
00211 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
00212 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
00213 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
00214 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
00215 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
00216 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
00217 
00218 #ifdef FLOAT_SWAPPER
00219 # define FLOAT_CONVWITH(y)      FLOAT_SWAPPER y;
00220 # define HTONF(x,y)     (memcpy(&y,&x,sizeof(float)),   \
00221                          y = rb_htonf((FLOAT_SWAPPER)y),        \
00222                          memcpy(&x,&y,sizeof(float)),   \
00223                          x)
00224 # define HTOVF(x,y)     (memcpy(&y,&x,sizeof(float)),   \
00225                          y = rb_htovf((FLOAT_SWAPPER)y),        \
00226                          memcpy(&x,&y,sizeof(float)),   \
00227                          x)
00228 # define NTOHF(x,y)     (memcpy(&y,&x,sizeof(float)),   \
00229                          y = rb_ntohf((FLOAT_SWAPPER)y),        \
00230                          memcpy(&x,&y,sizeof(float)),   \
00231                          x)
00232 # define VTOHF(x,y)     (memcpy(&y,&x,sizeof(float)),   \
00233                          y = rb_vtohf((FLOAT_SWAPPER)y),        \
00234                          memcpy(&x,&y,sizeof(float)),   \
00235                          x)
00236 #else
00237 # define FLOAT_CONVWITH(y)
00238 # define HTONF(x,y)     rb_htonf(x)
00239 # define HTOVF(x,y)     rb_htovf(x)
00240 # define NTOHF(x,y)     rb_ntohf(x)
00241 # define VTOHF(x,y)     rb_vtohf(x)
00242 #endif
00243 
00244 #ifdef DOUBLE_SWAPPER
00245 # define DOUBLE_CONVWITH(y)     DOUBLE_SWAPPER y;
00246 # define HTOND(x,y)     (memcpy(&y,&x,sizeof(double)),  \
00247                          y = rb_htond((DOUBLE_SWAPPER)y),       \
00248                          memcpy(&x,&y,sizeof(double)),  \
00249                          x)
00250 # define HTOVD(x,y)     (memcpy(&y,&x,sizeof(double)),  \
00251                          y = rb_htovd((DOUBLE_SWAPPER)y),       \
00252                          memcpy(&x,&y,sizeof(double)),  \
00253                          x)
00254 # define NTOHD(x,y)     (memcpy(&y,&x,sizeof(double)),  \
00255                          y = rb_ntohd((DOUBLE_SWAPPER)y),       \
00256                          memcpy(&x,&y,sizeof(double)),  \
00257                          x)
00258 # define VTOHD(x,y)     (memcpy(&y,&x,sizeof(double)),  \
00259                          y = rb_vtohd((DOUBLE_SWAPPER)y),       \
00260                          memcpy(&x,&y,sizeof(double)),  \
00261                          x)
00262 #else
00263 # define DOUBLE_CONVWITH(y)
00264 # define HTOND(x,y)     rb_htond(x)
00265 # define HTOVD(x,y)     rb_htovd(x)
00266 # define NTOHD(x,y)     rb_ntohd(x)
00267 # define VTOHD(x,y)     rb_vtohd(x)
00268 #endif
00269 
00270 VALUE rb_big2ulong_pack(VALUE x);
00271 
00272 static unsigned long
00273 num2i32(VALUE x)
00274 {
00275     x = rb_to_int(x); /* is nil OK? (should not) */
00276 
00277     if (FIXNUM_P(x)) return FIX2LONG(x);
00278     if (TYPE(x) == T_BIGNUM) {
00279         return rb_big2ulong_pack(x);
00280     }
00281     rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
00282     return 0;                   /* not reached */
00283 }
00284 
00285 #define QUAD_SIZE 8
00286 #define MAX_INTEGER_PACK_SIZE 8
00287 /* #define FORCE_BIG_PACK */
00288 
00289 static const char toofew[] = "too few arguments";
00290 
00291 static void encodes(VALUE,const char*,long,int,int);
00292 static void qpencode(VALUE,VALUE,long);
00293 
00294 static unsigned long utf8_to_uv(const char*,long*);
00295 
00296 /*
00297  *  call-seq:
00298  *     arr.pack ( aTemplateString ) -> aBinaryString
00299  *
00300  *  Packs the contents of <i>arr</i> into a binary sequence according to
00301  *  the directives in <i>aTemplateString</i> (see the table below).
00302  *  Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
00303  *  which gives the width of the resulting field. The remaining
00304  *  directives also may take a count, indicating the number of array
00305  *  elements to convert. If the count is an asterisk
00306  *  (``<code>*</code>''), all remaining array elements will be
00307  *  converted. Any of the directives ``<code>sSiIlL</code>'' may be
00308  *  followed by an underscore (``<code>_</code>'') or
00309  *  exclamation mark (``<code>!</code>'') to use the underlying
00310  *  platform's native size for the specified type; otherwise, they use a
00311  *  platform-independent size. Spaces are ignored in the template
00312  *  string. See also <code>String#unpack</code>.
00313  *
00314  *     a = [ "a", "b", "c" ]
00315  *     n = [ 65, 66, 67 ]
00316  *     a.pack("A3A3A3")   #=> "a  b  c  "
00317  *     a.pack("a3a3a3")   #=> "a\000\000b\000\000c\000\000"
00318  *     n.pack("ccc")      #=> "ABC"
00319  *
00320  *  Directives for +pack+.
00321  *
00322  *   Integer      | Array   |
00323  *   Directive    | Element | Meaning
00324  *   ---------------------------------------------------------------------------
00325  *      C         | Integer | 8-bit unsigned integer (unsigned char)
00326  *      S         | Integer | 16-bit unsigned integer, native endian (uint16_t)
00327  *      L         | Integer | 32-bit unsigned integer, native endian (uint32_t)
00328  *      Q         | Integer | 64-bit unsigned integer, native endian (uint64_t)
00329  *                |         |
00330  *      c         | Integer | 8-bit signed integer (char)
00331  *      s         | Integer | 16-bit signed integer, native endian (int16_t)
00332  *      l         | Integer | 32-bit signed integer, native endian (int32_t)
00333  *      q         | Integer | 64-bit signed integer, native endian (int64_t)
00334  *                |         |
00335  *      S_, S!    | Integer | unsigned short, native endian
00336  *      I, I_, I! | Integer | unsigned int, native endian
00337  *      L_, L!    | Integer | unsigned long, native endian
00338  *                |         |
00339  *      s_, s!    | Integer | signed short, native endian
00340  *      i, i_, i! | Integer | signed int, native endian
00341  *      l_, l!    | Integer | signed long, native endian
00342  *                |         |
00343  *      n         | Integer | 16-bit unsigned integer, network (big-endian) byte order
00344  *      N         | Integer | 32-bit unsigned integer, network (big-endian) byte order
00345  *      v         | Integer | 16-bit unsigned integer, VAX (little-endian) byte order
00346  *      V         | Integer | 32-bit unsigned integer, VAX (little-endian) byte order
00347  *                |         |
00348  *      U         | Integer | UTF-8 character
00349  *      w         | Integer | BER-compressed integer
00350  *                
00351  *   Float        |         |
00352  *   Directive    |         | Meaning
00353  *   ---------------------------------------------------------------------------
00354  *      D, d      | Float   | double-precision float, native format
00355  *      F, f      | Float   | single-precision float, native format
00356  *      E         | Float   | double-precision float, little-endian byte order
00357  *      e         | Float   | single-precision float, little-endian byte order
00358  *      G         | Float   | double-precision float, network (big-endian) byte order
00359  *      g         | Float   | single-precision float, network (big-endian) byte order
00360  *                
00361  *   String       |         |
00362  *   Directive    |         | Meaning
00363  *   ---------------------------------------------------------------------------
00364  *      A         | String  | arbitrary binary string (space padded, count is width)
00365  *      a         | String  | arbitrary binary string (null padded, count is width)
00366  *      Z         | String  | same as ``a'', except that null is added with *
00367  *      B         | String  | bit string (MSB first)
00368  *      b         | String  | bit string (LSB first)
00369  *      H         | String  | hex string (high nibble first)
00370  *      h         | String  | hex string (low nibble first)
00371  *      u         | String  | UU-encoded string
00372  *      M         | String  | quoted printable, MIME encoding (see RFC2045)
00373  *      m         | String  | base64 encoded string (see RFC 2045, count is width)
00374  *                |         | (if count is 0, no line feeds are added, see RFC 4648)
00375  *      P         | String  | pointer to a structure (fixed-length string)
00376  *      p         | String  | pointer to a null-terminated string
00377  *                
00378  *   Misc.        |         |
00379  *   Directive    |         | Meaning
00380  *   ---------------------------------------------------------------------------
00381  *      @         | ---     | moves to absolute position
00382  *      X         | ---     | back up a byte
00383  *      x         | ---     | null byte
00384  */
00385 
00386 static VALUE
00387 pack_pack(VALUE ary, VALUE fmt)
00388 {
00389     static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
00390     static const char spc10[] = "          ";
00391     const char *p, *pend;
00392     VALUE res, from, associates = 0;
00393     char type;
00394     long items, len, idx, plen;
00395     const char *ptr;
00396     int enc_info = 1;           /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
00397 #ifdef NATINT_PACK
00398     int natint;         /* native integer */
00399 #endif
00400     int signed_p, integer_size, bigendian_p;
00401 
00402     StringValue(fmt);
00403     p = RSTRING_PTR(fmt);
00404     pend = p + RSTRING_LEN(fmt);
00405     res = rb_str_buf_new(0);
00406 
00407     items = RARRAY_LEN(ary);
00408     idx = 0;
00409 
00410 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
00411 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
00412 #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
00413 
00414     while (p < pend) {
00415         if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
00416             rb_raise(rb_eRuntimeError, "format string modified");
00417         }
00418         type = *p++;            /* get data type */
00419 #ifdef NATINT_PACK
00420         natint = 0;
00421 #endif
00422 
00423         if (ISSPACE(type)) continue;
00424         if (type == '#') {
00425             while ((p < pend) && (*p != '\n')) {
00426                 p++;
00427             }
00428             continue;
00429         }
00430         if (*p == '_' || *p == '!') {
00431             static const char natstr[] = "sSiIlL";
00432 
00433             if (strchr(natstr, type)) {
00434 #ifdef NATINT_PACK
00435                 natint = 1;
00436 #endif
00437                 p++;
00438             }
00439             else {
00440                 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
00441             }
00442         }
00443         if (*p == '*') {        /* set data length */
00444             len = strchr("@Xxu", type) ? 0
00445                 : strchr("PMm", type) ? 1
00446                 : items;
00447             p++;
00448         }
00449         else if (ISDIGIT(*p)) {
00450             errno = 0;
00451             len = STRTOUL(p, (char**)&p, 10);
00452             if (errno) {
00453                 rb_raise(rb_eRangeError, "pack length too big");
00454             }
00455         }
00456         else {
00457             len = 1;
00458         }
00459 
00460         switch (type) {
00461           case 'U':
00462             /* if encoding is US-ASCII, upgrade to UTF-8 */
00463             if (enc_info == 1) enc_info = 2;
00464             break;
00465           case 'm': case 'M': case 'u':
00466             /* keep US-ASCII (do nothing) */
00467             break;
00468           default:
00469             /* fall back to BINARY */
00470             enc_info = 0;
00471             break;
00472         }
00473         switch (type) {
00474           case 'A': case 'a': case 'Z':
00475           case 'B': case 'b':
00476           case 'H': case 'h':
00477             from = NEXTFROM;
00478             if (NIL_P(from)) {
00479                 ptr = "";
00480                 plen = 0;
00481             }
00482             else {
00483                 StringValue(from);
00484                 ptr = RSTRING_PTR(from);
00485                 plen = RSTRING_LEN(from);
00486                 OBJ_INFECT(res, from);
00487             }
00488 
00489             if (p[-1] == '*')
00490                 len = plen;
00491 
00492             switch (type) {
00493               case 'a':         /* arbitrary binary string (null padded)  */
00494               case 'A':         /* arbitrary binary string (ASCII space padded) */
00495               case 'Z':         /* null terminated string  */
00496                 if (plen >= len) {
00497                     rb_str_buf_cat(res, ptr, len);
00498                     if (p[-1] == '*' && type == 'Z')
00499                         rb_str_buf_cat(res, nul10, 1);
00500                 }
00501                 else {
00502                     rb_str_buf_cat(res, ptr, plen);
00503                     len -= plen;
00504                     while (len >= 10) {
00505                         rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
00506                         len -= 10;
00507                     }
00508                     rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
00509                 }
00510                 break;
00511 
00512               case 'b':         /* bit string (ascending) */
00513                 {
00514                     int byte = 0;
00515                     long i, j = 0;
00516 
00517                     if (len > plen) {
00518                         j = (len - plen + 1)/2;
00519                         len = plen;
00520                     }
00521                     for (i=0; i++ < len; ptr++) {
00522                         if (*ptr & 1)
00523                             byte |= 128;
00524                         if (i & 7)
00525                             byte >>= 1;
00526                         else {
00527                             char c = byte & 0xff;
00528                             rb_str_buf_cat(res, &c, 1);
00529                             byte = 0;
00530                         }
00531                     }
00532                     if (len & 7) {
00533                         char c;
00534                         byte >>= 7 - (len & 7);
00535                         c = byte & 0xff;
00536                         rb_str_buf_cat(res, &c, 1);
00537                     }
00538                     len = j;
00539                     goto grow;
00540                 }
00541                 break;
00542 
00543               case 'B':         /* bit string (descending) */
00544                 {
00545                     int byte = 0;
00546                     long i, j = 0;
00547 
00548                     if (len > plen) {
00549                         j = (len - plen + 1)/2;
00550                         len = plen;
00551                     }
00552                     for (i=0; i++ < len; ptr++) {
00553                         byte |= *ptr & 1;
00554                         if (i & 7)
00555                             byte <<= 1;
00556                         else {
00557                             char c = byte & 0xff;
00558                             rb_str_buf_cat(res, &c, 1);
00559                             byte = 0;
00560                         }
00561                     }
00562                     if (len & 7) {
00563                         char c;
00564                         byte <<= 7 - (len & 7);
00565                         c = byte & 0xff;
00566                         rb_str_buf_cat(res, &c, 1);
00567                     }
00568                     len = j;
00569                     goto grow;
00570                 }
00571                 break;
00572 
00573               case 'h':         /* hex string (low nibble first) */
00574                 {
00575                     int byte = 0;
00576                     long i, j = 0;
00577 
00578                     if (len > plen) {
00579                         j = (len + 1) / 2 - (plen + 1) / 2;
00580                         len = plen;
00581                     }
00582                     for (i=0; i++ < len; ptr++) {
00583                         if (ISALPHA(*ptr))
00584                             byte |= (((*ptr & 15) + 9) & 15) << 4;
00585                         else
00586                             byte |= (*ptr & 15) << 4;
00587                         if (i & 1)
00588                             byte >>= 4;
00589                         else {
00590                             char c = byte & 0xff;
00591                             rb_str_buf_cat(res, &c, 1);
00592                             byte = 0;
00593                         }
00594                     }
00595                     if (len & 1) {
00596                         char c = byte & 0xff;
00597                         rb_str_buf_cat(res, &c, 1);
00598                     }
00599                     len = j;
00600                     goto grow;
00601                 }
00602                 break;
00603 
00604               case 'H':         /* hex string (high nibble first) */
00605                 {
00606                     int byte = 0;
00607                     long i, j = 0;
00608 
00609                     if (len > plen) {
00610                         j = (len + 1) / 2 - (plen + 1) / 2;
00611                         len = plen;
00612                     }
00613                     for (i=0; i++ < len; ptr++) {
00614                         if (ISALPHA(*ptr))
00615                             byte |= ((*ptr & 15) + 9) & 15;
00616                         else
00617                             byte |= *ptr & 15;
00618                         if (i & 1)
00619                             byte <<= 4;
00620                         else {
00621                             char c = byte & 0xff;
00622                             rb_str_buf_cat(res, &c, 1);
00623                             byte = 0;
00624                         }
00625                     }
00626                     if (len & 1) {
00627                         char c = byte & 0xff;
00628                         rb_str_buf_cat(res, &c, 1);
00629                     }
00630                     len = j;
00631                     goto grow;
00632                 }
00633                 break;
00634             }
00635             break;
00636 
00637           case 'c':             /* signed char */
00638           case 'C':             /* unsigned char */
00639             while (len-- > 0) {
00640                 char c;
00641 
00642                 from = NEXTFROM;
00643                 c = (char)num2i32(from);
00644                 rb_str_buf_cat(res, &c, sizeof(char));
00645             }
00646             break;
00647 
00648           case 's':             /* signed short */
00649             signed_p = 1;
00650             integer_size = NATINT_LEN(short, 2);
00651             bigendian_p = BIGENDIAN_P();
00652             goto pack_integer;
00653 
00654           case 'S':             /* unsigned short */
00655             signed_p = 0;
00656             integer_size = NATINT_LEN(short, 2);
00657             bigendian_p = BIGENDIAN_P();
00658             goto pack_integer;
00659 
00660           case 'i':             /* signed int */
00661             signed_p = 1;
00662             integer_size = (int)sizeof(int);
00663             bigendian_p = BIGENDIAN_P();
00664             goto pack_integer;
00665 
00666           case 'I':             /* unsigned int */
00667             signed_p = 0;
00668             integer_size = (int)sizeof(int);
00669             bigendian_p = BIGENDIAN_P();
00670             goto pack_integer;
00671 
00672           case 'l':             /* signed long */
00673             signed_p = 1;
00674             integer_size = NATINT_LEN(long, 4);
00675             bigendian_p = BIGENDIAN_P();
00676             goto pack_integer;
00677 
00678           case 'L':             /* unsigned long */
00679             signed_p = 0;
00680             integer_size = NATINT_LEN(long, 4);
00681             bigendian_p = BIGENDIAN_P();
00682             goto pack_integer;
00683 
00684           case 'q':             /* signed quad (64bit) int */
00685             signed_p = 1;
00686             integer_size = 8;
00687             bigendian_p = BIGENDIAN_P();
00688             goto pack_integer;
00689 
00690           case 'Q':             /* unsigned quad (64bit) int */
00691             signed_p = 0;
00692             integer_size = 8;
00693             bigendian_p = BIGENDIAN_P();
00694             goto pack_integer;
00695 
00696           case 'n':             /* unsigned short (network byte-order)  */
00697             signed_p = 0;
00698             integer_size = 2;
00699             bigendian_p = 1;
00700             goto pack_integer;
00701 
00702           case 'N':             /* unsigned long (network byte-order) */
00703             signed_p = 0;
00704             integer_size = 4;
00705             bigendian_p = 1;
00706             goto pack_integer;
00707 
00708           case 'v':             /* unsigned short (VAX byte-order) */
00709             signed_p = 0;
00710             integer_size = 2;
00711             bigendian_p = 0;
00712             goto pack_integer;
00713 
00714           case 'V':             /* unsigned long (VAX byte-order) */
00715             signed_p = 0;
00716             integer_size = 4;
00717             bigendian_p = 0;
00718             goto pack_integer;
00719 
00720           pack_integer:
00721             switch (integer_size) {
00722 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
00723               case SIZEOF_INT16_T:
00724                 while (len-- > 0) {
00725                     union {
00726                         int16_t i;
00727                         char a[sizeof(int16_t)];
00728                     } v;
00729 
00730                     from = NEXTFROM;
00731                     v.i = (int16_t)num2i32(from);
00732                     if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
00733                     rb_str_buf_cat(res, v.a, sizeof(int16_t));
00734                 }
00735                 break;
00736 #endif
00737 
00738 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
00739               case SIZEOF_INT32_T:
00740                 while (len-- > 0) {
00741                     union {
00742                         int32_t i;
00743                         char a[sizeof(int32_t)];
00744                     } v;
00745 
00746                     from = NEXTFROM;
00747                     v.i = (int32_t)num2i32(from);
00748                     if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
00749                     rb_str_buf_cat(res, v.a, sizeof(int32_t));
00750                 }
00751                 break;
00752 #endif
00753 
00754 #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
00755               case SIZEOF_INT64_T:
00756                 while (len-- > 0) {
00757                     union {
00758                         int64_t i;
00759                         char a[sizeof(int64_t)];
00760                     } v;
00761 
00762                     from = NEXTFROM;
00763                     v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
00764                     if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
00765                     rb_str_buf_cat(res, v.a, sizeof(int64_t));
00766                 }
00767                 break;
00768 #endif
00769 
00770               default:
00771                 if (integer_size > MAX_INTEGER_PACK_SIZE)
00772                     rb_bug("unexpected intger size for pack: %d", integer_size);
00773                 while (len-- > 0) {
00774                     union {
00775                         unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
00776                         char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
00777                     } v;
00778                     int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
00779                     int i;
00780 
00781                     from = NEXTFROM;
00782                     rb_big_pack(from, v.i, num_longs);
00783                     if (bigendian_p) {
00784                         for (i = 0; i < num_longs/2; i++) {
00785                             unsigned long t = v.i[i];
00786                             v.i[i] = v.i[num_longs-1-i];
00787                             v.i[num_longs-1-i] = t;
00788                         }
00789                     }
00790                     if (bigendian_p != BIGENDIAN_P()) {
00791                         for (i = 0; i < num_longs; i++)
00792                             v.i[i] = swapl(v.i[i]);
00793                     }
00794                     rb_str_buf_cat(res,
00795                                    bigendian_p ?
00796                                      v.a + sizeof(long)*num_longs - integer_size :
00797                                      v.a,
00798                                    integer_size);
00799                 }
00800                 break;
00801             }
00802             break;
00803 
00804           case 'f':             /* single precision float in native format */
00805           case 'F':             /* ditto */
00806             while (len-- > 0) {
00807                 float f;
00808 
00809                 from = NEXTFROM;
00810                 f = (float)RFLOAT_VALUE(rb_to_float(from));
00811                 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00812             }
00813             break;
00814 
00815           case 'e':             /* single precision float in VAX byte-order */
00816             while (len-- > 0) {
00817                 float f;
00818                 FLOAT_CONVWITH(ftmp);
00819 
00820                 from = NEXTFROM;
00821                 f = (float)RFLOAT_VALUE(rb_to_float(from));
00822                 f = HTOVF(f,ftmp);
00823                 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00824             }
00825             break;
00826 
00827           case 'E':             /* double precision float in VAX byte-order */
00828             while (len-- > 0) {
00829                 double d;
00830                 DOUBLE_CONVWITH(dtmp);
00831 
00832                 from = NEXTFROM;
00833                 d = RFLOAT_VALUE(rb_to_float(from));
00834                 d = HTOVD(d,dtmp);
00835                 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00836             }
00837             break;
00838 
00839           case 'd':             /* double precision float in native format */
00840           case 'D':             /* ditto */
00841             while (len-- > 0) {
00842                 double d;
00843 
00844                 from = NEXTFROM;
00845                 d = RFLOAT_VALUE(rb_to_float(from));
00846                 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00847             }
00848             break;
00849 
00850           case 'g':             /* single precision float in network byte-order */
00851             while (len-- > 0) {
00852                 float f;
00853                 FLOAT_CONVWITH(ftmp);
00854 
00855                 from = NEXTFROM;
00856                 f = (float)RFLOAT_VALUE(rb_to_float(from));
00857                 f = HTONF(f,ftmp);
00858                 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00859             }
00860             break;
00861 
00862           case 'G':             /* double precision float in network byte-order */
00863             while (len-- > 0) {
00864                 double d;
00865                 DOUBLE_CONVWITH(dtmp);
00866 
00867                 from = NEXTFROM;
00868                 d = RFLOAT_VALUE(rb_to_float(from));
00869                 d = HTOND(d,dtmp);
00870                 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00871             }
00872             break;
00873 
00874           case 'x':             /* null byte */
00875           grow:
00876             while (len >= 10) {
00877                 rb_str_buf_cat(res, nul10, 10);
00878                 len -= 10;
00879             }
00880             rb_str_buf_cat(res, nul10, len);
00881             break;
00882 
00883           case 'X':             /* back up byte */
00884           shrink:
00885             plen = RSTRING_LEN(res);
00886             if (plen < len)
00887                 rb_raise(rb_eArgError, "X outside of string");
00888             rb_str_set_len(res, plen - len);
00889             break;
00890 
00891           case '@':             /* null fill to absolute position */
00892             len -= RSTRING_LEN(res);
00893             if (len > 0) goto grow;
00894             len = -len;
00895             if (len > 0) goto shrink;
00896             break;
00897 
00898           case '%':
00899             rb_raise(rb_eArgError, "%% is not supported");
00900             break;
00901 
00902           case 'U':             /* Unicode character */
00903             while (len-- > 0) {
00904                 SIGNED_VALUE l;
00905                 char buf[8];
00906                 int le;
00907 
00908                 from = NEXTFROM;
00909                 from = rb_to_int(from);
00910                 l = NUM2LONG(from);
00911                 if (l < 0) {
00912                     rb_raise(rb_eRangeError, "pack(U): value out of range");
00913                 }
00914                 le = rb_uv_to_utf8(buf, l);
00915                 rb_str_buf_cat(res, (char*)buf, le);
00916             }
00917             break;
00918 
00919           case 'u':             /* uuencoded string */
00920           case 'm':             /* base64 encoded string */
00921             from = NEXTFROM;
00922             StringValue(from);
00923             ptr = RSTRING_PTR(from);
00924             plen = RSTRING_LEN(from);
00925 
00926             if (len == 0 && type == 'm') {
00927                 encodes(res, ptr, plen, type, 0);
00928                 ptr += plen;
00929                 break;
00930             }
00931             if (len <= 2)
00932                 len = 45;
00933             else
00934                 len = len / 3 * 3;
00935             while (plen > 0) {
00936                 long todo;
00937 
00938                 if (plen > len)
00939                     todo = len;
00940                 else
00941                     todo = plen;
00942                 encodes(res, ptr, todo, type, 1);
00943                 plen -= todo;
00944                 ptr += todo;
00945             }
00946             break;
00947 
00948           case 'M':             /* quoted-printable encoded string */
00949             from = rb_obj_as_string(NEXTFROM);
00950             if (len <= 1)
00951                 len = 72;
00952             qpencode(res, from, len);
00953             break;
00954 
00955           case 'P':             /* pointer to packed byte string */
00956             from = THISFROM;
00957             if (!NIL_P(from)) {
00958                 StringValue(from);
00959                 if (RSTRING_LEN(from) < len) {
00960                     rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
00961                              RSTRING_LEN(from), len);
00962                 }
00963             }
00964             len = 1;
00965             /* FALL THROUGH */
00966           case 'p':             /* pointer to string */
00967             while (len-- > 0) {
00968                 char *t;
00969                 from = NEXTFROM;
00970                 if (NIL_P(from)) {
00971                     t = 0;
00972                 }
00973                 else {
00974                     t = StringValuePtr(from);
00975                 }
00976                 if (!associates) {
00977                     associates = rb_ary_new();
00978                 }
00979                 rb_ary_push(associates, from);
00980                 rb_obj_taint(from);
00981                 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
00982             }
00983             break;
00984 
00985           case 'w':             /* BER compressed integer  */
00986             while (len-- > 0) {
00987                 unsigned long ul;
00988                 VALUE buf = rb_str_new(0, 0);
00989                 char c, *bufs, *bufe;
00990 
00991                 from = NEXTFROM;
00992                 if (TYPE(from) == T_BIGNUM) {
00993                     VALUE big128 = rb_uint2big(128);
00994                     while (TYPE(from) == T_BIGNUM) {
00995                         from = rb_big_divmod(from, big128);
00996                         c = NUM2INT(RARRAY_PTR(from)[1]) | 0x80; /* mod */
00997                         rb_str_buf_cat(buf, &c, sizeof(char));
00998                         from = RARRAY_PTR(from)[0]; /* div */
00999                     }
01000                 }
01001 
01002                 {
01003                     long l = NUM2LONG(from);
01004                     if (l < 0) {
01005                         rb_raise(rb_eArgError, "can't compress negative numbers");
01006                     }
01007                     ul = l;
01008                 }
01009 
01010                 while (ul) {
01011                     c = (char)(ul & 0x7f) | 0x80;
01012                     rb_str_buf_cat(buf, &c, sizeof(char));
01013                     ul >>=  7;
01014                 }
01015 
01016                 if (RSTRING_LEN(buf)) {
01017                     bufs = RSTRING_PTR(buf);
01018                     bufe = bufs + RSTRING_LEN(buf) - 1;
01019                     *bufs &= 0x7f; /* clear continue bit */
01020                     while (bufs < bufe) { /* reverse */
01021                         c = *bufs;
01022                         *bufs++ = *bufe;
01023                         *bufe-- = c;
01024                     }
01025                     rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
01026                 }
01027                 else {
01028                     c = 0;
01029                     rb_str_buf_cat(res, &c, sizeof(char));
01030                 }
01031             }
01032             break;
01033 
01034           default:
01035             break;
01036         }
01037     }
01038 
01039     if (associates) {
01040         rb_str_associate(res, associates);
01041     }
01042     OBJ_INFECT(res, fmt);
01043     switch (enc_info) {
01044       case 1:
01045         ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
01046         break;
01047       case 2:
01048         rb_enc_set_index(res, rb_utf8_encindex());
01049         break;
01050       default:
01051         /* do nothing, keep ASCII-8BIT */
01052         break;
01053     }
01054     return res;
01055 }
01056 
01057 static const char uu_table[] =
01058 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
01059 static const char b64_table[] =
01060 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
01061 
01062 static void
01063 encodes(VALUE str, const char *s, long len, int type, int tail_lf)
01064 {
01065     char buff[4096];
01066     long i = 0;
01067     const char *trans = type == 'u' ? uu_table : b64_table;
01068     int padding;
01069 
01070     if (type == 'u') {
01071         buff[i++] = (char)len + ' ';
01072         padding = '`';
01073     }
01074     else {
01075         padding = '=';
01076     }
01077     while (len >= 3) {
01078         while (len >= 3 && sizeof(buff)-i >= 4) {
01079             buff[i++] = trans[077 & (*s >> 2)];
01080             buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
01081             buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
01082             buff[i++] = trans[077 & s[2]];
01083             s += 3;
01084             len -= 3;
01085         }
01086         if (sizeof(buff)-i < 4) {
01087             rb_str_buf_cat(str, buff, i);
01088             i = 0;
01089         }
01090     }
01091 
01092     if (len == 2) {
01093         buff[i++] = trans[077 & (*s >> 2)];
01094         buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
01095         buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
01096         buff[i++] = padding;
01097     }
01098     else if (len == 1) {
01099         buff[i++] = trans[077 & (*s >> 2)];
01100         buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
01101         buff[i++] = padding;
01102         buff[i++] = padding;
01103     }
01104     if (tail_lf) buff[i++] = '\n';
01105     rb_str_buf_cat(str, buff, i);
01106 }
01107 
01108 static const char hex_table[] = "0123456789ABCDEF";
01109 
01110 static void
01111 qpencode(VALUE str, VALUE from, long len)
01112 {
01113     char buff[1024];
01114     long i = 0, n = 0, prev = EOF;
01115     unsigned char *s = (unsigned char*)RSTRING_PTR(from);
01116     unsigned char *send = s + RSTRING_LEN(from);
01117 
01118     while (s < send) {
01119         if ((*s > 126) ||
01120             (*s < 32 && *s != '\n' && *s != '\t') ||
01121             (*s == '=')) {
01122             buff[i++] = '=';
01123             buff[i++] = hex_table[*s >> 4];
01124             buff[i++] = hex_table[*s & 0x0f];
01125             n += 3;
01126             prev = EOF;
01127         }
01128         else if (*s == '\n') {
01129             if (prev == ' ' || prev == '\t') {
01130                 buff[i++] = '=';
01131                 buff[i++] = *s;
01132             }
01133             buff[i++] = *s;
01134             n = 0;
01135             prev = *s;
01136         }
01137         else {
01138             buff[i++] = *s;
01139             n++;
01140             prev = *s;
01141         }
01142         if (n > len) {
01143             buff[i++] = '=';
01144             buff[i++] = '\n';
01145             n = 0;
01146             prev = '\n';
01147         }
01148         if (i > 1024 - 5) {
01149             rb_str_buf_cat(str, buff, i);
01150             i = 0;
01151         }
01152         s++;
01153     }
01154     if (n > 0) {
01155         buff[i++] = '=';
01156         buff[i++] = '\n';
01157     }
01158     if (i > 0) {
01159         rb_str_buf_cat(str, buff, i);
01160     }
01161 }
01162 
/*
 * Convert one hexadecimal digit character to its numeric value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns -1 for anything else.
 */
static inline int
hex2num(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}
01180 
/*
 * Clamp `len` to the number of whole items of size `sz` that fit between
 * `s` and `send`.  `tmp_len` is set to the number of items that were cut
 * off, or to 0 when nothing was cut or when `star` is set.
 *
 * Uses the enclosing scope's `s`, `send`, `len`, `tmp_len` and `star`.
 * `sz` is parenthesized and evaluated exactly once, so any expression may
 * be passed safely.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {                        \
    const long pack_avail_items_ = (long)((send-s)/(sz));       \
    tmp_len = 0;                                                \
    if (len > pack_avail_items_) {                              \
        if (!star) {                                            \
            tmp_len = len - pack_avail_items_;                  \
        }                                                       \
        len = pack_avail_items_;                                \
    }                                                           \
} while (0)
01190 
/*
 * When no block is in use (`block_p` is false) and `tmp_len` is positive,
 * store Qnil at index RARRAY_LEN(ary)+tmp_len-1 of `ary`.
 * Uses the enclosing scope's `ary`, `tmp_len` and `block_p`.
 */
#define PACK_ITEM_ADJUST() do {                                     \
    if (!block_p && tmp_len > 0) {                                  \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil);         \
    }                                                               \
} while (0)
01195 
01196 static VALUE
01197 infected_str_new(const char *ptr, long len, VALUE str)
01198 {
01199     VALUE s = rb_str_new(ptr, len);
01200 
01201     OBJ_INFECT(s, str);
01202     return s;
01203 }
01204 
01205 /*
01206  *  call-seq:
01207  *     str.unpack(format)    ->  anArray
01208  *
01209  *  Decodes <i>str</i> (which may contain binary data) according to the
01210  *  format string, returning an array of each value extracted. The
01211  *  format string consists of a sequence of single-character directives,
01212  *  summarized in the table at the end of this entry.
01213  *  Each directive may be followed
01214  *  by a number, indicating the number of times to repeat with this
01215  *  directive. An asterisk (``<code>*</code>'') will use up all
01216  *  remaining elements. The directives <code>sSiIlL</code> may each be
01217  *  followed by an underscore (``<code>_</code>'') or
01218  *  exclamation mark (``<code>!</code>'') to use the underlying
01219  *  platform's native size for the specified type; otherwise, it uses a
01220  *  platform-independent consistent size. Spaces are ignored in the
01221  *  format string. See also <code>Array#pack</code>.
01222  *
01223  *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
01224  *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
01225  *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
01226  *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
01227  *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
01228  *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
01229  *     "now=20is".unpack('M*')             #=> ["now is"]
01230  *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
01231  *
01232  *  This table summarizes the various formats and the Ruby classes
01233  *  returned by each.
01234  *
01235  *   Integer      |         |
01236  *   Directive    | Returns | Meaning
01237  *   -----------------------------------------------------------------
01238  *      C         | Integer | 8-bit unsigned integer (unsigned char)
01239  *      S         | Integer | 16-bit unsigned integer, native endian (uint16_t)
01240  *      L         | Integer | 32-bit unsigned integer, native endian (uint32_t)
01241  *      Q         | Integer | 64-bit unsigned integer, native endian (uint64_t)
01242  *                |         |
01243  *      c         | Integer | 8-bit signed integer (signed char)
01244  *      s         | Integer | 16-bit signed integer, native endian (int16_t)
01245  *      l         | Integer | 32-bit signed integer, native endian (int32_t)
01246  *      q         | Integer | 64-bit signed integer, native endian (int64_t)
01247  *                |         |
01248  *      S_, S!    | Integer | unsigned short, native endian
01249  *      I, I_, I! | Integer | unsigned int, native endian
01250  *      L_, L!    | Integer | unsigned long, native endian
01251  *                |         |
01252  *      s_, s!    | Integer | signed short, native endian
01253  *      i, i_, i! | Integer | signed int, native endian
01254  *      l_, l!    | Integer | signed long, native endian
01255  *                |         |
01256  *      n         | Integer | 16-bit unsigned integer, network (big-endian) byte order
01257  *      N         | Integer | 32-bit unsigned integer, network (big-endian) byte order
01258  *      v         | Integer | 16-bit unsigned integer, VAX (little-endian) byte order
01259  *      V         | Integer | 32-bit unsigned integer, VAX (little-endian) byte order
01260  *                |         |
01261  *      U         | Integer | UTF-8 character
01262  *      w         | Integer | BER-compressed integer (see Array#pack)
01263  *                
01264  *   Float        |         |
01265  *   Directive    | Returns | Meaning
01266  *   -----------------------------------------------------------------
01267  *      D, d      | Float   | double-precision float, native format
01268  *      F, f      | Float   | single-precision float, native format
01269  *      E         | Float   | double-precision float, little-endian byte order
01270  *      e         | Float   | single-precision float, little-endian byte order
01271  *      G         | Float   | double-precision float, network (big-endian) byte order
01272  *      g         | Float   | single-precision float, network (big-endian) byte order
01273  *                
01274  *   String       |         |
01275  *   Directive    | Returns | Meaning
01276  *   -----------------------------------------------------------------
01277  *      A         | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
01278  *      a         | String  | arbitrary binary string
01279  *      Z         | String  | null-terminated string
01280  *      B         | String  | bit string (MSB first)
01281  *      b         | String  | bit string (LSB first)
01282  *      H         | String  | hex string (high nibble first)
01283  *      h         | String  | hex string (low nibble first)
01284  *      u         | String  | UU-encoded string
01285  *      M         | String  | quoted-printable, MIME encoding (see RFC2045)
01286  *      m         | String  | base64 encoded string (RFC 2045) (default)
01287  *                |         | base64 encoded string (RFC 4648) if followed by 0
01288  *      P         | String  | pointer to a structure (fixed-length string)
01289  *      p         | String  | pointer to a null-terminated string
01290  *                
01291  *   Misc.        |         |
01292  *   Directive    | Returns | Meaning
01293  *   -----------------------------------------------------------------
01294  *      @         | ---     | skip to the offset given by the length argument
01295  *      X         | ---     | skip backward one byte
01296  *      x         | ---     | skip forward one byte
01297  */
01298 
01299 static VALUE
01300 pack_unpack(VALUE str, VALUE fmt)
01301 {
01302     static const char hexdigits[] = "0123456789abcdef";
01303     char *s, *send;
01304     char *p, *pend;
01305     VALUE ary;
01306     char type;
01307     long len, tmp_len;
01308     int star;
01309 #ifdef NATINT_PACK
01310     int natint;                 /* native integer */
01311 #endif
01312     int block_p = rb_block_given_p();
01313     int signed_p, integer_size, bigendian_p;
01314 #define UNPACK_PUSH(item) do {\
01315         VALUE item_val = (item);\
01316         if (block_p) {\
01317             rb_yield(item_val);\
01318         }\
01319         else {\
01320             rb_ary_push(ary, item_val);\
01321         }\
01322     } while (0)
01323 
01324     StringValue(str);
01325     StringValue(fmt);
01326     s = RSTRING_PTR(str);
01327     send = s + RSTRING_LEN(str);
01328     p = RSTRING_PTR(fmt);
01329     pend = p + RSTRING_LEN(fmt);
01330 
01331     ary = block_p ? Qnil : rb_ary_new();
01332     while (p < pend) {
01333         type = *p++;
01334 #ifdef NATINT_PACK
01335         natint = 0;
01336 #endif
01337 
01338         if (ISSPACE(type)) continue;
01339         if (type == '#') {
01340             while ((p < pend) && (*p != '\n')) {
01341                 p++;
01342             }
01343             continue;
01344         }
01345         star = 0;
01346         if (*p == '_' || *p == '!') {
01347             static const char natstr[] = "sSiIlL";
01348 
01349             if (strchr(natstr, type)) {
01350 #ifdef NATINT_PACK
01351                 natint = 1;
01352 #endif
01353                 p++;
01354             }
01355             else {
01356                 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
01357             }
01358         }
01359         if (p >= pend)
01360             len = 1;
01361         else if (*p == '*') {
01362             star = 1;
01363             len = send - s;
01364             p++;
01365         }
01366         else if (ISDIGIT(*p)) {
01367             errno = 0;
01368             len = STRTOUL(p, (char**)&p, 10);
01369             if (errno) {
01370                 rb_raise(rb_eRangeError, "pack length too big");
01371             }
01372         }
01373         else {
01374             len = (type != '@');
01375         }
01376 
01377         switch (type) {
01378           case '%':
01379             rb_raise(rb_eArgError, "%% is not supported");
01380             break;
01381 
01382           case 'A':
01383             if (len > send - s) len = send - s;
01384             {
01385                 long end = len;
01386                 char *t = s + len - 1;
01387 
01388                 while (t >= s) {
01389                     if (*t != ' ' && *t != '\0') break;
01390                     t--; len--;
01391                 }
01392                 UNPACK_PUSH(infected_str_new(s, len, str));
01393                 s += end;
01394             }
01395             break;
01396 
01397           case 'Z':
01398             {
01399                 char *t = s;
01400 
01401                 if (len > send-s) len = send-s;
01402                 while (t < s+len && *t) t++;
01403                 UNPACK_PUSH(infected_str_new(s, t-s, str));
01404                 if (t < send) t++;
01405                 s = star ? t : s+len;
01406             }
01407             break;
01408 
01409           case 'a':
01410             if (len > send - s) len = send - s;
01411             UNPACK_PUSH(infected_str_new(s, len, str));
01412             s += len;
01413             break;
01414 
01415           case 'b':
01416             {
01417                 VALUE bitstr;
01418                 char *t;
01419                 int bits;
01420                 long i;
01421 
01422                 if (p[-1] == '*' || len > (send - s) * 8)
01423                     len = (send - s) * 8;
01424                 bits = 0;
01425                 UNPACK_PUSH(bitstr = rb_str_new(0, len));
01426                 t = RSTRING_PTR(bitstr);
01427                 for (i=0; i<len; i++) {
01428                     if (i & 7) bits >>= 1;
01429                     else bits = *s++;
01430                     *t++ = (bits & 1) ? '1' : '0';
01431                 }
01432             }
01433             break;
01434 
01435           case 'B':
01436             {
01437                 VALUE bitstr;
01438                 char *t;
01439                 int bits;
01440                 long i;
01441 
01442                 if (p[-1] == '*' || len > (send - s) * 8)
01443                     len = (send - s) * 8;
01444                 bits = 0;
01445                 UNPACK_PUSH(bitstr = rb_str_new(0, len));
01446                 t = RSTRING_PTR(bitstr);
01447                 for (i=0; i<len; i++) {
01448                     if (i & 7) bits <<= 1;
01449                     else bits = *s++;
01450                     *t++ = (bits & 128) ? '1' : '0';
01451                 }
01452             }
01453             break;
01454 
01455           case 'h':
01456             {
01457                 VALUE bitstr;
01458                 char *t;
01459                 int bits;
01460                 long i;
01461 
01462                 if (p[-1] == '*' || len > (send - s) * 2)
01463                     len = (send - s) * 2;
01464                 bits = 0;
01465                 UNPACK_PUSH(bitstr = rb_str_new(0, len));
01466                 t = RSTRING_PTR(bitstr);
01467                 for (i=0; i<len; i++) {
01468                     if (i & 1)
01469                         bits >>= 4;
01470                     else
01471                         bits = *s++;
01472                     *t++ = hexdigits[bits & 15];
01473                 }
01474             }
01475             break;
01476 
01477           case 'H':
01478             {
01479                 VALUE bitstr;
01480                 char *t;
01481                 int bits;
01482                 long i;
01483 
01484                 if (p[-1] == '*' || len > (send - s) * 2)
01485                     len = (send - s) * 2;
01486                 bits = 0;
01487                 UNPACK_PUSH(bitstr = rb_str_new(0, len));
01488                 t = RSTRING_PTR(bitstr);
01489                 for (i=0; i<len; i++) {
01490                     if (i & 1)
01491                         bits <<= 4;
01492                     else
01493                         bits = *s++;
01494                     *t++ = hexdigits[(bits >> 4) & 15];
01495                 }
01496             }
01497             break;
01498 
01499           case 'c':
01500             PACK_LENGTH_ADJUST_SIZE(sizeof(char));
01501             while (len-- > 0) {
01502                 int c = *s++;
01503                 if (c > (char)127) c-=256;
01504                 UNPACK_PUSH(INT2FIX(c));
01505             }
01506             PACK_ITEM_ADJUST();
01507             break;
01508 
01509           case 'C':
01510             PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
01511             while (len-- > 0) {
01512                 unsigned char c = *s++;
01513                 UNPACK_PUSH(INT2FIX(c));
01514             }
01515             PACK_ITEM_ADJUST();
01516             break;
01517 
01518           case 's':
01519             signed_p = 1;
01520             integer_size = NATINT_LEN(short, 2);
01521             bigendian_p = BIGENDIAN_P();
01522             goto unpack_integer;
01523 
01524           case 'S':
01525             signed_p = 0;
01526             integer_size = NATINT_LEN(short, 2);
01527             bigendian_p = BIGENDIAN_P();
01528             goto unpack_integer;
01529 
01530           case 'i':
01531             signed_p = 1;
01532             integer_size = (int)sizeof(int);
01533             bigendian_p = BIGENDIAN_P();
01534             goto unpack_integer;
01535 
01536           case 'I':
01537             signed_p = 0;
01538             integer_size = (int)sizeof(int);
01539             bigendian_p = BIGENDIAN_P();
01540             goto unpack_integer;
01541 
01542           case 'l':
01543             signed_p = 1;
01544             integer_size = NATINT_LEN(long, 4);
01545             bigendian_p = BIGENDIAN_P();
01546             goto unpack_integer;
01547 
01548           case 'L':
01549             signed_p = 0;
01550             integer_size = NATINT_LEN(long, 4);
01551             bigendian_p = BIGENDIAN_P();
01552             goto unpack_integer;
01553 
01554           case 'q':
01555             signed_p = 1;
01556             integer_size = QUAD_SIZE;
01557             bigendian_p = BIGENDIAN_P();
01558             goto unpack_integer;
01559 
01560           case 'Q':
01561             signed_p = 0;
01562             integer_size = QUAD_SIZE;
01563             bigendian_p = BIGENDIAN_P();
01564             goto unpack_integer;
01565 
01566           case 'n':
01567             signed_p = 0;
01568             integer_size = 2;
01569             bigendian_p = 1;
01570             goto unpack_integer;
01571 
01572           case 'N':
01573             signed_p = 0;
01574             integer_size = 4;
01575             bigendian_p = 1;
01576             goto unpack_integer;
01577 
01578           case 'v':
01579             signed_p = 0;
01580             integer_size = 2;
01581             bigendian_p = 0;
01582             goto unpack_integer;
01583 
01584           case 'V':
01585             signed_p = 0;
01586             integer_size = 4;
01587             bigendian_p = 0;
01588             goto unpack_integer;
01589 
01590           unpack_integer:
01591             switch (integer_size) {
01592 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
01593               case SIZEOF_INT16_T:
01594                 if (signed_p) {
01595                     PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
01596                     while (len-- > 0) {
01597                         union {
01598                             int16_t i;
01599                             char a[sizeof(int16_t)];
01600                         } v;
01601                         memcpy(v.a, s, sizeof(int16_t));
01602                         if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
01603                         s += sizeof(int16_t);
01604                         UNPACK_PUSH(INT2FIX(v.i));
01605                     }
01606                     PACK_ITEM_ADJUST();
01607                 }
01608                 else {
01609                     PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
01610                     while (len-- > 0) {
01611                         union {
01612                             uint16_t i;
01613                             char a[sizeof(uint16_t)];
01614                         } v;
01615                         memcpy(v.a, s, sizeof(uint16_t));
01616                         if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
01617                         s += sizeof(uint16_t);
01618                         UNPACK_PUSH(INT2FIX(v.i));
01619                     }
01620                     PACK_ITEM_ADJUST();
01621                 }
01622                 break;
01623 #endif
01624 
01625 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
01626               case SIZEOF_INT32_T:
01627                 if (signed_p) {
01628                     PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
01629                     while (len-- > 0) {
01630                         union {
01631                             int32_t i;
01632                             char a[sizeof(int32_t)];
01633                         } v;
01634                         memcpy(v.a, s, sizeof(int32_t));
01635                         if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
01636                         s += sizeof(int32_t);
01637                         UNPACK_PUSH(INT2NUM(v.i));
01638                     }
01639                     PACK_ITEM_ADJUST();
01640                 }
01641                 else {
01642                     PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
01643                     while (len-- > 0) {
01644                         union {
01645                             uint32_t i;
01646                             char a[sizeof(uint32_t)];
01647                         } v;
01648                         memcpy(v.a, s, sizeof(uint32_t));
01649                         if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
01650                         s += sizeof(uint32_t);
01651                         UNPACK_PUSH(UINT2NUM(v.i));
01652                     }
01653                     PACK_ITEM_ADJUST();
01654                 }
01655                 break;
01656 #endif
01657 
01658 #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
01659               case SIZEOF_INT64_T:
01660                 if (signed_p) {
01661                     PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
01662                     while (len-- > 0) {
01663                         union {
01664                             int64_t i;
01665                             char a[sizeof(int64_t)];
01666                         } v;
01667                         memcpy(v.a, s, sizeof(int64_t));
01668                         if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
01669                         s += sizeof(int64_t);
01670                         UNPACK_PUSH(INT64toNUM(v.i));
01671                     }
01672                     PACK_ITEM_ADJUST();
01673                 }
01674                 else {
01675                     PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
01676                     while (len-- > 0) {
01677                         union {
01678                             uint64_t i;
01679                             char a[sizeof(uint64_t)];
01680                         } v;
01681                         memcpy(v.a, s, sizeof(uint64_t));
01682                         if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
01683                         s += sizeof(uint64_t);
01684                         UNPACK_PUSH(UINT64toNUM(v.i));
01685                     }
01686                     PACK_ITEM_ADJUST();
01687                 }
01688                 break;
01689 #endif
01690 
01691               default:
01692                 if (integer_size > MAX_INTEGER_PACK_SIZE)
01693                     rb_bug("unexpected intger size for pack: %d", integer_size);
01694                 PACK_LENGTH_ADJUST_SIZE(integer_size);
01695                 while (len-- > 0) {
01696                     union {
01697                         unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
01698                         char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
01699                     } v;
01700                     int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
01701                     int i;
01702 
01703                     if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
01704                         memset(v.a, 0xff, sizeof(long)*num_longs);
01705                     else
01706                         memset(v.a, 0, sizeof(long)*num_longs);
01707                     if (bigendian_p)
01708                         memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
01709                     else
01710                         memcpy(v.a, s, integer_size);
01711                     if (bigendian_p) {
01712                         for (i = 0; i < num_longs/2; i++) {
01713                             unsigned long t = v.i[i];
01714                             v.i[i] = v.i[num_longs-1-i];
01715                             v.i[num_longs-1-i] = t;
01716                         }
01717                     }
01718                     if (bigendian_p != BIGENDIAN_P()) {
01719                         for (i = 0; i < num_longs; i++)
01720                             v.i[i] = swapl(v.i[i]);
01721                     }
01722                     s += integer_size;
01723                     UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
01724                 }
01725                 PACK_ITEM_ADJUST();
01726                 break;
01727             }
01728             break;
01729 
01730           case 'f':
01731           case 'F':
01732             PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01733             while (len-- > 0) {
01734                 float tmp;
01735                 memcpy(&tmp, s, sizeof(float));
01736                 s += sizeof(float);
01737                 UNPACK_PUSH(DBL2NUM((double)tmp));
01738             }
01739             PACK_ITEM_ADJUST();
01740             break;
01741 
01742           case 'e':
01743             PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01744             while (len-- > 0) {
01745                 float tmp;
01746                 FLOAT_CONVWITH(ftmp);
01747 
01748                 memcpy(&tmp, s, sizeof(float));
01749                 s += sizeof(float);
01750                 tmp = VTOHF(tmp,ftmp);
01751                 UNPACK_PUSH(DBL2NUM((double)tmp));
01752             }
01753             PACK_ITEM_ADJUST();
01754             break;
01755 
01756           case 'E':
01757             PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01758             while (len-- > 0) {
01759                 double tmp;
01760                 DOUBLE_CONVWITH(dtmp);
01761 
01762                 memcpy(&tmp, s, sizeof(double));
01763                 s += sizeof(double);
01764                 tmp = VTOHD(tmp,dtmp);
01765                 UNPACK_PUSH(DBL2NUM(tmp));
01766             }
01767             PACK_ITEM_ADJUST();
01768             break;
01769 
01770           case 'D':
01771           case 'd':
01772             PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01773             while (len-- > 0) {
01774                 double tmp;
01775                 memcpy(&tmp, s, sizeof(double));
01776                 s += sizeof(double);
01777                 UNPACK_PUSH(DBL2NUM(tmp));
01778             }
01779             PACK_ITEM_ADJUST();
01780             break;
01781 
01782           case 'g':
01783             PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01784             while (len-- > 0) {
01785                 float tmp;
01786                 FLOAT_CONVWITH(ftmp;)
01787 
01788                 memcpy(&tmp, s, sizeof(float));
01789                 s += sizeof(float);
01790                 tmp = NTOHF(tmp,ftmp);
01791                 UNPACK_PUSH(DBL2NUM((double)tmp));
01792             }
01793             PACK_ITEM_ADJUST();
01794             break;
01795 
01796           case 'G':
01797             PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01798             while (len-- > 0) {
01799                 double tmp;
01800                 DOUBLE_CONVWITH(dtmp);
01801 
01802                 memcpy(&tmp, s, sizeof(double));
01803                 s += sizeof(double);
01804                 tmp = NTOHD(tmp,dtmp);
01805                 UNPACK_PUSH(DBL2NUM(tmp));
01806             }
01807             PACK_ITEM_ADJUST();
01808             break;
01809 
01810           case 'U':
01811             if (len > send - s) len = send - s;
01812             while (len > 0 && s < send) {
01813                 long alen = send - s;
01814                 unsigned long l;
01815 
01816                 l = utf8_to_uv(s, &alen);
01817                 s += alen; len--;
01818                 UNPACK_PUSH(ULONG2NUM(l));
01819             }
01820             break;
01821 
01822           case 'u':
01823             {
01824                 VALUE buf = infected_str_new(0, (send - s)*3/4, str);
01825                 char *ptr = RSTRING_PTR(buf);
01826                 long total = 0;
01827 
01828                 while (s < send && *s > ' ' && *s < 'a') {
01829                     long a,b,c,d;
01830                     char hunk[4];
01831 
01832                     hunk[3] = '\0';
01833                     len = (*s++ - ' ') & 077;
01834                     total += len;
01835                     if (total > RSTRING_LEN(buf)) {
01836                         len -= total - RSTRING_LEN(buf);
01837                         total = RSTRING_LEN(buf);
01838                     }
01839 
01840                     while (len > 0) {
01841                         long mlen = len > 3 ? 3 : len;
01842 
01843                         if (s < send && *s >= ' ')
01844                             a = (*s++ - ' ') & 077;
01845                         else
01846                             a = 0;
01847                         if (s < send && *s >= ' ')
01848                             b = (*s++ - ' ') & 077;
01849                         else
01850                             b = 0;
01851                         if (s < send && *s >= ' ')
01852                             c = (*s++ - ' ') & 077;
01853                         else
01854                             c = 0;
01855                         if (s < send && *s >= ' ')
01856                             d = (*s++ - ' ') & 077;
01857                         else
01858                             d = 0;
01859                         hunk[0] = (char)(a << 2 | b >> 4);
01860                         hunk[1] = (char)(b << 4 | c >> 2);
01861                         hunk[2] = (char)(c << 6 | d);
01862                         memcpy(ptr, hunk, mlen);
01863                         ptr += mlen;
01864                         len -= mlen;
01865                     }
01866                     if (*s == '\r') s++;
01867                     if (*s == '\n') s++;
01868                     else if (s < send && (s+1 == send || s[1] == '\n'))
01869                         s += 2; /* possible checksum byte */
01870                 }
01871 
01872                 rb_str_set_len(buf, total);
01873                 UNPACK_PUSH(buf);
01874             }
01875             break;
01876 
01877           case 'm':
01878             {
01879                 VALUE buf = infected_str_new(0, (send - s)*3/4, str);
01880                 char *ptr = RSTRING_PTR(buf);
01881                 int a = -1,b = -1,c = 0,d = 0;
01882                 static signed char b64_xtable[256];
01883 
01884                 if (b64_xtable['/'] <= 0) {
01885                     int i;
01886 
01887                     for (i = 0; i < 256; i++) {
01888                         b64_xtable[i] = -1;
01889                     }
01890                     for (i = 0; i < 64; i++) {
01891                         b64_xtable[(unsigned char)b64_table[i]] = i;
01892                     }
01893                 }
01894                 if (len == 0) {
01895                     while (s < send) {
01896                         a = b = c = d = -1;
01897                         a = b64_xtable[(unsigned char)*s++];
01898                         if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
01899                         b = b64_xtable[(unsigned char)*s++];
01900                         if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
01901                         if (*s == '=') {
01902                             if (s + 2 == send && *(s + 1) == '=') break;
01903                             rb_raise(rb_eArgError, "invalid base64");
01904                         }
01905                         c = b64_xtable[(unsigned char)*s++];
01906                         if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
01907                         if (s + 1 == send && *s == '=') break;
01908                         d = b64_xtable[(unsigned char)*s++];
01909                         if (d == -1) rb_raise(rb_eArgError, "invalid base64");
01910                         *ptr++ = a << 2 | b >> 4;
01911                         *ptr++ = b << 4 | c >> 2;
01912                         *ptr++ = c << 6 | d;
01913                     }
01914                     if (c == -1) {
01915                         *ptr++ = a << 2 | b >> 4;
01916                         if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
01917                     }
01918                     else if (d == -1) {
01919                         *ptr++ = a << 2 | b >> 4;
01920                         *ptr++ = b << 4 | c >> 2;
01921                         if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
01922                     }
01923                 }
01924                 else {
01925                     while (s < send) {
01926                         a = b = c = d = -1;
01927                         while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
01928                         if (s >= send) break;
01929                         s++;
01930                         while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
01931                         if (s >= send) break;
01932                         s++;
01933                         while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
01934                         if (*s == '=' || s >= send) break;
01935                         s++;
01936                         while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
01937                         if (*s == '=' || s >= send) break;
01938                         s++;
01939                         *ptr++ = a << 2 | b >> 4;
01940                         *ptr++ = b << 4 | c >> 2;
01941                         *ptr++ = c << 6 | d;
01942                     }
01943                     if (a != -1 && b != -1) {
01944                         if (c == -1 && *s == '=')
01945                             *ptr++ = a << 2 | b >> 4;
01946                         else if (c != -1 && *s == '=') {
01947                             *ptr++ = a << 2 | b >> 4;
01948                             *ptr++ = b << 4 | c >> 2;
01949                         }
01950                     }
01951                 }
01952                 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
01953                 UNPACK_PUSH(buf);
01954             }
01955             break;
01956 
01957           case 'M':
01958             {
01959                 VALUE buf = infected_str_new(0, send - s, str);
01960                 char *ptr = RSTRING_PTR(buf);
01961                 int c1, c2;
01962 
01963                 while (s < send) {
01964                     if (*s == '=') {
01965                         if (++s == send) break;
01966                        if (s+1 < send && *s == '\r' && *(s+1) == '\n')
01967                          s++;
01968                         if (*s != '\n') {
01969                             if ((c1 = hex2num(*s)) == -1) break;
01970                             if (++s == send) break;
01971                             if ((c2 = hex2num(*s)) == -1) break;
01972                             *ptr++ = c1 << 4 | c2;
01973                         }
01974                     }
01975                     else {
01976                         *ptr++ = *s;
01977                     }
01978                     s++;
01979                 }
01980                 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
01981                 ENCODING_CODERANGE_SET(buf, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
01982                 UNPACK_PUSH(buf);
01983             }
01984             break;
01985 
01986           case '@':
01987             if (len > RSTRING_LEN(str))
01988                 rb_raise(rb_eArgError, "@ outside of string");
01989             s = RSTRING_PTR(str) + len;
01990             break;
01991 
01992           case 'X':
01993             if (len > s - RSTRING_PTR(str))
01994                 rb_raise(rb_eArgError, "X outside of string");
01995             s -= len;
01996             break;
01997 
01998           case 'x':
01999             if (len > send - s)
02000                 rb_raise(rb_eArgError, "x outside of string");
02001             s += len;
02002             break;
02003 
02004           case 'P':
02005             if (sizeof(char *) <= (size_t)(send - s)) {
02006                 VALUE tmp = Qnil;
02007                 char *t;
02008 
02009                 memcpy(&t, s, sizeof(char *));
02010                 s += sizeof(char *);
02011 
02012                 if (t) {
02013                     VALUE a, *p, *pend;
02014 
02015                     if (!(a = rb_str_associated(str))) {
02016                         rb_raise(rb_eArgError, "no associated pointer");
02017                     }
02018                     p = RARRAY_PTR(a);
02019                     pend = p + RARRAY_LEN(a);
02020                     while (p < pend) {
02021                         if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) {
02022                             if (len < RSTRING_LEN(*p)) {
02023                                 tmp = rb_tainted_str_new(t, len);
02024                                 rb_str_associate(tmp, a);
02025                             }
02026                             else {
02027                                 tmp = *p;
02028                             }
02029                             break;
02030                         }
02031                         p++;
02032                     }
02033                     if (p == pend) {
02034                         rb_raise(rb_eArgError, "non associated pointer");
02035                     }
02036                 }
02037                 UNPACK_PUSH(tmp);
02038             }
02039             break;
02040 
02041           case 'p':
02042             if (len > (long)((send - s) / sizeof(char *)))
02043                 len = (send - s) / sizeof(char *);
02044             while (len-- > 0) {
02045                 if ((size_t)(send - s) < sizeof(char *))
02046                     break;
02047                 else {
02048                     VALUE tmp = Qnil;
02049                     char *t;
02050 
02051                     memcpy(&t, s, sizeof(char *));
02052                     s += sizeof(char *);
02053 
02054                     if (t) {
02055                         VALUE a, *p, *pend;
02056 
02057                         if (!(a = rb_str_associated(str))) {
02058                             rb_raise(rb_eArgError, "no associated pointer");
02059                         }
02060                         p = RARRAY_PTR(a);
02061                         pend = p + RARRAY_LEN(a);
02062                         while (p < pend) {
02063                             if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) {
02064                                 tmp = *p;
02065                                 break;
02066                             }
02067                             p++;
02068                         }
02069                         if (p == pend) {
02070                             rb_raise(rb_eArgError, "non associated pointer");
02071                         }
02072                     }
02073                     UNPACK_PUSH(tmp);
02074                 }
02075             }
02076             break;
02077 
02078           case 'w':
02079             {
02080                 unsigned long ul = 0;
02081                 unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
02082 
02083                 while (len > 0 && s < send) {
02084                     ul <<= 7;
02085                     ul |= (*s & 0x7f);
02086                     if (!(*s++ & 0x80)) {
02087                         UNPACK_PUSH(ULONG2NUM(ul));
02088                         len--;
02089                         ul = 0;
02090                     }
02091                     else if (ul & ulmask) {
02092                         VALUE big = rb_uint2big(ul);
02093                         VALUE big128 = rb_uint2big(128);
02094                         while (s < send) {
02095                             big = rb_big_mul(big, big128);
02096                             big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
02097                             if (!(*s++ & 0x80)) {
02098                                 UNPACK_PUSH(big);
02099                                 len--;
02100                                 ul = 0;
02101                                 break;
02102                             }
02103                         }
02104                     }
02105                 }
02106             }
02107             break;
02108 
02109           default:
02110             break;
02111         }
02112     }
02113 
02114     return ary;
02115 }
02116 
/* Presumably the number of bits in a byte, judging by the name; nothing in
 * the visible part of this file refers to it -- TODO confirm it is still
 * needed before relying on or removing it. */
02117 #define BYTEWIDTH 8
02118 
02119 int
02120 rb_uv_to_utf8(char buf[6], unsigned long uv)
02121 {
02122     if (uv <= 0x7f) {
02123         buf[0] = (char)uv;
02124         return 1;
02125     }
02126     if (uv <= 0x7ff) {
02127         buf[0] = (char)((uv>>6)&0xff)|0xc0;
02128         buf[1] = (char)(uv&0x3f)|0x80;
02129         return 2;
02130     }
02131     if (uv <= 0xffff) {
02132         buf[0] = (char)((uv>>12)&0xff)|0xe0;
02133         buf[1] = (char)((uv>>6)&0x3f)|0x80;
02134         buf[2] = (char)(uv&0x3f)|0x80;
02135         return 3;
02136     }
02137     if (uv <= 0x1fffff) {
02138         buf[0] = (char)((uv>>18)&0xff)|0xf0;
02139         buf[1] = (char)((uv>>12)&0x3f)|0x80;
02140         buf[2] = (char)((uv>>6)&0x3f)|0x80;
02141         buf[3] = (char)(uv&0x3f)|0x80;
02142         return 4;
02143     }
02144     if (uv <= 0x3ffffff) {
02145         buf[0] = (char)((uv>>24)&0xff)|0xf8;
02146         buf[1] = (char)((uv>>18)&0x3f)|0x80;
02147         buf[2] = (char)((uv>>12)&0x3f)|0x80;
02148         buf[3] = (char)((uv>>6)&0x3f)|0x80;
02149         buf[4] = (char)(uv&0x3f)|0x80;
02150         return 5;
02151     }
02152     if (uv <= 0x7fffffff) {
02153         buf[0] = (char)((uv>>30)&0xff)|0xfc;
02154         buf[1] = (char)((uv>>24)&0x3f)|0x80;
02155         buf[2] = (char)((uv>>18)&0x3f)|0x80;
02156         buf[3] = (char)((uv>>12)&0x3f)|0x80;
02157         buf[4] = (char)((uv>>6)&0x3f)|0x80;
02158         buf[5] = (char)(uv&0x3f)|0x80;
02159         return 6;
02160     }
02161     rb_raise(rb_eRangeError, "pack(U): value out of range");
02162 }
02163 
/*
 * Smallest value that may legitimately be encoded with a sequence of the
 * given length; the trailing comment on each entry is that length in bytes.
 * utf8_to_uv() indexes this table with (sequence length - 1) and raises
 * "redundant UTF-8 sequence" when the decoded value is below the entry.
 * utf8_to_uv() only accepts sequences of up to 6 bytes, so it never reads
 * the last entry.
 */
02164 static const unsigned long utf8_limits[] = {
02165     0x0,                        /* 1 */
02166     0x80,                       /* 2 */
02167     0x800,                      /* 3 */
02168     0x10000,                    /* 4 */
02169     0x200000,                   /* 5 */
02170     0x4000000,                  /* 6 */
02171     0x80000000,                 /* 7 */
02172 };
02173 
02174 static unsigned long
02175 utf8_to_uv(const char *p, long *lenp)
02176 {
02177     int c = *p++ & 0xff;
02178     unsigned long uv = c;
02179     long n;
02180 
02181     if (!(uv & 0x80)) {
02182         *lenp = 1;
02183         return uv;
02184     }
02185     if (!(uv & 0x40)) {
02186         *lenp = 1;
02187         rb_raise(rb_eArgError, "malformed UTF-8 character");
02188     }
02189 
02190     if      (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
02191     else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
02192     else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
02193     else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
02194     else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
02195     else {
02196         *lenp = 1;
02197         rb_raise(rb_eArgError, "malformed UTF-8 character");
02198     }
02199     if (n > *lenp) {
02200         rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
02201                  n, *lenp);
02202     }
02203     *lenp = n--;
02204     if (n != 0) {
02205         while (n--) {
02206             c = *p++ & 0xff;
02207             if ((c & 0xc0) != 0x80) {
02208                 *lenp -= n + 1;
02209                 rb_raise(rb_eArgError, "malformed UTF-8 character");
02210             }
02211             else {
02212                 c &= 0x3f;
02213                 uv = uv << 6 | c;
02214             }
02215         }
02216     }
02217     n = *lenp - 1;
02218     if (uv < utf8_limits[n]) {
02219         rb_raise(rb_eArgError, "redundant UTF-8 sequence");
02220     }
02221     return uv;
02222 }
02223 
/*
 * Registers the two methods implemented in this file:
 *   - "pack" on rb_cArray, handled by pack_pack
 *   - "unpack" on rb_cString, handled by pack_unpack
 * Both are registered with a trailing argument of 1, which in the Ruby C
 * API is the number of arguments the method takes.
 */
02224 void
02225 Init_pack(void)
02226 {
02227     rb_define_method(rb_cArray, "pack", pack_pack, 1);
02228     rb_define_method(rb_cString, "unpack", pack_unpack, 1);
02229 }
02230 

Generated on Wed Aug 10 09:17:10 2011 for Ruby by  doxygen 1.4.7