marshal.c

Go to the documentation of this file.
00001 /**********************************************************************
00002 
00003   marshal.c -
00004 
00005   $Author: nobu $
00006   created at: Thu Apr 27 16:30:01 JST 1995
00007 
00008   Copyright (C) 1993-2007 Yukihiro Matsumoto
00009 
00010 **********************************************************************/
00011 
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017 
00018 #include <math.h>
00019 #ifdef HAVE_FLOAT_H
00020 #include <float.h>
00021 #endif
00022 #ifdef HAVE_IEEEFP_H
00023 #include <ieeefp.h>
00024 #endif
00025 
/*
 * Helpers for writing a Bignum as a sequence of two-byte "shorts"
 * (see the T_BIGNUM case of w_object()).
 */
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
#define SHORTDN(x) RSHIFT(x,BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGITS
#define SHORTLEN(x) (x)
#else
/*
 * Returns the number of shorts needed for a bignum made of +len+ digits
 * stored at +ds+: every digit but the most significant one counts in
 * full, and the most significant digit contributes one short per
 * SHORTDN() step needed to shift it down to zero.
 */
static long
shortlen(long len, BDIGIT *ds)
{
    int used = 0;
    BDIGIT top;

    for (top = ds[len-1]; top != 0; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: expands to a reference to a variable named `d' at the call site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00048 
/* Marshal format version; marshal_dump() emits these as the first two bytes of the stream. */
00049 #define MARSHAL_MAJOR   4
00050 #define MARSHAL_MINOR   8
00051 
/* One-byte type tags written in front of each serialized value. */
00052 #define TYPE_NIL        '0'     /* nil */
00053 #define TYPE_TRUE       'T'     /* true */
00054 #define TYPE_FALSE      'F'     /* false */
00055 #define TYPE_FIXNUM     'i'     /* Fixnum, followed by a w_long() integer */
00056 
00057 #define TYPE_EXTENDED   'e'     /* written by w_extended() for each T_ICLASS it walks */
00058 #define TYPE_UCLASS     'C'     /* written by w_uclass() when the real class differs from the given base */
00059 #define TYPE_OBJECT     'o'     /* T_OBJECT: class name followed by instance variables */
00060 #define TYPE_DATA       'd'     /* T_DATA dumped through the s_dump_data method */
00061 #define TYPE_USERDEF    'u'     /* object dumped through the s_dump method (string payload) */
00062 #define TYPE_USRMARSHAL 'U'     /* object dumped through the s_mdump method (object payload) */
00063 #define TYPE_FLOAT      'f'     /* Float, text form produced by w_float() */
00064 #define TYPE_BIGNUM     'l'     /* Bignum: sign byte, short count, then shorts */
00065 #define TYPE_STRING     '"'     /* String bytes */
00066 #define TYPE_REGEXP     '/'     /* Regexp source followed by an options byte */
00067 #define TYPE_ARRAY      '['     /* Array: length then elements */
00068 #define TYPE_HASH       '{'     /* Hash whose default (RHASH_IFNONE) is nil */
00069 #define TYPE_HASH_DEF   '}'     /* Hash with a non-nil default value, written after the pairs */
00070 #define TYPE_STRUCT     'S'     /* Struct: class name, member count, name/value pairs */
00071 #define TYPE_MODULE_OLD 'M'     /* not written by the dump code visible in this part of the file */
00072 #define TYPE_CLASS      'c'     /* Class, identified by its path */
00073 #define TYPE_MODULE     'm'     /* Module, identified by its path */
00074 
00075 #define TYPE_SYMBOL     ':'     /* first occurrence of a symbol: its name bytes */
00076 #define TYPE_SYMLINK    ';'     /* back-reference to an already written symbol (index) */
00077 
00078 #define TYPE_IVAR       'I'     /* prefix: instance variables / encoding follow the value */
00079 #define TYPE_LINK       '@'     /* back-reference to an already written object (index) */
00080 
/*
 * IDs of the methods invoked on user objects and ports.  In the code
 * visible here: s_mdump, s_dump and s_dump_data are called from
 * w_object(); s_write and s_binmode are probed on the output port in
 * marshal_dump(); s_getbyte is called from r_byte().
 * NOTE(review): the IDs are assigned outside this part of the file.
 */
00081 static ID s_dump, s_load, s_mdump, s_mload;
00082 static ID s_dump_data, s_load_data, s_alloc, s_call;
00083 static ID s_getbyte, s_read, s_write, s_binmode;
00084 
/* Declared here, defined elsewhere: ID of the instance variable that holds an object's encoding. */
00085 ID rb_id_encoding(void);
00086 
/*
 * Compatibility record registered by rb_marshal_define_compat().
 * w_object() looks one up by the allocator of the object's class and,
 * when found, dumps the result of dumper(obj) in place of obj.
 */
00087 typedef struct {
00088     VALUE newclass;                 /* class passed as `newclass'; marked for GC */
00089     VALUE oldclass;                 /* class passed as `oldclass'; marked for GC */
00090     VALUE (*dumper)(VALUE);         /* converts an object before it is dumped */
00091     VALUE (*loader)(VALUE, VALUE);  /* not called in the code visible here; presumably the load-side inverse */
00092 } marshal_compat_t;
00093 
/* Maps an allocator function (rb_alloc_func_t) to its marshal_compat_t record. */
00094 static st_table *compat_allocator_tbl;
/* NOTE(review): not referenced in this part of the file; presumably a GC wrapper object for the table above. */
00095 static VALUE compat_allocator_tbl_wrapper;
00096 
00097 static int
00098 mark_marshal_compat_i(st_data_t key, st_data_t value)
00099 {
00100     marshal_compat_t *p = (marshal_compat_t *)value;
00101     rb_gc_mark(p->newclass);
00102     rb_gc_mark(p->oldclass);
00103     return ST_CONTINUE;
00104 }
00105 
00106 static void
00107 mark_marshal_compat_t(void *tbl)
00108 {
00109     if (!tbl) return;
00110     st_foreach(tbl, mark_marshal_compat_i, 0);
00111 }
00112 
00113 void
00114 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00115 {
00116     marshal_compat_t *compat;
00117     rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00118 
00119     if (!allocator) {
00120         rb_raise(rb_eTypeError, "no allocator");
00121     }
00122 
00123     compat = ALLOC(marshal_compat_t);
00124     compat->newclass = Qnil;
00125     compat->oldclass = Qnil;
00126     compat->newclass = newclass;
00127     compat->oldclass = oldclass;
00128     compat->dumper = dumper;
00129     compat->loader = loader;
00130 
00131     st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
00132 }
00133 
/* Object flags collected from dumped objects (w_object) and OR-ed onto the output buffer (w_nbyte). */
00134 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
/* Compile-time check: the array size becomes -1 (a compile error) unless the mask is unchanged by a cast to int. */
00135 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00136 
/* State shared by all w_* routines during one marshal_dump() call. */
00137 struct dump_arg {
00138     VALUE str, dest;        /* str: output buffer; dest: port the buffer is flushed to, or 0 */
00139     st_table *symbols;      /* ID -> index of symbols already written; NULL once cleared */
00140     st_table *data;         /* object -> index of objects already written */
00141     st_table *compat_tbl;   /* converted object -> original object (see w_object/w_class) */
00142     st_table *encodings;    /* encoding name -> String; created lazily by w_encoding() */
00143     int infection;          /* accumulated MARSHAL_INFECTION flags */
00144 };
00145 
/* Argument bundle passed to iteration callbacks (hash_each, w_obj_each) and to w_encoding/w_ivar. */
00146 struct dump_call_arg {
00147     VALUE obj;              /* not read by the code visible in this part of the file */
00148     struct dump_arg *arg;   /* shared dump state */
00149     int limit;              /* remaining depth handed on to w_object() */
00150 };
00151 
00152 static void
00153 check_dump_arg(struct dump_arg *arg, ID sym)
00154 {
00155     if (!arg->symbols) {
00156         rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00157                  rb_id2name(sym));
00158     }
00159 }
00160 
/* Forward declaration: needed by free_dump_arg(); the definition follows w_object(). */
00161 static void clear_dump_arg(struct dump_arg *arg);
00162 
00163 static void
00164 mark_dump_arg(void *ptr)
00165 {
00166     struct dump_arg *p = ptr;
00167     if (!p->symbols)
00168         return;
00169     rb_mark_set(p->data);
00170     rb_mark_hash(p->compat_tbl);
00171     rb_gc_mark(p->str);
00172 }
00173 
/*
 * GC free function of the dump_arg wrapper object: releases the
 * tables still held by the state, then the struct itself.
 */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *dump = ptr;

    clear_dump_arg(dump);
    xfree(dump);
}
00180 
00181 static size_t
00182 memsize_dump_arg(const void *ptr)
00183 {
00184     return ptr ? sizeof(struct dump_arg) : 0;
00185 }
00186 
/* Typed-data descriptor used by marshal_dump() to wrap a struct dump_arg: name, then mark/free/memsize callbacks. */
00187 static const rb_data_type_t dump_arg_data = {
00188     "dump_arg",
00189     mark_dump_arg, free_dump_arg, memsize_dump_arg
00190 };
00191 
00192 static const char *
00193 must_not_be_anonymous(const char *type, VALUE path)
00194 {
00195     char *n = RSTRING_PTR(path);
00196 
00197     if (!rb_enc_asciicompat(rb_enc_get(path))) {
00198         /* cannot occur? */
00199         rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00200     }
00201     if (n[0] == '#') {
00202         rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00203                  (int)RSTRING_LEN(path), n);
00204     }
00205     return n;
00206 }
00207 
00208 static VALUE
00209 class2path(VALUE klass)
00210 {
00211     VALUE path = rb_class_path(klass);
00212     const char *n;
00213 
00214     n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path);
00215     if (rb_path_to_class(path) != rb_class_real(klass)) {
00216         rb_raise(rb_eTypeError, "%s can't be referred to", n);
00217     }
00218     return path;
00219 }
00220 
/* Forward declarations: w_bytes() calls w_long() and w_symbol() calls w_encoding() before their definitions. */
00221 static void w_long(long, struct dump_arg*);
00222 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00223 
00224 static void
00225 w_nbyte(const char *s, long n, struct dump_arg *arg)
00226 {
00227     VALUE buf = arg->str;
00228     rb_str_buf_cat(buf, s, n);
00229     RBASIC(buf)->flags |= arg->infection;
00230     if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00231         rb_io_write(arg->dest, buf);
00232         rb_str_resize(buf, 0);
00233     }
00234 }
00235 
/* Writes the single byte +c+ to the output. */
static void
w_byte(char c, struct dump_arg *arg)
{
    char byte = c;

    w_nbyte(&byte, 1, arg);
}
00241 
/*
 * Writes a length-prefixed byte sequence: the length +n+ encoded with
 * w_long(), followed by the +n+ bytes at +s+.
 */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);     /* length prefix */
    w_nbyte(s, n, arg); /* payload */
}
00248 
/* Writes the low 16 bits of +x+ as two bytes, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((char)((x >> shift) & 0xff), arg);
    }
}
00255 
/*
 * Writes +x+ in the variable-length integer format:
 *
 *   0            -> the single byte 0
 *   1..122       -> the single byte x + 5
 *   -123..-1     -> the single byte (x - 5) & 0xff
 *   otherwise    -> a count byte (+n for non-negative, -n for negative
 *                   values) followed by the n low-order bytes of x,
 *                   least significant first
 *
 * Where long is wider than 4 bytes, values that do not fit in 32 bits
 * raise TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char encoded[sizeof(long)+1];
    int nbytes, k;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        /* big long does not fit in 4 bytes */
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off bytes until only the sign extension (0 or -1) is left. */
    for (nbytes = 1; nbytes < (int)sizeof(long)+1; nbytes++) {
        encoded[nbytes] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            encoded[0] = nbytes;
            break;
        }
        if (x == -1) {
            encoded[0] = -nbytes;
            break;
        }
    }

    /* Emit the count byte followed by the payload bytes. */
    for (k = 0; k <= nbytes; k++) {
        w_byte(encoded[k], arg);
    }
}
00298 
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)    /* from IEEE754 double precision */

/* Number of mantissa bits moved per step (a whole number of bytes). */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Stores the mantissa bits of +d+ that lie below the top DECIMAL_MANT
 * bits into +buf+.
 *
 * When such bits exist, buf[0] is set to 0 (the "binary mantissa"
 * mark) and the bits follow, most significant byte first, with
 * trailing zero bytes trimmed.
 *
 * Returns the number of bytes stored in +buf+ (0 when there are no
 * low-order bits to store).
 */
static int
save_mantissa(double d, char *buf)
{
    int exponent, used = 0;
    double whole, frac;

    /* Scale so the top DECIMAL_MANT bits are integral; keep the rest. */
    frac = modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &whole);
    if (!(frac > 0)) {
        return 0;
    }

    buf[used++] = 0;
    do {
        unsigned long chunk;

        frac = modf(ldexp(frac, MANT_BITS), &whole);
        chunk = (unsigned long)whole;
#if MANT_BITS > 24
        buf[used++] = (char)(chunk >> 24);
#endif
#if MANT_BITS > 16
        buf[used++] = (char)(chunk >> 16);
#endif
#if MANT_BITS > 8
        buf[used++] = (char)(chunk >> 8);
#endif
        buf[used++] = (char)chunk;
    } while (frac > 0);

    /* Drop trailing zero bytes. */
    while (!buf[used - 1]) --used;

    return used;
}

/*
 * Inverse of save_mantissa(): replaces the mantissa bits of +d+ below
 * the top DECIMAL_MANT bits with the bits stored in the +len+ bytes at
 * +buf+.
 *
 * +d+ is returned unchanged when +len+ is 0, when only one byte is
 * given, or when the first byte is not the 0 mark.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exponent, negative, shift = 0;

    if (len == 0) return d;
    --len;
    if (len <= 0 || *buf++ != 0) return d; /* no binary mantissa mark */

    negative = d < 0;
    /* Keep only the integral top DECIMAL_MANT bits of the magnitude. */
    modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &d);
    do {
        unsigned long chunk = 0;

        /* Read up to MANT_BITS / 8 bytes, most significant first. */
        switch (len) {
          default: chunk = *buf++ & 0xff;
            /* fall through */
#if MANT_BITS > 24
          case 3: chunk = (chunk << 8) | (*buf++ & 0xff);
            /* fall through */
#endif
#if MANT_BITS > 16
          case 2: chunk = (chunk << 8) | (*buf++ & 0xff);
            /* fall through */
#endif
#if MANT_BITS > 8
          case 1: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
        }
        shift -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, shift);
        len -= MANT_BITS / 8;
    } while (len > 0);

    d = ldexp(d, exponent - DECIMAL_MANT);
    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#define save_mantissa(d, buf) 0
#endif
00376 
/* Precision passed to the "%.*g" format in w_float(): DBL_DIG + 2 when DBL_DIG is known, otherwise 17. */
00377 #ifdef DBL_DIG
00378 #define FLOAT_DIG (DBL_DIG+2)
00379 #else
00380 #define FLOAT_DIG 17
00381 #endif
00382 
00383 static void
00384 w_float(double d, struct dump_arg *arg)
00385 {
00386     char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
00387 
00388     if (isinf(d)) {
00389         if (d < 0) strcpy(buf, "-inf");
00390         else       strcpy(buf, "inf");
00391     }
00392     else if (isnan(d)) {
00393         strcpy(buf, "nan");
00394     }
00395     else if (d == 0.0) {
00396         if (1.0/d < 0) strcpy(buf, "-0");
00397         else           strcpy(buf, "0");
00398     }
00399     else {
00400         size_t len;
00401 
00402         /* xxx: should not use system's sprintf(3) */
00403         snprintf(buf, sizeof(buf), "%.*g", FLOAT_DIG, d);
00404         len = strlen(buf);
00405         w_bytes(buf, len + save_mantissa(d, buf + len), arg);
00406         return;
00407     }
00408     w_bytes(buf, strlen(buf), arg);
00409 }
00410 
00411 static void
00412 w_symbol(ID id, struct dump_arg *arg)
00413 {
00414     VALUE sym;
00415     st_data_t num;
00416     int encidx = -1;
00417 
00418     if (st_lookup(arg->symbols, id, &num)) {
00419         w_byte(TYPE_SYMLINK, arg);
00420         w_long((long)num, arg);
00421     }
00422     else {
00423         sym = rb_id2str(id);
00424         if (!sym) {
00425             rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id);
00426         }
00427         encidx = rb_enc_get_index(sym);
00428         if (encidx == rb_usascii_encindex() ||
00429             rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00430             encidx = -1;
00431         }
00432         else {
00433             w_byte(TYPE_IVAR, arg);
00434         }
00435         w_byte(TYPE_SYMBOL, arg);
00436         w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00437         st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00438         if (encidx != -1) {
00439             struct dump_call_arg c_arg;
00440             c_arg.limit = 1;
00441             c_arg.arg = arg;
00442             w_encoding(sym, 0, &c_arg);
00443         }
00444     }
00445 }
00446 
00447 static void
00448 w_unique(VALUE s, struct dump_arg *arg)
00449 {
00450     must_not_be_anonymous("class", s);
00451     w_symbol(rb_intern_str(s), arg);
00452 }
00453 
/* Forward declaration: w_object() is used by the helpers below before it is defined. */
00454 static void w_object(VALUE,struct dump_arg*,int);
00455 
00456 static int
00457 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00458 {
00459     w_object(key, arg->arg, arg->limit);
00460     w_object(value, arg->arg, arg->limit);
00461     return ST_CONTINUE;
00462 }
00463 
00464 static void
00465 w_extended(VALUE klass, struct dump_arg *arg, int check)
00466 {
00467     if (check && FL_TEST(klass, FL_SINGLETON)) {
00468         if (RCLASS_M_TBL(klass)->num_entries ||
00469             (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
00470             rb_raise(rb_eTypeError, "singleton can't be dumped");
00471         }
00472         klass = RCLASS_SUPER(klass);
00473     }
00474     while (BUILTIN_TYPE(klass) == T_ICLASS) {
00475         VALUE path = rb_class_name(RBASIC(klass)->klass);
00476         w_byte(TYPE_EXTENDED, arg);
00477         w_unique(path, arg);
00478         klass = RCLASS_SUPER(klass);
00479     }
00480 }
00481 
00482 static void
00483 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00484 {
00485     VALUE path;
00486     st_data_t real_obj;
00487     VALUE klass;
00488 
00489     if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00490         obj = (VALUE)real_obj;
00491     }
00492     klass = CLASS_OF(obj);
00493     w_extended(klass, arg, check);
00494     w_byte(type, arg);
00495     path = class2path(rb_class_real(klass));
00496     w_unique(path, arg);
00497 }
00498 
00499 static void
00500 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00501 {
00502     VALUE klass = CLASS_OF(obj);
00503 
00504     w_extended(klass, arg, TRUE);
00505     klass = rb_class_real(klass);
00506     if (klass != super) {
00507         w_byte(TYPE_UCLASS, arg);
00508         w_unique(class2path(klass), arg);
00509     }
00510 }
00511 
00512 static int
00513 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
00514 {
00515     if (id == rb_id_encoding()) return ST_CONTINUE;
00516     if (id == rb_intern("E")) return ST_CONTINUE;
00517     w_symbol(id, arg->arg);
00518     w_object(value, arg->arg, arg->limit);
00519     return ST_CONTINUE;
00520 }
00521 
00522 static void
00523 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00524 {
00525     int encidx = rb_enc_get_index(obj);
00526     rb_encoding *enc = 0;
00527     st_data_t name;
00528 
00529     if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00530         w_long(num, arg->arg);
00531         return;
00532     }
00533     w_long(num + 1, arg->arg);
00534 
00535     /* special treatment for US-ASCII and UTF-8 */
00536     if (encidx == rb_usascii_encindex()) {
00537         w_symbol(rb_intern("E"), arg->arg);
00538         w_object(Qfalse, arg->arg, arg->limit + 1);
00539         return;
00540     }
00541     else if (encidx == rb_utf8_encindex()) {
00542         w_symbol(rb_intern("E"), arg->arg);
00543         w_object(Qtrue, arg->arg, arg->limit + 1);
00544         return;
00545     }
00546 
00547     w_symbol(rb_id_encoding(), arg->arg);
00548     do {
00549         if (!arg->arg->encodings)
00550             arg->arg->encodings = st_init_strcasetable();
00551         else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00552             break;
00553         name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00554         st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00555     } while (0);
00556     w_object(name, arg->arg, arg->limit + 1);
00557 }
00558 
00559 static void
00560 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00561 {
00562     long num = tbl ? tbl->num_entries : 0;
00563 
00564     w_encoding(obj, num, arg);
00565     if (tbl) {
00566         st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00567     }
00568 }
00569 
00570 static void
00571 w_objivar(VALUE obj, struct dump_call_arg *arg)
00572 {
00573     VALUE *ptr;
00574     long i, len, num;
00575 
00576     len = ROBJECT_NUMIV(obj);
00577     ptr = ROBJECT_IVPTR(obj);
00578     num = 0;
00579     for (i = 0; i < len; i++)
00580         if (ptr[i] != Qundef)
00581             num += 1;
00582 
00583     w_encoding(obj, num, arg);
00584     if (num != 0) {
00585         rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00586     }
00587 }
00588 
00589 static void
00590 w_object(VALUE obj, struct dump_arg *arg, int limit)
00591 {
00592     struct dump_call_arg c_arg;
00593     st_table *ivtbl = 0;
00594     st_data_t num;
00595     int hasiv = 0;
00596 #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
00597                                (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
00598 
00599     if (limit == 0) {
00600         rb_raise(rb_eArgError, "exceed depth limit");
00601     }
00602 
00603     limit--;
00604     c_arg.limit = limit;
00605     c_arg.arg = arg;
00606 
00607     if (st_lookup(arg->data, obj, &num)) {
00608         w_byte(TYPE_LINK, arg);
00609         w_long((long)num, arg);
00610         return;
00611     }
00612 
00613     if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
00614         w_byte(TYPE_IVAR, arg);
00615     }
00616     if (obj == Qnil) {
00617         w_byte(TYPE_NIL, arg);
00618     }
00619     else if (obj == Qtrue) {
00620         w_byte(TYPE_TRUE, arg);
00621     }
00622     else if (obj == Qfalse) {
00623         w_byte(TYPE_FALSE, arg);
00624     }
00625     else if (FIXNUM_P(obj)) {
00626 #if SIZEOF_LONG <= 4
00627         w_byte(TYPE_FIXNUM, arg);
00628         w_long(FIX2INT(obj), arg);
00629 #else
00630         if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
00631             w_byte(TYPE_FIXNUM, arg);
00632             w_long(FIX2LONG(obj), arg);
00633         }
00634         else {
00635             w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
00636         }
00637 #endif
00638     }
00639     else if (SYMBOL_P(obj)) {
00640         w_symbol(SYM2ID(obj), arg);
00641     }
00642     else {
00643         arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
00644 
00645         if (rb_respond_to(obj, s_mdump)) {
00646             volatile VALUE v;
00647 
00648             st_add_direct(arg->data, obj, arg->data->num_entries);
00649 
00650             v = rb_funcall(obj, s_mdump, 0, 0);
00651             check_dump_arg(arg, s_mdump);
00652             w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
00653             w_object(v, arg, limit);
00654             if (hasiv) w_ivar(obj, ivtbl, &c_arg);
00655             return;
00656         }
00657         if (rb_respond_to(obj, s_dump)) {
00658             VALUE v;
00659             st_table *ivtbl2 = 0;
00660             int hasiv2;
00661 
00662             v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
00663             check_dump_arg(arg, s_dump);
00664             if (TYPE(v) != T_STRING) {
00665                 rb_raise(rb_eTypeError, "_dump() must return string");
00666             }
00667             if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
00668                 w_byte(TYPE_IVAR, arg);
00669             }
00670             w_class(TYPE_USERDEF, obj, arg, FALSE);
00671             w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
00672             if (hasiv2) {
00673                 w_ivar(v, ivtbl2, &c_arg);
00674             }
00675             else if (hasiv) {
00676                 w_ivar(obj, ivtbl, &c_arg);
00677             }
00678             st_add_direct(arg->data, obj, arg->data->num_entries);
00679             return;
00680         }
00681 
00682         st_add_direct(arg->data, obj, arg->data->num_entries);
00683 
00684         {
00685             st_data_t compat_data;
00686             rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
00687             if (st_lookup(compat_allocator_tbl,
00688                           (st_data_t)allocator,
00689                           &compat_data)) {
00690                 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
00691                 VALUE real_obj = obj;
00692                 obj = compat->dumper(real_obj);
00693                 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
00694             }
00695         }
00696 
00697         switch (BUILTIN_TYPE(obj)) {
00698           case T_CLASS:
00699             if (FL_TEST(obj, FL_SINGLETON)) {
00700                 rb_raise(rb_eTypeError, "singleton class can't be dumped");
00701             }
00702             w_byte(TYPE_CLASS, arg);
00703             {
00704                 volatile VALUE path = class2path(obj);
00705                 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00706             }
00707             break;
00708 
00709           case T_MODULE:
00710             w_byte(TYPE_MODULE, arg);
00711             {
00712                 VALUE path = class2path(obj);
00713                 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00714             }
00715             break;
00716 
00717           case T_FLOAT:
00718             w_byte(TYPE_FLOAT, arg);
00719             w_float(RFLOAT_VALUE(obj), arg);
00720             break;
00721 
00722           case T_BIGNUM:
00723             w_byte(TYPE_BIGNUM, arg);
00724             {
00725                 char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
00726                 long len = RBIGNUM_LEN(obj);
00727                 BDIGIT *d = RBIGNUM_DIGITS(obj);
00728 
00729                 w_byte(sign, arg);
00730                 w_long(SHORTLEN(len), arg); /* w_short? */
00731                 while (len--) {
00732 #if SIZEOF_BDIGITS > SIZEOF_SHORT
00733                     BDIGIT num = *d;
00734                     int i;
00735 
00736                     for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
00737                         w_short(num & SHORTMASK, arg);
00738                         num = SHORTDN(num);
00739                         if (len == 0 && num == 0) break;
00740                     }
00741 #else
00742                     w_short(*d, arg);
00743 #endif
00744                     d++;
00745                 }
00746             }
00747             break;
00748 
00749           case T_STRING:
00750             w_uclass(obj, rb_cString, arg);
00751             w_byte(TYPE_STRING, arg);
00752             w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
00753             break;
00754 
00755           case T_REGEXP:
00756             w_uclass(obj, rb_cRegexp, arg);
00757             w_byte(TYPE_REGEXP, arg);
00758             {
00759                 int opts = rb_reg_options(obj);
00760                 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
00761                 w_byte((char)opts, arg);
00762             }
00763             break;
00764 
00765           case T_ARRAY:
00766             w_uclass(obj, rb_cArray, arg);
00767             w_byte(TYPE_ARRAY, arg);
00768             {
00769                 long i, len = RARRAY_LEN(obj);
00770 
00771                 w_long(len, arg);
00772                 for (i=0; i<RARRAY_LEN(obj); i++) {
00773                     w_object(RARRAY_PTR(obj)[i], arg, limit);
00774                     if (len != RARRAY_LEN(obj)) {
00775                         rb_raise(rb_eRuntimeError, "array modified during dump");
00776                     }
00777                 }
00778             }
00779             break;
00780 
00781           case T_HASH:
00782             w_uclass(obj, rb_cHash, arg);
00783             if (NIL_P(RHASH_IFNONE(obj))) {
00784                 w_byte(TYPE_HASH, arg);
00785             }
00786             else if (FL_TEST(obj, FL_USER2)) {
00787                 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
00788                 rb_raise(rb_eTypeError, "can't dump hash with default proc");
00789             }
00790             else {
00791                 w_byte(TYPE_HASH_DEF, arg);
00792             }
00793             w_long(RHASH_SIZE(obj), arg);
00794             rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
00795             if (!NIL_P(RHASH_IFNONE(obj))) {
00796                 w_object(RHASH_IFNONE(obj), arg, limit);
00797             }
00798             break;
00799 
00800           case T_STRUCT:
00801             w_class(TYPE_STRUCT, obj, arg, TRUE);
00802             {
00803                 long len = RSTRUCT_LEN(obj);
00804                 VALUE mem;
00805                 long i;
00806 
00807                 w_long(len, arg);
00808                 mem = rb_struct_members(obj);
00809                 for (i=0; i<len; i++) {
00810                     w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
00811                     w_object(RSTRUCT_PTR(obj)[i], arg, limit);
00812                 }
00813             }
00814             break;
00815 
00816           case T_OBJECT:
00817             w_class(TYPE_OBJECT, obj, arg, TRUE);
00818             w_objivar(obj, &c_arg);
00819             break;
00820 
00821           case T_DATA:
00822             {
00823                 VALUE v;
00824 
00825                 if (!rb_respond_to(obj, s_dump_data)) {
00826                     rb_raise(rb_eTypeError,
00827                              "no marshal_dump is defined for class %s",
00828                              rb_obj_classname(obj));
00829                 }
00830                 v = rb_funcall(obj, s_dump_data, 0);
00831                 check_dump_arg(arg, s_dump_data);
00832                 w_class(TYPE_DATA, obj, arg, TRUE);
00833                 w_object(v, arg, limit);
00834             }
00835             break;
00836 
00837           default:
00838             rb_raise(rb_eTypeError, "can't dump %s",
00839                      rb_obj_classname(obj));
00840             break;
00841         }
00842     }
00843     if (hasiv) {
00844         w_ivar(obj, ivtbl, &c_arg);
00845     }
00846 }
00847 
00848 static void
00849 clear_dump_arg(struct dump_arg *arg)
00850 {
00851     if (!arg->symbols) return;
00852     st_free_table(arg->symbols);
00853     arg->symbols = 0;
00854     st_free_table(arg->data);
00855     arg->data = 0;
00856     st_free_table(arg->compat_tbl);
00857     arg->compat_tbl = 0;
00858     if (arg->encodings) {
00859         st_free_table(arg->encodings);
00860         arg->encodings = 0;
00861     }
00862 }
00863 
00864 /*
00865  * call-seq:
00866  *      dump( obj [, anIO] , limit=-1 ) -> anIO
00867  *
00868  * Serializes obj and all descendant objects. If anIO is
00869  * specified, the serialized data will be written to it, otherwise the
00870  * data will be returned as a String. If limit is specified, the
00871  * traversal of subobjects will be limited to that depth. If limit is
00872  * negative, no checking of depth will be performed.
00873  *
00874  *     class Klass
00875  *       def initialize(str)
00876  *         @str = str
00877  *       end
00878  *       def sayHello
00879  *         @str
00880  *       end
00881  *     end
00882  *
00883  * (produces no output)
00884  *
00885  *     o = Klass.new("hello\n")
00886  *     data = Marshal.dump(o)
00887  *     obj = Marshal.load(data)
00888  *     obj.sayHello   #=> "hello\n"
00889  *
00890  * Marshal can't dump the following objects:
00891  * * anonymous Class/Module.
00892  * * objects which are related to the system (ex: Dir, File::Stat, IO, File,
00893  *   Socket and so on)
00894  * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
00895  *   ThreadGroup, Continuation
00896  * * objects which define singleton methods
00897  */
00898 static VALUE
00899 marshal_dump(int argc, VALUE *argv)
00900 {
00901     VALUE obj, port, a1, a2;
00902     int limit = -1;
00903     struct dump_arg *arg;
00904     volatile VALUE wrapper;
00905 
00906     port = Qnil;
00907     rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
00908     if (argc == 3) {
00909         if (!NIL_P(a2)) limit = NUM2INT(a2);
00910         if (NIL_P(a1)) goto type_error;
00911         port = a1;
00912     }
00913     else if (argc == 2) {
00914         if (FIXNUM_P(a1)) limit = FIX2INT(a1);
00915         else if (NIL_P(a1)) goto type_error;
00916         else port = a1;
00917     }
00918     wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
00919     arg->dest = 0;
00920     arg->symbols = st_init_numtable();
00921     arg->data    = st_init_numtable();
00922     arg->infection = 0;
00923     arg->compat_tbl = st_init_numtable();
00924     arg->encodings = 0;
00925     arg->str = rb_str_buf_new(0);
00926     if (!NIL_P(port)) {
00927         if (!rb_respond_to(port, s_write)) {
00928           type_error:
00929             rb_raise(rb_eTypeError, "instance of IO needed");
00930         }
00931         arg->dest = port;
00932         if (rb_respond_to(port, s_binmode)) {
00933             rb_funcall2(port, s_binmode, 0, 0);
00934             check_dump_arg(arg, s_binmode);
00935         }
00936     }
00937     else {
00938         port = arg->str;
00939     }
00940 
00941     w_byte(MARSHAL_MAJOR, arg);
00942     w_byte(MARSHAL_MINOR, arg);
00943 
00944     w_object(obj, arg, limit);
00945     if (arg->dest) {
00946         rb_io_write(arg->dest, arg->str);
00947         rb_str_resize(arg->str, 0);
00948     }
00949     clear_dump_arg(arg);
00950     RB_GC_GUARD(wrapper);
00951 
00952     return port;
00953 }
00954 
/* State shared by the r_* routines while loading. */
00955 struct load_arg {
00956     VALUE src;              /* input: a String, or an object that r_byte() reads through s_getbyte */
00957     long offset;            /* read position within src when src is a String */
00958     st_table *symbols;      /* NULL once cleared (see check_load_arg) */
00959     st_table *data;         /* index -> loaded object; r_prepare() reserves slots with Qundef */
00960     VALUE proc;             /* not used in the code visible in this part of the file */
00961     st_table *compat_tbl;   /* marked by mark_load_arg(); use not visible in this part of the file */
00962     int infection;          /* presumably accumulated MARSHAL_INFECTION flags, as in dump_arg -- TODO confirm */
00963 };
00964 
00965 static void
00966 check_load_arg(struct load_arg *arg, ID sym)
00967 {
00968     if (!arg->symbols) {
00969         rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00970                  rb_id2name(sym));
00971     }
00972 }
00973 
/* Forward declaration: needed by free_load_arg(); defined later in the file. */
00974 static void clear_load_arg(struct load_arg *arg);
00975 
00976 static void
00977 mark_load_arg(void *ptr)
00978 {
00979     struct load_arg *p = ptr;
00980     if (!p->symbols)
00981         return;
00982     rb_mark_tbl(p->data);
00983     rb_mark_hash(p->compat_tbl);
00984 }
00985 
/*
 * GC free function of the load_arg wrapper object: clears the state
 * with clear_load_arg(), then releases the struct itself.
 */
static void
free_load_arg(void *ptr)
{
    struct load_arg *load = ptr;

    clear_load_arg(load);
    xfree(load);
}
00992 
00993 static size_t
00994 memsize_load_arg(const void *ptr)
00995 {
00996     return ptr ? sizeof(struct load_arg) : 0;
00997 }
00998 
/* Typed-data descriptor for a wrapped struct load_arg: name, then mark/free/memsize callbacks. */
00999 static const rb_data_type_t load_arg_data = {
01000     "load_arg",
01001     mark_load_arg, free_load_arg, memsize_load_arg
01002 };
01003 
/* r_entry(v, arg): calls r_entry0() with the current entry count of arg->data as the index. */
01004 #define r_entry(v, arg) r_entry0(v, (arg)->data->num_entries, arg)
/* Forward declarations of loader routines defined later in the file. */
01005 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
01006 static VALUE r_object(struct load_arg *arg);
01007 static ID r_symbol(struct load_arg *arg);
01008 static VALUE path2class(VALUE path);
01009 
01010 static st_index_t
01011 r_prepare(struct load_arg *arg)
01012 {
01013     st_index_t idx = arg->data->num_entries;
01014 
01015     st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01016     return idx;
01017 }
01018 
01019 static int
01020 r_byte(struct load_arg *arg)
01021 {
01022     int c;
01023 
01024     if (TYPE(arg->src) == T_STRING) {
01025         if (RSTRING_LEN(arg->src) > arg->offset) {
01026             c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01027         }
01028         else {
01029             rb_raise(rb_eArgError, "marshal data too short");
01030         }
01031     }
01032     else {
01033         VALUE src = arg->src;
01034         VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
01035         check_load_arg(arg, s_getbyte);
01036         if (NIL_P(v)) rb_eof_error();
01037         c = (unsigned char)NUM2CHR(v);
01038     }
01039     return c;
01040 }
01041 
01042 static void
01043 long_toobig(int size)
01044 {
01045     rb_raise(rb_eTypeError, "long too big for this architecture (size "
01046              STRINGIZE(SIZEOF_LONG)", given %d)", size);
01047 }
01048 
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif

/*
 * Reads one marshaled integer and returns it as a long.
 *
 * The first byte, taken as a signed value c, selects the encoding:
 *   c == 0          the value 0
 *   4 < c < 128     the value c - 5
 *   -129 < c < -4   the value c + 5
 *   1 <= c <= 4...  c little-endian bytes of a non-negative value follow
 *   -4... <= c < 0  -c little-endian bytes follow; the remaining high
 *                   bytes are all ones (negative value)
 *
 * Raises TypeError (via long_toobig) when more bytes follow than a long
 * can hold.
 *
 * The bytes are assembled in an unsigned long: shifting a byte >= 0x80
 * into the top byte of a signed long, or shifting 0xff as a signed long
 * into it, would overflow the signed type, which is undefined behaviour.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = SIGN_EXTEND_CHAR(r_byte(arg));
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = 0;
        for (i = 0; i < c; i++) {
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = ~0UL;                /* start from all ones: sign extension */
        for (i = 0; i < c; i++) {
            bits &= ~(0xffUL << (8*i));
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)bits;
}
01089 
01090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01091 
01092 static VALUE
01093 r_bytes0(long len, struct load_arg *arg)
01094 {
01095     VALUE str;
01096 
01097     if (len == 0) return rb_str_new(0, 0);
01098     if (TYPE(arg->src) == T_STRING) {
01099         if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01100             str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01101             arg->offset += len;
01102         }
01103         else {
01104           too_short:
01105             rb_raise(rb_eArgError, "marshal data too short");
01106         }
01107     }
01108     else {
01109         VALUE src = arg->src;
01110         VALUE n = LONG2NUM(len);
01111         str = rb_funcall2(src, s_read, 1, &n);
01112         check_load_arg(arg, s_read);
01113         if (NIL_P(str)) goto too_short;
01114         StringValue(str);
01115         if (RSTRING_LEN(str) != len) goto too_short;
01116         arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01117     }
01118     return str;
01119 }
01120 
01121 static int
01122 id2encidx(ID id, VALUE val)
01123 {
01124     if (id == rb_id_encoding()) {
01125         int idx = rb_enc_find_index(StringValueCStr(val));
01126         return idx;
01127     }
01128     else if (id == rb_intern("E")) {
01129         if (val == Qfalse) return rb_usascii_encindex();
01130         else if (val == Qtrue) return rb_utf8_encindex();
01131         /* bogus ignore */
01132     }
01133     return -1;
01134 }
01135 
01136 static ID
01137 r_symlink(struct load_arg *arg)
01138 {
01139     ID id;
01140     long num = r_long(arg);
01141 
01142     if (st_lookup(arg->symbols, num, &id)) {
01143         return id;
01144     }
01145     rb_raise(rb_eArgError, "bad symbol");
01146 }
01147 
01148 static ID
01149 r_symreal(struct load_arg *arg, int ivar)
01150 {
01151     volatile VALUE s = r_bytes(arg);
01152     ID id;
01153     int idx = -1;
01154     st_index_t n = arg->symbols->num_entries;
01155 
01156     st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
01157     if (ivar) {
01158         long num = r_long(arg);
01159         while (num-- > 0) {
01160             id = r_symbol(arg);
01161             idx = id2encidx(id, r_object(arg));
01162         }
01163     }
01164     if (idx < 0) idx = rb_usascii_encindex();
01165     rb_enc_associate_index(s, idx);
01166     id = rb_intern_str(s);
01167     st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01168 
01169     return id;
01170 }
01171 
01172 static ID
01173 r_symbol(struct load_arg *arg)
01174 {
01175     int type, ivar = 0;
01176 
01177   again:
01178     switch ((type = r_byte(arg))) {
01179       case TYPE_IVAR:
01180         ivar = 1;
01181         goto again;
01182       case TYPE_SYMBOL:
01183         return r_symreal(arg, ivar);
01184       case TYPE_SYMLINK:
01185         if (ivar) {
01186             rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01187         }
01188         return r_symlink(arg);
01189       default:
01190         rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01191         break;
01192     }
01193 }
01194 
01195 static VALUE
01196 r_unique(struct load_arg *arg)
01197 {
01198     return rb_id2str(r_symbol(arg));
01199 }
01200 
01201 static VALUE
01202 r_string(struct load_arg *arg)
01203 {
01204     return r_bytes(arg);
01205 }
01206 
01207 static VALUE
01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01209 {
01210     st_data_t real_obj = (VALUE)Qundef;
01211     if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01212         st_insert(arg->data, num, (st_data_t)real_obj);
01213     }
01214     else {
01215         st_insert(arg->data, num, (st_data_t)v);
01216     }
01217     if (arg->infection) {
01218         FL_SET(v, arg->infection);
01219         if ((VALUE)real_obj != Qundef)
01220             FL_SET((VALUE)real_obj, arg->infection);
01221     }
01222     return v;
01223 }
01224 
01225 static VALUE
01226 r_leave(VALUE v, struct load_arg *arg)
01227 {
01228     st_data_t data;
01229     if (st_lookup(arg->compat_tbl, v, &data)) {
01230         VALUE real_obj = (VALUE)data;
01231         rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01232         st_data_t key = v;
01233         if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01234             marshal_compat_t *compat = (marshal_compat_t*)data;
01235             compat->loader(real_obj, v);
01236         }
01237         st_delete(arg->compat_tbl, &key, 0);
01238         v = real_obj;
01239     }
01240     if (arg->proc) {
01241         v = rb_funcall(arg->proc, s_call, 1, v);
01242         check_load_arg(arg, s_call);
01243     }
01244     return v;
01245 }
01246 
01247 static void
01248 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01249 {
01250     long len;
01251 
01252     len = r_long(arg);
01253     if (len > 0) {
01254         do {
01255             ID id = r_symbol(arg);
01256             VALUE val = r_object(arg);
01257             int idx = id2encidx(id, val);
01258             if (idx >= 0) {
01259                 rb_enc_associate_index(obj, idx);
01260                 if (has_encoding) *has_encoding = TRUE;
01261             }
01262             else {
01263                 rb_ivar_set(obj, id, val);
01264             }
01265         } while (--len > 0);
01266     }
01267 }
01268 
01269 static VALUE
01270 path2class(VALUE path)
01271 {
01272     VALUE v = rb_path_to_class(path);
01273 
01274     if (TYPE(v) != T_CLASS) {
01275         rb_raise(rb_eArgError, "%.*s does not refer to class",
01276                  (int)RSTRING_LEN(path), RSTRING_PTR(path));
01277     }
01278     return v;
01279 }
01280 
01281 static VALUE
01282 path2module(VALUE path)
01283 {
01284     VALUE v = rb_path_to_class(path);
01285 
01286     if (TYPE(v) != T_MODULE) {
01287         rb_raise(rb_eArgError, "%.*s does not refer to module",
01288                  (int)RSTRING_LEN(path), RSTRING_PTR(path));
01289     }
01290     return v;
01291 }
01292 
01293 static VALUE
01294 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01295 {
01296     VALUE klass;
01297     st_data_t data;
01298     rb_alloc_func_t allocator;
01299 
01300     klass = path2class(path);
01301 
01302     allocator = rb_get_alloc_func(klass);
01303     if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01304         marshal_compat_t *compat = (marshal_compat_t*)data;
01305         VALUE real_obj = rb_obj_alloc(klass);
01306         VALUE obj = rb_obj_alloc(compat->oldclass);
01307         st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01308         return obj;
01309     }
01310 
01311     return rb_obj_alloc(klass);
01312 }
01313 
01314 static VALUE
01315 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01316 {
01317     VALUE v = Qnil;
01318     int type = r_byte(arg);
01319     long id;
01320     st_data_t link;
01321 
01322     switch (type) {
01323       case TYPE_LINK:
01324         id = r_long(arg);
01325         if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01326             rb_raise(rb_eArgError, "dump format error (unlinked)");
01327         }
01328         v = (VALUE)link;
01329         if (arg->proc) {
01330             v = rb_funcall(arg->proc, s_call, 1, v);
01331             check_load_arg(arg, s_call);
01332         }
01333         break;
01334 
01335       case TYPE_IVAR:
01336         {
01337             int ivar = TRUE;
01338 
01339             v = r_object0(arg, &ivar, extmod);
01340             if (ivar) r_ivar(v, NULL, arg);
01341         }
01342         break;
01343 
01344       case TYPE_EXTENDED:
01345         {
01346             VALUE m = path2module(r_unique(arg));
01347 
01348             if (NIL_P(extmod)) extmod = rb_ary_new2(0);
01349             rb_ary_push(extmod, m);
01350 
01351             v = r_object0(arg, 0, extmod);
01352             while (RARRAY_LEN(extmod) > 0) {
01353                 m = rb_ary_pop(extmod);
01354                 rb_extend_object(v, m);
01355             }
01356         }
01357         break;
01358 
01359       case TYPE_UCLASS:
01360         {
01361             VALUE c = path2class(r_unique(arg));
01362 
01363             if (FL_TEST(c, FL_SINGLETON)) {
01364                 rb_raise(rb_eTypeError, "singleton can't be loaded");
01365             }
01366             v = r_object0(arg, 0, extmod);
01367             if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
01368               format_error:
01369                 rb_raise(rb_eArgError, "dump format error (user class)");
01370             }
01371             if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01372                 VALUE tmp = rb_obj_alloc(c);
01373 
01374                 if (TYPE(v) != TYPE(tmp)) goto format_error;
01375             }
01376             RBASIC(v)->klass = c;
01377         }
01378         break;
01379 
01380       case TYPE_NIL:
01381         v = Qnil;
01382         v = r_leave(v, arg);
01383         break;
01384 
01385       case TYPE_TRUE:
01386         v = Qtrue;
01387         v = r_leave(v, arg);
01388         break;
01389 
01390       case TYPE_FALSE:
01391         v = Qfalse;
01392         v = r_leave(v, arg);
01393         break;
01394 
01395       case TYPE_FIXNUM:
01396         {
01397             long i = r_long(arg);
01398             v = LONG2FIX(i);
01399         }
01400         v = r_leave(v, arg);
01401         break;
01402 
01403       case TYPE_FLOAT:
01404         {
01405             double d;
01406             VALUE str = r_bytes(arg);
01407             const char *ptr = RSTRING_PTR(str);
01408 
01409             if (strcmp(ptr, "nan") == 0) {
01410                 d = NAN;
01411             }
01412             else if (strcmp(ptr, "inf") == 0) {
01413                 d = INFINITY;
01414             }
01415             else if (strcmp(ptr, "-inf") == 0) {
01416                 d = -INFINITY;
01417             }
01418             else {
01419                 char *e;
01420                 d = strtod(ptr, &e);
01421                 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01422             }
01423             v = DBL2NUM(d);
01424             v = r_entry(v, arg);
01425             v = r_leave(v, arg);
01426         }
01427         break;
01428 
01429       case TYPE_BIGNUM:
01430         {
01431             long len;
01432             BDIGIT *digits;
01433             volatile VALUE data;
01434 
01435             NEWOBJ(big, struct RBignum);
01436             OBJSETUP(big, rb_cBignum, T_BIGNUM);
01437             RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
01438             len = r_long(arg);
01439             data = r_bytes0(len * 2, arg);
01440 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01441             rb_big_resize((VALUE)big, len);
01442 #else
01443             rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
01444 #endif
01445             digits = RBIGNUM_DIGITS(big);
01446             MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
01447 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01448             MEMZERO((char *)digits + len * 2, char,
01449                     RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
01450 #endif
01451             len = RBIGNUM_LEN(big);
01452             while (len > 0) {
01453                 unsigned char *p = (unsigned char *)digits;
01454                 BDIGIT num = 0;
01455 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01456                 int shift = 0;
01457                 int i;
01458 
01459                 for (i=0; i<SIZEOF_BDIGITS; i++) {
01460                     num |= (int)p[i] << shift;
01461                     shift += 8;
01462                 }
01463 #else
01464                 num = p[0] | (p[1] << 8);
01465 #endif
01466                 *digits++ = num;
01467                 len--;
01468             }
01469             v = rb_big_norm((VALUE)big);
01470             v = r_entry(v, arg);
01471             v = r_leave(v, arg);
01472         }
01473         break;
01474 
01475       case TYPE_STRING:
01476         v = r_entry(r_string(arg), arg);
01477         v = r_leave(v, arg);
01478         break;
01479 
01480       case TYPE_REGEXP:
01481         {
01482             volatile VALUE str = r_bytes(arg);
01483             int options = r_byte(arg);
01484             int has_encoding = FALSE;
01485             st_index_t idx = r_prepare(arg);
01486 
01487             if (ivp) {
01488                 r_ivar(str, &has_encoding, arg);
01489                 *ivp = FALSE;
01490             }
01491             if (!has_encoding) {
01492                 /* 1.8 compatibility; remove escapes undefined in 1.8 */
01493                 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01494                 long len = RSTRING_LEN(str);
01495                 long bs = 0;
01496                 for (; len-- > 0; *dst++ = *src++) {
01497                     switch (*src) {
01498                       case '\\': bs++; break;
01499                       case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01500                       case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01501                       case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01502                       case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01503                       case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01504                         if (bs & 1) --dst;
01505                       default: bs = 0; break;
01506                     }
01507                 }
01508                 rb_str_set_len(str, dst - ptr);
01509             }
01510             v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01511             v = r_leave(v, arg);
01512         }
01513         break;
01514 
01515       case TYPE_ARRAY:
01516         {
01517             volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? */
01518 
01519             v = rb_ary_new2(len);
01520             v = r_entry(v, arg);
01521             while (len--) {
01522                 rb_ary_push(v, r_object(arg));
01523             }
01524             v = r_leave(v, arg);
01525         }
01526         break;
01527 
01528       case TYPE_HASH:
01529       case TYPE_HASH_DEF:
01530         {
01531             long len = r_long(arg);
01532 
01533             v = rb_hash_new();
01534             v = r_entry(v, arg);
01535             while (len--) {
01536                 VALUE key = r_object(arg);
01537                 VALUE value = r_object(arg);
01538                 rb_hash_aset(v, key, value);
01539             }
01540             if (type == TYPE_HASH_DEF) {
01541                 RHASH_IFNONE(v) = r_object(arg);
01542             }
01543             v = r_leave(v, arg);
01544         }
01545         break;
01546 
01547       case TYPE_STRUCT:
01548         {
01549             VALUE mem, values;
01550             volatile long i;    /* gcc 2.7.2.3 -O2 bug?? */
01551             ID slot;
01552             st_index_t idx = r_prepare(arg);
01553             VALUE klass = path2class(r_unique(arg));
01554             long len = r_long(arg);
01555 
01556             v = rb_obj_alloc(klass);
01557             if (TYPE(v) != T_STRUCT) {
01558                 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01559             }
01560             mem = rb_struct_s_members(klass);
01561             if (RARRAY_LEN(mem) != len) {
01562                 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01563                          rb_class2name(klass));
01564             }
01565 
01566             v = r_entry0(v, idx, arg);
01567             values = rb_ary_new2(len);
01568             for (i=0; i<len; i++) {
01569                 slot = r_symbol(arg);
01570 
01571                 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
01572                     rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01573                              rb_class2name(klass),
01574                              rb_id2name(slot),
01575                              rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
01576                 }
01577                 rb_ary_push(values, r_object(arg));
01578             }
01579             rb_struct_initialize(v, values);
01580             v = r_leave(v, arg);
01581         }
01582         break;
01583 
01584       case TYPE_USERDEF:
01585         {
01586             VALUE klass = path2class(r_unique(arg));
01587             VALUE data;
01588 
01589             if (!rb_respond_to(klass, s_load)) {
01590                 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01591                          rb_class2name(klass));
01592             }
01593             data = r_string(arg);
01594             if (ivp) {
01595                 r_ivar(data, NULL, arg);
01596                 *ivp = FALSE;
01597             }
01598             v = rb_funcall(klass, s_load, 1, data);
01599             check_load_arg(arg, s_load);
01600             v = r_entry(v, arg);
01601             v = r_leave(v, arg);
01602         }
01603         break;
01604 
01605       case TYPE_USRMARSHAL:
01606         {
01607             VALUE klass = path2class(r_unique(arg));
01608             VALUE data;
01609 
01610             v = rb_obj_alloc(klass);
01611             if (!NIL_P(extmod)) {
01612                 while (RARRAY_LEN(extmod) > 0) {
01613                     VALUE m = rb_ary_pop(extmod);
01614                     rb_extend_object(v, m);
01615                 }
01616             }
01617             if (!rb_respond_to(v, s_mload)) {
01618                 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01619                          rb_class2name(klass));
01620             }
01621             v = r_entry(v, arg);
01622             data = r_object(arg);
01623             rb_funcall(v, s_mload, 1, data);
01624             check_load_arg(arg, s_mload);
01625             v = r_leave(v, arg);
01626         }
01627         break;
01628 
01629       case TYPE_OBJECT:
01630         {
01631             st_index_t idx = r_prepare(arg);
01632             v = obj_alloc_by_path(r_unique(arg), arg);
01633             if (TYPE(v) != T_OBJECT) {
01634                 rb_raise(rb_eArgError, "dump format error");
01635             }
01636             v = r_entry0(v, idx, arg);
01637             r_ivar(v, NULL, arg);
01638             v = r_leave(v, arg);
01639         }
01640         break;
01641 
01642       case TYPE_DATA:
01643        {
01644            VALUE klass = path2class(r_unique(arg));
01645            if (rb_respond_to(klass, s_alloc)) {
01646                static int warn = TRUE;
01647                if (warn) {
01648                    rb_warn("define `allocate' instead of `_alloc'");
01649                    warn = FALSE;
01650                }
01651                v = rb_funcall(klass, s_alloc, 0);
01652                check_load_arg(arg, s_alloc);
01653            }
01654            else {
01655                v = rb_obj_alloc(klass);
01656            }
01657            if (TYPE(v) != T_DATA) {
01658                rb_raise(rb_eArgError, "dump format error");
01659            }
01660            v = r_entry(v, arg);
01661            if (!rb_respond_to(v, s_load_data)) {
01662                rb_raise(rb_eTypeError,
01663                         "class %s needs to have instance method `_load_data'",
01664                         rb_class2name(klass));
01665            }
01666            rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
01667            check_load_arg(arg, s_load_data);
01668            v = r_leave(v, arg);
01669        }
01670        break;
01671 
01672       case TYPE_MODULE_OLD:
01673         {
01674             volatile VALUE str = r_bytes(arg);
01675 
01676             v = rb_path_to_class(str);
01677             v = r_entry(v, arg);
01678             v = r_leave(v, arg);
01679         }
01680         break;
01681 
01682       case TYPE_CLASS:
01683         {
01684             volatile VALUE str = r_bytes(arg);
01685 
01686             v = path2class(str);
01687             v = r_entry(v, arg);
01688             v = r_leave(v, arg);
01689         }
01690         break;
01691 
01692       case TYPE_MODULE:
01693         {
01694             volatile VALUE str = r_bytes(arg);
01695 
01696             v = path2module(str);
01697             v = r_entry(v, arg);
01698             v = r_leave(v, arg);
01699         }
01700         break;
01701 
01702       case TYPE_SYMBOL:
01703         if (ivp) {
01704             v = ID2SYM(r_symreal(arg, *ivp));
01705             *ivp = FALSE;
01706         }
01707         else {
01708             v = ID2SYM(r_symreal(arg, 0));
01709         }
01710         v = r_leave(v, arg);
01711         break;
01712 
01713       case TYPE_SYMLINK:
01714         v = ID2SYM(r_symlink(arg));
01715         break;
01716 
01717       default:
01718         rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01719         break;
01720     }
01721     return v;
01722 }
01723 
01724 static VALUE
01725 r_object(struct load_arg *arg)
01726 {
01727     return r_object0(arg, 0, Qnil);
01728 }
01729 
01730 static void
01731 clear_load_arg(struct load_arg *arg)
01732 {
01733     if (!arg->symbols) return;
01734     st_free_table(arg->symbols);
01735     arg->symbols = 0;
01736     st_free_table(arg->data);
01737     arg->data = 0;
01738     st_free_table(arg->compat_tbl);
01739     arg->compat_tbl = 0;
01740 }
01741 
01742 /*
01743  * call-seq:
01744  *     load( source [, proc] ) -> obj
01745  *     restore( source [, proc] ) -> obj
01746  *
01747  * Returns the result of converting the serialized data in source into a
01748  * Ruby object (possibly with associated subordinate objects). source
01749  * may be either an instance of IO or an object that responds to
01750  * to_str. If proc is specified, it will be passed each object as it
01751  * is deserialized.
01752  */
01753 static VALUE
01754 marshal_load(int argc, VALUE *argv)
01755 {
01756     VALUE port, proc;
01757     int major, minor, infection = 0;
01758     VALUE v;
01759     volatile VALUE wrapper;
01760     struct load_arg *arg;
01761 
01762     rb_scan_args(argc, argv, "11", &port, &proc);
01763     v = rb_check_string_type(port);
01764     if (!NIL_P(v)) {
01765         infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */
01766         port = v;
01767     }
01768     else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01769         if (rb_respond_to(port, s_binmode)) {
01770             rb_funcall2(port, s_binmode, 0, 0);
01771         }
01772         infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
01773     }
01774     else {
01775         rb_raise(rb_eTypeError, "instance of IO needed");
01776     }
01777     wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01778     arg->infection = infection;
01779     arg->src = port;
01780     arg->offset = 0;
01781     arg->symbols = st_init_numtable();
01782     arg->data    = st_init_numtable();
01783     arg->compat_tbl = st_init_numtable();
01784     arg->proc = 0;
01785 
01786     major = r_byte(arg);
01787     minor = r_byte(arg);
01788     if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01789         clear_load_arg(arg);
01790         rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01791 \tformat version %d.%d required; %d.%d given",
01792                  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01793     }
01794     if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01795         rb_warn("incompatible marshal file format (can be read)\n\
01796 \tformat version %d.%d required; %d.%d given",
01797                 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01798     }
01799 
01800     if (!NIL_P(proc)) arg->proc = proc;
01801     v = r_object(arg);
01802     clear_load_arg(arg);
01803     RB_GC_GUARD(wrapper);
01804 
01805     return v;
01806 }
01807 
01808 /*
01809  * The marshaling library converts collections of Ruby objects into a
01810  * byte stream, allowing them to be stored outside the currently
01811  * active script. This data may subsequently be read and the original
01812  * objects reconstituted.
01813  *
01814  * Marshaled data has major and minor version numbers stored along
01815  * with the object information. In normal use, marshaling can only
01816  * load data written with the same major version number and an equal
01817  * or lower minor version number. If Ruby's ``verbose'' flag is set
01818  * (normally using -d, -v, -w, or --verbose) a warning is printed when
01819  * the minor numbers differ. Marshal versioning is independent of
01820  * Ruby's version numbers. You can extract the version by reading the
01821  * first two bytes of marshaled data.
01822  *
01823  *     str = Marshal.dump("thing")
01824  *     RUBY_VERSION   #=> "1.9.0"
01825  *     str[0].ord     #=> 4
01826  *     str[1].ord     #=> 8
01827  *
01828  * Some objects cannot be dumped: if the objects to be dumped include
01829  * bindings, procedure or method objects, instances of class IO, or
01830  * singleton objects, a TypeError will be raised.
01831  *
01832  * If your class has special serialization needs (for example, if you
01833  * want to serialize in some specific format), or if it contains
01834  * objects that would otherwise not be serializable, you can implement
01835  * your own serialization strategy.
01836  *
01837  * There are two methods of doing this, your object can define either
01838  * marshal_dump and marshal_load or _dump and _load.  marshal_dump will take
01839  * precedence over _dump if both are defined.  marshal_dump may result in
01840  * smaller Marshal strings.
01841  *
01842  * == marshal_dump and marshal_load
01843  *
01844  * When dumping an object the method marshal_dump will be called.
01845  * marshal_dump must return a result containing the information necessary for
01846  * marshal_load to reconstitute the object.  The result can be any object.
01847  *
01848  * When loading an object dumped using marshal_dump the object is first
01849  * allocated then marshal_load is called with the result from marshal_dump.
01850  * marshal_load must recreate the object from the information in the result.
01851  *
01852  * Example:
01853  *
01854  *   class MyObj
01855  *     def initialize name, version, data
01856  *       @name    = name
01857  *       @version = version
01858  *       @data    = data
01859  *     end
01860  *
01861  *     def marshal_dump
01862  *       [@name, @version]
01863  *     end
01864  *
01865  *     def marshal_load array
01866  *       @name, @version = array
01867  *     end
01868  *   end
01869  *
01870  * == _dump and _load
01871  *
01872  * Use _dump and _load when you need to allocate the object you're restoring
01873  * yourself.
01874  *
01875  * When dumping an object the instance method _dump is called with an Integer
01876  * which indicates the maximum depth of objects to dump (a value of -1 implies
01877  * that you should disable depth checking).  _dump must return a String
01878  * containing the information necessary to reconstitute the object.
01879  *
01880  * The class method _load should take a String and use it to return an object
01881  * of the same class.
01882  *
01883  * Example:
01884  *
01885  *   class MyObj
01886  *     def initialize name, version, data
01887  *       @name    = name
01888  *       @version = version
01889  *       @data    = data
01890  *     end
01891  *
01892  *     def _dump level
01893  *       [@name, @version].join ':'
01894  *     end
01895  *
01896  *     def self._load args
01897  *       new(*args.split(':'))
01898  *     end
01899  *   end
01900  *
01901  * Since Marshal.dump outputs a string you can have _dump return a Marshal
01902  * string which is Marshal.loaded in _load for complex objects.
01903  */
01904 void
01905 Init_marshal(void)
01906 {
01907 #undef rb_intern
01908 #define rb_intern(str) rb_intern_const(str)
01909 
01910     VALUE rb_mMarshal = rb_define_module("Marshal");
01911 
01912     s_dump = rb_intern("_dump");
01913     s_load = rb_intern("_load");
01914     s_mdump = rb_intern("marshal_dump");
01915     s_mload = rb_intern("marshal_load");
01916     s_dump_data = rb_intern("_dump_data");
01917     s_load_data = rb_intern("_load_data");
01918     s_alloc = rb_intern("_alloc");
01919     s_call = rb_intern("call");
01920     s_getbyte = rb_intern("getbyte");
01921     s_read = rb_intern("read");
01922     s_write = rb_intern("write");
01923     s_binmode = rb_intern("binmode");
01924 
01925     rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
01926     rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
01927     rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
01928 
01929     rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
01930     rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
01931 
01932     compat_allocator_tbl = st_init_numtable();
01933     compat_allocator_tbl_wrapper =
01934         Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
01935     rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
01936 }
01937 
01938 VALUE
01939 rb_marshal_dump(VALUE obj, VALUE port)
01940 {
01941     int argc = 1;
01942     VALUE argv[2];
01943 
01944     argv[0] = obj;
01945     argv[1] = port;
01946     if (!NIL_P(port)) argc = 2;
01947     return marshal_dump(argc, argv);
01948 }
01949 
01950 VALUE
01951 rb_marshal_load(VALUE port)
01952 {
01953     return marshal_load(1, &port);
01954 }
01955 

Generated on Wed Aug 10 09:17:08 2011 for Ruby by  doxygen 1.4.7