00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017
00018 #include <math.h>
00019 #ifdef HAVE_FLOAT_H
00020 #include <float.h>
00021 #endif
00022 #ifdef HAVE_IEEEFP_H
00023 #include <ieeefp.h>
00024 #endif
00025
/* Bignum digits are written out in units of 2*CHAR_BIT bits ("shorts"). */
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
#define SHORTDN(x) RSHIFT(x,BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGITS
#define SHORTLEN(x) (x)
#else
/*
 * Returns how many short units are needed for the LEN digits stored in DS:
 * every digit below the top one counts in full, the top digit only counts
 * the shorts up to its highest non-zero one.
 */
static long
shortlen(long len, BDIGIT *ds)
{
    BDIGIT top = ds[len-1];
    int used = 0;

    /* Strip one short at a time until the top digit is exhausted. */
    for (; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: expands to a use of a variable named `d` at the call site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00048
/* Format version; marshal_dump emits these as the first two bytes. */
#define MARSHAL_MAJOR 4
#define MARSHAL_MINOR 8

/* One-byte tags written in front of each serialized value. */

/* Immediate values. */
#define TYPE_NIL '0'
#define TYPE_TRUE 'T'
#define TYPE_FALSE 'F'
#define TYPE_FIXNUM 'i'

/* Heap objects and class decorations. */
#define TYPE_EXTENDED 'e'       /* written by w_extended for each T_ICLASS in the chain */
#define TYPE_UCLASS 'C'         /* written by w_uclass when the real class differs from the builtin */
#define TYPE_OBJECT 'o'
#define TYPE_DATA 'd'
#define TYPE_USERDEF 'u'        /* object dumped through the s_dump method */
#define TYPE_USRMARSHAL 'U'     /* object dumped through the s_mdump method */
#define TYPE_FLOAT 'f'
#define TYPE_BIGNUM 'l'
#define TYPE_STRING '"'
#define TYPE_REGEXP '/'
#define TYPE_ARRAY '['
#define TYPE_HASH '{'
#define TYPE_HASH_DEF '}'       /* hash that carries a default value */
#define TYPE_STRUCT 'S'
#define TYPE_MODULE_OLD 'M'
#define TYPE_CLASS 'c'
#define TYPE_MODULE 'm'

/* Symbols: full definition, or back-reference by index. */
#define TYPE_SYMBOL ':'
#define TYPE_SYMLINK ';'

/* TYPE_IVAR prefixes a value followed by ivars; TYPE_LINK is a back-reference to an object. */
#define TYPE_IVAR 'I'
#define TYPE_LINK '@'
00080
/*
 * Method IDs used with rb_respond_to()/rb_funcall() throughout this file.
 * They are initialized outside the section shown here.
 */
static ID s_dump, s_load, s_mdump, s_mload;
static ID s_dump_data, s_load_data, s_alloc, s_call;
static ID s_getbyte, s_read, s_write, s_binmode;

/* Defined elsewhere; ID under which a non-default encoding name is dumped. */
ID rb_id_encoding(void);
00086
/*
 * Compatibility record registered by rb_marshal_define_compat():
 * objects of NEWCLASS are dumped as the value returned by DUMPER, and on
 * load LOADER is called with the real object and the loaded stand-in.
 */
typedef struct {
    VALUE newclass;
    VALUE oldclass;
    VALUE (*dumper)(VALUE);
    VALUE (*loader)(VALUE, VALUE);
} marshal_compat_t;

/* Maps an allocator function pointer to its marshal_compat_t entry. */
static st_table *compat_allocator_tbl;
static VALUE compat_allocator_tbl_wrapper;
00096
00097 static int
00098 mark_marshal_compat_i(st_data_t key, st_data_t value)
00099 {
00100 marshal_compat_t *p = (marshal_compat_t *)value;
00101 rb_gc_mark(p->newclass);
00102 rb_gc_mark(p->oldclass);
00103 return ST_CONTINUE;
00104 }
00105
00106 static void
00107 mark_marshal_compat_t(void *tbl)
00108 {
00109 if (!tbl) return;
00110 st_foreach(tbl, mark_marshal_compat_i, 0);
00111 }
00112
00113 void
00114 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00115 {
00116 marshal_compat_t *compat;
00117 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00118
00119 if (!allocator) {
00120 rb_raise(rb_eTypeError, "no allocator");
00121 }
00122
00123 compat = ALLOC(marshal_compat_t);
00124 compat->newclass = Qnil;
00125 compat->oldclass = Qnil;
00126 compat->newclass = newclass;
00127 compat->oldclass = oldclass;
00128 compat->dumper = dumper;
00129 compat->loader = loader;
00130
00131 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
00132 }
00133
/* Object flags that are propagated between the dumped/loaded data and its source. */
#define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
/* Compile-time check: the flag mask must survive a round trip through int. */
typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00136
/* State shared by all writer functions during one Marshal.dump call. */
struct dump_arg {
    VALUE str, dest;        /* output buffer; optional IO-like destination (0 if none) */
    st_table *symbols;      /* ID -> index of already written symbols; 0 once cleared */
    st_table *data;         /* object -> index of already written objects */
    st_table *compat_tbl;   /* stand-in object -> real object (see marshal_compat_t) */
    st_table *encodings;    /* encoding name -> name String, created lazily */
    int infection;          /* accumulated MARSHAL_INFECTION flags */
};

/* Argument bundle for iteration callbacks that need the dump state and depth limit. */
struct dump_call_arg {
    VALUE obj;
    struct dump_arg *arg;
    int limit;
};
00151
00152 static void
00153 check_dump_arg(struct dump_arg *arg, ID sym)
00154 {
00155 if (!arg->symbols) {
00156 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00157 rb_id2name(sym));
00158 }
00159 }
00160
00161 static void clear_dump_arg(struct dump_arg *arg);
00162
00163 static void
00164 mark_dump_arg(void *ptr)
00165 {
00166 struct dump_arg *p = ptr;
00167 if (!p->symbols)
00168 return;
00169 rb_mark_set(p->data);
00170 rb_mark_hash(p->compat_tbl);
00171 rb_gc_mark(p->str);
00172 }
00173
/* GC free function: releases the tables owned by the state, then the state itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
    xfree(state);
}
00180
00181 static size_t
00182 memsize_dump_arg(const void *ptr)
00183 {
00184 return ptr ? sizeof(struct dump_arg) : 0;
00185 }
00186
/* Typed-data descriptor used to wrap a struct dump_arg in a Ruby object. */
static const rb_data_type_t dump_arg_data = {
    "dump_arg",
    mark_dump_arg, free_dump_arg, memsize_dump_arg
};
00191
00192 static const char *
00193 must_not_be_anonymous(const char *type, VALUE path)
00194 {
00195 char *n = RSTRING_PTR(path);
00196
00197 if (!rb_enc_asciicompat(rb_enc_get(path))) {
00198
00199 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00200 }
00201 if (n[0] == '#') {
00202 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00203 (int)RSTRING_LEN(path), n);
00204 }
00205 return n;
00206 }
00207
00208 static VALUE
00209 class2path(VALUE klass)
00210 {
00211 VALUE path = rb_class_path(klass);
00212 const char *n;
00213
00214 n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path);
00215 if (rb_path_to_class(path) != rb_class_real(klass)) {
00216 rb_raise(rb_eTypeError, "%s can't be referred to", n);
00217 }
00218 return path;
00219 }
00220
00221 static void w_long(long, struct dump_arg*);
00222 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00223
00224 static void
00225 w_nbyte(const char *s, long n, struct dump_arg *arg)
00226 {
00227 VALUE buf = arg->str;
00228 rb_str_buf_cat(buf, s, n);
00229 RBASIC(buf)->flags |= arg->infection;
00230 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00231 rb_io_write(arg->dest, buf);
00232 rb_str_resize(buf, 0);
00233 }
00234 }
00235
/* Writes the single byte C. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char *one = &c;

    w_nbyte(one, 1, arg);
}
00241
/* Writes a length-prefixed byte sequence: N via w_long(), then the N bytes of S. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);     /* length header */
    w_nbyte(s, n, arg); /* payload */
}
00248
/* Writes the low 16 bits of X, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    char lo = (char)((x >> 0) & 0xff);
    char hi = (char)((x >> 8) & 0xff);

    w_byte(lo, arg);
    w_byte(hi, arg);
}
00255
/*
 * Writes X in the variable-length integer format:
 *   - 0 is a single zero byte;
 *   - 1..122 is the single byte x+5, -123..-1 the single byte (x-5)&0xff;
 *   - anything else is a count byte (+k or -k) followed by the k low-order
 *     bytes of X, least significant first, where k is the smallest count
 *     after which the remaining bits are all 0 (positive) or all 1 (negative).
 *
 * Where long is wider than 4 bytes, raises TypeError if X does not fit in
 * 32 signed bits.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char bytes[sizeof(long)+1];
    int pos, last;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* bytes[0] is the signed count, bytes[1..] the payload. */
    for (pos = 1; pos < (int)sizeof(long)+1; pos++) {
        bytes[pos] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            bytes[0] = pos;
            break;
        }
        if (x == -1) {
            bytes[0] = -pos;
            break;
        }
    }
    last = pos;
    for (pos = 0; pos <= last; pos++) {
        w_byte(bytes[pos], arg);
    }
}
00298
#ifdef DBL_MANT_DIG
/* Number of leading mantissa bits treated as covered by the decimal text. */
#define DECIMAL_MANT (53-16)

/* Chunk width, in bits, used when emitting the remaining mantissa bits. */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Stores into BUF the mantissa bits of D that lie below the first
 * DECIMAL_MANT bits: a zero marker byte followed by big-endian chunks of
 * MANT_BITS bits, with trailing zero bytes trimmed.
 * Returns the number of bytes stored (0 when there are no such bits).
 */
static int
save_mantissa(double d, char *buf)
{
    int e, i = 0;
    unsigned long m;
    double n;

    /* Keep only the fraction left after scaling the mantissa by 2**DECIMAL_MANT. */
    d = modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
    if (d > 0) {
        buf[i++] = 0;
        do {
            /* Peel off the next MANT_BITS bits as an integer. */
            d = modf(ldexp(d, MANT_BITS), &n);
            m = (unsigned long)n;
#if MANT_BITS > 24
            buf[i++] = (char)(m >> 24);
#endif
#if MANT_BITS > 16
            buf[i++] = (char)(m >> 16);
#endif
#if MANT_BITS > 8
            buf[i++] = (char)(m >> 8);
#endif
            buf[i++] = (char)m;
        } while (d > 0);
        /* Drop trailing zero bytes. */
        while (!buf[i - 1]) --i;
    }
    return i;
}

/*
 * Inverse of save_mantissa(): refines D with the LEN extra bytes in BUF.
 * The bytes are only applied when more than one byte is present and the
 * first one is the zero marker; otherwise D is returned unchanged.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    if (!len) return d;
    if (--len > 0 && !*buf++) {
        int e, s = d < 0, dig = 0;
        unsigned long m;

        /* Reduce d to the integral part of its mantissa scaled by 2**DECIMAL_MANT. */
        modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
        do {
            m = 0;
            /* Intentional fall-through: reads up to MANT_BITS/8 bytes, fewer for a short tail. */
            switch (len) {
              default: m = *buf++ & 0xff;
#if MANT_BITS > 24
              case 3: m = (m << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 16
              case 2: m = (m << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 8
              case 1: m = (m << 8) | (*buf++ & 0xff);
#endif
            }
            /* dig tracks the (negative) binary exponent of the chunk just read. */
            dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
            d += ldexp((double)m, dig);
        } while ((len -= MANT_BITS / 8) > 0);
        /* Undo the scaling and restore the sign. */
        d = ldexp(d, e - DECIMAL_MANT);
        if (s) d = -d;
    }
    return d;
}
#else
/* Without DBL_MANT_DIG no extra mantissa bytes are stored or applied. */
#define load_mantissa(d, buf, len) (d)
#define save_mantissa(d, buf) 0
#endif
00376
00377 #ifdef DBL_DIG
00378 #define FLOAT_DIG (DBL_DIG+2)
00379 #else
00380 #define FLOAT_DIG 17
00381 #endif
00382
00383 static void
00384 w_float(double d, struct dump_arg *arg)
00385 {
00386 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
00387
00388 if (isinf(d)) {
00389 if (d < 0) strcpy(buf, "-inf");
00390 else strcpy(buf, "inf");
00391 }
00392 else if (isnan(d)) {
00393 strcpy(buf, "nan");
00394 }
00395 else if (d == 0.0) {
00396 if (1.0/d < 0) strcpy(buf, "-0");
00397 else strcpy(buf, "0");
00398 }
00399 else {
00400 size_t len;
00401
00402
00403 snprintf(buf, sizeof(buf), "%.*g", FLOAT_DIG, d);
00404 len = strlen(buf);
00405 w_bytes(buf, len + save_mantissa(d, buf + len), arg);
00406 return;
00407 }
00408 w_bytes(buf, strlen(buf), arg);
00409 }
00410
00411 static void
00412 w_symbol(ID id, struct dump_arg *arg)
00413 {
00414 VALUE sym;
00415 st_data_t num;
00416 int encidx = -1;
00417
00418 if (st_lookup(arg->symbols, id, &num)) {
00419 w_byte(TYPE_SYMLINK, arg);
00420 w_long((long)num, arg);
00421 }
00422 else {
00423 sym = rb_id2str(id);
00424 if (!sym) {
00425 rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id);
00426 }
00427 encidx = rb_enc_get_index(sym);
00428 if (encidx == rb_usascii_encindex() ||
00429 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00430 encidx = -1;
00431 }
00432 else {
00433 w_byte(TYPE_IVAR, arg);
00434 }
00435 w_byte(TYPE_SYMBOL, arg);
00436 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00437 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00438 if (encidx != -1) {
00439 struct dump_call_arg c_arg;
00440 c_arg.limit = 1;
00441 c_arg.arg = arg;
00442 w_encoding(sym, 0, &c_arg);
00443 }
00444 }
00445 }
00446
00447 static void
00448 w_unique(VALUE s, struct dump_arg *arg)
00449 {
00450 must_not_be_anonymous("class", s);
00451 w_symbol(rb_intern_str(s), arg);
00452 }
00453
00454 static void w_object(VALUE,struct dump_arg*,int);
00455
00456 static int
00457 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00458 {
00459 w_object(key, arg->arg, arg->limit);
00460 w_object(value, arg->arg, arg->limit);
00461 return ST_CONTINUE;
00462 }
00463
00464 static void
00465 w_extended(VALUE klass, struct dump_arg *arg, int check)
00466 {
00467 if (check && FL_TEST(klass, FL_SINGLETON)) {
00468 if (RCLASS_M_TBL(klass)->num_entries ||
00469 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
00470 rb_raise(rb_eTypeError, "singleton can't be dumped");
00471 }
00472 klass = RCLASS_SUPER(klass);
00473 }
00474 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00475 VALUE path = rb_class_name(RBASIC(klass)->klass);
00476 w_byte(TYPE_EXTENDED, arg);
00477 w_unique(path, arg);
00478 klass = RCLASS_SUPER(klass);
00479 }
00480 }
00481
00482 static void
00483 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00484 {
00485 VALUE path;
00486 st_data_t real_obj;
00487 VALUE klass;
00488
00489 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00490 obj = (VALUE)real_obj;
00491 }
00492 klass = CLASS_OF(obj);
00493 w_extended(klass, arg, check);
00494 w_byte(type, arg);
00495 path = class2path(rb_class_real(klass));
00496 w_unique(path, arg);
00497 }
00498
00499 static void
00500 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00501 {
00502 VALUE klass = CLASS_OF(obj);
00503
00504 w_extended(klass, arg, TRUE);
00505 klass = rb_class_real(klass);
00506 if (klass != super) {
00507 w_byte(TYPE_UCLASS, arg);
00508 w_unique(class2path(klass), arg);
00509 }
00510 }
00511
00512 static int
00513 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
00514 {
00515 if (id == rb_id_encoding()) return ST_CONTINUE;
00516 if (id == rb_intern("E")) return ST_CONTINUE;
00517 w_symbol(id, arg->arg);
00518 w_object(value, arg->arg, arg->limit);
00519 return ST_CONTINUE;
00520 }
00521
00522 static void
00523 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00524 {
00525 int encidx = rb_enc_get_index(obj);
00526 rb_encoding *enc = 0;
00527 st_data_t name;
00528
00529 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00530 w_long(num, arg->arg);
00531 return;
00532 }
00533 w_long(num + 1, arg->arg);
00534
00535
00536 if (encidx == rb_usascii_encindex()) {
00537 w_symbol(rb_intern("E"), arg->arg);
00538 w_object(Qfalse, arg->arg, arg->limit + 1);
00539 return;
00540 }
00541 else if (encidx == rb_utf8_encindex()) {
00542 w_symbol(rb_intern("E"), arg->arg);
00543 w_object(Qtrue, arg->arg, arg->limit + 1);
00544 return;
00545 }
00546
00547 w_symbol(rb_id_encoding(), arg->arg);
00548 do {
00549 if (!arg->arg->encodings)
00550 arg->arg->encodings = st_init_strcasetable();
00551 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00552 break;
00553 name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00554 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00555 } while (0);
00556 w_object(name, arg->arg, arg->limit + 1);
00557 }
00558
00559 static void
00560 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00561 {
00562 long num = tbl ? tbl->num_entries : 0;
00563
00564 w_encoding(obj, num, arg);
00565 if (tbl) {
00566 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00567 }
00568 }
00569
00570 static void
00571 w_objivar(VALUE obj, struct dump_call_arg *arg)
00572 {
00573 VALUE *ptr;
00574 long i, len, num;
00575
00576 len = ROBJECT_NUMIV(obj);
00577 ptr = ROBJECT_IVPTR(obj);
00578 num = 0;
00579 for (i = 0; i < len; i++)
00580 if (ptr[i] != Qundef)
00581 num += 1;
00582
00583 w_encoding(obj, num, arg);
00584 if (num != 0) {
00585 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00586 }
00587 }
00588
/*
 * Serializes OBJ, and recursively everything it references, into ARG.
 *
 * LIMIT is the remaining nesting depth: ArgumentError is raised when it is
 * 0 on entry, and it is decremented before descending.
 *
 * Objects already recorded in arg->data are written as TYPE_LINK plus their
 * index.  Values for which has_ivars() is true are prefixed with TYPE_IVAR
 * and followed by their instance variables / encoding.
 *
 * Raises TypeError for values that cannot be dumped and RuntimeError when
 * user code re-enters the dump or modifies an array being dumped.
 */
static void
w_object(VALUE obj, struct dump_arg *arg, int limit)
{
    struct dump_call_arg c_arg;
    st_table *ivtbl = 0;
    st_data_t num;
    int hasiv = 0;
/* True if obj has generic ivars (stored into ivtbl) or is a non-special
 * object whose encoding flag is not ASCII-8BIT. */
#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
                               (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))

    if (limit == 0) {
        rb_raise(rb_eArgError, "exceed depth limit");
    }

    limit--;
    c_arg.limit = limit;
    c_arg.arg = arg;

    /* Already dumped: emit a back-reference instead of the object. */
    if (st_lookup(arg->data, obj, &num)) {
        w_byte(TYPE_LINK, arg);
        w_long((long)num, arg);
        return;
    }

    if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
        w_byte(TYPE_IVAR, arg);
    }
    if (obj == Qnil) {
        w_byte(TYPE_NIL, arg);
    }
    else if (obj == Qtrue) {
        w_byte(TYPE_TRUE, arg);
    }
    else if (obj == Qfalse) {
        w_byte(TYPE_FALSE, arg);
    }
    else if (FIXNUM_P(obj)) {
#if SIZEOF_LONG <= 4
        w_byte(TYPE_FIXNUM, arg);
        w_long(FIX2INT(obj), arg);
#else
        /* Only fixnums that pass this 31-bit shift test are written as
         * TYPE_FIXNUM; the rest are converted and dumped as bignums. */
        if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
            w_byte(TYPE_FIXNUM, arg);
            w_long(FIX2LONG(obj), arg);
        }
        else {
            w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
        }
#endif
    }
    else if (SYMBOL_P(obj)) {
        w_symbol(SYM2ID(obj), arg);
    }
    else {
        /* Remember taint/untrust flags of everything that gets dumped. */
        arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);

        /* User-defined dump via s_mdump: the returned object is dumped in place of obj. */
        if (rb_respond_to(obj, s_mdump)) {
            volatile VALUE v;

            /* Registered before calling out, so references back to obj become links. */
            st_add_direct(arg->data, obj, arg->data->num_entries);

            v = rb_funcall(obj, s_mdump, 0, 0);
            check_dump_arg(arg, s_mdump);
            w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
            w_object(v, arg, limit);
            if (hasiv) w_ivar(obj, ivtbl, &c_arg);
            return;
        }
        /* User-defined dump via s_dump: must return a String, written as raw bytes. */
        if (rb_respond_to(obj, s_dump)) {
            VALUE v;
            st_table *ivtbl2 = 0;
            int hasiv2;

            v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
            check_dump_arg(arg, s_dump);
            if (TYPE(v) != T_STRING) {
                rb_raise(rb_eTypeError, "_dump() must return string");
            }
            /* The TYPE_IVAR prefix is needed once, for either obj or the returned string. */
            if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
                w_byte(TYPE_IVAR, arg);
            }
            w_class(TYPE_USERDEF, obj, arg, FALSE);
            w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
            /* The string's ivars/encoding take precedence over obj's. */
            if (hasiv2) {
                w_ivar(v, ivtbl2, &c_arg);
            }
            else if (hasiv) {
                w_ivar(obj, ivtbl, &c_arg);
            }
            st_add_direct(arg->data, obj, arg->data->num_entries);
            return;
        }

        st_add_direct(arg->data, obj, arg->data->num_entries);

        /* If the class's allocator has a compat entry, dump the dumper's
         * result instead and remember which real object it stands for. */
        {
            st_data_t compat_data;
            rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
            if (st_lookup(compat_allocator_tbl,
                          (st_data_t)allocator,
                          &compat_data)) {
                marshal_compat_t *compat = (marshal_compat_t*)compat_data;
                VALUE real_obj = obj;
                obj = compat->dumper(real_obj);
                st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
            }
        }

        switch (BUILTIN_TYPE(obj)) {
          case T_CLASS:
            if (FL_TEST(obj, FL_SINGLETON)) {
                rb_raise(rb_eTypeError, "singleton class can't be dumped");
            }
            w_byte(TYPE_CLASS, arg);
            {
                volatile VALUE path = class2path(obj);
                w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
            }
            break;

          case T_MODULE:
            w_byte(TYPE_MODULE, arg);
            {
                VALUE path = class2path(obj);
                w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
            }
            break;

          case T_FLOAT:
            w_byte(TYPE_FLOAT, arg);
            w_float(RFLOAT_VALUE(obj), arg);
            break;

          case T_BIGNUM:
            /* Sign byte, length in short units, then the digits as shorts. */
            w_byte(TYPE_BIGNUM, arg);
            {
                char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
                long len = RBIGNUM_LEN(obj);
                BDIGIT *d = RBIGNUM_DIGITS(obj);

                w_byte(sign, arg);
                w_long(SHORTLEN(len), arg); /* may expand to a use of d */
                while (len--) {
#if SIZEOF_BDIGITS > SIZEOF_SHORT
                    BDIGIT num = *d;
                    int i;

                    for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
                        w_short(num & SHORTMASK, arg);
                        num = SHORTDN(num);
                        /* Stop early in the top digit once only zero shorts remain. */
                        if (len == 0 && num == 0) break;
                    }
#else
                    w_short(*d, arg);
#endif
                    d++;
                }
            }
            break;

          case T_STRING:
            w_uclass(obj, rb_cString, arg);
            w_byte(TYPE_STRING, arg);
            w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
            break;

          case T_REGEXP:
            w_uclass(obj, rb_cRegexp, arg);
            w_byte(TYPE_REGEXP, arg);
            {
                int opts = rb_reg_options(obj);
                w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
                w_byte((char)opts, arg);
            }
            break;

          case T_ARRAY:
            w_uclass(obj, rb_cArray, arg);
            w_byte(TYPE_ARRAY, arg);
            {
                long i, len = RARRAY_LEN(obj);

                w_long(len, arg);
                for (i=0; i<RARRAY_LEN(obj); i++) {
                    w_object(RARRAY_PTR(obj)[i], arg, limit);
                    /* Dumping an element can run user code; the length already written must still hold. */
                    if (len != RARRAY_LEN(obj)) {
                        rb_raise(rb_eRuntimeError, "array modified during dump");
                    }
                }
            }
            break;

          case T_HASH:
            w_uclass(obj, rb_cHash, arg);
            if (NIL_P(RHASH_IFNONE(obj))) {
                w_byte(TYPE_HASH, arg);
            }
            else if (FL_TEST(obj, FL_USER2)) {
                /* FL_USER2 set here means the default is a proc (per the message below). */
                rb_raise(rb_eTypeError, "can't dump hash with default proc");
            }
            else {
                w_byte(TYPE_HASH_DEF, arg);
            }
            w_long(RHASH_SIZE(obj), arg);
            rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
            /* A non-nil default value is appended after the pairs. */
            if (!NIL_P(RHASH_IFNONE(obj))) {
                w_object(RHASH_IFNONE(obj), arg, limit);
            }
            break;

          case T_STRUCT:
            w_class(TYPE_STRUCT, obj, arg, TRUE);
            {
                long len = RSTRUCT_LEN(obj);
                VALUE mem;
                long i;

                w_long(len, arg);
                mem = rb_struct_members(obj);
                /* Each member is written as name symbol followed by value. */
                for (i=0; i<len; i++) {
                    w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
                    w_object(RSTRUCT_PTR(obj)[i], arg, limit);
                }
            }
            break;

          case T_OBJECT:
            w_class(TYPE_OBJECT, obj, arg, TRUE);
            w_objivar(obj, &c_arg);
            break;

          case T_DATA:
            {
                VALUE v;

                /* T_DATA objects are only dumpable through the s_dump_data method. */
                if (!rb_respond_to(obj, s_dump_data)) {
                    rb_raise(rb_eTypeError,
                             "no marshal_dump is defined for class %s",
                             rb_obj_classname(obj));
                }
                v = rb_funcall(obj, s_dump_data, 0);
                check_dump_arg(arg, s_dump_data);
                w_class(TYPE_DATA, obj, arg, TRUE);
                w_object(v, arg, limit);
            }
            break;

          default:
            rb_raise(rb_eTypeError, "can't dump %s",
                     rb_obj_classname(obj));
            break;
        }
    }
    /* Trailer announced by the TYPE_IVAR prefix written above. */
    if (hasiv) {
        w_ivar(obj, ivtbl, &c_arg);
    }
}
00847
00848 static void
00849 clear_dump_arg(struct dump_arg *arg)
00850 {
00851 if (!arg->symbols) return;
00852 st_free_table(arg->symbols);
00853 arg->symbols = 0;
00854 st_free_table(arg->data);
00855 arg->data = 0;
00856 st_free_table(arg->compat_tbl);
00857 arg->compat_tbl = 0;
00858 if (arg->encodings) {
00859 st_free_table(arg->encodings);
00860 arg->encodings = 0;
00861 }
00862 }
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
/*
 * Entry point for dumping: takes one to three arguments
 * (obj [, port_or_limit [, limit]]).
 *
 * With two arguments, a Fixnum second argument is the depth limit and
 * anything else is the output port.  With three arguments the second is the
 * port and the third, unless nil, the limit.  The limit defaults to -1.
 * A nil port or a port that does not respond to s_write raises TypeError.
 *
 * Returns the port when one was given, otherwise the String holding the
 * serialized data.
 */
static VALUE
marshal_dump(int argc, VALUE *argv)
{
    VALUE obj, port, a1, a2;
    int limit = -1;
    struct dump_arg *arg;
    volatile VALUE wrapper;

    port = Qnil;
    rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
    if (argc == 3) {
        if (!NIL_P(a2)) limit = NUM2INT(a2);
        if (NIL_P(a1)) goto type_error;
        port = a1;
    }
    else if (argc == 2) {
        if (FIXNUM_P(a1)) limit = FIX2INT(a1);
        else if (NIL_P(a1)) goto type_error;
        else port = a1;
    }
    /* The dump state lives in a typed-data object whose free function
     * (free_dump_arg) releases the tables. */
    wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
    arg->dest = 0;
    arg->symbols = st_init_numtable();
    arg->data = st_init_numtable();
    arg->infection = 0;
    arg->compat_tbl = st_init_numtable();
    arg->encodings = 0;
    arg->str = rb_str_buf_new(0);
    if (!NIL_P(port)) {
        if (!rb_respond_to(port, s_write)) {
          type_error:
            rb_raise(rb_eTypeError, "instance of IO needed");
        }
        arg->dest = port;
        if (rb_respond_to(port, s_binmode)) {
            rb_funcall2(port, s_binmode, 0, 0);
            check_dump_arg(arg, s_binmode);
        }
    }
    else {
        /* No port: the buffer itself is the result. */
        port = arg->str;
    }

    /* Format version header. */
    w_byte(MARSHAL_MAJOR, arg);
    w_byte(MARSHAL_MINOR, arg);

    w_object(obj, arg, limit);
    /* Flush whatever is still buffered to the destination. */
    if (arg->dest) {
        rb_io_write(arg->dest, arg->str);
        rb_str_resize(arg->str, 0);
    }
    clear_dump_arg(arg);
    RB_GC_GUARD(wrapper);

    return port;
}
00954
/* State shared by all reader functions during one load. */
struct load_arg {
    VALUE src;              /* input: a String, or an object read via s_getbyte/s_read */
    long offset;            /* read position within src when it is a String */
    st_table *symbols;      /* index -> ID of symbols read so far; 0 once cleared */
    st_table *data;         /* index -> object read so far */
    VALUE proc;             /* optional callable invoked on loaded objects (0 if none) */
    st_table *compat_tbl;   /* stand-in object -> real object (see marshal_compat_t) */
    int infection;          /* accumulated MARSHAL_INFECTION flags */
};
00964
00965 static void
00966 check_load_arg(struct load_arg *arg, ID sym)
00967 {
00968 if (!arg->symbols) {
00969 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00970 rb_id2name(sym));
00971 }
00972 }
00973
00974 static void clear_load_arg(struct load_arg *arg);
00975
00976 static void
00977 mark_load_arg(void *ptr)
00978 {
00979 struct load_arg *p = ptr;
00980 if (!p->symbols)
00981 return;
00982 rb_mark_tbl(p->data);
00983 rb_mark_hash(p->compat_tbl);
00984 }
00985
/* GC free function: clears the load state via clear_load_arg(), then frees it. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
    xfree(state);
}
00992
00993 static size_t
00994 memsize_load_arg(const void *ptr)
00995 {
00996 return ptr ? sizeof(struct load_arg) : 0;
00997 }
00998
/* Typed-data descriptor used to wrap a struct load_arg in a Ruby object. */
static const rb_data_type_t load_arg_data = {
    "load_arg",
    mark_load_arg, free_load_arg, memsize_load_arg
};
01003
/* Registers v under the next free object index (the current table size). */
#define r_entry(v, arg) r_entry0(v, (arg)->data->num_entries, arg)
/* Forward declarations for the mutually recursive reader functions. */
static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
static VALUE r_object(struct load_arg *arg);
static ID r_symbol(struct load_arg *arg);
static VALUE path2class(VALUE path);
01009
01010 static st_index_t
01011 r_prepare(struct load_arg *arg)
01012 {
01013 st_index_t idx = arg->data->num_entries;
01014
01015 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01016 return idx;
01017 }
01018
01019 static int
01020 r_byte(struct load_arg *arg)
01021 {
01022 int c;
01023
01024 if (TYPE(arg->src) == T_STRING) {
01025 if (RSTRING_LEN(arg->src) > arg->offset) {
01026 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01027 }
01028 else {
01029 rb_raise(rb_eArgError, "marshal data too short");
01030 }
01031 }
01032 else {
01033 VALUE src = arg->src;
01034 VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
01035 check_load_arg(arg, s_getbyte);
01036 if (NIL_P(v)) rb_eof_error();
01037 c = (unsigned char)NUM2CHR(v);
01038 }
01039 return c;
01040 }
01041
01042 static void
01043 long_toobig(int size)
01044 {
01045 rb_raise(rb_eTypeError, "long too big for this architecture (size "
01046 STRINGIZE(SIZEOF_LONG)", given %d)", size);
01047 }
01048
/* Interprets the low 8 bits of c as a signed value (-128..127). */
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else
/* Fallback without `signed char`: flip the sign bit, then subtract it back out. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
01056
/*
 * Reads an integer in the variable-length format produced by w_long().
 *
 * The first byte, taken as signed, selects the form:
 *   0            -> 0
 *   5..127       -> byte - 5
 *   -128..-5     -> byte + 5
 *   1..4         -> that many little-endian payload bytes, zero-extended
 *   -4..-1       -> |byte| little-endian payload bytes, one-extended
 *
 * Raises TypeError (via long_toobig) when the payload byte count exceeds
 * sizeof(long).
 *
 * The value is assembled in an unsigned long: left-shifting a signed long
 * into or past its sign bit is undefined behaviour, which the payload's top
 * byte can trigger.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long ux;
    int c = SIGN_EXTEND_CHAR(r_byte(arg));
    long i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        ux = 0;
        for (i=0;i<c;i++) {
            ux |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        /* Start from all ones so the bytes not present stay sign-extended. */
        ux = ~0UL;
        for (i=0;i<c;i++) {
            ux &= ~(0xffUL << (8*i));
            ux |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)ux;
}
01089
01090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01091
01092 static VALUE
01093 r_bytes0(long len, struct load_arg *arg)
01094 {
01095 VALUE str;
01096
01097 if (len == 0) return rb_str_new(0, 0);
01098 if (TYPE(arg->src) == T_STRING) {
01099 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01100 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01101 arg->offset += len;
01102 }
01103 else {
01104 too_short:
01105 rb_raise(rb_eArgError, "marshal data too short");
01106 }
01107 }
01108 else {
01109 VALUE src = arg->src;
01110 VALUE n = LONG2NUM(len);
01111 str = rb_funcall2(src, s_read, 1, &n);
01112 check_load_arg(arg, s_read);
01113 if (NIL_P(str)) goto too_short;
01114 StringValue(str);
01115 if (RSTRING_LEN(str) != len) goto too_short;
01116 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01117 }
01118 return str;
01119 }
01120
01121 static int
01122 id2encidx(ID id, VALUE val)
01123 {
01124 if (id == rb_id_encoding()) {
01125 int idx = rb_enc_find_index(StringValueCStr(val));
01126 return idx;
01127 }
01128 else if (id == rb_intern("E")) {
01129 if (val == Qfalse) return rb_usascii_encindex();
01130 else if (val == Qtrue) return rb_utf8_encindex();
01131
01132 }
01133 return -1;
01134 }
01135
01136 static ID
01137 r_symlink(struct load_arg *arg)
01138 {
01139 ID id;
01140 long num = r_long(arg);
01141
01142 if (st_lookup(arg->symbols, num, &id)) {
01143 return id;
01144 }
01145 rb_raise(rb_eArgError, "bad symbol");
01146 }
01147
01148 static ID
01149 r_symreal(struct load_arg *arg, int ivar)
01150 {
01151 volatile VALUE s = r_bytes(arg);
01152 ID id;
01153 int idx = -1;
01154 st_index_t n = arg->symbols->num_entries;
01155
01156 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
01157 if (ivar) {
01158 long num = r_long(arg);
01159 while (num-- > 0) {
01160 id = r_symbol(arg);
01161 idx = id2encidx(id, r_object(arg));
01162 }
01163 }
01164 if (idx < 0) idx = rb_usascii_encindex();
01165 rb_enc_associate_index(s, idx);
01166 id = rb_intern_str(s);
01167 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01168
01169 return id;
01170 }
01171
01172 static ID
01173 r_symbol(struct load_arg *arg)
01174 {
01175 int type, ivar = 0;
01176
01177 again:
01178 switch ((type = r_byte(arg))) {
01179 case TYPE_IVAR:
01180 ivar = 1;
01181 goto again;
01182 case TYPE_SYMBOL:
01183 return r_symreal(arg, ivar);
01184 case TYPE_SYMLINK:
01185 if (ivar) {
01186 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01187 }
01188 return r_symlink(arg);
01189 default:
01190 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01191 break;
01192 }
01193 }
01194
01195 static VALUE
01196 r_unique(struct load_arg *arg)
01197 {
01198 return rb_id2str(r_symbol(arg));
01199 }
01200
01201 static VALUE
01202 r_string(struct load_arg *arg)
01203 {
01204 return r_bytes(arg);
01205 }
01206
01207 static VALUE
01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01209 {
01210 st_data_t real_obj = (VALUE)Qundef;
01211 if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01212 st_insert(arg->data, num, (st_data_t)real_obj);
01213 }
01214 else {
01215 st_insert(arg->data, num, (st_data_t)v);
01216 }
01217 if (arg->infection) {
01218 FL_SET(v, arg->infection);
01219 if ((VALUE)real_obj != Qundef)
01220 FL_SET((VALUE)real_obj, arg->infection);
01221 }
01222 return v;
01223 }
01224
01225 static VALUE
01226 r_leave(VALUE v, struct load_arg *arg)
01227 {
01228 st_data_t data;
01229 if (st_lookup(arg->compat_tbl, v, &data)) {
01230 VALUE real_obj = (VALUE)data;
01231 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01232 st_data_t key = v;
01233 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01234 marshal_compat_t *compat = (marshal_compat_t*)data;
01235 compat->loader(real_obj, v);
01236 }
01237 st_delete(arg->compat_tbl, &key, 0);
01238 v = real_obj;
01239 }
01240 if (arg->proc) {
01241 v = rb_funcall(arg->proc, s_call, 1, v);
01242 check_load_arg(arg, s_call);
01243 }
01244 return v;
01245 }
01246
01247 static void
01248 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01249 {
01250 long len;
01251
01252 len = r_long(arg);
01253 if (len > 0) {
01254 do {
01255 ID id = r_symbol(arg);
01256 VALUE val = r_object(arg);
01257 int idx = id2encidx(id, val);
01258 if (idx >= 0) {
01259 rb_enc_associate_index(obj, idx);
01260 if (has_encoding) *has_encoding = TRUE;
01261 }
01262 else {
01263 rb_ivar_set(obj, id, val);
01264 }
01265 } while (--len > 0);
01266 }
01267 }
01268
01269 static VALUE
01270 path2class(VALUE path)
01271 {
01272 VALUE v = rb_path_to_class(path);
01273
01274 if (TYPE(v) != T_CLASS) {
01275 rb_raise(rb_eArgError, "%.*s does not refer to class",
01276 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01277 }
01278 return v;
01279 }
01280
01281 static VALUE
01282 path2module(VALUE path)
01283 {
01284 VALUE v = rb_path_to_class(path);
01285
01286 if (TYPE(v) != T_MODULE) {
01287 rb_raise(rb_eArgError, "%.*s does not refer to module",
01288 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01289 }
01290 return v;
01291 }
01292
01293 static VALUE
01294 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01295 {
01296 VALUE klass;
01297 st_data_t data;
01298 rb_alloc_func_t allocator;
01299
01300 klass = path2class(path);
01301
01302 allocator = rb_get_alloc_func(klass);
01303 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01304 marshal_compat_t *compat = (marshal_compat_t*)data;
01305 VALUE real_obj = rb_obj_alloc(klass);
01306 VALUE obj = rb_obj_alloc(compat->oldclass);
01307 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01308 return obj;
01309 }
01310
01311 return rb_obj_alloc(klass);
01312 }
01313
01314 static VALUE
01315 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01316 {
01317 VALUE v = Qnil;
01318 int type = r_byte(arg);
01319 long id;
01320 st_data_t link;
01321
01322 switch (type) {
01323 case TYPE_LINK:
01324 id = r_long(arg);
01325 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01326 rb_raise(rb_eArgError, "dump format error (unlinked)");
01327 }
01328 v = (VALUE)link;
01329 if (arg->proc) {
01330 v = rb_funcall(arg->proc, s_call, 1, v);
01331 check_load_arg(arg, s_call);
01332 }
01333 break;
01334
01335 case TYPE_IVAR:
01336 {
01337 int ivar = TRUE;
01338
01339 v = r_object0(arg, &ivar, extmod);
01340 if (ivar) r_ivar(v, NULL, arg);
01341 }
01342 break;
01343
01344 case TYPE_EXTENDED:
01345 {
01346 VALUE m = path2module(r_unique(arg));
01347
01348 if (NIL_P(extmod)) extmod = rb_ary_new2(0);
01349 rb_ary_push(extmod, m);
01350
01351 v = r_object0(arg, 0, extmod);
01352 while (RARRAY_LEN(extmod) > 0) {
01353 m = rb_ary_pop(extmod);
01354 rb_extend_object(v, m);
01355 }
01356 }
01357 break;
01358
01359 case TYPE_UCLASS:
01360 {
01361 VALUE c = path2class(r_unique(arg));
01362
01363 if (FL_TEST(c, FL_SINGLETON)) {
01364 rb_raise(rb_eTypeError, "singleton can't be loaded");
01365 }
01366 v = r_object0(arg, 0, extmod);
01367 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
01368 format_error:
01369 rb_raise(rb_eArgError, "dump format error (user class)");
01370 }
01371 if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01372 VALUE tmp = rb_obj_alloc(c);
01373
01374 if (TYPE(v) != TYPE(tmp)) goto format_error;
01375 }
01376 RBASIC(v)->klass = c;
01377 }
01378 break;
01379
01380 case TYPE_NIL:
01381 v = Qnil;
01382 v = r_leave(v, arg);
01383 break;
01384
01385 case TYPE_TRUE:
01386 v = Qtrue;
01387 v = r_leave(v, arg);
01388 break;
01389
01390 case TYPE_FALSE:
01391 v = Qfalse;
01392 v = r_leave(v, arg);
01393 break;
01394
01395 case TYPE_FIXNUM:
01396 {
01397 long i = r_long(arg);
01398 v = LONG2FIX(i);
01399 }
01400 v = r_leave(v, arg);
01401 break;
01402
01403 case TYPE_FLOAT:
01404 {
01405 double d;
01406 VALUE str = r_bytes(arg);
01407 const char *ptr = RSTRING_PTR(str);
01408
01409 if (strcmp(ptr, "nan") == 0) {
01410 d = NAN;
01411 }
01412 else if (strcmp(ptr, "inf") == 0) {
01413 d = INFINITY;
01414 }
01415 else if (strcmp(ptr, "-inf") == 0) {
01416 d = -INFINITY;
01417 }
01418 else {
01419 char *e;
01420 d = strtod(ptr, &e);
01421 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01422 }
01423 v = DBL2NUM(d);
01424 v = r_entry(v, arg);
01425 v = r_leave(v, arg);
01426 }
01427 break;
01428
01429 case TYPE_BIGNUM:
01430 {
01431 long len;
01432 BDIGIT *digits;
01433 volatile VALUE data;
01434
01435 NEWOBJ(big, struct RBignum);
01436 OBJSETUP(big, rb_cBignum, T_BIGNUM);
01437 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
01438 len = r_long(arg);
01439 data = r_bytes0(len * 2, arg);
01440 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01441 rb_big_resize((VALUE)big, len);
01442 #else
01443 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
01444 #endif
01445 digits = RBIGNUM_DIGITS(big);
01446 MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
01447 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01448 MEMZERO((char *)digits + len * 2, char,
01449 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
01450 #endif
01451 len = RBIGNUM_LEN(big);
01452 while (len > 0) {
01453 unsigned char *p = (unsigned char *)digits;
01454 BDIGIT num = 0;
01455 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01456 int shift = 0;
01457 int i;
01458
01459 for (i=0; i<SIZEOF_BDIGITS; i++) {
01460 num |= (int)p[i] << shift;
01461 shift += 8;
01462 }
01463 #else
01464 num = p[0] | (p[1] << 8);
01465 #endif
01466 *digits++ = num;
01467 len--;
01468 }
01469 v = rb_big_norm((VALUE)big);
01470 v = r_entry(v, arg);
01471 v = r_leave(v, arg);
01472 }
01473 break;
01474
01475 case TYPE_STRING:
01476 v = r_entry(r_string(arg), arg);
01477 v = r_leave(v, arg);
01478 break;
01479
01480 case TYPE_REGEXP:
01481 {
01482 volatile VALUE str = r_bytes(arg);
01483 int options = r_byte(arg);
01484 int has_encoding = FALSE;
01485 st_index_t idx = r_prepare(arg);
01486
01487 if (ivp) {
01488 r_ivar(str, &has_encoding, arg);
01489 *ivp = FALSE;
01490 }
01491 if (!has_encoding) {
01492
01493 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01494 long len = RSTRING_LEN(str);
01495 long bs = 0;
01496 for (; len-- > 0; *dst++ = *src++) {
01497 switch (*src) {
01498 case '\\': bs++; break;
01499 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01500 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01501 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01502 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01503 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01504 if (bs & 1) --dst;
01505 default: bs = 0; break;
01506 }
01507 }
01508 rb_str_set_len(str, dst - ptr);
01509 }
01510 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01511 v = r_leave(v, arg);
01512 }
01513 break;
01514
01515 case TYPE_ARRAY:
01516 {
01517 volatile long len = r_long(arg);
01518
01519 v = rb_ary_new2(len);
01520 v = r_entry(v, arg);
01521 while (len--) {
01522 rb_ary_push(v, r_object(arg));
01523 }
01524 v = r_leave(v, arg);
01525 }
01526 break;
01527
01528 case TYPE_HASH:
01529 case TYPE_HASH_DEF:
01530 {
01531 long len = r_long(arg);
01532
01533 v = rb_hash_new();
01534 v = r_entry(v, arg);
01535 while (len--) {
01536 VALUE key = r_object(arg);
01537 VALUE value = r_object(arg);
01538 rb_hash_aset(v, key, value);
01539 }
01540 if (type == TYPE_HASH_DEF) {
01541 RHASH_IFNONE(v) = r_object(arg);
01542 }
01543 v = r_leave(v, arg);
01544 }
01545 break;
01546
01547 case TYPE_STRUCT:
01548 {
01549 VALUE mem, values;
01550 volatile long i;
01551 ID slot;
01552 st_index_t idx = r_prepare(arg);
01553 VALUE klass = path2class(r_unique(arg));
01554 long len = r_long(arg);
01555
01556 v = rb_obj_alloc(klass);
01557 if (TYPE(v) != T_STRUCT) {
01558 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01559 }
01560 mem = rb_struct_s_members(klass);
01561 if (RARRAY_LEN(mem) != len) {
01562 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01563 rb_class2name(klass));
01564 }
01565
01566 v = r_entry0(v, idx, arg);
01567 values = rb_ary_new2(len);
01568 for (i=0; i<len; i++) {
01569 slot = r_symbol(arg);
01570
01571 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
01572 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01573 rb_class2name(klass),
01574 rb_id2name(slot),
01575 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
01576 }
01577 rb_ary_push(values, r_object(arg));
01578 }
01579 rb_struct_initialize(v, values);
01580 v = r_leave(v, arg);
01581 }
01582 break;
01583
01584 case TYPE_USERDEF:
01585 {
01586 VALUE klass = path2class(r_unique(arg));
01587 VALUE data;
01588
01589 if (!rb_respond_to(klass, s_load)) {
01590 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01591 rb_class2name(klass));
01592 }
01593 data = r_string(arg);
01594 if (ivp) {
01595 r_ivar(data, NULL, arg);
01596 *ivp = FALSE;
01597 }
01598 v = rb_funcall(klass, s_load, 1, data);
01599 check_load_arg(arg, s_load);
01600 v = r_entry(v, arg);
01601 v = r_leave(v, arg);
01602 }
01603 break;
01604
01605 case TYPE_USRMARSHAL:
01606 {
01607 VALUE klass = path2class(r_unique(arg));
01608 VALUE data;
01609
01610 v = rb_obj_alloc(klass);
01611 if (!NIL_P(extmod)) {
01612 while (RARRAY_LEN(extmod) > 0) {
01613 VALUE m = rb_ary_pop(extmod);
01614 rb_extend_object(v, m);
01615 }
01616 }
01617 if (!rb_respond_to(v, s_mload)) {
01618 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01619 rb_class2name(klass));
01620 }
01621 v = r_entry(v, arg);
01622 data = r_object(arg);
01623 rb_funcall(v, s_mload, 1, data);
01624 check_load_arg(arg, s_mload);
01625 v = r_leave(v, arg);
01626 }
01627 break;
01628
01629 case TYPE_OBJECT:
01630 {
01631 st_index_t idx = r_prepare(arg);
01632 v = obj_alloc_by_path(r_unique(arg), arg);
01633 if (TYPE(v) != T_OBJECT) {
01634 rb_raise(rb_eArgError, "dump format error");
01635 }
01636 v = r_entry0(v, idx, arg);
01637 r_ivar(v, NULL, arg);
01638 v = r_leave(v, arg);
01639 }
01640 break;
01641
01642 case TYPE_DATA:
01643 {
01644 VALUE klass = path2class(r_unique(arg));
01645 if (rb_respond_to(klass, s_alloc)) {
01646 static int warn = TRUE;
01647 if (warn) {
01648 rb_warn("define `allocate' instead of `_alloc'");
01649 warn = FALSE;
01650 }
01651 v = rb_funcall(klass, s_alloc, 0);
01652 check_load_arg(arg, s_alloc);
01653 }
01654 else {
01655 v = rb_obj_alloc(klass);
01656 }
01657 if (TYPE(v) != T_DATA) {
01658 rb_raise(rb_eArgError, "dump format error");
01659 }
01660 v = r_entry(v, arg);
01661 if (!rb_respond_to(v, s_load_data)) {
01662 rb_raise(rb_eTypeError,
01663 "class %s needs to have instance method `_load_data'",
01664 rb_class2name(klass));
01665 }
01666 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
01667 check_load_arg(arg, s_load_data);
01668 v = r_leave(v, arg);
01669 }
01670 break;
01671
01672 case TYPE_MODULE_OLD:
01673 {
01674 volatile VALUE str = r_bytes(arg);
01675
01676 v = rb_path_to_class(str);
01677 v = r_entry(v, arg);
01678 v = r_leave(v, arg);
01679 }
01680 break;
01681
01682 case TYPE_CLASS:
01683 {
01684 volatile VALUE str = r_bytes(arg);
01685
01686 v = path2class(str);
01687 v = r_entry(v, arg);
01688 v = r_leave(v, arg);
01689 }
01690 break;
01691
01692 case TYPE_MODULE:
01693 {
01694 volatile VALUE str = r_bytes(arg);
01695
01696 v = path2module(str);
01697 v = r_entry(v, arg);
01698 v = r_leave(v, arg);
01699 }
01700 break;
01701
01702 case TYPE_SYMBOL:
01703 if (ivp) {
01704 v = ID2SYM(r_symreal(arg, *ivp));
01705 *ivp = FALSE;
01706 }
01707 else {
01708 v = ID2SYM(r_symreal(arg, 0));
01709 }
01710 v = r_leave(v, arg);
01711 break;
01712
01713 case TYPE_SYMLINK:
01714 v = ID2SYM(r_symlink(arg));
01715 break;
01716
01717 default:
01718 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01719 break;
01720 }
01721 return v;
01722 }
01723
01724 static VALUE
01725 r_object(struct load_arg *arg)
01726 {
01727 return r_object0(arg, 0, Qnil);
01728 }
01729
01730 static void
01731 clear_load_arg(struct load_arg *arg)
01732 {
01733 if (!arg->symbols) return;
01734 st_free_table(arg->symbols);
01735 arg->symbols = 0;
01736 st_free_table(arg->data);
01737 arg->data = 0;
01738 st_free_table(arg->compat_tbl);
01739 arg->compat_tbl = 0;
01740 }
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753 static VALUE
01754 marshal_load(int argc, VALUE *argv)
01755 {
01756 VALUE port, proc;
01757 int major, minor, infection = 0;
01758 VALUE v;
01759 volatile VALUE wrapper;
01760 struct load_arg *arg;
01761
01762 rb_scan_args(argc, argv, "11", &port, &proc);
01763 v = rb_check_string_type(port);
01764 if (!NIL_P(v)) {
01765 infection = (int)FL_TEST(port, MARSHAL_INFECTION);
01766 port = v;
01767 }
01768 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01769 if (rb_respond_to(port, s_binmode)) {
01770 rb_funcall2(port, s_binmode, 0, 0);
01771 }
01772 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
01773 }
01774 else {
01775 rb_raise(rb_eTypeError, "instance of IO needed");
01776 }
01777 wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01778 arg->infection = infection;
01779 arg->src = port;
01780 arg->offset = 0;
01781 arg->symbols = st_init_numtable();
01782 arg->data = st_init_numtable();
01783 arg->compat_tbl = st_init_numtable();
01784 arg->proc = 0;
01785
01786 major = r_byte(arg);
01787 minor = r_byte(arg);
01788 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01789 clear_load_arg(arg);
01790 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01791 \tformat version %d.%d required; %d.%d given",
01792 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01793 }
01794 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01795 rb_warn("incompatible marshal file format (can be read)\n\
01796 \tformat version %d.%d required; %d.%d given",
01797 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01798 }
01799
01800 if (!NIL_P(proc)) arg->proc = proc;
01801 v = r_object(arg);
01802 clear_load_arg(arg);
01803 RB_GC_GUARD(wrapper);
01804
01805 return v;
01806 }
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878
01879
01880
01881
01882
01883
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904 void
01905 Init_marshal(void)
01906 {
01907 #undef rb_intern
01908 #define rb_intern(str) rb_intern_const(str)
01909
01910 VALUE rb_mMarshal = rb_define_module("Marshal");
01911
01912 s_dump = rb_intern("_dump");
01913 s_load = rb_intern("_load");
01914 s_mdump = rb_intern("marshal_dump");
01915 s_mload = rb_intern("marshal_load");
01916 s_dump_data = rb_intern("_dump_data");
01917 s_load_data = rb_intern("_load_data");
01918 s_alloc = rb_intern("_alloc");
01919 s_call = rb_intern("call");
01920 s_getbyte = rb_intern("getbyte");
01921 s_read = rb_intern("read");
01922 s_write = rb_intern("write");
01923 s_binmode = rb_intern("binmode");
01924
01925 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
01926 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
01927 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
01928
01929 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
01930 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
01931
01932 compat_allocator_tbl = st_init_numtable();
01933 compat_allocator_tbl_wrapper =
01934 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
01935 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
01936 }
01937
01938 VALUE
01939 rb_marshal_dump(VALUE obj, VALUE port)
01940 {
01941 int argc = 1;
01942 VALUE argv[2];
01943
01944 argv[0] = obj;
01945 argv[1] = port;
01946 if (!NIL_P(port)) argc = 2;
01947 return marshal_dump(argc, argv);
01948 }
01949
01950 VALUE
01951 rb_marshal_load(VALUE port)
01952 {
01953 return marshal_load(1, &port);
01954 }
01955