00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "ruby/ruby.h"
00017 #include <errno.h>
00018 #include <iconv.h>
00019 #include <assert.h>
00020 #include "ruby/st.h"
00021 #include "ruby/encoding.h"
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
/*
 * Descriptors are stored bit-inverted inside a VALUE so that the invalid
 * descriptor (iconv_t)-1 corresponds to the VALUE 0, and vice versa.
 */
#define VALUE2ICONV(v) ((iconv_t)(~(VALUE)(v)))
#define ICONV2VALUE(c) (~(VALUE)(c))
00081
00082 struct iconv_env_t
00083 {
00084 iconv_t cd;
00085 int argc;
00086 VALUE *argv;
00087 VALUE ret;
00088 int toidx;
00089 VALUE (*append)_((VALUE, VALUE));
00090 };
00091
00092 struct rb_iconv_opt_t
00093 {
00094 VALUE transliterate;
00095 VALUE discard_ilseq;
00096 };
00097
00098 static ID id_transliterate, id_discard_ilseq;
00099
00100 static VALUE rb_eIconvInvalidEncoding;
00101 static VALUE rb_eIconvFailure;
00102 static VALUE rb_eIconvIllegalSeq;
00103 static VALUE rb_eIconvInvalidChar;
00104 static VALUE rb_eIconvOutOfRange;
00105 static VALUE rb_eIconvBrokenLibrary;
00106
00107 static ID rb_success, rb_failed;
00108 static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00109 static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00110 static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
00111 static VALUE iconv_failure_success _((VALUE self));
00112 static VALUE iconv_failure_failed _((VALUE self));
00113
00114 static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
00115 static void iconv_dfree _((void *cd));
00116 static VALUE iconv_free _((VALUE cd));
00117 static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
00118 static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
00119 static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
00120 struct iconv_env_t* env));
00121 static VALUE iconv_s_allocate _((VALUE klass));
00122 static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
00123 static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
00124 static VALUE iconv_s_convert _((struct iconv_env_t* env));
00125 static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
00126 static VALUE iconv_init_state _((VALUE cd));
00127 static VALUE iconv_finish _((VALUE self));
00128 static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
00129 static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
00130
00131 static VALUE charset_map;
00132
00133
00134
00135
00136
00137
00138
00139 static VALUE
00140 charset_map_get(void)
00141 {
00142 return charset_map;
00143 }
00144
00145 static VALUE
00146 strip_glibc_option(VALUE *code)
00147 {
00148 VALUE val = StringValue(*code);
00149 const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
00150 const char *slash = memchr(ptr, '/', pend - ptr);
00151
00152 if (slash && slash < pend - 1 && slash[1] == '/') {
00153 VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
00154 val = rb_str_subseq(val, 0, slash - ptr);
00155 *code = val;
00156 return opt;
00157 }
00158 return 0;
00159 }
00160
00161 static char *
00162 map_charset(VALUE *code)
00163 {
00164 VALUE val = StringValue(*code);
00165
00166 if (RHASH_SIZE(charset_map)) {
00167 VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
00168 StringValuePtr(key);
00169 if (st_lookup(RHASH_TBL(charset_map), key, &val)) {
00170 *code = val;
00171 }
00172 }
00173 return StringValuePtr(*code);
00174 }
00175
00176 NORETURN(static void rb_iconv_sys_fail(const char *s));
00177 static void
00178 rb_iconv_sys_fail(const char *s)
00179 {
00180 if (errno == 0) {
00181 rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s));
00182 }
00183 rb_sys_fail(s);
00184 }
00185
00186 #define rb_sys_fail(s) rb_iconv_sys_fail(s)
00187
00188 static iconv_t
00189 iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
00190 {
00191 VALUE toopt = strip_glibc_option(&to);
00192 VALUE fromopt = strip_glibc_option(&from);
00193 VALUE toenc = 0, fromenc = 0;
00194 const char* tocode = map_charset(&to);
00195 const char* fromcode = map_charset(&from);
00196 iconv_t cd;
00197 int retry = 0;
00198
00199 *idx = rb_enc_find_index(tocode);
00200
00201 if (toopt) {
00202 toenc = rb_str_plus(to, toopt);
00203 tocode = RSTRING_PTR(toenc);
00204 }
00205 if (fromopt) {
00206 fromenc = rb_str_plus(from, fromopt);
00207 fromcode = RSTRING_PTR(fromenc);
00208 }
00209 while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
00210 int inval = 0;
00211 switch (errno) {
00212 case EMFILE:
00213 case ENFILE:
00214 case ENOMEM:
00215 if (!retry++) {
00216 rb_gc();
00217 continue;
00218 }
00219 break;
00220 case EINVAL:
00221 retry = 0;
00222 inval = 1;
00223 if (toenc) {
00224 tocode = RSTRING_PTR(to);
00225 rb_str_resize(toenc, 0);
00226 toenc = 0;
00227 continue;
00228 }
00229 if (fromenc) {
00230 fromcode = RSTRING_PTR(from);
00231 rb_str_resize(fromenc, 0);
00232 fromenc = 0;
00233 continue;
00234 }
00235 break;
00236 }
00237 {
00238 const char *s = inval ? "invalid encoding " : "iconv";
00239 volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) +
00240 RSTRING_LEN(from) + 8);
00241
00242 sprintf(RSTRING_PTR(msg), "%s(\"%s\", \"%s\")",
00243 s, RSTRING_PTR(to), RSTRING_PTR(from));
00244 s = RSTRING_PTR(msg);
00245 rb_str_set_len(msg, strlen(s));
00246 if (!inval) rb_sys_fail(s);
00247 rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
00248 rb_ary_new3(2, to, from), NULL, s));
00249 }
00250 }
00251
00252 if (toopt || fromopt) {
00253 if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
00254 fromopt = 0;
00255 }
00256 if (toopt && fromopt) {
00257 rb_warning("encoding option isn't portable: %s, %s",
00258 RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
00259 }
00260 else {
00261 rb_warning("encoding option isn't portable: %s",
00262 (toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
00263 }
00264 }
00265
00266 if (opt) {
00267 #ifdef ICONV_SET_TRANSLITERATE
00268 if (opt->transliterate != Qundef) {
00269 int flag = RTEST(opt->transliterate);
00270 rb_warning("encoding option isn't portable: transliterate");
00271 if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
00272 rb_sys_fail("ICONV_SET_TRANSLITERATE");
00273 }
00274 #endif
00275 #ifdef ICONV_SET_DISCARD_ILSEQ
00276 if (opt->discard_ilseq != Qundef) {
00277 int flag = RTEST(opt->discard_ilseq);
00278 rb_warning("encoding option isn't portable: discard_ilseq");
00279 if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
00280 rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
00281 }
00282 #endif
00283 }
00284
00285 return cd;
00286 }
00287
/*
 * Free function installed on Iconv data objects: closes the descriptor that
 * is stored (bit-inverted) in the data pointer.  The close result is ignored.
 */
static void
iconv_dfree(void *cd)
{
    iconv_t handle = VALUE2ICONV(cd);

    iconv_close(handle);
}

/* check_iconv() recognises Iconv objects by this free function. */
#define ICONV_FREE iconv_dfree
00295
00296 static VALUE
00297 iconv_free(VALUE cd)
00298 {
00299 if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
00300 rb_sys_fail("iconv_close");
00301 return Qnil;
00302 }
00303
00304 static VALUE
00305 check_iconv(VALUE obj)
00306 {
00307 Check_Type(obj, T_DATA);
00308 if (RDATA(obj)->dfree != ICONV_FREE) {
00309 rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
00310 }
00311 return (VALUE)DATA_PTR(obj);
00312 }
00313
00314 static VALUE
00315 iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
00316 {
00317 #ifdef ICONV_INPTR_CONST
00318 #define ICONV_INPTR_CAST
00319 #else
00320 #define ICONV_INPTR_CAST (char **)
00321 #endif
00322 size_t ret;
00323
00324 errno = 0;
00325 ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
00326 if (ret == (size_t)-1) {
00327 if (!*inlen)
00328 return Qfalse;
00329 switch (errno) {
00330 case E2BIG:
00331
00332 break;
00333 case EILSEQ:
00334 return rb_eIconvIllegalSeq;
00335 case EINVAL:
00336 return rb_eIconvInvalidChar;
00337 case 0:
00338 return rb_eIconvBrokenLibrary;
00339 default:
00340 rb_sys_fail("iconv");
00341 }
00342 }
00343 else if (*inlen > 0) {
00344
00345 return rb_eIconvIllegalSeq;
00346 }
00347 else if (ret) {
00348 return Qnil;
00349 }
00350 return Qfalse;
00351 }
00352
/*
 * Used by iconv_fail(): a failed string at least this long is cut to this
 * length (followed by "...") when it is shown in the exception message.
 */
#define FAILED_MAXLEN 16
00354
00355 static VALUE
00356 iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
00357 {
00358 rb_call_super(1, &mesg);
00359 rb_ivar_set(error, rb_success, success);
00360 rb_ivar_set(error, rb_failed, failed);
00361 return error;
00362 }
00363
00364 static VALUE
00365 iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00366 {
00367 VALUE args[3];
00368
00369 if (mesg && *mesg) {
00370 args[0] = rb_str_new2(mesg);
00371 }
00372 else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
00373 args[0] = rb_inspect(failed);
00374 }
00375 else {
00376 args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
00377 rb_str_cat2(args[0], "...");
00378 }
00379 args[1] = success;
00380 args[2] = failed;
00381 if (env) {
00382 args[1] = env->append(rb_obj_dup(env->ret), success);
00383 if (env->argc > 0) {
00384 *(env->argv) = failed;
00385 args[2] = rb_ary_new4(env->argc, env->argv);
00386 }
00387 }
00388 return rb_class_new_instance(3, args, error);
00389 }
00390
00391 static VALUE
00392 iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00393 {
00394 error = iconv_fail(error, success, failed, env, mesg);
00395 if (!rb_block_given_p()) rb_exc_raise(error);
00396 rb_set_errinfo(error);
00397 return rb_yield(failed);
00398 }
00399
00400 static VALUE
00401 rb_str_derive(VALUE str, const char* ptr, long len)
00402 {
00403 VALUE ret;
00404
00405 if (NIL_P(str))
00406 return rb_str_new(ptr, len);
00407 if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
00408 ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
00409 else
00410 ret = rb_str_new(ptr, len);
00411 OBJ_INFECT(ret, str);
00412 return ret;
00413 }
00414
00415 static VALUE
00416 iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
00417 {
00418 VALUE ret = Qfalse;
00419 VALUE error = Qfalse;
00420 VALUE rescue;
00421 const char *inptr, *instart;
00422 size_t inlen;
00423
00424 char buffer[BUFSIZ];
00425 char *outptr;
00426 size_t outlen;
00427
00428 if (cd == (iconv_t)-1)
00429 rb_raise(rb_eArgError, "closed iconv");
00430
00431 if (NIL_P(str)) {
00432
00433 inptr = "";
00434 inlen = 0;
00435 outptr = buffer;
00436 outlen = sizeof(buffer);
00437 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00438 if (RTEST(error)) {
00439 unsigned int i;
00440 rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0);
00441 if (TYPE(rescue) == T_ARRAY) {
00442 str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
00443 }
00444 if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
00445 char c = i;
00446 str = rb_str_new(&c, 1);
00447 }
00448 else if (!NIL_P(str)) {
00449 StringValue(str);
00450 }
00451 }
00452
00453 inptr = NULL;
00454 length = 0;
00455 }
00456 else {
00457 long slen;
00458
00459 StringValue(str);
00460 slen = RSTRING_LEN(str);
00461 inptr = RSTRING_PTR(str);
00462
00463 inptr += start;
00464 if (length < 0 || length > start + slen)
00465 length = slen - start;
00466 }
00467 instart = inptr;
00468 inlen = length;
00469
00470 do {
00471 char errmsg[50];
00472 const char *tmpstart = inptr;
00473 outptr = buffer;
00474 outlen = sizeof(buffer);
00475
00476 errmsg[0] = 0;
00477 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00478
00479 if (
00480 #if SIGNEDNESS_OF_SIZE_T < 0
00481 0 <= outlen &&
00482 #endif
00483 outlen <= sizeof(buffer)) {
00484 outlen = sizeof(buffer) - outlen;
00485 if (NIL_P(error) ||
00486 outlen > (size_t)(inptr - tmpstart) ||
00487 (outlen < (size_t)(inptr - tmpstart) && inlen > 0) ||
00488 memcmp(buffer, tmpstart, outlen))
00489 {
00490 if (NIL_P(str)) {
00491 ret = rb_str_new(buffer, outlen);
00492 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00493 }
00494 else {
00495 if (ret) {
00496 ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
00497 }
00498 else {
00499 ret = rb_str_new(instart, tmpstart - instart);
00500 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00501 OBJ_INFECT(ret, str);
00502 }
00503 ret = rb_str_buf_cat(ret, buffer, outlen);
00504 instart = inptr;
00505 }
00506 }
00507 else if (!inlen) {
00508 inptr = tmpstart + outlen;
00509 }
00510 }
00511 else {
00512
00513 sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
00514 error = rb_eIconvOutOfRange;
00515 }
00516
00517 if (RTEST(error)) {
00518 long len = 0;
00519
00520 if (!ret) {
00521 ret = rb_str_derive(str, instart, inptr - instart);
00522 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00523 }
00524 else if (inptr > instart) {
00525 rb_str_cat(ret, instart, inptr - instart);
00526 }
00527 str = rb_str_derive(str, inptr, inlen);
00528 rescue = iconv_fail_retry(error, ret, str, env, errmsg);
00529 if (TYPE(rescue) == T_ARRAY) {
00530 if ((len = RARRAY_LEN(rescue)) > 0)
00531 rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
00532 if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
00533 StringValue(str);
00534 inlen = length = RSTRING_LEN(str);
00535 instart = inptr = RSTRING_PTR(str);
00536 continue;
00537 }
00538 }
00539 else if (!NIL_P(rescue)) {
00540 rb_str_concat(ret, rescue);
00541 }
00542 break;
00543 }
00544 } while (inlen > 0);
00545
00546 if (!ret) {
00547 ret = rb_str_derive(str, instart, inptr - instart);
00548 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00549 }
00550 else if (inptr > instart) {
00551 rb_str_cat(ret, instart, inptr - instart);
00552 }
00553 return ret;
00554 }
00555
00556 static VALUE
00557 iconv_s_allocate(VALUE klass)
00558 {
00559 return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
00560 }
00561
00562 static VALUE
00563 get_iconv_opt_i(VALUE i, VALUE arg)
00564 {
00565 struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
00566 VALUE name, val;
00567
00568 (void)opt;
00569 i = rb_Array(i);
00570 name = rb_ary_entry(i, 0);
00571 val = rb_ary_entry(i, 1);
00572 do {
00573 if (SYMBOL_P(name)) {
00574 ID id = SYM2ID(name);
00575 if (id == id_transliterate) {
00576 #ifdef ICONV_SET_TRANSLITERATE
00577 opt->transliterate = val;
00578 #else
00579 rb_notimplement();
00580 #endif
00581 break;
00582 }
00583 if (id == id_discard_ilseq) {
00584 #ifdef ICONV_SET_DISCARD_ILSEQ
00585 opt->discard_ilseq = val;
00586 #else
00587 rb_notimplement();
00588 #endif
00589 break;
00590 }
00591 }
00592 else {
00593 const char *s = StringValueCStr(name);
00594 if (strcmp(s, "transliterate") == 0) {
00595 #ifdef ICONV_SET_TRANSLITERATE
00596 opt->transliterate = val;
00597 #else
00598 rb_notimplement();
00599 #endif
00600 break;
00601 }
00602 if (strcmp(s, "discard_ilseq") == 0) {
00603 #ifdef ICONV_SET_DISCARD_ILSEQ
00604 opt->discard_ilseq = val;
00605 #else
00606 rb_notimplement();
00607 #endif
00608 break;
00609 }
00610 }
00611 name = rb_inspect(name);
00612 rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
00613 } while (0);
00614 return Qnil;
00615 }
00616
00617 static void
00618 get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
00619 {
00620 opt->transliterate = Qundef;
00621 opt->discard_ilseq = Qundef;
00622 if (!NIL_P(options)) {
00623 rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
00624 }
00625 }
00626
/*
 * Issues the iconvctl() request `func` on the descriptor of `self`, passing
 * the address of `val`; a nonzero result raises through rb_sys_fail() with
 * the request name as the message.
 */
#define iconv_ctl(self, func, val) \
    ((iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) != 0) ? \
     rb_sys_fail(#func) : (void)0)
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650 static VALUE
00651 iconv_initialize(int argc, VALUE *argv, VALUE self)
00652 {
00653 VALUE to, from, options;
00654 struct rb_iconv_opt_t opt;
00655 int idx;
00656
00657 rb_scan_args(argc, argv, "21", &to, &from, &options);
00658 get_iconv_opt(&opt, options);
00659 iconv_free(check_iconv(self));
00660 DATA_PTR(self) = NULL;
00661 DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00662 if (idx >= 0) ENCODING_SET(self, idx);
00663 return self;
00664 }
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674 static VALUE
00675 iconv_s_open(int argc, VALUE *argv, VALUE self)
00676 {
00677 VALUE to, from, options, cd;
00678 struct rb_iconv_opt_t opt;
00679 int idx;
00680
00681 rb_scan_args(argc, argv, "21", &to, &from, &options);
00682 get_iconv_opt(&opt, options);
00683 cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00684
00685 self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
00686 if (idx >= 0) ENCODING_SET(self, idx);
00687
00688 if (rb_block_given_p()) {
00689 return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
00690 }
00691 else {
00692 return self;
00693 }
00694 }
00695
00696 static VALUE
00697 iconv_s_convert(struct iconv_env_t* env)
00698 {
00699 VALUE last = 0;
00700
00701 for (; env->argc > 0; --env->argc, ++env->argv) {
00702 VALUE s = iconv_convert(env->cd, last = *(env->argv),
00703 0, -1, env->toidx, env);
00704 env->append(env->ret, s);
00705 }
00706
00707 if (!NIL_P(last)) {
00708 VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
00709 if (RSTRING_LEN(s))
00710 env->append(env->ret, s);
00711 }
00712
00713 return env->ret;
00714 }
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734 static VALUE
00735 iconv_s_iconv(int argc, VALUE *argv, VALUE self)
00736 {
00737 struct iconv_env_t arg;
00738
00739 if (argc < 2)
00740 rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
00741
00742 arg.argc = argc -= 2;
00743 arg.argv = argv + 2;
00744 arg.append = rb_ary_push;
00745 arg.ret = rb_ary_new2(argc);
00746 arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);
00747 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00748 }
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758 static VALUE
00759 iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
00760 {
00761 struct iconv_env_t arg;
00762
00763 arg.argc = 1;
00764 arg.argv = &str;
00765 arg.append = rb_str_append;
00766 arg.ret = rb_str_new(0, 0);
00767 arg.cd = iconv_create(to, from, NULL, &arg.toidx);
00768 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00769 }
00770
00771
00772
00773
00774
00775
00776
00777
#ifdef HAVE_ICONVLIST
/* One group of encoding names as delivered to the iconvlist() callback. */
struct iconv_name_list {
    unsigned int namescount;    /* number of entries in names */
    const char *const *names;   /* the names themselves */
    VALUE array;                /* collecting Array, or 0 to yield instead */
};

/*
 * Runs under rb_protect(): turns one name group into an Array of Strings
 * and either pushes it onto the collecting Array or yields it.
 */
static VALUE
list_iconv_i(VALUE ptr)
{
    struct iconv_name_list *group = (struct iconv_name_list *)ptr;
    const char *const *names = group->names;
    unsigned int count = group->namescount;
    unsigned int k;
    VALUE aliases = rb_ary_new2(count);

    for (k = 0; k < count; k++) {
        rb_ary_push(aliases, rb_str_new2(names[k]));
    }
    if (!group->array) {
        return rb_yield(aliases);
    }
    return rb_ary_push(group->array, aliases);
}

/*
 * iconvlist() callback.  `data` is the two-slot VALUE array set up by
 * iconv_s_list(): the storage of slot 0 receives the rb_protect() state (as
 * an int) and slot 1 holds the collecting Array or 0.  Returns that state.
 */
static int
list_iconv(unsigned int namescount, const char *const *names, void *data)
{
    struct iconv_name_list group;
    int *status = data;

    group.namescount = namescount;
    group.names = names;
    group.array = ((VALUE *)data)[1];
    rb_protect(list_iconv_i, (VALUE)&group, status);
    return *status;
}
#endif
00816
#if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST)
/*
 * Iconv.list
 *
 * With iconvlist(): every group of names is either yielded to the given
 * block or collected into an Array that is returned.  A non-local exit
 * captured inside the callback is re-raised with rb_jump_tag().
 *
 * With __iconv_get_list(): the names are gathered into an Array which is
 * returned, or yielded element by element when a block is given.
 *
 * Returns nil when a block was given (or when the list cannot be obtained).
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Slot 0 doubles as the rb_protect() state written by list_iconv().
     * Clear it first so it is well defined even if the callback never runs.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
#else
#define iconv_s_list rb_f_notimplement
#endif
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866 static VALUE
00867 iconv_init_state(VALUE self)
00868 {
00869 iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
00870 DATA_PTR(self) = NULL;
00871 return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
00872 }
00873
00874 static VALUE
00875 iconv_finish(VALUE self)
00876 {
00877 VALUE cd = check_iconv(self);
00878
00879 if (!cd) return Qnil;
00880 return rb_ensure(iconv_init_state, self, iconv_free, cd);
00881 }
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910 static VALUE
00911 iconv_iconv(int argc, VALUE *argv, VALUE self)
00912 {
00913 VALUE str, n1, n2;
00914 VALUE cd = check_iconv(self);
00915 long start = 0, length = 0, slen = 0;
00916
00917 rb_scan_args(argc, argv, "12", &str, &n1, &n2);
00918 if (!NIL_P(str)) {
00919 VALUE n = rb_str_length(StringValue(str));
00920 slen = NUM2LONG(n);
00921 }
00922 if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
00923 if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
00924 length = NIL_P(n2) ? -1 : NUM2LONG(n2);
00925 }
00926 }
00927 if (start > 0 || length > 0) {
00928 rb_encoding *enc = rb_enc_get(str);
00929 const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
00930 const char *ps = s;
00931 if (start > 0) {
00932 start = (ps = rb_enc_nth(s, e, start, enc)) - s;
00933 }
00934 if (length > 0) {
00935 length = rb_enc_nth(ps, e, length, enc) - ps;
00936 }
00937 }
00938
00939 return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
00940 }
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950 static VALUE
00951 iconv_conv(int argc, VALUE *argv, VALUE self)
00952 {
00953 iconv_t cd = VALUE2ICONV(check_iconv(self));
00954 VALUE str, s;
00955 int toidx = ENCODING_GET(self);
00956
00957 str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00958 if (argc > 0) {
00959 do {
00960 s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
00961 if (RSTRING_LEN(s))
00962 rb_str_buf_append(str, s);
00963 } while (--argc);
00964 s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00965 if (RSTRING_LEN(s))
00966 rb_str_buf_append(str, s);
00967 }
00968
00969 return str;
00970 }
00971
#ifdef ICONV_TRIVIALP
/*
 * Iconv#trivial?
 *
 * Issues the ICONV_TRIVIALP control request and returns true when the flag
 * it reports is nonzero, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, trivial);
    return trivial ? Qtrue : Qfalse;
}
#else
#define iconv_trivialp rb_f_notimplement
#endif
00990
#ifdef ICONV_GET_TRANSLITERATE
/*
 * Iconv#transliterate?
 *
 * Issues the ICONV_GET_TRANSLITERATE control request and returns true when
 * the flag it reports is nonzero, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}
#else
#define iconv_get_transliterate rb_f_notimplement
#endif
01009
#ifdef ICONV_SET_TRANSLITERATE
/*
 * Iconv#transliterate=(flag)
 *
 * Passes the truthiness of `flag` to the ICONV_SET_TRANSLITERATE control
 * request.  Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}
#else
#define iconv_set_transliterate rb_f_notimplement
#endif
01027
#ifdef ICONV_GET_DISCARD_ILSEQ
/*
 * Iconv#discard_ilseq?
 *
 * Issues the ICONV_GET_DISCARD_ILSEQ control request and returns true when
 * the flag it reports is nonzero, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}
#else
#define iconv_get_discard_ilseq rb_f_notimplement
#endif
01046
#ifdef ICONV_SET_DISCARD_ILSEQ
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Passes the truthiness of `flag` to the ICONV_SET_DISCARD_ILSEQ control
 * request.  Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}
#else
#define iconv_set_discard_ilseq rb_f_notimplement
#endif
01064
01065
01066
01067
01068
01069
01070
01071 static VALUE
01072 iconv_s_ctlmethods(VALUE klass)
01073 {
01074 VALUE ary = rb_ary_new();
01075 #ifdef ICONV_TRIVIALP
01076 rb_ary_push(ary, ID2SYM(rb_intern("trivial?")));
01077 #endif
01078 #ifdef ICONV_GET_TRANSLITERATE
01079 rb_ary_push(ary, ID2SYM(rb_intern("transliterate?")));
01080 #endif
01081 #ifdef ICONV_SET_TRANSLITERATE
01082 rb_ary_push(ary, ID2SYM(rb_intern("transliterate=")));
01083 #endif
01084 #ifdef ICONV_GET_DISCARD_ILSEQ
01085 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?")));
01086 #endif
01087 #ifdef ICONV_SET_DISCARD_ILSEQ
01088 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq=")));
01089 #endif
01090 return ary;
01091 }
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108 static VALUE
01109 iconv_failure_success(VALUE self)
01110 {
01111 return rb_attr_get(self, rb_success);
01112 }
01113
01114
01115
01116
01117
01118
01119
01120
01121 static VALUE
01122 iconv_failure_failed(VALUE self)
01123 {
01124 return rb_attr_get(self, rb_failed);
01125 }
01126
01127
01128
01129
01130
01131
01132
01133 static VALUE
01134 iconv_failure_inspect(VALUE self)
01135 {
01136 const char *cname = rb_class2name(CLASS_OF(self));
01137 VALUE success = rb_attr_get(self, rb_success);
01138 VALUE failed = rb_attr_get(self, rb_failed);
01139 VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
01140 str = rb_str_buf_cat(str, ": ", 2);
01141 str = rb_str_buf_append(str, rb_inspect(success));
01142 str = rb_str_buf_cat(str, ", ", 2);
01143 str = rb_str_buf_append(str, rb_inspect(failed));
01144 return rb_str_buf_cat(str, ">", 1);
01145 }
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181 void
01182 Init_iconv(void)
01183 {
01184 VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
01185
01186 rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
01187 rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1);
01188 rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
01189 rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
01190 rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0);
01191 rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0);
01192 rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1);
01193 rb_define_method(rb_cIconv, "close", iconv_finish, 0);
01194 rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
01195 rb_define_method(rb_cIconv, "conv", iconv_conv, -1);
01196 rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0);
01197 rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0);
01198 rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1);
01199 rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0);
01200 rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1);
01201
01202 rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
01203 rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
01204 rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
01205 rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
01206 rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
01207
01208 rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
01209 rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
01210 rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
01211 rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
01212 rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError);
01213 rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure);
01214 rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
01215 rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
01216 rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
01217 rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure);
01218
01219 rb_success = rb_intern("success");
01220 rb_failed = rb_intern("failed");
01221 id_transliterate = rb_intern("transliterate");
01222 id_discard_ilseq = rb_intern("discard_ilseq");
01223
01224 rb_gc_register_address(&charset_map);
01225 charset_map = rb_hash_new();
01226 rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
01227 }
01228
01229