ext/iconv/iconv.c

Go to the documentation of this file.
00001 /* -*- mode:c; c-file-style:"ruby" -*- */
00002 /**********************************************************************
00003 
00004   iconv.c -
00005 
00006   $Author: nobu $
00007   created at: Wed Dec  1 20:28:09 JST 1999
00008 
00009   All the files in this distribution are covered under the Ruby's
00010   license (see the file COPYING).
00011 
00012   Documentation by Yukihiro Matsumoto and Gavin Sinclair.
00013 
00014 **********************************************************************/
00015 
00016 #include "ruby/ruby.h"
00017 #include <errno.h>
00018 #include <iconv.h>
00019 #include <assert.h>
00020 #include "ruby/st.h"
00021 #include "ruby/encoding.h"
00022 
00023 /*
00024  * Document-class: Iconv
00025  *
00026  * == Summary
00027  *
00028  * Ruby extension for charset conversion.
00029  *
00030  * == Abstract
00031  *
00032  * Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family,
00033  * which translates string between various encoding systems.
00034  *
00035  * See Open Group's on-line documents for more details.
00036  * * <tt>iconv.h</tt>:       http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
00037  * * <tt>iconv_open()</tt>:  http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
00038  * * <tt>iconv()</tt>:       http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
00039  * * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
00040  *
00041  * Which coding systems are available is platform-dependent.
00042  *
00043  * == Examples
00044  *
00045  * 1. Simple conversion between two charsets.
00046  *
00047  *      converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
00048  *
00049  * 2. Instantiate a new Iconv and use method Iconv#iconv.
00050  *
00051  *      cd = Iconv.new(to, from)
00052  *      begin
00053  *        input.each { |s| output << cd.iconv(s) }
00054  *        output << cd.iconv(nil)                   # Don't forget this!
00055  *      ensure
00056  *        cd.close
00057  *      end
00058  *
00059  * 3. Invoke Iconv.open with a block.
00060  *
00061  *      Iconv.open(to, from) do |cd|
00062  *        input.each { |s| output << cd.iconv(s) }
00063  *        output << cd.iconv(nil)
00064  *      end
00065  *
00066  * 4. Shorthand for (3).
00067  *
00068  *      Iconv.iconv(to, from, *input.to_a)
00069  *
00070  * == Attentions
00071  *
00072  * Even if some implementation-dependent extensions are useful,
00073  * DON'T USE those extensions in libraries and scripts that are to be widely distributed.
00074  * If you want to use those features, use String#encode.
00075  */
00076 
/*
 * A descriptor is kept in a VALUE-sized slot with every bit inverted, so
 * that the invalid descriptor (iconv_t)-1 is stored as 0 and a zero slot
 * reads back as (iconv_t)-1.  This relies on VALUE being wide enough to
 * hold an iconv_t.
 */
#define VALUE2ICONV(v) ((iconv_t)(~(VALUE)(v)))
#define ICONV2VALUE(c) (~(VALUE)(c))
00081 
00082 struct iconv_env_t
00083 {
00084     iconv_t cd;
00085     int argc;
00086     VALUE *argv;
00087     VALUE ret;
00088     int toidx;
00089     VALUE (*append)_((VALUE, VALUE));
00090 };
00091 
/* Converter options filled in by get_iconv_opt(); a member left at Qundef was not supplied. */
00092 struct rb_iconv_opt_t
00093 {
00094     VALUE transliterate;   /* value given for the transliterate option */
00095     VALUE discard_ilseq;   /* value given for the discard_ilseq option */
00096 };
00097 
/* IDs compared against symbol option names in get_iconv_opt_i(). */
00098 static ID id_transliterate, id_discard_ilseq;
00099 
/* Exception classes returned by iconv_try() or raised through iconv_fail(). */
00100 static VALUE rb_eIconvInvalidEncoding;
00101 static VALUE rb_eIconvFailure;
00102 static VALUE rb_eIconvIllegalSeq;
00103 static VALUE rb_eIconvInvalidChar;
00104 static VALUE rb_eIconvOutOfRange;
00105 static VALUE rb_eIconvBrokenLibrary;
00106 
/* Instance-variable IDs set on a failure object by iconv_failure_initialize(). */
00107 static ID rb_success, rb_failed;
/* Failure construction and accessors. */
00108 static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00109 static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00110 static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
00111 static VALUE iconv_failure_success _((VALUE self));
00112 static VALUE iconv_failure_failed _((VALUE self));
00113 
/* Descriptor lifetime, the conversion core, and the Ruby-visible methods. */
00114 static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
00115 static void iconv_dfree _((void *cd));
00116 static VALUE iconv_free _((VALUE cd));
00117 static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
00118 static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
00119 static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
00120                               struct iconv_env_t* env));
00121 static VALUE iconv_s_allocate _((VALUE klass));
00122 static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
00123 static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
00124 static VALUE iconv_s_convert _((struct iconv_env_t* env));
00125 static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
00126 static VALUE iconv_init_state _((VALUE cd));
00127 static VALUE iconv_finish _((VALUE self));
00128 static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
00129 static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
00130 
/* Hash consulted by map_charset(); returned as-is by Iconv.charset_map. */
00131 static VALUE charset_map;
00132 
00133 /*
00134  * Document-method: charset_map
00135  * call-seq: Iconv.charset_map
00136  *
00137  * Returns the map from canonical name to system dependent name.
00138  */
00139 static VALUE
00140 charset_map_get(void)
00141 {
00142     return charset_map;
00143 }
00144 
00145 static VALUE
00146 strip_glibc_option(VALUE *code)
00147 {
00148     VALUE val = StringValue(*code);
00149     const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
00150     const char *slash = memchr(ptr, '/', pend - ptr);
00151 
00152     if (slash && slash < pend - 1 && slash[1] ==  '/') {
00153         VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
00154         val = rb_str_subseq(val, 0, slash - ptr);
00155         *code = val;
00156         return opt;
00157     }
00158     return 0;
00159 }
00160 
00161 static char *
00162 map_charset(VALUE *code)
00163 {
00164     VALUE val = StringValue(*code);
00165 
00166     if (RHASH_SIZE(charset_map)) {
00167         VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
00168         StringValuePtr(key);
00169         if (st_lookup(RHASH_TBL(charset_map), key, &val)) {
00170             *code = val;
00171         }
00172     }
00173     return StringValuePtr(*code);
00174 }
00175 
00176 NORETURN(static void rb_iconv_sys_fail(const char *s));
00177 static void
00178 rb_iconv_sys_fail(const char *s)
00179 {
00180     if (errno == 0) {
00181         rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s));
00182     }
00183     rb_sys_fail(s);
00184 }
00185 
00186 #define rb_sys_fail(s) rb_iconv_sys_fail(s)
00187 
00188 static iconv_t
00189 iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
00190 {
00191     VALUE toopt = strip_glibc_option(&to);
00192     VALUE fromopt = strip_glibc_option(&from);
00193     VALUE toenc = 0, fromenc = 0;
00194     const char* tocode = map_charset(&to);
00195     const char* fromcode = map_charset(&from);
00196     iconv_t cd;
00197     int retry = 0;
00198 
00199     *idx = rb_enc_find_index(tocode);
00200 
00201     if (toopt) {
00202         toenc = rb_str_plus(to, toopt);
00203         tocode = RSTRING_PTR(toenc);
00204     }
00205     if (fromopt) {
00206         fromenc = rb_str_plus(from, fromopt);
00207         fromcode = RSTRING_PTR(fromenc);
00208     }
00209     while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
00210         int inval = 0;
00211         switch (errno) {
00212           case EMFILE:
00213           case ENFILE:
00214           case ENOMEM:
00215             if (!retry++) {
00216                 rb_gc();
00217                 continue;
00218             }
00219             break;
00220           case EINVAL:
00221             retry = 0;
00222             inval = 1;
00223             if (toenc) {
00224                 tocode = RSTRING_PTR(to);
00225                 rb_str_resize(toenc, 0);
00226                 toenc = 0;
00227                 continue;
00228             }
00229             if (fromenc) {
00230                 fromcode = RSTRING_PTR(from);
00231                 rb_str_resize(fromenc, 0);
00232                 fromenc = 0;
00233                 continue;
00234             }
00235             break;
00236         }
00237         {
00238             const char *s = inval ? "invalid encoding " : "iconv";
00239             volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) +
00240                                             RSTRING_LEN(from) + 8);
00241 
00242             sprintf(RSTRING_PTR(msg), "%s(\"%s\", \"%s\")",
00243                     s, RSTRING_PTR(to), RSTRING_PTR(from));
00244             s = RSTRING_PTR(msg);
00245             rb_str_set_len(msg, strlen(s));
00246             if (!inval) rb_sys_fail(s);
00247             rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
00248                                     rb_ary_new3(2, to, from), NULL, s));
00249         }
00250     }
00251 
00252     if (toopt || fromopt) {
00253         if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
00254             fromopt = 0;
00255         }
00256         if (toopt && fromopt) {
00257             rb_warning("encoding option isn't portable: %s, %s",
00258                        RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
00259         }
00260         else {
00261             rb_warning("encoding option isn't portable: %s",
00262                        (toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
00263         }
00264     }
00265 
00266     if (opt) {
00267 #ifdef ICONV_SET_TRANSLITERATE
00268         if (opt->transliterate != Qundef) {
00269             int flag = RTEST(opt->transliterate);
00270             rb_warning("encoding option isn't portable: transliterate");
00271             if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
00272                 rb_sys_fail("ICONV_SET_TRANSLITERATE");
00273         }
00274 #endif
00275 #ifdef ICONV_SET_DISCARD_ILSEQ
00276         if (opt->discard_ilseq != Qundef) {
00277             int flag = RTEST(opt->discard_ilseq);
00278             rb_warning("encoding option isn't portable: discard_ilseq");
00279             if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
00280                 rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
00281         }
00282 #endif
00283     }
00284 
00285     return cd;
00286 }
00287 
/* Data free function: closes the descriptor stored (bit-inverted) in +cd+. */
static void
iconv_dfree(void *cd)
{
    iconv_t handle = VALUE2ICONV(cd);

    iconv_close(handle);
}

/* check_iconv() recognizes Iconv objects by this free function. */
#define ICONV_FREE iconv_dfree
00295 
00296 static VALUE
00297 iconv_free(VALUE cd)
00298 {
00299     if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
00300         rb_sys_fail("iconv_close");
00301     return Qnil;
00302 }
00303 
00304 static VALUE
00305 check_iconv(VALUE obj)
00306 {
00307     Check_Type(obj, T_DATA);
00308     if (RDATA(obj)->dfree != ICONV_FREE) {
00309         rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
00310     }
00311     return (VALUE)DATA_PTR(obj);
00312 }
00313 
00314 static VALUE
00315 iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
00316 {
00317 #ifdef ICONV_INPTR_CONST
00318 #define ICONV_INPTR_CAST
00319 #else
00320 #define ICONV_INPTR_CAST (char **)
00321 #endif
00322     size_t ret;
00323 
00324     errno = 0;
00325     ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
00326     if (ret == (size_t)-1) {
00327         if (!*inlen)
00328             return Qfalse;
00329         switch (errno) {
00330           case E2BIG:
00331             /* try the left in next loop */
00332             break;
00333           case EILSEQ:
00334             return rb_eIconvIllegalSeq;
00335           case EINVAL:
00336             return rb_eIconvInvalidChar;
00337           case 0:
00338             return rb_eIconvBrokenLibrary;
00339           default:
00340             rb_sys_fail("iconv");
00341         }
00342     }
00343     else if (*inlen > 0) {
00344         /* something goes wrong */
00345         return rb_eIconvIllegalSeq;
00346     }
00347     else if (ret) {
00348         return Qnil;            /* conversion */
00349     }
00350     return Qfalse;
00351 }
00352 
00353 #define FAILED_MAXLEN 16
00354 
00355 static VALUE
00356 iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
00357 {
00358     rb_call_super(1, &mesg);
00359     rb_ivar_set(error, rb_success, success);
00360     rb_ivar_set(error, rb_failed, failed);
00361     return error;
00362 }
00363 
00364 static VALUE
00365 iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00366 {
00367     VALUE args[3];
00368 
00369     if (mesg && *mesg) {
00370         args[0] = rb_str_new2(mesg);
00371     }
00372     else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
00373         args[0] = rb_inspect(failed);
00374     }
00375     else {
00376         args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
00377         rb_str_cat2(args[0], "...");
00378     }
00379     args[1] = success;
00380     args[2] = failed;
00381     if (env) {
00382         args[1] = env->append(rb_obj_dup(env->ret), success);
00383         if (env->argc > 0) {
00384             *(env->argv) = failed;
00385             args[2] = rb_ary_new4(env->argc, env->argv);
00386         }
00387     }
00388     return rb_class_new_instance(3, args, error);
00389 }
00390 
00391 static VALUE
00392 iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00393 {
00394     error = iconv_fail(error, success, failed, env, mesg);
00395     if (!rb_block_given_p()) rb_exc_raise(error);
00396     rb_set_errinfo(error);
00397     return rb_yield(failed);
00398 }
00399 
00400 static VALUE
00401 rb_str_derive(VALUE str, const char* ptr, long len)
00402 {
00403     VALUE ret;
00404 
00405     if (NIL_P(str))
00406         return rb_str_new(ptr, len);
00407     if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
00408         ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
00409     else
00410         ret = rb_str_new(ptr, len);
00411     OBJ_INFECT(ret, str);
00412     return ret;
00413 }
00414 
00415 static VALUE
00416 iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
00417 {
00418     VALUE ret = Qfalse;
00419     VALUE error = Qfalse;
00420     VALUE rescue;
00421     const char *inptr, *instart;
00422     size_t inlen;
00423     /* I believe ONE CHARACTER never exceed this. */
00424     char buffer[BUFSIZ];
00425     char *outptr;
00426     size_t outlen;
00427 
00428     if (cd == (iconv_t)-1)
00429         rb_raise(rb_eArgError, "closed iconv");
00430 
00431     if (NIL_P(str)) {
00432         /* Reset output pointer or something. */
00433         inptr = "";
00434         inlen = 0;
00435         outptr = buffer;
00436         outlen = sizeof(buffer);
00437         error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00438         if (RTEST(error)) {
00439             unsigned int i;
00440             rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0);
00441             if (TYPE(rescue) == T_ARRAY) {
00442                 str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
00443             }
00444             if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
00445                 char c = i;
00446                 str = rb_str_new(&c, 1);
00447             }
00448             else if (!NIL_P(str)) {
00449                 StringValue(str);
00450             }
00451         }
00452 
00453         inptr = NULL;
00454         length = 0;
00455     }
00456     else {
00457         long slen;
00458 
00459         StringValue(str);
00460         slen = RSTRING_LEN(str);
00461         inptr = RSTRING_PTR(str);
00462 
00463         inptr += start;
00464         if (length < 0 || length > start + slen)
00465             length = slen - start;
00466     }
00467     instart = inptr;
00468     inlen = length;
00469 
00470     do {
00471         char errmsg[50];
00472         const char *tmpstart = inptr;
00473         outptr = buffer;
00474         outlen = sizeof(buffer);
00475 
00476         errmsg[0] = 0;
00477         error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00478 
00479         if (
00480 #if SIGNEDNESS_OF_SIZE_T < 0
00481             0 <= outlen &&
00482 #endif
00483             outlen <= sizeof(buffer)) {
00484             outlen = sizeof(buffer) - outlen;
00485             if (NIL_P(error) || /* something converted */
00486                 outlen > (size_t)(inptr - tmpstart) || /* input can't contain output */
00487                 (outlen < (size_t)(inptr - tmpstart) && inlen > 0) || /* something skipped */
00488                 memcmp(buffer, tmpstart, outlen)) /* something differs */
00489             {
00490                 if (NIL_P(str)) {
00491                     ret = rb_str_new(buffer, outlen);
00492                     if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00493                 }
00494                 else {
00495                     if (ret) {
00496                         ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
00497                     }
00498                     else {
00499                         ret = rb_str_new(instart, tmpstart - instart);
00500                         if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00501                         OBJ_INFECT(ret, str);
00502                     }
00503                     ret = rb_str_buf_cat(ret, buffer, outlen);
00504                     instart = inptr;
00505                 }
00506             }
00507             else if (!inlen) {
00508                 inptr = tmpstart + outlen;
00509             }
00510         }
00511         else {
00512             /* Some iconv() have a bug, return *outlen out of range */
00513             sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
00514             error = rb_eIconvOutOfRange;
00515         }
00516 
00517         if (RTEST(error)) {
00518             long len = 0;
00519 
00520             if (!ret) {
00521                 ret = rb_str_derive(str, instart, inptr - instart);
00522                 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00523             }
00524             else if (inptr > instart) {
00525                 rb_str_cat(ret, instart, inptr - instart);
00526             }
00527             str = rb_str_derive(str, inptr, inlen);
00528             rescue = iconv_fail_retry(error, ret, str, env, errmsg);
00529             if (TYPE(rescue) == T_ARRAY) {
00530                 if ((len = RARRAY_LEN(rescue)) > 0)
00531                     rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
00532                 if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
00533                     StringValue(str);
00534                     inlen = length = RSTRING_LEN(str);
00535                     instart = inptr = RSTRING_PTR(str);
00536                     continue;
00537                 }
00538             }
00539             else if (!NIL_P(rescue)) {
00540                 rb_str_concat(ret, rescue);
00541             }
00542             break;
00543         }
00544     } while (inlen > 0);
00545 
00546     if (!ret) {
00547         ret = rb_str_derive(str, instart, inptr - instart);
00548         if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00549     }
00550     else if (inptr > instart) {
00551         rb_str_cat(ret, instart, inptr - instart);
00552     }
00553     return ret;
00554 }
00555 
00556 static VALUE
00557 iconv_s_allocate(VALUE klass)
00558 {
00559     return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
00560 }
00561 
00562 static VALUE
00563 get_iconv_opt_i(VALUE i, VALUE arg)
00564 {
00565     struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
00566     VALUE name, val;
00567 
00568     (void)opt;
00569     i = rb_Array(i);
00570     name = rb_ary_entry(i, 0);
00571     val = rb_ary_entry(i, 1);
00572     do {
00573         if (SYMBOL_P(name)) {
00574             ID id = SYM2ID(name);
00575             if (id == id_transliterate) {
00576 #ifdef ICONV_SET_TRANSLITERATE
00577                 opt->transliterate = val;
00578 #else
00579                 rb_notimplement();
00580 #endif
00581                 break;
00582             }
00583             if (id == id_discard_ilseq) {
00584 #ifdef ICONV_SET_DISCARD_ILSEQ
00585                 opt->discard_ilseq = val;
00586 #else
00587                 rb_notimplement();
00588 #endif
00589                 break;
00590             }
00591         }
00592         else {
00593             const char *s = StringValueCStr(name);
00594             if (strcmp(s, "transliterate") == 0) {
00595 #ifdef ICONV_SET_TRANSLITERATE
00596                 opt->transliterate = val;
00597 #else
00598                 rb_notimplement();
00599 #endif
00600                 break;
00601             }
00602             if (strcmp(s, "discard_ilseq") == 0) {
00603 #ifdef ICONV_SET_DISCARD_ILSEQ
00604                 opt->discard_ilseq = val;
00605 #else
00606                 rb_notimplement();
00607 #endif
00608                 break;
00609             }
00610         }
00611         name = rb_inspect(name);
00612         rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
00613     } while (0);
00614     return Qnil;
00615 }
00616 
00617 static void
00618 get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
00619 {
00620     opt->transliterate = Qundef;
00621     opt->discard_ilseq = Qundef;
00622     if (!NIL_P(options)) {
00623         rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
00624     }
00625 }
00626 
/*
 * Issues the iconvctl() request +func+ on the descriptor of +self+ with
 * the address of +val+ as argument; a non-zero result raises through
 * rb_sys_fail with the request name as message.
 */
#define iconv_ctl(self, func, val) \
    ((iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) != 0) ? \
     rb_sys_fail(#func) : (void)0)
00630 
00631 /*
00632  * Document-method: new
00633  * call-seq: Iconv.new(to, from, [options])
00634  *
00635  * Creates new code converter from a coding-system designated with +from+
00636  * to another one designated with +to+.
00637  *
00638  * === Parameters
00639  *
00640  * +to+::   encoding name for destination
00641  * +from+:: encoding name for source
00642  * +options+:: options for converter
00643  *
00644  * === Exceptions
00645  *
00646  * TypeError::       if +to+ or +from+ aren't String
00647  * InvalidEncoding:: if the designated converter could not be found
00648  * SystemCallError:: if <tt>iconv_open(3)</tt> fails
00649  */
00650 static VALUE
00651 iconv_initialize(int argc, VALUE *argv, VALUE self)
00652 {
00653     VALUE to, from, options;
00654     struct rb_iconv_opt_t opt;
00655     int idx;
00656 
00657     rb_scan_args(argc, argv, "21", &to, &from, &options);
00658     get_iconv_opt(&opt, options);
00659     iconv_free(check_iconv(self));
00660     DATA_PTR(self) = NULL;
00661     DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00662     if (idx >= 0) ENCODING_SET(self, idx);
00663     return self;
00664 }
00665 
00666 /*
00667  * Document-method: open
00668  * call-seq: Iconv.open(to, from) { |iconv| ... }
00669  *
00670  * Equivalent to Iconv.new except that when it is called with a block, it
00671  * yields with the new instance and closes it, and returns the result which
00672  * returned from the block.
00673  */
00674 static VALUE
00675 iconv_s_open(int argc, VALUE *argv, VALUE self)
00676 {
00677     VALUE to, from, options, cd;
00678     struct rb_iconv_opt_t opt;
00679     int idx;
00680 
00681     rb_scan_args(argc, argv, "21", &to, &from, &options);
00682     get_iconv_opt(&opt, options);
00683     cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00684 
00685     self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
00686     if (idx >= 0) ENCODING_SET(self, idx);
00687 
00688     if (rb_block_given_p()) {
00689         return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
00690     }
00691     else {
00692         return self;
00693     }
00694 }
00695 
00696 static VALUE
00697 iconv_s_convert(struct iconv_env_t* env)
00698 {
00699     VALUE last = 0;
00700 
00701     for (; env->argc > 0; --env->argc, ++env->argv) {
00702         VALUE s = iconv_convert(env->cd, last = *(env->argv),
00703                                 0, -1, env->toidx, env);
00704         env->append(env->ret, s);
00705     }
00706 
00707     if (!NIL_P(last)) {
00708         VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
00709         if (RSTRING_LEN(s))
00710             env->append(env->ret, s);
00711     }
00712 
00713     return env->ret;
00714 }
00715 
00716 /*
00717  * Document-method: Iconv::iconv
00718  * call-seq: Iconv.iconv(to, from, *strs)
00719  *
00720  * Shorthand for
00721  *   Iconv.open(to, from) { |cd|
00722  *     (strs + [nil]).collect { |s| cd.iconv(s) }
00723  *   }
00724  *
00725  * === Parameters
00726  *
00727  * <tt>to, from</tt>:: see Iconv.new
00728  * <tt>strs</tt>:: strings to be converted
00729  *
00730  * === Exceptions
00731  *
00732  * Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
00733  */
00734 static VALUE
00735 iconv_s_iconv(int argc, VALUE *argv, VALUE self)
00736 {
00737     struct iconv_env_t arg;
00738 
00739     if (argc < 2)               /* needs `to' and `from' arguments at least */
00740         rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
00741 
00742     arg.argc = argc -= 2;
00743     arg.argv = argv + 2;
00744     arg.append = rb_ary_push;
00745     arg.ret = rb_ary_new2(argc);
00746     arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);
00747     return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00748 }
00749 
00750 /*
00751  * Document-method: Iconv::conv
00752  * call-seq: Iconv.conv(to, from, str)
00753  *
00754  * Shorthand for
00755  *   Iconv.iconv(to, from, str).join
00756  * See Iconv.iconv.
00757  */
00758 static VALUE
00759 iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
00760 {
00761     struct iconv_env_t arg;
00762 
00763     arg.argc = 1;
00764     arg.argv = &str;
00765     arg.append = rb_str_append;
00766     arg.ret = rb_str_new(0, 0);
00767     arg.cd = iconv_create(to, from, NULL, &arg.toidx);
00768     return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00769 }
00770 
00771 /*
00772  * Document-method: list
00773  * call-seq: Iconv.list {|*aliases| ... }
00774  *
00775  * Iterates over each set of aliases.
00776  */
00777 
00778 #ifdef HAVE_ICONVLIST
/* One alias set as delivered to the iconvlist() callback, plus the optional result array. */
00779 struct iconv_name_list
00780 {
00781     unsigned int namescount;      /* number of entries in names */
00782     const char *const *names;     /* alias names of one encoding */
00783     VALUE array;                  /* array collecting the sets, or 0 to yield them */
00784 };
00785 
00786 static VALUE
00787 list_iconv_i(VALUE ptr)
00788 {
00789     struct iconv_name_list *p = (struct iconv_name_list *)ptr;
00790     unsigned int i, namescount = p->namescount;
00791     const char *const *names = p->names;
00792     VALUE ary = rb_ary_new2(namescount);
00793 
00794     for (i = 0; i < namescount; i++) {
00795         rb_ary_push(ary, rb_str_new2(names[i]));
00796     }
00797     if (p->array) {
00798         return rb_ary_push(p->array, ary);
00799     }
00800     return rb_yield(ary);
00801 }
00802 
00803 static int
00804 list_iconv(unsigned int namescount, const char *const *names, void *data)
00805 {
00806     int *state = data;
00807     struct iconv_name_list list;
00808 
00809     list.namescount = namescount;
00810     list.names = names;
00811     list.array = ((VALUE *)data)[1];
00812     rb_protect(list_iconv_i, (VALUE)&list, state);
00813     return *state;
00814 }
00815 #endif
00816 
#if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST)
/*
 * Implements Iconv.list.  With a block each alias set (or name) is
 * yielded and nil is returned; without a block the collected array is
 * returned.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /* Slot 0 is read back below as the rb_protect() state.  It is only
       written by the callback, so it must start out as "no error" in case
       iconvlist() never invokes the callback. */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy the names into Ruby strings before releasing the C list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
#else
#define iconv_s_list rb_f_notimplement
#endif
00854 
00855 /*
00856  * Document-method: close
00857  *
00858  * Finishes conversion.
00859  *
00860  * After calling this, calling Iconv#iconv will cause an exception, but
00861  * multiple calls of #close are guaranteed to end successfully.
00862  *
00863  * Returns a string containing the byte sequence to change the output buffer to
00864  * its initial shift state.
00865  */
00866 static VALUE
00867 iconv_init_state(VALUE self)
00868 {
00869     iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
00870     DATA_PTR(self) = NULL;
00871     return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
00872 }
00873 
00874 static VALUE
00875 iconv_finish(VALUE self)
00876 {
00877     VALUE cd = check_iconv(self);
00878 
00879     if (!cd) return Qnil;
00880     return rb_ensure(iconv_init_state, self, iconv_free, cd);
00881 }
00882 
00883 /*
00884  * Document-method: Iconv#iconv
00885  * call-seq: iconv(str, start=0, length=-1)
00886  *
00887  * Converts string and returns the result.
00888  * * If +str+ is a String, converts <tt>str[start, length]</tt> and returns the converted string.
00889  * * If +str+ is +nil+, places converter itself into initial shift state and
00890  *   just returns a string containing the byte sequence to change the output
00891  *   buffer to its initial shift state.
00892  * * Otherwise, raises an exception.
00893  *
00894  * === Parameters
00895  *
00896  * str::    string to be converted, or nil
00897  * start::  starting offset
00898  * length:: conversion length; nil or -1 means the whole string from start
00899  *
00900  * === Exceptions
00901  *
00902  * * IconvIllegalSequence
00903  * * IconvInvalidCharacter
00904  * * IconvOutOfRange
00905  *
00906  * === Examples
00907  *
00908  * See the Iconv documentation.
00909  */
00910 static VALUE
00911 iconv_iconv(int argc, VALUE *argv, VALUE self)
00912 {
00913     VALUE str, n1, n2;
00914     VALUE cd = check_iconv(self);
00915     long start = 0, length = 0, slen = 0;
00916 
00917     rb_scan_args(argc, argv, "12", &str, &n1, &n2);
00918     if (!NIL_P(str)) {
00919         VALUE n = rb_str_length(StringValue(str));
00920         slen = NUM2LONG(n);
00921     }
00922     if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
00923         if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
00924             length = NIL_P(n2) ? -1 : NUM2LONG(n2);
00925         }
00926     }
00927     if (start > 0 || length > 0) {
00928         rb_encoding *enc = rb_enc_get(str);
00929         const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
00930         const char *ps = s;
00931         if (start > 0) {
00932             start = (ps = rb_enc_nth(s, e, start, enc)) - s;
00933         }
00934         if (length > 0) {
00935             length = rb_enc_nth(ps, e, length, enc) - ps;
00936         }
00937     }
00938 
00939     return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
00940 }
00941 
00942 /*
00943  * Document-method: conv
00944  * call-seq: conv(str...)
00945  *
00946  * Equivalent to
00947  *
00948  *   iconv(nil, str..., nil).join
00949  */
00950 static VALUE
00951 iconv_conv(int argc, VALUE *argv, VALUE self)
00952 {
00953     iconv_t cd = VALUE2ICONV(check_iconv(self));
00954     VALUE str, s;
00955     int toidx = ENCODING_GET(self);
00956 
00957     str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00958     if (argc > 0) {
00959         do {
00960             s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
00961             if (RSTRING_LEN(s))
00962                 rb_str_buf_append(str, s);
00963         } while (--argc);
00964         s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00965         if (RSTRING_LEN(s))
00966             rb_str_buf_append(str, s);
00967     }
00968 
00969     return str;
00970 }
00971 
#ifdef ICONV_TRIVIALP
/*
 * Document-method: trivial?
 * call-seq: trivial?
 *
 * Queries the converter with the ICONV_TRIVIALP request and returns
 * +true+ if the resulting flag is non-zero, +false+ otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, trivial);
    return trivial ? Qtrue : Qfalse;
}
#else
/* ICONV_TRIVIALP is not available: the method name maps to rb_f_notimplement. */
#define iconv_trivialp rb_f_notimplement
#endif
00990 
#ifdef ICONV_GET_TRANSLITERATE
/*
 * Document-method: transliterate?
 * call-seq: transliterate?
 *
 * Queries the converter with the ICONV_GET_TRANSLITERATE request and
 * returns +true+ if the resulting flag is non-zero, +false+ otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int trans = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, trans);
    return trans ? Qtrue : Qfalse;
}
#else
/* ICONV_GET_TRANSLITERATE is not available: the method name maps to rb_f_notimplement. */
#define iconv_get_transliterate rb_f_notimplement
#endif
01009 
#ifdef ICONV_SET_TRANSLITERATE
/*
 * Document-method: transliterate=
 * call-seq: cd.transliterate = flag
 *
 * Passes the truthiness of +flag+ to the converter with the
 * ICONV_SET_TRANSLITERATE request.  The C function returns the receiver.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    /* Reduce the Ruby value to a plain 0/1 integer for the control call. */
    int trans = RTEST(transliterate) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, trans);
    return self;
}
#else
/* ICONV_SET_TRANSLITERATE is not available: the method name maps to rb_f_notimplement. */
#define iconv_set_transliterate rb_f_notimplement
#endif
01027 
#ifdef ICONV_GET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq?
 * call-seq: discard_ilseq?
 *
 * Queries the converter with the ICONV_GET_DISCARD_ILSEQ request and
 * returns +true+ if the resulting flag is non-zero, +false+ otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int dis = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, dis);
    return dis ? Qtrue : Qfalse;
}
#else
/* ICONV_GET_DISCARD_ILSEQ is not available: the method name maps to rb_f_notimplement. */
#define iconv_get_discard_ilseq rb_f_notimplement
#endif
01046 
#ifdef ICONV_SET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq=
 * call-seq: cd.discard_ilseq = flag
 *
 * Passes the truthiness of +flag+ to the converter with the
 * ICONV_SET_DISCARD_ILSEQ request.  The C function returns the receiver.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    /* Reduce the Ruby value to a plain 0/1 integer for the control call. */
    int dis = RTEST(discard_ilseq) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, dis);
    return self;
}
#else
/* ICONV_SET_DISCARD_ILSEQ is not available: the method name maps to rb_f_notimplement. */
#define iconv_set_discard_ilseq rb_f_notimplement
#endif
01064 
01065 /*
01066  * Document-method: ctlmethods
01067  * call-seq: Iconv.ctlmethods => array
01068  *
01069  * Returns available iconvctl() method list.
01070  */
01071 static VALUE
01072 iconv_s_ctlmethods(VALUE klass)
01073 {
01074     VALUE ary = rb_ary_new();
01075 #ifdef ICONV_TRIVIALP
01076     rb_ary_push(ary, ID2SYM(rb_intern("trivial?")));
01077 #endif
01078 #ifdef ICONV_GET_TRANSLITERATE
01079     rb_ary_push(ary, ID2SYM(rb_intern("transliterate?")));
01080 #endif
01081 #ifdef ICONV_SET_TRANSLITERATE
01082     rb_ary_push(ary, ID2SYM(rb_intern("transliterate=")));
01083 #endif
01084 #ifdef ICONV_GET_DISCARD_ILSEQ
01085     rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?")));
01086 #endif
01087 #ifdef ICONV_SET_DISCARD_ILSEQ
01088     rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq=")));
01089 #endif
01090     return ary;
01091 }
01092 
01093 /*
01094  * Document-class: Iconv::Failure
01095  *
01096  * Base attributes for Iconv exceptions.
01097  */
01098 
01099 /*
01100  * Document-method: success
01101  * call-seq: success
01102  *
01103  * Returns string(s) translated successfully until the exception occurred.
01104  * * If the failure occurred within Iconv.iconv, the returned value is
01105  *   an array of the strings translated successfully before the failure,
01106  *   and its last element is the string that was being translated.
01107  */
01108 static VALUE
01109 iconv_failure_success(VALUE self)
01110 {
01111     return rb_attr_get(self, rb_success);
01112 }
01113 
01114 /*
01115  * Document-method: failed
01116  * call-seq: failed
01117  *
01118  * Returns the substring of the original string passed to Iconv that starts
01119  * at the character that caused the exception.
01120  */
01121 static VALUE
01122 iconv_failure_failed(VALUE self)
01123 {
01124     return rb_attr_get(self, rb_failed);
01125 }
01126 
01127 /*
01128  * Document-method: inspect
01129  * call-seq: inspect
01130  *
01131  * Returns an inspect string of the form: #<_class_: _success_, _failed_>
01132  */
01133 static VALUE
01134 iconv_failure_inspect(VALUE self)
01135 {
01136     const char *cname = rb_class2name(CLASS_OF(self));
01137     VALUE success = rb_attr_get(self, rb_success);
01138     VALUE failed = rb_attr_get(self, rb_failed);
01139     VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
01140     str = rb_str_buf_cat(str, ": ", 2);
01141     str = rb_str_buf_append(str, rb_inspect(success));
01142     str = rb_str_buf_cat(str, ", ", 2);
01143     str = rb_str_buf_append(str, rb_inspect(failed));
01144     return rb_str_buf_cat(str, ">", 1);
01145 }
01146 
01147 /*
01148  * Document-class: Iconv::InvalidEncoding
01149  *
01150  * Requested coding-system is not available on this system.
01151  */
01152 
01153 /*
01154  * Document-class: Iconv::IllegalSequence
01155  *
01156  * Input conversion stopped due to an input byte that does not belong to
01157  * the input codeset, or the output codeset does not contain the
01158  * character.
01159  */
01160 
01161 /*
01162  * Document-class: Iconv::InvalidCharacter
01163  *
01164  * Input conversion stopped due to an incomplete character or shift
01165  * sequence at the end of the input buffer.
01166  */
01167 
01168 /*
01169  * Document-class: Iconv::OutOfRange
01170  *
01171  * Iconv library internal error.  Must not occur.
01172  */
01173 
01174 /*
01175  * Document-class: Iconv::BrokenLibrary
01176  *
01177  * Detected a bug in the underlying iconv(3) library.
01178  * * returns an error without setting errno properly
01179  */
01180 
01181 void
01182 Init_iconv(void)
01183 {
01184     VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
01185 
01186     rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
01187     rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1);
01188     rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
01189     rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
01190     rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0);
01191     rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0);
01192     rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1);
01193     rb_define_method(rb_cIconv, "close", iconv_finish, 0);
01194     rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
01195     rb_define_method(rb_cIconv, "conv", iconv_conv, -1);
01196     rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0);
01197     rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0);
01198     rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1);
01199     rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0);
01200     rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1);
01201 
01202     rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
01203     rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
01204     rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
01205     rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
01206     rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
01207 
01208     rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
01209     rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
01210     rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
01211     rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
01212     rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError);
01213     rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure);
01214     rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
01215     rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
01216     rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
01217     rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure);
01218 
01219     rb_success = rb_intern("success");
01220     rb_failed = rb_intern("failed");
01221     id_transliterate = rb_intern("transliterate");
01222     id_discard_ilseq = rb_intern("discard_ilseq");
01223 
01224     rb_gc_register_address(&charset_map);
01225     charset_map = rb_hash_new();
01226     rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
01227 }
01228 
01229 

Generated on Wed Aug 10 09:17:00 2011 for Ruby by  doxygen 1.4.7