X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=blobdiff_plain;f=charset.c;h=29aa77662b3e9d208e7af623fddefb83529c340e;hp=bbaf8ca7a6ccb1bdc7b138774d2908ef610939cd;hb=916e4872caf252a5850e64f79427b9dd7808435d;hpb=2c56b665394c80195b976537e608b690947fcb14 diff --git a/charset.c b/charset.c index bbaf8ca..29aa776 100644 --- a/charset.c +++ b/charset.c @@ -1,3 +1,21 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + * + * Copyright © 2006 Pierre Habouzit + */ /* * Copyright notice from original mutt: * Copyright (C) 1999-2000 Thomas Roessler @@ -7,29 +25,12 @@ * please see the file GPL in the top level source directory. */ -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include -#include -#include +#include -#include - -#include -#include -#include -#include #ifdef HAVE_LANGINFO_CODESET # include #endif -#include -#include -#include -#include - #include "mutt.h" #include "charset.h" @@ -39,567 +40,431 @@ char *Charset; int Charset_is_utf8 = 0; +wchar_t CharsetReplacement = '?'; -/* - * The following list has been created manually from the data under: - * http://www.isi.edu/in-notes/iana/assignments/character-sets - * Last update: 2000-09-07 - * - * Note that it includes only the subset of character sets for which - * a preferred MIME name is given. - */ -static struct { - const char *key; - const char *pref; -} PreferredMIMENames[] = { - {"ansi_x3.4-1968", "us-ascii"}, - {"iso-ir-6", "us-ascii"}, - {"iso_646.irv:1991", "us-ascii"}, - {"ascii", "us-ascii"}, - {"iso646-us", "us-ascii"}, - {"us", "us-ascii"}, - {"ibm367", "us-ascii"}, - {"cp367", "us-ascii"}, - {"csascii", "us-ascii"}, - {"csiso2022kr", "iso-2022-kr"}, - {"cseuckr", "euc-kr"}, - {"csiso2022jp", "iso-2022-jp"}, - {"csiso2022jp2", "iso-2022-jp-2"}, - {"iso_8859-1:1987", "iso-8859-1"}, - {"iso-ir-100", "iso-8859-1"}, - {"iso_8859-1", "iso-8859-1"}, - {"latin1", "iso-8859-1"}, - {"l1", "iso-8859-1"}, - {"ibm819", "iso-8859-1"}, - {"cp819", "iso-8859-1"}, - {"csisolatin1", "iso-8859-1"}, - {"iso_8859-2:1987", "iso-8859-2"}, - {"iso-ir-101", "iso-8859-2"}, - {"iso_8859-2", "iso-8859-2"}, - {"latin2", "iso-8859-2"}, - {"l2", "iso-8859-2"}, - {"csisolatin2", "iso-8859-2"}, - {"iso_8859-3:1988", "iso-8859-3"}, - {"iso-ir-109", "iso-8859-3"}, - {"iso_8859-3", "iso-8859-3"}, - {"latin3", "iso-8859-3"}, - {"l3", "iso-8859-3"}, - {"csisolatin3", "iso-8859-3"}, - {"iso_8859-4:1988", "iso-8859-4"}, - {"iso-ir-110", "iso-8859-4"}, - {"iso_8859-4", "iso-8859-4"}, - {"latin4", "iso-8859-4"}, - {"l4", "iso-8859-4"}, - {"csisolatin4", "iso-8859-4"}, - {"iso_8859-6:1987", "iso-8859-6"}, - {"iso-ir-127", "iso-8859-6"}, - {"iso_8859-6", "iso-8859-6"}, - {"ecma-114", "iso-8859-6"}, - {"asmo-708", "iso-8859-6"}, - {"arabic", "iso-8859-6"}, - {"csisolatinarabic", "iso-8859-6"}, - {"iso_8859-7:1987", "iso-8859-7"}, - {"iso-ir-126", "iso-8859-7"}, - {"iso_8859-7", "iso-8859-7"}, - {"elot_928", "iso-8859-7"}, - {"ecma-118", "iso-8859-7"}, - {"greek", "iso-8859-7"}, - {"greek8", "iso-8859-7"}, - {"csisolatingreek", "iso-8859-7"}, - {"iso_8859-8:1988", "iso-8859-8"}, - {"iso-ir-138", "iso-8859-8"}, - {"iso_8859-8", "iso-8859-8"}, - {"hebrew", "iso-8859-8"}, - {"csisolatinhebrew", "iso-8859-8"}, - {"iso_8859-5:1988", "iso-8859-5"}, - {"iso-ir-144", "iso-8859-5"}, - {"iso_8859-5", "iso-8859-5"}, - {"cyrillic", "iso-8859-5"}, - {"csisolatincyrillic", "iso8859-5"}, - {"iso_8859-9:1989", "iso-8859-9"}, - {"iso-ir-148", "iso-8859-9"}, - {"iso_8859-9", "iso-8859-9"}, - {"latin5", "iso-8859-9"}, - {"l5", "iso-8859-9"}, - {"csisolatin5", "iso-8859-9"}, - {"iso_8859-10:1992", "iso-8859-10"}, - {"iso-ir-157", "iso-8859-10"}, - {"latin6", "iso-8859-10"}, - {"l6", "iso-8859-10"}, - {"csisolatin6", "iso-8859-10"}, - {"cskoi8r", "koi8-r"}, - {"ms_kanji", "shift_jis"}, - {"csshiftjis", "shift_jis"}, - {"extended_unix_code_packed_format_for_japanese", "euc-jp"}, - {"cseucpkdfmtjapanese", "euc-jp"}, - {"csgb2312", "gb2312"}, - {"csbig5", "big5"}, - /* end of official brain damage. - what follows has been taken * from glibc's localedata files. */ - {"iso_8859-13", "iso-8859-13"}, - {"iso-ir-179", "iso-8859-13"}, - {"latin7", "iso-8859-13"}, - {"l7", "iso-8859-13"}, - {"iso_8859-14", "iso-8859-14"}, - {"latin8", "iso-8859-14"}, - {"l8", "iso-8859-14"}, - {"iso_8859-15", "iso-8859-15"}, - {"latin9", "iso-8859-15"}, - {"latin0", "iso-8859-15"}, - {"iso_8859-16", "iso-8859-16"}, - {"latin10", "iso-8859-16"}, - {"646", "us-ascii"}, - {"eucjp", "euc-jp"}, - {"pck", "shift_jis"}, - {"ko_kr-euc", "euc-kr"}, - {"zh_tw-big5", "big5"}, - {"sjis", "shift_jis"}, - {"euc-jp-ms", "eucjp-ms"}, - {NULL, NULL} -}; +/****************************************************************************/ +/* charset functions */ +/****************************************************************************/ -void mutt_set_langinfo_charset (void) +void charset_initialize(void) { #ifdef HAVE_LANGINFO_CODESET - char buff[LONG_STRING]; - char buff2[LONG_STRING]; + char buff[SHORT_STRING]; + char buff2[SHORT_STRING]; m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET)); - mutt_canonical_charset(buff2, sizeof(buff2), buff); + charset_canonicalize(buff2, sizeof(buff2), buff); /* finally, set $charset */ - if (!(Charset = m_strdup(buff2))) + if (!m_strisempty(buff2)) { + m_strreplace(&Charset, buff2); + } else #endif - Charset = m_strdup("iso-8859-1"); -} - + { + m_strreplace(&Charset, "iso-8859-1"); + } -void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name) -{ - ssize_t i; - char *p; - char scratch[LONG_STRING]; + Charset_is_utf8 = !strcmp(Charset, "utf-8"); + CharsetReplacement = Charset_is_utf8 ? 0xfffd : '?'; - m_strcpy(scratch, sizeof(scratch), name); - m_strtolower(scratch); +#ifdef HAVE_BIND_TEXTDOMAIN_CODESET + bind_textdomain_codeset(PACKAGE, Charset); +#endif +} - /* catch some common iso-8859-something misspellings */ - if (!strncmp(scratch, "8859", 4)) { - snprintf(scratch, sizeof(scratch), "iso-8859-%s", - name + 4 + (name[4] == '-')); - m_strtolower(scratch); - } else - if (!strncmp(scratch, "iso8859", 7)) { - snprintf(scratch, sizeof(scratch), "iso-8859-%s", - name + 7 + (name[7] == '-')); - m_strtolower(scratch); +#include "charset.gperf" +void charset_canonicalize(char *dest, ssize_t dlen, const char *name) +{ + const struct cset_pair *cp; + char scratch[SHORT_STRING]; + const char *p; + int i = 0; + + if (!name) { + m_strcpy(dest, dlen, "us-ascii"); + return; } - for (i = 0; PreferredMIMENames[i].key; i++) { - if (!strcmp(scratch, PreferredMIMENames[i].key)) { - m_strcpy(dest, dlen, PreferredMIMENames[i].pref); - return; + // canonize name: only keep a-z0-9 and dots, put into lowercase + for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) { + if (isalnum(*p) || *p== '.') { + scratch[i++] = tolower((unsigned char)*p); } } - - m_strcpy(dest, dlen, scratch); + scratch[i] = '\0'; + + cp = charset_canonicalize_aux(scratch, strlen(scratch)); + if (cp) { + m_strcpy(dest, dlen, cp->pref); + } else { + m_strcpy(dest, dlen, name); + m_strtolower(dest); + } } -static int mutt_chscmp(const char *s, const char *chs) +/* XXX: MC: UGLY return of local static */ +const char *charset_getfirst(const char *charset) { - char buffer[STRING]; + static char fcharset[SHORT_STRING]; + const char *p; - if (!s) - return 0; + if (m_strisempty(charset)) + return "us-ascii"; - mutt_canonical_charset(buffer, sizeof(buffer), s); - return !strcmp(buffer, chs); + p = m_strchrnul(charset, ':'); + m_strncpy(fcharset, sizeof(fcharset), charset, p - charset); + return fcharset; } -int mutt_is_utf8(const char *s) +int charset_is_utf8(const char *s) { - return mutt_chscmp(s, "utf-8"); + char buf[SHORT_STRING]; + charset_canonicalize(buf, sizeof(buf), s); + return !strcmp(buf, "utf-8"); } -int mutt_is_us_ascii(const char *s) +int charset_is_us_ascii(const char *s) { - return mutt_chscmp(s, "us-ascii"); + char buf[SHORT_STRING]; + charset_canonicalize(buf, sizeof(buf), s); + return !strcmp(buf, "us-ascii"); } -/* - * Like iconv_open, but canonicalises the charsets - */ +/****************************************************************************/ +/* iconv-line functions */ +/****************************************************************************/ -iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags) +/* Like iconv_open, but canonicalises the charsets */ +iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) { - char tocode1[SHORT_STRING]; - char fromcode1[SHORT_STRING]; - char *tocode2, *fromcode2; - char *tmp; + char tocode1[SHORT_STRING]; + char fromcode1[SHORT_STRING]; + const char *tmp; - iconv_t cd; + iconv_t cd; - mutt_canonical_charset (tocode1, sizeof (tocode1), tocode); + if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook(tocode1))) { + charset_canonicalize(tocode1, sizeof(tocode1), tmp); + } else { + charset_canonicalize(tocode1, sizeof(tocode1), tocode); + } -#ifdef M_ICONV_HOOK_TO - /* Not used. */ - if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook (tocode1))) - mutt_canonical_charset (tocode1, sizeof (tocode1), tmp); -#endif + if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1))) { + charset_canonicalize(fromcode1, sizeof(fromcode1), tmp); + } else { + charset_canonicalize(fromcode1, sizeof(fromcode1), fromcode); + } - mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode); - if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook (fromcode1))) - mutt_canonical_charset (fromcode1, sizeof (fromcode1), tmp); + cd = iconv_open(tocode1, fromcode1); + if (cd != MUTT_ICONV_ERROR) + return cd; - if ((cd = iconv_open (tocode1, fromcode1)) != (iconv_t) - 1) - return cd; - if ((tocode2 = mutt_iconv_hook (tocode1)) - && (fromcode2 = mutt_iconv_hook (fromcode1))) - return iconv_open (tocode2, fromcode2); + { + const char *to = mutt_iconv_hook(tocode1); + const char *from = mutt_iconv_hook(fromcode1); - return (iconv_t) - 1; + return to && from ? iconv_open(to, from) : MUTT_ICONV_ERROR; + } } -/* - * Like iconv, but keeps going even when the input is invalid - * If you're supplying inrepls, the source charset should be stateless; - * if you're supplying an outrepl, the target charset should be. - */ - -ssize_t mutt_iconv(iconv_t cd, const char **inbuf, ssize_t *inbytesleft, +/* Like iconv, but keeps going even when the input is invalid + If you're supplying inrepls, the source charset should be stateless; + if you're supplying an outrepl, the target charset should be. */ +/* XXX: MC: I do not understand what it does yet */ +ssize_t mutt_iconv(iconv_t cd, + const char **inbuf, ssize_t *inbytesleft, char **outbuf, ssize_t *outbytesleft, const char **inrepls, const char *outrepl) { - ssize_t ret = 0, ret1; - const char *ib = *inbuf; - ssize_t ibl = *inbytesleft; - char *ob = *outbuf; - ssize_t obl = *outbytesleft; - - for (;;) { - ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl); - if (ret1 != -1) - ret += ret1; - if (ibl && obl && errno == EILSEQ) { - if (inrepls) { - /* Try replacing the input */ - const char **t; - - for (t = inrepls; *t; t++) { - const char *ib1 = *t; - ssize_t ibl1 = m_strlen(*t); - char *ob1 = ob; - ssize_t obl1 = obl; - - my_iconv(cd, &ib1, &ibl1, &ob1, &obl1); - if (!ibl1) { - ++ib, --ibl; - ob = ob1, obl = obl1; - ++ret; - break; - } + ssize_t ret = 0, ret1; + const char *ib = *inbuf; + ssize_t ibl = *inbytesleft; + char *ob = *outbuf; + ssize_t obl = *outbytesleft; + + for (;;) { + ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl); + if (ret1 != -1) + ret += ret1; + + if (ibl && obl && errno == EILSEQ) { + if (inrepls) { + /* Try replacing the input */ + const char **t; + + for (t = inrepls; *t; t++) { + const char *ib1 = *t; + ssize_t ibl1 = m_strlen(*t); + char *ob1 = ob; + ssize_t obl1 = obl; + + my_iconv(cd, &ib1, &ibl1, &ob1, &obl1); + if (!ibl1) { + ++ib, --ibl; + ob = ob1, obl = obl1; + ++ret; + break; + } + } + if (*t) + continue; + } + /* Replace the output */ + if (!outrepl) + outrepl = "?"; + my_iconv(cd, 0, 0, &ob, &obl); + if (obl) { + ssize_t n = m_strlen(outrepl); + + if (n > obl) { + outrepl = "?"; + n = 1; + } + memcpy(ob, outrepl, n); + ++ib, --ibl; + ob += n, obl -= n; + ++ret; + my_iconv(cd, 0, 0, 0, 0); /* for good measure */ + continue; + } } - if (*t) - continue; - } - /* Replace the output */ - if (!outrepl) - outrepl = "?"; - my_iconv(cd, 0, 0, &ob, &obl); - if (obl) { - ssize_t n = m_strlen(outrepl); - - if (n > obl) { - outrepl = "?"; - n = 1; - } - memcpy (ob, outrepl, n); - ++ib, --ibl; - ob += n, obl -= n; - ++ret; - my_iconv(cd, 0, 0, 0, 0); /* for good measure */ - continue; - } + *inbuf = ib, *inbytesleft = ibl; + *outbuf = ob, *outbytesleft = obl; + return ret; } - *inbuf = ib, *inbytesleft = ibl; - *outbuf = ob, *outbytesleft = obl; - return ret; - } } - -/* - * Convert a string - * Used in rfc2047.c and rfc2231.c - */ - -int mutt_convert_string (char **ps, const char *from, const char *to, - int flags) +/* Convert a string */ +int +mutt_convert_string(char **ps, const char *from, const char *to, int flags) { - iconv_t cd; - const char *repls[] = { "\357\277\275", "?", 0 }; - char *s = *ps; + iconv_t cd; + const char *repls[] = { "\357\277\275", "?", 0 }; - if (!s || !*s) - return 0; + if (m_strisempty(*ps)) + return 0; - if (to && from && (cd = mutt_iconv_open (to, from, flags)) != (iconv_t) - 1) { - int len; - const char *ib; - char *buf, *ob; - ssize_t ibl, obl; - const char **inrepls = NULL; - const char *outrepl = NULL; - - if (mutt_is_utf8 (to)) - outrepl = "\357\277\275"; - else if (mutt_is_utf8 (from)) - inrepls = repls; - else - outrepl = "?"; - - len = m_strlen(s); - ib = s, ibl = len + 1; - obl = MB_LEN_MAX * ibl; - ob = buf = xmalloc(obl + 1); - - mutt_iconv (cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); - iconv_close (cd); - - *ob = '\0'; - - p_delete(ps); - *ps = buf; - return 0; - } - else - return -1; -} + cd = mutt_iconv_open(to, from, flags); + if (cd != MUTT_ICONV_ERROR) { + const char **inrepls = NULL; + const char *outrepl = NULL; + const char *ib; + char *buf, *ob; + ssize_t ibl, obl; + if (charset_is_utf8(to)) + outrepl = "\357\277\275"; + else + if (charset_is_utf8(from)) + inrepls = repls; + else + outrepl = "?"; -/* - * FGETCONV stuff for converting a file while reading it - * Used in sendlib.c for converting from mutt's Charset - */ + ibl = m_strlen(*ps) + 1; + ib = *ps; -struct fgetconv_s { - FILE *file; - iconv_t cd; - char bufi[512]; - char bufo[512]; - char *p; - char *ob; - char *ib; - ssize_t ibl; - const char **inrepls; -}; + obl = MB_LEN_MAX * ibl; + ob = buf = p_new(char, obl + 1); -struct fgetconv_not { - FILE *file; - iconv_t cd; -}; + mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); + iconv_close(cd); -FGETCONV *fgetconv_open (FILE * file, const char *from, const char *to, - int flags) -{ - struct fgetconv_s *fc; - iconv_t cd = (iconv_t) - 1; - static const char *repls[] = { "\357\277\275", "?", 0 }; + *ob = '\0'; - if (from && to) - cd = mutt_iconv_open (to, from, flags); + p_delete(ps); + *ps = buf; + return 0; + } - if (cd != (iconv_t) - 1) { - fc = p_new(struct fgetconv_s, 1); - fc->p = fc->ob = fc->bufo; - fc->ib = fc->bufi; - fc->ibl = 0; - fc->inrepls = mutt_is_utf8 (to) ? repls : repls + 1; - } - else - fc = p_new(struct fgetconv_s, 1); - fc->file = file; - fc->cd = cd; - return (FGETCONV *) fc; + return -1; } -char *fgetconvs (char *buf, ssize_t l, FGETCONV * _fc) +static ssize_t convert_string(const char *f, ssize_t flen, + const char *from, const char *to, + char **t, ssize_t * tlen) { - int c; - ssize_t r; - - for (r = 0; r + 1 < l;) { - if ((c = fgetconv (_fc)) == EOF) - break; - buf[r++] = (char) c; - if (c == '\n') - break; - } - buf[r] = '\0'; - - if (r) - return buf; - else - return NULL; + iconv_t cd; + char *buf, *ob; + ssize_t obl; + ssize_t n; + int e; + + if ((cd = mutt_iconv_open(to, from, 0)) == MUTT_ICONV_ERROR) + return -1; + + obl = 4 * flen + 1; + ob = buf = p_new(char, obl); + n = my_iconv(cd, &f, &flen, &ob, &obl); + + if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) { + e = errno; + p_delete(&buf); + iconv_close(cd); + errno = e; + return -1; + } + + *ob = '\0'; + *tlen = ob - buf; + *t = buf; + iconv_close(cd); + return n; } -int fgetconv (FGETCONV * _fc) +int mutt_convert_nonmime_string(char **ps) { - struct fgetconv_s *fc = (struct fgetconv_s *) _fc; + const char *p = AssumedCharset; + ssize_t ulen = m_strlen(*ps); + char *u = *ps; - if (!fc) - return EOF; - if (fc->cd == (iconv_t) - 1) - return fgetc (fc->file); - if (!fc->p) - return EOF; - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; + while (*p) { + const char *q; + char fromcode[LONG_STRING], *s = NULL; + ssize_t slen; - /* Try to convert some more */ - fc->p = fc->ob = fc->bufo; - if (fc->ibl) { - ssize_t obl = ssizeof(fc->bufo); + if (!ulen) + return 0; - my_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - } + while (*p == ':') + *p++; - /* If we trusted iconv a bit more, we would at this point - * ask why it had stopped converting ... */ + q = m_strchrnul(p, ':'); + m_strncpy(fromcode, sizeof(fromcode), p, q - p); + p = q; - /* Try to read some more */ - if (fc->ibl == sizeof (fc->bufi) || - (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof (fc->bufi))) { - fc->p = 0; - return EOF; - } - if (fc->ibl) - memcpy (fc->bufi, fc->ib, fc->ibl); - fc->ib = fc->bufi; - fc->ibl += - fread (fc->ib + fc->ibl, 1, sizeof (fc->bufi) - fc->ibl, fc->file); - - /* Try harder this time to convert some */ - if (fc->ibl) { - ssize_t obl = ssizeof(fc->bufo); - - mutt_iconv (fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, - &obl, fc->inrepls, 0); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - } + if (convert_string(u, ulen, fromcode, Charset, &s, &slen) >= 0) { + p_delete(ps); + *ps = s; + return 0; + } + } - /* Either the file has finished or one of the buffers is too small */ - fc->p = 0; - return EOF; + return -1; } -void fgetconv_close (FGETCONV ** _fc) +/****************************************************************************/ +/* fgetconv functions */ +/****************************************************************************/ + +/* fgetconv_t stuff for converting a file while reading it + Used in sendlib.c for converting from mutt's Charset */ + +struct fgetconv_t { + FILE *file; + iconv_t cd; + char bufi[BUFSIZ]; + char bufo[BUFSIZ]; + char *p; + char *ob; + char *ib; + ssize_t ibl; + const char **inrepls; +}; + +fgetconv_t * +fgetconv_open(FILE *file, const char *from, const char *to, int flags) { - struct fgetconv_s *fc = (struct fgetconv_s *) *_fc; + static const char *repls[] = { "\357\277\275", "?", 0 }; - if (fc->cd != (iconv_t) - 1) - iconv_close (fc->cd); - p_delete(_fc); + struct fgetconv_t *fc = p_new(struct fgetconv_t, 1); + + fc->file = file; + fc->cd = MUTT_ICONV_ERROR; + if (from && to) + fc->cd = mutt_iconv_open(to, from, flags); + + if (fc->cd != MUTT_ICONV_ERROR) { + fc->p = fc->ob = fc->bufo; + fc->ib = fc->bufi; + fc->ibl = 0; + fc->inrepls = repls + charset_is_utf8(to); + } + return fc; } -const char *mutt_get_first_charset (const char *charset) +void fgetconv_close(fgetconv_t **fcp) { - static char fcharset[SHORT_STRING]; - const char *c, *c1; - - c = charset; - if (!m_strlen(c)) - return "us-ascii"; - if (!(c1 = strchr (c, ':'))) - return ((char*) charset); - m_strcpy(fcharset, c1 - c + 1, c); - return fcharset; + struct fgetconv_t *fc = *fcp; + + if (fc->cd != MUTT_ICONV_ERROR) + iconv_close (fc->cd); + p_delete(fcp); } -static ssize_t convert_string (const char *f, ssize_t flen, - const char *from, const char *to, - char **t, ssize_t * tlen) + +int fgetconv(fgetconv_t *fc) { - iconv_t cd; - char *buf, *ob; - ssize_t obl; - ssize_t n; - int e; - - cd = mutt_iconv_open (to, from, 0); - if (cd == (iconv_t) (-1)) - return -1; - obl = 4 * flen + 1; - ob = buf = xmalloc(obl); - n = my_iconv(cd, &f, &flen, &ob, &obl); - if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) { - e = errno; - p_delete(&buf); - iconv_close (cd); - errno = e; - return -1; - } - *ob = '\0'; + if (!fc) + return EOF; - *tlen = ob - buf; + if (fc->cd == MUTT_ICONV_ERROR) + return fgetc(fc->file); - p_realloc(&buf, ob - buf + 1); - *t = buf; - iconv_close (cd); + if (!fc->p) + return EOF; + if (fc->p < fc->ob) + return (unsigned char)*(fc->p)++; - return n; -} + /* Try to convert some more */ + fc->p = fc->ob = fc->bufo; + if (fc->ibl) { + ssize_t obl = ssizeof(fc->bufo); -int mutt_convert_nonmime_string (char **ps) -{ - const char *c, *c1; + my_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl); + if (fc->p < fc->ob) + return (unsigned char)*(fc->p)++; + } - for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) { - char *u = *ps; - char *s = NULL; - char *fromcode; - ssize_t m, n; - ssize_t ulen = m_strlen(*ps); - ssize_t slen; - - if (!u || !*u) - return 0; - - c1 = strchr (c, ':'); - n = c1 ? c1 - c : m_strlen(c); - if (!n) - continue; - fromcode = p_dupstr(c, n); - m = convert_string (u, ulen, fromcode, Charset, &s, &slen); - p_delete(&fromcode); - if (m != -1) { - p_delete(ps); - *ps = s; - return 0; + /* If we trusted iconv a bit more, we would at this point + * ask why it had stopped converting ... */ + + /* Try to read some more */ + if (fc->ibl == sizeof(fc->bufi) + || (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) { + fc->p = NULL; + return EOF; } - } - return -1; -} -void mutt_set_charset (char *charset) -{ - char buffer[STRING]; + if (fc->ibl) { + memcpy(fc->bufi, fc->ib, fc->ibl); + } + fc->ib = fc->bufi; + fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, + fc->file); - mutt_canonical_charset (buffer, sizeof (buffer), charset); - Charset_is_utf8 = !strcmp(buffer, "utf-8"); + /* Try harder this time to convert some */ + if (fc->ibl) { + ssize_t obl = ssizeof(fc->bufo); -#ifdef HAVE_BIND_TEXTDOMAIN_CODESET - bind_textdomain_codeset (PACKAGE, buffer); -#endif + mutt_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl, + fc->inrepls, 0); + if (fc->p < fc->ob) { + return (unsigned char)*(fc->p)++; + } + } + + /* Either the file has finished or one of the buffers is too small */ + fc->p = NULL; + return EOF; } -wchar_t replacement_char(void) +char *fgetconvs(char *buf, ssize_t len, fgetconv_t *fc) { - return Charset_is_utf8 ? 0xfffd : '?'; + ssize_t pos = 0; + + while (pos < len - 1) { + int c = fgetconv(fc); + if (c == EOF) + break; + + buf[pos++] = c; + if (c == '\n') + break; + } + buf[pos] = '\0'; + + return pos ? buf : NULL; }