X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=blobdiff_plain;f=charset.c;fp=charset.c;h=0000000000000000000000000000000000000000;hp=11441e4a8f8918fdcd5c48c272f7fd858f33458e;hb=16534e98723674fa391e3fc29d2a07ce419c13dd;hpb=68d6b380767702ba11529357f36d178d0dc4cb1e diff --git a/charset.c b/charset.c deleted file mode 100644 index 11441e4..0000000 --- a/charset.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - * - * Copyright © 2006 Pierre Habouzit - */ -/* - * Copyright notice from original mutt: - * Copyright (C) 1999-2000 Thomas Roessler - * - * This file is part of mutt-ng, see http://www.muttng.org/. - * It's licensed under the GNU General Public License, - * please see the file GPL in the top level source directory. - */ - -#include - -#ifdef HAVE_LANGINFO_CODESET -# include -#endif - -#include "mutt.h" -#include "charset.h" - -#ifndef EILSEQ -# define EILSEQ EINVAL -#endif - -char *Charset; -int Charset_is_utf8 = 0; -wchar_t CharsetReplacement = '?'; - - -/****************************************************************************/ -/* charset functions */ -/****************************************************************************/ - -void charset_initialize(void) -{ -#ifdef HAVE_LANGINFO_CODESET - char buff[STRING]; - char buff2[STRING]; - - m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET)); - charset_canonicalize(buff2, sizeof(buff2), buff); - - /* finally, set $charset */ - if (!m_strisempty(buff2)) { - m_strreplace(&Charset, buff2); - } else -#endif - { - m_strreplace(&Charset, "iso-8859-1"); - } - - Charset_is_utf8 = !m_strcmp(Charset, "utf-8"); - CharsetReplacement = Charset_is_utf8 ? 0xfffd : '?'; - -#ifdef HAVE_BIND_TEXTDOMAIN_CODESET - bind_textdomain_codeset(PACKAGE, Charset); -#endif -} - -#include "charset.gperf" -void charset_canonicalize(char *dest, ssize_t dlen, const char *name) -{ - const struct cset_pair *cp; - char scratch[STRING]; - const char *p; - int i = 0; - - if (!name) { - m_strcpy(dest, dlen, "us-ascii"); - return; - } - - // canonize name: only keep a-z0-9 and dots, put into lowercase - for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) { - if (isalnum(*p) || *p== '.') { - scratch[i++] = tolower((unsigned char)*p); - } - } - scratch[i] = '\0'; - - cp = charset_canonicalize_aux(scratch, strlen(scratch)); - if (cp) { - m_strcpy(dest, dlen, cp->pref); - } else { - m_strcpy(dest, dlen, name); - m_strtolower(dest); - } -} - -/* XXX: MC: UGLY return of local static */ -const char *charset_getfirst(const char *charset) -{ - static char fcharset[STRING]; - const char *p; - - if (m_strisempty(charset)) - return "us-ascii"; - - p = m_strchrnul(charset, ':'); - m_strncpy(fcharset, sizeof(fcharset), charset, p - charset); - return fcharset; -} - -int charset_is_utf8(const char *s) -{ - char buf[STRING]; - charset_canonicalize(buf, sizeof(buf), s); - return !m_strcmp(buf, "utf-8"); -} - -int charset_is_us_ascii(const char *s) -{ - char buf[STRING]; - charset_canonicalize(buf, sizeof(buf), s); - return !m_strcmp(buf, "us-ascii"); -} - - -/****************************************************************************/ -/* iconv-line functions */ -/****************************************************************************/ - -/* Like iconv_open, but canonicalises the charsets */ -iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) -{ - char tocode1[STRING]; - char fromcode1[STRING]; - const char *tmp; - - iconv_t cd; - - if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook(tocode1))) { - charset_canonicalize(tocode1, sizeof(tocode1), tmp); - } else { - charset_canonicalize(tocode1, sizeof(tocode1), tocode); - } - - if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1))) { - charset_canonicalize(fromcode1, sizeof(fromcode1), tmp); - } else { - charset_canonicalize(fromcode1, sizeof(fromcode1), fromcode); - } - - cd = iconv_open(tocode1, fromcode1); - if (cd != MUTT_ICONV_ERROR) - return cd; - - { - const char *to = mutt_iconv_hook(tocode1); - const char *from = mutt_iconv_hook(fromcode1); - - return to && from ? iconv_open(to, from) : MUTT_ICONV_ERROR; - } -} - - -/* Like iconv, but keeps going even when the input is invalid - If you're supplying inrepls, the source charset should be stateless; - if you're supplying an outrepl, the target charset should be. */ -/* XXX: MC: I do not understand what it does yet */ -ssize_t mutt_iconv(iconv_t cd, - const char **inbuf, ssize_t *inbytesleft, - char **outbuf, ssize_t *outbytesleft, - const char **inrepls, const char *outrepl) -{ - ssize_t ret = 0, ret1; - const char *ib = *inbuf; - ssize_t ibl = *inbytesleft; - char *ob = *outbuf; - ssize_t obl = *outbytesleft; - - for (;;) { - ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl); - if (ret1 != -1) - ret += ret1; - - if (ibl && obl && errno == EILSEQ) { - if (inrepls) { - /* Try replacing the input */ - const char **t; - - for (t = inrepls; *t; t++) { - const char *ib1 = *t; - ssize_t ibl1 = m_strlen(*t); - char *ob1 = ob; - ssize_t obl1 = obl; - - my_iconv(cd, &ib1, &ibl1, &ob1, &obl1); - if (!ibl1) { - ++ib, --ibl; - ob = ob1, obl = obl1; - ++ret; - break; - } - } - if (*t) - continue; - } - /* Replace the output */ - if (!outrepl) - outrepl = "?"; - my_iconv(cd, 0, 0, &ob, &obl); - if (obl) { - ssize_t n = m_strlen(outrepl); - - if (n > obl) { - outrepl = "?"; - n = 1; - } - memcpy(ob, outrepl, n); - ++ib, --ibl; - ob += n, obl -= n; - ++ret; - my_iconv(cd, 0, 0, 0, 0); /* for good measure */ - continue; - } - } - *inbuf = ib, *inbytesleft = ibl; - *outbuf = ob, *outbytesleft = obl; - return ret; - } -} - -/* Convert a string */ -int -mutt_convert_string(char **ps, const char *from, const char *to, int flags) -{ - iconv_t cd; - const char *repls[] = { "\357\277\275", "?", 0 }; - - if (m_strisempty(*ps)) - return 0; - - cd = mutt_iconv_open(to, from, flags); - if (cd != MUTT_ICONV_ERROR) { - const char **inrepls = NULL; - const char *outrepl = NULL; - const char *ib; - char *buf, *ob; - ssize_t ibl, obl; - - if (charset_is_utf8(to)) - outrepl = "\357\277\275"; - else - if (charset_is_utf8(from)) - inrepls = repls; - else - outrepl = "?"; - - ibl = m_strlen(*ps) + 1; - ib = *ps; - - obl = MB_LEN_MAX * ibl; - ob = buf = p_new(char, obl + 1); - - mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); - iconv_close(cd); - - *ob = '\0'; - - p_delete(ps); - *ps = buf; - return 0; - } - - return -1; -} - -static ssize_t convert_string(const char *f, ssize_t flen, - const char *from, const char *to, - char **t, ssize_t * tlen) -{ - iconv_t cd; - char *buf, *ob; - ssize_t obl; - ssize_t n; - int e; - - if ((cd = mutt_iconv_open(to, from, 0)) == MUTT_ICONV_ERROR) - return -1; - - obl = 4 * flen + 1; - ob = buf = p_new(char, obl); - n = my_iconv(cd, &f, &flen, &ob, &obl); - - if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) { - e = errno; - p_delete(&buf); - iconv_close(cd); - errno = e; - return -1; - } - - *ob = '\0'; - *tlen = ob - buf; - *t = buf; - iconv_close(cd); - return n; -} - -int mutt_convert_nonmime_string(char **ps) -{ - const char *p = AssumedCharset; - ssize_t ulen = m_strlen(*ps); - char *u = *ps; - - while (*p) { - const char *q; - char fromcode[LONG_STRING], *s = NULL; - ssize_t slen; - - if (!ulen) - return 0; - - while (*p == ':') - *p++; - - q = m_strchrnul(p, ':'); - m_strncpy(fromcode, sizeof(fromcode), p, q - p); - p = q; - - if (convert_string(u, ulen, fromcode, Charset, &s, &slen) >= 0) { - p_delete(ps); - *ps = s; - return 0; - } - } - - return -1; -} - -/****************************************************************************/ -/* fgetconv functions */ -/****************************************************************************/ - -/* fgetconv_t stuff for converting a file while reading it - Used in sendlib.c for converting from mutt's Charset */ - -struct fgetconv_t { - FILE *file; - iconv_t cd; - char bufi[BUFSIZ]; - char bufo[BUFSIZ]; - char *p; - char *ob; - char *ib; - ssize_t ibl; - const char **inrepls; -}; - -fgetconv_t * -fgetconv_open(FILE *file, const char *from, const char *to, int flags) -{ - static const char *repls[] = { "\357\277\275", "?", 0 }; - - struct fgetconv_t *fc = p_new(struct fgetconv_t, 1); - - fc->file = file; - fc->cd = MUTT_ICONV_ERROR; - if (from && to) - fc->cd = mutt_iconv_open(to, from, flags); - - if (fc->cd != MUTT_ICONV_ERROR) { - fc->p = fc->ob = fc->bufo; - fc->ib = fc->bufi; - fc->ibl = 0; - fc->inrepls = repls + charset_is_utf8(to); - } - return fc; -} - -void fgetconv_close(fgetconv_t **fcp) -{ - struct fgetconv_t *fc = *fcp; - - if (fc->cd != MUTT_ICONV_ERROR) - iconv_close (fc->cd); - p_delete(fcp); -} - - -int fgetconv(fgetconv_t *fc) -{ - if (!fc) - return EOF; - - if (fc->cd == MUTT_ICONV_ERROR) - return fgetc(fc->file); - - if (!fc->p) - return EOF; - if (fc->p < fc->ob) - return (unsigned char)*(fc->p)++; - - /* Try to convert some more */ - fc->p = fc->ob = fc->bufo; - if (fc->ibl) { - ssize_t obl = ssizeof(fc->bufo); - - my_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl); - if (fc->p < fc->ob) - return (unsigned char)*(fc->p)++; - } - - /* If we trusted iconv a bit more, we would at this point - * ask why it had stopped converting ... */ - - /* Try to read some more */ - if (fc->ibl == sizeof(fc->bufi) - || (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) { - fc->p = NULL; - return EOF; - } - - if (fc->ibl) { - memcpy(fc->bufi, fc->ib, fc->ibl); - } - fc->ib = fc->bufi; - fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, - fc->file); - - /* Try harder this time to convert some */ - if (fc->ibl) { - ssize_t obl = ssizeof(fc->bufo); - - mutt_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl, - fc->inrepls, 0); - if (fc->p < fc->ob) { - return (unsigned char)*(fc->p)++; - } - } - - /* Either the file has finished or one of the buffers is too small */ - fc->p = NULL; - return EOF; -} - -char *fgetconvs(char *buf, ssize_t len, fgetconv_t *fc) -{ - ssize_t pos = 0; - - while (pos < len - 1) { - int c = fgetconv(fc); - if (c == EOF) - break; - - buf[pos++] = c; - if (c == '\n') - break; - } - buf[pos] = '\0'; - - return pos ? buf : NULL; -}