/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. * * Copyright © 2006 Pierre Habouzit */ /* * Copyright notice from original mutt: * Copyright (C) 1999-2000 Thomas Roessler * * This file is part of mutt-ng, see http://www.muttng.org/. * It's licensed under the GNU General Public License, * please see the file GPL in the top level source directory. */ #include #ifdef HAVE_LANGINFO_H # include #endif #include "charset.h" #ifndef EILSEQ # define EILSEQ EINVAL #endif @import "lib-lua/base.cpkg" int Charset_is_utf8 = 0; wchar_t CharsetReplacement = '?'; static rx_t *charset_hooks = NULL; static rx_t *iconv_hooks = NULL; static char *charset_init(void) { const char *res = "iso-8859-1"; #ifdef HAVE_LANGINFO_H char buff[STRING]; char buff2[STRING]; m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET)); charset_canonicalize(buff2, sizeof(buff2), buff); /* finally, set $charset */ if (!m_strisempty(buff2)) { res = buff2; } #endif bind_textdomain_codeset(PACKAGE, res); return m_strdup(res); } static void charset_onchange(const char *cset) { Charset_is_utf8 = charset_is_utf8(cset); CharsetReplacement = Charset_is_utf8 ? 0xfffd : '?'; } @package mod_cset { /* ** .pp ** This variable is a colon-separated list of character encoding ** schemes for messages without character encoding indication. ** Header field values and message body content without character encoding ** indication would be assumed that they are written in one of this list. ** By default, all the header fields and message body without any charset ** indication are assumed to be in \fTus-ascii\fP. ** .pp ** For example, Japanese users might prefer this: ** .pp ** \fTset assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"\fP ** .pp ** However, only the first content is valid for the message body. ** This variable is valid only if $$strict_mime is unset. */ string_t assumed_charset = m_strdup("us-ascii"); /* ** .pp ** Character set your terminal uses to display and enter textual data. */ string_t charset = { .init = charset_init(); .onchange = charset_onchange($$); }; /* ** .pp ** This variable is a colon-separated list of character encoding ** schemes for text file attatchments. ** If \fIunset\fP, $$charset value will be used instead. ** For example, the following configuration would work for Japanese ** text handling: ** .pp ** \fTset file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"\fP ** .pp ** Note: ``\fTiso-2022-*\fP'' must be put at the head of the value as shown above ** if included. */ string_t file_charset = NULL; /* ** .pp ** A list of character sets for outgoing messages. Madmutt will use the ** first character set into which the text can be converted exactly. ** If your ``$$charset'' is not \fTiso-8859-1\fP and recipients may not ** understand \fTUTF-8\fP, it is advisable to include in the list an ** appropriate widely used standard character set (such as ** \fTiso-8859-2\fP, \fTkoi8-r\fP or \fTiso-2022-jp\fP) either ** instead of or after \fTiso-8859-1\fP. */ string_t send_charset = m_strdup("us-ascii:iso-8859-1:utf-8"); void charset_hook(rx_t local, const string_t alias) { rx_set_template(local, alias); rx_list_add2(&charset_hooks, &local); RETURN(); }; void iconv_hook(rx_t local, const string_t alias) { rx_set_template(local, alias); rx_list_add2(&iconv_hooks, &local); RETURN(); }; }; /****************************************************************************/ /* charset functions */ /****************************************************************************/ #include "charset.gperf" void charset_canonicalize(char *dest, ssize_t dlen, const char *name) { const struct cset_pair *cp; char scratch[STRING]; const char *p; int i = 0; if (!name) { m_strcpy(dest, dlen, "us-ascii"); return; } // canonize name: only keep a-z0-9 and dots, put into lowercase for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) { if (isalnum(*p) || *p== '.') { scratch[i++] = tolower((unsigned char)*p); } } scratch[i] = '\0'; cp = charset_canonicalize_aux(scratch, strlen(scratch)); if (cp) { m_strcpy(dest, dlen, cp->pref); } else { m_strcpy(dest, dlen, name); m_strtolower(dest); } } /* XXX: MC: UGLY return of local static */ const char *charset_getfirst(const char *charset) { static char fcharset[STRING]; const char *p; if (m_strisempty(charset)) return "us-ascii"; p = m_strchrnul(charset, ':'); m_strncpy(fcharset, sizeof(fcharset), charset, p - charset); return fcharset; } int charset_is_utf8(const char *s) { char buf[STRING]; charset_canonicalize(buf, sizeof(buf), s); return !m_strcmp(buf, "utf-8"); } int charset_is_us_ascii(const char *s) { char buf[STRING]; charset_canonicalize(buf, sizeof(buf), s); return !m_strcmp(buf, "us-ascii"); } /****************************************************************************/ /* iconv-line functions */ /****************************************************************************/ /* Like iconv_open, but canonicalises the charsets */ iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) { char to1[STRING], to2[STRING]; char from1[STRING], from2[STRING]; char tmp[STRING]; iconv_t cd; if ((flags & M_ICONV_HOOK_TO) && rx_list_match2(charset_hooks, tocode, tmp, sizeof(tmp))) { charset_canonicalize(to1, sizeof(to1), tmp); } else { charset_canonicalize(to1, sizeof(to1), tocode); } if ((flags & M_ICONV_HOOK_FROM) && rx_list_match2(charset_hooks, fromcode, tmp, sizeof(tmp))) { charset_canonicalize(from1, sizeof(from1), tmp); } else { charset_canonicalize(from1, sizeof(from1), fromcode); } if ((cd = iconv_open(to1, from1)) != MUTT_ICONV_ERROR) return cd; if (rx_list_match2(iconv_hooks, to1, to2, sizeof(to2)) && rx_list_match2(iconv_hooks, from1, from2, sizeof(from2))) return iconv_open(to2, from2); return MUTT_ICONV_ERROR; } /* Like iconv, but keeps going even when the input is invalid If you're supplying inrepls, the source charset should be stateless; if you're supplying an outrepl, the target charset should be. */ /* XXX: MC: I do not understand what it does yet */ ssize_t mutt_iconv(iconv_t cd, const char **inbuf, ssize_t *inbytesleft, char **outbuf, ssize_t *outbytesleft, const char **inrepls, const char *outrepl) { ssize_t ret = 0, ret1; const char *ib = *inbuf; ssize_t ibl = *inbytesleft; char *ob = *outbuf; ssize_t obl = *outbytesleft; for (;;) { ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl); if (ret1 != -1) ret += ret1; if (ibl && obl && errno == EILSEQ) { if (inrepls) { /* Try replacing the input */ const char **t; for (t = inrepls; *t; t++) { const char *ib1 = *t; ssize_t ibl1 = m_strlen(*t); char *ob1 = ob; ssize_t obl1 = obl; my_iconv(cd, &ib1, &ibl1, &ob1, &obl1); if (!ibl1) { ++ib, --ibl; ob = ob1, obl = obl1; ++ret; break; } } if (*t) continue; } /* Replace the output */ if (!outrepl) outrepl = "?"; my_iconv(cd, 0, 0, &ob, &obl); if (obl) { ssize_t n = m_strlen(outrepl); if (n > obl) { outrepl = "?"; n = 1; } memcpy(ob, outrepl, n); ++ib, --ibl; ob += n, obl -= n; ++ret; my_iconv(cd, 0, 0, 0, 0); /* for good measure */ continue; } } *inbuf = ib, *inbytesleft = ibl; *outbuf = ob, *outbytesleft = obl; return ret; } } /* Convert a string */ int mutt_convert_string(char **ps, const char *from, const char *to, int flags) { iconv_t cd; const char *repls[] = { "\357\277\275", "?", 0 }; if (m_strisempty(*ps)) return 0; cd = mutt_iconv_open(to, from, flags); if (cd != MUTT_ICONV_ERROR) { const char **inrepls = NULL; const char *outrepl = NULL; const char *ib; char *buf, *ob; ssize_t ibl, obl; if (charset_is_utf8(to)) outrepl = "\357\277\275"; else if (charset_is_utf8(from)) inrepls = repls; else outrepl = "?"; ibl = m_strlen(*ps) + 1; ib = *ps; obl = MB_LEN_MAX * ibl; ob = buf = p_new(char, obl + 1); mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); iconv_close(cd); *ob = '\0'; p_delete(ps); *ps = buf; return 0; } return -1; } static ssize_t convert_string(const char *f, ssize_t flen, const char *from, const char *to, char **t, ssize_t * tlen) { iconv_t cd; char *buf, *ob; ssize_t obl; ssize_t n; int e; if ((cd = mutt_iconv_open(to, from, 0)) == MUTT_ICONV_ERROR) return -1; obl = 4 * flen + 1; ob = buf = p_new(char, obl); n = my_iconv(cd, &f, &flen, &ob, &obl); if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) { e = errno; p_delete(&buf); iconv_close(cd); errno = e; return -1; } *ob = '\0'; *tlen = ob - buf; *t = buf; iconv_close(cd); return n; } int mutt_convert_nonmime_string(char **ps) { const char *p = mod_cset.assumed_charset; ssize_t ulen = m_strlen(*ps); char *u = *ps; while (*p) { const char *q; char fromcode[LONG_STRING], *s = NULL; ssize_t slen; if (!ulen) return 0; while (*p == ':') p++; q = m_strchrnul(p, ':'); m_strncpy(fromcode, sizeof(fromcode), p, q - p); p = q; if (convert_string(u, ulen, fromcode, mod_cset.charset, &s, &slen) >= 0) { p_delete(ps); *ps = s; return 0; } } return -1; } /****************************************************************************/ /* fgetconv functions */ /****************************************************************************/ /* fgetconv_t stuff for converting a file while reading it Used in sendlib.c for converting from mutt's charset */ struct fgetconv_t { FILE *file; iconv_t cd; char bufi[BUFSIZ]; char bufo[BUFSIZ]; char *p; char *ob; char *ib; ssize_t ibl; const char **inrepls; }; fgetconv_t * fgetconv_open(FILE *file, const char *from, const char *to, int flags) { static const char *repls[] = { "\357\277\275", "?", 0 }; struct fgetconv_t *fc = p_new(struct fgetconv_t, 1); fc->file = file; fc->cd = MUTT_ICONV_ERROR; if (from && to) fc->cd = mutt_iconv_open(to, from, flags); if (fc->cd != MUTT_ICONV_ERROR) { fc->p = fc->ob = fc->bufo; fc->ib = fc->bufi; fc->ibl = 0; fc->inrepls = repls + charset_is_utf8(to); } return fc; } void fgetconv_close(fgetconv_t **fcp) { struct fgetconv_t *fc = *fcp; if (fc->cd != MUTT_ICONV_ERROR) iconv_close (fc->cd); p_delete(fcp); } int fgetconv(fgetconv_t *fc) { if (!fc) return EOF; if (fc->cd == MUTT_ICONV_ERROR) return fgetc(fc->file); if (!fc->p) return EOF; if (fc->p < fc->ob) return (unsigned char)*(fc->p)++; /* Try to convert some more */ fc->p = fc->ob = fc->bufo; if (fc->ibl) { ssize_t obl = ssizeof(fc->bufo); my_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl); if (fc->p < fc->ob) return (unsigned char)*(fc->p)++; } /* If we trusted iconv a bit more, we would at this point * ask why it had stopped converting ... */ /* Try to read some more */ if (fc->ibl == sizeof(fc->bufi) || (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) { fc->p = NULL; return EOF; } if (fc->ibl) { memcpy(fc->bufi, fc->ib, fc->ibl); } fc->ib = fc->bufi; fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file); /* Try harder this time to convert some */ if (fc->ibl) { ssize_t obl = ssizeof(fc->bufo); mutt_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0); if (fc->p < fc->ob) { return (unsigned char)*(fc->p)++; } } /* Either the file has finished or one of the buffers is too small */ fc->p = NULL; return EOF; } char *fgetconvs(char *buf, ssize_t len, fgetconv_t *fc) { ssize_t pos = 0; while (pos < len - 1) { int c = fgetconv(fc); if (c == EOF) break; buf[pos++] = c; if (c == '\n') break; } buf[pos] = '\0'; return pos ? buf : NULL; } /* vim:set ft=c: */