2 * Copyright notice from original mutt:
3 * Copyright (C) 1999-2000 Thomas Roessler <roessler@does-not-exist.org>
5 * This file is part of mutt-ng, see http://www.muttng.org/.
6 * It's licensed under the GNU General Public License,
7 * please see the file GPL in the top level source directory.
20 #include <sys/types.h>
24 #ifdef HAVE_LANGINFO_CODESET
25 # include <langinfo.h>
28 #include <lib-lib/mem.h>
29 #include <lib-lib/ascii.h>
30 #include <lib-lib/str.h>
31 #include <lib-lib/macros.h>
37 # define EILSEQ EINVAL
/* Flag caching whether the configured charset canonicalises to "utf-8".
 * It is assigned in mutt_set_charset() and read by replacement_char(). */
41 int Charset_is_utf8 = 0;
/* Initialise the global Charset from the locale.
 *
 * When HAVE_LANGINFO_CODESET is defined, the codeset name reported by
 * nl_langinfo(CODESET) is canonicalised and a copy is stored in Charset.
 * "iso-8859-1" is the fallback value. */
43 void mutt_set_langinfo_charset(void)
45 #ifdef HAVE_LANGINFO_CODESET
/* buff holds the raw locale codeset name, buff2 its canonical form. */
46 char buff[LONG_STRING];
47 char buff2[LONG_STRING];
49 m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET));
50 mutt_canonical_charset(buff2, sizeof(buff2), buff);
52 /* finally, set $charset */
53 if (!(Charset = m_strdup(buff2)))
/* NOTE(review): presumably this assignment is also the only statement left
 * when HAVE_LANGINFO_CODESET is undefined; confirm the #endif placement. */
55 Charset = m_strdup("iso-8859-1");
58 #include "charset.gperf"
/* Write the preferred spelling of charset `name` into `dest`.
 *
 * dest: output buffer, filled through m_strcpy.
 * dlen: size of dest as passed on to m_strcpy.
 * name: charset name supplied by the caller.
 *
 * A normalised key is built in `scratch` and looked up with
 * mutt_canonical_charset_aux(). On a hit the entry's `pref` member is copied
 * to dest, otherwise `name` is copied. */
60 void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name)
62 const struct cset_pair *cp;
63 char scratch[LONG_STRING];
66 // canonize name: only keep a-z0-9 and dots, put into lowercase
67 for (i = 0; i < ssizeof(scratch); i++) {
/* NOTE(review): isalnum() receives a plain char here while tolower() below
 * gets an unsigned char cast; a negative char value is not a valid ctype
 * argument. Consider casting here as well. */
68 if (isalnum(*name) || *name == '.') {
69 scratch[i] = tolower((unsigned char)*name);
/* End of input, a ':' separator, or the last free slot in scratch. */
71 if (!*name || *name == ':' || i + 1 == ssizeof(scratch)) {
/* Look the normalised key up in the table of known charset names. */
77 cp = mutt_canonical_charset_aux(scratch, strlen(scratch));
78 m_strcpy(dest, dlen, cp ? cp->pref : name);
/* Return nonzero when the canonical form of `s` is exactly equal to `chs`.
 * Only `s` is canonicalised; `chs` is compared verbatim with strcmp, so it
 * has to be given in canonical spelling already. */
81 static int mutt_chscmp(const char *s, const char *chs)
88 mutt_canonical_charset(buffer, sizeof(buffer), s);
89 return !strcmp(buffer, chs);
/* Return nonzero when charset name `s` canonicalises to "utf-8". */
92 int mutt_is_utf8(const char *s)
94 return mutt_chscmp(s, "utf-8");
/* Return nonzero when charset name `s` canonicalises to "us-ascii". */
97 int mutt_is_us_ascii(const char *s)
99 return mutt_chscmp(s, "us-ascii");
104 * Like iconv_open, but canonicalises the charsets
/* Open an iconv descriptor converting from `fromcode` to `tocode`.
 *
 * tocode, fromcode: charset names; both are canonicalised before use.
 * flags: M_ICONV_HOOK_TO and M_ICONV_HOOK_FROM select whether
 *        mutt_charset_hook() may substitute the respective name.
 *
 * Returns the descriptor obtained from iconv_open, or (iconv_t) -1 when
 * neither the canonical names nor the mutt_iconv_hook() replacements work. */
107 iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags)
/* Canonical names are kept in fixed SHORT_STRING sized buffers. */
109 char tocode1[SHORT_STRING];
110 char fromcode1[SHORT_STRING];
111 char *tocode2, *fromcode2;
116 mutt_canonical_charset (tocode1, sizeof (tocode1), tocode);
118 #ifdef M_ICONV_HOOK_TO
/* A hook result replaces the name and is canonicalised in turn. */
120 if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook (tocode1)))
121 mutt_canonical_charset (tocode1, sizeof (tocode1), tmp);
124 mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode);
125 if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook (fromcode1)))
126 mutt_canonical_charset (fromcode1, sizeof (fromcode1), tmp);
/* First attempt: the canonical names as they are. */
128 if ((cd = iconv_open (tocode1, fromcode1)) != (iconv_t) - 1)
/* Second attempt: only when mutt_iconv_hook() yields a replacement for both
 * names. */
130 if ((tocode2 = mutt_iconv_hook (tocode1))
131 && (fromcode2 = mutt_iconv_hook (fromcode1)))
132 return iconv_open (tocode2, fromcode2);
134 return (iconv_t) - 1;
139 * Like iconv, but keeps going even when the input is invalid
140 * If you're supplying inrepls, the source charset should be stateless;
141 * if you're supplying an outrepl, the target charset should be.
/* Convert with `cd`, substituting replacement text on EILSEQ.
 *
 * inbuf, inbytesleft, outbuf, outbytesleft: cursors and remaining byte
 *     counts. The function works on local copies and stores them back to
 *     the caller at the end.
 * inrepls: zero-terminated array of replacement input strings that are tried
 *     in order.
 * outrepl: replacement bytes copied straight into the output.
 *
 * NOTE(review): `ret` is presumably the return value; the return statement
 * and the updates of `ret` should be checked against the full function. */
144 ssize_t mutt_iconv(iconv_t cd, const char **inbuf, ssize_t *inbytesleft,
145 char **outbuf, ssize_t *outbytesleft,
146 const char **inrepls, const char *outrepl)
148 ssize_t ret = 0, ret1;
149 const char *ib = *inbuf;
150 ssize_t ibl = *inbytesleft;
152 ssize_t obl = *outbytesleft;
155 ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
/* Replacement is only attempted while input and output space both remain
 * and the converter reported an invalid sequence. */
158 if (ibl && obl && errno == EILSEQ) {
160 /* Try replacing the input */
163 for (t = inrepls; *t; t++) {
/* Each candidate is converted through separate cursors (ib1, ob1, ...). */
164 const char *ib1 = *t;
165 ssize_t ibl1 = m_strlen(*t);
169 my_iconv(cd, &ib1, &ibl1, &ob1, &obl1);
/* Adopt the output position reached by the candidate conversion. */
172 ob = ob1, obl = obl1;
180 /* Replace the output */
183 my_iconv(cd, 0, 0, &ob, &obl);
185 ssize_t n = m_strlen(outrepl);
/* outrepl is copied verbatim, without going through the converter. */
191 memcpy (ob, outrepl, n);
195 my_iconv(cd, 0, 0, 0, 0); /* for good measure */
/* Publish the local cursors back to the caller. */
199 *inbuf = ib, *inbytesleft = ibl;
200 *outbuf = ob, *outbytesleft = obl;
208 * Used in rfc2047.c and rfc2231.c
/* Convert the string at *ps from charset `from` to charset `to`.
 *
 * Conversion is attempted only when both names are non-NULL and
 * mutt_iconv_open() succeeds. Invalid input is handled by mutt_iconv() with
 * the replacement strings chosen below.
 *
 * NOTE(review): presumably *ps is replaced by the converted buffer and a
 * status is returned; verify against the complete function. */
211 int mutt_convert_string (char **ps, const char *from, const char *to,
/* "\357\277\275" is the UTF-8 encoding of U+FFFD; "?" is the second
 * candidate. */
215 const char *repls[] = { "\357\277\275", "?", 0 };
221 if (to && from && (cd = mutt_iconv_open (to, from, flags)) != (iconv_t) - 1) {
226 const char **inrepls = NULL;
227 const char *outrepl = NULL;
/* A UTF-8 target can take U+FFFD directly as output replacement. */
229 if (mutt_is_utf8 (to))
230 outrepl = "\357\277\275";
231 else if (mutt_is_utf8 (from))
/* The input length is len + 1; presumably that includes the terminating NUL
 * (confirm how len is computed). */
237 ib = s, ibl = len + 1;
/* Output is sized at MB_LEN_MAX bytes per input byte, plus one. */
238 obl = MB_LEN_MAX * ibl;
239 ob = buf = xmalloc(obl + 1);
241 mutt_iconv (cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
256 * FGETCONV stuff for converting a file while reading it
257 * Used in sendlib.c for converting from mutt's Charset
269 const char **inrepls;
272 struct fgetconv_not {
/* Create a converting reader for `file`.
 *
 * from, to: charset names handed to mutt_iconv_open().
 *
 * Returns a newly allocated state object cast to FGETCONV *. When a
 * converter could be opened, the output cursor starts at the beginning of
 * bufo and the input replacement list is chosen by target charset. */
277 FGETCONV *fgetconv_open (FILE * file, const char *from, const char *to,
280 struct fgetconv_s *fc;
281 iconv_t cd = (iconv_t) - 1;
/* Replacement candidates: the UTF-8 bytes of U+FFFD, then "?". */
282 static const char *repls[] = { "\357\277\275", "?", 0 };
285 cd = mutt_iconv_open (to, from, flags);
287 if (cd != (iconv_t) - 1) {
288 fc = p_new(struct fgetconv_s, 1);
289 fc->p = fc->ob = fc->bufo;
/* A UTF-8 target gets the full list; any other target skips the first entry
 * and is left with "?" only. */
292 fc->inrepls = mutt_is_utf8 (to) ? repls : repls + 1;
295 fc = p_new(struct fgetconv_s, 1);
298 return (FGETCONV *) fc;
/* Read characters from `_fc` into `buf` by calling fgetconv() repeatedly.
 * The loop bound r + 1 < l keeps the count below l - 1 stored characters;
 * presumably the spare byte is for the terminator (confirm). An EOF from
 * fgetconv() is tested for on every character. */
301 char *fgetconvs (char *buf, ssize_t l, FGETCONV * _fc)
306 for (r = 0; r + 1 < l;) {
307 if ((c = fgetconv (_fc)) == EOF)
/* Return the next byte from the converting reader `_fc`.
 *
 * Without a converter (cd equal to (iconv_t) -1) this is a plain fgetc() on
 * the underlying file. Otherwise bytes are served from the output buffer
 * bufo, which is refilled by converting the contents of the input buffer
 * bufi; bufi in turn is refilled from the file with fread(). Converted bytes
 * are returned through an unsigned char cast. */
321 int fgetconv (FGETCONV * _fc)
323 struct fgetconv_s *fc = (struct fgetconv_s *) _fc;
327 if (fc->cd == (iconv_t) - 1)
328 return fgetc (fc->file);
332 return (unsigned char) *(fc->p)++;
334 /* Try to convert some more */
/* Restart the output buffer before converting into it. */
335 fc->p = fc->ob = fc->bufo;
337 ssize_t obl = ssizeof(fc->bufo);
339 my_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
341 return (unsigned char) *(fc->p)++;
344 /* If we trusted iconv a bit more, we would at this point
345 * ask why it had stopped converting ... */
347 /* Try to read some more */
348 if (fc->ibl == sizeof (fc->bufi) ||
349 (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof (fc->bufi))) {
/* Move the unconsumed input to the front of bufi.
 * NOTE(review): fc->ib appears to point into bufi as well; memcpy requires
 * non-overlapping regions, so confirm overlap is impossible or use memmove. */
354 memcpy (fc->bufi, fc->ib, fc->ibl);
/* Fill the remainder of bufi from the file. */
357 fread (fc->ib + fc->ibl, 1, sizeof (fc->bufi) - fc->ibl, fc->file);
359 /* Try harder this time to convert some */
361 ssize_t obl = ssizeof(fc->bufo);
/* This pass goes through mutt_iconv() so that invalid input is replaced
 * using fc->inrepls; no output replacement string is passed. */
363 mutt_iconv (fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob,
364 &obl, fc->inrepls, 0);
366 return (unsigned char) *(fc->p)++;
369 /* Either the file has finished or one of the buffers is too small */
/* Release the reader referenced by *_fc. The iconv descriptor is closed only
 * when one was actually opened, that is when cd differs from (iconv_t) -1. */
374 void fgetconv_close (FGETCONV ** _fc)
376 struct fgetconv_s *fc = (struct fgetconv_s *) *_fc;
378 if (fc->cd != (iconv_t) - 1)
379 iconv_close (fc->cd);
/* Extract the first entry of a colon-separated charset list.
 *
 * When no ':' is found the argument itself is returned. Otherwise the text
 * before the first ':' is copied into the static buffer `fcharset`, so the
 * copy is overwritten by the next call that takes this path. */
383 const char *mutt_get_first_charset (const char *charset)
385 static char fcharset[SHORT_STRING];
391 if (!(c1 = strchr (c, ':')))
392 return ((char*) charset);
/* NOTE(review): the size argument is the length of the first entry plus one
 * and is not clamped to sizeof(fcharset); confirm that an entry longer than
 * SHORT_STRING cannot reach this call. */
393 m_strcpy(fcharset, c1 - c + 1, c);
/* Convert `flen` bytes at `f` from charset `from` to charset `to` into a
 * newly allocated buffer.
 *
 * The converter is opened with flags 0, so no charset hooks are requested.
 * my_iconv() is called directly, and a negative result from either the
 * conversion or the final flush call takes the error branch.
 *
 * NOTE(review): presumably *t and *tlen receive the buffer and its length on
 * success; verify against the complete function. */
397 static ssize_t convert_string (const char *f, ssize_t flen,
398 const char *from, const char *to,
399 char **t, ssize_t * tlen)
407 cd = mutt_iconv_open (to, from, 0);
408 if (cd == (iconv_t) (-1))
411 ob = buf = xmalloc(obl);
412 n = my_iconv(cd, &f, &flen, &ob, &obl);
/* The second call passes no input so the converter can emit pending output. */
413 if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
/* Shrink the buffer to the bytes written plus one. */
424 p_realloc(&buf, ob - buf + 1);
/* Try to convert *ps to Charset, taking each entry of the colon-separated
 * list AssumedCharset in turn as the source charset. */
431 int mutt_convert_nonmime_string (char **ps)
/* c walks the list; c1 points at the next ':' or is NULL on the last entry. */
435 for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) {
440 ssize_t ulen = m_strlen(*ps);
446 c1 = strchr (c, ':');
/* n is the length of the current entry. */
447 n = c1 ? c1 - c : m_strlen(c);
/* The entry is duplicated so that a terminated name can be passed on. */
450 fromcode = p_dupstr(c, n);
451 m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
/* Apply `charset` as the active charset.
 * The name is canonicalised, Charset_is_utf8 is updated from the canonical
 * name, and with HAVE_BIND_TEXTDOMAIN_CODESET the canonical name is also
 * passed to bind_textdomain_codeset() for PACKAGE. */
462 void mutt_set_charset (char *charset)
466 mutt_canonical_charset (buffer, sizeof (buffer), charset);
467 Charset_is_utf8 = !strcmp(buffer, "utf-8");
469 #ifdef HAVE_BIND_TEXTDOMAIN_CODESET
470 bind_textdomain_codeset (PACKAGE, buffer);
/* Return the character used in place of unrepresentable input:
 * U+FFFD (0xfffd) when Charset_is_utf8 is set, otherwise '?'. */
474 wchar_t replacement_char(void)
476 return Charset_is_utf8 ? 0xfffd : '?';