2 * Copyright notice from original mutt:
3 * Copyright (C) 1999-2000 Thomas Roessler <roessler@does-not-exist.org>
5 * This file is part of mutt-ng, see http://www.muttng.org/.
6 * It's licensed under the GNU General Public License,
7 * please see the file GPL in the top level source directory.
20 #include <sys/types.h>
24 #ifdef HAVE_LANGINFO_CODESET
25 # include <langinfo.h>
28 #include <lib-lib/mem.h>
29 #include <lib-lib/ascii.h>
30 #include <lib-lib/str.h>
31 #include <lib-lib/macros.h>
37 # define EILSEQ EINVAL
/* Non-zero when the current charset is UTF-8: mutt_set_charset() stores the
 * result of comparing the canonical charset name with "utf-8" here, and
 * replacement_char() reads it to choose the replacement character. */
41 int Charset_is_utf8 = 0;
/* Initialise Charset from the locale's codeset.
 * With HAVE_LANGINFO_CODESET, the name reported by nl_langinfo(CODESET) is
 * copied into buff, canonicalised into buff2 by mutt_canonical_charset(), and
 * a duplicate of buff2 is assigned to Charset.  "iso-8859-1" is the fallback
 * value. */
43 void mutt_set_langinfo_charset(void)
45 #ifdef HAVE_LANGINFO_CODESET
46 char buff[LONG_STRING];
47 char buff2[LONG_STRING];
49 m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET));
50 mutt_canonical_charset(buff2, sizeof(buff2), buff);
52 /* finally, set $charset */
53 if (!(Charset = m_strdup(buff2)))
/* NOTE(review): presumably this fallback is also what runs when
 * HAVE_LANGINFO_CODESET is not defined -- confirm where the #endif sits. */
55 Charset = m_strdup("iso-8859-1");
58 #include "charset.gperf"
/* Write the preferred spelling of charset `name` into `dest`; `dlen` is
 * passed to m_strcpy() as the destination size.
 * The name is first normalised into `scratch`, then looked up with
 * mutt_canonical_charset_aux().  On a hit the entry's `pref` string is copied
 * out, otherwise `name` itself is copied. */
60 void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name)
62 const struct cset_pair *cp;
63 char scratch[LONG_STRING];
66 // canonize name: only keep a-z0-9 and dots, put into lowercase
/* NOTE(review): `i` is advanced both by the loop header and by the
 * `scratch[i++]` store below -- confirm this double step is intended. */
67 for (i = 0; i < ssizeof(scratch); i++) {
/* NOTE(review): isalnum() receives a plain char here while tolower() on the
 * next line receives an unsigned char cast; bytes >= 0x80 may need the same
 * cast. */
68 if (isalnum(*name) || *name == '.') {
69 scratch[i++] = tolower((unsigned char)*name);
/* Tests for the end of the name or a ':' separator. */
72 if (!*name || *name == ':') {
78 cp = mutt_canonical_charset_aux(scratch, strlen(scratch));
79 m_strcpy(dest, dlen, cp ? cp->pref : name);
/* Return non-zero when the canonical form of charset name `s` is exactly
 * `chs`.  `chs` is compared with strcmp() as given, so it must already be in
 * canonical spelling. */
82 static int mutt_chscmp(const char *s, const char *chs)
89 mutt_canonical_charset(buffer, sizeof(buffer), s);
90 return !strcmp(buffer, chs);
/* Return non-zero when charset name `s` canonicalises to "utf-8". */
93 int mutt_is_utf8(const char *s)
95 return mutt_chscmp(s, "utf-8");
/* Return non-zero when charset name `s` canonicalises to "us-ascii". */
98 int mutt_is_us_ascii(const char *s)
100 return mutt_chscmp(s, "us-ascii");
105 * Like iconv_open, but canonicalises the charsets
108 iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags)
/* Returns the descriptor from iconv_open(), or (iconv_t)-1 when the final
 * fallback below is reached.
 * `flags` may carry M_ICONV_HOOK_TO / M_ICONV_HOOK_FROM to let
 * mutt_charset_hook() substitute the respective charset name first. */
110 char tocode1[SHORT_STRING];
111 char fromcode1[SHORT_STRING];
112 char *tocode2, *fromcode2;
/* Canonicalise the target name; a charset-hook match replaces it and is
 * canonicalised again. */
117 mutt_canonical_charset (tocode1, sizeof (tocode1), tocode);
119 #ifdef M_ICONV_HOOK_TO
121 if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook (tocode1)))
122 mutt_canonical_charset (tocode1, sizeof (tocode1), tmp);
/* Same treatment for the source name. */
125 mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode);
126 if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook (fromcode1)))
127 mutt_canonical_charset (fromcode1, sizeof (fromcode1), tmp);
/* First attempt: the canonical names as they are. */
129 if ((cd = iconv_open (tocode1, fromcode1)) != (iconv_t) - 1)
/* Second attempt: taken only when mutt_iconv_hook() yields a value for BOTH
 * names. */
131 if ((tocode2 = mutt_iconv_hook (tocode1))
132 && (fromcode2 = mutt_iconv_hook (fromcode1)))
133 return iconv_open (tocode2, fromcode2);
135 return (iconv_t) - 1;
140 * Like iconv, but keeps going even when the input is invalid
141 * If you're supplying inrepls, the source charset should be stateless;
142 * if you're supplying an outrepl, the target charset should be.
145 ssize_t mutt_iconv(iconv_t cd, const char **inbuf, ssize_t *inbytesleft,
146 char **outbuf, ssize_t *outbytesleft,
147 const char **inrepls, const char *outrepl)
/* The buffer pointers and remaining-byte counts are copied into locals,
 * advanced during conversion, and stored back through the caller's pointers
 * at the end.
 * `inrepls` is a NULL-terminated list of replacement strings that are fed
 * through the converter as input, tried in order.  `outrepl` is a replacement
 * string copied verbatim into the output. */
149 ssize_t ret = 0, ret1;
150 const char *ib = *inbuf;
151 ssize_t ibl = *inbytesleft;
153 ssize_t obl = *outbytesleft;
156 ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
/* Substitution is attempted only for an invalid input sequence (EILSEQ)
 * while both input and output space remain. */
159 if (ibl && obl && errno == EILSEQ) {
161 /* Try replacing the input */
164 for (t = inrepls; *t; t++) {
165 const char *ib1 = *t;
166 ssize_t ibl1 = m_strlen(*t);
/* Convert the candidate using separate output state (ob1/obl1). */
170 my_iconv(cd, &ib1, &ibl1, &ob1, &obl1);
/* Adopt that output position as the real one. */
173 ob = ob1, obl = obl1;
181 /* Replace the output */
/* NOTE(review): my_iconv() is presumably a thin wrapper over iconv(); if so,
 * a NULL input with a real output buffer emits the sequence that returns the
 * output to its initial shift state -- confirm. */
184 my_iconv(cd, 0, 0, &ob, &obl);
186 ssize_t n = m_strlen(outrepl);
192 memcpy (ob, outrepl, n);
196 my_iconv(cd, 0, 0, 0, 0); /* for good measure */
/* Publish the advanced buffer positions back to the caller. */
200 *inbuf = ib, *inbytesleft = ibl;
201 *outbuf = ob, *outbytesleft = obl;
209 * Used in rfc2047.c and rfc2231.c
/* Convert a string from charset `from` to charset `to` via mutt_iconv(),
 * substituting replacement characters for unconvertible input. */
212 int mutt_convert_string (char **ps, const char *from, const char *to,
/* Replacement candidates: the UTF-8 encoding of U+FFFD first, then "?". */
216 const char *repls[] = { "\357\277\275", "?", 0 };
/* Conversion is attempted only when both charset names are given and a
 * descriptor can be opened. */
222 if (to && from && (cd = mutt_iconv_open (to, from, flags)) != (iconv_t) - 1) {
227 const char **inrepls = NULL;
228 const char *outrepl = NULL;
/* For a UTF-8 target, the UTF-8 bytes of U+FFFD serve as the output
 * replacement. */
230 if (mutt_is_utf8 (to))
231 outrepl = "\357\277\275";
232 else if (mutt_is_utf8 (from))
/* Input length is len + 1 -- presumably so the terminating NUL is converted
 * as well (TODO confirm what `len` holds).  The output buffer is sized at
 * MB_LEN_MAX bytes per input byte, plus one. */
238 ib = s, ibl = len + 1;
239 obl = MB_LEN_MAX * ibl;
240 ob = buf = xmalloc(obl + 1);
242 mutt_iconv (cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
257 * FGETCONV stuff for converting a file while reading it
258 * Used in sendlib.c for converting from mutt's Charset
270 const char **inrepls;
273 struct fgetconv_not {
/* Create an FGETCONV reader.  `from`, `to` and `flags` are handed to
 * mutt_iconv_open(); the returned object is a struct fgetconv_s cast to the
 * FGETCONV type. */
278 FGETCONV *fgetconv_open (FILE * file, const char *from, const char *to,
281 struct fgetconv_s *fc;
282 iconv_t cd = (iconv_t) - 1;
/* Replacement candidates: the UTF-8 encoding of U+FFFD first, then "?". */
283 static const char *repls[] = { "\357\277\275", "?", 0 };
286 cd = mutt_iconv_open (to, from, flags);
288 if (cd != (iconv_t) - 1) {
289 fc = p_new(struct fgetconv_s, 1);
/* Read cursor `p` and output cursor `ob` both start at the beginning of
 * bufo. */
290 fc->p = fc->ob = fc->bufo;
/* For a UTF-8 target the list starts at the U+FFFD entry; otherwise that
 * entry is skipped and the list starts at "?". */
293 fc->inrepls = mutt_is_utf8 (to) ? repls : repls + 1;
/* NOTE(review): presumably the else branch for a failed open; it allocates
 * the same struct type, and fgetconv() checks fc->cd to fall back to plain
 * fgetc(). */
296 fc = p_new(struct fgetconv_s, 1);
299 return (FGETCONV *) fc;
/* Read characters from fgetconv() until it reports EOF or until r + 1
 * reaches `l`. */
302 char *fgetconvs (char *buf, ssize_t l, FGETCONV * _fc)
/* `r + 1 < l` keeps one position in reserve -- presumably for the
 * terminating NUL of `buf` (TODO confirm). */
307 for (r = 0; r + 1 < l;) {
308 if ((c = fgetconv (_fc)) == EOF)
322 int fgetconv (FGETCONV * _fc)
/* Return the next byte of the stream behind `_fc`.  Bytes come straight from
 * fgetc() when no conversion descriptor is open; otherwise they are served
 * from the converted output buffer bufo, which is refilled from the raw input
 * buffer bufi. */
324 struct fgetconv_s *fc = (struct fgetconv_s *) _fc;
/* No conversion descriptor: plain pass-through. */
328 if (fc->cd == (iconv_t) - 1)
329 return fgetc (fc->file);
333 return (unsigned char) *(fc->p)++;
335 /* Try to convert some more */
336 fc->p = fc->ob = fc->bufo;
338 ssize_t obl = ssizeof(fc->bufo);
340 my_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
342 return (unsigned char) *(fc->p)++;
345 /* If we trusted iconv a bit more, we would at this point
346 * ask why it had stopped converting ... */
348 /* Try to read some more */
/* True when bufi is completely full of unconverted bytes, or when the
 * leftover bytes end before the end of bufi. */
349 if (fc->ibl == sizeof (fc->bufi) ||
350 (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof (fc->bufi))) {
/* Move the unconverted tail to the front of bufi.
 * NOTE(review): source and destination lie in the same array; memmove()
 * would be the safe choice if the two ranges can overlap -- confirm. */
355 memcpy (fc->bufi, fc->ib, fc->ibl);
/* Read from the file into the space after the ibl leftover bytes, at most
 * the remaining capacity of bufi. */
358 fread (fc->ib + fc->ibl, 1, sizeof (fc->bufi) - fc->ibl, fc->file);
360 /* Try harder this time to convert some */
362 ssize_t obl = ssizeof(fc->bufo);
/* Unlike the plain my_iconv() call above, mutt_iconv() is given fc->inrepls
 * to substitute for invalid input; no output replacement is passed. */
364 mutt_iconv (fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob,
365 &obl, fc->inrepls, 0);
367 return (unsigned char) *(fc->p)++;
370 /* Either the file has finished or one of the buffers is too small */
/* Shut down the reader referenced by `*_fc`: its iconv descriptor is closed
 * when one is open. */
375 void fgetconv_close (FGETCONV ** _fc)
377 struct fgetconv_s *fc = (struct fgetconv_s *) *_fc;
379 if (fc->cd != (iconv_t) - 1)
380 iconv_close (fc->cd);
/* Extract the first entry of a colon-separated charset list.
 * A list without ':' is returned as passed in; otherwise the text before the
 * first ':' is copied into the static buffer `fcharset`. */
384 const char *mutt_get_first_charset (const char *charset)
/* Static storage: a later call overwrites the buffer filled by an earlier
 * one. */
386 static char fcharset[SHORT_STRING];
392 if (!(c1 = strchr (c, ':')))
393 return ((char*) charset);
/* The size argument c1 - c + 1 covers the prefix plus its terminator.
 * NOTE(review): it is not bounded by sizeof(fcharset); a first entry longer
 * than SHORT_STRING - 1 bytes would presumably overrun the buffer --
 * confirm m_strcpy() semantics and clamp if so. */
394 m_strcpy(fcharset, c1 - c + 1, c);
/* Convert the `flen` bytes at `f` from charset `from` to charset `to` into a
 * buffer obtained from xmalloc().  The descriptor is opened with flags 0.
 * NOTE(review): the result is presumably handed back through `t` / `tlen` --
 * confirm. */
398 static ssize_t convert_string (const char *f, ssize_t flen,
399 const char *from, const char *to,
400 char **t, ssize_t * tlen)
408 cd = mutt_iconv_open (to, from, 0);
409 if (cd == (iconv_t) (-1))
412 ob = buf = xmalloc(obl);
413 n = my_iconv(cd, &f, &flen, &ob, &obl);
/* A negative result from either the conversion or the trailing NULL-input
 * call takes the failure branch. */
414 if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
/* Resize the buffer to the bytes produced plus one. */
425 p_realloc(&buf, ob - buf + 1);
/* Try converting to Charset, taking each entry of the colon-separated
 * AssumedCharset list in turn as the source charset. */
432 int mutt_convert_nonmime_string (char **ps)
/* c1 is the next ':' in the list (NULL at the last entry), so the step
 * expression moves to the following entry or ends the loop. */
436 for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) {
441 ssize_t ulen = m_strlen(*ps);
447 c1 = strchr (c, ':');
/* Length of the current entry: up to the ':' or, for the last entry, the
 * rest of the string. */
448 n = c1 ? c1 - c : m_strlen(c);
/* Isolate the entry name and attempt the conversion with it. */
451 fromcode = p_dupstr(c, n);
452 m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
/* Canonicalise `charset`, record in Charset_is_utf8 whether the canonical
 * name is "utf-8", and, when HAVE_BIND_TEXTDOMAIN_CODESET is defined, pass
 * the canonical name to bind_textdomain_codeset() for PACKAGE. */
463 void mutt_set_charset (char *charset)
467 mutt_canonical_charset (buffer, sizeof (buffer), charset);
468 Charset_is_utf8 = !strcmp(buffer, "utf-8");
470 #ifdef HAVE_BIND_TEXTDOMAIN_CODESET
471 bind_textdomain_codeset (PACKAGE, buffer);
/* Return the character to use in place of unrepresentable ones: 0xfffd when
 * Charset_is_utf8 is set, '?' otherwise. */
475 wchar_t replacement_char(void)
477 return Charset_is_utf8 ? 0xfffd : '?';