2 * Copyright notice from original mutt:
3 * Copyright (C) 1999-2000 Thomas Roessler <roessler@does-not-exist.org>
5 * This file is part of mutt-ng, see http://www.muttng.org/.
6 * It's licensed under the GNU General Public License,
7 * please see the file GPL in the top level source directory.
20 #include <sys/types.h>
24 #ifdef HAVE_LANGINFO_CODESET
25 # include <langinfo.h>
28 #include <lib-lib/mem.h>
29 #include <lib-lib/ascii.h>
30 #include <lib-lib/str.h>
31 #include <lib-lib/macros.h>
37 # define EILSEQ EINVAL
/* Non-zero when Charset compares equal to "utf-8"; assigned in
 * charset_initialize() and read by replacement_char(). */
41 int Charset_is_utf8 = 0;
/*
 * Set the global Charset (and the Charset_is_utf8 flag) at startup.
 *
 * When HAVE_LANGINFO_CODESET is defined, the codeset name reported by
 * nl_langinfo(CODESET) is canonicalised and, if non-empty, stored in
 * Charset.  "iso-8859-1" is the fallback value; the branch structure that
 * selects it is not visible in this excerpt.
 */
43 void charset_initialize(void)
45 #ifdef HAVE_LANGINFO_CODESET
/* buff holds the raw codeset name, buff2 its canonical form. */
46 char buff[LONG_STRING];
47 char buff2[LONG_STRING];
49 m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET));
50 mutt_canonical_charset(buff2, sizeof(buff2), buff);
52 /* finally, set $charset */
53 if (!m_strisempty(buff2)) {
54 m_strreplace(&Charset, buff2);
/* Fallback charset. */
57 m_strreplace(&Charset, "iso-8859-1");
/* Exact comparison: relies on Charset already being in canonical
 * lowercase form at this point. */
59 Charset_is_utf8 = !strcmp(Charset, "utf-8");
60 #ifdef HAVE_BIND_TEXTDOMAIN_CODESET
/* Tell gettext which codeset to use for PACKAGE's messages. */
61 bind_textdomain_codeset(PACKAGE, Charset);
66 #include "charset.gperf"
/*
 * Write the canonical form of charset `name` into `dest`.
 *
 * dest: output buffer, dlen: size passed on to m_strcpy() for dest.
 * name: charset name; scanning stops at the first ':' or at the end of
 *       the string.
 *
 * A normalised key is built in `scratch` (only alphanumerics and '.',
 * lowercased) and looked up with mutt_canonical_charset_aux().  Either the
 * table entry's `pref` or the original `name` is copied to dest; the
 * condition choosing between the two is not visible in this excerpt
 * (presumably whether the lookup returned an entry).
 */
68 void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name)
70 const struct cset_pair *cp;
71 char scratch[LONG_STRING];
75 // canonize name: only keep a-z0-9 and dots, put into lowercase
/* The "- 1" leaves room in scratch for a terminating NUL. */
76 for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) {
/* NOTE(review): isalnum() receives a plain char here while tolower() below
 * gets an unsigned char cast; for bytes >= 0x80 on signed-char platforms
 * the uncast call is undefined -- consider the same cast. */
77 if (isalnum(*p) || *p== '.') {
78 scratch[i++] = tolower((unsigned char)*p);
83 cp = mutt_canonical_charset_aux(scratch, strlen(scratch));
/* Preferred spelling from the lookup table. */
85 m_strcpy(dest, dlen, cp->pref);
/* Otherwise the caller's name is copied (note: including anything after
 * a ':' that the scan above stopped at, up to dlen). */
87 m_strcpy(dest, dlen, name);
/*
 * Return non-zero when the canonical form of `s` is exactly `chs`.
 * `chs` is compared as given, so callers pass an already canonical name
 * (see charset_is_utf8() / charset_is_us_ascii()).  The declaration of
 * `buffer` and any argument checks are not visible in this excerpt.
 */
92 static int mutt_chscmp(const char *s, const char *chs)
99 mutt_canonical_charset(buffer, sizeof(buffer), s);
100 return !strcmp(buffer, chs);
/* Non-zero when `s` canonicalises to "utf-8" (via mutt_chscmp()). */
103 int charset_is_utf8(const char *s)
105 return mutt_chscmp(s, "utf-8");
/* Non-zero when `s` canonicalises to "us-ascii" (via mutt_chscmp()). */
108 int charset_is_us_ascii(const char *s)
110 return mutt_chscmp(s, "us-ascii");
115 * Like iconv_open, but canonicalises the charsets
/*
 * Open an iconv descriptor converting `fromcode` -> `tocode`.
 *
 * Both names are canonicalised first.  When `flags` contains
 * M_ICONV_HOOK_TO / M_ICONV_HOOK_FROM and mutt_charset_hook() yields a
 * non-NULL name for the respective side, that name (canonicalised) is used
 * instead.  If iconv_open() rejects the pair, a second attempt is made with
 * the names returned by mutt_iconv_hook() for both sides.
 *
 * Returns (iconv_t)-1 when no descriptor could be opened.
 */
118 iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags)
120 char tocode1[SHORT_STRING];
121 char fromcode1[SHORT_STRING];
122 char *tocode2, *fromcode2;
127 mutt_canonical_charset (tocode1, sizeof (tocode1), tocode);
129 #ifdef M_ICONV_HOOK_TO
131 if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook (tocode1)))
132 mutt_canonical_charset (tocode1, sizeof (tocode1), tmp);
135 mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode);
136 if ((flags & M_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook (fromcode1)))
137 mutt_canonical_charset (fromcode1, sizeof (fromcode1), tmp);
/* First attempt: the (possibly hook-substituted) canonical names. */
139 if ((cd = iconv_open (tocode1, fromcode1)) != (iconv_t) - 1)
/* Second attempt: only when both sides have an iconv-hook mapping. */
141 if ((tocode2 = mutt_iconv_hook (tocode1))
142 && (fromcode2 = mutt_iconv_hook (fromcode1)))
143 return iconv_open (tocode2, fromcode2);
145 return (iconv_t) - 1;
150 * Like iconv, but keeps going even when the input is invalid
151 * If you're supplying inrepls, the source charset should be stateless;
152 * if you're supplying an outrepl, the target charset should be.
/*
 * Convert *inbuf through `cd` into *outbuf, substituting replacements when
 * the converter reports EILSEQ.
 *
 * inbuf/inbytesleft, outbuf/outbytesleft: cursor/remaining-size pairs; the
 *     function works on local copies and writes them back before returning.
 * inrepls: NULL-terminated list of replacement strings in the source
 *     charset, tried in order.
 * outrepl: replacement string in the target charset, copied verbatim.
 *
 * Several lines of this function (loop structure, success checks, return)
 * are not visible in this excerpt; the comments below describe only the
 * visible statements.  my_iconv() is presumably a thin wrapper around
 * iconv(3) -- confirm.
 */
155 ssize_t mutt_iconv(iconv_t cd, const char **inbuf, ssize_t *inbytesleft,
156 char **outbuf, ssize_t *outbytesleft,
157 const char **inrepls, const char *outrepl)
159 ssize_t ret = 0, ret1;
/* Local working copies of the caller's cursors. */
160 const char *ib = *inbuf;
161 ssize_t ibl = *inbytesleft;
163 ssize_t obl = *outbytesleft;
166 ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
/* Only attempt a substitution when input and output space both remain and
 * the failure was an invalid input sequence. */
169 if (ibl && obl && errno == EILSEQ) {
171 /* Try replacing the input */
174 for (t = inrepls; *t; t++) {
175 const char *ib1 = *t;
176 ssize_t ibl1 = m_strlen(*t);
/* Convert the candidate replacement via separate output cursors
 * (ob1/obl1, declared on lines not shown here). */
180 my_iconv(cd, &ib1, &ibl1, &ob1, &obl1);
/* Adopt the output produced for the replacement. */
183 ob = ob1, obl = obl1;
191 /* Replace the output */
/* NULL input with a real output buffer: for iconv(3) this emits any
 * pending shift sequence before raw bytes are appended. */
194 my_iconv(cd, 0, 0, &ob, &obl);
196 ssize_t n = m_strlen(outrepl);
202 memcpy (ob, outrepl, n);
206 my_iconv(cd, 0, 0, 0, 0); /* for good measure */
/* Publish the advanced cursors back to the caller. */
210 *inbuf = ib, *inbytesleft = ibl;
211 *outbuf = ob, *outbytesleft = obl;
219 * Used in rfc2047.c and rfc2231.c
/*
 * Convert the string *ps from charset `from` to charset `to`.
 *
 * Conversion is attempted only when both names are non-NULL and
 * mutt_iconv_open() succeeds.  The remaining parameter(s), the handling of
 * the result buffer and the return value are not visible in this excerpt.
 */
222 int mutt_convert_string (char **ps, const char *from, const char *to,
/* "\357\277\275" is the UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. */
226 const char *repls[] = { "\357\277\275", "?", 0 };
232 if (to && from && (cd = mutt_iconv_open (to, from, flags)) != (iconv_t) - 1) {
237 const char **inrepls = NULL;
238 const char *outrepl = NULL;
/* UTF-8 target: U+FFFD can be written directly into the output. */
240 if (charset_is_utf8 (to))
241 outrepl = "\357\277\275";
/* UTF-8 source: presumably selects input-side replacements (the
 * assignment itself is on a line not shown) -- confirm. */
242 else if (charset_is_utf8 (from))
/* len + 1: presumably so the terminating NUL is converted too. */
248 ib = s, ibl = len + 1;
/* Output budget: MB_LEN_MAX output bytes per input byte, plus one. */
249 obl = MB_LEN_MAX * ibl;
250 ob = buf = xmalloc(obl + 1);
252 mutt_iconv (cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
267 * FGETCONV stuff for converting a file while reading it
268 * Used in sendlib.c for converting from mutt's Charset
/* Replacement strings passed to mutt_iconv() by fgetconv(); chosen in
 * fgetconv_open().  (Member of the converter state struct whose header is
 * not visible in this excerpt.) */
280 const char **inrepls;
/* NOTE(review): members are not visible in this excerpt; presumably the
 * reduced state used when no conversion is active -- confirm. */
283 struct fgetconv_not {
/*
 * Create a converting reader over `file` (from charset `from` to `to`).
 *
 * A struct fgetconv_s is allocated in both the converter-available and the
 * no-converter case; the field initialisation on lines not shown here
 * differs between them.  The result is returned as an opaque FGETCONV *.
 */
288 FGETCONV *fgetconv_open (FILE * file, const char *from, const char *to,
291 struct fgetconv_s *fc;
/* Stays (iconv_t)-1 unless mutt_iconv_open() below succeeds. */
292 iconv_t cd = (iconv_t) - 1;
/* U+FFFD in UTF-8 first, then plain "?". */
293 static const char *repls[] = { "\357\277\275", "?", 0 };
296 cd = mutt_iconv_open (to, from, flags);
298 if (cd != (iconv_t) - 1) {
299 fc = p_new(struct fgetconv_s, 1);
/* Output buffer starts empty: read and write cursors at its start. */
300 fc->p = fc->ob = fc->bufo;
/* repls + 1 skips the U+FFFD entry, leaving only "?". */
303 fc->inrepls = charset_is_utf8 (to) ? repls : repls + 1;
306 fc = p_new(struct fgetconv_s, 1);
309 return (FGETCONV *) fc;
/*
 * Read converted characters from `_fc` into `buf` (capacity `l`) using
 * fgetconv().  The loop keeps one byte of `buf` in reserve (r + 1 < l) and
 * checks each character for EOF; the loop body's remaining statements and
 * the return value are not visible in this excerpt.
 */
312 char *fgetconvs (char *buf, ssize_t l, FGETCONV * _fc)
317 for (r = 0; r + 1 < l;) {
318 if ((c = fgetconv (_fc)) == EOF)
/*
 * Return the next converted byte from `_fc`.
 *
 * Without a converter (cd == (iconv_t)-1) this is a plain fgetc().
 * Otherwise bytes are served from the output buffer bufo; when that is
 * exhausted, pending input is converted, and if that yields nothing more
 * input is read from the file and converted with replacement handling.
 * Several guard conditions and the final return are on lines not visible
 * in this excerpt.
 */
332 int fgetconv (FGETCONV * _fc)
334 struct fgetconv_s *fc = (struct fgetconv_s *) _fc;
338 if (fc->cd == (iconv_t) - 1)
339 return fgetc (fc->file);
/* The unsigned char cast keeps bytes >= 0x80 distinct from EOF. */
343 return (unsigned char) *(fc->p)++;
345 /* Try to convert some more */
346 fc->p = fc->ob = fc->bufo;
348 ssize_t obl = ssizeof(fc->bufo);
350 my_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
352 return (unsigned char) *(fc->p)++;
355 /* If we trusted iconv a bit more, we would at this point
356 * ask why it had stopped converting ... */
358 /* Try to read some more */
/* True when the input buffer is completely full of unconverted bytes, or
 * when leftover input does not reach the end of the buffer. */
359 if (fc->ibl == sizeof (fc->bufi) ||
360 (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof (fc->bufi))) {
/* Move the unconverted tail to the front of bufi.
 * NOTE(review): fc->ib points into fc->bufi, so source and destination may
 * overlap; memmove() would be the safe call -- confirm. */
365 memcpy (fc->bufi, fc->ib, fc->ibl);
/* Fill the space after the leftover bytes from the file. */
368 fread (fc->ib + fc->ibl, 1, sizeof (fc->bufi) - fc->ibl, fc->file);
370 /* Try harder this time to convert some */
372 ssize_t obl = ssizeof(fc->bufo);
/* No output-side replacement (last argument 0); only fc->inrepls. */
374 mutt_iconv (fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob,
375 &obl, fc->inrepls, 0);
377 return (unsigned char) *(fc->p)++;
380 /* Either the file has finished or one of the buffers is too small */
/*
 * Release the reader referenced by *_fc.  The iconv descriptor is closed
 * only when one was actually opened; freeing of the struct itself is on
 * lines not visible in this excerpt.
 */
385 void fgetconv_close (FGETCONV ** _fc)
387 struct fgetconv_s *fc = (struct fgetconv_s *) *_fc;
389 if (fc->cd != (iconv_t) - 1)
390 iconv_close (fc->cd);
/*
 * Return the first entry of a colon-separated charset list.
 *
 * When the list contains no ':' the argument itself is returned.
 * Otherwise the text before the first ':' is copied into the static buffer
 * `fcharset` (presumably the value returned on the line not shown; if so,
 * the result is overwritten by the next call).
 */
394 const char *mutt_get_first_charset (const char *charset)
396 static char fcharset[SHORT_STRING];
402 if (!(c1 = strchr (c, ':')))
403 return ((char*) charset);
/* c1 - c characters plus the terminating NUL.
 * NOTE(review): the size handed to m_strcpy() comes from the input, not
 * from sizeof(fcharset); a first entry of SHORT_STRING or more characters
 * would write past the buffer -- consider clamping to sizeof(fcharset). */
404 m_strcpy(fcharset, c1 - c + 1, c);
/*
 * Strictly convert the `flen` bytes at `f` from charset `from` to `to`,
 * delivering a newly allocated result through `t` / `tlen`.
 *
 * Unlike mutt_iconv(), no replacement is attempted: any my_iconv() failure
 * is treated as an error.  Charset hooks are not applied (flags 0).  The
 * error paths, the output-size computation and the return value are on
 * lines not visible in this excerpt.
 */
408 static ssize_t convert_string (const char *f, ssize_t flen,
409 const char *from, const char *to,
410 char **t, ssize_t * tlen)
418 cd = mutt_iconv_open (to, from, 0);
419 if (cd == (iconv_t) (-1))
422 ob = buf = xmalloc(obl);
423 n = my_iconv(cd, &f, &flen, &ob, &obl);
/* The second call (NULL input) flushes the converter's shift state into
 * the output; failure of either call counts as failure. */
424 if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
/* Shrink the buffer to the bytes produced plus one extra byte. */
435 p_realloc(&buf, ob - buf + 1);
/*
 * Try to convert *ps to Charset, assuming in turn each charset named in
 * the colon-separated AssumedCharset list.
 *
 * What happens on a successful or failed attempt, and the return value,
 * are on lines not visible in this excerpt.
 */
442 int mutt_convert_nonmime_string (char **ps)
/* Advance to the entry after the ':' found in the body, or stop when the
 * last entry had none. */
446 for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) {
451 ssize_t ulen = m_strlen(*ps);
/* n = length of the current list entry. */
457 c1 = strchr (c, ':');
458 n = c1 ? c1 - c : m_strlen(c);
/* NUL-terminated copy of the entry, used as the source charset name. */
461 fromcode = p_dupstr(c, n);
462 m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
/* Character to show in place of undisplayable input: U+FFFD when
 * Charset_is_utf8 is set, otherwise '?'. */
473 wchar_t replacement_char(void)
475 return Charset_is_utf8 ? 0xfffd : '?';