* please see the file GPL in the top level source directory.
*/
-#include <lib-lib/mem.h>
-#include <lib-lib/str.h>
-#include <lib-lib/ascii.h>
+#include <lib-lib/lib-lib.h>
#include <lib-mime/mime.h>
-#include "mutt.h"
#include "charset.h"
#include "thread.h"
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
/* If you are debugging this file, comment out the following line. */
/*#define NDEBUG*/
#define ENCWORD_LEN_MIN 9 /* m_strlen("=?.?.?.?=") */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
-
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
/* converts f of len flen and charset from
cd = mutt_iconv_open(to, from, 0);
- if (cd == (iconv_t)(-1))
+ if (cd == MUTT_ICONV_ERROR)
return -1;
obl = 4 * flen + 1;
const char *p = charsets;
while (*p) {
- char cset[SHORT_STRING];
+ char cset[STRING];
const char *q;
char *s;
ssize_t slen, n;
p_delete(&res);
}
- mutt_canonical_charset(buf, sizeof(buf), tocode);
+ charset_canonicalize(buf, sizeof(buf), tocode);
m_strreplace(&tocode, buf);
}
* tocode, unless fromcode is 0, in which case the data is assumed to
* be already in tocode, which should be 8-bit and stateless.
*/
-static size_t try_block (const char *d, ssize_t dlen,
- const char *fromcode, const char *tocode,
- encoder_t **encoder, ssize_t *wlen)
+static size_t try_block(const char *d, ssize_t dlen,
+ const char *fromcode, const char *tocode,
+ encoder_t **encoder, ssize_t *wlen)
{
char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
ssize_t obl = sizeof(buf1) - m_strlen(tocode);
ssize_t ibl = dlen;
iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
- assert (cd != (iconv_t)(-1));
+ assert (cd != MUTT_ICONV_ERROR);
ob = buf1;
len_q = len + (ob - buf1) + 2 * count;
/* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
- if (!ascii_strcasecmp(tocode, "ISO-2022-JP"))
+ if (mime_which_token(tocode, -1) == MIME_ISO_2022_JP)
len_q = ENCWORD_LEN_MAX + 1;
if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
* Encode the data (d, dlen) into s using the encoder.
* Return the length of the encoded word.
*/
-static size_t encode_block (char *s, char *d, ssize_t dlen,
- const char *fromcode, const char *tocode,
- encoder_t *encoder)
+static size_t
+encode_block(char *s, char *d, ssize_t dlen,
+ const char *fromcode, const char *tocode, encoder_t *encoder)
{
char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
ssize_t ibl, obl, n1, n2;
char *ob;
if (fromcode) {
- cd = mutt_iconv_open (tocode, fromcode, 0);
- assert (cd != (iconv_t) (-1));
- ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
+ cd = mutt_iconv_open(tocode, fromcode, 0);
+ assert (cd != MUTT_ICONV_ERROR);
+ ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
n2 = my_iconv(cd, 0, 0, &ob, &obl);
assert (n1 >= 0 && n2 >= 0);
iconv_close (cd);
- return (*encoder) (s, buf1, ob - buf1, tocode);
+ return (*encoder)(s, buf1, ob - buf1, tocode);
} else {
- return (*encoder) (s, d, dlen, tocode);
+ return (*encoder)(s, d, dlen, tocode);
}
}
encoder_t **encoder, ssize_t *wlen)
{
size_t n, nn;
- int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
+ int utf8 = mime_which_token(fromcode, -1) == MIME_UTF_8;
n = dlen;
for (;;) {
assert (d + n > d);
- nn = try_block (d, n, fromcode, tocode, encoder, wlen);
+ nn = try_block(d, n, fromcode, tocode, encoder, wlen);
if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
break;
n = (nn ? nn : n) - 1;
assert (n > 0);
- if (utf8)
- while (n > 1 && CONTINUATION_BYTE (d[n]))
+ if (utf8) {
+ while (n > 1 && CONTINUATION_BYTE(d[n]))
--n;
+ }
}
return n;
}
* The input data is assumed to be a single line starting at column col;
* if col is non-zero, the preceding character was a space.
*/
-static int rfc2047_encode (const char *d, ssize_t dlen, int col,
- const char *fromcode, const char *charsets,
- char **e, ssize_t *elen, const char *specials)
+/*** XXX: simplify that one day ***/
+static int rfc2047_encode(const char *d, ssize_t dlen, int col,
+ const char *fromcode, const char *charsets,
+ char **e, ssize_t *elen, const char *specials)
{
int ret = 0;
char *buf;
ssize_t bufpos, buflen;
- char *u, *t0, *t1, *t;
- char *s0, *s1;
- ssize_t ulen, r, n, wlen;
- encoder_t *encoder;
+ char *u, *t;
+ char *s0, *s1, *t0, *t1;
char *tocode1 = 0;
const char *tocode;
const char *icode = "UTF-8";
+ ssize_t ulen, r, n, wlen;
+ encoder_t *encoder;
/* Try to convert to UTF-8. */
if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
ret = 1;
- icode = 0;
+ icode = NULL;
u = p_dupstr(d, ulen = dlen);
}
/* Find earliest and latest things we must encode. */
- s0 = s1 = t0 = t1 = 0;
+ s0 = s1 = t0 = t1 = NULL;
for (t = u; t < u + ulen; t++) {
if ((*t & 0x80) ||
(*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
}
/* Hack to avoid labelling 8-bit data as us-ascii. */
- if (!icode && mutt_is_us_ascii (tocode))
+ if (!icode && charset_is_us_ascii(tocode))
tocode = "unknown-8bit";
/* Adjust t0 for maximum length of line. */
/* Adjust t0 until we can encode a character after a space. */
for (; t0 > u; t0--) {
- if (!HSPACE (*(t0 - 1)))
+ if (!HSPACE(t0[-1]))
continue;
t = t0 + 1;
- if (icode)
- while (t < u + ulen && CONTINUATION_BYTE (*t))
+ if (icode) {
+ while (t < u + ulen && CONTINUATION_BYTE(*t))
++t;
- if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
- col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
+ }
+ if (!try_block(t0, t - t0, icode, tocode, &encoder, &wlen)
+ && col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
break;
}
/* Adjust t1 until we can encode a character before a space. */
for (; t1 < u + ulen; t1++) {
- if (!HSPACE (*t1))
+ if (!HSPACE(*t1))
continue;
t = t1 - 1;
- if (icode)
- while (CONTINUATION_BYTE (*t))
+ if (icode) {
+ while (CONTINUATION_BYTE(*t))
--t;
- if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
- 1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
+ }
+ if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen)
+ && 1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
break;
}
buflen = 2 * ulen;
buf = p_new(char, buflen);
bufpos = t0 - u;
- memcpy (buf, u, t0 - u);
+ memcpy(buf, u, t0 - u);
col += t0 - u;
/* Add to output buffer. */
#define LINEBREAK "\n\t"
- if (bufpos + wlen + m_strlen(LINEBREAK) > buflen) {
- buflen = bufpos + wlen + m_strlen(LINEBREAK);
+ if (bufpos + wlen + 2 > buflen) {
+ buflen = bufpos + wlen + 2;
p_realloc(&buf, buflen);
}
r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
return ret;
}
-void _rfc2047_encode_string (char **pd, int encode_specials, int col)
+
+static void _rfc2047_encode_string(char **pd, int encode_specials, int col)
{
char *e;
ssize_t elen;
const char *charsets;
- if (!Charset || !*pd)
+ if (!mod_cset.charset || !*pd)
return;
- charsets = SendCharset;
- if (!charsets || !*charsets)
- charsets = "UTF-8";
+ charsets = m_strisempty(mod_cset.send_charset) ? "utf-8" : mod_cset.send_charset;
- rfc2047_encode (*pd, m_strlen(*pd), col,
- Charset, charsets, &e, &elen,
- encode_specials ? RFC822Specials : NULL);
+ rfc2047_encode(*pd, m_strlen(*pd), col,
+ mod_cset.charset, charsets, &e, &elen,
+ encode_specials ? RFC822Specials : NULL);
p_delete(pd);
*pd = e;
_rfc2047_encode_string(pd, 0, 32);
}
-void rfc2047_encode_adrlist (address_t * addr, const char *tag)
+/* RFC 2047-encode the `personal` field of every entry of the list `addr`
+ * that has one.  The start column handed to the encoder is m_strlen(tag) + 2
+ * when a tag is given (presumably for the ": " after the header name) and 32
+ * otherwise; encoding of specials is always requested. */
+void rfc2047_encode_adrlist(address_t *addr, const char *tag)
{
address_t *ptr = addr;
int col = tag ? m_strlen(tag) + 2 : 32;
while (ptr) {
if (ptr->personal)
- _rfc2047_encode_string (&ptr->personal, 1, col);
+ _rfc2047_encode_string(&ptr->personal, 1, col);
ptr = ptr->next;
}
}
-static int rfc2047_decode_word (char *d, const char *s, size_t len)
+
+/****************************************************************************/
+/* Decoding functions */
+/****************************************************************************/
+
+/* Decode one encoded word "=?charset?[QB]?text?=" spanning [p, end) into d,
+ * which receives at most len bytes.  An optional RFC 2231 language suffix
+ * ("charset*lang") is ignored.  The decoded text is converted from charset
+ * to mod_cset.charset before being copied out.
+ * Returns 0 on success, -1 if the word is malformed (d is then untouched). */
+static int
+rfc2047_decode_word(char *d, size_t len, const char *p, const char *end)
{
- const char *pp, *pp1;
- char *pd, *d0;
- const char *t, *t1;
- int enc = 0, count = 0;
- char *charset = NULL;
+ char charset[STRING] = "";
+ const char *t, *lang;
+ char *q, *d0 = NULL;
+ int enc = 0;
- pd = d0 = p_new(char, m_strlen(s));
+ p += 2; /* =? */
- for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
- count++;
- switch (count) {
- case 2:
- /* ignore language specification a la RFC 2231 */
- t = pp1;
- if ((t1 = memchr (pp, '*', t - pp)))
- t = t1;
- charset = p_dupstr(pp, t - pp);
- break;
- case 3:
- if (toupper ((unsigned char) *pp) == 'Q')
- enc = ENCQUOTEDPRINTABLE;
- else if (toupper ((unsigned char) *pp) == 'B')
- enc = ENCBASE64;
- else {
- p_delete(&charset);
- p_delete(&d0);
- return (-1);
- }
- break;
- case 4:
- if (enc == ENCQUOTEDPRINTABLE) {
- for (; pp < pp1; pp++) {
- if (*pp == '_')
- *pd++ = ' ';
- else if (*pp == '=' && hexval(pp[1]) >= 0 && hexval(pp[2]) >= 0) {
- *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
- pp += 2;
- }
- else
- *pd++ = *pp;
- }
- *pd = 0;
+ t = strchr(p, '?');
+ if (!t)
+ return -1;
+
+ /* ignore language specification a la RFC 2231: the charset name stops
+ * at the first '*' if there is one before the '?' */
+ lang = memchr(p, '*', t - p);
+ m_strncpy(charset, sizeof(charset), p, (lang ? lang : t) - p);
+
+ switch (t[1]) {
+ case 'q': case 'Q':
+ enc = ENCQUOTEDPRINTABLE;
+ break;
+
+ case 'b': case 'B':
+ enc = ENCBASE64;
+ break;
+
+ default:
+ return -1;
+ }
+
+ if (t[2] != '?')
+ return -1;
+
+ p = t + 3; /* skip ?[QB]? */
+ d0 = q = p_new(char, end - p + 1); /* it's enough space to decode */
+
+ if (enc == ENCQUOTEDPRINTABLE) {
+ while (p < end - 2) {
+ if (*p == '=' && hexval(p[1]) >= 0 && hexval(p[2]) >= 0) {
+ *q++ = (hexval (p[1]) << 4) | hexval (p[2]);
+ p += 3;
+ } else
+ if (*p == '_') {
+ *q++ = ' ';
+ p++;
+ } else {
+ *q++ = *p++;
}
- else if (enc == ENCBASE64) {
- int c, b = 0, k = 0;
-
- for (; pp < pp1; pp++) {
- if (*pp == '=')
- break;
- if ((c = base64val(*pp)) < 0)
- continue;
- if (k + 6 >= 8) {
- k -= 2;
- *pd++ = b | (c >> k);
- b = c << (8 - k);
- }
- else {
- b |= c << (k + 2);
- k += 6;
- }
- }
- *pd = 0;
+ }
+ } else { /* enc == ENCBASE64 */
+ int c, b = 0, k = 0;
+
+ while (p < end - 2) {
+ if (*p == '=')
+ break;
+
+ c = base64val(*p++);
+ if (c < 0)
+ continue;
+
+ if (k + 6 >= 8) {
+ k -= 2;
+ *q++ = b | (c >> k);
+ b = c << (8 - k);
+ } else {
+ b |= c << (k + 2);
+ k += 6;
}
- break;
}
}
+ *q = '\0';
- if (charset)
- mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
+ if (*charset)
+ mutt_convert_string(&d0, charset, mod_cset.charset, M_ICONV_HOOK_FROM);
m_strcpy(d, len, d0);
- p_delete(&charset);
p_delete(&d0);
- return (0);
+ return 0;
}
/*
* must be B or Q. Also, we don't require the encoded word to be
* separated by linear-white-space (section 5(1)).
*/
-static const char *find_encoded_word (const char *s, const char **x)
+/* Returns the start ("=?") of the first encoded word found in s and stores
+ * the position just past its closing "?=" in *x; returns NULL (leaving *x
+ * alone) when s contains no encoded word. */
+static const char *find_encoded_word(const char *s, const char **x)
{
- const char *p, *q;
+ const char *p;
- q = s;
- while ((p = strstr (q, "=?"))) {
- for (q = p + 2;
- 0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
- if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
+ while ((p = strstr(s, "=?"))) {
+ s = p + 2;
+ while (0x20 < *s && *s < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *s)) {
+ s++;
+ }
+
+ /* strchr() also matches the terminating NUL, so test s[1] explicitly:
+ * otherwise a truncated "=?cs?" would make us read s[2] past the end */
+ if (s[0] != '?' || !s[1] || !strchr("BbQq", s[1]) || s[2] != '?')
continue;
- for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
- if (q[0] != '?' || q[1] != '=') {
- --q;
+
+ s += 3;
+ while (0x20 <= *s && *s < 0x7f && (*s != '?' || s[1] != '=')) {
+ s++;
+ }
+
+ if (s[0] != '?' || s[1] != '=') {
+ --s;
continue;
}
- *x = q + 2;
+ *x = s + 2;
return p;
}
- return 0;
+ return NULL;
}
/* return length of linear white space */
-static size_t lwslen (const char *s, size_t n)
+/* Return the length of the run of linear white space (" \t\r\n"; a NUL byte
+ * matches too, since strchr() finds the terminator) at the start of the n
+ * bytes at s.  Returns 0 if n <= 0 or if that run ends with CR or LF. */
+static ssize_t lwslen(const char *s, ssize_t n)
{
- const char *p = s;
- size_t len = n;
+ const char *p;
+ ssize_t len = n;
if (n <= 0)
return 0;
- for (; p < s + n; p++)
+ for (p = s; p < s + n; p++) {
if (!strchr (" \t\r\n", *p)) {
- len = (size_t) (p - s);
+ len = p - s;
break;
}
- if (strchr ("\r\n", *(p - 1))) /* LWS doesn't end with CRLF */
- len = (size_t) 0;
+ }
+
+ /* p == s means there is no leading white space at all; p[-1] would then
+ * lie before the buffer, so only look at it once p has advanced */
+ if (p > s && (p[-1] == '\r' || p[-1] == '\n')) /* LWS cannot end with CRLF */
+ return 0;
+
return len;
}
/* return length of linear white space : reverse */
-static size_t lwsrlen (const char *s, size_t n)
+/* Return the length of the run of linear white space (" \t\r\n") at the end
+ * of the n bytes at s.  Returns 0 if n <= 0 or if the last byte is CR or LF. */
+static ssize_t lwsrlen(const char *s, ssize_t n)
{
- const char *p = s + n - 1;
- size_t len = n;
+ ssize_t len;
if (n <= 0)
return 0;
- if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
- return (size_t) 0;
+ if (s[n - 1] == '\r' || s[n - 1] == '\n') /* LWS doesn't end with CRLF */
+ return 0;
- for (; p >= s; p--)
- if (!strchr (" \t\r\n", *p)) {
- len = (size_t) (s + n - 1 - p);
+ /* count the trailing white space by index, so that nothing is computed
+ * from s before the n <= 0 guard and no pointer ever precedes s */
+ for (len = 0; len < n; len++) {
+ if (!strchr(" \t\r\n", s[n - 1 - len])) {
break;
}
+ }
+
return len;
}
/* try to decode anything that looks like a valid RFC2047 encoded
* header field, ignoring RFC822 parsing rules
*/
-void rfc2047_decode (char **pd)
+/* Replaces *pd with a newly allocated copy in which every RFC 2047 encoded
+ * word has been decoded; white space between two encoded words is dropped.
+ * Does nothing when *pd is NULL or empty. */
+void rfc2047_decode(char **pd)
{
- const char *p, *q;
- size_t m, n;
- int found_encoded = 0;
- char *d0, *d;
const char *s = *pd;
- size_t dlen;
+ char *d0, *d;
+ ssize_t dlen;
+ int found_encoded = 0;
if (!s || !*s)
return;
d = d0 = p_new(char, dlen + 1);
while (*s && dlen > 0) {
- if (!(p = find_encoded_word (s, &q))) {
+ const char *p, *q;
+
+ p = find_encoded_word(s, &q);
+
+ if (!p) {
/* no encoded words */
- if (!option (OPTSTRICTMIME)) {
- n = m_strlen(s);
- if (found_encoded && (m = lwslen (s, n)) != 0) {
- if (m != n)
- *d = ' ', d++, dlen--;
- n -= m, s += m;
- }
- if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
- char *t;
- ssize_t tlen;
-
- t = p_dupstr(s, n);
- if (mutt_convert_nonmime_string (&t) == 0) {
- tlen = m_strlen(t);
- strncpy (d, t, tlen);
- d += tlen;
- }
- else {
- strncpy (d, s, n);
- d += n;
- }
- p_delete(&t);
- break;
+ ssize_t m, n;
+
+ n = m_strlen(s);
+ if (found_encoded && (m = lwslen(s, n)) != 0) {
+ if (m != n)
+ *d++ = ' ', dlen--;
+ n -= m, s += m;
+ }
+
+ /* only attempt the assumed-charset conversion when a charset other
+ * than us-ascii is configured, exactly as the strcasecmp test this
+ * replaces did (non-zero result == "differs from us-ascii") */
+ if (mime_which_token(mod_cset.assumed_charset, -1) != MIME_US_ASCII) {
+ char *t;
+
+ t = p_dupstr(s, n);
+ if (mutt_convert_nonmime_string(&t) == 0) {
+ d += m_strcpy(d, dlen, t);
+ } else {
+ d += m_strcpy(d, dlen, s);
}
+ p_delete(&t);
+ break;
}
- strncpy (d, s, dlen);
- d += dlen;
+
+ d += m_strcpy(d, dlen, s);
break;
}
if (p != s) {
+ ssize_t m, n;
+
n = (p - s);
/* ignore spaces between encoded words
* and linear white spaces between encoded word and *text */
- if (!option (OPTSTRICTMIME)) {
- if (found_encoded && (m = lwslen (s, n)) != 0) {
- if (m != n)
- *d = ' ', d++, dlen--;
- n -= m, s += m;
- }
-
- if ((m = n - lwsrlen (s, n)) != 0) {
- if (m > dlen)
- m = dlen;
- memcpy (d, s, m);
- d += m;
- dlen -= m;
- if (m != n)
- *d = ' ', d++, dlen--;
- }
+ if (found_encoded && (m = lwslen(s, n)) != 0) {
+ if (m != n)
+ *d++ = ' ', dlen--;
+ n -= m, s += m;
}
- else if (!found_encoded || strspn (s, " \t\r\n") != n) {
- if (n > dlen)
- n = dlen;
- memcpy (d, s, n);
- d += n;
- dlen -= n;
+
+ if ((m = n - lwsrlen(s, n)) != 0) {
+ m = m_strncpy(d, dlen, s, m);
+ d += m;
+ dlen -= m;
+ if (m != n)
+ *d++ = ' ', dlen--;
}
}
- rfc2047_decode_word (d, p, dlen);
+ rfc2047_decode_word(d, dlen, p, q);
found_encoded = 1;
s = q;
- n = m_strlen(d);
- dlen -= n;
- d += n;
+ while (*d && dlen)
+ d++, dlen--;
}
- *d = 0;
p_delete(pd);
*pd = d0;
- str_adjust (pd);
}
void rfc2047_decode_adrlist(address_t *a)