From: ak1 Date: Wed, 23 Feb 2005 10:33:00 +0000 (+0000) Subject: Andreas Krennmair: X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=fc9c62aae0a7ebf1ff9335e322e7c1f5d3e459d3 Andreas Krennmair: integrated assume_charset patch from http://www.emaillab.org/mutt/download15.html.en git-svn-id: svn://svn.berlios.de/mutt-ng/trunk@76 e385b8ad-14ed-0310-8656-cc95a2468c6d --- diff --git a/ChangeLog.mutt-ng b/ChangeLog.mutt-ng index c3c4800..2eceb6d 100644 --- a/ChangeLog.mutt-ng +++ b/ChangeLog.mutt-ng @@ -1,5 +1,8 @@ Changes specific to mutt-ng: +2005-02-23: + * Integrated assume_charset patch from http://www.emaillab.org/mutt/download15.html.en + 2005-02-22: * Merged mutt changes * Sidebar now honors the imap_home_namespace diff --git a/PATCHES b/PATCHES index 1cf4aef..20de3c3 100644 --- a/PATCHES +++ b/PATCHES @@ -1,3 +1,4 @@ +patch-1.5.6.tt.assumed_charset.1 patch-1.5.6.tg.hcache.12 patch-1.5.5.1.pdmef.short_mbox_name.1 rr.compressed diff --git a/charset.c b/charset.c index 4dc0f3b..e17cca7 100644 --- a/charset.c +++ b/charset.c @@ -591,3 +591,86 @@ void fgetconv_close (FGETCONV **_fc) iconv_close (fc->cd); FREE (_fc); } + +char *mutt_get_first_charset (const char *charset) +{ + static char fcharset[SHORT_STRING]; + const char *c, *c1; + + c = charset; + if (!mutt_strlen(c)) + return "us-ascii"; + if (!(c1 = strchr (c, ':'))) + return charset; + strfcpy (fcharset, c, c1 - c + 1); + return fcharset; +} + +static size_t convert_string (ICONV_CONST char *f, size_t flen, + const char *from, const char *to, + char **t, size_t *tlen) +{ + iconv_t cd; + char *buf, *ob; + size_t obl, n; + int e; + + cd = mutt_iconv_open (to, from, 0); + if (cd == (iconv_t)(-1)) + return (size_t)(-1); + obl = 4 * flen + 1; + ob = buf = safe_malloc (obl); + n = iconv (cd, &f, &flen, &ob, &obl); + if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) + { + e = errno; + FREE (&buf); + iconv_close (cd); + errno = e; + return (size_t)(-1); + } + *ob = '\0'; + 
+ *tlen = ob - buf; + + safe_realloc ((void **) &buf, ob - buf + 1); + *t = buf; + iconv_close (cd); + + return n; +} + +int mutt_convert_nonmime_string (char **ps) +{ + const char *c, *c1; + + for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) + { + char *u = *ps; + char *s; + char *fromcode; + size_t m, n; + size_t ulen = mutt_strlen (*ps); + size_t slen; + + if (!u || !*u) + return 0; + + c1 = strchr (c, ':'); + n = c1 ? c1 - c : mutt_strlen (c); + if (!n) + continue; + fromcode = safe_malloc (n + 1); + strfcpy (fromcode, c, n + 1); + m = convert_string (u, ulen, fromcode, Charset, &s, &slen); + FREE (&fromcode); + if (m != (size_t)(-1)) + { + FREE (ps); + *ps = s; + return 0; + } + } + return -1; +} + diff --git a/charset.h b/charset.h index cfc2ac5..6397493 100644 --- a/charset.h +++ b/charset.h @@ -35,6 +35,8 @@ int iconv_close (iconv_t); #endif int mutt_convert_string (char **, const char *, const char *, int); +char *mutt_get_first_charset (const char *); +int mutt_convert_nonmime_string (char **); iconv_t mutt_iconv_open (const char *, const char *, int); size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *); diff --git a/globals.h b/globals.h index 7890320..24fc694 100644 --- a/globals.h +++ b/globals.h @@ -34,6 +34,7 @@ WHERE ADDRESS *From; WHERE char *AliasFile; WHERE char *AliasFmt; +WHERE char *AssumedCharset; WHERE char *AttachSep; WHERE char *Attribution; WHERE char *AttachFormat; @@ -48,6 +49,7 @@ WHERE char *DsnNotify; WHERE char *DsnReturn; WHERE char *Editor; WHERE char *EscChar; +WHERE char *FileCharset; WHERE char *FolderFormat; WHERE char *ForwFmt; WHERE char *Fqdn; diff --git a/handler.c b/handler.c index 09556d1..251f479 100644 --- a/handler.c +++ b/handler.c @@ -1870,11 +1870,21 @@ void mutt_decode_attachment (BODY *b, STATE *s) Quotebuf[0] = '\0'; - if (istext && s->flags & M_CHARCONV) + if (istext) { - char *charset = mutt_get_parameter ("charset", b->parameter); - if (charset && 
Charset) - cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + if(s->flags & M_CHARCONV) + { + char *charset = mutt_get_parameter ("charset", b->parameter); + if (!option (OPTSTRICTMIME) && !charset) + charset = mutt_get_first_charset (AssumedCharset); + if (charset && Charset) + cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + } + else + { + if (b->file_charset) + cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM); + } } fseek (s->fpin, b->offset, 0); diff --git a/init.h b/init.h index c9be17e..d3dd411 100644 --- a/init.h +++ b/init.h @@ -188,6 +188,23 @@ struct option_t MuttVars[] = { ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before ** editing the body of an outgoing message. */ + { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"}, + /* + ** .pp + ** This variable is a colon-separated list of character encoding + ** schemes for messages without character encoding indication. + ** Header field values and message body content without character encoding + ** indication are assumed to be written in one of the schemes in this list. + ** By default, all the header fields and message body without any charset + ** indication are assumed to be in "us-ascii". + ** .pp + ** For example, Japanese users might prefer this: + ** .pp + ** set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8" + ** .pp + ** However, only the first content is valid for the message body. + ** This variable is valid only if $$strict_mime is unset. + */ #ifdef USE_NNTP { "ask_follow_up", DT_BOOL, R_NONE, OPTASKFOLLOWUP, 0 }, /* @@ -575,6 +592,20 @@ struct option_t MuttVars[] = { ** signed. ** (PGP only) */ + { "file_charset", DT_STR, R_NONE, UL &FileCharset, UL 0 }, + /* + ** .pp + ** This variable is a colon-separated list of character encoding + ** schemes for text file attachments. + ** If unset, $$charset value will be used instead. 
+ ** For example, the following configuration would work for Japanese + ** text handling: + ** .pp + ** set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8" + ** .pp + ** Note: "iso-2022-*" must be put at the head of the value as shown above + ** if included. + */ { "folder", DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" }, /* ** .pp @@ -2946,6 +2977,19 @@ struct option_t MuttVars[] = { ** Setting this variable causes the ``status bar'' to be displayed on ** the first line of the screen rather than near the bottom. */ + { "strict_mime", DT_BOOL, R_NONE, OPTSTRICTMIME, 1 }, + /* + ** .pp + ** When unset, non MIME-compliant messages that don't have any + ** charset indication in ``Content-Type'' field can be displayed + ** (non MIME-compliant messages are often generated by old mailers + ** or buggy mailers like MS Outlook Express). + ** See also $$assumed_charset. + ** .pp + ** This option also replaces linear-white-space between encoded-word + ** and *text with a single space to prevent the display of MIME-encoded + ** ``Subject'' field from being divided into multiple lines. + */ { "strict_threads", DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 }, /* ** .pp diff --git a/mutt.h b/mutt.h index bdf9110..fa33d31 100644 --- a/mutt.h +++ b/mutt.h @@ -457,6 +457,7 @@ enum OPTSORTRE, OPTSPAMSEP, OPTSTATUSONTOP, + OPTSTRICTMIME, OPTSTRICTTHREADS, OPTSTUFFQUOTED, OPTSUSPEND, @@ -697,6 +698,7 @@ typedef struct body * If NULL, filename is used * instead. */ + char *file_charset; /* charset of attached file */ CONTENT *content; /* structure used to store detailed info about * the content of the attachment. 
this is used * to determine what content-transfer-encoding diff --git a/parse.c b/parse.c index 939fc35..b8fa55f 100644 --- a/parse.c +++ b/parse.c @@ -213,9 +213,23 @@ static PARAMETER *parse_parameters (const char *s) if (*s == '"') { + int state_ascii = 1; s++; - for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++) + for (i=0; *s && i < sizeof (buffer) - 1; i++, s++) { + if (!option (OPTSTRICTMIME)) { + /* As iso-2022-* has a character of '"' with non-ascii state, + * ignore it. */ + if (*s == 0x1b && i < sizeof (buffer) - 2) + { + if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J')) + state_ascii = 1; + else + state_ascii = 0; + } + } + if (state_ascii && *s == '"') + break; if (*s == '\\') { /* Quote the next character */ @@ -384,7 +398,9 @@ void mutt_parse_content_type (char *s, BODY *ct) if (ct->type == TYPETEXT) { if (!(pc = mutt_get_parameter ("charset", ct->parameter))) - mutt_set_parameter ("charset", "us-ascii", &ct->parameter); + mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? 
"us-ascii" : + (const char *) mutt_get_first_charset (AssumedCharset), + &ct->parameter); } } diff --git a/rfc2047.c b/rfc2047.c index 9593a8e..f5155b4 100644 --- a/rfc2047.c +++ b/rfc2047.c @@ -710,13 +710,54 @@ static const char *find_encoded_word (const char *s, const char **x) return 0; } +/* return length of linear white space */ +static size_t lwslen (const char *s, size_t n) +{ + const char *p = s; + size_t len = n; + + if (n <= 0) + return 0; + + for (; p < s + n; p++) + if (!strchr (" \t\r\n", *p)) + { + len = (size_t)(p - s); + break; + } + if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */ + len = (size_t)0; + return len; +} + +/* return length of linear white space : reverse */ +static size_t lwsrlen (const char *s, size_t n) +{ + const char *p = s + n - 1; + size_t len = n; + + if (n <= 0) + return 0; + + if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */ + return (size_t)0; + + for (; p >= s; p--) + if (!strchr (" \t\r\n", *p)) + { + len = (size_t)(s + n - 1 - p); + break; + } + return len; +} + /* try to decode anything that looks like a valid RFC2047 encoded * header field, ignoring RFC822 parsing rules */ void rfc2047_decode (char **pd) { const char *p, *q; - size_t n; + size_t m, n; int found_encoded = 0; char *d0, *d; const char *s = *pd; @@ -733,6 +774,37 @@ void rfc2047_decode (char **pd) if (!(p = find_encoded_word (s, &q))) { /* no encoded words */ + if (!option (OPTSTRICTMIME)) + { + n = mutt_strlen (s); + if (found_encoded && (m = lwslen (s, n)) != 0) + { + if (m != n) + *d = ' ', d++, dlen--; + n -= m, s += m; + } + if (ascii_strcasecmp (AssumedCharset, "us-ascii")) + { + char *t; + size_t tlen; + + t = safe_malloc (n + 1); + strfcpy (t, s, n + 1); + if (mutt_convert_nonmime_string (&t) == 0) + { + tlen = mutt_strlen (t); + strncpy (d, t, tlen); + d += tlen; + } + else + { + strncpy (d, s, n); + d += n; + } + FREE (&t); + break; + } + } strncpy (d, s, dlen); d += dlen; break; @@ -741,8 +813,29 @@ void rfc2047_decode (char 
**pd) if (p != s) { n = (size_t) (p - s); - /* ignore spaces between encoded words */ - if (!found_encoded || strspn (s, " \t\r\n") != n) + /* ignore spaces between encoded words + * and linear white spaces between encoded word and *text */ + if (!option (OPTSTRICTMIME)) + { + if (found_encoded && (m = lwslen (s, n)) != 0) + { + if (m != n) + *d = ' ', d++, dlen--; + n -= m, s += m; + } + + if ((m = n - lwsrlen (s, n)) != 0) + { + if (m > dlen) + m = dlen; + memcpy (d, s, m); + d += m; + dlen -= m; + if (m != n) + *d = ' ', d++, dlen--; + } + } + else if (!found_encoded || strspn (s, " \t\r\n") != n) { if (n > dlen) n = dlen; @@ -770,9 +863,8 @@ void rfc2047_decode_adrlist (ADDRESS *a) { while (a) { - if (a->personal && strstr (a->personal, "=?") != NULL) { + if (a->personal) rfc2047_decode (&a->personal); - } #ifdef EXACT_ADDRESS if (a->val && strstr (a->val, "=?") != NULL) rfc2047_decode (&a->val); diff --git a/rfc2231.c b/rfc2231.c index ad03be9..3e49484 100644 --- a/rfc2231.c +++ b/rfc2231.c @@ -117,6 +117,11 @@ void rfc2231_decode_parameters (PARAMETER **headp) if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?")) rfc2047_decode (&p->value); + else if (!option (OPTSTRICTMIME)) + { + if (ascii_strcasecmp (AssumedCharset, "us-ascii")) + mutt_convert_nonmime_string (&p->value); + } *last = p; last = &p->next; diff --git a/sendlib.c b/sendlib.c index 281f9e3..9ebab5e 100644 --- a/sendlib.c +++ b/sendlib.c @@ -509,7 +509,7 @@ int mutt_write_mime_body (BODY *a, FILE *f) } if (a->type == TYPETEXT && (!a->noconv)) - fc = fgetconv_open (fpin, Charset, + fc = fgetconv_open (fpin, a->file_charset, mutt_get_body_charset (send_charset, sizeof (send_charset), a), 0); else @@ -909,6 +909,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b) CONTENT *info; CONTENT_STATE state; FILE *fp = NULL; + char *fromcode; char *tocode; char buffer[100]; char chsbuf[STRING]; @@ -943,15 +944,18 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b) if (b 
!= NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset)) { char *chs = mutt_get_parameter ("charset", b->parameter); + char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ? + FileCharset : Charset) : Charset; if (Charset && (chs || SendCharset) && - convert_file_from_to (fp, Charset, chs ? chs : SendCharset, - 0, &tocode, info) != (size_t)(-1)) + convert_file_from_to (fp, fchs, chs ? chs : SendCharset, + &fromcode, &tocode, info) != (size_t)(-1)) { if (!chs) { mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode); mutt_set_parameter ("charset", chsbuf, &b->parameter); } + b->file_charset = fromcode; FREE (&tocode); safe_fclose (&fp); return info; @@ -1334,6 +1338,7 @@ BODY *mutt_make_message_attach (CONTEXT *ctx, HEADER *hdr, int attach_msg) body->unlink = 1; body->use_disp = 0; body->disposition = DISPINLINE; + body->noconv = 1; mutt_parse_mime_message (ctx, hdr);