From: ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>
Date: Wed, 23 Feb 2005 10:33:00 +0000 (+0000)
Subject: Andreas Krennmair:
X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=fc9c62aae0a7ebf1ff9335e322e7c1f5d3e459d3

Andreas Krennmair:
	integrated assume_charset patch from http://www.emaillab.org/mutt/download15.html.en


git-svn-id: svn://svn.berlios.de/mutt-ng/trunk@76 e385b8ad-14ed-0310-8656-cc95a2468c6d
---

diff --git a/ChangeLog.mutt-ng b/ChangeLog.mutt-ng
index c3c4800..2eceb6d 100644
--- a/ChangeLog.mutt-ng
+++ b/ChangeLog.mutt-ng
@@ -1,5 +1,8 @@
 Changes specific to mutt-ng:
 
+2005-02-23:
+  * Integrated assume_charset patch from http://www.emaillab.org/mutt/download15.html.en
+
 2005-02-22:
   * Merged mutt changes
   * Sidebar now honors the imap_home_namespace
diff --git a/PATCHES b/PATCHES
index 1cf4aef..20de3c3 100644
--- a/PATCHES
+++ b/PATCHES
@@ -1,3 +1,4 @@
+patch-1.5.6.tt.assumed_charset.1
 patch-1.5.6.tg.hcache.12
 patch-1.5.5.1.pdmef.short_mbox_name.1
 rr.compressed
diff --git a/charset.c b/charset.c
index 4dc0f3b..e17cca7 100644
--- a/charset.c
+++ b/charset.c
@@ -591,3 +591,130 @@ void fgetconv_close (FGETCONV **_fc)
     iconv_close (fc->cd);
   FREE (_fc);
 }
+
+/*
+ * Return the first entry of the colon-separated charset list `charset'.
+ *
+ * The result is one of:
+ *  - "us-ascii" when mutt_strlen() reports the list as empty;
+ *  - `charset' itself when it contains no ':' (a single entry);
+ *  - otherwise a copy of the text before the first ':' held in a static
+ *    buffer, which the next call overwrites.  An entry too long for the
+ *    buffer is truncated.
+ *
+ * None of these is heap-allocated, so the caller must not free it.
+ */
+char *mutt_get_first_charset (const char *charset)
+{
+  static char fcharset[SHORT_STRING];
+  const char *c1;
+  size_t n;
+
+  if (!mutt_strlen (charset))
+    return "us-ascii";
+  if (!(c1 = strchr (charset, ':')))
+    return (char *) charset;
+
+  /* Entry length plus the terminating NUL, clamped so that an overlong
+   * first entry cannot overflow the static buffer. */
+  n = (size_t) (c1 - charset) + 1;
+  if (n > sizeof (fcharset))
+    n = sizeof (fcharset);
+  strfcpy (fcharset, charset, n);
+  return fcharset;
+}
+
+/*
+ * Convert the flen bytes at f from charset `from' to charset `to'.
+ *
+ * On success the converted, NUL-terminated text is stored in *t (allocated
+ * here; ownership passes to the caller), its length without the NUL in
+ * *tlen, and the value of the iconv() call on the input is returned.
+ *
+ * Returns (size_t)(-1) when the converter cannot be opened or when either
+ * iconv() call fails; *t and *tlen are then left untouched.  After a failed
+ * iconv() call errno is restored to the value it had right after that call.
+ */
+static size_t convert_string (ICONV_CONST char *f, size_t flen,
+                             const char *from, const char *to,
+                             char **t, size_t *tlen)
+{
+  iconv_t cd;
+  char *buf, *ob;
+  size_t obl, n;
+  int e;
+
+  cd = mutt_iconv_open (to, from, 0);
+  if (cd == (iconv_t)(-1))
+    return (size_t)(-1);
+  /* output buffer: four bytes per input byte plus one */
+  obl = 4 * flen + 1;
+  ob = buf = safe_malloc (obl);
+  n = iconv (cd, &f, &flen, &ob, &obl);
+  /* second call (null input) flushes any pending shift sequence */
+  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
+  {
+    e = errno; /* FREE/iconv_close below might change errno */
+    FREE (&buf);
+    iconv_close (cd);
+    errno = e;
+    return (size_t)(-1);
+  }
+  *ob = '\0';
+
+  *tlen = ob - buf;
+
+  /* shrink the buffer to the converted text plus its NUL */
+  safe_realloc ((void **) &buf, ob - buf + 1);
+  *t = buf;
+  iconv_close (cd);
+
+  return n;
+}
+
+/*
+ * Convert *ps to Charset, trying the entries of the colon-separated
+ * AssumedCharset list in order as the source charset.
+ *
+ * The first entry for which convert_string() succeeds wins: *ps is released
+ * with FREE() and replaced by the converted string, and 0 is returned.  0 is
+ * also returned, with *ps untouched, when *ps is NULL or empty (checked only
+ * once the loop is entered, i.e. when AssumedCharset is non-NULL).
+ *
+ * Returns -1 when AssumedCharset is NULL or no entry converts *ps.
+ */
+int mutt_convert_nonmime_string (char **ps)
+{
+  const char *c, *c1;
+
+  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
+  {
+    char *u = *ps;
+    char *s;
+    char *fromcode;
+    size_t m, n;
+    size_t ulen = mutt_strlen (*ps);
+    size_t slen;
+
+    if (!u || !*u)
+      return 0;
+
+    /* the current entry is the n bytes at c; c1 is its ':' (NULL if last) */
+    c1 = strchr (c, ':');
+    n = c1 ? c1 - c : mutt_strlen (c);
+    if (!n)
+      continue; /* skip empty entries such as "a::b" */
+    fromcode = safe_malloc (n + 1);
+    strfcpy (fromcode, c, n + 1);
+    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
+    FREE (&fromcode);
+    if (m != (size_t)(-1))
+    {
+      FREE (ps);
+      *ps = s;
+      return 0;
+    }
+  }
+  return -1;
+}
+
diff --git a/charset.h b/charset.h
index cfc2ac5..6397493 100644
--- a/charset.h
+++ b/charset.h
@@ -35,6 +35,8 @@ int iconv_close (iconv_t);
 #endif
 
 int mutt_convert_string (char **, const char *, const char *, int);
+char *mutt_get_first_charset (const char *);
+int mutt_convert_nonmime_string (char **);
 
 iconv_t mutt_iconv_open (const char *, const char *, int);
 size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
diff --git a/globals.h b/globals.h
index 7890320..24fc694 100644
--- a/globals.h
+++ b/globals.h
@@ -34,6 +34,7 @@ WHERE ADDRESS *From;
 
 WHERE char *AliasFile;
 WHERE char *AliasFmt;
+WHERE char *AssumedCharset;
 WHERE char *AttachSep;
 WHERE char *Attribution;
 WHERE char *AttachFormat;
@@ -48,6 +49,7 @@ WHERE char *DsnNotify;
 WHERE char *DsnReturn;
 WHERE char *Editor;
 WHERE char *EscChar;
+WHERE char *FileCharset;
 WHERE char *FolderFormat;
 WHERE char *ForwFmt;
 WHERE char *Fqdn;
diff --git a/handler.c b/handler.c
index 09556d1..251f479 100644
--- a/handler.c
+++ b/handler.c
@@ -1870,11 +1870,21 @@ void mutt_decode_attachment (BODY *b, STATE *s)
 
   Quotebuf[0] = '\0';
 
-  if (istext && s->flags & M_CHARCONV)
+  if (istext)
   {
-    char *charset = mutt_get_parameter ("charset", b->parameter);
-    if (charset && Charset)
-      cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    if(s->flags & M_CHARCONV)
+    {
+      char *charset = mutt_get_parameter ("charset", b->parameter);
+      if (!option (OPTSTRICTMIME) && !charset)
+        charset = mutt_get_first_charset (AssumedCharset);
+      if (charset && Charset)
+        cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    }
+    else
+    {
+      if (b->file_charset)
+        cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
+    }
   }
 
   fseek (s->fpin, b->offset, 0);
diff --git a/init.h b/init.h
index c9be17e..d3dd411 100644
--- a/init.h
+++ b/init.h
@@ -188,6 +188,23 @@ struct option_t MuttVars[] = {
   ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
   ** editing the body of an outgoing message.
   */  
+  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for messages without character encoding indication.
+  ** Header field values and message body content without character encoding
+  ** indication are assumed to be written in one of the charsets in this list.
+  ** By default, all the header fields and message body without any charset
+  ** indication are assumed to be in "us-ascii".
+  ** .pp
+  ** For example, Japanese users might prefer this:
+  ** .pp
+  **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** However, only the first entry is used for the message body.
+  ** This variable is valid only if $$strict_mime is unset.
+  */
 #ifdef USE_NNTP
   { "ask_follow_up",	DT_BOOL, R_NONE, OPTASKFOLLOWUP, 0 },
   /*
@@ -575,6 +592,20 @@ struct option_t MuttVars[] = {
   ** signed.
   ** (PGP only)
   */
+  { "file_charset",    DT_STR,  R_NONE, UL &FileCharset, UL 0 },
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for text file attachments.
+  ** If unset, $$charset value will be used instead.
+  ** For example, the following configuration would work for Japanese
+  ** text handling:
+  ** .pp
+  **   set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** Note: "iso-2022-*" must be put at the head of the value as shown above
+  ** if included.
+  */
   { "folder",		DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
   /*
   ** .pp
@@ -2946,6 +2977,19 @@ struct option_t MuttVars[] = {
   ** Setting this variable causes the ``status bar'' to be displayed on
   ** the first line of the screen rather than near the bottom.
   */
+  { "strict_mime",    DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
+  /*
+  ** .pp
+  ** When unset, non MIME-compliant messages that don't have any
+  ** charset indication in ``Content-Type'' field can be displayed
+  ** (non MIME-compliant messages are often generated by old mailers
+  ** or buggy mailers like MS Outlook Express).
+  ** See also $$assumed_charset.
+  ** .pp
+  ** When unset, linear-white-space between encoded-word and *text is
+  ** also replaced with a single space to prevent the display of a
+  ** MIME-encoded ``Subject'' field from being divided into multiple lines.
+  */
   { "strict_threads",	DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
   /*
   ** .pp
diff --git a/mutt.h b/mutt.h
index bdf9110..fa33d31 100644
--- a/mutt.h
+++ b/mutt.h
@@ -457,6 +457,7 @@ enum
   OPTSORTRE,
   OPTSPAMSEP,
   OPTSTATUSONTOP,
+  OPTSTRICTMIME,
   OPTSTRICTTHREADS,
   OPTSTUFFQUOTED,
   OPTSUSPEND,
@@ -697,6 +698,7 @@ typedef struct body
 				 * If NULL, filename is used 
 				 * instead.
 				 */
+  char *file_charset;           /* charset of attached file */
   CONTENT *content;             /* structure used to store detailed info about
 				 * the content of the attachment.  this is used
 				 * to determine what content-transfer-encoding
diff --git a/parse.c b/parse.c
index 939fc35..b8fa55f 100644
--- a/parse.c
+++ b/parse.c
@@ -213,9 +213,23 @@ static PARAMETER *parse_parameters (const char *s)
 
       if (*s == '"')
       {
+        int state_ascii = 1;
 	s++;
-	for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
+	for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
 	{
+	  if (!option (OPTSTRICTMIME)) {
+            /* As iso-2022-* has a character of '"' with non-ascii state,
+	     * ignore it. */
+            if (*s == 0x1b && i < sizeof (buffer) - 2)
+            {
+              if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
+                state_ascii = 1;
+              else
+                state_ascii = 0;
+            }
+          }
+          if (state_ascii && *s == '"')
+            break;
 	  if (*s == '\\')
 	  {
 	    /* Quote the next character */
@@ -384,7 +398,9 @@ void mutt_parse_content_type (char *s, BODY *ct)
   if (ct->type == TYPETEXT)
   {
     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
+      mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
+                         (const char *) mutt_get_first_charset (AssumedCharset),
+                         &ct->parameter);
   }
 
 }
diff --git a/rfc2047.c b/rfc2047.c
index 9593a8e..f5155b4 100644
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -710,13 +710,54 @@ static const char *find_encoded_word (const char *s, const char **x)
   return 0;
 }
 
+/* Return the length of the linear white space (LWS) at the start of s.
+ *
+ * Only the first n bytes of s are examined; SP, HTAB, CR and LF count as
+ * white space.  Returns 0 when n is 0, when s does not start with white
+ * space, or when the run ends in CR or LF (LWS doesn't end with CRLF).
+ */
+static size_t lwslen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  while (len < n && strchr (" \t\r\n", s[len]))
+    len++;
+
+  /* Look at the preceding byte only when one exists; for len == 0 that
+   * would be s[-1], which lies before the start of the buffer. */
+  if (len > 0 && strchr ("\r\n", s[len - 1]))
+    return 0;
+  return len;
+}
+
+/* Return the length of the linear white space (LWS) at the end of s.
+ *
+ * Only the first n bytes of s are examined; SP, HTAB, CR and LF count as
+ * white space.  Returns 0 when n is 0 or when the last byte is CR or LF
+ * (LWS doesn't end with CRLF), and n when all n bytes are white space.
+ */
+static size_t lwsrlen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  if (n == 0 || strchr ("\r\n", s[n - 1]))
+    return 0;
+
+  /* Count backwards by index so that no pointer before s is ever formed,
+   * even when n is 0 or the whole buffer is white space. */
+  while (len < n && strchr (" \t\r\n", s[n - 1 - len]))
+    len++;
+
+  return len;
+}
+
 /* try to decode anything that looks like a valid RFC2047 encoded
  * header field, ignoring RFC822 parsing rules
  */
 void rfc2047_decode (char **pd)
 {
   const char *p, *q;
-  size_t n;
+  size_t m, n;
   int found_encoded = 0;
   char *d0, *d;
   const char *s = *pd;
@@ -733,6 +774,37 @@ void rfc2047_decode (char **pd)
     if (!(p = find_encoded_word (s, &q)))
     {
       /* no encoded words */
+      if (!option (OPTSTRICTMIME))
+      {
+        n = mutt_strlen (s);
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+        {
+          char *t;
+          size_t tlen;
+
+          t = safe_malloc (n + 1);
+          strfcpy (t, s, n + 1);
+          if (mutt_convert_nonmime_string (&t) == 0)
+          {
+            tlen = mutt_strlen (t);
+            strncpy (d, t, tlen);
+            d += tlen;
+          }
+          else
+          {
+            strncpy (d, s, n);
+            d += n;
+          }
+          FREE (&t);
+          break;
+        }
+      }
       strncpy (d, s, dlen);
       d += dlen;
       break;
@@ -741,8 +813,29 @@ void rfc2047_decode (char **pd)
     if (p != s)
     {
       n = (size_t) (p - s);
-      /* ignore spaces between encoded words */
-      if (!found_encoded || strspn (s, " \t\r\n") != n)
+      /* ignore spaces between encoded words
+       * and linear white spaces between encoded word and *text */
+      if (!option (OPTSTRICTMIME))
+      {
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+
+        if ((m = n - lwsrlen (s, n)) != 0)
+        {
+          if (m > dlen)
+            m = dlen;
+          memcpy (d, s, m);
+          d += m;
+          dlen -= m;
+          if (m != n)
+            *d = ' ', d++, dlen--;
+        }
+      }
+      else if (!found_encoded || strspn (s, " \t\r\n") != n)
       {
         if (n > dlen)
           n = dlen;
@@ -770,9 +863,8 @@ void rfc2047_decode_adrlist (ADDRESS *a)
 {
   while (a)
   {
-    if (a->personal && strstr (a->personal, "=?") != NULL) {
+    if (a->personal)
       rfc2047_decode (&a->personal);
-    }
 #ifdef EXACT_ADDRESS
     if (a->val && strstr (a->val, "=?") != NULL)
       rfc2047_decode (&a->val);
diff --git a/rfc2231.c b/rfc2231.c
index ad03be9..3e49484 100644
--- a/rfc2231.c
+++ b/rfc2231.c
@@ -117,6 +117,11 @@ void rfc2231_decode_parameters (PARAMETER **headp)
 
       if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
 	rfc2047_decode (&p->value);
+      else if (!option (OPTSTRICTMIME))
+      {
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+          mutt_convert_nonmime_string (&p->value);
+      }
 
       *last = p;
       last = &p->next;
diff --git a/sendlib.c b/sendlib.c
index 281f9e3..9ebab5e 100644
--- a/sendlib.c
+++ b/sendlib.c
@@ -509,7 +509,7 @@ int mutt_write_mime_body (BODY *a, FILE *f)
   }
 
   if (a->type == TYPETEXT && (!a->noconv))
-    fc = fgetconv_open (fpin, Charset, 
+    fc = fgetconv_open (fpin, a->file_charset, 
 			mutt_get_body_charset (send_charset, sizeof (send_charset), a),
 			0);
   else
@@ -909,6 +909,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
   CONTENT *info;
   CONTENT_STATE state;
   FILE *fp = NULL;
+  char *fromcode;
   char *tocode;
   char buffer[100];
   char chsbuf[STRING];
@@ -943,15 +944,18 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
   if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
   {
     char *chs = mutt_get_parameter ("charset", b->parameter);
+    char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
+                                FileCharset : Charset) : Charset;
     if (Charset && (chs || SendCharset) &&
-	convert_file_from_to (fp, Charset, chs ? chs : SendCharset,
-			      0, &tocode, info) != (size_t)(-1))
+        convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
+                              &fromcode, &tocode, info) != (size_t)(-1))
     {
       if (!chs)
       {
 	mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
 	mutt_set_parameter ("charset", chsbuf, &b->parameter);
       }
+      b->file_charset = fromcode;
       FREE (&tocode);
       safe_fclose (&fp);
       return info;
@@ -1334,6 +1338,7 @@ BODY *mutt_make_message_attach (CONTEXT *ctx, HEADER *hdr, int attach_msg)
   body->unlink = 1;
   body->use_disp = 0;
   body->disposition = DISPINLINE;
+  body->noconv = 1;
 
   mutt_parse_mime_message (ctx, hdr);