Andreas Krennmair:

author ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>

Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)

committer ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>

Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)
author ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>
Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)
committer ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>
Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)
diff --git a/ChangeLog.mutt-ng b/ChangeLog.mutt-ng

index c3c4800..2eceb6d 100644 (file)
--- a/ChangeLog.mutt-ng
+++ b/ChangeLog.mutt-ng
@@ -1,5 +1,8 @@
  Changes specific to mutt-ng:
  
+2005-02-23:
+  * Integrated assume_charset patch from http://www.emaillab.org/mutt/download15.html.en
+
  2005-02-22:
    * Merged mutt changes
    * Sidebar now honors the imap_home_namespace
diff --git a/PATCHES b/PATCHES

index 1cf4aef..20de3c3 100644 (file)
--- a/PATCHES
+++ b/PATCHES
@@ -1,3 +1,4 @@
+patch-1.5.6.tt.assumed_charset.1
  patch-1.5.6.tg.hcache.12
  patch-1.5.5.1.pdmef.short_mbox_name.1
  rr.compressed
diff --git a/charset.c b/charset.c

index 4dc0f3b..e17cca7 100644 (file)
--- a/charset.c
+++ b/charset.c
@@ -591,3 +591,123 @@ void fgetconv_close (FGETCONV **_fc)
      iconv_close (fc->cd);
    FREE (_fc);
  }
+
+/* Return the first entry of the colon-separated charset list `charset'.
+ *
+ * "us-ascii" is returned when mutt_strlen() reports the list as empty.  A
+ * list without ':' is returned as is, i.e. the caller's own string.
+ * Otherwise the first entry is copied into a static buffer, which the next
+ * copying call overwrites; an entry longer than the buffer is truncated.
+ */
+char *mutt_get_first_charset (const char *charset)
+{
+  static char fcharset[SHORT_STRING];
+  const char *c1;
+  size_t n;
+
+  if (!mutt_strlen (charset))
+    return "us-ascii";
+  if (!(c1 = strchr (charset, ':')))
+    return (char *) charset;
+
+  /* n is the size handed to strfcpy (entry length plus one).  Clamp it to
+   * the buffer size so that an over-long entry cannot make the copy run
+   * past the end of fcharset. */
+  n = c1 - charset + 1;
+  if (n > sizeof (fcharset))
+    n = sizeof (fcharset);
+  strfcpy (fcharset, charset, n);
+  return fcharset;
+}
+
+/* Convert the flen bytes at f from charset `from' to charset `to'.
+ *
+ * On success the NUL-terminated result is stored in *t (allocated here, to
+ * be freed by the caller), its length without the NUL in *tlen, and the
+ * value of the converting iconv() call is returned.  On failure
+ * (size_t)(-1) is returned and *t and *tlen are left untouched; when
+ * iconv() was the call that failed, errno holds its error code.
+ */
+static size_t convert_string (ICONV_CONST char *f, size_t flen,
+                             const char *from, const char *to,
+                             char **t, size_t *tlen)
+{
+  iconv_t cd;
+  char *buf, *ob;
+  size_t obl, n;
+  int e;
+
+  cd = mutt_iconv_open (to, from, 0);
+  if (cd == (iconv_t)(-1))
+    return (size_t)(-1);
+
+  /* iconv() is allowed to fill all obl output bytes, so the buffer gets
+   * one byte more than that for the terminating NUL written below */
+  obl = 4 * flen + 1;
+  ob = buf = safe_malloc (obl + 1);
+  n = iconv (cd, &f, &flen, &ob, &obl);
+  /* the second iconv() call flushes any pending shift sequence */
+  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
+  {
+    e = errno;   /* saved because the cleanup calls may change errno */
+    FREE (&buf);
+    iconv_close (cd);
+    errno = e;
+    return (size_t)(-1);
+  }
+  *ob = '\0';
+
+  *tlen = ob - buf;
+
+  /* shrink the buffer to the converted text plus its NUL */
+  safe_realloc ((void **) &buf, ob - buf + 1);
+  *t = buf;
+  iconv_close (cd);
+
+  return n;
+}
+
+/* Convert *ps, a string that carries no charset indication, to Charset.
+ *
+ * The entries of the colon-separated AssumedCharset list are tried in order
+ * as the source charset; empty entries are skipped.  As soon as one entry
+ * converts the string, *ps is freed and replaced by the converted copy.
+ *
+ * Returns 0 after a successful conversion, and also when AssumedCharset is
+ * set but *ps is NULL or empty.  Returns -1 when AssumedCharset is NULL or
+ * none of its entries converts the string.
+ */
+int mutt_convert_nonmime_string (char **ps)
+{
+  const char *c, *c1;
+
+  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
+  {
+    char *u = *ps;
+    char *s;
+    char *fromcode;
+    size_t m, n;
+    size_t ulen = mutt_strlen (*ps);
+    size_t slen;
+
+    if (!u || !*u)
+      return 0;
+
+    /* isolate the current entry: c up to the next ':' or the end */
+    c1 = strchr (c, ':');
+    n = c1 ? c1 - c : mutt_strlen (c);
+    if (!n)
+      continue;
+    fromcode = safe_malloc (n + 1);
+    strfcpy (fromcode, c, n + 1);
+    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
+    FREE (&fromcode);
+    if (m != (size_t)(-1))
+    {
+      FREE (ps);
+      *ps = s;
+      return 0;
+    }
+  }
+  return -1;
+}
+
diff --git a/charset.h b/charset.h

index cfc2ac5..6397493 100644 (file)
--- a/charset.h
+++ b/charset.h
@@ -35,6 +35,8 @@ int iconv_close (iconv_t);
  #endif
  
  int mutt_convert_string (char **, const char *, const char *, int);
+char *mutt_get_first_charset (const char *);
+int mutt_convert_nonmime_string (char **);
  
  iconv_t mutt_iconv_open (const char *, const char *, int);
  size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
diff --git a/globals.h b/globals.h

index 7890320..24fc694 100644 (file)
--- a/globals.h
+++ b/globals.h
@@ -34,6 +34,7 @@ WHERE ADDRESS *From;
  
  WHERE char *AliasFile;
  WHERE char *AliasFmt;
+WHERE char *AssumedCharset;
  WHERE char *AttachSep;
  WHERE char *Attribution;
  WHERE char *AttachFormat;
@@ -48,6 +49,7 @@ WHERE char *DsnNotify;
  WHERE char *DsnReturn;
  WHERE char *Editor;
  WHERE char *EscChar;
+WHERE char *FileCharset;
  WHERE char *FolderFormat;
  WHERE char *ForwFmt;
  WHERE char *Fqdn;
diff --git a/handler.c b/handler.c

index 09556d1..251f479 100644 (file)
--- a/handler.c
+++ b/handler.c
@@ -1870,11 +1870,21 @@ void mutt_decode_attachment (BODY *b, STATE *s)
  
    Quotebuf[0] = '\0';
  
-  if (istext && s->flags & M_CHARCONV)
+  if (istext)
    {
-    char *charset = mutt_get_parameter ("charset", b->parameter);
-    if (charset && Charset)
-      cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    if(s->flags & M_CHARCONV)
+    {
+      char *charset = mutt_get_parameter ("charset", b->parameter);
+      if (!option (OPTSTRICTMIME) && !charset)
+        charset = mutt_get_first_charset (AssumedCharset);
+      if (charset && Charset)
+        cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    }
+    else
+    {
+      if (b->file_charset)
+        cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
+    }
    }
  
    fseek (s->fpin, b->offset, 0);
diff --git a/init.h b/init.h

index c9be17e..d3dd411 100644 (file)
--- a/init.h
+++ b/init.h
@@ -188,6 +188,23 @@ struct option_t MuttVars[] = {
    ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
    ** editing the body of an outgoing message.
    */  
+  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for messages without character encoding indication.
+  ** Header field values and message body content without character encoding
+  ** indication are assumed to be written in one of the encodings in this list.
+  ** By default, all the header fields and message body without any charset
+  ** indication are assumed to be in "us-ascii".
+  ** .pp
+  ** For example, Japanese users might prefer this:
+  ** .pp
+  **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** However, only the first entry is used for the message body.
+  ** This variable is valid only if $$strict_mime is unset.
+  */
  #ifdef USE_NNTP
    { "ask_follow_up",   DT_BOOL, R_NONE, OPTASKFOLLOWUP, 0 },
    /*
@@ -575,6 +592,20 @@ struct option_t MuttVars[] = {
    ** signed.
    ** (PGP only)
    */
+  { "file_charset",    DT_STR,  R_NONE, UL &FileCharset, UL 0 },
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for text file attachments.
+  ** If unset, $$charset value will be used instead.
+  ** For example, the following configuration would work for Japanese
+  ** text handling:
+  ** .pp
+  **   set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** Note: "iso-2022-*" must be put at the head of the value as shown above
+  ** if included.
+  */
    { "folder",          DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
    /*
    ** .pp
@@ -2946,6 +2977,19 @@ struct option_t MuttVars[] = {
    ** Setting this variable causes the ``status bar'' to be displayed on
    ** the first line of the screen rather than near the bottom.
    */
+  { "strict_mime",    DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
+  /*
+  ** .pp
+  ** When unset, non MIME-compliant messages that don't have any
+  ** charset indication in ``Content-Type'' field can be displayed
+  ** (non MIME-compliant messages are often generated by old mailers
+  ** or buggy mailers like MS Outlook Express).
+  ** See also $$assumed_charset.
+  ** .pp
+  ** This option also replaces linear-white-space between encoded-word
+  ** and *text with a single space to prevent the display of MIME-encoded
+  ** ``Subject'' field from being divided into multiple lines.
+  */
    { "strict_threads",  DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
    /*
    ** .pp
diff --git a/mutt.h b/mutt.h

index bdf9110..fa33d31 100644 (file)
--- a/mutt.h
+++ b/mutt.h
@@ -457,6 +457,7 @@ enum
    OPTSORTRE,
    OPTSPAMSEP,
    OPTSTATUSONTOP,
+  OPTSTRICTMIME,
    OPTSTRICTTHREADS,
    OPTSTUFFQUOTED,
    OPTSUSPEND,
@@ -697,6 +698,7 @@ typedef struct body
                                  * If NULL, filename is used 
                                  * instead.
                                  */
+  char *file_charset;           /* charset of attached file */
    CONTENT *content;             /* structure used to store detailed info about
                                  * the content of the attachment.  this is used
                                  * to determine what content-transfer-encoding
diff --git a/parse.c b/parse.c

index 939fc35..b8fa55f 100644 (file)
--- a/parse.c
+++ b/parse.c
@@ -213,9 +213,23 @@ static PARAMETER *parse_parameters (const char *s)
  
        if (*s == '"')
        {
+        int state_ascii = 1;
         s++;
-       for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
+       for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
         {
+         if (!option (OPTSTRICTMIME)) {
+            /* In iso-2022-*, the byte '"' can occur while in a non-ascii
+            * state; such a byte must not end the quoted string. */
+            if (*s == 0x1b && i < sizeof (buffer) - 2)
+            {
+              if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
+                state_ascii = 1;
+              else
+                state_ascii = 0;
+            }
+          }
+          if (state_ascii && *s == '"')
+            break;
           if (*s == '\\')
           {
             /* Quote the next character */
@@ -384,7 +398,9 @@ void mutt_parse_content_type (char *s, BODY *ct)
    if (ct->type == TYPETEXT)
    {
      if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
+      mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
+                         (const char *) mutt_get_first_charset (AssumedCharset),
+                         &ct->parameter);
    }
  
  }
diff --git a/rfc2047.c b/rfc2047.c

index 9593a8e..f5155b4 100644 (file)
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -710,13 +710,54 @@ static const char *find_encoded_word (const char *s, const char **x)
    return 0;
  }
  
+/* Return the length of the linear white space (LWS) that starts s[0..n).
+ * The result is 0 when that run ends in CR or LF, because LWS never
+ * ends with a line break. */
+static size_t lwslen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  /* count the leading blanks, tabs, CRs and LFs */
+  while (len < n && strchr (" \t\r\n", s[len]))
+    len++;
+
+  /* Only look at the last byte of the run when the run is non-empty:
+   * with len == 0 there is no s[len - 1], and reading it would access
+   * memory in front of the buffer. */
+  if (len && strchr ("\r\n", s[len - 1]))
+    len = 0;
+
+  return len;
+}
+
+/* Return the length of the linear white space (LWS) that ends s[0..n).
+ * The result is 0 when the last byte is CR or LF, because LWS never
+ * ends with a line break. */
+static size_t lwsrlen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  /* Test n first: s + n - 1 would point in front of an empty buffer. */
+  if (n == 0)
+    return 0;
+
+  if (strchr ("\r\n", s[n - 1]))
+    return 0;
+
+  /* Count backwards by index; a pointer-based loop would have to step
+   * to s - 1 in order to terminate on all-blank input. */
+  while (len < n && strchr (" \t\r\n", s[n - 1 - len]))
+    len++;
+  return len;
+}
+
  /* try to decode anything that looks like a valid RFC2047 encoded
   * header field, ignoring RFC822 parsing rules
   */
  void rfc2047_decode (char **pd)
  {
    const char *p, *q;
-  size_t n;
+  size_t m, n;
    int found_encoded = 0;
    char *d0, *d;
    const char *s = *pd;
@@ -733,6 +774,37 @@ void rfc2047_decode (char **pd)
      if (!(p = find_encoded_word (s, &q)))
      {
        /* no encoded words */
+      if (!option (OPTSTRICTMIME))
+      {
+        n = mutt_strlen (s);
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+        {
+          char *t;
+          size_t tlen;
+
+          t = safe_malloc (n + 1);
+          strfcpy (t, s, n + 1);
+          if (mutt_convert_nonmime_string (&t) == 0)
+          {
+            tlen = mutt_strlen (t);
+            strncpy (d, t, tlen);
+            d += tlen;
+          }
+          else
+          {
+            strncpy (d, s, n);
+            d += n;
+          }
+          FREE (&t);
+          break;
+        }
+      }
        strncpy (d, s, dlen);
        d += dlen;
        break;
@@ -741,8 +813,29 @@ void rfc2047_decode (char **pd)
      if (p != s)
      {
        n = (size_t) (p - s);
-      /* ignore spaces between encoded words */
-      if (!found_encoded || strspn (s, " \t\r\n") != n)
+      /* ignore spaces between encoded words
+       * and linear white spaces between encoded word and *text */
+      if (!option (OPTSTRICTMIME))
+      {
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+
+        if ((m = n - lwsrlen (s, n)) != 0)
+        {
+          if (m > dlen)
+            m = dlen;
+          memcpy (d, s, m);
+          d += m;
+          dlen -= m;
+          if (m != n)
+            *d = ' ', d++, dlen--;
+        }
+      }
+      else if (!found_encoded || strspn (s, " \t\r\n") != n)
        {
          if (n > dlen)
            n = dlen;
@@ -770,9 +863,8 @@ void rfc2047_decode_adrlist (ADDRESS *a)
  {
    while (a)
    {
-    if (a->personal && strstr (a->personal, "=?") != NULL) {
+    if (a->personal)
        rfc2047_decode (&a->personal);
-    }
  #ifdef EXACT_ADDRESS
      if (a->val && strstr (a->val, "=?") != NULL)
        rfc2047_decode (&a->val);
diff --git a/rfc2231.c b/rfc2231.c

index ad03be9..3e49484 100644 (file)
--- a/rfc2231.c
+++ b/rfc2231.c
@@ -117,6 +117,11 @@ void rfc2231_decode_parameters (PARAMETER **headp)
  
        if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
         rfc2047_decode (&p->value);
+      else if (!option (OPTSTRICTMIME))
+      {
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+          mutt_convert_nonmime_string (&p->value);
+      }
  
        *last = p;
        last = &p->next;
diff --git a/sendlib.c b/sendlib.c

index 281f9e3..9ebab5e 100644 (file)
--- a/sendlib.c
+++ b/sendlib.c
@@ -509,7 +509,7 @@ int mutt_write_mime_body (BODY *a, FILE *f)
    }
  
    if (a->type == TYPETEXT && (!a->noconv))
-    fc = fgetconv_open (fpin, Charset, 
+    fc = fgetconv_open (fpin, a->file_charset, 
                         mutt_get_body_charset (send_charset, sizeof (send_charset), a),
                         0);
    else
@@ -909,6 +909,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
    CONTENT *info;
    CONTENT_STATE state;
    FILE *fp = NULL;
+  char *fromcode;
    char *tocode;
    char buffer[100];
    char chsbuf[STRING];
@@ -943,15 +944,18 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
    if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
    {
      char *chs = mutt_get_parameter ("charset", b->parameter);
+    char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
+                                FileCharset : Charset) : Charset;
      if (Charset && (chs || SendCharset) &&
-       convert_file_from_to (fp, Charset, chs ? chs : SendCharset,
-                             0, &tocode, info) != (size_t)(-1))
+        convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
+                              &fromcode, &tocode, info) != (size_t)(-1))
      {
        if (!chs)
        {
         mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
         mutt_set_parameter ("charset", chsbuf, &b->parameter);
        }
+      b->file_charset = fromcode;
        FREE (&tocode);
        safe_fclose (&fp);
        return info;
@@ -1334,6 +1338,7 @@ BODY *mutt_make_message_attach (CONTEXT *ctx, HEADER *hdr, int attach_msg)
    body->unlink = 1;
    body->use_disp = 0;
    body->disposition = DISPINLINE;
+  body->noconv = 1;
  
    mutt_parse_mime_message (ctx, hdr);
author	ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>
	Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)
committer	ak1 <ak1@e385b8ad-14ed-0310-8656-cc95a2468c6d>
	Wed, 23 Feb 2005 10:33:00 +0000 (10:33 +0000)
ChangeLog.mutt-ng		patch \| blob \| history
PATCHES		patch \| blob \| history
charset.c		patch \| blob \| history
charset.h		patch \| blob \| history
globals.h		patch \| blob \| history
handler.c		patch \| blob \| history
init.h		patch \| blob \| history
mutt.h		patch \| blob \| history
parse.c		patch \| blob \| history
rfc2047.c		patch \| blob \| history
rfc2231.c		patch \| blob \| history
sendlib.c		patch \| blob \| history