rfc2047.c

   1 /*
   2  * Copyright notice from original mutt:
   3  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   4  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
   5  *
   6  * This file is part of mutt-ng, see http://www.muttng.org/.
   7  * It's licensed under the GNU General Public License,
   8  * please see the file GPL in the top level source directory.
   9  */
  10
  11 #if HAVE_CONFIG_H
  12 # include "config.h"
  13 #endif
  14
  15 #include "mutt.h"
  16 #include "mime.h"
  17 #include "charset.h"
  18 #include "rfc2047.h"
  19
  20 #include <ctype.h>
  21 #include <errno.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25
/* Assertions are compiled in while the following line stays commented out;
   uncomment it to turn every assert() in this file into a no-op. */
  27 /*#define NDEBUG*/
  28
  29 #ifdef NDEBUG
  30 #define assert(x)
  31 #else
  32 #include <assert.h>
  33 #endif
  34
/* Upper limit used throughout this file for the length of one encoded
   word (and, with the starting column added, of one output line). */
#define ENCWORD_LEN_MAX 75
#define ENCWORD_LEN_MIN 9       /* mutt_strlen ("=?.?.?.?=") */

/* True for a blank, a tab, or the terminating NUL. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')

/* True when the two top bits of c are 10; used below so that a multi-byte
   UTF-8 character is never split between two encoded words. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)

/* Defined elsewhere; handed to rfc2047_encode() as the `specials' set when
   _rfc2047_encode_string() is asked to encode specials. */
extern char RFC822Specials[];

/* Signature shared by b_encoder() and q_encoder():
   (output, data, data length, charset name) -> number of bytes written. */
typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
                             const char *);
  46
  47 static size_t convert_string (ICONV_CONST char *f, size_t flen,
  48                               const char *from, const char *to,
  49                               char **t, size_t * tlen)
  50 {
  51   iconv_t cd;
  52   char *buf, *ob;
  53   size_t obl, n;
  54   int e;
  55
  56   cd = mutt_iconv_open (to, from, 0);
  57   if (cd == (iconv_t) (-1))
  58     return (size_t) (-1);
  59   obl = 4 * flen + 1;
  60   ob = buf = safe_malloc (obl);
  61   n = iconv (cd, &f, &flen, &ob, &obl);
  62   if (n == (size_t) (-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t) (-1)) {
  63     e = errno;
  64     FREE (&buf);
  65     iconv_close (cd);
  66     errno = e;
  67     return (size_t) (-1);
  68   }
  69   *ob = '\0';
  70
  71   *tlen = ob - buf;
  72
  73   safe_realloc (&buf, ob - buf + 1);
  74   *t = buf;
  75   iconv_close (cd);
  76
  77   return n;
  78 }
  79
  80 char *mutt_choose_charset (const char *fromcode, const char *charsets,
  81                            char *u, size_t ulen, char **d, size_t * dlen)
  82 {
  83   char canonical_buff[LONG_STRING];
  84   char *e = 0, *tocode = 0;
  85   size_t elen = 0, bestn = 0;
  86   const char *p, *q;
  87
  88   for (p = charsets; p; p = q ? q + 1 : 0) {
  89     char *s, *t;
  90     size_t slen, n;
  91
  92     q = strchr (p, ':');
  93
  94     n = q ? q - p : mutt_strlen (p);
  95
  96     if (!n ||
  97         /* Assume that we never need more than 12 characters of
  98            encoded-text to encode a single character. */
  99         n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12))
 100       continue;
 101
 102     t = safe_malloc (n + 1);
 103     memcpy (t, p, n);
 104     t[n] = '\0';
 105
 106     n = convert_string (u, ulen, fromcode, t, &s, &slen);
 107     if (n == (size_t) (-1))
 108       continue;
 109
 110     if (!tocode || n < bestn) {
 111       bestn = n;
 112       FREE (&tocode);
 113       tocode = t;
 114       if (d) {
 115         FREE (&e);
 116         e = s;
 117       }
 118       else
 119         FREE (&s);
 120       elen = slen;
 121       if (!bestn)
 122         break;
 123     }
 124     else {
 125       FREE (&t);
 126       FREE (&s);
 127     }
 128   }
 129   if (tocode) {
 130     if (d)
 131       *d = e;
 132     if (dlen)
 133       *dlen = elen;
 134
 135     mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
 136     mutt_str_replace (&tocode, canonical_buff);
 137   }
 138   return tocode;
 139 }
 140
 141 static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 142                          const char *tocode)
 143 {
 144   char *s0 = s;
 145
 146   memcpy (s, "=?", 2), s += 2;
 147   memcpy (s, tocode, mutt_strlen (tocode)), s += mutt_strlen (tocode);
 148   memcpy (s, "?B?", 3), s += 3;
 149   for (;;) {
 150     if (!dlen)
 151       break;
 152     else if (dlen == 1) {
 153       *s++ = B64Chars[(*d >> 2) & 0x3f];
 154       *s++ = B64Chars[(*d & 0x03) << 4];
 155       *s++ = '=';
 156       *s++ = '=';
 157       break;
 158     }
 159     else if (dlen == 2) {
 160       *s++ = B64Chars[(*d >> 2) & 0x3f];
 161       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 162       *s++ = B64Chars[(d[1] & 0x0f) << 2];
 163       *s++ = '=';
 164       break;
 165     }
 166     else {
 167       *s++ = B64Chars[(*d >> 2) & 0x3f];
 168       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 169       *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 170       *s++ = B64Chars[d[2] & 0x3f];
 171       d += 3, dlen -= 3;
 172     }
 173   }
 174   memcpy (s, "?=", 2), s += 2;
 175   return s - s0;
 176 }
 177
 178 static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 179                          const char *tocode)
 180 {
 181   char hex[] = "0123456789ABCDEF";
 182   char *s0 = s;
 183
 184   memcpy (s, "=?", 2), s += 2;
 185   memcpy (s, tocode, mutt_strlen (tocode)), s += mutt_strlen (tocode);
 186   memcpy (s, "?Q?", 3), s += 3;
 187   while (dlen--) {
 188     unsigned char c = *d++;
 189
 190     if (c == ' ')
 191       *s++ = '_';
 192     else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) {
 193       *s++ = '=';
 194       *s++ = hex[(c & 0xf0) >> 4];
 195       *s++ = hex[c & 0x0f];
 196     }
 197     else
 198       *s++ = c;
 199   }
 200   memcpy (s, "?=", 2), s += 2;
 201   return s - s0;
 202 }
 203
 204 /*
 205  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 206  * be converted to an encoded word of length *wlen using *encoder.
 207  * Otherwise return an upper bound on the maximum length of the data
 208  * which could be converted.
 209  * The data is converted from fromcode (which must be stateless) to
 210  * tocode, unless fromcode is 0, in which case the data is assumed to
 211  * be already in tocode, which should be 8-bit and stateless.
 212  */
 213 static size_t try_block (ICONV_CONST char *d, size_t dlen,
 214                          const char *fromcode, const char *tocode,
 215                          encoder_t * encoder, size_t * wlen)
 216 {
 217   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 218   iconv_t cd;
 219   ICONV_CONST char *ib;
 220   char *ob, *p;
 221   size_t ibl, obl;
 222   int count, len, len_b, len_q;
 223
 224   if (fromcode) {
 225     cd = mutt_iconv_open (tocode, fromcode, 0);
 226     assert (cd != (iconv_t) (-1));
 227     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - mutt_strlen (tocode);
 228     if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t) (-1) ||
 229         iconv (cd, 0, 0, &ob, &obl) == (size_t) (-1)) {
 230       assert (errno == E2BIG);
 231       iconv_close (cd);
 232       assert (ib > d);
 233       return (ib - d == dlen) ? dlen : ib - d + 1;
 234     }
 235     iconv_close (cd);
 236   }
 237   else {
 238     if (dlen > sizeof (buf1) - mutt_strlen (tocode))
 239       return sizeof (buf1) - mutt_strlen (tocode) + 1;
 240     memcpy (buf1, d, dlen);
 241     ob = buf1 + dlen;
 242   }
 243
 244   count = 0;
 245   for (p = buf1; p < ob; p++) {
 246     unsigned char c = *p;
 247
 248     assert (strchr (MimeSpecials, '?'));
 249     if (c >= 0x7f || c < 0x20 || *p == '_' ||
 250         (c != ' ' && strchr (MimeSpecials, *p)))
 251       ++count;
 252   }
 253
 254   len = ENCWORD_LEN_MIN - 2 + mutt_strlen (tocode);
 255   len_b = len + (((ob - buf1) + 2) / 3) * 4;
 256   len_q = len + (ob - buf1) + 2 * count;
 257
 258   /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 259   if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
 260     len_q = ENCWORD_LEN_MAX + 1;
 261
 262   if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 263     *encoder = b_encoder;
 264     *wlen = len_b;
 265     return 0;
 266   }
 267   else if (len_q <= ENCWORD_LEN_MAX) {
 268     *encoder = q_encoder;
 269     *wlen = len_q;
 270     return 0;
 271   }
 272   else
 273     return dlen;
 274 }
 275
 276 /*
 277  * Encode the data (d, dlen) into s using the encoder.
 278  * Return the length of the encoded word.
 279  */
 280 static size_t encode_block (char *s, char *d, size_t dlen,
 281                             const char *fromcode, const char *tocode,
 282                             encoder_t encoder)
 283 {
 284   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 285   iconv_t cd;
 286   ICONV_CONST char *ib;
 287   char *ob;
 288   size_t ibl, obl, n1, n2;
 289
 290   if (fromcode) {
 291     cd = mutt_iconv_open (tocode, fromcode, 0);
 292     assert (cd != (iconv_t) (-1));
 293     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - mutt_strlen (tocode);
 294     n1 = iconv (cd, &ib, &ibl, &ob, &obl);
 295     n2 = iconv (cd, 0, 0, &ob, &obl);
 296     assert (n1 != (size_t) (-1) && n2 != (size_t) (-1));
 297     iconv_close (cd);
 298     return (*encoder) (s, buf1, ob - buf1, tocode);
 299   }
 300   else
 301     return (*encoder) (s, d, dlen, tocode);
 302 }
 303
 304 /*
 305  * Discover how much of the data (d, dlen) can be converted into
 306  * a single encoded word. Return how much data can be converted,
 307  * and set the length *wlen of the encoded word and *encoder.
 308  * We start in column col, which limits the length of the word.
 309  */
 310 static size_t choose_block (char *d, size_t dlen, int col,
 311                             const char *fromcode, const char *tocode,
 312                             encoder_t * encoder, size_t * wlen)
 313 {
 314   size_t n, nn;
 315   int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
 316
 317   n = dlen;
 318   for (;;) {
 319     assert (d + n > d);
 320     nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 321     if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 322       break;
 323     n = (nn ? nn : n) - 1;
 324     assert (n > 0);
 325     if (utf8)
 326       while (n > 1 && CONTINUATION_BYTE (d[n]))
 327         --n;
 328   }
 329   return n;
 330 }
 331
 332 /*
 333  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 334  * allocated buffer (e, elen). The input data is in charset fromcode
 335  * and is converted into a charset chosen from charsets.
 336  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 337  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 338  * compatible with us-ascii and the original data is used.
 339  * The input data is assumed to be a single line starting at column col;
 340  * if col is non-zero, the preceding character was a space.
 341  */
 342 static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
 343                            const char *fromcode, const char *charsets,
 344                            char **e, size_t * elen, char *specials)
 345 {
 346   int ret = 0;
 347   char *buf;
 348   size_t bufpos, buflen;
 349   char *u, *t0, *t1, *t;
 350   char *s0, *s1;
 351   size_t ulen, r, n, wlen;
 352   encoder_t encoder;
 353   char *tocode1 = 0;
 354   const char *tocode;
 355   char *icode = "UTF-8";
 356
 357   /* Try to convert to UTF-8. */
 358   if (convert_string (d, dlen, fromcode, icode, &u, &ulen)) {
 359     ret = 1;
 360     icode = 0;
 361     u = safe_malloc ((ulen = dlen) + 1);
 362     memcpy (u, d, dlen);
 363     u[ulen] = 0;
 364   }
 365
 366   /* Find earliest and latest things we must encode. */
 367   s0 = s1 = t0 = t1 = 0;
 368   for (t = u; t < u + ulen; t++) {
 369     if ((*t & 0x80) ||
 370         (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 371       if (!t0)
 372         t0 = t;
 373       t1 = t;
 374     }
 375     else if (specials && strchr (specials, *t)) {
 376       if (!s0)
 377         s0 = t;
 378       s1 = t;
 379     }
 380   }
 381
 382   /* If we have something to encode, include RFC822 specials */
 383   if (t0 && s0 && s0 < t0)
 384     t0 = s0;
 385   if (t1 && s1 && s1 > t1)
 386     t1 = s1;
 387
 388   if (!t0) {
 389     /* No encoding is required. */
 390     *e = u;
 391     *elen = ulen;
 392     return ret;
 393   }
 394
 395   /* Choose target charset. */
 396   tocode = fromcode;
 397   if (icode) {
 398     if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
 399       tocode = tocode1;
 400     else
 401       ret = 2, icode = 0;
 402   }
 403
 404   /* Hack to avoid labelling 8-bit data as us-ascii. */
 405   if (!icode && mutt_is_us_ascii (tocode))
 406     tocode = "unknown-8bit";
 407
 408   /* Adjust t0 for maximum length of line. */
 409   t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 410   if (t < u)
 411     t = u;
 412   if (t < t0)
 413     t0 = t;
 414
 415
 416   /* Adjust t0 until we can encode a character after a space. */
 417   for (; t0 > u; t0--) {
 418     if (!HSPACE (*(t0 - 1)))
 419       continue;
 420     t = t0 + 1;
 421     if (icode)
 422       while (t < u + ulen && CONTINUATION_BYTE (*t))
 423         ++t;
 424     if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 425         col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 426       break;
 427   }
 428
 429   /* Adjust t1 until we can encode a character before a space. */
 430   for (; t1 < u + ulen; t1++) {
 431     if (!HSPACE (*t1))
 432       continue;
 433     t = t1 - 1;
 434     if (icode)
 435       while (CONTINUATION_BYTE (*t))
 436         --t;
 437     if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 438         1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 439       break;
 440   }
 441
 442   /* We shall encode the region [t0,t1). */
 443
 444   /* Initialise the output buffer with the us-ascii prefix. */
 445   buflen = 2 * ulen;
 446   buf = safe_malloc (buflen);
 447   bufpos = t0 - u;
 448   memcpy (buf, u, t0 - u);
 449
 450   col += t0 - u;
 451
 452   t = t0;
 453   for (;;) {
 454     /* Find how much we can encode. */
 455     n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 456     if (n == t1 - t) {
 457       /* See if we can fit the us-ascii suffix, too. */
 458       if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 459         break;
 460       n = t1 - t - 1;
 461       if (icode)
 462         while (CONTINUATION_BYTE (t[n]))
 463           --n;
 464       assert (t + n >= t);
 465       if (!n) {
 466         /* This should only happen in the really stupid case where the
 467            only word that needs encoding is one character long, but
 468            there is too much us-ascii stuff after it to use a single
 469            encoded word. We add the next word to the encoded region
 470            and try again. */
 471         assert (t1 < u + ulen);
 472         for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 473         continue;
 474       }
 475       n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 476     }
 477
 478     /* Add to output buffer. */
 479 #define LINEBREAK "\n\t"
 480     if (bufpos + wlen + mutt_strlen (LINEBREAK) > buflen) {
 481       buflen = bufpos + wlen + mutt_strlen (LINEBREAK);
 482       safe_realloc (&buf, buflen);
 483     }
 484     r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 485     assert (r == wlen);
 486     bufpos += wlen;
 487     memcpy (buf + bufpos, LINEBREAK, mutt_strlen (LINEBREAK));
 488     bufpos += mutt_strlen (LINEBREAK);
 489 #undef LINEBREAK
 490
 491     col = 1;
 492
 493     t += n;
 494   }
 495
 496   /* Add last encoded word and us-ascii suffix to buffer. */
 497   buflen = bufpos + wlen + (u + ulen - t1);
 498   safe_realloc (&buf, buflen + 1);
 499   r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 500   assert (r == wlen);
 501   bufpos += wlen;
 502   memcpy (buf + bufpos, t1, u + ulen - t1);
 503
 504   FREE (&tocode1);
 505   FREE (&u);
 506
 507   buf[buflen] = '\0';
 508
 509   *e = buf;
 510   *elen = buflen + 1;
 511   return ret;
 512 }
 513
 514 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 515 {
 516   char *e;
 517   size_t elen;
 518   char *charsets;
 519
 520   if (!Charset || !*pd)
 521     return;
 522
 523   charsets = SendCharset;
 524   if (!charsets || !*charsets)
 525     charsets = "UTF-8";
 526
 527   rfc2047_encode (*pd, mutt_strlen (*pd), col,
 528                   Charset, charsets, &e, &elen,
 529                   encode_specials ? RFC822Specials : NULL);
 530
 531   FREE (pd);
 532   *pd = e;
 533 }
 534
 535 void rfc2047_encode_adrlist (ADDRESS * addr, const char *tag)
 536 {
 537   ADDRESS *ptr = addr;
 538   int col = tag ? mutt_strlen (tag) + 2 : 32;
 539
 540   while (ptr) {
 541     if (ptr->personal)
 542       _rfc2047_encode_string (&ptr->personal, 1, col);
 543 #ifdef EXACT_ADDRESS
 544     if (ptr->val)
 545       _rfc2047_encode_string (&ptr->val, 1, col);
 546 #endif
 547     ptr = ptr->next;
 548   }
 549 }
 550
 551 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 552 {
 553   const char *pp, *pp1;
 554   char *pd, *d0;
 555   const char *t, *t1;
 556   int enc = 0, count = 0;
 557   char *charset = NULL;
 558
 559   pd = d0 = safe_malloc (mutt_strlen (s));
 560
 561   for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 562     count++;
 563     switch (count) {
 564     case 2:
 565       /* ignore language specification a la RFC 2231 */
 566       t = pp1;
 567       if ((t1 = memchr (pp, '*', t - pp)))
 568         t = t1;
 569       charset = safe_malloc (t - pp + 1);
 570       memcpy (charset, pp, t - pp);
 571       charset[t - pp] = '\0';
 572       break;
 573     case 3:
 574       if (toupper ((unsigned char) *pp) == 'Q')
 575         enc = ENCQUOTEDPRINTABLE;
 576       else if (toupper ((unsigned char) *pp) == 'B')
 577         enc = ENCBASE64;
 578       else {
 579         FREE (&charset);
 580         FREE (&d0);
 581         return (-1);
 582       }
 583       break;
 584     case 4:
 585       if (enc == ENCQUOTEDPRINTABLE) {
 586         for (; pp < pp1; pp++) {
 587           if (*pp == '_')
 588             *pd++ = ' ';
 589           else if (*pp == '=' &&
 590                    (!(pp[1] & ~127) && hexval (pp[1]) != -1) &&
 591                    (!(pp[2] & ~127) && hexval (pp[2]) != -1)) {
 592             *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 593             pp += 2;
 594           }
 595           else
 596             *pd++ = *pp;
 597         }
 598         *pd = 0;
 599       }
 600       else if (enc == ENCBASE64) {
 601         int c, b = 0, k = 0;
 602
 603         for (; pp < pp1; pp++) {
 604           if (*pp == '=')
 605             break;
 606           if ((*pp & ~127) || (c = base64val (*pp)) == -1)
 607             continue;
 608           if (k + 6 >= 8) {
 609             k -= 2;
 610             *pd++ = b | (c >> k);
 611             b = c << (8 - k);
 612           }
 613           else {
 614             b |= c << (k + 2);
 615             k += 6;
 616           }
 617         }
 618         *pd = 0;
 619       }
 620       break;
 621     }
 622   }
 623
 624   if (charset)
 625     mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 626   strfcpy (d, d0, len);
 627   FREE (&charset);
 628   FREE (&d0);
 629   return (0);
 630 }
 631
 632 /*
 633  * Find the start and end of the first encoded word in the string.
 634  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 635  * must be B or Q. Also, we don't require the encoded word to be
 636  * separated by linear-white-space (section 5(1)).
 637  */
 638 static const char *find_encoded_word (const char *s, const char **x)
 639 {
 640   const char *p, *q;
 641
 642   q = s;
 643   while ((p = strstr (q, "=?"))) {
 644     for (q = p + 2;
 645          0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 646     if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 647       continue;
 648     for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 649     if (q[0] != '?' || q[1] != '=') {
 650       --q;
 651       continue;
 652     }
 653
 654     *x = q + 2;
 655     return p;
 656   }
 657
 658   return 0;
 659 }
 660
 661 /* return length of linear white space */
 662 static size_t lwslen (const char *s, size_t n)
 663 {
 664   const char *p = s;
 665   size_t len = n;
 666
 667   if (n <= 0)
 668     return 0;
 669
 670   for (; p < s + n; p++)
 671     if (!strchr (" \t\r\n", *p)) {
 672       len = (size_t) (p - s);
 673       break;
 674     }
 675   if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 676     len = (size_t) 0;
 677   return len;
 678 }
 679
 680 /* return length of linear white space : reverse */
 681 static size_t lwsrlen (const char *s, size_t n)
 682 {
 683   const char *p = s + n - 1;
 684   size_t len = n;
 685
 686   if (n <= 0)
 687     return 0;
 688
 689   if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 690     return (size_t) 0;
 691
 692   for (; p >= s; p--)
 693     if (!strchr (" \t\r\n", *p)) {
 694       len = (size_t) (s + n - 1 - p);
 695       break;
 696     }
 697   return len;
 698 }
 699
 700 /* try to decode anything that looks like a valid RFC2047 encoded
 701  * header field, ignoring RFC822 parsing rules
 702  */
 703 void rfc2047_decode (char **pd)
 704 {
 705   const char *p, *q;
 706   size_t m, n;
 707   int found_encoded = 0;
 708   char *d0, *d;
 709   const char *s = *pd;
 710   size_t dlen;
 711
 712   if (!s || !*s)
 713     return;
 714
 715   dlen = 4 * mutt_strlen (s);        /* should be enough */
 716   d = d0 = safe_malloc (dlen + 1);
 717
 718   while (*s && dlen > 0) {
 719     if (!(p = find_encoded_word (s, &q))) {
 720       /* no encoded words */
 721       if (!option (OPTSTRICTMIME)) {
 722         n = mutt_strlen (s);
 723         if (found_encoded && (m = lwslen (s, n)) != 0) {
 724           if (m != n)
 725             *d = ' ', d++, dlen--;
 726           n -= m, s += m;
 727         }
 728         if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
 729           char *t;
 730           size_t tlen;
 731
 732           t = safe_malloc (n + 1);
 733           strfcpy (t, s, n + 1);
 734           if (mutt_convert_nonmime_string (&t) == 0) {
 735             tlen = mutt_strlen (t);
 736             strncpy (d, t, tlen);
 737             d += tlen;
 738           }
 739           else {
 740             strncpy (d, s, n);
 741             d += n;
 742           }
 743           FREE (&t);
 744           break;
 745         }
 746       }
 747       strncpy (d, s, dlen);
 748       d += dlen;
 749       break;
 750     }
 751
 752     if (p != s) {
 753       n = (size_t) (p - s);
 754       /* ignore spaces between encoded words
 755        * and linear white spaces between encoded word and *text */
 756       if (!option (OPTSTRICTMIME)) {
 757         if (found_encoded && (m = lwslen (s, n)) != 0) {
 758           if (m != n)
 759             *d = ' ', d++, dlen--;
 760           n -= m, s += m;
 761         }
 762
 763         if ((m = n - lwsrlen (s, n)) != 0) {
 764           if (m > dlen)
 765             m = dlen;
 766           memcpy (d, s, m);
 767           d += m;
 768           dlen -= m;
 769           if (m != n)
 770             *d = ' ', d++, dlen--;
 771         }
 772       }
 773       else if (!found_encoded || strspn (s, " \t\r\n") != n) {
 774         if (n > dlen)
 775           n = dlen;
 776         memcpy (d, s, n);
 777         d += n;
 778         dlen -= n;
 779       }
 780     }
 781
 782     rfc2047_decode_word (d, p, dlen);
 783     found_encoded = 1;
 784     s = q;
 785     n = mutt_strlen (d);
 786     dlen -= n;
 787     d += n;
 788   }
 789   *d = 0;
 790
 791   FREE (pd);
 792   *pd = d0;
 793   mutt_str_adjust (pd);
 794 }
 795
 796 void rfc2047_decode_adrlist (ADDRESS * a)
 797 {
 798   while (a) {
 799     if (a->personal)
 800       rfc2047_decode (&a->personal);
 801 #ifdef EXACT_ADDRESS
 802     if (a->val && strstr (a->val, "=?") != NULL)
 803       rfc2047_decode (&a->val);
 804 #endif
 805     a = a->next;
 806   }
 807 }