rfc2047.c

   1 /*
   2  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   3  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
   4  *
   5  *     This program is free software; you can redistribute it and/or modify
   6  *     it under the terms of the GNU General Public License as published by
   7  *     the Free Software Foundation; either version 2 of the License, or
   8  *     (at your option) any later version.
   9  *
  10  *     This program is distributed in the hope that it will be useful,
  11  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *     GNU General Public License for more details.
  14  *
  15  *     You should have received a copy of the GNU General Public License
  16  *     along with this program; if not, write to the Free Software
  17  *     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  18  */
  19
  20 #if HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include "mutt.h"
  25 #include "mime.h"
  26 #include "charset.h"
  27 #include "rfc2047.h"
  28
  29 #include <ctype.h>
  30 #include <errno.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34
/*
 * Assertion control.  NDEBUG is not defined here (the #define below is
 * commented out), so unless it is defined elsewhere -- e.g. by config.h
 * or on the compiler command line -- the real assert() from <assert.h>
 * is used.  When NDEBUG is defined, assert() becomes an empty macro.
 */
/*#define NDEBUG*/

#ifdef NDEBUG
#define assert(x)
#else
#include <assert.h>
#endif

/* Longest encoded word this file will emit (RFC 2047 caps an
 * encoded-word at 75 characters). */
#define ENCWORD_LEN_MAX 75
/* Length of the fixed framing of an encoded word with one-character
 * charset, encoding and text fields. */
#define ENCWORD_LEN_MIN 9       /* strlen ("=?.?.?.?=") */

/* True if x is NUL, a space or a horizontal tab.  Note that x is
 * evaluated up to three times. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')

/* True if the two most significant bits of the low byte of c are "10".
 * Used below to avoid cutting a multibyte sequence when the data is
 * UTF-8. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)

/* Defined outside this file; used as the `specials' set when
 * _rfc2047_encode_string() is asked to encode specials. */
extern char RFC822Specials[];

/* Signature shared by b_encoder() and q_encoder():
 * (output buffer, data, data length, charset name) -> bytes written. */
typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
                             const char *);
  55
  56 static size_t convert_string (ICONV_CONST char *f, size_t flen,
  57                               const char *from, const char *to,
  58                               char **t, size_t * tlen)
  59 {
  60   iconv_t cd;
  61   char *buf, *ob;
  62   size_t obl, n;
  63   int e;
  64
  65   cd = mutt_iconv_open (to, from, 0);
  66   if (cd == (iconv_t) (-1))
  67     return (size_t) (-1);
  68   obl = 4 * flen + 1;
  69   ob = buf = safe_malloc (obl);
  70   n = iconv (cd, &f, &flen, &ob, &obl);
  71   if (n == (size_t) (-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t) (-1)) {
  72     e = errno;
  73     FREE (&buf);
  74     iconv_close (cd);
  75     errno = e;
  76     return (size_t) (-1);
  77   }
  78   *ob = '\0';
  79
  80   *tlen = ob - buf;
  81
  82   safe_realloc (&buf, ob - buf + 1);
  83   *t = buf;
  84   iconv_close (cd);
  85
  86   return n;
  87 }
  88
  89 char *mutt_choose_charset (const char *fromcode, const char *charsets,
  90                            char *u, size_t ulen, char **d, size_t * dlen)
  91 {
  92   char canonical_buff[LONG_STRING];
  93   char *e = 0, *tocode = 0;
  94   size_t elen = 0, bestn = 0;
  95   const char *p, *q;
  96
  97   for (p = charsets; p; p = q ? q + 1 : 0) {
  98     char *s, *t;
  99     size_t slen, n;
 100
 101     q = strchr (p, ':');
 102
 103     n = q ? q - p : strlen (p);
 104
 105     if (!n ||
 106         /* Assume that we never need more than 12 characters of
 107            encoded-text to encode a single character. */
 108         n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12))
 109       continue;
 110
 111     t = safe_malloc (n + 1);
 112     memcpy (t, p, n);
 113     t[n] = '\0';
 114
 115     n = convert_string (u, ulen, fromcode, t, &s, &slen);
 116     if (n == (size_t) (-1))
 117       continue;
 118
 119     if (!tocode || n < bestn) {
 120       bestn = n;
 121       FREE (&tocode);
 122       tocode = t;
 123       if (d) {
 124         FREE (&e);
 125         e = s;
 126       }
 127       else
 128         FREE (&s);
 129       elen = slen;
 130       if (!bestn)
 131         break;
 132     }
 133     else {
 134       FREE (&t);
 135       FREE (&s);
 136     }
 137   }
 138   if (tocode) {
 139     if (d)
 140       *d = e;
 141     if (dlen)
 142       *dlen = elen;
 143
 144     mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
 145     mutt_str_replace (&tocode, canonical_buff);
 146   }
 147   return tocode;
 148 }
 149
 150 static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 151                          const char *tocode)
 152 {
 153   char *s0 = s;
 154
 155   memcpy (s, "=?", 2), s += 2;
 156   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 157   memcpy (s, "?B?", 3), s += 3;
 158   for (;;) {
 159     if (!dlen)
 160       break;
 161     else if (dlen == 1) {
 162       *s++ = B64Chars[(*d >> 2) & 0x3f];
 163       *s++ = B64Chars[(*d & 0x03) << 4];
 164       *s++ = '=';
 165       *s++ = '=';
 166       break;
 167     }
 168     else if (dlen == 2) {
 169       *s++ = B64Chars[(*d >> 2) & 0x3f];
 170       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 171       *s++ = B64Chars[(d[1] & 0x0f) << 2];
 172       *s++ = '=';
 173       break;
 174     }
 175     else {
 176       *s++ = B64Chars[(*d >> 2) & 0x3f];
 177       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 178       *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 179       *s++ = B64Chars[d[2] & 0x3f];
 180       d += 3, dlen -= 3;
 181     }
 182   }
 183   memcpy (s, "?=", 2), s += 2;
 184   return s - s0;
 185 }
 186
 187 static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 188                          const char *tocode)
 189 {
 190   char hex[] = "0123456789ABCDEF";
 191   char *s0 = s;
 192
 193   memcpy (s, "=?", 2), s += 2;
 194   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 195   memcpy (s, "?Q?", 3), s += 3;
 196   while (dlen--) {
 197     unsigned char c = *d++;
 198
 199     if (c == ' ')
 200       *s++ = '_';
 201     else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) {
 202       *s++ = '=';
 203       *s++ = hex[(c & 0xf0) >> 4];
 204       *s++ = hex[c & 0x0f];
 205     }
 206     else
 207       *s++ = c;
 208   }
 209   memcpy (s, "?=", 2), s += 2;
 210   return s - s0;
 211 }
 212
 213 /*
 214  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 215  * be converted to an encoded word of length *wlen using *encoder.
 216  * Otherwise return an upper bound on the maximum length of the data
 217  * which could be converted.
 218  * The data is converted from fromcode (which must be stateless) to
 219  * tocode, unless fromcode is 0, in which case the data is assumed to
 220  * be already in tocode, which should be 8-bit and stateless.
 221  */
 222 static size_t try_block (ICONV_CONST char *d, size_t dlen,
 223                          const char *fromcode, const char *tocode,
 224                          encoder_t * encoder, size_t * wlen)
 225 {
 226   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 227   iconv_t cd;
 228   ICONV_CONST char *ib;
 229   char *ob, *p;
 230   size_t ibl, obl;
 231   int count, len, len_b, len_q;
 232
 233   if (fromcode) {
 234     cd = mutt_iconv_open (tocode, fromcode, 0);
 235     assert (cd != (iconv_t) (-1));
 236     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 237     if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t) (-1) ||
 238         iconv (cd, 0, 0, &ob, &obl) == (size_t) (-1)) {
 239       assert (errno == E2BIG);
 240       iconv_close (cd);
 241       assert (ib > d);
 242       return (ib - d == dlen) ? dlen : ib - d + 1;
 243     }
 244     iconv_close (cd);
 245   }
 246   else {
 247     if (dlen > sizeof (buf1) - strlen (tocode))
 248       return sizeof (buf1) - strlen (tocode) + 1;
 249     memcpy (buf1, d, dlen);
 250     ob = buf1 + dlen;
 251   }
 252
 253   count = 0;
 254   for (p = buf1; p < ob; p++) {
 255     unsigned char c = *p;
 256
 257     assert (strchr (MimeSpecials, '?'));
 258     if (c >= 0x7f || c < 0x20 || *p == '_' ||
 259         (c != ' ' && strchr (MimeSpecials, *p)))
 260       ++count;
 261   }
 262
 263   len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
 264   len_b = len + (((ob - buf1) + 2) / 3) * 4;
 265   len_q = len + (ob - buf1) + 2 * count;
 266
 267   /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 268   if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
 269     len_q = ENCWORD_LEN_MAX + 1;
 270
 271   if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 272     *encoder = b_encoder;
 273     *wlen = len_b;
 274     return 0;
 275   }
 276   else if (len_q <= ENCWORD_LEN_MAX) {
 277     *encoder = q_encoder;
 278     *wlen = len_q;
 279     return 0;
 280   }
 281   else
 282     return dlen;
 283 }
 284
 285 /*
 286  * Encode the data (d, dlen) into s using the encoder.
 287  * Return the length of the encoded word.
 288  */
 289 static size_t encode_block (char *s, char *d, size_t dlen,
 290                             const char *fromcode, const char *tocode,
 291                             encoder_t encoder)
 292 {
 293   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 294   iconv_t cd;
 295   ICONV_CONST char *ib;
 296   char *ob;
 297   size_t ibl, obl, n1, n2;
 298
 299   if (fromcode) {
 300     cd = mutt_iconv_open (tocode, fromcode, 0);
 301     assert (cd != (iconv_t) (-1));
 302     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 303     n1 = iconv (cd, &ib, &ibl, &ob, &obl);
 304     n2 = iconv (cd, 0, 0, &ob, &obl);
 305     assert (n1 != (size_t) (-1) && n2 != (size_t) (-1));
 306     iconv_close (cd);
 307     return (*encoder) (s, buf1, ob - buf1, tocode);
 308   }
 309   else
 310     return (*encoder) (s, d, dlen, tocode);
 311 }
 312
 313 /*
 314  * Discover how much of the data (d, dlen) can be converted into
 315  * a single encoded word. Return how much data can be converted,
 316  * and set the length *wlen of the encoded word and *encoder.
 317  * We start in column col, which limits the length of the word.
 318  */
 319 static size_t choose_block (char *d, size_t dlen, int col,
 320                             const char *fromcode, const char *tocode,
 321                             encoder_t * encoder, size_t * wlen)
 322 {
 323   size_t n, nn;
 324   int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
 325
 326   n = dlen;
 327   for (;;) {
 328     assert (d + n > d);
 329     nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 330     if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 331       break;
 332     n = (nn ? nn : n) - 1;
 333     assert (n > 0);
 334     if (utf8)
 335       while (n > 1 && CONTINUATION_BYTE (d[n]))
 336         --n;
 337   }
 338   return n;
 339 }
 340
 341 /*
 342  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 343  * allocated buffer (e, elen). The input data is in charset fromcode
 344  * and is converted into a charset chosen from charsets.
 345  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 346  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 347  * compatible with us-ascii and the original data is used.
 348  * The input data is assumed to be a single line starting at column col;
 349  * if col is non-zero, the preceding character was a space.
 350  */
 351 static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
 352                            const char *fromcode, const char *charsets,
 353                            char **e, size_t * elen, char *specials)
 354 {
 355   int ret = 0;
 356   char *buf;
 357   size_t bufpos, buflen;
 358   char *u, *t0, *t1, *t;
 359   char *s0, *s1;
 360   size_t ulen, r, n, wlen;
 361   encoder_t encoder;
 362   char *tocode1 = 0;
 363   const char *tocode;
 364   char *icode = "UTF-8";
 365
 366   /* Try to convert to UTF-8. */
 367   if (convert_string (d, dlen, fromcode, icode, &u, &ulen)) {
 368     ret = 1;
 369     icode = 0;
 370     u = safe_malloc ((ulen = dlen) + 1);
 371     memcpy (u, d, dlen);
 372     u[ulen] = 0;
 373   }
 374
 375   /* Find earliest and latest things we must encode. */
 376   s0 = s1 = t0 = t1 = 0;
 377   for (t = u; t < u + ulen; t++) {
 378     if ((*t & 0x80) ||
 379         (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 380       if (!t0)
 381         t0 = t;
 382       t1 = t;
 383     }
 384     else if (specials && strchr (specials, *t)) {
 385       if (!s0)
 386         s0 = t;
 387       s1 = t;
 388     }
 389   }
 390
 391   /* If we have something to encode, include RFC822 specials */
 392   if (t0 && s0 && s0 < t0)
 393     t0 = s0;
 394   if (t1 && s1 && s1 > t1)
 395     t1 = s1;
 396
 397   if (!t0) {
 398     /* No encoding is required. */
 399     *e = u;
 400     *elen = ulen;
 401     return ret;
 402   }
 403
 404   /* Choose target charset. */
 405   tocode = fromcode;
 406   if (icode) {
 407     if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
 408       tocode = tocode1;
 409     else
 410       ret = 2, icode = 0;
 411   }
 412
 413   /* Hack to avoid labelling 8-bit data as us-ascii. */
 414   if (!icode && mutt_is_us_ascii (tocode))
 415     tocode = "unknown-8bit";
 416
 417   /* Adjust t0 for maximum length of line. */
 418   t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 419   if (t < u)
 420     t = u;
 421   if (t < t0)
 422     t0 = t;
 423
 424
 425   /* Adjust t0 until we can encode a character after a space. */
 426   for (; t0 > u; t0--) {
 427     if (!HSPACE (*(t0 - 1)))
 428       continue;
 429     t = t0 + 1;
 430     if (icode)
 431       while (t < u + ulen && CONTINUATION_BYTE (*t))
 432         ++t;
 433     if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 434         col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 435       break;
 436   }
 437
 438   /* Adjust t1 until we can encode a character before a space. */
 439   for (; t1 < u + ulen; t1++) {
 440     if (!HSPACE (*t1))
 441       continue;
 442     t = t1 - 1;
 443     if (icode)
 444       while (CONTINUATION_BYTE (*t))
 445         --t;
 446     if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 447         1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 448       break;
 449   }
 450
 451   /* We shall encode the region [t0,t1). */
 452
 453   /* Initialise the output buffer with the us-ascii prefix. */
 454   buflen = 2 * ulen;
 455   buf = safe_malloc (buflen);
 456   bufpos = t0 - u;
 457   memcpy (buf, u, t0 - u);
 458
 459   col += t0 - u;
 460
 461   t = t0;
 462   for (;;) {
 463     /* Find how much we can encode. */
 464     n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 465     if (n == t1 - t) {
 466       /* See if we can fit the us-ascii suffix, too. */
 467       if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 468         break;
 469       n = t1 - t - 1;
 470       if (icode)
 471         while (CONTINUATION_BYTE (t[n]))
 472           --n;
 473       assert (t + n >= t);
 474       if (!n) {
 475         /* This should only happen in the really stupid case where the
 476            only word that needs encoding is one character long, but
 477            there is too much us-ascii stuff after it to use a single
 478            encoded word. We add the next word to the encoded region
 479            and try again. */
 480         assert (t1 < u + ulen);
 481         for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 482         continue;
 483       }
 484       n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 485     }
 486
 487     /* Add to output buffer. */
 488 #define LINEBREAK "\n\t"
 489     if (bufpos + wlen + strlen (LINEBREAK) > buflen) {
 490       buflen = bufpos + wlen + strlen (LINEBREAK);
 491       safe_realloc (&buf, buflen);
 492     }
 493     r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 494     assert (r == wlen);
 495     bufpos += wlen;
 496     memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
 497     bufpos += strlen (LINEBREAK);
 498 #undef LINEBREAK
 499
 500     col = 1;
 501
 502     t += n;
 503   }
 504
 505   /* Add last encoded word and us-ascii suffix to buffer. */
 506   buflen = bufpos + wlen + (u + ulen - t1);
 507   safe_realloc (&buf, buflen + 1);
 508   r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 509   assert (r == wlen);
 510   bufpos += wlen;
 511   memcpy (buf + bufpos, t1, u + ulen - t1);
 512
 513   FREE (&tocode1);
 514   FREE (&u);
 515
 516   buf[buflen] = '\0';
 517
 518   *e = buf;
 519   *elen = buflen + 1;
 520   return ret;
 521 }
 522
 523 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 524 {
 525   char *e;
 526   size_t elen;
 527   char *charsets;
 528
 529   if (!Charset || !*pd)
 530     return;
 531
 532   charsets = SendCharset;
 533   if (!charsets || !*charsets)
 534     charsets = "UTF-8";
 535
 536   rfc2047_encode (*pd, strlen (*pd), col,
 537                   Charset, charsets, &e, &elen,
 538                   encode_specials ? RFC822Specials : NULL);
 539
 540   FREE (pd);
 541   *pd = e;
 542 }
 543
 544 void rfc2047_encode_adrlist (ADDRESS * addr, const char *tag)
 545 {
 546   ADDRESS *ptr = addr;
 547   int col = tag ? strlen (tag) + 2 : 32;
 548
 549   while (ptr) {
 550     if (ptr->personal)
 551       _rfc2047_encode_string (&ptr->personal, 1, col);
 552 #ifdef EXACT_ADDRESS
 553     if (ptr->val)
 554       _rfc2047_encode_string (&ptr->val, 1, col);
 555 #endif
 556     ptr = ptr->next;
 557   }
 558 }
 559
 560 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 561 {
 562   const char *pp, *pp1;
 563   char *pd, *d0;
 564   const char *t, *t1;
 565   int enc = 0, count = 0;
 566   char *charset = NULL;
 567
 568   pd = d0 = safe_malloc (strlen (s));
 569
 570   for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 571     count++;
 572     switch (count) {
 573     case 2:
 574       /* ignore language specification a la RFC 2231 */
 575       t = pp1;
 576       if ((t1 = memchr (pp, '*', t - pp)))
 577         t = t1;
 578       charset = safe_malloc (t - pp + 1);
 579       memcpy (charset, pp, t - pp);
 580       charset[t - pp] = '\0';
 581       break;
 582     case 3:
 583       if (toupper ((unsigned char) *pp) == 'Q')
 584         enc = ENCQUOTEDPRINTABLE;
 585       else if (toupper ((unsigned char) *pp) == 'B')
 586         enc = ENCBASE64;
 587       else {
 588         FREE (&charset);
 589         FREE (&d0);
 590         return (-1);
 591       }
 592       break;
 593     case 4:
 594       if (enc == ENCQUOTEDPRINTABLE) {
 595         for (; pp < pp1; pp++) {
 596           if (*pp == '_')
 597             *pd++ = ' ';
 598           else if (*pp == '=' &&
 599                    (!(pp[1] & ~127) && hexval (pp[1]) != -1) &&
 600                    (!(pp[2] & ~127) && hexval (pp[2]) != -1)) {
 601             *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 602             pp += 2;
 603           }
 604           else
 605             *pd++ = *pp;
 606         }
 607         *pd = 0;
 608       }
 609       else if (enc == ENCBASE64) {
 610         int c, b = 0, k = 0;
 611
 612         for (; pp < pp1; pp++) {
 613           if (*pp == '=')
 614             break;
 615           if ((*pp & ~127) || (c = base64val (*pp)) == -1)
 616             continue;
 617           if (k + 6 >= 8) {
 618             k -= 2;
 619             *pd++ = b | (c >> k);
 620             b = c << (8 - k);
 621           }
 622           else {
 623             b |= c << (k + 2);
 624             k += 6;
 625           }
 626         }
 627         *pd = 0;
 628       }
 629       break;
 630     }
 631   }
 632
 633   if (charset)
 634     mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 635   strfcpy (d, d0, len);
 636   FREE (&charset);
 637   FREE (&d0);
 638   return (0);
 639 }
 640
 641 /*
 642  * Find the start and end of the first encoded word in the string.
 643  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 644  * must be B or Q. Also, we don't require the encoded word to be
 645  * separated by linear-white-space (section 5(1)).
 646  */
 647 static const char *find_encoded_word (const char *s, const char **x)
 648 {
 649   const char *p, *q;
 650
 651   q = s;
 652   while ((p = strstr (q, "=?"))) {
 653     for (q = p + 2;
 654          0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 655     if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 656       continue;
 657     for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 658     if (q[0] != '?' || q[1] != '=') {
 659       --q;
 660       continue;
 661     }
 662
 663     *x = q + 2;
 664     return p;
 665   }
 666
 667   return 0;
 668 }
 669
 670 /* return length of linear white space */
 671 static size_t lwslen (const char *s, size_t n)
 672 {
 673   const char *p = s;
 674   size_t len = n;
 675
 676   if (n <= 0)
 677     return 0;
 678
 679   for (; p < s + n; p++)
 680     if (!strchr (" \t\r\n", *p)) {
 681       len = (size_t) (p - s);
 682       break;
 683     }
 684   if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 685     len = (size_t) 0;
 686   return len;
 687 }
 688
 689 /* return length of linear white space : reverse */
 690 static size_t lwsrlen (const char *s, size_t n)
 691 {
 692   const char *p = s + n - 1;
 693   size_t len = n;
 694
 695   if (n <= 0)
 696     return 0;
 697
 698   if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 699     return (size_t) 0;
 700
 701   for (; p >= s; p--)
 702     if (!strchr (" \t\r\n", *p)) {
 703       len = (size_t) (s + n - 1 - p);
 704       break;
 705     }
 706   return len;
 707 }
 708
 709 /* try to decode anything that looks like a valid RFC2047 encoded
 710  * header field, ignoring RFC822 parsing rules
 711  */
 712 void rfc2047_decode (char **pd)
 713 {
 714   const char *p, *q;
 715   size_t m, n;
 716   int found_encoded = 0;
 717   char *d0, *d;
 718   const char *s = *pd;
 719   size_t dlen;
 720
 721   if (!s || !*s)
 722     return;
 723
 724   dlen = 4 * strlen (s);        /* should be enough */
 725   d = d0 = safe_malloc (dlen + 1);
 726
 727   while (*s && dlen > 0) {
 728     if (!(p = find_encoded_word (s, &q))) {
 729       /* no encoded words */
 730       if (!option (OPTSTRICTMIME)) {
 731         n = mutt_strlen (s);
 732         if (found_encoded && (m = lwslen (s, n)) != 0) {
 733           if (m != n)
 734             *d = ' ', d++, dlen--;
 735           n -= m, s += m;
 736         }
 737         if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
 738           char *t;
 739           size_t tlen;
 740
 741           t = safe_malloc (n + 1);
 742           strfcpy (t, s, n + 1);
 743           if (mutt_convert_nonmime_string (&t) == 0) {
 744             tlen = mutt_strlen (t);
 745             strncpy (d, t, tlen);
 746             d += tlen;
 747           }
 748           else {
 749             strncpy (d, s, n);
 750             d += n;
 751           }
 752           FREE (&t);
 753           break;
 754         }
 755       }
 756       strncpy (d, s, dlen);
 757       d += dlen;
 758       break;
 759     }
 760
 761     if (p != s) {
 762       n = (size_t) (p - s);
 763       /* ignore spaces between encoded words
 764        * and linear white spaces between encoded word and *text */
 765       if (!option (OPTSTRICTMIME)) {
 766         if (found_encoded && (m = lwslen (s, n)) != 0) {
 767           if (m != n)
 768             *d = ' ', d++, dlen--;
 769           n -= m, s += m;
 770         }
 771
 772         if ((m = n - lwsrlen (s, n)) != 0) {
 773           if (m > dlen)
 774             m = dlen;
 775           memcpy (d, s, m);
 776           d += m;
 777           dlen -= m;
 778           if (m != n)
 779             *d = ' ', d++, dlen--;
 780         }
 781       }
 782       else if (!found_encoded || strspn (s, " \t\r\n") != n) {
 783         if (n > dlen)
 784           n = dlen;
 785         memcpy (d, s, n);
 786         d += n;
 787         dlen -= n;
 788       }
 789     }
 790
 791     rfc2047_decode_word (d, p, dlen);
 792     found_encoded = 1;
 793     s = q;
 794     n = mutt_strlen (d);
 795     dlen -= n;
 796     d += n;
 797   }
 798   *d = 0;
 799
 800   FREE (pd);
 801   *pd = d0;
 802   mutt_str_adjust (pd);
 803 }
 804
 805 void rfc2047_decode_adrlist (ADDRESS * a)
 806 {
 807   while (a) {
 808     if (a->personal)
 809       rfc2047_decode (&a->personal);
 810 #ifdef EXACT_ADDRESS
 811     if (a->val && strstr (a->val, "=?") != NULL)
 812       rfc2047_decode (&a->val);
 813 #endif
 814     a = a->next;
 815   }
 816 }