lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "mutt.h"
  37 #include "charset.h"
  38 #include "thread.h"
  39
  40 #include <assert.h>
  41 #include <ctype.h>
  42 #include <errno.h>
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45 #include <string.h>
  46
  47 /* Assertions are enabled by default; uncomment the following line to compile them out of this file. */
  48 /*#define NDEBUG*/
  49
  50 #ifdef NDEBUG
/* Replace assert() with an empty macro so every check below disappears. */
  51 #define assert(x)
  52 #else
  53 #endif
  54
/* Upper bound on the length of a single encoded word produced by this file. */
  55 #define ENCWORD_LEN_MAX 75
/* Length of the shortest encoded word: the fixed delimiters plus one
   character each for the charset, the encoding letter and the text. */
  56 #define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */
  57
/* True when x is a NUL, a space or a tab.  x is evaluated more than once,
   so pass an expression without side effects. */
  58 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  59
/* True when the two top bits of c are "10"; used below to avoid splitting
   UTF-8 input in the middle of a multi-byte character. */
  60 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  61
  62 /* converts f of len flen and charset from
  63        into *t of len *tlen and charset to
  64
  65    returns -1 on error
  66    returns number of converted chars from f, see iconv(3)
  67  */
  68 static ssize_t
  69 convert_string(const char *from, const char *f, ssize_t flen,
  70                const char *to,   char **t, ssize_t *tlen)
  71 {
  72     iconv_t cd;
  73     char *buf, *ob;
  74     ssize_t obl, n;
  75
  76     cd = mutt_iconv_open(to, from, 0);
  77
  78     if (cd == (iconv_t)(-1))
  79         return -1;
  80
  81     obl = 4 * flen + 1;
  82     ob = buf = p_new(char, obl);
  83     n = my_iconv(cd, &f, &flen, &ob, &obl);
  84
  85     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  86         int e = errno;
  87         iconv_close(cd);
  88         errno = e;
  89         p_delete(&buf);
  90         return -1;
  91     }
  92     iconv_close(cd);
  93
  94     *ob = '\0';
  95     *tlen = ob - buf;
  96     *t  = buf;
  97
  98     return n;
  99 }
 100
 101 /* choose the shortest encoding for u */
 102 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 103                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 104 {
 105     char *res = NULL;
 106     ssize_t reslen = 0;
 107
 108     char *tocode = NULL;
 109     ssize_t bestn = 0;
 110
 111     const char *p = charsets;
 112
 113     while (*p) {
 114         char cset[SHORT_STRING];
 115         const char *q;
 116         char *s;
 117         ssize_t slen, n;
 118
 119         q = strchr(p, ':');
 120         if (q) {
 121             n = m_strncpy(cset, sizeof(cset), p, q - p);
 122             p = ++q;
 123         } else {
 124             n = m_strcpy(cset, sizeof(cset), p);
 125             p += n;
 126         }
 127
 128         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 129             /* Assume that we never need more than 12 characters of
 130                encoded-text to encode a single character. */
 131             continue;
 132         }
 133
 134         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 135         if (n < 0)
 136             continue;
 137
 138         if (!tocode || n < bestn) {
 139             m_strreplace(&tocode, cset);
 140             bestn = n;
 141
 142             p_delete(&res);
 143             res = s;
 144             reslen = slen;
 145             if (!bestn)
 146                 break;
 147         } else {
 148             p_delete(&s);
 149         }
 150     }
 151
 152     if (tocode) {
 153         char buf[LONG_STRING];
 154
 155         if (dst && dlen) {
 156             *dst  = res;
 157             *dlen = reslen;
 158         } else {
 159             p_delete(&res);
 160         }
 161
 162         mutt_canonical_charset(buf, sizeof(buf), tocode);
 163         m_strreplace(&tocode, buf);
 164     }
 165
 166     return tocode;
 167 }
 168
 169
 170 /****************************************************************************/
 171 /* Encoding functions                                                       */
 172 /****************************************************************************/
 173
 174 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 175
 176 static size_t
 177 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 178 {
 179     char *s0 = s;
 180
 181     s += sprintf(s, "=?%s?B?", tocode);
 182
 183     for (;;) {
 184         switch (dlen) {
 185           case 0:
 186             goto done;
 187
 188           case 1:
 189             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 190             *s++ = __m_b64chars[(*d & 0x03) << 4];
 191             *s++ = '=';
 192             *s++ = '=';
 193             goto done;
 194
 195           case 2:
 196             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 197             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 198             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 199             *s++ = '=';
 200             goto done;
 201
 202           default:
 203             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 204             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 205             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 206             *s++ = __m_b64chars[d[2] & 0x3f];
 207             d += 3, dlen -= 3;
 208         }
 209     }
 210
 211   done:
 212     *s++ = '?';
 213     *s++ = '=';
 214     return s - s0;
 215 }
 216
 217 static size_t
 218 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 219 {
 220     char *s0 = s;
 221
 222     s += sprintf(s, "=?%s?Q?", tocode);
 223     while (dlen--) {
 224         unsigned char c = *d++;
 225
 226         if (c == ' ') {
 227             *s++ = '_';
 228         } else
 229         if (c & 0x80 || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) {
 230             *s++ = '=';
 231             *s++ = __m_b36chars_upper[c >> 4];
 232             *s++ = __m_b36chars_upper[c & 0xf];
 233         } else {
 234             *s++ = c;
 235         }
 236     }
 237
 238     *s++ = '?';
 239     *s++ = '=';
 240     return s - s0;
 241 }
 242
 243 /*
 244  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 245  * be converted to an encoded word of length *wlen using *encoder.
 246  * Otherwise return an upper bound on the maximum length of the data
 247  * which could be converted.
 248  * The data is converted from fromcode (which must be stateless) to
 249  * tocode, unless fromcode is 0, in which case the data is assumed to
 250  * be already in tocode, which should be 8-bit and stateless.
 251  */
 252 static size_t try_block (const char *d, ssize_t dlen,
 253                          const char *fromcode, const char *tocode,
 254                          encoder_t **encoder, ssize_t *wlen)
 255 {
 256     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 257     iconv_t cd;
 258     const char *ib;
 259     char *ob, *p;
 260     ssize_t ibl, obl;
 261     int count, len, len_b, len_q;
 262
 263     if (fromcode) {
 264         cd = mutt_iconv_open (tocode, fromcode, 0);
 265         assert (cd != (iconv_t) (-1));
 266         ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
 267         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 268         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 269         {
 270             assert (errno == E2BIG);
 271             iconv_close (cd);
 272             assert (ib > d);
 273             return (ib - d == dlen) ? dlen : ib - d + 1;
 274         }
 275         iconv_close (cd);
 276     }
 277     else {
 278         if (dlen > ssizeof(buf1) - m_strlen(tocode))
 279             return ssizeof(buf1) - m_strlen(tocode) + 1;
 280         memcpy (buf1, d, dlen);
 281         ob = buf1 + dlen;
 282     }
 283
 284     count = 0;
 285     for (p = buf1; p < ob; p++) {
 286         unsigned char c = *p;
 287
 288         assert (strchr (MimeSpecials, '?'));
 289         if (c >= 0x7f || c < 0x20 || *p == '_' ||
 290             (c != ' ' && strchr (MimeSpecials, *p)))
 291             ++count;
 292     }
 293
 294     len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 295     len_b = len + (((ob - buf1) + 2) / 3) * 4;
 296     len_q = len + (ob - buf1) + 2 * count;
 297
 298     /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 299     if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
 300         len_q = ENCWORD_LEN_MAX + 1;
 301
 302     if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 303         *encoder = b_encoder;
 304         *wlen = len_b;
 305         return 0;
 306     }
 307     else if (len_q <= ENCWORD_LEN_MAX) {
 308         *encoder = q_encoder;
 309         *wlen = len_q;
 310         return 0;
 311     }
 312     else
 313         return dlen;
 314 }
 315
 316 /*
 317  * Encode the data (d, dlen) into s using the encoder.
 318  * Return the length of the encoded word.
 319  */
 320 static size_t encode_block (char *s, char *d, ssize_t dlen,
 321                             const char *fromcode, const char *tocode,
 322                             encoder_t *encoder)
 323 {
 324     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 325     ssize_t ibl, obl, n1, n2;
 326     iconv_t cd;
 327     const char *ib;
 328     char *ob;
 329
 330     if (fromcode) {
 331         cd = mutt_iconv_open (tocode, fromcode, 0);
 332         assert (cd != (iconv_t) (-1));
 333         ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
 334         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 335         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 336         assert (n1 >= 0 && n2 >= 0);
 337         iconv_close (cd);
 338         return (*encoder) (s, buf1, ob - buf1, tocode);
 339     } else {
 340         return (*encoder) (s, d, dlen, tocode);
 341     }
 342 }
 343
 344 /*
 345  * Discover how much of the data (d, dlen) can be converted into
 346  * a single encoded word. Return how much data can be converted,
 347  * and set the length *wlen of the encoded word and *encoder.
 348  * We start in column col, which limits the length of the word.
 349  */
 350 static size_t choose_block(char *d, size_t dlen, int col,
 351                            const char *fromcode, const char *tocode,
 352                            encoder_t **encoder, ssize_t *wlen)
 353 {
 354     size_t n, nn;
 355     int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
 356
 357     n = dlen;
 358     for (;;) {
 359         assert (d + n > d);
 360         nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 361         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 362             break;
 363         n = (nn ? nn : n) - 1;
 364         assert (n > 0);
 365         if (utf8)
 366             while (n > 1 && CONTINUATION_BYTE (d[n]))
 367                 --n;
 368     }
 369     return n;
 370 }
 371
 372 /*
 373  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 374  * allocated buffer (e, elen). The input data is in charset fromcode
 375  * and is converted into a charset chosen from charsets.
 376  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 377  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 378  * compatible with us-ascii and the original data is used.
 379  * The input data is assumed to be a single line starting at column col;
 380  * if col is non-zero, the preceding character was a space.
 381  */
 382 static int rfc2047_encode (const char *d, ssize_t dlen, int col,
 383                            const char *fromcode, const char *charsets,
 384                            char **e, ssize_t *elen, const char *specials)
 385 {
 386     int ret = 0;
 387     char *buf;
 388     ssize_t bufpos, buflen;
 389     char *u, *t0, *t1, *t;
 390     char *s0, *s1;
 391     ssize_t ulen, r, n, wlen;
 392     encoder_t *encoder;
 393     char *tocode1 = 0;
 394     const char *tocode;
 395     const char *icode = "UTF-8";
 396
 397     /* Try to convert to UTF-8. */
 398     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 399         ret = 1;
 400         icode = 0;
 401         u = p_dupstr(d, ulen = dlen);
 402     }
 403
 404     /* Find earliest and latest things we must encode. */
 405     s0 = s1 = t0 = t1 = 0;
 406     for (t = u; t < u + ulen; t++) {
 407         if ((*t & 0x80) ||
 408             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 409             if (!t0)
 410                 t0 = t;
 411             t1 = t;
 412         }
 413         else if (specials && strchr (specials, *t)) {
 414             if (!s0)
 415                 s0 = t;
 416             s1 = t;
 417         }
 418     }
 419
 420     /* If we have something to encode, include RFC822 specials */
 421     if (t0 && s0 && s0 < t0)
 422         t0 = s0;
 423     if (t1 && s1 && s1 > t1)
 424         t1 = s1;
 425
 426     if (!t0) {
 427         /* No encoding is required. */
 428         *e = u;
 429         *elen = ulen;
 430         return ret;
 431     }
 432
 433     /* Choose target charset. */
 434     tocode = fromcode;
 435     if (icode) {
 436         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 437                                            NULL, NULL)))
 438             tocode = tocode1;
 439         else
 440             ret = 2, icode = 0;
 441     }
 442
 443     /* Hack to avoid labelling 8-bit data as us-ascii. */
 444     if (!icode && mutt_is_us_ascii (tocode))
 445         tocode = "unknown-8bit";
 446
 447     /* Adjust t0 for maximum length of line. */
 448     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 449     if (t < u)
 450         t = u;
 451     if (t < t0)
 452         t0 = t;
 453
 454
 455     /* Adjust t0 until we can encode a character after a space. */
 456     for (; t0 > u; t0--) {
 457         if (!HSPACE (*(t0 - 1)))
 458             continue;
 459         t = t0 + 1;
 460         if (icode)
 461             while (t < u + ulen && CONTINUATION_BYTE (*t))
 462                 ++t;
 463         if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 464             col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 465             break;
 466     }
 467
 468     /* Adjust t1 until we can encode a character before a space. */
 469     for (; t1 < u + ulen; t1++) {
 470         if (!HSPACE (*t1))
 471             continue;
 472         t = t1 - 1;
 473         if (icode)
 474             while (CONTINUATION_BYTE (*t))
 475                 --t;
 476         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 477             1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 478             break;
 479     }
 480
 481     /* We shall encode the region [t0,t1). */
 482
 483     /* Initialise the output buffer with the us-ascii prefix. */
 484     buflen = 2 * ulen;
 485     buf = p_new(char, buflen);
 486     bufpos = t0 - u;
 487     memcpy (buf, u, t0 - u);
 488
 489     col += t0 - u;
 490
 491     t = t0;
 492     for (;;) {
 493         /* Find how much we can encode. */
 494         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 495         if (n == t1 - t) {
 496             /* See if we can fit the us-ascii suffix, too. */
 497             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 498                 break;
 499             n = t1 - t - 1;
 500             if (icode)
 501                 while (CONTINUATION_BYTE (t[n]))
 502                     --n;
 503             assert (t + n >= t);
 504             if (!n) {
 505                 /* This should only happen in the really stupid case where the
 506                    only word that needs encoding is one character long, but
 507                    there is too much us-ascii stuff after it to use a single
 508                    encoded word. We add the next word to the encoded region
 509                    and try again. */
 510                 assert (t1 < u + ulen);
 511                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 512                 continue;
 513             }
 514             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 515         }
 516
 517         /* Add to output buffer. */
 518 #define LINEBREAK "\n\t"
 519         if (bufpos + wlen + m_strlen(LINEBREAK) > buflen) {
 520             buflen = bufpos + wlen + m_strlen(LINEBREAK);
 521             p_realloc(&buf, buflen);
 522         }
 523         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 524         assert (r == wlen);
 525         bufpos += wlen;
 526         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 527         bufpos += m_strlen(LINEBREAK);
 528 #undef LINEBREAK
 529
 530         col = 1;
 531
 532         t += n;
 533     }
 534
 535     /* Add last encoded word and us-ascii suffix to buffer. */
 536     buflen = bufpos + wlen + (u + ulen - t1);
 537     p_realloc(&buf, buflen + 1);
 538     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 539     assert (r == wlen);
 540     bufpos += wlen;
 541     memcpy (buf + bufpos, t1, u + ulen - t1);
 542
 543     p_delete(&tocode1);
 544     p_delete(&u);
 545
 546     buf[buflen] = '\0';
 547
 548     *e = buf;
 549     *elen = buflen + 1;
 550     return ret;
 551 }
 552
 553 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 554 {
 555     char *e;
 556     ssize_t elen;
 557     const char *charsets;
 558
 559     if (!Charset || !*pd)
 560         return;
 561
 562     charsets = SendCharset;
 563     if (!charsets || !*charsets)
 564         charsets = "UTF-8";
 565
 566     rfc2047_encode (*pd, m_strlen(*pd), col,
 567                     Charset, charsets, &e, &elen,
 568                     encode_specials ? RFC822Specials : NULL);
 569
 570     p_delete(pd);
 571     *pd = e;
 572 }
 573
 574 void rfc2047_encode_string(char **pd) {
 575     _rfc2047_encode_string(pd, 0, 32);
 576 }
 577
 578 void rfc2047_encode_adrlist (address_t * addr, const char *tag)
 579 {
 580     address_t *ptr = addr;
 581     int col = tag ? m_strlen(tag) + 2 : 32;
 582
 583     while (ptr) {
 584         if (ptr->personal)
 585             _rfc2047_encode_string (&ptr->personal, 1, col);
 586         ptr = ptr->next;
 587     }
 588 }
 589
 590 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 591 {
 592     const char *pp, *pp1;
 593     char *pd, *d0;
 594     const char *t, *t1;
 595     int enc = 0, count = 0;
 596     char *charset = NULL;
 597
 598     pd = d0 = p_new(char, m_strlen(s));
 599
 600     for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 601         count++;
 602         switch (count) {
 603           case 2:
 604             /* ignore language specification a la RFC 2231 */
 605             t = pp1;
 606             if ((t1 = memchr (pp, '*', t - pp)))
 607                 t = t1;
 608             charset = p_dupstr(pp, t - pp);
 609             break;
 610           case 3:
 611             if (toupper ((unsigned char) *pp) == 'Q')
 612                 enc = ENCQUOTEDPRINTABLE;
 613             else if (toupper ((unsigned char) *pp) == 'B')
 614                 enc = ENCBASE64;
 615             else {
 616                 p_delete(&charset);
 617                 p_delete(&d0);
 618                 return (-1);
 619             }
 620             break;
 621           case 4:
 622             if (enc == ENCQUOTEDPRINTABLE) {
 623                 for (; pp < pp1; pp++) {
 624                     if (*pp == '_')
 625                         *pd++ = ' ';
 626                     else if (*pp == '=' && hexval(pp[1]) >= 0 && hexval(pp[2]) >= 0) {
 627                         *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 628                         pp += 2;
 629                     }
 630                     else
 631                         *pd++ = *pp;
 632                 }
 633                 *pd = 0;
 634             }
 635             else if (enc == ENCBASE64) {
 636                 int c, b = 0, k = 0;
 637
 638                 for (; pp < pp1; pp++) {
 639                     if (*pp == '=')
 640                         break;
 641                     if ((c = base64val(*pp)) < 0)
 642                         continue;
 643                     if (k + 6 >= 8) {
 644                         k -= 2;
 645                         *pd++ = b | (c >> k);
 646                         b = c << (8 - k);
 647                     }
 648                     else {
 649                         b |= c << (k + 2);
 650                         k += 6;
 651                     }
 652                 }
 653                 *pd = 0;
 654             }
 655             break;
 656         }
 657     }
 658
 659     if (charset)
 660         mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 661     m_strcpy(d, len, d0);
 662     p_delete(&charset);
 663     p_delete(&d0);
 664     return (0);
 665 }
 666
 667 /*
 668  * Find the start and end of the first encoded word in the string.
 669  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 670  * must be B or Q. Also, we don't require the encoded word to be
 671  * separated by linear-white-space (section 5(1)).
 672  */
 673 static const char *find_encoded_word (const char *s, const char **x)
 674 {
 675     const char *p, *q;
 676
 677     q = s;
 678     while ((p = strstr (q, "=?"))) {
 679         for (q = p + 2;
 680              0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 681         if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 682             continue;
 683         for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 684         if (q[0] != '?' || q[1] != '=') {
 685             --q;
 686             continue;
 687         }
 688
 689         *x = q + 2;
 690         return p;
 691     }
 692
 693     return 0;
 694 }
 695
 696 /* return length of linear white space */
 697 static size_t lwslen (const char *s, size_t n)
 698 {
 699     const char *p = s;
 700     size_t len = n;
 701
 702     if (n <= 0)
 703         return 0;
 704
 705     for (; p < s + n; p++)
 706         if (!strchr (" \t\r\n", *p)) {
 707             len = (size_t) (p - s);
 708             break;
 709         }
 710     if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 711         len = (size_t) 0;
 712     return len;
 713 }
 714
 715 /* return length of linear white space : reverse */
 716 static size_t lwsrlen (const char *s, size_t n)
 717 {
 718     const char *p = s + n - 1;
 719     size_t len = n;
 720
 721     if (n <= 0)
 722         return 0;
 723
 724     if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 725         return (size_t) 0;
 726
 727     for (; p >= s; p--)
 728         if (!strchr (" \t\r\n", *p)) {
 729             len = (size_t) (s + n - 1 - p);
 730             break;
 731         }
 732     return len;
 733 }
 734
 735 /* try to decode anything that looks like a valid RFC2047 encoded
 736  * header field, ignoring RFC822 parsing rules
 737  */
 738 void rfc2047_decode (char **pd)
 739 {
 740     const char *p, *q;
 741     size_t m, n;
 742     int found_encoded = 0;
 743     char *d0, *d;
 744     const char *s = *pd;
 745     size_t dlen;
 746
 747     if (!s || !*s)
 748         return;
 749
 750     dlen = 4 * m_strlen(s);        /* should be enough */
 751     d = d0 = p_new(char, dlen + 1);
 752
 753     while (*s && dlen > 0) {
 754         if (!(p = find_encoded_word (s, &q))) {
 755             /* no encoded words */
 756             if (!option (OPTSTRICTMIME)) {
 757                 n = m_strlen(s);
 758                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 759                     if (m != n)
 760                         *d = ' ', d++, dlen--;
 761                     n -= m, s += m;
 762                 }
 763                 if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
 764                     char *t;
 765                     ssize_t tlen;
 766
 767                     t = p_dupstr(s, n);
 768                     if (mutt_convert_nonmime_string (&t) == 0) {
 769                         tlen = m_strlen(t);
 770                         strncpy (d, t, tlen);
 771                         d += tlen;
 772                     }
 773                     else {
 774                         strncpy (d, s, n);
 775                         d += n;
 776                     }
 777                     p_delete(&t);
 778                     break;
 779                 }
 780             }
 781             strncpy (d, s, dlen);
 782             d += dlen;
 783             break;
 784         }
 785
 786         if (p != s) {
 787             n = (p - s);
 788             /* ignore spaces between encoded words
 789              * and linear white spaces between encoded word and *text */
 790             if (!option (OPTSTRICTMIME)) {
 791                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 792                     if (m != n)
 793                         *d = ' ', d++, dlen--;
 794                     n -= m, s += m;
 795                 }
 796
 797                 if ((m = n - lwsrlen (s, n)) != 0) {
 798                     if (m > dlen)
 799                         m = dlen;
 800                     memcpy (d, s, m);
 801                     d += m;
 802                     dlen -= m;
 803                     if (m != n)
 804                         *d = ' ', d++, dlen--;
 805                 }
 806             }
 807             else if (!found_encoded || strspn (s, " \t\r\n") != n) {
 808                 if (n > dlen)
 809                     n = dlen;
 810                 memcpy (d, s, n);
 811                 d += n;
 812                 dlen -= n;
 813             }
 814         }
 815
 816         rfc2047_decode_word (d, p, dlen);
 817         found_encoded = 1;
 818         s = q;
 819         n = m_strlen(d);
 820         dlen -= n;
 821         d += n;
 822     }
 823     *d = 0;
 824
 825     p_delete(pd);
 826     *pd = d0;
 827     str_adjust (pd);
 828 }
 829
 830 void rfc2047_decode_adrlist(address_t *a)
 831 {
 832     while (a) {
 833         if (a->personal)
 834             rfc2047_decode(&a->personal);
 835         a = a->next;
 836     }
 837 }
 838
 839 void rfc2047_decode_envelope(ENVELOPE* e)
 840 {
 841     assert (e);
 842
 843     /* do RFC2047 decoding */
 844     rfc2047_decode_adrlist(e->from);
 845     rfc2047_decode_adrlist(e->to);
 846     rfc2047_decode_adrlist(e->cc);
 847     rfc2047_decode_adrlist(e->bcc);
 848     rfc2047_decode_adrlist(e->reply_to);
 849     rfc2047_decode_adrlist(e->mail_followup_to);
 850     rfc2047_decode_adrlist(e->return_path);
 851     rfc2047_decode_adrlist(e->sender);
 852
 853     if (e->subject) {
 854         rfc2047_decode(&e->subject);
 855         mutt_adjust_subject(e);
 856     }
 857 }