lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/lib-lib.h>
  31
  32 #include <lib-mime/mime.h>
  33
  34 #include "charset.h"
  35 #include "thread.h"
  36
  37 /* If you are debugging this file, comment out the following line. */
  38 /*#define NDEBUG*/
  39
  40 #ifdef NDEBUG
  41 #define assert(x)
  42 #else
  43 #endif
  44
  45 #define ENCWORD_LEN_MAX 75
  46 #define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */
  47
  48 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  49 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  50
  51 /* converts f of len flen and charset from
  52        into *t of len *tlen and charset to
  53
  54    returns -1 on error
  55    returns number of converted chars from f, see iconv(3)
  56  */
  57 static ssize_t
  58 convert_string(const char *from, const char *f, ssize_t flen,
  59                const char *to,   char **t, ssize_t *tlen)
  60 {
  61     iconv_t cd;
  62     char *buf, *ob;
  63     ssize_t obl, n;
  64
  65     cd = mutt_iconv_open(to, from, 0);
  66
  67     if (cd == MUTT_ICONV_ERROR)
  68         return -1;
  69
  70     obl = 4 * flen + 1;
  71     ob = buf = p_new(char, obl);
  72     n = my_iconv(cd, &f, &flen, &ob, &obl);
  73
  74     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  75         int e = errno;
  76         iconv_close(cd);
  77         errno = e;
  78         p_delete(&buf);
  79         return -1;
  80     }
  81     iconv_close(cd);
  82
  83     *ob = '\0';
  84     *tlen = ob - buf;
  85     *t  = buf;
  86
  87     return n;
  88 }
  89
  90 /* choose the shortest encoding for u */
  91 char *mutt_choose_charset(const char *fromcode, const char *charsets,
  92                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
  93 {
  94     char *res = NULL;
  95     ssize_t reslen = 0;
  96
  97     char *tocode = NULL;
  98     ssize_t bestn = 0;
  99
 100     const char *p = charsets;
 101
 102     while (*p) {
 103         char cset[SHORT_STRING];
 104         const char *q;
 105         char *s;
 106         ssize_t slen, n;
 107
 108         q = strchr(p, ':');
 109         if (q) {
 110             n = m_strncpy(cset, sizeof(cset), p, q - p);
 111             p = ++q;
 112         } else {
 113             n = m_strcpy(cset, sizeof(cset), p);
 114             p += n;
 115         }
 116
 117         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 118             /* Assume that we never need more than 12 characters of
 119                encoded-text to encode a single character. */
 120             continue;
 121         }
 122
 123         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 124         if (n < 0)
 125             continue;
 126
 127         if (!tocode || n < bestn) {
 128             m_strreplace(&tocode, cset);
 129             bestn = n;
 130
 131             p_delete(&res);
 132             res = s;
 133             reslen = slen;
 134             if (!bestn)
 135                 break;
 136         } else {
 137             p_delete(&s);
 138         }
 139     }
 140
 141     if (tocode) {
 142         char buf[LONG_STRING];
 143
 144         if (dst && dlen) {
 145             *dst  = res;
 146             *dlen = reslen;
 147         } else {
 148             p_delete(&res);
 149         }
 150
 151         charset_canonicalize(buf, sizeof(buf), tocode);
 152         m_strreplace(&tocode, buf);
 153     }
 154
 155     return tocode;
 156 }
 157
 158
 159 /****************************************************************************/
 160 /* Encoding functions                                                       */
 161 /****************************************************************************/
 162
 163 static const char __qp_special[128] = {
 164     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 165     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 166     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 167     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 168     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 169     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 170     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 171     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 172 };
 173
 174 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 175
 176 static size_t
 177 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 178 {
 179     char *s0 = s;
 180
 181     s += sprintf(s, "=?%s?B?", tocode);
 182
 183     for (;;) {
 184         switch (dlen) {
 185           case 0:
 186             goto done;
 187
 188           case 1:
 189             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 190             *s++ = __m_b64chars[(*d & 0x03) << 4];
 191             *s++ = '=';
 192             *s++ = '=';
 193             goto done;
 194
 195           case 2:
 196             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 197             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 198             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 199             *s++ = '=';
 200             goto done;
 201
 202           default:
 203             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 204             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 205             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 206             *s++ = __m_b64chars[d[2] & 0x3f];
 207             d += 3, dlen -= 3;
 208         }
 209     }
 210
 211   done:
 212     *s++ = '?';
 213     *s++ = '=';
 214     return s - s0;
 215 }
 216
 217 static size_t
 218 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 219 {
 220     char *s0 = s;
 221
 222     s += sprintf(s, "=?%s?Q?", tocode);
 223     while (dlen--) {
 224         unsigned char c = *d++;
 225
 226         if (c == ' ') {
 227             *s++ = '_';
 228         } else
 229         if (c & 0x80 || __qp_special[c]) {
 230             *s++ = '=';
 231             *s++ = __m_b36chars_upper[c >> 4];
 232             *s++ = __m_b36chars_upper[c & 0xf];
 233         } else {
 234             *s++ = c;
 235         }
 236     }
 237
 238     *s++ = '?';
 239     *s++ = '=';
 240     return s - s0;
 241 }
 242
 243 /*
 244  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 245  * be converted to an encoded word of length *wlen using *encoder.
 246  * Otherwise return an upper bound on the maximum length of the data
 247  * which could be converted.
 248  * The data is converted from fromcode (which must be stateless) to
 249  * tocode, unless fromcode is 0, in which case the data is assumed to
 250  * be already in tocode, which should be 8-bit and stateless.
 251  */
 252 static size_t try_block(const char *d, ssize_t dlen,
 253                         const char *fromcode, const char *tocode,
 254                         encoder_t **encoder, ssize_t *wlen)
 255 {
 256     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 257     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 258     char *ob;
 259
 260     if (fromcode) {
 261         const char *ib = d;
 262         ssize_t ibl = dlen;
 263         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 264
 265         assert (cd != MUTT_ICONV_ERROR);
 266
 267         ob = buf1;
 268
 269         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 270         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 271         {
 272             assert (errno == E2BIG && ib > d);
 273             iconv_close(cd);
 274             return (ib - d == dlen) ? dlen : ib - d + 1;
 275         }
 276         iconv_close (cd);
 277     } else {
 278         if (dlen > obl)
 279             return obl + 1;
 280         memcpy(buf1, d, dlen);
 281         ob = buf1 + dlen;
 282     }
 283
 284     {
 285         const char *p;
 286         int count, len, len_b, len_q;
 287
 288         count = 0;
 289         for (p = buf1; p < ob; p++) {
 290             count += (*p & 0x80 || __qp_special[(int)*p]);
 291         }
 292
 293         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 294         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 295         len_q = len + (ob - buf1) + 2 * count;
 296
 297         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 298         if (mime_which_token(tocode, -1) == MIME_ISO_2022_JP)
 299             len_q = ENCWORD_LEN_MAX + 1;
 300
 301         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 302             *encoder = b_encoder;
 303             *wlen = len_b;
 304             return 0;
 305         } else
 306         if (len_q <= ENCWORD_LEN_MAX) {
 307             *encoder = q_encoder;
 308             *wlen = len_q;
 309             return 0;
 310         } else {
 311             return dlen;
 312         }
 313     }
 314 }
 315
 316 /*
 317  * Encode the data (d, dlen) into s using the encoder.
 318  * Return the length of the encoded word.
 319  */
 320 static size_t
 321 encode_block(char *s, char *d, ssize_t dlen,
 322              const char *fromcode, const char *tocode, encoder_t *encoder)
 323 {
 324     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 325     ssize_t ibl, obl, n1, n2;
 326     iconv_t cd;
 327     const char *ib;
 328     char *ob;
 329
 330     if (fromcode) {
 331         cd = mutt_iconv_open(tocode, fromcode, 0);
 332         assert (cd != MUTT_ICONV_ERROR);
 333         ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
 334         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 335         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 336         assert (n1 >= 0 && n2 >= 0);
 337         iconv_close (cd);
 338         return (*encoder)(s, buf1, ob - buf1, tocode);
 339     } else {
 340         return (*encoder)(s, d, dlen, tocode);
 341     }
 342 }
 343
 344 /*
 345  * Discover how much of the data (d, dlen) can be converted into
 346  * a single encoded word. Return how much data can be converted,
 347  * and set the length *wlen of the encoded word and *encoder.
 348  * We start in column col, which limits the length of the word.
 349  */
 350 static size_t choose_block(char *d, size_t dlen, int col,
 351                            const char *fromcode, const char *tocode,
 352                            encoder_t **encoder, ssize_t *wlen)
 353 {
 354     size_t n, nn;
 355     int utf8 = mime_which_token(fromcode, -1) == MIME_UTF_8;
 356
 357     n = dlen;
 358     for (;;) {
 359         assert (d + n > d);
 360         nn = try_block(d, n, fromcode, tocode, encoder, wlen);
 361         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 362             break;
 363         n = (nn ? nn : n) - 1;
 364         assert (n > 0);
 365         if (utf8) {
 366             while (n > 1 && CONTINUATION_BYTE(d[n]))
 367                 --n;
 368         }
 369     }
 370     return n;
 371 }
 372
 373 /*
 374  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 375  * allocated buffer (e, elen). The input data is in charset fromcode
 376  * and is converted into a charset chosen from charsets.
 377  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 378  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 379  * compatible with us-ascii and the original data is used.
 380  * The input data is assumed to be a single line starting at column col;
 381  * if col is non-zero, the preceding character was a space.
 382  */
 383 /*** XXX: simplify that one day ***/
 384 static int rfc2047_encode(const char *d, ssize_t dlen, int col,
 385                           const char *fromcode, const char *charsets,
 386                           char **e, ssize_t *elen, const char *specials)
 387 {
 388     int ret = 0;
 389     char *buf;
 390     ssize_t bufpos, buflen;
 391     char *u, *t;
 392     char *s0, *s1, *t0, *t1;
 393     char *tocode1 = 0;
 394     const char *tocode;
 395     const char *icode = "UTF-8";
 396     ssize_t ulen, r, n, wlen;
 397     encoder_t *encoder;
 398
 399     /* Try to convert to UTF-8. */
 400     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 401         ret = 1;
 402         icode = NULL;
 403         u = p_dupstr(d, ulen = dlen);
 404     }
 405
 406     /* Find earliest and latest things we must encode. */
 407     s0 = s1 = t0 = t1 = NULL;
 408     for (t = u; t < u + ulen; t++) {
 409         if ((*t & 0x80) ||
 410             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 411             if (!t0)
 412                 t0 = t;
 413             t1 = t;
 414         }
 415         else if (specials && strchr (specials, *t)) {
 416             if (!s0)
 417                 s0 = t;
 418             s1 = t;
 419         }
 420     }
 421
 422     /* If we have something to encode, include RFC822 specials */
 423     if (t0 && s0 && s0 < t0)
 424         t0 = s0;
 425     if (t1 && s1 && s1 > t1)
 426         t1 = s1;
 427
 428     if (!t0) {
 429         /* No encoding is required. */
 430         *e = u;
 431         *elen = ulen;
 432         return ret;
 433     }
 434
 435     /* Choose target charset. */
 436     tocode = fromcode;
 437     if (icode) {
 438         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 439                                            NULL, NULL)))
 440             tocode = tocode1;
 441         else
 442             ret = 2, icode = 0;
 443     }
 444
 445     /* Hack to avoid labelling 8-bit data as us-ascii. */
 446     if (!icode && charset_is_us_ascii(tocode))
 447         tocode = "unknown-8bit";
 448
 449     /* Adjust t0 for maximum length of line. */
 450     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 451     if (t < u)
 452         t = u;
 453     if (t < t0)
 454         t0 = t;
 455
 456
 457     /* Adjust t0 until we can encode a character after a space. */
 458     for (; t0 > u; t0--) {
 459         if (!HSPACE(t0[-1]))
 460             continue;
 461         t = t0 + 1;
 462         if (icode) {
 463             while (t < u + ulen && CONTINUATION_BYTE(*t))
 464                 ++t;
 465         }
 466         if (!try_block(t0, t - t0, icode, tocode, &encoder, &wlen)
 467         &&  col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 468             break;
 469     }
 470
 471     /* Adjust t1 until we can encode a character before a space. */
 472     for (; t1 < u + ulen; t1++) {
 473         if (!HSPACE(*t1))
 474             continue;
 475         t = t1 - 1;
 476         if (icode) {
 477             while (CONTINUATION_BYTE(*t))
 478                 --t;
 479         }
 480         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen)
 481         &&  1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 482             break;
 483     }
 484
 485     /* We shall encode the region [t0,t1). */
 486
 487     /* Initialise the output buffer with the us-ascii prefix. */
 488     buflen = 2 * ulen;
 489     buf = p_new(char, buflen);
 490     bufpos = t0 - u;
 491     memcpy(buf, u, t0 - u);
 492
 493     col += t0 - u;
 494
 495     t = t0;
 496     for (;;) {
 497         /* Find how much we can encode. */
 498         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 499         if (n == t1 - t) {
 500             /* See if we can fit the us-ascii suffix, too. */
 501             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 502                 break;
 503             n = t1 - t - 1;
 504             if (icode)
 505                 while (CONTINUATION_BYTE (t[n]))
 506                     --n;
 507             assert (t + n >= t);
 508             if (!n) {
 509                 /* This should only happen in the really stupid case where the
 510                    only word that needs encoding is one character long, but
 511                    there is too much us-ascii stuff after it to use a single
 512                    encoded word. We add the next word to the encoded region
 513                    and try again. */
 514                 assert (t1 < u + ulen);
 515                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 516                 continue;
 517             }
 518             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 519         }
 520
 521         /* Add to output buffer. */
 522 #define LINEBREAK "\n\t"
 523         if (bufpos + wlen + 2 > buflen) {
 524             buflen = bufpos + wlen + 2;
 525             p_realloc(&buf, buflen);
 526         }
 527         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 528         assert (r == wlen);
 529         bufpos += wlen;
 530         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 531         bufpos += m_strlen(LINEBREAK);
 532 #undef LINEBREAK
 533
 534         col = 1;
 535
 536         t += n;
 537     }
 538
 539     /* Add last encoded word and us-ascii suffix to buffer. */
 540     buflen = bufpos + wlen + (u + ulen - t1);
 541     p_realloc(&buf, buflen + 1);
 542     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 543     assert (r == wlen);
 544     bufpos += wlen;
 545     memcpy (buf + bufpos, t1, u + ulen - t1);
 546
 547     p_delete(&tocode1);
 548     p_delete(&u);
 549
 550     buf[buflen] = '\0';
 551
 552     *e = buf;
 553     *elen = buflen + 1;
 554     return ret;
 555 }
 556
 557
 558 static void _rfc2047_encode_string(char **pd, int encode_specials, int col)
 559 {
 560     char *e;
 561     ssize_t elen;
 562     const char *charsets;
 563
 564     if (!Charset || !*pd)
 565         return;
 566
 567     charsets = m_strisempty(SendCharset) ? "UTF-8" : SendCharset;
 568
 569     rfc2047_encode(*pd, m_strlen(*pd), col,
 570                    Charset, charsets, &e, &elen,
 571                    encode_specials ? RFC822Specials : NULL);
 572
 573     p_delete(pd);
 574     *pd = e;
 575 }
 576
 577 void rfc2047_encode_string(char **pd) {
 578     _rfc2047_encode_string(pd, 0, 32);
 579 }
 580
 581 void rfc2047_encode_adrlist(address_t *addr, const char *tag)
 582 {
 583     address_t *ptr = addr;
 584     int col = tag ? m_strlen(tag) + 2 : 32;
 585
 586     while (ptr) {
 587         if (ptr->personal)
 588             _rfc2047_encode_string(&ptr->personal, 1, col);
 589         ptr = ptr->next;
 590     }
 591 }
 592
 593
 594 /****************************************************************************/
 595 /* Decoding functions                                                       */
 596 /****************************************************************************/
 597
 598 /* decode one word into d[len] */
 599 static int rfc2047_decode_word(char *d, size_t len, const char *s)
 600 {
 601     const char *p, *eotoken;
 602     char *charset = NULL;
 603     int enc = 0, count = 0;
 604     char *d0;
 605
 606     /* =?[QB]?cset?.?= */
 607     for (p = s; (eotoken = strchr(p, '?')); p = eotoken + 1) {
 608         switch (++count) {
 609             const char *t;
 610             char *q;
 611
 612           case 2:
 613             /* ignore language specification a la RFC 2231 */
 614             t = memchr(p, '*', eotoken - p) ?: eotoken;
 615             charset = p_dupstr(p, t - p);
 616             break;
 617
 618           case 3:
 619             switch (*p) {
 620               case 'q': case 'Q':
 621                 enc = ENCQUOTEDPRINTABLE;
 622                 break;
 623
 624               case 'b': case 'B':
 625                 enc = ENCBASE64;
 626                 break;
 627
 628               default:
 629                 p_delete(&charset);
 630                 return -1;
 631             }
 632             break;
 633
 634           case 4:
 635             d0 = q = p_new(char, m_strlen(s) + 1);
 636
 637             if (enc == ENCQUOTEDPRINTABLE) {
 638                 while (p < eotoken) {
 639                     if (*p == '=' && hexval(p[1]) >= 0 && hexval(p[2]) >= 0) {
 640                         *q++ = (hexval (p[1]) << 4) | hexval (p[2]);
 641                         p += 3;
 642                     } else
 643                     if (*p == '_') {
 644                         *q++ = ' ';
 645                         p++;
 646                     } else {
 647                         *q++ = *p++;
 648                     }
 649                 }
 650                 *q = 0;
 651             } else { /* enc == ENCBASE64 */
 652                 int c, b = 0, k = 0;
 653
 654                 while (p < eotoken) {
 655                     if (*p == '=')
 656                         break;
 657
 658                     c = base64val(*p++);
 659                     if (c < 0)
 660                         continue;
 661
 662                     if (k + 6 >= 8) {
 663                         k -= 2;
 664                         *q++ = b | (c >> k);
 665                         b = c << (8 - k);
 666                     } else {
 667                         b |= c << (k + 2);
 668                         k += 6;
 669                     }
 670                 }
 671                 *q = 0;
 672             }
 673             break;
 674         }
 675     }
 676
 677     if (charset)
 678         mutt_convert_string(&d0, charset, Charset, M_ICONV_HOOK_FROM);
 679     m_strcpy(d, len, d0);
 680     p_delete(&charset);
 681     p_delete(&d0);
 682     return 0;
 683 }
 684
 685 /*
 686  * Find the start and end of the first encoded word in the string.
 687  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 688  * must be B or Q. Also, we don't require the encoded word to be
 689  * separated by linear-white-space (section 5(1)).
 690  */
 691 static const char *find_encoded_word(const char *s, const char **x)
 692 {
 693     const char *p;
 694
 695     while ((p = strstr(s, "=?"))) {
 696         s = p + 2;
 697         while (0x20 < *s && *s < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *s)) {
 698             s++;
 699         }
 700
 701         if (s[0] != '?' || !strchr("BbQq", s[1]) || s[2] != '?')
 702             continue;
 703
 704         s += 3;
 705         while (0x20 <= *s && *s < 0x7f && *s != '?') {
 706             s++;
 707         }
 708
 709         if (s[0] != '?' || s[1] != '=') {
 710             --s;
 711             continue;
 712         }
 713
 714         *x = s + 2;
 715         return p;
 716     }
 717
 718     return NULL;
 719 }
 720
 721 /* return length of linear white space */
 722 static ssize_t lwslen(const char *s, ssize_t n)
 723 {
 724     const char *p;
 725     ssize_t len = n;
 726
 727     if (n <= 0)
 728         return 0;
 729
 730     for (p = s; p < s + n; p++) {
 731         if (!strchr (" \t\r\n", *p)) {
 732             len = p - s;
 733             break;
 734         }
 735     }
 736
 737     if (p[-1] == '\r' || p[-1] == '\n')  /* LWS cannot end with CRLF */
 738         return 0;
 739
 740     return len;
 741 }
 742
 743 /* return length of linear white space : reverse */
 744 static ssize_t lwsrlen(const char *s, ssize_t n)
 745 {
 746     const char *p = s + n - 1;
 747     size_t len = n;
 748
 749     if (n <= 0)
 750         return 0;
 751
 752     if (*p == '\r' || *p == '\n')   /* LWS doesn't end with CRLF */
 753         return 0;
 754
 755     while (p >= s) {
 756         if (!strchr(" \t\r\n", *p)) {
 757             len = s + n - 1 - p;
 758             break;
 759         }
 760         p--;
 761     }
 762
 763     return len;
 764 }
 765
 766 /* try to decode anything that looks like a valid RFC2047 encoded
 767  * header field, ignoring RFC822 parsing rules
      *
      * The string *pd is replaced: the decoded text is built in a newly
      * allocated buffer, the old string is released and *pd is pointed at
      * the new one.  Nothing happens when *pd is NULL or empty.
      *
      * With OPTSTRICTMIME unset, white space next to encoded words is
      * collapsed as described at the individual steps below; with it set,
      * text between encoded words is copied unchanged, except that a gap
      * made up only of white space between two encoded words is dropped.
 768  */
 769 void rfc2047_decode(char **pd)
 770 {
 771     const int strict_mime = option(OPTSTRICTMIME);
 772
 773     const char *s = *pd;
         /* d0 is the start of the output buffer, d the current write position */
 774     char *d0, *d;
         /* room left in the output buffer */
 775     ssize_t dlen;
         /* set once the first encoded word has been decoded */
 776     int found_encoded = 0;
 777
 778     if (!s || !*s)
 779         return;
 780
 781     dlen = 4 * m_strlen(s);        /* should be enough */
 782     d = d0 = p_new(char, dlen + 1);
 783
 784     while (*s && dlen > 0) {
             /* p: start of the next encoded word, q: first byte after it */
 785         const char *p, *q;
 786
 787         p = find_encoded_word(s, &q);
 788
 789         if (!p) {
 790             /* no encoded words */
 791             if (!strict_mime) {
 792                 ssize_t m, n;
 793
 794                 n = m_strlen(s);
                     /* After an encoded word, leading white space is skipped;
                        a single space is emitted for it unless the whole
                        remainder is white space. */
 795                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 796                     if (m != n)
 797                         *d++ = ' ', dlen--;
 798                     n -= m, s += m;
 799                 }
 800
                     /* Run the remainder through
                        mutt_convert_nonmime_string(); if that does not
                        return 0 the raw text is copied instead.  Either
                        way the loop ends here. */
 801                 if (mime_which_token(AssumedCharset, -1) == MIME_US_ASCII) {
 802                     char *t;
 803
 804                     t = p_dupstr(s, n);
 805                     if (mutt_convert_nonmime_string(&t) == 0) {
 806                         d += m_strcpy(d, dlen, t);
 807                     } else {
 808                         d += m_strcpy(d, dlen, s);
 809                     }
 810                     p_delete(&t);
 811                     break;
 812                 }
 813             }
                 /* copy whatever is left unchanged and stop */
 814             d += m_strcpy(d, dlen, s);
 815             break;
 816         }
 817
             /* n bytes of plain text precede the encoded word */
 818         if (p != s) {
 819             ssize_t m, n;
 820
 821             n = (p - s);
 822             /* ignore spaces between encoded words
 823              * and linear white spaces between encoded word and *text */
 824             if (!strict_mime) {
 825                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 826                     if (m != n)
 827                         *d++ = ' ', dlen--;
 828                     n -= m, s += m;
 829                 }
 830
                     /* m is the text length without its trailing white
                        space; copy that part and emit one space in place
                        of any trailing white space that was cut off. */
 831                 if ((m = n - lwsrlen(s, n)) != 0) {
 832                     m  = m_strncpy(d, dlen, s, m);
 833                     d += m;
 834                     dlen -= m;
 835                     if (m != n)
 836                         *d++ = ' ', dlen--;
 837                 }
 838             } else
                 /* strict mode: copy the gap as is, unless it lies between
                    two encoded words and is made up of white space only */
 839             if (!found_encoded || (ssize_t)strspn(s, " \t\r\n") != n) {
 840                 n  = m_strncpy(d, dlen, s, n);
 841                 d += n;
 842                 dlen -= n;
 843             }
 844         }
 845
             /* Decode the word at the write position, continue reading after
                it, and move d to the end of what has been written. */
 846         rfc2047_decode_word(d, dlen, p);
 847         found_encoded = 1;
 848         s = q;
 849         while (*d && dlen)
 850             d++, dlen--;
 851     }
 852
         /* hand the new buffer to the caller in place of the old string */
 853     p_delete(pd);
 854     *pd = d0;
 855 }
 856
 857 void rfc2047_decode_adrlist(address_t *a)
 858 {
 859     while (a) {
 860         if (a->personal)
 861             rfc2047_decode(&a->personal);
 862         a = a->next;
 863     }
 864 }
 865
 866 void rfc2047_decode_envelope(ENVELOPE* e)
 867 {
 868     assert (e);
 869
 870     /* do RFC2047 decoding */
 871     rfc2047_decode_adrlist(e->from);
 872     rfc2047_decode_adrlist(e->to);
 873     rfc2047_decode_adrlist(e->cc);
 874     rfc2047_decode_adrlist(e->bcc);
 875     rfc2047_decode_adrlist(e->reply_to);
 876     rfc2047_decode_adrlist(e->mail_followup_to);
 877     rfc2047_decode_adrlist(e->return_path);
 878     rfc2047_decode_adrlist(e->sender);
 879
 880     if (e->subject) {
 881         rfc2047_decode(&e->subject);
 882         mutt_adjust_subject(e);
 883     }
 884 }