lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "mutt.h"
  37 #include "charset.h"
  38 #include "thread.h"
  39
  40 #include <assert.h>
  41 #include <ctype.h>
  42 #include <errno.h>
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45 #include <string.h>
  46
/*
 * Assertions are active in this file as long as NDEBUG is not defined.
 * Uncomment the line below to define NDEBUG; assert() is then redefined
 * here to expand to nothing.
 */
/*#define NDEBUG*/

#ifdef NDEBUG
#define assert(x)
#else
#endif
  54
/* Upper bound on the length of one encoded word, delimiters included. */
#define ENCWORD_LEN_MAX 75
/* Length of the smallest encoded-word skeleton, see the string below. */
#define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */

/* True for NUL, space and tab: the characters treated as a word boundary
 * by the encoder (note that the terminating NUL counts too). */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
/* True when c has the bit pattern 10xxxxxx, i.e. a UTF-8 continuation
 * byte. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  60
  61 /* converts f of len flen and charset from
  62        into *t of len *tlen and charset to
  63
  64    returns -1 on error
  65    returns number of converted chars from f, see iconv(3)
  66  */
  67 static ssize_t
  68 convert_string(const char *from, const char *f, ssize_t flen,
  69                const char *to,   char **t, ssize_t *tlen)
  70 {
  71     iconv_t cd;
  72     char *buf, *ob;
  73     ssize_t obl, n;
  74
  75     cd = mutt_iconv_open(to, from, 0);
  76
  77     if (cd == (iconv_t)(-1))
  78         return -1;
  79
  80     obl = 4 * flen + 1;
  81     ob = buf = p_new(char, obl);
  82     n = my_iconv(cd, &f, &flen, &ob, &obl);
  83
  84     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  85         int e = errno;
  86         iconv_close(cd);
  87         errno = e;
  88         p_delete(&buf);
  89         return -1;
  90     }
  91     iconv_close(cd);
  92
  93     *ob = '\0';
  94     *tlen = ob - buf;
  95     *t  = buf;
  96
  97     return n;
  98 }
  99
 100 /* choose the shortest encoding for u */
 101 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 102                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 103 {
 104     char *res = NULL;
 105     ssize_t reslen = 0;
 106
 107     char *tocode = NULL;
 108     ssize_t bestn = 0;
 109
 110     const char *p = charsets;
 111
 112     while (*p) {
 113         char cset[SHORT_STRING];
 114         const char *q;
 115         char *s;
 116         ssize_t slen, n;
 117
 118         q = strchr(p, ':');
 119         if (q) {
 120             n = m_strncpy(cset, sizeof(cset), p, q - p);
 121             p = ++q;
 122         } else {
 123             n = m_strcpy(cset, sizeof(cset), p);
 124             p += n;
 125         }
 126
 127         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 128             /* Assume that we never need more than 12 characters of
 129                encoded-text to encode a single character. */
 130             continue;
 131         }
 132
 133         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 134         if (n < 0)
 135             continue;
 136
 137         if (!tocode || n < bestn) {
 138             m_strreplace(&tocode, cset);
 139             bestn = n;
 140
 141             p_delete(&res);
 142             res = s;
 143             reslen = slen;
 144             if (!bestn)
 145                 break;
 146         } else {
 147             p_delete(&s);
 148         }
 149     }
 150
 151     if (tocode) {
 152         char buf[LONG_STRING];
 153
 154         if (dst && dlen) {
 155             *dst  = res;
 156             *dlen = reslen;
 157         } else {
 158             p_delete(&res);
 159         }
 160
 161         mutt_canonical_charset(buf, sizeof(buf), tocode);
 162         m_strreplace(&tocode, buf);
 163     }
 164
 165     return tocode;
 166 }
 167
 168
 169 /****************************************************************************/
 170 /* Encoding functions                                                       */
 171 /****************************************************************************/
 172
 173 static const char __qp_special[128] = {
 174     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 175     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 176     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 177     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 178     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 180     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 181     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 182 };
 183
/*
 * Common signature of the B and Q encoders below: (s, d, dlen, tocode)
 * writes one complete encoded word for the dlen bytes at d, labelled with
 * charset tocode, to s and returns the number of bytes written.
 */
typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 185
 186 static size_t
 187 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 188 {
 189     char *s0 = s;
 190
 191     s += sprintf(s, "=?%s?B?", tocode);
 192
 193     for (;;) {
 194         switch (dlen) {
 195           case 0:
 196             goto done;
 197
 198           case 1:
 199             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 200             *s++ = __m_b64chars[(*d & 0x03) << 4];
 201             *s++ = '=';
 202             *s++ = '=';
 203             goto done;
 204
 205           case 2:
 206             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 207             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 208             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 209             *s++ = '=';
 210             goto done;
 211
 212           default:
 213             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 214             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 215             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 216             *s++ = __m_b64chars[d[2] & 0x3f];
 217             d += 3, dlen -= 3;
 218         }
 219     }
 220
 221   done:
 222     *s++ = '?';
 223     *s++ = '=';
 224     return s - s0;
 225 }
 226
 227 static size_t
 228 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 229 {
 230     char *s0 = s;
 231
 232     s += sprintf(s, "=?%s?Q?", tocode);
 233     while (dlen--) {
 234         unsigned char c = *d++;
 235
 236         if (c == ' ') {
 237             *s++ = '_';
 238         } else
 239         if (c & 0x80 || __qp_special[c]) {
 240             *s++ = '=';
 241             *s++ = __m_b36chars_upper[c >> 4];
 242             *s++ = __m_b36chars_upper[c & 0xf];
 243         } else {
 244             *s++ = c;
 245         }
 246     }
 247
 248     *s++ = '?';
 249     *s++ = '=';
 250     return s - s0;
 251 }
 252
 253 /*
 254  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 255  * be converted to an encoded word of length *wlen using *encoder.
 256  * Otherwise return an upper bound on the maximum length of the data
 257  * which could be converted.
 258  * The data is converted from fromcode (which must be stateless) to
 259  * tocode, unless fromcode is 0, in which case the data is assumed to
 260  * be already in tocode, which should be 8-bit and stateless.
 261  */
 262 static size_t try_block(const char *d, ssize_t dlen,
 263                         const char *fromcode, const char *tocode,
 264                         encoder_t **encoder, ssize_t *wlen)
 265 {
 266     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 267     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 268     char *ob;
 269
 270     if (fromcode) {
 271         const char *ib = d;
 272         ssize_t ibl = dlen;
 273         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 274
 275         assert (cd != (iconv_t)(-1));
 276
 277         ob = buf1;
 278
 279         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 280         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 281         {
 282             assert (errno == E2BIG && ib > d);
 283             iconv_close(cd);
 284             return (ib - d == dlen) ? dlen : ib - d + 1;
 285         }
 286         iconv_close (cd);
 287     } else {
 288         if (dlen > obl)
 289             return obl + 1;
 290         memcpy(buf1, d, dlen);
 291         ob = buf1 + dlen;
 292     }
 293
 294     {
 295         const char *p;
 296         int count, len, len_b, len_q;
 297
 298         count = 0;
 299         for (p = buf1; p < ob; p++) {
 300             count += (*p & 0x80 || __qp_special[(int)*p]);
 301         }
 302
 303         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 304         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 305         len_q = len + (ob - buf1) + 2 * count;
 306
 307         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 308         if (!ascii_strcasecmp(tocode, "ISO-2022-JP"))
 309             len_q = ENCWORD_LEN_MAX + 1;
 310
 311         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 312             *encoder = b_encoder;
 313             *wlen = len_b;
 314             return 0;
 315         } else
 316         if (len_q <= ENCWORD_LEN_MAX) {
 317             *encoder = q_encoder;
 318             *wlen = len_q;
 319             return 0;
 320         } else {
 321             return dlen;
 322         }
 323     }
 324 }
 325
 326 /*
 327  * Encode the data (d, dlen) into s using the encoder.
 328  * Return the length of the encoded word.
 329  */
 330 static size_t
 331 encode_block(char *s, char *d, ssize_t dlen,
 332              const char *fromcode, const char *tocode, encoder_t *encoder)
 333 {
 334     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 335     ssize_t ibl, obl, n1, n2;
 336     iconv_t cd;
 337     const char *ib;
 338     char *ob;
 339
 340     if (fromcode) {
 341         cd = mutt_iconv_open(tocode, fromcode, 0);
 342         assert (cd != (iconv_t) (-1));
 343         ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
 344         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 345         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 346         assert (n1 >= 0 && n2 >= 0);
 347         iconv_close (cd);
 348         return (*encoder)(s, buf1, ob - buf1, tocode);
 349     } else {
 350         return (*encoder)(s, d, dlen, tocode);
 351     }
 352 }
 353
 354 /*
 355  * Discover how much of the data (d, dlen) can be converted into
 356  * a single encoded word. Return how much data can be converted,
 357  * and set the length *wlen of the encoded word and *encoder.
 358  * We start in column col, which limits the length of the word.
 359  */
 360 static size_t choose_block(char *d, size_t dlen, int col,
 361                            const char *fromcode, const char *tocode,
 362                            encoder_t **encoder, ssize_t *wlen)
 363 {
 364     size_t n, nn;
 365     int utf8 = fromcode && !ascii_strcasecmp(fromcode, "UTF-8");
 366
 367     n = dlen;
 368     for (;;) {
 369         assert (d + n > d);
 370         nn = try_block(d, n, fromcode, tocode, encoder, wlen);
 371         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 372             break;
 373         n = (nn ? nn : n) - 1;
 374         assert (n > 0);
 375         if (utf8) {
 376             while (n > 1 && CONTINUATION_BYTE(d[n]))
 377                 --n;
 378         }
 379     }
 380     return n;
 381 }
 382
 383 /*
 384  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 385  * allocated buffer (e, elen). The input data is in charset fromcode
 386  * and is converted into a charset chosen from charsets.
 387  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 388  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 389  * compatible with us-ascii and the original data is used.
 390  * The input data is assumed to be a single line starting at column col;
 391  * if col is non-zero, the preceding character was a space.
 392  */
 393 /*** XXX: simplify that one day ***/
 394 static int rfc2047_encode(const char *d, ssize_t dlen, int col,
 395                           const char *fromcode, const char *charsets,
 396                           char **e, ssize_t *elen, const char *specials)
 397 {
 398     int ret = 0;
 399     char *buf;
 400     ssize_t bufpos, buflen;
 401     char *u, *t;
 402     char *s0, *s1, *t0, *t1;
 403     char *tocode1 = 0;
 404     const char *tocode;
 405     const char *icode = "UTF-8";
 406     ssize_t ulen, r, n, wlen;
 407     encoder_t *encoder;
 408
 409     /* Try to convert to UTF-8. */
 410     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 411         ret = 1;
 412         icode = NULL;
 413         u = p_dupstr(d, ulen = dlen);
 414     }
 415
 416     /* Find earliest and latest things we must encode. */
 417     s0 = s1 = t0 = t1 = NULL;
 418     for (t = u; t < u + ulen; t++) {
 419         if ((*t & 0x80) ||
 420             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 421             if (!t0)
 422                 t0 = t;
 423             t1 = t;
 424         }
 425         else if (specials && strchr (specials, *t)) {
 426             if (!s0)
 427                 s0 = t;
 428             s1 = t;
 429         }
 430     }
 431
 432     /* If we have something to encode, include RFC822 specials */
 433     if (t0 && s0 && s0 < t0)
 434         t0 = s0;
 435     if (t1 && s1 && s1 > t1)
 436         t1 = s1;
 437
 438     if (!t0) {
 439         /* No encoding is required. */
 440         *e = u;
 441         *elen = ulen;
 442         return ret;
 443     }
 444
 445     /* Choose target charset. */
 446     tocode = fromcode;
 447     if (icode) {
 448         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 449                                            NULL, NULL)))
 450             tocode = tocode1;
 451         else
 452             ret = 2, icode = 0;
 453     }
 454
 455     /* Hack to avoid labelling 8-bit data as us-ascii. */
 456     if (!icode && mutt_is_us_ascii(tocode))
 457         tocode = "unknown-8bit";
 458
 459     /* Adjust t0 for maximum length of line. */
 460     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 461     if (t < u)
 462         t = u;
 463     if (t < t0)
 464         t0 = t;
 465
 466
 467     /* Adjust t0 until we can encode a character after a space. */
 468     for (; t0 > u; t0--) {
 469         if (!HSPACE(t0[-1]))
 470             continue;
 471         t = t0 + 1;
 472         if (icode) {
 473             while (t < u + ulen && CONTINUATION_BYTE(*t))
 474                 ++t;
 475         }
 476         if (!try_block(t0, t - t0, icode, tocode, &encoder, &wlen)
 477         &&  col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 478             break;
 479     }
 480
 481     /* Adjust t1 until we can encode a character before a space. */
 482     for (; t1 < u + ulen; t1++) {
 483         if (!HSPACE(*t1))
 484             continue;
 485         t = t1 - 1;
 486         if (icode) {
 487             while (CONTINUATION_BYTE(*t))
 488                 --t;
 489         }
 490         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen)
 491         &&  1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 492             break;
 493     }
 494
 495     /* We shall encode the region [t0,t1). */
 496
 497     /* Initialise the output buffer with the us-ascii prefix. */
 498     buflen = 2 * ulen;
 499     buf = p_new(char, buflen);
 500     bufpos = t0 - u;
 501     memcpy(buf, u, t0 - u);
 502
 503     col += t0 - u;
 504
 505     t = t0;
 506     for (;;) {
 507         /* Find how much we can encode. */
 508         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 509         if (n == t1 - t) {
 510             /* See if we can fit the us-ascii suffix, too. */
 511             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 512                 break;
 513             n = t1 - t - 1;
 514             if (icode)
 515                 while (CONTINUATION_BYTE (t[n]))
 516                     --n;
 517             assert (t + n >= t);
 518             if (!n) {
 519                 /* This should only happen in the really stupid case where the
 520                    only word that needs encoding is one character long, but
 521                    there is too much us-ascii stuff after it to use a single
 522                    encoded word. We add the next word to the encoded region
 523                    and try again. */
 524                 assert (t1 < u + ulen);
 525                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 526                 continue;
 527             }
 528             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 529         }
 530
 531         /* Add to output buffer. */
 532 #define LINEBREAK "\n\t"
 533         if (bufpos + wlen + 2 > buflen) {
 534             buflen = bufpos + wlen + 2;
 535             p_realloc(&buf, buflen);
 536         }
 537         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 538         assert (r == wlen);
 539         bufpos += wlen;
 540         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 541         bufpos += m_strlen(LINEBREAK);
 542 #undef LINEBREAK
 543
 544         col = 1;
 545
 546         t += n;
 547     }
 548
 549     /* Add last encoded word and us-ascii suffix to buffer. */
 550     buflen = bufpos + wlen + (u + ulen - t1);
 551     p_realloc(&buf, buflen + 1);
 552     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 553     assert (r == wlen);
 554     bufpos += wlen;
 555     memcpy (buf + bufpos, t1, u + ulen - t1);
 556
 557     p_delete(&tocode1);
 558     p_delete(&u);
 559
 560     buf[buflen] = '\0';
 561
 562     *e = buf;
 563     *elen = buflen + 1;
 564     return ret;
 565 }
 566
 567
 568 void _rfc2047_encode_string(char **pd, int encode_specials, int col)
 569 {
 570     char *e;
 571     ssize_t elen;
 572     const char *charsets;
 573
 574     if (!Charset || !*pd)
 575         return;
 576
 577     charsets = m_strisempty(SendCharset) ? "UTF-8" : SendCharset;
 578
 579     rfc2047_encode(*pd, m_strlen(*pd), col,
 580                    Charset, charsets, &e, &elen,
 581                    encode_specials ? RFC822Specials : NULL);
 582
 583     p_delete(pd);
 584     *pd = e;
 585 }
 586
 587 void rfc2047_encode_string(char **pd) {
 588     _rfc2047_encode_string(pd, 0, 32);
 589 }
 590
 591 void rfc2047_encode_adrlist(address_t *addr, const char *tag)
 592 {
 593     address_t *ptr = addr;
 594     int col = tag ? m_strlen(tag) + 2 : 32;
 595
 596     while (ptr) {
 597         if (ptr->personal)
 598             _rfc2047_encode_string(&ptr->personal, 1, col);
 599         ptr = ptr->next;
 600     }
 601 }
 602
 603
 604 /****************************************************************************/
 605 /* Decoding functions                                                       */
 606 /****************************************************************************/
 607
 608 /* decode one word into d[len] */
 609 static int rfc2047_decode_word(char *d, size_t len, const char *s)
 610 {
 611     const char *p, *eotoken;
 612     char *charset = NULL;
 613     int enc = 0, count = 0;
 614     char *d0;
 615
 616     /* =?[QB]?cset?.?= */
 617     for (p = s; (eotoken = strchr(p, '?')); p = eotoken + 1) {
 618         switch (++count) {
 619             const char *t;
 620             char *q;
 621
 622           case 2:
 623             /* ignore language specification a la RFC 2231 */
 624             t = memchr(p, '*', eotoken - p) ?: eotoken;
 625             charset = p_dupstr(p, t - p);
 626             break;
 627
 628           case 3:
 629             switch (*p) {
 630               case 'q': case 'Q':
 631                 enc = ENCQUOTEDPRINTABLE;
 632                 break;
 633
 634               case 'b': case 'B':
 635                 enc = ENCBASE64;
 636                 break;
 637
 638               default:
 639                 p_delete(&charset);
 640                 return -1;
 641             }
 642             break;
 643
 644           case 4:
 645             d0 = q = p_new(char, m_strlen(s) + 1);
 646
 647             if (enc == ENCQUOTEDPRINTABLE) {
 648                 while (p < eotoken) {
 649                     if (*p == '=' && hexval(p[1]) >= 0 && hexval(p[2]) >= 0) {
 650                         *q++ = (hexval (p[1]) << 4) | hexval (p[2]);
 651                         p += 3;
 652                     } else
 653                     if (*p == '_') {
 654                         *q++ = ' ';
 655                         p++;
 656                     } else {
 657                         *q++ = *p++;
 658                     }
 659                 }
 660                 *q = 0;
 661             } else { /* enc == ENCBASE64 */
 662                 int c, b = 0, k = 0;
 663
 664                 while (p < eotoken) {
 665                     if (*p == '=')
 666                         break;
 667
 668                     c = base64val(*p++);
 669                     if (c < 0)
 670                         continue;
 671
 672                     if (k + 6 >= 8) {
 673                         k -= 2;
 674                         *q++ = b | (c >> k);
 675                         b = c << (8 - k);
 676                     } else {
 677                         b |= c << (k + 2);
 678                         k += 6;
 679                     }
 680                 }
 681                 *q = 0;
 682             }
 683             break;
 684         }
 685     }
 686
 687     if (charset)
 688         mutt_convert_string(&d0, charset, Charset, M_ICONV_HOOK_FROM);
 689     m_strcpy(d, len, d0);
 690     p_delete(&charset);
 691     p_delete(&d0);
 692     return 0;
 693 }
 694
 695 /*
 696  * Find the start and end of the first encoded word in the string.
 697  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 698  * must be B or Q. Also, we don't require the encoded word to be
 699  * separated by linear-white-space (section 5(1)).
 700  */
 701 static const char *find_encoded_word(const char *s, const char **x)
 702 {
 703     const char *p;
 704
 705     while ((p = strstr(s, "=?"))) {
 706         s = p + 2;
 707         while (0x20 < *s && *s < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *s)) {
 708             s++;
 709         }
 710
 711         if (s[0] != '?' || !strchr("BbQq", s[1]) || s[2] != '?')
 712             continue;
 713
 714         s += 3;
 715         while (0x20 <= *s && *s < 0x7f && *s != '?') {
 716             s++;
 717         }
 718
 719         if (s[0] != '?' || s[1] != '=') {
 720             --s;
 721             continue;
 722         }
 723
 724         *x = s + 2;
 725         return p;
 726     }
 727
 728     return NULL;
 729 }
 730
 731 /* return length of linear white space */
 732 static ssize_t lwslen(const char *s, ssize_t n)
 733 {
 734     const char *p;
 735     ssize_t len = n;
 736
 737     if (n <= 0)
 738         return 0;
 739
 740     for (p = s; p < s + n; p++) {
 741         if (!strchr (" \t\r\n", *p)) {
 742             len = p - s;
 743             break;
 744         }
 745     }
 746
 747     if (p[-1] == '\r' || p[-1] == '\n')  /* LWS cannot end with CRLF */
 748         return 0;
 749
 750     return len;
 751 }
 752
 753 /* return length of linear white space : reverse */
 754 static ssize_t lwsrlen(const char *s, ssize_t n)
 755 {
 756     const char *p = s + n - 1;
 757     size_t len = n;
 758
 759     if (n <= 0)
 760         return 0;
 761
 762     if (*p == '\r' || *p == '\n')   /* LWS doesn't end with CRLF */
 763         return 0;
 764
 765     while (p >= s) {
 766         if (!strchr(" \t\r\n", *p)) {
 767             len = s + n - 1 - p;
 768             break;
 769         }
 770         p--;
 771     }
 772
 773     return len;
 774 }
 775
 776 /* try to decode anything that looks like a valid RFC2047 encoded
 777  * header field, ignoring RFC822 parsing rules
 778  */
 779 void rfc2047_decode(char **pd)
 780 {
 781     const int strict_mime = option(OPTSTRICTMIME);
 782
 783     const char *s = *pd;
 784     char *d0, *d;
 785     ssize_t dlen;
 786     int found_encoded = 0;
 787
 788     if (!s || !*s)
 789         return;
 790
 791     dlen = 4 * m_strlen(s);        /* should be enough */
 792     d = d0 = p_new(char, dlen + 1);
 793
 794     while (*s && dlen > 0) {
 795         const char *p, *q;
 796
 797         p = find_encoded_word(s, &q);
 798
 799         if (!p) {
 800             /* no encoded words */
 801             if (!strict_mime) {
 802                 ssize_t m, n;
 803
 804                 n = m_strlen(s);
 805                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 806                     if (m != n)
 807                         *d++ = ' ', dlen--;
 808                     n -= m, s += m;
 809                 }
 810
 811                 if (ascii_strcasecmp(AssumedCharset, "us-ascii")) {
 812                     char *t;
 813
 814                     t = p_dupstr(s, n);
 815                     if (mutt_convert_nonmime_string(&t) == 0) {
 816                         d += m_strcpy(d, dlen, t);
 817                     } else {
 818                         d += m_strcpy(d, dlen, s);
 819                     }
 820                     p_delete(&t);
 821                     break;
 822                 }
 823             }
 824             d += m_strcpy(d, dlen, s);
 825             break;
 826         }
 827
 828         if (p != s) {
 829             ssize_t m, n;
 830
 831             n = (p - s);
 832             /* ignore spaces between encoded words
 833              * and linear white spaces between encoded word and *text */
 834             if (!strict_mime) {
 835                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 836                     if (m != n)
 837                         *d++ = ' ', dlen--;
 838                     n -= m, s += m;
 839                 }
 840
 841                 if ((m = n - lwsrlen(s, n)) != 0) {
 842                     m  = m_strncpy(d, dlen, s, m);
 843                     d += m;
 844                     dlen -= m;
 845                     if (m != n)
 846                         *d++ = ' ', dlen--;
 847                 }
 848             } else
 849             if (!found_encoded || (ssize_t)strspn(s, " \t\r\n") != n) {
 850                 n  = m_strncpy(d, dlen, s, n);
 851                 d += n;
 852                 dlen -= n;
 853             }
 854         }
 855
 856         rfc2047_decode_word(d, dlen, p);
 857         found_encoded = 1;
 858         s = q;
 859         while (*d && dlen)
 860             d++, dlen--;
 861     }
 862
 863     p_delete(pd);
 864     *pd = d0;
 865 }
 866
 867 void rfc2047_decode_adrlist(address_t *a)
 868 {
 869     while (a) {
 870         if (a->personal)
 871             rfc2047_decode(&a->personal);
 872         a = a->next;
 873     }
 874 }
 875
 876 void rfc2047_decode_envelope(ENVELOPE* e)
 877 {
 878     assert (e);
 879
 880     /* do RFC2047 decoding */
 881     rfc2047_decode_adrlist(e->from);
 882     rfc2047_decode_adrlist(e->to);
 883     rfc2047_decode_adrlist(e->cc);
 884     rfc2047_decode_adrlist(e->bcc);
 885     rfc2047_decode_adrlist(e->reply_to);
 886     rfc2047_decode_adrlist(e->mail_followup_to);
 887     rfc2047_decode_adrlist(e->return_path);
 888     rfc2047_decode_adrlist(e->sender);
 889
 890     if (e->subject) {
 891         rfc2047_decode(&e->subject);
 892         mutt_adjust_subject(e);
 893     }
 894 }