lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/lib-lib.h>
  31
  32 #include <lib-mime/mime.h>
  33
  34 #include "charset.h"
  35 #include "thread.h"
  36
  37 #include <assert.h>
  38 #include <ctype.h>
  39 #include <errno.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <string.h>
  43
  44 /* If you are debugging this file, comment out the following line. */
  45 /*#define NDEBUG*/
  46
  47 #ifdef NDEBUG
  48 #define assert(x)
  49 #else
  50 #endif
  51
/* Upper bound used throughout this file for the length of one encoded-word. */
#define ENCWORD_LEN_MAX 75
/* Length of the smallest syntactically complete encoded-word skeleton. */
#define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */

/* True for a NUL byte, a space or a horizontal tab. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
/* True when the two top bits of c are 10, i.e. a UTF-8 continuation byte. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  57
  58 /* converts f of len flen and charset from
  59        into *t of len *tlen and charset to
  60
  61    returns -1 on error
  62    returns number of converted chars from f, see iconv(3)
  63  */
  64 static ssize_t
  65 convert_string(const char *from, const char *f, ssize_t flen,
  66                const char *to,   char **t, ssize_t *tlen)
  67 {
  68     iconv_t cd;
  69     char *buf, *ob;
  70     ssize_t obl, n;
  71
  72     cd = mutt_iconv_open(to, from, 0);
  73
  74     if (cd == MUTT_ICONV_ERROR)
  75         return -1;
  76
  77     obl = 4 * flen + 1;
  78     ob = buf = p_new(char, obl);
  79     n = my_iconv(cd, &f, &flen, &ob, &obl);
  80
  81     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  82         int e = errno;
  83         iconv_close(cd);
  84         errno = e;
  85         p_delete(&buf);
  86         return -1;
  87     }
  88     iconv_close(cd);
  89
  90     *ob = '\0';
  91     *tlen = ob - buf;
  92     *t  = buf;
  93
  94     return n;
  95 }
  96
  97 /* choose the shortest encoding for u */
  98 char *mutt_choose_charset(const char *fromcode, const char *charsets,
  99                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 100 {
 101     char *res = NULL;
 102     ssize_t reslen = 0;
 103
 104     char *tocode = NULL;
 105     ssize_t bestn = 0;
 106
 107     const char *p = charsets;
 108
 109     while (*p) {
 110         char cset[SHORT_STRING];
 111         const char *q;
 112         char *s;
 113         ssize_t slen, n;
 114
 115         q = strchr(p, ':');
 116         if (q) {
 117             n = m_strncpy(cset, sizeof(cset), p, q - p);
 118             p = ++q;
 119         } else {
 120             n = m_strcpy(cset, sizeof(cset), p);
 121             p += n;
 122         }
 123
 124         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 125             /* Assume that we never need more than 12 characters of
 126                encoded-text to encode a single character. */
 127             continue;
 128         }
 129
 130         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 131         if (n < 0)
 132             continue;
 133
 134         if (!tocode || n < bestn) {
 135             m_strreplace(&tocode, cset);
 136             bestn = n;
 137
 138             p_delete(&res);
 139             res = s;
 140             reslen = slen;
 141             if (!bestn)
 142                 break;
 143         } else {
 144             p_delete(&s);
 145         }
 146     }
 147
 148     if (tocode) {
 149         char buf[LONG_STRING];
 150
 151         if (dst && dlen) {
 152             *dst  = res;
 153             *dlen = reslen;
 154         } else {
 155             p_delete(&res);
 156         }
 157
 158         charset_canonicalize(buf, sizeof(buf), tocode);
 159         m_strreplace(&tocode, buf);
 160     }
 161
 162     return tocode;
 163 }
 164
 165
 166 /****************************************************************************/
 167 /* Encoding functions                                                       */
 168 /****************************************************************************/
 169
 170 static const char __qp_special[128] = {
 171     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 172     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 173     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 174     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 175     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 176     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 177     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 179 };
 180
 181 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 182
 183 static size_t
 184 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 185 {
 186     char *s0 = s;
 187
 188     s += sprintf(s, "=?%s?B?", tocode);
 189
 190     for (;;) {
 191         switch (dlen) {
 192           case 0:
 193             goto done;
 194
 195           case 1:
 196             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 197             *s++ = __m_b64chars[(*d & 0x03) << 4];
 198             *s++ = '=';
 199             *s++ = '=';
 200             goto done;
 201
 202           case 2:
 203             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 204             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 205             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 206             *s++ = '=';
 207             goto done;
 208
 209           default:
 210             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 211             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 212             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 213             *s++ = __m_b64chars[d[2] & 0x3f];
 214             d += 3, dlen -= 3;
 215         }
 216     }
 217
 218   done:
 219     *s++ = '?';
 220     *s++ = '=';
 221     return s - s0;
 222 }
 223
 224 static size_t
 225 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 226 {
 227     char *s0 = s;
 228
 229     s += sprintf(s, "=?%s?Q?", tocode);
 230     while (dlen--) {
 231         unsigned char c = *d++;
 232
 233         if (c == ' ') {
 234             *s++ = '_';
 235         } else
 236         if (c & 0x80 || __qp_special[c]) {
 237             *s++ = '=';
 238             *s++ = __m_b36chars_upper[c >> 4];
 239             *s++ = __m_b36chars_upper[c & 0xf];
 240         } else {
 241             *s++ = c;
 242         }
 243     }
 244
 245     *s++ = '?';
 246     *s++ = '=';
 247     return s - s0;
 248 }
 249
 250 /*
 251  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 252  * be converted to an encoded word of length *wlen using *encoder.
 253  * Otherwise return an upper bound on the maximum length of the data
 254  * which could be converted.
 255  * The data is converted from fromcode (which must be stateless) to
 256  * tocode, unless fromcode is 0, in which case the data is assumed to
 257  * be already in tocode, which should be 8-bit and stateless.
 258  */
 259 static size_t try_block(const char *d, ssize_t dlen,
 260                         const char *fromcode, const char *tocode,
 261                         encoder_t **encoder, ssize_t *wlen)
 262 {
 263     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 264     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 265     char *ob;
 266
 267     if (fromcode) {
 268         const char *ib = d;
 269         ssize_t ibl = dlen;
 270         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 271
 272         assert (cd != MUTT_ICONV_ERROR);
 273
 274         ob = buf1;
 275
 276         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 277         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 278         {
 279             assert (errno == E2BIG && ib > d);
 280             iconv_close(cd);
 281             return (ib - d == dlen) ? dlen : ib - d + 1;
 282         }
 283         iconv_close (cd);
 284     } else {
 285         if (dlen > obl)
 286             return obl + 1;
 287         memcpy(buf1, d, dlen);
 288         ob = buf1 + dlen;
 289     }
 290
 291     {
 292         const char *p;
 293         int count, len, len_b, len_q;
 294
 295         count = 0;
 296         for (p = buf1; p < ob; p++) {
 297             count += (*p & 0x80 || __qp_special[(int)*p]);
 298         }
 299
 300         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 301         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 302         len_q = len + (ob - buf1) + 2 * count;
 303
 304         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 305         if (mime_which_token(tocode, -1) == MIME_ISO_2022_JP)
 306             len_q = ENCWORD_LEN_MAX + 1;
 307
 308         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 309             *encoder = b_encoder;
 310             *wlen = len_b;
 311             return 0;
 312         } else
 313         if (len_q <= ENCWORD_LEN_MAX) {
 314             *encoder = q_encoder;
 315             *wlen = len_q;
 316             return 0;
 317         } else {
 318             return dlen;
 319         }
 320     }
 321 }
 322
 323 /*
 324  * Encode the data (d, dlen) into s using the encoder.
 325  * Return the length of the encoded word.
 326  */
 327 static size_t
 328 encode_block(char *s, char *d, ssize_t dlen,
 329              const char *fromcode, const char *tocode, encoder_t *encoder)
 330 {
 331     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 332     ssize_t ibl, obl, n1, n2;
 333     iconv_t cd;
 334     const char *ib;
 335     char *ob;
 336
 337     if (fromcode) {
 338         cd = mutt_iconv_open(tocode, fromcode, 0);
 339         assert (cd != MUTT_ICONV_ERROR);
 340         ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
 341         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 342         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 343         assert (n1 >= 0 && n2 >= 0);
 344         iconv_close (cd);
 345         return (*encoder)(s, buf1, ob - buf1, tocode);
 346     } else {
 347         return (*encoder)(s, d, dlen, tocode);
 348     }
 349 }
 350
 351 /*
 352  * Discover how much of the data (d, dlen) can be converted into
 353  * a single encoded word. Return how much data can be converted,
 354  * and set the length *wlen of the encoded word and *encoder.
 355  * We start in column col, which limits the length of the word.
 356  */
 357 static size_t choose_block(char *d, size_t dlen, int col,
 358                            const char *fromcode, const char *tocode,
 359                            encoder_t **encoder, ssize_t *wlen)
 360 {
 361     size_t n, nn;
 362     int utf8 = mime_which_token(fromcode, -1) == MIME_UTF_8;
 363
 364     n = dlen;
 365     for (;;) {
 366         assert (d + n > d);
 367         nn = try_block(d, n, fromcode, tocode, encoder, wlen);
 368         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 369             break;
 370         n = (nn ? nn : n) - 1;
 371         assert (n > 0);
 372         if (utf8) {
 373             while (n > 1 && CONTINUATION_BYTE(d[n]))
 374                 --n;
 375         }
 376     }
 377     return n;
 378 }
 379
 380 /*
 381  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 382  * allocated buffer (e, elen). The input data is in charset fromcode
 383  * and is converted into a charset chosen from charsets.
 384  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 385  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 386  * compatible with us-ascii and the original data is used.
 387  * The input data is assumed to be a single line starting at column col;
 388  * if col is non-zero, the preceding character was a space.
 389  */
 390 /*** XXX: simplify that one day ***/
 391 static int rfc2047_encode(const char *d, ssize_t dlen, int col,
 392                           const char *fromcode, const char *charsets,
 393                           char **e, ssize_t *elen, const char *specials)
 394 {
 395     int ret = 0;
 396     char *buf;
 397     ssize_t bufpos, buflen;
 398     char *u, *t;
 399     char *s0, *s1, *t0, *t1;
 400     char *tocode1 = 0;
 401     const char *tocode;
 402     const char *icode = "UTF-8";
 403     ssize_t ulen, r, n, wlen;
 404     encoder_t *encoder;
 405
 406     /* Try to convert to UTF-8. */
 407     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 408         ret = 1;
 409         icode = NULL;
 410         u = p_dupstr(d, ulen = dlen);
 411     }
 412
 413     /* Find earliest and latest things we must encode. */
 414     s0 = s1 = t0 = t1 = NULL;
 415     for (t = u; t < u + ulen; t++) {
 416         if ((*t & 0x80) ||
 417             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 418             if (!t0)
 419                 t0 = t;
 420             t1 = t;
 421         }
 422         else if (specials && strchr (specials, *t)) {
 423             if (!s0)
 424                 s0 = t;
 425             s1 = t;
 426         }
 427     }
 428
 429     /* If we have something to encode, include RFC822 specials */
 430     if (t0 && s0 && s0 < t0)
 431         t0 = s0;
 432     if (t1 && s1 && s1 > t1)
 433         t1 = s1;
 434
 435     if (!t0) {
 436         /* No encoding is required. */
 437         *e = u;
 438         *elen = ulen;
 439         return ret;
 440     }
 441
 442     /* Choose target charset. */
 443     tocode = fromcode;
 444     if (icode) {
 445         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 446                                            NULL, NULL)))
 447             tocode = tocode1;
 448         else
 449             ret = 2, icode = 0;
 450     }
 451
 452     /* Hack to avoid labelling 8-bit data as us-ascii. */
 453     if (!icode && charset_is_us_ascii(tocode))
 454         tocode = "unknown-8bit";
 455
 456     /* Adjust t0 for maximum length of line. */
 457     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 458     if (t < u)
 459         t = u;
 460     if (t < t0)
 461         t0 = t;
 462
 463
 464     /* Adjust t0 until we can encode a character after a space. */
 465     for (; t0 > u; t0--) {
 466         if (!HSPACE(t0[-1]))
 467             continue;
 468         t = t0 + 1;
 469         if (icode) {
 470             while (t < u + ulen && CONTINUATION_BYTE(*t))
 471                 ++t;
 472         }
 473         if (!try_block(t0, t - t0, icode, tocode, &encoder, &wlen)
 474         &&  col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 475             break;
 476     }
 477
 478     /* Adjust t1 until we can encode a character before a space. */
 479     for (; t1 < u + ulen; t1++) {
 480         if (!HSPACE(*t1))
 481             continue;
 482         t = t1 - 1;
 483         if (icode) {
 484             while (CONTINUATION_BYTE(*t))
 485                 --t;
 486         }
 487         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen)
 488         &&  1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 489             break;
 490     }
 491
 492     /* We shall encode the region [t0,t1). */
 493
 494     /* Initialise the output buffer with the us-ascii prefix. */
 495     buflen = 2 * ulen;
 496     buf = p_new(char, buflen);
 497     bufpos = t0 - u;
 498     memcpy(buf, u, t0 - u);
 499
 500     col += t0 - u;
 501
 502     t = t0;
 503     for (;;) {
 504         /* Find how much we can encode. */
 505         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 506         if (n == t1 - t) {
 507             /* See if we can fit the us-ascii suffix, too. */
 508             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 509                 break;
 510             n = t1 - t - 1;
 511             if (icode)
 512                 while (CONTINUATION_BYTE (t[n]))
 513                     --n;
 514             assert (t + n >= t);
 515             if (!n) {
 516                 /* This should only happen in the really stupid case where the
 517                    only word that needs encoding is one character long, but
 518                    there is too much us-ascii stuff after it to use a single
 519                    encoded word. We add the next word to the encoded region
 520                    and try again. */
 521                 assert (t1 < u + ulen);
 522                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 523                 continue;
 524             }
 525             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 526         }
 527
 528         /* Add to output buffer. */
 529 #define LINEBREAK "\n\t"
 530         if (bufpos + wlen + 2 > buflen) {
 531             buflen = bufpos + wlen + 2;
 532             p_realloc(&buf, buflen);
 533         }
 534         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 535         assert (r == wlen);
 536         bufpos += wlen;
 537         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 538         bufpos += m_strlen(LINEBREAK);
 539 #undef LINEBREAK
 540
 541         col = 1;
 542
 543         t += n;
 544     }
 545
 546     /* Add last encoded word and us-ascii suffix to buffer. */
 547     buflen = bufpos + wlen + (u + ulen - t1);
 548     p_realloc(&buf, buflen + 1);
 549     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 550     assert (r == wlen);
 551     bufpos += wlen;
 552     memcpy (buf + bufpos, t1, u + ulen - t1);
 553
 554     p_delete(&tocode1);
 555     p_delete(&u);
 556
 557     buf[buflen] = '\0';
 558
 559     *e = buf;
 560     *elen = buflen + 1;
 561     return ret;
 562 }
 563
 564
 565 static void _rfc2047_encode_string(char **pd, int encode_specials, int col)
 566 {
 567     char *e;
 568     ssize_t elen;
 569     const char *charsets;
 570
 571     if (!Charset || !*pd)
 572         return;
 573
 574     charsets = m_strisempty(SendCharset) ? "UTF-8" : SendCharset;
 575
 576     rfc2047_encode(*pd, m_strlen(*pd), col,
 577                    Charset, charsets, &e, &elen,
 578                    encode_specials ? RFC822Specials : NULL);
 579
 580     p_delete(pd);
 581     *pd = e;
 582 }
 583
 584 void rfc2047_encode_string(char **pd) {
 585     _rfc2047_encode_string(pd, 0, 32);
 586 }
 587
 588 void rfc2047_encode_adrlist(address_t *addr, const char *tag)
 589 {
 590     address_t *ptr = addr;
 591     int col = tag ? m_strlen(tag) + 2 : 32;
 592
 593     while (ptr) {
 594         if (ptr->personal)
 595             _rfc2047_encode_string(&ptr->personal, 1, col);
 596         ptr = ptr->next;
 597     }
 598 }
 599
 600
 601 /****************************************************************************/
 602 /* Decoding functions                                                       */
 603 /****************************************************************************/
 604
 605 /* decode one word into d[len] */
 606 static int rfc2047_decode_word(char *d, size_t len, const char *s)
 607 {
 608     const char *p, *eotoken;
 609     char *charset = NULL;
 610     int enc = 0, count = 0;
 611     char *d0;
 612
 613     /* =?[QB]?cset?.?= */
 614     for (p = s; (eotoken = strchr(p, '?')); p = eotoken + 1) {
 615         switch (++count) {
 616             const char *t;
 617             char *q;
 618
 619           case 2:
 620             /* ignore language specification a la RFC 2231 */
 621             t = memchr(p, '*', eotoken - p) ?: eotoken;
 622             charset = p_dupstr(p, t - p);
 623             break;
 624
 625           case 3:
 626             switch (*p) {
 627               case 'q': case 'Q':
 628                 enc = ENCQUOTEDPRINTABLE;
 629                 break;
 630
 631               case 'b': case 'B':
 632                 enc = ENCBASE64;
 633                 break;
 634
 635               default:
 636                 p_delete(&charset);
 637                 return -1;
 638             }
 639             break;
 640
 641           case 4:
 642             d0 = q = p_new(char, m_strlen(s) + 1);
 643
 644             if (enc == ENCQUOTEDPRINTABLE) {
 645                 while (p < eotoken) {
 646                     if (*p == '=' && hexval(p[1]) >= 0 && hexval(p[2]) >= 0) {
 647                         *q++ = (hexval (p[1]) << 4) | hexval (p[2]);
 648                         p += 3;
 649                     } else
 650                     if (*p == '_') {
 651                         *q++ = ' ';
 652                         p++;
 653                     } else {
 654                         *q++ = *p++;
 655                     }
 656                 }
 657                 *q = 0;
 658             } else { /* enc == ENCBASE64 */
 659                 int c, b = 0, k = 0;
 660
 661                 while (p < eotoken) {
 662                     if (*p == '=')
 663                         break;
 664
 665                     c = base64val(*p++);
 666                     if (c < 0)
 667                         continue;
 668
 669                     if (k + 6 >= 8) {
 670                         k -= 2;
 671                         *q++ = b | (c >> k);
 672                         b = c << (8 - k);
 673                     } else {
 674                         b |= c << (k + 2);
 675                         k += 6;
 676                     }
 677                 }
 678                 *q = 0;
 679             }
 680             break;
 681         }
 682     }
 683
 684     if (charset)
 685         mutt_convert_string(&d0, charset, Charset, M_ICONV_HOOK_FROM);
 686     m_strcpy(d, len, d0);
 687     p_delete(&charset);
 688     p_delete(&d0);
 689     return 0;
 690 }
 691
 692 /*
 693  * Find the start and end of the first encoded word in the string.
 694  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 695  * must be B or Q. Also, we don't require the encoded word to be
 696  * separated by linear-white-space (section 5(1)).
 697  */
 698 static const char *find_encoded_word(const char *s, const char **x)
 699 {
 700     const char *p;
 701
 702     while ((p = strstr(s, "=?"))) {
 703         s = p + 2;
 704         while (0x20 < *s && *s < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *s)) {
 705             s++;
 706         }
 707
 708         if (s[0] != '?' || !strchr("BbQq", s[1]) || s[2] != '?')
 709             continue;
 710
 711         s += 3;
 712         while (0x20 <= *s && *s < 0x7f && *s != '?') {
 713             s++;
 714         }
 715
 716         if (s[0] != '?' || s[1] != '=') {
 717             --s;
 718             continue;
 719         }
 720
 721         *x = s + 2;
 722         return p;
 723     }
 724
 725     return NULL;
 726 }
 727
 728 /* return length of linear white space */
 729 static ssize_t lwslen(const char *s, ssize_t n)
 730 {
 731     const char *p;
 732     ssize_t len = n;
 733
 734     if (n <= 0)
 735         return 0;
 736
 737     for (p = s; p < s + n; p++) {
 738         if (!strchr (" \t\r\n", *p)) {
 739             len = p - s;
 740             break;
 741         }
 742     }
 743
 744     if (p[-1] == '\r' || p[-1] == '\n')  /* LWS cannot end with CRLF */
 745         return 0;
 746
 747     return len;
 748 }
 749
 750 /* return length of linear white space : reverse */
 751 static ssize_t lwsrlen(const char *s, ssize_t n)
 752 {
 753     const char *p = s + n - 1;
 754     size_t len = n;
 755
 756     if (n <= 0)
 757         return 0;
 758
 759     if (*p == '\r' || *p == '\n')   /* LWS doesn't end with CRLF */
 760         return 0;
 761
 762     while (p >= s) {
 763         if (!strchr(" \t\r\n", *p)) {
 764             len = s + n - 1 - p;
 765             break;
 766         }
 767         p--;
 768     }
 769
 770     return len;
 771 }
 772
/* try to decode anything that looks like a valid RFC2047 encoded
 * header field, ignoring RFC822 parsing rules
 *
 * *pd is freed and replaced by a newly allocated string holding the
 * decoded text.  Nothing happens if *pd is NULL or empty.
 * With OPTSTRICTMIME unset, white space next to encoded words is
 * collapsed and trailing plain text may be run through
 * mutt_convert_nonmime_string().
 */
void rfc2047_decode(char **pd)
{
    const int strict_mime = option(OPTSTRICTMIME);

    const char *s = *pd;
    char *d0, *d;
    ssize_t dlen;
    int found_encoded = 0;

    if (!s || !*s)
        return;

    /* d is the write cursor into d0, dlen the room left behind it */
    dlen = 4 * m_strlen(s);        /* should be enough */
    d = d0 = p_new(char, dlen + 1);

    while (*s && dlen > 0) {
        const char *p, *q;

        /* p: start of the next encoded word, q: first byte after it */
        p = find_encoded_word(s, &q);

        if (!p) {
            /* no encoded words */
            if (!strict_mime) {
                ssize_t m, n;

                n = m_strlen(s);
                /* white space following an encoded word: drop it, keeping
                 * a single space if text follows */
                if (found_encoded && (m = lwslen(s, n)) != 0) {
                    if (m != n)
                        *d++ = ' ', dlen--;
                    n -= m, s += m;
                }

                if (mime_which_token(AssumedCharset, -1) == MIME_US_ASCII) {
                    char *t;

                    /* copy the converted tail, or the raw one if the
                     * conversion reports failure */
                    t = p_dupstr(s, n);
                    if (mutt_convert_nonmime_string(&t) == 0) {
                        d += m_strcpy(d, dlen, t);
                    } else {
                        d += m_strcpy(d, dlen, s);
                    }
                    p_delete(&t);
                    break;
                }
            }
            d += m_strcpy(d, dlen, s);
            break;
        }

        if (p != s) {
            ssize_t m, n;

            /* n: length of the plain text in front of the encoded word */
            n = (p - s);
            /* ignore spaces between encoded words
             * and linear white spaces between encoded word and *text */
            if (!strict_mime) {
                if (found_encoded && (m = lwslen(s, n)) != 0) {
                    if (m != n)
                        *d++ = ' ', dlen--;
                    n -= m, s += m;
                }

                /* m: plain text length without its trailing white space */
                if ((m = n - lwsrlen(s, n)) != 0) {
                    m  = m_strncpy(d, dlen, s, m);
                    d += m;
                    dlen -= m;
                    if (m != n)
                        *d++ = ' ', dlen--;
                }
            } else
            /* strict mode: only a pure white-space gap between two
             * encoded words is dropped */
            if (!found_encoded || (ssize_t)strspn(s, " \t\r\n") != n) {
                n  = m_strncpy(d, dlen, s, n);
                d += n;
                dlen -= n;
            }
        }

        rfc2047_decode_word(d, dlen, p);
        found_encoded = 1;
        s = q;
        /* advance the cursor to the end of what was just written */
        while (*d && dlen)
            d++, dlen--;
    }

    p_delete(pd);
    *pd = d0;
}
 863
 864 void rfc2047_decode_adrlist(address_t *a)
 865 {
 866     while (a) {
 867         if (a->personal)
 868             rfc2047_decode(&a->personal);
 869         a = a->next;
 870     }
 871 }
 872
 873 void rfc2047_decode_envelope(ENVELOPE* e)
 874 {
 875     assert (e);
 876
 877     /* do RFC2047 decoding */
 878     rfc2047_decode_adrlist(e->from);
 879     rfc2047_decode_adrlist(e->to);
 880     rfc2047_decode_adrlist(e->cc);
 881     rfc2047_decode_adrlist(e->bcc);
 882     rfc2047_decode_adrlist(e->reply_to);
 883     rfc2047_decode_adrlist(e->mail_followup_to);
 884     rfc2047_decode_adrlist(e->return_path);
 885     rfc2047_decode_adrlist(e->sender);
 886
 887     if (e->subject) {
 888         rfc2047_decode(&e->subject);
 889         mutt_adjust_subject(e);
 890     }
 891 }