lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "mutt.h"
  37 #include "charset.h"
  38 #include "thread.h"
  39
  40 #include <assert.h>
  41 #include <ctype.h>
  42 #include <errno.h>
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45 #include <string.h>
  46
  47 /* If you are debugging this file, comment out the following line. */
  48 /*#define NDEBUG*/
  49
  50 #ifdef NDEBUG
  51 #define assert(x)
  52 #else
  53 #endif
  54
  55 #define ENCWORD_LEN_MAX 75
  56 #define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */
  57
  58 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  59
  60 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  61
  62 /* converts f of len flen and charset from
  63        into *t of len *tlen and charset to
  64
  65    returns -1 on error
  66    returns number of converted chars from f, see iconv(3)
  67  */
  68 static ssize_t
  69 convert_string(const char *from, const char *f, ssize_t flen,
  70                const char *to,   char **t, ssize_t *tlen)
  71 {
  72     iconv_t cd;
  73     char *buf, *ob;
  74     ssize_t obl, n;
  75
  76     cd = mutt_iconv_open(to, from, 0);
  77
  78     if (cd == (iconv_t)(-1))
  79         return -1;
  80
  81     obl = 4 * flen + 1;
  82     ob = buf = p_new(char, obl);
  83     n = my_iconv(cd, &f, &flen, &ob, &obl);
  84
  85     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  86         int e = errno;
  87         iconv_close(cd);
  88         errno = e;
  89         p_delete(&buf);
  90         return -1;
  91     }
  92     iconv_close(cd);
  93
  94     *ob = '\0';
  95     *tlen = ob - buf;
  96     *t  = buf;
  97
  98     return n;
  99 }
 100
 101 /* choose the shortest encoding for u */
 102 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 103                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 104 {
 105     char *res = NULL;
 106     ssize_t reslen = 0;
 107
 108     char *tocode = NULL;
 109     ssize_t bestn = 0;
 110
 111     const char *p = charsets;
 112
 113     while (*p) {
 114         char cset[SHORT_STRING];
 115         const char *q;
 116         char *s;
 117         ssize_t slen, n;
 118
 119         q = strchr(p, ':');
 120         if (q) {
 121             n = m_strncpy(cset, sizeof(cset), p, q - p);
 122             p = ++q;
 123         } else {
 124             n = m_strcpy(cset, sizeof(cset), p);
 125             p += n;
 126         }
 127
 128         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 129             /* Assume that we never need more than 12 characters of
 130                encoded-text to encode a single character. */
 131             continue;
 132         }
 133
 134         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 135         if (n < 0)
 136             continue;
 137
 138         if (!tocode || n < bestn) {
 139             m_strreplace(&tocode, cset);
 140             bestn = n;
 141
 142             p_delete(&res);
 143             res = s;
 144             reslen = slen;
 145             if (!bestn)
 146                 break;
 147         } else {
 148             p_delete(&s);
 149         }
 150     }
 151
 152     if (tocode) {
 153         char buf[LONG_STRING];
 154
 155         if (dst && dlen) {
 156             *dst  = res;
 157             *dlen = reslen;
 158         } else {
 159             p_delete(&res);
 160         }
 161
 162         mutt_canonical_charset(buf, sizeof(buf), tocode);
 163         m_strreplace(&tocode, buf);
 164     }
 165
 166     return tocode;
 167 }
 168
 169
 170 /****************************************************************************/
 171 /* Encoding functions                                                       */
 172 /****************************************************************************/
 173
 174 static const char __qp_special[128] = {
 175     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 176     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 177     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 179     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 180     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 181     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 182     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 183 };
 184
 185 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 186
 187 static size_t
 188 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 189 {
 190     char *s0 = s;
 191
 192     s += sprintf(s, "=?%s?B?", tocode);
 193
 194     for (;;) {
 195         switch (dlen) {
 196           case 0:
 197             goto done;
 198
 199           case 1:
 200             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 201             *s++ = __m_b64chars[(*d & 0x03) << 4];
 202             *s++ = '=';
 203             *s++ = '=';
 204             goto done;
 205
 206           case 2:
 207             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 208             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 209             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 210             *s++ = '=';
 211             goto done;
 212
 213           default:
 214             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 215             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 216             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 217             *s++ = __m_b64chars[d[2] & 0x3f];
 218             d += 3, dlen -= 3;
 219         }
 220     }
 221
 222   done:
 223     *s++ = '?';
 224     *s++ = '=';
 225     return s - s0;
 226 }
 227
 228 static size_t
 229 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 230 {
 231     char *s0 = s;
 232
 233     s += sprintf(s, "=?%s?Q?", tocode);
 234     while (dlen--) {
 235         unsigned char c = *d++;
 236
 237         if (c == ' ') {
 238             *s++ = '_';
 239         } else
 240         if (c & 0x80 || __qp_special[c]) {
 241             *s++ = '=';
 242             *s++ = __m_b36chars_upper[c >> 4];
 243             *s++ = __m_b36chars_upper[c & 0xf];
 244         } else {
 245             *s++ = c;
 246         }
 247     }
 248
 249     *s++ = '?';
 250     *s++ = '=';
 251     return s - s0;
 252 }
 253
 254 /*
 255  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 256  * be converted to an encoded word of length *wlen using *encoder.
 257  * Otherwise return an upper bound on the maximum length of the data
 258  * which could be converted.
 259  * The data is converted from fromcode (which must be stateless) to
 260  * tocode, unless fromcode is 0, in which case the data is assumed to
 261  * be already in tocode, which should be 8-bit and stateless.
 262  */
 263 static size_t try_block (const char *d, ssize_t dlen,
 264                          const char *fromcode, const char *tocode,
 265                          encoder_t **encoder, ssize_t *wlen)
 266 {
 267     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 268     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 269     char *ob;
 270
 271     if (fromcode) {
 272         const char *ib = d;
 273         ssize_t ibl = dlen;
 274         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 275
 276         assert (cd != (iconv_t)(-1));
 277
 278         ob = buf1;
 279
 280         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 281         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 282         {
 283             assert (errno == E2BIG && ib > d);
 284             iconv_close(cd);
 285             return (ib - d == dlen) ? dlen : ib - d + 1;
 286         }
 287         iconv_close (cd);
 288     } else {
 289         if (dlen > obl)
 290             return obl + 1;
 291         memcpy(buf1, d, dlen);
 292         ob = buf1 + dlen;
 293     }
 294
 295     {
 296         const char *p;
 297         int count, len, len_b, len_q;
 298
 299         count = 0;
 300         for (p = buf1; p < ob; p++) {
 301             count += (*p & 0x80 || __qp_special[(int)*p]);
 302         }
 303
 304         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 305         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 306         len_q = len + (ob - buf1) + 2 * count;
 307
 308         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 309         if (!ascii_strcasecmp(tocode, "ISO-2022-JP"))
 310             len_q = ENCWORD_LEN_MAX + 1;
 311
 312         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 313             *encoder = b_encoder;
 314             *wlen = len_b;
 315             return 0;
 316         } else
 317         if (len_q <= ENCWORD_LEN_MAX) {
 318             *encoder = q_encoder;
 319             *wlen = len_q;
 320             return 0;
 321         } else {
 322             return dlen;
 323         }
 324     }
 325 }
 326
 327 /*
 328  * Encode the data (d, dlen) into s using the encoder.
 329  * Return the length of the encoded word.
 330  */
 331 static size_t encode_block (char *s, char *d, ssize_t dlen,
 332                             const char *fromcode, const char *tocode,
 333                             encoder_t *encoder)
 334 {
 335     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 336     ssize_t ibl, obl, n1, n2;
 337     iconv_t cd;
 338     const char *ib;
 339     char *ob;
 340
 341     if (fromcode) {
 342         cd = mutt_iconv_open (tocode, fromcode, 0);
 343         assert (cd != (iconv_t) (-1));
 344         ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
 345         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 346         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 347         assert (n1 >= 0 && n2 >= 0);
 348         iconv_close (cd);
 349         return (*encoder) (s, buf1, ob - buf1, tocode);
 350     } else {
 351         return (*encoder) (s, d, dlen, tocode);
 352     }
 353 }
 354
 355 /*
 356  * Discover how much of the data (d, dlen) can be converted into
 357  * a single encoded word. Return how much data can be converted,
 358  * and set the length *wlen of the encoded word and *encoder.
 359  * We start in column col, which limits the length of the word.
 360  */
 361 static size_t choose_block(char *d, size_t dlen, int col,
 362                            const char *fromcode, const char *tocode,
 363                            encoder_t **encoder, ssize_t *wlen)
 364 {
 365     size_t n, nn;
 366     int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
 367
 368     n = dlen;
 369     for (;;) {
 370         assert (d + n > d);
 371         nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 372         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 373             break;
 374         n = (nn ? nn : n) - 1;
 375         assert (n > 0);
 376         if (utf8)
 377             while (n > 1 && CONTINUATION_BYTE (d[n]))
 378                 --n;
 379     }
 380     return n;
 381 }
 382
 383 /*
 384  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 385  * allocated buffer (e, elen). The input data is in charset fromcode
 386  * and is converted into a charset chosen from charsets.
 387  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 388  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 389  * compatible with us-ascii and the original data is used.
 390  * The input data is assumed to be a single line starting at column col;
 391  * if col is non-zero, the preceding character was a space.
 392  */
 393 static int rfc2047_encode (const char *d, ssize_t dlen, int col,
 394                            const char *fromcode, const char *charsets,
 395                            char **e, ssize_t *elen, const char *specials)
 396 {
 397     int ret = 0;
 398     char *buf;
 399     ssize_t bufpos, buflen;
 400     char *u, *t0, *t1, *t;
 401     char *s0, *s1;
 402     ssize_t ulen, r, n, wlen;
 403     encoder_t *encoder;
 404     char *tocode1 = 0;
 405     const char *tocode;
 406     const char *icode = "UTF-8";
 407
 408     /* Try to convert to UTF-8. */
 409     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 410         ret = 1;
 411         icode = 0;
 412         u = p_dupstr(d, ulen = dlen);
 413     }
 414
 415     /* Find earliest and latest things we must encode. */
 416     s0 = s1 = t0 = t1 = 0;
 417     for (t = u; t < u + ulen; t++) {
 418         if ((*t & 0x80) ||
 419             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 420             if (!t0)
 421                 t0 = t;
 422             t1 = t;
 423         }
 424         else if (specials && strchr (specials, *t)) {
 425             if (!s0)
 426                 s0 = t;
 427             s1 = t;
 428         }
 429     }
 430
 431     /* If we have something to encode, include RFC822 specials */
 432     if (t0 && s0 && s0 < t0)
 433         t0 = s0;
 434     if (t1 && s1 && s1 > t1)
 435         t1 = s1;
 436
 437     if (!t0) {
 438         /* No encoding is required. */
 439         *e = u;
 440         *elen = ulen;
 441         return ret;
 442     }
 443
 444     /* Choose target charset. */
 445     tocode = fromcode;
 446     if (icode) {
 447         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 448                                            NULL, NULL)))
 449             tocode = tocode1;
 450         else
 451             ret = 2, icode = 0;
 452     }
 453
 454     /* Hack to avoid labelling 8-bit data as us-ascii. */
 455     if (!icode && mutt_is_us_ascii (tocode))
 456         tocode = "unknown-8bit";
 457
 458     /* Adjust t0 for maximum length of line. */
 459     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 460     if (t < u)
 461         t = u;
 462     if (t < t0)
 463         t0 = t;
 464
 465
 466     /* Adjust t0 until we can encode a character after a space. */
 467     for (; t0 > u; t0--) {
 468         if (!HSPACE (*(t0 - 1)))
 469             continue;
 470         t = t0 + 1;
 471         if (icode)
 472             while (t < u + ulen && CONTINUATION_BYTE (*t))
 473                 ++t;
 474         if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 475             col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 476             break;
 477     }
 478
 479     /* Adjust t1 until we can encode a character before a space. */
 480     for (; t1 < u + ulen; t1++) {
 481         if (!HSPACE (*t1))
 482             continue;
 483         t = t1 - 1;
 484         if (icode)
 485             while (CONTINUATION_BYTE (*t))
 486                 --t;
 487         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 488             1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 489             break;
 490     }
 491
 492     /* We shall encode the region [t0,t1). */
 493
 494     /* Initialise the output buffer with the us-ascii prefix. */
 495     buflen = 2 * ulen;
 496     buf = p_new(char, buflen);
 497     bufpos = t0 - u;
 498     memcpy (buf, u, t0 - u);
 499
 500     col += t0 - u;
 501
 502     t = t0;
 503     for (;;) {
 504         /* Find how much we can encode. */
 505         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 506         if (n == t1 - t) {
 507             /* See if we can fit the us-ascii suffix, too. */
 508             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 509                 break;
 510             n = t1 - t - 1;
 511             if (icode)
 512                 while (CONTINUATION_BYTE (t[n]))
 513                     --n;
 514             assert (t + n >= t);
 515             if (!n) {
 516                 /* This should only happen in the really stupid case where the
 517                    only word that needs encoding is one character long, but
 518                    there is too much us-ascii stuff after it to use a single
 519                    encoded word. We add the next word to the encoded region
 520                    and try again. */
 521                 assert (t1 < u + ulen);
 522                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 523                 continue;
 524             }
 525             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 526         }
 527
 528         /* Add to output buffer. */
 529 #define LINEBREAK "\n\t"
 530         if (bufpos + wlen + m_strlen(LINEBREAK) > buflen) {
 531             buflen = bufpos + wlen + m_strlen(LINEBREAK);
 532             p_realloc(&buf, buflen);
 533         }
 534         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 535         assert (r == wlen);
 536         bufpos += wlen;
 537         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 538         bufpos += m_strlen(LINEBREAK);
 539 #undef LINEBREAK
 540
 541         col = 1;
 542
 543         t += n;
 544     }
 545
 546     /* Add last encoded word and us-ascii suffix to buffer. */
 547     buflen = bufpos + wlen + (u + ulen - t1);
 548     p_realloc(&buf, buflen + 1);
 549     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 550     assert (r == wlen);
 551     bufpos += wlen;
 552     memcpy (buf + bufpos, t1, u + ulen - t1);
 553
 554     p_delete(&tocode1);
 555     p_delete(&u);
 556
 557     buf[buflen] = '\0';
 558
 559     *e = buf;
 560     *elen = buflen + 1;
 561     return ret;
 562 }
 563
 564 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 565 {
 566     char *e;
 567     ssize_t elen;
 568     const char *charsets;
 569
 570     if (!Charset || !*pd)
 571         return;
 572
 573     charsets = SendCharset;
 574     if (!charsets || !*charsets)
 575         charsets = "UTF-8";
 576
 577     rfc2047_encode (*pd, m_strlen(*pd), col,
 578                     Charset, charsets, &e, &elen,
 579                     encode_specials ? RFC822Specials : NULL);
 580
 581     p_delete(pd);
 582     *pd = e;
 583 }
 584
 585 void rfc2047_encode_string(char **pd) {
 586     _rfc2047_encode_string(pd, 0, 32);
 587 }
 588
 589 void rfc2047_encode_adrlist (address_t * addr, const char *tag)
 590 {
 591     address_t *ptr = addr;
 592     int col = tag ? m_strlen(tag) + 2 : 32;
 593
 594     while (ptr) {
 595         if (ptr->personal)
 596             _rfc2047_encode_string (&ptr->personal, 1, col);
 597         ptr = ptr->next;
 598     }
 599 }
 600
 601 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 602 {
 603     const char *pp, *pp1;
 604     char *pd, *d0;
 605     const char *t, *t1;
 606     int enc = 0, count = 0;
 607     char *charset = NULL;
 608
 609     pd = d0 = p_new(char, m_strlen(s));
 610
 611     for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 612         count++;
 613         switch (count) {
 614           case 2:
 615             /* ignore language specification a la RFC 2231 */
 616             t = pp1;
 617             if ((t1 = memchr (pp, '*', t - pp)))
 618                 t = t1;
 619             charset = p_dupstr(pp, t - pp);
 620             break;
 621           case 3:
 622             if (toupper ((unsigned char) *pp) == 'Q')
 623                 enc = ENCQUOTEDPRINTABLE;
 624             else if (toupper ((unsigned char) *pp) == 'B')
 625                 enc = ENCBASE64;
 626             else {
 627                 p_delete(&charset);
 628                 p_delete(&d0);
 629                 return (-1);
 630             }
 631             break;
 632           case 4:
 633             if (enc == ENCQUOTEDPRINTABLE) {
 634                 for (; pp < pp1; pp++) {
 635                     if (*pp == '_')
 636                         *pd++ = ' ';
 637                     else if (*pp == '=' && hexval(pp[1]) >= 0 && hexval(pp[2]) >= 0) {
 638                         *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 639                         pp += 2;
 640                     }
 641                     else
 642                         *pd++ = *pp;
 643                 }
 644                 *pd = 0;
 645             }
 646             else if (enc == ENCBASE64) {
 647                 int c, b = 0, k = 0;
 648
 649                 for (; pp < pp1; pp++) {
 650                     if (*pp == '=')
 651                         break;
 652                     if ((c = base64val(*pp)) < 0)
 653                         continue;
 654                     if (k + 6 >= 8) {
 655                         k -= 2;
 656                         *pd++ = b | (c >> k);
 657                         b = c << (8 - k);
 658                     }
 659                     else {
 660                         b |= c << (k + 2);
 661                         k += 6;
 662                     }
 663                 }
 664                 *pd = 0;
 665             }
 666             break;
 667         }
 668     }
 669
 670     if (charset)
 671         mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 672     m_strcpy(d, len, d0);
 673     p_delete(&charset);
 674     p_delete(&d0);
 675     return (0);
 676 }
 677
 678 /*
 679  * Find the start and end of the first encoded word in the string.
 680  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 681  * must be B or Q. Also, we don't require the encoded word to be
 682  * separated by linear-white-space (section 5(1)).
 683  */
 684 static const char *find_encoded_word (const char *s, const char **x)
 685 {
 686     const char *p, *q;
 687
 688     q = s;
 689     while ((p = strstr (q, "=?"))) {
 690         for (q = p + 2;
 691              0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 692         if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 693             continue;
 694         for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 695         if (q[0] != '?' || q[1] != '=') {
 696             --q;
 697             continue;
 698         }
 699
 700         *x = q + 2;
 701         return p;
 702     }
 703
 704     return 0;
 705 }
 706
 707 /* return length of linear white space */
 708 static size_t lwslen (const char *s, size_t n)
 709 {
 710     const char *p = s;
 711     size_t len = n;
 712
 713     if (n <= 0)
 714         return 0;
 715
 716     for (; p < s + n; p++)
 717         if (!strchr (" \t\r\n", *p)) {
 718             len = (size_t) (p - s);
 719             break;
 720         }
 721     if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 722         len = (size_t) 0;
 723     return len;
 724 }
 725
 726 /* return length of linear white space : reverse */
 727 static size_t lwsrlen (const char *s, size_t n)
 728 {
 729     const char *p = s + n - 1;
 730     size_t len = n;
 731
 732     if (n <= 0)
 733         return 0;
 734
 735     if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 736         return (size_t) 0;
 737
 738     for (; p >= s; p--)
 739         if (!strchr (" \t\r\n", *p)) {
 740             len = (size_t) (s + n - 1 - p);
 741             break;
 742         }
 743     return len;
 744 }
 745
 746 /* try to decode anything that looks like a valid RFC2047 encoded
 747  * header field, ignoring RFC822 parsing rules
 748  */
 749 void rfc2047_decode (char **pd)
 750 {
 751     const char *p, *q;
 752     size_t m, n;
 753     int found_encoded = 0;
 754     char *d0, *d;
 755     const char *s = *pd;
 756     size_t dlen;
 757
 758     if (!s || !*s)
 759         return;
 760
 761     dlen = 4 * m_strlen(s);        /* should be enough */
 762     d = d0 = p_new(char, dlen + 1);
 763
 764     while (*s && dlen > 0) {
 765         if (!(p = find_encoded_word (s, &q))) {
 766             /* no encoded words */
 767             if (!option (OPTSTRICTMIME)) {
 768                 n = m_strlen(s);
 769                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 770                     if (m != n)
 771                         *d = ' ', d++, dlen--;
 772                     n -= m, s += m;
 773                 }
 774                 if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
 775                     char *t;
 776                     ssize_t tlen;
 777
 778                     t = p_dupstr(s, n);
 779                     if (mutt_convert_nonmime_string (&t) == 0) {
 780                         tlen = m_strlen(t);
 781                         strncpy (d, t, tlen);
 782                         d += tlen;
 783                     }
 784                     else {
 785                         strncpy (d, s, n);
 786                         d += n;
 787                     }
 788                     p_delete(&t);
 789                     break;
 790                 }
 791             }
 792             strncpy (d, s, dlen);
 793             d += dlen;
 794             break;
 795         }
 796
 797         if (p != s) {
 798             n = (p - s);
 799             /* ignore spaces between encoded words
 800              * and linear white spaces between encoded word and *text */
 801             if (!option (OPTSTRICTMIME)) {
 802                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 803                     if (m != n)
 804                         *d = ' ', d++, dlen--;
 805                     n -= m, s += m;
 806                 }
 807
 808                 if ((m = n - lwsrlen (s, n)) != 0) {
 809                     if (m > dlen)
 810                         m = dlen;
 811                     memcpy (d, s, m);
 812                     d += m;
 813                     dlen -= m;
 814                     if (m != n)
 815                         *d = ' ', d++, dlen--;
 816                 }
 817             }
 818             else if (!found_encoded || strspn (s, " \t\r\n") != n) {
 819                 if (n > dlen)
 820                     n = dlen;
 821                 memcpy (d, s, n);
 822                 d += n;
 823                 dlen -= n;
 824             }
 825         }
 826
 827         rfc2047_decode_word (d, p, dlen);
 828         found_encoded = 1;
 829         s = q;
 830         n = m_strlen(d);
 831         dlen -= n;
 832         d += n;
 833     }
 834     *d = 0;
 835
 836     p_delete(pd);
 837     *pd = d0;
 838     str_adjust (pd);
 839 }
 840
 841 void rfc2047_decode_adrlist(address_t *a)
 842 {
 843     while (a) {
 844         if (a->personal)
 845             rfc2047_decode(&a->personal);
 846         a = a->next;
 847     }
 848 }
 849
 850 void rfc2047_decode_envelope(ENVELOPE* e)
 851 {
 852     assert (e);
 853
 854     /* do RFC2047 decoding */
 855     rfc2047_decode_adrlist(e->from);
 856     rfc2047_decode_adrlist(e->to);
 857     rfc2047_decode_adrlist(e->cc);
 858     rfc2047_decode_adrlist(e->bcc);
 859     rfc2047_decode_adrlist(e->reply_to);
 860     rfc2047_decode_adrlist(e->mail_followup_to);
 861     rfc2047_decode_adrlist(e->return_path);
 862     rfc2047_decode_adrlist(e->sender);
 863
 864     if (e->subject) {
 865         rfc2047_decode(&e->subject);
 866         mutt_adjust_subject(e);
 867     }
 868 }