lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "charset.h"
  37 #include "thread.h"
  38
  39 #include <assert.h>
  40 #include <ctype.h>
  41 #include <errno.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
  46 /* If you are debugging this file, comment out the following line. */
  47 /*#define NDEBUG*/
  48
  49 #ifdef NDEBUG
  50 #define assert(x)
  51 #else
  52 #endif
  53
  54 #define ENCWORD_LEN_MAX 75
  55 #define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */
  56
  57 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  58 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  59
  60 /* converts f of len flen and charset from
  61        into *t of len *tlen and charset to
  62
  63    returns -1 on error
  64    returns number of converted chars from f, see iconv(3)
  65  */
  66 static ssize_t
  67 convert_string(const char *from, const char *f, ssize_t flen,
  68                const char *to,   char **t, ssize_t *tlen)
  69 {
  70     iconv_t cd;
  71     char *buf, *ob;
  72     ssize_t obl, n;
  73
  74     cd = mutt_iconv_open(to, from, 0);
  75
  76     if (cd == (iconv_t)(-1))
  77         return -1;
  78
  79     obl = 4 * flen + 1;
  80     ob = buf = p_new(char, obl);
  81     n = my_iconv(cd, &f, &flen, &ob, &obl);
  82
  83     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  84         int e = errno;
  85         iconv_close(cd);
  86         errno = e;
  87         p_delete(&buf);
  88         return -1;
  89     }
  90     iconv_close(cd);
  91
  92     *ob = '\0';
  93     *tlen = ob - buf;
  94     *t  = buf;
  95
  96     return n;
  97 }
  98
  99 /* choose the shortest encoding for u */
 100 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 101                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 102 {
 103     char *res = NULL;
 104     ssize_t reslen = 0;
 105
 106     char *tocode = NULL;
 107     ssize_t bestn = 0;
 108
 109     const char *p = charsets;
 110
 111     while (*p) {
 112         char cset[SHORT_STRING];
 113         const char *q;
 114         char *s;
 115         ssize_t slen, n;
 116
 117         q = strchr(p, ':');
 118         if (q) {
 119             n = m_strncpy(cset, sizeof(cset), p, q - p);
 120             p = ++q;
 121         } else {
 122             n = m_strcpy(cset, sizeof(cset), p);
 123             p += n;
 124         }
 125
 126         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 127             /* Assume that we never need more than 12 characters of
 128                encoded-text to encode a single character. */
 129             continue;
 130         }
 131
 132         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 133         if (n < 0)
 134             continue;
 135
 136         if (!tocode || n < bestn) {
 137             m_strreplace(&tocode, cset);
 138             bestn = n;
 139
 140             p_delete(&res);
 141             res = s;
 142             reslen = slen;
 143             if (!bestn)
 144                 break;
 145         } else {
 146             p_delete(&s);
 147         }
 148     }
 149
 150     if (tocode) {
 151         char buf[LONG_STRING];
 152
 153         if (dst && dlen) {
 154             *dst  = res;
 155             *dlen = reslen;
 156         } else {
 157             p_delete(&res);
 158         }
 159
 160         mutt_canonical_charset(buf, sizeof(buf), tocode);
 161         m_strreplace(&tocode, buf);
 162     }
 163
 164     return tocode;
 165 }
 166
 167
 168 /****************************************************************************/
 169 /* Encoding functions                                                       */
 170 /****************************************************************************/
 171
 172 static const char __qp_special[128] = {
 173     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 174     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 175     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 176     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 177     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 180     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 181 };
 182
 183 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 184
 185 static size_t
 186 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 187 {
 188     char *s0 = s;
 189
 190     s += sprintf(s, "=?%s?B?", tocode);
 191
 192     for (;;) {
 193         switch (dlen) {
 194           case 0:
 195             goto done;
 196
 197           case 1:
 198             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 199             *s++ = __m_b64chars[(*d & 0x03) << 4];
 200             *s++ = '=';
 201             *s++ = '=';
 202             goto done;
 203
 204           case 2:
 205             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 206             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 207             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 208             *s++ = '=';
 209             goto done;
 210
 211           default:
 212             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 213             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 214             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 215             *s++ = __m_b64chars[d[2] & 0x3f];
 216             d += 3, dlen -= 3;
 217         }
 218     }
 219
 220   done:
 221     *s++ = '?';
 222     *s++ = '=';
 223     return s - s0;
 224 }
 225
 226 static size_t
 227 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 228 {
 229     char *s0 = s;
 230
 231     s += sprintf(s, "=?%s?Q?", tocode);
 232     while (dlen--) {
 233         unsigned char c = *d++;
 234
 235         if (c == ' ') {
 236             *s++ = '_';
 237         } else
 238         if (c & 0x80 || __qp_special[c]) {
 239             *s++ = '=';
 240             *s++ = __m_b36chars_upper[c >> 4];
 241             *s++ = __m_b36chars_upper[c & 0xf];
 242         } else {
 243             *s++ = c;
 244         }
 245     }
 246
 247     *s++ = '?';
 248     *s++ = '=';
 249     return s - s0;
 250 }
 251
 252 /*
 253  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 254  * be converted to an encoded word of length *wlen using *encoder.
 255  * Otherwise return an upper bound on the maximum length of the data
 256  * which could be converted.
 257  * The data is converted from fromcode (which must be stateless) to
 258  * tocode, unless fromcode is 0, in which case the data is assumed to
 259  * be already in tocode, which should be 8-bit and stateless.
 260  */
 261 static size_t try_block(const char *d, ssize_t dlen,
 262                         const char *fromcode, const char *tocode,
 263                         encoder_t **encoder, ssize_t *wlen)
 264 {
 265     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 266     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 267     char *ob;
 268
 269     if (fromcode) {
 270         const char *ib = d;
 271         ssize_t ibl = dlen;
 272         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 273
 274         assert (cd != (iconv_t)(-1));
 275
 276         ob = buf1;
 277
 278         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 279         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 280         {
 281             assert (errno == E2BIG && ib > d);
 282             iconv_close(cd);
 283             return (ib - d == dlen) ? dlen : ib - d + 1;
 284         }
 285         iconv_close (cd);
 286     } else {
 287         if (dlen > obl)
 288             return obl + 1;
 289         memcpy(buf1, d, dlen);
 290         ob = buf1 + dlen;
 291     }
 292
 293     {
 294         const char *p;
 295         int count, len, len_b, len_q;
 296
 297         count = 0;
 298         for (p = buf1; p < ob; p++) {
 299             count += (*p & 0x80 || __qp_special[(int)*p]);
 300         }
 301
 302         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 303         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 304         len_q = len + (ob - buf1) + 2 * count;
 305
 306         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 307         if (!ascii_strcasecmp(tocode, "ISO-2022-JP"))
 308             len_q = ENCWORD_LEN_MAX + 1;
 309
 310         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 311             *encoder = b_encoder;
 312             *wlen = len_b;
 313             return 0;
 314         } else
 315         if (len_q <= ENCWORD_LEN_MAX) {
 316             *encoder = q_encoder;
 317             *wlen = len_q;
 318             return 0;
 319         } else {
 320             return dlen;
 321         }
 322     }
 323 }
 324
 325 /*
 326  * Encode the data (d, dlen) into s using the encoder.
 327  * Return the length of the encoded word.
 328  */
 329 static size_t
 330 encode_block(char *s, char *d, ssize_t dlen,
 331              const char *fromcode, const char *tocode, encoder_t *encoder)
 332 {
 333     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 334     ssize_t ibl, obl, n1, n2;
 335     iconv_t cd;
 336     const char *ib;
 337     char *ob;
 338
 339     if (fromcode) {
 340         cd = mutt_iconv_open(tocode, fromcode, 0);
 341         assert (cd != (iconv_t) (-1));
 342         ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
 343         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 344         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 345         assert (n1 >= 0 && n2 >= 0);
 346         iconv_close (cd);
 347         return (*encoder)(s, buf1, ob - buf1, tocode);
 348     } else {
 349         return (*encoder)(s, d, dlen, tocode);
 350     }
 351 }
 352
 353 /*
 354  * Discover how much of the data (d, dlen) can be converted into
 355  * a single encoded word. Return how much data can be converted,
 356  * and set the length *wlen of the encoded word and *encoder.
 357  * We start in column col, which limits the length of the word.
 358  */
 359 static size_t choose_block(char *d, size_t dlen, int col,
 360                            const char *fromcode, const char *tocode,
 361                            encoder_t **encoder, ssize_t *wlen)
 362 {
 363     size_t n, nn;
 364     int utf8 = fromcode && !ascii_strcasecmp(fromcode, "UTF-8");
 365
 366     n = dlen;
 367     for (;;) {
 368         assert (d + n > d);
 369         nn = try_block(d, n, fromcode, tocode, encoder, wlen);
 370         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 371             break;
 372         n = (nn ? nn : n) - 1;
 373         assert (n > 0);
 374         if (utf8) {
 375             while (n > 1 && CONTINUATION_BYTE(d[n]))
 376                 --n;
 377         }
 378     }
 379     return n;
 380 }
 381
 382 /*
 383  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 384  * allocated buffer (e, elen). The input data is in charset fromcode
 385  * and is converted into a charset chosen from charsets.
 386  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 387  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 388  * compatible with us-ascii and the original data is used.
 389  * The input data is assumed to be a single line starting at column col;
 390  * if col is non-zero, the preceding character was a space.
 391  */
 392 /*** XXX: simplify that one day ***/
 393 static int rfc2047_encode(const char *d, ssize_t dlen, int col,
 394                           const char *fromcode, const char *charsets,
 395                           char **e, ssize_t *elen, const char *specials)
 396 {
 397     int ret = 0;
 398     char *buf;
 399     ssize_t bufpos, buflen;
 400     char *u, *t;
 401     char *s0, *s1, *t0, *t1;
 402     char *tocode1 = 0;
 403     const char *tocode;
 404     const char *icode = "UTF-8";
 405     ssize_t ulen, r, n, wlen;
 406     encoder_t *encoder;
 407
 408     /* Try to convert to UTF-8. */
 409     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 410         ret = 1;
 411         icode = NULL;
 412         u = p_dupstr(d, ulen = dlen);
 413     }
 414
 415     /* Find earliest and latest things we must encode. */
 416     s0 = s1 = t0 = t1 = NULL;
 417     for (t = u; t < u + ulen; t++) {
 418         if ((*t & 0x80) ||
 419             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 420             if (!t0)
 421                 t0 = t;
 422             t1 = t;
 423         }
 424         else if (specials && strchr (specials, *t)) {
 425             if (!s0)
 426                 s0 = t;
 427             s1 = t;
 428         }
 429     }
 430
 431     /* If we have something to encode, include RFC822 specials */
 432     if (t0 && s0 && s0 < t0)
 433         t0 = s0;
 434     if (t1 && s1 && s1 > t1)
 435         t1 = s1;
 436
 437     if (!t0) {
 438         /* No encoding is required. */
 439         *e = u;
 440         *elen = ulen;
 441         return ret;
 442     }
 443
 444     /* Choose target charset. */
 445     tocode = fromcode;
 446     if (icode) {
 447         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 448                                            NULL, NULL)))
 449             tocode = tocode1;
 450         else
 451             ret = 2, icode = 0;
 452     }
 453
 454     /* Hack to avoid labelling 8-bit data as us-ascii. */
 455     if (!icode && mutt_is_us_ascii(tocode))
 456         tocode = "unknown-8bit";
 457
 458     /* Adjust t0 for maximum length of line. */
 459     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 460     if (t < u)
 461         t = u;
 462     if (t < t0)
 463         t0 = t;
 464
 465
 466     /* Adjust t0 until we can encode a character after a space. */
 467     for (; t0 > u; t0--) {
 468         if (!HSPACE(t0[-1]))
 469             continue;
 470         t = t0 + 1;
 471         if (icode) {
 472             while (t < u + ulen && CONTINUATION_BYTE(*t))
 473                 ++t;
 474         }
 475         if (!try_block(t0, t - t0, icode, tocode, &encoder, &wlen)
 476         &&  col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 477             break;
 478     }
 479
 480     /* Adjust t1 until we can encode a character before a space. */
 481     for (; t1 < u + ulen; t1++) {
 482         if (!HSPACE(*t1))
 483             continue;
 484         t = t1 - 1;
 485         if (icode) {
 486             while (CONTINUATION_BYTE(*t))
 487                 --t;
 488         }
 489         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen)
 490         &&  1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 491             break;
 492     }
 493
 494     /* We shall encode the region [t0,t1). */
 495
 496     /* Initialise the output buffer with the us-ascii prefix. */
 497     buflen = 2 * ulen;
 498     buf = p_new(char, buflen);
 499     bufpos = t0 - u;
 500     memcpy(buf, u, t0 - u);
 501
 502     col += t0 - u;
 503
 504     t = t0;
 505     for (;;) {
 506         /* Find how much we can encode. */
 507         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 508         if (n == t1 - t) {
 509             /* See if we can fit the us-ascii suffix, too. */
 510             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 511                 break;
 512             n = t1 - t - 1;
 513             if (icode)
 514                 while (CONTINUATION_BYTE (t[n]))
 515                     --n;
 516             assert (t + n >= t);
 517             if (!n) {
 518                 /* This should only happen in the really stupid case where the
 519                    only word that needs encoding is one character long, but
 520                    there is too much us-ascii stuff after it to use a single
 521                    encoded word. We add the next word to the encoded region
 522                    and try again. */
 523                 assert (t1 < u + ulen);
 524                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 525                 continue;
 526             }
 527             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 528         }
 529
 530         /* Add to output buffer. */
 531 #define LINEBREAK "\n\t"
 532         if (bufpos + wlen + 2 > buflen) {
 533             buflen = bufpos + wlen + 2;
 534             p_realloc(&buf, buflen);
 535         }
 536         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 537         assert (r == wlen);
 538         bufpos += wlen;
 539         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 540         bufpos += m_strlen(LINEBREAK);
 541 #undef LINEBREAK
 542
 543         col = 1;
 544
 545         t += n;
 546     }
 547
 548     /* Add last encoded word and us-ascii suffix to buffer. */
 549     buflen = bufpos + wlen + (u + ulen - t1);
 550     p_realloc(&buf, buflen + 1);
 551     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 552     assert (r == wlen);
 553     bufpos += wlen;
 554     memcpy (buf + bufpos, t1, u + ulen - t1);
 555
 556     p_delete(&tocode1);
 557     p_delete(&u);
 558
 559     buf[buflen] = '\0';
 560
 561     *e = buf;
 562     *elen = buflen + 1;
 563     return ret;
 564 }
 565
 566
 567 void _rfc2047_encode_string(char **pd, int encode_specials, int col)
 568 {
 569     char *e;
 570     ssize_t elen;
 571     const char *charsets;
 572
 573     if (!Charset || !*pd)
 574         return;
 575
 576     charsets = m_strisempty(SendCharset) ? "UTF-8" : SendCharset;
 577
 578     rfc2047_encode(*pd, m_strlen(*pd), col,
 579                    Charset, charsets, &e, &elen,
 580                    encode_specials ? RFC822Specials : NULL);
 581
 582     p_delete(pd);
 583     *pd = e;
 584 }
 585
 586 void rfc2047_encode_string(char **pd) {
 587     _rfc2047_encode_string(pd, 0, 32);
 588 }
 589
 590 void rfc2047_encode_adrlist(address_t *addr, const char *tag)
 591 {
 592     address_t *ptr = addr;
 593     int col = tag ? m_strlen(tag) + 2 : 32;
 594
 595     while (ptr) {
 596         if (ptr->personal)
 597             _rfc2047_encode_string(&ptr->personal, 1, col);
 598         ptr = ptr->next;
 599     }
 600 }
 601
 602
 603 /****************************************************************************/
 604 /* Decoding functions                                                       */
 605 /****************************************************************************/
 606
 607 /* decode one word into d[len] */
 608 static int rfc2047_decode_word(char *d, size_t len, const char *s)
 609 {
 610     const char *p, *eotoken;
 611     char *charset = NULL;
 612     int enc = 0, count = 0;
 613     char *d0;
 614
 615     /* =?[QB]?cset?.?= */
 616     for (p = s; (eotoken = strchr(p, '?')); p = eotoken + 1) {
 617         switch (++count) {
 618             const char *t;
 619             char *q;
 620
 621           case 2:
 622             /* ignore language specification a la RFC 2231 */
 623             t = memchr(p, '*', eotoken - p) ?: eotoken;
 624             charset = p_dupstr(p, t - p);
 625             break;
 626
 627           case 3:
 628             switch (*p) {
 629               case 'q': case 'Q':
 630                 enc = ENCQUOTEDPRINTABLE;
 631                 break;
 632
 633               case 'b': case 'B':
 634                 enc = ENCBASE64;
 635                 break;
 636
 637               default:
 638                 p_delete(&charset);
 639                 return -1;
 640             }
 641             break;
 642
 643           case 4:
 644             d0 = q = p_new(char, m_strlen(s) + 1);
 645
 646             if (enc == ENCQUOTEDPRINTABLE) {
 647                 while (p < eotoken) {
 648                     if (*p == '=' && hexval(p[1]) >= 0 && hexval(p[2]) >= 0) {
 649                         *q++ = (hexval (p[1]) << 4) | hexval (p[2]);
 650                         p += 3;
 651                     } else
 652                     if (*p == '_') {
 653                         *q++ = ' ';
 654                         p++;
 655                     } else {
 656                         *q++ = *p++;
 657                     }
 658                 }
 659                 *q = 0;
 660             } else { /* enc == ENCBASE64 */
 661                 int c, b = 0, k = 0;
 662
 663                 while (p < eotoken) {
 664                     if (*p == '=')
 665                         break;
 666
 667                     c = base64val(*p++);
 668                     if (c < 0)
 669                         continue;
 670
 671                     if (k + 6 >= 8) {
 672                         k -= 2;
 673                         *q++ = b | (c >> k);
 674                         b = c << (8 - k);
 675                     } else {
 676                         b |= c << (k + 2);
 677                         k += 6;
 678                     }
 679                 }
 680                 *q = 0;
 681             }
 682             break;
 683         }
 684     }
 685
 686     if (charset)
 687         mutt_convert_string(&d0, charset, Charset, M_ICONV_HOOK_FROM);
 688     m_strcpy(d, len, d0);
 689     p_delete(&charset);
 690     p_delete(&d0);
 691     return 0;
 692 }
 693
 694 /*
 695  * Find the start and end of the first encoded word in the string.
 696  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 697  * must be B or Q. Also, we don't require the encoded word to be
 698  * separated by linear-white-space (section 5(1)).
 699  */
 700 static const char *find_encoded_word(const char *s, const char **x)
 701 {
 702     const char *p;
 703
 704     while ((p = strstr(s, "=?"))) {
 705         s = p + 2;
 706         while (0x20 < *s && *s < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *s)) {
 707             s++;
 708         }
 709
 710         if (s[0] != '?' || !strchr("BbQq", s[1]) || s[2] != '?')
 711             continue;
 712
 713         s += 3;
 714         while (0x20 <= *s && *s < 0x7f && *s != '?') {
 715             s++;
 716         }
 717
 718         if (s[0] != '?' || s[1] != '=') {
 719             --s;
 720             continue;
 721         }
 722
 723         *x = s + 2;
 724         return p;
 725     }
 726
 727     return NULL;
 728 }
 729
 730 /* return length of linear white space */
 731 static ssize_t lwslen(const char *s, ssize_t n)
 732 {
 733     const char *p;
 734     ssize_t len = n;
 735
 736     if (n <= 0)
 737         return 0;
 738
 739     for (p = s; p < s + n; p++) {
 740         if (!strchr (" \t\r\n", *p)) {
 741             len = p - s;
 742             break;
 743         }
 744     }
 745
 746     if (p[-1] == '\r' || p[-1] == '\n')  /* LWS cannot end with CRLF */
 747         return 0;
 748
 749     return len;
 750 }
 751
 752 /* return length of linear white space : reverse */
 753 static ssize_t lwsrlen(const char *s, ssize_t n)
 754 {
 755     const char *p = s + n - 1;
 756     size_t len = n;
 757
 758     if (n <= 0)
 759         return 0;
 760
 761     if (*p == '\r' || *p == '\n')   /* LWS doesn't end with CRLF */
 762         return 0;
 763
 764     while (p >= s) {
 765         if (!strchr(" \t\r\n", *p)) {
 766             len = s + n - 1 - p;
 767             break;
 768         }
 769         p--;
 770     }
 771
 772     return len;
 773 }
 774
 775 /* try to decode anything that looks like a valid RFC2047 encoded
 776  * header field, ignoring RFC822 parsing rules
 777  */
 778 void rfc2047_decode(char **pd)
 779 {
 780     const int strict_mime = option(OPTSTRICTMIME);
 781
 782     const char *s = *pd;
 783     char *d0, *d;
 784     ssize_t dlen;
 785     int found_encoded = 0;
 786
 787     if (!s || !*s)
 788         return;
 789
 790     dlen = 4 * m_strlen(s);        /* should be enough */
 791     d = d0 = p_new(char, dlen + 1);
 792
 793     while (*s && dlen > 0) {
 794         const char *p, *q;
 795
 796         p = find_encoded_word(s, &q);
 797
 798         if (!p) {
 799             /* no encoded words */
 800             if (!strict_mime) {
 801                 ssize_t m, n;
 802
 803                 n = m_strlen(s);
 804                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 805                     if (m != n)
 806                         *d++ = ' ', dlen--;
 807                     n -= m, s += m;
 808                 }
 809
 810                 if (ascii_strcasecmp(AssumedCharset, "us-ascii")) {
 811                     char *t;
 812
 813                     t = p_dupstr(s, n);
 814                     if (mutt_convert_nonmime_string(&t) == 0) {
 815                         d += m_strcpy(d, dlen, t);
 816                     } else {
 817                         d += m_strcpy(d, dlen, s);
 818                     }
 819                     p_delete(&t);
 820                     break;
 821                 }
 822             }
 823             d += m_strcpy(d, dlen, s);
 824             break;
 825         }
 826
 827         if (p != s) {
 828             ssize_t m, n;
 829
 830             n = (p - s);
 831             /* ignore spaces between encoded words
 832              * and linear white spaces between encoded word and *text */
 833             if (!strict_mime) {
 834                 if (found_encoded && (m = lwslen(s, n)) != 0) {
 835                     if (m != n)
 836                         *d++ = ' ', dlen--;
 837                     n -= m, s += m;
 838                 }
 839
 840                 if ((m = n - lwsrlen(s, n)) != 0) {
 841                     m  = m_strncpy(d, dlen, s, m);
 842                     d += m;
 843                     dlen -= m;
 844                     if (m != n)
 845                         *d++ = ' ', dlen--;
 846                 }
 847             } else
 848             if (!found_encoded || (ssize_t)strspn(s, " \t\r\n") != n) {
 849                 n  = m_strncpy(d, dlen, s, n);
 850                 d += n;
 851                 dlen -= n;
 852             }
 853         }
 854
 855         rfc2047_decode_word(d, dlen, p);
 856         found_encoded = 1;
 857         s = q;
 858         while (*d && dlen)
 859             d++, dlen--;
 860     }
 861
 862     p_delete(pd);
 863     *pd = d0;
 864 }
 865
 866 void rfc2047_decode_adrlist(address_t *a)
 867 {
 868     while (a) {
 869         if (a->personal)
 870             rfc2047_decode(&a->personal);
 871         a = a->next;
 872     }
 873 }
 874
 875 void rfc2047_decode_envelope(ENVELOPE* e)
 876 {
 877     assert (e);
 878
 879     /* do RFC2047 decoding */
 880     rfc2047_decode_adrlist(e->from);
 881     rfc2047_decode_adrlist(e->to);
 882     rfc2047_decode_adrlist(e->cc);
 883     rfc2047_decode_adrlist(e->bcc);
 884     rfc2047_decode_adrlist(e->reply_to);
 885     rfc2047_decode_adrlist(e->mail_followup_to);
 886     rfc2047_decode_adrlist(e->return_path);
 887     rfc2047_decode_adrlist(e->sender);
 888
 889     if (e->subject) {
 890         rfc2047_decode(&e->subject);
 891         mutt_adjust_subject(e);
 892     }
 893 }