lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "mutt.h"
  37 #include "charset.h"
  38 #include "thread.h"
  39
  40 #include <assert.h>
  41 #include <ctype.h>
  42 #include <errno.h>
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45 #include <string.h>
  46
  47 /* If you are debugging this file, comment out the following line. */
  48 /*#define NDEBUG*/
  49
  50 #ifdef NDEBUG
  51 #define assert(x)
  52 #else
  53 #endif
  54
  55 #define ENCWORD_LEN_MAX 75
  56 #define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */
  57
  58 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  59 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  60
  61 /* converts f of len flen and charset from
  62        into *t of len *tlen and charset to
  63
  64    returns -1 on error
  65    returns number of converted chars from f, see iconv(3)
  66  */
  67 static ssize_t
  68 convert_string(const char *from, const char *f, ssize_t flen,
  69                const char *to,   char **t, ssize_t *tlen)
  70 {
  71     iconv_t cd;
  72     char *buf, *ob;
  73     ssize_t obl, n;
  74
  75     cd = mutt_iconv_open(to, from, 0);
  76
  77     if (cd == (iconv_t)(-1))
  78         return -1;
  79
  80     obl = 4 * flen + 1;
  81     ob = buf = p_new(char, obl);
  82     n = my_iconv(cd, &f, &flen, &ob, &obl);
  83
  84     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  85         int e = errno;
  86         iconv_close(cd);
  87         errno = e;
  88         p_delete(&buf);
  89         return -1;
  90     }
  91     iconv_close(cd);
  92
  93     *ob = '\0';
  94     *tlen = ob - buf;
  95     *t  = buf;
  96
  97     return n;
  98 }
  99
 100 /* choose the shortest encoding for u */
 101 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 102                           char *u, ssize_t ulen, char **dst, ssize_t *dlen)
 103 {
 104     char *res = NULL;
 105     ssize_t reslen = 0;
 106
 107     char *tocode = NULL;
 108     ssize_t bestn = 0;
 109
 110     const char *p = charsets;
 111
 112     while (*p) {
 113         char cset[SHORT_STRING];
 114         const char *q;
 115         char *s;
 116         ssize_t slen, n;
 117
 118         q = strchr(p, ':');
 119         if (q) {
 120             n = m_strncpy(cset, sizeof(cset), p, q - p);
 121             p = ++q;
 122         } else {
 123             n = m_strcpy(cset, sizeof(cset), p);
 124             p += n;
 125         }
 126
 127         if (!n || n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) {
 128             /* Assume that we never need more than 12 characters of
 129                encoded-text to encode a single character. */
 130             continue;
 131         }
 132
 133         n = convert_string(fromcode, u, ulen, cset, &s, &slen);
 134         if (n < 0)
 135             continue;
 136
 137         if (!tocode || n < bestn) {
 138             m_strreplace(&tocode, cset);
 139             bestn = n;
 140
 141             p_delete(&res);
 142             res = s;
 143             reslen = slen;
 144             if (!bestn)
 145                 break;
 146         } else {
 147             p_delete(&s);
 148         }
 149     }
 150
 151     if (tocode) {
 152         char buf[LONG_STRING];
 153
 154         if (dst && dlen) {
 155             *dst  = res;
 156             *dlen = reslen;
 157         } else {
 158             p_delete(&res);
 159         }
 160
 161         mutt_canonical_charset(buf, sizeof(buf), tocode);
 162         m_strreplace(&tocode, buf);
 163     }
 164
 165     return tocode;
 166 }
 167
 168
 169 /****************************************************************************/
 170 /* Encoding functions                                                       */
 171 /****************************************************************************/
 172
 173 static const char __qp_special[128] = {
 174     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 175     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 176     0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
 177     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 178     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 180     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 181     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 182 };
 183
 184 typedef size_t (encoder_t)(char *, const char *, ssize_t, const char *);
 185
 186 static size_t
 187 b_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 188 {
 189     char *s0 = s;
 190
 191     s += sprintf(s, "=?%s?B?", tocode);
 192
 193     for (;;) {
 194         switch (dlen) {
 195           case 0:
 196             goto done;
 197
 198           case 1:
 199             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 200             *s++ = __m_b64chars[(*d & 0x03) << 4];
 201             *s++ = '=';
 202             *s++ = '=';
 203             goto done;
 204
 205           case 2:
 206             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 207             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 208             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 209             *s++ = '=';
 210             goto done;
 211
 212           default:
 213             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 214             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 215             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 216             *s++ = __m_b64chars[d[2] & 0x3f];
 217             d += 3, dlen -= 3;
 218         }
 219     }
 220
 221   done:
 222     *s++ = '?';
 223     *s++ = '=';
 224     return s - s0;
 225 }
 226
 227 static size_t
 228 q_encoder(char *s, const char *d, ssize_t dlen, const char *tocode)
 229 {
 230     char *s0 = s;
 231
 232     s += sprintf(s, "=?%s?Q?", tocode);
 233     while (dlen--) {
 234         unsigned char c = *d++;
 235
 236         if (c == ' ') {
 237             *s++ = '_';
 238         } else
 239         if (c & 0x80 || __qp_special[c]) {
 240             *s++ = '=';
 241             *s++ = __m_b36chars_upper[c >> 4];
 242             *s++ = __m_b36chars_upper[c & 0xf];
 243         } else {
 244             *s++ = c;
 245         }
 246     }
 247
 248     *s++ = '?';
 249     *s++ = '=';
 250     return s - s0;
 251 }
 252
 253 /*
 254  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 255  * be converted to an encoded word of length *wlen using *encoder.
 256  * Otherwise return an upper bound on the maximum length of the data
 257  * which could be converted.
 258  * The data is converted from fromcode (which must be stateless) to
 259  * tocode, unless fromcode is 0, in which case the data is assumed to
 260  * be already in tocode, which should be 8-bit and stateless.
 261  */
 262 static size_t try_block(const char *d, ssize_t dlen,
 263                         const char *fromcode, const char *tocode,
 264                         encoder_t **encoder, ssize_t *wlen)
 265 {
 266     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 267     ssize_t obl = sizeof(buf1) - m_strlen(tocode);
 268     char *ob;
 269
 270     if (fromcode) {
 271         const char *ib = d;
 272         ssize_t ibl = dlen;
 273         iconv_t cd = mutt_iconv_open(tocode, fromcode, 0);
 274
 275         assert (cd != (iconv_t)(-1));
 276
 277         ob = buf1;
 278
 279         if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
 280         ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
 281         {
 282             assert (errno == E2BIG && ib > d);
 283             iconv_close(cd);
 284             return (ib - d == dlen) ? dlen : ib - d + 1;
 285         }
 286         iconv_close (cd);
 287     } else {
 288         if (dlen > obl)
 289             return obl + 1;
 290         memcpy(buf1, d, dlen);
 291         ob = buf1 + dlen;
 292     }
 293
 294     {
 295         const char *p;
 296         int count, len, len_b, len_q;
 297
 298         count = 0;
 299         for (p = buf1; p < ob; p++) {
 300             count += (*p & 0x80 || __qp_special[(int)*p]);
 301         }
 302
 303         len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
 304         len_b = len + (((ob - buf1) + 2) / 3) * 4;
 305         len_q = len + (ob - buf1) + 2 * count;
 306
 307         /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 308         if (!ascii_strcasecmp(tocode, "ISO-2022-JP"))
 309             len_q = ENCWORD_LEN_MAX + 1;
 310
 311         if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
 312             *encoder = b_encoder;
 313             *wlen = len_b;
 314             return 0;
 315         } else
 316         if (len_q <= ENCWORD_LEN_MAX) {
 317             *encoder = q_encoder;
 318             *wlen = len_q;
 319             return 0;
 320         } else {
 321             return dlen;
 322         }
 323     }
 324 }
 325
 326 /*
 327  * Encode the data (d, dlen) into s using the encoder.
 328  * Return the length of the encoded word.
 329  */
 330 static size_t
 331 encode_block(char *s, char *d, ssize_t dlen,
 332              const char *fromcode, const char *tocode, encoder_t *encoder)
 333 {
 334     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 335     ssize_t ibl, obl, n1, n2;
 336     iconv_t cd;
 337     const char *ib;
 338     char *ob;
 339
 340     if (fromcode) {
 341         cd = mutt_iconv_open(tocode, fromcode, 0);
 342         assert (cd != (iconv_t) (-1));
 343         ib = d, ibl = dlen, ob = buf1, obl = sizeof(buf1) - m_strlen(tocode);
 344         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 345         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 346         assert (n1 >= 0 && n2 >= 0);
 347         iconv_close (cd);
 348         return (*encoder)(s, buf1, ob - buf1, tocode);
 349     } else {
 350         return (*encoder)(s, d, dlen, tocode);
 351     }
 352 }
 353
 354 /*
 355  * Discover how much of the data (d, dlen) can be converted into
 356  * a single encoded word. Return how much data can be converted,
 357  * and set the length *wlen of the encoded word and *encoder.
 358  * We start in column col, which limits the length of the word.
 359  */
 360 static size_t choose_block(char *d, size_t dlen, int col,
 361                            const char *fromcode, const char *tocode,
 362                            encoder_t **encoder, ssize_t *wlen)
 363 {
 364     size_t n, nn;
 365     int utf8 = fromcode && !ascii_strcasecmp(fromcode, "UTF-8");
 366
 367     n = dlen;
 368     for (;;) {
 369         assert (d + n > d);
 370         nn = try_block(d, n, fromcode, tocode, encoder, wlen);
 371         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 372             break;
 373         n = (nn ? nn : n) - 1;
 374         assert (n > 0);
 375         if (utf8) {
 376             while (n > 1 && CONTINUATION_BYTE(d[n]))
 377                 --n;
 378         }
 379     }
 380     return n;
 381 }
 382
 383
 384
 385 /*** XXX: MC: not read after that mark ***/
 386
 387 /*
 388  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 389  * allocated buffer (e, elen). The input data is in charset fromcode
 390  * and is converted into a charset chosen from charsets.
 391  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 392  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 393  * compatible with us-ascii and the original data is used.
 394  * The input data is assumed to be a single line starting at column col;
 395  * if col is non-zero, the preceding character was a space.
 396  */
 397 static int rfc2047_encode (const char *d, ssize_t dlen, int col,
 398                            const char *fromcode, const char *charsets,
 399                            char **e, ssize_t *elen, const char *specials)
 400 {
 401     int ret = 0;
 402     char *buf;
 403     ssize_t bufpos, buflen;
 404     char *u, *t0, *t1, *t;
 405     char *s0, *s1;
 406     ssize_t ulen, r, n, wlen;
 407     encoder_t *encoder;
 408     char *tocode1 = 0;
 409     const char *tocode;
 410     const char *icode = "UTF-8";
 411
 412     /* Try to convert to UTF-8. */
 413     if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
 414         ret = 1;
 415         icode = 0;
 416         u = p_dupstr(d, ulen = dlen);
 417     }
 418
 419     /* Find earliest and latest things we must encode. */
 420     s0 = s1 = t0 = t1 = 0;
 421     for (t = u; t < u + ulen; t++) {
 422         if ((*t & 0x80) ||
 423             (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
 424             if (!t0)
 425                 t0 = t;
 426             t1 = t;
 427         }
 428         else if (specials && strchr (specials, *t)) {
 429             if (!s0)
 430                 s0 = t;
 431             s1 = t;
 432         }
 433     }
 434
 435     /* If we have something to encode, include RFC822 specials */
 436     if (t0 && s0 && s0 < t0)
 437         t0 = s0;
 438     if (t1 && s1 && s1 > t1)
 439         t1 = s1;
 440
 441     if (!t0) {
 442         /* No encoding is required. */
 443         *e = u;
 444         *elen = ulen;
 445         return ret;
 446     }
 447
 448     /* Choose target charset. */
 449     tocode = fromcode;
 450     if (icode) {
 451         if ((tocode1 = mutt_choose_charset(icode, charsets, u, ulen,
 452                                            NULL, NULL)))
 453             tocode = tocode1;
 454         else
 455             ret = 2, icode = 0;
 456     }
 457
 458     /* Hack to avoid labelling 8-bit data as us-ascii. */
 459     if (!icode && mutt_is_us_ascii (tocode))
 460         tocode = "unknown-8bit";
 461
 462     /* Adjust t0 for maximum length of line. */
 463     t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 464     if (t < u)
 465         t = u;
 466     if (t < t0)
 467         t0 = t;
 468
 469
 470     /* Adjust t0 until we can encode a character after a space. */
 471     for (; t0 > u; t0--) {
 472         if (!HSPACE (*(t0 - 1)))
 473             continue;
 474         t = t0 + 1;
 475         if (icode)
 476             while (t < u + ulen && CONTINUATION_BYTE (*t))
 477                 ++t;
 478         if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 479             col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 480             break;
 481     }
 482
 483     /* Adjust t1 until we can encode a character before a space. */
 484     for (; t1 < u + ulen; t1++) {
 485         if (!HSPACE (*t1))
 486             continue;
 487         t = t1 - 1;
 488         if (icode)
 489             while (CONTINUATION_BYTE (*t))
 490                 --t;
 491         if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 492             1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 493             break;
 494     }
 495
 496     /* We shall encode the region [t0,t1). */
 497
 498     /* Initialise the output buffer with the us-ascii prefix. */
 499     buflen = 2 * ulen;
 500     buf = p_new(char, buflen);
 501     bufpos = t0 - u;
 502     memcpy (buf, u, t0 - u);
 503
 504     col += t0 - u;
 505
 506     t = t0;
 507     for (;;) {
 508         /* Find how much we can encode. */
 509         n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 510         if (n == t1 - t) {
 511             /* See if we can fit the us-ascii suffix, too. */
 512             if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 513                 break;
 514             n = t1 - t - 1;
 515             if (icode)
 516                 while (CONTINUATION_BYTE (t[n]))
 517                     --n;
 518             assert (t + n >= t);
 519             if (!n) {
 520                 /* This should only happen in the really stupid case where the
 521                    only word that needs encoding is one character long, but
 522                    there is too much us-ascii stuff after it to use a single
 523                    encoded word. We add the next word to the encoded region
 524                    and try again. */
 525                 assert (t1 < u + ulen);
 526                 for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
 527                 continue;
 528             }
 529             n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 530         }
 531
 532         /* Add to output buffer. */
 533 #define LINEBREAK "\n\t"
 534         if (bufpos + wlen + m_strlen(LINEBREAK) > buflen) {
 535             buflen = bufpos + wlen + m_strlen(LINEBREAK);
 536             p_realloc(&buf, buflen);
 537         }
 538         r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 539         assert (r == wlen);
 540         bufpos += wlen;
 541         memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
 542         bufpos += m_strlen(LINEBREAK);
 543 #undef LINEBREAK
 544
 545         col = 1;
 546
 547         t += n;
 548     }
 549
 550     /* Add last encoded word and us-ascii suffix to buffer. */
 551     buflen = bufpos + wlen + (u + ulen - t1);
 552     p_realloc(&buf, buflen + 1);
 553     r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 554     assert (r == wlen);
 555     bufpos += wlen;
 556     memcpy (buf + bufpos, t1, u + ulen - t1);
 557
 558     p_delete(&tocode1);
 559     p_delete(&u);
 560
 561     buf[buflen] = '\0';
 562
 563     *e = buf;
 564     *elen = buflen + 1;
 565     return ret;
 566 }
 567
 568 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 569 {
 570     char *e;
 571     ssize_t elen;
 572     const char *charsets;
 573
 574     if (!Charset || !*pd)
 575         return;
 576
 577     charsets = SendCharset;
 578     if (!charsets || !*charsets)
 579         charsets = "UTF-8";
 580
 581     rfc2047_encode (*pd, m_strlen(*pd), col,
 582                     Charset, charsets, &e, &elen,
 583                     encode_specials ? RFC822Specials : NULL);
 584
 585     p_delete(pd);
 586     *pd = e;
 587 }
 588
 589 void rfc2047_encode_string(char **pd) {
 590     _rfc2047_encode_string(pd, 0, 32);
 591 }
 592
 593 void rfc2047_encode_adrlist (address_t * addr, const char *tag)
 594 {
 595     address_t *ptr = addr;
 596     int col = tag ? m_strlen(tag) + 2 : 32;
 597
 598     while (ptr) {
 599         if (ptr->personal)
 600             _rfc2047_encode_string (&ptr->personal, 1, col);
 601         ptr = ptr->next;
 602     }
 603 }
 604
 605 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 606 {
 607     const char *pp, *pp1;
 608     char *pd, *d0;
 609     const char *t, *t1;
 610     int enc = 0, count = 0;
 611     char *charset = NULL;
 612
 613     pd = d0 = p_new(char, m_strlen(s));
 614
 615     for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 616         count++;
 617         switch (count) {
 618           case 2:
 619             /* ignore language specification a la RFC 2231 */
 620             t = pp1;
 621             if ((t1 = memchr (pp, '*', t - pp)))
 622                 t = t1;
 623             charset = p_dupstr(pp, t - pp);
 624             break;
 625           case 3:
 626             if (toupper ((unsigned char) *pp) == 'Q')
 627                 enc = ENCQUOTEDPRINTABLE;
 628             else if (toupper ((unsigned char) *pp) == 'B')
 629                 enc = ENCBASE64;
 630             else {
 631                 p_delete(&charset);
 632                 p_delete(&d0);
 633                 return (-1);
 634             }
 635             break;
 636           case 4:
 637             if (enc == ENCQUOTEDPRINTABLE) {
 638                 for (; pp < pp1; pp++) {
 639                     if (*pp == '_')
 640                         *pd++ = ' ';
 641                     else if (*pp == '=' && hexval(pp[1]) >= 0 && hexval(pp[2]) >= 0) {
 642                         *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 643                         pp += 2;
 644                     }
 645                     else
 646                         *pd++ = *pp;
 647                 }
 648                 *pd = 0;
 649             }
 650             else if (enc == ENCBASE64) {
 651                 int c, b = 0, k = 0;
 652
 653                 for (; pp < pp1; pp++) {
 654                     if (*pp == '=')
 655                         break;
 656                     if ((c = base64val(*pp)) < 0)
 657                         continue;
 658                     if (k + 6 >= 8) {
 659                         k -= 2;
 660                         *pd++ = b | (c >> k);
 661                         b = c << (8 - k);
 662                     }
 663                     else {
 664                         b |= c << (k + 2);
 665                         k += 6;
 666                     }
 667                 }
 668                 *pd = 0;
 669             }
 670             break;
 671         }
 672     }
 673
 674     if (charset)
 675         mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 676     m_strcpy(d, len, d0);
 677     p_delete(&charset);
 678     p_delete(&d0);
 679     return (0);
 680 }
 681
 682 /*
 683  * Find the start and end of the first encoded word in the string.
 684  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 685  * must be B or Q. Also, we don't require the encoded word to be
 686  * separated by linear-white-space (section 5(1)).
 687  */
 688 static const char *find_encoded_word (const char *s, const char **x)
 689 {
 690     const char *p, *q;
 691
 692     q = s;
 693     while ((p = strstr (q, "=?"))) {
 694         for (q = p + 2;
 695              0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 696         if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 697             continue;
 698         for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 699         if (q[0] != '?' || q[1] != '=') {
 700             --q;
 701             continue;
 702         }
 703
 704         *x = q + 2;
 705         return p;
 706     }
 707
 708     return 0;
 709 }
 710
 711 /* return length of linear white space */
 712 static size_t lwslen (const char *s, size_t n)
 713 {
 714     const char *p = s;
 715     size_t len = n;
 716
 717     if (n <= 0)
 718         return 0;
 719
 720     for (; p < s + n; p++)
 721         if (!strchr (" \t\r\n", *p)) {
 722             len = (size_t) (p - s);
 723             break;
 724         }
 725     if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 726         len = (size_t) 0;
 727     return len;
 728 }
 729
 730 /* return length of linear white space : reverse */
 731 static size_t lwsrlen (const char *s, size_t n)
 732 {
 733     const char *p = s + n - 1;
 734     size_t len = n;
 735
 736     if (n <= 0)
 737         return 0;
 738
 739     if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 740         return (size_t) 0;
 741
 742     for (; p >= s; p--)
 743         if (!strchr (" \t\r\n", *p)) {
 744             len = (size_t) (s + n - 1 - p);
 745             break;
 746         }
 747     return len;
 748 }
 749
 750 /* try to decode anything that looks like a valid RFC2047 encoded
 751  * header field, ignoring RFC822 parsing rules
 752  */
 753 void rfc2047_decode (char **pd)
 754 {
 755     const char *p, *q;
 756     size_t m, n;
 757     int found_encoded = 0;
 758     char *d0, *d;
 759     const char *s = *pd;
 760     size_t dlen;
 761
 762     if (!s || !*s)
 763         return;
 764
 765     dlen = 4 * m_strlen(s);        /* should be enough */
 766     d = d0 = p_new(char, dlen + 1);
 767
 768     while (*s && dlen > 0) {
 769         if (!(p = find_encoded_word (s, &q))) {
 770             /* no encoded words */
 771             if (!option (OPTSTRICTMIME)) {
 772                 n = m_strlen(s);
 773                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 774                     if (m != n)
 775                         *d = ' ', d++, dlen--;
 776                     n -= m, s += m;
 777                 }
 778                 if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
 779                     char *t;
 780                     ssize_t tlen;
 781
 782                     t = p_dupstr(s, n);
 783                     if (mutt_convert_nonmime_string (&t) == 0) {
 784                         tlen = m_strlen(t);
 785                         strncpy (d, t, tlen);
 786                         d += tlen;
 787                     }
 788                     else {
 789                         strncpy (d, s, n);
 790                         d += n;
 791                     }
 792                     p_delete(&t);
 793                     break;
 794                 }
 795             }
 796             strncpy (d, s, dlen);
 797             d += dlen;
 798             break;
 799         }
 800
 801         if (p != s) {
 802             n = (p - s);
 803             /* ignore spaces between encoded words
 804              * and linear white spaces between encoded word and *text */
 805             if (!option (OPTSTRICTMIME)) {
 806                 if (found_encoded && (m = lwslen (s, n)) != 0) {
 807                     if (m != n)
 808                         *d = ' ', d++, dlen--;
 809                     n -= m, s += m;
 810                 }
 811
 812                 if ((m = n - lwsrlen (s, n)) != 0) {
 813                     if (m > dlen)
 814                         m = dlen;
 815                     memcpy (d, s, m);
 816                     d += m;
 817                     dlen -= m;
 818                     if (m != n)
 819                         *d = ' ', d++, dlen--;
 820                 }
 821             }
 822             else if (!found_encoded || strspn (s, " \t\r\n") != n) {
 823                 if (n > dlen)
 824                     n = dlen;
 825                 memcpy (d, s, n);
 826                 d += n;
 827                 dlen -= n;
 828             }
 829         }
 830
 831         rfc2047_decode_word (d, p, dlen);
 832         found_encoded = 1;
 833         s = q;
 834         n = m_strlen(d);
 835         dlen -= n;
 836         d += n;
 837     }
 838     *d = 0;
 839
 840     p_delete(pd);
 841     *pd = d0;
 842     str_adjust (pd);
 843 }
 844
 845 void rfc2047_decode_adrlist(address_t *a)
 846 {
 847     while (a) {
 848         if (a->personal)
 849             rfc2047_decode(&a->personal);
 850         a = a->next;
 851     }
 852 }
 853
 854 void rfc2047_decode_envelope(ENVELOPE* e)
 855 {
 856     assert (e);
 857
 858     /* do RFC2047 decoding */
 859     rfc2047_decode_adrlist(e->from);
 860     rfc2047_decode_adrlist(e->to);
 861     rfc2047_decode_adrlist(e->cc);
 862     rfc2047_decode_adrlist(e->bcc);
 863     rfc2047_decode_adrlist(e->reply_to);
 864     rfc2047_decode_adrlist(e->mail_followup_to);
 865     rfc2047_decode_adrlist(e->return_path);
 866     rfc2047_decode_adrlist(e->sender);
 867
 868     if (e->subject) {
 869         rfc2047_decode(&e->subject);
 870         mutt_adjust_subject(e);
 871     }
 872 }