lib-mime/rfc2047.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
  24  *
  25  * This file is part of mutt-ng, see http://www.muttng.org/.
  26  * It's licensed under the GNU General Public License,
  27  * please see the file GPL in the top level source directory.
  28  */
  29
  30 #include <lib-lib/mem.h>
  31 #include <lib-lib/str.h>
  32 #include <lib-lib/ascii.h>
  33
  34 #include <lib-mime/mime.h>
  35
  36 #include "mutt.h"
  37 #include "charset.h"
  38 #include "thread.h"
  39
  40 #include <ctype.h>
  41 #include <errno.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
/* Assertion switch: while the NDEBUG define below stays commented out, the
 * real assert() from <assert.h> is used.  Uncomment it to turn every
 * assert() in this file into a no-op. */
/*#define NDEBUG*/

#ifdef NDEBUG
#define assert(x)
#else
#include <assert.h>
#endif
  54
/* Longest encoded-word this file will produce, delimiters included. */
#define ENCWORD_LEN_MAX 75
/* Shortest conceivable encoded-word: one character each for charset,
 * encoding and text, plus the fixed "=?", "?", "?" and "?=" delimiters. */
#define ENCWORD_LEN_MIN 9       /* m_strlen("=?.?.?.?=") */

/* True for space and tab; note that the NUL byte is accepted as well. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')

/* True when the two top bits of c are 10, i.e. c is a UTF-8
 * continuation byte. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)

/* Encoder callback as implemented by b_encoder() and q_encoder():
 * arguments are (output buffer, data, data length, charset label); the
 * return value is the number of bytes written to the output buffer. */
typedef size_t (*encoder_t) (char *, const char *, size_t,
                             const char *);
  64
  65 /* converts f of len flen and charset from
  66        into *t of len *tlen and charset to
  67
  68    returns -1 on error
  69    returns number of converted chars from f, see iconv(3)
  70  */
  71 static ssize_t
  72 convert_string(const char *from, const char *f, size_t flen,
  73                const char *to,   char **t, size_t *tlen)
  74 {
  75     iconv_t cd;
  76     char *buf, *ob;
  77     size_t obl;
  78     ssize_t n;
  79     int e;
  80
  81     cd = mutt_iconv_open(to, from, 0);
  82
  83     if (cd == (iconv_t)(-1))
  84         return -1;
  85
  86     obl = 4 * flen + 1;
  87     ob = buf = p_new(char, obl);
  88     n = my_iconv(cd, &f, &flen, &ob, &obl);
  89
  90     if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
  91         e = errno;
  92         p_delete(&buf);
  93         iconv_close (cd);
  94         errno = e;
  95         return -1;
  96     }
  97
  98     *ob = '\0';
  99     *tlen = ob - buf;
 100
 101     p_realloc(&buf, ob - buf + 1);
 102     *t = buf;
 103     iconv_close (cd);
 104
 105     return n;
 106 }
 107
 108 char *mutt_choose_charset(const char *fromcode, const char *charsets,
 109                           char *u, size_t ulen, char **d, size_t *dlen)
 110 {
 111     char canonical_buff[LONG_STRING];
 112     char *e = 0, *tocode = 0;
 113     size_t elen = 0, bestn = 0;
 114     const char *p, *q;
 115
 116     for (p = charsets; p; p = q ? q + 1 : 0) {
 117         char *s, *t;
 118         size_t slen, n;
 119
 120         q = strchr (p, ':');
 121
 122         n = q ? q - p : m_strlen(p);
 123
 124         if (!n ||
 125             /* Assume that we never need more than 12 characters of
 126                encoded-text to encode a single character. */
 127             n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12))
 128             continue;
 129
 130         t = p_dupstr(p, n);
 131
 132         n = convert_string(fromcode, u, ulen, t, &s, &slen);
 133         if (n == (size_t) (-1))
 134             continue;
 135
 136         if (!tocode || n < bestn) {
 137             bestn = n;
 138             p_delete(&tocode);
 139             tocode = t;
 140             if (d) {
 141                 p_delete(&e);
 142                 e = s;
 143             } else {
 144                 p_delete(&s);
 145             }
 146             elen = slen;
 147             if (!bestn)
 148                 break;
 149         } else {
 150             p_delete(&t);
 151             p_delete(&s);
 152         }
 153     }
 154
 155     if (tocode) {
 156         if (d)
 157             *d = e;
 158         if (dlen)
 159             *dlen = elen;
 160
 161         mutt_canonical_charset(canonical_buff, sizeof(canonical_buff), tocode);
 162         m_strreplace(&tocode, canonical_buff);
 163     }
 164
 165     return tocode;
 166 }
 167
 168 static size_t b_encoder (char *s, const char *d, size_t dlen,
 169                          const char *tocode)
 170 {
 171     char *s0 = s;
 172
 173     memcpy (s, "=?", 2), s += 2;
 174     memcpy (s, tocode, m_strlen(tocode)), s += m_strlen(tocode);
 175     memcpy (s, "?B?", 3), s += 3;
 176     for (;;) {
 177         if (!dlen)
 178             break;
 179         else if (dlen == 1) {
 180             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 181             *s++ = __m_b64chars[(*d & 0x03) << 4];
 182             *s++ = '=';
 183             *s++ = '=';
 184             break;
 185         }
 186         else if (dlen == 2) {
 187             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 188             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 189             *s++ = __m_b64chars[(d[1] & 0x0f) << 2];
 190             *s++ = '=';
 191             break;
 192         }
 193         else {
 194             *s++ = __m_b64chars[(*d >> 2) & 0x3f];
 195             *s++ = __m_b64chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 196             *s++ = __m_b64chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 197             *s++ = __m_b64chars[d[2] & 0x3f];
 198             d += 3, dlen -= 3;
 199         }
 200     }
 201     memcpy (s, "?=", 2), s += 2;
 202     return s - s0;
 203 }
 204
 205 static size_t q_encoder (char *s, const char *d, size_t dlen,
 206                          const char *tocode)
 207 {
 208     char *s0 = s;
 209
 210     memcpy (s, "=?", 2), s += 2;
 211     memcpy (s, tocode, m_strlen(tocode)), s += m_strlen(tocode);
 212     memcpy (s, "?Q?", 3), s += 3;
 213     while (dlen--) {
 214         unsigned char c = *d++;
 215
 216         if (c == ' ')
 217             *s++ = '_';
 218         else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) {
 219             *s++ = '=';
 220             *s++ = __m_b36chars_upper[c >> 4];
 221             *s++ = __m_b36chars_upper[c & 0xf];
 222         }
 223         else
 224             *s++ = c;
 225     }
 226     memcpy (s, "?=", 2), s += 2;
 227     return s - s0;
 228 }
 229
/*
 * Check whether the data (d, dlen) fits into a single encoded word.
 *
 * If it does, return 0 and set *encoder to the encoder to use and *wlen
 * to the length of the resulting encoded word.
 * Otherwise return a non-zero upper bound on the length of the data
 * which could be converted; *encoder and *wlen are not written then.
 *
 * The data is converted from fromcode (which must be stateless) to
 * tocode, unless fromcode is 0, in which case the data is assumed to
 * be already in tocode, which should be 8-bit and stateless.
 */
static size_t try_block (const char *d, size_t dlen,
                         const char *fromcode, const char *tocode,
                         encoder_t * encoder, size_t * wlen)
{
    /* Scratch space for the data in tocode; only
       sizeof(buf1) - m_strlen(tocode) bytes of it are ever offered. */
    char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
    iconv_t cd;
    const char *ib;
    char *ob, *p;
    size_t ibl, obl;
    int count, len, len_b, len_q;

    if (fromcode) {
        cd = mutt_iconv_open (tocode, fromcode, 0);
        assert (cd != (iconv_t) (-1));
        /* The charset label eats into the output budget. */
        ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
        if (my_iconv(cd, &ib, &ibl, &ob, &obl) < 0
        ||  my_iconv(cd, 0, 0, &ob, &obl) < 0)
        {
            /* The only failure expected here is running out of output
               space; ib then marks how far the input was consumed. */
            assert (errno == E2BIG);
            iconv_close (cd);
            assert (ib > d);
            return (ib - d == dlen) ? dlen : ib - d + 1;
        }
        iconv_close (cd);
    }
    else {
        /* No conversion: the raw data itself must fit the budget. */
        if (dlen > sizeof (buf1) - m_strlen(tocode))
            return sizeof (buf1) - m_strlen(tocode) + 1;
        memcpy (buf1, d, dlen);
        ob = buf1 + dlen;
    }

    /* Count the bytes that q_encoder() would write as "=XY". */
    count = 0;
    for (p = buf1; p < ob; p++) {
        unsigned char c = *p;

        /* Sanity check: '?' must be one of the MimeSpecials. */
        assert (strchr (MimeSpecials, '?'));
        if (c >= 0x7f || c < 0x20 || *p == '_' ||
            (c != ' ' && strchr (MimeSpecials, *p)))
            ++count;
    }

    /* len: "=?" + charset + "?X?" + "?=" around the encoded text.
       len_b: four characters per (started) group of three bytes.
       len_q: one character per byte plus two more per escaped byte. */
    len = ENCWORD_LEN_MIN - 2 + m_strlen(tocode);
    len_b = len + (((ob - buf1) + 2) / 3) * 4;
    len_q = len + (ob - buf1) + 2 * count;

    /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
    if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
        len_q = ENCWORD_LEN_MAX + 1;

    /* Prefer B only when it is strictly shorter than Q. */
    if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) {
        *encoder = b_encoder;
        *wlen = len_b;
        return 0;
    }
    else if (len_q <= ENCWORD_LEN_MAX) {
        *encoder = q_encoder;
        *wlen = len_q;
        return 0;
    }
    else
        return dlen;
}
 302
 303 /*
 304  * Encode the data (d, dlen) into s using the encoder.
 305  * Return the length of the encoded word.
 306  */
 307 static size_t encode_block (char *s, char *d, size_t dlen,
 308                             const char *fromcode, const char *tocode,
 309                             encoder_t encoder)
 310 {
 311     char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 312     iconv_t cd;
 313     const char *ib;
 314     char *ob;
 315     size_t ibl, obl, n1, n2;
 316
 317     if (fromcode) {
 318         cd = mutt_iconv_open (tocode, fromcode, 0);
 319         assert (cd != (iconv_t) (-1));
 320         ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - m_strlen(tocode);
 321         n1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
 322         n2 = my_iconv(cd, 0, 0, &ob, &obl);
 323         assert (n1 != (size_t) (-1) && n2 != (size_t) (-1));
 324         iconv_close (cd);
 325         return (*encoder) (s, buf1, ob - buf1, tocode);
 326     }
 327     else
 328         return (*encoder) (s, d, dlen, tocode);
 329 }
 330
 331 /*
 332  * Discover how much of the data (d, dlen) can be converted into
 333  * a single encoded word. Return how much data can be converted,
 334  * and set the length *wlen of the encoded word and *encoder.
 335  * We start in column col, which limits the length of the word.
 336  */
 337 static size_t choose_block (char *d, size_t dlen, int col,
 338                             const char *fromcode, const char *tocode,
 339                             encoder_t * encoder, size_t * wlen)
 340 {
 341     size_t n, nn;
 342     int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
 343
 344     n = dlen;
 345     for (;;) {
 346         assert (d + n > d);
 347         nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 348         if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 349             break;
 350         n = (nn ? nn : n) - 1;
 351         assert (n > 0);
 352         if (utf8)
 353             while (n > 1 && CONTINUATION_BYTE (d[n]))
 354                 --n;
 355     }
 356     return n;
 357 }
 358
/*
 * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 * allocated buffer (e, elen). The input data is in charset fromcode
 * and is converted into a charset chosen from charsets.
 * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 * compatible with us-ascii and the original data is used.
 * The input data is assumed to be a single line starting at column col;
 * if col is non-zero, the preceding character was a space.
 * If specials is non-NULL, characters listed in it are pulled into the
 * encoded region when they lie outside of what has to be encoded anyway.
 *
 * NOTE(review): *elen is ulen (terminator not counted) when nothing had
 * to be encoded, but buflen + 1 (terminator counted) otherwise -- confirm
 * that no caller relies on *elen.
 */
static int rfc2047_encode (const char *d, size_t dlen, int col,
                           const char *fromcode, const char *charsets,
                           char **e, size_t * elen, char *specials)
{
    int ret = 0;
    char *buf;
    size_t bufpos, buflen;
    char *u, *t0, *t1, *t;
    char *s0, *s1;
    size_t ulen, r, n, wlen;
    encoder_t encoder;
    char *tocode1 = 0;
    const char *tocode;
    const char *icode = "UTF-8";

    /* Try to convert to UTF-8. */
    /* NOTE(review): any non-zero return counts as failure here, not only
       -1.  If convert_string() ever returns a positive count it has
       already stored a buffer in u, which the p_dupstr() below would
       overwrite without freeing -- confirm whether that can happen. */
    if (convert_string(fromcode, d, dlen, icode, &u, &ulen)) {
        ret = 1;
        icode = 0;
        u = p_dupstr(d, ulen = dlen);
    }

    /* Find earliest and latest things we must encode. */
    /* t0/t1: first/last byte that has the high bit set or starts a "=?"
       at the beginning of a word; s0/s1: first/last byte from specials. */
    s0 = s1 = t0 = t1 = 0;
    for (t = u; t < u + ulen; t++) {
        if ((*t & 0x80) ||
            (*t == '=' && t[1] == '?' && (t == u || HSPACE (*(t - 1))))) {
            if (!t0)
                t0 = t;
            t1 = t;
        }
        else if (specials && strchr (specials, *t)) {
            if (!s0)
                s0 = t;
            s1 = t;
        }
    }

    /* If we have something to encode, include RFC822 specials */
    if (t0 && s0 && s0 < t0)
        t0 = s0;
    if (t1 && s1 && s1 > t1)
        t1 = s1;

    if (!t0) {
        /* No encoding is required. */
        /* Ownership of u passes to the caller. */
        *e = u;
        *elen = ulen;
        return ret;
    }

    /* Choose target charset. */
    tocode = fromcode;
    if (icode) {
        if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
            tocode = tocode1;
        else
            ret = 2, icode = 0;
    }

    /* Hack to avoid labelling 8-bit data as us-ascii. */
    if (!icode && mutt_is_us_ascii (tocode))
        tocode = "unknown-8bit";

    /* Adjust t0 for maximum length of line. */
    t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
    if (t < u)
        t = u;
    if (t < t0)
        t0 = t;


    /* Adjust t0 until we can encode a character after a space. */
    /* Walk t0 back to a word start from which at least one whole
       character can be encoded without overrunning the line. */
    for (; t0 > u; t0--) {
        if (!HSPACE (*(t0 - 1)))
            continue;
        t = t0 + 1;
        if (icode)
            while (t < u + ulen && CONTINUATION_BYTE (*t))
                ++t;
        if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
            col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
            break;
    }

    /* Adjust t1 until we can encode a character before a space. */
    /* Walk t1 forward to a word end such that the last character plus
       the remaining plain suffix still fit on one line. */
    for (; t1 < u + ulen; t1++) {
        if (!HSPACE (*t1))
            continue;
        t = t1 - 1;
        if (icode)
            while (CONTINUATION_BYTE (*t))
                --t;
        if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
            1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
            break;
    }

    /* We shall encode the region [t0,t1). */

    /* Initialise the output buffer with the us-ascii prefix. */
    buflen = 2 * ulen;
    buf = p_new(char, buflen);
    bufpos = t0 - u;
    memcpy (buf, u, t0 - u);

    col += t0 - u;

    /* Emit one encoded word per iteration, each followed by a folded
       line break; the loop is left with the final word still pending. */
    t = t0;
    for (;;) {
        /* Find how much we can encode. */
        n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
        if (n == t1 - t) {
            /* See if we can fit the us-ascii suffix, too. */
            if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
                break;
            /* Suffix does not fit: hold back the last character so that
               it goes into a further encoded word. */
            n = t1 - t - 1;
            if (icode)
                while (CONTINUATION_BYTE (t[n]))
                    --n;
            assert (t + n >= t);
            if (!n) {
                /* This should only happen in the really stupid case where the
                   only word that needs encoding is one character long, but
                   there is too much us-ascii stuff after it to use a single
                   encoded word. We add the next word to the encoded region
                   and try again. */
                assert (t1 < u + ulen);
                for (t1++; t1 < u + ulen && !HSPACE (*t1); t1++);
                continue;
            }
            n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
        }

        /* Add to output buffer. */
#define LINEBREAK "\n\t"
        if (bufpos + wlen + m_strlen(LINEBREAK) > buflen) {
            buflen = bufpos + wlen + m_strlen(LINEBREAK);
            p_realloc(&buf, buflen);
        }
        r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
        assert (r == wlen);
        bufpos += wlen;
        memcpy (buf + bufpos, LINEBREAK, m_strlen(LINEBREAK));
        bufpos += m_strlen(LINEBREAK);
#undef LINEBREAK

        /* The continuation line starts with a tab, so we are in column 1. */
        col = 1;

        t += n;
    }

    /* Add last encoded word and us-ascii suffix to buffer. */
    buflen = bufpos + wlen + (u + ulen - t1);
    p_realloc(&buf, buflen + 1);
    r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
    assert (r == wlen);
    bufpos += wlen;
    memcpy (buf + bufpos, t1, u + ulen - t1);

    p_delete(&tocode1);
    p_delete(&u);

    buf[buflen] = '\0';

    *e = buf;
    *elen = buflen + 1;
    return ret;
}
 538
 539 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 540 {
 541     char *e;
 542     size_t elen;
 543     const char *charsets;
 544
 545     if (!Charset || !*pd)
 546         return;
 547
 548     charsets = SendCharset;
 549     if (!charsets || !*charsets)
 550         charsets = "UTF-8";
 551
 552     rfc2047_encode (*pd, m_strlen(*pd), col,
 553                     Charset, charsets, &e, &elen,
 554                     encode_specials ? RFC822Specials : NULL);
 555
 556     p_delete(pd);
 557     *pd = e;
 558 }
 559
 560 void rfc2047_encode_string(char **pd) {
 561     _rfc2047_encode_string(pd, 0, 32);
 562 }
 563
 564 void rfc2047_encode_adrlist (address_t * addr, const char *tag)
 565 {
 566     address_t *ptr = addr;
 567     int col = tag ? m_strlen(tag) + 2 : 32;
 568
 569     while (ptr) {
 570         if (ptr->personal)
 571             _rfc2047_encode_string (&ptr->personal, 1, col);
 572         ptr = ptr->next;
 573     }
 574 }
 575
 576 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 577 {
 578     const char *pp, *pp1;
 579     char *pd, *d0;
 580     const char *t, *t1;
 581     int enc = 0, count = 0;
 582     char *charset = NULL;
 583
 584     pd = d0 = p_new(char, m_strlen(s));
 585
 586     for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) {
 587         count++;
 588         switch (count) {
 589           case 2:
 590             /* ignore language specification a la RFC 2231 */
 591             t = pp1;
 592             if ((t1 = memchr (pp, '*', t - pp)))
 593                 t = t1;
 594             charset = p_dupstr(pp, t - pp);
 595             break;
 596           case 3:
 597             if (toupper ((unsigned char) *pp) == 'Q')
 598                 enc = ENCQUOTEDPRINTABLE;
 599             else if (toupper ((unsigned char) *pp) == 'B')
 600                 enc = ENCBASE64;
 601             else {
 602                 p_delete(&charset);
 603                 p_delete(&d0);
 604                 return (-1);
 605             }
 606             break;
 607           case 4:
 608             if (enc == ENCQUOTEDPRINTABLE) {
 609                 for (; pp < pp1; pp++) {
 610                     if (*pp == '_')
 611                         *pd++ = ' ';
 612                     else if (*pp == '=' && hexval(pp[1]) >= 0 && hexval(pp[2]) >= 0) {
 613                         *pd++ = (hexval (pp[1]) << 4) | hexval (pp[2]);
 614                         pp += 2;
 615                     }
 616                     else
 617                         *pd++ = *pp;
 618                 }
 619                 *pd = 0;
 620             }
 621             else if (enc == ENCBASE64) {
 622                 int c, b = 0, k = 0;
 623
 624                 for (; pp < pp1; pp++) {
 625                     if (*pp == '=')
 626                         break;
 627                     if ((c = base64val(*pp)) < 0)
 628                         continue;
 629                     if (k + 6 >= 8) {
 630                         k -= 2;
 631                         *pd++ = b | (c >> k);
 632                         b = c << (8 - k);
 633                     }
 634                     else {
 635                         b |= c << (k + 2);
 636                         k += 6;
 637                     }
 638                 }
 639                 *pd = 0;
 640             }
 641             break;
 642         }
 643     }
 644
 645     if (charset)
 646         mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 647     m_strcpy(d, len, d0);
 648     p_delete(&charset);
 649     p_delete(&d0);
 650     return (0);
 651 }
 652
 653 /*
 654  * Find the start and end of the first encoded word in the string.
 655  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 656  * must be B or Q. Also, we don't require the encoded word to be
 657  * separated by linear-white-space (section 5(1)).
 658  */
 659 static const char *find_encoded_word (const char *s, const char **x)
 660 {
 661     const char *p, *q;
 662
 663     q = s;
 664     while ((p = strstr (q, "=?"))) {
 665         for (q = p + 2;
 666              0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); q++);
 667         if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 668             continue;
 669         for (q = q + 3; 0x20 <= *q && *q < 0x7f && *q != '?'; q++);
 670         if (q[0] != '?' || q[1] != '=') {
 671             --q;
 672             continue;
 673         }
 674
 675         *x = q + 2;
 676         return p;
 677     }
 678
 679     return 0;
 680 }
 681
 682 /* return length of linear white space */
 683 static size_t lwslen (const char *s, size_t n)
 684 {
 685     const char *p = s;
 686     size_t len = n;
 687
 688     if (n <= 0)
 689         return 0;
 690
 691     for (; p < s + n; p++)
 692         if (!strchr (" \t\r\n", *p)) {
 693             len = (size_t) (p - s);
 694             break;
 695         }
 696     if (strchr ("\r\n", *(p - 1)))        /* LWS doesn't end with CRLF */
 697         len = (size_t) 0;
 698     return len;
 699 }
 700
 701 /* return length of linear white space : reverse */
 702 static size_t lwsrlen (const char *s, size_t n)
 703 {
 704     const char *p = s + n - 1;
 705     size_t len = n;
 706
 707     if (n <= 0)
 708         return 0;
 709
 710     if (strchr ("\r\n", *p))      /* LWS doesn't end with CRLF */
 711         return (size_t) 0;
 712
 713     for (; p >= s; p--)
 714         if (!strchr (" \t\r\n", *p)) {
 715             len = (size_t) (s + n - 1 - p);
 716             break;
 717         }
 718     return len;
 719 }
 720
/* try to decode anything that looks like a valid RFC2047 encoded
 * header field, ignoring RFC822 parsing rules
 *
 * *pd is replaced by a newly allocated string holding the decoded text
 * and the old string is freed.  Nothing happens if *pd is NULL or empty.
 *
 * While scanning, s is the read position in the old string, d the write
 * position in the new one and dlen the space assumed to be left at d.
 */
void rfc2047_decode (char **pd)
{
    const char *p, *q;
    size_t m, n;
    int found_encoded = 0;
    char *d0, *d;
    const char *s = *pd;
    size_t dlen;

    if (!s || !*s)
        return;

    dlen = 4 * m_strlen(s);        /* should be enough */
    d = d0 = p_new(char, dlen + 1);

    while (*s && dlen > 0) {
        if (!(p = find_encoded_word (s, &q))) {
            /* no encoded words */
            if (!option (OPTSTRICTMIME)) {
                n = m_strlen(s);
                /* White space right after an encoded word shrinks to a
                   single blank, unless it is all that is left. */
                if (found_encoded && (m = lwslen (s, n)) != 0) {
                    /* NOTE(review): dlen is decremented without checking
                       that it is still non-zero -- confirm it cannot
                       wrap around. */
                    if (m != n)
                        *d = ' ', d++, dlen--;
                    n -= m, s += m;
                }
                if (ascii_strcasecmp (AssumedCharset, "us-ascii")) {
                    char *t;
                    size_t tlen;

                    /* The raw tail is run through
                       mutt_convert_nonmime_string(); if that fails it is
                       copied unchanged. */
                    t = p_dupstr(s, n);
                    if (mutt_convert_nonmime_string (&t) == 0) {
                        tlen = m_strlen(t);
                        /* NOTE(review): tlen is not checked against
                           dlen -- confirm the converted text always
                           fits. */
                        strncpy (d, t, tlen);
                        d += tlen;
                    }
                    else {
                        strncpy (d, s, n);
                        d += n;
                    }
                    p_delete(&t);
                    break;
                }
            }
            /* Copy the rest verbatim.  strncpy() pads with NUL bytes up
               to dlen and d is then moved to the end of that padding. */
            strncpy (d, s, dlen);
            d += dlen;
            break;
        }

        /* Plain text in front of the encoded word found at p. */
        if (p != s) {
            n = (size_t) (p - s);
            /* ignore spaces between encoded words
             * and linear white spaces between encoded word and *text */
            if (!option (OPTSTRICTMIME)) {
                if (found_encoded && (m = lwslen (s, n)) != 0) {
                    if (m != n)
                        *d = ' ', d++, dlen--;
                    n -= m, s += m;
                }

                /* m: length of the text without its trailing white
                   space, which is replaced by a single blank. */
                if ((m = n - lwsrlen (s, n)) != 0) {
                    if (m > dlen)
                        m = dlen;
                    memcpy (d, s, m);
                    d += m;
                    dlen -= m;
                    /* NOTE(review): dlen may already be 0 here after
                       the clamp above -- confirm. */
                    if (m != n)
                        *d = ' ', d++, dlen--;
                }
            }
            /* Strict mode: text between two encoded words is dropped
               only if it consists of white space alone. */
            else if (!found_encoded || strspn (s, " \t\r\n") != n) {
                if (n > dlen)
                    n = dlen;
                memcpy (d, s, n);
                d += n;
                dlen -= n;
            }
        }

        /* Decode the word at p straight into the output and continue
           behind its closing "?=".  The return value is not checked. */
        rfc2047_decode_word (d, p, dlen);
        found_encoded = 1;
        s = q;
        n = m_strlen(d);
        dlen -= n;
        d += n;
    }
    *d = 0;

    p_delete(pd);
    *pd = d0;
    /* presumably trims the allocation to the actual string length --
       TODO confirm against str_adjust() */
    str_adjust (pd);
}
 815
 816 void rfc2047_decode_adrlist (address_t * a)
 817 {
 818     while (a) {
 819         if (a->personal)
 820             rfc2047_decode (&a->personal);
 821         a = a->next;
 822     }
 823 }
 824
 825 void rfc2047_decode_envelope (ENVELOPE* e) {
 826
 827     if (!e)
 828         return;
 829
 830     /* do RFC2047 decoding */
 831     rfc2047_decode_adrlist (e->from);
 832     rfc2047_decode_adrlist (e->to);
 833     rfc2047_decode_adrlist (e->cc);
 834     rfc2047_decode_adrlist (e->bcc);
 835     rfc2047_decode_adrlist (e->reply_to);
 836     rfc2047_decode_adrlist (e->mail_followup_to);
 837     rfc2047_decode_adrlist (e->return_path);
 838     rfc2047_decode_adrlist (e->sender);
 839
 840     if (e->subject) {
 841         rfc2047_decode (&e->subject);
 842         mutt_adjust_subject (e);
 843     }
 844 }