lib-mime/rfc822parse.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  *
  24  * This file is part of mutt-ng, see http://www.muttng.org/.
  25  * It's licensed under the GNU General Public License,
  26  * please see the file GPL in the top level source directory.
  27  */
  28
  29 #if HAVE_CONFIG_H
  30 # include "config.h"
  31 #endif
  32
  33 #include <stdio.h>
  34
  35 #include <lib-lib/lib-lib.h>
  36
  37 #include "recvattach.h"
  38
  39 #include "charset.h"
  40 #include "mime.h"
  41
  42 /* Reads an arbitrarily long header field, and looks ahead for continuation
  43  * lines.  ``line'' must point to a dynamically allocated string; it is
  44  * increased if more space is required to fit the whole line.
  45  */
  46 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
  47 {
  48     ssize_t pos = 0;
  49
  50     for (;;) {
  51         char *p = *line;
  52
  53         /* end of file or end of headers */
  54         if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
  55             *p = '\0';
  56             return 0;
  57         }
  58
  59         pos += m_strlen(p + pos);
  60         if (p[pos - 1] == '\n') {
  61             int c;
  62
  63             /* remove trailing spaces. safe: p[0] is not a space */
  64             do {
  65                 p[--pos] = '\0';
  66             } while (ISSPACE(p[pos]));
  67
  68             /* check to see if the next line is a continuation line */
  69             c = fgetc(f);
  70             if (c != ' ' && c != '\t') {
  71                 /* next line is a separate header field or EOH */
  72                 ungetc(c, f);
  73                 return pos;
  74             }
  75
  76             /* eat tabs and spaces from the beginning of the continuation line */
  77             do {
  78                 c = fgetc(f);
  79             } while (c == ' ' || c == '\t');
  80             ungetc(c, f);
  81
  82             /* string is still terminated because we removed at least one
  83                whitespace char above */
  84             p[pos++] = ' ';
  85         }
  86
  87         if (*n < pos + STRING) {
  88             /* grow the buffer */
  89             *n += STRING;
  90             p_realloc(line, *n);
  91         }
  92     }
  93 }
  94
  95 /* TODO: Make that a string list somehow */
  96 string_list_t *mutt_parse_references(char *s, int in_reply_to)
  97 {
  98     string_list_t *lst = NULL;
  99     int n = 0;
 100     char *o = NULL;
 101
 102     /* some mail clients add other garbage besides message-ids, so do a quick
 103      * check to make sure this looks like a valid message-id
 104      * some idiotic clients also break their message-ids between lines, deal
 105      * with that too (give up if it's more than two lines, though)
 106      */
 107
 108     for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
 109         char *new = NULL;
 110
 111         if (*s == '<') {
 112             n = m_strlen(s);
 113             if (s[n - 1] != '>') {
 114                 o = s;
 115                 continue;
 116             }
 117
 118             new = m_strdup(s);
 119         } else if (o) {
 120             ssize_t m = m_strlen(s);
 121
 122             if (s[m - 1] != '>') {
 123                 o = NULL;
 124             } else {
 125                 new = p_new(char, n + m + 1);
 126                 strcpy(new, o);
 127                 strcpy(new + n, s);
 128             }
 129         }
 130
 131         /* make sure that this really does look like a message-id.
 132          * it should have exactly one @, and if we're looking at
 133          * an in-reply-to header, make sure that the part before
 134          * the @ has more than eight characters or it's probably
 135          * an email address
 136          */
 137         if (new) {
 138             char *at = strchr(new, '@');
 139             string_list_t *tmp;
 140
 141             if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
 142                 p_delete(&new);
 143                 continue;
 144             }
 145
 146             tmp = p_new(string_list_t, 1);
 147             tmp->data = new;
 148             tmp->next = lst;
 149             lst = tmp;
 150         }
 151     }
 152
 153     return lst;
 154 }
 155
 156 int mutt_check_encoding(const char *s)
 157 {
 158     int tok = mime_which_token(s, -1);
 159     switch (tok) {
 160       case MIME_7BIT:
 161         return ENC7BIT;
 162       case MIME_8BIT:
 163         return ENC8BIT;
 164       case MIME_BINARY:
 165         return ENCBINARY;
 166       case MIME_QUOTED_PRINTABLE:
 167         return ENCQUOTEDPRINTABLE;
 168       case MIME_BASE64:
 169         return ENCBASE64;
 170       case MIME_X_UUENCODE:
 171         return ENCUUENCODED;
 172       default:
 173         return ENCOTHER;
 174     }
 175 }
 176
 177 int mutt_check_mime_type(const char *s)
 178 {
 179     int tok;
 180
 181     if (!m_strcmp(s, "*") || !m_strcmp(s, ".*"))
 182         return TYPEANY;
 183
 184     tok = mime_which_token(s, -1);
 185     switch (tok) {
 186       case MIME_TEXT:        return TYPETEXT;
 187       case MIME_MULTIPART:   return TYPEMULTIPART;
 188       case MIME_APPLICATION: return TYPEAPPLICATION;
 189       case MIME_MESSAGE:     return TYPEMESSAGE;
 190       case MIME_IMAGE:       return TYPEIMAGE;
 191       case MIME_AUDIO:       return TYPEAUDIO;
 192       case MIME_VIDEO:       return TYPEVIDEO;
 193       case MIME_MODEL:       return TYPEMODEL;
 194       default:               return TYPEOTHER;
 195     }
 196 }
 197
 198 static parameter_t *parse_parameters(const char *s)
 199 {
 200     parameter_t *res = NULL;
 201     parameter_t **list = &res;
 202
 203     while (*s) {
 204         const char *p;
 205         parameter_t *new;
 206         int i;
 207
 208         s = skipspaces(s);
 209         if (*s == '=')             /* parameters are fucked up, go away */
 210             break;
 211
 212         p = strpbrk(s, "=;");
 213         if (!p)
 214             break;
 215
 216         if (*p == ';') {
 217             /* if we hit a ; now the parameter has no value, just skip it */
 218             s = p + 1;
 219             continue;
 220         }
 221
 222         i = p - s;
 223         new = parameter_new();
 224         new->attribute = p_dupstr(s, i);
 225
 226         while (--i >= 0 && ISSPACE(new->attribute[i])) {
 227             new->attribute[i] = '\0';
 228         }
 229         s = skipspaces(p + 1);                      /* skip over the = */
 230
 231         if (*s == '"') {
 232             char buffer[LONG_STRING];
 233             int state_ascii = 1;
 234
 235             s++;
 236             for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
 237                 if (!option(OPTSTRICTMIME)) {
 238                     /* As iso-2022-* has a characer of '"' with non-ascii state,
 239                      * ignore it. */
 240                     if (*s == 0x1b && i < ssizeof(buffer) - 2) {
 241                         state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
 242                     }
 243                 }
 244                 if (state_ascii && *s == '"')
 245                     break;
 246
 247                 if (*s == '\\') {
 248                     buffer[i] = *++s;
 249                 } else {
 250                     buffer[i] = *s;
 251                 }
 252             }
 253
 254             new->value = p_dupstr(buffer, i);
 255         } else {
 256             for (p = s; *p && *p != ' ' && *p != ';'; p++);
 257             new->value = p_dupstr(s, p - s);
 258         }
 259
 260         *list = new;
 261         list = &new->next;
 262
 263         s = strchr(s, ';');           /* Find the next parameter */
 264         if (!s)
 265             break;                    /* no more parameters */
 266     }
 267
 268     rfc2231_decode_parameters(&res);
 269     return res;
 270 }
 271
 272 void mutt_parse_content_type(char *s, BODY *ct)
 273 {
 274     char *pc;
 275     char *subtype;
 276
 277     p_delete(&ct->subtype);
 278     parameter_list_wipe(&ct->parameter);
 279
 280     /* First extract any existing parameters */
 281     if ((pc = strchr(s, ';')) != NULL) {
 282         *pc++ = '\0';
 283         ct->parameter = parse_parameters(vskipspaces(pc));
 284
 285         /* Some pre-RFC1521 gateways still use the "name=filename" convention,
 286          * but if a filename has already been set in the content-disposition,
 287          * let that take precedence, and don't set it here */
 288         pc = parameter_getval(ct->parameter, "name");
 289         if (pc && !ct->filename)
 290             ct->filename = m_strdup(pc);
 291     }
 292
 293     /* Now get the subtype */
 294     if ((subtype = strchr (s, '/'))) {
 295         *subtype++ = '\0';
 296         for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
 297         ct->subtype = p_dupstr(subtype, pc - subtype);
 298     }
 299
 300     /* Finally, get the major type */
 301     ct->type = mutt_check_mime_type(s);
 302
 303     if (ct->type == TYPEOTHER) {
 304         ct->xtype = m_strdup(s);
 305     }
 306
 307     if (!ct->subtype) {
 308         /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
 309          * field, so we can attempt to convert the type to BODY here.
 310          */
 311         switch (ct->type) {
 312             char buffer[SHORT_STRING];
 313
 314           case TYPETEXT:
 315             ct->subtype = m_strdup("plain");
 316             break;
 317
 318           case TYPEAUDIO:
 319             ct->subtype = m_strdup("basic");
 320             break;
 321
 322           case TYPEMESSAGE:
 323             ct->subtype = m_strdup("rfc822");
 324             break;
 325
 326           case TYPEOTHER:
 327             ct->type = TYPEAPPLICATION;
 328             snprintf(buffer, sizeof(buffer), "x-%s", s);
 329             ct->subtype = m_strdup(buffer);
 330             break;
 331
 332           default:
 333             ct->subtype = m_strdup("x-unknown");
 334             break;
 335         }
 336     }
 337
 338     /* Default character set for text types. */
 339     if (ct->type == TYPETEXT) {
 340         pc = parameter_getval(ct->parameter, "charset");
 341         if (!pc) {
 342             parameter_setval(&ct->parameter, "charset",
 343                              option(OPTSTRICTMIME)
 344                              ? "us-ascii"
 345                              : charset_getfirst(AssumedCharset));
 346         }
 347     }
 348 }
 349
 350 static void parse_content_disposition(const char *s, BODY *ct)
 351 {
 352     if (!ascii_strncasecmp(s, "inline", 6)) {
 353         ct->disposition = DISPINLINE;
 354     } else if (!ascii_strncasecmp(s, "form-data", 9)) {
 355         ct->disposition = DISPFORMDATA;
 356     } else {
 357         ct->disposition = DISPATTACH;
 358     }
 359
 360     /* Check to see if a default filename was given */
 361     if ((s = strchr (s, ';'))) {
 362         parameter_t *parms = parse_parameters(vskipspaces(s));
 363
 364         if ((s = parameter_getval(parms, "filename")))
 365             m_strreplace(&ct->filename, s);
 366         if ((s = parameter_getval(parms, "name")))
 367             ct->form_name = m_strdup(s);
 368
 369         parameter_list_wipe(&parms);
 370     }
 371 }
 372
 373 /* args:
 374  *      fp      stream to read from
 375  *
 376  *      digest  1 if reading subparts of a multipart/digest, 0
 377  *              otherwise
 378  */
 379 BODY *mutt_read_mime_header(FILE *fp, int digest)
 380 {
 381     BODY *body = body_new();
 382     char *line = p_new(char, LONG_STRING);
 383     ssize_t linelen = LONG_STRING;
 384     char *p;
 385
 386     body->hdr_offset  = ftello(fp);
 387     body->encoding    = ENC7BIT;    /* default from RFC1521 */
 388     body->disposition = DISPINLINE;
 389     body->type        = digest ? TYPEMESSAGE : TYPETEXT;
 390
 391     while (mutt_read_rfc822_line(fp, &line, &linelen)) {
 392         /* Find the value of the current header */
 393         if ((p = strchr(line, ':'))) {
 394             *p++ = '\0';
 395             p = vskipspaces(p);
 396             if (!*p)
 397                 continue;
 398         } else {
 399             break;
 400         }
 401
 402         switch (mime_which_token(line, -1)) {
 403           case MIME_CONTENT_TYPE:
 404             mutt_parse_content_type (p, body);
 405             break;
 406
 407           case MIME_CONTENT_TRANSFER_ENCODING:
 408             body->encoding = mutt_check_encoding (p);
 409             break;
 410
 411           case MIME_CONTENT_DISPOSITION:
 412             parse_content_disposition(p, body);
 413             break;
 414
 415           case MIME_CONTENT_DESCRIPTION:
 416             m_strreplace(&body->description, p);
 417             rfc2047_decode(&body->description);
 418             break;
 419
 420           default: break;
 421         }
 422     }
 423
 424     body->offset = ftello(fp);       /* Mark the start of the real data */
 425     if (!body->subtype) {
 426         if (body->type == TYPETEXT)
 427             body->subtype = m_strdup("plain");
 428         if (body->type == TYPEMESSAGE)
 429             body->subtype = m_strdup("rfc822");
 430     }
 431
 432     p_delete(&line);
 433     return (body);
 434 }
 435
 436 void mutt_parse_part(FILE *fp, BODY *b)
 437 {
 438     char *bound = 0;
 439
 440     switch (b->type) {
 441       case TYPEMULTIPART:
 442         bound = parameter_getval(b->parameter, "boundary");
 443         fseeko(fp, b->offset, SEEK_SET);
 444         b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
 445                                         mime_which_token(b->subtype, -1) == MIME_DIGEST);
 446         break;
 447
 448       case TYPEMESSAGE:
 449         if (b->subtype) {
 450             fseeko(fp, b->offset, SEEK_SET);
 451
 452             if (mutt_is_message_type(b->type, b->subtype)) {
 453                 b->parts = mutt_parse_messageRFC822(fp, b);
 454             } else
 455             if (mime_which_token(b->subtype, -1) == MIME_EXTERNAL_BODY) {
 456                 b->parts = mutt_read_mime_header(fp, 0);
 457             } else {
 458                 return;
 459             }
 460         }
 461         break;
 462
 463       default:
 464         return;
 465     }
 466
 467     /* try to recover from parsing error */
 468     if (!b->parts) {
 469         b->type = TYPETEXT;
 470         m_strreplace(&b->subtype, "plain");
 471     }
 472 }
 473
 474 /* parse a MESSAGE/RFC822 body
 475  *
 476  * args:
 477  *      fp              stream to read from
 478  *
 479  *      parent          structure which contains info about the message/rfc822
 480  *                      body part
 481  *
 482  * NOTE: this assumes that `parent->length' has been set!
 483  */
 484 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
 485 {
 486     BODY *msg;
 487
 488     parent->hdr = header_new();
 489     parent->hdr->offset = ftello(fp);
 490     parent->hdr->env    = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
 491
 492     msg = parent->hdr->content;
 493
 494     /* ignore the length given in the content-length since it could be wrong
 495        and we already have the info to calculate the correct length */
 496     /* if (msg->length == -1) */
 497     /* if body of this message is empty, we can end up with a negative length */
 498     msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
 499
 500     mutt_parse_part(fp, msg);
 501
 502     return msg;
 503 }
 504
 505 /* parse a multipart structure
 506  *
 507  * args:
 508  *      fp              stream to read from
 509  *
 510  *      bound           body separator
 511  *
 512  *      end_off         length of the multipart body (used when the final
 513  *                      boundary is missing to avoid reading too far)
 514  *
 515  *      digest          1 if reading a multipart/digest, 0 otherwise
 516  */
 517
 518 BODY *
 519 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
 520 {
 521     char buffer[LONG_STRING];
 522     BODY *head = NULL;
 523     BODY **last = &head;
 524     int blen = m_strlen(bound);
 525     int final = 0;                /* did we see the ending boundary? */
 526
 527     if (!blen) {
 528         mutt_error _("multipart message has no boundary parameter!");
 529         return NULL;
 530     }
 531
 532     while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
 533         int len, crlf, i;
 534
 535         len  = m_strlen(buffer);
 536         crlf = len > 1 && buffer[len - 2] == '\r';
 537
 538         if (buffer[0] == '-' && buffer[1] == '-'
 539         && !m_strncmp(buffer + 2, bound, blen))
 540         {
 541             if (*last) {
 542                 BODY *b = *last;
 543
 544                 /* if the body is empty, we can end up with a -1 length */
 545                 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
 546                 if (b->parts && b->parts->length == 0) {
 547                     b->parts->length = ftello(fp) - b->parts->offset
 548                                      - len - 1 - crlf;
 549                 }
 550             }
 551
 552             /* Remove any trailing whitespace, up to the length of the boundary */
 553             for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
 554                 buffer[i] = '\0';
 555
 556             /* Check for the end boundary */
 557             final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
 558             if (final)
 559                 break;
 560
 561             if (buffer[2 + blen] == '\0') {
 562                 BODY *new = mutt_read_mime_header(fp, digest);
 563
 564                 /*
 565                  * Consistency checking - catch
 566                  * bad attachment end boundaries
 567                  */
 568
 569                 if (new->offset > end_off) {
 570                     body_list_wipe(&new);
 571                     break;
 572                 }
 573
 574                 if (*last)
 575                     last = &(*last)->next;
 576                 *last = new;
 577             }
 578         }
 579     }
 580
 581     /* in case of missing end boundary, set the length to something reasonable */
 582     if (*last && (*last)->length == 0 && !final)
 583         (*last)->length = end_off - (*last)->offset;
 584
 585     /* parse recursive MIME parts */
 586     {
 587         BODY *b;
 588         for (b = head; b; b = b->next)
 589             mutt_parse_part(fp, b);
 590     }
 591
 592     return (head);
 593 }
 594
 595 static const char *
 596 uncomment_timezone(char *buf, size_t buflen, const char *tz)
 597 {
 598     char *p;
 599
 600     if (*tz != '(')
 601         return tz;                  /* no need to do anything */
 602
 603     tz = vskipspaces(tz + 1);
 604     p = strpbrk(tz, " )");
 605     if (!p)
 606         return tz;
 607
 608     m_strncpy(buf, buflen, tz, p - tz);
 609     return buf;
 610 }
 611
 612 /* parses a date string in RFC822 format:
 613  *
 614  * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 615  *
 616  * This routine assumes that `h' has been initialized to 0.  the `timezone'
 617  * field is optional, defaulting to +0000 if missing.
 618  */
 619 time_t mutt_parse_date(const char *s, HEADER *h)
 620 {
 621     int zhours = 0, zminutes = 0, zoccident = 0;
 622     char scratch[SHORT_STRING];
 623     struct tm tm;
 624     int count = 0;
 625     char *p;
 626
 627     /* Don't modify our argument. Fixed-size buffer is ok here since
 628        the date format imposes a natural limit.  */
 629
 630     m_strcpy(scratch, sizeof(scratch), s);
 631
 632     /* kill the day of the week, if it exists. */
 633     p = strchr(scratch, ',');
 634     p = vskipspaces(p ? p + 1 : scratch);
 635
 636     p_clear(&tm, 1);
 637
 638     while ((p = strtok (p, " \t")) != NULL) {
 639         char tzstr[SHORT_STRING];
 640         const char *ptz;
 641
 642         switch (count) {
 643           case 0:                    /* day of the month */
 644             if (!isdigit((unsigned char)*p))
 645                 return -1;
 646             tm.tm_mday = atoi(p);
 647             if (tm.tm_mday > 31)
 648                 return -1;
 649             break;
 650
 651           case 1:                    /* month of the year */
 652             tm.tm_mon = mutt_check_month(p);
 653             if (tm.tm_mon < 0)
 654                 return -1;
 655             break;
 656
 657           case 2:                    /* year */
 658             tm.tm_year = atoi(p);
 659             if (tm.tm_year < 50)
 660                 tm.tm_year += 100;
 661             else if (tm.tm_year >= 1900)
 662                 tm.tm_year -= 1900;
 663             break;
 664
 665           case 3:                    /* time of day */
 666             tm.tm_hour = strtol(p, &p, 10);
 667             if (*p++ != ':')
 668                 return -1;
 669             tm.tm_min  = strtol(p, &p, 10);
 670             if (*p++ == ':') {
 671                 tm.tm_sec = strtol(p, &p, 10);
 672             } else {
 673                 tm.tm_sec = 0;
 674             }
 675             break;
 676
 677           case 4:                    /* timezone */
 678             /* sometimes we see things like (MST) or (-0700) so attempt to
 679              * compensate by uncommenting the string if non-RFC822 compliant
 680              */
 681             ptz = uncomment_timezone(tzstr, sizeof(tzstr), p);
 682
 683             if (*ptz == '+' || *ptz == '-') {
 684                 if (isdigit((unsigned char)ptz[1])
 685                 &&  isdigit((unsigned char)ptz[2])
 686                 &&  isdigit((unsigned char)ptz[3])
 687                 &&  isdigit((unsigned char)ptz[4]))
 688                 {
 689                     zoccident = ptz[0] == '-';
 690                     zhours    = (ptz[1] - '0') * 10 + (ptz[2] - '0');
 691                     zminutes  = (ptz[3] - '0') * 10 + (ptz[4] - '0');
 692                 }
 693             }
 694             break;
 695         }
 696         count++;
 697         p = NULL;
 698     }
 699
 700     if (count < 4) {  /* don't check for missing timezone */
 701         return -1;
 702     }
 703
 704     if (h) {
 705         h->zhours    = zhours;
 706         h->zminutes  = zminutes;
 707         h->zoccident = zoccident;
 708     }
 709
 710     return mutt_mktime(&tm, 0) + (zoccident ? 1 : -1) * (zhours * 3600 + zminutes * 60);
 711 }
 712
 713 string_list_t **mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
 714                               short weed, short do_2047, string_list_t **user_hdrs)
 715 {
 716     switch (mime_which_token(line, -1)) {
 717       case MIME_APPARENTLY_FROM:
 718         e->from = rfc822_parse_adrlist (e->from, p);
 719         break;
 720
 721       case MIME_APPARENTLY_TO:
 722         e->to = rfc822_parse_adrlist (e->to, p);
 723         break;
 724
 725       case MIME_BCC:
 726         e->bcc = rfc822_parse_adrlist (e->bcc, p);
 727         break;
 728
 729       case MIME_CC:
 730         e->cc = rfc822_parse_adrlist (e->cc, p);
 731         break;
 732
 733       case MIME_CONTENT_DESCRIPTION:
 734         if (hdr) {
 735             m_strreplace(&hdr->content->description, p);
 736             rfc2047_decode(&hdr->content->description);
 737         }
 738         break;
 739
 740       case MIME_CONTENT_DISPOSITION:
 741         if (hdr)
 742             parse_content_disposition(p, hdr->content);
 743         break;
 744
 745       case MIME_CONTENT_LENGTH:
 746         if (hdr) {
 747             if ((hdr->content->length = atoi(p)) < 0)
 748                 hdr->content->length = -1;
 749         }
 750         break;
 751
 752       case MIME_CONTENT_TRANSFER_ENCODING:
 753         if (hdr)
 754             hdr->content->encoding = mutt_check_encoding(p);
 755         break;
 756
 757       case MIME_CONTENT_TYPE:
 758         if (hdr)
 759             mutt_parse_content_type (p, hdr->content);
 760         break;
 761
 762       case MIME_DATE:
 763         m_strreplace(&e->date, p);
 764         if (hdr)
 765             hdr->date_sent = mutt_parse_date (p, hdr);
 766         break;
 767
 768       case MIME_EXPIRES:
 769         if (hdr && mutt_parse_date (p, NULL) < time (NULL))
 770             hdr->expired = 1;
 771         break;
 772
 773 #ifdef USE_NNTP
 774       case MIME_FOLLOWUP_TO:
 775         if (!e->followup_to) {
 776             m_strrtrim(p);
 777             e->followup_to = m_strdup(skipspaces(p));
 778         }
 779         break;
 780 #endif
 781
 782       case MIME_FROM:
 783         e->from = rfc822_parse_adrlist(e->from, p);
 784         /* don't leave from info NULL if there's an invalid address (or
 785          * whatever) in From: field; mutt would just display it as empty
 786          * and mark mail/(esp.) news article as your own. aaargh! this
 787          * bothered me for _years_ */
 788         if (!e->from) {
 789             e->from = address_new();
 790             e->from->personal = m_strdup(p);
 791         }
 792         break;
 793
 794       case MIME_IN_REPLY_TO:
 795         string_list_wipe(&e->in_reply_to);
 796         e->in_reply_to = mutt_parse_references(p, 1);
 797         break;
 798
 799       case MIME_LINES:
 800         if (hdr) {
 801             /* HACK - mutt has, for a very short time, produced negative
 802                Lines header values.  Ignore them. */
 803             hdr->lines = MAX(0, atoi(p));
 804         }
 805         break;
 806
 807       case MIME_LIST_POST:
 808         /* RFC 2369.  FIXME: We should ignore whitespace, but don't. */
 809         if (strncmp(p, "NO", 2)) {
 810             char *beg, *end;
 811
 812             for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
 813                 ++beg;
 814                 if (!(end = strchr (beg, '>')))
 815                     break;
 816
 817                 /* Take the first mailto URL */
 818                 if (url_check_scheme (beg) == U_MAILTO) {
 819                     p_delete(&e->list_post);
 820                     e->list_post = p_dupstr(beg, end - beg);
 821                     break;
 822                 }
 823             }
 824         }
 825         break;
 826
 827       case MIME_MAIL_FOLLOWUP_TO:
 828         e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
 829         break;
 830
 831       case MIME_MAIL_REPLY_TO:
 832         address_list_wipe(&e->reply_to);
 833         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 834         break;
 835
 836       case MIME_MESSAGE_ID:
 837         {
 838             const char *beg, *end;
 839
 840             /* We add a new "Message-ID:" when building a message */
 841             p_delete(&e->message_id);
 842
 843             if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
 844                 e->message_id = p_dupstr(beg, (end - beg) + 1);
 845         }
 846         break;
 847
 848       case MIME_MIME_VERSION:
 849         if (hdr)
 850             hdr->mime = 1;
 851         break;
 852
 853 #ifdef USE_NNTP
 854       case MIME_NEWSGROUPS:
 855         p_delete(&e->newsgroups);
 856         m_strrtrim(p);
 857         e->newsgroups = m_strdup(skipspaces(p));
 858         break;
 859 #endif
 860
 861       case MIME_ORGANIZATION:
 862         if (!e->organization && mime_which_token(p, -1) == MIME_UNKNOWN)
 863             e->organization = m_strdup(p);
 864         break;
 865
 866       case MIME_RECEIVED:
 867         if (hdr && !hdr->received) {
 868             char *d = strchr(p, ';');
 869             if (d)
 870                 hdr->received = mutt_parse_date(d + 1, NULL);
 871         }
 872         break;
 873
 874       case MIME_REFERENCES:
 875         string_list_wipe(&e->references);
 876         e->references = mutt_parse_references(p, 0);
 877         break;
 878
 879       case MIME_REPLY_TO:
 880         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 881         break;
 882
 883       case MIME_RETURN_PATH:
 884         e->return_path = rfc822_parse_adrlist(e->return_path, p);
 885         break;
 886
 887       case MIME_SENDER:
 888         e->sender = rfc822_parse_adrlist (e->sender, p);
 889         break;
 890
 891       case MIME_STATUS:
 892         if (hdr) {
 893             while (*p) {
 894                 switch (*p) {
 895                   case 'r':
 896                     hdr->replied = 1;
 897                     break;
 898                   case 'O':
 899                     hdr->old = 1;
 900                     break;
 901                   case 'R':
 902                     hdr->read = 1;
 903                     break;
 904                 }
 905                 p++;
 906             }
 907         }
 908         break;
 909
 910       case MIME_SUBJECT:
 911         if (!e->subject)
 912             e->subject = m_strdup(p);
 913         break;
 914
 915       case MIME_SUPERCEDES:
 916       case MIME_SUPERSEDES:
 917         if (hdr)
 918             e->supersedes = m_strdup(p);
 919         break;
 920
 921       case MIME_TO:
 922         e->to = rfc822_parse_adrlist(e->to, p);
 923         break;
 924
 925 #ifdef USE_NNTP
 926       case MIME_X_COMMENT_TO:
 927         if (!e->x_comment_to)
 928             e->x_comment_to = m_strdup(p);
 929         break;
 930 #endif
 931
 932       case MIME_X_LABEL:
 933         e->x_label = m_strdup(p);
 934         break;
 935
 936 #ifdef USE_NNTP
 937       case MIME_XREF:
 938         if (!e->xref)
 939             e->xref = m_strdup(p);
 940         break;
 941 #endif
 942
 943       case MIME_X_STATUS:
 944         if (hdr) {
 945             while (*p) {
 946                 switch (*p) {
 947                   case 'A':
 948                     hdr->replied = 1;
 949                     break;
 950                   case 'D':
 951                     hdr->deleted = 1;
 952                     break;
 953                   case 'F':
 954                     hdr->flagged = 1;
 955                     break;
 956                   default:
 957                     break;
 958                 }
 959                 p++;
 960             }
 961         }
 962         break;
 963
 964       default:
 965         if (!user_hdrs)
 966             break;
 967
 968         /* restore the original line */
 969         line[m_strlen(line)] = ':';
 970
 971         if (weed && option(OPTWEED) && mutt_matches_ignore(line, Ignore)
 972         && !mutt_matches_ignore(line, UnIgnore)) {
 973             break;
 974         }
 975
 976         *user_hdrs = string_item_new();
 977         (*user_hdrs)->data = m_strdup(line);
 978         if (do_2047)
 979             rfc2047_decode(&(*user_hdrs)->data);
 980         return &(*user_hdrs)->next;
 981     }
 982
 983     return user_hdrs;
 984 }
 985
 986 /* mutt_read_rfc822_header() -- parses a RFC822 header
 987  *
 988  * Args:
 989  *
 990  * f            stream to read from
 991  *
 992  * hdr          header structure of current message (optional).
 993  *
 994  * user_hdrs    If set, store user headers.  Used for recall-message and
 995  *              postpone modes.
 996  *
 997  * weed         If this parameter is set and the user has activated the
 998  *              $weed option, honor the header weed list for user headers.
 999  *              Used for recall-message.
1000  *
1001  * Returns:     newly allocated envelope structure.  You should free it by
1002  *              envelope_delete() when envelope stay unneeded.
1003  */
1004 ENVELOPE *
1005 mutt_read_rfc822_header(FILE *f, HEADER *hdr, short user_hdrs, short weed)
1006 {
1007     ENVELOPE *e = envelope_new();
1008     string_list_t **last = user_hdrs ? &e->userhdrs : NULL;
1009
1010     char *line = p_new(char, LONG_STRING);
1011     ssize_t linelen = LONG_STRING;
1012     off_t loc;
1013
1014     if (hdr && !hdr->content) {
1015         hdr->content = body_new();
1016
1017         /* set the defaults from RFC1521 */
1018         hdr->content->type     = TYPETEXT;
1019         hdr->content->subtype  = m_strdup("plain");
1020         hdr->content->encoding = ENC7BIT;
1021         hdr->content->length   = -1;
1022
1023         /* RFC 2183 says this is arbitrary */
1024         hdr->content->disposition = DISPINLINE;
1025     }
1026
1027     while ((loc = ftello(f)),
1028            mutt_read_rfc822_line(f, &line, &linelen))
1029     {
1030         char buf[LONG_STRING + 1] = "";
1031         char *p;
1032
1033         p = strpbrk(line, ": \t");
1034         if (!p || *p != ':') {
1035             char return_path[LONG_STRING];
1036             time_t t;
1037
1038             /* some bogus MTAs will quote the original "From " line */
1039             if (!m_strncmp(">From ", line, 6))
1040                 continue;               /* just ignore */
1041
1042             if (is_from(line, return_path, sizeof(return_path), &t)) {
1043                 /* MH somtimes has the From_ line in the middle of the header! */
1044                 if (hdr && !hdr->received)
1045                     hdr->received = t - mutt_local_tz(t);
1046                 continue;
1047             }
1048
1049             fseeko(f, loc, 0);
1050             break;                    /* end of header */
1051         }
1052
1053         if (mutt_match_spam_list(line, SpamList, buf, sizeof(buf))) {
1054             if (!rx_list_match(NoSpamList, line)) {
1055                 /* if spam tag already exists, figure out how to amend it */
1056                 if (e->spam && *buf) {
1057                     if (SpamSep) {
1058                         /* If SpamSep defined, append with separator */
1059                         mutt_buffer_addstr(e->spam, SpamSep);
1060                         mutt_buffer_addstr(e->spam, buf);
1061                     } else {
1062                         /* else overwrite */
1063                         mutt_buffer_reset(e->spam);
1064                         mutt_buffer_addstr(e->spam, buf);
1065                     }
1066                 }
1067                 else if (!e->spam && *buf) {
1068                     /* spam tag is new, and match expr is non-empty; copy */
1069                     e->spam = mutt_buffer_from(NULL, buf);
1070                 }
1071                 else if (!e->spam) {
1072                     /* match expr is empty; plug in null string if no existing tag */
1073                     e->spam = mutt_buffer_from(NULL, "");
1074                 }
1075             }
1076         }
1077
1078         *p++ = '\0';
1079         p = vskipspaces(p);
1080         if (!*p)
1081             continue;                 /* skip empty header fields */
1082
1083         last = mutt_parse_rfc822_line(e, hdr, line, p, weed, 1, last);
1084     }
1085
1086     p_delete(&line);
1087
1088     if (hdr) {
1089         hdr->content->hdr_offset = hdr->offset;
1090         hdr->content->offset     = ftello(f);
1091         rfc2047_decode_envelope(e);
1092         /* check for missing or invalid date */
1093         if (hdr->date_sent <= 0) {
1094             hdr->date_sent = hdr->received;
1095         }
1096     }
1097
1098     return e;
1099 }
1100
1101 /* Compares mime types to the ok and except lists */
1102 static int count_body_parts_check(string_list_t **checklist, BODY *b)
1103 {
1104     string_list_t *type;
1105
1106     for (type = *checklist; type; type = type->next) {
1107         ATTACH_MATCH *a = (ATTACH_MATCH *)type->data;
1108
1109         if ((a->major_int == TYPEANY || a->major_int == b->type)
1110         &&  !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
1111             return 1;
1112         }
1113     }
1114
1115     return 0;
1116 }
1117
1118 static int count_body_parts (BODY *body, int flags)
1119 {
1120     int count = 0;
1121     BODY *bp;
1122
1123     if (!body)
1124         return 0;
1125
1126     for (bp = body; bp != NULL; bp = bp->next) {
1127         /* Initial disposition is to count and not to recurse this part. */
1128         int shallcount, shallrecurse, iscontainer;
1129         int tok = mime_which_token(bp->subtype, -1);
1130
1131         iscontainer  = bp->type == TYPEMESSAGE || bp->type == TYPEMULTIPART;
1132
1133         /* don't recurse in external bodies or multipart/alternatives */
1134         shallrecurse = (bp->type == TYPEMESSAGE && tok != MIME_EXTERNAL_BODY)
1135                     || (bp->type == TYPEMULTIPART && tok != MIME_ALTERNATIVE);
1136
1137         /* Don't count top level containers and fundamental inlines */
1138         shallcount   = !(iscontainer && (flags & M_PARTS_TOPLEVEL))
1139                     && !(!iscontainer && bp->disposition == DISPINLINE && bp == body);
1140
1141         if (shallcount) {
1142             /* Turn off shallcount if message type is not in ok list,
1143              * or if it is in except list. Check is done separately for
1144              * inlines vs. attachments.
1145              */
1146
1147             if (bp->disposition == DISPATTACH) {
1148                 if (!count_body_parts_check(&AttachAllow, bp))
1149                     shallcount = 0;
1150                 if (count_body_parts_check(&AttachExclude, bp))
1151                     shallcount = 0;
1152             } else {
1153                 if (!count_body_parts_check(&InlineAllow, bp))
1154                     shallcount = 0;
1155                 if (count_body_parts_check(&InlineExclude, bp))
1156                     shallcount = 0;
1157             }
1158         }
1159
1160         bp->attach_qualifies = shallcount;
1161         count += shallcount;
1162
1163         if (shallrecurse) {
1164             bp->attach_count = count_body_parts(bp->parts,
1165                                                 flags & ~M_PARTS_TOPLEVEL);
1166             count += bp->attach_count;
1167         }
1168     }
1169
1170     return count;
1171 }
1172
1173 int mutt_count_body_parts(HEADER *hdr, int flags)
1174 {
1175     if (!option(OPTCOUNTATTACH))
1176         return 0;
1177
1178     if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1179         return hdr->attach_total;
1180
1181     if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1182         hdr->attach_total = count_body_parts(hdr->content,
1183                                              flags | M_PARTS_TOPLEVEL);
1184     else
1185         hdr->attach_total = 0;
1186
1187     hdr->attach_valid = 1;
1188     return hdr->attach_total;
1189 }