lib-mime/rfc822parse.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  *
  24  * This file is part of mutt-ng, see http://www.muttng.org/.
  25  * It's licensed under the GNU General Public License,
  26  * please see the file GPL in the top level source directory.
  27  */
  28
  29 #if HAVE_CONFIG_H
  30 # include "config.h"
  31 #endif
  32
  33 #include <stdio.h>
  34
  35 #include <lib-lib/mem.h>
  36 #include <lib-lib/str.h>
  37 #include <lib-lib/ascii.h>
  38 #include <lib-lib/macros.h>
  39 #include <lib-lib/buffer.h>
  40 #include <lib-lib/date.h>
  41 #include <lib-lib/url.h>
  42
  43 #include "recvattach.h"
  44
  45 #include "charset.h"
  46 #include "mime.h"
  47
  48 /* Reads an arbitrarily long header field, and looks ahead for continuation
  49  * lines.  ``line'' must point to a dynamically allocated string; it is
  50  * increased if more space is required to fit the whole line.
  51  */
  52 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
  53 {
  54     ssize_t pos = 0;
  55
  56     for (;;) {
  57         char *p = *line;
  58
  59         /* end of file or end of headers */
  60         if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
  61             *p = '\0';
  62             return 0;
  63         }
  64
  65         pos += m_strlen(p + pos);
  66         if (p[pos - 1] == '\n') {
  67             int c;
  68
  69             /* remove trailing spaces. safe: p[0] is not a space */
  70             do {
  71                 p[--pos] = '\0';
  72             } while (ISSPACE(p[pos]));
  73
  74             /* check to see if the next line is a continuation line */
  75             c = fgetc(f);
  76             if (c != ' ' && c != '\t') {
  77                 /* next line is a separate header field or EOH */
  78                 ungetc(c, f);
  79                 return pos;
  80             }
  81
  82             /* eat tabs and spaces from the beginning of the continuation line */
  83             do {
  84                 c = fgetc(f);
  85             } while (c == ' ' || c == '\t');
  86             ungetc(c, f);
  87
  88             /* string is still terminated because we removed at least one
  89                whitespace char above */
  90             p[pos++] = ' ';
  91         }
  92
  93         if (*n < pos + STRING) {
  94             /* grow the buffer */
  95             *n += STRING;
  96             p_realloc(line, *n);
  97         }
  98     }
  99 }
 100
 101 /* TODO: Make that a string list somehow */
 102 string_list_t *mutt_parse_references(char *s, int in_reply_to)
 103 {
 104     string_list_t *lst = NULL;
 105     int n = 0;
 106     char *o = NULL;
 107
 108     /* some mail clients add other garbage besides message-ids, so do a quick
 109      * check to make sure this looks like a valid message-id
 110      * some idiotic clients also break their message-ids between lines, deal
 111      * with that too (give up if it's more than two lines, though)
 112      */
 113
 114     for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
 115         char *new = NULL;
 116
 117         if (*s == '<') {
 118             n = m_strlen(s);
 119             if (s[n - 1] != '>') {
 120                 o = s;
 121                 continue;
 122             }
 123
 124             new = m_strdup(s);
 125         } else if (o) {
 126             ssize_t m = m_strlen(s);
 127
 128             if (s[m - 1] != '>') {
 129                 o = NULL;
 130             } else {
 131                 new = p_new(char, n + m + 1);
 132                 strcpy(new, o);
 133                 strcpy(new + n, s);
 134             }
 135         }
 136
 137         /* make sure that this really does look like a message-id.
 138          * it should have exactly one @, and if we're looking at
 139          * an in-reply-to header, make sure that the part before
 140          * the @ has more than eight characters or it's probably
 141          * an email address
 142          */
 143         if (new) {
 144             char *at = strchr(new, '@');
 145             string_list_t *tmp;
 146
 147             if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
 148                 p_delete(&new);
 149                 continue;
 150             }
 151
 152             tmp = p_new(string_list_t, 1);
 153             tmp->data = new;
 154             tmp->next = lst;
 155             lst = tmp;
 156         }
 157     }
 158
 159     return lst;
 160 }
 161
 162 int mutt_check_encoding(const char *s)
 163 {
 164     int tok = mime_which_token(s, -1);
 165     switch (tok) {
 166       case MIME_7BIT:
 167         return ENC7BIT;
 168       case MIME_8BIT:
 169         return ENC8BIT;
 170       case MIME_BINARY:
 171         return ENCBINARY;
 172       case MIME_QUOTED_PRINTABLE:
 173         return ENCQUOTEDPRINTABLE;
 174       case MIME_BASE64:
 175         return ENCBASE64;
 176       case MIME_X_UUENCODE:
 177         return ENCUUENCODED;
 178       default:
 179         return ENCOTHER;
 180     }
 181 }
 182
 183 int mutt_check_mime_type(const char *s)
 184 {
 185     int tok;
 186
 187     if (!m_strcmp(s, "*") || !m_strcmp(s, ".*"))
 188         return TYPEANY;
 189
 190     tok = mime_which_token(s, -1);
 191     switch (tok) {
 192       case MIME_TEXT:        return TYPETEXT;
 193       case MIME_MULTIPART:   return TYPEMULTIPART;
 194       case MIME_APPLICATION: return TYPEAPPLICATION;
 195       case MIME_MESSAGE:     return TYPEMESSAGE;
 196       case MIME_IMAGE:       return TYPEIMAGE;
 197       case MIME_AUDIO:       return TYPEAUDIO;
 198       case MIME_VIDEO:       return TYPEVIDEO;
 199       case MIME_MODEL:       return TYPEMODEL;
 200       default:               return TYPEOTHER;
 201     }
 202 }
 203
 204 static PARAMETER *parse_parameters(const char *s)
 205 {
 206     PARAMETER *res = NULL;
 207     PARAMETER **list = &res;
 208
 209     while (*s) {
 210         const char *p;
 211         PARAMETER *new;
 212         int i;
 213
 214         s = skipspaces(s);
 215         if (*s == '=')             /* parameters are fucked up, go away */
 216             break;
 217
 218         p = strpbrk(s, "=;");
 219         if (!p)
 220             break;
 221
 222         if (*p == ';') {
 223             /* if we hit a ; now the parameter has no value, just skip it */
 224             s = p + 1;
 225             continue;
 226         }
 227
 228         i = p - s;
 229         new = parameter_new();
 230         new->attribute = p_dupstr(s, i);
 231
 232         while (--i >= 0 && ISSPACE(new->attribute[i])) {
 233             new->attribute[i] = '\0';
 234         }
 235         s = skipspaces(p + 1);                      /* skip over the = */
 236
 237         if (*s == '"') {
 238             char buffer[LONG_STRING];
 239             int state_ascii = 1;
 240
 241             s++;
 242             for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
 243                 if (!option(OPTSTRICTMIME)) {
 244                     /* As iso-2022-* has a characer of '"' with non-ascii state,
 245                      * ignore it. */
 246                     if (*s == 0x1b && i < ssizeof(buffer) - 2) {
 247                         state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
 248                     }
 249                 }
 250                 if (state_ascii && *s == '"')
 251                     break;
 252
 253                 if (*s == '\\') {
 254                     buffer[i] = *++s;
 255                 } else {
 256                     buffer[i] = *s;
 257                 }
 258             }
 259
 260             new->value = p_dupstr(buffer, i);
 261         } else {
 262             for (p = s; *p && *p != ' ' && *p != ';'; p++);
 263             new->value = p_dupstr(s, p - s);
 264         }
 265
 266         *list = new;
 267         list = &new->next;
 268
 269         s = strchr(s, ';');           /* Find the next parameter */
 270         if (!s)
 271             break;                    /* no more parameters */
 272     }
 273
 274     rfc2231_decode_parameters(&res);
 275     return res;
 276 }
 277
 278 void mutt_parse_content_type(char *s, BODY *ct)
 279 {
 280     char *pc;
 281     char *subtype;
 282
 283     p_delete(&ct->subtype);
 284     parameter_list_wipe(&ct->parameter);
 285
 286     /* First extract any existing parameters */
 287     if ((pc = strchr(s, ';')) != NULL) {
 288         *pc++ = '\0';
 289         ct->parameter = parse_parameters(vskipspaces(pc));
 290
 291         /* Some pre-RFC1521 gateways still use the "name=filename" convention,
 292          * but if a filename has already been set in the content-disposition,
 293          * let that take precedence, and don't set it here */
 294         pc = mutt_get_parameter("name", ct->parameter);
 295         if (pc && !ct->filename)
 296             ct->filename = m_strdup(pc);
 297     }
 298
 299     /* Now get the subtype */
 300     if ((subtype = strchr (s, '/'))) {
 301         *subtype++ = '\0';
 302         for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
 303         ct->subtype = p_dupstr(subtype, pc - subtype);
 304     }
 305
 306     /* Finally, get the major type */
 307     ct->type = mutt_check_mime_type(s);
 308
 309     if (ct->type == TYPEOTHER) {
 310         ct->xtype = m_strdup(s);
 311     }
 312
 313     if (!ct->subtype) {
 314         /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
 315          * field, so we can attempt to convert the type to BODY here.
 316          */
 317         switch (ct->type) {
 318             char buffer[SHORT_STRING];
 319
 320           case TYPETEXT:
 321             ct->subtype = m_strdup("plain");
 322             break;
 323
 324           case TYPEAUDIO:
 325             ct->subtype = m_strdup("basic");
 326             break;
 327
 328           case TYPEMESSAGE:
 329             ct->subtype = m_strdup("rfc822");
 330             break;
 331
 332           case TYPEOTHER:
 333             ct->type = TYPEAPPLICATION;
 334             snprintf(buffer, sizeof(buffer), "x-%s", s);
 335             ct->subtype = m_strdup(buffer);
 336             break;
 337
 338           default:
 339             ct->subtype = m_strdup("x-unknown");
 340             break;
 341         }
 342     }
 343
 344     /* Default character set for text types. */
 345     if (ct->type == TYPETEXT) {
 346         pc = mutt_get_parameter("charset", ct->parameter);
 347         if (!pc) {
 348             mutt_set_parameter("charset",
 349                                option(OPTSTRICTMIME) ? "us-ascii" :
 350                                mutt_get_first_charset(AssumedCharset),
 351                                &ct->parameter);
 352         }
 353     }
 354 }
 355
 356 static void parse_content_disposition(char *s, BODY *ct)
 357 {
 358     if (!ascii_strncasecmp(s, "inline", 6)) {
 359         ct->disposition = DISPINLINE;
 360     } else if (!ascii_strncasecmp(s, "form-data", 9)) {
 361         ct->disposition = DISPFORMDATA;
 362     } else {
 363         ct->disposition = DISPATTACH;
 364     }
 365
 366     /* Check to see if a default filename was given */
 367     if ((s = strchr (s, ';'))) {
 368         PARAMETER *parms = parse_parameters(vskipspaces(s));
 369
 370         if ((s = mutt_get_parameter("filename", parms)))
 371             m_strreplace(&ct->filename, s);
 372         if ((s = mutt_get_parameter ("name", parms)))
 373             ct->form_name = m_strdup(s);
 374
 375         parameter_list_wipe(&parms);
 376     }
 377 }
 378
 379 /* args:
 380  *      fp      stream to read from
 381  *
 382  *      digest  1 if reading subparts of a multipart/digest, 0
 383  *              otherwise
 384  */
 385 BODY *mutt_read_mime_header(FILE *fp, int digest)
 386 {
 387     BODY *body = mutt_new_body ();
 388     char *line = p_new(char, LONG_STRING);
 389     ssize_t linelen = LONG_STRING;
 390     char *p;
 391
 392     body->hdr_offset  = ftello(fp);
 393     body->encoding    = ENC7BIT;    /* default from RFC1521 */
 394     body->disposition = DISPINLINE;
 395     body->type        = digest ? TYPEMESSAGE : TYPETEXT;
 396
 397     while (mutt_read_rfc822_line(fp, &line, &linelen)) {
 398         /* Find the value of the current header */
 399         if ((p = strchr(line, ':'))) {
 400             *p++ = '\0';
 401             p = vskipspaces(p);
 402             if (!*p)
 403                 continue;
 404         } else {
 405             break;
 406         }
 407
 408         switch (mime_which_token(line, -1)) {
 409           case MIME_CONTENT_TYPE:
 410             mutt_parse_content_type (p, body);
 411             break;
 412
 413           case MIME_CONTENT_TRANSFER_ENCODING:
 414             body->encoding = mutt_check_encoding (p);
 415             break;
 416
 417           case MIME_CONTENT_DISPOSITION:
 418             parse_content_disposition(p, body);
 419             break;
 420
 421           case MIME_CONTENT_DESCRIPTION:
 422             m_strreplace(&body->description, p);
 423             rfc2047_decode(&body->description);
 424             break;
 425
 426           default: break;
 427         }
 428     }
 429
 430     body->offset = ftello(fp);       /* Mark the start of the real data */
 431     if (!body->subtype) {
 432         if (body->type == TYPETEXT)
 433             body->subtype = m_strdup("plain");
 434         if (body->type == TYPEMESSAGE)
 435             body->subtype = m_strdup("rfc822");
 436     }
 437
 438     p_delete(&line);
 439     return (body);
 440 }
 441
 442 void mutt_parse_part(FILE *fp, BODY *b)
 443 {
 444     char *bound = 0;
 445
 446     switch (b->type) {
 447       case TYPEMULTIPART:
 448         bound = mutt_get_parameter("boundary", b->parameter);
 449         fseeko(fp, b->offset, SEEK_SET);
 450         b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
 451                                         mime_which_token(b->subtype, -1) == MIME_DIGEST);
 452         break;
 453
 454       case TYPEMESSAGE:
 455         if (b->subtype) {
 456             fseeko(fp, b->offset, SEEK_SET);
 457
 458             if (mutt_is_message_type(b->type, b->subtype)) {
 459                 b->parts = mutt_parse_messageRFC822(fp, b);
 460             } else
 461             if (mime_which_token(b->subtype, -1) == MIME_EXTERNAL_BODY) {
 462                 b->parts = mutt_read_mime_header(fp, 0);
 463             } else {
 464                 return;
 465             }
 466         }
 467         break;
 468
 469       default:
 470         return;
 471     }
 472
 473     /* try to recover from parsing error */
 474     if (!b->parts) {
 475         b->type = TYPETEXT;
 476         m_strreplace(&b->subtype, "plain");
 477     }
 478 }
 479
 480 /* parse a MESSAGE/RFC822 body
 481  *
 482  * args:
 483  *      fp              stream to read from
 484  *
 485  *      parent          structure which contains info about the message/rfc822
 486  *                      body part
 487  *
 488  * NOTE: this assumes that `parent->length' has been set!
 489  */
 490 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
 491 {
 492     BODY *msg;
 493
 494     parent->hdr = header_new();
 495     parent->hdr->offset = ftello(fp);
 496     parent->hdr->env    = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
 497
 498     msg = parent->hdr->content;
 499
 500     /* ignore the length given in the content-length since it could be wrong
 501        and we already have the info to calculate the correct length */
 502     /* if (msg->length == -1) */
 503     /* if body of this message is empty, we can end up with a negative length */
 504     msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
 505
 506     mutt_parse_part(fp, msg);
 507
 508     return msg;
 509 }
 510
 511 /* parse a multipart structure
 512  *
 513  * args:
 514  *      fp              stream to read from
 515  *
 516  *      bound           body separator
 517  *
 518  *      end_off         length of the multipart body (used when the final
 519  *                      boundary is missing to avoid reading too far)
 520  *
 521  *      digest          1 if reading a multipart/digest, 0 otherwise
 522  */
 523
 524 BODY *
 525 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
 526 {
 527     char buffer[LONG_STRING];
 528     BODY *head = NULL;
 529     BODY **last = &head;
 530     int blen = m_strlen(bound);
 531     int final = 0;                /* did we see the ending boundary? */
 532
 533     if (!blen) {
 534         mutt_error _("multipart message has no boundary parameter!");
 535         return NULL;
 536     }
 537
 538     while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
 539         int len, crlf, i;
 540
 541         len  = m_strlen(buffer);
 542         crlf = len > 1 && buffer[len - 2] == '\r';
 543
 544         if (buffer[0] == '-' && buffer[1] == '-'
 545         && !m_strncmp(buffer + 2, bound, blen))
 546         {
 547             if (*last) {
 548                 BODY *b = *last;
 549
 550                 /* if the body is empty, we can end up with a -1 length */
 551                 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
 552                 if (b->parts && b->parts->length == 0) {
 553                     b->parts->length = ftello(fp) - b->parts->offset
 554                                      - len - 1 - crlf;
 555                 }
 556             }
 557
 558             /* Remove any trailing whitespace, up to the length of the boundary */
 559             for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
 560                 buffer[i] = '\0';
 561
 562             /* Check for the end boundary */
 563             final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
 564             if (final)
 565                 break;
 566
 567             if (buffer[2 + blen] == '\0') {
 568                 BODY *new = mutt_read_mime_header(fp, digest);
 569
 570                 /*
 571                  * Consistency checking - catch
 572                  * bad attachment end boundaries
 573                  */
 574
 575                 if (new->offset > end_off) {
 576                     mutt_free_body(&new);
 577                     break;
 578                 }
 579
 580                 if (*last)
 581                     last = &(*last)->next;
 582                 *last = new;
 583             }
 584         }
 585     }
 586
 587     /* in case of missing end boundary, set the length to something reasonable */
 588     if (*last && (*last)->length == 0 && !final)
 589         (*last)->length = end_off - (*last)->offset;
 590
 591     /* parse recursive MIME parts */
 592     {
 593         BODY *b;
 594         for (b = head; b; b = b->next)
 595             mutt_parse_part(fp, b);
 596     }
 597
 598     return (head);
 599 }
 600
 601 static const char *
 602 uncomment_timezone(char *buf, size_t buflen, const char *tz)
 603 {
 604     char *p;
 605
 606     if (*tz != '(')
 607         return tz;                  /* no need to do anything */
 608
 609     tz = vskipspaces(tz + 1);
 610     p = strpbrk(tz, " )");
 611     if (!p)
 612         return tz;
 613
 614     m_strncpy(buf, buflen, tz, p - tz);
 615     return buf;
 616 }
 617
/* Table of the timezone abbreviations understood by mutt_parse_date().
 *
 * The table is searched with bsearch() using ascii_strcasecmp() on the
 * name, so the entries MUST be kept sorted by tzname.  Abbreviations that
 * are ambiguous have their alternative meanings left commented out.
 */
static struct tz_t {
    char tzname[5];               /* abbreviation, at most 4 chars + NUL */
    unsigned char zhours;         /* hours of offset from UTC */
    unsigned char zminutes;       /* minutes of offset from UTC */
    unsigned char zoccident;      /* west of UTC? */
} TimeZones[] = {
    {"aat", 1, 0, 1},             /* Atlantic Africa Time */
    {"adt", 4, 0, 0},             /* Arabia DST */
    {"ast", 3, 0, 0},             /* Arabia */
    /*{ "ast",   4,  0, 1 }, *//* Atlantic */
    {"bst", 1, 0, 0},             /* British DST */
    {"cat", 1, 0, 0},             /* Central Africa */
    {"cdt", 5, 0, 1},
    {"cest", 2, 0, 0},            /* Central Europe DST */
    {"cet", 1, 0, 0},             /* Central Europe */
    {"cst", 6, 0, 1},
    /*{ "cst",   8,  0, 0 }, *//* China */
    /*{ "cst",   9, 30, 0 }, *//* Australian Central Standard Time */
    {"eat", 3, 0, 0},             /* East Africa */
    {"edt", 4, 0, 1},
    {"eest", 3, 0, 0},            /* Eastern Europe DST */
    {"eet", 2, 0, 0},             /* Eastern Europe */
    {"egst", 0, 0, 0},            /* Eastern Greenland DST */
    {"egt", 1, 0, 1},             /* Eastern Greenland */
    {"est", 5, 0, 1},
    {"gmt", 0, 0, 0},
    {"gst", 4, 0, 0},             /* Persian Gulf */
    {"hkt", 8, 0, 0},             /* Hong Kong */
    {"ict", 7, 0, 0},             /* Indochina */
    {"idt", 3, 0, 0},             /* Israel DST */
    {"ist", 2, 0, 0},             /* Israel */
    /*{ "ist",   5, 30, 0 }, *//* India */
    {"jst", 9, 0, 0},             /* Japan */
    {"kst", 9, 0, 0},             /* Korea */
    {"mdt", 6, 0, 1},
    {"met", 1, 0, 0},             /* this is now officially CET */
    {"msd", 4, 0, 0},             /* Moscow DST */
    {"msk", 3, 0, 0},             /* Moscow */
    {"mst", 7, 0, 1},
    {"nzdt", 13, 0, 0},           /* New Zealand DST */
    {"nzst", 12, 0, 0},           /* New Zealand */
    {"pdt", 7, 0, 1},
    {"pst", 8, 0, 1},
    {"sat", 2, 0, 0},             /* South Africa */
    {"smt", 4, 0, 0},             /* Seychelles */
    {"sst", 11, 0, 1},            /* Samoa */
    /*{ "sst",   8,  0, 0 }, *//* Singapore */
    {"utc", 0, 0, 0},
    {"wat", 0, 0, 0},             /* West Africa */
    {"west", 1, 0, 0},            /* Western Europe DST */
    {"wet", 0, 0, 0},             /* Western Europe */
    {"wgst", 2, 0, 1},            /* Western Greenland DST */
    {"wgt", 3, 0, 1},             /* Western Greenland */
    {"wst", 8, 0, 0},             /* Western Australia */
};
 673
 674 /* parses a date string in RFC822 format:
 675  *
 676  * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 677  *
 678  * This routine assumes that `h' has been initialized to 0.  the `timezone'
 679  * field is optional, defaulting to +0000 if missing.
 680  */
 681 time_t mutt_parse_date(const char *s, HEADER *h)
 682 {
 683     int zhours = 0, zminutes = 0, zoccident = 0;
 684     char scratch[SHORT_STRING];
 685     struct tm tm;
 686     int count = 0;
 687     char *p;
 688
 689     /* Don't modify our argument. Fixed-size buffer is ok here since
 690        the date format imposes a natural limit.  */
 691
 692     m_strcpy(scratch, sizeof(scratch), s);
 693
 694     /* kill the day of the week, if it exists. */
 695     p = strchr(scratch, ',');
 696     p = vskipspaces(p ? p + 1 : scratch);
 697
 698     p_clear(&tm, 1);
 699
 700     while ((p = strtok (p, " \t")) != NULL) {
 701         char tzstr[SHORT_STRING];
 702         const char *ptz;
 703
 704         switch (count) {
 705           case 0:                    /* day of the month */
 706             if (!isdigit((unsigned char)*p))
 707                 return -1;
 708             tm.tm_mday = atoi(p);
 709             if (tm.tm_mday > 31)
 710                 return -1;
 711             break;
 712
 713           case 1:                    /* month of the year */
 714             tm.tm_mon = mutt_check_month(p);
 715             if (tm.tm_mon < 0)
 716                 return -1;
 717             break;
 718
 719           case 2:                    /* year */
 720             tm.tm_year = atoi(p);
 721             if (tm.tm_year < 50)
 722                 tm.tm_year += 100;
 723             else if (tm.tm_year >= 1900)
 724                 tm.tm_year -= 1900;
 725             break;
 726
 727           case 3:                    /* time of day */
 728             tm.tm_hour = strtol(p, &p, 10);
 729             if (*p++ != ':')
 730                 return -1;
 731             tm.tm_min  = strtol(p, &p, 10);
 732             if (*p++ == ':') {
 733                 tm.tm_sec = strtol(p, &p, 10);
 734             } else {
 735                 tm.tm_sec = 0;
 736             }
 737             break;
 738
 739           case 4:                    /* timezone */
 740             /* sometimes we see things like (MST) or (-0700) so attempt to
 741              * compensate by uncommenting the string if non-RFC822 compliant
 742              */
 743             ptz = uncomment_timezone(tzstr, sizeof(tzstr), p);
 744
 745             if (*ptz == '+' || *ptz == '-') {
 746                 if (isdigit((unsigned char)ptz[1])
 747                 &&  isdigit((unsigned char)ptz[2])
 748                 &&  isdigit((unsigned char)ptz[3])
 749                 &&  isdigit((unsigned char)ptz[4]))
 750                 {
 751                     zoccident = ptz[0] == '-';
 752                     zhours    = (ptz[1] - '0') * 10 + (ptz[2] - '0');
 753                     zminutes  = (ptz[3] - '0') * 10 + (ptz[4] - '0');
 754                 }
 755             } else {
 756                 struct tz_t *tz;
 757
 758                 /* This is safe to do: A pointer to a struct equals a pointer to its
 759                  * first element*/
 760                 tz = bsearch(ptz, TimeZones, countof(TimeZones), sizeof(TimeZones[0]),
 761                              (int (*)(const void *, const void *))ascii_strcasecmp);
 762
 763                 if (tz) {
 764                     zhours = tz->zhours;
 765                     zminutes = tz->zminutes;
 766                     zoccident = tz->zoccident;
 767                 }
 768
 769                 /* ad hoc support for the European MET (now officially CET) TZ */
 770                 if (ascii_strcasecmp(p, "MET") == 0) {
 771                     if ((p = strtok (NULL, " \t")) && !ascii_strcasecmp(p, "DST")) {
 772                         zhours++;
 773                     }
 774                 }
 775             }
 776             break;
 777         }
 778         count++;
 779         p = NULL;
 780     }
 781
 782     if (count < 4) {  /* don't check for missing timezone */
 783         return -1;
 784     }
 785
 786     if (h) {
 787         h->zhours    = zhours;
 788         h->zminutes  = zminutes;
 789         h->zoccident = zoccident;
 790     }
 791
 792     return mutt_mktime(&tm, 0) + (zoccident ? 1 : -1) * (zhours * 3600 + zminutes * 60);
 793 }
 794
 795 string_list_t **mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
 796                               short weed, short do_2047, string_list_t **user_hdrs)
 797 {
 798     switch (mime_which_token(line, -1)) {
 799       case MIME_APPARENTLY_FROM:
 800         e->from = rfc822_parse_adrlist (e->from, p);
 801         break;
 802
 803       case MIME_APPARENTLY_TO:
 804         e->to = rfc822_parse_adrlist (e->to, p);
 805         break;
 806
 807       case MIME_BCC:
 808         e->bcc = rfc822_parse_adrlist (e->bcc, p);
 809         break;
 810
 811       case MIME_CC:
 812         e->cc = rfc822_parse_adrlist (e->cc, p);
 813         break;
 814
 815       case MIME_CONTENT_DESCRIPTION:
 816         if (hdr) {
 817             m_strreplace(&hdr->content->description, p);
 818             rfc2047_decode(&hdr->content->description);
 819         }
 820         break;
 821
 822       case MIME_CONTENT_DISPOSITION:
 823         if (hdr)
 824             parse_content_disposition(p, hdr->content);
 825         break;
 826
 827       case MIME_CONTENT_LENGTH:
 828         if (hdr) {
 829             if ((hdr->content->length = atoi(p)) < 0)
 830                 hdr->content->length = -1;
 831         }
 832         break;
 833
 834       case MIME_CONTENT_TRANSFER_ENCODING:
 835         if (hdr)
 836             hdr->content->encoding = mutt_check_encoding(p);
 837         break;
 838
 839       case MIME_CONTENT_TYPE:
 840         if (hdr)
 841             mutt_parse_content_type (p, hdr->content);
 842         break;
 843
 844       case MIME_DATE:
 845         m_strreplace(&e->date, p);
 846         if (hdr)
 847             hdr->date_sent = mutt_parse_date (p, hdr);
 848         break;
 849
 850       case MIME_EXPIRES:
 851         if (hdr && mutt_parse_date (p, NULL) < time (NULL))
 852             hdr->expired = 1;
 853         break;
 854
 855 #ifdef USE_NNTP
 856       case MIME_FOLLOWUP_TO:
 857         if (!e->followup_to) {
 858             m_strrtrim(p);
 859             e->followup_to = m_strdup(skipspaces(p));
 860         }
 861         break;
 862 #endif
 863
 864       case MIME_FROM:
 865         e->from = rfc822_parse_adrlist(e->from, p);
 866         /* don't leave from info NULL if there's an invalid address (or
 867          * whatever) in From: field; mutt would just display it as empty
 868          * and mark mail/(esp.) news article as your own. aaargh! this
 869          * bothered me for _years_ */
 870         if (!e->from) {
 871             e->from = address_new();
 872             e->from->personal = m_strdup(p);
 873         }
 874         break;
 875
 876       case MIME_IN_REPLY_TO:
 877         string_list_wipe(&e->in_reply_to);
 878         e->in_reply_to = mutt_parse_references(p, 1);
 879         break;
 880
 881       case MIME_LINES:
 882         if (hdr) {
 883             /* HACK - mutt has, for a very short time, produced negative
 884                Lines header values.  Ignore them. */
 885             hdr->lines = MAX(0, atoi(p));
 886         }
 887         break;
 888
 889       case MIME_LIST_POST:
 890         /* RFC 2369.  FIXME: We should ignore whitespace, but don't. */
 891         if (strncmp(p, "NO", 2)) {
 892             char *beg, *end;
 893
 894             for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
 895                 ++beg;
 896                 if (!(end = strchr (beg, '>')))
 897                     break;
 898
 899                 /* Take the first mailto URL */
 900                 if (url_check_scheme (beg) == U_MAILTO) {
 901                     p_delete(&e->list_post);
 902                     e->list_post = p_dupstr(beg, end - beg);
 903                     break;
 904                 }
 905             }
 906         }
 907         break;
 908
 909       case MIME_MAIL_FOLLOWUP_TO:
 910         e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
 911         break;
 912
 913       case MIME_MAIL_REPLY_TO:
 914         address_list_wipe(&e->reply_to);
 915         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 916         break;
 917
 918       case MIME_MESSAGE_ID:
 919         {
 920             const char *beg, *end;
 921
 922             /* We add a new "Message-ID:" when building a message */
 923             p_delete(&e->message_id);
 924
 925             if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
 926                 e->message_id = p_dupstr(beg, (end - beg) + 1);
 927         }
 928         break;
 929
 930       case MIME_MIME_VERSION:
 931         if (hdr)
 932             hdr->mime = 1;
 933         break;
 934
 935 #ifdef USE_NNTP
 936       case MIME_NEWSGROUPS:
 937         p_delete(&e->newsgroups);
 938         m_strrtrim(p);
 939         e->newsgroups = m_strdup(skipspaces(p));
 940         break;
 941 #endif
 942
 943       case MIME_ORGANIZATION:
 944         if (!e->organization && mime_which_token(p, -1) == MIME_UNKNOWN)
 945             e->organization = m_strdup(p);
 946         break;
 947
 948       case MIME_RECEIVED:
 949         if (hdr && !hdr->received) {
 950             char *d = strchr(p, ';');
 951             if (d)
 952                 hdr->received = mutt_parse_date(d + 1, NULL);
 953         }
 954         break;
 955
 956       case MIME_REFERENCES:
 957         string_list_wipe(&e->references);
 958         e->references = mutt_parse_references(p, 0);
 959         break;
 960
 961       case MIME_REPLY_TO:
 962         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 963         break;
 964
 965       case MIME_RETURN_PATH:
 966         e->return_path = rfc822_parse_adrlist(e->return_path, p);
 967         break;
 968
 969       case MIME_SENDER:
 970         e->sender = rfc822_parse_adrlist (e->sender, p);
 971         break;
 972
 973       case MIME_STATUS:
 974         if (hdr) {
 975             while (*p) {
 976                 switch (*p) {
 977                   case 'r':
 978                     hdr->replied = 1;
 979                     break;
 980                   case 'O':
 981                     hdr->old = 1;
 982                     break;
 983                   case 'R':
 984                     hdr->read = 1;
 985                     break;
 986                 }
 987                 p++;
 988             }
 989         }
 990         break;
 991
 992       case MIME_SUBJECT:
 993         if (!e->subject)
 994             e->subject = m_strdup(p);
 995         break;
 996
 997       case MIME_SUPERCEDES:
 998       case MIME_SUPERSEDES:
 999         if (hdr)
1000             e->supersedes = m_strdup(p);
1001         break;
1002
1003       case MIME_TO:
1004         e->to = rfc822_parse_adrlist(e->to, p);
1005         break;
1006
1007 #ifdef USE_NNTP
1008       case MIME_X_COMMENT_TO:
1009         if (!e->x_comment_to)
1010             e->x_comment_to = m_strdup(p);
1011         break;
1012 #endif
1013
1014       case MIME_X_LABEL:
1015         e->x_label = m_strdup(p);
1016         break;
1017
1018 #ifdef USE_NNTP
1019       case MIME_XREF:
1020         if (!e->xref)
1021             e->xref = m_strdup(p);
1022         break;
1023 #endif
1024
1025       case MIME_X_STATUS:
1026         if (hdr) {
1027             while (*p) {
1028                 switch (*p) {
1029                   case 'A':
1030                     hdr->replied = 1;
1031                     break;
1032                   case 'D':
1033                     hdr->deleted = 1;
1034                     break;
1035                   case 'F':
1036                     hdr->flagged = 1;
1037                     break;
1038                   default:
1039                     break;
1040                 }
1041                 p++;
1042             }
1043         }
1044         break;
1045
1046       default:
1047         if (!user_hdrs)
1048             break;
1049
1050         /* restore the original line */
1051         line[m_strlen(line)] = ':';
1052
1053         if (weed && option(OPTWEED) && mutt_matches_ignore(line, Ignore)
1054         && !mutt_matches_ignore(line, UnIgnore)) {
1055             break;
1056         }
1057
1058         *user_hdrs = string_item_new();
1059         (*user_hdrs)->data = m_strdup(line);
1060         if (do_2047)
1061             rfc2047_decode(&(*user_hdrs)->data);
1062         return &(*user_hdrs)->next;
1063     }
1064
1065     return user_hdrs;
1066 }
1067
1068 /* mutt_read_rfc822_header() -- parses a RFC822 header
1069  *
1070  * Args:
1071  *
1072  * f            stream to read from
1073  *
1074  * hdr          header structure of current message (optional).
1075  *
1076  * user_hdrs    If set, store user headers.  Used for recall-message and
1077  *              postpone modes.
1078  *
1079  * weed         If this parameter is set and the user has activated the
1080  *              $weed option, honor the header weed list for user headers.
1081  *              Used for recall-message.
1082  *
1083  * Returns:     newly allocated envelope structure.  You should free it by
1084  *              envelope_delete() when envelope stay unneeded.
1085  */
1086 ENVELOPE *
1087 mutt_read_rfc822_header(FILE *f, HEADER *hdr, short user_hdrs, short weed)
1088 {
1089     ENVELOPE *e = envelope_new();
1090     string_list_t **last = user_hdrs ? &e->userhdrs : NULL;
1091
1092     char *line = p_new(char, LONG_STRING);
1093     ssize_t linelen = LONG_STRING;
1094     off_t loc;
1095
1096     if (hdr && !hdr->content) {
1097         hdr->content = mutt_new_body ();
1098
1099         /* set the defaults from RFC1521 */
1100         hdr->content->type     = TYPETEXT;
1101         hdr->content->subtype  = m_strdup("plain");
1102         hdr->content->encoding = ENC7BIT;
1103         hdr->content->length   = -1;
1104
1105         /* RFC 2183 says this is arbitrary */
1106         hdr->content->disposition = DISPINLINE;
1107     }
1108
1109     while ((loc = ftello(f)),
1110            mutt_read_rfc822_line(f, &line, &linelen))
1111     {
1112         char buf[LONG_STRING + 1] = "";
1113         char *p;
1114
1115         p = strpbrk(line, ": \t");
1116         if (!p || *p != ':') {
1117             char return_path[LONG_STRING];
1118             time_t t;
1119
1120             /* some bogus MTAs will quote the original "From " line */
1121             if (!m_strncmp(">From ", line, 6))
1122                 continue;               /* just ignore */
1123
1124             if (is_from(line, return_path, sizeof(return_path), &t)) {
1125                 /* MH somtimes has the From_ line in the middle of the header! */
1126                 if (hdr && !hdr->received)
1127                     hdr->received = t - mutt_local_tz(t);
1128                 continue;
1129             }
1130
1131             fseeko(f, loc, 0);
1132             break;                    /* end of header */
1133         }
1134
1135         if (mutt_match_spam_list(line, SpamList, buf, sizeof(buf))) {
1136             if (!rx_list_match(NoSpamList, line)) {
1137                 /* if spam tag already exists, figure out how to amend it */
1138                 if (e->spam && *buf) {
1139                     if (SpamSep) {
1140                         /* If SpamSep defined, append with separator */
1141                         mutt_buffer_addstr(e->spam, SpamSep);
1142                         mutt_buffer_addstr(e->spam, buf);
1143                     } else {
1144                         /* else overwrite */
1145                         mutt_buffer_reset(e->spam);
1146                         mutt_buffer_addstr(e->spam, buf);
1147                     }
1148                 }
1149                 else if (!e->spam && *buf) {
1150                     /* spam tag is new, and match expr is non-empty; copy */
1151                     e->spam = mutt_buffer_from(NULL, buf);
1152                 }
1153                 else if (!e->spam) {
1154                     /* match expr is empty; plug in null string if no existing tag */
1155                     e->spam = mutt_buffer_from(NULL, "");
1156                 }
1157             }
1158         }
1159
1160         *p++ = '\0';
1161         p = vskipspaces(p);
1162         if (!*p)
1163             continue;                 /* skip empty header fields */
1164
1165         last = mutt_parse_rfc822_line(e, hdr, line, p, weed, 1, last);
1166     }
1167
1168     p_delete(&line);
1169
1170     if (hdr) {
1171         hdr->content->hdr_offset = hdr->offset;
1172         hdr->content->offset     = ftello(f);
1173         rfc2047_decode_envelope(e);
1174         /* check for missing or invalid date */
1175         if (hdr->date_sent <= 0) {
1176             hdr->date_sent = hdr->received;
1177         }
1178     }
1179
1180     return e;
1181 }
1182
1183 /* Compares mime types to the ok and except lists */
1184 static int count_body_parts_check(string_list_t **checklist, BODY *b)
1185 {
1186     string_list_t *type;
1187
1188     for (type = *checklist; type; type = type->next) {
1189         ATTACH_MATCH *a = (ATTACH_MATCH *)type->data;
1190
1191         if ((a->major_int == TYPEANY || a->major_int == b->type)
1192         &&  !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
1193             return 1;
1194         }
1195     }
1196
1197     return 0;
1198 }
1199
1200 static int count_body_parts (BODY *body, int flags)
1201 {
1202     int count = 0;
1203     BODY *bp;
1204
1205     if (!body)
1206         return 0;
1207
1208     for (bp = body; bp != NULL; bp = bp->next) {
1209         /* Initial disposition is to count and not to recurse this part. */
1210         int shallcount, shallrecurse, iscontainer;
1211         int tok = mime_which_token(bp->subtype, -1);
1212
1213         iscontainer  = bp->type == TYPEMESSAGE || bp->type == TYPEMULTIPART;
1214
1215         /* don't recurse in external bodies or multipart/alternatives */
1216         shallrecurse = (bp->type == TYPEMESSAGE && tok != MIME_EXTERNAL_BODY)
1217                     || (bp->type == TYPEMULTIPART && tok != MIME_ALTERNATIVE);
1218
1219         /* Don't count top level containers and fundamental inlines */
1220         shallcount   = !(iscontainer && (flags & M_PARTS_TOPLEVEL))
1221                     && !(!iscontainer && bp->disposition == DISPINLINE && bp == body);
1222
1223         if (shallcount) {
1224             /* Turn off shallcount if message type is not in ok list,
1225              * or if it is in except list. Check is done separately for
1226              * inlines vs. attachments.
1227              */
1228
1229             if (bp->disposition == DISPATTACH) {
1230                 if (!count_body_parts_check(&AttachAllow, bp))
1231                     shallcount = 0;
1232                 if (count_body_parts_check(&AttachExclude, bp))
1233                     shallcount = 0;
1234             } else {
1235                 if (!count_body_parts_check(&InlineAllow, bp))
1236                     shallcount = 0;
1237                 if (count_body_parts_check(&InlineExclude, bp))
1238                     shallcount = 0;
1239             }
1240         }
1241
1242         bp->attach_qualifies = shallcount;
1243         count += shallcount;
1244
1245         if (shallrecurse) {
1246             bp->attach_count = count_body_parts(bp->parts,
1247                                                 flags & ~M_PARTS_TOPLEVEL);
1248             count += bp->attach_count;
1249         }
1250     }
1251
1252     return count;
1253 }
1254
1255 int mutt_count_body_parts(HEADER *hdr, int flags)
1256 {
1257     if (!option(OPTCOUNTATTACH))
1258         return 0;
1259
1260     if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1261         return hdr->attach_total;
1262
1263     if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1264         hdr->attach_total = count_body_parts(hdr->content,
1265                                              flags | M_PARTS_TOPLEVEL);
1266     else
1267         hdr->attach_total = 0;
1268
1269     hdr->attach_valid = 1;
1270     return hdr->attach_total;
1271 }