lib-mime/rfc822parse.c

   1 /*
   2  *  This program is free software; you can redistribute it and/or modify
   3  *  it under the terms of the GNU General Public License as published by
   4  *  the Free Software Foundation; either version 2 of the License, or (at
   5  *  your option) any later version.
   6  *
   7  *  This program is distributed in the hope that it will be useful, but
   8  *  WITHOUT ANY WARRANTY; without even the implied warranty of
   9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  10  *  General Public License for more details.
  11  *
  12  *  You should have received a copy of the GNU General Public License
  13  *  along with this program; if not, write to the Free Software
  14  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15  *  MA 02110-1301, USA.
  16  *
  17  *  Copyright © 2006 Pierre Habouzit
  18  */
  19
  20 /*
  21  * Copyright notice from original mutt:
  22  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
  23  *
  24  * This file is part of mutt-ng, see http://www.muttng.org/.
  25  * It's licensed under the GNU General Public License,
  26  * please see the file GPL in the top level source directory.
  27  */
  28
  29 #if HAVE_CONFIG_H
  30 # include "config.h"
  31 #endif
  32
  33 #include <stdio.h>
  34
  35 #include <lib-lib/mem.h>
  36 #include <lib-lib/str.h>
  37 #include <lib-lib/ascii.h>
  38 #include <lib-lib/macros.h>
  39 #include <lib-lib/buffer.h>
  40 #include <lib-lib/date.h>
  41
  42 #include "recvattach.h"
  43 #include "url.h"
  44
  45 #include "lib/debug.h"
  46
  47 #include "mime.h"
  48
  49 /* Reads an arbitrarily long header field, and looks ahead for continuation
  50  * lines.  ``line'' must point to a dynamically allocated string; it is
  51  * increased if more space is required to fit the whole line.
  52  */
  53 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
  54 {
  55     ssize_t pos = 0;
  56
  57     for (;;) {
  58         char *p = *line;
  59
  60         /* end of file or end of headers */
  61         if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
  62             *p = '\0';
  63             return 0;
  64         }
  65
  66         pos += m_strlen(p + pos);
  67         if (p[pos - 1] == '\n') {
  68             int c;
  69
  70             /* remove trailing spaces. safe: p[0] is not a space */
  71             do {
  72                 p[--pos] = '\0';
  73             } while (ISSPACE(p[pos]));
  74
  75             /* check to see if the next line is a continuation line */
  76             c = fgetc(f);
  77             if (c != ' ' && c != '\t') {
  78                 /* next line is a separate header field or EOH */
  79                 ungetc(c, f);
  80                 return pos;
  81             }
  82
  83             /* eat tabs and spaces from the beginning of the continuation line */
  84             do {
  85                 c = fgetc(f);
  86             } while (c == ' ' || c == '\t');
  87             ungetc(c, f);
  88
  89             /* string is still terminated because we removed at least one
  90                whitespace char above */
  91             p[pos++] = ' ';
  92         }
  93
  94         if (*n < pos + STRING) {
  95             /* grow the buffer */
  96             *n += STRING;
  97             p_realloc(line, *n);
  98         }
  99     }
 100 }
 101
 102 /* TODO: Make that a string list somehow */
 103 LIST *mutt_parse_references(char *s, int in_reply_to)
 104 {
 105     LIST *lst = NULL;
 106     int n = 0;
 107     char *o = NULL;
 108
 109     /* some mail clients add other garbage besides message-ids, so do a quick
 110      * check to make sure this looks like a valid message-id
 111      * some idiotic clients also break their message-ids between lines, deal
 112      * with that too (give up if it's more than two lines, though)
 113      */
 114
 115     for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
 116         char *new = NULL;
 117
 118         if (*s == '<') {
 119             n = m_strlen(s);
 120             if (s[n - 1] != '>') {
 121                 o = s;
 122                 continue;
 123             }
 124
 125             new = m_strdup(s);
 126         } else if (o) {
 127             ssize_t m = m_strlen(s);
 128
 129             if (s[m - 1] != '>') {
 130                 o = NULL;
 131             } else {
 132                 new = p_new(char, n + m + 1);
 133                 strcpy(new, o);
 134                 strcpy(new + n, s);
 135             }
 136         }
 137
 138         /* make sure that this really does look like a message-id.
 139          * it should have exactly one @, and if we're looking at
 140          * an in-reply-to header, make sure that the part before
 141          * the @ has more than eight characters or it's probably
 142          * an email address
 143          */
 144         if (new) {
 145             char *at = strchr(new, '@');
 146             LIST *tmp;
 147
 148             if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
 149                 p_delete(&new);
 150                 continue;
 151             }
 152
 153             tmp = p_new(LIST, 1);
 154             tmp->data = new;
 155             tmp->next = lst;
 156             lst = tmp;
 157         }
 158     }
 159
 160     return lst;
 161 }
 162
 163 int mutt_check_encoding(const char *s)
 164 {
 165 #define COMPARE(tok, value)                             \
 166     if (!ascii_strncasecmp(tok, s, sizeof(tok) - 1)) {  \
 167         return value;                                   \
 168     }
 169     COMPARE("7bit", ENC7BIT);
 170     COMPARE("8bit", ENC8BIT);
 171     COMPARE("binary", ENCBINARY);
 172     COMPARE("quoted-printable", ENCQUOTEDPRINTABLE);
 173     COMPARE("base64", ENCBASE64);
 174     COMPARE("x-uuencode", ENCUUENCODED);
 175 #undef COMPARE
 176
 177     return ENCOTHER;
 178 }
 179
 180 int mutt_check_mime_type(const char *s)
 181 {
 182 #define COMPARE(tok, value)                             \
 183     if (!ascii_strncasecmp(tok, s, sizeof(tok) - 1)) {  \
 184         return value;                                   \
 185     }
 186   COMPARE("text", TYPETEXT);
 187   COMPARE("multipart", TYPEMULTIPART);
 188   COMPARE("application", TYPEAPPLICATION);
 189   COMPARE("message", TYPEMESSAGE);
 190   COMPARE("image", TYPEIMAGE);
 191   COMPARE("audio", TYPEAUDIO);
 192   COMPARE("video", TYPEVIDEO);
 193   COMPARE("model", TYPEMODEL);
 194   COMPARE("*",  TYPEANY);
 195   COMPARE(".*", TYPEANY);
 196 #undef COMPARE
 197
 198   return TYPEOTHER;
 199 }
 200
 201 static PARAMETER *parse_parameters(const char *s)
 202 {
 203     PARAMETER *res = NULL;
 204     PARAMETER **list = &res;
 205
 206     while (*s) {
 207         const char *p;
 208         PARAMETER *new;
 209         int i;
 210
 211         s = skipspaces(s);
 212         if (*s == '=')             /* parameters are fucked up, go away */
 213             break;
 214
 215         p = strpbrk(s, "=;");
 216         if (!p)
 217             break;
 218
 219         if (*p == ';') {
 220             /* if we hit a ; now the parameter has no value, just skip it */
 221             s = p + 1;
 222             continue;
 223         }
 224
 225         i = p - s;
 226         new = parameter_new();
 227         new->attribute = p_dupstr(s, i);
 228
 229         while (--i >= 0 && ISSPACE(new->attribute[i])) {
 230             new->attribute[i] = '\0';
 231         }
 232         s = skipspaces(p + 1);                      /* skip over the = */
 233
 234         if (*s == '"') {
 235             char buffer[LONG_STRING];
 236             int state_ascii = 1;
 237
 238             s++;
 239             for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
 240                 if (!option(OPTSTRICTMIME)) {
 241                     /* As iso-2022-* has a characer of '"' with non-ascii state,
 242                      * ignore it. */
 243                     if (*s == 0x1b && i < ssizeof(buffer) - 2) {
 244                         state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
 245                     }
 246                 }
 247                 if (state_ascii && *s == '"')
 248                     break;
 249
 250                 if (*s == '\\') {
 251                     buffer[i] = *++s;
 252                 } else {
 253                     buffer[i] = *s;
 254                 }
 255             }
 256
 257             new->value = p_dupstr(buffer, i);
 258         } else {
 259             for (p = s; *p && *p != ' ' && *p != ';'; p++);
 260             new->value = p_dupstr(s, p - s);
 261         }
 262
 263         *list = new;
 264         list = &new->next;
 265
 266         s = strchr(s, ';');           /* Find the next parameter */
 267         if (!s)
 268             break;                    /* no more parameters */
 269     }
 270
 271     rfc2231_decode_parameters(&res);
 272     return res;
 273 }
 274
 275 void mutt_parse_content_type(char *s, BODY *ct)
 276 {
 277     char *pc;
 278     char *subtype;
 279
 280     p_delete(&ct->subtype);
 281     parameter_delete(&ct->parameter);
 282
 283     /* First extract any existing parameters */
 284     if ((pc = strchr(s, ';')) != NULL) {
 285         *pc++ = '\0';
 286         ct->parameter = parse_parameters(vskipspaces(pc));
 287
 288         /* Some pre-RFC1521 gateways still use the "name=filename" convention,
 289          * but if a filename has already been set in the content-disposition,
 290          * let that take precedence, and don't set it here */
 291         pc = mutt_get_parameter("name", ct->parameter);
 292         if (pc && !ct->filename)
 293             ct->filename = m_strdup(pc);
 294     }
 295
 296     /* Now get the subtype */
 297     if ((subtype = strchr (s, '/'))) {
 298         *subtype++ = '\0';
 299         for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
 300         ct->subtype = p_dupstr(subtype, pc - subtype);
 301     }
 302
 303     /* Finally, get the major type */
 304     ct->type = mutt_check_mime_type(s);
 305
 306     if (ct->type == TYPEOTHER) {
 307         ct->xtype = m_strdup(s);
 308     }
 309
 310     if (!ct->subtype) {
 311         /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
 312          * field, so we can attempt to convert the type to BODY here.
 313          */
 314         switch (ct->type) {
 315             char buffer[SHORT_STRING];
 316
 317           case TYPETEXT:
 318             ct->subtype = m_strdup("plain");
 319             break;
 320
 321           case TYPEAUDIO:
 322             ct->subtype = m_strdup("basic");
 323             break;
 324
 325           case TYPEMESSAGE:
 326             ct->subtype = m_strdup("rfc822");
 327             break;
 328
 329           case TYPEOTHER:
 330             ct->type = TYPEAPPLICATION;
 331             snprintf(buffer, sizeof(buffer), "x-%s", s);
 332             ct->subtype = m_strdup(buffer);
 333             break;
 334
 335           default:
 336             ct->subtype = m_strdup("x-unknown");
 337             break;
 338         }
 339     }
 340
 341     /* Default character set for text types. */
 342     if (ct->type == TYPETEXT) {
 343         pc = mutt_get_parameter("charset", ct->parameter);
 344         if (!pc) {
 345             mutt_set_parameter("charset",
 346                                option(OPTSTRICTMIME) ? "us-ascii" :
 347                                mutt_get_first_charset(AssumedCharset),
 348                                &ct->parameter);
 349         }
 350     }
 351 }
 352
 353 static void parse_content_disposition(char *s, BODY *ct)
 354 {
 355     if (!ascii_strncasecmp(s, "inline", 6)) {
 356         ct->disposition = DISPINLINE;
 357     } else if (!ascii_strncasecmp(s, "form-data", 9)) {
 358         ct->disposition = DISPFORMDATA;
 359     } else {
 360         ct->disposition = DISPATTACH;
 361     }
 362
 363     /* Check to see if a default filename was given */
 364     if ((s = strchr (s, ';'))) {
 365         PARAMETER *parms = parse_parameters(vskipspaces(s));
 366
 367         if ((s = mutt_get_parameter("filename", parms)))
 368             m_strreplace(&ct->filename, s);
 369         if ((s = mutt_get_parameter ("name", parms)))
 370             ct->form_name = m_strdup(s);
 371
 372         parameter_delete(&parms);
 373     }
 374 }
 375
 376 /* args:
 377  *      fp      stream to read from
 378  *
 379  *      digest  1 if reading subparts of a multipart/digest, 0
 380  *              otherwise
 381  */
 382 BODY *mutt_read_mime_header(FILE *fp, int digest)
 383 {
 384     BODY *body = mutt_new_body ();
 385     char *line = p_new(char, LONG_STRING);
 386     ssize_t linelen = LONG_STRING;
 387     char *p;
 388
 389     body->hdr_offset  = ftello(fp);
 390     body->encoding    = ENC7BIT;    /* default from RFC1521 */
 391     body->disposition = DISPINLINE;
 392     body->type        = digest ? TYPEMESSAGE : TYPETEXT;
 393
 394     while (mutt_read_rfc822_line(fp, &line, &linelen)) {
 395         /* Find the value of the current header */
 396         if ((p = strchr(line, ':'))) {
 397             *p++ = '\0';
 398             p = vskipspaces(p);
 399             if (!*p)
 400                 continue;
 401         } else {
 402             debug_print (1, ("bogus MIME header: %s\n", line));
 403             break;
 404         }
 405
 406         if (!ascii_strncasecmp(line, "content-", 8)) {
 407             if (!ascii_strcasecmp("type", line + 8))
 408                 mutt_parse_content_type (p, body);
 409             else if (!ascii_strcasecmp ("transfer-encoding", line + 8))
 410                 body->encoding = mutt_check_encoding (p);
 411             else if (!ascii_strcasecmp ("disposition", line + 8))
 412                 parse_content_disposition(p, body);
 413             else if (!ascii_strcasecmp ("description", line + 8)) {
 414                 m_strreplace(&body->description, p);
 415                 rfc2047_decode(&body->description);
 416             }
 417         }
 418     }
 419
 420     body->offset = ftello(fp);       /* Mark the start of the real data */
 421     if (!body->subtype) {
 422         if (body->type == TYPETEXT)
 423             body->subtype = m_strdup("plain");
 424         if (body->type == TYPEMESSAGE)
 425             body->subtype = m_strdup("rfc822");
 426     }
 427
 428     p_delete(&line);
 429     return (body);
 430 }
 431
 432 void mutt_parse_part(FILE *fp, BODY *b)
 433 {
 434     char *bound = 0;
 435
 436     switch (b->type) {
 437       case TYPEMULTIPART:
 438         bound = mutt_get_parameter("boundary", b->parameter);
 439         fseeko(fp, b->offset, SEEK_SET);
 440         b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
 441                            !ascii_strcasecmp("digest", b->subtype));
 442         break;
 443
 444       case TYPEMESSAGE:
 445         if (b->subtype) {
 446             fseeko(fp, b->offset, SEEK_SET);
 447
 448             if (mutt_is_message_type(b->type, b->subtype)) {
 449                 b->parts = mutt_parse_messageRFC822(fp, b);
 450             } else
 451             if (!ascii_strcasecmp(b->subtype, "external-body") == 0) {
 452                 b->parts = mutt_read_mime_header(fp, 0);
 453             } else {
 454                 return;
 455             }
 456         }
 457         break;
 458
 459       default:
 460         return;
 461     }
 462
 463     /* try to recover from parsing error */
 464     if (!b->parts) {
 465         b->type = TYPETEXT;
 466         m_strreplace(&b->subtype, "plain");
 467     }
 468 }
 469
 470 /* parse a MESSAGE/RFC822 body
 471  *
 472  * args:
 473  *      fp              stream to read from
 474  *
 475  *      parent          structure which contains info about the message/rfc822
 476  *                      body part
 477  *
 478  * NOTE: this assumes that `parent->length' has been set!
 479  */
 480 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
 481 {
 482     BODY *msg;
 483
 484     parent->hdr = header_new();
 485     parent->hdr->offset = ftello(fp);
 486     parent->hdr->env    = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
 487
 488     msg = parent->hdr->content;
 489
 490     /* ignore the length given in the content-length since it could be wrong
 491        and we already have the info to calculate the correct length */
 492     /* if (msg->length == -1) */
 493     /* if body of this message is empty, we can end up with a negative length */
 494     msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
 495
 496     mutt_parse_part(fp, msg);
 497
 498     return msg;
 499 }
 500
 501 /* parse a multipart structure
 502  *
 503  * args:
 504  *      fp              stream to read from
 505  *
 506  *      bound           body separator
 507  *
 508  *      end_off         length of the multipart body (used when the final
 509  *                      boundary is missing to avoid reading too far)
 510  *
 511  *      digest          1 if reading a multipart/digest, 0 otherwise
 512  */
 513
 514 BODY *
 515 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
 516 {
 517     char buffer[LONG_STRING];
 518     BODY *head = NULL;
 519     BODY **last = &head;
 520     int blen = m_strlen(bound);
 521     int final = 0;                /* did we see the ending boundary? */
 522
 523     if (!blen) {
 524         mutt_error _("multipart message has no boundary parameter!");
 525         return NULL;
 526     }
 527
 528     while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
 529         int len, crlf, i;
 530
 531         len  = m_strlen(buffer);
 532         crlf = len > 1 && buffer[len - 2] == '\r';
 533
 534         if (buffer[0] == '-' && buffer[1] == '-'
 535         && !m_strncmp(buffer + 2, bound, blen))
 536         {
 537             if (*last) {
 538                 BODY *b = *last;
 539
 540                 /* if the body is empty, we can end up with a -1 length */
 541                 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
 542                 if (b->parts && b->parts->length == 0) {
 543                     b->parts->length = ftello(fp) - b->parts->offset
 544                                      - len - 1 - crlf;
 545                 }
 546             }
 547
 548             /* Remove any trailing whitespace, up to the length of the boundary */
 549             for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
 550                 buffer[i] = '\0';
 551
 552             /* Check for the end boundary */
 553             final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
 554             if (final)
 555                 break;
 556
 557             if (buffer[2 + blen] == '\0') {
 558                 BODY *new = mutt_read_mime_header(fp, digest);
 559
 560                 /*
 561                  * Consistency checking - catch
 562                  * bad attachment end boundaries
 563                  */
 564
 565                 if (new->offset > end_off) {
 566                     mutt_free_body(&new);
 567                     break;
 568                 }
 569
 570                 if (*last)
 571                     last = &(*last)->next;
 572                 *last = new;
 573             }
 574         }
 575     }
 576
 577     /* in case of missing end boundary, set the length to something reasonable */
 578     if (*last && (*last)->length == 0 && !final)
 579         (*last)->length = end_off - (*last)->offset;
 580
 581     /* parse recursive MIME parts */
 582     {
 583         BODY *b;
 584         for (b = head; b; b = b->next)
 585             mutt_parse_part(fp, b);
 586     }
 587
 588     return (head);
 589 }
 590
 591 static const char *
 592 uncomment_timezone(char *buf, size_t buflen, const char *tz)
 593 {
 594     char *p;
 595
 596     if (*tz != '(')
 597         return tz;                  /* no need to do anything */
 598
 599     tz = vskipspaces(tz + 1);
 600     p = strpbrk(tz, " )");
 601     if (!p)
 602         return tz;
 603
 604     m_strncpy(buf, buflen, tz, p - tz);
 605     return buf;
 606 }
 607
 608 static struct tz_t {
 609     char tzname[5];
 610     unsigned char zhours;
 611     unsigned char zminutes;
 612     unsigned char zoccident;      /* west of UTC? */
 613 } TimeZones[] = {
 614     {"aat", 1, 0, 1},             /* Atlantic Africa Time */
 615     {"adt", 4, 0, 0},             /* Arabia DST */
 616     {"ast", 3, 0, 0},             /* Arabia */
 617     /*{ "ast",   4,  0, 1 }, *//* Atlantic */
 618     {"bst", 1, 0, 0},             /* British DST */
 619     {"cat", 1, 0, 0},             /* Central Africa */
 620     {"cdt", 5, 0, 1},
 621     {"cest", 2, 0, 0},            /* Central Europe DST */
 622     {"cet", 1, 0, 0},             /* Central Europe */
 623     {"cst", 6, 0, 1},
 624     /*{ "cst",   8,  0, 0 }, *//* China */
 625     /*{ "cst",   9, 30, 0 }, *//* Australian Central Standard Time */
 626     {"eat", 3, 0, 0},             /* East Africa */
 627     {"edt", 4, 0, 1},
 628     {"eest", 3, 0, 0},            /* Eastern Europe DST */
 629     {"eet", 2, 0, 0},             /* Eastern Europe */
 630     {"egst", 0, 0, 0},            /* Eastern Greenland DST */
 631     {"egt", 1, 0, 1},             /* Eastern Greenland */
 632     {"est", 5, 0, 1},
 633     {"gmt", 0, 0, 0},
 634     {"gst", 4, 0, 0},             /* Presian Gulf */
 635     {"hkt", 8, 0, 0},             /* Hong Kong */
 636     {"ict", 7, 0, 0},             /* Indochina */
 637     {"idt", 3, 0, 0},             /* Israel DST */
 638     {"ist", 2, 0, 0},             /* Israel */
 639     /*{ "ist",   5, 30, 0 }, *//* India */
 640     {"jst", 9, 0, 0},             /* Japan */
 641     {"kst", 9, 0, 0},             /* Korea */
 642     {"mdt", 6, 0, 1},
 643     {"met", 1, 0, 0},             /* this is now officially CET */
 644     {"msd", 4, 0, 0},             /* Moscow DST */
 645     {"msk", 3, 0, 0},             /* Moscow */
 646     {"mst", 7, 0, 1},
 647     {"nzdt", 13, 0, 0},           /* New Zealand DST */
 648     {"nzst", 12, 0, 0},           /* New Zealand */
 649     {"pdt", 7, 0, 1},
 650     {"pst", 8, 0, 1},
 651     {"sat", 2, 0, 0},             /* South Africa */
 652     {"smt", 4, 0, 0},             /* Seychelles */
 653     {"sst", 11, 0, 1},            /* Samoa */
 654     /*{ "sst",   8,  0, 0 }, *//* Singapore */
 655     {"utc", 0, 0, 0},
 656     {"wat", 0, 0, 0},             /* West Africa */
 657     {"west", 1, 0, 0},            /* Western Europe DST */
 658     {"wet", 0, 0, 0},             /* Western Europe */
 659     {"wgst", 2, 0, 1},            /* Western Greenland DST */
 660     {"wgt", 3, 0, 1},             /* Western Greenland */
 661     {"wst", 8, 0, 0},             /* Western Australia */
 662 };
 663
 664 /* parses a date string in RFC822 format:
 665  *
 666  * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 667  *
 668  * This routine assumes that `h' has been initialized to 0.  the `timezone'
 669  * field is optional, defaulting to +0000 if missing.
 670  */
 671 time_t mutt_parse_date(const char *s, HEADER *h)
 672 {
 673     int zhours = 0, zminutes = 0, zoccident = 0;
 674     char scratch[SHORT_STRING];
 675     struct tm tm;
 676     int count = 0;
 677     char *p;
 678
 679     /* Don't modify our argument. Fixed-size buffer is ok here since
 680        the date format imposes a natural limit.  */
 681
 682     m_strcpy(scratch, sizeof(scratch), s);
 683
 684     /* kill the day of the week, if it exists. */
 685     p = strchr(scratch, ',');
 686     p = vskipspaces(p ? p + 1 : scratch);
 687
 688     p_clear(&tm, 1);
 689
 690     while ((p = strtok (p, " \t")) != NULL) {
 691         char tzstr[SHORT_STRING];
 692         const char *ptz;
 693
 694         switch (count) {
 695           case 0:                    /* day of the month */
 696             if (!isdigit((unsigned char)*p))
 697                 return -1;
 698             tm.tm_mday = atoi(p);
 699             if (tm.tm_mday > 31)
 700                 return -1;
 701             break;
 702
 703           case 1:                    /* month of the year */
 704             tm.tm_mon = mutt_check_month(p);
 705             if (tm.tm_mon < 0)
 706                 return -1;
 707             break;
 708
 709           case 2:                    /* year */
 710             tm.tm_year = atoi(p);
 711             if (tm.tm_year < 50)
 712                 tm.tm_year += 100;
 713             else if (tm.tm_year >= 1900)
 714                 tm.tm_year -= 1900;
 715             break;
 716
 717           case 3:                    /* time of day */
 718             tm.tm_hour = strtol(p, &p, 10);
 719             if (*p++ != ':')
 720                 return -1;
 721             tm.tm_min  = strtol(p, &p, 10);
 722             if (*p++ == ':') {
 723                 tm.tm_sec = strtol(p, &p, 10);
 724             } else {
 725                 tm.tm_sec = 0;
 726             }
 727             break;
 728
 729           case 4:                    /* timezone */
 730             /* sometimes we see things like (MST) or (-0700) so attempt to
 731              * compensate by uncommenting the string if non-RFC822 compliant
 732              */
 733             ptz = uncomment_timezone(tzstr, sizeof(tzstr), p);
 734
 735             if (*ptz == '+' || *ptz == '-') {
 736                 if (isdigit((unsigned char)ptz[1])
 737                 &&  isdigit((unsigned char)ptz[2])
 738                 &&  isdigit((unsigned char)ptz[3])
 739                 &&  isdigit((unsigned char)ptz[4]))
 740                 {
 741                     zoccident = ptz[0] == '-';
 742                     zhours    = (ptz[1] - '0') * 10 + (ptz[2] - '0');
 743                     zminutes  = (ptz[3] - '0') * 10 + (ptz[4] - '0');
 744                 }
 745             } else {
 746                 struct tz_t *tz;
 747
 748                 /* This is safe to do: A pointer to a struct equals a pointer to its
 749                  * first element*/
 750                 tz = bsearch(ptz, TimeZones, countof(TimeZones), sizeof(TimeZones[0]),
 751                              (int (*)(const void *, const void *))ascii_strcasecmp);
 752
 753                 if (tz) {
 754                     zhours = tz->zhours;
 755                     zminutes = tz->zminutes;
 756                     zoccident = tz->zoccident;
 757                 }
 758
 759                 /* ad hoc support for the European MET (now officially CET) TZ */
 760                 if (ascii_strcasecmp(p, "MET") == 0) {
 761                     if ((p = strtok (NULL, " \t")) && !ascii_strcasecmp(p, "DST")) {
 762                         zhours++;
 763                     }
 764                 }
 765             }
 766             break;
 767         }
 768         count++;
 769         p = NULL;
 770     }
 771
 772     if (count < 4) {  /* don't check for missing timezone */
 773         debug_print (1, ("error parsing date format, using received time\n"));
 774         return -1;
 775     }
 776
 777     if (h) {
 778         h->zhours    = zhours;
 779         h->zminutes  = zminutes;
 780         h->zoccident = zoccident;
 781     }
 782
 783     return mutt_mktime(&tm, 0) + (zoccident ? 1 : -1) * (zhours * 3600 + zminutes * 60);
 784 }
 785
 786 #include "rfc822hdrs.h"
 787
 788 void mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
 789                             short user_hdrs, short weed, short do_2047,
 790                             LIST **lastp)
 791 {
 792     switch (rfc822_well_known(line)) {
 793       case HDR_APPARENTLY_FROM:
 794         e->from = rfc822_parse_adrlist (e->from, p);
 795         break;
 796
 797       case HDR_APPARENTLY_TO:
 798         e->to = rfc822_parse_adrlist (e->to, p);
 799         break;
 800
 801       case HDR_BCC:
 802         e->bcc = rfc822_parse_adrlist (e->bcc, p);
 803         break;
 804
 805       case HDR_CC:
 806         e->cc = rfc822_parse_adrlist (e->cc, p);
 807         break;
 808
 809       case HDR_CONTENT_DESCRIPTION:
 810         if (hdr) {
 811             m_strreplace(&hdr->content->description, p);
 812             rfc2047_decode(&hdr->content->description);
 813         }
 814         break;
 815
 816       case HDR_CONTENT_DISPOSITION:
 817         if (hdr)
 818             parse_content_disposition(p, hdr->content);
 819         break;
 820
 821       case HDR_CONTENT_LENGTH:
 822         if (hdr) {
 823             if ((hdr->content->length = atoi(p)) < 0)
 824                 hdr->content->length = -1;
 825         }
 826         break;
 827
 828       case HDR_CONTENT_TRANSFER_ENCODING:
 829         if (hdr)
 830             hdr->content->encoding = mutt_check_encoding(p);
 831         break;
 832
 833       case HDR_CONTENT_TYPE:
 834         if (hdr)
 835             mutt_parse_content_type (p, hdr->content);
 836         break;
 837
 838       case HDR_DATE:
 839         m_strreplace(&e->date, p);
 840         if (hdr)
 841             hdr->date_sent = mutt_parse_date (p, hdr);
 842         break;
 843
 844       case HDR_EXPIRES:
 845         if (hdr && mutt_parse_date (p, NULL) < time (NULL))
 846             hdr->expired = 1;
 847         break;
 848
 849 #ifdef USE_NNTP
 850       case HDR_FOLLOWUP_TO:
 851         if (!e->followup_to) {
 852             m_strrtrim(p);
 853             e->followup_to = m_strdup(skipspaces(p));
 854         }
 855         break;
 856 #endif
 857
 858       case HDR_FROM:
 859         e->from = rfc822_parse_adrlist(e->from, p);
 860         /* don't leave from info NULL if there's an invalid address (or
 861          * whatever) in From: field; mutt would just display it as empty
 862          * and mark mail/(esp.) news article as your own. aaargh! this
 863          * bothered me for _years_ */
 864         if (!e->from) {
 865             e->from = address_new();
 866             e->from->personal = m_strdup(p);
 867         }
 868         break;
 869
 870       case HDR_IN_REPLY_TO:
 871         mutt_free_list(&e->in_reply_to);
 872         e->in_reply_to = mutt_parse_references(p, 1);
 873         break;
 874
 875       case HDR_LINES:
 876         if (hdr) {
 877             /* HACK - mutt has, for a very short time, produced negative
 878                Lines header values.  Ignore them. */
 879             hdr->lines = MAX(0, atoi(p));
 880         }
 881         break;
 882
 883       case HDR_LIST_POST:
 884         /* RFC 2369.  FIXME: We should ignore whitespace, but don't. */
 885         if (strncmp(p, "NO", 2)) {
 886             char *beg, *end;
 887
 888             for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
 889                 ++beg;
 890                 if (!(end = strchr (beg, '>')))
 891                     break;
 892
 893                 /* Take the first mailto URL */
 894                 if (url_check_scheme (beg) == U_MAILTO) {
 895                     p_delete(&e->list_post);
 896                     e->list_post = p_dupstr(beg, end - beg);
 897                     break;
 898                 }
 899             }
 900         }
 901         break;
 902
 903       case HDR_MAIL_FOLLOWUP_TO:
 904         e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
 905         break;
 906
 907       case HDR_MAIL_REPLY_TO:
 908         address_delete (&e->reply_to);
 909         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 910         break;
 911
 912       case HDR_MESSAGE_ID:
 913         {
 914             const char *beg, *end;
 915
 916             /* We add a new "Message-ID:" when building a message */
 917             p_delete(&e->message_id);
 918
 919             if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
 920                 e->message_id = p_dupstr(beg, (end - beg) + 1);
 921         }
 922         break;
 923
 924       case HDR_MIME_VERSION:
 925         if (hdr)
 926             hdr->mime = 1;
 927         break;
 928
 929 #ifdef USE_NNTP
 930       case HDR_NEWSGROUPS:
 931         p_delete(&e->newsgroups);
 932         m_strrtrim(p);
 933         e->newsgroups = m_strdup(skipspaces(p));
 934         break;
 935 #endif
 936
 937       case HDR_ORGANIZATION:
 938         if (!e->organization && m_strcasecmp(p, "unknown"))
 939             e->organization = m_strdup(p);
 940         break;
 941
 942       case HDR_RECEIVED:
 943         if (hdr && !hdr->received) {
 944             char *d = strchr(p, ';');
 945             if (d)
 946                 hdr->received = mutt_parse_date(d + 1, NULL);
 947         }
 948         break;
 949
 950       case HDR_REFERENCES:
 951         mutt_free_list(&e->references);
 952         e->references = mutt_parse_references(p, 0);
 953         break;
 954
 955       case HDR_REPLY_TO:
 956         e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
 957         break;
 958
 959       case HDR_RETURN_PATH:
 960         e->return_path = rfc822_parse_adrlist(e->return_path, p);
 961         break;
 962
 963       case HDR_SENDER:
 964         e->sender = rfc822_parse_adrlist (e->sender, p);
 965         break;
 966
 967       case HDR_STATUS:
 968         if (hdr) {
 969             while (*p) {
 970                 switch (*p) {
 971                   case 'r':
 972                     hdr->replied = 1;
 973                     break;
 974                   case 'O':
 975                     hdr->old = 1;
 976                     break;
 977                   case 'R':
 978                     hdr->read = 1;
 979                     break;
 980                 }
 981                 p++;
 982             }
 983         }
 984         break;
 985
 986       case HDR_SUBJECT:
 987         if (!e->subject)
 988             e->subject = m_strdup(p);
 989         break;
 990
 991       case HDR_SUPERCEDES:
 992       case HDR_SUPERSEDES:
 993         if (hdr)
 994             e->supersedes = m_strdup(p);
 995         break;
 996
 997       case HDR_TO:
 998         e->to = rfc822_parse_adrlist(e->to, p);
 999         break;
1000
1001 #ifdef USE_NNTP
1002       case HDR_X_COMMENT_TO:
1003         if (!e->x_comment_to)
1004             e->x_comment_to = m_strdup(p);
1005         break;
1006 #endif
1007
1008       case HDR_X_LABEL:
1009         e->x_label = m_strdup(p);
1010         break;
1011
1012 #ifdef USE_NNTP
1013       case HDR_XREF:
1014         if (!e->xref)
1015             e->xref = m_strdup(p);
1016         break;
1017 #endif
1018
1019       case HDR_X_STATUS:
1020         if (hdr) {
1021             while (*p) {
1022                 switch (*p) {
1023                   case 'A':
1024                     hdr->replied = 1;
1025                     break;
1026                   case 'D':
1027                     hdr->deleted = 1;
1028                     break;
1029                   case 'F':
1030                     hdr->flagged = 1;
1031                     break;
1032                   default:
1033                     break;
1034                 }
1035                 p++;
1036             }
1037         }
1038         break;
1039
1040       default:
1041         if (!user_hdrs) {
1042             return;
1043         }
1044         /* restore the original line */
1045         line[m_strlen(line)] = ':';
1046
1047         if (weed && option(OPTWEED) && mutt_matches_ignore(line, Ignore)
1048         && !mutt_matches_ignore(line, UnIgnore)) {
1049             return;
1050         }
1051
1052         if (*lastp) {
1053             (*lastp)->next = mutt_new_list();
1054             (*lastp) = (*lastp)->next;
1055         } else {
1056             (*lastp) = e->userhdrs = mutt_new_list ();
1057         }
1058
1059         (*lastp)->data = m_strdup(line);
1060         if (do_2047)
1061             rfc2047_decode(&(*lastp)->data);
1062     }
1063 }
1064
1065 /*** XXX: MC READ MARK ***/
1066
1067
1068
1069 /* mutt_read_rfc822_header() -- parses a RFC822 header
1070  *
1071  * Args:
1072  *
1073  * f            stream to read from
1074  *
1075  * hdr          header structure of current message (optional).
1076  *
1077  * user_hdrs    If set, store user headers.  Used for recall-message and
1078  *              postpone modes.
1079  *
1080  * weed         If this parameter is set and the user has activated the
1081  *              $weed option, honor the header weed list for user headers.
1082  *              Used for recall-message.
1083  *
1084  * Returns:     newly allocated envelope structure.  You should free it by
1085  *              envelope_delete() when envelope stay unneeded.
1086  */
1087 ENVELOPE *mutt_read_rfc822_header (FILE * f, HEADER * hdr, short user_hdrs,
1088                                    short weed)
1089 {
1090   ENVELOPE *e = envelope_new();
1091   LIST *last = NULL;
1092   char *line = p_new(char, LONG_STRING);
1093   char *p;
1094   off_t loc;
1095   ssize_t linelen = LONG_STRING;
1096   char buf[LONG_STRING + 1];
1097
1098   if (hdr) {
1099     if (hdr->content == NULL) {
1100       hdr->content = mutt_new_body ();
1101
1102       /* set the defaults from RFC1521 */
1103       hdr->content->type = TYPETEXT;
1104       hdr->content->subtype = m_strdup("plain");
1105       hdr->content->encoding = ENC7BIT;
1106       hdr->content->length = -1;
1107
1108       /* RFC 2183 says this is arbitrary */
1109       hdr->content->disposition = DISPINLINE;
1110     }
1111   }
1112
1113   while ((loc = ftello (f)),
1114          mutt_read_rfc822_line (f, &line, &linelen))
1115   {
1116     if ((p = strpbrk (line, ": \t")) == NULL || *p != ':') {
1117       char return_path[LONG_STRING];
1118       time_t t;
1119
1120       /* some bogus MTAs will quote the original "From " line */
1121       if (m_strncmp(">From ", line, 6) == 0)
1122         continue;               /* just ignore */
1123       else if (is_from (line, return_path, sizeof (return_path), &t)) {
1124         /* MH somtimes has the From_ line in the middle of the header! */
1125         if (hdr && !hdr->received)
1126           hdr->received = t - mutt_local_tz (t);
1127         continue;
1128       }
1129
1130       fseeko (f, loc, 0);
1131       break;                    /* end of header */
1132     }
1133
1134     *buf = '\0';
1135
1136     if (mutt_match_spam_list (line, SpamList, buf, sizeof (buf))) {
1137       if (!rx_list_match (NoSpamList, line)) {
1138
1139         /* if spam tag already exists, figure out how to amend it */
1140         if (e->spam && *buf) {
1141           /* If SpamSep defined, append with separator */
1142           if (SpamSep) {
1143             mutt_buffer_addstr (e->spam, SpamSep);
1144             mutt_buffer_addstr (e->spam, buf);
1145           }
1146
1147           /* else overwrite */
1148           else {
1149             e->spam->dptr = e->spam->data;
1150             *e->spam->dptr = '\0';
1151             mutt_buffer_addstr (e->spam, buf);
1152           }
1153         }
1154
1155         /* spam tag is new, and match expr is non-empty; copy */
1156         else if (!e->spam && *buf) {
1157           e->spam = mutt_buffer_from (NULL, buf);
1158         }
1159
1160         /* match expr is empty; plug in null string if no existing tag */
1161         else if (!e->spam) {
1162           e->spam = mutt_buffer_from (NULL, "");
1163         }
1164
1165         if (e->spam && e->spam->data)
1166           debug_print (5, ("spam = %s\n", e->spam->data));
1167       }
1168     }
1169
1170     *p++ = 0;
1171     p = vskipspaces(p);
1172     if (!*p)
1173       continue;                 /* skip empty header fields */
1174
1175     mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last);
1176   }
1177
1178   p_delete(&line);
1179
1180   if (hdr) {
1181     hdr->content->hdr_offset = hdr->offset;
1182     hdr->content->offset = ftello (f);
1183     rfc2047_decode_envelope(e);
1184     /* check for missing or invalid date */
1185     if (hdr->date_sent <= 0) {
1186       debug_print (1, ("no date found, using received "
1187                        "time from msg separator\n"));
1188       hdr->date_sent = hdr->received;
1189     }
1190   }
1191
1192   return (e);
1193 }
1194
1195 address_t *mutt_parse_adrlist (address_t * p, const char *s)
1196 {
1197   const char *q;
1198
1199   /* check for a simple whitespace separated list of addresses */
1200   if ((q = strpbrk (s, "\"<>():;,\\")) == NULL) {
1201     char tmp[HUGE_STRING];
1202     char *r;
1203
1204     m_strcpy(tmp, sizeof(tmp), s);
1205     r = tmp;
1206     while ((r = strtok (r, " \t")) != NULL) {
1207       p = rfc822_parse_adrlist (p, r);
1208       r = NULL;
1209     }
1210   }
1211   else
1212     p = rfc822_parse_adrlist (p, s);
1213
1214   return p;
1215 }
1216
1217
1218 /* Compares mime types to the ok and except lists */
1219 int count_body_parts_check(LIST **checklist, BODY *b, int dflt) {
1220   LIST *type;
1221   ATTACH_MATCH *a;
1222
1223   /* If list is null, use default behavior. */
1224   if (! *checklist) {
1225     /*return dflt;*/
1226     return 0;
1227   }
1228
1229   for (type = *checklist; type; type = type->next) {
1230     a = (ATTACH_MATCH *)type->data;
1231     debug_print(5, ("cbpc: %s %d/%s ?? %s/%s [%d]... ",
1232                dflt ? "[OK] " : "[EXCL] ",
1233                b->type, b->subtype, a->major, a->minor, a->major_int));
1234     if ((a->major_int == TYPEANY || a->major_int == b->type) &&
1235         !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
1236       debug_print(5, ("yes\n"));
1237       return 1;
1238     } else {
1239       debug_print(5, ("no\n"));
1240     }
1241   }
1242   return 0;
1243 }
1244
1245 #define AT_COUNT(why) { shallcount = 1; }
1246 #define AT_NOCOUNT(why) { shallcount = 0; }
1247
1248 int count_body_parts (BODY *body, int flags) {
1249   int count = 0;
1250   int shallcount, shallrecurse;
1251   BODY *bp;
1252
1253   if (body == NULL)
1254     return 0;
1255
1256   for (bp = body; bp != NULL; bp = bp->next) {
1257     /* Initial disposition is to count and not to recurse this part. */
1258     AT_COUNT("default");
1259     shallrecurse = 0;
1260
1261     debug_print(5, ("bp: desc=\"%s\"; fn=\"%s\", type=\"%d/%s\"\n",
1262                bp->description ? bp->description : ("none"),
1263                bp->filename ? bp->filename :
1264                bp->d_filename ? bp->d_filename : "(none)",
1265                bp->type, bp->subtype ? bp->subtype : "*"));
1266
1267     if (bp->type == TYPEMESSAGE) {
1268       shallrecurse = 1;
1269
1270       /* If it's an external body pointer, don't recurse it. */
1271       if (!ascii_strcasecmp (bp->subtype, "external-body"))
1272         shallrecurse = 0;
1273
1274       /* Don't count containers if they're top-level. */
1275       if (flags & M_PARTS_TOPLEVEL)
1276         AT_NOCOUNT("top-level message/*");
1277     } else if (bp->type == TYPEMULTIPART) {
1278       /* Always recurse multiparts, except multipart/alternative. */
1279       shallrecurse = 1;
1280       if (!m_strcasecmp(bp->subtype, "alternative"))
1281         shallrecurse = 0;
1282
1283       /* Don't count containers if they're top-level. */
1284       if (flags & M_PARTS_TOPLEVEL)
1285         AT_NOCOUNT("top-level multipart");
1286     }
1287
1288     if (bp->disposition == DISPINLINE &&
1289         bp->type != TYPEMULTIPART && bp->type != TYPEMESSAGE && bp == body)
1290       AT_NOCOUNT("ignore fundamental inlines");
1291
1292     /* If this body isn't scheduled for enumeration already, don't bother
1293      * profiling it further. */
1294
1295     if (shallcount) {
1296       /* Turn off shallcount if message type is not in ok list,
1297        * or if it is in except list. Check is done separately for
1298        * inlines vs. attachments.
1299        */
1300
1301       if (bp->disposition == DISPATTACH) {
1302         if (!count_body_parts_check(&AttachAllow, bp, 1))
1303           AT_NOCOUNT("attach not allowed");
1304         if (count_body_parts_check(&AttachExclude, bp, 0))
1305           AT_NOCOUNT("attach excluded");
1306       } else {
1307         if (!count_body_parts_check(&InlineAllow, bp, 1))
1308           AT_NOCOUNT("inline not allowed");
1309         if (count_body_parts_check(&InlineExclude, bp, 0))
1310           AT_NOCOUNT("excluded");
1311       }
1312     }
1313
1314     if (shallcount)
1315       count++;
1316     bp->attach_qualifies = shallcount ? 1 : 0;
1317
1318     debug_print(5, ("cbp: %p shallcount = %d\n", bp, shallcount));
1319
1320     if (shallrecurse) {
1321       debug_print(5, ("cbp: %p pre count = %d\n", bp, count));
1322       bp->attach_count = count_body_parts(bp->parts, flags & ~M_PARTS_TOPLEVEL);
1323       count += bp->attach_count;
1324       debug_print(5, ("cbp: %p post count = %d\n", bp, count));
1325     }
1326   }
1327
1328   debug_print(5, ("bp: return %d\n", count < 0 ? 0 : count));
1329   return count < 0 ? 0 : count;
1330 }
1331
1332 int mutt_count_body_parts (HEADER *hdr, int flags) {
1333   if (!option (OPTCOUNTATTACH))
1334     return (0);
1335   if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1336     return hdr->attach_total;
1337
1338   if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1339     hdr->attach_total = count_body_parts(hdr->content, flags | M_PARTS_TOPLEVEL);
1340   else
1341     hdr->attach_total = 0;
1342
1343   hdr->attach_valid = 1;
1344   return hdr->attach_total;
1345 }