parse.c

   1 /*
   2  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   3  *
   4  *     This program is free software; you can redistribute it and/or modify
   5  *     it under the terms of the GNU General Public License as published by
   6  *     the Free Software Foundation; either version 2 of the License, or
   7  *     (at your option) any later version.
   8  *
   9  *     This program is distributed in the hope that it will be useful,
  10  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  *     GNU General Public License for more details.
  13  *
  14  *     You should have received a copy of the GNU General Public License
  15  *     along with this program; if not, write to the Free Software
  16  *     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  17  */
  18
  19 #include "mutt.h"
  20 #include "mutt_regex.h"
  21 #include "mailbox.h"
  22 #include "mime.h"
  23 #include "rfc2047.h"
  24 #include "rfc2231.h"
  25 #include "mutt_crypt.h"
  26
  27 #include <string.h>
  28 #include <ctype.h>
  29 #include <sys/stat.h>
  30 #include <stdlib.h>
  31
  32 /* Reads an arbitrarily long header field, and looks ahead for continuation
  33  * lines.  ``line'' must point to a dynamically allocated string; it is
  34  * increased if more space is required to fit the whole line.
  35  */
  36 static char *read_rfc822_line (FILE *f, char *line, size_t *linelen)
  37 {
  38   char *buf = line;
  39   char ch;
  40   size_t offset = 0;
  41
  42   FOREVER
  43   {
  44     if (fgets (buf, *linelen - offset, f) == NULL ||    /* end of file or */
  45         (ISSPACE (*line) && !offset))                   /* end of headers */
  46     {
  47       *line = 0;
  48       return (line);
  49     }
  50
  51     buf += strlen (buf) - 1;
  52     if (*buf == '\n')
  53     {
  54       /* we did get a full line. remove trailing space */
  55       while (ISSPACE (*buf))
  56         *buf-- = 0;     /* we cannot come beyond line's beginning because
  57                          * it begins with a non-space */
  58
  59       /* check to see if the next line is a continuation line */
  60       if ((ch = fgetc (f)) != ' ' && ch != '\t')
  61       {
  62         ungetc (ch, f);
  63         return (line); /* next line is a separate header field or EOH */
  64       }
  65
  66       /* eat tabs and spaces from the beginning of the continuation line */
  67       while ((ch = fgetc (f)) == ' ' || ch == '\t')
  68         ;
  69       ungetc (ch, f);
  70       *++buf = ' '; /* string is still terminated because we removed
  71                        at least one whitespace char above */
  72     }
  73
  74     buf++;
  75     offset = buf - line;
  76     if (*linelen < offset + STRING)
  77     {
  78       /* grow the buffer */
  79       *linelen += STRING;
  80       safe_realloc (&line, *linelen);
  81       buf = line + offset;
  82     }
  83   }
  84   /* not reached */
  85 }
  86
  87 LIST *mutt_parse_references (char *s, int in_reply_to)
  88 {
  89   LIST *t, *lst = NULL;
  90   int m, n = 0;
  91   char *o = NULL, *new, *at;
  92
  93   while ((s = strtok (s, " \t;")) != NULL)
  94   {
  95     /*
  96      * some mail clients add other garbage besides message-ids, so do a quick
  97      * check to make sure this looks like a valid message-id
  98      * some idiotic clients also break their message-ids between lines, deal
  99      * with that too (give up if it's more than two lines, though)
 100      */
 101     t = NULL;
 102     new = NULL;
 103
 104     if (*s == '<')
 105     {
 106       n = strlen (s);
 107       if (s[n-1] != '>')
 108       {
 109         o = s;
 110         s = NULL;
 111         continue;
 112       }
 113
 114       new = safe_strdup (s);
 115     }
 116     else if (o)
 117     {
 118       m = strlen (s);
 119       if (s[m - 1] == '>')
 120       {
 121         new = safe_malloc (sizeof (char) * (n + m + 1));
 122         strcpy (new, o);        /* __STRCPY_CHECKED__ */
 123         strcpy (new + n, s);    /* __STRCPY_CHECKED__ */
 124       }
 125     }
 126     if (new)
 127     {
 128       /* make sure that this really does look like a message-id.
 129        * it should have exactly one @, and if we're looking at
 130        * an in-reply-to header, make sure that the part before
 131        * the @ has more than eight characters or it's probably
 132        * an email address
 133        */
 134       if (!(at = strchr (new, '@')) || strchr (at + 1, '@')
 135           || (in_reply_to && at - new <= 8))
 136         FREE (&new);
 137       else
 138       {
 139         t = (LIST *) safe_malloc (sizeof (LIST));
 140         t->data = new;
 141         t->next = lst;
 142         lst = t;
 143       }
 144     }
 145     o = NULL;
 146     s = NULL;
 147   }
 148
 149   return (lst);
 150 }
 151
 152 int mutt_check_encoding (const char *c)
 153 {
 154   if (ascii_strncasecmp ("7bit", c, sizeof ("7bit")-1) == 0)
 155     return (ENC7BIT);
 156   else if (ascii_strncasecmp ("8bit", c, sizeof ("8bit")-1) == 0)
 157     return (ENC8BIT);
 158   else if (ascii_strncasecmp ("binary", c, sizeof ("binary")-1) == 0)
 159     return (ENCBINARY);
 160   else if (ascii_strncasecmp ("quoted-printable", c, sizeof ("quoted-printable")-1) == 0)
 161     return (ENCQUOTEDPRINTABLE);
 162   else if (ascii_strncasecmp ("base64", c, sizeof("base64")-1) == 0)
 163     return (ENCBASE64);
 164   else if (ascii_strncasecmp ("x-uuencode", c, sizeof("x-uuencode")-1) == 0)
 165     return (ENCUUENCODED);
 166 #ifdef SUN_ATTACHMENT
 167   else if (ascii_strncasecmp ("uuencode", c, sizeof("uuencode")-1) == 0)
 168     return (ENCUUENCODED);
 169 #endif
 170   else
 171     return (ENCOTHER);
 172 }
 173
 174 static PARAMETER *parse_parameters (const char *s)
 175 {
 176   PARAMETER *head = 0, *cur = 0, *new;
 177   char buffer[LONG_STRING];
 178   const char *p;
 179   size_t i;
 180
 181   dprint (2, (debugfile, "parse_parameters: `%s'\n", s));
 182
 183   while (*s)
 184   {
 185     if ((p = strpbrk (s, "=;")) == NULL)
 186     {
 187       dprint(1, (debugfile, "parse_parameters: malformed parameter: %s\n", s));
 188       goto bail;
 189     }
 190
 191     /* if we hit a ; now the parameter has no value, just skip it */
 192     if (*p != ';')
 193     {
 194       i = p - s;
 195
 196       new = mutt_new_parameter ();
 197
 198       new->attribute = safe_malloc (i + 1);
 199       memcpy (new->attribute, s, i);
 200       new->attribute[i] = 0;
 201
 202       /* remove whitespace from the end of the attribute name */
 203       while (ISSPACE (new->attribute[--i]))
 204         new->attribute[i] = 0;
 205
 206       s = p + 1; /* skip over the = */
 207       SKIPWS (s);
 208
 209       if (*s == '"')
 210       {
 211         s++;
 212         for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
 213         {
 214           if (*s == '\\')
 215           {
 216             /* Quote the next character */
 217             buffer[i] = s[1];
 218             if (!*++s)
 219               break;
 220           }
 221           else
 222             buffer[i] = *s;
 223         }
 224         buffer[i] = 0;
 225         if (*s)
 226           s++; /* skip over the " */
 227       }
 228       else
 229       {
 230         for (i=0; *s && *s != ' ' && *s != ';' && i < sizeof (buffer) - 1; i++, s++)
 231           buffer[i] = *s;
 232         buffer[i] = 0;
 233       }
 234
 235       new->value = safe_strdup (buffer);
 236
 237       dprint (2, (debugfile, "parse_parameter: `%s' = `%s'\n",
 238                   new->attribute ? new->attribute : "",
 239                   new->value ? new->value : ""));
 240
 241       /* Add this parameter to the list */
 242       if (head)
 243       {
 244         cur->next = new;
 245         cur = cur->next;
 246       }
 247       else
 248         head = cur = new;
 249     }
 250     else
 251     {
 252       dprint (1, (debugfile, "parse_parameters(): parameter with no value: %s\n", s));
 253       s = p;
 254     }
 255
 256     /* Find the next parameter */
 257     if (*s != ';' && (s = strchr (s, ';')) == NULL)
 258         break; /* no more parameters */
 259
 260     do
 261     {
 262       s++;
 263
 264       /* Move past any leading whitespace */
 265       SKIPWS (s);
 266     }
 267     while (*s == ';'); /* skip empty parameters */
 268   }
 269
 270   bail:
 271
 272   rfc2231_decode_parameters (&head);
 273   return (head);
 274 }
 275
 276 int mutt_check_mime_type (const char *s)
 277 {
 278   if (ascii_strcasecmp ("text", s) == 0)
 279     return TYPETEXT;
 280   else if (ascii_strcasecmp ("multipart", s) == 0)
 281     return TYPEMULTIPART;
 282 #ifdef SUN_ATTACHMENT
 283   else if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
 284     return TYPEMULTIPART;
 285 #endif
 286   else if (ascii_strcasecmp ("application", s) == 0)
 287     return TYPEAPPLICATION;
 288   else if (ascii_strcasecmp ("message", s) == 0)
 289     return TYPEMESSAGE;
 290   else if (ascii_strcasecmp ("image", s) == 0)
 291     return TYPEIMAGE;
 292   else if (ascii_strcasecmp ("audio", s) == 0)
 293     return TYPEAUDIO;
 294   else if (ascii_strcasecmp ("video", s) == 0)
 295     return TYPEVIDEO;
 296   else if (ascii_strcasecmp ("model", s) == 0)
 297     return TYPEMODEL;
 298   else
 299     return TYPEOTHER;
 300 }
 301
 302 void mutt_parse_content_type (char *s, BODY *ct)
 303 {
 304   char *pc;
 305   char *subtype;
 306
 307   FREE (&ct->subtype);
 308   mutt_free_parameter(&ct->parameter);
 309
 310   /* First extract any existing parameters */
 311   if ((pc = strchr(s, ';')) != NULL)
 312   {
 313     *pc++ = 0;
 314     while (*pc && ISSPACE (*pc))
 315       pc++;
 316     ct->parameter = parse_parameters(pc);
 317
 318     /* Some pre-RFC1521 gateways still use the "name=filename" convention,
 319      * but if a filename has already been set in the content-disposition,
 320      * let that take precedence, and don't set it here */
 321     if ((pc = mutt_get_parameter( "name", ct->parameter)) != 0 && !ct->filename)
 322       ct->filename = safe_strdup(pc);
 323
 324 #ifdef SUN_ATTACHMENT
 325     /* this is deep and utter perversion */
 326     if ((pc = mutt_get_parameter ("conversions", ct->parameter)) != 0)
 327       ct->encoding = mutt_check_encoding (pc);
 328 #endif
 329
 330   }
 331
 332   /* Now get the subtype */
 333   if ((subtype = strchr(s, '/')))
 334   {
 335     *subtype++ = '\0';
 336     for(pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++)
 337       ;
 338     *pc = '\0';
 339     ct->subtype = safe_strdup (subtype);
 340   }
 341
 342   /* Finally, get the major type */
 343   ct->type = mutt_check_mime_type (s);
 344
 345 #ifdef SUN_ATTACHMENT
 346   if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
 347       ct->subtype = safe_strdup ("x-sun-attachment");
 348 #endif
 349
 350   if (ct->type == TYPEOTHER)
 351   {
 352     ct->xtype = safe_strdup (s);
 353   }
 354
 355   if (ct->subtype == NULL)
 356   {
 357     /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
 358      * field, so we can attempt to convert the type to BODY here.
 359      */
 360     if (ct->type == TYPETEXT)
 361       ct->subtype = safe_strdup ("plain");
 362     else if (ct->type == TYPEAUDIO)
 363       ct->subtype = safe_strdup ("basic");
 364     else if (ct->type == TYPEMESSAGE)
 365       ct->subtype = safe_strdup ("rfc822");
 366     else if (ct->type == TYPEOTHER)
 367     {
 368       char buffer[SHORT_STRING];
 369
 370       ct->type = TYPEAPPLICATION;
 371       snprintf (buffer, sizeof (buffer), "x-%s", s);
 372       ct->subtype = safe_strdup (buffer);
 373     }
 374     else
 375       ct->subtype = safe_strdup ("x-unknown");
 376   }
 377
 378   /* Default character set for text types. */
 379   if (ct->type == TYPETEXT)
 380   {
 381     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
 382       mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
 383   }
 384
 385 }
 386
 387 static void parse_content_disposition (char *s, BODY *ct)
 388 {
 389   PARAMETER *parms;
 390
 391   if (!ascii_strncasecmp ("inline", s, 6))
 392     ct->disposition = DISPINLINE;
 393   else if (!ascii_strncasecmp ("form-data", s, 9))
 394     ct->disposition = DISPFORMDATA;
 395   else
 396     ct->disposition = DISPATTACH;
 397
 398   /* Check to see if a default filename was given */
 399   if ((s = strchr (s, ';')) != NULL)
 400   {
 401     s++;
 402     SKIPWS (s);
 403     if ((s = mutt_get_parameter ("filename", (parms = parse_parameters (s)))) != 0)
 404       mutt_str_replace (&ct->filename, s);
 405     if ((s = mutt_get_parameter ("name", parms)) != 0)
 406       ct->form_name = safe_strdup (s);
 407     mutt_free_parameter (&parms);
 408   }
 409 }
 410
 411 /* args:
 412  *      fp      stream to read from
 413  *
 414  *      digest  1 if reading subparts of a multipart/digest, 0
 415  *              otherwise
 416  */
 417
 418 BODY *mutt_read_mime_header (FILE *fp, int digest)
 419 {
 420   BODY *p = mutt_new_body();
 421   char *c;
 422   char *line = safe_malloc (LONG_STRING);
 423   size_t linelen = LONG_STRING;
 424
 425   p->hdr_offset  = ftell(fp);
 426
 427   p->encoding    = ENC7BIT; /* default from RFC1521 */
 428   p->type        = digest ? TYPEMESSAGE : TYPETEXT;
 429   p->disposition = DISPINLINE;
 430
 431   while (*(line = read_rfc822_line (fp, line, &linelen)) != 0)
 432   {
 433     /* Find the value of the current header */
 434     if ((c = strchr (line, ':')))
 435     {
 436       *c = 0;
 437       c++;
 438       SKIPWS (c);
 439       if (!*c)
 440       {
 441         dprint (1, (debugfile, "mutt_read_mime_header(): skipping empty header field: %s\n", line));
 442         continue;
 443       }
 444     }
 445     else
 446     {
 447       dprint (1, (debugfile, "read_mime_header: bogus MIME header: %s\n", line));
 448       break;
 449     }
 450
 451     if (!ascii_strncasecmp ("content-", line, 8))
 452     {
 453       if (!ascii_strcasecmp ("type", line + 8))
 454         mutt_parse_content_type (c, p);
 455       else if (!ascii_strcasecmp ("transfer-encoding", line + 8))
 456         p->encoding = mutt_check_encoding (c);
 457       else if (!ascii_strcasecmp ("disposition", line + 8))
 458         parse_content_disposition (c, p);
 459       else if (!ascii_strcasecmp ("description", line + 8))
 460       {
 461         mutt_str_replace (&p->description, c);
 462         rfc2047_decode (&p->description);
 463       }
 464     }
 465 #ifdef SUN_ATTACHMENT
 466     else if (!ascii_strncasecmp ("x-sun-", line, 6))
 467     {
 468       if (!ascii_strcasecmp ("data-type", line + 6))
 469         mutt_parse_content_type (c, p);
 470       else if (!ascii_strcasecmp ("encoding-info", line + 6))
 471         p->encoding = mutt_check_encoding (c);
 472       else if (!ascii_strcasecmp ("content-lines", line + 6))
 473         mutt_set_parameter ("content-lines", c, &(p->parameter));
 474       else if (!ascii_strcasecmp ("data-description", line + 6))
 475       {
 476         mutt_str_replace (&p->description, c);
 477         rfc2047_decode (&p->description);
 478       }
 479     }
 480 #endif
 481   }
 482   p->offset = ftell (fp); /* Mark the start of the real data */
 483   if (p->type == TYPETEXT && !p->subtype)
 484     p->subtype = safe_strdup ("plain");
 485   else if (p->type == TYPEMESSAGE && !p->subtype)
 486     p->subtype = safe_strdup ("rfc822");
 487
 488   FREE (&line);
 489
 490   return (p);
 491 }
 492
 493 void mutt_parse_part (FILE *fp, BODY *b)
 494 {
 495   char *bound = 0;
 496
 497   switch (b->type)
 498   {
 499     case TYPEMULTIPART:
 500 #ifdef SUN_ATTACHMENT
 501       if ( !ascii_strcasecmp (b->subtype, "x-sun-attachment") )
 502           bound = "--------";
 503       else
 504 #endif
 505           bound = mutt_get_parameter ("boundary", b->parameter);
 506
 507       fseek (fp, b->offset, SEEK_SET);
 508       b->parts =  mutt_parse_multipart (fp, bound,
 509                                         b->offset + b->length,
 510                                         ascii_strcasecmp ("digest", b->subtype) == 0);
 511       break;
 512
 513     case TYPEMESSAGE:
 514       if (b->subtype)
 515       {
 516         fseek (fp, b->offset, SEEK_SET);
 517         if (mutt_is_message_type(b->type, b->subtype))
 518           b->parts = mutt_parse_messageRFC822 (fp, b);
 519         else if (ascii_strcasecmp (b->subtype, "external-body") == 0)
 520           b->parts = mutt_read_mime_header (fp, 0);
 521         else
 522           return;
 523       }
 524       break;
 525
 526     default:
 527       return;
 528   }
 529
 530   /* try to recover from parsing error */
 531   if (!b->parts)
 532   {
 533     b->type = TYPETEXT;
 534     mutt_str_replace (&b->subtype, "plain");
 535   }
 536 }
 537
 538 /* parse a MESSAGE/RFC822 body
 539  *
 540  * args:
 541  *      fp              stream to read from
 542  *
 543  *      parent          structure which contains info about the message/rfc822
 544  *                      body part
 545  *
 546  * NOTE: this assumes that `parent->length' has been set!
 547  */
 548
 549 BODY *mutt_parse_messageRFC822 (FILE *fp, BODY *parent)
 550 {
 551   BODY *msg;
 552
 553   parent->hdr = mutt_new_header ();
 554   parent->hdr->offset = ftell (fp);
 555   parent->hdr->env = mutt_read_rfc822_header (fp, parent->hdr, 0, 0);
 556   msg = parent->hdr->content;
 557
 558   /* ignore the length given in the content-length since it could be wrong
 559      and we already have the info to calculate the correct length */
 560   /* if (msg->length == -1) */
 561   msg->length = parent->length - (msg->offset - parent->offset);
 562
 563   /* if body of this message is empty, we can end up with a negative length */
 564   if (msg->length < 0)
 565     msg->length = 0;
 566
 567   mutt_parse_part(fp, msg);
 568   return (msg);
 569 }
 570
 571 /* parse a multipart structure
 572  *
 573  * args:
 574  *      fp              stream to read from
 575  *
 576  *      boundary        body separator
 577  *
 578  *      end_off         length of the multipart body (used when the final
 579  *                      boundary is missing to avoid reading too far)
 580  *
 581  *      digest          1 if reading a multipart/digest, 0 otherwise
 582  */
 583
 584 BODY *mutt_parse_multipart (FILE *fp, const char *boundary, long end_off, int digest)
 585 {
 586 #ifdef SUN_ATTACHMENT
 587   int lines;
 588 #endif
 589   int blen, len, crlf = 0;
 590   char buffer[LONG_STRING];
 591   BODY *head = 0, *last = 0, *new = 0;
 592   int i;
 593   int final = 0; /* did we see the ending boundary? */
 594
 595   if (!boundary)
 596   {
 597     mutt_error _("multipart message has no boundary parameter!");
 598     return (NULL);
 599   }
 600
 601   blen = mutt_strlen (boundary);
 602   while (ftell (fp) < end_off && fgets (buffer, LONG_STRING, fp) != NULL)
 603   {
 604     len = mutt_strlen (buffer);
 605
 606     crlf =  (len > 1 && buffer[len - 2] == '\r') ? 1 : 0;
 607
 608     if (buffer[0] == '-' && buffer[1] == '-' &&
 609         mutt_strncmp (buffer + 2, boundary, blen) == 0)
 610     {
 611       if (last)
 612       {
 613         last->length = ftell (fp) - last->offset - len - 1 - crlf;
 614         if (last->parts && last->parts->length == 0)
 615           last->parts->length = ftell (fp) - last->parts->offset - len - 1 - crlf;
 616         /* if the body is empty, we can end up with a -1 length */
 617         if (last->length < 0)
 618           last->length = 0;
 619       }
 620
 621       /* Remove any trailing whitespace, up to the length of the boundary */
 622       for (i = len - 1; ISSPACE (buffer[i]) && i >= blen + 2; i--)
 623         buffer[i] = 0;
 624
 625       /* Check for the end boundary */
 626       if (mutt_strcmp (buffer + blen + 2, "--") == 0)
 627       {
 628         final = 1;
 629         break; /* done parsing */
 630       }
 631       else if (buffer[2 + blen] == 0)
 632       {
 633         new = mutt_read_mime_header (fp, digest);
 634
 635 #ifdef SUN_ATTACHMENT
 636         if (mutt_get_parameter ("content-lines", new->parameter)) {
 637           for (lines = atoi(mutt_get_parameter ("content-lines", new->parameter));
 638                lines; lines-- )
 639              if (ftell (fp) >= end_off || fgets (buffer, LONG_STRING, fp) == NULL)
 640                break;
 641         }
 642 #endif
 643
 644         /*
 645          * Consistency checking - catch
 646          * bad attachment end boundaries
 647          */
 648
 649         if(new->offset > end_off)
 650         {
 651           mutt_free_body(&new);
 652           break;
 653         }
 654         if (head)
 655         {
 656           last->next = new;
 657           last = new;
 658         }
 659         else
 660           last = head = new;
 661       }
 662     }
 663   }
 664
 665   /* in case of missing end boundary, set the length to something reasonable */
 666   if (last && last->length == 0 && !final)
 667     last->length = end_off - last->offset;
 668
 669   /* parse recursive MIME parts */
 670   for(last = head; last; last = last->next)
 671     mutt_parse_part(fp, last);
 672
 673   return (head);
 674 }
 675
 676 static const char *uncomment_timezone (char *buf, size_t buflen, const char *tz)
 677 {
 678   char *p;
 679   size_t len;
 680
 681   if (*tz != '(')
 682     return tz; /* no need to do anything */
 683   tz++;
 684   SKIPWS (tz);
 685   if ((p = strpbrk (tz, " )")) == NULL)
 686     return tz;
 687   len = p - tz;
 688   if (len > buflen - 1)
 689     len = buflen - 1;
 690   memcpy (buf, tz, len);
 691   buf[len] = 0;
 692   return buf;
 693 }
 694
 695 static struct tz_t
 696 {
 697   char tzname[5];
 698   unsigned char zhours;
 699   unsigned char zminutes;
 700   unsigned char zoccident; /* west of UTC? */
 701 }
 702 TimeZones[] =
 703 {
 704   { "aat",   1,  0, 1 }, /* Atlantic Africa Time */
 705   { "adt",   4,  0, 0 }, /* Arabia DST */
 706   { "ast",   3,  0, 0 }, /* Arabia */
 707 /*{ "ast",   4,  0, 1 },*/ /* Atlantic */
 708   { "bst",   1,  0, 0 }, /* British DST */
 709   { "cat",   1,  0, 0 }, /* Central Africa */
 710   { "cdt",   5,  0, 1 },
 711   { "cest",  2,  0, 0 }, /* Central Europe DST */
 712   { "cet",   1,  0, 0 }, /* Central Europe */
 713   { "cst",   6,  0, 1 },
 714 /*{ "cst",   8,  0, 0 },*/ /* China */
 715 /*{ "cst",   9, 30, 0 },*/ /* Australian Central Standard Time */
 716   { "eat",   3,  0, 0 }, /* East Africa */
 717   { "edt",   4,  0, 1 },
 718   { "eest",  3,  0, 0 }, /* Eastern Europe DST */
 719   { "eet",   2,  0, 0 }, /* Eastern Europe */
 720   { "egst",  0,  0, 0 }, /* Eastern Greenland DST */
 721   { "egt",   1,  0, 1 }, /* Eastern Greenland */
 722   { "est",   5,  0, 1 },
 723   { "gmt",   0,  0, 0 },
 724   { "gst",   4,  0, 0 }, /* Presian Gulf */
 725   { "hkt",   8,  0, 0 }, /* Hong Kong */
 726   { "ict",   7,  0, 0 }, /* Indochina */
 727   { "idt",   3,  0, 0 }, /* Israel DST */
 728   { "ist",   2,  0, 0 }, /* Israel */
 729 /*{ "ist",   5, 30, 0 },*/ /* India */
 730   { "jst",   9,  0, 0 }, /* Japan */
 731   { "kst",   9,  0, 0 }, /* Korea */
 732   { "mdt",   6,  0, 1 },
 733   { "met",   1,  0, 0 }, /* this is now officially CET */
 734   { "msd",   4,  0, 0 }, /* Moscow DST */
 735   { "msk",   3,  0, 0 }, /* Moscow */
 736   { "mst",   7,  0, 1 },
 737   { "nzdt", 13,  0, 0 }, /* New Zealand DST */
 738   { "nzst", 12,  0, 0 }, /* New Zealand */
 739   { "pdt",   7,  0, 1 },
 740   { "pst",   8,  0, 1 },
 741   { "sat",   2,  0, 0 }, /* South Africa */
 742   { "smt",   4,  0, 0 }, /* Seychelles */
 743   { "sst",  11,  0, 1 }, /* Samoa */
 744 /*{ "sst",   8,  0, 0 },*/ /* Singapore */
 745   { "utc",   0,  0, 0 },
 746   { "wat",   0,  0, 0 }, /* West Africa */
 747   { "west",  1,  0, 0 }, /* Western Europe DST */
 748   { "wet",   0,  0, 0 }, /* Western Europe */
 749   { "wgst",  2,  0, 1 }, /* Western Greenland DST */
 750   { "wgt",   3,  0, 1 }, /* Western Greenland */
 751   { "wst",   8,  0, 0 }, /* Western Australia */
 752 };
 753
 754 /* parses a date string in RFC822 format:
 755  *
 756  * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 757  *
 758  * This routine assumes that `h' has been initialized to 0.  the `timezone'
 759  * field is optional, defaulting to +0000 if missing.
 760  */
 761 time_t mutt_parse_date (const char *s, HEADER *h)
 762 {
 763   int count = 0;
 764   char *t;
 765   int hour, min, sec;
 766   struct tm tm;
 767   int i;
 768   int tz_offset = 0;
 769   int zhours = 0;
 770   int zminutes = 0;
 771   int zoccident = 0;
 772   const char *ptz;
 773   char tzstr[SHORT_STRING];
 774   char scratch[SHORT_STRING];
 775
 776   /* Don't modify our argument. Fixed-size buffer is ok here since
 777    * the date format imposes a natural limit.
 778    */
 779
 780   strfcpy (scratch, s, sizeof (scratch));
 781
 782   /* kill the day of the week, if it exists. */
 783   if ((t = strchr (scratch, ',')))
 784     t++;
 785   else
 786     t = scratch;
 787   SKIPWS (t);
 788
 789   memset (&tm, 0, sizeof (tm));
 790
 791   while ((t = strtok (t, " \t")) != NULL)
 792   {
 793     switch (count)
 794     {
 795       case 0: /* day of the month */
 796         if (!isdigit ((unsigned char) *t))
 797           return (-1);
 798         tm.tm_mday = atoi (t);
 799         if (tm.tm_mday > 31)
 800           return (-1);
 801         break;
 802
 803       case 1: /* month of the year */
 804         if ((i = mutt_check_month (t)) < 0)
 805           return (-1);
 806         tm.tm_mon = i;
 807         break;
 808
 809       case 2: /* year */
 810         tm.tm_year = atoi (t);
 811         if (tm.tm_year < 50)
 812           tm.tm_year += 100;
 813         else if (tm.tm_year >= 1900)
 814           tm.tm_year -= 1900;
 815         break;
 816
 817       case 3: /* time of day */
 818         if (sscanf (t, "%d:%d:%d", &hour, &min, &sec) == 3)
 819           ;
 820         else if (sscanf (t, "%d:%d", &hour, &min) == 2)
 821           sec = 0;
 822         else
 823         {
 824           dprint(1, (debugfile, "parse_date: could not process time format: %s\n", t));
 825           return(-1);
 826         }
 827         tm.tm_hour = hour;
 828         tm.tm_min = min;
 829         tm.tm_sec = sec;
 830         break;
 831
 832       case 4: /* timezone */
 833         /* sometimes we see things like (MST) or (-0700) so attempt to
 834          * compensate by uncommenting the string if non-RFC822 compliant
 835          */
 836         ptz = uncomment_timezone (tzstr, sizeof (tzstr), t);
 837
 838         if (*ptz == '+' || *ptz == '-')
 839         {
 840           if (ptz[1] && ptz[2] && ptz[3] && ptz[4]
 841               && isdigit ((unsigned char) ptz[1]) && isdigit ((unsigned char) ptz[2])
 842               && isdigit ((unsigned char) ptz[3]) && isdigit ((unsigned char) ptz[4]))
 843           {
 844             zhours = (ptz[1] - '0') * 10 + (ptz[2] - '0');
 845             zminutes = (ptz[3] - '0') * 10 + (ptz[4] - '0');
 846
 847             if (ptz[0] == '-')
 848               zoccident = 1;
 849           }
 850         }
 851         else
 852         {
 853           struct tz_t *tz;
 854
 855           tz = bsearch (ptz, TimeZones, sizeof TimeZones/sizeof (struct tz_t),
 856                         sizeof (struct tz_t),
 857                         (int (*)(const void *, const void *)) ascii_strcasecmp
 858                         /* This is safe to do: A pointer to a struct equals
 859                          * a pointer to its first element*/);
 860
 861           if (tz)
 862           {
 863             zhours = tz->zhours;
 864             zminutes = tz->zminutes;
 865             zoccident = tz->zoccident;
 866           }
 867
 868           /* ad hoc support for the European MET (now officially CET) TZ */
 869           if (ascii_strcasecmp (t, "MET") == 0)
 870           {
 871             if ((t = strtok (NULL, " \t")) != NULL)
 872             {
 873               if (!ascii_strcasecmp (t, "DST"))
 874                 zhours++;
 875             }
 876           }
 877         }
 878         tz_offset = zhours * 3600 + zminutes * 60;
 879         if (!zoccident)
 880           tz_offset = -tz_offset;
 881         break;
 882     }
 883     count++;
 884     t = 0;
 885   }
 886
 887   if (count < 4) /* don't check for missing timezone */
 888   {
 889     dprint(1,(debugfile, "parse_date(): error parsing date format, using received time\n"));
 890     return (-1);
 891   }
 892
 893   if (h)
 894   {
 895     h->zhours = zhours;
 896     h->zminutes = zminutes;
 897     h->zoccident = zoccident;
 898   }
 899
 900   return (mutt_mktime (&tm, 0) + tz_offset);
 901 }
 902
 903 /* extract the first substring that looks like a message-id */
 904 static char *extract_message_id (const char *s)
 905 {
 906   const char *p;
 907   char *r;
 908   size_t l;
 909
 910   if ((s = strchr (s, '<')) == NULL || (p = strchr (s, '>')) == NULL)
 911     return (NULL);
 912   l = (size_t)(p - s) + 1;
 913   r = safe_malloc (l + 1);
 914   memcpy (r, s, l);
 915   r[l] = 0;
 916   return (r);
 917 }
 918
 919 void mutt_parse_mime_message (CONTEXT *ctx, HEADER *cur)
 920 {
 921   MESSAGE *msg;
 922
 923   if (cur->content->type != TYPEMESSAGE && cur->content->type != TYPEMULTIPART)
 924     return; /* nothing to do */
 925
 926   if (cur->content->parts)
 927     return; /* The message was parsed earlier. */
 928
 929   if ((msg = mx_open_message (ctx, cur->msgno)))
 930   {
 931     mutt_parse_part (msg->fp, cur->content);
 932
 933     if (WithCrypto)
 934       cur->security = crypt_query (cur->content);
 935
 936     mx_close_message (&msg);
 937   }
 938 }
 939
 940 int mutt_parse_rfc822_line (ENVELOPE *e, HEADER *hdr, char *line, char *p, short user_hdrs, short weed,
 941                             short do_2047, LIST **lastp)
 942 {
 943   int matched = 0;
 944   LIST *last = NULL;
 945
 946   if (lastp)
 947     last = *lastp;
 948
 949   switch (ascii_tolower (line[0]))
 950   {
 951     case 'a':
 952     if (ascii_strcasecmp (line+1, "pparently-to") == 0)
 953     {
 954       e->to = rfc822_parse_adrlist (e->to, p);
 955       matched = 1;
 956     }
 957     else if (ascii_strcasecmp (line+1, "pparently-from") == 0)
 958     {
 959       e->from = rfc822_parse_adrlist (e->from, p);
 960       matched = 1;
 961     }
 962     break;
 963
 964     case 'b':
 965     if (ascii_strcasecmp (line+1, "cc") == 0)
 966     {
 967       e->bcc = rfc822_parse_adrlist (e->bcc, p);
 968       matched = 1;
 969     }
 970     break;
 971
 972     case 'c':
 973     if (ascii_strcasecmp (line+1, "c") == 0)
 974     {
 975       e->cc = rfc822_parse_adrlist (e->cc, p);
 976       matched = 1;
 977     }
 978     else if (ascii_strncasecmp (line + 1, "ontent-", 7) == 0)
 979     {
 980       if (ascii_strcasecmp (line+8, "type") == 0)
 981       {
 982         if (hdr)
 983           mutt_parse_content_type (p, hdr->content);
 984         matched = 1;
 985       }
 986       else if (ascii_strcasecmp (line+8, "transfer-encoding") == 0)
 987       {
 988         if (hdr)
 989           hdr->content->encoding = mutt_check_encoding (p);
 990         matched = 1;
 991       }
 992       else if (ascii_strcasecmp (line+8, "length") == 0)
 993       {
 994         if (hdr)
 995         {
 996           if ((hdr->content->length = atoi (p)) < 0)
 997             hdr->content->length = -1;
 998         }
 999         matched = 1;
1000       }
1001       else if (ascii_strcasecmp (line+8, "description") == 0)
1002       {
1003         if (hdr)
1004         {
1005           mutt_str_replace (&hdr->content->description, p);
1006           rfc2047_decode (&hdr->content->description);
1007         }
1008         matched = 1;
1009       }
1010       else if (ascii_strcasecmp (line+8, "disposition") == 0)
1011       {
1012         if (hdr)
1013           parse_content_disposition (p, hdr->content);
1014         matched = 1;
1015       }
1016     }
1017     break;
1018
1019     case 'd':
1020     if (!ascii_strcasecmp ("ate", line + 1))
1021     {
1022       mutt_str_replace (&e->date, p);
1023       if (hdr)
1024         hdr->date_sent = mutt_parse_date (p, hdr);
1025       matched = 1;
1026     }
1027     break;
1028
1029     case 'e':
1030     if (!ascii_strcasecmp ("xpires", line + 1) &&
1031         hdr && mutt_parse_date (p, NULL) < time (NULL))
1032       hdr->expired = 1;
1033     break;
1034
1035     case 'f':
1036     if (!ascii_strcasecmp ("rom", line + 1))
1037     {
1038       e->from = rfc822_parse_adrlist (e->from, p);
1039       matched = 1;
1040     }
1041 #ifdef USE_NNTP
1042     else if (!mutt_strcasecmp (line+1, "ollowup-to"))
1043     {
1044       if (!e->followup_to)
1045       {
1046         mutt_remove_trailing_ws (p);
1047         e->followup_to = safe_strdup (mutt_skip_whitespace (p));
1048       }
1049       matched = 1;
1050     }
1051 #endif
1052     break;
1053
1054     case 'i':
1055     if (!ascii_strcasecmp (line+1, "n-reply-to"))
1056     {
1057       mutt_free_list (&e->in_reply_to);
1058       e->in_reply_to = mutt_parse_references (p, 1);
1059       matched = 1;
1060     }
1061     break;
1062
1063     case 'l':
1064     if (!ascii_strcasecmp (line + 1, "ines"))
1065     {
1066       if (hdr)
1067         hdr->lines = atoi (p);
1068
1069       /*
1070        * HACK - mutt has, for a very short time, produced negative
1071        * Lines header values.  Ignore them.
1072        */
1073       if (hdr->lines < 0)
1074         hdr->lines = 0;
1075
1076       matched = 1;
1077     }
1078     break;
1079
1080     case 'm':
1081     if (!ascii_strcasecmp (line + 1, "ime-version"))
1082     {
1083       if (hdr)
1084         hdr->mime = 1;
1085       matched = 1;
1086     }
1087     else if (!ascii_strcasecmp (line + 1, "essage-id"))
1088     {
1089       /* We add a new "Message-Id:" when building a message */
1090       FREE (&e->message_id);
1091       e->message_id = extract_message_id (p);
1092       matched = 1;
1093     }
1094     else if (!ascii_strncasecmp (line + 1, "ail-", 4))
1095     {
1096       if (!ascii_strcasecmp (line + 5, "reply-to"))
1097       {
1098         /* override the Reply-To: field */
1099         rfc822_free_address (&e->reply_to);
1100         e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
1101         matched = 1;
1102       }
1103       else if (!ascii_strcasecmp (line + 5, "followup-to"))
1104       {
1105         e->mail_followup_to = rfc822_parse_adrlist (e->mail_followup_to, p);
1106         matched = 1;
1107       }
1108     }
1109     break;
1110
1111 #ifdef USE_NNTP
1112     case 'n':
1113     if (!mutt_strcasecmp (line + 1, "ewsgroups"))
1114     {
1115       FREE (&e->newsgroups);
1116       mutt_remove_trailing_ws (p);
1117       e->newsgroups = safe_strdup (mutt_skip_whitespace (p));
1118       matched = 1;
1119     }
1120     break;
1121 #endif
1122
1123     case 'o':
1124     /* field `Organization:' saves only for pager! */
1125     if (!mutt_strcasecmp (line + 1, "rganization"))
1126     {
1127       if (!e->organization && mutt_strcasecmp (p, "unknown"))
1128         e->organization = safe_strdup (p);
1129     }
1130     break;
1131
1132     case 'r':
1133     if (!ascii_strcasecmp (line + 1, "eferences"))
1134     {
1135       mutt_free_list (&e->references);
1136       e->references = mutt_parse_references (p, 0);
1137       matched = 1;
1138     }
1139     else if (!ascii_strcasecmp (line + 1, "eply-to"))
1140     {
1141       e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
1142       matched = 1;
1143     }
1144     else if (!ascii_strcasecmp (line + 1, "eturn-path"))
1145     {
1146       e->return_path = rfc822_parse_adrlist (e->return_path, p);
1147       matched = 1;
1148     }
1149     else if (!ascii_strcasecmp (line + 1, "eceived"))
1150     {
1151       if (hdr && !hdr->received)
1152       {
1153         char *d = strchr (p, ';');
1154
1155         if (d)
1156           hdr->received = mutt_parse_date (d + 1, NULL);
1157       }
1158     }
1159     break;
1160
1161     case 's':
1162     if (!ascii_strcasecmp (line + 1, "ubject"))
1163     {
1164       if (!e->subject)
1165         e->subject = safe_strdup (p);
1166       matched = 1;
1167     }
1168     else if (!ascii_strcasecmp (line + 1, "ender"))
1169     {
1170       e->sender = rfc822_parse_adrlist (e->sender, p);
1171       matched = 1;
1172     }
1173     else if (!ascii_strcasecmp (line + 1, "tatus"))
1174     {
1175       if (hdr)
1176       {
1177         while (*p)
1178         {
1179           switch(*p)
1180           {
1181             case 'r':
1182             hdr->replied = 1;
1183             break;
1184             case 'O':
1185               hdr->old = 1;
1186             break;
1187             case 'R':
1188             hdr->read = 1;
1189             break;
1190           }
1191           p++;
1192         }
1193       }
1194       matched = 1;
1195     }
1196     else if ((!ascii_strcasecmp ("upersedes", line + 1) ||
1197               !ascii_strcasecmp ("upercedes", line + 1)) && hdr)
1198       e->supersedes = safe_strdup (p);
1199     break;
1200
1201     case 't':
1202     if (ascii_strcasecmp (line+1, "o") == 0)
1203     {
1204       e->to = rfc822_parse_adrlist (e->to, p);
1205       matched = 1;
1206     }
1207     break;
1208
1209     case 'x':
1210     if (ascii_strcasecmp (line+1, "-status") == 0)
1211     {
1212       if (hdr)
1213       {
1214         while (*p)
1215         {
1216           switch (*p)
1217           {
1218             case 'A':
1219             hdr->replied = 1;
1220             break;
1221             case 'D':
1222             hdr->deleted = 1;
1223             break;
1224             case 'F':
1225             hdr->flagged = 1;
1226             break;
1227             default:
1228             break;
1229           }
1230           p++;
1231         }
1232       }
1233       matched = 1;
1234     }
1235     else if (ascii_strcasecmp (line+1, "-label") == 0)
1236     {
1237       e->x_label = safe_strdup(p);
1238       matched = 1;
1239     }
1240 #ifdef USE_NNTP
1241     else if (!mutt_strcasecmp (line + 1, "-comment-to"))
1242     {
1243       if (!e->x_comment_to)
1244         e->x_comment_to = safe_strdup (p);
1245       matched = 1;
1246     }
1247     else if (!mutt_strcasecmp (line + 1, "ref"))
1248     {
1249       if (!e->xref)
1250         e->xref = safe_strdup (p);
1251       matched = 1;
1252     }
1253 #endif
1254
1255     default:
1256     break;
1257   }
1258
1259   /* Keep track of the user-defined headers */
1260   if (!matched && user_hdrs)
1261   {
1262     /* restore the original line */
1263     line[strlen (line)] = ':';
1264
1265     if (weed && option (OPTWEED) && mutt_matches_ignore (line, Ignore)
1266         && !mutt_matches_ignore (line, UnIgnore))
1267       goto done;
1268
1269     if (last)
1270     {
1271       last->next = mutt_new_list ();
1272       last = last->next;
1273     }
1274     else
1275       last = e->userhdrs = mutt_new_list ();
1276     last->data = safe_strdup (line);
1277     if (do_2047)
1278       rfc2047_decode (&last->data);
1279   }
1280
1281   done:
1282
1283   *lastp = last;
1284   return matched;
1285 }
1286
1287
1288 /* mutt_read_rfc822_header() -- parses a RFC822 header
1289  *
1290  * Args:
1291  *
1292  * f            stream to read from
1293  *
1294  * hdr          header structure of current message (optional).
1295  *
1296  * user_hdrs    If set, store user headers.  Used for recall-message and
1297  *              postpone modes.
1298  *
1299  * weed         If this parameter is set and the user has activated the
1300  *              $weed option, honor the header weed list for user headers.
1301  *              Used for recall-message.
1302  *
1303  * Returns:     newly allocated envelope structure.  You should free it by
1304  *              mutt_free_envelope() when envelope stay unneeded.
1305  */
1306 ENVELOPE *mutt_read_rfc822_header (FILE *f, HEADER *hdr, short user_hdrs,
1307                                    short weed)
1308 {
1309   ENVELOPE *e = mutt_new_envelope();
1310   LIST *last = NULL;
1311   char *line = safe_malloc (LONG_STRING);
1312   char *p;
1313   long loc;
1314   int matched;
1315   size_t linelen = LONG_STRING;
1316
1317   if (hdr)
1318   {
1319     if (hdr->content == NULL)
1320     {
1321       hdr->content = mutt_new_body ();
1322
1323       /* set the defaults from RFC1521 */
1324       hdr->content->type        = TYPETEXT;
1325       hdr->content->subtype     = safe_strdup ("plain");
1326       hdr->content->encoding    = ENC7BIT;
1327       hdr->content->length      = -1;
1328
1329       /* RFC 2183 says this is arbitrary */
1330       hdr->content->disposition = DISPINLINE;
1331     }
1332   }
1333
1334   while ((loc = ftell (f)),
1335           *(line = read_rfc822_line (f, line, &linelen)) != 0)
1336   {
1337     matched = 0;
1338
1339     if ((p = strpbrk (line, ": \t")) == NULL || *p != ':')
1340     {
1341       char return_path[LONG_STRING];
1342       time_t t;
1343
1344       /* some bogus MTAs will quote the original "From " line */
1345       if (mutt_strncmp (">From ", line, 6) == 0)
1346         continue; /* just ignore */
1347       else if (is_from (line, return_path, sizeof (return_path), &t))
1348       {
1349         /* MH somtimes has the From_ line in the middle of the header! */
1350         if (hdr && !hdr->received)
1351           hdr->received = t - mutt_local_tz (t);
1352         continue;
1353       }
1354
1355       fseek (f, loc, 0);
1356       break; /* end of header */
1357     }
1358
1359     *p = 0;
1360     p++;
1361     SKIPWS (p);
1362     if (!*p)
1363       continue; /* skip empty header fields */
1364
1365     matched = mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last);
1366
1367   }
1368
1369   FREE (&line);
1370
1371   if (hdr)
1372   {
1373     hdr->content->hdr_offset = hdr->offset;
1374     hdr->content->offset = ftell (f);
1375
1376     /* do RFC2047 decoding */
1377     rfc2047_decode_adrlist (e->from);
1378     rfc2047_decode_adrlist (e->to);
1379     rfc2047_decode_adrlist (e->cc);
1380     rfc2047_decode_adrlist (e->reply_to);
1381     rfc2047_decode_adrlist (e->mail_followup_to);
1382     rfc2047_decode_adrlist (e->return_path);
1383     rfc2047_decode_adrlist (e->sender);
1384
1385     if (e->subject)
1386     {
1387       regmatch_t pmatch[1];
1388
1389       rfc2047_decode (&e->subject);
1390
1391       if (regexec (ReplyRegexp.rx, e->subject, 1, pmatch, 0) == 0)
1392         e->real_subj = e->subject + pmatch[0].rm_eo;
1393       else
1394         e->real_subj = e->subject;
1395     }
1396
1397     /* check for missing or invalid date */
1398     if (hdr->date_sent <= 0)
1399     {
1400       dprint(1,(debugfile,"read_rfc822_header(): no date found, using received time from msg separator\n"));
1401       hdr->date_sent = hdr->received;
1402     }
1403   }
1404
1405   return (e);
1406 }
1407
1408 ADDRESS *mutt_parse_adrlist (ADDRESS *p, const char *s)
1409 {
1410   const char *q;
1411
1412   /* check for a simple whitespace separated list of addresses */
1413   if ((q = strpbrk (s, "\"<>():;,\\")) == NULL)
1414   {
1415     char tmp[HUGE_STRING];
1416     char *r;
1417
1418     strfcpy (tmp, s, sizeof (tmp));
1419     r = tmp;
1420     while ((r = strtok (r, " \t")) != NULL)
1421     {
1422       p = rfc822_parse_adrlist (p, r);
1423       r = NULL;
1424     }
1425   }
1426   else
1427     p = rfc822_parse_adrlist (p, s);
1428
1429   return p;
1430 }