parse.c

   1 /*
   2  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   3  *
   4  *     This program is free software; you can redistribute it and/or modify
   5  *     it under the terms of the GNU General Public License as published by
   6  *     the Free Software Foundation; either version 2 of the License, or
   7  *     (at your option) any later version.
   8  *
   9  *     This program is distributed in the hope that it will be useful,
  10  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  *     GNU General Public License for more details.
  13  *
  14  *     You should have received a copy of the GNU General Public License
  15  *     along with this program; if not, write to the Free Software
  16  *     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  17  */
  18
  19 #include "mutt.h"
  20 #include "mutt_regex.h"
  21 #include "mailbox.h"
  22 #include "mime.h"
  23 #include "rfc2047.h"
  24 #include "rfc2231.h"
  25 #include "mutt_crypt.h"
  26
  27 #include <string.h>
  28 #include <ctype.h>
  29 #include <sys/stat.h>
  30 #include <stdlib.h>
  31
  32 /* Reads an arbitrarily long header field, and looks ahead for continuation
  33  * lines.  ``line'' must point to a dynamically allocated string; it is
  34  * increased if more space is required to fit the whole line.
  35  */
  36 static char *read_rfc822_line (FILE *f, char *line, size_t *linelen)
  37 {
  38   char *buf = line;
  39   char ch;
  40   size_t offset = 0;
  41
  42   FOREVER
  43   {
  44     if (fgets (buf, *linelen - offset, f) == NULL ||    /* end of file or */
  45         (ISSPACE (*line) && !offset))                   /* end of headers */
  46     {
  47       *line = 0;
  48       return (line);
  49     }
  50
  51     buf += strlen (buf) - 1;
  52     if (*buf == '\n')
  53     {
  54       /* we did get a full line. remove trailing space */
  55       while (ISSPACE (*buf))
  56         *buf-- = 0;     /* we cannot come beyond line's beginning because
  57                          * it begins with a non-space */
  58
  59       /* check to see if the next line is a continuation line */
  60       if ((ch = fgetc (f)) != ' ' && ch != '\t')
  61       {
  62         ungetc (ch, f);
  63         return (line); /* next line is a separate header field or EOH */
  64       }
  65
  66       /* eat tabs and spaces from the beginning of the continuation line */
  67       while ((ch = fgetc (f)) == ' ' || ch == '\t')
  68         ;
  69       ungetc (ch, f);
  70       *++buf = ' '; /* string is still terminated because we removed
  71                        at least one whitespace char above */
  72     }
  73
  74     buf++;
  75     offset = buf - line;
  76     if (*linelen < offset + STRING)
  77     {
  78       /* grow the buffer */
  79       *linelen += STRING;
  80       safe_realloc (&line, *linelen);
  81       buf = line + offset;
  82     }
  83   }
  84   /* not reached */
  85 }
  86
  87 static LIST *mutt_add_x_face (LIST *lst, char *face)
  88 {
  89   LIST *n;
  90
  91   n = safe_malloc(sizeof(LIST));
  92   n->data = safe_strdup(face);
  93   n->next = NULL;
  94
  95   if (lst)
  96   {
  97     LIST *l;
  98
  99     for(l = lst; l->next; l = l->next);
 100     l->next = n;
 101   }
 102   else
 103     lst = n;
 104
 105   return lst;
 106 }
 107
 108 LIST *mutt_parse_references (char *s, int in_reply_to)
 109 {
 110   LIST *t, *lst = NULL;
 111   int m, n = 0;
 112   char *o = NULL, *new, *at;
 113
 114   while ((s = strtok (s, " \t;")) != NULL)
 115   {
 116     /*
 117      * some mail clients add other garbage besides message-ids, so do a quick
 118      * check to make sure this looks like a valid message-id
 119      * some idiotic clients also break their message-ids between lines, deal
 120      * with that too (give up if it's more than two lines, though)
 121      */
 122     t = NULL;
 123     new = NULL;
 124
 125     if (*s == '<')
 126     {
 127       n = strlen (s);
 128       if (s[n-1] != '>')
 129       {
 130         o = s;
 131         s = NULL;
 132         continue;
 133       }
 134
 135       new = safe_strdup (s);
 136     }
 137     else if (o)
 138     {
 139       m = strlen (s);
 140       if (s[m - 1] == '>')
 141       {
 142         new = safe_malloc (sizeof (char) * (n + m + 1));
 143         strcpy (new, o);        /* __STRCPY_CHECKED__ */
 144         strcpy (new + n, s);    /* __STRCPY_CHECKED__ */
 145       }
 146     }
 147     if (new)
 148     {
 149       /* make sure that this really does look like a message-id.
 150        * it should have exactly one @, and if we're looking at
 151        * an in-reply-to header, make sure that the part before
 152        * the @ has more than eight characters or it's probably
 153        * an email address
 154        */
 155       if (!(at = strchr (new, '@')) || strchr (at + 1, '@')
 156           || (in_reply_to && at - new <= 8))
 157         FREE (&new);
 158       else
 159       {
 160         t = (LIST *) safe_malloc (sizeof (LIST));
 161         t->data = new;
 162         t->next = lst;
 163         lst = t;
 164       }
 165     }
 166     o = NULL;
 167     s = NULL;
 168   }
 169
 170   return (lst);
 171 }
 172
 173 int mutt_check_encoding (const char *c)
 174 {
 175   if (ascii_strncasecmp ("7bit", c, sizeof ("7bit")-1) == 0)
 176     return (ENC7BIT);
 177   else if (ascii_strncasecmp ("8bit", c, sizeof ("8bit")-1) == 0)
 178     return (ENC8BIT);
 179   else if (ascii_strncasecmp ("binary", c, sizeof ("binary")-1) == 0)
 180     return (ENCBINARY);
 181   else if (ascii_strncasecmp ("quoted-printable", c, sizeof ("quoted-printable")-1) == 0)
 182     return (ENCQUOTEDPRINTABLE);
 183   else if (ascii_strncasecmp ("base64", c, sizeof("base64")-1) == 0)
 184     return (ENCBASE64);
 185   else if (ascii_strncasecmp ("x-uuencode", c, sizeof("x-uuencode")-1) == 0)
 186     return (ENCUUENCODED);
 187 #ifdef SUN_ATTACHMENT
 188   else if (ascii_strncasecmp ("uuencode", c, sizeof("uuencode")-1) == 0)
 189     return (ENCUUENCODED);
 190 #endif
 191   else
 192     return (ENCOTHER);
 193 }
 194
 195 static PARAMETER *parse_parameters (const char *s)
 196 {
 197   PARAMETER *head = 0, *cur = 0, *new;
 198   char buffer[LONG_STRING];
 199   const char *p;
 200   size_t i;
 201
 202   dprint (2, (debugfile, "parse_parameters: `%s'\n", s));
 203
 204   while (*s)
 205   {
 206     if ((p = strpbrk (s, "=;")) == NULL)
 207     {
 208       dprint(1, (debugfile, "parse_parameters: malformed parameter: %s\n", s));
 209       goto bail;
 210     }
 211
 212     /* if we hit a ; now the parameter has no value, just skip it */
 213     if (*p != ';')
 214     {
 215       i = p - s;
 216
 217       new = mutt_new_parameter ();
 218
 219       new->attribute = safe_malloc (i + 1);
 220       memcpy (new->attribute, s, i);
 221       new->attribute[i] = 0;
 222
 223       /* remove whitespace from the end of the attribute name */
 224       while (ISSPACE (new->attribute[--i]))
 225         new->attribute[i] = 0;
 226
 227       s = p + 1; /* skip over the = */
 228       SKIPWS (s);
 229
 230       if (*s == '"')
 231       {
 232         s++;
 233         for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
 234         {
 235           if (*s == '\\')
 236           {
 237             /* Quote the next character */
 238             buffer[i] = s[1];
 239             if (!*++s)
 240               break;
 241           }
 242           else
 243             buffer[i] = *s;
 244         }
 245         buffer[i] = 0;
 246         if (*s)
 247           s++; /* skip over the " */
 248       }
 249       else
 250       {
 251         for (i=0; *s && *s != ' ' && *s != ';' && i < sizeof (buffer) - 1; i++, s++)
 252           buffer[i] = *s;
 253         buffer[i] = 0;
 254       }
 255
 256       new->value = safe_strdup (buffer);
 257
 258       dprint (2, (debugfile, "parse_parameter: `%s' = `%s'\n",
 259                   new->attribute ? new->attribute : "",
 260                   new->value ? new->value : ""));
 261
 262       /* Add this parameter to the list */
 263       if (head)
 264       {
 265         cur->next = new;
 266         cur = cur->next;
 267       }
 268       else
 269         head = cur = new;
 270     }
 271     else
 272     {
 273       dprint (1, (debugfile, "parse_parameters(): parameter with no value: %s\n", s));
 274       s = p;
 275     }
 276
 277     /* Find the next parameter */
 278     if (*s != ';' && (s = strchr (s, ';')) == NULL)
 279         break; /* no more parameters */
 280
 281     do
 282     {
 283       s++;
 284
 285       /* Move past any leading whitespace */
 286       SKIPWS (s);
 287     }
 288     while (*s == ';'); /* skip empty parameters */
 289   }
 290
 291   bail:
 292
 293   rfc2231_decode_parameters (&head);
 294   return (head);
 295 }
 296
 297 int mutt_check_mime_type (const char *s)
 298 {
 299   if (ascii_strcasecmp ("text", s) == 0)
 300     return TYPETEXT;
 301   else if (ascii_strcasecmp ("multipart", s) == 0)
 302     return TYPEMULTIPART;
 303 #ifdef SUN_ATTACHMENT
 304   else if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
 305     return TYPEMULTIPART;
 306 #endif
 307   else if (ascii_strcasecmp ("application", s) == 0)
 308     return TYPEAPPLICATION;
 309   else if (ascii_strcasecmp ("message", s) == 0)
 310     return TYPEMESSAGE;
 311   else if (ascii_strcasecmp ("image", s) == 0)
 312     return TYPEIMAGE;
 313   else if (ascii_strcasecmp ("audio", s) == 0)
 314     return TYPEAUDIO;
 315   else if (ascii_strcasecmp ("video", s) == 0)
 316     return TYPEVIDEO;
 317   else if (ascii_strcasecmp ("model", s) == 0)
 318     return TYPEMODEL;
 319   else
 320     return TYPEOTHER;
 321 }
 322
 323 void mutt_parse_content_type (char *s, BODY *ct)
 324 {
 325   char *pc;
 326   char *subtype;
 327
 328   FREE (&ct->subtype);
 329   mutt_free_parameter(&ct->parameter);
 330
 331   /* First extract any existing parameters */
 332   if ((pc = strchr(s, ';')) != NULL)
 333   {
 334     *pc++ = 0;
 335     while (*pc && ISSPACE (*pc))
 336       pc++;
 337     ct->parameter = parse_parameters(pc);
 338
 339     /* Some pre-RFC1521 gateways still use the "name=filename" convention,
 340      * but if a filename has already been set in the content-disposition,
 341      * let that take precedence, and don't set it here */
 342     if ((pc = mutt_get_parameter( "name", ct->parameter)) != 0 && !ct->filename)
 343       ct->filename = safe_strdup(pc);
 344
 345 #ifdef SUN_ATTACHMENT
 346     /* this is deep and utter perversion */
 347     if ((pc = mutt_get_parameter ("conversions", ct->parameter)) != 0)
 348       ct->encoding = mutt_check_encoding (pc);
 349 #endif
 350
 351   }
 352
 353   /* Now get the subtype */
 354   if ((subtype = strchr(s, '/')))
 355   {
 356     *subtype++ = '\0';
 357     for(pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++)
 358       ;
 359     *pc = '\0';
 360     ct->subtype = safe_strdup (subtype);
 361   }
 362
 363   /* Finally, get the major type */
 364   ct->type = mutt_check_mime_type (s);
 365
 366 #ifdef SUN_ATTACHMENT
 367   if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
 368       ct->subtype = safe_strdup ("x-sun-attachment");
 369 #endif
 370
 371   if (ct->type == TYPEOTHER)
 372   {
 373     ct->xtype = safe_strdup (s);
 374   }
 375
 376   if (ct->subtype == NULL)
 377   {
 378     /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
 379      * field, so we can attempt to convert the type to BODY here.
 380      */
 381     if (ct->type == TYPETEXT)
 382       ct->subtype = safe_strdup ("plain");
 383     else if (ct->type == TYPEAUDIO)
 384       ct->subtype = safe_strdup ("basic");
 385     else if (ct->type == TYPEMESSAGE)
 386       ct->subtype = safe_strdup ("rfc822");
 387     else if (ct->type == TYPEOTHER)
 388     {
 389       char buffer[SHORT_STRING];
 390
 391       ct->type = TYPEAPPLICATION;
 392       snprintf (buffer, sizeof (buffer), "x-%s", s);
 393       ct->subtype = safe_strdup (buffer);
 394     }
 395     else
 396       ct->subtype = safe_strdup ("x-unknown");
 397   }
 398
 399   /* Default character set for text types. */
 400   if (ct->type == TYPETEXT)
 401   {
 402     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
 403       mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
 404   }
 405
 406 }
 407
 408 static void parse_content_disposition (char *s, BODY *ct)
 409 {
 410   PARAMETER *parms;
 411
 412   if (!ascii_strncasecmp ("inline", s, 6))
 413     ct->disposition = DISPINLINE;
 414   else if (!ascii_strncasecmp ("form-data", s, 9))
 415     ct->disposition = DISPFORMDATA;
 416   else
 417     ct->disposition = DISPATTACH;
 418
 419   /* Check to see if a default filename was given */
 420   if ((s = strchr (s, ';')) != NULL)
 421   {
 422     s++;
 423     SKIPWS (s);
 424     if ((s = mutt_get_parameter ("filename", (parms = parse_parameters (s)))) != 0)
 425       mutt_str_replace (&ct->filename, s);
 426     if ((s = mutt_get_parameter ("name", parms)) != 0)
 427       ct->form_name = safe_strdup (s);
 428     mutt_free_parameter (&parms);
 429   }
 430 }
 431
 432 /* args:
 433  *      fp      stream to read from
 434  *
 435  *      digest  1 if reading subparts of a multipart/digest, 0
 436  *              otherwise
 437  */
 438
 439 BODY *mutt_read_mime_header (FILE *fp, int digest)
 440 {
 441   BODY *p = mutt_new_body();
 442   char *c;
 443   char *line = safe_malloc (LONG_STRING);
 444   size_t linelen = LONG_STRING;
 445
 446   p->hdr_offset  = ftell(fp);
 447
 448   p->encoding    = ENC7BIT; /* default from RFC1521 */
 449   p->type        = digest ? TYPEMESSAGE : TYPETEXT;
 450   p->disposition = DISPINLINE;
 451
 452   while (*(line = read_rfc822_line (fp, line, &linelen)) != 0)
 453   {
 454     /* Find the value of the current header */
 455     if ((c = strchr (line, ':')))
 456     {
 457       *c = 0;
 458       c++;
 459       SKIPWS (c);
 460       if (!*c)
 461       {
 462         dprint (1, (debugfile, "mutt_read_mime_header(): skipping empty header field: %s\n", line));
 463         continue;
 464       }
 465     }
 466     else
 467     {
 468       dprint (1, (debugfile, "read_mime_header: bogus MIME header: %s\n", line));
 469       break;
 470     }
 471
 472     if (!ascii_strncasecmp ("content-", line, 8))
 473     {
 474       if (!ascii_strcasecmp ("type", line + 8))
 475         mutt_parse_content_type (c, p);
 476       else if (!ascii_strcasecmp ("transfer-encoding", line + 8))
 477         p->encoding = mutt_check_encoding (c);
 478       else if (!ascii_strcasecmp ("disposition", line + 8))
 479         parse_content_disposition (c, p);
 480       else if (!ascii_strcasecmp ("description", line + 8))
 481       {
 482         mutt_str_replace (&p->description, c);
 483         rfc2047_decode (&p->description);
 484       }
 485     }
 486 #ifdef SUN_ATTACHMENT
 487     else if (!ascii_strncasecmp ("x-sun-", line, 6))
 488     {
 489       if (!ascii_strcasecmp ("data-type", line + 6))
 490         mutt_parse_content_type (c, p);
 491       else if (!ascii_strcasecmp ("encoding-info", line + 6))
 492         p->encoding = mutt_check_encoding (c);
 493       else if (!ascii_strcasecmp ("content-lines", line + 6))
 494         mutt_set_parameter ("content-lines", c, &(p->parameter));
 495       else if (!ascii_strcasecmp ("data-description", line + 6))
 496       {
 497         mutt_str_replace (&p->description, c);
 498         rfc2047_decode (&p->description);
 499       }
 500     }
 501 #endif
 502   }
 503   p->offset = ftell (fp); /* Mark the start of the real data */
 504   if (p->type == TYPETEXT && !p->subtype)
 505     p->subtype = safe_strdup ("plain");
 506   else if (p->type == TYPEMESSAGE && !p->subtype)
 507     p->subtype = safe_strdup ("rfc822");
 508
 509   FREE (&line);
 510
 511   return (p);
 512 }
 513
 514 void mutt_parse_part (FILE *fp, BODY *b)
 515 {
 516   char *bound = 0;
 517
 518   switch (b->type)
 519   {
 520     case TYPEMULTIPART:
 521 #ifdef SUN_ATTACHMENT
 522       if ( !ascii_strcasecmp (b->subtype, "x-sun-attachment") )
 523           bound = "--------";
 524       else
 525 #endif
 526           bound = mutt_get_parameter ("boundary", b->parameter);
 527
 528       fseek (fp, b->offset, SEEK_SET);
 529       b->parts =  mutt_parse_multipart (fp, bound,
 530                                         b->offset + b->length,
 531                                         ascii_strcasecmp ("digest", b->subtype) == 0);
 532       break;
 533
 534     case TYPEMESSAGE:
 535       if (b->subtype)
 536       {
 537         fseek (fp, b->offset, SEEK_SET);
 538         if (mutt_is_message_type(b->type, b->subtype))
 539           b->parts = mutt_parse_messageRFC822 (fp, b);
 540         else if (ascii_strcasecmp (b->subtype, "external-body") == 0)
 541           b->parts = mutt_read_mime_header (fp, 0);
 542         else
 543           return;
 544       }
 545       break;
 546
 547     default:
 548       return;
 549   }
 550
 551   /* try to recover from parsing error */
 552   if (!b->parts)
 553   {
 554     b->type = TYPETEXT;
 555     mutt_str_replace (&b->subtype, "plain");
 556   }
 557 }
 558
 559 /* parse a MESSAGE/RFC822 body
 560  *
 561  * args:
 562  *      fp              stream to read from
 563  *
 564  *      parent          structure which contains info about the message/rfc822
 565  *                      body part
 566  *
 567  * NOTE: this assumes that `parent->length' has been set!
 568  */
 569
 570 BODY *mutt_parse_messageRFC822 (FILE *fp, BODY *parent)
 571 {
 572   BODY *msg;
 573
 574   parent->hdr = mutt_new_header ();
 575   parent->hdr->offset = ftell (fp);
 576   parent->hdr->env = mutt_read_rfc822_header (fp, parent->hdr, 0, 0);
 577   msg = parent->hdr->content;
 578
 579   /* ignore the length given in the content-length since it could be wrong
 580      and we already have the info to calculate the correct length */
 581   /* if (msg->length == -1) */
 582   msg->length = parent->length - (msg->offset - parent->offset);
 583
 584   /* if body of this message is empty, we can end up with a negative length */
 585   if (msg->length < 0)
 586     msg->length = 0;
 587
 588   mutt_parse_part(fp, msg);
 589   return (msg);
 590 }
 591
 592 /* parse a multipart structure
 593  *
 594  * args:
 595  *      fp              stream to read from
 596  *
 597  *      boundary        body separator
 598  *
 599  *      end_off         length of the multipart body (used when the final
 600  *                      boundary is missing to avoid reading too far)
 601  *
 602  *      digest          1 if reading a multipart/digest, 0 otherwise
 603  */
 604
 605 BODY *mutt_parse_multipart (FILE *fp, const char *boundary, long end_off, int digest)
 606 {
 607 #ifdef SUN_ATTACHMENT
 608   int lines;
 609 #endif
 610   int blen, len, crlf = 0;
 611   char buffer[LONG_STRING];
 612   BODY *head = 0, *last = 0, *new = 0;
 613   int i;
 614   int final = 0; /* did we see the ending boundary? */
 615
 616   if (!boundary)
 617   {
 618     mutt_error _("multipart message has no boundary parameter!");
 619     return (NULL);
 620   }
 621
 622   blen = mutt_strlen (boundary);
 623   while (ftell (fp) < end_off && fgets (buffer, LONG_STRING, fp) != NULL)
 624   {
 625     len = mutt_strlen (buffer);
 626
 627     crlf =  (len > 1 && buffer[len - 2] == '\r') ? 1 : 0;
 628
 629     if (buffer[0] == '-' && buffer[1] == '-' &&
 630         mutt_strncmp (buffer + 2, boundary, blen) == 0)
 631     {
 632       if (last)
 633       {
 634         last->length = ftell (fp) - last->offset - len - 1 - crlf;
 635         if (last->parts && last->parts->length == 0)
 636           last->parts->length = ftell (fp) - last->parts->offset - len - 1 - crlf;
 637         /* if the body is empty, we can end up with a -1 length */
 638         if (last->length < 0)
 639           last->length = 0;
 640       }
 641
 642       /* Remove any trailing whitespace, up to the length of the boundary */
 643       for (i = len - 1; ISSPACE (buffer[i]) && i >= blen + 2; i--)
 644         buffer[i] = 0;
 645
 646       /* Check for the end boundary */
 647       if (mutt_strcmp (buffer + blen + 2, "--") == 0)
 648       {
 649         final = 1;
 650         break; /* done parsing */
 651       }
 652       else if (buffer[2 + blen] == 0)
 653       {
 654         new = mutt_read_mime_header (fp, digest);
 655
 656 #ifdef SUN_ATTACHMENT
 657         if (mutt_get_parameter ("content-lines", new->parameter)) {
 658           for (lines = atoi(mutt_get_parameter ("content-lines", new->parameter));
 659                lines; lines-- )
 660              if (ftell (fp) >= end_off || fgets (buffer, LONG_STRING, fp) == NULL)
 661                break;
 662         }
 663 #endif
 664
 665         /*
 666          * Consistency checking - catch
 667          * bad attachment end boundaries
 668          */
 669
 670         if(new->offset > end_off)
 671         {
 672           mutt_free_body(&new);
 673           break;
 674         }
 675         if (head)
 676         {
 677           last->next = new;
 678           last = new;
 679         }
 680         else
 681           last = head = new;
 682       }
 683     }
 684   }
 685
 686   /* in case of missing end boundary, set the length to something reasonable */
 687   if (last && last->length == 0 && !final)
 688     last->length = end_off - last->offset;
 689
 690   /* parse recursive MIME parts */
 691   for(last = head; last; last = last->next)
 692     mutt_parse_part(fp, last);
 693
 694   return (head);
 695 }
 696
 697 static const char *uncomment_timezone (char *buf, size_t buflen, const char *tz)
 698 {
 699   char *p;
 700   size_t len;
 701
 702   if (*tz != '(')
 703     return tz; /* no need to do anything */
 704   tz++;
 705   SKIPWS (tz);
 706   if ((p = strpbrk (tz, " )")) == NULL)
 707     return tz;
 708   len = p - tz;
 709   if (len > buflen - 1)
 710     len = buflen - 1;
 711   memcpy (buf, tz, len);
 712   buf[len] = 0;
 713   return buf;
 714 }
 715
 716 static struct tz_t
 717 {
 718   char tzname[5];
 719   unsigned char zhours;
 720   unsigned char zminutes;
 721   unsigned char zoccident; /* west of UTC? */
 722 }
 723 TimeZones[] =
 724 {
 725   { "aat",   1,  0, 1 }, /* Atlantic Africa Time */
 726   { "adt",   4,  0, 0 }, /* Arabia DST */
 727   { "ast",   3,  0, 0 }, /* Arabia */
 728 /*{ "ast",   4,  0, 1 },*/ /* Atlantic */
 729   { "bst",   1,  0, 0 }, /* British DST */
 730   { "cat",   1,  0, 0 }, /* Central Africa */
 731   { "cdt",   5,  0, 1 },
 732   { "cest",  2,  0, 0 }, /* Central Europe DST */
 733   { "cet",   1,  0, 0 }, /* Central Europe */
 734   { "cst",   6,  0, 1 },
 735 /*{ "cst",   8,  0, 0 },*/ /* China */
 736 /*{ "cst",   9, 30, 0 },*/ /* Australian Central Standard Time */
 737   { "eat",   3,  0, 0 }, /* East Africa */
 738   { "edt",   4,  0, 1 },
 739   { "eest",  3,  0, 0 }, /* Eastern Europe DST */
 740   { "eet",   2,  0, 0 }, /* Eastern Europe */
 741   { "egst",  0,  0, 0 }, /* Eastern Greenland DST */
 742   { "egt",   1,  0, 1 }, /* Eastern Greenland */
 743   { "est",   5,  0, 1 },
 744   { "gmt",   0,  0, 0 },
 745   { "gst",   4,  0, 0 }, /* Presian Gulf */
 746   { "hkt",   8,  0, 0 }, /* Hong Kong */
 747   { "ict",   7,  0, 0 }, /* Indochina */
 748   { "idt",   3,  0, 0 }, /* Israel DST */
 749   { "ist",   2,  0, 0 }, /* Israel */
 750 /*{ "ist",   5, 30, 0 },*/ /* India */
 751   { "jst",   9,  0, 0 }, /* Japan */
 752   { "kst",   9,  0, 0 }, /* Korea */
 753   { "mdt",   6,  0, 1 },
 754   { "met",   1,  0, 0 }, /* this is now officially CET */
 755   { "msd",   4,  0, 0 }, /* Moscow DST */
 756   { "msk",   3,  0, 0 }, /* Moscow */
 757   { "mst",   7,  0, 1 },
 758   { "nzdt", 13,  0, 0 }, /* New Zealand DST */
 759   { "nzst", 12,  0, 0 }, /* New Zealand */
 760   { "pdt",   7,  0, 1 },
 761   { "pst",   8,  0, 1 },
 762   { "sat",   2,  0, 0 }, /* South Africa */
 763   { "smt",   4,  0, 0 }, /* Seychelles */
 764   { "sst",  11,  0, 1 }, /* Samoa */
 765 /*{ "sst",   8,  0, 0 },*/ /* Singapore */
 766   { "utc",   0,  0, 0 },
 767   { "wat",   0,  0, 0 }, /* West Africa */
 768   { "west",  1,  0, 0 }, /* Western Europe DST */
 769   { "wet",   0,  0, 0 }, /* Western Europe */
 770   { "wgst",  2,  0, 1 }, /* Western Greenland DST */
 771   { "wgt",   3,  0, 1 }, /* Western Greenland */
 772   { "wst",   8,  0, 0 }, /* Western Australia */
 773 };
 774
 775 /* parses a date string in RFC822 format:
 776  *
 777  * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 778  *
 779  * This routine assumes that `h' has been initialized to 0.  the `timezone'
 780  * field is optional, defaulting to +0000 if missing.
 781  */
 782 time_t mutt_parse_date (const char *s, HEADER *h)
 783 {
 784   int count = 0;
 785   char *t;
 786   int hour, min, sec;
 787   struct tm tm;
 788   int i;
 789   int tz_offset = 0;
 790   int zhours = 0;
 791   int zminutes = 0;
 792   int zoccident = 0;
 793   const char *ptz;
 794   char tzstr[SHORT_STRING];
 795   char scratch[SHORT_STRING];
 796
 797   /* Don't modify our argument. Fixed-size buffer is ok here since
 798    * the date format imposes a natural limit.
 799    */
 800
 801   strfcpy (scratch, s, sizeof (scratch));
 802
 803   /* kill the day of the week, if it exists. */
 804   if ((t = strchr (scratch, ',')))
 805     t++;
 806   else
 807     t = scratch;
 808   SKIPWS (t);
 809
 810   memset (&tm, 0, sizeof (tm));
 811
 812   while ((t = strtok (t, " \t")) != NULL)
 813   {
 814     switch (count)
 815     {
 816       case 0: /* day of the month */
 817         if (!isdigit ((unsigned char) *t))
 818           return (-1);
 819         tm.tm_mday = atoi (t);
 820         if (tm.tm_mday > 31)
 821           return (-1);
 822         break;
 823
 824       case 1: /* month of the year */
 825         if ((i = mutt_check_month (t)) < 0)
 826           return (-1);
 827         tm.tm_mon = i;
 828         break;
 829
 830       case 2: /* year */
 831         tm.tm_year = atoi (t);
 832         if (tm.tm_year < 50)
 833           tm.tm_year += 100;
 834         else if (tm.tm_year >= 1900)
 835           tm.tm_year -= 1900;
 836         break;
 837
 838       case 3: /* time of day */
 839         if (sscanf (t, "%d:%d:%d", &hour, &min, &sec) == 3)
 840           ;
 841         else if (sscanf (t, "%d:%d", &hour, &min) == 2)
 842           sec = 0;
 843         else
 844         {
 845           dprint(1, (debugfile, "parse_date: could not process time format: %s\n", t));
 846           return(-1);
 847         }
 848         tm.tm_hour = hour;
 849         tm.tm_min = min;
 850         tm.tm_sec = sec;
 851         break;
 852
 853       case 4: /* timezone */
 854         /* sometimes we see things like (MST) or (-0700) so attempt to
 855          * compensate by uncommenting the string if non-RFC822 compliant
 856          */
 857         ptz = uncomment_timezone (tzstr, sizeof (tzstr), t);
 858
 859         if (*ptz == '+' || *ptz == '-')
 860         {
 861           if (ptz[1] && ptz[2] && ptz[3] && ptz[4]
 862               && isdigit ((unsigned char) ptz[1]) && isdigit ((unsigned char) ptz[2])
 863               && isdigit ((unsigned char) ptz[3]) && isdigit ((unsigned char) ptz[4]))
 864           {
 865             zhours = (ptz[1] - '0') * 10 + (ptz[2] - '0');
 866             zminutes = (ptz[3] - '0') * 10 + (ptz[4] - '0');
 867
 868             if (ptz[0] == '-')
 869               zoccident = 1;
 870           }
 871         }
 872         else
 873         {
 874           struct tz_t *tz;
 875
 876           tz = bsearch (ptz, TimeZones, sizeof TimeZones/sizeof (struct tz_t),
 877                         sizeof (struct tz_t),
 878                         (int (*)(const void *, const void *)) ascii_strcasecmp
 879                         /* This is safe to do: A pointer to a struct equals
 880                          * a pointer to its first element*/);
 881
 882           if (tz)
 883           {
 884             zhours = tz->zhours;
 885             zminutes = tz->zminutes;
 886             zoccident = tz->zoccident;
 887           }
 888
 889           /* ad hoc support for the European MET (now officially CET) TZ */
 890           if (ascii_strcasecmp (t, "MET") == 0)
 891           {
 892             if ((t = strtok (NULL, " \t")) != NULL)
 893             {
 894               if (!ascii_strcasecmp (t, "DST"))
 895                 zhours++;
 896             }
 897           }
 898         }
 899         tz_offset = zhours * 3600 + zminutes * 60;
 900         if (!zoccident)
 901           tz_offset = -tz_offset;
 902         break;
 903     }
 904     count++;
 905     t = 0;
 906   }
 907
 908   if (count < 4) /* don't check for missing timezone */
 909   {
 910     dprint(1,(debugfile, "parse_date(): error parsing date format, using received time\n"));
 911     return (-1);
 912   }
 913
 914   if (h)
 915   {
 916     h->zhours = zhours;
 917     h->zminutes = zminutes;
 918     h->zoccident = zoccident;
 919   }
 920
 921   return (mutt_mktime (&tm, 0) + tz_offset);
 922 }
 923
 924 /* extract the first substring that looks like a message-id */
 925 static char *extract_message_id (const char *s)
 926 {
 927   const char *p;
 928   char *r;
 929   size_t l;
 930
 931   if ((s = strchr (s, '<')) == NULL || (p = strchr (s, '>')) == NULL)
 932     return (NULL);
 933   l = (size_t)(p - s) + 1;
 934   r = safe_malloc (l + 1);
 935   memcpy (r, s, l);
 936   r[l] = 0;
 937   return (r);
 938 }
 939
 940 void mutt_parse_mime_message (CONTEXT *ctx, HEADER *cur)
 941 {
 942   MESSAGE *msg;
 943
 944   if (cur->content->type != TYPEMESSAGE && cur->content->type != TYPEMULTIPART)
 945     return; /* nothing to do */
 946
 947   if (cur->content->parts)
 948     return; /* The message was parsed earlier. */
 949
 950   if ((msg = mx_open_message (ctx, cur->msgno)))
 951   {
 952     mutt_parse_part (msg->fp, cur->content);
 953
 954     if (WithCrypto)
 955       cur->security = crypt_query (cur->content);
 956
 957     mx_close_message (&msg);
 958   }
 959 }
 960
 961 int mutt_parse_rfc822_line (ENVELOPE *e, HEADER *hdr, char *line, char *p, short user_hdrs, short weed,
 962                             short do_2047, LIST **lastp)
 963 {
 964   int matched = 0;
 965   LIST *last = NULL;
 966
 967   if (lastp)
 968     last = *lastp;
 969
 970   switch (ascii_tolower (line[0]))
 971   {
 972     case 'a':
 973     if (ascii_strcasecmp (line+1, "pparently-to") == 0)
 974     {
 975       e->to = rfc822_parse_adrlist (e->to, p);
 976       matched = 1;
 977     }
 978     else if (ascii_strcasecmp (line+1, "pparently-from") == 0)
 979     {
 980       e->from = rfc822_parse_adrlist (e->from, p);
 981       matched = 1;
 982     }
 983     break;
 984
 985     case 'b':
 986     if (ascii_strcasecmp (line+1, "cc") == 0)
 987     {
 988       e->bcc = rfc822_parse_adrlist (e->bcc, p);
 989       matched = 1;
 990     }
 991     break;
 992
 993     case 'c':
 994     if (ascii_strcasecmp (line+1, "c") == 0)
 995     {
 996       e->cc = rfc822_parse_adrlist (e->cc, p);
 997       matched = 1;
 998     }
 999     else if (ascii_strncasecmp (line + 1, "ontent-", 7) == 0)
1000     {
1001       if (ascii_strcasecmp (line+8, "type") == 0)
1002       {
1003         if (hdr)
1004           mutt_parse_content_type (p, hdr->content);
1005         matched = 1;
1006       }
1007       else if (ascii_strcasecmp (line+8, "transfer-encoding") == 0)
1008       {
1009         if (hdr)
1010           hdr->content->encoding = mutt_check_encoding (p);
1011         matched = 1;
1012       }
1013       else if (ascii_strcasecmp (line+8, "length") == 0)
1014       {
1015         if (hdr)
1016         {
1017           if ((hdr->content->length = atoi (p)) < 0)
1018             hdr->content->length = -1;
1019         }
1020         matched = 1;
1021       }
1022       else if (ascii_strcasecmp (line+8, "description") == 0)
1023       {
1024         if (hdr)
1025         {
1026           mutt_str_replace (&hdr->content->description, p);
1027           rfc2047_decode (&hdr->content->description);
1028         }
1029         matched = 1;
1030       }
1031       else if (ascii_strcasecmp (line+8, "disposition") == 0)
1032       {
1033         if (hdr)
1034           parse_content_disposition (p, hdr->content);
1035         matched = 1;
1036       }
1037     }
1038     break;
1039
1040     case 'd':
1041     if (!ascii_strcasecmp ("ate", line + 1))
1042     {
1043       mutt_str_replace (&e->date, p);
1044       if (hdr)
1045         hdr->date_sent = mutt_parse_date (p, hdr);
1046       matched = 1;
1047     }
1048     break;
1049
1050     case 'e':
1051     if (!ascii_strcasecmp ("xpires", line + 1) &&
1052         hdr && mutt_parse_date (p, NULL) < time (NULL))
1053       hdr->expired = 1;
1054     break;
1055
1056     case 'f':
1057     if (!ascii_strcasecmp ("rom", line + 1))
1058     {
1059       e->from = rfc822_parse_adrlist (e->from, p);
1060       matched = 1;
1061     }
1062 #ifdef USE_NNTP
1063     else if (!mutt_strcasecmp (line+1, "ollowup-to"))
1064     {
1065       if (!e->followup_to)
1066       {
1067         mutt_remove_trailing_ws (p);
1068         e->followup_to = safe_strdup (mutt_skip_whitespace (p));
1069       }
1070       matched = 1;
1071     }
1072 #endif
1073     break;
1074
1075     case 'i':
1076     if (!ascii_strcasecmp (line+1, "n-reply-to"))
1077     {
1078       mutt_free_list (&e->in_reply_to);
1079       e->in_reply_to = mutt_parse_references (p, 1);
1080       matched = 1;
1081     }
1082     break;
1083
1084     case 'l':
1085     if (!ascii_strcasecmp (line + 1, "ines"))
1086     {
1087       if (hdr)
1088         hdr->lines = atoi (p);
1089
1090       /*
1091        * HACK - mutt has, for a very short time, produced negative
1092        * Lines header values.  Ignore them.
1093        */
1094       if (hdr->lines < 0)
1095         hdr->lines = 0;
1096
1097       matched = 1;
1098     }
1099     break;
1100
1101     case 'm':
1102     if (!ascii_strcasecmp (line + 1, "ime-version"))
1103     {
1104       if (hdr)
1105         hdr->mime = 1;
1106       matched = 1;
1107     }
1108     else if (!ascii_strcasecmp (line + 1, "essage-id"))
1109     {
1110       /* We add a new "Message-Id:" when building a message */
1111       FREE (&e->message_id);
1112       e->message_id = extract_message_id (p);
1113       matched = 1;
1114     }
1115     else if (!ascii_strncasecmp (line + 1, "ail-", 4))
1116     {
1117       if (!ascii_strcasecmp (line + 5, "reply-to"))
1118       {
1119         /* override the Reply-To: field */
1120         rfc822_free_address (&e->reply_to);
1121         e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
1122         matched = 1;
1123       }
1124       else if (!ascii_strcasecmp (line + 5, "followup-to"))
1125       {
1126         e->mail_followup_to = rfc822_parse_adrlist (e->mail_followup_to, p);
1127         matched = 1;
1128       }
1129     }
1130     break;
1131
1132 #ifdef USE_NNTP
1133     case 'n':
1134     if (!mutt_strcasecmp (line + 1, "ewsgroups"))
1135     {
1136       FREE (&e->newsgroups);
1137       mutt_remove_trailing_ws (p);
1138       e->newsgroups = safe_strdup (mutt_skip_whitespace (p));
1139       matched = 1;
1140     }
1141     break;
1142 #endif
1143
1144     case 'o':
1145     /* field `Organization:' saves only for pager! */
1146     if (!mutt_strcasecmp (line + 1, "rganization"))
1147     {
1148       if (!e->organization && mutt_strcasecmp (p, "unknown"))
1149         e->organization = safe_strdup (p);
1150     }
1151     break;
1152
1153     case 'r':
1154     if (!ascii_strcasecmp (line + 1, "eferences"))
1155     {
1156       mutt_free_list (&e->references);
1157       e->references = mutt_parse_references (p, 0);
1158       matched = 1;
1159     }
1160     else if (!ascii_strcasecmp (line + 1, "eply-to"))
1161     {
1162       e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
1163       matched = 1;
1164     }
1165     else if (!ascii_strcasecmp (line + 1, "eturn-path"))
1166     {
1167       e->return_path = rfc822_parse_adrlist (e->return_path, p);
1168       matched = 1;
1169     }
1170     else if (!ascii_strcasecmp (line + 1, "eceived"))
1171     {
1172       if (hdr && !hdr->received)
1173       {
1174         char *d = strchr (p, ';');
1175
1176         if (d)
1177           hdr->received = mutt_parse_date (d + 1, NULL);
1178       }
1179     }
1180     break;
1181
1182     case 's':
1183     if (!ascii_strcasecmp (line + 1, "ubject"))
1184     {
1185       if (!e->subject)
1186         e->subject = safe_strdup (p);
1187       matched = 1;
1188     }
1189     else if (!ascii_strcasecmp (line + 1, "ender"))
1190     {
1191       e->sender = rfc822_parse_adrlist (e->sender, p);
1192       matched = 1;
1193     }
1194     else if (!ascii_strcasecmp (line + 1, "tatus"))
1195     {
1196       if (hdr)
1197       {
1198         while (*p)
1199         {
1200           switch(*p)
1201           {
1202             case 'r':
1203             hdr->replied = 1;
1204             break;
1205             case 'O':
1206               hdr->old = 1;
1207             break;
1208             case 'R':
1209             hdr->read = 1;
1210             break;
1211           }
1212           p++;
1213         }
1214       }
1215       matched = 1;
1216     }
1217     else if ((!ascii_strcasecmp ("upersedes", line + 1) ||
1218               !ascii_strcasecmp ("upercedes", line + 1)) && hdr)
1219       e->supersedes = safe_strdup (p);
1220     break;
1221
1222     case 't':
1223     if (ascii_strcasecmp (line+1, "o") == 0)
1224     {
1225       e->to = rfc822_parse_adrlist (e->to, p);
1226       matched = 1;
1227     }
1228     break;
1229
1230     case 'x':
1231     if (ascii_strcasecmp (line+1, "-status") == 0)
1232     {
1233       if (hdr)
1234       {
1235         while (*p)
1236         {
1237           switch (*p)
1238           {
1239             case 'A':
1240             hdr->replied = 1;
1241             break;
1242             case 'D':
1243             hdr->deleted = 1;
1244             break;
1245             case 'F':
1246             hdr->flagged = 1;
1247             break;
1248             default:
1249             break;
1250           }
1251           p++;
1252         }
1253       }
1254       matched = 1;
1255     }
1256     else if (ascii_strcasecmp (line+1, "-label") == 0)
1257     {
1258       e->x_label = safe_strdup(p);
1259       matched = 1;
1260     }
1261     else if (ascii_strcasecmp (line+1, "-face") == 0)
1262     {
1263       e->x_face = mutt_add_x_face (e->x_face, p);
1264       matched = 1;
1265     }
1266 #ifdef USE_NNTP
1267     else if (!mutt_strcasecmp (line + 1, "-comment-to"))
1268     {
1269       if (!e->x_comment_to)
1270         e->x_comment_to = safe_strdup (p);
1271       matched = 1;
1272     }
1273     else if (!mutt_strcasecmp (line + 1, "ref"))
1274     {
1275       if (!e->xref)
1276         e->xref = safe_strdup (p);
1277       matched = 1;
1278     }
1279 #endif
1280
1281     default:
1282     break;
1283   }
1284
1285   /* Keep track of the user-defined headers */
1286   if (!matched && user_hdrs)
1287   {
1288     /* restore the original line */
1289     line[strlen (line)] = ':';
1290
1291     if (weed && option (OPTWEED) && mutt_matches_ignore (line, Ignore)
1292         && !mutt_matches_ignore (line, UnIgnore))
1293       goto done;
1294
1295     if (last)
1296     {
1297       last->next = mutt_new_list ();
1298       last = last->next;
1299     }
1300     else
1301       last = e->userhdrs = mutt_new_list ();
1302     last->data = safe_strdup (line);
1303     if (do_2047)
1304       rfc2047_decode (&last->data);
1305   }
1306
1307   done:
1308
1309   *lastp = last;
1310   return matched;
1311 }
1312
1313
1314 /* mutt_read_rfc822_header() -- parses a RFC822 header
1315  *
1316  * Args:
1317  *
1318  * f            stream to read from
1319  *
1320  * hdr          header structure of current message (optional).
1321  *
1322  * user_hdrs    If set, store user headers.  Used for recall-message and
1323  *              postpone modes.
1324  *
1325  * weed         If this parameter is set and the user has activated the
1326  *              $weed option, honor the header weed list for user headers.
1327  *              Used for recall-message.
1328  *
1329  * Returns:     newly allocated envelope structure.  You should free it by
1330  *              mutt_free_envelope() when envelope stay unneeded.
1331  */
1332 ENVELOPE *mutt_read_rfc822_header (FILE *f, HEADER *hdr, short user_hdrs,
1333                                    short weed)
1334 {
1335   ENVELOPE *e = mutt_new_envelope();
1336   LIST *last = NULL;
1337   char *line = safe_malloc (LONG_STRING);
1338   char *p;
1339   long loc;
1340   int matched;
1341   size_t linelen = LONG_STRING;
1342
1343   if (hdr)
1344   {
1345     if (hdr->content == NULL)
1346     {
1347       hdr->content = mutt_new_body ();
1348
1349       /* set the defaults from RFC1521 */
1350       hdr->content->type        = TYPETEXT;
1351       hdr->content->subtype     = safe_strdup ("plain");
1352       hdr->content->encoding    = ENC7BIT;
1353       hdr->content->length      = -1;
1354
1355       /* RFC 2183 says this is arbitrary */
1356       hdr->content->disposition = DISPINLINE;
1357     }
1358   }
1359
1360   while ((loc = ftell (f)),
1361           *(line = read_rfc822_line (f, line, &linelen)) != 0)
1362   {
1363     matched = 0;
1364
1365     if ((p = strpbrk (line, ": \t")) == NULL || *p != ':')
1366     {
1367       char return_path[LONG_STRING];
1368       time_t t;
1369
1370       /* some bogus MTAs will quote the original "From " line */
1371       if (mutt_strncmp (">From ", line, 6) == 0)
1372         continue; /* just ignore */
1373       else if (is_from (line, return_path, sizeof (return_path), &t))
1374       {
1375         /* MH somtimes has the From_ line in the middle of the header! */
1376         if (hdr && !hdr->received)
1377           hdr->received = t - mutt_local_tz (t);
1378         continue;
1379       }
1380
1381       fseek (f, loc, 0);
1382       break; /* end of header */
1383     }
1384
1385     *p = 0;
1386     p++;
1387     SKIPWS (p);
1388     if (!*p)
1389       continue; /* skip empty header fields */
1390
1391     matched = mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last);
1392
1393   }
1394
1395   FREE (&line);
1396
1397   if (hdr)
1398   {
1399     hdr->content->hdr_offset = hdr->offset;
1400     hdr->content->offset = ftell (f);
1401
1402     /* do RFC2047 decoding */
1403     rfc2047_decode_adrlist (e->from);
1404     rfc2047_decode_adrlist (e->to);
1405     rfc2047_decode_adrlist (e->cc);
1406     rfc2047_decode_adrlist (e->reply_to);
1407     rfc2047_decode_adrlist (e->mail_followup_to);
1408     rfc2047_decode_adrlist (e->return_path);
1409     rfc2047_decode_adrlist (e->sender);
1410
1411     if (e->subject)
1412     {
1413       regmatch_t pmatch[1];
1414
1415       rfc2047_decode (&e->subject);
1416
1417       if (regexec (ReplyRegexp.rx, e->subject, 1, pmatch, 0) == 0)
1418         e->real_subj = e->subject + pmatch[0].rm_eo;
1419       else
1420         e->real_subj = e->subject;
1421     }
1422
1423     /* check for missing or invalid date */
1424     if (hdr->date_sent <= 0)
1425     {
1426       dprint(1,(debugfile,"read_rfc822_header(): no date found, using received time from msg separator\n"));
1427       hdr->date_sent = hdr->received;
1428     }
1429   }
1430
1431   return (e);
1432 }
1433
1434 ADDRESS *mutt_parse_adrlist (ADDRESS *p, const char *s)
1435 {
1436   const char *q;
1437
1438   /* check for a simple whitespace separated list of addresses */
1439   if ((q = strpbrk (s, "\"<>():;,\\")) == NULL)
1440   {
1441     char tmp[HUGE_STRING];
1442     char *r;
1443
1444     strfcpy (tmp, s, sizeof (tmp));
1445     r = tmp;
1446     while ((r = strtok (r, " \t")) != NULL)
1447     {
1448       p = rfc822_parse_adrlist (p, r);
1449       r = NULL;
1450     }
1451   }
1452   else
1453     p = rfc822_parse_adrlist (p, s);
1454
1455   return p;
1456 }