2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or (at
5 * your option) any later version.
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * Copyright © 2006 Pierre Habouzit
21 * Copyright notice from original mutt:
22 * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
24 * This file is part of mutt-ng, see http://www.muttng.org/.
25 * It's licensed under the GNU General Public License,
26 * please see the file GPL in the top level source directory.
29 #include <lib-lib/lib-lib.h>
31 #include "recvattach.h"
35 /* Reads an arbitrarily long header field, and looks ahead for continuation
36 * lines. ``line'' must point to a dynamically allocated string; it is
37 * increased if more space is required to fit the whole line.
39 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
46 /* end of file or end of headers */
47 if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
52 pos += m_strlen(p + pos);
53 if (p[pos - 1] == '\n') {
56 /* remove trailing spaces. safe: p[0] is not a space */
59 } while (ISSPACE(p[pos]));
61 /* check to see if the next line is a continuation line */
63 if (c != ' ' && c != '\t') {
64 /* next line is a separate header field or EOH */
69 /* eat tabs and spaces from the beginning of the continuation line */
72 } while (c == ' ' || c == '\t');
75 /* string is still terminated because we removed at least one
76 whitespace char above */
80 if (*n < pos + STRING) {
88 /* TODO: Make that a string list somehow */
89 string_list_t *mutt_parse_references(char *s, int in_reply_to)
91 string_list_t *lst = NULL;
95 /* some mail clients add other garbage besides message-ids, so do a quick
96 * check to make sure this looks like a valid message-id
97 * some idiotic clients also break their message-ids between lines, deal
98 * with that too (give up if it's more than two lines, though)
101 for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
106 if (s[n - 1] != '>') {
113 ssize_t m = m_strlen(s);
115 if (s[m - 1] != '>') {
118 new = p_new(char, n + m + 1);
119 m_strcpy(new, n + m + 1, o);
120 m_strcpy(new + n, m + 1, s);
124 /* make sure that this really does look like a message-id.
125 * it should have exactly one @, and if we're looking at
126 * an in-reply-to header, make sure that the part before
127 * the @ has more than eight characters or it's probably
131 char *at = strchr(new, '@');
134 if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
139 tmp = p_new(string_list_t, 1);
149 int mutt_check_encoding(const char *s)
151 int tok = mime_which_token(s, -1);
159 case MIME_QUOTED_PRINTABLE:
160 return ENCQUOTEDPRINTABLE;
163 case MIME_X_UUENCODE:
170 int mutt_check_mime_type(const char *s)
174 if (!m_strcmp(s, "*") || !m_strcmp(s, ".*"))
177 tok = mime_which_token(s, -1);
179 case MIME_TEXT: return TYPETEXT;
180 case MIME_MULTIPART: return TYPEMULTIPART;
181 case MIME_APPLICATION: return TYPEAPPLICATION;
182 case MIME_MESSAGE: return TYPEMESSAGE;
183 case MIME_IMAGE: return TYPEIMAGE;
184 case MIME_AUDIO: return TYPEAUDIO;
185 case MIME_VIDEO: return TYPEVIDEO;
186 case MIME_MODEL: return TYPEMODEL;
187 default: return TYPEOTHER;
191 static parameters_t *parse_parameters(const char *s)
193 parameters_t *res = parameter_new();
201 if (*s == '=') /* parameters are fucked up, go away */
204 p = strpbrk(s, "=;");
209 /* if we hit a ; now the parameter has no value, just skip it */
215 attr = p_dupstr(s, i);
217 while (--i >= 0 && ISSPACE(attr[i])) {
220 s = skipspaces(p + 1); /* skip over the = */
223 char buffer[LONG_STRING];
227 for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
228 /* As iso-2022-* has a character of '"' with non-ascii state,
230 if (*s == 0x1b && i < ssizeof(buffer) - 2) {
231 state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
233 if (state_ascii && *s == '"')
243 val = p_dupstr(buffer, i);
245 for (p = s; *p && *p != ' ' && *p != ';'; p++);
246 val = p_dupstr(s, p - s);
249 parameter_setval(res, attr, val);
252 s = strchr(s, ';'); /* Find the next parameter */
254 break; /* no more parameters */
257 rfc2231_decode_parameters(res);
261 void mutt_parse_content_type(char *s, BODY *ct)
266 p_delete(&ct->subtype);
267 ct->parameter = NULL;
269 /* First extract any existing parameters */
270 if ((pc = strchr(s, ';')) != NULL) {
272 ct->parameter = parse_parameters(vskipspaces(pc));
274 /* Some pre-RFC1521 gateways still use the "name=filename" convention,
275 * but if a filename has already been set in the content-disposition,
276 * let that take precedence, and don't set it here */
277 pc = parameter_getval(ct->parameter, "name");
278 if (pc && !ct->filename)
279 ct->filename = m_strdup(pc);
282 /* Now get the subtype */
283 if ((subtype = strchr (s, '/'))) {
285 for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
286 ct->subtype = p_dupstr(subtype, pc - subtype);
289 /* Finally, get the major type */
290 ct->type = mutt_check_mime_type(s);
292 if (ct->type == TYPEOTHER) {
293 ct->xtype = m_strdup(s);
297 /* Some older non-MIME mailers (e.g., mailtool, elm) have a content-type
298 * field, so we can attempt to convert the type to BODY here.
304 ct->subtype = m_strdup("plain");
308 ct->subtype = m_strdup("basic");
312 ct->subtype = m_strdup("rfc822");
316 ct->type = TYPEAPPLICATION;
317 snprintf(buffer, sizeof(buffer), "x-%s", s);
318 ct->subtype = m_strdup(buffer);
322 ct->subtype = m_strdup("x-unknown");
327 /* Default character set for text types. */
328 if (ct->type == TYPETEXT) {
329 pc = parameter_getval(ct->parameter, "charset");
331 parameter_setval(ct->parameter, "charset",
332 charset_getfirst(mod_cset.assumed_charset));
337 static void parse_content_disposition(const char *s, BODY *ct)
339 if (!ascii_strncasecmp(s, "inline", 6)) {
340 ct->disposition = DISPINLINE;
341 } else if (!ascii_strncasecmp(s, "form-data", 9)) {
342 ct->disposition = DISPFORMDATA;
344 ct->disposition = DISPATTACH;
347 /* Check to see if a default filename was given */
348 if ((s = strchr (s, ';'))) {
349 parameters_t *parms = parse_parameters(vskipspaces(s));
351 if ((s = parameter_getval(parms, "filename")))
352 m_strreplace(&ct->filename, s);
353 if ((s = parameter_getval(parms, "name")))
354 ct->form_name = m_strdup(s);
359 * fp stream to read from
361 * digest 1 if reading subparts of a multipart/digest, 0
364 BODY *mutt_read_mime_header(FILE *fp, int digest)
366 BODY *body = body_new();
367 char *line = p_new(char, LONG_STRING);
368 ssize_t linelen = LONG_STRING;
371 body->hdr_offset = ftello(fp);
372 body->encoding = ENC7BIT; /* default from RFC1521 */
373 body->disposition = DISPINLINE;
374 body->type = digest ? TYPEMESSAGE : TYPETEXT;
376 while (mutt_read_rfc822_line(fp, &line, &linelen)) {
377 /* Find the value of the current header */
378 if ((p = strchr(line, ':'))) {
387 switch (mime_which_token(line, -1)) {
388 case MIME_CONTENT_TYPE:
389 mutt_parse_content_type (p, body);
392 case MIME_CONTENT_TRANSFER_ENCODING:
393 body->encoding = mutt_check_encoding (p);
396 case MIME_CONTENT_DISPOSITION:
397 parse_content_disposition(p, body);
400 case MIME_CONTENT_DESCRIPTION:
401 m_strreplace(&body->description, p);
402 rfc2047_decode(&body->description);
409 body->offset = ftello(fp); /* Mark the start of the real data */
410 if (!body->subtype) {
411 if (body->type == TYPETEXT)
412 body->subtype = m_strdup("plain");
413 if (body->type == TYPEMESSAGE)
414 body->subtype = m_strdup("rfc822");
421 void mutt_parse_part(FILE *fp, BODY *b)
427 bound = parameter_getval(b->parameter, "boundary");
428 fseeko(fp, b->offset, SEEK_SET);
429 b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
430 mime_which_token(b->subtype, -1) == MIME_DIGEST);
435 fseeko(fp, b->offset, SEEK_SET);
437 if (mutt_is_message_type(b)) {
438 b->parts = mutt_parse_messageRFC822(fp, b);
440 if (mime_which_token(b->subtype, -1) == MIME_EXTERNAL_BODY) {
441 b->parts = mutt_read_mime_header(fp, 0);
452 /* try to recover from parsing error */
455 m_strreplace(&b->subtype, "plain");
459 /* parse a MESSAGE/RFC822 body
462 * fp stream to read from
464 * parent structure which contains info about the message/rfc822
467 * NOTE: this assumes that `parent->length' has been set!
469 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
473 parent->hdr = header_new();
474 parent->hdr->offset = ftello(fp);
475 parent->hdr->env = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
477 msg = parent->hdr->content;
479 /* ignore the length given in the content-length since it could be wrong
480 and we already have the info to calculate the correct length */
481 /* if (msg->length == -1) */
482 /* if body of this message is empty, we can end up with a negative length */
483 msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
485 mutt_parse_part(fp, msg);
490 /* parse a multipart structure
493 * fp stream to read from
495 * bound body separator
497 * end_off length of the multipart body (used when the final
498 * boundary is missing to avoid reading too far)
500 * digest 1 if reading a multipart/digest, 0 otherwise
504 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
506 char buffer[LONG_STRING];
509 int blen = m_strlen(bound);
510 int final = 0; /* did we see the ending boundary? */
513 mutt_error _("multipart message has no boundary parameter!");
517 while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
520 len = m_strlen(buffer);
521 crlf = len > 1 && buffer[len - 2] == '\r';
523 if (buffer[0] == '-' && buffer[1] == '-'
524 && !m_strncmp(buffer + 2, bound, blen))
529 /* if the body is empty, we can end up with a -1 length */
530 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
531 if (b->parts && b->parts->length == 0) {
532 b->parts->length = ftello(fp) - b->parts->offset
537 /* Remove any trailing whitespace, up to the length of the boundary */
538 for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
541 /* Check for the end boundary */
542 final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
546 if (buffer[2 + blen] == '\0') {
547 BODY *new = mutt_read_mime_header(fp, digest);
550 * Consistency checking - catch
551 * bad attachment end boundaries
554 if (new->offset > end_off) {
555 body_list_wipe(&new);
560 last = &(*last)->next;
566 /* in case of missing end boundary, set the length to something reasonable */
567 if (*last && (*last)->length == 0 && !final)
568 (*last)->length = end_off - (*last)->offset;
570 /* parse recursive MIME parts */
573 for (b = head; b; b = b->next)
574 mutt_parse_part(fp, b);
580 /* parses a date string in RFC822 format:
582 * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
584 * This routine assumes that `h' has been initialized to 0. The `timezone'
585 * field is optional, defaulting to +0000 if missing.
587 time_t mutt_parse_date(const char *s, HEADER *h)
593 loc = setlocale(LC_ALL, "C");
595 if (strptime(s, "%a, %d %b %Y %H:%M:%S %z", &tm))
598 if (strptime(s, "%a, %d %b %Y %H:%M %z", &tm))
600 setlocale(LC_ALL, "");
604 setlocale(LC_ALL, "");
606 return timegm(&tm) - tz;
609 string_list_t **mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
610 short weed, short do_2047, string_list_t **user_hdrs)
612 switch (mime_which_token(line, -1)) {
613 case MIME_APPARENTLY_FROM:
614 e->from = rfc822_parse_adrlist (e->from, p);
617 case MIME_APPARENTLY_TO:
618 e->to = rfc822_parse_adrlist (e->to, p);
622 e->bcc = rfc822_parse_adrlist (e->bcc, p);
626 e->cc = rfc822_parse_adrlist (e->cc, p);
629 case MIME_CONTENT_DESCRIPTION:
631 m_strreplace(&hdr->content->description, p);
632 rfc2047_decode(&hdr->content->description);
636 case MIME_CONTENT_DISPOSITION:
638 parse_content_disposition(p, hdr->content);
641 case MIME_CONTENT_LENGTH:
643 if ((hdr->content->length = atoi(p)) < 0)
644 hdr->content->length = -1;
648 case MIME_CONTENT_TRANSFER_ENCODING:
650 hdr->content->encoding = mutt_check_encoding(p);
653 case MIME_CONTENT_TYPE:
655 mutt_parse_content_type (p, hdr->content);
659 m_strreplace(&e->date, p);
661 hdr->date_sent = mutt_parse_date (p, hdr);
665 if (hdr && mutt_parse_date (p, NULL) < time (NULL))
670 e->from = rfc822_parse_adrlist(e->from, p);
671 /* don't leave from info NULL if there's an invalid address (or
672 * whatever) in From: field; mutt would just display it as empty
673 * and mark mail/(esp.) news article as your own. aaargh! this
674 * bothered me for _years_ */
676 e->from = address_new();
677 e->from->personal = m_strdup(p);
681 case MIME_IN_REPLY_TO:
682 string_list_wipe(&e->in_reply_to);
683 e->in_reply_to = mutt_parse_references(p, 1);
688 /* HACK - mutt has, for a very short time, produced negative
689 Lines header values. Ignore them. */
690 hdr->lines = MAX(0, atoi(p));
695 /* RFC 2369. FIXME: We should ignore whitespace, but don't. */
696 if (m_strncmp(p, "NO", 2)) {
699 for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
701 if (!(end = strchr (beg, '>')))
704 /* Take the first mailto URL */
705 if (url_check_scheme (beg) == U_MAILTO) {
706 p_delete(&e->list_post);
707 e->list_post = p_dupstr(beg, end - beg);
714 case MIME_MAIL_FOLLOWUP_TO:
715 e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
718 case MIME_MAIL_REPLY_TO:
719 address_list_wipe(&e->reply_to);
720 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
723 case MIME_MESSAGE_ID:
725 const char *beg, *end;
727 /* We add a new "Message-ID:" when building a message */
728 p_delete(&e->message_id);
730 if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
731 e->message_id = p_dupstr(beg, (end - beg) + 1);
735 case MIME_MIME_VERSION:
740 case MIME_ORGANIZATION:
741 if (!e->organization && mime_which_token(p, -1) == MIME_UNKNOWN)
742 e->organization = m_strdup(p);
746 if (hdr && !hdr->received) {
747 char *d = strchr(p, ';');
749 hdr->received = mutt_parse_date(d + 1, NULL);
753 case MIME_REFERENCES:
754 string_list_wipe(&e->references);
755 e->references = mutt_parse_references(p, 0);
759 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
762 case MIME_RETURN_PATH:
763 e->return_path = rfc822_parse_adrlist(e->return_path, p);
767 e->sender = rfc822_parse_adrlist (e->sender, p);
791 e->subject = m_strdup(p);
794 case MIME_SUPERCEDES:
795 case MIME_SUPERSEDES:
797 e->supersedes = m_strdup(p);
801 e->to = rfc822_parse_adrlist(e->to, p);
805 e->x_label = m_strdup(p);
833 /* restore the original line */
834 line[m_strlen(line)] = ':';
836 if (weed && string_list_contains(Ignore, line, "*")
837 && !string_list_contains(UnIgnore, line, "*")) {
841 *user_hdrs = string_item_new();
842 (*user_hdrs)->data = m_strdup(line);
844 rfc2047_decode(&(*user_hdrs)->data);
845 return &(*user_hdrs)->next;
851 /* mutt_read_rfc822_header() -- parses a RFC822 header
855 * f stream to read from
857 * hdr header structure of current message (optional).
859 * user_hdrs If set, store user headers. Used for recall-message and
862 * weed If this parameter is set and the user has activated the
863 * $weed option, honor the header weed list for user headers.
864 * Used for recall-message.
866 * Returns: newly allocated envelope structure. You should free it by
867 * envelope_delete() when the envelope is no longer needed.
870 mutt_read_rfc822_header(FILE *f, HEADER *hdr, short user_hdrs, short weed)
872 ENVELOPE *e = envelope_new();
873 string_list_t **last = user_hdrs ? &e->userhdrs : NULL;
875 char *line = p_new(char, LONG_STRING);
876 ssize_t linelen = LONG_STRING;
879 if (hdr && !hdr->content) {
880 hdr->content = body_new();
882 /* set the defaults from RFC1521 */
883 hdr->content->type = TYPETEXT;
884 hdr->content->subtype = m_strdup("plain");
885 hdr->content->encoding = ENC7BIT;
886 hdr->content->length = -1;
888 /* RFC 2183 says this is arbitrary */
889 hdr->content->disposition = DISPINLINE;
892 while ((loc = ftello(f)),
893 mutt_read_rfc822_line(f, &line, &linelen))
895 char buf[LONG_STRING + 1] = "";
898 p = strpbrk(line, ": \t");
899 if (!p || *p != ':') {
900 /* some bogus MTAs will quote the original "From " line */
901 if (!m_strncmp(">From ", line, 6) || !m_strncmp("From ", line, 5))
902 continue; /* just ignore */
905 break; /* end of header */
908 if (rx_list_match2(SpamList, line, buf, sizeof(buf))
909 && !rx_list_match(NoSpamList, line))
911 /* if spam tag already exists, figure out how to amend it */
912 if (e->spam && *buf) {
913 if (mod_mime.spam_separator) {
914 mutt_buffer_addstr(e->spam, mod_mime.spam_separator);
916 mutt_buffer_reset(e->spam);
918 mutt_buffer_addstr(e->spam, buf);
922 e->spam = mutt_buffer_from(NULL, buf);
929 continue; /* skip empty header fields */
931 last = mutt_parse_rfc822_line(e, hdr, line, p, weed, 1, last);
937 hdr->content->hdr_offset = hdr->offset;
938 hdr->content->offset = ftello(f);
939 rfc2047_decode_envelope(e);
940 /* check for missing or invalid date */
941 if (hdr->date_sent <= 0) {
942 hdr->date_sent = hdr->received;
949 /* Compares mime types to the ok and except lists */
950 static int count_body_parts_check(string_list_t **checklist, BODY *b)
954 for (type = *checklist; type; type = type->next) {
955 ATTACH_MATCH *a = (ATTACH_MATCH *)type->data;
957 if ((a->major_int == TYPEANY || a->major_int == b->type)
958 && !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
966 static int count_body_parts (BODY *body, int flags)
974 for (bp = body; bp != NULL; bp = bp->next) {
975 /* Initial disposition is to count and not to recurse this part. */
976 int shallcount, shallrecurse, iscontainer;
977 int tok = mime_which_token(bp->subtype, -1);
979 iscontainer = bp->type == TYPEMESSAGE || bp->type == TYPEMULTIPART;
981 /* don't recurse in external bodies or multipart/alternatives */
982 shallrecurse = (bp->type == TYPEMESSAGE && tok != MIME_EXTERNAL_BODY)
983 || (bp->type == TYPEMULTIPART && tok != MIME_ALTERNATIVE);
985 /* Don't count top level containers and fundamental inlines */
986 shallcount = !(iscontainer && (flags & M_PARTS_TOPLEVEL))
987 && !(!iscontainer && bp->disposition == DISPINLINE && bp == body);
990 /* Turn off shallcount if message type is not in ok list,
991 * or if it is in except list. Check is done separately for
992 * inlines vs. attachments.
995 if (bp->disposition == DISPATTACH) {
996 if (!count_body_parts_check(&AttachAllow, bp))
998 if (count_body_parts_check(&AttachExclude, bp))
1001 if (!count_body_parts_check(&InlineAllow, bp))
1003 if (count_body_parts_check(&InlineExclude, bp))
1008 bp->attach_qualifies = shallcount;
1009 count += shallcount;
1012 bp->attach_count = count_body_parts(bp->parts,
1013 flags & ~M_PARTS_TOPLEVEL);
1014 count += bp->attach_count;
1021 int mutt_count_body_parts(HEADER *hdr, int flags)
1023 if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1024 return hdr->attach_total;
1026 if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1027 hdr->attach_total = count_body_parts(hdr->content,
1028 flags | M_PARTS_TOPLEVEL);
1030 hdr->attach_total = 0;
1032 hdr->attach_valid = 1;
1033 return hdr->attach_total;
1037 * A valid message separator looks like:
1039 * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year>
1041 bool is_from(const char *s, char *path, ssize_t pathlen, time_t *tp)
1051 if (m_strncmp("From ", s, 5) != 0)
1054 s = skipspaces(s + 5); /* skip over the From part. */
1058 for (p = s; *p && (q || !ISSPACE(*p)); p++) {
1063 else if (*p == '"') {
1072 m_strncpy(path, pathlen, s, p - s);
1074 s = vskipspaces(p + 1);
1078 loc = setlocale(LC_TIME, "C");
1079 for (int i = 0; i < 4; i++) {
1080 static char const * const formats[] = {
1081 "%a %b %d %H:%M:%S %Y",
1082 "%a %b %d %H:%M:%S %z %Y",
1083 "%a %b %d %H:%M %Y",
1084 "%a %b %d %H:%M %z %Y",
1088 p = strptime(s, formats[i], &tm);
1094 setlocale(LC_TIME, loc);
1100 *tp = timegm(&tm) - *tp;