2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or (at
5 * your option) any later version.
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * Copyright © 2006 Pierre Habouzit
21 * Copyright notice from original mutt:
22 * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
24 * This file is part of mutt-ng, see http://www.muttng.org/.
25 * It's licensed under the GNU General Public License,
26 * please see the file GPL in the top level source directory.
29 #include <lib-lib/lib-lib.h>
31 #include "recvattach.h"
// Reads one logical header field from f into *line. A following line that
// starts with a space or a tab is treated as a continuation and appended.
// *n carries the current capacity of *line.
// NOTE(review): several statements of this body, including every return
// path and the buffer growth itself, are not visible in this excerpt.
35 /* Reads an arbitrarily long header field, and looks ahead for continuation
36 * lines. ``line'' must point to a dynamically allocated string; it is
37 * increased if more space is required to fit the whole line.
39 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
46 /* end of file or end of headers */
// fgets writes at offset pos, so each chunk is appended to what the buffer
// already holds. A whitespace first character with pos == 0 also stops
// the read.
47 if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
52 pos += m_strlen(p + pos);
// A trailing newline means the physical line fitted into the buffer.
53 if (p[pos - 1] == '\n') {
56 /* remove trailing spaces. safe: p[0] is not a space */
59 } while (ISSPACE(p[pos]));
61 /* check to see if the next line is a continuation line */
63 if (c != ' ' && c != '\t') {
64 /* next line is a separate header field or EOH */
69 /* eat tabs and spaces from the beginning of the continuation line */
72 } while (c == ' ' || c == '\t');
75 /* string is still terminated because we removed at least one
76 whitespace char above */
// Fewer than STRING spare bytes left: presumably the point where *line is
// enlarged and *n updated (the statements are not visible here).
80 if (*n < pos + STRING) {
// Splits the header value s into message-id candidates and collects the
// plausible ones in a string_list_t. in_reply_to enables the stricter
// length check applied to In-Reply-To values.
// s is modified in place: strtok() overwrites the delimiters it finds and
// keeps hidden static state between calls, so this must not be interleaved
// with another strtok() user.
// NOTE(review): the list ordering and the return statement are not visible
// in this excerpt.
88 /* TODO: Make that a string list somehow */
89 string_list_t *mutt_parse_references(char *s, int in_reply_to)
91 string_list_t *lst = NULL;
95 /* some mail clients add other garbage besides message-ids, so do a quick
96 * check to make sure this looks like a valid message-id
97 * some idiotic clients also break their message-ids between lines, deal
98 * with that too (give up if it's more than two lines, though)
// Tokens are separated by space, tab or semicolon.
101 for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
106 if (s[n - 1] != '>') {
113 ssize_t m = m_strlen(s);
115 if (s[m - 1] != '>') {
// Glue the two fragments together: o goes first, s is copied at offset n
// (n is presumably the length of o) and the extra byte holds the NUL.
118 new = p_new(char, n + m + 1);
119 m_strcpy(new, n + m + 1, o);
120 m_strcpy(new + n, m + 1, s);
124 /* make sure that this really does look like a message-id.
125 * it should have exactly one @, and if we're looking at
126 * an in-reply-to header, make sure that the part before
127 * the @ has more than eight characters or it's probably
131 char *at = strchr(new, '@');
// Drop the candidate when it has no @, more than one @, or, for
// In-Reply-To, 8 or fewer characters in front of the @.
134 if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
139 tmp = p_new(string_list_t, 1);
// Translates the encoding name in s into an ENCxxx constant by switching
// on the token id returned by mime_which_token().
// Only the quoted-printable and x-uuencode cases are visible in this
// excerpt; the remaining cases and the default are not shown.
149 int mutt_check_encoding(const char *s)
151 int tok = mime_which_token(s, -1);
159 case MIME_QUOTED_PRINTABLE:
160 return ENCQUOTEDPRINTABLE;
163 case MIME_X_UUENCODE:
// Translates a major MIME type name into its TYPExxx constant.
// Names that mime_which_token() does not map to one of the listed tokens
// yield TYPEOTHER.
170 int mutt_check_mime_type(const char *s)
// The wildcard spellings are handled before tokenizing; the value returned
// for them is on a line not visible in this excerpt.
174 if (!m_strcmp(s, "*") || !m_strcmp(s, ".*"))
177 tok = mime_which_token(s, -1);
179 case MIME_TEXT: return TYPETEXT;
180 case MIME_MULTIPART: return TYPEMULTIPART;
181 case MIME_APPLICATION: return TYPEAPPLICATION;
182 case MIME_MESSAGE: return TYPEMESSAGE;
183 case MIME_IMAGE: return TYPEIMAGE;
184 case MIME_AUDIO: return TYPEAUDIO;
185 case MIME_VIDEO: return TYPEVIDEO;
186 case MIME_MODEL: return TYPEMODEL;
187 default: return TYPEOTHER;
// Parses a semicolon separated list of attribute=value pairs starting at s
// into a parameter_t list, then hands the list to
// rfc2231_decode_parameters().
// NOTE(review): the loop header, the linking of new nodes and the return
// statement are not visible in this excerpt.
191 static parameter_t *parse_parameters(const char *s)
193 parameter_t *res = NULL;
// list starts at the head slot; presumably it is advanced to the tail slot
// as nodes are appended.
194 parameter_t **list = &res;
202 if (*s == '=') /* parameters are malformed, give up */
205 p = strpbrk(s, "=;");
210 /* if we hit a ; now the parameter has no value, just skip it */
216 new = parameter_new();
217 new->attribute = p_dupstr(s, i);
// Strip whitespace left at the end of the attribute name.
219 while (--i >= 0 && ISSPACE(new->attribute[i])) {
220 new->attribute[i] = '\0';
222 s = skipspaces(p + 1); /* skip over the = */
225 char buffer[LONG_STRING];
// The loop bound leaves one byte of buffer unused, so the scan stops
// before the buffer is full.
229 for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
230 /* As iso-2022-* has a character of '"' with non-ascii state,
// state_ascii becomes true only for the sequences ESC ( B and ESC ( J.
232 if (*s == 0x1b && i < ssizeof(buffer) - 2) {
233 state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
235 if (state_ascii && *s == '"')
245 new->value = p_dupstr(buffer, i);
// Value taken directly from s: it runs up to the next space or semicolon.
247 for (p = s; *p && *p != ' ' && *p != ';'; p++);
248 new->value = p_dupstr(s, p - s);
254 s = strchr(s, ';'); /* Find the next parameter */
256 break; /* no more parameters */
259 rfc2231_decode_parameters(&res);
// Fills ct from the Content-Type value s: parameter list, subtype, major
// type, and a fallback file name. The previous subtype and parameter list
// of ct are released first.
// NOTE(review): s is not const; presumably it is cut at the semicolon and
// the slash on lines not visible in this excerpt.
263 void mutt_parse_content_type(char *s, BODY *ct)
268 p_delete(&ct->subtype);
269 parameter_list_wipe(&ct->parameter);
271 /* First extract any existing parameters */
272 if ((pc = strchr(s, ';')) != NULL) {
274 ct->parameter = parse_parameters(vskipspaces(pc));
276 /* Some pre-RFC1521 gateways still use the "name=filename" convention,
277 * but if a filename has already been set in the content-disposition,
278 * let that take precedence, and don't set it here */
279 pc = parameter_getval(ct->parameter, "name");
280 if (pc && !ct->filename)
281 ct->filename = m_strdup(pc);
284 /* Now get the subtype */
285 if ((subtype = strchr (s, '/'))) {
// The subtype ends at the first whitespace character or semicolon.
287 for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
288 ct->subtype = p_dupstr(subtype, pc - subtype);
291 /* Finally, get the major type */
292 ct->type = mutt_check_mime_type(s);
// Unknown major types keep their original spelling in xtype.
294 if (ct->type == TYPEOTHER) {
295 ct->xtype = m_strdup(s);
299 /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
300 * field, so we can attempt to convert the type to BODY here.
// Fallback subtypes. Which major type selects which value is decided on
// lines not visible in this excerpt.
306 ct->subtype = m_strdup("plain");
310 ct->subtype = m_strdup("basic");
314 ct->subtype = m_strdup("rfc822");
318 ct->type = TYPEAPPLICATION;
319 snprintf(buffer, sizeof(buffer), "x-%s", s);
320 ct->subtype = m_strdup(buffer);
324 ct->subtype = m_strdup("x-unknown");
329 /* Default character set for text types. */
330 if (ct->type == TYPETEXT) {
331 pc = parameter_getval(ct->parameter, "charset");
// Presumably only reached when no charset parameter was supplied (the
// guard is not visible here).
333 parameter_setval(&ct->parameter, "charset",
334 charset_getfirst(mod_cset.assumed_charset));
// Sets ct->disposition from the Content-Disposition value s and picks up
// the filename and name parameters when present.
// Only the leading 6 or 9 characters are compared, ignoring ASCII case, so
// any value that merely starts with inline or form-data is accepted.
// Everything else is treated as an attachment.
339 static void parse_content_disposition(const char *s, BODY *ct)
341 if (!ascii_strncasecmp(s, "inline", 6)) {
342 ct->disposition = DISPINLINE;
343 } else if (!ascii_strncasecmp(s, "form-data", 9)) {
344 ct->disposition = DISPFORMDATA;
346 ct->disposition = DISPATTACH;
349 /* Check to see if a default filename was given */
350 if ((s = strchr (s, ';'))) {
351 parameter_t *parms = parse_parameters(vskipspaces(s));
// s is reused below as a scratch pointer into parms.
353 if ((s = parameter_getval(parms, "filename")))
354 m_strreplace(&ct->filename, s);
// NOTE(review): form_name is assigned without releasing a previous value,
// unlike filename which goes through m_strreplace; confirm it cannot
// already be set when this runs.
355 if ((s = parameter_getval(parms, "name")))
356 ct->form_name = m_strdup(s);
// The values were copied above, so the temporary list can be released.
358 parameter_list_wipe(&parms);
// Reads the header block of one MIME part from fp and returns a new BODY
// describing it. Defaults applied before reading: 7bit encoding, inline
// disposition, and type message when digest is set, text otherwise.
// NOTE(review): the opening of the original header comment, the release of
// the line buffer and the return statement are not visible in this excerpt.
363 * fp stream to read from
365 * digest 1 if reading subparts of a multipart/digest, 0
368 BODY *mutt_read_mime_header(FILE *fp, int digest)
370 BODY *body = body_new();
371 char *line = p_new(char, LONG_STRING);
372 ssize_t linelen = LONG_STRING;
// Position of the first header line of this part.
375 body->hdr_offset = ftello(fp);
376 body->encoding = ENC7BIT; /* default from RFC1521 */
377 body->disposition = DISPINLINE;
378 body->type = digest ? TYPEMESSAGE : TYPETEXT;
380 while (mutt_read_rfc822_line(fp, &line, &linelen)) {
381 /* Find the value of the current header */
382 if ((p = strchr(line, ':'))) {
// Dispatch on the field name; p is passed to the handlers as the value.
391 switch (mime_which_token(line, -1)) {
392 case MIME_CONTENT_TYPE:
393 mutt_parse_content_type (p, body);
396 case MIME_CONTENT_TRANSFER_ENCODING:
397 body->encoding = mutt_check_encoding (p);
400 case MIME_CONTENT_DISPOSITION:
401 parse_content_disposition(p, body);
404 case MIME_CONTENT_DESCRIPTION:
405 m_strreplace(&body->description, p);
406 rfc2047_decode(&body->description);
413 body->offset = ftello(fp); /* Mark the start of the real data */
// No Content-Type subtype was seen: fall back to plain or rfc822 depending
// on the default type chosen above.
414 if (!body->subtype) {
415 if (body->type == TYPETEXT)
416 body->subtype = m_strdup("plain");
417 if (body->type == TYPEMESSAGE)
418 body->subtype = m_strdup("rfc822");
// Parses the children of b from fp and stores them in b->parts.
// Three parsers are visible: mutt_parse_multipart for bodies with a
// boundary parameter, mutt_parse_messageRFC822 for message types, and
// mutt_read_mime_header for external-body parts.
// NOTE(review): the switch or conditions choosing between these paths are
// not visible in this excerpt.
425 void mutt_parse_part(FILE *fp, BODY *b)
431 bound = parameter_getval(b->parameter, "boundary");
432 fseeko(fp, b->offset, SEEK_SET);
// The end offset handed down is the end of this body; the last argument
// tells the multipart parser whether the subtype is digest.
433 b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
434 mime_which_token(b->subtype, -1) == MIME_DIGEST);
439 fseeko(fp, b->offset, SEEK_SET);
441 if (mutt_is_message_type(b)) {
442 b->parts = mutt_parse_messageRFC822(fp, b);
444 if (mime_which_token(b->subtype, -1) == MIME_EXTERNAL_BODY) {
445 b->parts = mutt_read_mime_header(fp, 0);
456 /* try to recover from parsing error */
// Recovery rewrites the subtype to plain; the accompanying type change, if
// any, is not visible here.
459 m_strreplace(&b->subtype, "plain");
// Parses the message embedded in a message/rfc822 part: allocates
// parent->hdr, reads its header block into parent->hdr->env, computes the
// length of the embedded body and parses that body recursively.
// NOTE(review): the return statement is not visible in this excerpt.
463 /* parse a MESSAGE/RFC822 body
466 * fp stream to read from
468 * parent structure which contains info about the message/rfc822
471 * NOTE: this assumes that `parent->length' has been set!
473 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
477 parent->hdr = header_new();
478 parent->hdr->offset = ftello(fp);
479 parent->hdr->env = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
// The header reader created and filled hdr->content for the inner message.
481 msg = parent->hdr->content;
483 /* ignore the length given in the content-length since it could be wrong
484 and we already have the info to calculate the correct length */
485 /* if (msg->length == -1) */
486 /* if body of this message is empty, we can end up with a negative length */
// Inner body length = parent length minus the bytes taken by the inner
// header block, clamped at zero.
487 msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
489 mutt_parse_part(fp, msg);
// Scans fp line by line up to end_off, splitting the stream into parts at
// every line that starts with two dashes followed by bound, and then
// parses each collected part recursively.
// NOTE(review): declarations of head, last, b, len, crlf and i, the early
// return after the error message and the final return are not visible in
// this excerpt.
494 /* parse a multipart structure
497 * fp stream to read from
499 * bound body separator
501 * end_off length of the multipart body (used when the final
502 * boundary is missing to avoid reading too far)
504 * digest 1 if reading a multipart/digest, 0 otherwise
508 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
510 char buffer[LONG_STRING];
513 int blen = m_strlen(bound);
514 int final = 0; /* did we see the ending boundary? */
517 mutt_error _("multipart message has no boundary parameter!");
521 while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
524 len = m_strlen(buffer);
// crlf is 1 when the line ends in CR LF, so the CR can be discounted from
// the length of the preceding part.
525 crlf = len > 1 && buffer[len - 2] == '\r';
527 if (buffer[0] == '-' && buffer[1] == '-'
528 && !m_strncmp(buffer + 2, bound, blen))
533 /* if the body is empty, we can end up with a -1 length */
534 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
535 if (b->parts && b->parts->length == 0) {
536 b->parts->length = ftello(fp) - b->parts->offset
541 /* Remove any trailing whitespace, up to the length of the boundary */
542 for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
545 /* Check for the end boundary */
// NOTE(review): the boundary text occupies indices 2 .. blen + 1 (see the
// comparison against buffer + 2 above), so the closing dashes would sit at
// blen + 2 and blen + 3. This test reads blen + 3 and blen + 4, and
// after trimming it may also read past the terminating NUL. Confirm the
// offsets against the complete file.
546 final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
// Nothing follows the boundary text: a new part starts here.
550 if (buffer[2 + blen] == '\0') {
551 BODY *new = mutt_read_mime_header(fp, digest);
554 * Consistency checking - catch
555 * bad attachment end boundaries
558 if (new->offset > end_off) {
559 body_list_wipe(&new);
564 last = &(*last)->next;
570 /* in case of missing end boundary, set the length to something reasonable */
571 if (*last && (*last)->length == 0 && !final)
572 (*last)->length = end_off - (*last)->offset;
574 /* parse recursive MIME parts */
577 for (b = head; b; b = b->next)
578 mutt_parse_part(fp, b);
// Converts an RFC 822 date string to a time_t. Two strptime layouts are
// tried, with and without a seconds field, both ending in a numeric zone.
// NOTE(review): LC_ALL is switched to C for strptime and afterwards set to
// the empty string (the environment default) rather than to the saved loc,
// so a locale chosen programmatically would be lost. is_from in this file
// restores loc instead; confirm which behaviour is intended.
// NOTE(review): where tz comes from and how h is used are not visible in
// this excerpt. Other functions in this file pass NULL for h.
584 /* parses a date string in RFC822 format:
586 * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
588 * This routine assumes that `h' has been initialized to 0. the `timezone'
589 * field is optional, defaulting to +0000 if missing.
591 time_t mutt_parse_date(const char *s, HEADER *h)
597 loc = setlocale(LC_ALL, "C");
599 if (strptime(s, "%a, %d %b %Y %H:%M:%S %z", &tm))
602 if (strptime(s, "%a, %d %b %Y %H:%M %z", &tm))
604 setlocale(LC_ALL, "");
608 setlocale(LC_ALL, "");
// timegm treats tm as UTC; tz is subtracted from that result.
610 return timegm(&tm) - tz;
// Dispatches one header field to the matching ENVELOPE or HEADER member.
// line holds the field name (it is the token given to mime_which_token)
// and p the field value.
// Fields that are not consumed can be appended to the user_hdrs list. In
// that case the function returns the address of the new tail slot so the
// caller can keep appending (see the final return of this block).
// NOTE(review): many case labels, break statements and guards are missing
// from this excerpt. In particular the statements touching hdr->content
// are presumably protected by a hdr check that is not visible, and the use
// of do_2047 cannot be seen. Confirm against the complete file.
613 string_list_t **mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
614 short weed, short do_2047, string_list_t **user_hdrs)
616 switch (mime_which_token(line, -1)) {
617 case MIME_APPARENTLY_FROM:
618 e->from = rfc822_parse_adrlist (e->from, p);
621 case MIME_APPARENTLY_TO:
622 e->to = rfc822_parse_adrlist (e->to, p);
626 e->bcc = rfc822_parse_adrlist (e->bcc, p);
630 e->cc = rfc822_parse_adrlist (e->cc, p);
633 case MIME_CONTENT_DESCRIPTION:
635 m_strreplace(&hdr->content->description, p);
636 rfc2047_decode(&hdr->content->description);
640 case MIME_CONTENT_DISPOSITION:
642 parse_content_disposition(p, hdr->content);
645 case MIME_CONTENT_LENGTH:
// Negative values are normalised to -1.
// NOTE(review): atoi() cannot report overflow or non-numeric input and
// yields an int; consider strtol-style parsing if this value matters.
647 if ((hdr->content->length = atoi(p)) < 0)
648 hdr->content->length = -1;
652 case MIME_CONTENT_TRANSFER_ENCODING:
654 hdr->content->encoding = mutt_check_encoding(p);
657 case MIME_CONTENT_TYPE:
659 mutt_parse_content_type (p, hdr->content);
663 m_strreplace(&e->date, p);
665 hdr->date_sent = mutt_parse_date (p, hdr);
// True when the parsed date lies in the past; the action taken is not
// visible in this excerpt.
669 if (hdr && mutt_parse_date (p, NULL) < time (NULL))
674 case MIME_FOLLOWUP_TO:
// Only the first occurrence is kept.
675 if (!e->followup_to) {
677 e->followup_to = m_strdup(skipspaces(p));
683 e->from = rfc822_parse_adrlist(e->from, p);
684 /* don't leave from info NULL if there's an invalid address (or
685 * whatever) in From: field; mutt would just display it as empty
686 * and mark mail/(esp.) news article as your own. aaargh! this
687 * bothered me for _years_ */
689 e->from = address_new();
690 e->from->personal = m_strdup(p);
694 case MIME_IN_REPLY_TO:
// A later occurrence replaces the earlier list.
695 string_list_wipe(&e->in_reply_to);
696 e->in_reply_to = mutt_parse_references(p, 1);
701 /* HACK - mutt has, for a very short time, produced negative
702 Lines header values. Ignore them. */
703 hdr->lines = MAX(0, atoi(p));
708 /* RFC 2369. FIXME: We should ignore whitespace, but don't. */
// Only values that do not start with NO are scanned for URLs.
709 if (m_strncmp(p, "NO", 2)) {
712 for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
714 if (!(end = strchr (beg, '>')))
717 /* Take the first mailto URL */
718 if (url_check_scheme (beg) == U_MAILTO) {
719 p_delete(&e->list_post);
720 e->list_post = p_dupstr(beg, end - beg);
727 case MIME_MAIL_FOLLOWUP_TO:
728 e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
731 case MIME_MAIL_REPLY_TO:
// Unlike the plain reply_to assignment further down, this path discards
// addresses collected so far before parsing.
732 address_list_wipe(&e->reply_to);
733 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
736 case MIME_MESSAGE_ID:
738 const char *beg, *end;
740 /* We add a new "Message-ID:" when building a message */
741 p_delete(&e->message_id);
// The + 1 keeps the closing > in the stored id.
743 if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
744 e->message_id = p_dupstr(beg, (end - beg) + 1);
748 case MIME_MIME_VERSION:
754 case MIME_NEWSGROUPS:
755 p_delete(&e->newsgroups);
757 e->newsgroups = m_strdup(skipspaces(p));
761 case MIME_ORGANIZATION:
// Kept only once, and only when the value itself is not a known token.
762 if (!e->organization && mime_which_token(p, -1) == MIME_UNKNOWN)
763 e->organization = m_strdup(p);
// Only the first occurrence sets hdr->received; the date is taken from the
// text after the semicolon.
767 if (hdr && !hdr->received) {
768 char *d = strchr(p, ';');
770 hdr->received = mutt_parse_date(d + 1, NULL);
774 case MIME_REFERENCES:
775 string_list_wipe(&e->references);
776 e->references = mutt_parse_references(p, 0);
780 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
783 case MIME_RETURN_PATH:
784 e->return_path = rfc822_parse_adrlist(e->return_path, p);
788 e->sender = rfc822_parse_adrlist (e->sender, p);
812 e->subject = m_strdup(p);
// Both spellings of the field name are accepted.
815 case MIME_SUPERCEDES:
816 case MIME_SUPERSEDES:
818 e->supersedes = m_strdup(p);
822 e->to = rfc822_parse_adrlist(e->to, p);
826 e->x_label = m_strdup(p);
832 e->xref = m_strdup(p);
861 /* restore the original line */
// Writing the colon at the end of the name implies the separator was
// replaced by a NUL earlier (that statement is not visible here).
862 line[m_strlen(line)] = ':';
// Fields on the Ignore list are dropped unless UnIgnore lists them too.
864 if (weed && string_list_contains(Ignore, line, "*")
865 && !string_list_contains(UnIgnore, line, "*")) {
// Append a node holding a decoded copy of the complete field.
869 *user_hdrs = string_item_new();
870 (*user_hdrs)->data = m_strdup(line);
872 rfc2047_decode(&(*user_hdrs)->data);
873 return &(*user_hdrs)->next;
// Reads header fields from f until the header block ends and builds a new
// ENVELOPE from them. When hdr is given and has no content body yet, one
// is created with text/plain, 7bit, inline and a length of -1.
// NOTE(review): the release of the line buffer, several guards and the
// return statement are not visible in this excerpt.
879 /* mutt_read_rfc822_header() -- parses a RFC822 header
883 * f stream to read from
885 * hdr header structure of current message (optional).
887 * user_hdrs If set, store user headers. Used for recall-message and
890 * weed If this parameter is set and the user has activated the
891 * $weed option, honor the header weed list for user headers.
892 * Used for recall-message.
894 * Returns: newly allocated envelope structure. You should free it by
895 * envelope_delete() when the envelope is no longer needed.
898 mutt_read_rfc822_header(FILE *f, HEADER *hdr, short user_hdrs, short weed)
900 ENVELOPE *e = envelope_new();
// last is the append position for user headers, or NULL when they are not
// collected.
901 string_list_t **last = user_hdrs ? &e->userhdrs : NULL;
903 char *line = p_new(char, LONG_STRING);
904 ssize_t linelen = LONG_STRING;
907 if (hdr && !hdr->content) {
908 hdr->content = body_new();
910 /* set the defaults from RFC1521 */
911 hdr->content->type = TYPETEXT;
912 hdr->content->subtype = m_strdup("plain");
913 hdr->content->encoding = ENC7BIT;
914 hdr->content->length = -1;
916 /* RFC 2183 says this is arbitrary */
917 hdr->content->disposition = DISPINLINE;
// loc records the stream position before each field is read; its use is
// not visible in this excerpt.
920 while ((loc = ftello(f)),
921 mutt_read_rfc822_line(f, &line, &linelen))
923 char buf[LONG_STRING + 1] = "";
// Find the end of the field name; anything but a colon there means the
// line is not a regular header field.
926 p = strpbrk(line, ": \t");
927 if (!p || *p != ':') {
928 /* some bogus MTAs will quote the original "From " line */
929 if (!m_strncmp(">From ", line, 6) || !m_strncmp("From ", line, 5))
930 continue; /* just ignore */
933 break; /* end of header */
// buf presumably receives the tag text produced by the SpamList match.
936 if (rx_list_match2(SpamList, line, buf, sizeof(buf))
937 && !rx_list_match(NoSpamList, line))
939 /* if spam tag already exists, figure out how to amend it */
940 if (e->spam && *buf) {
// With a separator configured the new tag is appended after it; the
// reset call below presumably belongs to the branch without a separator.
941 if (mod_mime.spam_separator) {
942 mutt_buffer_addstr(e->spam, mod_mime.spam_separator);
944 mutt_buffer_reset(e->spam);
946 mutt_buffer_addstr(e->spam, buf);
950 e->spam = mutt_buffer_from(NULL, buf);
957 continue; /* skip empty header fields */
959 last = mutt_parse_rfc822_line(e, hdr, line, p, weed, 1, last);
// NOTE(review): hdr is dereferenced from here on although it is documented
// as optional; the guard is presumably on a line not visible here.
965 hdr->content->hdr_offset = hdr->offset;
966 hdr->content->offset = ftello(f);
967 rfc2047_decode_envelope(e);
968 /* check for missing or invalid date */
969 if (hdr->date_sent <= 0) {
970 hdr->date_sent = hdr->received;
// Walks *checklist and tests body b against each ATTACH_MATCH entry.
// An entry matches when its major type is TYPEANY or equals b->type and
// its minor_rx regular expression matches b->subtype (regexec returns 0
// on a match).
// NOTE(review): the values returned for match and no match are not
// visible in this excerpt.
977 /* Compares mime types to the ok and except lists */
978 static int count_body_parts_check(string_list_t **checklist, BODY *b)
982 for (type = *checklist; type; type = type->next) {
983 ATTACH_MATCH *a = (ATTACH_MATCH *)type->data;
985 if ((a->major_int == TYPEANY || a->major_int == b->type)
986 && !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
// Counts the parts in the sibling list starting at body that qualify as
// attachments, descending into container parts. Each part gets its
// attach_qualifies flag set, and attach_count is set from the recursive
// call.
// NOTE(review): the declaration of count, the statements that clear
// shallcount, the guard around the recursion and the return statement are
// not visible in this excerpt.
994 static int count_body_parts (BODY *body, int flags)
1002 for (bp = body; bp != NULL; bp = bp->next) {
1003 /* Initial disposition is to count and not to recurse this part. */
1004 int shallcount, shallrecurse, iscontainer;
1005 int tok = mime_which_token(bp->subtype, -1);
1007 iscontainer = bp->type == TYPEMESSAGE || bp->type == TYPEMULTIPART;
1009 /* don't recurse in external bodies or multipart/alternatives */
1010 shallrecurse = (bp->type == TYPEMESSAGE && tok != MIME_EXTERNAL_BODY)
1011 || (bp->type == TYPEMULTIPART && tok != MIME_ALTERNATIVE);
1013 /* Don't count top level containers and fundamental inlines */
// The second clause excludes a first part (bp == body) that is an inline
// non-container.
1014 shallcount = !(iscontainer && (flags & M_PARTS_TOPLEVEL))
1015 && !(!iscontainer && bp->disposition == DISPINLINE && bp == body);
1018 /* Turn off shallcount if message type is not in ok list,
1019 * or if it is in except list. Check is done separately for
1020 * inlines vs. attachments.
// Attachments are checked against the Attach lists; the Inline lists
// below presumably apply to every other disposition.
1023 if (bp->disposition == DISPATTACH) {
1024 if (!count_body_parts_check(&AttachAllow, bp))
1026 if (count_body_parts_check(&AttachExclude, bp))
1029 if (!count_body_parts_check(&InlineAllow, bp))
1031 if (count_body_parts_check(&InlineExclude, bp))
1036 bp->attach_qualifies = shallcount;
1037 count += shallcount;
// Children are counted with the top-level flag cleared and the subtotal is
// stored on the parent before being added to the running count.
1040 bp->attach_count = count_body_parts(bp->parts,
1041 flags & ~M_PARTS_TOPLEVEL);
1042 count += bp->attach_count;
// Returns the number of qualifying attachments of hdr, caching the result
// in hdr->attach_total.
// flags: M_PARTS_RECOUNT forces a fresh count even when a cached value is
// marked valid.
1049 int mutt_count_body_parts(HEADER *hdr, int flags)
// Cached result is reused unless a recount was requested.
1051 if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1052 return hdr->attach_total;
// Counting only happens when at least one of the four lists is set; the
// walk starts with the top-level flag added.
1054 if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1055 hdr->attach_total = count_body_parts(hdr->content,
1056 flags | M_PARTS_TOPLEVEL);
// Presumably the else branch: no list is set, so the total is zero.
1058 hdr->attach_total = 0;
1060 hdr->attach_valid = 1;
1061 return hdr->attach_total;
// Tests whether s is a message separator line beginning with From and a
// space. On the way it copies the return path into path through
// m_strncpy with pathlen as the size, and computes a timestamp for *tp.
// NOTE(review): the opening of the original comment, the checks for a NULL
// path or tp, the early returns and the final return are not visible in
// this excerpt.
1065 * A valid message separator looks like:
1067 * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year>
1069 bool is_from(const char *s, char *path, ssize_t pathlen, time_t *tp)
1079 if (m_strncmp("From ", s, 5) != 0)
1082 s = skipspaces(s + 5); /* skip over the From part. */
// q is presumably the inside-quotes flag: whitespace ends the return path
// only while q is clear.
1086 for (p = s; *p && (q || !ISSPACE(*p)); p++) {
1091 else if (*p == '"') {
1100 m_strncpy(path, pathlen, s, p - s);
1102 s = vskipspaces(p + 1);
// strptime runs with LC_TIME set to C so that day and month names are
// parsed independently of the user locale.
// NOTE(review): loc is the pointer returned by setlocale; the C standard
// allows a later setlocale call to overwrite that string. Confirm that it
// is still valid when passed back below.
1106 loc = setlocale(LC_TIME, "C");
// Four layouts are tried: with or without seconds, and with or without a
// numeric zone in front of the year.
1107 for (int i = 0; i < 4; i++) {
1108 static char const * const formats[] = {
1109 "%a %b %d %H:%M:%S %Y",
1110 "%a %b %d %H:%M:%S %z %Y",
1111 "%a %b %d %H:%M %Y",
1112 "%a %b %d %H:%M %z %Y",
1116 p = strptime(s, formats[i], &tm);
1122 setlocale(LC_TIME, loc);
// *tp presumably holds a zone offset at this point; it is subtracted from
// the UTC conversion of tm.
1128 *tp = timegm(&tm) - *tp;