2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or (at
5 * your option) any later version.
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * Copyright © 2006 Pierre Habouzit
21 * Copyright notice from original mutt:
22 * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
24 * This file is part of mutt-ng, see http://www.muttng.org/.
25 * It's licensed under the GNU General Public License,
26 * please see the file GPL in the top level source directory.
29 #include <lib-lib/lib-lib.h>
31 #include "recvattach.h"
35 /* Reads an arbitrarily long header field, and looks ahead for continuation
36 * lines. ``line'' must point to a dynamically allocated string; the buffer
37 * is enlarged if more space is required to fit the whole line.
39 ssize_t mutt_read_rfc822_line(FILE *f, char **line, ssize_t *n)
46 /* end of file or end of headers */
47 if (!fgets(p + pos, *n - pos, f) || (ISSPACE(*p) && pos == 0)) {
52 pos += m_strlen(p + pos);
53 if (p[pos - 1] == '\n') {
56 /* remove trailing spaces. safe: p[0] is not a space */
59 } while (ISSPACE(p[pos]));
61 /* check to see if the next line is a continuation line */
63 if (c != ' ' && c != '\t') {
64 /* next line is a separate header field or EOH */
69 /* eat tabs and spaces from the beginning of the continuation line */
72 } while (c == ' ' || c == '\t');
75 /* string is still terminated because we removed at least one
76 whitespace char above */
80 if (*n < pos + STRING) {
88 /* TODO: Make that a string list somehow */
89 string_list_t *mutt_parse_references(char *s, int in_reply_to)
91 string_list_t *lst = NULL;
95 /* some mail clients add other garbage besides message-ids, so do a quick
96 * check to make sure this looks like a valid message-id
97 * some idiotic clients also break their message-ids between lines, deal
98 * with that too (give up if it's more than two lines, though)
101 for (s = strtok(s, " \t;"); s; s = strtok(NULL, " \t;")) {
106 if (s[n - 1] != '>') {
113 ssize_t m = m_strlen(s);
115 if (s[m - 1] != '>') {
118 new = p_new(char, n + m + 1);
119 m_strcpy(new, n + m + 1, o);
120 m_strcpy(new + n, m + 1, s);
124 /* make sure that this really does look like a message-id.
125 * it should have exactly one @, and if we're looking at
126 * an in-reply-to header, make sure that the part before
127 * the @ has more than eight characters or it's probably
131 char *at = strchr(new, '@');
134 if (!at || strchr(at + 1, '@') || (in_reply_to && at - new <= 8)) {
139 tmp = p_new(string_list_t, 1);
149 int mutt_check_encoding(const char *s)
151 int tok = mime_which_token(s, -1);
159 case MIME_QUOTED_PRINTABLE:
160 return ENCQUOTEDPRINTABLE;
163 case MIME_X_UUENCODE:
170 int mutt_check_mime_type(const char *s)
174 if (!m_strcmp(s, "*") || !m_strcmp(s, ".*"))
177 tok = mime_which_token(s, -1);
179 case MIME_TEXT: return TYPETEXT;
180 case MIME_MULTIPART: return TYPEMULTIPART;
181 case MIME_APPLICATION: return TYPEAPPLICATION;
182 case MIME_MESSAGE: return TYPEMESSAGE;
183 case MIME_IMAGE: return TYPEIMAGE;
184 case MIME_AUDIO: return TYPEAUDIO;
185 case MIME_VIDEO: return TYPEVIDEO;
186 case MIME_MODEL: return TYPEMODEL;
187 default: return TYPEOTHER;
191 static parameter_t *parse_parameters(const char *s)
193 parameter_t *res = NULL;
194 parameter_t **list = &res;
202 if (*s == '=') /* parameters are fucked up, go away */
205 p = strpbrk(s, "=;");
210 /* if we hit a ; now the parameter has no value, just skip it */
216 new = parameter_new();
217 new->attribute = p_dupstr(s, i);
219 while (--i >= 0 && ISSPACE(new->attribute[i])) {
220 new->attribute[i] = '\0';
222 s = skipspaces(p + 1); /* skip over the = */
225 char buffer[LONG_STRING];
229 for (i = 0; *s && i < ssizeof(buffer) - 1; i++, s++) {
230 /* As iso-2022-* has a character of '"' with non-ascii state,
232 if (*s == 0x1b && i < ssizeof(buffer) - 2) {
233 state_ascii = s[1] == '(' && (s[2] == 'B' || s[2] == 'J');
235 if (state_ascii && *s == '"')
245 new->value = p_dupstr(buffer, i);
247 for (p = s; *p && *p != ' ' && *p != ';'; p++);
248 new->value = p_dupstr(s, p - s);
254 s = strchr(s, ';'); /* Find the next parameter */
256 break; /* no more parameters */
259 rfc2231_decode_parameters(&res);
263 void mutt_parse_content_type(char *s, BODY *ct)
268 p_delete(&ct->subtype);
269 parameter_list_wipe(&ct->parameter);
271 /* First extract any existing parameters */
272 if ((pc = strchr(s, ';')) != NULL) {
274 ct->parameter = parse_parameters(vskipspaces(pc));
276 /* Some pre-RFC1521 gateways still use the "name=filename" convention,
277 * but if a filename has already been set in the content-disposition,
278 * let that take precedence, and don't set it here */
279 pc = parameter_getval(ct->parameter, "name");
280 if (pc && !ct->filename)
281 ct->filename = m_strdup(pc);
284 /* Now get the subtype */
285 if ((subtype = strchr (s, '/'))) {
287 for (pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++);
288 ct->subtype = p_dupstr(subtype, pc - subtype);
291 /* Finally, get the major type */
292 ct->type = mutt_check_mime_type(s);
294 if (ct->type == TYPEOTHER) {
295 ct->xtype = m_strdup(s);
299 /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
300 * field, so we can attempt to convert the type to BODY here.
306 ct->subtype = m_strdup("plain");
310 ct->subtype = m_strdup("basic");
314 ct->subtype = m_strdup("rfc822");
318 ct->type = TYPEAPPLICATION;
319 snprintf(buffer, sizeof(buffer), "x-%s", s);
320 ct->subtype = m_strdup(buffer);
324 ct->subtype = m_strdup("x-unknown");
329 /* Default character set for text types. */
330 if (ct->type == TYPETEXT) {
331 pc = parameter_getval(ct->parameter, "charset");
333 parameter_setval(&ct->parameter, "charset",
334 charset_getfirst(mod_cset.assumed_charset));
339 static void parse_content_disposition(const char *s, BODY *ct)
341 if (!ascii_strncasecmp(s, "inline", 6)) {
342 ct->disposition = DISPINLINE;
343 } else if (!ascii_strncasecmp(s, "form-data", 9)) {
344 ct->disposition = DISPFORMDATA;
346 ct->disposition = DISPATTACH;
349 /* Check to see if a default filename was given */
350 if ((s = strchr (s, ';'))) {
351 parameter_t *parms = parse_parameters(vskipspaces(s));
353 if ((s = parameter_getval(parms, "filename")))
354 m_strreplace(&ct->filename, s);
355 if ((s = parameter_getval(parms, "name")))
356 ct->form_name = m_strdup(s);
358 parameter_list_wipe(&parms);
363 * fp stream to read from
365 * digest 1 if reading subparts of a multipart/digest, 0
368 BODY *mutt_read_mime_header(FILE *fp, int digest)
370 BODY *body = body_new();
371 char *line = p_new(char, LONG_STRING);
372 ssize_t linelen = LONG_STRING;
375 body->hdr_offset = ftello(fp);
376 body->encoding = ENC7BIT; /* default from RFC1521 */
377 body->disposition = DISPINLINE;
378 body->type = digest ? TYPEMESSAGE : TYPETEXT;
380 while (mutt_read_rfc822_line(fp, &line, &linelen)) {
381 /* Find the value of the current header */
382 if ((p = strchr(line, ':'))) {
391 switch (mime_which_token(line, -1)) {
392 case MIME_CONTENT_TYPE:
393 mutt_parse_content_type (p, body);
396 case MIME_CONTENT_TRANSFER_ENCODING:
397 body->encoding = mutt_check_encoding (p);
400 case MIME_CONTENT_DISPOSITION:
401 parse_content_disposition(p, body);
404 case MIME_CONTENT_DESCRIPTION:
405 m_strreplace(&body->description, p);
406 rfc2047_decode(&body->description);
413 body->offset = ftello(fp); /* Mark the start of the real data */
414 if (!body->subtype) {
415 if (body->type == TYPETEXT)
416 body->subtype = m_strdup("plain");
417 if (body->type == TYPEMESSAGE)
418 body->subtype = m_strdup("rfc822");
425 void mutt_parse_part(FILE *fp, BODY *b)
431 bound = parameter_getval(b->parameter, "boundary");
432 fseeko(fp, b->offset, SEEK_SET);
433 b->parts = mutt_parse_multipart(fp, bound, b->offset + b->length,
434 mime_which_token(b->subtype, -1) == MIME_DIGEST);
439 fseeko(fp, b->offset, SEEK_SET);
441 if (mutt_is_message_type(b)) {
442 b->parts = mutt_parse_messageRFC822(fp, b);
444 if (mime_which_token(b->subtype, -1) == MIME_EXTERNAL_BODY) {
445 b->parts = mutt_read_mime_header(fp, 0);
456 /* try to recover from parsing error */
459 m_strreplace(&b->subtype, "plain");
463 /* parse a MESSAGE/RFC822 body
466 * fp stream to read from
468 * parent structure which contains info about the message/rfc822
471 * NOTE: this assumes that `parent->length' has been set!
473 BODY *mutt_parse_messageRFC822(FILE * fp, BODY * parent)
477 parent->hdr = header_new();
478 parent->hdr->offset = ftello(fp);
479 parent->hdr->env = mutt_read_rfc822_header(fp, parent->hdr, 0, 0);
481 msg = parent->hdr->content;
483 /* ignore the length given in the content-length since it could be wrong
484 and we already have the info to calculate the correct length */
485 /* if (msg->length == -1) */
486 /* if body of this message is empty, we can end up with a negative length */
487 msg->length = MAX(0, parent->length - (msg->offset - parent->offset));
489 mutt_parse_part(fp, msg);
494 /* parse a multipart structure
497 * fp stream to read from
499 * bound body separator
501 * end_off file offset at which the multipart body ends (used when the
502 * final boundary is missing to avoid reading too far)
504 * digest 1 if reading a multipart/digest, 0 otherwise
508 mutt_parse_multipart(FILE *fp, const char *bound, off_t end_off, int digest)
510 char buffer[LONG_STRING];
513 int blen = m_strlen(bound);
514 int final = 0; /* did we see the ending boundary? */
517 mutt_error _("multipart message has no boundary parameter!");
521 while (ftello(fp) < end_off && fgets(buffer, sizeof(buffer), fp)) {
524 len = m_strlen(buffer);
525 crlf = len > 1 && buffer[len - 2] == '\r';
527 if (buffer[0] == '-' && buffer[1] == '-'
528 && !m_strncmp(buffer + 2, bound, blen))
533 /* if the body is empty, we can end up with a -1 length */
534 b->length = MAX(0, ftello(fp) - b->offset - len - 1 - crlf);
535 if (b->parts && b->parts->length == 0) {
536 b->parts->length = ftello(fp) - b->parts->offset
541 /* Remove any trailing whitespace, up to the length of the boundary */
542 for (i = len - 1; ISSPACE(buffer[i]) && i >= blen + 2; i--)
545 /* Check for the end boundary */
546 final = buffer[blen + 3] == '-' && buffer[blen + 4] == '-';
550 if (buffer[2 + blen] == '\0') {
551 BODY *new = mutt_read_mime_header(fp, digest);
554 * Consistency checking - catch
555 * bad attachment end boundaries
558 if (new->offset > end_off) {
559 body_list_wipe(&new);
564 last = &(*last)->next;
570 /* in case of missing end boundary, set the length to something reasonable */
571 if (*last && (*last)->length == 0 && !final)
572 (*last)->length = end_off - (*last)->offset;
574 /* parse recursive MIME parts */
577 for (b = head; b; b = b->next)
578 mutt_parse_part(fp, b);
584 /* parses a date string in RFC822 format:
586 * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
588 * This routine assumes that `h' has been initialized to 0. The `timezone'
589 * field is optional, defaulting to +0000 if missing.
591 time_t mutt_parse_date(const char *s, HEADER *h)
597 loc = setlocale(LC_ALL, "C");
599 if (strptime(s, "%a, %d %b %Y %H:%M:%S %z", &tm))
602 if (strptime(s, "%a, %d %b %Y %H:%M %z", &tm))
604 setlocale(LC_ALL, "");
608 setlocale(LC_ALL, "");
610 return timegm(&tm) - tz;
613 string_list_t **mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p,
614 short weed, short do_2047, string_list_t **user_hdrs)
616 switch (mime_which_token(line, -1)) {
617 case MIME_APPARENTLY_FROM:
618 e->from = rfc822_parse_adrlist (e->from, p);
621 case MIME_APPARENTLY_TO:
622 e->to = rfc822_parse_adrlist (e->to, p);
626 e->bcc = rfc822_parse_adrlist (e->bcc, p);
630 e->cc = rfc822_parse_adrlist (e->cc, p);
633 case MIME_CONTENT_DESCRIPTION:
635 m_strreplace(&hdr->content->description, p);
636 rfc2047_decode(&hdr->content->description);
640 case MIME_CONTENT_DISPOSITION:
642 parse_content_disposition(p, hdr->content);
645 case MIME_CONTENT_LENGTH:
647 if ((hdr->content->length = atoi(p)) < 0)
648 hdr->content->length = -1;
652 case MIME_CONTENT_TRANSFER_ENCODING:
654 hdr->content->encoding = mutt_check_encoding(p);
657 case MIME_CONTENT_TYPE:
659 mutt_parse_content_type (p, hdr->content);
663 m_strreplace(&e->date, p);
665 hdr->date_sent = mutt_parse_date (p, hdr);
669 if (hdr && mutt_parse_date (p, NULL) < time (NULL))
674 e->from = rfc822_parse_adrlist(e->from, p);
675 /* don't leave from info NULL if there's an invalid address (or
676 * whatever) in From: field; mutt would just display it as empty
677 * and mark mail/(esp.) news article as your own. aaargh! this
678 * bothered me for _years_ */
680 e->from = address_new();
681 e->from->personal = m_strdup(p);
685 case MIME_IN_REPLY_TO:
686 string_list_wipe(&e->in_reply_to);
687 e->in_reply_to = mutt_parse_references(p, 1);
692 /* HACK - mutt has, for a very short time, produced negative
693 Lines header values. Ignore them. */
694 hdr->lines = MAX(0, atoi(p));
699 /* RFC 2369. FIXME: We should ignore whitespace, but don't. */
700 if (m_strncmp(p, "NO", 2)) {
703 for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) {
705 if (!(end = strchr (beg, '>')))
708 /* Take the first mailto URL */
709 if (url_check_scheme (beg) == U_MAILTO) {
710 p_delete(&e->list_post);
711 e->list_post = p_dupstr(beg, end - beg);
718 case MIME_MAIL_FOLLOWUP_TO:
719 e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p);
722 case MIME_MAIL_REPLY_TO:
723 address_list_wipe(&e->reply_to);
724 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
727 case MIME_MESSAGE_ID:
729 const char *beg, *end;
731 /* We add a new "Message-ID:" when building a message */
732 p_delete(&e->message_id);
734 if ((beg = strchr(p, '<')) && (end = strchr(beg, '>')))
735 e->message_id = p_dupstr(beg, (end - beg) + 1);
739 case MIME_MIME_VERSION:
744 case MIME_ORGANIZATION:
745 if (!e->organization && mime_which_token(p, -1) == MIME_UNKNOWN)
746 e->organization = m_strdup(p);
750 if (hdr && !hdr->received) {
751 char *d = strchr(p, ';');
753 hdr->received = mutt_parse_date(d + 1, NULL);
757 case MIME_REFERENCES:
758 string_list_wipe(&e->references);
759 e->references = mutt_parse_references(p, 0);
763 e->reply_to = rfc822_parse_adrlist(e->reply_to, p);
766 case MIME_RETURN_PATH:
767 e->return_path = rfc822_parse_adrlist(e->return_path, p);
771 e->sender = rfc822_parse_adrlist (e->sender, p);
795 e->subject = m_strdup(p);
798 case MIME_SUPERCEDES:
799 case MIME_SUPERSEDES:
801 e->supersedes = m_strdup(p);
805 e->to = rfc822_parse_adrlist(e->to, p);
809 e->x_label = m_strdup(p);
837 /* restore the original line */
838 line[m_strlen(line)] = ':';
840 if (weed && string_list_contains(Ignore, line, "*")
841 && !string_list_contains(UnIgnore, line, "*")) {
845 *user_hdrs = string_item_new();
846 (*user_hdrs)->data = m_strdup(line);
848 rfc2047_decode(&(*user_hdrs)->data);
849 return &(*user_hdrs)->next;
855 /* mutt_read_rfc822_header() -- parses a RFC822 header
859 * f stream to read from
861 * hdr header structure of current message (optional).
863 * user_hdrs If set, store user headers. Used for recall-message and
866 * weed If this parameter is set and the user has activated the
867 * $weed option, honor the header weed list for user headers.
868 * Used for recall-message.
870 * Returns: newly allocated envelope structure. Free it with
871 * envelope_delete() once the envelope is no longer needed.
874 mutt_read_rfc822_header(FILE *f, HEADER *hdr, short user_hdrs, short weed)
876 ENVELOPE *e = envelope_new();
877 string_list_t **last = user_hdrs ? &e->userhdrs : NULL;
879 char *line = p_new(char, LONG_STRING);
880 ssize_t linelen = LONG_STRING;
883 if (hdr && !hdr->content) {
884 hdr->content = body_new();
886 /* set the defaults from RFC1521 */
887 hdr->content->type = TYPETEXT;
888 hdr->content->subtype = m_strdup("plain");
889 hdr->content->encoding = ENC7BIT;
890 hdr->content->length = -1;
892 /* RFC 2183 says this is arbitrary */
893 hdr->content->disposition = DISPINLINE;
896 while ((loc = ftello(f)),
897 mutt_read_rfc822_line(f, &line, &linelen))
899 char buf[LONG_STRING + 1] = "";
902 p = strpbrk(line, ": \t");
903 if (!p || *p != ':') {
904 /* some bogus MTAs will quote the original "From " line */
905 if (!m_strncmp(">From ", line, 6) || !m_strncmp("From ", line, 5))
906 continue; /* just ignore */
909 break; /* end of header */
912 if (rx_list_match2(SpamList, line, buf, sizeof(buf))
913 && !rx_list_match(NoSpamList, line))
915 /* if spam tag already exists, figure out how to amend it */
916 if (e->spam && *buf) {
917 if (mod_mime.spam_separator) {
918 mutt_buffer_addstr(e->spam, mod_mime.spam_separator);
920 mutt_buffer_reset(e->spam);
922 mutt_buffer_addstr(e->spam, buf);
926 e->spam = mutt_buffer_from(NULL, buf);
933 continue; /* skip empty header fields */
935 last = mutt_parse_rfc822_line(e, hdr, line, p, weed, 1, last);
941 hdr->content->hdr_offset = hdr->offset;
942 hdr->content->offset = ftello(f);
943 rfc2047_decode_envelope(e);
944 /* check for missing or invalid date */
945 if (hdr->date_sent <= 0) {
946 hdr->date_sent = hdr->received;
953 /* Compares mime types to the ok and except lists */
954 static int count_body_parts_check(string_list_t **checklist, BODY *b)
958 for (type = *checklist; type; type = type->next) {
959 ATTACH_MATCH *a = (ATTACH_MATCH *)type->data;
961 if ((a->major_int == TYPEANY || a->major_int == b->type)
962 && !regexec(&a->minor_rx, b->subtype, 0, NULL, 0)) {
970 static int count_body_parts (BODY *body, int flags)
978 for (bp = body; bp != NULL; bp = bp->next) {
979 /* Initial disposition is to count and not to recurse this part. */
980 int shallcount, shallrecurse, iscontainer;
981 int tok = mime_which_token(bp->subtype, -1);
983 iscontainer = bp->type == TYPEMESSAGE || bp->type == TYPEMULTIPART;
985 /* don't recurse in external bodies or multipart/alternatives */
986 shallrecurse = (bp->type == TYPEMESSAGE && tok != MIME_EXTERNAL_BODY)
987 || (bp->type == TYPEMULTIPART && tok != MIME_ALTERNATIVE);
989 /* Don't count top level containers and fundamental inlines */
990 shallcount = !(iscontainer && (flags & M_PARTS_TOPLEVEL))
991 && !(!iscontainer && bp->disposition == DISPINLINE && bp == body);
994 /* Turn off shallcount if message type is not in ok list,
995 * or if it is in except list. Check is done separately for
996 * inlines vs. attachments.
999 if (bp->disposition == DISPATTACH) {
1000 if (!count_body_parts_check(&AttachAllow, bp))
1002 if (count_body_parts_check(&AttachExclude, bp))
1005 if (!count_body_parts_check(&InlineAllow, bp))
1007 if (count_body_parts_check(&InlineExclude, bp))
1012 bp->attach_qualifies = shallcount;
1013 count += shallcount;
1016 bp->attach_count = count_body_parts(bp->parts,
1017 flags & ~M_PARTS_TOPLEVEL);
1018 count += bp->attach_count;
1025 int mutt_count_body_parts(HEADER *hdr, int flags)
1027 if (hdr->attach_valid && !(flags & M_PARTS_RECOUNT))
1028 return hdr->attach_total;
1030 if (AttachAllow || AttachExclude || InlineAllow || InlineExclude)
1031 hdr->attach_total = count_body_parts(hdr->content,
1032 flags | M_PARTS_TOPLEVEL);
1034 hdr->attach_total = 0;
1036 hdr->attach_valid = 1;
1037 return hdr->attach_total;
1041 * A valid message separator looks like:
1043 * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year>
1045 bool is_from(const char *s, char *path, ssize_t pathlen, time_t *tp)
1055 if (m_strncmp("From ", s, 5) != 0)
1058 s = skipspaces(s + 5); /* skip over the From part. */
1062 for (p = s; *p && (q || !ISSPACE(*p)); p++) {
1067 else if (*p == '"') {
1076 m_strncpy(path, pathlen, s, p - s);
1078 s = vskipspaces(p + 1);
1082 loc = setlocale(LC_TIME, "C");
1083 for (int i = 0; i < 4; i++) {
1084 static char const * const formats[] = {
1085 "%a %b %d %H:%M:%S %Y",
1086 "%a %b %d %H:%M:%S %z %Y",
1087 "%a %b %d %H:%M %Y",
1088 "%a %b %d %H:%M %z %Y",
1092 p = strptime(s, formats[i], &tm);
1098 setlocale(LC_TIME, loc);
1104 *tp = timegm(&tm) - *tp;