From: Pierre Habouzit Date: Mon, 6 Nov 2006 23:49:08 +0000 (+0100) Subject: replace the pathetic mutt_parse_rfc822_line function with a really better X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=38c7d971a4e206284e06e958511bb55694cb4deb;ds=inline replace the pathetic mutt_parse_rfc822_line function with a really better looking one. that code uses gperf to generate the string -> enum function, so that we don't have to do a thing to hash them. we improved: * readability (the C code is way better) * efficiency Signed-off-by: Pierre Habouzit --- diff --git a/lib-mime/.gitignore b/lib-mime/.gitignore new file mode 100644 index 0000000..2e9a39f --- /dev/null +++ b/lib-mime/.gitignore @@ -0,0 +1,2 @@ +rfc822hdrs.h +rfc822hdrs.c diff --git a/lib-mime/Makefile.am b/lib-mime/Makefile.am index b06f4f7..362022e 100644 --- a/lib-mime/Makefile.am +++ b/lib-mime/Makefile.am @@ -1,8 +1,15 @@ +BUILT_SOURCES = rfc822hdrs.h rfc822hdrs.c +CLEANFILES = $(BUILT_SOURCES) + noinst_LIBRARIES = libmime.a -libmime_a_SOURCES = mime.h mime-types.h \ +libmime_a_SOURCES = mime.h mime-types.h $(BUILT_SOURCES) \ mime.c rfc822address.c rfc822parse.c rfc2047.c rfc2231.c noinst_HEADERS = mime.h mime-types.h + +rfc822hdrs.c rfc822hdrs.h: rfc822hdrs.def + sh rfc822hdrs.sh $@ < $< + -include ../cflags.mk diff --git a/lib-mime/mime.h b/lib-mime/mime.h index b30b42b..160a7c9 100644 --- a/lib-mime/mime.h +++ b/lib-mime/mime.h @@ -64,6 +64,9 @@ BODY *mutt_read_mime_header (FILE *, int); void mutt_parse_part(FILE *, BODY *); BODY *mutt_parse_messageRFC822(FILE *, BODY *); BODY *mutt_parse_multipart(FILE *, const char *, off_t, int); +void mutt_parse_rfc822_line(ENVELOPE *, HEADER *, char *line, char *p, + short user_hdrs, short weed, short do_2047, + LIST **); /*** addresses ***/ diff --git a/lib-mime/rfc822hdrs.def b/lib-mime/rfc822hdrs.def new file mode 100644 index 0000000..1d073e2 --- /dev/null +++ b/lib-mime/rfc822hdrs.def @@ -0,0 +1,36 @@ +apparently-from +apparently-to 
+bcc +cc +content-description +content-disposition +content-length +content-transfer-encoding +content-type +date +expires +followup-to +from +in-reply-to +lines +list-post +mail-followup-to +mail-reply-to +message-id +mime-version +newsgroups +organization +received +references +reply-to +return-path +sender +status +subject +supercedes +supersedes +to +x-comment-to +x-label +xref +x-status diff --git a/lib-mime/rfc822hdrs.sh b/lib-mime/rfc822hdrs.sh new file mode 100644 index 0000000..1124536 --- /dev/null +++ b/lib-mime/rfc822hdrs.sh @@ -0,0 +1,42 @@ +#! /bin/sh + +die() { + echo "$@" 1>&2 + exit 2 +} + +do_h() { + echo "#ifndef MUTT_LIB_MIME_RFC822HDRS_H" + echo "#define MUTT_LIB_MIME_RFC822HDRS_H" + echo "/* THIS FILE IS AUTOGENERATED FROM $< DO NOT MODIFY */" + echo "enum rfc822hdr {" + echo " HDR_UNKNOWN," + tr 'a-z-' 'A-Z_' | sed -e 's/.*/ HDR_&,/' + echo "};" + echo + echo "enum rfc822hdr rfc822_well_known(const char *s);" + echo "#endif /* MUTT_LIB_MIME_RFC822HDRS_H */" +} + +do_c() { + echo "%{" + echo "#include <string.h>" + echo "#include \"rfc822hdrs.h\"" + echo "%}" + echo "struct hdr { const char *name; int val; };" + echo "%%" + awk '{print $$0 ", " NR }' + echo "%%" + echo "enum rfc822hdr rfc822_well_known(const char *s) {" + echo " const struct hdr *res = in_word_set(s, strlen(s));" + echo " return res ? res->val : HDR_UNKNOWN;" + echo "}" +} + +case "$1" in + *.h) do_h > "$1" ;; + *.c) do_c | gperf --ignore-case -t -C -F,0 > "$1" ;; + *) die "you must ask for the 'h' or 'c' generation" ;; +esac + +exit 0 diff --git a/lib-mime/rfc822parse.c b/lib-mime/rfc822parse.c index 3fd2610..ebd7b82 100644 --- a/lib-mime/rfc822parse.c +++ b/lib-mime/rfc822parse.c @@ -783,342 +783,288 @@ time_t mutt_parse_date(const char *s, HEADER *h) return mutt_mktime(&tm, 0) + (zoccident ? 
1 : -1) * (zhours * 3600 + zminutes * 60); } -/*** XXX: MC READ MARK ***/ +#include "rfc822hdrs.h" -int mutt_parse_rfc822_line (ENVELOPE * e, HEADER * hdr, char *line, char *p, +void mutt_parse_rfc822_line(ENVELOPE *e, HEADER *hdr, char *line, char *p, short user_hdrs, short weed, short do_2047, - LIST ** lastp) + LIST **lastp) { - int matched = 0; - LIST *last = NULL; + switch (rfc822_well_known(line)) { + case HDR_APPARENTLY_FROM: + e->from = rfc822_parse_adrlist (e->from, p); + break; - if (lastp) - last = *lastp; + case HDR_APPARENTLY_TO: + e->to = rfc822_parse_adrlist (e->to, p); + break; - switch (ascii_tolower (line[0])) { - case 'a': - if (ascii_strcasecmp (line + 1, "pparently-to") == 0) { - e->to = rfc822_parse_adrlist (e->to, p); - matched = 1; - } - else if (ascii_strcasecmp (line + 1, "pparently-from") == 0) { - e->from = rfc822_parse_adrlist (e->from, p); - matched = 1; - } - break; + case HDR_BCC: + e->bcc = rfc822_parse_adrlist (e->bcc, p); + break; - case 'b': - if (ascii_strcasecmp (line + 1, "cc") == 0) { - e->bcc = rfc822_parse_adrlist (e->bcc, p); - matched = 1; - } - break; + case HDR_CC: + e->cc = rfc822_parse_adrlist (e->cc, p); + break; - case 'c': - if (ascii_strcasecmp (line + 1, "c") == 0) { - e->cc = rfc822_parse_adrlist (e->cc, p); - matched = 1; - } - else if (ascii_strncasecmp (line + 1, "ontent-", 7) == 0) { - if (ascii_strcasecmp (line + 8, "type") == 0) { - if (hdr) - mutt_parse_content_type (p, hdr->content); - matched = 1; - } - else if (ascii_strcasecmp (line + 8, "transfer-encoding") == 0) { - if (hdr) - hdr->content->encoding = mutt_check_encoding (p); - matched = 1; - } - else if (ascii_strcasecmp (line + 8, "length") == 0) { + case HDR_CONTENT_DESCRIPTION: if (hdr) { - if ((hdr->content->length = atoi (p)) < 0) - hdr->content->length = -1; + m_strreplace(&hdr->content->description, p); + rfc2047_decode(&hdr->content->description); } - matched = 1; - } - else if (ascii_strcasecmp (line + 8, "description") == 0) { + break; + 
+ case HDR_CONTENT_DISPOSITION: + if (hdr) + parse_content_disposition(p, hdr->content); + break; + + case HDR_CONTENT_LENGTH: if (hdr) { - m_strreplace(&hdr->content->description, p); - rfc2047_decode (&hdr->content->description); + if ((hdr->content->length = atoi(p)) < 0) + hdr->content->length = -1; } - matched = 1; - } - else if (ascii_strcasecmp (line + 8, "disposition") == 0) { + break; + + case HDR_CONTENT_TRANSFER_ENCODING: if (hdr) - parse_content_disposition (p, hdr->content); - matched = 1; - } - } - break; - - case 'd': - if (!ascii_strcasecmp ("ate", line + 1)) { - m_strreplace(&e->date, p); - if (hdr) - hdr->date_sent = mutt_parse_date (p, hdr); - matched = 1; - } - break; - - case 'e': - if (!ascii_strcasecmp ("xpires", line + 1) && - hdr && mutt_parse_date (p, NULL) < time (NULL)) - hdr->expired = 1; - break; - - case 'f': - if (!ascii_strcasecmp ("rom", line + 1)) { - e->from = rfc822_parse_adrlist (e->from, p); - /* don't leave from info NULL if there's an invalid address (or - * whatever) in From: field; mutt would just display it as empty - * and mark mail/(esp.) news article as your own. aaargh! 
this - * bothered me for _years_ */ - if (!e->from) { - e->from = address_new (); - e->from->personal = m_strdup(p); - } - matched = 1; - } + hdr->content->encoding = mutt_check_encoding(p); + break; + + case HDR_CONTENT_TYPE: + if (hdr) + mutt_parse_content_type (p, hdr->content); + break; + + case HDR_DATE: + m_strreplace(&e->date, p); + if (hdr) + hdr->date_sent = mutt_parse_date (p, hdr); + break; + + case HDR_EXPIRES: + if (hdr && mutt_parse_date (p, NULL) < time (NULL)) + hdr->expired = 1; + break; + #ifdef USE_NNTP - else if (!m_strcasecmp(line + 1, "ollowup-to")) { - if (!e->followup_to) { - m_strrtrim(p); - e->followup_to = m_strdup(skipspaces(p)); - } - matched = 1; - } + case HDR_FOLLOWUP_TO: + if (!e->followup_to) { + m_strrtrim(p); + e->followup_to = m_strdup(skipspaces(p)); + } + break; #endif - break; - case 'i': - if (!ascii_strcasecmp (line + 1, "n-reply-to")) { - mutt_free_list (&e->in_reply_to); - e->in_reply_to = mutt_parse_references (p, 1); - matched = 1; - } - break; + case HDR_FROM: + e->from = rfc822_parse_adrlist(e->from, p); + /* don't leave from info NULL if there's an invalid address (or + * whatever) in From: field; mutt would just display it as empty + * and mark mail/(esp.) news article as your own. aaargh! this + * bothered me for _years_ */ + if (!e->from) { + e->from = address_new(); + e->from->personal = m_strdup(p); + } + break; - case 'l': - if (!ascii_strcasecmp (line + 1, "ines")) { - if (hdr) { - hdr->lines = atoi (p); + case HDR_IN_REPLY_TO: + mutt_free_list(&e->in_reply_to); + e->in_reply_to = mutt_parse_references(p, 1); + break; - /* - * HACK - mutt has, for a very short time, produced negative - * Lines header values. Ignore them. - */ - if (hdr->lines < 0) - hdr->lines = 0; - } + case HDR_LINES: + if (hdr) { + /* HACK - mutt has, for a very short time, produced negative + Lines header values. Ignore them. 
*/ + hdr->lines = MAX(0, atoi(p)); + } + break; - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "ist-Post")) { - /* RFC 2369. FIXME: We should ignore whitespace, but don't. */ - if (strncmp (p, "NO", 2)) { - char *beg, *end; - - for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) { - ++beg; - if (!(end = strchr (beg, '>'))) - break; + case HDR_LIST_POST: + /* RFC 2369. FIXME: We should ignore whitespace, but don't. */ + if (strncmp(p, "NO", 2)) { + char *beg, *end; - /* Take the first mailto URL */ - if (url_check_scheme (beg) == U_MAILTO) { - p_delete(&e->list_post); - e->list_post = p_dupstr(beg, end - beg); - break; - } - } - } - matched = 1; - } - break; + for (beg = strchr (p, '<'); beg; beg = strchr (end, ',')) { + ++beg; + if (!(end = strchr (beg, '>'))) + break; - case 'm': - if (!ascii_strcasecmp (line + 1, "ime-version")) { - if (hdr) - hdr->mime = 1; - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "essage-id")) { - const char *beg, *end; + /* Take the first mailto URL */ + if (url_check_scheme (beg) == U_MAILTO) { + p_delete(&e->list_post); + e->list_post = p_dupstr(beg, end - beg); + break; + } + } + } + break; - /* We add a new "Message-ID:" when building a message */ - p_delete(&e->message_id); + case HDR_MAIL_FOLLOWUP_TO: + e->mail_followup_to = rfc822_parse_adrlist(e->mail_followup_to, p); + break; - if ((beg = strchr(p, '<')) && (end = strchr(beg, '>'))) - e->message_id = p_dupstr(beg, (end - beg) + 1); - matched = 1; - } - else if (!ascii_strncasecmp (line + 1, "ail-", 4)) { - if (!ascii_strcasecmp (line + 5, "reply-to")) { - /* override the Reply-To: field */ + case HDR_MAIL_REPLY_TO: address_delete (&e->reply_to); - e->reply_to = rfc822_parse_adrlist (e->reply_to, p); - matched = 1; - } - else if (!ascii_strcasecmp (line + 5, "followup-to")) { - e->mail_followup_to = rfc822_parse_adrlist (e->mail_followup_to, p); - matched = 1; - } - } - break; + e->reply_to = rfc822_parse_adrlist(e->reply_to, p); + break; + + case 
HDR_MESSAGE_ID: + { + const char *beg, *end; + + /* We add a new "Message-ID:" when building a message */ + p_delete(&e->message_id); + + if ((beg = strchr(p, '<')) && (end = strchr(beg, '>'))) + e->message_id = p_dupstr(beg, (end - beg) + 1); + } + break; + + case HDR_MIME_VERSION: + if (hdr) + hdr->mime = 1; + break; #ifdef USE_NNTP - case 'n': - if (!m_strcasecmp(line + 1, "ewsgroups")) { - p_delete(&e->newsgroups); - m_strrtrim(p); - e->newsgroups = m_strdup(skipspaces(p)); - matched = 1; - } - break; + case HDR_NEWSGROUPS: + p_delete(&e->newsgroups); + m_strrtrim(p); + e->newsgroups = m_strdup(skipspaces(p)); + break; #endif - case 'o': - /* field `Organization:' saves only for pager! */ - if (!m_strcasecmp(line + 1, "rganization")) { - if (!e->organization && m_strcasecmp(p, "unknown")) - e->organization = m_strdup(p); - } - break; + case HDR_ORGANIZATION: + if (!e->organization && m_strcasecmp(p, "unknown")) + e->organization = m_strdup(p); + break; - case 'r': - if (!ascii_strcasecmp (line + 1, "eferences")) { - mutt_free_list (&e->references); - e->references = mutt_parse_references (p, 0); - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "eply-to")) { - e->reply_to = rfc822_parse_adrlist (e->reply_to, p); - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "eturn-path")) { - e->return_path = rfc822_parse_adrlist (e->return_path, p); - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "eceived")) { - if (hdr && !hdr->received) { - char *d = strchr (p, ';'); + case HDR_RECEIVED: + if (hdr && !hdr->received) { + char *d = strchr(p, ';'); + if (d) + hdr->received = mutt_parse_date(d + 1, NULL); + } + break; - if (d) - hdr->received = mutt_parse_date (d + 1, NULL); - } - } - break; + case HDR_REFERENCES: + mutt_free_list(&e->references); + e->references = mutt_parse_references(p, 0); + break; - case 's': - if (!ascii_strcasecmp (line + 1, "ubject")) { - if (!e->subject) - e->subject = m_strdup(p); - matched = 1; - } - else if 
(!ascii_strcasecmp (line + 1, "ender")) { - e->sender = rfc822_parse_adrlist (e->sender, p); - matched = 1; - } - else if (!ascii_strcasecmp (line + 1, "tatus")) { - if (hdr) { - while (*p) { - switch (*p) { - case 'r': - hdr->replied = 1; - break; - case 'O': - hdr->old = 1; - break; - case 'R': - hdr->read = 1; - break; - } - p++; - } - } - matched = 1; - } - else if ((!ascii_strcasecmp ("upersedes", line + 1) || - !ascii_strcasecmp ("upercedes", line + 1)) && hdr) - e->supersedes = m_strdup(p); - break; - - case 't': - if (ascii_strcasecmp (line + 1, "o") == 0) { - e->to = rfc822_parse_adrlist (e->to, p); - matched = 1; - } - break; - - case 'x': - if (ascii_strcasecmp (line + 1, "-status") == 0) { - if (hdr) { - while (*p) { - switch (*p) { - case 'A': - hdr->replied = 1; - break; - case 'D': - hdr->deleted = 1; - break; - case 'F': - hdr->flagged = 1; - break; - default: - break; - } - p++; + case HDR_REPLY_TO: + e->reply_to = rfc822_parse_adrlist(e->reply_to, p); + break; + + case HDR_RETURN_PATH: + e->return_path = rfc822_parse_adrlist(e->return_path, p); + break; + + case HDR_SENDER: + e->sender = rfc822_parse_adrlist (e->sender, p); + break; + + case HDR_STATUS: + if (hdr) { + while (*p) { + switch (*p) { + case 'r': + hdr->replied = 1; + break; + case 'O': + hdr->old = 1; + break; + case 'R': + hdr->read = 1; + break; + } + p++; + } } - } - matched = 1; - } - else if (ascii_strcasecmp (line + 1, "-label") == 0) { - e->x_label = m_strdup(p); - matched = 1; - } + break; + + case HDR_SUBJECT: + if (!e->subject) + e->subject = m_strdup(p); + break; + + case HDR_SUPERCEDES: + case HDR_SUPERSEDES: + if (hdr) + e->supersedes = m_strdup(p); + break; + + case HDR_TO: + e->to = rfc822_parse_adrlist(e->to, p); + break; + #ifdef USE_NNTP - else if (!m_strcasecmp(line + 1, "-comment-to")) { - if (!e->x_comment_to) - e->x_comment_to = m_strdup(p); - matched = 1; - } - else if (!m_strcasecmp(line + 1, "ref")) { - if (!e->xref) - e->xref = m_strdup(p); - matched = 1; - } 
+ case HDR_X_COMMENT_TO: + if (!e->x_comment_to) + e->x_comment_to = m_strdup(p); + break; #endif - default: - break; - } + case HDR_X_LABEL: + e->x_label = m_strdup(p); + break; - /* Keep track of the user-defined headers */ - if (!matched && user_hdrs) { - /* restore the original line */ - line[m_strlen(line)] = ':'; +#ifdef USE_NNTP + case HDR_XREF: + if (!e->xref) + e->xref = m_strdup(p); + break; +#endif - if (weed && option (OPTWEED) && mutt_matches_ignore (line, Ignore) - && !mutt_matches_ignore (line, UnIgnore)) - goto done; + case HDR_X_STATUS: + if (hdr) { + while (*p) { + switch (*p) { + case 'A': + hdr->replied = 1; + break; + case 'D': + hdr->deleted = 1; + break; + case 'F': + hdr->flagged = 1; + break; + default: + break; + } + p++; + } + } + break; - if (last) { - last->next = mutt_new_list (); - last = last->next; - } - else - last = e->userhdrs = mutt_new_list (); - last->data = m_strdup(line); - if (do_2047) - rfc2047_decode (&last->data); - } + default: + if (!user_hdrs) { + return; + } + /* restore the original line */ + line[m_strlen(line)] = ':'; -done: + if (weed && option(OPTWEED) && mutt_matches_ignore(line, Ignore) + && !mutt_matches_ignore(line, UnIgnore)) { + return; + } - *lastp = last; - return matched; + if (*lastp) { + (*lastp)->next = mutt_new_list(); + (*lastp) = (*lastp)->next; + } else { + (*lastp) = e->userhdrs = mutt_new_list (); + } + + (*lastp)->data = m_strdup(line); + if (do_2047) + rfc2047_decode(&(*lastp)->data); + } } +/*** XXX: MC READ MARK ***/ + + /* mutt_read_rfc822_header() -- parses a RFC822 header * @@ -1146,7 +1092,6 @@ ENVELOPE *mutt_read_rfc822_header (FILE * f, HEADER * hdr, short user_hdrs, char *line = p_new(char, LONG_STRING); char *p; off_t loc; - int matched; ssize_t linelen = LONG_STRING; char buf[LONG_STRING + 1]; @@ -1168,8 +1113,6 @@ ENVELOPE *mutt_read_rfc822_header (FILE * f, HEADER * hdr, short user_hdrs, while ((loc = ftello (f)), mutt_read_rfc822_line (f, &line, &linelen)) { - matched = 0; - if 
((p = strpbrk (line, ": \t")) == NULL || *p != ':') { char return_path[LONG_STRING]; time_t t; @@ -1229,9 +1172,7 @@ ENVELOPE *mutt_read_rfc822_header (FILE * f, HEADER * hdr, short user_hdrs, if (!*p) continue; /* skip empty header fields */ - matched = - mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last); - + mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last); } p_delete(&line); diff --git a/protos.h b/protos.h index 14fd7ea..d87c1f3 100644 --- a/protos.h +++ b/protos.h @@ -243,9 +243,6 @@ int mutt_parse_mono (BUFFER *, BUFFER *, unsigned long, BUFFER *); int mutt_parse_unmono (BUFFER *, BUFFER *, unsigned long, BUFFER *); int mutt_parse_push (BUFFER *, BUFFER *, unsigned long, BUFFER *); int mutt_parse_rc_line ( /* const */ char *, BUFFER *, BUFFER *); -int mutt_parse_rfc822_line (ENVELOPE * e, HEADER * hdr, char *line, char *p, - short user_hdrs, short weed, short do_2047, - LIST ** lastp); int mutt_parse_score (BUFFER *, BUFFER *, unsigned long, BUFFER *); int mutt_parse_unscore (BUFFER *, BUFFER *, unsigned long, BUFFER *); int mutt_parse_unhook (BUFFER *, BUFFER *, unsigned long, BUFFER *);