From: Pierre Habouzit Date: Fri, 3 Nov 2006 15:46:24 +0000 (+0100) Subject: even better and simpler parser. X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=99a84f88c0acee0c46e9525684b5187d4192168b;ds=sidebyside even better and simpler parser. Signed-off-by: Pierre Habouzit --- diff --git a/lib-mime/rfc822.c b/lib-mime/rfc822.c index 3f3e654..d672891 100644 --- a/lib-mime/rfc822.c +++ b/lib-mime/rfc822.c @@ -84,45 +84,51 @@ address_t *address_list_dup(address_t *addr) } -static void rfc822_dequote_comment(char *s) -{ - char *w = s; +/****************************************************************************/ +/* Parsing functions */ +/****************************************************************************/ - for (; *s; s++) { - if (*s == '\\') { - /* if *++s is NUL that's an error, but we don't care */ - *w++ = *++s; - } else - if (*s != '\"') { - *w++ = *s; - } +typedef struct static_buf { + char buf[STRING]; + int len; +} static_buf; + +static inline void stbuf_append(static_buf *buf, int c) { + if (buf->len < ssizeof(buf->buf) - 1) { + buf->buf[buf->len++] = c; + buf->buf[buf->len] = '\0'; } - *w = 0; } +static inline void stbuf_append_sp(static_buf *buf) { + if (buf->len) + stbuf_append(buf, ' '); +} -/****************************************************************************/ -/* Parsing functions */ -/****************************************************************************/ - -struct rfc822_parse_ctx { - address_t *cur; +static char *rfc822_dequote_comment(static_buf *buf) +{ + char *res = p_new(char, buf->len + 1); + char *q = res; + char *p = buf->buf; + int i; - char comment[STRING]; - size_t commentlen; + for (i = 0; i < buf->len; i++) { + if (p[i] == '\"') + continue; - char phrase[STRING]; - size_t phraselen; -}; + if (p[i] == '\\') { + if (++i >= buf->len) /* should not happen */ + break; + } -#define is_special(x) strchr(RFC822Specials,x) -#define terminate_string(a, b, c) (a[MIN(b, c)] = 0) -#define terminate_buffer(a) terminate_string(a, a##len, sizeof (a) - 1) + *q++ = p[i]; + } + *q++ = '\0'; + return res; +} -static const char * -parse_comment(const char *s, char *comment, size_t *commentlen, - size_t commentmax) +static const char *parse_comment(const char *s, static_buf *buf) { int level = 1; @@ -146,70 +152,60 @@ parse_comment(const char *s, char *comment, size_t *commentlen, break; } - if (*commentlen < commentmax) - comment[(*commentlen)++] = *s; + stbuf_append(buf, *s); } return NULL; } -static const char * -parse_quote(const char *s, char *token, size_t *tokenlen, size_t tokenmax) +static const char *parse_quote(const char *s, static_buf *buf) { - if (*tokenlen < tokenmax) - token[(*tokenlen)++] = '"'; - for (; *s; s++) { - if (*tokenlen < tokenmax) - token[*tokenlen] = *s; - - if (*s == '"') { - (*tokenlen)++; + switch (*s) { + case '"': + stbuf_append(buf, *s); return s + 1; - } - if (*s == '\\') { + case '\\': if (!*++s) - break; + return NULL; + /* fallthrough */ - if (*tokenlen < tokenmax) - token[*tokenlen] = *s; + default: + stbuf_append(buf, *s); + break; } - (*tokenlen)++; } return NULL; } -static const char * -next_token(const char *s, char *token, size_t *tokenlen, size_t tokenmax) -{ - if (*s == '(') - return parse_comment(s + 1, token, tokenlen, tokenmax); +#define is_special(x) strchr(RFC822Specials,x) - if (*s == '"') - return parse_quote(s + 1, token, tokenlen, tokenmax); +static const char *next_phrase(const char *s, static_buf *buf) +{ + if (*s == '"') { + stbuf_append(buf, '"'); + return parse_quote(s + 1, buf); + } if (is_special(*s)) { - if (*tokenlen < tokenmax) - token[(*tokenlen)++] = *s; + stbuf_append(buf, *s); return s + 1; } while (*s) { if (ISSPACE(*s) || is_special(*s)) break; - if (*tokenlen < tokenmax) - token[(*tokenlen)++] = *s; - s++; + stbuf_append(buf, *s++); } + return s; } static const char * -parse_mailboxdomain(const char *s, const char *nonspecial, - char *mailbox, size_t *mailboxlen, size_t mailboxmax, - struct rfc822_parse_ctx *ctx) +parse_mailboxdomain(const char *s, const char *nonspecial, static_buf *mbox, + static_buf *comment) { while (*s) { s = skipspaces(s); @@ -218,11 +214,10 @@ parse_mailboxdomain(const char *s, const char *nonspecial, return s; if (*s == '(') { - if (ctx->commentlen && ctx->commentlen < sizeof(ctx->comment) - 1) - ctx->comment[ctx->commentlen++] = ' '; - s = next_token(s, ctx->comment, &ctx->commentlen, sizeof(ctx->comment) - 1); + stbuf_append_sp(comment); + s = parse_comment(s + 1, comment); } else { - s = next_token(s, mailbox, mailboxlen, mailboxmax); + s = next_phrase(s, mbox); } if (!s) @@ -233,80 +228,68 @@ parse_mailboxdomain(const char *s, const char *nonspecial, } static const char * -parse_address(const char *s, struct rfc822_parse_ctx *ctx) +parse_address(const char *s, static_buf *comment, address_t *cur) { - char token[STRING]; - size_t tokenlen = 0; + static_buf token = {"", 0}; - s = parse_mailboxdomain(s, ".\"(\\", - token, &tokenlen, sizeof(token) - 1, ctx); + s = parse_mailboxdomain(s, ".\"(\\", &token, comment); if (!s) return NULL; if (*s == '@') { - if (tokenlen < sizeof(token) - 1) - token[tokenlen++] = '@'; - s = parse_mailboxdomain(s + 1, ".([]\\", - token, &tokenlen, sizeof(token) - 1, ctx); + stbuf_append(&token, '@'); + s = parse_mailboxdomain(s + 1, ".([]\\", &token, comment); if (!s) return NULL; } - terminate_buffer(token); - ctx->cur->mailbox = m_strdup(token); + cur->mailbox = p_dupstr(token.buf, token.len); - if (ctx->commentlen && !ctx->cur->personal) { - terminate_buffer(ctx->comment); - ctx->cur->personal = m_strdup(ctx->comment); + if (comment->len && !cur->personal) { + cur->personal = p_dupstr(comment->buf, comment->len); } return s; } -address_t **add_addrspec(address_t **last, struct rfc822_parse_ctx *ctx) +address_t **rfc822_eotoken(address_t **last, static_buf *phrase, static_buf *comment) { - const char *s; + if (phrase->len) { + const char *s; + address_t *cur = address_new(); + + s = parse_address(phrase->buf, comment, cur); + if (s && *s && *s != ',' && *s != ';') { + address_delete(&cur); + return last; + } - ctx->cur = address_new(); - s = parse_address(ctx->phrase, ctx); - if (s && *s && *s != ',' && *s != ';') { - address_delete(&ctx->cur); - return last; + *last = cur; + return &(*last)->next; } - *last = ctx->cur; - return &(*last)->next; + return last; } address_t *rfc822_parse_adrlist(address_t *top, const char *s) { - struct rfc822_parse_ctx ctx = { NULL, "", 0, "", 0 }; - int ws_pending = 0; - address_t **last; + static_buf comment = {"", 0}; + static_buf phrase = {"", 0}; - last = address_list_last(&top); + address_t **last = address_list_last(&top); + int ws_pending = 0; for (;;) { ws_pending = ISSPACE(*s); s = skipspaces(s); switch (*s) { - case '\0': - if (ctx.phraselen) { - terminate_buffer(ctx.phrase); - terminate_buffer(ctx.comment); - last = add_addrspec(last, &ctx); - } else - if (ctx.commentlen && ctx.cur && !ctx.cur->personal) { - terminate_buffer(ctx.comment); - ctx.cur->personal = m_strdup(ctx.comment); - } - return top; + address_t *cur; default: - if (ctx.phraselen && ctx.phraselen < sizeof(ctx.phrase) - 1 && ws_pending) - ctx.phrase[ctx.phraselen++] = ' '; - s = next_token(s, ctx.phrase, &ctx.phraselen, sizeof(ctx.phrase) - 1); + if (ws_pending) + stbuf_append_sp(&phrase); + s = next_phrase(s, &phrase); if (!s) { address_delete(&top); return NULL; @@ -314,70 +297,57 @@ address_t *rfc822_parse_adrlist(address_t *top, const char *s) continue; case '(': - if (ctx.commentlen && ctx.commentlen < sizeof(ctx.comment) - 1) - ctx.comment[ctx.commentlen++] = ' '; - s = next_token(s, ctx.comment, &ctx.commentlen, sizeof(ctx.comment) - 1); + stbuf_append_sp(&comment); + s = parse_comment(s + 1, &comment); if (!s) { - address_delete (&top); + address_delete(&top); return NULL; } continue; - case ',': - if (ctx.phraselen) { - terminate_buffer(ctx.phrase); - last = add_addrspec(last, &ctx); - } else - if (ctx.commentlen && ctx.cur && !ctx.cur->personal) { - terminate_buffer(ctx.comment); - ctx.cur->personal = m_strdup(ctx.comment); + + case '<': + cur = address_new(); + if (phrase.len) { + /* if we get something like "Michael R. Elkins" remove the quotes */ + cur->personal = rfc822_dequote_comment(&phrase); } + + s = parse_address(skipspaces(s + 1), &comment, cur); + if (!s || *s != '>' || !cur->mailbox) { + address_delete(&top); + address_delete(&cur); + return NULL; + } + + *last = cur; + last = &(*last)->next; + break; + + case ',': + last = rfc822_eotoken(last, &phrase, &comment); break; - case ':': - terminate_buffer(ctx.phrase); + case ':': /* group start */ *last = address_new(); - (*last)->mailbox = m_strdup(ctx.phrase); + (*last)->mailbox = p_dupstr(phrase.buf, phrase.len); (*last)->group = 1; last = &(*last)->next; break; case ';': - if (ctx.phraselen) { - terminate_buffer(ctx.phrase); - last = add_addrspec(last, &ctx); - } else - if (ctx.commentlen && ctx.cur && !ctx.cur->personal) { - terminate_buffer(ctx.comment); - ctx.cur->personal = m_strdup(ctx.comment); - } - + last = rfc822_eotoken(last, &phrase, &comment); /* add group terminator */ *last = address_new(); - return top; - - case '<': - terminate_buffer(ctx.phrase); - ctx.cur = address_new (); - if (ctx.phraselen) { - /* if we get something like "Michael R. Elkins" remove the quotes */ - rfc822_dequote_comment(ctx.phrase); - ctx.cur->personal = m_strdup(ctx.phrase); - } - - s = parse_address(skipspaces(s + 1), &ctx); - if (!s || *s != '>' || !ctx.cur->mailbox) { - address_delete(&top); - address_delete(&ctx.cur); - return NULL; - } - - *last = ctx.cur; + last = &(*last)->next; break; + + case '\0': + last = rfc822_eotoken(last, &phrase, &comment); + return top; } - ctx.commentlen = 0; - ctx.phraselen = 0; + comment.len = phrase.len = 0; s++; }