From: Pierre Habouzit <madcoder@debian.org>
Date: Fri, 3 Nov 2006 15:46:24 +0000 (+0100)
Subject: even better and simpler parser.
X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=99a84f88c0acee0c46e9525684b5187d4192168b;ds=sidebyside

even better and simpler parser.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---

diff --git a/lib-mime/rfc822.c b/lib-mime/rfc822.c
index 3f3e654..d672891 100644
--- a/lib-mime/rfc822.c
+++ b/lib-mime/rfc822.c
@@ -84,45 +84,51 @@ address_t *address_list_dup(address_t *addr)
 }
 
 
-static void rfc822_dequote_comment(char *s)
-{
-    char *w = s;
+/****************************************************************************/
+/* Parsing functions                                                        */
+/****************************************************************************/
 
-    for (; *s; s++) {
-        if (*s == '\\') {
-            /* if *++s is NUL that's an error, but we don't care */
-            *w++ = *++s;
-        } else
-        if (*s != '\"') {
-            *w++ = *s;
-        }
+typedef struct static_buf {   /* fixed-capacity text accumulator used by the parsing helpers */
+    char buf[STRING];         /* storage; stbuf_append() re-terminates it with NUL after each byte */
+    int  len;                 /* number of bytes currently stored in buf, not counting the NUL */
+} static_buf;
+
+static inline void stbuf_append(static_buf *buf, int c) {  /* append c to buf; the byte is silently dropped when buf is full */
+    if (buf->len < ssizeof(buf->buf) - 1) {                /* always keep one byte free for the terminating NUL */
+        buf->buf[buf->len++] = c;
+        buf->buf[buf->len]   = '\0';                        /* buf->buf stays a valid C string after every append */
     }
-    *w = 0;
 }
 
+static inline void stbuf_append_sp(static_buf *buf) {  /* append a single separating space to buf */
+    if (buf->len)                                      /* ...but only when buf already holds something */
+        stbuf_append(buf, ' ');
+}
 
-/****************************************************************************/
-/* Parsing functions                                                        */
-/****************************************************************************/
-
-struct rfc822_parse_ctx {
-    address_t *cur;
+static char *rfc822_dequote_comment(static_buf *buf)  /* returns a newly allocated copy of buf without quotes and escape backslashes */
+{
+    char *res = p_new(char, buf->len + 1);  /* the copy is never longer than the input, plus one byte for the NUL */
+    char *q   = res;                        /* write cursor into res */
+    char *p   = buf->buf;                   /* source bytes, indexed by i */
+    int i;
 
-    char comment[STRING];
-    size_t commentlen;
+    for (i = 0; i < buf->len; i++) {
+        if (p[i] == '\"')                   /* an unescaped double quote is dropped */
+            continue;
 
-    char phrase[STRING];
-    size_t phraselen;
-};
+        if (p[i] == '\\') {                 /* drop the backslash and copy the byte that follows it verbatim */
+            if (++i >= buf->len) /* should not happen */
+                break;
+        }
 
-#define is_special(x) strchr(RFC822Specials,x)
-#define terminate_string(a, b, c)  (a[MIN(b, c)] = 0)
-#define terminate_buffer(a)        terminate_string(a, a##len, sizeof (a) - 1)
+        *q++ = p[i];
+    }
 
+    *q++ = '\0';                            /* terminate the result */
+    return res;
+}
 
-static const char *
-parse_comment(const char *s, char *comment, size_t *commentlen,
-              size_t commentmax)
+static const char *parse_comment(const char *s, static_buf *buf)
 {
     int level = 1;
 
@@ -146,70 +152,60 @@ parse_comment(const char *s, char *comment, size_t *commentlen,
             break;
         }
 
-        if (*commentlen < commentmax)
-            comment[(*commentlen)++] = *s;
+        stbuf_append(buf, *s);
     }
 
     return NULL;
 }
 
-static const char *
-parse_quote(const char *s, char *token, size_t *tokenlen, size_t tokenmax)
+static const char *parse_quote(const char *s, static_buf *buf)  /* copy a quoted string body into buf; returns the position after the closing quote, NULL if s ends first */
 {
-    if (*tokenlen < tokenmax)
-        token[(*tokenlen)++] = '"';
-
     for (; *s; s++) {
-        if (*tokenlen < tokenmax)
-            token[*tokenlen] = *s;
-
-        if (*s == '"') {
-            (*tokenlen)++;
+        switch (*s) {
+          case '"':                         /* closing quote */
+            stbuf_append(buf, *s);          /* the closing quote itself is stored in buf */
             return s + 1;
-        }
 
-        if (*s == '\\') {
+          case '\\':                        /* escape: step over the backslash */
             if (!*++s)
-                break;
+                return NULL;                /* backslash was the last byte of the input */
+            /* fallthrough: the escaped byte is stored, the backslash is not */
 
-            if (*tokenlen < tokenmax)
-                token[*tokenlen] = *s;
+          default:
+            stbuf_append(buf, *s);          /* ordinary (or just-escaped) byte */
+            break;
         }
-        (*tokenlen)++;
     }
 
     return NULL;
 }
 
-static const char *
-next_token(const char *s, char *token, size_t *tokenlen, size_t tokenmax)
-{
-    if (*s == '(')
-        return parse_comment(s + 1, token, tokenlen, tokenmax);
+#define is_special(x)         strchr(RFC822Specials,x)
 
-    if (*s == '"')
-        return parse_quote(s + 1, token, tokenlen, tokenmax);
+static const char *next_phrase(const char *s, static_buf *buf)  /* append the next token at s to buf; returns where scanning stopped */
+{
+    if (*s == '"') {                        /* quoted string */
+        stbuf_append(buf, '"');             /* the opening quote is stored in buf */
+        return parse_quote(s + 1, buf);     /* may return NULL when the quote is never closed */
+    }
 
     if (is_special(*s)) {
-        if (*tokenlen < tokenmax)
-            token[(*tokenlen)++] = *s;
+        stbuf_append(buf, *s);              /* a special character is consumed on its own */
         return s + 1;
     }
 
     while (*s) {
         if (ISSPACE(*s) || is_special(*s))
             break;
-        if (*tokenlen < tokenmax)
-            token[(*tokenlen)++] = *s;
-        s++;
+        stbuf_append(buf, *s++);            /* copy bytes up to the next space, special or end of input */
     }
+
     return s;
 }
 
 static const char *
-parse_mailboxdomain(const char *s, const char *nonspecial,
-                    char *mailbox, size_t *mailboxlen, size_t mailboxmax,
-                    struct rfc822_parse_ctx *ctx)
+parse_mailboxdomain(const char *s, const char *nonspecial, static_buf *mbox,
+                    static_buf *comment)
 {
     while (*s) {
         s = skipspaces(s);
@@ -218,11 +214,10 @@ parse_mailboxdomain(const char *s, const char *nonspecial,
             return s;
 
         if (*s == '(') {
-            if (ctx->commentlen && ctx->commentlen < sizeof(ctx->comment) - 1)
-                ctx->comment[ctx->commentlen++] = ' ';
-            s = next_token(s, ctx->comment, &ctx->commentlen, sizeof(ctx->comment) - 1);
+            stbuf_append_sp(comment);
+            s = parse_comment(s + 1, comment);
         } else {
-            s = next_token(s, mailbox, mailboxlen, mailboxmax);
+            s = next_phrase(s, mbox);
         }
 
         if (!s)
@@ -233,80 +228,68 @@ parse_mailboxdomain(const char *s, const char *nonspecial,
 }
 
 static const char *
-parse_address(const char *s, struct rfc822_parse_ctx *ctx)
+parse_address(const char *s, static_buf *comment, address_t *cur)  /* sets cur->mailbox (and cur->personal if unset) from s; returns NULL when parse_mailboxdomain fails */
 {
-    char token[STRING];
-    size_t tokenlen = 0;
+    static_buf token = {"", 0};  /* collects the text before '@', the '@' itself and the text after it */
 
-    s = parse_mailboxdomain(s, ".\"(\\",
-                            token, &tokenlen, sizeof(token) - 1, ctx);
+    s = parse_mailboxdomain(s, ".\"(\\", &token, comment);  /* part before an optional '@' */
     if (!s)
         return NULL;
 
     if (*s == '@') {
-        if (tokenlen < sizeof(token) - 1)
-            token[tokenlen++] = '@';
-        s = parse_mailboxdomain(s + 1, ".([]\\",
-                                token, &tokenlen, sizeof(token) - 1, ctx);
+        stbuf_append(&token, '@');
+        s = parse_mailboxdomain(s + 1, ".([]\\", &token, comment);  /* part after the '@' */
         if (!s)
             return NULL;
     }
 
-    terminate_buffer(token);
-    ctx->cur->mailbox = m_strdup(token);
+    cur->mailbox = p_dupstr(token.buf, token.len);  /* copy made with p_dupstr(); token itself is a local */
 
-    if (ctx->commentlen && !ctx->cur->personal) {
-        terminate_buffer(ctx->comment);
-        ctx->cur->personal = m_strdup(ctx->comment);
+    if (comment->len && !cur->personal) {  /* a personal name that is already set is never overwritten */
+        cur->personal = p_dupstr(comment->buf, comment->len);  /* otherwise the collected comment text becomes the name */
     }
 
     return s;
 }
 
-address_t **add_addrspec(address_t **last, struct rfc822_parse_ctx *ctx)
+address_t **rfc822_eotoken(address_t **last, static_buf *phrase, static_buf *comment)  /* end of token: turn a pending phrase into an address stored at *last; returns the new tail slot */
 {
-    const char *s;
+    if (phrase->len) {                      /* only act when a phrase has been collected */
+        const char *s;
+        address_t *cur = address_new();
+
+        s = parse_address(phrase->buf, comment, cur);
+        if (s && *s && *s != ',' && *s != ';') {  /* NOTE(review): s == NULL (parse failure) does not take this branch, so cur is still appended - confirm this is intended */
+            address_delete(&cur);           /* unexpected text left after the address: discard it */
+            return last;                    /* tail unchanged */
+        }
 
-    ctx->cur = address_new();
-    s = parse_address(ctx->phrase, ctx);
-    if (s && *s && *s != ',' && *s != ';') {
-        address_delete(&ctx->cur);
-        return last;
+        *last = cur;                        /* link the new address at the tail */
+        return &(*last)->next;              /* the next address goes after it */
     }
 
-    *last = ctx->cur;
-    return &(*last)->next;
+    return last;                            /* empty phrase: nothing added, tail unchanged */
 }
 
 address_t *rfc822_parse_adrlist(address_t *top, const char *s)
 {
-    struct rfc822_parse_ctx ctx = { NULL, "", 0, "", 0 };
-    int ws_pending = 0;
-    address_t **last;
+    static_buf comment = {"", 0};
+    static_buf phrase  = {"", 0};
 
-    last = address_list_last(&top);
+    address_t **last = address_list_last(&top);
+    int ws_pending = 0;
 
     for (;;) {
         ws_pending = ISSPACE(*s);
         s = skipspaces(s);
 
         switch (*s) {
-          case '\0':
-            if (ctx.phraselen) {
-                terminate_buffer(ctx.phrase);
-                terminate_buffer(ctx.comment);
-                last = add_addrspec(last, &ctx);
-            } else
-            if (ctx.commentlen && ctx.cur && !ctx.cur->personal) {
-                terminate_buffer(ctx.comment);
-                ctx.cur->personal = m_strdup(ctx.comment);
-            }
-            return top;
+            address_t *cur;
 
           default:
-            if (ctx.phraselen && ctx.phraselen < sizeof(ctx.phrase) - 1 && ws_pending)
-                ctx.phrase[ctx.phraselen++] = ' ';
-            s = next_token(s, ctx.phrase, &ctx.phraselen, sizeof(ctx.phrase) - 1);
+            if (ws_pending)
+                stbuf_append_sp(&phrase);
+            s = next_phrase(s, &phrase);
             if (!s) {
                 address_delete(&top);
                 return NULL;
@@ -314,70 +297,57 @@ address_t *rfc822_parse_adrlist(address_t *top, const char *s)
             continue;
 
           case '(':
-            if (ctx.commentlen && ctx.commentlen < sizeof(ctx.comment) - 1)
-                ctx.comment[ctx.commentlen++] = ' ';
-            s = next_token(s, ctx.comment, &ctx.commentlen, sizeof(ctx.comment) - 1);
+            stbuf_append_sp(&comment);
+            s = parse_comment(s + 1, &comment);
             if (!s) {
-                address_delete (&top);
+                address_delete(&top);
                 return NULL;
             }
             continue;
 
-          case ',':
-            if (ctx.phraselen) {
-                terminate_buffer(ctx.phrase);
-                last = add_addrspec(last, &ctx);
-            } else
-            if (ctx.commentlen && ctx.cur && !ctx.cur->personal) {
-                terminate_buffer(ctx.comment);
-                ctx.cur->personal = m_strdup(ctx.comment);
+
+          case '<':
+            cur = address_new();
+            if (phrase.len) {
+                /* if we get something like "Michael R. Elkins" remove the quotes */
+                cur->personal = rfc822_dequote_comment(&phrase);
             }
+
+            s = parse_address(skipspaces(s + 1), &comment, cur);
+            if (!s || *s != '>' || !cur->mailbox) {
+                address_delete(&top);
+                address_delete(&cur);
+                return NULL;
+            }
+
+            *last = cur;
+            last = &(*last)->next;
+            break;
+
+          case ',':
+            last = rfc822_eotoken(last, &phrase, &comment);
             break;
 
-          case ':':
-            terminate_buffer(ctx.phrase);
+          case ':': /* group start */
             *last = address_new();
-            (*last)->mailbox = m_strdup(ctx.phrase);
+            (*last)->mailbox = p_dupstr(phrase.buf, phrase.len);
             (*last)->group = 1;
             last = &(*last)->next;
             break;
 
           case ';':
-            if (ctx.phraselen) {
-                terminate_buffer(ctx.phrase);
-                last = add_addrspec(last, &ctx);
-            } else
-            if (ctx.commentlen && ctx.cur && !ctx.cur->personal) {
-                terminate_buffer(ctx.comment);
-                ctx.cur->personal = m_strdup(ctx.comment);
-            }
-
+            last = rfc822_eotoken(last, &phrase, &comment);
             /* add group terminator */
             *last = address_new();
-            return top;
-
-          case '<':
-            terminate_buffer(ctx.phrase);
-            ctx.cur = address_new ();
-            if (ctx.phraselen) {
-                /* if we get something like "Michael R. Elkins" remove the quotes */
-                rfc822_dequote_comment(ctx.phrase);
-                ctx.cur->personal = m_strdup(ctx.phrase);
-            }
-
-            s = parse_address(skipspaces(s + 1), &ctx);
-            if (!s || *s != '>' || !ctx.cur->mailbox) {
-                address_delete(&top);
-                address_delete(&ctx.cur);
-                return NULL;
-            }
-
-            *last = ctx.cur;
+            last = &(*last)->next;
             break;
+
+          case '\0':
+            last = rfc822_eotoken(last, &phrase, &comment);
+            return top;
         }
 
-        ctx.commentlen = 0;
-        ctx.phraselen = 0;
+        comment.len = phrase.len  = 0;
         s++;
     }