From: Florent Bruneau Date: Fri, 3 Oct 2008 20:18:16 +0000 (+0200) Subject: Add prefix and suffix matching for strlist filter. X-Git-Url: http://git.madism.org/?a=commitdiff_plain;h=3df18edc2580a1cc3e95d427337e5afef042a83d;p=apps%2Fpfixtools.git Add prefix and suffix matching for strlist filter. Signed-off-by: Florent Bruneau --- diff --git a/common/trie.c b/common/trie.c index cb39cb2..f58bd30 100644 --- a/common/trie.c +++ b/common/trie.c @@ -111,7 +111,11 @@ static inline bool trie_entry_match(const trie_t *trie, static inline bool trie_entry_prefix(const trie_t *trie, const trie_entry_t *entry, const char *key) { - return !!(strncmp(array_ptr(trie->c, entry->c_offset), key, entry->c_len) == 0); + int len = entry->c_len; + if (len > 0 && array_elt(trie->c, entry->c_offset + len - 1) == '\0') { + --len; + } + return !!(strncmp(array_ptr(trie->c, entry->c_offset), key, len) == 0); } static inline bool trie_entry_is_leaf(const trie_entry_t *entry) @@ -366,6 +370,11 @@ static inline void trie_entry_inspect(const trie_t *trie, bool show_content, static int leaves = 0; static int depth_sum = 0; + if (entry == array_ptr(trie->entries, 0)) { + max_depth = 0; + leaves = 0; + depth_sum = 0; + } if (trie_entry_is_leaf(entry)) { if (level > max_depth) { max_depth = level; diff --git a/common/tst-trie.c b/common/tst-trie.c index 990b151..78977a8 100644 --- a/common/tst-trie.c +++ b/common/tst-trie.c @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) */ if (argc > 1) { trie = create_trie_from_file(argv[1]); - trie_inspect(trie, false); + trie_inspect(trie, true); trie_delete(&trie); } return 0; diff --git a/example/postlicyd.conf b/example/postlicyd.conf index fded2bd..ca17b60 100644 --- a/example/postlicyd.conf +++ b/example/postlicyd.conf @@ -101,15 +101,22 @@ spamhaus_and_abuseat { # - strlist: match strings from the query against a list of list. 
# Parameters: -# - file: (no)?lock:(pre|suf)fix:weight:filename +# - file: (no)?lock:(partial-)?(pre|suf)fix:weight:filename # declare a file to load. If lock is given, the list is locked into the # RAM. Prefix/Suffix is a parameter to tell the matcher which is the most # efficient storage order. The strings are internally stored into a trie that # allow high compression if a lot of prefix are shared by several strings. If # you choose "prefix", string are stored in the natural order in memory and # prefix compression is performed. If you choose "suffix", strings are stored -# in reverse order in memory and suffix compression is performed. The weight -# is a number giving the weight of this list in the string score. +# in reverse order in memory and suffix compression is performed. If you add "partial-" +# to the match order, the entry will match if the file contains a prefix (resp. suffix) +# of the string. The weight is a number giving the weight of this list in the string score. +# e.g.: +# * a file that contains ".polytechnique.org" in "partial-suffix" mode will match +# all subdomains of "polytechnique.org". +# * a file that contains "postmaster@" in "partial-prefix" mode will match all +# postmaster emails. +# * a file opened without the "partial-" modifier matches exact strings only. 
# - soft_threshold: score (default: 1) # minimum score to match the soft_match return value # - hard_threshold: score (default: 1) @@ -140,7 +147,7 @@ client_whitelist { type = strlist; # configuration - file = lock:1:/var/spool/postlicyd/client_whitelist; + file = lock:suffix:1:/var/spool/postlicyd/client_whitelist; fields = client_name; # hooks diff --git a/postlicyd/data/test.conf b/postlicyd/data/test.conf index 6e0a7a0..8f4c9a0 100644 --- a/postlicyd/data/test.conf +++ b/postlicyd/data/test.conf @@ -55,6 +55,17 @@ hostnames2 { on_fail = postfix:OK; } +hostnames3 { + type = strlist; + + fields = client_name; + file = nolock:partial-suffix:1:data/test_hostnames_4; + + on_hard_match = postfix:OK; + on_fail = postfix:OK; +} + + emails1 { type = strlist; @@ -87,6 +98,16 @@ emails2 { on_fail = postfix:ok; } +emails3 { + type = strlist; + + fields = sender; + file = nolock:partial-prefix:1:data/test_emails_4; + + on_hard_match = postfix:ok; + on_fail = postfix:ok; +} + ips1 { type = iplist; diff --git a/postlicyd/data/testcase_1 b/postlicyd/data/testcase_1 index a562889..4356694 100644 --- a/postlicyd/data/testcase_1 +++ b/postlicyd/data/testcase_1 @@ -27,6 +27,8 @@ match1=match match2=fail hostnames1=fail hostnames2=fail +hostnames3=fail emails1=fail emails2=fail +emails3=fail ips1=hard_match diff --git a/postlicyd/data/testcase_2 b/postlicyd/data/testcase_2 index 55fdab7..674df87 100644 --- a/postlicyd/data/testcase_2 +++ b/postlicyd/data/testcase_2 @@ -3,7 +3,7 @@ protocol_state=RCPT protocol_name=SMTP helo_name=example.org queue_id=8045F2AB23 -sender=contact@example.com +sender=postmaster@example.com recipient=contact@example.org recipient_count=0 client_address=1.2.3.4 @@ -27,6 +27,8 @@ match1=fail match2=match hostnames1=soft_match hostnames2=hard_match +hostnames3=hard_match emails1=soft_match emails2=hard_match +emails3=hard_match ips1=hard_match diff --git a/postlicyd/data/testcase_3 b/postlicyd/data/testcase_3 index efe3754..c6c2670 100644 --- 
a/postlicyd/data/testcase_3 +++ b/postlicyd/data/testcase_3 @@ -27,6 +27,8 @@ match1=fail match2=match hostnames1=fail hostnames2=hard_match +hostnames3=hard_match emails1=fail emails2=fail +emails3=fail ips1=error diff --git a/postlicyd/data/testcase_4 b/postlicyd/data/testcase_4 index beed42d..2ab60ee 100644 --- a/postlicyd/data/testcase_4 +++ b/postlicyd/data/testcase_4 @@ -27,6 +27,8 @@ match1=fail match2=fail hostnames1=fail hostnames2=hard_match +hostnames3=hard_match emails1=fail emails2=fail +emails3=fail ips1=error diff --git a/postlicyd/data/testcase_5 b/postlicyd/data/testcase_5 index 9b96b98..d28b7f4 100644 --- a/postlicyd/data/testcase_5 +++ b/postlicyd/data/testcase_5 @@ -27,6 +27,8 @@ match1=fail match2=match hostnames1=fail hostnames2=hard_match +hostnames3=hard_match emails1=fail emails2=fail +emails3=fail ips1=fail diff --git a/postlicyd/data/testcase_6 b/postlicyd/data/testcase_6 index 45213d7..616b315 100644 --- a/postlicyd/data/testcase_6 +++ b/postlicyd/data/testcase_6 @@ -3,7 +3,7 @@ protocol_state=RCPT protocol_name=SMTP helo_name=tata.example.org queue_id=8045F2AB23 -sender=contact@exemple.com +sender=postmaster@exemple.com recipient=contact@exemple.org recipient_count=0 client_address=2.3.4.5 @@ -27,6 +27,8 @@ match1=fail match2=fail hostnames1=fail hostnames2=hard_match +hostnames3=hard_match emails1=fail emails2=fail +emails3=hard_match ips1=soft_match diff --git a/postlicyd/strlist.c b/postlicyd/strlist.c index 875ee23..4ca2057 100644 --- a/postlicyd/strlist.c +++ b/postlicyd/strlist.c @@ -43,6 +43,7 @@ typedef struct strlist_config_t { PA(trie_t) tries; A(int) weights; A(bool) reverses; + A(bool) partiales; int soft_threshold; int hard_threshold; @@ -69,6 +70,7 @@ static void strlist_config_delete(strlist_config_t **config) array_deep_wipe((*config)->tries, trie_delete); array_wipe((*config)->weights); array_wipe((*config)->reverses); + array_wipe((*config)->partiales); p_delete(config); } } @@ -177,6 +179,7 @@ static bool 
strlist_filter_constructor(filter_t *filter) bool lock = false; int weight = 0; bool reverse = false; + bool partial = false; trie_t *trie = NULL; const char *current = param->value; const char *p = m_strchrnul(param->value, ':'); @@ -198,6 +201,11 @@ static bool strlist_filter_constructor(filter_t *filter) break; case 1: + if (p - current > (ssize_t)strlen("partial-") + && strncmp(current, "partial-", strlen("partial-")) == 0) { + partial = true; + current += strlen("partial-"); + } if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) { reverse = true; } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) { @@ -222,6 +230,7 @@ static bool strlist_filter_constructor(filter_t *filter) array_add(config->tries, trie); array_add(config->weights, weight); array_add(config->reverses, reverse); + array_add(config->partiales, partial); break; } if (i != 3) { @@ -326,8 +335,10 @@ static filter_result_t strlist_filter(const filter_t *filter, const query_t *que for (uint32_t i = 0 ; i < config->tries.len ; ++i) { \ const int weight = array_elt(config->weights, i); \ const trie_t *trie = array_elt(config->tries, i); \ - const bool rev = array_elt(config->reverses, i); \ - if (trie_lookup(trie, rev ? reverse : normal)) { \ + const bool rev = array_elt(config->reverses, i); \ + const bool part = array_elt(config->partiales, i); \ + if ((!part && trie_lookup(trie, rev ? reverse : normal)) \ + || (part && trie_prefix(trie, rev ? reverse : normal))) { \ sum += weight; \ } \ } \