Cleverer hard_match detection.
[apps/pfixtools.git] / postlicyd / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "policy_tokens.h"
41
42 typedef struct strlist_config_t {
43     PA(trie_t) tries;
44     A(int)     weights;
45     A(bool)    reverses;
46     A(bool)    partiales;
47
48     int soft_threshold;
49     int hard_threshold;
50
51     unsigned is_email         :1;
52     unsigned match_sender     :1;
53     unsigned match_recipient  :1;
54
55     unsigned is_hostname      :1;
56     unsigned match_helo       :1;
57     unsigned match_client     :1;
58     unsigned match_reverse    :1;
59 } strlist_config_t;
60
61
62 static strlist_config_t *strlist_config_new(void)
63 {
64     return p_new(strlist_config_t, 1);
65 }
66
67 static void strlist_config_delete(strlist_config_t **config)
68 {
69     if (*config) {
70         array_deep_wipe((*config)->tries, trie_delete);
71         array_wipe((*config)->weights);
72         array_wipe((*config)->reverses);
73         array_wipe((*config)->partiales);
74         p_delete(config);
75     }
76 }
77
/* Copy the str_len first characters of str into dest, lower-cased with
 * ascii_tolower(), and NUL-terminate the result.
 *
 *  dest:    output buffer; the caller MUST provide at least str_len + 1
 *           bytes, no bound check is performed here.
 *  str:     source characters (not required to be NUL-terminated).
 *  str_len: number of characters to copy; when <= 0, dest becomes "".
 *  reverse: when true, the characters are written in reverse order.
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (str_len > 0) {
        if (reverse) {
            /* Walk by index rather than by pointer: a pointer loop ending on
             * "src >= str" has to compute str - 1, which is undefined
             * behaviour.
             */
            for (ssize_t i = str_len ; i > 0 ; --i) {
                *dest = ascii_tolower(str[i - 1]);
                ++dest;
            }
        } else {
            for (ssize_t i = 0 ; i < str_len ; ++i) {
                *dest = ascii_tolower(str[i]);
                ++dest;
            }
        }
    }
    *dest = '\0';
}
96
97
98 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
99 {
100     trie_t *db;
101     file_map_t map;
102     const char *p, *end;
103     char line[BUFSIZ];
104
105     if (!file_map_open(&map, file, false)) {
106         return NULL;
107     }
108     p   = map.map;
109     end = map.end;
110     while (end > p && end[-1] != '\n') {
111         --end;
112     }
113     if (end != map.end) {
114         warn("file %s miss a final \\n, ignoring last line",
115              file);
116     }
117
118     db = trie_new();
119     while (p < end && p != NULL) {
120         const char *eol = (char *)memchr(p, '\n', end - p);
121         if (eol == NULL) {
122             eol = end;
123         }
124         if (eol - p >= BUFSIZ) {
125             err("unreasonnable long line");
126             file_map_close(&map);
127             trie_delete(&db);
128             return NULL;
129         }
130         if (*p != '#') {
131             const char *eos = eol;
132             while (p < eos && isspace(*p)) {
133                 ++p;
134             }
135             while (p < eos && isspace(eos[-1])) {
136                 --eos;
137             }
138             if (p < eos) {
139                 strlist_copy(line, p, eos - p, reverse);
140                 trie_insert(db, line);
141             }
142         }
143         p = eol + 1;
144     }
145     file_map_close(&map);
146     trie_compile(db, lock);
147     return db;
148 }
149
150
151 static bool strlist_filter_constructor(filter_t *filter)
152 {
153     strlist_config_t *config = strlist_config_new();
154
155 #define PARSE_CHECK(Expr, Str, ...)                                            \
156     if (!(Expr)) {                                                             \
157         err(Str, ##__VA_ARGS__);                                               \
158         strlist_config_delete(&config);                                        \
159         return false;                                                          \
160     }
161
162     config->hard_threshold = 1;
163     config->soft_threshold = 1;
164     foreach (filter_param_t *param, filter->params) {
165         switch (param->type) {
166           /* file parameter is:
167            *  [no]lock:(prefix|suffix):weight:filename
168            *  valid options are:
169            *    - lock:   memlock the database in memory.
170            *    - nolock: don't memlock the database in memory.
171            *    - prefix: perform "prefix" compression on storage.
172            *    - suffix  perform "suffix" compression on storage.
173            *    - \d+:    a number describing the weight to give to the match
174            *              the given list [mandatory]
175            *  the file pointed by filename MUST be a valid string list (one string per
176            *  line, empty lines and lines beginning with a '#' are ignored).
177            */
178           case ATK_FILE: {
179             bool lock = false;
180             int  weight = 0;
181             bool reverse = false;
182             bool partial = false;
183             trie_t *trie = NULL;
184             const char *current = param->value;
185             const char *p = m_strchrnul(param->value, ':');
186             char *next = NULL;
187             for (int i = 0 ; i < 4 ; ++i) {
188                 PARSE_CHECK(i == 3 || *p,
189                             "file parameter must contains a locking state "
190                             "and a weight option");
191                 switch (i) {
192                   case 0:
193                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
194                         lock = true;
195                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
196                         lock = false;
197                     } else {
198                         PARSE_CHECK(false, "illegal locking state %.*s",
199                                     p - current, current);
200                     }
201                     break;
202
203                   case 1:
204                     if (p - current > (ssize_t)strlen("partial-") 
205                         && strncmp(current, "partial-", strlen("partial-")) == 0) {
206                         partial = true;
207                         current += strlen("partial-");
208                     }
209                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
210                         reverse = true;
211                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
212                         reverse = false;
213                     } else {
214                         PARSE_CHECK(false, "illegal character order value %.*s",
215                                     p - current, current);
216                     }
217                     break;
218
219                   case 2:
220                     weight = strtol(current, &next, 10);
221                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
222                                 "illegal weight value %.*s",
223                                 (p - current), current);
224                     break;
225
226                   case 3:
227                     trie = strlist_create(current, reverse, lock);
228                     PARSE_CHECK(trie != NULL,
229                                 "cannot load string list from %s", current);
230                     array_add(config->tries, trie);
231                     array_add(config->weights, weight);
232                     array_add(config->reverses, reverse);
233                     array_add(config->partiales, partial);
234                     break;
235                 }
236                 if (i != 3) {
237                     current = p + 1;
238                     p = m_strchrnul(current, ':');
239                 }
240             }
241           } break;
242
243           /* hard_threshold parameter is an integer.
244            *  If the matching score is greater or equal than this threshold,
245            *  the hook "hard_match" is called.
246            * hard_threshold = 1 means, that all matches are hard matches.
247            * default is 1;
248            */
249           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
250
251           /* soft_threshold parameter is an integer.
252            *  if the matching score is greater or equal than this threshold
253            *  and smaller or equal than the hard_threshold, the hook "soft_match"
254            *  is called.
255            * default is 1;
256            */
257           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
258
259           /* fields to match againes:
260            *  fields = field_name(,field_name)*
261            *  field_names are
262            *    - hostname: helo_name,client_name,reverse_client_name
263            *    - email: sender,recipient
264            */
265           case ATK_FIELDS: {
266             const char *current = param->value;
267             const char *p = m_strchrnul(param->value, ',');
268             do {
269                 postlicyd_token tok = policy_tokenize(current, p - current);
270                 switch (tok) {
271 #define           CASE(Up, Low, Type)                                          \
272                   case PTK_ ## Up:                                             \
273                     config->match_ ## Low = true;                              \
274                     config->is_ ## Type = true;                                \
275                     break
276                   CASE(HELO_NAME, helo, hostname);
277                   CASE(CLIENT_NAME, client, hostname);
278                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
279                   CASE(SENDER, sender, email);
280                   CASE(RECIPIENT, recipient, email);
281 #undef CASE
282                   default:
283                     PARSE_CHECK(false, "unknown field %.*s", p - current, current);
284                     break;
285                 }
286                 if (!*p) {
287                     break;
288                 }
289                 current = p + 1;
290                 p = m_strchrnul(current, ',');
291             } while (true);
292           } break;
293
294           default: break;
295         }
296     }}
297
298     PARSE_CHECK(config->is_email != config->is_hostname,
299                 "matched field MUST be emails XOR hostnames");
300     PARSE_CHECK(config->tries.len,
301                 "no file parameter in the filter %s", filter->name);
302     filter->data = config;
303     return true;
304 }
305
306 static void strlist_filter_destructor(filter_t *filter)
307 {
308     strlist_config_t *config = filter->data;
309     strlist_config_delete(&config);
310     filter->data = config;
311 }
312
313 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
314 {
315     char reverse[BUFSIZ];
316     char normal[BUFSIZ];
317     const strlist_config_t *config = filter->data;
318     int sum = 0;
319     if (config->is_email && 
320         ((config->match_sender && query->state < SMTP_MAIL)
321         || (config->match_recipient && query->state != SMTP_RCPT))) {
322         warn("trying to match an email against a field that is not "
323              "available in current protocol state");
324         return HTK_ABORT;
325     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
326         warn("trying to match hostname against helo before helo is received");
327         return HTK_ABORT;
328     }
329 #define LOOKUP(Flag, Field)                                                    \
330     if (config->match_ ## Flag) {                                              \
331         const int len = m_strlen(query->Field);                                \
332         strlist_copy(normal, query->Field, len, false);                        \
333         strlist_copy(reverse, query->Field, len, true);                        \
334         for (uint32_t i = 0 ; i < config->tries.len ; ++i) {                   \
335             const int weight   = array_elt(config->weights, i);                \
336             const trie_t *trie = array_elt(config->tries, i);                  \
337             const bool rev     = array_elt(config->reverses, i);               \
338             const bool part    = array_elt(config->partiales, i);              \
339             if ((!part && trie_lookup(trie, rev ? reverse : normal))           \
340                 || (part && trie_prefix(trie, rev ? reverse : normal))) {      \
341                 sum += weight;                                                 \
342                 if (sum >= config->hard_threshold) {                           \
343                     return HTK_HARD_MATCH;                                     \
344                 }                                                              \
345             }                                                                  \
346         }                                                                      \
347     }
348     if (config->is_email) {
349         LOOKUP(sender, sender);
350         LOOKUP(recipient, recipient);
351     } else if (config->is_hostname) {
352         LOOKUP(helo, helo_name);
353         LOOKUP(client, client_name);
354         LOOKUP(reverse, reverse_client_name);
355     }
356 #undef  LOOKUP
357     if (sum >= config->hard_threshold) {
358         return HTK_HARD_MATCH;
359     } else if (sum >= config->soft_threshold) {
360         return HTK_SOFT_MATCH;
361     } else {
362         return HTK_FAIL;
363     }
364 }
365
366 static int strlist_init(void)
367 {
368     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
369                                           strlist_filter_destructor, strlist_filter);
370     /* Hooks.
371      */
372     (void)filter_hook_register(type, "abort");
373     (void)filter_hook_register(type, "error");
374     (void)filter_hook_register(type, "fail");
375     (void)filter_hook_register(type, "hard_match");
376     (void)filter_hook_register(type, "soft_match");
377
378     /* Parameters.
379      */
380     (void)filter_param_register(type, "file");
381     (void)filter_param_register(type, "hard_threshold");
382     (void)filter_param_register(type, "soft_threshold");
383     (void)filter_param_register(type, "fields");
384     return 0;
385 }
386 module_init(strlist_init);