Add basic support for rbldnsd zone files.
[apps/pfixtools.git] / postlicyd / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "policy_tokens.h"
41
42 typedef struct strlist_config_t {
43     PA(trie_t) tries;
44     A(int)     weights;
45     A(bool)    reverses;
46     A(bool)    partiales;
47
48     int soft_threshold;
49     int hard_threshold;
50
51     unsigned is_email         :1;
52     unsigned is_hostname      :1;
53
54     unsigned match_sender     :1;
55     unsigned match_recipient  :1;
56
57     unsigned match_helo       :1;
58     unsigned match_client     :1;
59     unsigned match_reverse    :1;
60 } strlist_config_t;
61
62
63 static strlist_config_t *strlist_config_new(void)
64 {
65     return p_new(strlist_config_t, 1);
66 }
67
68 static void strlist_config_delete(strlist_config_t **config)
69 {
70     if (*config) {
71         array_deep_wipe((*config)->tries, trie_delete);
72         array_wipe((*config)->weights);
73         array_wipe((*config)->reverses);
74         array_wipe((*config)->partiales);
75         p_delete(config);
76     }
77 }
78
/* Copy the str_len first bytes of str into dest, converted with
 * ascii_tolower(), optionally in reverse order, and NUL-terminate dest.
 *
 *  dest:    output buffer; the caller must provide at least str_len + 1
 *           bytes (no bound is checked here).
 *  str:     input bytes; not read when str_len <= 0.
 *  str_len: number of bytes to copy; values <= 0 produce an empty string.
 *  reverse: when true, the bytes are written from last to first.
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (str_len > 0) {
        if (reverse) {
            /* Walk by index: stepping a pointer down to str - 1 to end the
             * loop would form a pointer before the start of the object.
             */
            for (ssize_t i = str_len ; i-- > 0 ; ) {
                *dest++ = ascii_tolower(str[i]);
            }
        } else {
            for (ssize_t i = 0 ; i < str_len ; ++i) {
                *dest++ = ascii_tolower(str[i]);
            }
        }
    }
    *dest = '\0';
}
97
98
99 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
100 {
101     trie_t *db;
102     file_map_t map;
103     const char *p, *end;
104     char line[BUFSIZ];
105
106     if (!file_map_open(&map, file, false)) {
107         return NULL;
108     }
109     p   = map.map;
110     end = map.end;
111     while (end > p && end[-1] != '\n') {
112         --end;
113     }
114     if (end != map.end) {
115         warn("file %s miss a final \\n, ignoring last line",
116              file);
117     }
118
119     db = trie_new();
120     while (p < end && p != NULL) {
121         const char *eol = (char *)memchr(p, '\n', end - p);
122         if (eol == NULL) {
123             eol = end;
124         }
125         if (eol - p >= BUFSIZ) {
126             err("unreasonnable long line");
127             file_map_close(&map);
128             trie_delete(&db);
129             return NULL;
130         }
131         if (*p != '#') {
132             const char *eos = eol;
133             while (p < eos && isspace(*p)) {
134                 ++p;
135             }
136             while (p < eos && isspace(eos[-1])) {
137                 --eos;
138             }
139             if (p < eos) {
140                 strlist_copy(line, p, eos - p, reverse);
141                 trie_insert(db, line);
142             }
143         }
144         p = eol + 1;
145     }
146     file_map_close(&map);
147     trie_compile(db, lock);
148     return db;
149 }
150
151 static bool strlist_create_from_rhbl(const char *file, bool lock,
152                                      trie_t **phosts, trie_t **pdomains)
153 {
154     trie_t *hosts, *domains;
155     uint32_t host_count, domain_count;
156     file_map_t map;
157     const char *p, *end;
158     char line[BUFSIZ];
159
160     if (!file_map_open(&map, file, false)) {
161         return false;
162     }
163     p   = map.map;
164     end = map.end;
165     while (end > p && end[-1] != '\n') {
166         --end;
167     }
168     if (end != map.end) {
169         warn("file %s miss a final \\n, ignoring last line",
170              file);
171     }
172
173     hosts = trie_new();
174     host_count = 0;
175     domains = trie_new();
176     domain_count = 0;
177     while (p < end && p != NULL) {
178         const char *eol = (char *)memchr(p, '\n', end - p);
179         if (eol == NULL) {
180             eol = end;
181         }
182         if (eol - p >= BUFSIZ) {
183             err("unreasonnable long line");
184             file_map_close(&map);
185             trie_delete(&hosts);
186             trie_delete(&domains);
187             return false;
188         }
189         if (*p != '#') {
190             const char *eos = eol;
191             while (p < eos && isspace(*p)) {
192                 ++p;
193             }
194             while (p < eos && isspace(eos[-1])) {
195                 --eos;
196             }
197             if (p < eos) {
198                 if (isalnum(*p)) {
199                     strlist_copy(line, p, eos - p, true);
200                     trie_insert(hosts, line);
201                     ++host_count;
202                 } else if (*p == '*') {
203                     ++p;
204                     strlist_copy(line, p, eos - p, true);
205                     trie_insert(domains, line);
206                     ++domain_count;
207                 }
208             }
209         }
210         p = eol + 1;
211     }
212     file_map_close(&map);
213     if (host_count > 0) {
214         trie_compile(hosts, lock);
215         *phosts = hosts;
216     } else {
217         trie_delete(&hosts);
218         *phosts = NULL;
219     }
220     if (domain_count > 0) {
221         trie_compile(domains, lock);
222         *pdomains = domains;
223     } else {
224         trie_delete(&domains);
225         *pdomains = NULL;
226     }
227     return hosts != NULL || domains != NULL;
228
229 }
230
231
232 static bool strlist_filter_constructor(filter_t *filter)
233 {
234     strlist_config_t *config = strlist_config_new();
235
236 #define PARSE_CHECK(Expr, Str, ...)                                            \
237     if (!(Expr)) {                                                             \
238         err(Str, ##__VA_ARGS__);                                               \
239         strlist_config_delete(&config);                                        \
240         return false;                                                          \
241     }
242
243     config->hard_threshold = 1;
244     config->soft_threshold = 1;
245     foreach (filter_param_t *param, filter->params) {
246         switch (param->type) {
247           /* file parameter is:
248            *  [no]lock:(partial-)(prefix|suffix):weight:filename
249            *  valid options are:
250            *    - lock:   memlock the database in memory.
251            *    - nolock: don't memlock the database in memory.
252            *    - prefix: perform "prefix" compression on storage.
253            *    - suffix  perform "suffix" compression on storage.
254            *    - \d+:    a number describing the weight to give to the match
255            *              the given list [mandatory]
256            *  the file pointed by filename MUST be a valid string list (one string per
257            *  line, empty lines and lines beginning with a '#' are ignored).
258            */
259           case ATK_FILE: {
260             bool lock = false;
261             int  weight = 0;
262             bool reverse = false;
263             bool partial = false;
264             trie_t *trie = NULL;
265             const char *current = param->value;
266             const char *p = m_strchrnul(param->value, ':');
267             char *next = NULL;
268             for (int i = 0 ; i < 4 ; ++i) {
269                 PARSE_CHECK(i == 3 || *p,
270                             "file parameter must contains a locking state "
271                             "and a weight option");
272                 switch (i) {
273                   case 0:
274                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
275                         lock = true;
276                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
277                         lock = false;
278                     } else {
279                         PARSE_CHECK(false, "illegal locking state %.*s",
280                                     p - current, current);
281                     }
282                     break;
283
284                   case 1:
285                     if (p - current > (ssize_t)strlen("partial-") 
286                         && strncmp(current, "partial-", strlen("partial-")) == 0) {
287                         partial = true;
288                         current += strlen("partial-");
289                     }
290                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
291                         reverse = true;
292                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
293                         reverse = false;
294                     } else {
295                         PARSE_CHECK(false, "illegal character order value %.*s",
296                                     p - current, current);
297                     }
298                     break;
299
300                   case 2:
301                     weight = strtol(current, &next, 10);
302                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
303                                 "illegal weight value %.*s",
304                                 (p - current), current);
305                     break;
306
307                   case 3:
308                     trie = strlist_create(current, reverse, lock);
309                     PARSE_CHECK(trie != NULL,
310                                 "cannot load string list from %s", current);
311                     array_add(config->tries, trie);
312                     array_add(config->weights, weight);
313                     array_add(config->reverses, reverse);
314                     array_add(config->partiales, partial);
315                     break;
316                 }
317                 if (i != 3) {
318                     current = p + 1;
319                     p = m_strchrnul(current, ':');
320                 }
321             }
322           } break;
323
324           /* rbldns parameter is:
325            *  [no]lock::weight:filename
326            *  valid options are:
327            *    - lock:   memlock the database in memory.
328            *    - nolock: don't memlock the database in memory.
329            *    - \d+:    a number describing the weight to give to the match
330            *              the given list [mandatory]
331            *  directly import a file issued from a rhbl in rbldns format.
332            */
333           case ATK_RBLDNS: {
334             bool lock = false;
335             int  weight = 0;
336             trie_t *trie_hosts   = NULL;
337             trie_t *trie_domains = NULL;
338             const char *current = param->value;
339             const char *p = m_strchrnul(param->value, ':');
340             char *next = NULL;
341             for (int i = 0 ; i < 3 ; ++i) {
342                 PARSE_CHECK(i == 2 || *p,
343                             "file parameter must contains a locking state "
344                             "and a weight option");
345                 switch (i) {
346                   case 0:
347                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
348                         lock = true;
349                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
350                         lock = false;
351                     } else {
352                         PARSE_CHECK(false, "illegal locking state %.*s",
353                                     p - current, current);
354                     }
355                     break;
356
357                   case 1:
358                     weight = strtol(current, &next, 10);
359                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
360                                 "illegal weight value %.*s",
361                                 (p - current), current);
362                     break;
363
364                   case 2:
365                     PARSE_CHECK(strlist_create_from_rhbl(current, lock,
366                                                          &trie_hosts, &trie_domains),
367                                 "cannot load string list from rhbl %s", current);
368                     if (trie_hosts != NULL) {
369                         array_add(config->tries, trie_hosts);
370                         array_add(config->weights, weight);
371                         array_add(config->reverses, true);
372                         array_add(config->partiales, false);
373                     }
374                     if (trie_domains != NULL) {
375                         array_add(config->tries, trie_domains);
376                         array_add(config->weights, weight);
377                         array_add(config->reverses, true);
378                         array_add(config->partiales, true);
379                     }
380                     config->is_hostname = true;
381                     break;
382                 }
383                 if (i != 2) {
384                     current = p + 1;
385                     p = m_strchrnul(current, ':');
386                 }
387             }
388           } break;
389
390           /* hard_threshold parameter is an integer.
391            *  If the matching score is greater or equal than this threshold,
392            *  the hook "hard_match" is called.
393            * hard_threshold = 1 means, that all matches are hard matches.
394            * default is 1;
395            */
396           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
397
398           /* soft_threshold parameter is an integer.
399            *  if the matching score is greater or equal than this threshold
400            *  and smaller or equal than the hard_threshold, the hook "soft_match"
401            *  is called.
402            * default is 1;
403            */
404           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
405
406           /* fields to match againes:
407            *  fields = field_name(,field_name)*
408            *  field_names are
409            *    - hostname: helo_name,client_name,reverse_client_name
410            *    - email: sender,recipient
411            */
412           case ATK_FIELDS: {
413             const char *current = param->value;
414             const char *p = m_strchrnul(param->value, ',');
415             do {
416                 postlicyd_token tok = policy_tokenize(current, p - current);
417                 switch (tok) {
418 #define           CASE(Up, Low, Type)                                          \
419                   case PTK_ ## Up:                                             \
420                     config->match_ ## Low = true;                              \
421                     config->is_ ## Type = true;                                \
422                     break
423                   CASE(HELO_NAME, helo, hostname);
424                   CASE(CLIENT_NAME, client, hostname);
425                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
426                   CASE(SENDER_DOMAIN, sender, hostname);
427                   CASE(RECIPIENT_DOMAIN, recipient, hostname);
428                   CASE(SENDER, sender, email);
429                   CASE(RECIPIENT, recipient, email);
430 #undef CASE
431                   default:
432                     PARSE_CHECK(false, "unknown field %.*s", p - current, current);
433                     break;
434                 }
435                 if (!*p) {
436                     break;
437                 }
438                 current = p + 1;
439                 p = m_strchrnul(current, ',');
440             } while (true);
441           } break;
442
443           default: break;
444         }
445     }}
446
447     PARSE_CHECK(config->is_email != config->is_hostname,
448                 "matched field MUST be emails XOR hostnames");
449     PARSE_CHECK(config->tries.len,
450                 "no file parameter in the filter %s", filter->name);
451     filter->data = config;
452     return true;
453 }
454
455 static void strlist_filter_destructor(filter_t *filter)
456 {
457     strlist_config_t *config = filter->data;
458     strlist_config_delete(&config);
459     filter->data = config;
460 }
461
462 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
463 {
464     char reverse[BUFSIZ];
465     char normal[BUFSIZ];
466     const strlist_config_t *config = filter->data;
467     int sum = 0;
468     if (config->is_email && 
469         ((config->match_sender && query->state < SMTP_MAIL)
470         || (config->match_recipient && query->state != SMTP_RCPT))) {
471         warn("trying to match an email against a field that is not "
472              "available in current protocol state");
473         return HTK_ABORT;
474     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
475         warn("trying to match hostname against helo before helo is received");
476         return HTK_ABORT;
477     }
478 #define LOOKUP(Flag, Field)                                                    \
479     if (config->match_ ## Flag) {                                              \
480         const int len = m_strlen(query->Field);                                \
481         strlist_copy(normal, query->Field, len, false);                        \
482         strlist_copy(reverse, query->Field, len, true);                        \
483         for (uint32_t i = 0 ; i < config->tries.len ; ++i) {                   \
484             const int weight   = array_elt(config->weights, i);                \
485             const trie_t *trie = array_elt(config->tries, i);                  \
486             const bool rev     = array_elt(config->reverses, i);               \
487             const bool part    = array_elt(config->partiales, i);              \
488             if ((!part && trie_lookup(trie, rev ? reverse : normal))           \
489                 || (part && trie_prefix(trie, rev ? reverse : normal))) {      \
490                 sum += weight;                                                 \
491                 if (sum >= config->hard_threshold) {                           \
492                     return HTK_HARD_MATCH;                                     \
493                 }                                                              \
494             }                                                                  \
495         }                                                                      \
496     }
497     if (config->is_email) {
498         LOOKUP(sender, sender);
499         LOOKUP(recipient, recipient);
500     } else if (config->is_hostname) {
501         LOOKUP(helo, helo_name);
502         LOOKUP(client, client_name);
503         LOOKUP(reverse, reverse_client_name);
504         LOOKUP(recipient, recipient_domain);
505         LOOKUP(sender, sender_domain);
506     }
507 #undef  LOOKUP
508     if (sum >= config->hard_threshold) {
509         return HTK_HARD_MATCH;
510     } else if (sum >= config->soft_threshold) {
511         return HTK_SOFT_MATCH;
512     } else {
513         return HTK_FAIL;
514     }
515 }
516
517 static int strlist_init(void)
518 {
519     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
520                                           strlist_filter_destructor, strlist_filter);
521     /* Hooks.
522      */
523     (void)filter_hook_register(type, "abort");
524     (void)filter_hook_register(type, "error");
525     (void)filter_hook_register(type, "fail");
526     (void)filter_hook_register(type, "hard_match");
527     (void)filter_hook_register(type, "soft_match");
528
529     /* Parameters.
530      */
531     (void)filter_param_register(type, "file");
532     (void)filter_param_register(type, "rbldns");
533     (void)filter_param_register(type, "hard_threshold");
534     (void)filter_param_register(type, "soft_threshold");
535     (void)filter_param_register(type, "fields");
536     return 0;
537 }
538 module_init(strlist_init);