0c790cb1dc5283c0efd1da262eee9693339ad54e
[apps/pfixtools.git] / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "rbl.h"
41 #include "policy_tokens.h"
42
43 typedef struct strlist_config_t {
44     PA(trie_t) tries;
45     A(int)     weights;
46     A(bool)    reverses;
47     A(bool)    partiales;
48
49     A(char)     hosts;
50     A(int)      host_offsets;
51     A(int)      host_weights;
52
53     int soft_threshold;
54     int hard_threshold;
55
56     unsigned is_email         :1;
57     unsigned is_hostname      :1;
58
59     unsigned match_sender     :1;
60     unsigned match_recipient  :1;
61
62     unsigned match_helo       :1;
63     unsigned match_client     :1;
64     unsigned match_reverse    :1;
65 } strlist_config_t;
66
67
68 static strlist_config_t *strlist_config_new(void)
69 {
70     return p_new(strlist_config_t, 1);
71 }
72
73 static void strlist_config_delete(strlist_config_t **config)
74 {
75     if (*config) {
76         array_deep_wipe((*config)->tries, trie_delete);
77         array_wipe((*config)->weights);
78         array_wipe((*config)->reverses);
79         array_wipe((*config)->partiales);
80         array_wipe((*config)->hosts);
81         array_wipe((*config)->host_offsets);
82         array_wipe((*config)->host_weights);
83         p_delete(config);
84     }
85 }
86
/* Copy a lower-cased version of str[0 .. str_len) into dest.
 *
 *  dest:    output buffer; str_len + 1 bytes are written when str_len > 0,
 *           a single '\0' otherwise.  The caller must provide enough room.
 *  str:     source characters; only read when str_len > 0.
 *  str_len: number of characters to copy.
 *  reverse: when true the characters are written in reverse order.
 *
 * The result is always NUL-terminated.
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (reverse) {
        /* Walk by index: a pointer loop that ends on "str - 1" would form a
         * pointer before the start of the object, which is undefined.
         */
        for (ssize_t i = str_len ; i > 0 ; --i) {
            *dest++ = ascii_tolower(str[i - 1]);
        }
    } else {
        for (ssize_t i = 0 ; i < str_len ; ++i) {
            *dest++ = ascii_tolower(str[i]);
        }
    }
    *dest = '\0';
}
105
106
107 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
108 {
109     trie_t *db;
110     file_map_t map;
111     const char *p, *end;
112     char line[BUFSIZ];
113
114     if (!file_map_open(&map, file, false)) {
115         return NULL;
116     }
117     p   = map.map;
118     end = map.end;
119     while (end > p && end[-1] != '\n') {
120         --end;
121     }
122     if (end != map.end) {
123         warn("file %s miss a final \\n, ignoring last line",
124              file);
125     }
126
127     db = trie_new();
128     while (p < end && p != NULL) {
129         const char *eol = (char *)memchr(p, '\n', end - p);
130         if (eol == NULL) {
131             eol = end;
132         }
133         if (eol - p >= BUFSIZ) {
134             err("unreasonnable long line");
135             file_map_close(&map);
136             trie_delete(&db);
137             return NULL;
138         }
139         if (*p != '#') {
140             const char *eos = eol;
141             while (p < eos && isspace(*p)) {
142                 ++p;
143             }
144             while (p < eos && isspace(eos[-1])) {
145                 --eos;
146             }
147             if (p < eos) {
148                 strlist_copy(line, p, eos - p, reverse);
149                 trie_insert(db, line);
150             }
151         }
152         p = eol + 1;
153     }
154     file_map_close(&map);
155     trie_compile(db, lock);
156     return db;
157 }
158
159 static bool strlist_create_from_rhbl(const char *file, bool lock,
160                                      trie_t **phosts, trie_t **pdomains)
161 {
162     trie_t *hosts, *domains;
163     uint32_t host_count, domain_count;
164     file_map_t map;
165     const char *p, *end;
166     char line[BUFSIZ];
167
168     if (!file_map_open(&map, file, false)) {
169         return false;
170     }
171     p   = map.map;
172     end = map.end;
173     while (end > p && end[-1] != '\n') {
174         --end;
175     }
176     if (end != map.end) {
177         warn("file %s miss a final \\n, ignoring last line",
178              file);
179     }
180
181     hosts = trie_new();
182     host_count = 0;
183     domains = trie_new();
184     domain_count = 0;
185     while (p < end && p != NULL) {
186         const char *eol = (char *)memchr(p, '\n', end - p);
187         if (eol == NULL) {
188             eol = end;
189         }
190         if (eol - p >= BUFSIZ) {
191             err("unreasonnable long line");
192             file_map_close(&map);
193             trie_delete(&hosts);
194             trie_delete(&domains);
195             return false;
196         }
197         if (*p != '#') {
198             const char *eos = eol;
199             while (p < eos && isspace(*p)) {
200                 ++p;
201             }
202             while (p < eos && isspace(eos[-1])) {
203                 --eos;
204             }
205             if (p < eos) {
206                 if (isalnum(*p)) {
207                     strlist_copy(line, p, eos - p, true);
208                     trie_insert(hosts, line);
209                     ++host_count;
210                 } else if (*p == '*') {
211                     ++p;
212                     strlist_copy(line, p, eos - p, true);
213                     trie_insert(domains, line);
214                     ++domain_count;
215                 }
216             }
217         }
218         p = eol + 1;
219     }
220     file_map_close(&map);
221     if (host_count > 0) {
222         trie_compile(hosts, lock);
223         *phosts = hosts;
224     } else {
225         trie_delete(&hosts);
226         *phosts = NULL;
227     }
228     if (domain_count > 0) {
229         trie_compile(domains, lock);
230         *pdomains = domains;
231     } else {
232         trie_delete(&domains);
233         *pdomains = NULL;
234     }
235     return hosts != NULL || domains != NULL;
236
237 }
238
239
240 static bool strlist_filter_constructor(filter_t *filter)
241 {
242     strlist_config_t *config = strlist_config_new();
243
244 #define PARSE_CHECK(Expr, Str, ...)                                            \
245     if (!(Expr)) {                                                             \
246         err(Str, ##__VA_ARGS__);                                               \
247         strlist_config_delete(&config);                                        \
248         return false;                                                          \
249     }
250
251     config->hard_threshold = 1;
252     config->soft_threshold = 1;
253     foreach (filter_param_t *param, filter->params) {
254         switch (param->type) {
255           /* file parameter is:
256            *  [no]lock:(partial-)(prefix|suffix):weight:filename
257            *  valid options are:
258            *    - lock:   memlock the database in memory.
259            *    - nolock: don't memlock the database in memory.
260            *    - prefix: perform "prefix" compression on storage.
261            *    - suffix  perform "suffix" compression on storage.
262            *    - \d+:    a number describing the weight to give to the match
263            *              the given list [mandatory]
264            *  the file pointed by filename MUST be a valid string list (one string per
265            *  line, empty lines and lines beginning with a '#' are ignored).
266            */
267           case ATK_FILE: {
268             bool lock = false;
269             int  weight = 0;
270             bool reverse = false;
271             bool partial = false;
272             trie_t *trie = NULL;
273             const char *current = param->value;
274             const char *p = m_strchrnul(param->value, ':');
275             char *next = NULL;
276             for (int i = 0 ; i < 4 ; ++i) {
277                 PARSE_CHECK(i == 3 || *p,
278                             "file parameter must contains a locking state "
279                             "and a weight option");
280                 switch (i) {
281                   case 0:
282                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
283                         lock = true;
284                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
285                         lock = false;
286                     } else {
287                         PARSE_CHECK(false, "illegal locking state %.*s",
288                                     (int)(p - current), current);
289                     }
290                     break;
291
292                   case 1:
293                     if (p - current > (ssize_t)strlen("partial-") 
294                         && strncmp(current, "partial-", strlen("partial-")) == 0) {
295                         partial = true;
296                         current += strlen("partial-");
297                     }
298                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
299                         reverse = true;
300                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
301                         reverse = false;
302                     } else {
303                         PARSE_CHECK(false, "illegal character order value %.*s",
304                                     (int)(p - current), current);
305                     }
306                     break;
307
308                   case 2:
309                     weight = strtol(current, &next, 10);
310                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
311                                 "illegal weight value %.*s",
312                                 (int)(p - current), current);
313                     break;
314
315                   case 3:
316                     trie = strlist_create(current, reverse, lock);
317                     PARSE_CHECK(trie != NULL,
318                                 "cannot load string list from %s", current);
319                     array_add(config->tries, trie);
320                     array_add(config->weights, weight);
321                     array_add(config->reverses, reverse);
322                     array_add(config->partiales, partial);
323                     break;
324                 }
325                 if (i != 3) {
326                     current = p + 1;
327                     p = m_strchrnul(current, ':');
328                 }
329             }
330           } break;
331
332           /* rbldns parameter is:
333            *  [no]lock::weight:filename
334            *  valid options are:
335            *    - lock:   memlock the database in memory.
336            *    - nolock: don't memlock the database in memory.
337            *    - \d+:    a number describing the weight to give to the match
338            *              the given list [mandatory]
339            *  directly import a file issued from a rhbl in rbldns format.
340            */
341           case ATK_RBLDNS: {
342             bool lock = false;
343             int  weight = 0;
344             trie_t *trie_hosts   = NULL;
345             trie_t *trie_domains = NULL;
346             const char *current = param->value;
347             const char *p = m_strchrnul(param->value, ':');
348             char *next = NULL;
349             for (int i = 0 ; i < 3 ; ++i) {
350                 PARSE_CHECK(i == 2 || *p,
351                             "file parameter must contains a locking state "
352                             "and a weight option");
353                 switch (i) {
354                   case 0:
355                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
356                         lock = true;
357                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
358                         lock = false;
359                     } else {
360                         PARSE_CHECK(false, "illegal locking state %.*s",
361                                     (int)(p - current), current);
362                     }
363                     break;
364
365                   case 1:
366                     weight = strtol(current, &next, 10);
367                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
368                                 "illegal weight value %.*s",
369                                 (int)(p - current), current);
370                     break;
371
372                   case 2:
373                     PARSE_CHECK(strlist_create_from_rhbl(current, lock,
374                                                          &trie_hosts, &trie_domains),
375                                 "cannot load string list from rhbl %s", current);
376                     if (trie_hosts != NULL) {
377                         array_add(config->tries, trie_hosts);
378                         array_add(config->weights, weight);
379                         array_add(config->reverses, true);
380                         array_add(config->partiales, false);
381                     }
382                     if (trie_domains != NULL) {
383                         array_add(config->tries, trie_domains);
384                         array_add(config->weights, weight);
385                         array_add(config->reverses, true);
386                         array_add(config->partiales, true);
387                     }
388                     config->is_hostname = true;
389                     break;
390                 }
391                 if (i != 2) {
392                     current = p + 1;
393                     p = m_strchrnul(current, ':');
394                 }
395             }
396           } break;
397
398           /* dns parameter.
399            *  weight:hostname.
400            * define a RBL to use through DNS resolution.
401            */
402           case ATK_DNS: {
403             int  weight = 0;
404             const char *current = param->value;
405             const char *p = m_strchrnul(param->value, ':');
406             char *next = NULL;
407             for (int i = 0 ; i < 2 ; ++i) {
408                 PARSE_CHECK(i == 1 || *p,
409                             "host parameter must contains a weight option");
410                 switch (i) {
411                   case 0:
412                     weight = strtol(current, &next, 10);
413                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
414                                 "illegal weight value %.*s",
415                                 (int)(p - current), current);
416                     break;
417
418                   case 1:
419                     array_add(config->host_offsets, array_len(config->hosts));
420                     array_append(config->hosts, current, strlen(current) + 1);
421                     array_add(config->host_weights, weight);
422                     break;
423                 }
424                 if (i != 1) {
425                     current = p + 1;
426                     p = m_strchrnul(current, ':');
427                 }
428             }
429           } break;
430
431           /* hard_threshold parameter is an integer.
432            *  If the matching score is greater or equal than this threshold,
433            *  the hook "hard_match" is called.
434            * hard_threshold = 1 means, that all matches are hard matches.
435            * default is 1;
436            */
437           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
438
439           /* soft_threshold parameter is an integer.
440            *  if the matching score is greater or equal than this threshold
441            *  and smaller or equal than the hard_threshold, the hook "soft_match"
442            *  is called.
443            * default is 1;
444            */
445           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
446
447           /* fields to match againes:
448            *  fields = field_name(,field_name)*
449            *  field_names are
450            *    - hostname: helo_name,client_name,reverse_client_name
451            *    - email: sender,recipient
452            */
453           case ATK_FIELDS: {
454             const char *current = param->value;
455             const char *p = m_strchrnul(param->value, ',');
456             do {
457                 postlicyd_token tok = policy_tokenize(current, p - current);
458                 switch (tok) {
459 #define           CASE(Up, Low, Type)                                          \
460                   case PTK_ ## Up:                                             \
461                     config->match_ ## Low = true;                              \
462                     config->is_ ## Type = true;                                \
463                     break
464                   CASE(HELO_NAME, helo, hostname);
465                   CASE(CLIENT_NAME, client, hostname);
466                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
467                   CASE(SENDER_DOMAIN, sender, hostname);
468                   CASE(RECIPIENT_DOMAIN, recipient, hostname);
469                   CASE(SENDER, sender, email);
470                   CASE(RECIPIENT, recipient, email);
471 #undef CASE
472                   default:
473                     PARSE_CHECK(false, "unknown field %.*s", (int)(p - current), current);
474                     break;
475                 }
476                 if (!*p) {
477                     break;
478                 }
479                 current = p + 1;
480                 p = m_strchrnul(current, ',');
481             } while (true);
482           } break;
483
484           default: break;
485         }
486     }}
487
488     PARSE_CHECK(config->is_email != config->is_hostname,
489                 "matched field MUST be emails XOR hostnames");
490     PARSE_CHECK(config->tries.len || config->host_offsets.len,
491                 "no file parameter in the filter %s", filter->name);
492     filter->data = config;
493     return true;
494 }
495
496 static void strlist_filter_destructor(filter_t *filter)
497 {
498     strlist_config_t *config = filter->data;
499     strlist_config_delete(&config);
500     filter->data = config;
501 }
502
503 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
504 {
505     char reverse[BUFSIZ];
506     char normal[BUFSIZ];
507     const strlist_config_t *config = filter->data;
508     int sum = 0;
509     bool error = true;
510
511
512     if (config->is_email && 
513         ((config->match_sender && query->state < SMTP_MAIL)
514         || (config->match_recipient && query->state != SMTP_RCPT))) {
515         warn("trying to match an email against a field that is not "
516              "available in current protocol state");
517         return HTK_ABORT;
518     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
519         warn("trying to match hostname against helo before helo is received");
520         return HTK_ABORT;
521     }
522 #define LOOKUP(Flag, Field)                                                    \
523     if (config->match_ ## Flag) {                                              \
524         const int len = m_strlen(query->Field);                                \
525         strlist_copy(normal, query->Field, len, false);                        \
526         strlist_copy(reverse, query->Field, len, true);                        \
527         for (uint32_t i = 0 ; i < config->tries.len ; ++i) {                   \
528             const int weight   = array_elt(config->weights, i);                \
529             const trie_t *trie = array_elt(config->tries, i);                  \
530             const bool rev     = array_elt(config->reverses, i);               \
531             const bool part    = array_elt(config->partiales, i);              \
532             if ((!part && trie_lookup(trie, rev ? reverse : normal))           \
533                 || (part && trie_prefix(trie, rev ? reverse : normal))) {      \
534                 sum += weight;                                                 \
535                 if (sum >= config->hard_threshold) {                           \
536                     return HTK_HARD_MATCH;                                     \
537                 }                                                              \
538             }                                                                  \
539             error = false;                                                     \
540         }                                                                      \
541     }
542 #define DNS(Flag, Field)                                                       \
543     if (config->match_ ## Flag) {                                              \
544         const int len = m_strlen(query->Field);                                \
545         strlist_copy(normal, query->Field, len, false);                        \
546         for (uint32_t i = 0 ; len > 0 && i < config->host_offsets.len ; ++i) { \
547             const char *rbl    = array_ptr(config->hosts,                      \
548                                            array_elt(config->host_offsets, i));\
549             const int weight   = array_elt(config->host_weights, i);           \
550             switch (rhbl_check(normal, rbl)) {                                 \
551               case RBL_FOUND:                                                  \
552                 error = false;                                                 \
553                 sum += weight;                                                 \
554                 if (sum >= config->hard_threshold) {                           \
555                     return HTK_HARD_MATCH;                                     \
556                 }                                                              \
557                 break;                                                         \
558               case RBL_NOTFOUND:                                               \
559                 error = false;                                                 \
560                 break;                                                         \
561               case RBL_ERROR:                                                  \
562                 warn("rbl %s unavailable", rbl);                               \
563                 break;                                                         \
564             }                                                                  \
565         }                                                                      \
566     }
567
568     if (config->is_email) {
569         LOOKUP(sender, sender);
570         LOOKUP(recipient, recipient);
571         DNS(sender, sender);
572         DNS(recipient, recipient);
573     } else if (config->is_hostname) {
574         LOOKUP(helo, helo_name);
575         LOOKUP(client, client_name);
576         LOOKUP(reverse, reverse_client_name);
577         LOOKUP(recipient, recipient_domain);
578         LOOKUP(sender, sender_domain);
579         DNS(helo, helo_name);
580         DNS(client, client_name);
581         DNS(reverse, reverse_client_name);
582         DNS(recipient, recipient_domain);
583         DNS(sender, sender_domain);
584     }
585 #undef  DNS
586 #undef  LOOKUP
587     if (error) {
588         err("filter %s: all the rbls returned an error", filter->name);
589         return HTK_ERROR;
590     }
591     if (sum >= config->hard_threshold) {
592         return HTK_HARD_MATCH;
593     } else if (sum >= config->soft_threshold) {
594         return HTK_SOFT_MATCH;
595     } else {
596         return HTK_FAIL;
597     }
598 }
599
600 static int strlist_init(void)
601 {
602     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
603                                           strlist_filter_destructor, strlist_filter);
604     /* Hooks.
605      */
606     (void)filter_hook_register(type, "abort");
607     (void)filter_hook_register(type, "error");
608     (void)filter_hook_register(type, "fail");
609     (void)filter_hook_register(type, "hard_match");
610     (void)filter_hook_register(type, "soft_match");
611
612     /* Parameters.
613      */
614     (void)filter_param_register(type, "file");
615     (void)filter_param_register(type, "rbldns");
616     (void)filter_param_register(type, "dns");
617     (void)filter_param_register(type, "hard_threshold");
618     (void)filter_param_register(type, "soft_threshold");
619     (void)filter_param_register(type, "fields");
620     return 0;
621 }
622 module_init(strlist_init);