Array contains a "lock" flag, allowing "per array" locking.
[apps/pfixtools.git] / postlicyd / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "policy_tokens.h"
41
42 typedef struct strlist_config_t {
43     PA(trie_t) tries;
44     A(int)     weights;
45     A(bool)    reverses;
46
47     int soft_threshold;
48     int hard_threshold;
49
50     unsigned is_email         :1;
51     unsigned match_sender     :1;
52     unsigned match_recipient  :1;
53
54     unsigned is_hostname      :1;
55     unsigned match_helo       :1;
56     unsigned match_client     :1;
57     unsigned match_reverse    :1;
58 } strlist_config_t;
59
60
61 static strlist_config_t *strlist_config_new(void)
62 {
63     return p_new(strlist_config_t, 1);
64 }
65
66 static void strlist_config_delete(strlist_config_t **config)
67 {
68     if (*config) {
69         array_deep_wipe((*config)->tries, trie_delete);
70         array_wipe((*config)->weights);
71         array_wipe((*config)->reverses);
72         p_delete(config);
73     }
74 }
75
/* Copy the str_len first characters of str into dest, lowercased with
 * ascii_tolower(), and NUL-terminate the result.
 *
 *  dest     destination buffer; the caller MUST provide room for at least
 *           str_len + 1 bytes, no bound is checked here.
 *  str      source characters (not necessarily NUL-terminated).
 *  str_len  number of characters to copy; when it is <= 0, dest receives the
 *           empty string and str is not read.
 *  reverse  when true the characters are written in reverse order (last
 *           character of str first).
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (str_len > 0) {
        if (reverse) {
            /* Walk with an index rather than a pointer: a pointer loop
             * ending on "src >= str" has to compute str - 1, which is
             * outside of the source object.
             */
            for (ssize_t i = str_len ; i > 0 ; --i) {
                *dest++ = ascii_tolower(str[i - 1]);
            }
        } else {
            for (ssize_t i = 0 ; i < str_len ; ++i) {
                *dest++ = ascii_tolower(str[i]);
            }
        }
    }
    *dest = '\0';
}
94
95
96 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
97 {
98     trie_t *db;
99     file_map_t map;
100     const char *p, *end;
101     char line[BUFSIZ];
102
103     if (!file_map_open(&map, file, false)) {
104         return NULL;
105     }
106     p   = map.map;
107     end = map.end;
108     while (end > p && end[-1] != '\n') {
109         --end;
110     }
111     if (end != map.end) {
112         syslog(LOG_WARNING, "file %s miss a final \\n, ignoring last line",
113                file);
114     }
115
116     db = trie_new();
117     while (p < end && p != NULL) {
118         const char *eol = (char *)memchr(p, '\n', end - p);
119         if (eol == NULL) {
120             eol = end;
121         }
122         if (eol - p >= BUFSIZ) {
123             syslog(LOG_ERR, "unreasonnable long line");
124             file_map_close(&map);
125             trie_delete(&db);
126             return NULL;
127         }
128         if (*p != '#') {
129             const char *eos = eol;
130             while (p < eos && isspace(*p)) {
131                 ++p;
132             }
133             while (p < eos && isspace(eos[-1])) {
134                 --eos;
135             }
136             if (p < eos) {
137                 strlist_copy(line, p, eos - p, reverse);
138                 trie_insert(db, line);
139             }
140         }
141         p = eol + 1;
142     }
143     file_map_close(&map);
144     trie_compile(db, lock);
145     return db;
146 }
147
148
149 static bool strlist_filter_constructor(filter_t *filter)
150 {
151     strlist_config_t *config = strlist_config_new();
152
153 #define PARSE_CHECK(Expr, Str, ...)                                            \
154     if (!(Expr)) {                                                             \
155         syslog(LOG_ERR, Str, ##__VA_ARGS__);                                   \
156         strlist_config_delete(&config);                                        \
157         return false;                                                          \
158     }
159
160     config->hard_threshold = 1;
161     config->soft_threshold = 1;
162     foreach (filter_param_t *param, filter->params) {
163         switch (param->type) {
164           /* file parameter is:
165            *  [no]lock:(prefix|suffix):weight:filename
166            *  valid options are:
167            *    - lock:   memlock the database in memory.
168            *    - nolock: don't memlock the database in memory.
169            *    - prefix: perform "prefix" compression on storage.
170            *    - suffix  perform "suffix" compression on storage.
171            *    - \d+:    a number describing the weight to give to the match
172            *              the given list [mandatory]
173            *  the file pointed by filename MUST be a valid string list (one string per
174            *  line, empty lines and lines beginning with a '#' are ignored).
175            */
176           case ATK_FILE: {
177             bool lock = false;
178             int  weight = 0;
179             bool reverse = false;
180             trie_t *trie = NULL;
181             const char *current = param->value;
182             const char *p = m_strchrnul(param->value, ':');
183             char *next = NULL;
184             for (int i = 0 ; i < 4 ; ++i) {
185                 PARSE_CHECK(i == 3 || *p,
186                             "file parameter must contains a locking state "
187                             "and a weight option");
188                 switch (i) {
189                   case 0:
190                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
191                         lock = true;
192                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
193                         lock = false;
194                     } else {
195                         PARSE_CHECK(false, "illegal locking state %.*s",
196                                     p - current, current);
197                     }
198                     break;
199
200                   case 1:
201                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
202                         reverse = true;
203                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
204                         reverse = false;
205                     } else {
206                         PARSE_CHECK(false, "illegal character order value %.*s",
207                                     p - current, current);
208                     }
209                     break;
210
211                   case 2:
212                     weight = strtol(current, &next, 10);
213                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
214                                 "illegal weight value %.*s",
215                                 (p - current), current);
216                     break;
217
218                   case 3:
219                     trie = strlist_create(current, reverse, lock);
220                     PARSE_CHECK(trie != NULL,
221                                 "cannot load string list from %s", current);
222                     array_add(config->tries, trie);
223                     array_add(config->weights, weight);
224                     array_add(config->reverses, reverse);
225                     break;
226                 }
227                 if (i != 3) {
228                     current = p + 1;
229                     p = m_strchrnul(current, ':');
230                 }
231             }
232           } break;
233
234           /* hard_threshold parameter is an integer.
235            *  If the matching score is greater or equal than this threshold,
236            *  the hook "hard_match" is called.
237            * hard_threshold = 1 means, that all matches are hard matches.
238            * default is 1;
239            */
240           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
241
242           /* soft_threshold parameter is an integer.
243            *  if the matching score is greater or equal than this threshold
244            *  and smaller or equal than the hard_threshold, the hook "soft_match"
245            *  is called.
246            * default is 1;
247            */
248           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
249
250           /* fields to match againes:
251            *  fields = field_name(,field_name)*
252            *  field_names are
253            *    - hostname: helo_name,client_name,reverse_client_name
254            *    - email: sender,recipient
255            */
256           case ATK_FIELDS: {
257             const char *current = param->value;
258             const char *p = m_strchrnul(param->value, ',');
259             do {
260                 postlicyd_token tok = policy_tokenize(current, p - current);
261                 switch (tok) {
262 #define           CASE(Up, Low, Type)                                          \
263                   case PTK_ ## Up:                                             \
264                     config->match_ ## Low = true;                              \
265                     config->is_ ## Type = true;                                \
266                     break
267                   CASE(HELO_NAME, helo, hostname);
268                   CASE(CLIENT_NAME, client, hostname);
269                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
270                   CASE(SENDER, sender, email);
271                   CASE(RECIPIENT, recipient, email);
272 #undef CASE
273                   default:
274                     PARSE_CHECK(false, "unknown field %.*s", p - current, current);
275                     break;
276                 }
277                 if (!*p) {
278                     break;
279                 }
280                 current = p + 1;
281                 p = m_strchrnul(current, ',');
282             } while (true);
283           } break;
284
285           default: break;
286         }
287     }}
288
289     PARSE_CHECK(config->is_email != config->is_hostname,
290                 "matched field MUST be emails XOR hostnames");
291     PARSE_CHECK(config->tries.len,
292                 "no file parameter in the filter %s", filter->name);
293     filter->data = config;
294     return true;
295 }
296
297 static void strlist_filter_destructor(filter_t *filter)
298 {
299     strlist_config_t *config = filter->data;
300     strlist_config_delete(&config);
301     filter->data = config;
302 }
303
304 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
305 {
306     char reverse[BUFSIZ];
307     char normal[BUFSIZ];
308     const strlist_config_t *config = filter->data;
309     int sum = 0;
310     if (config->is_email && 
311         ((config->match_sender && query->state < SMTP_MAIL)
312         || (config->match_recipient && query->state != SMTP_RCPT))) {
313         syslog(LOG_WARNING, "trying to match an email against a field that is not "
314                "available in current protocol state");
315         return HTK_ABORT;
316     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
317         syslog(LOG_WARNING, "trying to match hostname against helo before helo "
318                "is received");
319         return HTK_ABORT;
320     }
321 #define LOOKUP(Flag, Field)                                                    \
322     if (config->match_ ## Flag) {                                              \
323         const int len = m_strlen(query->Field);                                \
324         strlist_copy(normal, query->Field, len, false);                        \
325         strlist_copy(reverse, query->Field, len, true);                        \
326         for (uint32_t i = 0 ; i < config->tries.len ; ++i) {                   \
327             const int weight   = array_elt(config->weights, i);                \
328             const trie_t *trie = array_elt(config->tries, i);                  \
329             const bool rev = array_elt(config->reverses, i);                   \
330             if (trie_lookup(trie, rev ? reverse : normal)) {                   \
331                 sum += weight;                                                 \
332             }                                                                  \
333         }                                                                      \
334     }
335     if (config->is_email) {
336         LOOKUP(sender, sender);
337         LOOKUP(recipient, recipient);
338     } else if (config->is_hostname) {
339         LOOKUP(helo, helo_name);
340         LOOKUP(client, client_name);
341         LOOKUP(reverse, reverse_client_name);
342     }
343 #undef  LOOKUP
344     if (sum >= config->hard_threshold) {
345         return HTK_HARD_MATCH;
346     } else if (sum >= config->soft_threshold) {
347         return HTK_SOFT_MATCH;
348     } else {
349         return HTK_FAIL;
350     }
351 }
352
353 static int strlist_init(void)
354 {
355     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
356                                           strlist_filter_destructor, strlist_filter);
357     /* Hooks.
358      */
359     (void)filter_hook_register(type, "abort");
360     (void)filter_hook_register(type, "error");
361     (void)filter_hook_register(type, "fail");
362     (void)filter_hook_register(type, "hard_match");
363     (void)filter_hook_register(type, "soft_match");
364
365     /* Parameters.
366      */
367     (void)filter_param_register(type, "file");
368     (void)filter_param_register(type, "hard_threshold");
369     (void)filter_param_register(type, "soft_threshold");
370     (void)filter_param_register(type, "fields");
371     return 0;
372 }
373 module_init(strlist_init);