Store param value length. Fix off-by-one.
[apps/pfixtools.git] / postlicyd / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "policy_tokens.h"
41
42 typedef struct strlist_config_t {
43     PA(trie_t) tries;
44     A(int)     weights;
45     A(bool)    reverses;
46
47     int soft_threshold;
48     int hard_threshold;
49
50     unsigned is_email         :1;
51     unsigned match_sender     :1;
52     unsigned match_recipient  :1;
53
54     unsigned is_hostname      :1;
55     unsigned match_helo       :1;
56     unsigned match_client     :1;
57     unsigned match_reverse    :1;
58 } strlist_config_t;
59
60
61 static strlist_config_t *strlist_config_new(void)
62 {
63     return p_new(strlist_config_t, 1);
64 }
65
66 static void strlist_config_delete(strlist_config_t **config)
67 {
68     if (*config) {
69         array_deep_wipe((*config)->tries, trie_delete);
70         array_wipe((*config)->weights);
71         array_wipe((*config)->reverses);
72         p_delete(config);
73     }
74 }
75
/* Copy the first str_len bytes of str into dest, lower-cased with
 * ascii_tolower(), and NUL-terminate the result.
 *
 *  dest     destination buffer; the caller must provide at least
 *           str_len + 1 bytes, no bound is checked here.
 *  str      source bytes (not required to be NUL-terminated).
 *  str_len  number of bytes to copy; values <= 0 yield an empty string.
 *  reverse  when true the bytes are written in reverse order.
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (str_len > 0) {
        if (reverse) {
            /* Walk backwards with an index rather than a pointer: a pointer
             * loop has to step below `str` to terminate, which is undefined
             * behaviour even if the pointer is never dereferenced.
             */
            for (ssize_t i = str_len ; i > 0 ; --i) {
                *dest = ascii_tolower(str[i - 1]);
                ++dest;
            }
        } else {
            for (ssize_t i = 0 ; i < str_len ; ++i) {
                *dest = ascii_tolower(str[i]);
                ++dest;
            }
        }
    }
    *dest = '\0';
}
94
95
96 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
97 {
98     trie_t *db;
99     file_map_t map;
100     const char *p, *end;
101     char line[BUFSIZ];
102
103     if (!file_map_open(&map, file, false)) {
104         return NULL;
105     }
106     p   = map.map;
107     end = map.end;
108     while (end > p && end[-1] != '\n') {
109         --end;
110     }
111     if (end != map.end) {
112         syslog(LOG_WARNING, "file %s miss a final \\n, ignoring last line",
113                file);
114     }
115
116     db = trie_new();
117     while (p < end && p != NULL) {
118         const char *eol = (char *)memchr(p, '\n', end - p);
119         if (eol == NULL) {
120             eol = end;
121         }
122         if (eol - p >= BUFSIZ) {
123             syslog(LOG_ERR, "unreasonnable long line");
124             file_map_close(&map);
125             trie_delete(&db);
126             return NULL;
127         }
128         if (*p != '#') {
129             const char *eos = eol;
130             while (p < eos && isspace(*p)) {
131                 ++p;
132             }
133             while (p < eos && isspace(eos[-1])) {
134                 --eos;
135             }
136             if (p < eos) {
137                 strlist_copy(line, p, eos - p, reverse);
138                 trie_insert(db, line);
139             }
140         }
141         p = eol + 1;
142     }
143     file_map_close(&map);
144     trie_compile(db, lock);
145     return db;
146 }
147
148
149 static bool strlist_filter_constructor(filter_t *filter)
150 {
151     strlist_config_t *config = strlist_config_new();
152
153 #define PARSE_CHECK(Expr, Str, ...)                                            \
154     if (!(Expr)) {                                                             \
155         syslog(LOG_ERR, Str, ##__VA_ARGS__);                                   \
156         strlist_config_delete(&config);                                        \
157         return false;                                                          \
158     }
159
160     config->hard_threshold = 1;
161     config->soft_threshold = 1;
162     foreach (filter_param_t *param, filter->params) {
163         switch (param->type) {
164           /* file parameter is:
165            *  [no]lock:(prefix|suffix):weight:filename
166            *  valid options are:
167            *    - lock:   memlock the database in memory.
168            *    - nolock: don't memlock the database in memory.
169            *    - prefix: perform "prefix" compression on storage.
170            *    - suffix  perform "suffix" compression on storage.
171            *    - \d+:    a number describing the weight to give to the match
172            *              the given list [mandatory]
173            *  the file pointed by filename MUST be a valid string list (one string per
174            *  line, empty lines and lines beginning with a '#' are ignored).
175            */
176           case ATK_FILE: {
177             bool lock = false;
178             int  weight = 0;
179             bool reverse = false;
180             trie_t *trie = NULL;
181             const char *current = param->value;
182             const char *p = m_strchrnul(param->value, ':');
183             char *next = NULL;
184             for (int i = 0 ; i < 4 ; ++i) {
185                 PARSE_CHECK(i == 3 || *p,
186                             "file parameter must contains a locking state "
187                             "and a weight option");
188                 switch (i) {
189                   case 0:
190                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
191                         lock = true;
192                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
193                         lock = false;
194                     } else {
195                         PARSE_CHECK(false, "illegal locking state %.*s",
196                                     p - current, current);
197                     }
198                     break;
199
200                   case 1:
201                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
202                         reverse = true;
203                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
204                         reverse = false;
205                     } else {
206                         PARSE_CHECK(false, "illegal character order value %.*s",
207                                     p - current, current);
208                     }
209                     break;
210
211                   case 2:
212                     weight = strtol(current, &next, 10);
213                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
214                                 "illegal weight value %.*s",
215                                 (p - current), current);
216                     break;
217
218                   case 3:
219                     trie = strlist_create(current, reverse, lock);
220                     PARSE_CHECK(trie != NULL,
221                                 "cannot load string list from %s", current);
222                     trie_inspect(trie, false);
223                     array_add(config->tries, trie);
224                     array_add(config->weights, weight);
225                     array_add(config->reverses, reverse);
226                     break;
227                 }
228                 if (i != 3) {
229                     current = p + 1;
230                     p = m_strchrnul(current, ':');
231                 }
232             }
233           } break;
234
235           /* hard_threshold parameter is an integer.
236            *  If the matching score is greater or equal than this threshold,
237            *  the hook "hard_match" is called.
238            * hard_threshold = 1 means, that all matches are hard matches.
239            * default is 1;
240            */
241           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
242
243           /* soft_threshold parameter is an integer.
244            *  if the matching score is greater or equal than this threshold
245            *  and smaller or equal than the hard_threshold, the hook "soft_match"
246            *  is called.
247            * default is 1;
248            */
249           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
250
251           /* fields to match againes:
252            *  fields = field_name(,field_name)*
253            *  field_names are
254            *    - hostname: helo_name,client_name,reverse_client_name
255            *    - email: sender,recipient
256            */
257           case ATK_FIELDS: {
258             const char *current = param->value;
259             const char *p = m_strchrnul(param->value, ',');
260             do {
261                 postlicyd_token tok = policy_tokenize(current, p - current);
262                 switch (tok) {
263 #define           CASE(Up, Low, Type)                                          \
264                   case PTK_ ## Up:                                             \
265                     config->match_ ## Low = true;                              \
266                     config->is_ ## Type = true;                                \
267                     break
268                   CASE(HELO_NAME, helo, hostname);
269                   CASE(CLIENT_NAME, client, hostname);
270                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
271                   CASE(SENDER, sender, email);
272                   CASE(RECIPIENT, recipient, email);
273 #undef CASE
274                   default:
275                     PARSE_CHECK(false, "unknown field %.*s", p - current, current);
276                     break;
277                 }
278                 if (!*p) {
279                     break;
280                 }
281                 current = p + 1;
282                 p = m_strchrnul(current, ',');
283             } while (true);
284           } break;
285
286           default: break;
287         }
288     }}
289
290     PARSE_CHECK(config->is_email != config->is_hostname,
291                 "matched field MUST be emails XOR hostnames");
292     PARSE_CHECK(config->tries.len,
293                 "no file parameter in the filter %s", filter->name);
294     filter->data = config;
295     return true;
296 }
297
298 static void strlist_filter_destructor(filter_t *filter)
299 {
300     strlist_config_t *config = filter->data;
301     strlist_config_delete(&config);
302     filter->data = config;
303 }
304
305 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
306 {
307     char reverse[BUFSIZ];
308     char normal[BUFSIZ];
309     const strlist_config_t *config = filter->data;
310     int sum = 0;
311     if (config->is_email && 
312         ((config->match_sender && query->state < SMTP_MAIL)
313         || (config->match_recipient && query->state != SMTP_RCPT))) {
314         syslog(LOG_WARNING, "trying to match an email against a field that is not "
315                "available in current protocol state");
316         return HTK_ABORT;
317     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
318         syslog(LOG_WARNING, "trying to match hostname against helo before helo "
319                "is received");
320         return HTK_ABORT;
321     }
322 #define LOOKUP(Flag, Field)                                                    \
323     if (config->match_ ## Flag) {                                          \
324         const int len = m_strlen(query->Field);                            \
325         strlist_copy(normal, query->Field, len, false);                    \
326         strlist_copy(reverse, query->Field, len, true);                    \
327         for (int i = 0 ; i < config->tries.len ; ++i) {                    \
328             const int weight   = array_elt(config->weights, i);            \
329             const trie_t *trie = array_elt(config->tries, i);              \
330             const bool rev = array_elt(config->reverses, i);               \
331             if (trie_lookup(trie, rev ? reverse : normal)) {               \
332                 sum += weight;                                             \
333             }                                                              \
334         }                                                                  \
335     }
336     if (config->is_email) {
337         LOOKUP(sender, sender);
338         LOOKUP(recipient, recipient);
339     } else if (config->is_hostname) {
340         LOOKUP(helo, helo_name);
341         LOOKUP(client, client_name);
342         LOOKUP(reverse, reverse_client_name);
343     }
344 #undef  LOOKUP
345     if (sum >= config->hard_threshold) {
346         return HTK_HARD_MATCH;
347     } else if (sum >= config->soft_threshold) {
348         return HTK_SOFT_MATCH;
349     } else {
350         return HTK_FAIL;
351     }
352 }
353
354 static int strlist_init(void)
355 {
356     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
357                                           strlist_filter_destructor, strlist_filter);
358     /* Hooks.
359      */
360     (void)filter_hook_register(type, "abort");
361     (void)filter_hook_register(type, "error");
362     (void)filter_hook_register(type, "fail");
363     (void)filter_hook_register(type, "hard_match");
364     (void)filter_hook_register(type, "soft_match");
365
366     /* Parameters.
367      */
368     (void)filter_param_register(type, "file");
369     (void)filter_param_register(type, "hard_threshold");
370     (void)filter_param_register(type, "soft_threshold");
371     (void)filter_param_register(type, "fields");
372     return 0;
373 }
374 module_init(strlist_init);