Add hostname match on recipient_domain and sender_domain.
[apps/pfixtools.git] / postlicyd / strlist.c
1 /******************************************************************************/
2 /*          pfixtools: a collection of postfix related tools                  */
3 /*          ~~~~~~~~~                                                         */
4 /*  ________________________________________________________________________  */
5 /*                                                                            */
6 /*  Redistribution and use in source and binary forms, with or without        */
7 /*  modification, are permitted provided that the following conditions        */
8 /*  are met:                                                                  */
9 /*                                                                            */
10 /*  1. Redistributions of source code must retain the above copyright         */
11 /*     notice, this list of conditions and the following disclaimer.          */
12 /*  2. Redistributions in binary form must reproduce the above copyright      */
13 /*     notice, this list of conditions and the following disclaimer in the    */
14 /*     documentation and/or other materials provided with the distribution.   */
15 /*  3. The names of its contributors may not be used to endorse or promote    */
16 /*     products derived from this software without specific prior written     */
17 /*     permission.                                                            */
18 /*                                                                            */
19 /*  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND   */
20 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE     */
21 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        */
22 /*  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS    */
23 /*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR    */
24 /*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF      */
25 /*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  */
26 /*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN   */
27 /*  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)   */
28 /*  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF    */
29 /*  THE POSSIBILITY OF SUCH DAMAGE.                                           */
30 /******************************************************************************/
31
32 /*
33  * Copyright © 2008 Florent Bruneau
34  */
35
36 #include "filter.h"
37 #include "trie.h"
38 #include "file.h"
39 #include "str.h"
40 #include "policy_tokens.h"
41
42 typedef struct strlist_config_t {
43     PA(trie_t) tries;
44     A(int)     weights;
45     A(bool)    reverses;
46     A(bool)    partiales;
47
48     int soft_threshold;
49     int hard_threshold;
50
51     unsigned is_email         :1;
52     unsigned is_hostname      :1;
53
54     unsigned match_sender     :1;
55     unsigned match_recipient  :1;
56
57     unsigned match_helo       :1;
58     unsigned match_client     :1;
59     unsigned match_reverse    :1;
60 } strlist_config_t;
61
62
63 static strlist_config_t *strlist_config_new(void)
64 {
65     return p_new(strlist_config_t, 1);
66 }
67
68 static void strlist_config_delete(strlist_config_t **config)
69 {
70     if (*config) {
71         array_deep_wipe((*config)->tries, trie_delete);
72         array_wipe((*config)->weights);
73         array_wipe((*config)->reverses);
74         array_wipe((*config)->partiales);
75         p_delete(config);
76     }
77 }
78
/* Copy the first str_len bytes of str into dest, lowercased with
 * ascii_tolower(), and terminate dest with a NUL byte.
 *
 *  dest:    output buffer; the caller must provide room for at least
 *           str_len + 1 bytes (no bound is checked here).
 *  str:     input bytes; not read when str_len <= 0.
 *  str_len: number of bytes to copy; values <= 0 yield an empty string.
 *  reverse: when true, the bytes are written in reverse order.
 */
static inline void strlist_copy(char *dest, const char *str, ssize_t str_len,
                                bool reverse)
{
    if (reverse) {
        /* Walk with an index rather than a pointer: decrementing a pointer
         * below `str` to end the loop is undefined behaviour. */
        for (ssize_t i = str_len ; i-- > 0 ; ) {
            *dest++ = ascii_tolower(str[i]);
        }
    } else {
        for (ssize_t i = 0 ; i < str_len ; ++i) {
            *dest++ = ascii_tolower(str[i]);
        }
    }
    *dest = '\0';
}
97
98
99 static trie_t *strlist_create(const char *file, bool reverse, bool lock)
100 {
101     trie_t *db;
102     file_map_t map;
103     const char *p, *end;
104     char line[BUFSIZ];
105
106     if (!file_map_open(&map, file, false)) {
107         return NULL;
108     }
109     p   = map.map;
110     end = map.end;
111     while (end > p && end[-1] != '\n') {
112         --end;
113     }
114     if (end != map.end) {
115         warn("file %s miss a final \\n, ignoring last line",
116              file);
117     }
118
119     db = trie_new();
120     while (p < end && p != NULL) {
121         const char *eol = (char *)memchr(p, '\n', end - p);
122         if (eol == NULL) {
123             eol = end;
124         }
125         if (eol - p >= BUFSIZ) {
126             err("unreasonnable long line");
127             file_map_close(&map);
128             trie_delete(&db);
129             return NULL;
130         }
131         if (*p != '#') {
132             const char *eos = eol;
133             while (p < eos && isspace(*p)) {
134                 ++p;
135             }
136             while (p < eos && isspace(eos[-1])) {
137                 --eos;
138             }
139             if (p < eos) {
140                 strlist_copy(line, p, eos - p, reverse);
141                 trie_insert(db, line);
142             }
143         }
144         p = eol + 1;
145     }
146     file_map_close(&map);
147     trie_compile(db, lock);
148     return db;
149 }
150
151
152 static bool strlist_filter_constructor(filter_t *filter)
153 {
154     strlist_config_t *config = strlist_config_new();
155
156 #define PARSE_CHECK(Expr, Str, ...)                                            \
157     if (!(Expr)) {                                                             \
158         err(Str, ##__VA_ARGS__);                                               \
159         strlist_config_delete(&config);                                        \
160         return false;                                                          \
161     }
162
163     config->hard_threshold = 1;
164     config->soft_threshold = 1;
165     foreach (filter_param_t *param, filter->params) {
166         switch (param->type) {
167           /* file parameter is:
168            *  [no]lock:(prefix|suffix):weight:filename
169            *  valid options are:
170            *    - lock:   memlock the database in memory.
171            *    - nolock: don't memlock the database in memory.
172            *    - prefix: perform "prefix" compression on storage.
173            *    - suffix  perform "suffix" compression on storage.
174            *    - \d+:    a number describing the weight to give to the match
175            *              the given list [mandatory]
176            *  the file pointed by filename MUST be a valid string list (one string per
177            *  line, empty lines and lines beginning with a '#' are ignored).
178            */
179           case ATK_FILE: {
180             bool lock = false;
181             int  weight = 0;
182             bool reverse = false;
183             bool partial = false;
184             trie_t *trie = NULL;
185             const char *current = param->value;
186             const char *p = m_strchrnul(param->value, ':');
187             char *next = NULL;
188             for (int i = 0 ; i < 4 ; ++i) {
189                 PARSE_CHECK(i == 3 || *p,
190                             "file parameter must contains a locking state "
191                             "and a weight option");
192                 switch (i) {
193                   case 0:
194                     if ((p - current) == 4 && strncmp(current, "lock", 4) == 0) {
195                         lock = true;
196                     } else if ((p - current) == 6 && strncmp(current, "nolock", 6) == 0) {
197                         lock = false;
198                     } else {
199                         PARSE_CHECK(false, "illegal locking state %.*s",
200                                     p - current, current);
201                     }
202                     break;
203
204                   case 1:
205                     if (p - current > (ssize_t)strlen("partial-") 
206                         && strncmp(current, "partial-", strlen("partial-")) == 0) {
207                         partial = true;
208                         current += strlen("partial-");
209                     }
210                     if ((p - current) == 6 && strncmp(current, "suffix", 6) == 0) {
211                         reverse = true;
212                     } else if ((p - current) == 6 && strncmp(current, "prefix", 6) == 0) {
213                         reverse = false;
214                     } else {
215                         PARSE_CHECK(false, "illegal character order value %.*s",
216                                     p - current, current);
217                     }
218                     break;
219
220                   case 2:
221                     weight = strtol(current, &next, 10);
222                     PARSE_CHECK(next == p && weight >= 0 && weight <= 1024,
223                                 "illegal weight value %.*s",
224                                 (p - current), current);
225                     break;
226
227                   case 3:
228                     trie = strlist_create(current, reverse, lock);
229                     PARSE_CHECK(trie != NULL,
230                                 "cannot load string list from %s", current);
231                     array_add(config->tries, trie);
232                     array_add(config->weights, weight);
233                     array_add(config->reverses, reverse);
234                     array_add(config->partiales, partial);
235                     break;
236                 }
237                 if (i != 3) {
238                     current = p + 1;
239                     p = m_strchrnul(current, ':');
240                 }
241             }
242           } break;
243
244           /* hard_threshold parameter is an integer.
245            *  If the matching score is greater or equal than this threshold,
246            *  the hook "hard_match" is called.
247            * hard_threshold = 1 means, that all matches are hard matches.
248            * default is 1;
249            */
250           FILTER_PARAM_PARSE_INT(HARD_THRESHOLD, config->hard_threshold);
251
252           /* soft_threshold parameter is an integer.
253            *  if the matching score is greater or equal than this threshold
254            *  and smaller or equal than the hard_threshold, the hook "soft_match"
255            *  is called.
256            * default is 1;
257            */
258           FILTER_PARAM_PARSE_INT(SOFT_THRESHOLD, config->soft_threshold);
259
260           /* fields to match againes:
261            *  fields = field_name(,field_name)*
262            *  field_names are
263            *    - hostname: helo_name,client_name,reverse_client_name
264            *    - email: sender,recipient
265            */
266           case ATK_FIELDS: {
267             const char *current = param->value;
268             const char *p = m_strchrnul(param->value, ',');
269             do {
270                 postlicyd_token tok = policy_tokenize(current, p - current);
271                 switch (tok) {
272 #define           CASE(Up, Low, Type)                                          \
273                   case PTK_ ## Up:                                             \
274                     config->match_ ## Low = true;                              \
275                     config->is_ ## Type = true;                                \
276                     break
277                   CASE(HELO_NAME, helo, hostname);
278                   CASE(CLIENT_NAME, client, hostname);
279                   CASE(REVERSE_CLIENT_NAME, reverse, hostname);
280                   CASE(SENDER_DOMAIN, sender, hostname);
281                   CASE(RECIPIENT_DOMAIN, recipient, hostname);
282                   CASE(SENDER, sender, email);
283                   CASE(RECIPIENT, recipient, email);
284 #undef CASE
285                   default:
286                     PARSE_CHECK(false, "unknown field %.*s", p - current, current);
287                     break;
288                 }
289                 if (!*p) {
290                     break;
291                 }
292                 current = p + 1;
293                 p = m_strchrnul(current, ',');
294             } while (true);
295           } break;
296
297           default: break;
298         }
299     }}
300
301     PARSE_CHECK(config->is_email != config->is_hostname,
302                 "matched field MUST be emails XOR hostnames");
303     PARSE_CHECK(config->tries.len,
304                 "no file parameter in the filter %s", filter->name);
305     filter->data = config;
306     return true;
307 }
308
309 static void strlist_filter_destructor(filter_t *filter)
310 {
311     strlist_config_t *config = filter->data;
312     strlist_config_delete(&config);
313     filter->data = config;
314 }
315
316 static filter_result_t strlist_filter(const filter_t *filter, const query_t *query)
317 {
318     char reverse[BUFSIZ];
319     char normal[BUFSIZ];
320     const strlist_config_t *config = filter->data;
321     int sum = 0;
322     if (config->is_email && 
323         ((config->match_sender && query->state < SMTP_MAIL)
324         || (config->match_recipient && query->state != SMTP_RCPT))) {
325         warn("trying to match an email against a field that is not "
326              "available in current protocol state");
327         return HTK_ABORT;
328     } else if (config->is_hostname && config->match_helo && query->state < SMTP_HELO) {
329         warn("trying to match hostname against helo before helo is received");
330         return HTK_ABORT;
331     }
332 #define LOOKUP(Flag, Field)                                                    \
333     if (config->match_ ## Flag) {                                              \
334         const int len = m_strlen(query->Field);                                \
335         strlist_copy(normal, query->Field, len, false);                        \
336         strlist_copy(reverse, query->Field, len, true);                        \
337         for (uint32_t i = 0 ; i < config->tries.len ; ++i) {                   \
338             const int weight   = array_elt(config->weights, i);                \
339             const trie_t *trie = array_elt(config->tries, i);                  \
340             const bool rev     = array_elt(config->reverses, i);               \
341             const bool part    = array_elt(config->partiales, i);              \
342             if ((!part && trie_lookup(trie, rev ? reverse : normal))           \
343                 || (part && trie_prefix(trie, rev ? reverse : normal))) {      \
344                 sum += weight;                                                 \
345                 if (sum >= config->hard_threshold) {                           \
346                     return HTK_HARD_MATCH;                                     \
347                 }                                                              \
348             }                                                                  \
349         }                                                                      \
350     }
351     if (config->is_email) {
352         LOOKUP(sender, sender);
353         LOOKUP(recipient, recipient);
354     } else if (config->is_hostname) {
355         LOOKUP(helo, helo_name);
356         LOOKUP(client, client_name);
357         LOOKUP(reverse, reverse_client_name);
358         LOOKUP(recipient, recipient_domain);
359         LOOKUP(sender, sender_domain);
360     }
361 #undef  LOOKUP
362     if (sum >= config->hard_threshold) {
363         return HTK_HARD_MATCH;
364     } else if (sum >= config->soft_threshold) {
365         return HTK_SOFT_MATCH;
366     } else {
367         return HTK_FAIL;
368     }
369 }
370
371 static int strlist_init(void)
372 {
373     filter_type_t type =  filter_register("strlist", strlist_filter_constructor,
374                                           strlist_filter_destructor, strlist_filter);
375     /* Hooks.
376      */
377     (void)filter_hook_register(type, "abort");
378     (void)filter_hook_register(type, "error");
379     (void)filter_hook_register(type, "fail");
380     (void)filter_hook_register(type, "hard_match");
381     (void)filter_hook_register(type, "soft_match");
382
383     /* Parameters.
384      */
385     (void)filter_param_register(type, "file");
386     (void)filter_param_register(type, "hard_threshold");
387     (void)filter_param_register(type, "soft_threshold");
388     (void)filter_param_register(type, "fields");
389     return 0;
390 }
391 module_init(strlist_init);