2013-11-28 05:05:19 -05:00
|
|
|
/*
|
|
|
|
|
* Pattern management functions.
|
|
|
|
|
*
|
|
|
|
|
* Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <stdio.h>
|
2020-01-17 12:01:20 -05:00
|
|
|
#include <errno.h>
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2023-08-22 10:52:47 -04:00
|
|
|
#include <import/ebistree.h>
|
|
|
|
|
#include <import/ebpttree.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <import/ebsttree.h>
|
|
|
|
|
#include <import/lru.h>
|
|
|
|
|
|
2020-05-27 06:58:42 -04:00
|
|
|
#include <haproxy/api.h>
|
2020-06-04 11:05:57 -04:00
|
|
|
#include <haproxy/global.h>
|
2020-06-04 16:01:04 -04:00
|
|
|
#include <haproxy/log.h>
|
2020-06-02 10:48:09 -04:00
|
|
|
#include <haproxy/net_helper.h>
|
2020-06-04 09:06:28 -04:00
|
|
|
#include <haproxy/pattern.h>
|
2020-06-02 11:32:26 -04:00
|
|
|
#include <haproxy/regex.h>
|
2020-06-04 09:33:47 -04:00
|
|
|
#include <haproxy/sample.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <haproxy/tools.h>
|
2021-09-11 11:51:13 -04:00
|
|
|
#include <haproxy/xxhash.h>
|
2013-11-28 05:05:19 -05:00
|
|
|
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Human-readable names of the supported match methods, indexed by their
 * PAT_MATCH_* identifier.
 */
const char *const pat_match_names[PAT_MATCH_NUM] = {
	[PAT_MATCH_FOUND] = "found",
	[PAT_MATCH_BOOL] = "bool",
	[PAT_MATCH_INT] = "int",
	[PAT_MATCH_IP] = "ip",
	[PAT_MATCH_BIN] = "bin",
	[PAT_MATCH_LEN] = "len",
	[PAT_MATCH_STR] = "str",
	[PAT_MATCH_BEG] = "beg",
	[PAT_MATCH_SUB] = "sub",
	[PAT_MATCH_DIR] = "dir",
	[PAT_MATCH_DOM] = "dom",
	[PAT_MATCH_END] = "end",
	[PAT_MATCH_REG] = "reg",
	[PAT_MATCH_REGM] = "regm",
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Pattern parsing functions per match method: each one converts the textual
 * pattern into a preallocated <struct pattern> (see the pat_parse_* functions
 * below for the exact contract).
 */
int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
	[PAT_MATCH_FOUND] = pat_parse_nothing,
	[PAT_MATCH_BOOL] = pat_parse_nothing,
	[PAT_MATCH_INT] = pat_parse_int,
	[PAT_MATCH_IP] = pat_parse_ip,
	[PAT_MATCH_BIN] = pat_parse_bin,
	[PAT_MATCH_LEN] = pat_parse_int,
	[PAT_MATCH_STR] = pat_parse_str,
	[PAT_MATCH_BEG] = pat_parse_str,
	[PAT_MATCH_SUB] = pat_parse_str,
	[PAT_MATCH_DIR] = pat_parse_str,
	[PAT_MATCH_DOM] = pat_parse_str,
	[PAT_MATCH_END] = pat_parse_str,
	[PAT_MATCH_REG] = pat_parse_reg,
	[PAT_MATCH_REGM] = pat_parse_reg,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Pattern indexing functions per match method: each one inserts a parsed
 * pattern into a pattern_expr, using either a linked list or a tree depending
 * on what the method can look up efficiently.
 */
int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
	[PAT_MATCH_FOUND] = pat_idx_list_val,
	[PAT_MATCH_BOOL] = pat_idx_list_val,
	[PAT_MATCH_INT] = pat_idx_list_val,
	[PAT_MATCH_IP] = pat_idx_tree_ip,
	[PAT_MATCH_BIN] = pat_idx_list_ptr,
	[PAT_MATCH_LEN] = pat_idx_list_val,
	[PAT_MATCH_STR] = pat_idx_tree_str,
	[PAT_MATCH_BEG] = pat_idx_tree_pfx,
	[PAT_MATCH_SUB] = pat_idx_list_str,
	[PAT_MATCH_DIR] = pat_idx_list_str,
	[PAT_MATCH_DOM] = pat_idx_list_str,
	[PAT_MATCH_END] = pat_idx_list_str,
	[PAT_MATCH_REG] = pat_idx_list_reg,
	[PAT_MATCH_REGM] = pat_idx_list_regm,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Pattern pruning functions per match method: release every pattern attached
 * to a pattern_expr. All methods currently share the generic pruning function.
 */
void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
	[PAT_MATCH_FOUND] = pat_prune_gen,
	[PAT_MATCH_BOOL] = pat_prune_gen,
	[PAT_MATCH_INT] = pat_prune_gen,
	[PAT_MATCH_IP] = pat_prune_gen,
	[PAT_MATCH_BIN] = pat_prune_gen,
	[PAT_MATCH_LEN] = pat_prune_gen,
	[PAT_MATCH_STR] = pat_prune_gen,
	[PAT_MATCH_BEG] = pat_prune_gen,
	[PAT_MATCH_SUB] = pat_prune_gen,
	[PAT_MATCH_DIR] = pat_prune_gen,
	[PAT_MATCH_DOM] = pat_prune_gen,
	[PAT_MATCH_END] = pat_prune_gen,
	[PAT_MATCH_REG] = pat_prune_gen,
	[PAT_MATCH_REGM] = pat_prune_gen,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Sample matching functions per match method: look up sample <smp> among the
 * patterns of a pattern_expr. A NULL entry (PAT_MATCH_FOUND) means no lookup
 * function is needed for that method.
 */
struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
	[PAT_MATCH_FOUND] = NULL,
	[PAT_MATCH_BOOL] = pat_match_nothing,
	[PAT_MATCH_INT] = pat_match_int,
	[PAT_MATCH_IP] = pat_match_ip,
	[PAT_MATCH_BIN] = pat_match_bin,
	[PAT_MATCH_LEN] = pat_match_len,
	[PAT_MATCH_STR] = pat_match_str,
	[PAT_MATCH_BEG] = pat_match_beg,
	[PAT_MATCH_SUB] = pat_match_sub,
	[PAT_MATCH_DIR] = pat_match_dir,
	[PAT_MATCH_DOM] = pat_match_dom,
	[PAT_MATCH_END] = pat_match_end,
	[PAT_MATCH_REG] = pat_match_reg,
	[PAT_MATCH_REGM] = pat_match_regm,
};
|
|
|
|
|
|
2013-12-06 09:36:54 -05:00
|
|
|
/* Expected input sample type for each match method. Just used for checking
 * configuration compatibility.
 */
int const pat_match_types[PAT_MATCH_NUM] = {
	[PAT_MATCH_FOUND] = SMP_T_SINT,
	[PAT_MATCH_BOOL] = SMP_T_SINT,
	[PAT_MATCH_INT] = SMP_T_SINT,
	[PAT_MATCH_IP] = SMP_T_ADDR,
	[PAT_MATCH_BIN] = SMP_T_BIN,
	[PAT_MATCH_LEN] = SMP_T_STR,
	[PAT_MATCH_STR] = SMP_T_STR,
	[PAT_MATCH_BEG] = SMP_T_STR,
	[PAT_MATCH_SUB] = SMP_T_STR,
	[PAT_MATCH_DIR] = SMP_T_STR,
	[PAT_MATCH_DOM] = SMP_T_STR,
	[PAT_MATCH_END] = SMP_T_STR,
	[PAT_MATCH_REG] = SMP_T_STR,
	[PAT_MATCH_REGM] = SMP_T_STR,
};
|
|
|
|
|
|
2014-01-17 09:25:13 -05:00
|
|
|
/* Per-thread scratch objects used to return match information from the
 * pat_match_* functions without allocating (valid until the next match on
 * the same thread).
 */
static THREAD_LOCAL struct pattern static_pattern;
static THREAD_LOCAL struct sample_data static_sample_data;

/* This is the root of the list of all available pattern_ref. */
struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);

/* Per-thread LRU cache of recent pattern lookups (see pat_match_str/bin),
 * and the hash seed used when hashing samples for that cache.
 */
static THREAD_LOCAL struct lru64_head *pat_lru_tree;
static unsigned long long pat_lru_seed __read_mostly;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
/*
|
2014-01-21 05:25:41 -05:00
|
|
|
*
|
|
|
|
|
* The following functions are not exported and are used by internals process
|
|
|
|
|
* of pattern matching
|
|
|
|
|
*
|
2013-11-28 05:05:19 -05:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Background: Fast way to find a zero byte in a word
 * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
 *
 * To look for 4 different byte values, xor the word with those bytes and
 * then check for zero bytes:
 *
 * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
 * where <delimiter> is the 4 byte values to look for (as an uint, see
 * make_4delim()) and <c> is the character that is being tested.
 * Returns non-zero if <c> equals any of the 4 packed delimiters.
 */
static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
{
	/* propagate the char to all 4 bytes. The multiplication is done on an
	 * unsigned constant: with a plain int constant, c >= 0x80 would make
	 * the product exceed INT_MAX, which is signed overflow (undefined
	 * behaviour).
	 */
	mask ^= (c * 0x01010101U);
	return (mask - 0x01010101U) & ~mask & 0x80808080U;
}
|
|
|
|
|
|
|
|
|
|
/* Pack the 4 delimiter bytes <d1>..<d4> into a single 32-bit word suitable
 * for is_delimiter(), with <d1> in the most significant byte. Each operand is
 * widened to unsigned int before shifting: without the casts, <d1> would be
 * promoted to signed int and a value >= 0x80 shifted left by 24 would land in
 * the sign bit, which is undefined behaviour.
 */
static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
{
	return (unsigned int)d1 << 24 | (unsigned int)d2 << 16 | (unsigned int)d3 << 8 | d4;
}
|
|
|
|
|
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* These functions are exported and may be used by any other component.
|
|
|
|
|
*
|
2014-08-29 09:19:33 -04:00
|
|
|
* The following functions are used for parsing pattern matching input value.
|
|
|
|
|
* The <text> contain the string to be parsed. <pattern> must be a preallocated
|
|
|
|
|
* pattern. The pat_parse_* functions fill this structure with the parsed value.
|
|
|
|
|
* <err> is filled with an error message built with memprintf() function. It is
|
|
|
|
|
* allowed to use a trash as a temporary storage for the returned pattern, as
|
|
|
|
|
* the next call after these functions will be pat_idx_*.
|
2014-01-21 05:25:41 -05:00
|
|
|
*
|
2014-08-29 09:19:33 -04:00
|
|
|
* In success case, the pat_parse_* function returns 1. If the function
|
|
|
|
|
* fails, it returns 0 and <err> is filled.
|
2014-01-21 05:25:41 -05:00
|
|
|
*/
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* ignore the current line: always succeeds and stores nothing in <pattern>.
 * Used by match methods (found, bool) which need no parsed pattern value.
 */
int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err)
{
	return 1;
}
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Parse a string. It is allocated and duplicated. */
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 18:20:33 -05:00
|
|
|
pattern->type = SMP_T_STR;
|
2013-12-13 09:36:59 -05:00
|
|
|
pattern->ptr.str = (char *)text;
|
2014-01-24 04:58:12 -05:00
|
|
|
pattern->len = strlen(text);
|
2014-01-21 05:25:41 -05:00
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Parse a binary written in hexa. It is allocated. */
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2018-07-13 05:56:34 -04:00
|
|
|
struct buffer *trash;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-16 18:20:33 -05:00
|
|
|
pattern->type = SMP_T_BIN;
|
2014-01-21 05:25:41 -05:00
|
|
|
trash = get_trash_chunk();
|
|
|
|
|
pattern->len = trash->size;
|
2018-07-13 04:54:26 -04:00
|
|
|
pattern->ptr.str = trash->area;
|
2014-01-24 04:58:12 -05:00
|
|
|
return !!parse_binary(text, &pattern->ptr.str, &pattern->len, err);
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse a regex: only a reference to <text> is stored here; presumably the
 * regex is compiled later by the indexing step (see pat_idx_list_reg/regm).
 * Always returns 1.
 */
int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err)
{
	pattern->ptr.str = (char *)text;
	return 1;
}
|
|
|
|
|
|
|
|
|
|
/* Parse a positive integer or a range of positive integers delimited by
 * either ':' or '-' ("min:max"). If only one integer is read, it is used as
 * both min and max. A missing lower bound ("-max") or upper bound ("min-")
 * is also accepted; the .min_set/.max_set flags of <pattern>->val.range
 * record which bounds were present.
 *
 * If err is non-NULL, an error message will be returned there on errors and
 * the caller will have to free it. The function returns zero on error, and
 * non-zero on success.
 */
int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err)
{
	const char *ptr = text;

	pattern->type = SMP_T_SINT;

	/* Empty string is not valid */
	if (!*text)
		goto not_valid_range;

	/* Search ':' or '-' separator. */
	while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
		ptr++;

	/* If separator not found: a single value serves as both bounds. */
	if (!*ptr) {
		if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) {
			memprintf(err, "'%s' is not a number", text);
			return 0;
		}
		pattern->val.range.max = pattern->val.range.min;
		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If the separator is the first character: "-max", no lower bound. */
	if (ptr == text && *(ptr + 1) != '\0') {
		if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
			goto not_valid_range;

		pattern->val.range.min_set = 0;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If separator is the last character: "min-", no upper bound. */
	if (*(ptr + 1) == '\0') {
		if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
			goto not_valid_range;

		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 0;
		return 1;
	}

	/* Else, parse two numbers. */
	if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
		goto not_valid_range;

	if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
		goto not_valid_range;

	/* an inverted range is rejected */
	if (pattern->val.range.min > pattern->val.range.max)
		goto not_valid_range;

	pattern->val.range.min_set = 1;
	pattern->val.range.max_set = 1;
	return 1;

not_valid_range:
	memprintf(err, "'%s' is not a valid number range", text);
	return 0;
}
|
|
|
|
|
|
|
|
|
|
/* Parse a range of positive 2-component versions delimited by either ':' or
 * '-'. The version consists in a major and a minor, both of which must be
 * smaller than 65536, because internally they will be represented as a 32-bit
 * integer.
 * If only one version is read, it is set as both min and max, e.g. :
 *
 *    acl obsolete_ssl    ssl_req_proto lt 3
 *    acl unsupported_ssl ssl_req_proto gt 3.1
 *    acl valid_ssl       ssl_req_proto 3.0-3.1
 *
 * A missing bound ("-max" or "min-") leaves the corresponding .min_set or
 * .max_set flag cleared. Returns 1 on success, otherwise 0 with <err> filled.
 */
int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err)
{
	const char *ptr = text;

	pattern->type = SMP_T_SINT;

	/* Search ':' or '-' separator. */
	while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
		ptr++;

	/* If separator not found: a single version serves as both bounds.
	 * NOTE(review): an empty <text> skips this branch (ptr == text) and
	 * the next one dereferences *(ptr+1), one byte past the terminating
	 * NUL — looks like an out-of-bounds read on empty input; confirm
	 * whether callers guarantee a non-empty string.
	 */
	if (*ptr == '\0' && ptr > text) {
		if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
			memprintf(err, "'%s' is not a dotted number", text);
			return 0;
		}
		pattern->val.range.max = pattern->val.range.min;
		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If the separator is the first character: "-max", no lower bound. */
	if (ptr == text && *(ptr+1) != '\0') {
		if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
			memprintf(err, "'%s' is not a valid dotted number range", text);
			return 0;
		}
		pattern->val.range.min_set = 0;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If separator is the last character: "min-", no upper bound. */
	if (ptr == &text[strlen(text)-1]) {
		if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
			memprintf(err, "'%s' is not a valid dotted number range", text);
			return 0;
		}
		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 0;
		return 1;
	}

	/* Else, parse two numbers. */
	if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
		memprintf(err, "'%s' is not a valid dotted number range", text);
		return 0;
	}
	if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
		memprintf(err, "'%s' is not a valid dotted number range", text);
		return 0;
	}
	/* an inverted range is rejected */
	if (pattern->val.range.min > pattern->val.range.max) {
		memprintf(err, "'%s' is not a valid dotted number range", text);
		return 0;
	}
	pattern->val.range.min_set = 1;
	pattern->val.range.max_set = 1;
	return 1;
}
|
|
|
|
|
|
|
|
|
|
/* Parse an IP address and an optional mask in the form addr[/mask].
|
|
|
|
|
* The addr may either be an IPv4 address or a hostname. The mask
|
|
|
|
|
* may either be a dotted mask or a number of bits. Returns 1 if OK,
|
|
|
|
|
* otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
|
|
|
|
|
*/
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2014-04-28 05:18:57 -04:00
|
|
|
if (str2net(text, !(mflags & PAT_MF_NO_DNS) && (global.mode & MODE_STARTING),
|
2014-02-11 09:23:04 -05:00
|
|
|
&pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
|
2013-11-28 05:05:19 -05:00
|
|
|
pattern->type = SMP_T_IPV4;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2014-01-24 04:58:12 -05:00
|
|
|
else if (str62net(text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
|
2013-11-28 05:05:19 -05:00
|
|
|
pattern->type = SMP_T_IPV6;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
else {
|
2014-01-24 04:58:12 -05:00
|
|
|
memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", text);
|
2013-11-28 05:05:19 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* These functions are exported and may be used by any other component.
|
|
|
|
|
*
|
2018-11-15 13:22:31 -05:00
|
|
|
* This function just takes a sample <smp> and checks if this sample matches
|
|
|
|
|
* with the pattern <pattern>. This function returns only PAT_MATCH or
|
2014-01-21 05:25:41 -05:00
|
|
|
* PAT_NOMATCH.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* always return false */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2015-08-19 03:07:19 -04:00
|
|
|
if (smp->data.u.sint) {
|
2014-03-17 14:53:10 -04:00
|
|
|
if (fill) {
|
2015-08-19 02:35:43 -04:00
|
|
|
static_pattern.data = NULL;
|
2014-03-17 14:53:10 -04:00
|
|
|
static_pattern.ref = NULL;
|
|
|
|
|
static_pattern.type = 0;
|
|
|
|
|
static_pattern.ptr.str = NULL;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
BUG/MEDIUM: pattern: prevent uninitialized reads in pat_match_{str,beg}
Using valgrind when running map_beg or map_str, the following error is
reported:
==242644== Conditional jump or move depends on uninitialised value(s)
==242644== at 0x2E4AB1: pat_match_str (pattern.c:457)
==242644== by 0x2E81ED: pattern_exec_match (pattern.c:2560)
==242644== by 0x343176: sample_conv_map (map.c:211)
==242644== by 0x27522F: sample_process_cnv (sample.c:1330)
==242644== by 0x2752DB: sample_process (sample.c:1373)
==242644== by 0x319917: action_store (vars.c:814)
==242644== by 0x24D451: http_req_get_intercept_rule (http_ana.c:2697)
In fact, the error is legit, because in pat_match_{beg,str}, we
dereference the buffer on len+1 to check if a value was previously set,
and then decide to force NULL-byte if it wasn't set.
But the approach is no longer compatible with current architecture:
data past str.data is not guaranteed to be initialized in the buffer.
Thus we cannot dereference the value, else we expose us to uninitialized
read errors. Moreover, the check is useless, because we systematically
set the ending byte to 0 when the conditions are met.
Finally, restoring the older value after the lookup is not relevant:
indeed, either the sample is marked as const and in such case it
is already duplicated, or the sample is not const and we forcefully add
a terminating NULL byte outside from the actual string bytes (since we're
past str.data), so as we didn't alter effective string data and that data
past str.data cannot be dereferenced anyway as it isn't guaranteed to be
initialized, there's no point in restoring previous uninitialized data.
It could be backported in all stable versions. But since this was only
detected by valgrind and isn't known to cause issues in existing
deployments, it's probably better to wait a bit before backporting it
to avoid any breakage.. although the fix should be theoretically harmless.
2024-09-06 10:33:15 -04:00
|
|
|
/* ensure the input sample can be read as a string without knowing its size,
|
|
|
|
|
* that is, ensure the terminating null byte is there
|
|
|
|
|
*
|
|
|
|
|
* The function may fail. Returns 1 on success and 0 on failure
|
|
|
|
|
*/
|
|
|
|
|
static inline int pat_match_ensure_str(struct sample *smp)
|
|
|
|
|
{
|
|
|
|
|
if (smp->data.u.str.data < smp->data.u.str.size) {
|
|
|
|
|
/* we have to force a trailing zero on the test pattern and
|
|
|
|
|
* the buffer is large enough to accommodate it. If the flag
|
|
|
|
|
* CONST is set, duplicate the string
|
|
|
|
|
*/
|
|
|
|
|
if (smp->flags & SMP_F_CONST) {
|
|
|
|
|
if (!smp_dup(smp))
|
|
|
|
|
return 0;
|
|
|
|
|
} else
|
|
|
|
|
smp->data.u.str.area[smp->data.u.str.data] = '\0';
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* Otherwise, the sample is duplicated. A trailing zero
|
|
|
|
|
* is automatically added to the string.
|
|
|
|
|
*/
|
|
|
|
|
if (!smp_dup(smp))
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2018-11-15 13:22:31 -05:00
|
|
|
/* Exact string matcher. NB: For two strings to be identical, it is required
 * that their length match. Looks the sample up first in the expression's
 * pattern tree, then in the per-thread LRU cache, then linearly in the
 * pattern list. Returns the matched pattern (possibly the per-thread static
 * one when coming from the tree) or NULL.
 */
struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill)
{
	int icase;
	struct ebmb_node *node;
	struct pattern_tree *elt;
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;
	struct lru64 *lru = NULL;

	/* Lookup a string in the expression's pattern tree. */
	if (!eb_is_empty(&expr->pattern_tree)) {
		/* the ebtree lookup requires a NUL-terminated string */
		if (!pat_match_ensure_str(smp))
			return NULL;

		node = ebst_lookup(&expr->pattern_tree, smp->data.u.str.area);

		/* skip nodes whose generation differs from the current one
		 * (entries in the middle of an atomic update); only identical
		 * duplicates are visited (ebmb_next_dup).
		 */
		while (node) {
			elt = ebmb_entry(node, struct pattern_tree, node);
			if (elt->ref->gen_id != expr->ref->curr_gen) {
				node = ebmb_next_dup(node);
				continue;
			}
			if (fill) {
				/* expose the match via the per-thread static pattern */
				static_pattern.data = elt->data;
				static_pattern.ref = elt->ref;
				static_pattern.sflags = PAT_SF_TREE;
				static_pattern.type = SMP_T_STR;
				static_pattern.ptr.str = (char *)elt->node.key;
			}
			return &static_pattern;
		}
	}

	/* look in the list */
	if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
		unsigned long long seed = pat_lru_seed ^ (long)expr;

		/* consult the per-thread LRU cache first; entries are keyed on
		 * the sample hash and tied to the reference's revision.
		 */
		lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
				pat_lru_tree, expr, expr->ref->revision);
		if (lru && lru->domain) {
			ret = lru->data;
			return ret;
		}
	}

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries not belonging to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* lengths must match for the strings to be identical */
		if (pattern->len != smp->data.u.str.data)
			continue;

		icase = expr->mflags & PAT_MF_IGNORE_CASE;
		if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) ||
		    (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0)) {
			ret = pattern;
			break;
		}
	}

	/* record the outcome (including a miss) in the LRU cache */
	if (lru)
		lru64_commit(lru, ret, expr, expr->ref->revision, NULL);

	return ret;
}
|
|
|
|
|
|
|
|
|
|
/* Exact binary matcher. NB: For two binary bufs to be identical, it is
 * required that their lengths match. Consults the per-thread LRU cache, then
 * scans the pattern list linearly. Returns the matched pattern or NULL.
 */
struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill)
{
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;
	struct lru64 *lru = NULL;

	/* consult the per-thread LRU cache first; entries are keyed on the
	 * sample hash and tied to the reference's revision.
	 */
	if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
		unsigned long long seed = pat_lru_seed ^ (long)expr;

		lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
				pat_lru_tree, expr, expr->ref->revision);
		if (lru && lru->domain) {
			ret = lru->data;
			return ret;
		}
	}

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries not belonging to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* lengths must match for the buffers to be identical */
		if (pattern->len != smp->data.u.str.data)
			continue;

		if (memcmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) {
			ret = pattern;
			break;
		}
	}

	/* record the outcome (including a miss) in the LRU cache */
	if (lru)
		lru64_commit(lru, ret, expr, expr->ref->revision, NULL);

	return ret;
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
/* Executes each regex of the expression against the sample. The sample is
 * passed with an explicit length (regex_exec_match2), so the buffer is not
 * modified here (older versions used to temporarily NUL-terminate it).
 * On a match, the pmatch array is filled and exposed via smp->ctx.a[0] so
 * that the caller can retrieve the captured groups.
 * Returns the matching pattern, or NULL if none matched.
 */
struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill)
{
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries that do not belong to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data,
		                      MAX_MATCH, pmatch, 0)) {
			ret = pattern;
			/* hand the capture array to the caller */
			smp->ctx.a[0] = pmatch;
			break;
		}
	}

	return ret;
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Executes each regex of the expression against the sample. The sample is
 * passed with an explicit length (regex_exec2), so the buffer is not
 * modified here (older versions used to temporarily NUL-terminate it).
 * Results are cached in the per-expression LRU tree when enabled.
 * Returns the matching pattern, or NULL if none matched.
 */
struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill)
{
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;
	struct lru64 *lru = NULL;

	/* check the LRU match cache first, keyed by a seeded hash of the sample */
	if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
		unsigned long long seed = pat_lru_seed ^ (long)expr;

		lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
				pat_lru_tree, expr, expr->ref->revision);
		if (lru && lru->domain) {
			/* cache hit: may be a cached miss (NULL) as well */
			ret = lru->data;
			return ret;
		}
	}

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries that do not belong to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		if (regex_exec2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data)) {
			ret = pattern;
			break;
		}
	}

	/* cache the result (match or miss) for subsequent lookups */
	if (lru)
		lru64_commit(lru, ret, expr, expr->ref->revision, NULL);

	return ret;
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern matches the beginning of the tested string.
 * The lookup is first performed in the prefix tree (longest match), then
 * falls back to the list of patterns, with an LRU cache in front of the
 * list scan. Returns the matching pattern (or &static_pattern for tree
 * hits when <fill> is set), or NULL if none matched.
 */
struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill)
{
	int icase;
	struct ebmb_node *node;
	struct pattern_tree *elt;
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;
	struct lru64 *lru = NULL;

	/* Lookup a string in the expression's pattern tree. */
	if (!eb_is_empty(&expr->pattern_tree)) {
		/* the tree lookup needs a NUL-terminated string; this may
		 * duplicate / adjust the sample, and can fail.
		 */
		if (!pat_match_ensure_str(smp))
			return NULL;

		node = ebmb_lookup_longest(&expr->pattern_tree,
					   smp->data.u.str.area);

		while (node) {
			elt = ebmb_entry(node, struct pattern_tree, node);
			if (elt->ref->gen_id != expr->ref->curr_gen) {
				/* entry belongs to another generation: walk
				 * duplicates then shorter prefixes instead of
				 * unrelated neighbours (see ebmb_lookup_shorter).
				 */
				node = ebmb_lookup_shorter(node);
				continue;
			}
			if (fill) {
				/* expose the tree element through the shared
				 * static_pattern descriptor.
				 */
				static_pattern.data = elt->data;
				static_pattern.ref = elt->ref;
				static_pattern.sflags = PAT_SF_TREE;
				static_pattern.type = SMP_T_STR;
				static_pattern.ptr.str = (char *)elt->node.key;
			}
			return &static_pattern;
		}
	}

	/* look in the list */
	if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
		unsigned long long seed = pat_lru_seed ^ (long)expr;

		lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
				pat_lru_tree, expr, expr->ref->revision);
		if (lru && lru->domain) {
			/* cache hit: may be a cached miss (NULL) as well */
			ret = lru->data;
			return ret;
		}
	}

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries that do not belong to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* a prefix longer than the sample cannot match */
		if (pattern->len > smp->data.u.str.data)
			continue;

		icase = expr->mflags & PAT_MF_IGNORE_CASE;
		if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0) ||
		    (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0))
			continue;

		ret = pattern;
		break;
	}

	/* cache the list-scan result (match or miss) */
	if (lru)
		lru64_commit(lru, ret, expr, expr->ref->revision, NULL);

	return ret;
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern matches the end of the tested string. */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int icase;
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
|
|
|
|
|
2023-08-22 01:22:05 -04:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len > smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0) ||
|
|
|
|
|
(!icase && strncmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0))
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string.
 * NB: Suboptimal, should be rewritten using a Boyer-Moore method.
 * Returns the first matching pattern, or NULL if none matched. Results
 * are cached in the per-expression LRU tree when enabled.
 */
struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill)
{
	int icase;
	char *end;
	char *c;
	struct pattern_list *lst;
	struct pattern *pattern;
	struct pattern *ret = NULL;
	struct lru64 *lru = NULL;

	/* check the LRU match cache first, keyed by a seeded hash of the sample */
	if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
		unsigned long long seed = pat_lru_seed ^ (long)expr;

		lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
				pat_lru_tree, expr, expr->ref->revision);
		if (lru && lru->domain) {
			/* cache hit: may be a cached miss (NULL) as well */
			ret = lru->data;
			return ret;
		}
	}

	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries that do not belong to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* a needle longer than the haystack cannot match */
		if (pattern->len > smp->data.u.str.data)
			continue;

		/* <end> is the last candidate position where the needle
		 * still fits entirely in the sample.
		 */
		end = smp->data.u.str.area + smp->data.u.str.data - pattern->len;
		icase = expr->mflags & PAT_MF_IGNORE_CASE;
		if (icase) {
			for (c = smp->data.u.str.area; c <= end; c++) {
				/* cheap first-byte filter before the full compare */
				if (tolower((unsigned char)*c) != tolower((unsigned char)*pattern->ptr.str))
					continue;
				if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
					ret = pattern;
					goto leave;
				}
			}
		} else {
			for (c = smp->data.u.str.area; c <= end; c++) {
				/* cheap first-byte filter before the full compare */
				if (*c != *pattern->ptr.str)
					continue;
				if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
					ret = pattern;
					goto leave;
				}
			}
		}
	}
 leave:
	/* cache the result (match or miss) for subsequent lookups */
	if (lru)
		lru64_commit(lru, ret, expr, expr->ref->revision, NULL);

	return ret;
}
|
|
|
|
|
|
|
|
|
|
/* This one is used by other real functions. It checks that the pattern is
 * included inside the tested string, but enclosed between the specified
 * delimiters or at the beginning or end of the string. The delimiters are
 * provided as an unsigned int made by make_4delim() and match up to 4 different
 * delimiters. Delimiters are stripped at the beginning and end of the pattern.
 * Returns PAT_MATCH or PAT_NOMATCH.
 */
static int match_word(struct sample *smp, struct pattern *pattern, int mflags, unsigned int delimiters)
{
	int may_match, icase;
	char *c, *end;
	char *ps;	/* effective pattern start after delimiter stripping */
	int pl;		/* effective pattern length after delimiter stripping */

	pl = pattern->len;
	ps = pattern->ptr.str;

	/* strip leading delimiters from the pattern */
	while (pl > 0 && is_delimiter(*ps, delimiters)) {
		pl--;
		ps++;
	}

	/* strip trailing delimiters from the pattern */
	while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
		pl--;

	/* the stripped pattern cannot fit in the sample */
	if (pl > smp->data.u.str.data)
		return PAT_NOMATCH;

	/* <may_match> is set when the current position is a valid word start:
	 * either the very beginning of the sample or just after a delimiter.
	 */
	may_match = 1;
	icase = mflags & PAT_MF_IGNORE_CASE;
	end = smp->data.u.str.area + smp->data.u.str.data - pl;
	for (c = smp->data.u.str.area; c <= end; c++) {
		if (is_delimiter(*c, delimiters)) {
			/* next position starts a new word */
			may_match = 1;
			continue;
		}

		if (!may_match)
			continue;

		/* a match requires: same first char (cheap filter), full
		 * compare of <pl> bytes, and the word ending at the sample's
		 * end or at a delimiter.
		 */
		if (icase) {
			if ((tolower((unsigned char)*c) == tolower((unsigned char)*ps)) &&
			    (strncasecmp(ps, c, pl) == 0) &&
			    (c == end || is_delimiter(c[pl], delimiters)))
				return PAT_MATCH;
		} else {
			if ((*c == *ps) &&
			    (strncmp(ps, c, pl) == 0) &&
			    (c == end || is_delimiter(c[pl], delimiters)))
				return PAT_MATCH;
		}
		/* inside a word now: wait for the next delimiter */
		may_match = 0;
	}
	return PAT_NOMATCH;
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string, but enclosed
|
|
|
|
|
* between the delimiters '?' or '/' or at the beginning or end of the string.
|
|
|
|
|
* Delimiters at the beginning or end of the pattern are ignored.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '?', '?')))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string, but enclosed
|
|
|
|
|
* between the delmiters '/', '?', '.' or ":" or at the beginning or end of
|
|
|
|
|
* the string. Delimiters at the beginning or end of the pattern are ignored.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '.', ':')))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the integer in <test> is included between min and max */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2015-08-19 03:07:19 -04:00
|
|
|
if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.sint) &&
|
|
|
|
|
(!pattern->val.range.max_set || smp->data.u.sint <= pattern->val.range.max))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the length of the pattern in <test> is included between min and max */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.str.data) &&
|
|
|
|
|
(!pattern->val.range.max_set || smp->data.u.str.data <= pattern->val.range.max))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
2023-09-06 05:32:54 -04:00
|
|
|
/* Performs ipv4 key lookup in <expr> ipv4 tree
 * Returns NULL on failure. On success, returns &static_pattern, filled
 * with the matched element's data when <fill> is set.
 */
static struct pattern *_pat_match_tree_ipv4(struct in_addr *key, struct pattern_expr *expr, int fill)
{
	struct ebmb_node *node;
	struct pattern_tree *elt;

	/* Lookup an IPv4 address in the expression's pattern tree using
	 * the longest match method.
	 */
	node = ebmb_lookup_longest(&expr->pattern_tree, key);
	while (node) {
		elt = ebmb_entry(node, struct pattern_tree, node);
		if (elt->ref->gen_id != expr->ref->curr_gen) {
			/* entry from another generation: fall back to
			 * duplicates then shorter prefixes.
			 */
			node = ebmb_lookup_shorter(node);
			continue;
		}
		if (fill) {
			static_pattern.data = elt->data;
			static_pattern.ref = elt->ref;
			static_pattern.sflags = PAT_SF_TREE;
			static_pattern.type = SMP_T_IPV4;
			static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key);
			/* convert the prefix length back to a dotted mask;
			 * an invalid prefix length makes the lookup fail.
			 */
			if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask))
				return NULL;
		}
		return &static_pattern;
	}
	return NULL;
}
|
|
|
|
|
|
|
|
|
|
/* Performs ipv6 key lookup in <expr> ipv6 tree
 * Returns NULL on failure. On success, returns &static_pattern, filled
 * with the matched element's data when <fill> is set.
 */
static struct pattern *_pat_match_tree_ipv6(struct in6_addr *key, struct pattern_expr *expr, int fill)
{
	struct ebmb_node *node;
	struct pattern_tree *elt;

	/* Lookup an IPv6 address in the expression's pattern tree using
	 * the longest match method.
	 */
	node = ebmb_lookup_longest(&expr->pattern_tree_2, key);
	while (node) {
		elt = ebmb_entry(node, struct pattern_tree, node);
		if (elt->ref->gen_id != expr->ref->curr_gen) {
			/* entry from another generation: fall back to
			 * duplicates then shorter prefixes.
			 */
			node = ebmb_lookup_shorter(node);
			continue;
		}
		if (fill) {
			static_pattern.data = elt->data;
			static_pattern.ref = elt->ref;
			static_pattern.sflags = PAT_SF_TREE;
			static_pattern.type = SMP_T_IPV6;
			/* IPv6 keys are 16 bytes; the mask is the prefix length */
			memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16);
			static_pattern.val.ipv6.mask = elt->node.node.pfx;
		}
		return &static_pattern;
	}
	return NULL;
}
|
|
|
|
|
|
|
|
|
|
/* Matches an IPv4 or IPv6 sample against the expression's address trees,
 * trying the sample's native family first, then the converted family, and
 * finally the IPv4 pattern list (which supports non-contiguous masks).
 * Returns the matching pattern or NULL.
 */
struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill)
{
	struct in_addr v4;
	struct in6_addr v6;
	struct pattern_list *lst;
	struct pattern *pattern;

	/* The input sample is IPv4. Try to match in the trees. */
	if (smp->data.type == SMP_T_IPV4) {
		pattern = _pat_match_tree_ipv4(&smp->data.u.ipv4, expr, fill);
		if (pattern)
			return pattern;
		/* The IPv4 sample don't match the IPv4 tree. Convert the IPv4
		 * sample address to IPv6 and try to lookup in the IPv6 tree.
		 */
		v4tov6(&v6, &smp->data.u.ipv4);
		pattern = _pat_match_tree_ipv6(&v6, expr, fill);
		if (pattern)
			return pattern;
		/* eligible for list lookup using IPv4 address */
		v4 = smp->data.u.ipv4;
		goto list_lookup;
	}

	/* The input sample is IPv6. Try to match in the trees. */
	if (smp->data.type == SMP_T_IPV6) {
		pattern = _pat_match_tree_ipv6(&smp->data.u.ipv6, expr, fill);
		if (pattern)
			return pattern;
		/* No match in the IPv6 tree. Try to convert 6 to 4 to lookup in
		 * the IPv4 tree
		 */
		if (v6tov4(&v4, &smp->data.u.ipv6)) {
			pattern = _pat_match_tree_ipv4(&v4, expr, fill);
			if (pattern)
				return pattern;
			/* eligible for list lookup using IPv4 address */
			goto list_lookup;
		}
	}

 not_found:
	return NULL;

 list_lookup:
	/* No match in the trees, but we still have a valid IPv4 address: lookup
	 * in the IPv4 list (non-contiguous masks list). This is our last resort
	 */
	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries that do not belong to the current generation */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* Check if the input sample match the current pattern. */
		if (((v4.s_addr ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
			return pattern;
	}
	goto not_found;
}
|
|
|
|
|
|
2020-11-03 05:22:04 -05:00
|
|
|
/* finds the pattern holding <list> from list head <head> and deletes it.
 * This is made for use for pattern removal within an expression.
 * <head> points to the first element of an intrusive singly-linked list
 * whose links are void* slots; the function walks the links until it finds
 * the slot equal to <list>, then bypasses it. Does nothing if <list> is
 * not in the chain.
 */
static void pat_unlink_from_head(void **head, void **list)
{
	while (*head) {
		if (*head == list) {
			/* unlink: make the predecessor point past <list> */
			*head = *list;
			return;
		}
		/* advance: each element's slot holds the next slot's address */
		head = *head;
	}
}
|
|
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
/* Releases all elements of the pattern tree rooted at <root>: each node is
 * removed from the tree, unlinked from its reference's tree_head chain, and
 * its data and container are freed.
 */
void free_pattern_tree(struct eb_root *root)
{
	struct eb_node *node, *next;
	struct pattern_tree *elt;

	node = eb_first(root);
	while (node) {
		/* grab the successor before deleting the current node */
		next = eb_next(node);
		eb_delete(node);
		elt = container_of(node, struct pattern_tree, node);
		/* detach the element from its pattern reference */
		pat_unlink_from_head(&elt->ref->tree_head, &elt->from_ref);
		free(elt->data);
		free(elt);
		node = next;
	}
}
|
|
|
|
|
|
2020-11-02 13:26:02 -05:00
|
|
|
/* Empties the expression <expr>: frees every list pattern (including its
 * duplicated value or compiled regex) and both pattern trees, then resets
 * the list head, bumps the reference's revision (invalidating LRU cache
 * entries) and clears the entry count.
 */
void pat_prune_gen(struct pattern_expr *expr)
{
	struct pattern_list *pat, *tmp;

	list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
		LIST_DELETE(&pat->list);
		/* detach from the reference's list_head chain */
		pat_unlink_from_head(&pat->pat.ref->list_head, &pat->from_ref);
		/* the pointer is either a compiled regex or a plain buffer */
		if (pat->pat.sflags & PAT_SF_REGFREE)
			regex_free(pat->pat.ptr.ptr);
		else
			free(pat->pat.ptr.ptr);
		free(pat->pat.data);
		free(pat);
	}

	free_pattern_tree(&expr->pattern_tree);
	free_pattern_tree(&expr->pattern_tree_2);
	LIST_INIT(&expr->patterns);
	/* new revision so stale LRU entries are ignored */
	expr->ref->revision = rdtsc();
	expr->ref->entry_cnt = 0;
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* The following functions are used for the pattern indexation
|
|
|
|
|
*
|
2013-11-28 05:05:19 -05:00
|
|
|
*/
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* Indexes pattern <pat> into expression <expr> as a by-value list entry:
 * the pattern descriptor is copied, but no pointed-to data is duplicated.
 * Returns 1 on success, 0 on allocation failure (with <err> filled).
 */
int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	struct pattern_list *patl;

	/* allocate pattern */
	patl = calloc(1, sizeof(*patl));
	if (!patl) {
		memprintf(err, "out of memory while indexing pattern");
		return 0;
	}

	/* duplicate pattern */
	memcpy(&patl->pat, pat, sizeof(*pat));

	/* chain pattern in the expression */
	LIST_APPEND(&expr->patterns, &patl->list);
	patl->expr = expr;
	/* and from the reference (push onto the intrusive from_ref chain) */
	patl->from_ref = pat->ref->list_head;
	pat->ref->list_head = &patl->from_ref;
	/* new revision invalidates LRU cache entries for this expression */
	expr->ref->revision = rdtsc();
	expr->ref->entry_cnt++;

	/* that's ok */
	return 1;
}
|
|
|
|
|
|
|
|
|
|
int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_list *patl;
|
|
|
|
|
|
|
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
2015-02-06 11:50:55 -05:00
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
2014-01-23 11:53:31 -05:00
|
|
|
return 0;
|
2015-02-06 11:50:55 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
|
|
|
|
patl->pat.ptr.ptr = malloc(patl->pat.len);
|
|
|
|
|
if (!patl->pat.ptr.ptr) {
|
|
|
|
|
free(patl);
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_list *patl;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
|
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
|
|
|
|
patl->pat.ptr.str = malloc(patl->pat.len + 1);
|
|
|
|
|
if (!patl->pat.ptr.str) {
|
|
|
|
|
free(patl);
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
|
|
|
|
|
patl->pat.ptr.str[patl->pat.len] = '\0';
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
|
2013-12-13 09:12:32 -05:00
|
|
|
{
|
|
|
|
|
struct pattern_list *patl;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
|
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
2014-01-23 11:53:31 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
|
|
|
|
|
|
|
|
|
/* compile regex */
|
2020-11-02 13:16:23 -05:00
|
|
|
patl->pat.sflags |= PAT_SF_REGFREE;
|
2019-04-30 09:54:36 -04:00
|
|
|
if (!(patl->pat.ptr.reg = regex_comp(pat->ptr.str, !(expr->mflags & PAT_MF_IGNORE_CASE),
|
|
|
|
|
cap, err))) {
|
2014-04-28 18:57:16 -04:00
|
|
|
free(patl);
|
2013-12-13 09:12:32 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
/* Indexes regex pattern <pat> into <expr> without capture groups. */
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	return pat_idx_list_reg_cap(expr, pat, 0, err);
}
|
|
|
|
|
|
|
|
|
|
/* Indexes regex pattern <pat> into <expr> with capture groups kept. */
int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	return pat_idx_list_reg_cap(expr, pat, 1, err);
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
unsigned int mask;
|
2013-12-13 10:09:50 -05:00
|
|
|
struct pattern_tree *node;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* Only IPv4 can be indexed */
|
|
|
|
|
if (pat->type == SMP_T_IPV4) {
|
2014-01-23 11:53:31 -05:00
|
|
|
/* in IPv4 case, check if the mask is contiguous so that we can
|
|
|
|
|
* insert the network into the tree. A continuous mask has only
|
|
|
|
|
* ones on the left. This means that this mask + its lower bit
|
|
|
|
|
* added once again is null.
|
|
|
|
|
*/
|
2013-12-13 09:12:32 -05:00
|
|
|
mask = ntohl(pat->val.ipv4.mask.s_addr);
|
|
|
|
|
if (mask + (mask & -mask) == 0) {
|
|
|
|
|
mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
|
|
|
|
|
|
|
|
|
|
/* node memory allocation */
|
|
|
|
|
node = calloc(1, sizeof(*node) + 4);
|
|
|
|
|
if (!node) {
|
|
|
|
|
memprintf(err, "out of memory while loading pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* copy the pointer to sample associated to this node */
|
2015-08-19 02:35:43 -04:00
|
|
|
node->data = pat->data;
|
2014-01-28 09:54:36 -05:00
|
|
|
node->ref = pat->ref;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* FIXME: insert <addr>/<mask> into the tree here */
|
|
|
|
|
memcpy(node->node.key, &pat->val.ipv4.addr, 4); /* network byte order */
|
|
|
|
|
node->node.node.pfx = mask;
|
2014-01-29 18:27:15 -05:00
|
|
|
|
|
|
|
|
/* Insert the entry. */
|
|
|
|
|
ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);
|
2023-08-23 09:58:26 -04:00
|
|
|
|
|
|
|
|
node->expr = expr;
|
2020-11-03 08:50:29 -05:00
|
|
|
node->from_ref = pat->ref->tree_head;
|
|
|
|
|
pat->ref->tree_head = &node->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2013-12-19 17:54:54 -05:00
|
|
|
else {
|
|
|
|
|
/* If the mask is not contiguous, just add the pattern to the list */
|
|
|
|
|
return pat_idx_list_val(expr, pat, err);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (pat->type == SMP_T_IPV6) {
|
|
|
|
|
/* IPv6 also can be indexed */
|
|
|
|
|
node = calloc(1, sizeof(*node) + 16);
|
|
|
|
|
if (!node) {
|
|
|
|
|
memprintf(err, "out of memory while loading pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* copy the pointer to sample associated to this node */
|
2015-08-19 02:35:43 -04:00
|
|
|
node->data = pat->data;
|
2014-01-28 09:54:36 -05:00
|
|
|
node->ref = pat->ref;
|
2013-12-19 17:54:54 -05:00
|
|
|
|
|
|
|
|
/* FIXME: insert <addr>/<mask> into the tree here */
|
|
|
|
|
memcpy(node->node.key, &pat->val.ipv6.addr, 16); /* network byte order */
|
|
|
|
|
node->node.node.pfx = pat->val.ipv6.mask;
|
2014-01-29 18:27:15 -05:00
|
|
|
|
|
|
|
|
/* Insert the entry. */
|
|
|
|
|
ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);
|
2023-08-23 09:58:26 -04:00
|
|
|
|
|
|
|
|
node->expr = expr;
|
2020-11-03 08:50:29 -05:00
|
|
|
node->from_ref = pat->ref->tree_head;
|
|
|
|
|
pat->ref->tree_head = &node->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-19 17:54:54 -05:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
2013-12-13 09:12:32 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-19 17:54:54 -05:00
|
|
|
return 0;
|
2013-12-13 09:12:32 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
int len;
|
2013-12-13 10:09:50 -05:00
|
|
|
struct pattern_tree *node;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* Only string can be indexed */
|
2013-12-16 18:20:33 -05:00
|
|
|
if (pat->type != SMP_T_STR) {
|
2013-12-13 09:12:32 -05:00
|
|
|
memprintf(err, "internal error: string expected, but the type is '%s'",
|
|
|
|
|
smp_to_type[pat->type]);
|
|
|
|
|
return 0;
|
2014-01-23 11:53:31 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */
|
2014-04-28 05:18:57 -04:00
|
|
|
if (expr->mflags & PAT_MF_IGNORE_CASE)
|
2013-12-13 09:12:32 -05:00
|
|
|
return pat_idx_list_str(expr, pat, err);
|
|
|
|
|
|
|
|
|
|
/* Process the key len */
|
|
|
|
|
len = strlen(pat->ptr.str) + 1;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* node memory allocation */
|
|
|
|
|
node = calloc(1, sizeof(*node) + len);
|
|
|
|
|
if (!node) {
|
|
|
|
|
memprintf(err, "out of memory while loading pattern");
|
|
|
|
|
return 0;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* copy the pointer to sample associated to this node */
|
2015-08-19 02:35:43 -04:00
|
|
|
node->data = pat->data;
|
2014-01-28 09:54:36 -05:00
|
|
|
node->ref = pat->ref;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* copy the string */
|
|
|
|
|
memcpy(node->node.key, pat->ptr.str, len);
|
|
|
|
|
|
|
|
|
|
/* index the new node */
|
2014-01-29 18:27:15 -05:00
|
|
|
ebst_insert(&expr->pattern_tree, &node->node);
|
2023-08-23 09:58:26 -04:00
|
|
|
|
|
|
|
|
node->expr = expr;
|
2020-11-03 08:50:29 -05:00
|
|
|
node->from_ref = pat->ref->tree_head;
|
|
|
|
|
pat->ref->tree_head = &node->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-10 02:53:48 -04:00
|
|
|
int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
int len;
|
|
|
|
|
struct pattern_tree *node;
|
|
|
|
|
|
|
|
|
|
/* Only string can be indexed */
|
|
|
|
|
if (pat->type != SMP_T_STR) {
|
|
|
|
|
memprintf(err, "internal error: string expected, but the type is '%s'",
|
|
|
|
|
smp_to_type[pat->type]);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */
|
|
|
|
|
if (expr->mflags & PAT_MF_IGNORE_CASE)
|
|
|
|
|
return pat_idx_list_str(expr, pat, err);
|
|
|
|
|
|
|
|
|
|
/* Process the key len */
|
|
|
|
|
len = strlen(pat->ptr.str);
|
|
|
|
|
|
|
|
|
|
/* node memory allocation */
|
|
|
|
|
node = calloc(1, sizeof(*node) + len + 1);
|
|
|
|
|
if (!node) {
|
|
|
|
|
memprintf(err, "out of memory while loading pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* copy the pointer to sample associated to this node */
|
2015-08-19 02:35:43 -04:00
|
|
|
node->data = pat->data;
|
2014-05-10 02:53:48 -04:00
|
|
|
node->ref = pat->ref;
|
|
|
|
|
|
|
|
|
|
/* copy the string and the trailing zero */
|
|
|
|
|
memcpy(node->node.key, pat->ptr.str, len + 1);
|
|
|
|
|
node->node.node.pfx = len * 8;
|
|
|
|
|
|
|
|
|
|
/* index the new node */
|
|
|
|
|
ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);
|
2023-08-23 09:58:26 -04:00
|
|
|
|
|
|
|
|
node->expr = expr;
|
2020-11-03 08:50:29 -05:00
|
|
|
node->from_ref = pat->ref->tree_head;
|
|
|
|
|
pat->ref->tree_head = &node->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2014-05-10 02:53:48 -04:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-02 13:53:16 -05:00
|
|
|
/* Deletes all patterns from reference <elt>. Note that all of their
|
2020-11-02 07:55:22 -05:00
|
|
|
* expressions must be locked, and the pattern lock must be held as well.
|
|
|
|
|
*/
|
2020-11-02 13:53:16 -05:00
|
|
|
void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt)
|
2014-01-15 05:38:49 -05:00
|
|
|
{
|
2020-11-03 08:50:29 -05:00
|
|
|
struct pattern_tree *tree;
|
|
|
|
|
struct pattern_list *pat;
|
|
|
|
|
void **node;
|
2020-11-02 13:53:16 -05:00
|
|
|
|
|
|
|
|
/* delete all known tree nodes. They are all allocated inline */
|
2020-11-03 08:50:29 -05:00
|
|
|
for (node = elt->tree_head; node;) {
|
|
|
|
|
tree = container_of(node, struct pattern_tree, from_ref);
|
|
|
|
|
node = *node;
|
2020-11-02 13:53:16 -05:00
|
|
|
BUG_ON(tree->ref != elt);
|
|
|
|
|
|
|
|
|
|
ebmb_delete(&tree->node);
|
|
|
|
|
free(tree->data);
|
|
|
|
|
free(tree);
|
|
|
|
|
}
|
2014-01-15 05:38:49 -05:00
|
|
|
|
2020-11-02 13:53:16 -05:00
|
|
|
/* delete all list nodes and free their pattern entries (str/reg) */
|
2020-11-03 08:50:29 -05:00
|
|
|
for (node = elt->list_head; node;) {
|
|
|
|
|
pat = container_of(node, struct pattern_list, from_ref);
|
|
|
|
|
node = *node;
|
2020-11-02 07:55:22 -05:00
|
|
|
BUG_ON(pat->pat.ref != elt);
|
2014-01-15 05:38:49 -05:00
|
|
|
|
|
|
|
|
/* Delete and free entry. */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&pat->list);
|
2020-11-02 13:26:02 -05:00
|
|
|
if (pat->pat.sflags & PAT_SF_REGFREE)
|
|
|
|
|
regex_free(pat->pat.ptr.reg);
|
|
|
|
|
else
|
|
|
|
|
free(pat->pat.ptr.ptr);
|
2015-08-19 02:35:43 -04:00
|
|
|
free(pat->pat.data);
|
2014-01-15 05:38:49 -05:00
|
|
|
free(pat);
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-02 13:53:16 -05:00
|
|
|
/* update revision number to refresh the cache */
|
|
|
|
|
ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
ref->entry_cnt--;
|
2020-11-03 08:50:29 -05:00
|
|
|
elt->tree_head = NULL;
|
|
|
|
|
elt->list_head = NULL;
|
2014-01-15 05:38:49 -05:00
|
|
|
}
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
void pattern_init_expr(struct pattern_expr *expr)
|
|
|
|
|
{
|
|
|
|
|
LIST_INIT(&expr->patterns);
|
2014-01-29 18:27:15 -05:00
|
|
|
expr->pattern_tree = EB_ROOT;
|
|
|
|
|
expr->pattern_tree_2 = EB_ROOT;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void pattern_init_head(struct pattern_head *head)
|
|
|
|
|
{
|
|
|
|
|
LIST_INIT(&head->head);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The following functions are relative to the management of the reference
|
|
|
|
|
* lists. These lists are used to store the original pattern and associated
|
|
|
|
|
* value as string form.
|
|
|
|
|
*
|
|
|
|
|
* This is used with modifiable ACL and MAPS
|
2014-03-11 09:29:22 -04:00
|
|
|
*
|
|
|
|
|
* The pattern reference are stored with two identifiers: the unique_id and
|
|
|
|
|
* the reference.
|
|
|
|
|
*
|
|
|
|
|
* The reference identify a file. Each file with the same name point to the
|
|
|
|
|
* same reference. We can register many times one file. If the file is modified,
|
|
|
|
|
* all his dependencies are also modified. The reference can be used with map or
|
|
|
|
|
* acl.
|
|
|
|
|
*
|
|
|
|
|
* The unique_id identify inline acl. The unique id is unique for each acl.
|
|
|
|
|
 * You cannot force the same id in the configuration file, because this would report
|
|
|
|
|
* an error.
|
|
|
|
|
*
|
|
|
|
|
* A particular case appears if the filename is a number. In this case, the
|
|
|
|
|
* unique_id is set with the number represented by the filename and the
|
|
|
|
|
* reference is also set. This method prevent double unique_id.
|
|
|
|
|
*
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up a reference by name. If the reference is found, a
|
|
|
|
|
* pointer to the struct pat_ref is returned, otherwise NULL is returned.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
struct pat_ref *pat_ref_lookup(const char *reference)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
/* Skip file@ prefix, it is the default case. Can be mixed with ref omitting the prefix */
|
|
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0)
|
|
|
|
|
reference += 5;
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
list_for_each_entry(ref, &pattern_reference, list)
|
2014-03-11 09:29:22 -04:00
|
|
|
if (ref->reference && strcmp(reference, ref->reference) == 0)
|
|
|
|
|
return ref;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up a reference's unique id. If the reference is found, a
|
|
|
|
|
* pointer to the struct pat_ref is returned, otherwise NULL is returned.
|
2014-03-11 09:29:22 -04:00
|
|
|
*/
|
|
|
|
|
struct pat_ref *pat_ref_lookupid(int unique_id)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(ref, &pattern_reference, list)
|
|
|
|
|
if (ref->unique_id == unique_id)
|
2014-02-11 05:31:40 -05:00
|
|
|
return ref;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-02 11:30:17 -05:00
|
|
|
/* This function removes from the pattern reference <ref> all the patterns
|
|
|
|
|
* attached to the reference element <elt>, and the element itself. The
|
|
|
|
|
* reference must be locked.
|
|
|
|
|
*/
|
|
|
|
|
void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_expr *expr;
|
|
|
|
|
struct bref *bref, *back;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* we have to unlink all watchers from this reference pattern. We must
|
|
|
|
|
* not relink them if this elt was the last one in the list.
|
|
|
|
|
*/
|
|
|
|
|
list_for_each_entry_safe(bref, back, &elt->back_refs, users) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&bref->users);
|
2020-11-02 11:30:17 -05:00
|
|
|
LIST_INIT(&bref->users);
|
|
|
|
|
if (elt->list.n != &ref->head)
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
|
2020-11-02 11:30:17 -05:00
|
|
|
bref->ref = elt->list.n;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* delete all entries from all expressions for this pattern */
|
|
|
|
|
list_for_each_entry(expr, &ref->pat, list)
|
|
|
|
|
HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
|
|
|
|
|
|
|
|
|
|
pat_delete_gen(ref, elt);
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(expr, &ref->pat, list)
|
|
|
|
|
HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
|
|
|
|
|
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&elt->list);
|
2023-11-26 05:56:08 -05:00
|
|
|
ebmb_delete(&elt->node);
|
2020-11-02 11:30:17 -05:00
|
|
|
free(elt->sample);
|
|
|
|
|
free(elt);
|
|
|
|
|
}
|
|
|
|
|
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
/* This function removes the pattern matching the pointer <refelt> from
|
2020-10-30 11:03:50 -04:00
|
|
|
* the reference and from each expr member of this reference. This function
|
|
|
|
|
* returns 1 if the entry was found and deleted, otherwise zero.
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
*
|
|
|
|
|
* <refelt> is user input: it is provided as an ID and should never be
|
|
|
|
|
* dereferenced without making sure that it is valid.
|
2014-01-28 10:43:36 -05:00
|
|
|
*/
|
|
|
|
|
int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt)
|
|
|
|
|
{
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
struct pat_ref_elt *elt, *safe;
|
2023-08-22 12:32:13 -04:00
|
|
|
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
/* delete pattern from reference */
|
|
|
|
|
list_for_each_entry_safe(elt, safe, &ref->head, list) {
|
|
|
|
|
if (elt == refelt) {
|
|
|
|
|
pat_ref_delete_by_ptr(ref, elt);
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
2014-01-28 10:43:36 -05:00
|
|
|
}
|
|
|
|
|
|
2020-11-02 11:30:17 -05:00
|
|
|
/* This function removes all patterns matching <key> from the reference
|
2018-11-15 13:22:31 -05:00
|
|
|
* and from each expr member of the reference. This function returns 1
|
2020-11-02 11:30:17 -05:00
|
|
|
* if the deletion is done and returns 0 is the entry is not found.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
int pat_ref_delete(struct pat_ref *ref, const char *key)
|
|
|
|
|
{
|
2023-11-26 05:56:08 -05:00
|
|
|
struct ebmb_node *node;
|
2014-02-11 05:31:40 -05:00
|
|
|
int found = 0;
|
|
|
|
|
|
|
|
|
|
/* delete pattern from reference */
|
2023-11-26 05:56:08 -05:00
|
|
|
node = ebst_lookup(&ref->ebmb_root, key);
|
2023-08-22 12:32:13 -04:00
|
|
|
while (node) {
|
|
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
2023-11-26 05:56:08 -05:00
|
|
|
elt = ebmb_entry(node, struct pat_ref_elt, node);
|
|
|
|
|
node = ebmb_next_dup(node);
|
2023-08-22 12:32:13 -04:00
|
|
|
pat_ref_delete_by_ptr(ref, elt);
|
|
|
|
|
found = 1;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2020-11-02 11:30:17 -05:00
|
|
|
return found;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2014-04-25 10:57:03 -04:00
|
|
|
/*
|
|
|
|
|
* find and return an element <elt> matching <key> in a reference <ref>
|
|
|
|
|
* return NULL if not found
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key)
|
|
|
|
|
{
|
2023-11-26 05:56:08 -05:00
|
|
|
struct ebmb_node *node;
|
2014-04-25 10:57:03 -04:00
|
|
|
|
2023-11-26 05:56:08 -05:00
|
|
|
node = ebst_lookup(&ref->ebmb_root, key);
|
2023-08-22 10:52:47 -04:00
|
|
|
if (node)
|
2023-11-26 05:56:08 -05:00
|
|
|
return ebmb_entry(node, struct pat_ref_elt, node);
|
2014-04-25 10:57:03 -04:00
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
/* This function modifies the sample of pat_ref_elt <elt> in all expressions
 * found under <ref> to become <value>. It is assumed that the caller has
 * already verified that <elt> belongs to <ref>. Returns 1 on success, 0 on
 * failure (parse error or OOM) with <err> filled.
 */
static inline int pat_ref_set_elt(struct pat_ref *ref, struct pat_ref_elt *elt,
                                  const char *value, char **err)
{
	struct pattern_expr *expr;
	struct sample_data **data;
	char *sample;
	struct sample_data test;
	struct pattern_tree *tree;
	struct pattern_list *pat;
	void **node;

	/* First pass: try all needed converters on a scratch sample_data so we
	 * can fail early, before touching any indexed pattern.
	 */
	list_for_each_entry(expr, &ref->pat, list) {
		if (!expr->pat_head->parse_smp)
			continue;

		if (!expr->pat_head->parse_smp(value, &test)) {
			memprintf(err, "unable to parse '%s'", value);
			return 0;
		}
	}

	/* Modify pattern from reference. */
	sample = strdup(value);
	if (!sample) {
		memprintf(err, "out of memory error");
		return 0;
	}
	/* Load sample in each reference. All the conversions are tested
	 * below, normally these calls don't fail.
	 */
	/* Walk the chain of pattern_tree entries hanging off this element.
	 * <node> points to the "from_ref" link inside each entry; it is
	 * advanced before any processing so "continue" is always safe.
	 */
	for (node = elt->tree_head; node;) {
		tree = container_of(node, struct pattern_tree, from_ref);
		node = *node;
		BUG_ON(tree->ref != elt);
		expr = tree->expr;
		if (!expr->pat_head->parse_smp)
			continue;

		data = &tree->data;
		if (data && *data) {
			HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
			/* NOTE(review): on parse failure the old sample_data
			 * pointer is dropped without being freed — presumably
			 * unreachable since the value was pre-validated above,
			 * but worth confirming.
			 */
			if (!expr->pat_head->parse_smp(sample, *data))
				*data = NULL;
			HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		}
	}

	/* Same walk over the list-based (non-tree) pattern representations. */
	for (node = elt->list_head; node;) {
		pat = container_of(node, struct pattern_list, from_ref);
		node = *node;
		BUG_ON(pat->pat.ref != elt);
		expr = pat->expr;
		if (!expr->pat_head->parse_smp)
			continue;

		data = &pat->pat.data;
		if (data && *data) {
			HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
			if (!expr->pat_head->parse_smp(sample, *data))
				*data = NULL;
			HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		}
	}

	/* free old sample only when all exprs are updated */
	free(elt->sample);
	elt->sample = sample;

	return 1;
}
2020-10-30 11:03:50 -04:00
|
|
|
/* This function modifies the sample of pat_ref_elt <refelt> in all expressions
|
|
|
|
|
* found under <ref> to become <value>, after checking that <refelt> really
|
|
|
|
|
* belongs to <ref>.
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
*
|
|
|
|
|
* <refelt> is user input: it is provided as an ID and should never be
|
|
|
|
|
* dereferenced without making sure that it is valid.
|
2020-10-30 11:03:50 -04:00
|
|
|
*/
|
2014-01-29 13:08:49 -05:00
|
|
|
int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err)
|
2014-01-29 10:24:55 -05:00
|
|
|
{
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
|
|
|
|
/* Look for pattern in the reference. */
|
|
|
|
|
list_for_each_entry(elt, &ref->head, list) {
|
|
|
|
|
if (elt == refelt) {
|
|
|
|
|
if (!pat_ref_set_elt(ref, elt, value, err))
|
|
|
|
|
return 0;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2014-01-29 10:24:55 -05:00
|
|
|
}
|
2014-01-29 13:08:49 -05:00
|
|
|
|
|
|
|
|
memprintf(err, "key or pattern not found");
|
2014-01-29 10:24:55 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
/* This function modifies to <value> the sample of all patterns matching <key>
 * under <ref>. If <elt> is non-NULL, only that element (and its duplicates
 * following it in the tree) are updated and <key> is ignored. Returns 1 on
 * success, 0 on failure with <err> filled (when <err> is non-NULL).
 */
int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char **err, struct pat_ref_elt *elt)
{
	int found = 0;
	struct ebmb_node *node;

	if (elt) {
		/* start directly at the caller-provided element */
		node = &elt->node;
	}
	else {
		/* Look for pattern in the reference. */
		node = ebst_lookup(&ref->ebmb_root, key);
	}

	/* update every duplicate sharing the same key */
	while (node) {
		char *tmp_err = NULL;

		elt = ebmb_entry(node, struct pat_ref_elt, node);
		node = ebmb_next_dup(node);
		if (!pat_ref_set_elt(ref, elt, value, &tmp_err)) {
			/* hand the error message to the caller if it wants it,
			 * otherwise free it to avoid a leak
			 */
			if (err)
				*err = tmp_err;
			else
				ha_free(&tmp_err);
			return 0;
		}
		found = 1;
	}

	if (!found) {
		memprintf(err, "entry not found");
		return 0;
	}
	return 1;
}
2018-11-15 13:22:31 -05:00
|
|
|
/* This function creates a new reference. <ref> is the reference name.
|
2014-02-11 05:31:40 -05:00
|
|
|
* <flags> are PAT_REF_*. /!\ The reference is not checked, and must
|
|
|
|
|
* be unique. The user must check the reference with "pat_ref_lookup()"
|
2020-10-30 11:03:50 -04:00
|
|
|
* before calling this function. If the function fails, it returns NULL,
|
|
|
|
|
* otherwise it returns the new struct pat_ref.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
2014-02-10 21:31:34 -05:00
|
|
|
struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags)
|
2014-02-11 05:31:40 -05:00
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
2020-10-30 10:35:11 -04:00
|
|
|
ref = calloc(1, sizeof(*ref));
|
2014-02-11 05:31:40 -05:00
|
|
|
if (!ref)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2014-02-10 21:31:34 -05:00
|
|
|
if (display) {
|
|
|
|
|
ref->display = strdup(display);
|
|
|
|
|
if (!ref->display) {
|
|
|
|
|
free(ref);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
|
|
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "virt@", 5) == 0)
|
|
|
|
|
flags |= PAT_REF_ID;
|
|
|
|
|
else if (strlen(reference) > 4 && strncmp(reference, "opt@", 4) == 0) {
|
|
|
|
|
flags |= (PAT_REF_ID|PAT_REF_FILE); // Will be decided later
|
|
|
|
|
reference += 4;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* A file by default */
|
|
|
|
|
flags |= PAT_REF_FILE;
|
|
|
|
|
/* Skip file@ prefix to be mixed with ref omitting the prefix */
|
|
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0)
|
|
|
|
|
reference += 5;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
ref->reference = strdup(reference);
|
|
|
|
|
if (!ref->reference) {
|
2014-02-10 21:31:34 -05:00
|
|
|
free(ref->display);
|
2014-02-11 05:31:40 -05:00
|
|
|
free(ref);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ref->flags = flags;
|
2014-03-11 09:29:22 -04:00
|
|
|
ref->unique_id = -1;
|
2020-11-02 09:26:51 -05:00
|
|
|
ref->revision = 0;
|
2021-05-21 10:59:15 -04:00
|
|
|
ref->entry_cnt = 0;
|
2014-03-11 09:29:22 -04:00
|
|
|
|
|
|
|
|
LIST_INIT(&ref->head);
|
2023-11-26 05:56:08 -05:00
|
|
|
ref->ebmb_root = EB_ROOT;
|
2014-03-11 09:29:22 -04:00
|
|
|
LIST_INIT(&ref->pat);
|
2023-08-21 12:44:24 -04:00
|
|
|
HA_RWLOCK_INIT(&ref->lock);
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&pattern_reference, &ref->list);
|
2014-03-11 09:29:22 -04:00
|
|
|
|
|
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function creates a new reference. <unique_id> is the unique id. If
|
2014-03-11 09:29:22 -04:00
|
|
|
* the value of <unique_id> is -1, the unique id is calculated later.
|
|
|
|
|
* <flags> are PAT_REF_*. /!\ The reference is not checked, and must
|
|
|
|
|
* be unique. The user must check the reference with "pat_ref_lookup()"
|
|
|
|
|
* or pat_ref_lookupid before calling this function. If the function
|
2020-10-30 11:03:50 -04:00
|
|
|
* fails, it returns NULL, otherwise it returns the new struct pat_ref.
|
2014-03-11 09:29:22 -04:00
|
|
|
*/
|
2014-02-10 21:31:34 -05:00
|
|
|
struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags)
|
2014-03-11 09:29:22 -04:00
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
2020-10-30 10:35:11 -04:00
|
|
|
ref = calloc(1, sizeof(*ref));
|
2014-03-11 09:29:22 -04:00
|
|
|
if (!ref)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2014-02-10 21:31:34 -05:00
|
|
|
if (display) {
|
|
|
|
|
ref->display = strdup(display);
|
|
|
|
|
if (!ref->display) {
|
|
|
|
|
free(ref);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-11 09:29:22 -04:00
|
|
|
ref->reference = NULL;
|
|
|
|
|
ref->flags = flags;
|
2020-10-28 06:43:49 -04:00
|
|
|
ref->curr_gen = 0;
|
|
|
|
|
ref->next_gen = 0;
|
2014-03-11 09:29:22 -04:00
|
|
|
ref->unique_id = unique_id;
|
2014-02-11 05:31:40 -05:00
|
|
|
LIST_INIT(&ref->head);
|
2023-11-26 05:56:08 -05:00
|
|
|
ref->ebmb_root = EB_ROOT;
|
2014-02-11 05:31:40 -05:00
|
|
|
LIST_INIT(&ref->pat);
|
2023-08-21 12:44:24 -04:00
|
|
|
HA_RWLOCK_INIT(&ref->lock);
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&pattern_reference, &ref->list);
|
2014-02-11 05:31:40 -05:00
|
|
|
|
|
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
/* This function adds entry to <ref>. It can fail on memory error. It returns
 * the newly added element on success, or NULL on failure. The PATREF_LOCK on
 * <ref> must be held. It sets the newly created pattern's generation number
 * to the same value as the reference's.
 */
struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line)
{
	struct pat_ref_elt *elt;
	int len = strlen(pattern);

	/* the pattern string is stored in the flexible array at the end of
	 * the element, hence the "+ len + 1" in the allocation
	 */
	elt = calloc(1, sizeof(*elt) + len + 1);
	if (!elt)
		goto fail;

	elt->gen_id = ref->curr_gen;
	elt->line = line;

	/* copy the key including its terminating NUL */
	memcpy((char*)elt->pattern, pattern, len + 1);

	if (sample) {
		elt->sample = strdup(sample);
		if (!elt->sample)
			goto fail;
	}

	LIST_INIT(&elt->back_refs);
	elt->list_head = NULL;
	elt->tree_head = NULL;
	LIST_APPEND(&ref->head, &elt->list);
	/* Even if calloc()'ed, ensure this node is not linked to a tree. */
	elt->node.node.leaf_p = NULL;
	ebst_insert(&ref->ebmb_root, &elt->node);
	return elt;

fail:
	/* free(NULL) is a no-op, so this covers both failure points */
	free(elt);
	return NULL;
}
/* This function creates sample found in <elt>, parses the pattern also
 * found in <elt> and inserts it in <expr>. The function copies <patflags>
 * into <expr>. If the function fails, it returns 0 and <err> is filled.
 * In success case, the function returns 1.
 */
int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr,
                 int patflags, char **err)
{
	struct sample_data *data;
	struct pattern pattern;

	/* Create sample */
	if (elt->sample && expr->pat_head->parse_smp) {
		/* New sample. */
		data = malloc(sizeof(*data));
		if (!data)
			return 0;

		/* Parse value. */
		if (!expr->pat_head->parse_smp(elt->sample, data)) {
			memprintf(err, "unable to parse '%s'", elt->sample);
			free(data);
			return 0;
		}

	}
	else
		data = NULL;

	/* initialise pattern */
	memset(&pattern, 0, sizeof(pattern));
	pattern.data = data;
	pattern.ref = elt;

	/* parse pattern */
	if (!expr->pat_head->parse(elt->pattern, &pattern, expr->mflags, err)) {
		free(data);
		return 0;
	}

	/* index pattern: the expr lock must be held while the pattern is
	 * inserted into the expression's storage
	 */
	HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
	/* index pattern */
	if (!expr->pat_head->index(expr, &pattern, err)) {
		HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		free(data);
		return 0;
	}
	HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);

	return 1;
}
2020-10-28 13:45:45 -04:00
|
|
|
/* This function tries to commit entry <elt> into <ref>. The new entry must
|
|
|
|
|
* have already been inserted using pat_ref_append(), and its generation number
|
|
|
|
|
* may have been adjusted as it will not be changed. <err> must point to a NULL
|
|
|
|
|
* pointer. The PATREF lock on <ref> must be held. All the pattern_expr for
|
|
|
|
|
* this reference will be updated (parsing, indexing). On success, non-zero is
|
|
|
|
|
* returned. On failure, all the operation is rolled back (the element is
|
|
|
|
|
* deleted from all expressions and is freed), zero is returned and the error
|
|
|
|
|
* pointer <err> may have been updated (and the caller must free it). Failure
|
|
|
|
|
* causes include memory allocation, parsing error or indexing error.
|
|
|
|
|
*/
|
2021-01-15 08:11:59 -05:00
|
|
|
int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err)
|
2020-10-28 13:45:45 -04:00
|
|
|
{
|
|
|
|
|
struct pattern_expr *expr;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(expr, &ref->pat, list) {
|
|
|
|
|
if (!pat_ref_push(elt, expr, 0, err)) {
|
|
|
|
|
pat_ref_delete_by_ptr(ref, elt);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-29 04:21:43 -04:00
|
|
|
/* Loads <pattern>:<sample> into <ref> for generation <gen>. <sample> may be
|
|
|
|
|
* NULL if none exists (e.g. ACL). If not needed, the generation number should
|
|
|
|
|
* be set to ref->curr_gen. The error pointer must initially point to NULL. The
|
|
|
|
|
* new entry will be propagated to all use places, involving allocation, parsing
|
|
|
|
|
* and indexing. On error (parsing, allocation), the operation will be rolled
|
|
|
|
|
* back, an error may be reported, and NULL will be reported. On success, the
|
|
|
|
|
* freshly allocated element will be returned. The PATREF lock on <ref> must be
|
|
|
|
|
* held during the operation.
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen,
|
|
|
|
|
const char *pattern, const char *sample,
|
|
|
|
|
int line, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
|
|
|
|
elt = pat_ref_append(ref, pattern, sample, line);
|
|
|
|
|
if (elt) {
|
|
|
|
|
elt->gen_id = gen;
|
2021-01-15 08:11:59 -05:00
|
|
|
if (!pat_ref_commit_elt(ref, elt, err))
|
2020-10-29 04:21:43 -04:00
|
|
|
elt = NULL;
|
|
|
|
|
} else
|
|
|
|
|
memprintf(err, "out of memory error");
|
|
|
|
|
|
|
|
|
|
return elt;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-28 05:58:05 -04:00
|
|
|
/* This function adds entry to <ref>. It can fail on memory error. The new
|
2014-01-29 18:27:15 -05:00
|
|
|
* entry is added at all the pattern_expr registered in this reference. The
|
2020-10-28 05:58:05 -04:00
|
|
|
* function stops on the first error encountered. It returns 0 and <err> is
|
2014-01-29 18:27:15 -05:00
|
|
|
* filled. If an error is encountered, the complete add operation is cancelled.
|
|
|
|
|
* If the insertion is a success the function returns 1.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
int pat_ref_add(struct pat_ref *ref,
|
|
|
|
|
const char *pattern, const char *sample,
|
2014-04-28 05:18:57 -04:00
|
|
|
char **err)
|
2014-02-11 05:31:40 -05:00
|
|
|
{
|
2020-10-29 04:21:43 -04:00
|
|
|
return !!pat_ref_load(ref, ref->curr_gen, pattern, sample, -1, err);
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
/* This function purges all elements from <ref> whose generation is included in
 * the range of <from> to <to> (inclusive), taking wrapping into consideration.
 * It will not purge more than <budget> entries at once, in order to remain
 * responsive. If budget is negative, no limit is applied.
 * The caller must already hold the PATREF_LOCK on <ref>. The function will
 * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns
 * non-zero on completion, or zero if it had to stop before the end after
 * <budget> was depleted.
 */
int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget)
{
	struct pat_ref_elt *elt, *elt_bck;
	struct bref *bref, *bref_bck;
	struct pattern_expr *expr;
	int done;

	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);

	/* all expr are locked, we can safely remove all pat_ref */

	/* assume completion for e.g. empty lists */
	done = 1;
	list_for_each_entry_safe(elt, elt_bck, &ref->head, list) {
		/* unsigned subtraction makes this range test wrap-safe:
		 * elt->gen_id is in [from, to] iff (gen_id - from) <= (to - from)
		 */
		if (elt->gen_id - from > to - from)
			continue;

		if (budget >= 0 && !budget--) {
			done = 0;
			break;
		}

		/*
		 * we have to unlink all watchers from this reference pattern. We must
		 * not relink them if this elt was the last one in the list.
		 */
		list_for_each_entry_safe(bref, bref_bck, &elt->back_refs, users) {
			LIST_DELETE(&bref->users);
			LIST_INIT(&bref->users);
			/* move the watcher onto the next element, unless we are
			 * at the end of the list
			 */
			if (elt->list.n != &ref->head)
				LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
			bref->ref = elt->list.n;
		}

		/* delete the storage for all representations of this pattern. */
		pat_delete_gen(ref, elt);

		LIST_DELETE(&elt->list);
		ebmb_delete(&elt->node);
		free(elt->sample);
		free(elt);
	}

	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);

	return done;
}
/* This function prunes all entries of <ref> and all their associated
 * pattern_expr. It may return before the end of the list is reached,
 * returning 0, to yield, indicating to the caller that it must call it again.
 * until it returns non-zero. All patterns are purged, both current ones and
 * future or incomplete ones. This is used by "clear map" or "clear acl".
 */
int pat_ref_prune(struct pat_ref *ref)
{
	/* full generation range (0..~0 wraps over everything), at most 100
	 * entries per call so the caller can yield between invocations
	 */
	return pat_ref_purge_range(ref, 0, ~0, 100);
}
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up any existing reference <ref> in pattern_head <head>, and
|
|
|
|
|
* returns the associated pattern_expr pointer if found, otherwise NULL.
|
|
|
|
|
*/
|
2014-02-11 05:31:40 -05:00
|
|
|
struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref)
|
|
|
|
|
{
|
2014-01-20 08:29:33 -05:00
|
|
|
struct pattern_expr_list *expr;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2014-01-20 08:29:33 -05:00
|
|
|
list_for_each_entry(expr, &head->head, list)
|
|
|
|
|
if (expr->expr->ref == ref)
|
|
|
|
|
return expr->expr;
|
2014-02-11 05:31:40 -05:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
/* This function creates new pattern_expr associated to the reference <ref>.
 * <ref> can be NULL. If an error occurs, the function returns NULL and
 * <err> is filled. Otherwise, the function returns new pattern_expr linked
 * with <head> and <ref>.
 *
 * The returned value can be an already filled pattern list, in this case the
 * flag <reuse> is set.
 */
struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref,
                                      int patflags, char **err, int *reuse)
{
	struct pattern_expr *expr;
	struct pattern_expr_list *list;

	if (reuse)
		*reuse = 0;

	/* Memory and initialization of the chain element. */
	list = calloc(1, sizeof(*list));
	if (!list) {
		memprintf(err, "out of memory");
		return NULL;
	}

	/* Look for existing similar expr. Note that only the index, parse and
	 * parse_smp function must be identical for having similar pattern.
	 * The other function depends of these first.
	 */
	if (ref) {
		list_for_each_entry(expr, &ref->pat, list)
			if (expr->pat_head->index == head->index &&
			    expr->pat_head->parse == head->parse &&
			    expr->pat_head->parse_smp == head->parse_smp &&
			    expr->mflags == patflags)
				break;
		/* list_for_each_entry() leaves <expr> pointing at the list
		 * head when nothing matched
		 */
		if (&expr->list == &ref->pat)
			expr = NULL;
	}
	else
		expr = NULL;

	/* If no similar expr was found, we create new expr. */
	if (!expr) {
		/* Get a lot of memory for the expr struct. */
		expr = calloc(1, sizeof(*expr));
		if (!expr) {
			free(list);
			memprintf(err, "out of memory");
			return NULL;
		}

		/* Initialize this new expr. */
		pattern_init_expr(expr);

		/* Copy the pattern matching and indexing flags. */
		expr->mflags = patflags;

		/* This new pattern expression reference one of his heads. */
		expr->pat_head = head;

		/* Link with ref, or to self to facilitate LIST_DELETE() */
		if (ref)
			LIST_APPEND(&ref->pat, &expr->list);
		else
			LIST_INIT(&expr->list);

		expr->ref = ref;

		HA_RWLOCK_INIT(&expr->lock);
	}
	else {
		if (reuse)
			*reuse = 1;
	}

	/* each pattern_expr_list holding the expr takes a reference; the last
	 * releaser frees it (prevents UAF when the expr is shared)
	 */
	HA_ATOMIC_INC(&expr->refcount);

	/* The new list element reference the pattern_expr. */
	list->expr = expr;

	/* Link the list element with the pattern_head. */
	LIST_APPEND(&head->head, &list->list);
	return expr;
}
2014-01-29 07:29:45 -05:00
|
|
|
/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
 * be returned there on errors and the caller will have to free it.
 *
 * The file contains one key + value per line. Lines which start with '#' are
 * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is
 * then the first "word" (series of non-space/tabs characters), and the value is
 * what follows this series of space/tab till the end of the line excluding
 * trailing spaces/tabs.
 *
 * Example :
 *
 *     # this is a comment and is ignored
 *        62.212.114.60     1wt.eu      \n
 *     <-><-----------><---><----><---->
 *      |       |        |     |    `--- trailing spaces ignored
 *      |       |        |     `-------- value
 *      |       |        `--------------- middle spaces ignored
 *      |       `------------------------ key
 *      `-------------------------------- leading spaces ignored
 *
 * Return non-zero in case of success, otherwise 0.
 */
int pat_ref_read_from_file_smp(struct pat_ref *ref, char **err)
{
	FILE *file;
	char *c;
	int ret = 0;
	int line = 0;
	char *key_beg;
	char *key_end;
	char *value_beg;
	char *value_end;

	file = fopen(ref->reference, "r");
	if (!file) {
		if (ref->flags & PAT_REF_ID) {
			/* file not found for an optional file, switch it to a virtual list of patterns */
			ref->flags &= ~PAT_REF_FILE;
			return 1;
		}
		memprintf(err, "failed to open pattern file <%s>", ref->reference);
		return 0;
	}
	/* the file exists: mark the reference as file-backed */
	ref->flags |= PAT_REF_FILE;

	/* now parse all patterns. The file may contain only one pattern
	 * followed by one value per line. Leading, separator and trailing
	 * spaces are stripped. Only lines whose first character is '#' are
	 * treated as comments and skipped.
	 */
	while (fgets(trash.area, trash.size, file) != NULL) {
		line++;
		c = trash.area;

		/* ignore comment lines beginning with '#' */
		if (*c == '#')
			continue;

		/* strip leading spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* empty lines are ignored too */
		if (*c == '\0' || *c == '\r' || *c == '\n')
			continue;

		/* look for the end of the key */
		key_beg = c;
		while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
			c++;

		key_end = c;

		/* strip middle spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* look for the end of the value, it is the end of the line */
		value_beg = c;
		while (*c && *c != '\n' && *c != '\r')
			c++;
		value_end = c;

		/* trim possibly trailing spaces and tabs */
		while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
			value_end--;

		/* set final \0 and check entries */
		*key_end = '\0';
		*value_end = '\0';

		/* insert values */
		if (!pat_ref_append(ref, key_beg, value_beg, line)) {
			memprintf(err, "out of memory");
			goto out_close;
		}
	}

	if (ferror(file)) {
		memprintf(err, "error encountered while reading <%s> : %s",
			  ref->reference, strerror(errno));
		goto out_close;
	}
	/* success */
	ret = 1;

 out_close:
	fclose(file);
	return ret;
}
|
|
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
|
|
|
|
|
* be returned there on errors and the caller will have to free it.
|
|
|
|
|
*/
|
2023-12-01 06:04:13 -05:00
|
|
|
int pat_ref_read_from_file(struct pat_ref *ref, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
|
|
|
|
FILE *file;
|
|
|
|
|
char *c;
|
|
|
|
|
char *arg;
|
|
|
|
|
int ret = 0;
|
|
|
|
|
int line = 0;
|
|
|
|
|
|
2023-12-01 06:04:13 -05:00
|
|
|
file = fopen(ref->reference, "r");
|
2013-11-28 05:05:19 -05:00
|
|
|
if (!file) {
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
if (ref->flags & PAT_REF_ID) {
|
|
|
|
|
/* file not found for an optional file, switch it to a virtual list of patterns */
|
|
|
|
|
ref->flags &= ~PAT_REF_FILE;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2023-12-01 06:04:13 -05:00
|
|
|
memprintf(err, "failed to open pattern file <%s>", ref->reference);
|
2013-11-28 05:05:19 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* now parse all patterns. The file may contain only one pattern per
|
|
|
|
|
* line. If the line contains spaces, they will be part of the pattern.
|
|
|
|
|
* The pattern stops at the first CR, LF or EOF encountered.
|
|
|
|
|
*/
|
2018-07-13 04:54:26 -04:00
|
|
|
while (fgets(trash.area, trash.size, file) != NULL) {
|
2013-11-28 05:05:19 -05:00
|
|
|
line++;
|
2018-07-13 04:54:26 -04:00
|
|
|
c = trash.area;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
|
|
|
|
/* ignore lines beginning with a dash */
|
|
|
|
|
if (*c == '#')
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* strip leading spaces and tabs */
|
|
|
|
|
while (*c == ' ' || *c == '\t')
|
|
|
|
|
c++;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
arg = c;
|
|
|
|
|
while (*c && *c != '\n' && *c != '\r')
|
|
|
|
|
c++;
|
|
|
|
|
*c = 0;
|
|
|
|
|
|
|
|
|
|
/* empty lines are ignored too */
|
|
|
|
|
if (c == arg)
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
if (!pat_ref_append(ref, arg, NULL, line)) {
|
2023-12-01 06:04:13 -05:00
|
|
|
memprintf(err, "out of memory when loading patterns from file <%s>", ref->reference);
|
2013-11-28 05:05:19 -05:00
|
|
|
goto out_close;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-17 10:09:33 -05:00
|
|
|
if (ferror(file)) {
|
|
|
|
|
memprintf(err, "error encountered while reading <%s> : %s",
|
2023-12-01 06:04:13 -05:00
|
|
|
ref->reference, strerror(errno));
|
2020-01-17 10:09:33 -05:00
|
|
|
goto out_close;
|
|
|
|
|
}
|
2013-11-28 05:05:19 -05:00
|
|
|
ret = 1; /* success */
|
|
|
|
|
|
|
|
|
|
out_close:
|
|
|
|
|
fclose(file);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
/* Loads the patterns from file <filename> into the pattern head <head>.
 * <refflags> are the PAT_REF_* flags for the underlying pat_ref (map vs acl,
 * file kind). <patflags> are the matching flags for the expression. If
 * <load_smp> is non-zero the file is parsed as a two-column (key + sample)
 * file, otherwise as a one-column pattern file. <file> and <line> identify
 * the configuration location referencing the pattern, used to build the
 * display string. On error, a message is left in <err> (to be freed by the
 * caller). Returns non-zero on success, 0 on failure.
 */
int pattern_read_from_file(struct pattern_head *head, unsigned int refflags,
                           const char *filename, int patflags, int load_smp,
                           char **err, const char *file, int line)
{
	struct pat_ref *ref;
	struct pattern_expr *expr;
	struct pat_ref_elt *elt;
	int reuse = 0;

	/* Lookup for the existing reference. */
	ref = pat_ref_lookup(filename);

	/* If the reference doesn't exists, create it and load associated file. */
	if (!ref) {
		chunk_printf(&trash,
		             "pattern loaded from file '%s' used by %s at file '%s' line %d",
		             filename, refflags & PAT_REF_MAP ? "map" : "acl", file, line);

		ref = pat_ref_new(filename, trash.area, refflags);
		if (!ref) {
			memprintf(err, "out of memory");
			return 0;
		}

		/* Only read from disk when the reference is file-backed
		 * (virtual "virt@" references have nothing to load).
		 */
		if (ref->flags & PAT_REF_FILE) {
			if (load_smp) {
				/* two-column file: keys with associated samples */
				ref->flags |= PAT_REF_SMP;
				if (!pat_ref_read_from_file_smp(ref, err))
					return 0;
			}
			else {
				/* one-column file: keys only */
				if (!pat_ref_read_from_file(ref, err))
					return 0;
			}
		}
	}
	else {
		/* The reference already exists, check the map compatibility. */

		/* If the load require samples and the flag PAT_REF_SMP is not set,
		 * the reference doesn't contain sample, and cannot be used.
		 */
		if (load_smp) {
			if (!(ref->flags & PAT_REF_SMP)) {
				memprintf(err, "The file \"%s\" is already used as one column file "
				               "and cannot be used by as two column file.",
				               filename);
				return 0;
			}
		}
		else {
			/* The load doesn't require samples. If the flag PAT_REF_SMP is
			 * set, the reference contains a sample, and cannot be used.
			 */
			if (ref->flags & PAT_REF_SMP) {
				memprintf(err, "The file \"%s\" is already used as two column file "
				               "and cannot be used by as one column file.",
				               filename);
				return 0;
			}
		}

		/* Extends display */
		chunk_printf(&trash, "%s", ref->display);
		chunk_appendf(&trash, ", by %s at file '%s' line %d",
		              refflags & PAT_REF_MAP ? "map" : "acl", file, line);
		free(ref->display);
		ref->display = strdup(trash.area);
		if (!ref->display) {
			memprintf(err, "out of memory");
			return 0;
		}

		/* Merge flags. */
		ref->flags |= refflags;
	}

	/* Now, we can loading patterns from the reference. */

	/* Lookup for existing reference in the head. If the reference
	 * doesn't exists, create it.
	 */
	expr = pattern_lookup_expr(head, ref);
	if (!expr || (expr->mflags != patflags)) {
		expr = pattern_new_expr(head, ref, patflags, err, &reuse);
		if (!expr)
			return 0;
	}

	/* The returned expression may be not empty, because the function
	 * "pattern_new_expr" lookup for similar pattern list and can
	 * reuse a already filled pattern list. In this case, we can not
	 * reload the patterns.
	 */
	if (reuse)
		return 1;

	/* Load reference content in the pattern expression.
	 * We need to load elements in the same order they were seen in the
	 * file. Indeed, some list-based matching types may rely on it as the
	 * list is positional, and for tree-based matching, even if the tree is
	 * content-based in case of duplicated keys we only want the first key
	 * in the file to be considered.
	 */
	list_for_each_entry(elt, &ref->head, list) {
		if (!pat_ref_push(elt, expr, patflags, err)) {
			if (elt->line > 0)
				memprintf(err, "%s at line %d of file '%s'",
				          *err, elt->line, filename);
			return 0;
		}
	}

	return 1;
}
|
|
|
|
|
|
2014-01-17 09:25:13 -05:00
|
|
|
/* This function executes a pattern match on a sample. It applies pattern <expr>
 * to sample <smp>. The function returns NULL if the sample don't match. It returns
 * non-null if the sample match. If <fill> is true and the sample match, the
 * function returns the matched pattern. In many cases, this pattern can be a
 * static buffer (it must not be freed nor kept across calls).
 */
struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill)
{
	struct pattern_expr_list *list;
	struct pattern *pat;

	/* no match function registered: the head matches unconditionally
	 * ("found" semantics); optionally fill a dummy boolean-like pattern.
	 */
	if (!head->match) {
		if (fill) {
			static_pattern.data = NULL;
			static_pattern.ref = NULL;
			static_pattern.sflags = 0;
			static_pattern.type = SMP_T_SINT;
			static_pattern.val.i = 1;
		}
		return &static_pattern;
	}

	/* convert input to the type expected by the match function */
	if (!sample_convert(smp, head->expect_type))
		return NULL;

	/* try each expression in turn under its read lock; stop at the
	 * first match.
	 */
	list_for_each_entry(list, &head->head, list) {
		HA_RWLOCK_RDLOCK(PATEXP_LOCK, &list->expr->lock);
		pat = head->match(smp, list->expr, fill);
		if (pat) {
			/* We duplicate the pattern cause it could be modified
			   by another thread once the lock is released */
			if (pat != &static_pattern) {
				memcpy(&static_pattern, pat, sizeof(struct pattern));
				pat = &static_pattern;
			}

			/* We also duplicate the sample data for
			   same reason */
			if (pat->data && (pat->data != &static_sample_data)) {
				switch(pat->data->type) {
				case SMP_T_STR:
					/* copy the string into a trash chunk,
					 * truncating it if needed to fit, and
					 * NUL-terminate it.
					 */
					static_sample_data.type = SMP_T_STR;
					static_sample_data.u.str = *get_trash_chunk();
					static_sample_data.u.str.data = pat->data->u.str.data;
					if (static_sample_data.u.str.data >= static_sample_data.u.str.size)
						static_sample_data.u.str.data = static_sample_data.u.str.size - 1;
					memcpy(static_sample_data.u.str.area,
					       pat->data->u.str.area, static_sample_data.u.str.data);
					static_sample_data.u.str.area[static_sample_data.u.str.data] = 0;
					pat->data = &static_sample_data;
					break;

				case SMP_T_IPV4:
				case SMP_T_IPV6:
				case SMP_T_SINT:
					/* fixed-size types: a flat copy is enough */
					memcpy(&static_sample_data, pat->data, sizeof(struct sample_data));
					pat->data = &static_sample_data;
					break;
				default:
					/* unimplemented pattern type */
					pat->data = NULL;
					break;
				}
			}
			HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
			return pat;
		}
		HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
	}
	return NULL;
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function prunes the pattern expressions starting at pattern_head <head>. */
|
2014-02-11 05:31:40 -05:00
|
|
|
void pattern_prune(struct pattern_head *head)
|
2014-01-14 10:24:51 -05:00
|
|
|
{
|
2014-01-20 08:29:33 -05:00
|
|
|
struct pattern_expr_list *list, *safe;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2014-01-20 08:29:33 -05:00
|
|
|
list_for_each_entry_safe(list, safe, &head->head, list) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&list->list);
|
BUG/MEDIUM: pattern: prevent UAF on reused pattern expr
Since c5959fd ("MEDIUM: pattern: merge same pattern"), UAF (leading to
crash) can be experienced if the same pattern file (and match method) is
used in two default sections and the first one is not referenced later in
the config. In this case, the first default section will be cleaned up.
However, due to an unhandled case in the above optimization, the original
expr which the second default section relies on is mistakenly freed.
This issue was discovered while trying to reproduce GH #2708. The issue
was particularly tricky to reproduce given the config and sequence
required to make the UAF happen. Hopefully, Github user @asmnek not only
provided useful informations, but since he was able to consistently
trigger the crash in his environment he was able to nail down the crash to
the use of pattern file involved with 2 named default sections. Big thanks
to him.
To fix the issue, let's push the logic from c5959fd a bit further. Instead
of relying on "do_free" variable to know if the expression should be freed
or not (which proved to be insufficient in our case), let's switch to a
simple refcounting logic. This way, no matter who owns the expression, the
last one attempting to free it will be responsible for freeing it.
Refcount is implemented using a 32bit value which fills a previous 4 bytes
structure gap:
int mflags; /* 80 4 */
/* XXX 4 bytes hole, try to pack */
long unsigned int lock; /* 88 8 */
(output from pahole)
Even though it was not reproduced in 2.6 or below by @asmnek (the bug was
revealed thanks to another bugfix), this issue theorically affects all
stable versions (up to c5959fd), thus it should be backported to all
stable versions.
2024-09-09 08:59:19 -04:00
|
|
|
if (HA_ATOMIC_SUB_FETCH(&list->expr->refcount, 1) == 0) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&list->expr->list);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_RWLOCK_WRLOCK(PATEXP_LOCK, &list->expr->lock);
|
2014-01-20 08:29:33 -05:00
|
|
|
head->prune(list->expr);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &list->expr->lock);
|
2014-01-20 08:29:33 -05:00
|
|
|
free(list->expr);
|
|
|
|
|
}
|
|
|
|
|
free(list);
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
2014-01-14 10:24:51 -05:00
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function compares two pat_ref** on their unique_id, and returns -1/0/1
|
|
|
|
|
* depending on their order (suitable for sorting).
|
|
|
|
|
*/
|
2020-02-27 10:45:50 -05:00
|
|
|
static int cmp_pat_ref(const void *_a, const void *_b)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref * const *a = _a;
|
|
|
|
|
struct pat_ref * const *b = _b;
|
|
|
|
|
|
|
|
|
|
if ((*a)->unique_id < (*b)->unique_id)
|
|
|
|
|
return -1;
|
|
|
|
|
else if ((*a)->unique_id > (*b)->unique_id)
|
|
|
|
|
return 1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function finalizes the configuration parsing. It sets all the
|
|
|
|
|
* automatic ids.
|
2014-03-11 09:29:22 -04:00
|
|
|
*/
|
2020-02-27 10:45:50 -05:00
|
|
|
int pattern_finalize_config(void)
|
2014-03-11 09:29:22 -04:00
|
|
|
{
|
2020-03-17 16:08:24 -04:00
|
|
|
size_t len = 0;
|
|
|
|
|
size_t unassigned_pos = 0;
|
2020-02-27 10:45:50 -05:00
|
|
|
int next_unique_id = 0;
|
2020-03-17 16:08:24 -04:00
|
|
|
size_t i, j;
|
2020-02-27 10:45:50 -05:00
|
|
|
struct pat_ref *ref, **arr;
|
2014-03-11 09:29:22 -04:00
|
|
|
struct list pr = LIST_HEAD_INIT(pr);
|
|
|
|
|
|
BUG/MEDIUM: random: implement a thread-safe and process-safe PRNG
This is the replacement of failed attempt to add thread safety and
per-process sequences of random numbers initally tried with commit
1c306aa84d ("BUG/MEDIUM: random: implement per-thread and per-process
random sequences").
This new version takes a completely different approach and doesn't try
to work around the horrible OS-specific and non-portable random API
anymore. Instead it implements "xoroshiro128**", a reputedly high
quality random number generator, which is one of the many variants of
xorshift, which passes all quality tests and which is described here:
http://prng.di.unimi.it/
While not cryptographically secure, it is fast and features a 2^128-1
period. It supports fast jumps allowing to cut the period into smaller
non-overlapping sequences, which we use here to support up to 2^32
processes each having their own, non-overlapping sequence of 2^96
numbers (~7*10^28). This is enough to provide 1 billion randoms per
second and per process for 2200 billion years.
The implementation was made thread-safe either by using a double 64-bit
CAS on platforms supporting it (x86_64, aarch64) or by using a local
lock for the time needed to perform the shift operations. This ensures
that all threads pick numbers from the same pool so that it is not
needed to assign per-thread ranges. For processes we use the fast jump
method to advance the sequence by 2^96 for each process.
Before this patch, the following config:
global
nbproc 8
frontend f
bind :4445
mode http
log stdout format raw daemon
log-format "%[uuid] %pid"
redirect location /
Would produce this output:
a4d0ad64-2645-4b74-b894-48acce0669af 12987
a4d0ad64-2645-4b74-b894-48acce0669af 12992
a4d0ad64-2645-4b74-b894-48acce0669af 12986
a4d0ad64-2645-4b74-b894-48acce0669af 12988
a4d0ad64-2645-4b74-b894-48acce0669af 12991
a4d0ad64-2645-4b74-b894-48acce0669af 12989
a4d0ad64-2645-4b74-b894-48acce0669af 12990
82d5f6cd-f6c1-4f85-a89c-36ae85d26fb9 12987
82d5f6cd-f6c1-4f85-a89c-36ae85d26fb9 12992
82d5f6cd-f6c1-4f85-a89c-36ae85d26fb9 12986
(...)
And now produces:
f94b29b3-da74-4e03-a0c5-a532c635bad9 13011
47470c02-4862-4c33-80e7-a952899570e5 13014
86332123-539a-47bf-853f-8c8ea8b2a2b5 13013
8f9efa99-3143-47b2-83cf-d618c8dea711 13012
3cc0f5c7-d790-496b-8d39-bec77647af5b 13015
3ec64915-8f95-4374-9e66-e777dc8791e0 13009
0f9bf894-dcde-408c-b094-6e0bb3255452 13011
49c7bfde-3ffb-40e9-9a8d-8084d650ed8f 13014
e23f6f2e-35c5-4433-a294-b790ab902653 13012
There are multiple benefits to using this method. First, it doesn't
depend anymore on a non-portable API. Second it's thread safe. Third it
is fast and more proven than any hack we could attempt to try to work
around the deficiencies of the various implementations around.
This commit depends on previous patches "MINOR: tools: add 64-bit rotate
operators" and "BUG/MEDIUM: random: initialize the random pool a bit
better", all of which will need to be backported at least as far as
version 2.0. It doesn't require to backport the build fixes for circular
include files dependecy anymore.
2020-03-07 18:42:37 -05:00
|
|
|
pat_lru_seed = ha_random();
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-02-27 10:45:50 -05:00
|
|
|
/* Count pat_refs with user defined unique_id and totalt count */
|
2014-03-11 09:29:22 -04:00
|
|
|
list_for_each_entry(ref, &pattern_reference, list) {
|
2020-02-27 10:45:50 -05:00
|
|
|
len++;
|
|
|
|
|
if (ref->unique_id != -1)
|
|
|
|
|
unassigned_pos++;
|
|
|
|
|
}
|
2014-03-11 09:29:22 -04:00
|
|
|
|
2020-03-17 16:08:24 -04:00
|
|
|
if (len == 0) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-27 10:45:50 -05:00
|
|
|
arr = calloc(len, sizeof(*arr));
|
|
|
|
|
if (arr == NULL) {
|
|
|
|
|
ha_alert("Out of memory error.\n");
|
|
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2014-03-11 09:29:22 -04:00
|
|
|
}
|
|
|
|
|
|
2020-02-27 10:45:50 -05:00
|
|
|
i = 0;
|
|
|
|
|
j = unassigned_pos;
|
|
|
|
|
list_for_each_entry(ref, &pattern_reference, list) {
|
|
|
|
|
if (ref->unique_id != -1)
|
|
|
|
|
arr[i++] = ref;
|
|
|
|
|
else
|
|
|
|
|
arr[j++] = ref;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sort first segment of array with user-defined unique ids for
|
|
|
|
|
* fast lookup when generating unique ids
|
|
|
|
|
*/
|
|
|
|
|
qsort(arr, unassigned_pos, sizeof(*arr), cmp_pat_ref);
|
|
|
|
|
|
|
|
|
|
/* Assign unique ids to the rest of the elements */
|
|
|
|
|
for (i = unassigned_pos; i < len; i++) {
|
|
|
|
|
do {
|
|
|
|
|
arr[i]->unique_id = next_unique_id++;
|
|
|
|
|
} while (bsearch(&arr[i], arr, unassigned_pos, sizeof(*arr), cmp_pat_ref));
|
2014-03-11 09:29:22 -04:00
|
|
|
}
|
|
|
|
|
|
2020-02-27 10:45:50 -05:00
|
|
|
/* Sort complete array */
|
|
|
|
|
qsort(arr, len, sizeof(*arr), cmp_pat_ref);
|
|
|
|
|
|
|
|
|
|
/* Convert back to linked list */
|
|
|
|
|
for (i = 0; i < len; i++)
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&pr, &arr[i]->list);
|
2020-02-27 10:45:50 -05:00
|
|
|
|
2014-03-11 09:29:22 -04:00
|
|
|
/* swap root */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_INSERT(&pr, &pattern_reference);
|
|
|
|
|
LIST_DELETE(&pr);
|
2020-02-27 10:45:50 -05:00
|
|
|
|
|
|
|
|
free(arr);
|
|
|
|
|
return 0;
|
2014-03-11 09:29:22 -04:00
|
|
|
}
|
2019-10-23 00:59:31 -04:00
|
|
|
|
|
|
|
|
static int pattern_per_thread_lru_alloc()
|
|
|
|
|
{
|
|
|
|
|
if (!global.tune.pattern_cache)
|
|
|
|
|
return 1;
|
|
|
|
|
pat_lru_tree = lru64_new(global.tune.pattern_cache);
|
|
|
|
|
return !!pat_lru_tree;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void pattern_per_thread_lru_free()
|
|
|
|
|
{
|
|
|
|
|
lru64_destroy(pat_lru_tree);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Hook the LRU cache setup/teardown into the per-thread init/deinit lists */
REGISTER_PER_THREAD_ALLOC(pattern_per_thread_lru_alloc);
REGISTER_PER_THREAD_FREE(pattern_per_thread_lru_free);
|