removed begin, end, and whitespace from reg exp

This commit is contained in:
Logan007 2020-09-16 02:24:08 +05:45
parent 1815790280
commit f71ef12753
3 changed files with 16 additions and 19 deletions

View File

@ -13,22 +13,19 @@ netleo
ntop[0-1][0-9] ntop[0-1][0-9]
# #
# * Note that fixed-name communities must not contain any of the following characters # * Note that fixed-name communities must not contain any of the following characters
# . ^ $ * + ? [ ] \ # . * + ? [ ] \
# as otherwise, they are interpreted as regular expressions # as otherwise, they are interpreted as regular expressions
# #
# * Only fixed-name communities are supported for header encryption (-H) # * Only fixed-name communities are supported for header encryption (-H)
# #
# * Regular expressions support the following placeholders # * Regular expressions support the following placeholders
# '.' Dot, matches any character (meaningless, as full matches only) # '.' Dot, matches any character
# '^' Start anchor, matches beginning of string (meaningless, as full matches only)
# '$' End anchor, matches end of string
# '*' Asterisk, match zero or more (greedy) # '*' Asterisk, match zero or more (greedy)
# '+' Plus, match one or more (greedy) # '+' Plus, match one or more (greedy)
# '?' Question, match zero or one (non-greedy) # '?' Question, match zero or one (non-greedy)
# '[abc]' Character class, match if one of {'a', 'b', 'c'} # '[abc]' Character class, match if one of {'a', 'b', 'c'}
# '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} (feature is currently broken) # '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} (feature is currently broken)
# '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z } # '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
# '\s' Whitespace, \t \f \r \n \v and spaces
# '\S' Non-whitespace # '\S' Non-whitespace
# '\w' Alphanumeric, [a-zA-Z0-9_] # '\w' Alphanumeric, [a-zA-Z0-9_]
# '\W' Non-alphanumeric # '\W' Non-alphanumeric

View File

@ -30,15 +30,15 @@
* Supports: * Supports:
* --------- * ---------
* '.' Dot, matches any character * '.' Dot, matches any character
* '^' Start anchor, matches beginning of string * '^' Start anchor, matches beginning of string -- NOTE: currently disabled (checking for full matches anyway)
* '$' End anchor, matches end of string * '$' End anchor, matches end of string -- NOTE: currently disabled (checking for full matches anyway)
* '*' Asterisk, match zero or more (greedy) * '*' Asterisk, match zero or more (greedy)
* '+' Plus, match one or more (greedy) * '+' Plus, match one or more (greedy)
* '?' Question, match zero or one (non-greedy) * '?' Question, match zero or one (non-greedy)
* '[abc]' Character class, match if one of {'a', 'b', 'c'} * '[abc]' Character class, match if one of {'a', 'b', 'c'}
* '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken! * '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken!
* '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z } * '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
* '\s' Whitespace, \t \f \r \n \v and spaces * '\s' Whitespace, \t \f \r \n \v and spaces -- NOTE: currently disabled (incompatible with optionally provided network delimited by SPACE)
* '\S' Non-whitespace * '\S' Non-whitespace
* '\w' Alphanumeric, [a-zA-Z0-9_] * '\w' Alphanumeric, [a-zA-Z0-9_]
* '\W' Non-alphanumeric * '\W' Non-alphanumeric
@ -58,7 +58,7 @@
#define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */ #define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */
enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ }; enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, /* WHITESPACE,*/ NOT_WHITESPACE, /* BRANCH */ };
typedef struct regex_t typedef struct regex_t
{ {
@ -154,8 +154,8 @@ re_t re_compile(const char* pattern)
switch (c) switch (c)
{ {
/* Meta-characters: */ /* Meta-characters: */
case '^': { re_compiled[j].type = BEGIN; } break; // case '^': { re_compiled[j].type = BEGIN; } break; <-- disabled (always full matches)
case '$': { re_compiled[j].type = END; } break; // case '$': { re_compiled[j].type = END; } break; <-- disabled (always full matches)
case '.': { re_compiled[j].type = DOT; } break; case '.': { re_compiled[j].type = DOT; } break;
case '*': { re_compiled[j].type = STAR; } break; case '*': { re_compiled[j].type = STAR; } break;
case '+': { re_compiled[j].type = PLUS; } break; case '+': { re_compiled[j].type = PLUS; } break;
@ -177,10 +177,10 @@ re_t re_compile(const char* pattern)
case 'D': { re_compiled[j].type = NOT_DIGIT; } break; case 'D': { re_compiled[j].type = NOT_DIGIT; } break;
case 'w': { re_compiled[j].type = ALPHA; } break; case 'w': { re_compiled[j].type = ALPHA; } break;
case 'W': { re_compiled[j].type = NOT_ALPHA; } break; case 'W': { re_compiled[j].type = NOT_ALPHA; } break;
case 's': { re_compiled[j].type = WHITESPACE; } break; // case 's': { re_compiled[j].type = WHITESPACE; } break; <-- disabled (incompatible to optionally provided network delimited by SPACE)
case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break; case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break;
/* Escaped character, e.g. '.' or '$' */ /* Escaped character, e.g. '.' */
default: default:
{ {
re_compiled[j].type = CHAR; re_compiled[j].type = CHAR;
@ -266,7 +266,7 @@ re_t re_compile(const char* pattern)
void re_print(regex_t* pattern) void re_print(regex_t* pattern)
{ {
const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" }; const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", /* "WHITESPACE" ,*/ "NOT_WHITESPACE", /* "BRANCH" */ };
int i; int i;
int j; int j;
@ -332,7 +332,7 @@ static int matchdot(char c)
} }
static int ismetachar(char c) static int ismetachar(char c)
{ {
return ((c == 's') || (c == 'S') || (c == 'w') || (c == 'W') || (c == 'd') || (c == 'D')); return (/*(c == 's') ||*/ (c == 'S') || (c == 'w') || (c == 'W') || (c == 'd') || (c == 'D'));
} }
static int matchmetachar(char c, const char* str) static int matchmetachar(char c, const char* str)
@ -343,7 +343,7 @@ static int matchmetachar(char c, const char* str)
case 'D': return !matchdigit(c); case 'D': return !matchdigit(c);
case 'w': return matchalphanum(c); case 'w': return matchalphanum(c);
case 'W': return !matchalphanum(c); case 'W': return !matchalphanum(c);
case 's': return matchwhitespace(c); // case 's': return matchwhitespace(c);
case 'S': return !matchwhitespace(c); case 'S': return !matchwhitespace(c);
default: return (c == str[0]); default: return (c == str[0]);
} }
@ -398,7 +398,7 @@ static int matchone(regex_t p, char c)
case NOT_DIGIT: return !matchdigit(c); case NOT_DIGIT: return !matchdigit(c);
case ALPHA: return matchalphanum(c); case ALPHA: return matchalphanum(c);
case NOT_ALPHA: return !matchalphanum(c); case NOT_ALPHA: return !matchalphanum(c);
case WHITESPACE: return matchwhitespace(c); // case WHITESPACE: return matchwhitespace(c);
case NOT_WHITESPACE: return !matchwhitespace(c); case NOT_WHITESPACE: return !matchwhitespace(c);
default: return (p.ch == c); default: return (p.ch == c);
} }
@ -419,7 +419,7 @@ static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchle
return 1; return 1;
(*matchlength)--; (*matchlength)--;
} }
*matchlength = prelen; *matchlength = prelen;
return 0; return 0;
} }

View File

@ -76,7 +76,7 @@ static int load_allowed_sn_community(n2n_sn_t *sss, char *path) {
has_net = ( sscanf (line, "%s %s", cmn_str, net_str) == 2 ); has_net = ( sscanf (line, "%s %s", cmn_str, net_str) == 2 );
// if it contains typical characters... // if it contains typical characters...
if(NULL != strpbrk(cmn_str, ".^$*+?[]\\")) { if(NULL != strpbrk(cmn_str, ".*+?[]\\")) {
// ...it is treated as regular expression // ...it is treated as regular expression
re = (struct sn_community_regular_expression*)calloc(1,sizeof(struct sn_community_regular_expression)); re = (struct sn_community_regular_expression*)calloc(1,sizeof(struct sn_community_regular_expression));
if (re) { if (re) {