added regular expression checking

This commit is contained in:
Logan007 2020-08-04 18:49:12 +05:45
parent 7311efa35a
commit b5b18443af
6 changed files with 645 additions and 8 deletions

View File

@ -110,6 +110,7 @@ add_library(n2n STATIC
src/tuntap_netbsd.c src/tuntap_netbsd.c
src/tuntap_linux.c src/tuntap_linux.c
src/tuntap_osx.c src/tuntap_osx.c
src/n2n_regex.c
) )
if(DEFINED WIN32) if(DEFINED WIN32)

View File

@ -155,6 +155,7 @@ typedef struct ether_hdr ether_hdr_t;
#include "pearson.h" #include "pearson.h"
#include "portable_endian.h" #include "portable_endian.h"
#include "speck.h" #include "speck.h"
#include "n2n_regex.h"
#ifdef WIN32 #ifdef WIN32
#define N2N_IFNAMSIZ 64 #define N2N_IFNAMSIZ 64
@ -359,6 +360,13 @@ struct sn_community
UT_hash_handle hh; /* makes this structure hashable */ UT_hash_handle hh; /* makes this structure hashable */
}; };
struct sn_community_regular_expression
{
re_t rule; // compiles regular expression
UT_hash_handle hh; /* makes this structure hashable */
};
typedef struct n2n_sn typedef struct n2n_sn
{ {
time_t start_time; /* Used to measure uptime. */ time_t start_time; /* Used to measure uptime. */
@ -372,8 +380,9 @@ typedef struct n2n_sn
uid_t userid; uid_t userid;
gid_t groupid; gid_t groupid;
#endif #endif
int lock_communities; /* If true, only loaded communities can be used. */ int lock_communities; /* If true, only loaded and matching communities can be used. */
struct sn_community *communities; struct sn_community *communities;
struct sn_community_regular_expression *rules;
} n2n_sn_t; } n2n_sn_t;
/* ************************************** */ /* ************************************** */

80
include/n2n_regex.h Normal file
View File

@ -0,0 +1,80 @@
/**
* (C) 2007-20 - ntop.org and contributors
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not see see <http://www.gnu.org/licenses/>
*
*/
// taken from https://github.com/kokke/tiny-regex-c
// under Unlicense as of August 4, 2020
/*
*
* Mini regex-module inspired by Rob Pike's regex code described in:
*
* http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
*
*
*
* Supports:
* ---------
* '.' Dot, matches any character
* '^' Start anchor, matches beginning of string
* '$' End anchor, matches end of string
* '*' Asterisk, match zero or more (greedy)
* '+' Plus, match one or more (greedy)
* '?' Question, match zero or one (non-greedy)
* '[abc]' Character class, match if one of {'a', 'b', 'c'}
* '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken!
* '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
* '\s' Whitespace, \t \f \r \n \v and spaces
* '\S' Non-whitespace
* '\w' Alphanumeric, [a-zA-Z0-9_]
* '\W' Non-alphanumeric
* '\d' Digits, [0-9]
* '\D' Non-digits
*
*
*/
#ifndef _N2N_REGEX_
#define _N2N_REGEX_
#ifdef __cplusplus
extern "C"{
#endif
#include <stdio.h>
/* Typedef'd pointer to get abstract datatype. */
typedef struct regex_t* re_t;
/* Compile regex string pattern to a regex_t-array. */
re_t re_compile(const char* pattern);
/* Find matches of the compiled pattern inside text. */
int re_matchp(re_t pattern, const char* text, int* matchlenght);
/* Find matches of the txt pattern inside text (will compile automatically first). */
int re_match(const char* pattern, const char* text, int* matchlenght);
#ifdef __cplusplus
}
#endif
#endif

523
src/n2n_regex.c Normal file
View File

@ -0,0 +1,523 @@
/**
* (C) 2007-20 - ntop.org and contributors
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not see see <http://www.gnu.org/licenses/>
*
*/
// taken from https://github.com/kokke/tiny-regex-c
// under Unlicense as of August 4, 2020
/*
*
* Mini regex-module inspired by Rob Pike's regex code described in:
*
* http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
*
*
*
* Supports:
* ---------
* '.' Dot, matches any character
* '^' Start anchor, matches beginning of string
* '$' End anchor, matches end of string
* '*' Asterisk, match zero or more (greedy)
* '+' Plus, match one or more (greedy)
* '?' Question, match zero or one (non-greedy)
* '[abc]' Character class, match if one of {'a', 'b', 'c'}
* '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken!
* '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
* '\s' Whitespace, \t \f \r \n \v and spaces
* '\S' Non-whitespace
* '\w' Alphanumeric, [a-zA-Z0-9_]
* '\W' Non-alphanumeric
* '\d' Digits, [0-9]
* '\D' Non-digits
*
*
*/
#include "n2n_regex.h"
/* Definitions: */
#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */
enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };
typedef struct regex_t
{
unsigned char type; /* CHAR, STAR, etc. */
union
{
unsigned char ch; /* the character itself */
unsigned char* ccl; /* OR a pointer to characters in class */
};
} regex_t;
/* Private function declarations: */
static int matchpattern(regex_t* pattern, const char* text, int* matchlength);
static int matchcharclass(char c, const char* str);
static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength);
static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength);
static int matchone(regex_t p, char c);
static int matchdigit(char c);
static int matchalpha(char c);
static int matchwhitespace(char c);
static int matchmetachar(char c, const char* str);
static int matchrange(char c, const char* str);
static int matchdot(char c);
static int ismetachar(char c);
/* Public functions: */
int re_match(const char* pattern, const char* text, int* matchlength)
{
return re_matchp(re_compile(pattern), text, matchlength);
}
int re_matchp(re_t pattern, const char* text, int* matchlength)
{
*matchlength = 0;
if (pattern != 0)
{
if (pattern[0].type == BEGIN)
{
return ((matchpattern(&pattern[1], text, matchlength)) ? 0 : -1);
}
else
{
int idx = -1;
do
{
idx += 1;
if (matchpattern(pattern, text, matchlength))
{
if (text[0] == '\0')
return -1;
return idx;
}
}
while (*text++ != '\0');
}
}
return -1;
}
re_t re_compile(const char* pattern)
{
/* The sizes of the two static arrays below substantiates the static RAM usage of this module.
MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */
static regex_t re_compiled[MAX_REGEXP_OBJECTS];
static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
int ccl_bufidx = 1;
char c; /* current char in pattern */
int i = 0; /* index into pattern */
int j = 0; /* index into re_compiled */
while (pattern[i] != '\0' && (j+1 < MAX_REGEXP_OBJECTS))
{
c = pattern[i];
switch (c)
{
/* Meta-characters: */
case '^': { re_compiled[j].type = BEGIN; } break;
case '$': { re_compiled[j].type = END; } break;
case '.': { re_compiled[j].type = DOT; } break;
case '*': { re_compiled[j].type = STAR; } break;
case '+': { re_compiled[j].type = PLUS; } break;
case '?': { re_compiled[j].type = QUESTIONMARK; } break;
/* case '|': { re_compiled[j].type = BRANCH; } break; <-- not working properly */
/* Escaped character-classes (\s \w ...): */
case '\\':
{
if (pattern[i+1] != '\0')
{
/* Skip the escape-char '\\' */
i += 1;
/* ... and check the next */
switch (pattern[i])
{
/* Meta-character: */
case 'd': { re_compiled[j].type = DIGIT; } break;
case 'D': { re_compiled[j].type = NOT_DIGIT; } break;
case 'w': { re_compiled[j].type = ALPHA; } break;
case 'W': { re_compiled[j].type = NOT_ALPHA; } break;
case 's': { re_compiled[j].type = WHITESPACE; } break;
case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break;
/* Escaped character, e.g. '.' or '$' */
default:
{
re_compiled[j].type = CHAR;
re_compiled[j].ch = pattern[i];
} break;
}
}
/* '\\' as last char in pattern -> invalid regular expression. */
/*
else
{
re_compiled[j].type = CHAR;
re_compiled[j].ch = pattern[i];
}
*/
} break;
/* Character class: */
case '[':
{
/* Remember where the char-buffer starts. */
int buf_begin = ccl_bufidx;
/* Look-ahead to determine if negated */
if (pattern[i+1] == '^')
{
re_compiled[j].type = INV_CHAR_CLASS;
i += 1; /* Increment i to avoid including '^' in the char-buffer */
}
else
{
re_compiled[j].type = CHAR_CLASS;
}
/* Copy characters inside [..] to buffer */
while ( (pattern[++i] != ']')
&& (pattern[i] != '\0')) /* Missing ] */
{
if (pattern[i] == '\\')
{
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
}
ccl_buf[ccl_bufidx++] = pattern[i++];
}
else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
}
ccl_buf[ccl_bufidx++] = pattern[i];
}
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
/* Catches cases such as [00000000000000000000000000000000000000][ */
//fputs("exceeded internal buffer!\n", stderr);
return 0;
}
/* Null-terminate string end */
ccl_buf[ccl_bufidx++] = 0;
re_compiled[j].ccl = &ccl_buf[buf_begin];
} break;
/* Other characters: */
default:
{
re_compiled[j].type = CHAR;
re_compiled[j].ch = c;
} break;
}
i += 1;
j += 1;
}
/* 'UNUSED' is a sentinel used to indicate end-of-pattern */
re_compiled[j].type = UNUSED;
return (re_t) re_compiled;
}
void re_print(regex_t* pattern)
{
const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" };
int i;
int j;
char c;
for (i = 0; i < MAX_REGEXP_OBJECTS; ++i)
{
if (pattern[i].type == UNUSED)
{
break;
}
printf("type: %s", types[pattern[i].type]);
if (pattern[i].type == CHAR_CLASS || pattern[i].type == INV_CHAR_CLASS)
{
printf(" [");
for (j = 0; j < MAX_CHAR_CLASS_LEN; ++j)
{
c = pattern[i].ccl[j];
if ((c == '\0') || (c == ']'))
{
break;
}
printf("%c", c);
}
printf("]");
}
else if (pattern[i].type == CHAR)
{
printf(" '%c'", pattern[i].ch);
}
printf("\n");
}
}
/* Private functions: */
static int matchdigit(char c)
{
return ((c >= '0') && (c <= '9'));
}
static int matchalpha(char c)
{
return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
}
static int matchwhitespace(char c)
{
return ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\f') || (c == '\v'));
}
static int matchalphanum(char c)
{
return ((c == '_') || matchalpha(c) || matchdigit(c));
}
static int matchrange(char c, const char* str)
{
return ((c != '-') && (str[0] != '\0') && (str[0] != '-') &&
(str[1] == '-') && (str[1] != '\0') &&
(str[2] != '\0') && ((c >= str[0]) && (c <= str[2])));
}
static int matchdot(char c)
{
return c != '\n' && c != '\r';
}
static int ismetachar(char c)
{
return ((c == 's') || (c == 'S') || (c == 'w') || (c == 'W') || (c == 'd') || (c == 'D'));
}
static int matchmetachar(char c, const char* str)
{
switch (str[0])
{
case 'd': return matchdigit(c);
case 'D': return !matchdigit(c);
case 'w': return matchalphanum(c);
case 'W': return !matchalphanum(c);
case 's': return matchwhitespace(c);
case 'S': return !matchwhitespace(c);
default: return (c == str[0]);
}
}
static int matchcharclass(char c, const char* str)
{
do
{
if (matchrange(c, str))
{
return 1;
}
else if (str[0] == '\\')
{
/* Escape-char: increment str-ptr and match on next char */
str += 1;
if (matchmetachar(c, str))
{
return 1;
}
else if ((c == str[0]) && !ismetachar(c))
{
return 1;
}
}
else if (c == str[0])
{
if (c == '-')
{
return ((str[-1] == '\0') || (str[1] == '\0'));
}
else
{
return 1;
}
}
}
while (*str++ != '\0');
return 0;
}
static int matchone(regex_t p, char c)
{
switch (p.type)
{
case DOT: return matchdot(c);
case CHAR_CLASS: return matchcharclass(c, (const char*)p.ccl);
case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p.ccl);
case DIGIT: return matchdigit(c);
case NOT_DIGIT: return !matchdigit(c);
case ALPHA: return matchalphanum(c);
case NOT_ALPHA: return !matchalphanum(c);
case WHITESPACE: return matchwhitespace(c);
case NOT_WHITESPACE: return !matchwhitespace(c);
default: return (p.ch == c);
}
}
static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength)
{
int prelen = *matchlength;
const char* prepoint = text;
while ((text[0] != '\0') && matchone(p, *text))
{
text++;
(*matchlength)++;
}
while (text >= prepoint)
{
if (matchpattern(pattern, text--, matchlength))
return 1;
(*matchlength)--;
}
*matchlength = prelen;
return 0;
}
static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength)
{
const char* prepoint = text;
while ((text[0] != '\0') && matchone(p, *text))
{
text++;
(*matchlength)++;
}
while (text > prepoint)
{
if (matchpattern(pattern, text--, matchlength))
return 1;
(*matchlength)--;
}
return 0;
}
static int matchquestion(regex_t p, regex_t* pattern, const char* text, int* matchlength)
{
if (p.type == UNUSED)
return 1;
if (matchpattern(pattern, text, matchlength))
return 1;
if (*text && matchone(p, *text++))
{
if (matchpattern(pattern, text, matchlength))
{
(*matchlength)++;
return 1;
}
}
return 0;
}
#if 0
/* Recursive matching */
static int matchpattern(regex_t* pattern, const char* text, int *matchlength)
{
int pre = *matchlength;
if ((pattern[0].type == UNUSED) || (pattern[1].type == QUESTIONMARK))
{
return matchquestion(pattern[1], &pattern[2], text, matchlength);
}
else if (pattern[1].type == STAR)
{
return matchstar(pattern[0], &pattern[2], text, matchlength);
}
else if (pattern[1].type == PLUS)
{
return matchplus(pattern[0], &pattern[2], text, matchlength);
}
else if ((pattern[0].type == END) && pattern[1].type == UNUSED)
{
return text[0] == '\0';
}
else if ((text[0] != '\0') && matchone(pattern[0], text[0]))
{
(*matchlength)++;
return matchpattern(&pattern[1], text+1);
}
else
{
*matchlength = pre;
return 0;
}
}
#else
/* Iterative matching */
static int matchpattern(regex_t* pattern, const char* text, int* matchlength)
{
int pre = *matchlength;
do
{
if ((pattern[0].type == UNUSED) || (pattern[1].type == QUESTIONMARK))
{
return matchquestion(pattern[0], &pattern[2], text, matchlength);
}
else if (pattern[1].type == STAR)
{
return matchstar(pattern[0], &pattern[2], text, matchlength);
}
else if (pattern[1].type == PLUS)
{
return matchplus(pattern[0], &pattern[2], text, matchlength);
}
else if ((pattern[0].type == END) && pattern[1].type == UNUSED)
{
return (text[0] == '\0');
}
/* Branching is not working properly
else if (pattern[1].type == BRANCH)
{
return (matchpattern(pattern, text) || matchpattern(&pattern[2], text));
}
*/
(*matchlength)++;
}
while ((text[0] != '\0') && matchone(*pattern++, *text++));
*matchlength = pre;
return 0;
}
#endif

View File

@ -30,6 +30,7 @@ static int load_allowed_sn_community(n2n_sn_t *sss, char *path) {
char buffer[4096], *line; char buffer[4096], *line;
FILE *fd = fopen(path, "r"); FILE *fd = fopen(path, "r");
struct sn_community *s, *tmp; struct sn_community *s, *tmp;
struct sn_community_regular_expression *re, *tmp_re;
uint32_t num_communities = 0; uint32_t num_communities = 0;
if(fd == NULL) { if(fd == NULL) {
@ -44,13 +45,17 @@ static int load_allowed_sn_community(n2n_sn_t *sss, char *path) {
free(s); free(s);
} }
HASH_ITER(hh, sss->rules, re, tmp_re) {
HASH_DEL(sss->rules, re);
free(re);
}
while((line = fgets(buffer, sizeof(buffer), fd)) != NULL) { while((line = fgets(buffer, sizeof(buffer), fd)) != NULL) {
int len = strlen(line); int len = strlen(line);
if((len < 2) || line[0] == '#') if((len < 2) || line[0] == '#')
continue; continue;
// !!! if (isregexp) then add to regExp list [check this using a "meta"-regExp]
// !!! otherwise (fixed name) do the following
len--; len--;
while(len > 0) { while(len > 0) {
if((line[len] == '\n') || (line[len] == '\r')) { if((line[len] == '\n') || (line[len] == '\r')) {
@ -60,6 +65,18 @@ static int load_allowed_sn_community(n2n_sn_t *sss, char *path) {
break; break;
} }
// if it contains typical characters...
if(NULL != strpbrk(line, ".^$*+?[]\\")) {
// ...it is treated as regular expression
re = (struct sn_community_regular_expression*)calloc(1,sizeof(struct sn_community_regular_expression));
if (re) {
re->rule = re_compile(line);
HASH_ADD_PTR(sss->rules, rule, re);
traceEvent(TRACE_INFO, "Added regular expression for allowed communities '%s'", line);
continue;
}
}
s = (struct sn_community*)calloc(1,sizeof(struct sn_community)); s = (struct sn_community*)calloc(1,sizeof(struct sn_community));
if(s != NULL) { if(s != NULL) {
@ -81,7 +98,7 @@ static int load_allowed_sn_community(n2n_sn_t *sss, char *path) {
fclose(fd); fclose(fd);
traceEvent(TRACE_NORMAL, "Loaded %u communities from %s", traceEvent(TRACE_NORMAL, "Loaded %u fixed-name communities from %s",
num_communities, path); num_communities, path);
/* No new communities will be allowed */ /* No new communities will be allowed */

View File

@ -791,7 +791,9 @@ static int process_udp(n2n_sn_t * sss,
n2n_common_t cmn2; n2n_common_t cmn2;
uint8_t ackbuf[N2N_SN_PKTBUF_SIZE]; uint8_t ackbuf[N2N_SN_PKTBUF_SIZE];
size_t encx=0; size_t encx=0;
struct sn_community_regular_expression *re, *tmp_re;
int8_t allowed_match = -1;
int match_length = 0;
/* Edge requesting registration with us. */ /* Edge requesting registration with us. */
sss->stats.last_reg_super=now; sss->stats.last_reg_super=now;
++(sss->stats.reg_super); ++(sss->stats.reg_super);
@ -812,9 +814,14 @@ static int process_udp(n2n_sn_t * sss,
not report any message back to the edge to hide the supernode not report any message back to the edge to hide the supernode
existance (better from the security standpoint) existance (better from the security standpoint)
*/ */
// !!! check if the requested name matches any of the regExps (ITERate)
// !!! put result in variable uint8_t (or so) "allowed_match" HASH_ITER(hh, sss->rules, re, tmp_re) {
if(!comm && (!sss->lock_communities || allowed_match)) { allowed_match = re_matchp(re->rule, cmn.community, &match_length);
if(allowed_match != -1)
break;
}
if(!comm && (!sss->lock_communities || (allowed_match != -1))) {
comm = calloc(1, sizeof(struct sn_community)); comm = calloc(1, sizeof(struct sn_community));
if(comm) { if(comm) {