From 7cfd9265851677ef204524d395c6a1bfff439f1e Mon Sep 17 00:00:00 2001 From: xdrm-brackets Date: Sun, 14 May 2017 18:24:21 +0200 Subject: [PATCH] [automate] develop or --- src/Makefile | 5 +- src/lib/automate/automate.c | 180 ++++++++++++++++++++++++++++++++++++ src/lib/automate/automate.h | 11 +++ src/lib/regex/regex.c | 83 ----------------- src/lib/regex/regex.h | 6 +- src/linter.c | 6 ++ 6 files changed, 204 insertions(+), 87 deletions(-) diff --git a/src/Makefile b/src/Makefile index dbaccf6..87afdad 100644 --- a/src/Makefile +++ b/src/Makefile @@ -17,6 +17,9 @@ clean: lib/common.o: lib/common/common.h lib/common/common.c gcc -c -o lib/common.o lib/common/common.c; +lib/regex.o: lib/regex/regex.h lib/regex/regex.c + gcc -c -o lib/regex.o lib/regex/regex.c; + lib/automate.o: lib/automate/automate.h lib/automate/automate.c gcc -c -o lib/automate.o lib/automate/automate.c; @@ -26,6 +29,6 @@ lib/scope.o: lib/scope/scope.h lib/scope/scope.c linter.o: linter.h linter.c gcc -c -o linter.o linter.c; -linter: lib/scope.o lib/automate.o lib/common/common.o linter.o +linter: lib/scope.o lib/automate.o lib/common/common.o linter.o gcc $(GCCARGV) -o linter lib/automate.o lib/scope.o lib/common/common.o linter.o; diff --git a/src/lib/automate/automate.c b/src/lib/automate/automate.c index 5401e01..e738de2 100644 --- a/src/lib/automate/automate.c +++ b/src/lib/automate/automate.c @@ -64,6 +64,9 @@ struct Automate buildFromRegExp(const char* pRegExp){ =========================================================*/ struct Automate result; + printf("or groups: %s\n", developOr(pRegExp) ); + + } @@ -188,6 +191,12 @@ void linkStates(struct Automate* pAutomate, const unsigned int pIn, const unsign + + + + + + char is_part(const char pChar, const char* pSet){ unsigned int a; @@ -200,4 +209,175 @@ char is_part(const char pChar, const char* pSet){ return 1; return 0; +} + + + +char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement){ + + /* [0] Initialize variables + =========================================================*/ + int* occur = malloc( 0 ); // occurences start offset + int found = 0; + char* diff = NULL; + char* result = NULL; + int c, i, f, hl, nl, rl; // counters + + + /* [1] Search for each occurence of @pNeedle in @pHaystack + =========================================================*/ + nl = strlen(pNeedle); + hl = strlen(pHaystack) - nl + 1; + diff = malloc( nl+1 ); + + /* (1) For each position, try match */ + for( c = 0 ; c < hl ; c++ ){ + + strncpy(diff, pHaystack+c, nl); + diff[nl] = 0; + + /* (2) If found, store offset */ + if( strcmp(diff, pNeedle) == 0 ){ + occur = realloc(occur, sizeof(int) * ++found ); + occur[found-1] = c; + } + + } + + + /* [2] Replace all occurences + =========================================================*/ + i = 0; // index in new string + c = 0; // index in old string + rl = strlen(pReplacement); + result = malloc( strlen(pHaystack) + 1 + found*rl - found*nl ); + + /* (1) For each found occurence */ + for( f = 0 ; f < found ; f++ ){ + + // Copy before found + strncpy(result+i, pHaystack+c, occur[f]-c); + i += occur[f]-c; + + // Replace + strncpy(result+i, pReplacement, rl); + i += rl; + c = occur[f]+nl; + + } + + result[i+1] = 0; + + + /* [3] Free memory + =========================================================*/ + free(occur); + free(diff); + + + /* [4] Return result + =========================================================*/ + return result; +} + + + +char getGroups(const char* pString, int* pStart, int* pEnd){ + + /* [0] Initialize variables + =========================================================*/ + int c, C = strlen(pString); + int lvl = 0; + int groups = 0; + int start, end; + + + /* [1] Find groups + =========================================================*/ + c = 0; + while( c < C ){ + + /* (1) Find group START character + ---------------------------------------------------------*/ + for( ; c < C ; c++ ){ + if( pString[c] == '(' ) lvl++; + if( pString[c] == ')' ) lvl--; + + /* (1) If found and level-0 -> got it */ + if( pString[c] == '(' && lvl == 1 ) + break; + } + + /* (2) If not found -> exit */ + if( c >= C ) + break; + + /* (3) else -> store */ + start = c++; + + + /* (2) Find group STOP character + ---------------------------------------------------------*/ + for( ; c < C ; c++ ){ + if( pString[c] == '(' ) lvl++; + if( pString[c] == ')' ) lvl--; + + /* (1) If found and level-0 -> got it */ + if( pString[c] == ')' && lvl == 0 ) + break; + } + + /* (2) If not found -> exit */ + if( c >= C ) + break; + + /* (3) else -> store */ + end = c++; + + + /* (3) Store group + ---------------------------------------------------------*/ + groups++; + pStart = realloc(pStart, sizeof(int) * groups ); + pEnd = realloc(pEnd, sizeof(int) * groups ); + + pStart[groups-1] = start; + pEnd[groups-1] = end; + + } + + + return groups; +} + + + +char* developOr(const char* pString){ + + /* [0] Initialize variables + =========================================================*/ + char* result = malloc( strlen(pString) + 3 ); + char* string = malloc( strlen(pString) + 1 ); + strcpy(string, pString); + + int c, l; + + /* [1] Split by '+' chars + =========================================================*/ + c = 0; l = strlen(pString); + + for( ; c < l ; c++ ) + if( pString[c] == '+' ) + string[c] = '|'; + + + result[0] = '('; + strcpy(result+1, pString); + result[l+1] = ')'; + result[l+2] = 0; + + free(string); + + return result; + } \ No newline at end of file diff --git a/src/lib/automate/automate.h b/src/lib/automate/automate.h index a95a3b1..ca00581 100644 --- a/src/lib/automate/automate.h +++ b/src/lib/automate/automate.h @@ -52,6 +52,13 @@ struct Automate buildFromRegExp(const char* pRegExp); + /* + * + * + * + * + */ + /* Initializes an automate * @@ -92,5 +99,9 @@ */ char is_part(const char pChar, const char* pSet); + char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement); + char getGroups(const char* pString, int* pStart, int* pEnd); + char* developOr(const char* pString); + #endif \ No newline at end of file diff --git a/src/lib/regex/regex.c b/src/lib/regex/regex.c index 9b607fd..95252e2 100644 --- a/src/lib/regex/regex.c +++ b/src/lib/regex/regex.c @@ -9,37 +9,6 @@ **************************/ #include "regex.h" -// int match(const char* pPattern, char* pBuffer); - -char madeOfSet(char* pBuffer, const char* pSet){ - - unsigned int a, b; - unsigned int A = strlen(pBuffer); - unsigned int B = strlen(pSet); - char in = 0; - - /* [1] For each char of pBuffer - =========================================================*/ - for( a = 0 ; a < A ; a++ ){ - - /* (2) Check if char is one of each of @pSet - ---------------------------------------------------------*/ - for( b = 0 ; b < B ; b++ ) - if( pBuffer[a] == pSet[b] ) - break; - - /* If char not in */ - if( b == B ) - return 0; - } - - return 1; -} - - - - - int developSingle(const char* pPattern, struct RegExpDevSets* pCases, struct RegExpDevSets* pGroups){ /* [0] Init vars @@ -243,58 +212,6 @@ int developSingle(const char* pPattern, struct RegExpDevSets* pCases, struct Reg -int developMulti(const char* pPattern, struct RegExpDevSets* pCases, struct RegExpDevSets* pGroups, const char pDepth){ - - /* [0] Init vars - =========================================================*/ - char depth = pDepth; - unsigned int c, d; - unsigned int boundary[2]; - struct RegExpDevSets groups = { 0, malloc(1) }; - struct RegExpDevSets cases = { 0, malloc(1) }; - int devRtn; - char buffer[MAX_DEV_SIZE]; - /* [1] Get all level-0 groups+cases - =========================================================*/ - boundary[0] = 0; // min boundary - boundary[1] = strlen(pPattern); // max boundary - - // while there is a depth to do - do{ - - strncpy(buffer, pPattern+boundary[0], boundary[1]); - devRtn = developSingle(buffer, pGroups, pCases); - - // if( devRtn == 0 ){ - // groups = realloc(groups, sizeof(struct RegExpDevSets) * depth); - // cases = realloc(cases, sizeof(struct RegExpDevSets) * depth); - // } - - - for( c = 0 ; c < pGroups->n ; c++ ){ - for( d = 0 ; d < depth ; d++ ) printf(" "); - printf("Group.%d-%d: '%s' (quantifier: %c)\n", depth, c, pGroups->sets[c].set, pGroups->sets[c].quantifier); - - strcpy(buffer, pGroups->sets[c].set); - for( d = 0 ; d < depth ; d++ ) printf(" "); - printf("dev: %d\n", developMulti(buffer, &groups, &cases, depth+1)); - - } - - for( c = 0 ; c < pCases->n ; c++ ){ - for( d = 0 ; d < depth ; d++ ) printf(" "); - printf("Case.%d-%d: '%s' (quantifier: %c)\n", depth, c, pCases->sets[c].set, pCases->sets[c].quantifier); - - strcpy(buffer, pCases->sets[c].set); - for( d = 0 ; d < depth ; d++ ) printf(" "); - printf("dev: %d\n", developMulti(buffer, &groups, &cases, depth+1)); - } - - }while( devRtn == 10 ); - - - return 0; -} \ No newline at end of file diff --git a/src/lib/regex/regex.h b/src/lib/regex/regex.h index 35c62c6..788718e 100644 --- a/src/lib/regex/regex.h +++ b/src/lib/regex/regex.h @@ -69,12 +69,12 @@ /* SIGNATURES */ // struct RegExpMatches match(const char* pPattern, char* pBuffer); - /* Check if buffer is made of characters from pSet */ - char madeOfSet(char* pBuffer, const char* pSet); /* Develops a pattern's sets (error:-1, else:0) */ int developSingle(const char* pPattern, struct RegExpDevSets* pCases, struct RegExpDevSets* pGroups); - int developMulti(const char* pPattern, struct RegExpDevSets* pCases, struct RegExpDevSets* pGroups, const char pDepth); + char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement); + + char getGroups(const char* pString, int* pStart, int* pEnd); #endif \ No newline at end of file diff --git a/src/linter.c b/src/linter.c index a350e78..ce5beb3 100644 --- a/src/linter.c +++ b/src/linter.c @@ -42,6 +42,12 @@ int main(int argc, char* argv[]){ printf("browse 'abb' : %d\n", browse(a, "abb")); printf("browse 'i' : %d\n", browse(a, "i")); printf("browse 'b' : %d\n", browse(a, "b")); + + /* Build RegExp */ + // char regex[20] = "(b+a)(b+e)*(b+i)"; + char regex[20] = "abc+bde+aslkdj+sd"; + + buildFromRegExp(regex); /** TEST **/ { /* [1] Check arguments