diff --git a/src/lib/automate/automate.c b/src/lib/automate/automate.c index 1ea9d14..d2aa159 100644 --- a/src/lib/automate/automate.c +++ b/src/lib/automate/automate.c @@ -33,6 +33,7 @@ unsigned int browse(struct Automate* pAutomate, const char* pString){ len = strlen(pString); buffer = realloc(buffer, len+1); pAutomate->dCurrent = 0; + pAutomate->steps = 0; /* (1) Initialize path memory @@ -49,6 +50,8 @@ unsigned int browse(struct Automate* pAutomate, const char* pString){ strIndex = 0; while( c < len ){ + pAutomate->steps++; + /* (1) Check if the current char can lead to a state ---------------------------------------------------------*/ @@ -83,8 +86,21 @@ unsigned int browse(struct Automate* pAutomate, const char* pString){ break; } - /* (4) If AUTOMATE match */ + /* (4) If RANGE match */ + }else if( dotPtr.type[i] == 1 ){ + + ptr = pAutomate->redge[dotPtr.edge[i]]; + + if( pString[strIndex] >= ptr[0] && pString[strIndex] <= ptr[1] ){ + pAutomate->dCurrent = dotPtr.dot[i]; + strIndex += 1; + pathmem[c] = i; + break; + } + + /* (5) If AUTOMATE match */ }else{ + recursive = browse(&pAutomate->aedge[dotPtr.edge[i]], pString+strIndex); if( pAutomate->aedge[dotPtr.edge[i]].dCurrent == pAutomate->aedge[dotPtr.edge[i]].dFinal ){ @@ -98,20 +114,21 @@ unsigned int browse(struct Automate* pAutomate, const char* pString){ } - /* (5) If no more path */ + + /* (6) If no more path */ if( strIndex == indexmem[c] ){ - // if not first branch -> go previous + // {1} if not first branch -> go previous // if( c > 0 ) c--; - // else next path of previous branch + // {2} else next path of previous branch // strIndex = indexmem[c]; pathmem[c]++; pAutomate->dCurrent = dotmem[c]; // printf("* back to q%d:%d\n", dotmem[c], pathmem[c]); - /* (6) Next branch + reset data (if already browsed in other path) */ + /* (7) Next branch + reset data (if already browsed in other path) */ }else{ // printf("q%d -%.1s-> q%d\n", c, pString+indexmem[c], pAutomate->dCurrent); c++; @@ -217,6 +234,7 @@ 
struct Automate createAutomate(){ automate.dots = 0; automate.sedges = 0; automate.aedges = 0; + automate.redges = 0; automate.dCurrent = 0; automate.dFinal = 0; @@ -225,6 +243,7 @@ struct Automate createAutomate(){ =========================================================*/ automate.dot = malloc( 0 ); automate.sedge = malloc( 0 ); + automate.redge = malloc( 0 ); automate.aedge = malloc( 0 ); @@ -234,7 +253,6 @@ struct Automate createAutomate(){ } - char addDot(struct Automate* pAutomate){ /* [0] Initialize variables @@ -264,7 +282,6 @@ char addDot(struct Automate* pAutomate){ } - char addStringTransition(struct Automate* pAutomate, const char* pSedge){ /* [0] Initialize variables =========================================================*/ @@ -283,6 +300,27 @@ char addStringTransition(struct Automate* pAutomate, const char* pSedge){ return index; } + +char addRangeTransition(struct Automate* pAutomate, const char pRedge[2]){ + /* [0] Initialize variables (index of the new range edge = previous redges count) + =========================================================*/ + char index = pAutomate->redges++; + + /* [1] Reallocate memory for redge (one more char* slot, plus a 3-byte buffer: low bound, high bound, NUL) + =========================================================*/ + pAutomate->redge = realloc(pAutomate->redge, sizeof(char*) * pAutomate->redges); + pAutomate->redge[index] = malloc( 3 ); + + + /* [2] Create the redge: copy exactly the 2 bounds (pRedge is a 2-char array and need not be NUL-terminated), then terminate + =========================================================*/ + memcpy(pAutomate->redge[index], pRedge, 2); + pAutomate->redge[index][2] = 0; + + return index; +} + + char addAutomateTransition(struct Automate* pAutomate, struct Automate* pAedge){ /* [0] Initialize variables =========================================================*/ @@ -301,7 +339,7 @@ char addAutomateTransition(struct Automate* pAutomate, struct Automate* pAedge){ } -void linkSEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigned int pOut, const char pSedge){ +void linkSEdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pSedge){ /* [0] Initialize variables 
=========================================================*/ @@ -325,7 +363,7 @@ void linkSEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigne } -void linkAEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigned int pOut, const char pAedge){ +void linkREdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pRedge){ /* [0] Initialize variables =========================================================*/ @@ -344,6 +382,30 @@ void linkAEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigne =========================================================*/ pAutomate->dot[pIn].dot[index] = pOut; pAutomate->dot[pIn].type[index] = 1; + pAutomate->dot[pIn].edge[index] = pRedge; + +} + + +void linkAEdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pAedge){ + + /* [0] Initialize variables + =========================================================*/ + char c; // counter (declared but never used in this function) + char index = pAutomate->dot[pIn].n++; + + + /* [1] Reallocate memory for link (dot, type and edge are char arrays, so n bytes each) + =========================================================*/ + pAutomate->dot[pIn].dot = realloc( pAutomate->dot[pIn].dot, pAutomate->dot[pIn].n ); + pAutomate->dot[pIn].type = realloc( pAutomate->dot[pIn].type, pAutomate->dot[pIn].n ); + pAutomate->dot[pIn].edge = realloc( pAutomate->dot[pIn].edge, pAutomate->dot[pIn].n ); + + + /* [2] Create the link (type 2 = automate transition) + =========================================================*/ + pAutomate->dot[pIn].dot[index] = pOut; + pAutomate->dot[pIn].type[index] = 2; pAutomate->dot[pIn].edge[index] = pAedge; } @@ -358,26 +420,6 @@ void linkAEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigne - - - - -char is_part(const char pChar, const char* pSet){ - - unsigned int a; - unsigned int A = strlen(pSet); - - /* [1] For each char of @pSet - =========================================================*/ - for( a = 0 ; a < A ; a++ ) - if( pChar == pSet[a] ) - return 1; - - return 0; -} - - - char* 
replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement){ /* [0] Initialize variables @@ -450,7 +492,6 @@ char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplac } - char getGroups(const char* pString, int* pStart, int* pEnd){ /* [0] Initialize variables @@ -518,6 +559,7 @@ char getGroups(const char* pString, int* pStart, int* pEnd){ return groups; } + char getCases(const char* pString, int* pStart, int* pEnd){ /* [0] Initialize variables @@ -586,7 +628,6 @@ char getCases(const char* pString, int* pStart, int* pEnd){ } - char* developOr(const char* pString){ /* [0] Initialize variables @@ -617,6 +658,7 @@ char* developOr(const char* pString){ } + char* developCases(const char* pString){ /* [0] Initialize variables diff --git a/src/lib/automate/automate.h b/src/lib/automate/automate.h index 4be2204..d406713 100644 --- a/src/lib/automate/automate.h +++ b/src/lib/automate/automate.h @@ -8,111 +8,149 @@ * doowap31@gmail.com * **************************/ #ifndef _LIB_AUTOMATE_AUTOMATE_H_ - #define _LIB_AUTOMATE_AUTOMATE_H_ + #define _LIB_AUTOMATE_AUTOMATE_H_ - #include - #include - #include + #include + #include + #include - struct AutomateDot{ - char n; // number of edges - char* type; // type of Transition (according to index) (0=string;1=automate) - char* dot; // list of dot_id - char* edge; // list of [sa]edge_id - }; + struct AutomateDot{ + char n; // number of edges + char* type; // type of Transition (according to index) (0=string;1=range;2=automate) + char* dot; // list of dot_id + char* edge; // list of [sa]edge_id + }; - struct Automate{ - char sedges; // Max sedge index - char aedges; // Max aedge index - char dots; // Max dot index + struct Automate{ + char sedges; // Max sedge index + char aedges; // Max aedge index + char redges; // Max redge index + char dots; // Max dot index - struct AutomateDot* dot; // dots - char** sedge; // string edges - struct Automate* aedge; // automate edges + struct AutomateDot* 
dot; // dots + char** sedge; // string edges + char** redge; // range edges (between char[0] and char[1]) + struct Automate* aedge; // automate edges - char dCurrent; // current dot index - char dFinal; // final dot index - }; + char dCurrent; // current dot index + char dFinal; // final dot index + char steps; // number of steps + }; - /* Try to browse an automate with a string - * - * @pAutomate The current automate - * @pString The string to test - * - * - * @return state The final state we can browse to - * - */ - unsigned int browse(struct Automate* pAutomate, const char* pString); + /* Try to browse an automate with a string + * + * @pAutomate Current working automate + * @pString String to test + * + * @return offset The @pString offset browsed to + * + * @NOTE: Automate.dCurrent is equal to Automate.dFinal if final state reached + * + */ + unsigned int browse(struct Automate* pAutomate, const char* pString); - /* Builds an automate from a regexp - * - * @pRegExp Regular expression to build from - * - * @return pAutomate The created automate - * - */ - struct Automate buildFromRegExp(const char* pRegExp); - - - /* - * - * - * - * - */ - - - /* Initializes an automate - * - */ - struct Automate createAutomate(); - // struct Automate clone(const struct Automate* pOriginal); - - - /* Adds a dot to an automate - * - * @return index The created dot index - * - */ - char addDot(struct Automate* pAutomate); - - - /* Adds a set to an automate - * - * @return index The created set index - * - */ - char addSet(struct Automate* pAutomate, const char* pSet); - - char addStringTransition(struct Automate* pAutomate, const char* pSedge); - char addAutomateTransition(struct Automate* pAutomate, struct Automate* pAedge); - /* Adds a link between 2 states with a set - * - */ - void linkSEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigned int pOut, const char pSedge); - void linkAEdge(struct Automate* pAutomate, const unsigned int pIn, const unsigned int 
pOut, const char pAedge); + /* Builds an automate from a regexp + * + * @pRegExp Regular expression to build from + * + * @return pAutomate The created automate + * + */ + struct Automate buildFromRegExp(const char* pRegExp); - /* Checks if a char is part of a set - * - * @pChar The char to test - * @pSet The set to test in - * - * @return is_part 1 if is part, 0 if not - * - */ - char is_part(const char pChar, const char* pSet); - char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement); - char getGroups(const char* pString, int* pStart, int* pEnd); - char getCases(const char* pString, int* pStart, int* pEnd); - char* developOr(const char* pString); - char* developCases(const char* pString); + + + + /* Initializes an automate + * + * @return new The created automate + * + */ + struct Automate createAutomate(); + + + /* Adds a dot to an automate + * + * @pAutomate Current working automate + * + * @return index The created dot index + * + */ + char addDot(struct Automate* pAutomate); + + /* Adds a string transition + * + * @pAutomate Current working automate + * @pSedge String to build transition from + * + * @return sedge Index of the created transition + * + */ + char addStringTransition(struct Automate* pAutomate, const char* pSedge); + + /* Adds a range transition + * + * @pAutomate Current working automate + * @pRedge Range to build transition from (pRedge[0] = low bound, pRedge[1] = high bound, both inclusive) + * + * @return redge Index of the created transition + * + */ + char addRangeTransition(struct Automate* pAutomate, const char pRedge[2]); + + /* Adds an automate transition + * + * @pAutomate Current working automate + * @pAedge Automate to build transition from + * + * @return aedge Index of the created transition + */ + char addAutomateTransition(struct Automate* pAutomate, struct Automate* pAedge); + + /* Adds a link between 2 dots with a string + * + * @pAutomate Current working automate + * @pIn Input dot index + * @pOut Output dot index + * @pSedge String transition index + * + 
*/ + void linkSEdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pSedge); + + /* Adds a link between 2 dots with a range + * + * @pAutomate Current working automate + * @pIn Input dot index + * @pOut Output dot index + * @pRedge Range transition index + * + */ + void linkREdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pRedge); + + /* Adds a link between 2 dots with an automate + * + * @pAutomate Current working automate + * @pIn Input dot index + * @pOut Output dot index + * @pAedge Automate transition index + * + * + */ + void linkAEdge(struct Automate* pAutomate, const char pIn, const char pOut, const char pAedge); + + + + char* replaceAll(const char* pHaystack, const char* pNeedle, const char* pReplacement); + char getGroups(const char* pString, int* pStart, int* pEnd); + char getCases(const char* pString, int* pStart, int* pEnd); + char* developOr(const char* pString); + char* developCases(const char* pString); #endif \ No newline at end of file diff --git a/src/linter.c b/src/linter.c index 6669ec8..ffde014 100644 --- a/src/linter.c +++ b/src/linter.c @@ -23,7 +23,7 @@ int main(int argc, char* argv[]){ /** TEST **/ /** BUILD AUTOMATE **/ - // A = a+(bcx+bcd) + // A = a+(bc(x-z)+bcd) struct Automate a = createAutomate(); addDot(&a); // q0 addDot(&a); // q1 @@ -32,34 +32,65 @@ int main(int argc, char* argv[]){ addDot(&a); // q4 addDot(&a); // q5 - addStringTransition(&a, "a\0"); // t0 - addStringTransition(&a, "b\0"); // t1 - addStringTransition(&a, "c\0"); // t2 - addStringTransition(&a, "d\0"); // t3 - addStringTransition(&a, "x\0"); // t4 + addStringTransition(&a, "a\0"); // s0 + addStringTransition(&a, "b\0"); // s1 + addStringTransition(&a, "c\0"); // s2 + addStringTransition(&a, "d\0"); // s3 + addRangeTransition(&a, "xz\0"); // r0 - linkSEdge(&a, 0, 1, 0); // q0 -t0-> q1 || branch-0 path-0 - linkSEdge(&a, 1, 1, 0); // q1 -t0-> q1 || branch-1 path-0 - linkSEdge(&a, 1, 2, 1); // q1 -t1-> q2 || 
branch-1 path-1 - linkSEdge(&a, 2, 3, 2); // q2 -t2-> q3 || branch-2 path-0 - linkSEdge(&a, 3, 5, 4); // q2 -t4-> q5 || branch-3 path-0 - linkSEdge(&a, 1, 4, 1); // q1 -t1-> q4 || branch-1 path-2 - linkSEdge(&a, 4, 4, 2); // q4 -t2-> q4 || branch-4 path-0 - linkSEdge(&a, 4, 5, 3); // q4 -t3-> q5 || branch-4 path-1 + linkSEdge(&a, 0, 1, 0); // q0 -s0-> q1 || branch-0 path-0 + linkSEdge(&a, 1, 1, 0); // q1 -s0-> q1 || branch-1 path-0 + linkSEdge(&a, 1, 2, 1); // q1 -s1-> q2 || branch-1 path-1 + linkSEdge(&a, 2, 3, 2); // q2 -s2-> q3 || branch-2 path-0 + linkREdge(&a, 3, 5, 0); // q3 -r0-> q5 || branch-3 path-0 + linkSEdge(&a, 1, 4, 1); // q1 -s1-> q4 || branch-1 path-2 + linkSEdge(&a, 4, 4, 2); // q4 -s2-> q4 || branch-4 path-0 + linkSEdge(&a, 4, 5, 3); // q4 -s3-> q5 || branch-4 path-1 - printf("browse 'abcx' : %d\n", browse(&a, "abcx")); - printf("* final_state: %d\n", a.dCurrent); - printf("browse 'abcd' : %d\n", browse(&a, "abcd")); - printf("* final_state: %d\n", a.dCurrent); - printf("browse 'aaaaaaabcx' : %d\n", browse(&a, "aaaaaaabcx")); - printf("* final_state: %d\n", a.dCurrent); - printf("browse 'aaabc' : %d\n", browse(&a, "aaabc")); - printf("* final_state: %d\n", a.dCurrent); - printf("browse 'aaabd' : %d\n", browse(&a, "aaabd")); - printf("* final_state: %d\n", a.dCurrent); - printf("browse 'bc' : %d\n", browse(&a, "bc")); - printf("* final_state: %d\n", a.dCurrent); + char string[40] = {0}; + strcpy(string, "abcx"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + strcpy(string, "abcy"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "abcz"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: 
%d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "abcd"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "aaaaaaabcx"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "aaabc"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "aaabc"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "aaabd"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf(" (*) final_state: %d/%d\n", a.dCurrent, a.dFinal); + printf(" (*) in %d steps\n\n", a.steps); + + strcpy(string, "bc"); + printf("browse '%s' : %d/%d\n", string, browse(&a, string), (int) strlen(string)); + printf("* final_state: %d/%d\n", a.dCurrent, a.dFinal); /* Build RegExp */ // char regex[40] = "(a([abc](b+a)(b+e)+ahd)*(b+i)c)sd"; char regex[40] = "(a[c(e[g(i+j)h]f)d]b)";