progress on experimental lexer
This commit is contained in:
parent
842c2b8f7b
commit
39fd699274
|
@ -7,7 +7,7 @@ pushd ..\build
|
||||||
cl %WARNINGOPS% ..\code\test\fsm_table_generator.cpp /Fefsm_gen %*
|
cl %WARNINGOPS% ..\code\test\fsm_table_generator.cpp /Fefsm_gen %*
|
||||||
|
|
||||||
pushd ..\code\test
|
pushd ..\code\test
|
||||||
..\build\fsm_gen
|
..\..\build\fsm_gen
|
||||||
popd
|
popd
|
||||||
|
|
||||||
cl %WARNINGOPS% ..\code\test\experiment.cpp /Fexperiment %*
|
cl %WARNINGOPS% ..\code\test\experiment.cpp /Fexperiment %*
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -482,35 +482,40 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
||||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case LS_identifier:
|
case LS_identifier:
|
||||||
{
|
{
|
||||||
S.fsm.state = 0;
|
S.fsm.state = 0;
|
||||||
S.fsm.emit_token = 0;
|
S.fsm.emit_token = 0;
|
||||||
S.fsm.sub_machine = 0;
|
S.fsm.sub_machine = 0;
|
||||||
S.key_table = key_tables[S.fsm.sub_machine];
|
|
||||||
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
|
|
||||||
--S.pos;
|
--S.pos;
|
||||||
for (;;){
|
for (;;){
|
||||||
|
// TODO(allen): Need to drop down to the instructions to optimize
|
||||||
|
// this correctly I think. This looks like it will have more branches
|
||||||
|
// than it needs unless I am very careful.
|
||||||
for (; S.fsm.state < LSKEY_totally_finished && S.pos < end_pos;){
|
for (; S.fsm.state < LSKEY_totally_finished && S.pos < end_pos;){
|
||||||
|
// TODO(allen): Rebase these super tables so that we don't have
|
||||||
|
// to do a subtract on the state.
|
||||||
|
S.key_table = key_tables[S.fsm.sub_machine];
|
||||||
|
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
|
||||||
for (; S.fsm.state < LSKEY_table_transition && S.pos < end_pos;){
|
for (; S.fsm.state < LSKEY_table_transition && S.pos < end_pos;){
|
||||||
c = chunk[S.pos++];
|
c = chunk[S.pos++];
|
||||||
S.fsm.state = S.key_table[S.fsm.state + S.key_eq_classes[c]];
|
S.fsm.state = S.key_table[S.fsm.state + S.key_eq_classes[c]];
|
||||||
}
|
}
|
||||||
// TODO(allen): udpate table
|
if (S.fsm.state >= LSKEY_table_transition && S.fsm.state < LSKEY_totally_finished){
|
||||||
S.fsm.sub_machine = 0;
|
S.fsm.sub_machine = S.fsm.state - LSKEY_table_transition;
|
||||||
S.key_table = key_tables[S.fsm.sub_machine];
|
S.fsm.state = 0;
|
||||||
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
|
}
|
||||||
}
|
}
|
||||||
S.fsm.emit_token = (S.fsm.int_state >= LSKEY_totally_finished);
|
S.fsm.emit_token = (S.fsm.int_state >= LSKEY_totally_finished);
|
||||||
|
|
||||||
if (S.fsm.emit_token == 0){
|
if (S.fsm.emit_token == 0){
|
||||||
DrYield(7, 1);
|
DrYield(7, 1);
|
||||||
}
|
}
|
||||||
else break;
|
else break;
|
||||||
}
|
}
|
||||||
--S.pos;
|
--S.pos;
|
||||||
|
|
||||||
// TODO(allen): do stuff regarding the actual type of the token
|
// TODO(allen): do stuff regarding the actual type of the token
|
||||||
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
|
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
|
||||||
S.token.flags = 0;
|
S.token.flags = 0;
|
||||||
|
|
|
@ -338,11 +338,11 @@ show_time(Times t, int repeats, char *type){
|
||||||
|
|
||||||
int main(){
|
int main(){
|
||||||
int repeats = 1;
|
int repeats = 1;
|
||||||
int verbose_level = 0;
|
int verbose_level = 1;
|
||||||
int chunk_start = 1;
|
int chunk_start = 0;
|
||||||
int chunk_end = 32;
|
int chunk_end = 0;
|
||||||
#define TEST_FILE "junk.cpp"
|
#define TEST_FILE "parser_test1.cpp"
|
||||||
#define SINGLE_ITEM 0
|
#define SINGLE_ITEM 1
|
||||||
|
|
||||||
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
|
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
|
||||||
int c = 0;
|
int c = 0;
|
||||||
|
|
|
@ -148,7 +148,7 @@ static String_And_Flag keyword_strings[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FSM_State{
|
struct FSM_State{
|
||||||
unsigned char transition_rule[256];
|
unsigned int transition_rule[256];
|
||||||
unsigned char override;
|
unsigned char override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -275,21 +275,16 @@ tree_init(unsigned short max){
|
||||||
return(tree);
|
return(tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
unsigned char
|
||||||
push_future_fsm(Future_FSM_Stack *stack, Match_Node *node){
|
push_future_fsm(Future_FSM_Stack *stack, Match_Node *node){
|
||||||
Future_FSM *future;
|
unsigned char index = 0;
|
||||||
|
Future_FSM *future = 0;
|
||||||
assert(stack->count < stack->max);
|
assert(stack->count < stack->max);
|
||||||
future = &stack->futures[stack->count++];
|
assert(stack->max < 256);
|
||||||
|
index = (unsigned char)(stack->count++);
|
||||||
|
future = &stack->futures[index];
|
||||||
future->source = node;
|
future->source = node;
|
||||||
}
|
return(index);
|
||||||
|
|
||||||
Future_FSM*
|
|
||||||
pop_future_fsm(Future_FSM_Stack *stack){
|
|
||||||
Future_FSM *result = 0;
|
|
||||||
assert(stack->count > 0);
|
|
||||||
--stack->count;
|
|
||||||
result = stack->futures + stack->count;
|
|
||||||
return(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Match_Node*
|
Match_Node*
|
||||||
|
@ -324,10 +319,9 @@ match_add_word(Match_Node *node, int word){
|
||||||
}
|
}
|
||||||
|
|
||||||
FSM_State*
|
FSM_State*
|
||||||
fsm_get_state(FSM *fsm){
|
fsm_get_state(FSM *fsm, unsigned int terminal_base){
|
||||||
FSM_State *result;
|
FSM_State *result;
|
||||||
unsigned short i;
|
unsigned short i;
|
||||||
unsigned char terminal_base = fsm->terminal_base;
|
|
||||||
assert(fsm->count < fsm->max);
|
assert(fsm->count < fsm->max);
|
||||||
result = &fsm->states[fsm->count++];
|
result = &fsm->states[fsm->count++];
|
||||||
for (i = 0; i < 256; ++i){
|
for (i = 0; i < 256; ++i){
|
||||||
|
@ -337,6 +331,12 @@ fsm_get_state(FSM *fsm){
|
||||||
return(result);
|
return(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FSM_State*
|
||||||
|
fsm_get_state(FSM *fsm){
|
||||||
|
FSM_State *result = fsm_get_state(fsm, fsm->terminal_base);
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
FSM_State*
|
FSM_State*
|
||||||
fsm_get_term_state(FSM *fsm, unsigned char override){
|
fsm_get_term_state(FSM *fsm, unsigned char override){
|
||||||
FSM_State *result;
|
FSM_State *result;
|
||||||
|
@ -368,9 +368,7 @@ struct Terminal_Lookup_Table{
|
||||||
};
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, FSM *fsm,
|
process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, FSM *fsm){
|
||||||
Terminal_Lookup_Table *terminal_table = 0, int levels_to_go = -1, Future_FSM_Stack *unfinished_fsms = 0){
|
|
||||||
|
|
||||||
int next_index = node->index + 1;
|
int next_index = node->index + 1;
|
||||||
int match_count = node->count;
|
int match_count = node->count;
|
||||||
FSM_State *this_state = node->state;
|
FSM_State *this_state = node->state;
|
||||||
|
@ -385,17 +383,10 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
||||||
Match_Node *next_nodes[256];
|
Match_Node *next_nodes[256];
|
||||||
Match_Node *newest_child = 0;
|
Match_Node *newest_child = 0;
|
||||||
Match_Node *n;
|
Match_Node *n;
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
unsigned char unjunkify = 0;
|
unsigned char unjunkify = 0;
|
||||||
unsigned char state_override = 0;
|
|
||||||
|
|
||||||
fsm->terminal_base = terminal_base;
|
|
||||||
memset(next_nodes, 0, sizeof(next_nodes));
|
memset(next_nodes, 0, sizeof(next_nodes));
|
||||||
|
|
||||||
if (levels_to_go == 1){
|
|
||||||
state_override = terminal_table->state_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < match_count; ++i){
|
for (i = 0; i < match_count; ++i){
|
||||||
j = words[i];
|
j = words[i];
|
||||||
|
@ -410,12 +401,7 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
||||||
match_init_node(next_nodes[c], match_count);
|
match_init_node(next_nodes[c], match_count);
|
||||||
|
|
||||||
next_nodes[c]->index = next_index;
|
next_nodes[c]->index = next_index;
|
||||||
if (state_override){
|
next_nodes[c]->state = fsm_get_state(fsm);
|
||||||
next_nodes[c]->state = fsm_get_term_state(fsm, state_override++);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
next_nodes[c]->state = fsm_get_state(fsm);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (newest_child == 0){
|
if (newest_child == 0){
|
||||||
assert(node->first_child == 0);
|
assert(node->first_child == 0);
|
||||||
|
@ -426,23 +412,14 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
||||||
newest_child->next_sibling = next_nodes[c];
|
newest_child->next_sibling = next_nodes[c];
|
||||||
}
|
}
|
||||||
newest_child = next_nodes[c];
|
newest_child = next_nodes[c];
|
||||||
++count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match_add_word(next_nodes[c], j);
|
match_add_word(next_nodes[c], j);
|
||||||
fsm_add_transition(this_state, c, fsm_index(fsm, next_nodes[c]->state));
|
fsm_add_transition(this_state, c, fsm_index(fsm, next_nodes[c]->state));
|
||||||
}
|
}
|
||||||
else if (next_index == l){
|
else if (next_index == l){
|
||||||
if (terminal_table == 0){
|
assert(unjunkify == 0);
|
||||||
assert(unjunkify == 0);
|
unjunkify = (unsigned char)saf.flags;
|
||||||
unjunkify = (unsigned char)saf.flags;
|
|
||||||
assert(unjunkify < 55);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
assert(unjunkify == 0);
|
|
||||||
unjunkify = terminal_table->type_to_state[(unsigned char)saf.flags];
|
|
||||||
assert(unjunkify < 55);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -453,16 +430,9 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (levels_to_go == 1){
|
for (n = node->first_child; n; n = n->next_sibling){
|
||||||
for (n = node->first_child; n; n = n->next_sibling){
|
process_match_node(input, n, tree, fsm);
|
||||||
push_future_fsm(unfinished_fsms, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
for (n = node->first_child; n; n = n->next_sibling){
|
|
||||||
process_match_node(input, n, tree, fsm, terminal_table, levels_to_go - 1, unfinished_fsms);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -498,11 +468,119 @@ generate_pp_directive_fsm(){
|
||||||
return(fsm);
|
return(fsm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
Each state needs a full set of transition rules. Most transitions should go into a
|
||||||
|
"not-a-keyword-state". The exceptions are:
|
||||||
|
1. When we see an alphanumeric character that is the next character of an actual keyword
|
||||||
|
i. May need to transition to a new table at this point.
|
||||||
|
2. When we have just seen an entire valid keyword, and the next thing we see is not alphanumeric.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define RealTerminateBase 65536
|
||||||
|
|
||||||
|
int
|
||||||
|
char_is_alphanumeric(char x){
|
||||||
|
int result = 0;
|
||||||
|
if ((x >= '0' && x <= '9') ||
|
||||||
|
(x >= 'A' && x <= 'Z') ||
|
||||||
|
(x >= 'a' && x <= 'z') ||
|
||||||
|
x == '_'){
|
||||||
|
result = 1;
|
||||||
|
}
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, FSM *fsm,
|
||||||
|
Terminal_Lookup_Table *terminal_table, int levels_to_go,
|
||||||
|
Future_FSM_Stack *unfinished_fsms){
|
||||||
|
|
||||||
|
int next_index = node->index + 1;
|
||||||
|
int match_count = node->count;
|
||||||
|
int *words = node->words;
|
||||||
|
FSM_State *this_state = node->state;
|
||||||
|
|
||||||
|
int word_index = 0;
|
||||||
|
int good_transition = 0;
|
||||||
|
int len = 0;
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
String_And_Flag saf = {0};
|
||||||
|
|
||||||
|
Match_Node *next_nodes[256];
|
||||||
|
Match_Node *newest_child = 0;
|
||||||
|
Match_Node *n = 0;
|
||||||
|
char c = 0;
|
||||||
|
|
||||||
|
unsigned char override = 0;
|
||||||
|
|
||||||
|
memset(next_nodes, 0, sizeof(next_nodes));
|
||||||
|
|
||||||
|
for (i = 0; i < match_count; ++i){
|
||||||
|
word_index = words[i];
|
||||||
|
saf = input[word_index];
|
||||||
|
|
||||||
|
len = (int)strlen(saf.str);
|
||||||
|
if (next_index < len){
|
||||||
|
c = saf.str[next_index];
|
||||||
|
|
||||||
|
if (next_nodes[c] == 0){
|
||||||
|
next_nodes[c] = match_get_node(tree);
|
||||||
|
match_init_node(next_nodes[c], match_count);
|
||||||
|
|
||||||
|
next_nodes[c]->index = next_index;
|
||||||
|
|
||||||
|
if (levels_to_go == 1){
|
||||||
|
override = push_future_fsm(unfinished_fsms, next_nodes[c]);
|
||||||
|
next_nodes[c]->state = fsm_get_term_state(fsm, override);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
next_nodes[c]->state = fsm_get_state(fsm, RealTerminateBase);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newest_child == 0){
|
||||||
|
assert(node->first_child == 0);
|
||||||
|
node->first_child = next_nodes[c];
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
assert(newest_child->next_sibling == 0);
|
||||||
|
newest_child->next_sibling = next_nodes[c];
|
||||||
|
}
|
||||||
|
newest_child = next_nodes[c];
|
||||||
|
}
|
||||||
|
|
||||||
|
match_add_word(next_nodes[c], word_index);
|
||||||
|
fsm_add_transition(this_state, c, fsm_index(fsm, next_nodes[c]->state));
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
assert(next_index == len);
|
||||||
|
assert(good_transition == 0);
|
||||||
|
good_transition = terminal_table->type_to_state[saf.flags] + RealTerminateBase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (good_transition){
|
||||||
|
for (i = 0; i < 256; ++i){
|
||||||
|
if (!char_is_alphanumeric((char)i)){
|
||||||
|
this_state->transition_rule[i] = good_transition;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (levels_to_go != 1){
|
||||||
|
for (n = node->first_child; n; n = n->next_sibling){
|
||||||
|
process_match_node(input, n, tree, fsm, terminal_table, levels_to_go-1, unfinished_fsms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
FSM_Stack
|
FSM_Stack
|
||||||
generate_keyword_fsms(){
|
generate_keyword_fsms(){
|
||||||
Terminal_Lookup_Table terminal_table;
|
Terminal_Lookup_Table terminal_table;
|
||||||
Cpp_Token_Type type;
|
Cpp_Token_Type type;
|
||||||
|
|
||||||
Future_FSM_Stack unfinished_futures;
|
Future_FSM_Stack unfinished_futures;
|
||||||
Match_Tree_Stack tree_stack;
|
Match_Tree_Stack tree_stack;
|
||||||
FSM_Stack fsm_stack;
|
FSM_Stack fsm_stack;
|
||||||
|
@ -511,78 +589,81 @@ generate_keyword_fsms(){
|
||||||
Future_FSM *future;
|
Future_FSM *future;
|
||||||
Match_Node *root_node;
|
Match_Node *root_node;
|
||||||
FSM_State *root_state;
|
FSM_State *root_state;
|
||||||
int i;
|
int i, j;
|
||||||
|
|
||||||
memset(terminal_table.type_to_state, 0, sizeof(terminal_table.type_to_state));
|
memset(terminal_table.type_to_state, 0, sizeof(terminal_table.type_to_state));
|
||||||
memset(terminal_table.state_to_type, 0, sizeof(terminal_table.state_to_type));
|
memset(terminal_table.state_to_type, 0, sizeof(terminal_table.state_to_type));
|
||||||
|
|
||||||
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
||||||
type = (Cpp_Token_Type)keyword_strings[i].flags;
|
type = (Cpp_Token_Type)keyword_strings[i].flags;
|
||||||
if (terminal_table.type_to_state[type] == 0){
|
if (terminal_table.type_to_state[type] == 0){
|
||||||
terminal_table.type_to_state[type] = terminal_table.state_count;
|
terminal_table.type_to_state[type] = terminal_table.state_count;
|
||||||
terminal_table.state_to_type[terminal_table.state_count] = type;
|
terminal_table.state_to_type[terminal_table.state_count] = type;
|
||||||
++terminal_table.state_count;
|
++terminal_table.state_count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fsm_stack.max = 1024;
|
fsm_stack.max = 255;
|
||||||
fsm_stack.count = 0;
|
fsm_stack.count = 0;
|
||||||
fsm_stack.fsms = (FSM*)malloc(sizeof(FSM)*fsm_stack.max);
|
fsm_stack.fsms = (FSM*)malloc(sizeof(FSM)*fsm_stack.max);
|
||||||
fsm_stack.table_transition_state = 26;
|
fsm_stack.table_transition_state = 26;
|
||||||
|
|
||||||
tree_stack.max = 1024;
|
tree_stack.max = 255;
|
||||||
tree_stack.count = 0;
|
tree_stack.count = 0;
|
||||||
tree_stack.trees = (Match_Tree*)malloc(sizeof(Match_Tree)*tree_stack.max);
|
tree_stack.trees = (Match_Tree*)malloc(sizeof(Match_Tree)*tree_stack.max);
|
||||||
|
|
||||||
unfinished_futures.max = 1024;
|
unfinished_futures.max = 255;
|
||||||
unfinished_futures.count = 0;
|
unfinished_futures.count = 0;
|
||||||
unfinished_futures.futures = (Future_FSM*)malloc(sizeof(Future_FSM)*unfinished_futures.max);
|
unfinished_futures.futures = (Future_FSM*)malloc(sizeof(Future_FSM)*unfinished_futures.max);
|
||||||
|
|
||||||
fsm = get_fsm(&fsm_stack);
|
fsm = get_fsm(&fsm_stack);
|
||||||
tree = get_tree(&tree_stack);
|
tree = get_tree(&tree_stack);
|
||||||
|
|
||||||
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
||||||
*tree = tree_init(200);
|
*tree = tree_init(200);
|
||||||
|
|
||||||
root_state = fsm_get_state(fsm);
|
root_state = fsm_get_state(fsm, RealTerminateBase);
|
||||||
root_node = match_get_node(tree);
|
root_node = match_get_node(tree);
|
||||||
match_init_node(root_node, ArrayCount(keyword_strings));
|
match_init_node(root_node, ArrayCount(keyword_strings));
|
||||||
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
||||||
root_node->words[i] = i;
|
root_node->words[i] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
root_node->count = ArrayCount(keyword_strings);
|
root_node->count = ArrayCount(keyword_strings);
|
||||||
root_node->state = root_state;
|
root_node->state = root_state;
|
||||||
root_node->index = -1;
|
root_node->index = -1;
|
||||||
|
|
||||||
|
push_future_fsm(&unfinished_futures, root_node);
|
||||||
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 2, &unfinished_futures);
|
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 2, &unfinished_futures);
|
||||||
|
|
||||||
while (unfinished_futures.count > 0){
|
for (i = 1; i < unfinished_futures.count; ++i){
|
||||||
future = pop_future_fsm(&unfinished_futures);
|
future = unfinished_futures.futures + i;
|
||||||
|
|
||||||
fsm = get_fsm(&fsm_stack);
|
fsm = get_fsm(&fsm_stack);
|
||||||
tree = get_tree(&tree_stack);
|
tree = get_tree(&tree_stack);
|
||||||
|
|
||||||
|
assert((int)(fsm - fsm_stack.fsms) == i);
|
||||||
|
|
||||||
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
||||||
*tree = tree_init(200);
|
*tree = tree_init(200);
|
||||||
|
|
||||||
root_state = fsm_get_state(fsm);
|
root_state = fsm_get_state(fsm, RealTerminateBase);
|
||||||
root_node = match_get_node(tree);
|
root_node = match_get_node(tree);
|
||||||
match_copy_init_node(root_node, future->source);
|
match_copy_init_node(root_node, future->source);
|
||||||
root_node->state = root_state;
|
root_node->state = root_state;
|
||||||
|
|
||||||
for (i = 0; i < root_node->count; ++i){
|
for (j = 0; j < root_node->count; ++j){
|
||||||
char space[1024];
|
char space[1024];
|
||||||
sprintf(space, "%s\n", keyword_strings[root_node->words[i]].str);
|
sprintf(space, "%s\n", keyword_strings[root_node->words[j]].str);
|
||||||
fsm_add_comment(fsm, space);
|
fsm_add_comment(fsm, space);
|
||||||
}
|
}
|
||||||
|
|
||||||
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 12, &unfinished_futures);
|
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 12, &unfinished_futures);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(fsm_stack.count < 255);
|
assert(fsm_stack.count < 255);
|
||||||
fsm_stack.final_state = fsm_stack.table_transition_state + (unsigned char)fsm_stack.count;
|
fsm_stack.final_state = fsm_stack.table_transition_state + (unsigned char)fsm_stack.count;
|
||||||
|
|
||||||
return(fsm_stack);
|
return(fsm_stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -742,8 +823,8 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
||||||
fsm.emit_token = 1;
|
fsm.emit_token = 1;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
fsm.state = LS_pound;
|
fsm.state = LS_pound;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#define OperCase(op,type) case op: fsm.emit_token = 1; break;
|
#define OperCase(op,type) case op: fsm.emit_token = 1; break;
|
||||||
|
@ -1087,7 +1168,7 @@ struct FSM_Tables{
|
||||||
unsigned char *eq_class;
|
unsigned char *eq_class;
|
||||||
unsigned char *eq_class_rep;
|
unsigned char *eq_class_rep;
|
||||||
unsigned char *reduced_transition_table;
|
unsigned char *reduced_transition_table;
|
||||||
|
|
||||||
unsigned char eq_class_counter;
|
unsigned char eq_class_counter;
|
||||||
unsigned short state_count;
|
unsigned short state_count;
|
||||||
};
|
};
|
||||||
|
@ -1138,10 +1219,10 @@ do_table_reduction(FSM_Tables *table, unsigned short state_count){
|
||||||
|
|
||||||
FSM_Tables
|
FSM_Tables
|
||||||
generate_whitespace_skip_table(){
|
generate_whitespace_skip_table(){
|
||||||
unsigned char state_count = LSPP_count;
|
unsigned char state_count = LSPP_count;
|
||||||
FSM_Tables table;
|
FSM_Tables table;
|
||||||
allocate_full_tables(&table, state_count);
|
allocate_full_tables(&table, state_count);
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
Whitespace_FSM wfsm = {0};
|
Whitespace_FSM wfsm = {0};
|
||||||
Whitespace_FSM new_wfsm;
|
Whitespace_FSM new_wfsm;
|
||||||
|
@ -1153,18 +1234,18 @@ generate_whitespace_skip_table(){
|
||||||
table.full_transition_table[i++] = new_wfsm.pp_state + state_count*new_wfsm.white_done;
|
table.full_transition_table[i++] = new_wfsm.pp_state + state_count*new_wfsm.white_done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do_table_reduction(&table, state_count);
|
do_table_reduction(&table, state_count);
|
||||||
|
|
||||||
return(table);
|
return(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
FSM_Tables
|
FSM_Tables
|
||||||
generate_int_table(){
|
generate_int_table(){
|
||||||
unsigned char state_count = LSINT_count;
|
unsigned char state_count = LSINT_count;
|
||||||
FSM_Tables table;
|
FSM_Tables table;
|
||||||
allocate_full_tables(&table, state_count);
|
allocate_full_tables(&table, state_count);
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
Lex_FSM fsm = {0};
|
Lex_FSM fsm = {0};
|
||||||
Lex_FSM new_fsm;
|
Lex_FSM new_fsm;
|
||||||
|
@ -1176,7 +1257,7 @@ generate_int_table(){
|
||||||
table.full_transition_table[i++] = new_fsm.int_state + state_count*new_fsm.emit_token;
|
table.full_transition_table[i++] = new_fsm.int_state + state_count*new_fsm.emit_token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do_table_reduction(&table, state_count);
|
do_table_reduction(&table, state_count);
|
||||||
|
|
||||||
return(table);
|
return(table);
|
||||||
|
@ -1187,7 +1268,7 @@ generate_fsm_table(unsigned char pp_state){
|
||||||
unsigned char state_count = LS_count;
|
unsigned char state_count = LS_count;
|
||||||
FSM_Tables table;
|
FSM_Tables table;
|
||||||
allocate_full_tables(&table, state_count);
|
allocate_full_tables(&table, state_count);
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
Lex_FSM fsm = {0};
|
Lex_FSM fsm = {0};
|
||||||
Lex_FSM new_fsm;
|
Lex_FSM new_fsm;
|
||||||
|
@ -1199,9 +1280,9 @@ generate_fsm_table(unsigned char pp_state){
|
||||||
table.full_transition_table[i++] = new_fsm.state + state_count*new_fsm.emit_token;
|
table.full_transition_table[i++] = new_fsm.state + state_count*new_fsm.emit_token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do_table_reduction(&table, state_count);
|
do_table_reduction(&table, state_count);
|
||||||
|
|
||||||
return(table);
|
return(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1255,18 +1336,21 @@ PP_Names pp_names[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
FSM_Tables
|
FSM_Tables
|
||||||
generate_table_from_abstract_fsm(FSM fsm){
|
generate_table_from_abstract_fsm(FSM fsm, unsigned char real_term_base){
|
||||||
unsigned char state_count = (unsigned char)fsm.count;
|
unsigned char state_count = (unsigned char)fsm.count;
|
||||||
FSM_Tables table;
|
FSM_Tables table;
|
||||||
|
|
||||||
allocate_full_tables(&table, state_count);
|
allocate_full_tables(&table, state_count);
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
unsigned char new_state;
|
unsigned int new_state;
|
||||||
for (unsigned short c = 0; c < 256; ++c){
|
for (unsigned short c = 0; c < 256; ++c){
|
||||||
for (unsigned char state = 0; state < state_count; ++state){
|
for (unsigned char state = 0; state < state_count; ++state){
|
||||||
new_state = fsm.states[state].transition_rule[c];
|
new_state = fsm.states[state].transition_rule[c];
|
||||||
table.full_transition_table[i++] = new_state;
|
if (new_state >= RealTerminateBase){
|
||||||
|
new_state = new_state - RealTerminateBase + real_term_base;
|
||||||
|
}
|
||||||
|
table.full_transition_table[i++] = (unsigned char)new_state;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1314,7 +1398,7 @@ main(){
|
||||||
end_table(file);
|
end_table(file);
|
||||||
|
|
||||||
FSM pp_directive_fsm = generate_pp_directive_fsm();
|
FSM pp_directive_fsm = generate_pp_directive_fsm();
|
||||||
FSM_Tables pp_directive_tables = generate_table_from_abstract_fsm(pp_directive_fsm);
|
FSM_Tables pp_directive_tables = generate_table_from_abstract_fsm(pp_directive_fsm, 0);
|
||||||
|
|
||||||
render_fsm_table(file, pp_directive_tables, "pp_directive");
|
render_fsm_table(file, pp_directive_tables, "pp_directive");
|
||||||
render_variable(file, "unsigned char", "LSDIR_default", 0);
|
render_variable(file, "unsigned char", "LSDIR_default", 0);
|
||||||
|
@ -1324,9 +1408,9 @@ main(){
|
||||||
FSM_Stack keyword_fsms = generate_keyword_fsms();
|
FSM_Stack keyword_fsms = generate_keyword_fsms();
|
||||||
|
|
||||||
char name[1024];
|
char name[1024];
|
||||||
render_variable(file, "unsigned char", "keywords_part_terminal_base", keyword_fsms.fsms[0].terminal_base);
|
|
||||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||||
FSM_Tables partial_keywords_table = generate_table_from_abstract_fsm(keyword_fsms.fsms[i]);
|
FSM_Tables partial_keywords_table =
|
||||||
|
generate_table_from_abstract_fsm(keyword_fsms.fsms[i], keyword_fsms.final_state);
|
||||||
if (keyword_fsms.fsms[i].comment){
|
if (keyword_fsms.fsms[i].comment){
|
||||||
render_comment(file, keyword_fsms.fsms[i].comment);
|
render_comment(file, keyword_fsms.fsms[i].comment);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue