work on new lexer

This commit is contained in:
Allen Webster 2016-06-01 19:52:06 -04:00
parent e1a03017e8
commit 5e56483ec0
8 changed files with 658 additions and 1568 deletions

View File

@ -65,6 +65,19 @@ NOTES ON USE:
#include "4cpp_lexer_types.h" #include "4cpp_lexer_types.h"
// Persistent lexer state carried across cpp_lex_step calls so a file can
// be lexed incrementally (the caller loops until complete is set).
struct Cpp_Lex_Data{
Cpp_Preprocessor_State pp_state; // preprocessor mode carried between steps
int pos; // byte offset into the file where the next step resumes
int complete; // non-zero once the whole file has been consumed
};
// Result of a single cpp_lex_step call.
struct Cpp_Read_Result{
Cpp_Token token; // the token produced; only meaningful when has_result is set
int pos; // file position after this read
char newline; // NOTE(review): appears to flag a newline crossing -- confirm against cpp_lex_step
char has_result; // non-zero when token holds a freshly lexed token
};
Cpp_File Cpp_File
data_as_cpp_file(Data data){ data_as_cpp_file(Data data){
Cpp_File result; Cpp_File result;
@ -140,7 +153,6 @@ FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token);
FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token); FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token);
inline Cpp_Lex_Data cpp_lex_data_zero() { Cpp_Lex_Data data = {(Cpp_Preprocessor_State)0}; return(data); } inline Cpp_Lex_Data cpp_lex_data_zero() { Cpp_Lex_Data data = {(Cpp_Preprocessor_State)0}; return(data); }
inline Cpp_Token_Stack cpp_token_stack_zero() { Cpp_Token_Stack stack={0}; return(stack); }
FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex); FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex);

View File

@ -209,23 +209,15 @@ enum Cpp_Preprocessor_State{
CPP_LEX_PP_COUNT CPP_LEX_PP_COUNT
}; };
struct Cpp_Lex_Data{
Cpp_Preprocessor_State pp_state;
int pos;
int complete;
};
struct Cpp_Read_Result{
Cpp_Token token;
int pos;
char newline;
char has_result;
};
struct Cpp_Token_Stack{ struct Cpp_Token_Stack{
Cpp_Token *tokens; Cpp_Token *tokens;
int count, max_count; int count, max_count;
}; };
// Build an empty token stack: no backing storage, zero counts.
inline Cpp_Token_Stack
cpp_token_stack_zero(){
    Cpp_Token_Stack stack;
    stack.tokens = 0;
    stack.count = 0;
    stack.max_count = 0;
    return(stack);
}
struct Cpp_Token_Merge{ struct Cpp_Token_Merge{
Cpp_Token new_token; Cpp_Token new_token;

View File

@ -809,6 +809,40 @@ Job_Callback_Sig(job_full_lex){
tokens.max_count = memory->size / sizeof(Cpp_Token); tokens.max_count = memory->size / sizeof(Cpp_Token);
tokens.count = 0; tokens.count = 0;
#if 0
b32 still_lexing = 1;
Lex_Data lex = {0};
do{
i32 result =
cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048);
switch (result){
case LexNeedChunk: Assert(!"Invalid Path"); break;
case LexNeedTokenMemory:
if (system->check_cancel(thread)){
return;
}
system->grow_thread_memory(memory);
tokens.tokens = (Cpp_Token*)memory->data;
tokens.max_count = memory->size / sizeof(Cpp_Token);
break;
case LexHitTokenLimit:
if (system->check_cancel(thread)){
return;
}
break;
case LexFinished: still_lexing = 0; break;
}
} while (still_lexing);
#else
Cpp_Lex_Data status = {}; Cpp_Lex_Data status = {};
do{ do{
@ -836,6 +870,10 @@ Job_Callback_Sig(job_full_lex){
} }
} while(!status.complete); } while(!status.complete);
#endif
i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1)); i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1));
system->acquire_lock(FRAME_LOCK); system->acquire_lock(FRAME_LOCK);
@ -948,9 +986,9 @@ file_relex_parallel(System_Functions *system,
i32 shift_amount = relex_space.count - delete_amount; i32 shift_amount = relex_space.count - delete_amount;
if (shift_amount != 0){ if (shift_amount != 0){
int new_count = stack->count + shift_amount; i32 new_count = stack->count + shift_amount;
if (new_count > stack->max_count){ if (new_count > stack->max_count){
int new_max = LargeRoundUp(new_count, Kbytes(1)); i32 new_max = LargeRoundUp(new_count, Kbytes(1));
stack->tokens = (Cpp_Token*) stack->tokens = (Cpp_Token*)
general_memory_reallocate(general, stack->tokens, general_memory_reallocate(general, stack->tokens,
stack->count*sizeof(Cpp_Token), stack->count*sizeof(Cpp_Token),
@ -958,7 +996,7 @@ file_relex_parallel(System_Functions *system,
stack->max_count = new_max; stack->max_count = new_max;
} }
int shift_size = stack->count - relex_end; i32 shift_size = stack->count - relex_end;
if (shift_size > 0){ if (shift_size > 0){
Cpp_Token *old_base = stack->tokens + relex_end; Cpp_Token *old_base = stack->tokens + relex_end;
memmove(old_base + shift_amount, old_base, memmove(old_base + shift_amount, old_base,
@ -3238,7 +3276,6 @@ try_kill_file(System_Functions *system, Models *models,
} }
else{ else{
kill_file(system, models, file, string_zero()); kill_file(system, models, file, string_zero());
view_show_file(view);
} }
} }
} }
@ -3286,6 +3323,7 @@ interactive_view_complete(System_Functions *system, View *view, String dest, i32
case IAct_Kill: case IAct_Kill:
try_kill_file(system, models, 0, 0, dest); try_kill_file(system, models, 0, 0, dest);
view_show_file(view);
break; break;
case IAct_Sure_To_Close: case IAct_Sure_To_Close:

View File

@ -99,6 +99,9 @@
; [X] feedback messages ; [X] feedback messages
; [X] feedback message API ; [X] feedback message API
; [X] kill rect ; [X] kill rect
; [X] add high DPI support
;
; [] OS font rendering
; ;
; [] file status in custom API ; [] file status in custom API
; [] user file bar string ; [] user file bar string

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,7 @@
#ifndef FCPP_NEW_LEXER_INC #ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC #define FCPP_NEW_LEXER_INC
#include "..\4cpp_lexer_types.h"
#include "4cpp_lexer_fsms.h" #include "4cpp_lexer_fsms.h"
#include "4cpp_lexer_tables.c" #include "4cpp_lexer_tables.c"
@ -286,22 +287,35 @@ cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){
return result; return result;
} }
lexer_link void lexer_link int
cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){ cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){
Cpp_Token_Merge merge = {(Cpp_Token_Type)0}; Cpp_Token_Merge merge = {(Cpp_Token_Type)0};
Cpp_Token prev_token = {(Cpp_Token_Type)0}; Cpp_Token prev_token = {(Cpp_Token_Type)0};
if (*token_i > 0){ if (token_i > 0){
prev_token = out_tokens[*token_i - 1]; prev_token = out_tokens[token_i - 1];
merge = new_lex::cpp_attempt_token_merge(prev_token, token); merge = new_lex::cpp_attempt_token_merge(prev_token, token);
if (merge.did_merge){ if (merge.did_merge){
out_tokens[*token_i - 1] = merge.new_token; out_tokens[token_i - 1] = merge.new_token;
} }
} }
if (!merge.did_merge){ if (!merge.did_merge){
out_tokens[(*token_i)++] = token; out_tokens[token_i++] = token;
} }
return(token_i);
}
// Append a token to the stack when there is room, delegating to
// cpp_place_token_nonalloc (which may merge it with the previous token).
// Returns true when the token was stored, false when the stack was full
// and the token had to be dropped.
lexer_link bool
cpp_push_token_nonalloc(Cpp_Token_Stack *out_tokens, Cpp_Token token){
    bool result = 0;
    // BUG FIX: the original tested (count == max_count), which stored the
    // token only when the stack was already FULL -- the inverse of the
    // intent -- and overran the buffer.  Store only while space remains.
    if (out_tokens->count < out_tokens->max_count){
        out_tokens->count =
            cpp_place_token_nonalloc(out_tokens->tokens, out_tokens->count, token);
        result = 1;
    }
    return(result);
}
struct Lex_Data{ struct Lex_Data{
@ -311,15 +325,13 @@ struct Lex_Data{
int pos; int pos;
int pos_overide; int pos_overide;
int chunk_pos;
Lex_FSM fsm; Lex_FSM fsm;
Whitespace_FSM wfsm; Whitespace_FSM wfsm;
unsigned char pp_state; unsigned char pp_state;
unsigned char completed; unsigned char completed;
unsigned short *key_eq_classes;
unsigned char *key_table;
Cpp_Token token; Cpp_Token token;
int __pc__; int __pc__;
@ -335,6 +347,13 @@ struct Lex_Data{
token_stack_out->count = token_i;\ token_stack_out->count = token_i;\
*S_ptr = S; S_ptr->__pc__ = -1; return(n); } *S_ptr = S; S_ptr->__pc__ = -1; return(n); }
// Reason codes returned by cpp_lex_nonalloc / cpp_lex_size_nonalloc
// telling the caller why the lexer paused.
enum Lex_Result{
LexFinished, // the entire input has been tokenized
LexNeedChunk, // current chunk exhausted; feed the next chunk and call again
LexNeedTokenMemory, // token stack is full; grow it and call again
LexHitTokenLimit // stopped after max_tokens tokens (see the max_tokens wrappers)
};
lexer_link int lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
Lex_Data S = *S_ptr; Lex_Data S = *S_ptr;
@ -347,8 +366,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
char c = 0; char c = 0;
int end_pos = size + S.pos; int end_pos = size + S.chunk_pos;
chunk -= S.pos; chunk -= S.chunk_pos;
switch (S.__pc__){ switch (S.__pc__){
DrCase(1); DrCase(1);
@ -357,7 +376,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
DrCase(4); DrCase(4);
DrCase(5); DrCase(5);
DrCase(6); DrCase(6);
DrCase(7);
} }
for (;;){ for (;;){
@ -372,7 +390,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.wfsm.white_done = (S.wfsm.pp_state >= LSPP_count); S.wfsm.white_done = (S.wfsm.pp_state >= LSPP_count);
if (S.wfsm.white_done == 0){ if (S.wfsm.white_done == 0){
DrYield(4, 1); S.chunk_pos += size;
DrYield(4, LexNeedChunk);
} }
else break; else break;
} }
@ -400,7 +419,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.fsm.emit_token = (S.fsm.state >= LS_count); S.fsm.emit_token = (S.fsm.state >= LS_count);
if (S.fsm.emit_token == 0){ if (S.fsm.emit_token == 0){
DrYield(3, 1); S.chunk_pos += size;
DrYield(3, LexNeedChunk);
} }
else break; else break;
} }
@ -463,7 +483,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
} }
if (S.wfsm.white_done == 0){ if (S.wfsm.white_done == 0){
DrYield(1, 1); S.chunk_pos += size;
DrYield(1, LexNeedChunk);
} }
else break; else break;
} }
@ -485,42 +506,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
case LS_identifier: case LS_identifier:
{ {
S.fsm.state = 0;
S.fsm.emit_token = 0;
S.fsm.sub_machine = 0;
--S.pos;
for (;;){
// TODO(allen): Need to drop down to the instructions to optimize
// this correctly I think. This looks like it will have more branches
// than it needs unless I am very careful.
for (; S.fsm.state < LSKEY_totally_finished && S.pos < end_pos;){
// TODO(allen): Rebase these super tables so that we don't have
// to do a subtract on the state.
S.key_table = key_tables[S.fsm.sub_machine];
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
for (; S.fsm.state < LSKEY_table_transition && S.pos < end_pos;){
c = chunk[S.pos++];
S.fsm.state = S.key_table[S.fsm.state + S.key_eq_classes[c]];
}
if (S.fsm.state >= LSKEY_table_transition && S.fsm.state < LSKEY_totally_finished){
S.fsm.sub_machine = S.fsm.state - LSKEY_table_transition;
S.fsm.state = 0;
}
}
S.fsm.emit_token = (S.fsm.int_state >= LSKEY_totally_finished);
if (S.fsm.emit_token == 0){
DrYield(7, 1);
}
else break;
}
--S.pos;
// TODO(allen): do stuff regarding the actual type of the token
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
S.token.flags = 0;
#if 0
--S.pos; --S.pos;
int word_size = S.pos - S.token_start; int word_size = S.pos - S.token_start;
@ -553,8 +538,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.token.flags = 0; S.token.flags = 0;
} }
} }
#endif
}break; }break;
case LS_pound: case LS_pound:
@ -580,7 +563,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count); S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count);
if (S.fsm.emit_token == 0){ if (S.fsm.emit_token == 0){
DrYield(6, 1); S.chunk_pos += size;
DrYield(6, LexNeedChunk);
} }
else break; else break;
} }
@ -611,7 +595,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.fsm.emit_token = (S.fsm.int_state >= LSINT_count); S.fsm.emit_token = (S.fsm.int_state >= LSINT_count);
if (S.fsm.emit_token == 0){ if (S.fsm.emit_token == 0){
DrYield(5, 1); S.chunk_pos += size;
DrYield(5, LexNeedChunk);
} }
else break; else break;
} }
@ -944,9 +929,9 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
} }
S.token.state_flags = S.pp_state; S.token.state_flags = S.pp_state;
cpp_push_token_nonalloc(out_tokens, &token_i, S.token); token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token);
if (token_i == max_token_i){ if (token_i == max_token_i){
DrYield(2, 2); DrYield(2, LexNeedTokenMemory);
} }
} }
} }
@ -957,13 +942,199 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
} }
} }
DrReturn(0); DrReturn(LexFinished);
} }
#undef DrYield #undef DrYield
#undef DrReturn #undef DrReturn
#undef DrCase #undef DrCase
// Wrapper around the core lexer that emits at most max_tokens tokens per
// call.  It clamps the capacity the core lexer can see, then translates
// the resulting "out of memory" into LexHitTokenLimit when the real stack
// still has free space (i.e. only the artificial cap was hit).
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
                 Cpp_Token_Stack *token_stack_out, int max_tokens){
    Cpp_Token_Stack limited = *token_stack_out;
    int cap = limited.count + max_tokens;
    if (cap < limited.max_count){
        limited.max_count = cap;
    }
    int result = cpp_lex_nonalloc(S_ptr, chunk, size, &limited);
    token_stack_out->count = limited.count;
    // The core lexer cannot tell the artificial cap from real exhaustion;
    // report LexHitTokenLimit when space actually remains.
    if (result == LexNeedTokenMemory &&
        token_stack_out->count < token_stack_out->max_count){
        result = LexHitTokenLimit;
    }
    return(result);
}
// Drive the core lexer over a chunk, feeding a synthetic terminating null
// byte once the true end of the file (full_size) has been reached so the
// lexer can close out its final token instead of asking for more data.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
                      Cpp_Token_Stack *token_stack_out){
    int result;
    if (S_ptr->pos < full_size){
        result = cpp_lex_nonalloc(S_ptr, chunk, size, token_stack_out);
        // The chunk ran dry exactly at end of file: feed the terminator
        // now rather than bouncing back to the caller for another chunk.
        if (result == LexNeedChunk && S_ptr->pos >= full_size){
            char terminator = 0;
            result = cpp_lex_nonalloc(S_ptr, &terminator, 1, token_stack_out);
        }
    }
    else{
        char terminator = 0;
        result = cpp_lex_nonalloc(S_ptr, &terminator, 1, token_stack_out);
    }
    return(result);
}
// Combination wrapper: size-aware lexing (see cpp_lex_size_nonalloc above)
// with an additional per-call token cap of max_tokens.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
Cpp_Token_Stack *token_stack_out, int max_tokens){
// Clamp the capacity the inner lexer can see so it emits at most
// max_tokens more tokens on this call.
Cpp_Token_Stack temp_stack = *token_stack_out;
if (temp_stack.max_count > temp_stack.count + max_tokens){
temp_stack.max_count = temp_stack.count + max_tokens;
}
int result = cpp_lex_size_nonalloc(S_ptr, chunk, size, full_size,
&temp_stack);
token_stack_out->count = temp_stack.count;
// Distinguish the artificial cap from real stack exhaustion: if the
// real stack still has room, the cap was what stopped us.
if (result == LexNeedTokenMemory){
if (token_stack_out->count < token_stack_out->max_count){
result = LexHitTokenLimit;
}
}
return(result);
}
#if 0
// Prepare a relex operation over the byte range [start, end) that was
// edited by `amount` bytes.  Locates the token range affected by the edit
// and computes where fresh lexing must begin, plus how many tokens of
// scratch space the caller should provide (space_request).
lexer_link Cpp_Relex_State
cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
int start, int end, int amount, int tolerance){
Cpp_Relex_State state;
state.file = file;
state.stack = stack;
state.start = start;
state.end = end;
state.amount = amount;
state.tolerance = tolerance;
// Back up one token before the edit so a token that straddles the edit
// boundary is re-lexed too.
Cpp_Get_Token_Result result = new_lex::cpp_get_token(stack, start);
if (result.token_index <= 0){
state.start_token_i = 0;
}
else{
state.start_token_i = result.token_index-1;
}
// First token wholly past the edited range.
result = new_lex::cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0;
else if (end > stack->tokens[result.token_index].start) ++result.token_index;
state.end_token_i = result.token_index;
// Relexing starts at the earlier of the edit position and the start of
// the first affected token.
state.relex_start = stack->tokens[state.start_token_i].start;
if (start < state.relex_start) state.relex_start = start;
// Tokens spanned by the edit plus tolerance slack plus one sentinel.
state.space_request = state.end_token_i - state.start_token_i + tolerance + 1;
return(state);
}
// TODO(allen): Eliminate this once we actually store the EOF token
// in the token stack.
// Fetch token `index` from `tokens`, synthesizing a zero-length EOF token
// at file end when the index is past the stack's count.  Note the bounds
// check uses stack->count while the read goes through the separate
// `tokens` pointer the caller captured before any shifting.
inline Cpp_Token
cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){
Cpp_Token result;
if (index < stack->count){
result = tokens[index];
}
else{
// Synthetic EOF: starts at the file size, zero length, no flags.
result.start = size;
result.size = 0;
result.type = CPP_TOKEN_EOF;
result.flags = 0;
result.state_flags = 0;
}
return result;
}
// Run the relex prepared by cpp_relex_nonalloc_start: re-lex from the edit
// point until the new token stream re-synchronizes with the old one, or
// until relex_stack runs out of space.  Returns true ("went too far") when
// the scratch stack overflowed -- the caller must then fall back to a full
// lex -- and in that case the token-start shift is undone.  On success,
// *relex_end receives the index of the first old token to keep.
FCPP_LINK bool
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){
Cpp_Token_Stack *stack = state->stack;
Cpp_Token *tokens = stack->tokens;
// Shift the starts of all tokens after the edit by the edit delta so
// they can be compared against freshly lexed tokens.
new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount);
Lex_Data lex = {};
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
lex.pos = state->relex_start;
int relex_end_i = state->end_token_i;
Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
Cpp_Token end_token = match_token;
bool went_too_far = 0;
for (;;){
Cpp_Read_Result read = cpp_lex_step(state->file, &lex);
if (read.has_result){
// Re-synchronized: the new token exactly matches an old one, so
// everything from here on is unchanged.
if (read.token.start == end_token.start &&
read.token.size == end_token.size &&
read.token.flags == end_token.flags &&
read.token.state_flags == end_token.state_flags){
break;
}
cpp_push_token_nonalloc(relex_stack, read.token);
// Advance the candidate old token past the current lex position.
while (lex.pos > end_token.start && relex_end_i < stack->count){
++relex_end_i;
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
}
// Scratch space exhausted without re-syncing.
if (relex_stack->count == relex_stack->max_count){
went_too_far = 1;
break;
}
}
if (lex.pos >= state->file.size) break;
}
if (!went_too_far){
// Try to merge the relexed run's edge tokens with their unchanged
// neighbors (e.g. adjacent string literals).
if (relex_stack->count > 0){
if (state->start_token_i > 0){
Cpp_Token_Merge merge =
cpp_attempt_token_merge(tokens[state->start_token_i - 1],
relex_stack->tokens[0]);
if (merge.did_merge){
--state->start_token_i;
relex_stack->tokens[0] = merge.new_token;
}
}
if (relex_end_i < state->stack->count){
Cpp_Token_Merge merge =
cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1],
tokens[relex_end_i]);
if (merge.did_merge){
++relex_end_i;
relex_stack->tokens[relex_stack->count-1] = merge.new_token;
}
}
}
*relex_end = relex_end_i;
}
else{
// Overflowed: undo the start shift so the stack is unchanged.
cpp_shift_token_starts(stack, state->end_token_i, -state->amount);
}
return went_too_far;
}
#endif
#endif #endif
// BOTTOM // BOTTOM

View File

@ -7,6 +7,9 @@
* *
*/ */
// TODO(allen): In what corner cases, such as invalid files
// does the new lexer suffer???
// TOP // TOP
#include "../4ed_meta.h" #include "../4ed_meta.h"
@ -204,7 +207,8 @@ end_t(Times *t){
} }
static void static void
run_experiment(Experiment *exp, char *filename, int verbose, int chunks){ run_experiment(Experiment *exp, char *filename, int verbose,
int chunks, int max_tokens){
String extension = {}; String extension = {};
Data file_data; Data file_data;
Cpp_File file_cpp; Cpp_File file_cpp;
@ -224,8 +228,8 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
exp->correct_stack.count = 0; exp->correct_stack.count = 0;
exp->testing_stack.count = 0; exp->testing_stack.count = 0;
memset(exp->correct_stack.tokens, TOKEN_ARRAY_SIZE, 0); memset(exp->correct_stack.tokens, 0, TOKEN_ARRAY_SIZE);
memset(exp->testing_stack.tokens, TOKEN_ARRAY_SIZE, 0); memset(exp->testing_stack.tokens, 0, TOKEN_ARRAY_SIZE);
file_cpp.data = (char*)file_data.data; file_cpp.data = (char*)file_data.data;
file_cpp.size = file_data.size; file_cpp.size = file_data.size;
@ -239,8 +243,9 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data); cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data);
time.handcoded += (__rdtsc() - start); time.handcoded += (__rdtsc() - start);
start = __rdtsc(); if (max_tokens == 0){
if (chunks){ if (chunks){
start = __rdtsc();
int relevant_size = file_data.size + 1; int relevant_size = file_data.size + 1;
is_last = 0; is_last = 0;
for (k = 0; k < relevant_size; k += chunks){ for (k = 0; k < relevant_size; k += chunks){
@ -250,15 +255,75 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
is_last = 1; is_last = 1;
} }
int result = new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data + k, chunk_size, &exp->testing_stack); int result =
if (result == 0 || result == 2) break; new_lex::cpp_lex_nonalloc(&ld,
(char*)file_data.data + k, chunk_size,
&exp->testing_stack);
if (result == new_lex::LexFinished ||
result == new_lex::LexNeedTokenMemory) break;
}
time.fsm += (__rdtsc() - start);
}
else{
start = __rdtsc();
new_lex::cpp_lex_nonalloc(&ld,
(char*)file_data.data, file_data.size,
&exp->testing_stack);
time.fsm += (__rdtsc() - start);
} }
} }
else{ else{
new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data, file_data.size, &exp->testing_stack); if (chunks){
start = __rdtsc();
int relevant_size = file_data.size + 1;
is_last = 0;
for (k = 0; k < relevant_size; k += chunks){
chunk_size = chunks;
if (chunk_size + k >= relevant_size){
chunk_size = relevant_size - k;
is_last = 1;
}
int result = 0;
int still_lexing = 1;
do{
result =
new_lex::cpp_lex_size_nonalloc(&ld,
(char*)file_data.data + k, chunk_size, file_data.size,
&exp->testing_stack,
max_tokens);
if (result == new_lex::LexFinished ||
result == new_lex::LexNeedTokenMemory ||
result == new_lex::LexNeedChunk){
still_lexing = 0;
}
} while(still_lexing);
if (result == new_lex::LexFinished ||
result == new_lex::LexNeedTokenMemory) break;
} }
time.fsm += (__rdtsc() - start); time.fsm += (__rdtsc() - start);
} }
else{
start = __rdtsc();
int still_lexing = 1;
do{
int result =
new_lex::cpp_lex_size_nonalloc(&ld,
(char*)file_data.data, file_data.size, file_data.size,
&exp->testing_stack,
max_tokens);
if (result == new_lex::LexFinished ||
result == new_lex::LexNeedTokenMemory){
still_lexing = 0;
}
} while(still_lexing);
time.fsm += (__rdtsc() - start);
}
}
}
free(ld.tb); free(ld.tb);
@ -338,11 +403,12 @@ show_time(Times t, int repeats, char *type){
int main(){ int main(){
int repeats = 1; int repeats = 1;
int verbose_level = 1; int verbose_level = 0;
int chunk_start = 0; int chunk_start = 32;
int chunk_end = 0; int chunk_end = 64;
#define TEST_FILE "parser_test1.cpp" #define TEST_FILE "parser_test1.cpp"
#define SINGLE_ITEM 1 #define SINGLE_ITEM 0
int token_limit = 2;
int chunks = (chunk_start > 0 && chunk_start <= chunk_end); int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
int c = 0; int c = 0;
@ -371,14 +437,14 @@ int main(){
begin_t(&chunk_exp_t); begin_t(&chunk_exp_t);
printf("With chunks of %d\n", chunks); printf("With chunks of %d\n", chunks);
for (c = chunk_start; c <= chunk_end; ++c){ for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c); run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c, token_limit);
} }
end_t(&chunk_exp_t); end_t(&chunk_exp_t);
} }
begin_t(&exp_t); begin_t(&exp_t);
printf("Unchunked\n"); printf("Unchunked\n");
run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0); run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0, token_limit);
end_t(&exp_t); end_t(&exp_t);
#else #else
@ -391,7 +457,7 @@ int main(){
if (chunks){ if (chunks){
begin_t(&chunk_exp_t); begin_t(&chunk_exp_t);
for (c = chunk_start; c <= chunk_end; ++c){ for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c); run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c, token_limit);
} }
end_t(&chunk_exp_t); end_t(&chunk_exp_t);
} }
@ -399,11 +465,11 @@ int main(){
begin_t(&exp_t); begin_t(&exp_t);
if (verbose_level == -1 && chunks){ if (verbose_level == -1 && chunks){
for (c = chunk_start; c <= chunk_end; ++c){ for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0); run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
} }
} }
else{ else{
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0); run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
} }
end_t(&exp_t); end_t(&exp_t);
} }

View File

@ -576,97 +576,6 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
} }
} }
// Build the stack of keyword-recognition FSM tables: one root FSM plus one
// "future" FSM per deferred sub-machine, each generated from the
// keyword_strings match tree.  Returns the populated FSM_Stack; the
// malloc'd arrays inside it are owned by the caller.
FSM_Stack
generate_keyword_fsms(){
    Terminal_Lookup_Table terminal_table;
    Cpp_Token_Type type;
    Future_FSM_Stack unfinished_futures;
    Match_Tree_Stack tree_stack;
    FSM_Stack fsm_stack;
    Match_Tree *tree;
    FSM *fsm;
    Future_FSM *future;
    Match_Node *root_node;
    FSM_State *root_state;
    int i, j;

    memset(terminal_table.type_to_state, 0, sizeof(terminal_table.type_to_state));
    memset(terminal_table.state_to_type, 0, sizeof(terminal_table.state_to_type));
    // BUG FIX: state_count was read and incremented below without ever
    // being initialized (the memsets cover only the two arrays).
    terminal_table.state_count = 0;

    // Assign one terminal state per distinct token type among the keywords.
    for (i = 0; i < ArrayCount(keyword_strings); ++i){
        type = (Cpp_Token_Type)keyword_strings[i].flags;
        if (terminal_table.type_to_state[type] == 0){
            terminal_table.type_to_state[type] = terminal_table.state_count;
            terminal_table.state_to_type[terminal_table.state_count] = type;
            ++terminal_table.state_count;
        }
    }

    fsm_stack.max = 255;
    fsm_stack.count = 0;
    fsm_stack.fsms = (FSM*)malloc(sizeof(FSM)*fsm_stack.max);
    fsm_stack.table_transition_state = 26;

    tree_stack.max = 255;
    tree_stack.count = 0;
    tree_stack.trees = (Match_Tree*)malloc(sizeof(Match_Tree)*tree_stack.max);

    unfinished_futures.max = 255;
    unfinished_futures.count = 0;
    unfinished_futures.futures = (Future_FSM*)malloc(sizeof(Future_FSM)*unfinished_futures.max);

    // Root FSM seeded with every keyword.
    fsm = get_fsm(&fsm_stack);
    tree = get_tree(&tree_stack);
    *fsm = fsm_init(200, fsm_stack.table_transition_state);
    *tree = tree_init(200);
    root_state = fsm_get_state(fsm, RealTerminateBase);
    root_node = match_get_node(tree);
    match_init_node(root_node, ArrayCount(keyword_strings));
    for (i = 0; i < ArrayCount(keyword_strings); ++i){
        root_node->words[i] = i;
    }
    root_node->count = ArrayCount(keyword_strings);
    root_node->state = root_state;
    root_node->index = -1;

    push_future_fsm(&unfinished_futures, root_node);
    process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 2, &unfinished_futures);

    // Processing the root may enqueue deferred sub-machines; generate an
    // FSM for each (index 0 is the root itself, so start at 1).
    for (i = 1; i < unfinished_futures.count; ++i){
        future = unfinished_futures.futures + i;
        fsm = get_fsm(&fsm_stack);
        tree = get_tree(&tree_stack);
        assert((int)(fsm - fsm_stack.fsms) == i);
        *fsm = fsm_init(200, fsm_stack.table_transition_state);
        *tree = tree_init(200);
        root_state = fsm_get_state(fsm, RealTerminateBase);
        root_node = match_get_node(tree);
        match_copy_init_node(root_node, future->source);
        root_node->state = root_state;

        // Record which keywords this sub-machine matches, for the
        // generated file's comments.
        for (j = 0; j < root_node->count; ++j){
            char space[1024];
            sprintf(space, "%s\n", keyword_strings[root_node->words[j]].str);
            fsm_add_comment(fsm, space);
        }

        process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 12, &unfinished_futures);
    }
    assert(fsm_stack.count < 255);
    fsm_stack.final_state = fsm_stack.table_transition_state + (unsigned char)fsm_stack.count;

    return(fsm_stack);
}
Whitespace_FSM Whitespace_FSM
whitespace_skip_fsm(Whitespace_FSM wfsm, char c){ whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
if (wfsm.pp_state != LSPP_default){ if (wfsm.pp_state != LSPP_default){
@ -781,7 +690,6 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
case LS_default: case LS_default:
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){
fsm.state = LS_identifier; fsm.state = LS_identifier;
fsm.emit_token = 1;
} }
else if (c >= '1' && c <= '9'){ else if (c >= '1' && c <= '9'){
fsm.state = LS_number; fsm.state = LS_number;
@ -849,13 +757,11 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
} }
break; break;
#if 0
case LS_identifier: case LS_identifier:
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){ if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
fsm.emit_token = 1; fsm.emit_token = 1;
} }
break; break;
#endif
case LS_pound: case LS_pound:
switch (c){ switch (c){
@ -1405,39 +1311,6 @@ main(){
render_variable(file, "unsigned char", "LSDIR_count", pp_directive_fsm.count); render_variable(file, "unsigned char", "LSDIR_count", pp_directive_fsm.count);
render_variable(file, "unsigned char", "pp_directive_terminal_base", pp_directive_fsm.terminal_base); render_variable(file, "unsigned char", "pp_directive_terminal_base", pp_directive_fsm.terminal_base);
FSM_Stack keyword_fsms = generate_keyword_fsms();
char name[1024];
for (int i = 0; i < keyword_fsms.count; ++i){
FSM_Tables partial_keywords_table =
generate_table_from_abstract_fsm(keyword_fsms.fsms[i], keyword_fsms.final_state);
if (keyword_fsms.fsms[i].comment){
render_comment(file, keyword_fsms.fsms[i].comment);
}
sprintf(name, "keyword_part_%d_table", i);
render_fsm_table(file, partial_keywords_table, name);
}
begin_ptr_table(file, "short", "key_eq_class_tables");
for (int i = 0; i < keyword_fsms.count; ++i){
sprintf(name, "keyword_part_%d_table_eq_classes", i);
do_table_item_direct(file, name, "");
end_row(file);
}
end_table(file);
begin_ptr_table(file, "char", "key_tables");
for (int i = 0; i < keyword_fsms.count; ++i){
sprintf(name, "keyword_part_%d_table_table", i);
do_table_item_direct(file, name, "");
end_row(file);
}
end_table(file);
fprintf(file, "#define LSKEY_table_transition %d\n", (int)(keyword_fsms.table_transition_state));
fprintf(file, "#define LSKEY_totally_finished %d\n", (int)(keyword_fsms.final_state));
fclose(file); fclose(file);
return(0); return(0);
} }