work on new lexer
This commit is contained in:
parent
e1a03017e8
commit
5e56483ec0
14
4cpp_lexer.h
14
4cpp_lexer.h
|
@ -65,6 +65,19 @@ NOTES ON USE:
|
|||
|
||||
#include "4cpp_lexer_types.h"
|
||||
|
||||
struct Cpp_Lex_Data{
|
||||
Cpp_Preprocessor_State pp_state;
|
||||
int pos;
|
||||
int complete;
|
||||
};
|
||||
|
||||
struct Cpp_Read_Result{
|
||||
Cpp_Token token;
|
||||
int pos;
|
||||
char newline;
|
||||
char has_result;
|
||||
};
|
||||
|
||||
Cpp_File
|
||||
data_as_cpp_file(Data data){
|
||||
Cpp_File result;
|
||||
|
@ -140,7 +153,6 @@ FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token);
|
|||
FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token);
|
||||
|
||||
// Returns a Cpp_Lex_Data whose first member is an explicitly cast zero; the
// members not named in the initializer are zero-initialized as well.
inline Cpp_Lex_Data
cpp_lex_data_zero(){
    Cpp_Lex_Data zeroed = {(Cpp_Preprocessor_State)0};
    return(zeroed);
}
|
||||
// Returns an empty token stack: null token pointer, zero count, zero capacity.
inline Cpp_Token_Stack
cpp_token_stack_zero(){
    Cpp_Token_Stack empty = {0};
    return(empty);
}
|
||||
|
||||
FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex);
|
||||
|
||||
|
|
|
@ -209,23 +209,15 @@ enum Cpp_Preprocessor_State{
|
|||
CPP_LEX_PP_COUNT
|
||||
};
|
||||
|
||||
struct Cpp_Lex_Data{
|
||||
Cpp_Preprocessor_State pp_state;
|
||||
int pos;
|
||||
int complete;
|
||||
};
|
||||
|
||||
struct Cpp_Read_Result{
|
||||
Cpp_Token token;
|
||||
int pos;
|
||||
char newline;
|
||||
char has_result;
|
||||
};
|
||||
|
||||
struct Cpp_Token_Stack{
|
||||
Cpp_Token *tokens;
|
||||
int count, max_count;
|
||||
};
|
||||
// Produces a token stack with every member zeroed (no storage, no tokens).
inline Cpp_Token_Stack
cpp_token_stack_zero(){
    Cpp_Token_Stack empty = {0};
    return(empty);
}
|
||||
|
||||
struct Cpp_Token_Merge{
|
||||
Cpp_Token new_token;
|
||||
|
|
|
@ -809,6 +809,40 @@ Job_Callback_Sig(job_full_lex){
|
|||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
tokens.count = 0;
|
||||
|
||||
#if 0
|
||||
|
||||
b32 still_lexing = 1;
|
||||
|
||||
Lex_Data lex = {0};
|
||||
|
||||
do{
|
||||
i32 result =
|
||||
cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048);
|
||||
|
||||
switch (result){
|
||||
case LexNeedChunk: Assert(!"Invalid Path"); break;
|
||||
|
||||
case LexNeedTokenMemory:
|
||||
if (system->check_cancel(thread)){
|
||||
return;
|
||||
}
|
||||
system->grow_thread_memory(memory);
|
||||
tokens.tokens = (Cpp_Token*)memory->data;
|
||||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
break;
|
||||
|
||||
case LexHitTokenLimit:
|
||||
if (system->check_cancel(thread)){
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case LexFinished: still_lexing = 0; break;
|
||||
}
|
||||
} while (still_lexing);
|
||||
|
||||
#else
|
||||
|
||||
Cpp_Lex_Data status = {};
|
||||
|
||||
do{
|
||||
|
@ -836,6 +870,10 @@ Job_Callback_Sig(job_full_lex){
|
|||
}
|
||||
} while(!status.complete);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1));
|
||||
|
||||
system->acquire_lock(FRAME_LOCK);
|
||||
|
@ -948,9 +986,9 @@ file_relex_parallel(System_Functions *system,
|
|||
i32 shift_amount = relex_space.count - delete_amount;
|
||||
|
||||
if (shift_amount != 0){
|
||||
int new_count = stack->count + shift_amount;
|
||||
i32 new_count = stack->count + shift_amount;
|
||||
if (new_count > stack->max_count){
|
||||
int new_max = LargeRoundUp(new_count, Kbytes(1));
|
||||
i32 new_max = LargeRoundUp(new_count, Kbytes(1));
|
||||
stack->tokens = (Cpp_Token*)
|
||||
general_memory_reallocate(general, stack->tokens,
|
||||
stack->count*sizeof(Cpp_Token),
|
||||
|
@ -958,7 +996,7 @@ file_relex_parallel(System_Functions *system,
|
|||
stack->max_count = new_max;
|
||||
}
|
||||
|
||||
int shift_size = stack->count - relex_end;
|
||||
i32 shift_size = stack->count - relex_end;
|
||||
if (shift_size > 0){
|
||||
Cpp_Token *old_base = stack->tokens + relex_end;
|
||||
memmove(old_base + shift_amount, old_base,
|
||||
|
@ -3238,7 +3276,6 @@ try_kill_file(System_Functions *system, Models *models,
|
|||
}
|
||||
else{
|
||||
kill_file(system, models, file, string_zero());
|
||||
view_show_file(view);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3286,6 +3323,7 @@ interactive_view_complete(System_Functions *system, View *view, String dest, i32
|
|||
|
||||
case IAct_Kill:
|
||||
try_kill_file(system, models, 0, 0, dest);
|
||||
view_show_file(view);
|
||||
break;
|
||||
|
||||
case IAct_Sure_To_Close:
|
||||
|
|
3
TODO.txt
3
TODO.txt
|
@ -99,6 +99,9 @@
|
|||
; [X] feedback messages
|
||||
; [X] feedback message API
|
||||
; [X] kill rect
|
||||
; [X] add high DPI support
|
||||
;
|
||||
; [] OS font rendering
|
||||
;
|
||||
; [] file status in custom API
|
||||
; [] user file bar string
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,6 +4,7 @@
|
|||
#ifndef FCPP_NEW_LEXER_INC
|
||||
#define FCPP_NEW_LEXER_INC
|
||||
|
||||
#include "..\4cpp_lexer_types.h"
|
||||
#include "4cpp_lexer_fsms.h"
|
||||
#include "4cpp_lexer_tables.c"
|
||||
|
||||
|
@ -286,22 +287,35 @@ cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){
|
|||
return result;
|
||||
}
|
||||
|
||||
lexer_link void
|
||||
cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){
|
||||
lexer_link int
|
||||
cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){
|
||||
Cpp_Token_Merge merge = {(Cpp_Token_Type)0};
|
||||
Cpp_Token prev_token = {(Cpp_Token_Type)0};
|
||||
|
||||
if (*token_i > 0){
|
||||
prev_token = out_tokens[*token_i - 1];
|
||||
if (token_i > 0){
|
||||
prev_token = out_tokens[token_i - 1];
|
||||
merge = new_lex::cpp_attempt_token_merge(prev_token, token);
|
||||
if (merge.did_merge){
|
||||
out_tokens[*token_i - 1] = merge.new_token;
|
||||
out_tokens[token_i - 1] = merge.new_token;
|
||||
}
|
||||
}
|
||||
|
||||
if (!merge.did_merge){
|
||||
out_tokens[(*token_i)++] = token;
|
||||
out_tokens[token_i++] = token;
|
||||
}
|
||||
|
||||
return(token_i);
|
||||
}
|
||||
|
||||
// Pushes `token` onto `out_tokens` without allocating.
//
// The token is handed to cpp_place_token_nonalloc, which either merges it into
// the previous token or stores it in the next slot, and the stack count is
// updated from its return value.
//
// Returns 1 when the token was placed, 0 when the stack has no free slot (the
// stack is left untouched in that case).
lexer_link bool
cpp_push_token_nonalloc(Cpp_Token_Stack *out_tokens, Cpp_Token token){
    bool result = 0;
    // Place only while a free slot remains. The previous test was `==`, which
    // wrote one element past the end of a full stack and rejected every push
    // while there was still room.
    if (out_tokens->count < out_tokens->max_count){
        out_tokens->count =
            cpp_place_token_nonalloc(out_tokens->tokens, out_tokens->count, token);
        result = 1;
    }
    return(result);
}
|
||||
|
||||
struct Lex_Data{
|
||||
|
@ -311,15 +325,13 @@ struct Lex_Data{
|
|||
|
||||
int pos;
|
||||
int pos_overide;
|
||||
int chunk_pos;
|
||||
|
||||
Lex_FSM fsm;
|
||||
Whitespace_FSM wfsm;
|
||||
unsigned char pp_state;
|
||||
unsigned char completed;
|
||||
|
||||
unsigned short *key_eq_classes;
|
||||
unsigned char *key_table;
|
||||
|
||||
Cpp_Token token;
|
||||
|
||||
int __pc__;
|
||||
|
@ -335,6 +347,13 @@ struct Lex_Data{
|
|||
token_stack_out->count = token_i;\
|
||||
*S_ptr = S; S_ptr->__pc__ = -1; return(n); }
|
||||
|
||||
// Status codes returned by the cpp_lex_nonalloc family. The numeric values are
// spelled out because they match the literals (0, 1, 2) these names replaced.
enum Lex_Result{
    // The lexer ran to completion.
    LexFinished = 0,
    // The current chunk ran out before the active state machine finished;
    // call again with the next chunk.
    LexNeedChunk = 1,
    // The output token stack is full.
    LexNeedTokenMemory = 2,
    // The per-call token limit was reached while the stack still has room.
    LexHitTokenLimit = 3
};
|
||||
|
||||
lexer_link int
|
||||
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
|
||||
Lex_Data S = *S_ptr;
|
||||
|
@ -347,8 +366,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
|
||||
char c = 0;
|
||||
|
||||
int end_pos = size + S.pos;
|
||||
chunk -= S.pos;
|
||||
int end_pos = size + S.chunk_pos;
|
||||
chunk -= S.chunk_pos;
|
||||
|
||||
switch (S.__pc__){
|
||||
DrCase(1);
|
||||
|
@ -357,7 +376,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
DrCase(4);
|
||||
DrCase(5);
|
||||
DrCase(6);
|
||||
DrCase(7);
|
||||
}
|
||||
|
||||
for (;;){
|
||||
|
@ -372,7 +390,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.wfsm.white_done = (S.wfsm.pp_state >= LSPP_count);
|
||||
|
||||
if (S.wfsm.white_done == 0){
|
||||
DrYield(4, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(4, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -400,7 +419,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.fsm.emit_token = (S.fsm.state >= LS_count);
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(3, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(3, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -463,7 +483,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
|
||||
if (S.wfsm.white_done == 0){
|
||||
DrYield(1, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(1, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -485,42 +506,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
|
||||
case LS_identifier:
|
||||
{
|
||||
S.fsm.state = 0;
|
||||
S.fsm.emit_token = 0;
|
||||
S.fsm.sub_machine = 0;
|
||||
--S.pos;
|
||||
for (;;){
|
||||
// TODO(allen): Need to drop down to the instructions to optimize
|
||||
// this correctly I think. This looks like it will have more branches
|
||||
// than it needs unless I am very careful.
|
||||
for (; S.fsm.state < LSKEY_totally_finished && S.pos < end_pos;){
|
||||
// TODO(allen): Rebase these super tables so that we don't have
|
||||
// to do a subtract on the state.
|
||||
S.key_table = key_tables[S.fsm.sub_machine];
|
||||
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
|
||||
for (; S.fsm.state < LSKEY_table_transition && S.pos < end_pos;){
|
||||
c = chunk[S.pos++];
|
||||
S.fsm.state = S.key_table[S.fsm.state + S.key_eq_classes[c]];
|
||||
}
|
||||
if (S.fsm.state >= LSKEY_table_transition && S.fsm.state < LSKEY_totally_finished){
|
||||
S.fsm.sub_machine = S.fsm.state - LSKEY_table_transition;
|
||||
S.fsm.state = 0;
|
||||
}
|
||||
}
|
||||
S.fsm.emit_token = (S.fsm.int_state >= LSKEY_totally_finished);
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(7, 1);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
--S.pos;
|
||||
|
||||
// TODO(allen): do stuff regarding the actual type of the token
|
||||
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
|
||||
S.token.flags = 0;
|
||||
|
||||
#if 0
|
||||
--S.pos;
|
||||
|
||||
int word_size = S.pos - S.token_start;
|
||||
|
@ -553,8 +538,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.token.flags = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}break;
|
||||
|
||||
case LS_pound:
|
||||
|
@ -580,7 +563,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count);
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(6, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(6, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -611,7 +595,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.fsm.emit_token = (S.fsm.int_state >= LSINT_count);
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(5, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(5, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -944,9 +929,9 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
S.token.state_flags = S.pp_state;
|
||||
|
||||
cpp_push_token_nonalloc(out_tokens, &token_i, S.token);
|
||||
token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token);
|
||||
if (token_i == max_token_i){
|
||||
DrYield(2, 2);
|
||||
DrYield(2, LexNeedTokenMemory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -957,13 +942,199 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
}
|
||||
|
||||
DrReturn(0);
|
||||
DrReturn(LexFinished);
|
||||
}
|
||||
|
||||
#undef DrYield
|
||||
#undef DrReturn
|
||||
#undef DrCase
|
||||
|
||||
// Same as the four-argument cpp_lex_nonalloc, except that at most `max_tokens`
// additional tokens may be emitted by this call.
//
// Returns a Lex_Result. When lexing stops only because the per-call limit was
// reached (the real stack still has room), LexHitTokenLimit is returned in
// place of LexNeedTokenMemory.
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
                 Cpp_Token_Stack *token_stack_out, int max_tokens){
    // Lex through a copy of the stack header whose capacity is clamped to the
    // limit; the copy shares the caller's token storage.
    Cpp_Token_Stack capped = *token_stack_out;
    int limit = capped.count + max_tokens;
    if (limit < capped.max_count){
        capped.max_count = limit;
    }

    int status = cpp_lex_nonalloc(S_ptr, chunk, size, &capped);

    // Publish how many tokens were written.
    token_stack_out->count = capped.count;

    // "Out of memory" against the clamped capacity is only a limit hit if the
    // caller's real capacity has not been exhausted.
    if (status == LexNeedTokenMemory &&
        token_stack_out->count < token_stack_out->max_count){
        status = LexHitTokenLimit;
    }

    return(status);
}
|
||||
|
||||
// Chunked lexing for callers that know the total input size.
//
// While S_ptr->pos is below `full_size`, the chunk is lexed normally. Once the
// position has reached `full_size` -- either on entry, or after this call's
// chunk left the lexer asking for more input -- a single zero byte is fed as a
// final one-byte chunk (presumably so the lexer sees a terminator and can
// finish -- TODO confirm).
//
// Returns the Lex_Result of the last cpp_lex_nonalloc call made.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
                      Cpp_Token_Stack *token_stack_out){
    int status = 0;
    int input_exhausted = (S_ptr->pos >= full_size);

    if (!input_exhausted){
        status = cpp_lex_nonalloc(S_ptr, chunk, size, token_stack_out);
        // Re-read the position: the call above advances it.
        input_exhausted = (status == LexNeedChunk && S_ptr->pos >= full_size);
    }

    if (input_exhausted){
        char end_null = 0;
        status = cpp_lex_nonalloc(S_ptr, &end_null, 1, token_stack_out);
    }

    return(status);
}
|
||||
|
||||
// Token-limited variant of cpp_lex_size_nonalloc: behaves like the
// five-argument version but lets this call emit at most `max_tokens` more
// tokens.
//
// Returns a Lex_Result; LexNeedTokenMemory is turned into LexHitTokenLimit when
// the caller's stack still has free capacity after the call.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
                      Cpp_Token_Stack *token_stack_out, int max_tokens){
    // Work on a copy of the stack header with its capacity lowered to the
    // per-call limit (never raised).
    Cpp_Token_Stack bounded = *token_stack_out;
    int cap = bounded.count + max_tokens;
    bounded.max_count = (cap < bounded.max_count) ? cap : bounded.max_count;

    int status = cpp_lex_size_nonalloc(S_ptr, chunk, size, full_size, &bounded);

    token_stack_out->count = bounded.count;

    int real_room_left = (token_stack_out->count < token_stack_out->max_count);
    if (status == LexNeedTokenMemory && real_room_left){
        status = LexHitTokenLimit;
    }

    return(status);
}
|
||||
|
||||
#if 0
|
||||
// Sets up a Cpp_Relex_State for re-lexing after an edit.
//
// Records the arguments in the state, then works out which existing tokens are
// affected:
//   start_token_i - one token before the token found at `start` (clamped to 0)
//   end_token_i   - the token found at `end`, moved one further when `end` lies
//                   past that token's start
//   relex_start   - the smaller of `start` and the start of token start_token_i
//   space_request - number of tokens in [start_token_i, end_token_i) plus
//                   `tolerance` plus one
lexer_link Cpp_Relex_State
cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
                         int start, int end, int amount, int tolerance){
    Cpp_Relex_State state;
    state.file = file;
    state.stack = stack;
    state.start = start;
    state.end = end;
    state.amount = amount;
    state.tolerance = tolerance;

    // First token to re-lex.
    Cpp_Get_Token_Result found = new_lex::cpp_get_token(stack, start);
    state.start_token_i = (found.token_index <= 0) ? 0 : (found.token_index - 1);

    // First token after the edited range.
    found = new_lex::cpp_get_token(stack, end);
    if (found.token_index < 0){
        found.token_index = 0;
    }
    else if (end > stack->tokens[found.token_index].start){
        ++found.token_index;
    }
    state.end_token_i = found.token_index;

    // Begin lexing at the first affected token, or at `start` if that is earlier.
    state.relex_start = stack->tokens[state.start_token_i].start;
    if (state.relex_start > start){
        state.relex_start = start;
    }

    state.space_request = state.end_token_i - state.start_token_i + tolerance + 1;

    return(state);
}
|
||||
|
||||
// TODO(allen): Eliminate this once we actually store the EOF token
|
||||
// in the token stack.
|
||||
// Fetches tokens[index], or a synthesized EOF token when `index` is at or past
// stack->count. The EOF token starts at `size`, has zero size, and has zero
// flags and state flags.
inline Cpp_Token
cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){
    if (index < stack->count){
        return tokens[index];
    }

    Cpp_Token eof_token;
    eof_token.type = CPP_TOKEN_EOF;
    eof_token.start = size;
    eof_token.size = 0;
    eof_token.flags = 0;
    eof_token.state_flags = 0;
    return eof_token;
}
|
||||
|
||||
// Re-lexes the region described by `state`, writing the new tokens into
// `relex_stack`.
//
// Token starts from state->end_token_i onward are first shifted by
// state->amount. Lexing then runs from state->relex_start until one of:
//   - a freshly lexed token matches the current candidate old token,
//   - relex_stack becomes full (treated as failure),
//   - the lexer position reaches the end of the file.
//
// On success, *relex_end receives the index of the first old token that does
// not need replacing, and the first/last new tokens are merged with their old
// neighbors where cpp_attempt_token_merge allows it. On failure the token
// start shift is undone and *relex_end is not written.
//
// Returns nonzero when relex_stack filled up (failure), zero otherwise.
//
// NOTE(review): `lex` is declared as Lex_Data here while cpp_lex_step is
// prototyped with a Cpp_Lex_Data pointer; this function sits inside an
// `#if 0` region -- confirm the intended type before enabling it.
FCPP_LINK bool
|
||||
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){
|
||||
Cpp_Token_Stack *stack = state->stack;
|
||||
Cpp_Token *tokens = stack->tokens;
|
||||
|
||||
// Move the untouched tail of the token array to its post-edit positions.
new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount);
|
||||
|
||||
Lex_Data lex = {};
|
||||
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
|
||||
lex.pos = state->relex_start;
|
||||
|
||||
int relex_end_i = state->end_token_i;
|
||||
Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
|
||||
Cpp_Token end_token = match_token;
|
||||
bool went_too_far = 0;
|
||||
|
||||
for (;;){
|
||||
Cpp_Read_Result read = cpp_lex_step(state->file, &lex);
|
||||
if (read.has_result){
|
||||
// Resynchronized with the old token stream: start, size, flags and state
// flags are compared (the token type is not).
if (read.token.start == end_token.start &&
|
||||
read.token.size == end_token.size &&
|
||||
read.token.flags == end_token.flags &&
|
||||
read.token.state_flags == end_token.state_flags){
|
||||
break;
|
||||
}
|
||||
// The return value of the push is not checked here.
cpp_push_token_nonalloc(relex_stack, read.token);
|
||||
|
||||
// Skip old tokens that the lexer has already passed.
while (lex.pos > end_token.start && relex_end_i < stack->count){
|
||||
++relex_end_i;
|
||||
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
|
||||
}
|
||||
if (relex_stack->count == relex_stack->max_count){
|
||||
went_too_far = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lex.pos >= state->file.size) break;
|
||||
}
|
||||
|
||||
if (!went_too_far){
|
||||
if (relex_stack->count > 0){
|
||||
// Try to merge the first new token with the old token just before the
// relexed range; on success that old token joins the replaced range.
if (state->start_token_i > 0){
|
||||
Cpp_Token_Merge merge =
|
||||
cpp_attempt_token_merge(tokens[state->start_token_i - 1],
|
||||
relex_stack->tokens[0]);
|
||||
if (merge.did_merge){
|
||||
--state->start_token_i;
|
||||
relex_stack->tokens[0] = merge.new_token;
|
||||
}
|
||||
}
|
||||
|
||||
// Likewise for the last new token and the first old token after the range.
if (relex_end_i < state->stack->count){
|
||||
Cpp_Token_Merge merge =
|
||||
cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1],
|
||||
tokens[relex_end_i]);
|
||||
if (merge.did_merge){
|
||||
++relex_end_i;
|
||||
relex_stack->tokens[relex_stack->count-1] = merge.new_token;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*relex_end = relex_end_i;
|
||||
}
|
||||
else{
|
||||
// Failure: undo the shift applied at the top of the function.
cpp_shift_token_starts(stack, state->end_token_i, -state->amount);
|
||||
}
|
||||
|
||||
return went_too_far;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// BOTTOM
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// TODO(allen): In what corner cases, such as invalid files
|
||||
// does the new lexer suffer???
|
||||
|
||||
// TOP
|
||||
|
||||
#include "../4ed_meta.h"
|
||||
|
@ -204,7 +207,8 @@ end_t(Times *t){
|
|||
}
|
||||
|
||||
static void
|
||||
run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
||||
run_experiment(Experiment *exp, char *filename, int verbose,
|
||||
int chunks, int max_tokens){
|
||||
String extension = {};
|
||||
Data file_data;
|
||||
Cpp_File file_cpp;
|
||||
|
@ -224,8 +228,8 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
|||
exp->correct_stack.count = 0;
|
||||
exp->testing_stack.count = 0;
|
||||
|
||||
memset(exp->correct_stack.tokens, TOKEN_ARRAY_SIZE, 0);
|
||||
memset(exp->testing_stack.tokens, TOKEN_ARRAY_SIZE, 0);
|
||||
memset(exp->correct_stack.tokens, 0, TOKEN_ARRAY_SIZE);
|
||||
memset(exp->testing_stack.tokens, 0, TOKEN_ARRAY_SIZE);
|
||||
|
||||
file_cpp.data = (char*)file_data.data;
|
||||
file_cpp.size = file_data.size;
|
||||
|
@ -239,8 +243,9 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
|||
cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data);
|
||||
time.handcoded += (__rdtsc() - start);
|
||||
|
||||
start = __rdtsc();
|
||||
if (max_tokens == 0){
|
||||
if (chunks){
|
||||
start = __rdtsc();
|
||||
int relevant_size = file_data.size + 1;
|
||||
is_last = 0;
|
||||
for (k = 0; k < relevant_size; k += chunks){
|
||||
|
@ -250,15 +255,75 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
|||
is_last = 1;
|
||||
}
|
||||
|
||||
int result = new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data + k, chunk_size, &exp->testing_stack);
|
||||
if (result == 0 || result == 2) break;
|
||||
int result =
|
||||
new_lex::cpp_lex_nonalloc(&ld,
|
||||
(char*)file_data.data + k, chunk_size,
|
||||
&exp->testing_stack);
|
||||
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory) break;
|
||||
}
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
else{
|
||||
start = __rdtsc();
|
||||
new_lex::cpp_lex_nonalloc(&ld,
|
||||
(char*)file_data.data, file_data.size,
|
||||
&exp->testing_stack);
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
}
|
||||
else{
|
||||
new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data, file_data.size, &exp->testing_stack);
|
||||
if (chunks){
|
||||
start = __rdtsc();
|
||||
int relevant_size = file_data.size + 1;
|
||||
is_last = 0;
|
||||
for (k = 0; k < relevant_size; k += chunks){
|
||||
chunk_size = chunks;
|
||||
if (chunk_size + k >= relevant_size){
|
||||
chunk_size = relevant_size - k;
|
||||
is_last = 1;
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
int still_lexing = 1;
|
||||
do{
|
||||
result =
|
||||
new_lex::cpp_lex_size_nonalloc(&ld,
|
||||
(char*)file_data.data + k, chunk_size, file_data.size,
|
||||
&exp->testing_stack,
|
||||
max_tokens);
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory ||
|
||||
result == new_lex::LexNeedChunk){
|
||||
still_lexing = 0;
|
||||
}
|
||||
} while(still_lexing);
|
||||
|
||||
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory) break;
|
||||
}
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
else{
|
||||
start = __rdtsc();
|
||||
int still_lexing = 1;
|
||||
do{
|
||||
int result =
|
||||
new_lex::cpp_lex_size_nonalloc(&ld,
|
||||
(char*)file_data.data, file_data.size, file_data.size,
|
||||
&exp->testing_stack,
|
||||
max_tokens);
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory){
|
||||
still_lexing = 0;
|
||||
}
|
||||
} while(still_lexing);
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(ld.tb);
|
||||
|
||||
|
@ -338,11 +403,12 @@ show_time(Times t, int repeats, char *type){
|
|||
|
||||
int main(){
|
||||
int repeats = 1;
|
||||
int verbose_level = 1;
|
||||
int chunk_start = 0;
|
||||
int chunk_end = 0;
|
||||
int verbose_level = 0;
|
||||
int chunk_start = 32;
|
||||
int chunk_end = 64;
|
||||
#define TEST_FILE "parser_test1.cpp"
|
||||
#define SINGLE_ITEM 1
|
||||
#define SINGLE_ITEM 0
|
||||
int token_limit = 2;
|
||||
|
||||
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
|
||||
int c = 0;
|
||||
|
@ -371,14 +437,14 @@ int main(){
|
|||
begin_t(&chunk_exp_t);
|
||||
printf("With chunks of %d\n", chunks);
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c);
|
||||
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c, token_limit);
|
||||
}
|
||||
end_t(&chunk_exp_t);
|
||||
}
|
||||
|
||||
begin_t(&exp_t);
|
||||
printf("Unchunked\n");
|
||||
run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0);
|
||||
run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0, token_limit);
|
||||
end_t(&exp_t);
|
||||
|
||||
#else
|
||||
|
@ -391,7 +457,7 @@ int main(){
|
|||
if (chunks){
|
||||
begin_t(&chunk_exp_t);
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c);
|
||||
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c, token_limit);
|
||||
}
|
||||
end_t(&chunk_exp_t);
|
||||
}
|
||||
|
@ -399,11 +465,11 @@ int main(){
|
|||
begin_t(&exp_t);
|
||||
if (verbose_level == -1 && chunks){
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
|
||||
}
|
||||
}
|
||||
else{
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
|
||||
}
|
||||
end_t(&exp_t);
|
||||
}
|
||||
|
|
|
@ -576,97 +576,6 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the stack of keyword-recognizing FSMs from `keyword_strings`.
//
// The first FSM/match tree pair is seeded with every keyword. Processing a
// match node can push further "future" FSMs onto `unfinished_futures`; one
// additional FSM/tree pair is then built for each of those, in order.
//
// Returns the FSM stack, with final_state set to table_transition_state plus
// the number of FSMs generated. The returned stack carries the malloc'd `fsms`
// array; the tree and future arrays allocated here are not freed in this
// function, and none of the malloc results are checked.
FSM_Stack
|
||||
generate_keyword_fsms(){
|
||||
Terminal_Lookup_Table terminal_table;
|
||||
Cpp_Token_Type type;
|
||||
|
||||
Future_FSM_Stack unfinished_futures;
|
||||
Match_Tree_Stack tree_stack;
|
||||
FSM_Stack fsm_stack;
|
||||
Match_Tree *tree;
|
||||
FSM *fsm;
|
||||
Future_FSM *future;
|
||||
Match_Node *root_node;
|
||||
FSM_State *root_state;
|
||||
int i, j;
|
||||
|
||||
memset(terminal_table.type_to_state, 0, sizeof(terminal_table.type_to_state));
|
||||
memset(terminal_table.state_to_type, 0, sizeof(terminal_table.state_to_type));
|
||||
|
||||
// Give each distinct keyword token type a terminal state number, and record
// the reverse mapping.
// NOTE(review): terminal_table.state_count is read and incremented below but
// is not set anywhere in this function (only the two arrays are cleared) --
// confirm it is initialized by the type itself.
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
||||
type = (Cpp_Token_Type)keyword_strings[i].flags;
|
||||
if (terminal_table.type_to_state[type] == 0){
|
||||
terminal_table.type_to_state[type] = terminal_table.state_count;
|
||||
terminal_table.state_to_type[terminal_table.state_count] = type;
|
||||
++terminal_table.state_count;
|
||||
}
|
||||
}
|
||||
|
||||
// Fixed-capacity (255 entries) work arrays.
fsm_stack.max = 255;
|
||||
fsm_stack.count = 0;
|
||||
fsm_stack.fsms = (FSM*)malloc(sizeof(FSM)*fsm_stack.max);
|
||||
fsm_stack.table_transition_state = 26;
|
||||
|
||||
tree_stack.max = 255;
|
||||
tree_stack.count = 0;
|
||||
tree_stack.trees = (Match_Tree*)malloc(sizeof(Match_Tree)*tree_stack.max);
|
||||
|
||||
unfinished_futures.max = 255;
|
||||
unfinished_futures.count = 0;
|
||||
unfinished_futures.futures = (Future_FSM*)malloc(sizeof(Future_FSM)*unfinished_futures.max);
|
||||
|
||||
// First FSM: its root match node lists every keyword.
fsm = get_fsm(&fsm_stack);
|
||||
tree = get_tree(&tree_stack);
|
||||
|
||||
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
||||
*tree = tree_init(200);
|
||||
|
||||
root_state = fsm_get_state(fsm, RealTerminateBase);
|
||||
root_node = match_get_node(tree);
|
||||
match_init_node(root_node, ArrayCount(keyword_strings));
|
||||
for (i = 0; i < ArrayCount(keyword_strings); ++i){
|
||||
root_node->words[i] = i;
|
||||
}
|
||||
|
||||
root_node->count = ArrayCount(keyword_strings);
|
||||
root_node->state = root_state;
|
||||
root_node->index = -1;
|
||||
|
||||
// The root occupies future slot 0, so the loop below starts at 1.
push_future_fsm(&unfinished_futures, root_node);
|
||||
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 2, &unfinished_futures);
|
||||
|
||||
// unfinished_futures.count is re-read every iteration, so futures pushed while
// processing are picked up by this same loop.
for (i = 1; i < unfinished_futures.count; ++i){
|
||||
future = unfinished_futures.futures + i;
|
||||
|
||||
fsm = get_fsm(&fsm_stack);
|
||||
tree = get_tree(&tree_stack);
|
||||
|
||||
// FSM i must correspond to future i.
assert((int)(fsm - fsm_stack.fsms) == i);
|
||||
|
||||
*fsm = fsm_init(200, fsm_stack.table_transition_state);
|
||||
*tree = tree_init(200);
|
||||
|
||||
root_state = fsm_get_state(fsm, RealTerminateBase);
|
||||
root_node = match_get_node(tree);
|
||||
match_copy_init_node(root_node, future->source);
|
||||
root_node->state = root_state;
|
||||
|
||||
// Attach the keywords this FSM covers as comments on the FSM.
for (j = 0; j < root_node->count; ++j){
|
||||
char space[1024];
|
||||
sprintf(space, "%s\n", keyword_strings[root_node->words[j]].str);
|
||||
fsm_add_comment(fsm, space);
|
||||
}
|
||||
|
||||
process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 12, &unfinished_futures);
|
||||
}
|
||||
|
||||
// The count is narrowed to unsigned char when computing final_state below.
assert(fsm_stack.count < 255);
|
||||
fsm_stack.final_state = fsm_stack.table_transition_state + (unsigned char)fsm_stack.count;
|
||||
|
||||
return(fsm_stack);
|
||||
}
|
||||
|
||||
Whitespace_FSM
|
||||
whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
|
||||
if (wfsm.pp_state != LSPP_default){
|
||||
|
@ -781,7 +690,6 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
|||
case LS_default:
|
||||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){
|
||||
fsm.state = LS_identifier;
|
||||
fsm.emit_token = 1;
|
||||
}
|
||||
else if (c >= '1' && c <= '9'){
|
||||
fsm.state = LS_number;
|
||||
|
@ -849,13 +757,11 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
|||
}
|
||||
break;
|
||||
|
||||
#if 0
|
||||
case LS_identifier:
|
||||
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
|
||||
fsm.emit_token = 1;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
case LS_pound:
|
||||
switch (c){
|
||||
|
@ -1405,39 +1311,6 @@ main(){
|
|||
render_variable(file, "unsigned char", "LSDIR_count", pp_directive_fsm.count);
|
||||
render_variable(file, "unsigned char", "pp_directive_terminal_base", pp_directive_fsm.terminal_base);
|
||||
|
||||
FSM_Stack keyword_fsms = generate_keyword_fsms();
|
||||
|
||||
char name[1024];
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
FSM_Tables partial_keywords_table =
|
||||
generate_table_from_abstract_fsm(keyword_fsms.fsms[i], keyword_fsms.final_state);
|
||||
if (keyword_fsms.fsms[i].comment){
|
||||
render_comment(file, keyword_fsms.fsms[i].comment);
|
||||
}
|
||||
|
||||
sprintf(name, "keyword_part_%d_table", i);
|
||||
render_fsm_table(file, partial_keywords_table, name);
|
||||
}
|
||||
|
||||
begin_ptr_table(file, "short", "key_eq_class_tables");
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
sprintf(name, "keyword_part_%d_table_eq_classes", i);
|
||||
do_table_item_direct(file, name, "");
|
||||
end_row(file);
|
||||
}
|
||||
end_table(file);
|
||||
|
||||
begin_ptr_table(file, "char", "key_tables");
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
sprintf(name, "keyword_part_%d_table_table", i);
|
||||
do_table_item_direct(file, name, "");
|
||||
end_row(file);
|
||||
}
|
||||
end_table(file);
|
||||
|
||||
fprintf(file, "#define LSKEY_table_transition %d\n", (int)(keyword_fsms.table_transition_state));
|
||||
fprintf(file, "#define LSKEY_totally_finished %d\n", (int)(keyword_fsms.final_state));
|
||||
|
||||
fclose(file);
|
||||
return(0);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue