new lexer is sort of in there ... relexing still needs work

This commit is contained in:
Allen Webster 2016-06-04 02:35:57 -04:00
parent c38c0f5082
commit c9e40e492c
6 changed files with 141 additions and 55 deletions

View File

@ -84,6 +84,8 @@ CUSTOM_COMMAND_SIG(set_mark){
View_Summary view = app->get_active_view(app);
app->view_set_mark(app, &view, seek_pos(view.cursor.pos));
// TODO(allen): Just expose the preferred_x separately
app->view_set_cursor(app, &view, seek_pos(view.cursor.pos), 1);
}
CUSTOM_COMMAND_SIG(delete_range){

View File

@ -1404,7 +1404,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
state.end = end;
state.amount = amount;
state.tolerance = tolerance;
Cpp_Get_Token_Result result = cpp_get_token(stack, start);
if (result.token_index <= 0){
state.start_token_i = 0;
@ -1412,18 +1412,24 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
else{
state.start_token_i = result.token_index-1;
}
result = cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0;
else if (end > stack->tokens[result.token_index].start) ++result.token_index;
if (result.token_index < 0){
result.token_index = 0;
}
else if (end > stack->tokens[result.token_index].start){
++result.token_index;
}
state.end_token_i = result.token_index;
state.relex_start = stack->tokens[state.start_token_i].start;
if (start < state.relex_start) state.relex_start = start;
if (start < state.relex_start){
state.relex_start = start;
}
state.space_request = state.end_token_i - state.start_token_i + tolerance + 1;
return state;
return(state);
}
inline Cpp_Token
@ -1446,18 +1452,18 @@ FCPP_LINK bool
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){
Cpp_Token_Stack *stack = state->stack;
Cpp_Token *tokens = stack->tokens;
cpp_shift_token_starts(stack, state->end_token_i, state->amount);
Cpp_Lex_Data lex = {};
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
lex.pos = state->relex_start;
int relex_end_i = state->end_token_i;
Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
Cpp_Token end_token = match_token;
bool went_too_far = 0;
for (;;){
Cpp_Read_Result read = cpp_lex_step(state->file, &lex);
if (read.has_result){
@ -1468,7 +1474,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
break;
}
cpp_push_token_nonalloc(relex_stack, read.token);
while (lex.pos > end_token.start && relex_end_i < stack->count){
++relex_end_i;
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
@ -1480,7 +1486,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
}
if (lex.pos >= state->file.size) break;
}
if (!went_too_far){
if (relex_stack->count > 0){
if (state->start_token_i > 0){
@ -1492,7 +1498,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
relex_stack->tokens[0] = merge.new_token;
}
}
if (relex_end_i < state->stack->count){
Cpp_Token_Merge merge =
cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1],
@ -1503,14 +1509,14 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
}
}
}
*relex_end = relex_end_i;
}
else{
cpp_shift_token_starts(stack, state->end_token_i, -state->amount);
}
return went_too_far;
return(went_too_far);
}
#ifndef FCPP_FORBID_MALLOC

View File

@ -29,8 +29,8 @@
#include "4tech_table.cpp"
#define FCPP_LEXER_IMPLEMENTATION
//#include "test/4cpp_new_lexer.h"
#include "4cpp_lexer.h"
#include "test/4cpp_new_lexer.h"
//#include "4cpp_lexer.h"
#include "4ed_template.cpp"

View File

@ -804,20 +804,31 @@ Job_Callback_Sig(job_full_lex){
cpp_file.data = file->state.buffer.data;
cpp_file.size = file->state.buffer.size;
i32 buffer_size = file->state.buffer.size;
buffer_size = (buffer_size + 3)&(~3);
while (memory->size < buffer_size*2){
system->grow_thread_memory(memory);
}
char *tb = (char*)memory->data;
Cpp_Token_Stack tokens;
tokens.tokens = (Cpp_Token*)memory->data;
tokens.max_count = memory->size / sizeof(Cpp_Token);
tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size);
tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token);
tokens.count = 0;
#if 0
#if 1
b32 still_lexing = 1;
Lex_Data lex = {0};
Lex_Data lex = lex_data_init(tb);
do{
i32 result =
cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048);
cpp_lex_size_nonalloc(&lex,
cpp_file.data, cpp_file.size, cpp_file.size,
&tokens, 2048);
switch (result){
case LexNeedChunk: Assert(!"Invalid Path"); break;
@ -843,7 +854,7 @@ Job_Callback_Sig(job_full_lex){
#else
Cpp_Lex_Data status = {};
Cpp_Lex_Data status = {0};
do{
for (i32 r = 2048; r > 0 && status.pos < cpp_file.size; --r){
@ -978,7 +989,10 @@ file_relex_parallel(System_Functions *system,
relex_space.count = 0;
relex_space.max_count = state.space_request;
relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count);
if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end)){
char *spare = push_array(part, char, cpp_file.size);
if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){
inline_lex = 0;
}
else{
@ -3088,6 +3102,7 @@ view_show_file(View *view){
}
view->showing_ui = VUI_None;
view->current_scroll = &view->recent->scroll;
view->recent->scroll.max_y = view_compute_max_target_y(view);
}
internal void

View File

@ -104,6 +104,11 @@ struct Lex_FSM{
unsigned char emit_token;
unsigned char multi_line;
};
// Returns a Lex_FSM value whose fields are all zero-initialized.
// Exists so call sites can reset an FSM with a plain assignment
// (fsm = zero_lex_fsm();) instead of a braced-list assignment.
inline Lex_FSM
zero_lex_fsm(){
    Lex_FSM cleared = {0};
    return(cleared);
}
// BOTTOM

View File

@ -4,7 +4,7 @@
#ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC
#include "..\4cpp_lexer_types.h"
#include "../4cpp_lexer_types.h"
#include "4cpp_lexer_fsms.h"
#include "4cpp_lexer_tables.c"
@ -294,7 +294,7 @@ cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){
if (token_i > 0){
prev_token = out_tokens[token_i - 1];
merge = new_lex::cpp_attempt_token_merge(prev_token, token);
merge = cpp_attempt_token_merge(prev_token, token);
if (merge.did_merge){
out_tokens[token_i - 1] = merge.new_token;
}
@ -336,6 +336,13 @@ struct Lex_Data{
int __pc__;
};
// Builds a zero-initialized Lex_Data whose 'tb' member points at the
// caller-supplied buffer; every other field starts at zero.
//
// tb: scratch buffer handed to the lexer state. The lexer writes into it
//     through the 'tb' member while scanning.
//     NOTE(review): no capacity is recorded here, so the caller is
//     presumably responsible for making it large enough -- confirm.
inline Lex_Data
lex_data_init(char *tb){
    Lex_Data fresh = {0};
    fresh.tb = tb;
    return(fresh);
}
#define DrCase(PC) case PC: goto resumespot_##PC
@ -355,7 +362,9 @@ enum Lex_Result{
};
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
cpp_lex_nonalloc(Lex_Data *S_ptr,
char *chunk, int size,
Cpp_Token_Stack *token_stack_out){
Lex_Data S = *S_ptr;
Cpp_Token *out_tokens = token_stack_out->tokens;
@ -403,20 +412,22 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.token_start = S.pos;
S.tb_pos = 0;
S.fsm = {0};
S.fsm = zero_lex_fsm();
for(;;){
unsigned short *eq_classes = get_eq_classes[S.pp_state];
unsigned char *fsm_table = get_table[S.pp_state];
for (; S.fsm.state < LS_count && S.pos < end_pos;){
c = chunk[S.pos++];
S.tb[S.tb_pos++] = c;
{
unsigned short *eq_classes = get_eq_classes[S.pp_state];
unsigned char *fsm_table = get_table[S.pp_state];
int i = S.fsm.state + eq_classes[c];
S.fsm.state = fsm_table[i];
S.fsm.multi_line |= multiline_state_table[S.fsm.state];
for (; S.fsm.state < LS_count && S.pos < end_pos;){
c = chunk[S.pos++];
S.tb[S.tb_pos++] = c;
int i = S.fsm.state + eq_classes[c];
S.fsm.state = fsm_table[i];
S.fsm.multi_line |= multiline_state_table[S.fsm.state];
}
S.fsm.emit_token = (S.fsm.state >= LS_count);
}
S.fsm.emit_token = (S.fsm.state >= LS_count);
if (S.fsm.emit_token == 0){
S.chunk_pos += size;
@ -950,7 +961,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
#undef DrCase
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
cpp_lex_nonalloc(Lex_Data *S_ptr,
char *chunk, int size,
Cpp_Token_Stack *token_stack_out, int max_tokens){
Cpp_Token_Stack temp_stack = *token_stack_out;
if (temp_stack.max_count > temp_stack.count + max_tokens){
@ -971,7 +983,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
}
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
cpp_lex_size_nonalloc(Lex_Data *S_ptr,
char *chunk, int size, int full_size,
Cpp_Token_Stack *token_stack_out){
int result = 0;
if (S_ptr->pos >= full_size){
@ -991,7 +1004,8 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
}
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
cpp_lex_size_nonalloc(Lex_Data *S_ptr,
char *chunk, int size, int full_size,
Cpp_Token_Stack *token_stack_out, int max_tokens){
Cpp_Token_Stack temp_stack = *token_stack_out;
if (temp_stack.max_count > temp_stack.count + max_tokens){
@ -1012,7 +1026,6 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
return(result);
}
#if 0
lexer_link Cpp_Relex_State
cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
int start, int end, int amount, int tolerance){
@ -1024,7 +1037,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
state.amount = amount;
state.tolerance = tolerance;
Cpp_Get_Token_Result result = new_lex::cpp_get_token(stack, start);
Cpp_Get_Token_Result result = cpp_get_token(stack, start);
if (result.token_index <= 0){
state.start_token_i = 0;
}
@ -1032,7 +1045,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
state.start_token_i = result.token_index-1;
}
result = new_lex::cpp_get_token(stack, end);
result = cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0;
else if (end > stack->tokens[result.token_index].start) ++result.token_index;
state.end_token_i = result.token_index;
@ -1045,6 +1058,11 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
return(state);
}
// Converts a 16-bit flags value into a Cpp_Preprocessor_State.
// The value is cast directly; no masking or shifting is applied.
// NOTE(review): assumes the whole bitfield encodes the preprocessor
// state -- confirm if other bits are ever packed into it.
inline Cpp_Preprocessor_State
cpp_token_get_pp_state(fcpp_u16 bitfield){
    Cpp_Preprocessor_State pp_state = (Cpp_Preprocessor_State)(bitfield);
    return(pp_state);
}
// TODO(allen): Eliminate this once we actually store the EOF token
// in the token stack.
inline Cpp_Token
@ -1064,23 +1082,62 @@ cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){
}
FCPP_LINK bool
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){
cpp_relex_nonalloc_main(Cpp_Relex_State *state,
Cpp_Token_Stack *relex_stack,
int *relex_end,
char *spare){
Cpp_Token_Stack *stack = state->stack;
Cpp_Token *tokens = stack->tokens;
new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount);
cpp_shift_token_starts(stack, state->end_token_i, state->amount);
Lex_Data lex = {};
Lex_Data lex = lex_data_init(spare);
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
lex.pos = state->relex_start;
int relex_end_i = state->end_token_i;
Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
Cpp_Token end_token = match_token;
bool went_too_far = 0;
int went_too_far = false;
// TODO(allen): This can be better now I suspect.
for (;;){
Cpp_Read_Result read = cpp_lex_step(state->file, &lex);
int result =
cpp_lex_nonalloc(&lex,
state->file.data, state->file.size,
stack, 1);
switch (result){
case LexHitTokenLimit:
{
Cpp_Token token =
stack->tokens[stack->count-1];
if (token.start == end_token.start &&
token.size == end_token.size &&
token.flags == end_token.flags &&
token.state_flags == end_token.state_flags){
--stack->count;
goto double_break;
}
while (lex.pos > end_token.start && relex_end_i < stack->count){
++relex_end_i;
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
}
}
break;
case LexNeedChunk: Assert(!"Invalid path"); break;
case LexNeedTokenMemory:
went_too_far = true;
goto double_break;
case LexFinished:
goto double_break;
}
#if 0
if (read.has_result){
if (read.token.start == end_token.start &&
read.token.size == end_token.size &&
@ -1095,12 +1152,14 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
}
if (relex_stack->count == relex_stack->max_count){
went_too_far = 1;
went_too_far = true;
break;
}
}
if (lex.pos >= state->file.size) break;
#endif
}
double_break:;
if (!went_too_far){
if (relex_stack->count > 0){
@ -1133,7 +1192,6 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
return went_too_far;
}
#endif
#endif