From 8e3cc45e38efcb3f9ece120b5e0cd8b427f20e97 Mon Sep 17 00:00:00 2001
From: Allen Webster
Date: Thu, 31 Oct 2019 11:42:30 -0700
Subject: [PATCH] Sketch up of the parser

---
 custom/4coder_code_index.cpp      | 227 ++++++++++++++++++++++++++++++
 custom/4coder_code_index.h        |  69 +++++++++
 custom/4coder_default_hooks.cpp   |  68 +++++++++
 custom/4coder_default_include.cpp |   2 +
 custom/4coder_layout_rule.cpp     |   3 +-
 5 files changed, 367 insertions(+), 2 deletions(-)
 create mode 100644 custom/4coder_code_index.cpp
 create mode 100644 custom/4coder_code_index.h

diff --git a/custom/4coder_code_index.cpp b/custom/4coder_code_index.cpp
new file mode 100644
index 00000000..82edd803
--- /dev/null
+++ b/custom/4coder_code_index.cpp
@@ -0,0 +1,227 @@
+/*
+4coder_code_index.cpp - Generic code indexing system for layout, definition jumps, etc.
+*/
+
+// TOP
+
+// Appends nest to the end of list and bumps the list's count.
+function void
+code_index_push_nest(Code_Index_Nest_List *list, Code_Index_Nest *nest){
+    sll_queue_push(list->first, list->last, nest);
+    list->count += 1;
+}
+
+// Builds an array of pointers to the nodes of list, in list order.
+// The pointer storage is allocated from arena; the nodes are not copied.
+function Code_Index_Nest_Ptr_Array
+code_index_nest_ptr_array_from_list(Arena *arena, Code_Index_Nest_List *list){
+    Code_Index_Nest_Ptr_Array array = {};
+    array.ptrs = push_array_zero(arena, Code_Index_Nest*, list->count);
+    array.count = list->count;
+    i32 counter = 0;
+    for (Code_Index_Nest *node = list->first;
+         node != 0;
+         node = node->next){
+        array.ptrs[counter] = node;
+        counter += 1;
+    }
+    return(array);
+}
+
+// Placeholder for storing a finished index for buffer; not implemented yet.
+// NOTE(review): arena is taken by value - presumably ownership of its memory
+// is meant to transfer to the stored index; confirm once this is implemented.
+function void
+code_index_set_file(Application_Links *app, Buffer_ID buffer, Arena arena, Code_Index_File *index){
+    NotImplemented;
+}
+
+////////////////////////////////
+
+// Fills in state for a parse over tokens; if the first token is whitespace
+// the iterator is advanced with token_it_inc_non_whitespace.
+function void
+generic_parse_init(Application_Links *app, Arena *arena, String_Const_u8 contents, Token_Array *tokens,
+                   Generic_Parse_Comment_Function *handle_comment, Generic_Parse_State *state){
+    state->app = app;
+    state->arena = arena;
+    state->contents = contents;
+    state->it = token_iterator(0, tokens);
+    state->handle_comment = handle_comment;
+
+    Token *token = token_it_read(&state->it);
+    if (token != 0 && token->kind == TokenBaseKind_Whitespace){
+        token_it_inc_non_whitespace(&state->it);
+    }
+}
+
+// Returns the token under the iterator (0 when there is none) without stepping
+// past it. Comment tokens are handed to state->handle_comment and skipped first.
+function Token*
+generic_parse_read_token(Code_Index_File *index, Generic_Parse_State *state){
+    Token *token = token_it_read(&state->it);
+    for (;token != 0 && token->kind == TokenBaseKind_Comment;){
+        state->handle_comment(state->app, state->arena, index, token, state->contents);
+        token_it_inc_non_whitespace(&state->it);
+        token = token_it_read(&state->it);
+    }
+    return(token);
+}
+
+function Code_Index_Nest*
+generic_parse_parenthical(Code_Index_File *index, Generic_Parse_State *state);
+
+// Parses one scope nest. The iterator must be on the ScopeOpen token; on
+// return it is past the matching ScopeClose, or out of tokens if the scope
+// is never closed (is_closed stays false in that case).
+function Code_Index_Nest*
+generic_parse_scope(Code_Index_File *index, Generic_Parse_State *state){
+    Token *token = token_it_read(&state->it);
+    Code_Index_Nest *result = push_array_zero(state->arena, Code_Index_Nest, 1);
+    result->kind = CodeIndexNest_Scope;
+    result->open = Ii64(token);
+
+    // Step past the open token; otherwise the loop below would read it again
+    // and recurse on it forever.
+    token_it_inc_non_whitespace(&state->it);
+
+    for (;;){
+        token = generic_parse_read_token(index, state);
+        if (token == 0){
+            break;
+        }
+
+        if (token->kind == TokenBaseKind_ScopeOpen){
+            Code_Index_Nest *nest = generic_parse_scope(index, state);
+            code_index_push_nest(&result->nest_list, nest);
+        }
+        else if (token->kind == TokenBaseKind_ParentheticalOpen){
+            Code_Index_Nest *nest = generic_parse_parenthical(index, state);
+            code_index_push_nest(&result->nest_list, nest);
+        }
+        else if (token->kind == TokenBaseKind_ScopeClose){
+            result->is_closed = true;
+            result->close = Ii64(token);
+            // Consume the close token so the parent does not take it as its own.
+            token_it_inc_non_whitespace(&state->it);
+            break;
+        }
+        else{
+            token_it_inc_non_whitespace(&state->it);
+        }
+    }
+
+    result->nest_array = code_index_nest_ptr_array_from_list(state->arena, &result->nest_list);
+
+    return(result);
+}
+
+// Parses one parenthetical nest. Same contract as generic_parse_scope, but
+// for ParentheticalOpen/ParentheticalClose tokens.
+function Code_Index_Nest*
+generic_parse_parenthical(Code_Index_File *index, Generic_Parse_State *state){
+    Token *token = token_it_read(&state->it);
+    Code_Index_Nest *result = push_array_zero(state->arena, Code_Index_Nest, 1);
+    result->kind = CodeIndexNest_Paren;
+    result->open = Ii64(token);
+
+    // Step past the open token so the loop does not recurse on it forever.
+    token_it_inc_non_whitespace(&state->it);
+
+    for (;;){
+        token = generic_parse_read_token(index, state);
+        if (token == 0){
+            break;
+        }
+
+        if (token->kind == TokenBaseKind_ScopeOpen){
+            Code_Index_Nest *nest = generic_parse_scope(index, state);
+            code_index_push_nest(&result->nest_list, nest);
+        }
+        else if (token->kind == TokenBaseKind_ParentheticalOpen){
+            Code_Index_Nest *nest = generic_parse_parenthical(index, state);
+            code_index_push_nest(&result->nest_list, nest);
+        }
+        else if (token->kind == TokenBaseKind_ParentheticalClose){
+            result->is_closed = true;
+            result->close = Ii64(token);
+            token_it_inc_non_whitespace(&state->it);
+            break;
+        }
+        else{
+            token_it_inc_non_whitespace(&state->it);
+        }
+    }
+
+    result->nest_array = code_index_nest_ptr_array_from_list(state->arena, &result->nest_list);
+
+    return(result);
+}
+
+// Parses top level tokens into index->nest_list, breaking out once the token
+// index has advanced by at least limit so the caller can check for cancellation.
+// Returns true when the token stream is exhausted, false if there is more to do.
+function b32
+generic_parse_full_input_breaks(Code_Index_File *index, Generic_Parse_State *state, i32 limit){
+    b32 result = false;
+
+    i64 first_index = token_it_index(&state->it);
+    i64 one_past_last_index = first_index + limit;
+    for (;;){
+        Token *token = generic_parse_read_token(index, state);
+
+        if (token == 0){
+            result = true;
+            break;
+        }
+
+        if (token->kind == TokenBaseKind_ScopeOpen){
+            Code_Index_Nest *nest = generic_parse_scope(index, state);
+            code_index_push_nest(&index->nest_list, nest);
+        }
+        else if (token->kind == TokenBaseKind_ParentheticalOpen){
+            Code_Index_Nest *nest = generic_parse_parenthical(index, state);
+            code_index_push_nest(&index->nest_list, nest);
+        }
+        else{
+            token_it_inc_non_whitespace(&state->it);
+        }
+
+        // Named token_index so it does not shadow the index parameter.
+        i64 token_index = token_it_index(&state->it);
+        if (token_index >= one_past_last_index){
+            token = token_it_read(&state->it);
+            if (token == 0){
+                result = true;
+            }
+            break;
+        }
+    }
+
+    if (result){
+        // End of input: build the array form of the top level nests.
+        index->nest_array = code_index_nest_ptr_array_from_list(state->arena, &index->nest_list);
+    }
+
+    return(result);
+}
+
+////////////////////////////////
+
+// Default comment handler: does nothing with the comment.
+function void
+default_comment_index(Application_Links *app, Arena *arena, Code_Index_File *index,
+                      Token *token, String_Const_u8 contents){
+
+}
+
+// Convenience overload that uses default_comment_index for comments.
+function void
+generic_parse_init(Application_Links *app, Arena *arena, String_Const_u8 contents, Token_Array *tokens,
+                   Generic_Parse_State *state){
+    generic_parse_init(app, arena, contents, tokens, default_comment_index, state);
+}
+
+
+// BOTTOM
+
diff --git a/custom/4coder_code_index.h b/custom/4coder_code_index.h
new file mode 100644
index 00000000..de68c9b7
--- /dev/null
+++ b/custom/4coder_code_index.h
@@ -0,0 +1,69 @@
+/*
+4coder_code_index.h - Generic code indexing system for layout, definition jumps, etc.
+*/
+
+// TOP
+
+#if !defined(FCODER_CODE_INDEX_H)
+#define FCODER_CODE_INDEX_H
+
+// Singly linked list of nests (linked through Code_Index_Nest.next).
+struct Code_Index_Nest_List{
+    struct Code_Index_Nest *first;
+    struct Code_Index_Nest *last;
+    i32 count;
+};
+
+// Flat array of pointers to nests.
+struct Code_Index_Nest_Ptr_Array{
+    struct Code_Index_Nest **ptrs;
+    i32 count;
+};
+
+typedef i32 Code_Index_Nest_Kind;
+enum{
+    CodeIndexNest_Scope,
+    CodeIndexNest_Paren,
+};
+
+// One scope or parenthetical region and the regions nested directly in it.
+struct Code_Index_Nest{
+    Code_Index_Nest *next;
+
+    Code_Index_Nest_Kind kind;
+    // True once the matching close token has been found.
+    b32 is_closed;
+    // Set from the open and close tokens; close is only written when is_closed.
+    Range_i64 open;
+    Range_i64 close;
+
+    // The same children, as a list and as an array.
+    Code_Index_Nest_List nest_list;
+    Code_Index_Nest_Ptr_Array nest_array;
+};
+
+// Top level nests of one file.
+struct Code_Index_File{
+    Code_Index_Nest_List nest_list;
+    Code_Index_Nest_Ptr_Array nest_array;
+};
+
+////////////////////////////////
+
+// Callback invoked for each comment token the generic parser steps over.
+typedef void Generic_Parse_Comment_Function(Application_Links *app, Arena *arena, Code_Index_File *index,
+                                            Token *token, String_Const_u8 contents);
+
+// Cursor and context shared by the generic_parse_* functions.
+struct Generic_Parse_State{
+    Application_Links *app;
+    Arena *arena;
+    String_Const_u8 contents;
+    Token_Iterator_Array it;
+    Generic_Parse_Comment_Function *handle_comment;
+};
+
+#endif
+
+// BOTTOM
+
diff --git a/custom/4coder_default_hooks.cpp b/custom/4coder_default_hooks.cpp
index a8af694d..159398ad 100644
--- a/custom/4coder_default_hooks.cpp
+++ b/custom/4coder_default_hooks.cpp
@@ -643,6 +643,62 @@ do_full_lex_async(Async_Context *actx, Data data){
     }
 }
 
+// Async task body: copies the buffer's text and tokens while holding the global
+// frame mutex, then parses them into a Code_Index_File in cancelable chunks.
+function void
+do_full_parse_async__inner(Async_Context *actx, Buffer_ID buffer){
+    Application_Links *app = actx->app;
+    ProfileScope(app, "async parse");
+
+    Thread_Context *tctx = get_thread_context(app);
+    Scratch_Block scratch(tctx);
+
+    String_Const_u8 contents = {};
+    Token_Array tokens = {};
+    {
+        ProfileBlock(app, "async parse contents (before mutex)");
+        system_acquire_global_frame_mutex(tctx);
+        ProfileBlock(app, "async parse contents (after mutex)");
+        contents = push_whole_buffer(app, scratch, buffer);
+        Managed_Scope scope = buffer_get_managed_scope(app, buffer);
+        Token_Array *tokens_ptr = scope_attachment(app, scope, attachment_tokens, Token_Array);
+        tokens.count = tokens_ptr->count;
+        tokens.tokens = push_array_write(scratch, Token, tokens.count, tokens_ptr->tokens);
+        system_release_global_frame_mutex(tctx);
+    }
+
+    Arena arena = make_arena_system(KB(16));
+
+    Generic_Parse_State state = {};
+    generic_parse_init(app, &arena, contents, &tokens, &state);
+
+    Code_Index_File index = {};
+    b32 canceled = false;
+    for (;;){
+        ProfileBlock(app, "async parse block");
+        if (generic_parse_full_input_breaks(&index, &state, 10000)){
+            break;
+        }
+        if (async_check_canceled(actx)){
+            canceled = true;
+            break;
+        }
+    }
+
+    if (!canceled){
+        ProfileBlock(app, "async parse save results");
+        code_index_set_file(app, buffer, arena, &index);
+    }
+    else{
+        // Canceled: the arena was not handed to code_index_set_file, so release it.
+        // NOTE(review): assumes linalloc_clear pairs with make_arena_system - confirm.
+        linalloc_clear(&arena);
+    }
+}
+
+// Async entry point; does nothing unless data holds exactly one Buffer_ID.
+function void
+do_full_parse_async(Async_Context *actx, Data data){
+    if (data.size == sizeof(Buffer_ID)){
+        Buffer_ID buffer = *(Buffer_ID*)data.data;
+        do_full_parse_async__inner(actx, buffer);
+    }
+}
+
 BUFFER_HOOK_SIG(default_begin_buffer){
     ProfileScope(app, "begin buffer");
 
@@ -762,6 +824,8 @@ BUFFER_HOOK_SIG(default_begin_buffer){
         ProfileBlock(app, "begin buffer kick off lexer");
         Async_Task *lex_task_ptr = scope_attachment(app, scope, buffer_lex_task, Async_Task);
         *lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async, make_data_struct(&buffer_id));
+        async_task_single_dep(&global_async_system, do_full_parse_async, make_data_struct(&buffer_id),
+                              *lex_task_ptr);
     }
 
     if (wrap_lines){
@@ -832,6 +896,8 @@ BUFFER_EDIT_RANGE_SIG(default_buffer_edit_range){
     if (async_task_is_running_or_pending(&global_async_system, *lex_task_ptr)){
         async_task_cancel(&global_async_system, *lex_task_ptr);
         *lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async, make_data_struct(&buffer_id));
+        async_task_single_dep(&global_async_system, do_full_parse_async, make_data_struct(&buffer_id),
+                              *lex_task_ptr);
     }
     else{
         Token_Array *ptr = scope_attachment(app, scope, attachment_tokens, Token_Array);
@@ -912,6 +978,8 @@ BUFFER_EDIT_RANGE_SIG(default_buffer_edit_range){
 
                 *lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async,
                                                   make_data_struct(&buffer_id));
+                async_task_single_dep(&global_async_system, do_full_parse_async, make_data_struct(&buffer_id),
+                                      *lex_task_ptr);
             }
         }
     }
diff --git a/custom/4coder_default_include.cpp b/custom/4coder_default_include.cpp
index b423c56e..21a059b5 100644
--- a/custom/4coder_default_include.cpp
+++ b/custom/4coder_default_include.cpp
@@ -28,6 +28,7 @@
 #include "4coder_profile.h"
 #include "4coder_async_tasks.h"
 #include "4coder_token.h"
+#include "4coder_code_index.h"
 #include "generated/lexer_cpp.h"
 #include "4coder_string_match.h"
 #include "4coder_helper.h"
@@ -71,6 +72,7 @@
 #include "4coder_buffer_seek_constructors.cpp"
 
 #include "4coder_token.cpp"
+#include "4coder_code_index.cpp"
 #include "generated/lexer_cpp.cpp"
 #include "4coder_command_map.cpp"
 #include "4coder_default_framework_variables.cpp"
diff --git a/custom/4coder_layout_rule.cpp b/custom/4coder_layout_rule.cpp
index 47c2c447..d6eb0de8 100644
--- a/custom/4coder_layout_rule.cpp
+++ b/custom/4coder_layout_rule.cpp
@@ -627,8 +627,7 @@ layout_virt_indent_unwrapped(Application_Links *app, Arena *arena,
             for (;ptr < end_ptr;){
                 Character_Consume_Result consume = utf8_consume(ptr, (umem)(end_ptr - ptr));
 
-                if (consume.codepoint != ' ' &&
-                    consume.codepoint != '\t'){
+                if (!character_is_whitespace(consume.codepoint)){
                     skipping_leading_whitespace = false;
                 }
 