From 7caaed736b8ecc0059353fc60aa02c6dd3fe0562 Mon Sep 17 00:00:00 2001
From: Peter Slattery
Date: Thu, 10 Jul 2025 07:08:27 -0700
Subject: [PATCH] Initial tree sitter usage:

- identify buffer language
- custom_begin_buffer sets up necessary tree sitter state, and kicks off a parse task
- custom_end_buffer cleans up tree sitter data and kills async parse tasks
- tree_sitter_parse_async/__inner uses tree sitter api to get a tree of the buffer's code and stores it on the buffers managed scope
- tree_sitter_write_tree prints tree to a special *tree* buffer
- use new build scripts in 4coder project
---
 code/custom/4coder_custom_hooks.cpp     | 188 ++++++++++++++++++++++
 code/custom/4coder_default_bindings.cpp |   5 +
 code/custom/4coder_default_hooks.cpp    |   4 +-
 code/custom/4coder_default_include.cpp  |   3 +
 code/custom/4coder_tree_sitter.cpp      | 265 ++++++++++++++++++++++++
 code/custom/4coder_tree_sitter.h        |  61 ++++++
 project.4coder                          |   6 +-
 7 files changed, 528 insertions(+), 4 deletions(-)
 create mode 100644 code/custom/4coder_custom_hooks.cpp
 create mode 100644 code/custom/4coder_tree_sitter.cpp
 create mode 100644 code/custom/4coder_tree_sitter.h

diff --git a/code/custom/4coder_custom_hooks.cpp b/code/custom/4coder_custom_hooks.cpp
new file mode 100644
index 00000000..86261643
--- /dev/null
+++ b/code/custom/4coder_custom_hooks.cpp
@@ -0,0 +1,188 @@
+
+///////////////////////////////////////////////////////////////////////////
+// Begin Buffer
+///////////////////////////////////////////////////////////////////////////
+
+// Result of identify_file_language.
+struct File_Language_Result
+{
+    // Detected language; File_Language_None when the buffer has no file name.
+    File_Language_Kind kind;
+    // True when the extension is listed in the "treat_as_code" config string.
+    bool treat_as_code;
+};
+
+// One row of the extension-to-language table used by identify_file_language.
+struct File_Language_Extension
+{
+    const char *extension;    // file extension without the dot
+    File_Language_Kind kind;  // language reported for that extension
+    const char *display_name; // name printed in the "Language Detected" message
+};
+
+// NOTE(review): "c" prints "C" but maps to File_Language_CPP, exactly as the
+// original if/else chain did. tree_sitter_begin_buffer only installs a grammar
+// for File_Language_CPP, so presumably .c files share it on purpose - confirm
+// before switching that row to File_Language_C.
+static File_Language_Extension file_language_extensions[] = {
+    { "md",    File_Language_Markdown,   "Markdown"   },
+    { "c",     File_Language_CPP,        "C"          },
+    { "cpp",   File_Language_CPP,        "Cpp"        },
+    { "h",     File_Language_CPP,        "Cpp"        },
+    { "hpp",   File_Language_CPP,        "Cpp"        },
+    { "cc",    File_Language_CPP,        "Cpp"        },
+    { "m",     File_Language_ObjectiveC, "ObjectiveC" },
+    { "hlsl",  File_Language_HLSL,       "HLSL"       },
+    { "glsl",  File_Language_GLSL,       "GLSL"       },
+    { "jai",   File_Language_Jai,        "Jai"        },
+    { "cs",    File_Language_CSharp,     "C#"         },
+    { "swift", File_Language_Swift,      "Swift"      },
+    { "go",    File_Language_Go,         "Go"         },
+    { "rs",    File_Language_Rust,       "Rust"       },
+    { "js",    File_Language_Javascript, "Javascript" },
+    { "ts",    File_Language_Typescript, "Typescript" },
+    { "json",  File_Language_JSON,       "JSON"       },
+    { "odin",  File_Language_Odin,       "Odin"       },
+    { "zig",   File_Language_Zig,        "Zig"        },
+};
+
+// Looks `extension` up in file_language_extensions. On a hit, prints
+// "Language Detected as NAME" and returns the row's kind; otherwise returns
+// File_Language_Unknown without printing anything.
+function File_Language_Kind
+file_language_kind_from_extension(Application_Links* app, Arena* arena, String_Const_u8 extension)
+{
+    i32 row_count = (i32)(sizeof(file_language_extensions)/sizeof(file_language_extensions[0]));
+    for (i32 i = 0; i < row_count; ++i)
+    {
+        File_Language_Extension *row = &file_language_extensions[i];
+        if (!string_match(extension, SCu8((char *)row->extension))) continue;
+
+        print_message(app, push_stringf(arena, "Language Detected as %s\n", row->display_name));
+        return row->kind;
+    }
+    return File_Language_Unknown;
+}
+
+// Determines the language of `buffer_id` from its file extension.
+//  - no file name: zeroed result (File_Language_None, treat_as_code false)
+//  - extension not listed in "treat_as_code": File_Language_Text
+//  - extension listed: treat_as_code is true and kind comes from the table
+//    above (File_Language_Unknown when the extension has no row)
+function File_Language_Result
+identify_file_language(Application_Links* app, Buffer_ID buffer_id)
+{
+    Scratch_Block scratch(app);
+
+    File_Language_Result result;
+    block_zero_struct(&result);
+    String_Const_u8 file_name = push_buffer_file_name(app, scratch, buffer_id);
+    if (file_name.size <= 0) return result;
+    String_Const_u8 file_extension = string_file_extension(file_name);
+    result.kind = File_Language_Text;
+
+    String_Const_u8 treat_as_code_string = def_get_config_string(scratch, vars_save_string_lit("treat_as_code"));
+    String_Const_u8_Array extensions_to_treat_as_code = parse_extension_line_to_extension_list(app, scratch, treat_as_code_string);
+
+    for (i32 i = 0; i < extensions_to_treat_as_code.count; ++i)
+    {
+        if (!string_match(file_extension, extensions_to_treat_as_code.strings[i])) continue;
+
+        // The language depends only on the extension, so one lookup is enough.
+        result.treat_as_code = true;
+        result.kind = file_language_kind_from_extension(app, scratch, file_extension);
+        break;
+    }
+
+    return result;
+}
+
+// BeginBuffer hook: picks the buffer's key map, line-ending setting, wrapping
+// and layout, and starts the async lexer and (when a tree-sitter grammar is
+// available for the language) the async tree-sitter parse.
+BUFFER_HOOK_SIG(custom_begin_buffer){
+    ProfileScope(app, "begin buffer");
+
+    Scratch_Block scratch(app);
+
+    File_Language_Result lang = identify_file_language(app, buffer_id);
+    bool begin_parse_task = false;
+    if (lang.treat_as_code) begin_parse_task = tree_sitter_begin_buffer(app, buffer_id, lang.kind);
+
+    String_ID file_map_id = vars_save_string_lit("keys_file");
+    String_ID code_map_id = vars_save_string_lit("keys_code");
+
+    Command_Map_ID map_id = (lang.treat_as_code)?(code_map_id):(file_map_id);
+    Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
+    Command_Map_ID *map_id_ptr = scope_attachment(app, scope, buffer_map_id, Command_Map_ID);
+    *map_id_ptr = map_id;
+
+    Line_Ending_Kind setting = guess_line_ending_kind_from_buffer(app, buffer_id);
+    Line_Ending_Kind *eol_setting = scope_attachment(app, scope, buffer_eol_setting, Line_Ending_Kind);
+    *eol_setting = setting;
+
+    // NOTE(allen): Decide buffer settings
+    b32 wrap_lines = true;
+    b32 use_lexer = false;
+    if (lang.treat_as_code){
+        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_code_wrapping"));
+        // TODO(PS): @Remove - consider removing the lexer for now? later, replace in favor of tree-sitter
+        use_lexer = true;
+    }
+
+    // tree_sitter_begin_buffer returned true, so the buffer has a grammar and
+    // a tree mutex: kick off the first parse.
+    if (begin_parse_task)
+    {
+        Async_Task* parse_task = scope_attachment(app, scope, buffer_tree_sitter_parse_task_id, Async_Task);
+        *parse_task = async_task_no_dep(&global_async_system, tree_sitter_parse_async, make_data_struct(&buffer_id));
+    }
+
+    // Buffers named like *this* are output buffers and use their own wrap setting.
+    String_Const_u8 buffer_name = push_buffer_base_name(app, scratch, buffer_id);
+    if (buffer_name.size > 0 && buffer_name.str[0] == '*' && buffer_name.str[buffer_name.size - 1] == '*'){
+        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_output_wrapping"));
+    }
+
+    if (use_lexer){
+        ProfileBlock(app, "begin buffer kick off lexer");
+        Async_Task *lex_task_ptr = scope_attachment(app, scope, buffer_lex_task, Async_Task);
+        *lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async, make_data_struct(&buffer_id));
+    }
+
+    {
+        b32 *wrap_lines_ptr = scope_attachment(app, scope, buffer_wrap_lines, b32);
+        *wrap_lines_ptr = wrap_lines;
+    }
+
+    if (use_lexer){
+        buffer_set_layout(app, buffer_id, layout_virt_indent_index_generic);
+    }
+    else{
+        if (lang.treat_as_code){
+            buffer_set_layout(app, buffer_id, layout_virt_indent_literal_generic);
+        }
+        else{
+            buffer_set_layout(app, buffer_id, layout_generic);
+        }
+    }
+
+    // no meaning for return
+    return(0);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// End Buffer
+///////////////////////////////////////////////////////////////////////////
+
+
+// EndBuffer hook: frees the buffer's marker list and tree-sitter state, then
+// runs default_end_buffer.
+BUFFER_HOOK_SIG(custom_end_buffer){
+    Marker_List *list = get_marker_list_for_buffer(buffer_id);
+    if (list != 0) delete_marker_list(list);
+
+    tree_sitter_end_buffer(app, buffer_id);
+    default_end_buffer(app, buffer_id);
+    return(0);
+}
diff --git a/code/custom/4coder_default_bindings.cpp b/code/custom/4coder_default_bindings.cpp
index cc55c76a..ce71a92d 100644
--- a/code/custom/4coder_default_bindings.cpp
+++ b/code/custom/4coder_default_bindings.cpp
@@ -527,8 +527,12 @@ custom_layer_init(Application_Links *app){
     set_all_default_hooks(app);
     modal_init(3, tctx);
 
+    set_custom_hook(app, HookID_BeginBuffer, custom_begin_buffer);
+    set_custom_hook(app, HookID_EndBuffer, custom_end_buffer);
+
     custom_keyboard_bindings();
 
+
 #if 0
     mapping_init(tctx, &framework_mapping);
     String_ID global_map_id = vars_save_string_lit("keys_global");
@@ -542,6 +546,7 @@ custom_layer_init(Application_Links *app){
 
     setup_essential_mapping(&framework_mapping, global_map_id, file_map_id, code_map_id);
 #endif
+    tree_sitter_init(app);
 }
 
 #endif //FCODER_DEFAULT_BINDINGS
diff --git a/code/custom/4coder_default_hooks.cpp b/code/custom/4coder_default_hooks.cpp
index 9aeaeba7..e8c2a4e2 100644
--- a/code/custom/4coder_default_hooks.cpp
+++ b/code/custom/4coder_default_hooks.cpp
@@ -208,7 +208,9 @@ reload_clean_buffers_on_filesystem_change(Application_Links *app, Frame_Info fra
 
 function void
 default_tick(Application_Links *app, Frame_Info frame_info){
-    code_index_update_tick(app);
+
+    if (use_tree_sitter_code_indexing) { tree_sitter_code_index_update_tick(app); }
+    else { code_index_update_tick(app); }
 
     if (tick_all_fade_ranges(app, frame_info.animation_dt)){
         animate_in_n_milliseconds(app, 0);
diff --git a/code/custom/4coder_default_include.cpp b/code/custom/4coder_default_include.cpp
index 198ace40..0ecd1055 100644
--- a/code/custom/4coder_default_include.cpp
+++ b/code/custom/4coder_default_include.cpp
@@ -65,6 +65,7 @@
 #include "4coder_search_list.h"
 #include "4coder_modal.h"
 #include "4coder_qol.h"
+#include "4coder_tree_sitter.h"
 
 ////////////////////////////////
 
@@ -143,10 +144,12 @@
 #include "4coder_search_list.cpp"
 #include "4coder_modal.cpp"
 #include "4coder_yeet.cpp"
+#include "4coder_tree_sitter.cpp"
 
 #include "4coder_examples.cpp"
 
 #include "4coder_default_hooks.cpp"
+#include "4coder_custom_hooks.cpp"
 
 #include "4coder_qol.cpp"
 
diff --git a/code/custom/4coder_tree_sitter.cpp b/code/custom/4coder_tree_sitter.cpp
new file mode 100644
index 00000000..ce51eee1
--- /dev/null
+++ b/code/custom/4coder_tree_sitter.cpp
@@ -0,0 +1,265 @@
+// Creates the "*tree*" debug buffer that tree_sitter_write_tree prints into
+// and marks it unimportant and read-only. Returns true when create_buffer
+// handed back a non-zero buffer id.
+function bool
+tree_sitter_init(Application_Links* app)
+{
+    Buffer_ID buffer = create_buffer(
+        app,
+        string_u8_litexpr("*tree*"),
+        BufferCreate_NeverAttachToFile | BufferCreate_AlwaysNew
+    );
+    buffer_set_setting(app, buffer, BufferSetting_Unimportant, true);
+    buffer_set_setting(app, buffer, BufferSetting_ReadOnly, true);
+
+    // The function is declared bool but previously fell off the end without a
+    // return, which is undefined behavior in C++.
+    // NOTE(review): assumes create_buffer yields 0 on failure - confirm.
+    return (buffer != 0);
+}
+
+// Picks the tree-sitter grammar for `kind` and stores it on the buffer's
+// managed scope. Only File_Language_CPP has a grammar so far. Returns true
+// when a grammar was set (the tree mutex is created in that case only), which
+// tells the caller to start a parse task.
+function bool
+tree_sitter_begin_buffer(Application_Links* app, Buffer_ID buffer_id, File_Language_Kind kind)
+{
+    Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id);
+    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
+    // Same guard as tree_sitter_end_buffer: no attachment, nothing to set up.
+    if (!tree_data) return false;
+
+    switch (kind)
+    {
+        case File_Language_CPP:
+        {
+            tree_data->language = tree_sitter_cpp();
+        } break;
+
+        default:
+        tree_data->language = 0;
+    }
+
+    if (tree_data->language != 0)
+    {
+        tree_data->tree_mutex = system_mutex_make();
+    }
+
+    return tree_data->language != 0;
+}
+
+// Tears down the state created by tree_sitter_begin_buffer: cancels a running
+// or pending parse task, deletes the stored tree and frees the tree mutex.
+// Does nothing for buffers that never had a grammar.
+function void
+tree_sitter_end_buffer(Application_Links* app, Buffer_ID buffer_id)
+{
+    Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id);
+    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
+    if (!tree_data || !tree_data->language) return;
+
+    Async_Task *tree_sitter_parse_task = scope_attachment(app, buffer_scope, buffer_tree_sitter_parse_task_id, Async_Task);
+    if (async_task_is_running_or_pending(&global_async_system, *tree_sitter_parse_task))
+    {
+        async_task_cancel(app, &global_async_system, *tree_sitter_parse_task);
+    }
+
+    system_mutex_acquire(tree_data->tree_mutex);
+    ts_tree_delete(tree_data->tree);
+    // Clear the pointer so nothing can reach the freed tree afterwards.
+    tree_data->tree = 0;
+    system_mutex_release(tree_data->tree_mutex);
+    system_mutex_free(tree_data->tree_mutex);
+    // With the mutex gone, mark the buffer as having no tree-sitter state so a
+    // second call returns early instead of touching the freed mutex.
+    tree_data->language = 0;
+}
+
+// Returns a copy of the buffer's current tree (0 when there is none). The
+// caller owns the copy and must ts_tree_delete it.
+// NOTE(review): the tree_mutex calls are commented out in the original; the
+// only caller in this file holds the global frame mutex, which presumably is
+// why - confirm before calling this from anywhere else.
+function TSTree*
+tree_sitter_buffer_get_tree_copy(Buffer_Tree_Sitter_Data* tree_data)
+{
+    TSTree* result = 0;
+    // system_mutex_acquire(tree_data->tree_mutex);
+    if (tree_data->tree) result = ts_tree_copy(tree_data->tree);
+    // system_mutex_release(tree_data->tree_mutex);
+    return result;
+}
+
+// Body of the async parse task: snapshots the buffer text under the global
+// frame mutex, parses it with tree-sitter in 5000 microsecond slices (checking
+// for cancellation between slices), then publishes the new tree on the
+// buffer's Buffer_Tree_Sitter_Data and marks the buffer as modified.
+function void
+tree_sitter_parse_async__inner(Async_Context* actx, Buffer_ID buffer_id)
+{
+    Application_Links *app = actx->app;
+
+    Arena arena = make_arena_system(KB(16));
+
+    TSParser *parser = ts_parser_new();
+    ts_parser_set_timeout_micros(parser, 5000);
+
+    acquire_global_frame_mutex(app);
+    String_Const_u8 src = push_whole_buffer(app, &arena, buffer_id);
+    Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
+    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
+    TSTree *old_tree = 0;
+    bool lang_set = false;
+    if (tree_data != 0 && tree_data->language != 0)
+    {
+        old_tree = tree_sitter_buffer_get_tree_copy(tree_data);
+        lang_set = ts_parser_set_language(parser, tree_data->language);
+    }
+    release_global_frame_mutex(app);
+
+    if (!lang_set)
+    {
+        // The adjacent literals are concatenated, so each needs its own
+        // trailing space.
+        AssertMessageAlways("Failed to set the language for the parser. "
+                            "This probably means a language wasn't set "
+                            "in the BeginBuffer hook.\n");
+    }
+
+    // Iterate until we get a tree or we find that we should cancel the parse.
+    // A parser without a language never returns a tree, so skip the loop then.
+    TSTree *new_tree = 0;
+    b32 canceled = false;
+    while (lang_set)
+    {
+        new_tree = ts_parser_parse_string(parser, old_tree, (char *)src.str, (u32)src.size);
+        if (async_check_canceled(actx))
+        {
+            canceled = true;
+            break;
+        }
+        if (new_tree) break;
+    }
+
+    if (!canceled && new_tree)
+    {
+        TSTree* old_buffer_tree;
+        acquire_global_frame_mutex(app);
+        {
+            // NOTE(jack): Copy the old pointer to delete it outside the mutex.
+            system_mutex_acquire(tree_data->tree_mutex);
+            old_buffer_tree = tree_data->tree;
+            tree_data->tree = new_tree;
+            // This was a second system_mutex_acquire, so the tree mutex was
+            // never released after the swap.
+            system_mutex_release(tree_data->tree_mutex);
+
+            print_message(app, SCu8("Finished Parse\n"));
+
+            // TODO(PS): Just put the code index update call here
+            // NOTE(jack): This feels kinda hacky, this is here to trigger
+            // the code index update tick. The buffer is also marked by the
+            // async lexer so we will update the index too frequently. We
+            // should probably change the lexer to not mark as modified.
+            // TODO(jack): Should we instead trigger another async task here to
+            // update the code index once this is done?
+            buffer_mark_as_modified(buffer_id);
+
+            // Force a frame refresh by requesting another frame
+            animate_in_n_milliseconds(app, 0);
+        }
+        release_global_frame_mutex(app);
+        ts_tree_delete(old_buffer_tree);
+    }
+    else
+    {
+        // The parse can finish in the same iteration that cancellation is
+        // noticed; that tree was never published, so free it here.
+        ts_tree_delete(new_tree);
+    }
+
+    ts_parser_delete(parser);
+    ts_tree_delete(old_tree);
+    linalloc_clear(&arena);
+}
+
+// Async task entry point: unpacks the Buffer_ID payload (ignoring payloads of
+// the wrong size) and runs the parse.
+function void
+tree_sitter_parse_async(Async_Context* actx, String_Const_u8 data)
+{
+    if (data.size != sizeof(Buffer_ID)) return;
+    Buffer_ID buffer_id = *(Buffer_ID*)data.str;
+    tree_sitter_parse_async__inner(actx, buffer_id);
+}
+
+// Placeholder for the tree-sitter driven code index update; an empty stub
+// for now.
+function void
+tree_sitter_code_index_update_tick(Application_Links* app)
+{
+
+}
+
+////////////////////////////////////////////////////////////////////
+// DEBUG
+////////////////////////////////////////////////////////////////////
+
+// NOTE(review): no longer used for indentation (the tree printer pads with
+// "%*s" instead); left in place in case other code refers to it.
+char* prefix_buffer = " ";
+
+// Appends one line describing `cur_node` to `buffer_id`, indented two spaces
+// per `level`, then recurses into its named children. `field` is the field
+// name under which the node hangs off its parent ("" when it has none).
+function void
+write_tree_sitter_tree_to_buffer__inner(Application_Links *app, Arena *arena, Buffer_ID buffer_id,
+                                        TSNode cur_node, i32 level = 0, const char *field="")
+{
+    TSPoint start = ts_node_start_point(cur_node);
+    TSPoint end = ts_node_end_point(cur_node);
+    // + 1 on ts positions because the first line/column are zero in treesitter,
+    // but 4coder displays as 1 indexed in the filebar.
+    // "%*s" pads the empty string to level*2 spaces, so the indentation is not
+    // capped by the length of a prefix string. TSPoint fields are unsigned,
+    // hence %u.
+    String_Const_u8 string = push_stringf(arena, "%*s%s: %s [%u, %u] - [%u, %u]\n",
+                                          level*2, "", field, ts_node_type(cur_node),
+                                          start.row + 1, start.column + 1,
+                                          end.row + 1, end.column + 1);
+
+    buffer_replace_range(app, buffer_id, Ii64(buffer_get_size(app, buffer_id)), string);
+
+    u32 child_count = ts_node_child_count(cur_node);
+    for (u32 i = 0; i < child_count; ++i)
+    {
+        TSNode child = ts_node_child(cur_node, i);
+        if (ts_node_is_named(child))
+        {
+            const char *child_field = ts_node_field_name_for_child(cur_node, i);
+            if (!child_field) child_field = "";
+            write_tree_sitter_tree_to_buffer__inner(app, arena, buffer_id, child, level + 1, child_field);
+        }
+    }
+}
+
+CUSTOM_COMMAND_SIG(tree_sitter_write_tree)
+CUSTOM_DOC("Write the current buffer's tree sitter tree to *tree*")
+{
+    Scratch_Block scratch(app);
+    Buffer_ID out_buffer = get_buffer_by_name(app, string_u8_litexpr("*tree*"), Access_Always);
+
+    View_ID view = get_active_view(app, Access_Always);
+    Buffer_ID buffer = view_get_buffer(app, view, Access_Visible);
+
+    Managed_Scope scope = buffer_get_managed_scope(app, buffer);
+    Buffer_Tree_Sitter_Data *tree_data = scope_attachment(app, scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
+
+    // tree_data is checked the same way tree_sitter_end_buffer checks it.
+    if (tree_data && tree_data->tree)
+    {
+        TSNode root = ts_tree_root_node(tree_data->tree);
+        write_tree_sitter_tree_to_buffer__inner(app, scratch, out_buffer, root);
+    }
+}
\ No newline at end of file
diff --git a/code/custom/4coder_tree_sitter.h b/code/custom/4coder_tree_sitter.h
new file mode 100644
index 00000000..145cc9d4
--- /dev/null
+++ b/code/custom/4coder_tree_sitter.h
@@ -0,0 +1,61 @@
+/* date = July 8th 2025 10:13 am */
+
+#ifndef FCODER_TREE_SITTER_H
+#define FCODER_TREE_SITTER_H
+
+// NOTE(review): the header name was missing after #include in the text under
+// review; tree_sitter/api.h is the tree-sitter C API header that declares the
+// TSLanguage/TSTree/TSParser types used below - confirm the include path
+// matches the build setup.
+#include "tree_sitter/api.h"
+
+// Languages identify_file_language can report for a buffer.
+enum File_Language_Kind
+{
+    File_Language_None,
+    File_Language_Unknown,
+    File_Language_Text,
+    File_Language_Markdown,
+    File_Language_C,
+    File_Language_CPP,
+    File_Language_ObjectiveC,
+    File_Language_HLSL,
+    File_Language_GLSL,
+    File_Language_Jai,
+    File_Language_CSharp,
+    File_Language_Swift,
+    File_Language_Go,
+    File_Language_Rust,
+    File_Language_Javascript,
+    File_Language_Typescript,
+    File_Language_JSON,
+    File_Language_Odin,
+    File_Language_Zig,
+};
+
+// Grammar entry points provided by the tree-sitter language libraries.
+extern "C" {
+    TSLanguage *tree_sitter_cpp();
+    TSLanguage *tree_sitter_c();
+}
+
+CUSTOM_ID(attachment, buffer_tree_sitter_data_id);
+CUSTOM_ID(attachment, buffer_tree_sitter_parse_task_id);
+
+// Per-buffer tree-sitter state, stored on the buffer's managed scope under
+// buffer_tree_sitter_data_id.
+struct Buffer_Tree_Sitter_Data
+{
+    TSLanguage* language; // 0 when the buffer's language has no grammar
+    TSTree* tree;         // latest published parse tree (may be 0)
+
+    System_Mutex tree_mutex; // created only when language != 0
+};
+
+// NOTE(review): default_tick calls tree_sitter_code_index_update_tick instead
+// of code_index_update_tick while this is true, and that function is still an
+// empty stub - so no code index update runs from the tick for now.
+b8 use_tree_sitter_code_indexing = true;
+function void tree_sitter_code_index_update_tick(Application_Links *app);
+
+#endif //FCODER_TREE_SITTER_H
diff --git a/project.4coder b/project.4coder
index 98607544..4741723b 100644
--- a/project.4coder
+++ b/project.4coder
@@ -27,9 +27,9 @@ commands = {
     .out = "*compilation*",
     .footer_panel = true,
     .save_dirty_files = true,
-    .win = "code\\bin\\build.bat",
-    .linux = "./code/bin/package.sh",
-    .mac = "code/bin/package-mac.sh", },
+    .win = "bash build_new\\scripts\\build.sh",
+    .linux = "build_new/scripts/build.sh",
+    .mac = "build_new/scripts/build.sh", },
     .run = { .out = "*run*", .footer_panel = false, .save_dirty_files = false,
     .win = "build\\4ed.exe",
     .linux = "build/4ed",