From 19dd5af51a3605ba71b43e63b942ed22ae708bfc Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Wed, 9 Mar 2016 21:59:58 -0500 Subject: [PATCH] bug fixes and new lexer started --- 4coder_custom.cpp | 6 +- 4coder_string.h | 8 +- 4cpp_lexer.h | 254 +---------------------------------- 4cpp_lexer_types.h | 256 +++++++++++++++++++++++++++++++++++ 4ed_app_target.cpp | 2 +- 4ed_file_view.cpp | 16 ++- TODO.txt | 5 +- test/4cpp_new_lexer.h | 250 +++++++++++++++++++++++++++++++++++ test/experiment.cpp | 301 ++++++++++++++++++++++++++++++++++++------ 9 files changed, 795 insertions(+), 303 deletions(-) create mode 100644 4cpp_lexer_types.h create mode 100644 test/4cpp_new_lexer.h diff --git a/4coder_custom.cpp b/4coder_custom.cpp index dca8651a..2a022546 100644 --- a/4coder_custom.cpp +++ b/4coder_custom.cpp @@ -445,8 +445,10 @@ isearch(Application_Links *app, int start_reversed){ made_change = 1; } else if (in.key.keycode == key_back){ - --bar.string.size; - made_change = 1; + if (bar.string.size > 0){ + --bar.string.size; + made_change = 1; + } } int step_forward = 0; diff --git a/4coder_string.h b/4coder_string.h index f736dbe7..774c373c 100644 --- a/4coder_string.h +++ b/4coder_string.h @@ -50,6 +50,8 @@ inline bool char_is_slash(char c) { return (c == '\\' || c == '/'); } inline char char_to_upper(char c) { return (c >= 'a' && c <= 'z') ? c + (char)('A' - 'a') : c; } inline char char_to_lower(char c) { return (c >= 'A' && c <= 'Z') ? c - (char)('A' - 'a') : c; } +inline int char_to_int(char c) { return (c - '0'); } +inline char int_to_char(int x) { return (char)(x + '0'); } inline bool char_is_whitespace(char c) { return (c == ' ' || c == '\n' || c == '\r' || c == '\t'); } inline bool char_is_white_not_r(char c) { return (c == ' ' || c == '\n' || c == '\t'); } @@ -173,8 +175,10 @@ FCPP_LINK int compare(String a, String b); FCPP_LINK int reverse_seek_slash(String str); FCPP_LINK int reverse_seek_slash(String str, int start_pos); -inline bool get_front_of_directory(String *dest, String dir) { return append_checked(dest, substr(dir, reverse_seek_slash(dir) + 1)); } -inline bool get_path_of_directory(String *dest, String dir) { return append_checked(dest, substr(dir, 0, reverse_seek_slash(dir) + 1)); } +inline String front_of_directory(String dir) { return substr(dir, reverse_seek_slash(dir) + 1); } +inline String path_of_directory(String dir) { return substr(dir, 0, reverse_seek_slash(dir) + 1); } +inline bool get_front_of_directory(String *dest, String dir) { return append_checked(dest, front_of_directory(dir)); } +inline bool get_path_of_directory(String *dest, String dir) { return append_checked(dest, path_of_directory(dir)); } FCPP_LINK bool set_last_folder(String *dir, char *folder_name, char slash); FCPP_LINK bool set_last_folder(String *dir, String folder_name, char slash); FCPP_LINK String file_extension(String str); diff --git a/4cpp_lexer.h b/4cpp_lexer.h index 837af0c0..115e2a90 100644 --- a/4cpp_lexer.h +++ b/4cpp_lexer.h @@ -63,175 +63,7 @@ NOTES ON USE: #ifndef FCPP_LEXER_INC #define FCPP_LEXER_INC -enum Cpp_Token_Type{ - CPP_TOKEN_JUNK, - CPP_TOKEN_COMMENT, - - CPP_TOKEN_KEY_TYPE, - CPP_TOKEN_KEY_MODIFIER, - CPP_TOKEN_KEY_QUALIFIER, - CPP_TOKEN_KEY_OPERATOR, // NOTE(allen): This type is not actually stored in tokens - CPP_TOKEN_KEY_CONTROL_FLOW, - CPP_TOKEN_KEY_CAST, - CPP_TOKEN_KEY_TYPE_DECLARATION, - CPP_TOKEN_KEY_ACCESS, - CPP_TOKEN_KEY_LINKAGE, - CPP_TOKEN_KEY_OTHER, - - CPP_TOKEN_IDENTIFIER, - CPP_TOKEN_INTEGER_CONSTANT, - CPP_TOKEN_CHARACTER_CONSTANT, - 
CPP_TOKEN_FLOATING_CONSTANT, - CPP_TOKEN_STRING_CONSTANT, - CPP_TOKEN_BOOLEAN_CONSTANT, - - CPP_TOKEN_STATIC_ASSERT, - - CPP_TOKEN_BRACKET_OPEN, - CPP_TOKEN_BRACKET_CLOSE, - CPP_TOKEN_PARENTHESE_OPEN, - CPP_TOKEN_PARENTHESE_CLOSE, - CPP_TOKEN_BRACE_OPEN, - CPP_TOKEN_BRACE_CLOSE, - CPP_TOKEN_SEMICOLON, - CPP_TOKEN_ELLIPSIS, - - // NOTE(allen): Ambiguous tokens, lexer only, - // parser figures out the real meaning - CPP_TOKEN_STAR, - CPP_TOKEN_AMPERSAND, - CPP_TOKEN_TILDE, - CPP_TOKEN_PLUS, - CPP_TOKEN_MINUS, - CPP_TOKEN_INCREMENT, - CPP_TOKEN_DECREMENT, - - // NOTE(allen): Precedence 1, LtoR - CPP_TOKEN_SCOPE, - - // NOTE(allen): Precedence 2, LtoR - CPP_TOKEN_POSTINC, // from increment, parser only - CPP_TOKEN_POSTDEC, // from decrement, parser only - CPP_TOKEN_FUNC_STYLE_CAST, // parser only - CPP_TOKEN_CPP_STYLE_CAST, - CPP_TOKEN_CALL, // from open paren, parser only - CPP_TOKEN_INDEX, // from bracket open, parser only - CPP_TOKEN_DOT, - CPP_TOKEN_ARROW, - - // NOTE(allen): Precedence 3, RtoL - CPP_TOKEN_PREINC, // from increment, parser only - CPP_TOKEN_PREDEC, // from decrement, parser only - CPP_TOKEN_POSITIVE, // from plus, parser only - CPP_TOKEN_NEGAITVE, // from minus, parser only - CPP_TOKEN_NOT, - CPP_TOKEN_BIT_NOT, // from tilde, direct from 'compl' - CPP_TOKEN_CAST, // from open paren, parser only - CPP_TOKEN_DEREF, // from star, parser only - CPP_TOKEN_TYPE_PTR, // from star, parser only - CPP_TOKEN_ADDRESS, // from ampersand, parser only - CPP_TOKEN_TYPE_REF, // from ampersand, parser only - CPP_TOKEN_SIZEOF, - CPP_TOKEN_ALIGNOF, - CPP_TOKEN_DECLTYPE, - CPP_TOKEN_TYPEID, - CPP_TOKEN_NEW, - CPP_TOKEN_DELETE, - CPP_TOKEN_NEW_ARRAY, // from new and bracket open, parser only - CPP_TOKEN_DELETE_ARRAY, // from delete and bracket open, parser only - - // NOTE(allen): Precedence 4, LtoR - CPP_TOKEN_PTRDOT, - CPP_TOKEN_PTRARROW, - - // NOTE(allen): Precedence 5, LtoR - CPP_TOKEN_MUL, // from start, parser only - CPP_TOKEN_DIV, - CPP_TOKEN_MOD, - - // NOTE(allen): Precedence 6, LtoR - CPP_TOKEN_ADD, // from plus, parser only - CPP_TOKEN_SUB, // from minus, parser only - - // NOTE(allen): Precedence 7, LtoR - CPP_TOKEN_LSHIFT, - CPP_TOKEN_RSHIFT, - - // NOTE(allen): Precedence 8, LtoR - CPP_TOKEN_LESS, - CPP_TOKEN_GRTR, - CPP_TOKEN_GRTREQ, - CPP_TOKEN_LESSEQ, - - // NOTE(allen): Precedence 9, LtoR - CPP_TOKEN_EQEQ, - CPP_TOKEN_NOTEQ, - - // NOTE(allen): Precedence 10, LtoR - CPP_TOKEN_BIT_AND, // from ampersand, direct from 'bitand' - - // NOTE(allen): Precedence 11, LtoR - CPP_TOKEN_BIT_XOR, - - // NOTE(allen): Precedence 12, LtoR - CPP_TOKEN_BIT_OR, - - // NOTE(allen): Precedence 13, LtoR - CPP_TOKEN_AND, - - // NOTE(allen): Precedence 14, LtoR - CPP_TOKEN_OR, - - // NOTE(allen): Precedence 15, RtoL - CPP_TOKEN_TERNARY_QMARK, - CPP_TOKEN_COLON, - CPP_TOKEN_THROW, - CPP_TOKEN_EQ, - CPP_TOKEN_ADDEQ, - CPP_TOKEN_SUBEQ, - CPP_TOKEN_MULEQ, - CPP_TOKEN_DIVEQ, - CPP_TOKEN_MODEQ, - CPP_TOKEN_LSHIFTEQ, - CPP_TOKEN_RSHIFTEQ, - CPP_TOKEN_ANDEQ, - CPP_TOKEN_OREQ, - CPP_TOKEN_XOREQ, - - // NOTE(allen): Precedence 16, LtoR - CPP_TOKEN_COMMA, - - CPP_PP_INCLUDE, - CPP_PP_DEFINE, - CPP_PP_UNDEF, - CPP_PP_IF, - CPP_PP_IFDEF, - CPP_PP_IFNDEF, - CPP_PP_ELSE, - CPP_PP_ELIF, - CPP_PP_ENDIF, - CPP_PP_ERROR, - CPP_PP_IMPORT, - CPP_PP_USING, - CPP_PP_LINE, - CPP_PP_PRAGMA, - CPP_PP_STRINGIFY, - CPP_PP_CONCAT, - CPP_PP_UNKNOWN, - CPP_TOKEN_DEFINED, - CPP_TOKEN_INCLUDE_FILE, - CPP_TOKEN_ERROR_MESSAGE, - - // NOTE(allen): used in the parser - CPP_TOKEN_EOF -}; - -// TODO(allen): This is a dumb redundant 
type... probably just -// move towards using String for this everywhere eventually. -struct Cpp_File{ - char *data; - int size; -}; +#include "4cpp_lexer_types.h" Cpp_File data_as_cpp_file(Data data){ @@ -241,71 +73,6 @@ data_as_cpp_file(Data data){ return(result); } -struct Cpp_Token{ - Cpp_Token_Type type; - fcpp_i32 start, size; - fcpp_u16 state_flags; - fcpp_u16 flags; -}; - -enum Cpp_Token_Flag{ - CPP_TFLAG_IGNORE = 1 << 0, - CPP_TFLAG_PP_DIRECTIVE = 1 << 1, - CPP_TFLAG_PP_BODY = 1 << 2, - CPP_TFLAG_BAD_ENDING = 1 << 3, - CPP_TFLAG_MULTILINE = 1 << 4, - CPP_TFLAG_PARAMETERIZED = 1 << 5, - CPP_TFLAG_IS_OPERATOR = 1 << 6, - CPP_TFLAG_IS_KEYWORD = 1 << 7 -}; - -enum Cpp_Preprocessor_State{ - CPP_LEX_PP_DEFAULT, - CPP_LEX_PP_IDENTIFIER, - CPP_LEX_PP_MACRO_IDENTIFIER, - CPP_LEX_PP_INCLUDE, - CPP_LEX_PP_BODY, - CPP_LEX_PP_BODY_IF, - CPP_LEX_PP_NUMBER, - CPP_LEX_PP_ERROR, - CPP_LEX_PP_JUNK, - // NEVER ADD BELOW THIS - CPP_LEX_PP_COUNT -}; - -struct Cpp_Lex_Data{ - Cpp_Preprocessor_State pp_state; - fcpp_i32 pos; - fcpp_bool32 complete; -}; - -struct Cpp_Read_Result{ - Cpp_Token token; - fcpp_i32 pos; - fcpp_bool8 newline; - fcpp_bool8 has_result; -}; - -struct Cpp_Token_Stack{ - Cpp_Token *tokens; - int count, max_count; -}; - -struct Cpp_Token_Merge{ - Cpp_Token new_token; - fcpp_bool32 did_merge; -}; - -struct Seek_Result{ - fcpp_i32 pos; - fcpp_bool32 new_line; -}; - -struct Cpp_Get_Token_Result{ - fcpp_i32 token_index; - fcpp_bool32 in_whitespace; -}; - // TODO(allen): revisit this keyword data declaration system struct String_And_Flag{ char *str; @@ -383,17 +150,6 @@ FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *stack, int pos); FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end); FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token, int amount); -struct Cpp_Relex_State{ - Cpp_File file; - Cpp_Token_Stack *stack; - int start, end, amount; - int start_token_i; - int end_token_i; - int relex_start; - int tolerance; - int space_request; -}; - FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance); FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State state, Cpp_Token_Stack *stack); @@ -1614,14 +1370,6 @@ cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ return result; } -FCPP_LINK int -cpp_get_end_token(Cpp_Token_Stack *stack, int end){ - Cpp_Get_Token_Result result = cpp_get_token(stack, end); - if (result.token_index < 0) result.token_index = 0; - else if (end > stack->tokens[result.token_index].start) ++result.token_index; - return result.token_index; -} - FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int amount){ int count = stack->count; diff --git a/4cpp_lexer_types.h b/4cpp_lexer_types.h new file mode 100644 index 00000000..65c2043a --- /dev/null +++ b/4cpp_lexer_types.h @@ -0,0 +1,256 @@ + +// TOP + +#ifndef FCPP_LEXER_TYPES_INC +#define FCPP_LEXER_TYPES_INC + +enum Cpp_Token_Type{ + CPP_TOKEN_JUNK, + CPP_TOKEN_COMMENT, + + CPP_TOKEN_KEY_TYPE, + CPP_TOKEN_KEY_MODIFIER, + CPP_TOKEN_KEY_QUALIFIER, + CPP_TOKEN_KEY_OPERATOR, // NOTE(allen): This type is not actually stored in tokens + CPP_TOKEN_KEY_CONTROL_FLOW, + CPP_TOKEN_KEY_CAST, + CPP_TOKEN_KEY_TYPE_DECLARATION, + CPP_TOKEN_KEY_ACCESS, + CPP_TOKEN_KEY_LINKAGE, + CPP_TOKEN_KEY_OTHER, + + CPP_TOKEN_IDENTIFIER, + CPP_TOKEN_INTEGER_CONSTANT, + CPP_TOKEN_CHARACTER_CONSTANT, + CPP_TOKEN_FLOATING_CONSTANT, + CPP_TOKEN_STRING_CONSTANT, + CPP_TOKEN_BOOLEAN_CONSTANT, + 
+ CPP_TOKEN_STATIC_ASSERT, + + CPP_TOKEN_BRACKET_OPEN, + CPP_TOKEN_BRACKET_CLOSE, + CPP_TOKEN_PARENTHESE_OPEN, + CPP_TOKEN_PARENTHESE_CLOSE, + CPP_TOKEN_BRACE_OPEN, + CPP_TOKEN_BRACE_CLOSE, + CPP_TOKEN_SEMICOLON, + CPP_TOKEN_ELLIPSIS, + + // NOTE(allen): Ambiguous tokens, lexer only, + // parser figures out the real meaning + CPP_TOKEN_STAR, + CPP_TOKEN_AMPERSAND, + CPP_TOKEN_TILDE, + CPP_TOKEN_PLUS, + CPP_TOKEN_MINUS, + CPP_TOKEN_INCREMENT, + CPP_TOKEN_DECREMENT, + + // NOTE(allen): Precedence 1, LtoR + CPP_TOKEN_SCOPE, + + // NOTE(allen): Precedence 2, LtoR + CPP_TOKEN_POSTINC, // from increment, parser only + CPP_TOKEN_POSTDEC, // from decrement, parser only + CPP_TOKEN_FUNC_STYLE_CAST, // parser only + CPP_TOKEN_CPP_STYLE_CAST, + CPP_TOKEN_CALL, // from open paren, parser only + CPP_TOKEN_INDEX, // from bracket open, parser only + CPP_TOKEN_DOT, + CPP_TOKEN_ARROW, + + // NOTE(allen): Precedence 3, RtoL + CPP_TOKEN_PREINC, // from increment, parser only + CPP_TOKEN_PREDEC, // from decrement, parser only + CPP_TOKEN_POSITIVE, // from plus, parser only + CPP_TOKEN_NEGAITVE, // from minus, parser only + CPP_TOKEN_NOT, + CPP_TOKEN_BIT_NOT, // from tilde, direct from 'compl' + CPP_TOKEN_CAST, // from open paren, parser only + CPP_TOKEN_DEREF, // from star, parser only + CPP_TOKEN_TYPE_PTR, // from star, parser only + CPP_TOKEN_ADDRESS, // from ampersand, parser only + CPP_TOKEN_TYPE_REF, // from ampersand, parser only + CPP_TOKEN_SIZEOF, + CPP_TOKEN_ALIGNOF, + CPP_TOKEN_DECLTYPE, + CPP_TOKEN_TYPEID, + CPP_TOKEN_NEW, + CPP_TOKEN_DELETE, + CPP_TOKEN_NEW_ARRAY, // from new and bracket open, parser only + CPP_TOKEN_DELETE_ARRAY, // from delete and bracket open, parser only + + // NOTE(allen): Precedence 4, LtoR + CPP_TOKEN_PTRDOT, + CPP_TOKEN_PTRARROW, + + // NOTE(allen): Precedence 5, LtoR + CPP_TOKEN_MUL, // from start, parser only + CPP_TOKEN_DIV, + CPP_TOKEN_MOD, + + // NOTE(allen): Precedence 6, LtoR + CPP_TOKEN_ADD, // from plus, parser only + CPP_TOKEN_SUB, // from minus, parser only + + // NOTE(allen): Precedence 7, LtoR + CPP_TOKEN_LSHIFT, + CPP_TOKEN_RSHIFT, + + // NOTE(allen): Precedence 8, LtoR + CPP_TOKEN_LESS, + CPP_TOKEN_GRTR, + CPP_TOKEN_GRTREQ, + CPP_TOKEN_LESSEQ, + + // NOTE(allen): Precedence 9, LtoR + CPP_TOKEN_EQEQ, + CPP_TOKEN_NOTEQ, + + // NOTE(allen): Precedence 10, LtoR + CPP_TOKEN_BIT_AND, // from ampersand, direct from 'bitand' + + // NOTE(allen): Precedence 11, LtoR + CPP_TOKEN_BIT_XOR, + + // NOTE(allen): Precedence 12, LtoR + CPP_TOKEN_BIT_OR, + + // NOTE(allen): Precedence 13, LtoR + CPP_TOKEN_AND, + + // NOTE(allen): Precedence 14, LtoR + CPP_TOKEN_OR, + + // NOTE(allen): Precedence 15, RtoL + CPP_TOKEN_TERNARY_QMARK, + CPP_TOKEN_COLON, + CPP_TOKEN_THROW, + CPP_TOKEN_EQ, + CPP_TOKEN_ADDEQ, + CPP_TOKEN_SUBEQ, + CPP_TOKEN_MULEQ, + CPP_TOKEN_DIVEQ, + CPP_TOKEN_MODEQ, + CPP_TOKEN_LSHIFTEQ, + CPP_TOKEN_RSHIFTEQ, + CPP_TOKEN_ANDEQ, + CPP_TOKEN_OREQ, + CPP_TOKEN_XOREQ, + + // NOTE(allen): Precedence 16, LtoR + CPP_TOKEN_COMMA, + + CPP_PP_INCLUDE, + CPP_PP_DEFINE, + CPP_PP_UNDEF, + CPP_PP_IF, + CPP_PP_IFDEF, + CPP_PP_IFNDEF, + CPP_PP_ELSE, + CPP_PP_ELIF, + CPP_PP_ENDIF, + CPP_PP_ERROR, + CPP_PP_IMPORT, + CPP_PP_USING, + CPP_PP_LINE, + CPP_PP_PRAGMA, + CPP_PP_STRINGIFY, + CPP_PP_CONCAT, + CPP_PP_UNKNOWN, + CPP_TOKEN_DEFINED, + CPP_TOKEN_INCLUDE_FILE, + CPP_TOKEN_ERROR_MESSAGE, + + // NOTE(allen): used in the parser + CPP_TOKEN_EOF +}; + +// TODO(allen): This is a dumb redundant type... probably just +// move towards using String for this everywhere eventually. 
+struct Cpp_File{ + char *data; + int size; +}; + +struct Cpp_Token{ + Cpp_Token_Type type; + fcpp_i32 start, size; + fcpp_u16 state_flags; + fcpp_u16 flags; +}; + +enum Cpp_Token_Flag{ + CPP_TFLAG_IGNORE = 1 << 0, + CPP_TFLAG_PP_DIRECTIVE = 1 << 1, + CPP_TFLAG_PP_BODY = 1 << 2, + CPP_TFLAG_BAD_ENDING = 1 << 3, + CPP_TFLAG_MULTILINE = 1 << 4, + CPP_TFLAG_PARAMETERIZED = 1 << 5, + CPP_TFLAG_IS_OPERATOR = 1 << 6, + CPP_TFLAG_IS_KEYWORD = 1 << 7 +}; + +enum Cpp_Preprocessor_State{ + CPP_LEX_PP_DEFAULT, + CPP_LEX_PP_IDENTIFIER, + CPP_LEX_PP_MACRO_IDENTIFIER, + CPP_LEX_PP_INCLUDE, + CPP_LEX_PP_BODY, + CPP_LEX_PP_BODY_IF, + CPP_LEX_PP_NUMBER, + CPP_LEX_PP_ERROR, + CPP_LEX_PP_JUNK, + // NEVER ADD BELOW THIS + CPP_LEX_PP_COUNT +}; + +struct Cpp_Lex_Data{ + Cpp_Preprocessor_State pp_state; + fcpp_i32 pos; + fcpp_bool32 complete; +}; + +struct Cpp_Read_Result{ + Cpp_Token token; + fcpp_i32 pos; + fcpp_bool8 newline; + fcpp_bool8 has_result; +}; + +struct Cpp_Token_Stack{ + Cpp_Token *tokens; + int count, max_count; +}; + +struct Cpp_Token_Merge{ + Cpp_Token new_token; + fcpp_bool32 did_merge; +}; + +struct Seek_Result{ + fcpp_i32 pos; + fcpp_bool32 new_line; +}; + +struct Cpp_Get_Token_Result{ + fcpp_i32 token_index; + fcpp_bool32 in_whitespace; +}; + +struct Cpp_Relex_State{ + Cpp_File file; + Cpp_Token_Stack *stack; + int start, end, amount; + int start_token_i; + int end_token_i; + int relex_start; + int tolerance; + int space_request; +}; + +#endif + +// BOTTOM + diff --git a/4ed_app_target.cpp b/4ed_app_target.cpp index 26d1958b..fe8f3273 100644 --- a/4ed_app_target.cpp +++ b/4ed_app_target.cpp @@ -32,7 +32,7 @@ #include "4tech_table.cpp" #define FCPP_LEXER_IMPLEMENTATION -#include "4cpp_lexer.h" +#include "test/4cpp_new_lexer.h" #include "4ed_template.cpp" diff --git a/4ed_file_view.cpp b/4ed_file_view.cpp index f342f3bc..1afa9a04 100644 --- a/4ed_file_view.cpp +++ b/4ed_file_view.cpp @@ -660,13 +660,13 @@ Job_Callback_Sig(job_full_lex){ tokens.count = 0; Cpp_Lex_Data status; - status = cpp_lex_file_nonalloc(cpp_file, &tokens); + status = cpp_lex_nonalloc(cpp_file, &tokens); while (!status.complete){ system->grow_thread_memory(memory); tokens.tokens = (Cpp_Token*)memory->data; tokens.max_count = memory->size / sizeof(Cpp_Token); - status = cpp_lex_file_nonalloc(cpp_file, &tokens, status); + status = cpp_lex_nonalloc(cpp_file, &tokens, status); } i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1)); @@ -810,11 +810,17 @@ file_relex_parallel(System_Functions *system, } if (!inline_lex){ - i32 end_token_i = cpp_get_end_token(&file->state.token_stack, end_i); - cpp_shift_token_starts(&file->state.token_stack, end_token_i, amount); + Cpp_Token_Stack *stack = &file->state.token_stack; + Cpp_Get_Token_Result get_token_result = cpp_get_token(stack, end_i); + i32 end_token_i = get_token_result.token_index; + + if (end_token_i < 0) end_token_i = 0; + else if (end_i > stack->tokens[end_token_i].start) ++end_token_i; + + cpp_shift_token_starts(stack, end_token_i, amount); --end_token_i; if (end_token_i >= 0){ - Cpp_Token *token = file->state.token_stack.tokens + end_token_i; + Cpp_Token *token = stack->tokens + end_token_i; if (token->start < end_i && token->start + token->size > end_i){ token->size += amount; } diff --git a/TODO.txt b/TODO.txt index 0ca6a213..22e1380e 100644 --- a/TODO.txt +++ b/TODO.txt @@ -66,8 +66,8 @@ ; [] tab character wrong width ; [] bouncing when scrolling down ; [] miblo's off screen cursor thing -; [] fyoucon's segfaults with malloc on win10 -; [] open empty file bug +; +; [] 
open empty file bug ~ possibly a win10 issue? ; ; @@ -166,6 +166,7 @@ ; ; HARD BUGS +; [] fyoucon's segfaults with malloc on win10 ; [] repainting too slow for resize looks really dumb ; [] handling cursor in non-client part of window so it doesn't spaz ; [] fill screen right away diff --git a/test/4cpp_new_lexer.h b/test/4cpp_new_lexer.h new file mode 100644 index 00000000..4c8ce82f --- /dev/null +++ b/test/4cpp_new_lexer.h @@ -0,0 +1,250 @@ + +// TOP + +#ifndef FCPP_NEW_LEXER_INC +#define FCPP_NEW_LEXER_INC + +#include "../4cpp_lexer_types.h" + +#define lexer_link static + + +lexer_link Cpp_Get_Token_Result +cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ + Cpp_Get_Token_Result result = {}; + Cpp_Token *token_array = token_stack->tokens; + Cpp_Token *token = 0; + int first = 0; + int count = token_stack->count; + int last = count; + int this_start = 0, next_start = 0; + + if (count > 0){ + for (;;){ + result.token_index = (first + last)/2; + token = token_array + result.token_index; + + this_start = token->start; + + if (result.token_index + 1 < count){ + next_start = (token + 1)->start; + } + else{ + next_start = this_start + token->size; + } + if (this_start <= pos && pos < next_start){ + break; + } + else if (pos < this_start){ + last = result.token_index; + } + else{ + first = result.token_index + 1; + } + if (first == last){ + result.token_index = first; + break; + } + } + + if (result.token_index == count){ + --result.token_index; + result.in_whitespace = 1; + } + else{ + if (token->start + token->size <= pos){ + result.in_whitespace = 1; + } + } + } + else{ + result.token_index = -1; + result.in_whitespace = 1; + } + + return(result); +} + +lexer_link void +cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amount){ + Cpp_Token *token = stack->tokens + from_token_i; + int count = stack->count, i; + + for (i = from_token_i; i < count; ++i, ++token){ + token->start += shift_amount; + } +} + +enum Lex_State{ + LS_default, + LS_comment_pre, + LS_comment, + LS_comment_block, + LS_comment_block_ending, + LS_dot, + LS_less, + LS_more, +}; + +struct Lex_Data{ + int token_start; + int token_end; + int completed; +}; + +lexer_link Lex_Data +cpp_lex_nonalloc(char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){ + Cpp_Token *out_tokens = token_stack_out->tokens; + int token_i = token_stack_out->count; + int max_token_i = token_stack_out->max_count; + + Cpp_Token token = {}; + + int pos = file_absolute_pos; + int end_pos = size + file_absolute_pos; + unsigned short state = LS_default; + unsigned short pp_state = 0; + + Lex_Data lex_data = {}; + + int emit_token = 0; + + char c; + + chunk -= file_absolute_pos; + + for (; pos < end_pos && token_i < max_token_i; ++pos){ + for (; pos < end_pos;){ + c = chunk[pos++]; + if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')) break; + } + + --pos; + lex_data.token_start = pos; + + state = LS_default; + emit_token = 0; + for (; emit_token == 0 && pos < end_pos;){ + c = chunk[pos++]; + + switch (state){ + case LS_default: + switch (c){ + case '/': state = LS_comment_pre; break; + +#define OperCase(op,type) case op: emit_token = 1; break; + OperCase('{', CPP_TOKEN_BRACE_OPEN); + OperCase('}', CPP_TOKEN_BRACE_CLOSE); + + OperCase('[', CPP_TOKEN_BRACKET_OPEN); + OperCase(']', CPP_TOKEN_BRACKET_CLOSE); + + OperCase('(', CPP_TOKEN_PARENTHESE_OPEN); + OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE); + + OperCase('~', CPP_TOKEN_TILDE); + OperCase(',', CPP_TOKEN_COMMA); + 
OperCase('?', CPP_TOKEN_TERNARY_QMARK); +#undef OperCase + +#if 0 + case '.': state = LS_dot; break; + case '<': state = LS_less; break; + case '>': state = LS_more; break; +#endif + } + break; + + case LS_dot: + break; + + case LS_less: + break; + + case LS_more: + break; + + case LS_comment_pre: + switch (c){ + case '/': state = LS_comment; break; + case '*': state = LS_comment_block; break; + } + break; + + case LS_comment: + switch (c){ + case '\n': emit_token = 1; break; + } + break; + + case LS_comment_block: + switch (c){ + case '*': state = LS_comment_block_ending; break; + } + break; + + case LS_comment_block_ending: + switch (c){ + case '*': state = LS_comment_block_ending; break; + case '/': emit_token = 1; break; + default: state = LS_comment_block; break; + } + break; + } + } + + if (emit_token){ + --pos; + lex_data.token_end = pos; + + switch (state){ + case LS_default: + switch (chunk[pos]){ +#define OperCase(op,t) case op: token.type = t; break; + OperCase('{', CPP_TOKEN_BRACE_OPEN); + OperCase('}', CPP_TOKEN_BRACE_CLOSE); + + OperCase('[', CPP_TOKEN_BRACKET_OPEN); + OperCase(']', CPP_TOKEN_BRACKET_CLOSE); + + OperCase('(', CPP_TOKEN_PARENTHESE_OPEN); + OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE); + + OperCase('~', CPP_TOKEN_TILDE); + OperCase(',', CPP_TOKEN_COMMA); + OperCase('?', CPP_TOKEN_TERNARY_QMARK); +#undef OperCase + } + token.flags = CPP_TFLAG_IS_OPERATOR; + break; + + case LS_comment: case LS_comment_block_ending: + token.type = CPP_TOKEN_COMMENT; + token.flags = 0; + c = chunk[pos]; + while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){ + --pos; + c = chunk[pos]; + } + ++pos; + break; + } + + token.start = lex_data.token_start; + token.size = pos - lex_data.token_start; + token.state_flags = pp_state; + out_tokens[token_i++] = token; + + pos = lex_data.token_end; + } + } + + token_stack_out->count = token_i; + + if (pos == end_pos) lex_data.completed = 1; + return(lex_data); +} + +#endif + +// BOTTOM diff --git a/test/experiment.cpp b/test/experiment.cpp index c9a27063..af1bb1ce 100644 --- a/test/experiment.cpp +++ b/test/experiment.cpp @@ -15,57 +15,282 @@ #include "../4coder_string.h" #include "../4cpp_types.h" + +#include "../4cpp_lexer_types.h" + #define FCPP_LEXER_IMPLEMENTATION #include "../4cpp_lexer.h" -#include "../4cpp_preprocessor.cpp" + +namespace new_lex{ +#include "4cpp_new_lexer.h" +} + +#include #include #include -Data -file_dump(char *filename){ - Data result; - FILE *file; - result = {}; - file = fopen(filename, "rb"); - if (file){ - fseek(file, 0, SEEK_END); - result.size = ftell(file); - fseek(file, 0, SEEK_SET); - result.data = (byte*)malloc(result.size); - fread(result.data, 1, result.size, file); - fclose(file); +static Data +dump_file(char *filename){ + Data data = {}; + HANDLE file; + DWORD hi, lo; + + file = CreateFile(filename, GENERIC_READ, 0, 0, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); + + if (file != INVALID_HANDLE_VALUE){ + lo = GetFileSize(file, &hi); + assert(hi == 0); + + data.size = (int)lo; + data.data = (byte*)malloc(data.size + 1); + + ReadFile(file, data.data, lo, &lo, 0); + + assert((int)lo == data.size); + + CloseHandle(file); } - return(result); + + return(data); } -int main(int argc, char **argv){ - Data target_file; - Cpp_File file; - Cpp_Token_Stack tokens; - Cpp_Token *token; - int i; +typedef struct File_Info{ + String filename; + int folder; +} File_Info; + +typedef struct File_List{ + // Ignore this, it's for internal stuff. 
+ void *block; - if (argc != 2){ - printf("usage: %s \n", argv[0]); - exit(1); + // The list of files and folders. + File_Info *infos; + int count; + + // Ignore this, it's for internal stuff. + int block_size; +} File_List; + +void* +Win32GetMemory(int size){ + return (malloc(size)); +} + +void +Win32FreeMemory(void *ptr){ + free(ptr); +} + +static void +system_set_file_list(File_List *file_list, String directory){ + if (directory.size > 0){ + char dir_space[MAX_PATH + 32]; + String dir = make_string(dir_space, 0, MAX_PATH + 32); + append(&dir, directory); + char trail_str[] = "\\*"; + append(&dir, trail_str); + + char *c_str_dir = make_c_str(dir); + + WIN32_FIND_DATA find_data; + HANDLE search; + search = FindFirstFileA(c_str_dir, &find_data); + + if (search != INVALID_HANDLE_VALUE){ + i32 count = 0; + i32 file_count = 0; + BOOL more_files = 1; + do{ + if (!match(find_data.cFileName, ".") && + !match(find_data.cFileName, "..")){ + ++file_count; + i32 size = 0; + for(;find_data.cFileName[size];++size); + count += size + 1; + } + more_files = FindNextFile(search, &find_data); + }while(more_files); + FindClose(search); + + i32 required_size = count + file_count * sizeof(File_Info); + if (file_list->block_size < required_size){ + Win32FreeMemory(file_list->block); + file_list->block = Win32GetMemory(required_size); + file_list->block_size = required_size; + } + + file_list->infos = (File_Info*)file_list->block; + char *name = (char*)(file_list->infos + file_count); + if (file_list->block){ + search = FindFirstFileA(c_str_dir, &find_data); + + if (search != INVALID_HANDLE_VALUE){ + File_Info *info = file_list->infos; + more_files = 1; + do{ + if (!match(find_data.cFileName, ".") && + !match(find_data.cFileName, "..")){ + info->folder = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + info->filename.str = name; + + i32 i = 0; + for(;find_data.cFileName[i];++i) *name++ = find_data.cFileName[i]; + info->filename.size = i; + info->filename.memory_size = info->filename.size + 1; + *name++ = 0; + replace_char(info->filename, '\\', '/'); + ++info; + } + more_files = FindNextFile(search, &find_data); + }while(more_files); + FindClose(search); + + file_list->count = file_count; + + }else{ + Win32FreeMemory(file_list->block); + file_list->block = 0; + file_list->block_size = 0; + } + } + } } - - target_file = file_dump(argv[1]); - if (target_file.data == 0){ - printf("couldn't open file %s\n", argv[1]); - exit(1); + else{ + if (directory.str == 0){ + Win32FreeMemory(file_list->block); + file_list->block = 0; + file_list->block_size = 0; + } + file_list->infos = 0; + file_list->count = 0; } - - tokens = cpp_make_token_stack(1 << 10); - - file = data_as_cpp_file(target_file); - cpp_lex_file(file, &tokens); - - token = tokens.tokens; - for (i = 0; i < tokens.count; ++i, ++token){ - printf("%.*s\n", token->size, file.data + token->start); +} + +#define TOKEN_MAX (1 << 12) +#define TOKEN_ARRAY_SIZE (TOKEN_MAX*sizeof(Cpp_Token)) + +static void +init_test_stack(Cpp_Token_Stack *stack){ + stack->tokens = (Cpp_Token*)malloc(TOKEN_ARRAY_SIZE); + stack->count = 0; + stack->max_count = TOKEN_MAX; +} + +Cpp_Lex_Data lex_data = {}; + +struct Experiment{ + Cpp_Token_Stack correct_stack; + Cpp_Token_Stack testing_stack; + int passed_total, test_total; +}; + +static void +run_experiment(Experiment *exp, char *filename){ + String extension = {}; + Data file_data; + Cpp_File file_cpp; + int pass; + + extension = file_extension(make_string_slowly(filename)); + + if (match(extension, "cpp") || 
match(extension, "h")){ + + pass = 1; + printf("testing on file: %s\n", filename); + file_data = dump_file(filename); + + if (file_data.size < (100 << 10)){ + exp->test_total++; + + exp->correct_stack.count = 0; + exp->testing_stack.count = 0; + + memset(exp->correct_stack.tokens, TOKEN_ARRAY_SIZE, 0); + memset(exp->testing_stack.tokens, TOKEN_ARRAY_SIZE, 0); + + file_cpp.data = (char*)file_data.data; + file_cpp.size = file_data.size; + + cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data); + new_lex::cpp_lex_nonalloc((char*)file_data.data, 0, file_data.size, &exp->testing_stack); + + if (exp->correct_stack.count != exp->testing_stack.count){ + pass = 0; + printf("error: stack size mismatch %d original and %d testing\n", + exp->correct_stack.count, exp->testing_stack.count); + } + + int min_count = exp->correct_stack.count; + if (min_count > exp->testing_stack.count) min_count = exp->testing_stack.count; + + for (int j = 0; j < min_count; ++j){ + Cpp_Token *correct, *testing; + correct = exp->correct_stack.tokens + j; + testing = exp->testing_stack.tokens + j; + + if (correct->type != testing->type){ + pass = 0; + printf("type mismatch at token %d\n", j); + } + + if (correct->start != testing->start || correct->size != testing->size){ + pass = 0; + printf("token range mismatch at token %d\n" + "\t%d:%d original %d:%d testing\n" + "\t%.*s original %.*s testing\n", + j, + correct->start, correct->size, testing->start, testing->size, + correct->size, file_cpp.data + correct->start, + testing->size, file_cpp.data + testing->start); + } + + if (correct->flags != testing->flags){ + pass = 0; + printf("token flag mismatch at token %d\n", j); + } + } + + if (pass){ + exp->passed_total++; + printf("test passed!\n\n"); + } + else{ + printf("test failed, you failed, fix it now!\n\n"); + } + } + + free(file_data.data); } +} + +#define BASE_DIR "w:/4ed/data/test/" + +int main(){ + char test_directory[] = BASE_DIR; + File_List all_files = {}; + Experiment exp = {}; + + init_test_stack(&exp.correct_stack); + init_test_stack(&exp.testing_stack); + + AllowLocal(test_directory); + AllowLocal(all_files); + + run_experiment(&exp, BASE_DIR "autotab.cpp"); + +#if 0 + system_set_file_list(&all_files, make_lit_string(test_directory)); + + for (int i = 0; i < all_files.count; ++i){ + if (all_files.infos[i].folder == 0){ + run_experiment(&exp, all_files.infos[i].filename.str); + } + } +#endif + + printf("you passed %d / %d tests\n", exp.passed_total, exp.test_total); return(0); }
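
Usage sketch (not part of the patch above): the new lexer entry points in test/4cpp_new_lexer.h are only exercised through the test harness in test/experiment.cpp, so the following is a minimal, hypothetical example of how cpp_lex_nonalloc and cpp_get_token are meant to fit together. The helper name, the stack capacity, and the single whole-buffer call are illustrative assumptions; Lex_Data carries no resume state in this commit, so chunked or restartable lexing is not shown. Include order follows test/experiment.cpp.

// NOTE(sketch): illustrative only, not part of the commit.
#include <stdio.h>
#include <stdlib.h>

#include "../4coder_string.h"
#include "../4cpp_types.h"
#include "../4cpp_lexer_types.h"
#include "4cpp_new_lexer.h"

static void
lex_and_print_token_at(char *file_data, int file_size, int cursor_pos){
    Cpp_Token_Stack stack = {};
    stack.max_count = (1 << 12);   // hypothetical capacity, mirrors init_test_stack
    stack.tokens = (Cpp_Token*)malloc(stack.max_count*sizeof(Cpp_Token));

    // Lex the whole buffer in one call; the chunk begins at absolute position 0.
    Lex_Data result = cpp_lex_nonalloc(file_data, 0, file_size, &stack);

    if (result.completed){
        // Binary search for the token containing (or nearest to) cursor_pos.
        Cpp_Get_Token_Result get = cpp_get_token(&stack, cursor_pos);
        if (get.token_index >= 0 && !get.in_whitespace){
            Cpp_Token *token = stack.tokens + get.token_index;
            printf("%.*s\n", token->size, file_data + token->start);
        }
    }

    free(stack.tokens);
}

The file_absolute_pos argument, together with the chunk -= file_absolute_pos adjustment inside cpp_lex_nonalloc, is presumably what will let the editor feed the lexer streamed chunks once Lex_Data grows resume state.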