added clparams to app step on linux; got the lexer API working with a fixed width tb
This commit is contained in:
parent
b9536768f7
commit
1b03a6a2be
|
@ -292,7 +292,7 @@ It should point at the String in the first element of the array.</div></div></di
|
|||
words it should be the size of one element of the array.</div></div></div><div><div style='font-weight: 600;'>count</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The count parameter specifies the number of elements in the str_set array.</div></div></div><div><div style='font-weight: 600;'>str</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The str parameter specifies the string to match against the str_set.</div></div></div><div><div style='font-weight: 600;'>match_index</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>If this call succeeds match_index is filled with the index into str_set where the match occurred.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call tries to see if str matches any of the strings in str_set. If there is a match the call
|
||||
succeeds and returns non-zero. The matching rule is equivalent to the matching rule for match.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#match_doc'>match</a></div></div></div><hr><div id='string_set_match_doc' style='margin-bottom: 1cm;'><h4>§4.3.116: string_set_match</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>fstr_bool string_set_match(<div style='margin-left: 4mm;'>String *str_set,<br>int32_t count,<br>String str,<br>int32_t *match_index<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>str_set</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The str_set parameter is an array of String structs specifying matchable strings.</div></div></div><div><div style='font-weight: 600;'>count</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The count parameter specifies the number of String structs in the str_set array.</div></div></div><div><div style='font-weight: 600;'>str</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The str parameter specifies the string to match against the str_set.</div></div></div><div><div style='font-weight: 600;'>match_index</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>If this call succeeds match_index is filled with the index into str_set where the match occurred.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call tries to see if str matches any of the strings in str_set. If there is a match the call
|
||||
succeeds and returns non-zero. The matching rule is equivalent to the matching rule for match.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#match_doc'>match</a></div></div><hr>
|
||||
<h2 id='section_lexer_library'>§5 Lexer Library</h2><h3>§5.1 Lexer Intro</h3><div>The 4cpp lexer system provides a polished, fast, flexible system that takes in C/C++ and outputs a tokenization of the text data. There are two API levels. One level is set up to let you easily get a tokenization of the file. This level manages memory for you with malloc to make it as fast as possible to start getting your tokens. The second level enables deep integration by allowing control over allocation, data chunking, and output rate control.<br><br>To use the quick setup API you simply include 4cpp_lexer.h and read the documentation at <a href='#cpp_lex_file_doc'>cpp_lex_file</a>.<br><br>To use the fancier API include 4cpp_lexer.h and read the documentation at <a href='#cpp_lex_step_doc'>cpp_lex_step</a>. If you want to be absolutely sure you are not including malloc into your program you can define FCPP_FORBID_MALLOC before the include and the "step" API will continue to work.<br><br>There are a few more features in 4cpp that are not documented yet. 
You are free to try to use these, but I am not totally sure they are ready yet, and when they are they will be documented.</div><h3>§5.2 Lexer Function List</h3><ul><li><a href='#cpp_get_token_doc'>cpp_get_token</a></li><li><a href='#cpp_lex_step_doc'>cpp_lex_step</a></li><li><a href='#cpp_lex_data_init_doc'>cpp_lex_data_init</a></li><li><a href='#cpp_lex_data_temp_size_doc'>cpp_lex_data_temp_size</a></li><li><a href='#cpp_lex_data_temp_read_doc'>cpp_lex_data_temp_read</a></li><li><a href='#cpp_lex_data_new_temp_doc'>cpp_lex_data_new_temp</a></li><li><a href='#cpp_get_relex_range_doc'>cpp_get_relex_range</a></li><li><a href='#cpp_relex_init_doc'>cpp_relex_init</a></li><li><a href='#cpp_relex_start_position_doc'>cpp_relex_start_position</a></li><li><a href='#cpp_relex_declare_first_chunk_position_doc'>cpp_relex_declare_first_chunk_position</a></li><li><a href='#cpp_relex_is_start_chunk_doc'>cpp_relex_is_start_chunk</a></li><li><a href='#cpp_relex_step_doc'>cpp_relex_step</a></li><li><a href='#cpp_relex_get_new_count_doc'>cpp_relex_get_new_count</a></li><li><a href='#cpp_relex_complete_doc'>cpp_relex_complete</a></li><li><a href='#cpp_relex_abort_doc'>cpp_relex_abort</a></li><li><a href='#cpp_make_token_array_doc'>cpp_make_token_array</a></li><li><a href='#cpp_free_token_array_doc'>cpp_free_token_array</a></li><li><a href='#cpp_resize_token_array_doc'>cpp_resize_token_array</a></li><li><a href='#cpp_lex_file_doc'>cpp_lex_file</a></li></ul><h3>§5.3 Lexer Types List</h3><ul><li><a href='#Cpp_Token_Type_doc'>Cpp_Token_Type</a></li><li><a href='#Cpp_Token_doc'>Cpp_Token</a></li><li><a href='#Cpp_Token_Flag_doc'>Cpp_Token_Flag</a></li><li><a href='#Cpp_Token_Array_doc'>Cpp_Token_Array</a></li><li><a href='#Cpp_Get_Token_Result_doc'>Cpp_Get_Token_Result</a></li><li><a href='#Cpp_Relex_Range_doc'>Cpp_Relex_Range</a></li><li><a href='#Cpp_Lex_Data_doc'>Cpp_Lex_Data</a></li><li><a href='#Cpp_Lex_Result_doc'>Cpp_Lex_Result</a></li><li><a 
href='#Cpp_Relex_Data_doc'>Cpp_Relex_Data</a></li></ul><h3>§5.4 Lexer Function Descriptions</h3><div id='cpp_get_token_doc' style='margin-bottom: 1cm;'><h4>§5.4.1: cpp_get_token</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Get_Token_Result cpp_get_token(<div style='margin-left: 4mm;'>Cpp_Token_Array *token_array_in,<br>int32_t pos<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>token_array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The array of tokens from which to get a token.</div></div></div><div><div style='font-weight: 600;'>pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The position, measured in bytes, to get the token for.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>A Cpp_Get_Token_Result struct is returned containing the index
|
||||
<h2 id='section_lexer_library'>§5 Lexer Library</h2><h3>§5.1 Lexer Intro</h3><div>The 4cpp lexer system provides a polished, fast, flexible system that takes in C/C++ and outputs a tokenization of the text data. There are two API levels. One level is set up to let you easily get a tokenization of the file. This level manages memory for you with malloc to make it as fast as possible to start getting your tokens. The second level enables deep integration by allowing control over allocation, data chunking, and output rate control.<br><br>To use the quick setup API you simply include 4cpp_lexer.h and read the documentation at <a href='#cpp_lex_file_doc'>cpp_lex_file</a>.<br><br>To use the fancier API include 4cpp_lexer.h and read the documentation at <a href='#cpp_lex_step_doc'>cpp_lex_step</a>. If you want to be absolutely sure you are not including malloc into your program you can define FCPP_FORBID_MALLOC before the include and the "step" API will continue to work.<br><br>There are a few more features in 4cpp that are not documented yet. 
You are free to try to use these, but I am not totally sure they are ready yet, and when they are they will be documented.</div><h3>§5.2 Lexer Function List</h3><ul><li><a href='#cpp_get_token_doc'>cpp_get_token</a></li><li><a href='#cpp_lex_step_doc'>cpp_lex_step</a></li><li><a href='#cpp_lex_data_init_doc'>cpp_lex_data_init</a></li><li><a href='#cpp_lex_data_temp_size_doc'>cpp_lex_data_temp_size</a></li><li><a href='#cpp_lex_data_temp_read_doc'>cpp_lex_data_temp_read</a></li><li><a href='#cpp_lex_data_new_temp_DEP_doc'>cpp_lex_data_new_temp_DEP</a></li><li><a href='#cpp_get_relex_range_doc'>cpp_get_relex_range</a></li><li><a href='#cpp_relex_init_doc'>cpp_relex_init</a></li><li><a href='#cpp_relex_start_position_doc'>cpp_relex_start_position</a></li><li><a href='#cpp_relex_declare_first_chunk_position_doc'>cpp_relex_declare_first_chunk_position</a></li><li><a href='#cpp_relex_is_start_chunk_doc'>cpp_relex_is_start_chunk</a></li><li><a href='#cpp_relex_step_doc'>cpp_relex_step</a></li><li><a href='#cpp_relex_get_new_count_doc'>cpp_relex_get_new_count</a></li><li><a href='#cpp_relex_complete_doc'>cpp_relex_complete</a></li><li><a href='#cpp_relex_abort_doc'>cpp_relex_abort</a></li><li><a href='#cpp_make_token_array_doc'>cpp_make_token_array</a></li><li><a href='#cpp_free_token_array_doc'>cpp_free_token_array</a></li><li><a href='#cpp_resize_token_array_doc'>cpp_resize_token_array</a></li><li><a href='#cpp_lex_file_doc'>cpp_lex_file</a></li></ul><h3>§5.3 Lexer Types List</h3><ul><li><a href='#Cpp_Token_Type_doc'>Cpp_Token_Type</a></li><li><a href='#Cpp_Token_doc'>Cpp_Token</a></li><li><a href='#Cpp_Token_Flag_doc'>Cpp_Token_Flag</a></li><li><a href='#Cpp_Token_Array_doc'>Cpp_Token_Array</a></li><li><a href='#Cpp_Get_Token_Result_doc'>Cpp_Get_Token_Result</a></li><li><a href='#Cpp_Relex_Range_doc'>Cpp_Relex_Range</a></li><li><a href='#Cpp_Lex_Data_doc'>Cpp_Lex_Data</a></li><li><a href='#Cpp_Lex_Result_doc'>Cpp_Lex_Result</a></li><li><a 
href='#Cpp_Relex_Data_doc'>Cpp_Relex_Data</a></li></ul><h3>§5.4 Lexer Function Descriptions</h3><div id='cpp_get_token_doc' style='margin-bottom: 1cm;'><h4>§5.4.1: cpp_get_token</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Get_Token_Result cpp_get_token(<div style='margin-left: 4mm;'>Cpp_Token_Array *token_array_in,<br>int32_t pos<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>token_array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The array of tokens from which to get a token.</div></div></div><div><div style='font-weight: 600;'>pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The position, measured in bytes, to get the token for.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>A Cpp_Get_Token_Result struct is returned containing the index
|
||||
of a token and a flag indicating whether the pos is contained in the token
|
||||
or in whitespace after the token.</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call performs a binary search over all of the tokens looking
|
||||
for the token that contains the specified position. If the position
|
||||
|
@ -328,29 +328,22 @@ system says it needs a chunk. You may switch to or modify the output array in b
|
|||
The most basic use of this system is to get it all done in one big chunk and try to allocate a nearly "infinite" output
|
||||
array so that it will not run out of memory. This way you can get the entire job done in one call and then just assert
|
||||
to make sure it returns LexResult_Finished to you:<br><br>
|
||||
<br><br><div style='font-family: "Courier New", Courier, monospace; text-align: left;margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #EFEFDF; padding: 0.25em;'>Cpp_Token_Array lex_file(char *file_name){<br> File_Data file = read_whole_file(file_name);<br> <br> char *temp = (char*)malloc(4096); // hopefully big enough<br> Cpp_Lex_Data lex_state = cpp_lex_data_init(temp); <br> <br> Cpp_Token_Array array = {0};<br> array.tokens = (Cpp_Token*)malloc(1 << 20); // hopefully big enough<br> array.max_count = (1 << 20)/sizeof(Cpp_Token);<br> <br> Cpp_Lex_Result result = <br> cpp_lex_step(&lex_state, file.data, file.size, file.size,<br> &array, NO_OUT_LIMIT);<br> Assert(result == LexResult_Finished);<br> <br> free(temp);<br> <br> return(array);<br>}<br></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#Cpp_Lex_Data_doc'>Cpp_Lex_Data</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#Cpp_Lex_Result_doc'>Cpp_Lex_Result</a></div></div><hr><div id='cpp_lex_data_init_doc' style='margin-bottom: 1cm;'><h4>§5.4.3: cpp_lex_data_init</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Lex_Data cpp_lex_data_init(<div style='margin-left: 4mm;'>char *mem_buffer<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>mem_buffer</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The memory to use for initializing the lex state's temp memory buffer.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>A brand new lex state ready to begin lexing a file from the beginning.</div><div 
style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct.
|
||||
<br><br><div style='font-family: "Courier New", Courier, monospace; text-align: left;margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #EFEFDF; padding: 0.25em;'>Cpp_Token_Array lex_file(char *file_name){<br> File_Data file = read_whole_file(file_name);<br> <br> char *temp = (char*)malloc(4096); // hopefully big enough<br> Cpp_Lex_Data lex_state = cpp_lex_data_init(temp); <br> <br> Cpp_Token_Array array = {0};<br> array.tokens = (Cpp_Token*)malloc(1 << 20); // hopefully big enough<br> array.max_count = (1 << 20)/sizeof(Cpp_Token);<br> <br> Cpp_Lex_Result result = <br> cpp_lex_step(&lex_state, file.data, file.size, file.size,<br> &array, NO_OUT_LIMIT);<br> Assert(result == LexResult_Finished);<br> <br> free(temp);<br> <br> return(array);<br>}<br></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#Cpp_Lex_Data_doc'>Cpp_Lex_Data</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#Cpp_Lex_Result_doc'>Cpp_Lex_Result</a></div></div><hr><div id='cpp_lex_data_init_doc' style='margin-bottom: 1cm;'><h4>§5.4.3: cpp_lex_data_init</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Lex_Data cpp_lex_data_init(<div style='margin-left: 4mm;'><br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>A brand new lex state ready to begin lexing a file from the beginning.</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct.
|
||||
The system needs a temporary buffer that is as long as the longest token. 4096 is usually
|
||||
enough but the buffer is not checked, so to be 100% bullet proof it has to be the same length
|
||||
as the file being lexed.<br><br></div></div><hr><div id='cpp_lex_data_temp_size_doc' style='margin-bottom: 1cm;'><h4>§5.4.4: cpp_lex_data_temp_size</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>int32_t cpp_lex_data_temp_size(<div style='margin-left: 4mm;'>Cpp_Lex_Data *lex_data<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>lex_data</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The lex state from which to get the temporary buffer size.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call gets the current size of the temporary buffer in the lexer state so
|
||||
that you can move to a new temporary buffer by copying the data over.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_temp_read_doc'>cpp_lex_data_temp_read</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_new_temp_doc'>cpp_lex_data_new_temp</a></div></div><hr><div id='cpp_lex_data_temp_read_doc' style='margin-bottom: 1cm;'><h4>§5.4.5: cpp_lex_data_temp_read</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>void cpp_lex_data_temp_read(<div style='margin-left: 4mm;'>Cpp_Lex_Data *lex_data,<br>char *out_buffer<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>lex_data</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The lex state from which to read the temporary buffer.</div></div></div><div><div style='font-weight: 600;'>out_buffer</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The buffer into which the contents of the temporary buffer will be written.
|
||||
The size of the buffer must be at least the size as returned by cpp_lex_data_temp_size.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call reads the current contents of the temporary buffer.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_temp_size_doc'>cpp_lex_data_temp_size</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_new_temp_doc'>cpp_lex_data_new_temp</a></div></div><hr><div id='cpp_lex_data_new_temp_doc' style='margin-bottom: 1cm;'><h4>§5.4.6: cpp_lex_data_new_temp</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>void cpp_lex_data_new_temp(<div style='margin-left: 4mm;'>Cpp_Lex_Data *lex_data,<br>char *new_buffer<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>lex_data</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The lex state that will receive the new temporary buffer.</div></div></div><div><div style='font-weight: 600;'>new_buffer</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The new temporary buffer that has the same contents as the old temporary buffer.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call can be used to set a new temporary buffer for the lex state. In cases where you want to
|
||||
discontinue lexing, store the state, and resume later. In such a situation it may be necessary for you
|
||||
to free the temp buffer that was originally used to make the lex state. This call allows you to supply
|
||||
a new temp buffer when you are ready to resume lexing.<br><br>
|
||||
However the new buffer needs to have the same contents the old buffer had. To ensure this you have to
|
||||
use cpp_lex_data_temp_size and cpp_lex_data_temp_read to get the relevant contents of the temp buffer
|
||||
before you free it.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_temp_size_doc'>cpp_lex_data_temp_size</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_temp_read_doc'>cpp_lex_data_temp_read</a></div></div><hr><div id='cpp_get_relex_range_doc' style='margin-bottom: 1cm;'><h4>§5.4.7: cpp_get_relex_range</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Relex_Range cpp_get_relex_range(<div style='margin-left: 4mm;'>Cpp_Token_Array *array,<br>int32_t start_pos,<br>int32_t end_pos<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>A pointer to the token array that will be modified by the relex,
|
||||
The size of the buffer must be at least the size as returned by cpp_lex_data_temp_size.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call reads the current contents of the temporary buffer.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_temp_size_doc'>cpp_lex_data_temp_size</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_lex_data_new_temp_doc'>cpp_lex_data_new_temp</a></div></div><hr><div id='cpp_lex_data_new_temp_DEP_doc' style='margin-bottom: 1cm;'><h4>§5.4.6: cpp_lex_data_new_temp_DEP</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>void cpp_lex_data_new_temp_DEP(<div style='margin-left: 4mm;'>Cpp_Lex_Data *lex_data,<br>char *new_buffer<br></div>)</div></div><hr><div id='cpp_get_relex_range_doc' style='margin-bottom: 1cm;'><h4>§5.4.7: cpp_get_relex_range</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Relex_Range cpp_get_relex_range(<div style='margin-left: 4mm;'>Cpp_Token_Array *array,<br>int32_t start_pos,<br>int32_t end_pos<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>A pointer to the token array that will be modified by the relex,
|
||||
this array should already contain the tokens for the previous state of the file.</div></div></div><div><div style='font-weight: 600;'>start_pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The start position of the edited region of the file.
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div><div><div style='font-weight: 600;'>end_pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The end position of the edited region of the file.
|
||||
In particular, end_pos is the first character after the edited region not affected by the edit.
|
||||
Thus if the edited region contained one character end_pos - start_pos should equal 1.
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div></div><hr><div id='cpp_relex_init_doc' style='margin-bottom: 1cm;'><h4>§5.4.8: cpp_relex_init</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Relex_Data cpp_relex_init(<div style='margin-left: 4mm;'>Cpp_Token_Array *array,<br>int32_t start_pos,<br>int32_t end_pos,<br>int32_t character_shift_amount,<br>char *spare<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>A pointer to the token array that will be modified by the relex,
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div></div><hr><div id='cpp_relex_init_doc' style='margin-bottom: 1cm;'><h4>§5.4.8: cpp_relex_init</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>Cpp_Relex_Data cpp_relex_init(<div style='margin-left: 4mm;'>Cpp_Token_Array *array,<br>int32_t start_pos,<br>int32_t end_pos,<br>int32_t character_shift_amount<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>array</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>A pointer to the token array that will be modified by the relex,
|
||||
this array should already contain the tokens for the previous state of the file.</div></div></div><div><div style='font-weight: 600;'>start_pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The start position of the edited region of the file.
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div><div><div style='font-weight: 600;'>end_pos</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The end position of the edited region of the file.
|
||||
In particular, end_pos is the first character after the edited region not affected by the edit.
|
||||
Thus if the edited region contained one character end_pos - start_pos should equal 1.
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div><div><div style='font-weight: 600;'>character_shift_amount</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The shift in the characters after the edited region.</div></div></div><div><div style='font-weight: 600;'>spare</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The spare space for the lexing state.
|
||||
Should be big enough to store the largest token in the file.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>Returns a partially initialized relex state.</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call does the first setup step of initializing a relex state. To finish initializing the relex state
|
||||
The start and end points are based on the edited region of the file before the edit.</div></div></div><div><div style='font-weight: 600;'>character_shift_amount</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'>The shift in the characters after the edited region.</div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>Returns a partially initialized relex state.</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Description</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>This call does the first setup step of initializing a relex state. To finish initializing the relex state
|
||||
you must tell the state about the positioning of the first chunk it will be fed. There are two methods of doing
|
||||
this, the direct method is with cpp_relex_declare_first_chunk_position, the method that is often more convenient
|
||||
is with cpp_relex_is_start_chunk. If the file is not chunked the second step of initialization can be skipped.<br><br></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>See Also</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_relex_declare_first_chunk_position_doc'>cpp_relex_declare_first_chunk_position</a></div><div style='margin-left: 5mm; margin-right: 5mm;'><a href='#cpp_relex_is_start_chunk_doc'>cpp_relex_is_start_chunk</a></div></div><hr><div id='cpp_relex_start_position_doc' style='margin-bottom: 1cm;'><h4>§5.4.9: cpp_relex_start_position</h4><div style='font-family: "Courier New", Courier, monospace; text-align: left; margin-top: 3mm; margin-bottom: 3mm; font-size: .95em; background: #DFDFDF; padding: 0.25em;'>int32_t cpp_relex_start_position(<div style='margin-left: 4mm;'>Cpp_Relex_Data *S_ptr<br></div>)</div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Parameters</i></b></div><div><div style='font-weight: 600;'>S_ptr</div><div style='margin-bottom: 6mm;'><div style='margin-left: 5mm; margin-right: 5mm;'></div></div></div><div style='margin-top: 3mm; margin-bottom: 3mm; color: #309030;'><b><i>Return</i></b></div><div style='margin-left: 5mm; margin-right: 5mm;'>Returns the first position in the file the relexer wants to read. This is usually a position slightly
|
||||
|
|
60
4cpp_lexer.h
60
4cpp_lexer.h
|
@ -314,7 +314,9 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
S.chunk_pos += size;
|
||||
DrYield(4, LexResult_NeedChunk);
|
||||
}
|
||||
else break;
|
||||
else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
--S.pos;
|
||||
if (S.pp_state >= LSPP_count){
|
||||
|
@ -333,7 +335,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
|
||||
for (; S.fsm.state < LS_count && S.pos < end_pos;){
|
||||
c = chunk[S.pos++];
|
||||
S.tb[S.tb_pos++] = c;
|
||||
S.tb[(S.tb_pos++) & (sizeof(S.tb)-1)] = c;
|
||||
|
||||
int32_t i = S.fsm.state + eq_classes[c];
|
||||
S.fsm.state = fsm_table[i];
|
||||
|
@ -346,7 +348,9 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
S.chunk_pos += size;
|
||||
DrYield(3, LexResult_NeedChunk);
|
||||
}
|
||||
else break;
|
||||
else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Assert(S.fsm.emit_token == 1);
|
||||
|
@ -426,6 +430,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
|
||||
int32_t word_size = S.pos - S.token_start;
|
||||
|
||||
if (word_size < sizeof(S.tb)){
|
||||
if (S.pp_state == LSPP_body_if){
|
||||
if (match_ss(make_string(S.tb, word_size), make_lit_string("defined"))){
|
||||
S.token.type = CPP_PP_DEFINED;
|
||||
|
@ -442,11 +447,12 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
String_And_Flag data = keywords[sub_match];
|
||||
S.token.type = (Cpp_Token_Type)data.flags;
|
||||
S.token.flags = CPP_TFLAG_IS_KEYWORD;
|
||||
break;
|
||||
}
|
||||
else{
|
||||
}
|
||||
|
||||
S.token.type = CPP_TOKEN_IDENTIFIER;
|
||||
S.token.flags = 0;
|
||||
}
|
||||
}break;
|
||||
|
||||
case LS_pound:
|
||||
|
@ -471,6 +477,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
{
|
||||
--S.pos;
|
||||
|
||||
if (S.tb_pos < sizeof(S.tb)){
|
||||
int32_t pos = S.tb_pos-1;
|
||||
int32_t i = 1;
|
||||
for (;i < pos; ++i){
|
||||
|
@ -488,11 +495,12 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
|
|||
S.token.type = (Cpp_Token_Type)data.flags;
|
||||
S.token.flags = CPP_TFLAG_PP_DIRECTIVE;
|
||||
S.pp_state = (uint8_t)cpp_pp_directive_to_state(S.token.type);
|
||||
break;
|
||||
}
|
||||
else{
|
||||
}
|
||||
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
S.token.flags = 0;
|
||||
}
|
||||
}break;
|
||||
|
||||
case LS_number:
|
||||
|
@ -1034,8 +1042,7 @@ DOC_SEE(Cpp_Lex_Result)
|
|||
}
|
||||
|
||||
FCPP_LINK Cpp_Lex_Data
|
||||
cpp_lex_data_init(char *mem_buffer)/*
|
||||
DOC_PARAM(mem_buffer, The memory to use for initializing the lex state's temp memory buffer.)
|
||||
cpp_lex_data_init()/*
|
||||
DOC_RETURN(A brand new lex state ready to begin lexing a file from the beginning.)
|
||||
|
||||
DOC(Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct.
|
||||
|
@ -1044,7 +1051,6 @@ enough but the buffer is not checked, so to be 100% bullet proof it has to be th
|
|||
as the file being lexed.)
|
||||
*/{
|
||||
Cpp_Lex_Data data = {0};
|
||||
data.tb = mem_buffer;
|
||||
return(data);
|
||||
}
|
||||
|
||||
|
@ -1079,24 +1085,8 @@ DOC_SEE(cpp_lex_data_new_temp)
|
|||
}
|
||||
|
||||
FCPP_LINK void
|
||||
cpp_lex_data_new_temp(Cpp_Lex_Data *lex_data, char *new_buffer)/*
|
||||
DOC_PARAM(lex_data, The lex state that will receive the new temporary buffer.)
|
||||
DOC_PARAM(new_buffer, The new temporary buffer that has the same contents as the old temporary buffer.)
|
||||
|
||||
DOC(This call can be used to set a new temporary buffer for the lex state, for cases where you want to
|
||||
discontinue lexing, store the state, and resume later. In such a situation it may be necessary for you
|
||||
to free the temp buffer that was originally used to make the lex state. This call allows you to supply
|
||||
a new temp buffer when you are ready to resume lexing.
|
||||
|
||||
However the new buffer needs to have the same contents the old buffer had. To ensure this you have to
|
||||
use cpp_lex_data_temp_size and cpp_lex_data_temp_read to get the relevant contents of the temp buffer
|
||||
before you free it.)
|
||||
|
||||
DOC_SEE(cpp_lex_data_temp_size)
|
||||
DOC_SEE(cpp_lex_data_temp_read)
|
||||
*/{
|
||||
lex_data->tb = new_buffer;
|
||||
}
|
||||
cpp_lex_data_new_temp_DEP(Cpp_Lex_Data *lex_data, char *new_buffer)
|
||||
/*DOC(Deprecated in 4cpp Lexer 1.0.1)*/{}
|
||||
|
||||
FCPP_INTERNAL char
|
||||
cpp_token_get_pp_state(uint16_t bitfield){
|
||||
|
@ -1162,7 +1152,7 @@ The start and end points are based on the edited region of the file before the e
|
|||
}
|
||||
|
||||
FCPP_LINK Cpp_Relex_Data
|
||||
cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount, char *spare)
|
||||
cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount)
|
||||
/*
|
||||
DOC_PARAM(array, A pointer to the token array that will be modified by the relex,
|
||||
this array should already contain the tokens for the previous state of the file.)
|
||||
|
@ -1173,8 +1163,6 @@ In particular, end_pos is the first character after the edited region not affected by the edit.
|
|||
Thus if the edited region contained one character end_pos - start_pos should equal 1.
|
||||
The start and end points are based on the edited region of the file before the edit.)
|
||||
DOC_PARAM(character_shift_amount, The shift in the characters after the edited region.)
|
||||
DOC_PARAM(spare, The spare space for the lexing state.
|
||||
Should be big enough to store the largest token in the file.)
|
||||
DOC_RETURN(Returns a partially initialized relex state.)
|
||||
|
||||
DOC(This call does the first setup step of initializing a relex state. To finish initializing the relex state
|
||||
|
@ -1200,7 +1188,7 @@ DOC_SEE(cpp_relex_is_start_chunk)
|
|||
|
||||
state.character_shift_amount = character_shift_amount;
|
||||
|
||||
state.lex = cpp_lex_data_init(spare);
|
||||
state.lex = cpp_lex_data_init();
|
||||
state.lex.pp_state = cpp_token_get_pp_state(array->tokens[state.start_token_index].state_flags);
|
||||
state.lex.pos = state.relex_start_position;
|
||||
|
||||
|
@ -1348,6 +1336,7 @@ DOC_SEE(cpp_relex_abort)
|
|||
*/{
|
||||
|
||||
Cpp_Relex_Data S = *S_ptr;
|
||||
Cpp_Lex_Result step_result = LexResult_Finished;
|
||||
|
||||
switch (S.__pc__){
|
||||
DrCase(1);
|
||||
|
@ -1359,7 +1348,7 @@ DOC_SEE(cpp_relex_abort)
|
|||
|
||||
// TODO(allen): This can be better I suspect.
|
||||
for (;;){
|
||||
Cpp_Lex_Result step_result =
|
||||
step_result =
|
||||
cpp_lex_nonalloc_no_null_out_limit(&S.lex, chunk, chunk_size, full_size,
|
||||
relex_array, 1);
|
||||
|
||||
|
@ -1564,8 +1553,7 @@ Cpp_Token_Array lex_file(char *file_name){
|
|||
)
|
||||
DOC_SEE(cpp_make_token_array)
|
||||
*/{
|
||||
Cpp_Lex_Data S = {0};
|
||||
S.tb = (char*)malloc(size);
|
||||
Cpp_Lex_Data S = cpp_lex_data_init();
|
||||
int32_t quit = 0;
|
||||
|
||||
char empty = 0;
|
||||
|
@ -1600,8 +1588,6 @@ DOC_SEE(cpp_make_token_array)
|
|||
}break;
|
||||
}
|
||||
}
|
||||
|
||||
free(S.tb);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -332,8 +332,7 @@ The internals of the lex state should not be treated as a part of the public API
|
|||
DOC_SEE(cpp_lex_data_init)
|
||||
HIDE_MEMBERS()*/
|
||||
struct Cpp_Lex_Data{
|
||||
char *tb;
|
||||
|
||||
char tb[32];
|
||||
int32_t tb_pos;
|
||||
int32_t token_start;
|
||||
|
||||
|
|
|
@ -1127,20 +1127,18 @@ Job_Callback_Sig(job_full_lex){
|
|||
|
||||
i32 buffer_size = (text_size + 3)&(~3);
|
||||
|
||||
while (memory->size < buffer_size*2){
|
||||
while (memory->size < buffer_size){
|
||||
system->grow_thread_memory(memory);
|
||||
}
|
||||
|
||||
char *tb = (char*)memory->data;
|
||||
|
||||
Cpp_Token_Array tokens;
|
||||
tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size);
|
||||
tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token);
|
||||
tokens.tokens = (Cpp_Token*)(memory->data);
|
||||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
tokens.count = 0;
|
||||
|
||||
b32 still_lexing = 1;
|
||||
|
||||
Cpp_Lex_Data lex = cpp_lex_data_init(tb);
|
||||
Cpp_Lex_Data lex = cpp_lex_data_init();
|
||||
|
||||
// TODO(allen): deduplicate this against relex
|
||||
char *chunks[3];
|
||||
|
@ -1165,18 +1163,15 @@ Job_Callback_Sig(job_full_lex){
|
|||
cpp_lex_step(&lex, chunk, chunk_size, text_size, &tokens, 2048);
|
||||
|
||||
switch (result){
|
||||
case LexResult_NeedChunk:
|
||||
++chunk_index;
|
||||
break;
|
||||
case LexResult_NeedChunk: ++chunk_index; break;
|
||||
|
||||
case LexResult_NeedTokenMemory:
|
||||
if (system->check_cancel(thread)){
|
||||
return;
|
||||
}
|
||||
system->grow_thread_memory(memory);
|
||||
lex.tb = (char*)memory->data;
|
||||
tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size);
|
||||
tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token);
|
||||
tokens.tokens = (Cpp_Token*)(memory->data);
|
||||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
break;
|
||||
|
||||
case LexResult_HitTokenLimit:
|
||||
|
@ -1294,9 +1289,8 @@ file_relex_parallel(System_Functions *system,
|
|||
relex_array.tokens = push_array(part, Cpp_Token, relex_array.max_count);
|
||||
|
||||
i32 size = buffer_size(buffer);
|
||||
char *spare = push_array(part, char, size+1);
|
||||
|
||||
Cpp_Relex_Data state = cpp_relex_init(array, start_i, end_i, shift_amount, spare);
|
||||
Cpp_Relex_Data state = cpp_relex_init(array, start_i, end_i, shift_amount);
|
||||
|
||||
char *chunks[3];
|
||||
i32 chunk_sizes[3];
|
||||
|
|
|
@ -3470,7 +3470,8 @@ main(int argc, char **argv)
|
|||
&linuxvars.target,
|
||||
&memory_vars,
|
||||
&linuxvars.input,
|
||||
&result
|
||||
&result,
|
||||
clparams
|
||||
);
|
||||
|
||||
if(result.perform_kill){
|
||||
|
|
Loading…
Reference in New Issue