Lumenarium/meta/gs_meta_lexer.h

261 lines
7.2 KiB
C

// Describes a criterion for selecting a token.
// NOTE(review): nothing in the visible part of this file reads or writes this
// struct, so the field notes below are inferred from the names only - confirm
// against the code that consumes it.
struct token_selection_spec
{
// Presumably: when true, a candidate token's text must match Text.
b32 MatchText;
// Presumably: the text compared against when MatchText is set.
string Text;
};
// Advances the tokenizer over the rest of a preprocessor directive.
//
// Characters are consumed until a newline is reached (that newline is left
// unconsumed) or AtValidPosition reports the tokenizer is no longer valid.
// A backslash is treated as a line continuation: the backslash and any
// whitespace after it are consumed, and if a newline follows, it is consumed
// too, so scanning carries on into the next line.
//
// Returns how far Tokenizer->At moved, in characters.
internal s32
EatPreprocessor (tokenizer* Tokenizer)
{
    char* DirectiveStart = Tokenizer->At;
    
    for (;;)
    {
        if (!AtValidPosition(*Tokenizer)) { break; }
        if (IsNewline(*Tokenizer->At)) { break; }
        
        if (Tokenizer->At[0] != '\\')
        {
            // Ordinary character: we already know it is not a newline
            EatChar(Tokenizer);
            continue;
        }
        
        // Line continuation: drop the backslash, any trailing whitespace,
        // and then the newline that ends the physical line (if present)
        EatChar(Tokenizer);
        while (IsWhitespace(*Tokenizer->At))
        {
            EatChar(Tokenizer);
        }
        if (IsNewline(*Tokenizer->At))
        {
            EatPastNewLine(Tokenizer);
        }
    }
    
    return Tokenizer->At - DirectiveStart;
}
// Consumes the body of a string literal and its closing quote.
//
// Expects Tokenizer->At to point at the first character after the opening
// double quote. On return, Tokenizer->At points just past the closing quote,
// or at the null terminator if the string was never closed.
//
// A backslash escapes the character that follows it, so an escaped quote (\")
// does not terminate the string. Both the backslash and the escaped character
// are counted in the returned length.
//
// Returns the number of characters between the quotes (quotes excluded).
internal s32
EatString (tokenizer* Tokenizer)
{
    s32 Length = 0;
    
    while (Tokenizer->At[0] && Tokenizer->At[0] != '"')
    {
        // Skip the backslash of an escape sequence so the escaped character
        // (possibly a quote) is consumed by the advance below. Only do so when
        // there is a character to escape, so we never step over the terminator.
        if (Tokenizer->At[0] == '\\' && Tokenizer->At[1])
        {
            ++Tokenizer->At;
            Length++;
        }
        ++Tokenizer->At;
        Length++;
    }
    
    // Step over the closing quote, but never past the end of the stream
    if (Tokenizer->At[0] == '"')
    {
        ++Tokenizer->At;
    }
    
    return Length;
}
// Advances the tokenizer while the current character is accepted by IsAlpha
// or IsNumericExtended, stopping at the first other character or at the null
// terminator.
//
// Returns the number of characters consumed.
internal s32
EatIdentifier (tokenizer* Tokenizer)
{
    char* IdentifierStart = Tokenizer->At;
    
    for (;;)
    {
        char Current = Tokenizer->At[0];
        if (Current == 0) { break; }
        if (!IsAlpha(Current) && !IsNumericExtended(Current)) { break; }
        ++Tokenizer->At;
    }
    
    return (s32)(Tokenizer->At - IdentifierStart);
}
// Tests whether the text at the tokenizer's current position begins with
// Needle (a null terminated string).
//
// On a match the tokenizer is left just past the matched text and true is
// returned. On a mismatch - including the stream ending before all of Needle
// has been seen - Tokenizer->At is restored to where it started and false is
// returned. An empty Needle always matches and consumes nothing.
//
// NOTE(review): only Tokenizer->At is rewound. If EatChar updates any other
// tokenizer state, that state is not restored on a failed match - confirm
// this is acceptable for needles that cannot contain newlines.
internal b32
TokenAtEquals(tokenizer* Tokenizer, char* Needle)
{
    char* TokenizerStart = Tokenizer->At;
    char* NeedleAt = Needle;
    
    while (AtValidPosition(*Tokenizer) &&
           *NeedleAt &&
           *NeedleAt == *Tokenizer->At)
    {
        NeedleAt++;
        EatChar(Tokenizer);
    }
    
    // The match only succeeded if every character of Needle was consumed.
    // Stopping early means either a differing character or the end of the
    // stream, and neither counts as a match.
    b32 Result = (*NeedleAt == 0);
    
    // NOTE(Peter): rewind tokenizer
    if (!Result)
    {
        Tokenizer->At = TokenizerStart;
    }
    
    return Result;
}
// Reads the next token from the tokenizer and advances past it.
//
// Leading whitespace, line comments and block comments are skipped first, in
// any order and any number. The returned token carries its type, a string
// referencing the token's characters inside the source buffer (no copy is
// made here), and a 1-based line number derived from Tokenizer->LineNumber.
//
// Token text by kind:
//   - punctuation / operators: the single character (two for "->")
//   - preprocessor directives: from the '#' up to wherever the directive was
//     consumed to ("#include" stops right after the keyword so its argument
//     is tokenized separately; "#define" follows line continuations)
//   - numbers and identifiers: the full run of characters
//   - char and string literals: the contents without the surrounding quotes
//
// At the end of the input a Token_EndOfStream token is returned and the
// tokenizer stays on the null terminator, so calling again is safe and
// yields Token_EndOfStream again.
//
// NOTE(review): a '#' followed by an unrecognized directive name leaves
// Result.Type at its zero-initialized value with a text of just "#".
// NOTE(review): block comments are skipped by moving Tokenizer->At directly,
// so any line tracking performed by the Eat* helpers does not see newlines
// inside them - confirm whether LineNumber should account for those.
internal token
GetNextToken (tokenizer* Tokenizer)
{
    token Result = {};
    
    EatWhitespace(Tokenizer);
    
    // Don't include comments in tokens. Both comment styles are handled in a
    // single loop so that they can follow one another in any order.
    while (Tokenizer->At[0] == '/' &&
           (Tokenizer->At[1] == '/' || Tokenizer->At[1] == '*'))
    {
        if (Tokenizer->At[1] == '/')
        {
            EatToNewLine(Tokenizer);
        }
        else
        {
            // Skip the opening "/*" and scan for the closing "*/". An
            // unterminated comment simply runs to the end of the stream.
            Tokenizer->At += 2;
            while (Tokenizer->At[0])
            {
                if (Tokenizer->At[0] == '*' && Tokenizer->At[1] == '/')
                {
                    Tokenizer->At += 2;
                    break;
                }
                Tokenizer->At++;
            }
        }
        EatWhitespace(Tokenizer);
    }
    
    Result.Text = MakeString(Tokenizer->At, 1, 1);
    
    // NOTE(Peter): Adding one because I want the tokenizer to work with clear to zero
    // but line numbers generally start at 1, not 0
    Result.LineNumber = Tokenizer->LineNumber + 1;
    
    char C = Tokenizer->At[0];
    if (C == 0)
    {
        // Stay on the terminator so later calls never read past the buffer
        Result.Type = Token_EndOfStream;
        return Result;
    }
    ++Tokenizer->At;
    
    if (C == '(') { Result.Type = Token_LeftParen; }
    else if (C == ')') { Result.Type = Token_RightParen; }
    else if (C == '[') { Result.Type = Token_LeftSquareBracket; }
    else if (C == ']') { Result.Type = Token_RightSquareBracket; }
    else if (C == '{') { Result.Type = Token_LeftCurlyBracket; }
    else if (C == '}') { Result.Type = Token_RightCurlyBracket; }
    else if (C == ';') { Result.Type = Token_Semicolon; }
    else if (C == ',') { Result.Type = Token_Comma; }
    else if (C == '.') { Result.Type = Token_Period; }
    else if (C == '-' && Tokenizer->At[0] == '>')
    {
        Result.Type = Token_PointerReference;
        Result.Text.Length = 2;
        ++Tokenizer->At;
    }
    else if (C == '#')
    {
        // NOTE(Peter): Technically correct to do things like "# define"
        EatWhitespace(Tokenizer);
        
        // Most directives swallow the rest of their line. The exceptions
        // clear EatRestOfLine and do their own consuming (or none at all).
        b32 IsDirective = true;
        b32 EatRestOfLine = true;
        
        // Order matters: longer names that share a prefix with a shorter one
        // ("ifdef"/"ifndef" vs "if") must be tested first.
        if (TokenAtEquals(Tokenizer, "define"))
        {
            Result.Type = Token_PoundDefine;
            EatPreprocessor(Tokenizer);
            EatRestOfLine = false;
        }
        else if (TokenAtEquals(Tokenizer, "undef")) { Result.Type = Token_PoundUndef; }
        else if (TokenAtEquals(Tokenizer, "include"))
        {
            // Only the keyword is consumed; what follows is lexed separately
            Result.Type = Token_PoundInclude;
            EatRestOfLine = false;
        }
        else if (TokenAtEquals(Tokenizer, "ifdef")) { Result.Type = Token_PoundIfDef; }
        else if (TokenAtEquals(Tokenizer, "ifndef")) { Result.Type = Token_PoundIfNDef; }
        else if (TokenAtEquals(Tokenizer, "if")) { Result.Type = Token_PoundIf; }
        else if (TokenAtEquals(Tokenizer, "elif")) { Result.Type = Token_PoundElif; }
        else if (TokenAtEquals(Tokenizer, "else")) { Result.Type = Token_PoundElse; }
        else if (TokenAtEquals(Tokenizer, "endif")) { Result.Type = Token_PoundEndif; }
        else if (TokenAtEquals(Tokenizer, "error")) { Result.Type = Token_PoundError; }
        else if (TokenAtEquals(Tokenizer, "pragma")) { Result.Type = Token_PoundPragma; }
        else { IsDirective = false; }
        
        if (IsDirective)
        {
            if (EatRestOfLine)
            {
                EatToNewLine(Tokenizer);
            }
            Result.Text.Length = Tokenizer->At - Result.Text.Memory;
        }
    }
    else if (IsNumeric(C))
    {
        Result.Type = Token_Number;
        
        // NOTE(Peter): adding 1 to account for the fact that we've already advanced
        // Tokenizer once
        Result.Text.Length = 1 + EatNumber(Tokenizer);
    }
    else if (C == '\'')
    {
        Result.Type = Token_Char;
        Result.Text.Memory = Tokenizer->At;
        
        // An escaped character is a backslash plus the character it escapes
        if (Tokenizer->At[0] == '\\' && Tokenizer->At[1])
        {
            ++Tokenizer->At;
        }
        if (Tokenizer->At[0])
        {
            ++Tokenizer->At;
        }
        
        // The text covers the literal's contents: one character, or two when
        // it is an escape sequence
        Result.Text.Length = Tokenizer->At - Result.Text.Memory;
        
        // Step over the closing quote, but never past the end of the stream
        if (Tokenizer->At[0])
        {
            ++Tokenizer->At;
        }
    }
    else if (C == '"')
    {
        Result.Type = Token_String;
        
        // replace the length added by the quote
        Result.Text.Memory = Tokenizer->At;
        Result.Text.Length = EatString(Tokenizer);
    }
    // NOTE(Peter): This is after comment parsing so that the division operator
    // falls through the comment case
    else if (IsOperator(C)) { Result.Type = Token_Operator; }
    else
    {
        Result.Type = Token_Identifier;
        Result.Text.Length += EatIdentifier(Tokenizer);
    }
    
    return Result;
}