/* date = July 9th 2023 8:04 pm */ #ifndef VN_TOKENIZER_H #define VN_TOKENIZER_H //////////////////////////////// //~ sixten: Token Types enum token_kind { TokenKind_None, // sixten: labels TokenKind_Identifier, TokenKind_Numeric, TokenKind_StringLiteral, // sixten: symbols TokenKind_SymbolsBegin, // sixten: (single char) TokenKind_CurlyOpen, TokenKind_CurlyClose, TokenKind_ParenthesisOpen, TokenKind_ParenthesisClose, TokenKind_Comma, TokenKind_Dot, TokenKind_DollarSign, TokenKind_Semicolon, TokenKind_Plus, TokenKind_Minus, TokenKind_Star, TokenKind_Slash, // sixten: (one or two chars) TokenKind_Bang, TokenKind_BangEqual, TokenKind_Equal, TokenKind_EqualEqual, TokenKind_Greater, TokenKind_GreaterEqual, TokenKind_Less, TokenKind_LessEqual, TokenKind_SymbolsEnd, // sixten: keywords TokenKind_KeywordsBegin, TokenKind_Branch, TokenKind_Else, TokenKind_False, TokenKind_If, TokenKind_Then, TokenKind_While, TokenKind_Jump, TokenKind_Proc, TokenKind_True, TokenKind_Let, TokenKind_Call, TokenKind_Wait, TokenKind_End, TokenKind_KeywordsEnd, // sixten: whitespace TokenKind_Whitespace, TokenKind_Newline, TokenKind_Comment, // sixten: invalid kinds TokenKind_BrokenComment, TokenKind_BrokenStringLiteral, TokenKind_BadCharacter, }; typedef b32 tokenizer_filter_function(token_kind Kind); struct token { token_kind Kind; range1_s64 Range; s64 Line; }; global read_only token T_EmptyToken = {}; struct token_chunk_node { token *Tokens; s64 MaxCount; s64 Count; token_chunk_node *Next; }; struct token_chunk_list { token_chunk_node *First; token_chunk_node *Last; s64 Count; }; struct token_array { token *Tokens; s64 Count; }; //////////////////////////////// //~ sixten: Tokenizer Message Types enum tokenizer_message_kind { T_MessageKind_Invalid, T_MessageKind_Note, T_MessageKind_Warning, T_MessageKind_Error, }; struct tokenizer_message { tokenizer_message *Next; tokenizer_message_kind Kind; s64 Offset; string String; }; struct tokenizer_message_list { tokenizer_message *First; tokenizer_message *Last; s64 Count; }; //////////////////////////////// //~ sixten: Text -> Token Types struct tokenize_result { token_array Tokens; tokenizer_message_list Messages; }; //////////////////////////////// //~ sixten: Token Filter Helper Functions inline b32 T_IsComment(token_kind Kind) { return(Kind == TokenKind_Comment); } inline b32 T_IsWhitespace(token_kind Kind) { return(Kind == TokenKind_Whitespace || Kind == TokenKind_Newline); } inline b32 T_IsIrregular(token_kind Kind) { return(T_IsComment(Kind) || T_IsWhitespace(Kind)); } inline b32 T_IsRegular(token_kind Kind) { return(!T_IsIrregular(Kind)); } inline b32 T_IsInvalid(token_kind Kind) { return(Kind == TokenKind_None || Kind == TokenKind_BrokenComment || Kind == TokenKind_BrokenStringLiteral || Kind == TokenKind_BadCharacter); } //////////////////////////////// //~ sixten: Token Type Functions static string T_StringFromToken(string Text, token Token); static void T_TokenChunkListPush(arena *Arena, token_chunk_list *List, token Token, s64 MaxTokenCountPerNode); static token_array T_TokenArrayFromChunkList(arena *Arena, token_chunk_list *List); //////////////////////////////// //~ sixten: Tokenizer Message Functions static void T_MessageListPush(arena *Arena, tokenizer_message_list *List, tokenizer_message_kind Kind, s64 Offset, string String); //////////////////////////////// //~ sixten: Text -> Token Functions static tokenize_result T_TokenizeFromText(arena *Arena, string Text, tokenizer_filter_function *ExcludeFilter = 0); #endif //VN_TOKENIZER_H