/* date = July 9th 2023 8:04 pm */ #ifndef VN_TOKENIZER_H #define VN_TOKENIZER_H //////////////////////////////// //~ sixten: Token Types typedef u32 token_flags; enum { TokenFlag_Identifier = (1<<0), TokenFlag_Numeric = (1<<1), TokenFlag_StringLiteral = (1<<2), TokenFlag_Symbol = (1<<3), TokenFlag_Reserved = (1<<4), TokenFlag_Comment = (1<<5), TokenFlag_Whitespace = (1<<6), TokenFlag_Newline = (1<<7), TokenFlag_BrokenComment = (1<<8), TokenFlag_BrokenStringLiteral = (1<<9), TokenFlag_BadCharacter = (1<<10), }; typedef u32 token_group; enum { TokenGroup_Comment = TokenFlag_Comment, TokenGroup_Whitespace = (TokenFlag_Whitespace | TokenFlag_Newline), TokenGroup_Irregular = (TokenGroup_Comment | TokenGroup_Whitespace), TokenGroup_Regular = ~TokenGroup_Irregular, TokenGroup_Label = (TokenFlag_Identifier | TokenFlag_Numeric | TokenFlag_StringLiteral | TokenFlag_Symbol), TokenGroup_Error = (TokenFlag_BrokenComment | TokenFlag_BrokenStringLiteral | TokenFlag_BadCharacter), }; struct token { token_flags Flags; range1_s64 Range; }; struct token_chunk_node { token *Tokens; s64 MaxCount; s64 Count; token_chunk_node *Next; }; struct token_chunk_list { token_chunk_node *First; token_chunk_node *Last; s64 Count; }; struct token_array { token *Tokens; s64 Count; }; //////////////////////////////// //~ sixten: Tokenizer Message Types enum tokenizer_message_kind { T_MessageKind_Invalid, T_MessageKind_Note, T_MessageKind_Warning, T_MessageKind_Error, }; struct tokenizer_message { tokenizer_message *Next; tokenizer_message_kind Kind; s64 Offset; string String; }; struct tokenizer_message_list { tokenizer_message *First; tokenizer_message *Last; s64 Count; }; //////////////////////////////// //~ sixten: Text -> Token Types struct tokenize_result { token_array Tokens; tokenizer_message_list Messages; }; //////////////////////////////// //~ sixten: Token Type Functions static string T_StringFromToken(string Text, token Token); static b32 T_TokenMatches(token Token, token_flags Flags, string Text, string String); static void T_TokenChunkListPush(memory_arena *Arena, token_chunk_list *List, token Token, s64 MaxTokenCountPerNode); static token_array T_TokenArrayFromChunkList(memory_arena *Arena, token_chunk_list *List); //////////////////////////////// //~ sixten: Tokenizer Message Functions static void T_MessageListPush(memory_arena *Arena, tokenizer_message_list *List, tokenizer_message_kind Kind, s64 Offset, string String); //////////////////////////////// //~ sixten: Text -> Token Functions static tokenize_result T_TokenizeFromText(memory_arena *Arena, string Filename, string Text, token_flags ExcludeFilter = 0); #endif //VN_TOKENIZER_H