/* date = July 9th 2023 8:04 pm */
|
|
|
|
#ifndef VN_TOKENIZER_H
|
|
#define VN_TOKENIZER_H
|
|
|
|
////////////////////////////////
//~ sixten: Token Types
// Integer type used to carry TokenFlag_* bits (see token::Flags and the
// ExcludeFilter parameter of T_TokenizeFromText).
typedef u32 token_flags;
|
|
// Token classification bits. Every enumerator occupies its own bit, so
// several of them can be OR-ed together into one token_flags value.
enum
{
    // Token categories (bits 0-7).
    TokenFlag_Identifier          = (1 << 0),
    TokenFlag_Numeric             = (1 << 1),
    TokenFlag_StringLiteral       = (1 << 2),
    TokenFlag_Symbol              = (1 << 3),
    TokenFlag_Reserved            = (1 << 4),
    TokenFlag_Comment             = (1 << 5),
    TokenFlag_Whitespace          = (1 << 6),
    TokenFlag_Newline             = (1 << 7),

    // Malformed-input categories (bits 8-10); TokenGroup_Error is the union
    // of these three.
    TokenFlag_BrokenComment       = (1 << 8),
    TokenFlag_BrokenStringLiteral = (1 << 9),
    TokenFlag_BadCharacter        = (1 << 10),
};
|
|
|
|
// Integer type for TokenGroup_* values, which are combinations of
// TokenFlag_* bits.
typedef u32 token_group;
|
|
enum
|
|
{
|
|
TokenGroup_Comment = TokenFlag_Comment,
|
|
TokenGroup_Whitespace = (TokenFlag_Whitespace |
|
|
TokenFlag_Newline),
|
|
TokenGroup_Irregular = (TokenGroup_Comment |
|
|
TokenGroup_Whitespace),
|
|
TokenGroup_Regular = ~TokenGroup_Irregular,
|
|
TokenGroup_Label = (TokenFlag_Identifier |
|
|
TokenFlag_Numeric |
|
|
TokenFlag_StringLiteral |
|
|
TokenFlag_Symbol),
|
|
TokenGroup_Error = (TokenFlag_BrokenComment |
|
|
TokenFlag_BrokenStringLiteral |
|
|
TokenFlag_BadCharacter),
|
|
};
|
|
|
|
struct token
|
|
{
|
|
token_flags Flags;
|
|
range1_s64 Range;
|
|
};
|
|
|
|
struct token_chunk_node
|
|
{
|
|
token *Tokens;
|
|
s64 MaxCount;
|
|
s64 Count;
|
|
token_chunk_node *Next;
|
|
};
|
|
|
|
struct token_chunk_list
|
|
{
|
|
token_chunk_node *First;
|
|
token_chunk_node *Last;
|
|
s64 Count;
|
|
};
|
|
|
|
struct token_array
|
|
{
|
|
token *Tokens;
|
|
s64 Count;
|
|
};
|
|
|
|
////////////////////////////////
//~ sixten: Tokenizer Message Types
// Kind tag of a tokenizer_message. Enumerators carry no explicit values, so
// they number upward from 0 and T_MessageKind_Invalid is the zero value.
enum tokenizer_message_kind
{
    T_MessageKind_Invalid,
    T_MessageKind_Note,
    T_MessageKind_Warning,
    T_MessageKind_Error,
};
|
|
|
|
struct tokenizer_message
|
|
{
|
|
tokenizer_message *Next;
|
|
tokenizer_message_kind Kind;
|
|
s64 Offset;
|
|
string String;
|
|
};
|
|
|
|
struct tokenizer_message_list
|
|
{
|
|
tokenizer_message *First;
|
|
tokenizer_message *Last;
|
|
s64 Count;
|
|
};
|
|
|
|
////////////////////////////////
//~ sixten: Text -> Token Types
struct tokenize_result
|
|
{
|
|
token_array Tokens;
|
|
tokenizer_message_list Messages;
|
|
};
|
|
|
|
////////////////////////////////
//~ sixten: Token Type Functions
// Produces a string for Token given Text.
// NOTE(review): the definition is not visible here; presumably returns the
// part of Text covered by Token.Range - confirm.
static string T_StringFromToken(string Text, token Token);
|
|
// Tests Token against Flags and String, with Text supplied alongside.
// NOTE(review): the definition is not visible here; presumably returns
// non-zero when Token carries Flags and its text within Text equals String -
// confirm the exact matching rule.
static b32 T_TokenMatches(token Token, token_flags Flags, string Text, string String);
|
|
// Pushes Token onto List.
// NOTE(review): the definition is not visible here; presumably new chunk
// nodes are allocated from Arena with room for MaxTokenCountPerNode tokens
// each - confirm.
static void T_TokenChunkListPush(memory_arena *Arena, token_chunk_list *List, token Token, s64 MaxTokenCountPerNode);
|
|
// Builds a token_array from the contents of List.
// NOTE(review): the definition is not visible here; presumably the tokens
// are copied into contiguous storage allocated from Arena - confirm.
static token_array T_TokenArrayFromChunkList(memory_arena *Arena, token_chunk_list *List);
|
|
|
|
////////////////////////////////
//~ sixten: Tokenizer Message Functions
// Pushes a message described by Kind, Offset and String onto List.
// NOTE(review): the definition is not visible here; presumably the
// tokenizer_message itself is allocated from Arena - confirm.
static void T_MessageListPush(memory_arena *Arena, tokenizer_message_list *List, tokenizer_message_kind Kind, s64 Offset, string String);
|
|
|
|
////////////////////////////////
//~ sixten: Text -> Token Functions
// Tokenizes Text and returns the resulting tokens and messages as a
// tokenize_result. ExcludeFilter defaults to 0 when omitted; the default
// argument means this declaration only compiles as C++.
// NOTE(review): the definition is not visible here; presumably Filename is
// used for diagnostics, allocations come from Arena, and tokens whose flags
// intersect ExcludeFilter are left out of the result - confirm.
static tokenize_result T_TokenizeFromText(memory_arena *Arena, string Filename, string Text, token_flags ExcludeFilter = 0);
|
|
|
|
#endif //VN_TOKENIZER_H
|