vn/code/vn_tokenizer.h

119 lines
3.1 KiB
C
Raw Normal View History

2023-07-19 15:09:41 +00:00
/* date = July 9th 2023 8:04 pm */
2023-06-19 17:12:26 +00:00
#ifndef VN_TOKENIZER_H
#define VN_TOKENIZER_H
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Token Types
// Integer type used to hold an OR-ed combination of TokenFlag_* bits.
typedef u32 token_flags;
// Individual token classification bits. Every enumerator occupies a distinct
// bit, so several of them can be OR-ed together into one mask.
enum
{
    TokenFlag_Identifier          = (1<<0),
    TokenFlag_Numeric             = (1<<1),
    TokenFlag_StringLiteral       = (1<<2),
    TokenFlag_Symbol              = (1<<3),
    TokenFlag_Reserved            = (1<<4),
    TokenFlag_Comment             = (1<<5),
    TokenFlag_Whitespace          = (1<<6),
    TokenFlag_Newline             = (1<<7),
    TokenFlag_BrokenComment       = (1<<8),
    TokenFlag_BrokenStringLiteral = (1<<9),
    TokenFlag_BadCharacter        = (1<<10),
};
2023-07-19 15:09:41 +00:00
typedef u32 token_group;
enum
2023-06-19 17:12:26 +00:00
{
2023-07-19 15:09:41 +00:00
TokenGroup_Comment = TokenFlag_Comment,
TokenGroup_Whitespace = (TokenFlag_Whitespace |
TokenFlag_Newline),
TokenGroup_Irregular = (TokenGroup_Comment |
TokenGroup_Whitespace),
TokenGroup_Regular = ~TokenGroup_Irregular,
TokenGroup_Label = (TokenFlag_Identifier |
TokenFlag_Numeric |
TokenFlag_StringLiteral |
TokenFlag_Symbol),
TokenGroup_Error = (TokenFlag_BrokenComment |
TokenFlag_BrokenStringLiteral |
TokenFlag_BadCharacter),
2023-06-19 17:12:26 +00:00
};
struct token
{
2023-07-19 15:09:41 +00:00
token_flags Flags;
range1_s64 Range;
2023-06-19 17:12:26 +00:00
};
2023-07-19 15:09:41 +00:00
struct token_chunk_node
2023-06-19 17:12:26 +00:00
{
2023-07-19 15:09:41 +00:00
token *Tokens;
s64 MaxCount;
s64 Count;
token_chunk_node *Next;
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
struct token_chunk_list
2023-06-19 17:12:26 +00:00
{
2023-07-19 15:09:41 +00:00
token_chunk_node *First;
token_chunk_node *Last;
s64 Count;
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
struct token_array
2023-06-19 17:12:26 +00:00
{
2023-07-19 15:09:41 +00:00
token *Tokens;
s64 Count;
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Tokenizer Message Types
// Kind of a tokenizer message. Values are implicit and sequential, so
// T_MessageKind_Invalid is 0 (the value of a zero-initialized kind).
enum tokenizer_message_kind
{
    T_MessageKind_Invalid,
    T_MessageKind_Note,
    T_MessageKind_Warning,
    T_MessageKind_Error,
};
// One tokenizer message, linked to the following message through Next.
struct tokenizer_message
{
    tokenizer_message      *Next;
    tokenizer_message_kind  Kind;
    s64                     Offset; // NOTE(review): presumably a position in the tokenized text - confirm
    string                  String; // message text
};
// List of tokenizer_message, tracked through its first and last entry.
struct tokenizer_message_list
{
    tokenizer_message *First;
    tokenizer_message *Last;
    s64                Count; // NOTE(review): presumably the number of messages in the list - confirm in T_MessageListPush
};
////////////////////////////////
//~ sixten: Text -> Token Types
// Return value of T_TokenizeFromText: the tokens together with a message list.
struct tokenize_result
{
    token_array            Tokens;
    tokenizer_message_list Messages;
};
////////////////////////////////
//~ sixten: Token Type Functions
// NOTE(review): the definitions are not visible from this header; the
// descriptions below are inferred from the signatures and should be confirmed.

// Produces a string for Token given Text (presumably the part of Text that
// Token.Range covers).
static string
T_StringFromToken(string Text, token Token);

// Tests Token against Flags and String, with Text supplied as context
// (presumably the text the token was produced from).
static b32
T_TokenMatches(token Token, token_flags Flags, string Text, string String);

// Pushes Token onto List, taking Arena and a per-node token limit
// (presumably used when a new node has to be allocated).
static void
T_TokenChunkListPush(memory_arena *Arena, token_chunk_list *List,
                     token Token, s64 MaxTokenCountPerNode);

// Builds a token_array from List, taking Arena (presumably for the
// array's storage).
static token_array
T_TokenArrayFromChunkList(memory_arena *Arena, token_chunk_list *List);
////////////////////////////////
//~ sixten: Tokenizer Message Functions
// Pushes a message described by Kind, Offset and String onto List, taking
// Arena (presumably for the new entry's storage - the definition is not
// visible from this header).
static void
T_MessageListPush(memory_arena *Arena, tokenizer_message_list *List,
                  tokenizer_message_kind Kind, s64 Offset, string String);
////////////////////////////////
//~ sixten: Text -> Token Functions
// Tokenizes Text and returns a tokenize_result (tokens plus messages).
// ExcludeFilter defaults to 0 when the argument is omitted.
// NOTE(review): presumably tokens whose flags intersect ExcludeFilter are left
// out of the result, and Filename is only used for reporting - confirm against
// the definition, which is not visible from this header.
static tokenize_result
T_TokenizeFromText(memory_arena *Arena, string Filename, string Text,
                   token_flags ExcludeFilter = 0);
2023-06-19 17:12:26 +00:00
#endif //VN_TOKENIZER_H