vn/code/vn_tokenizer.h

164 lines
3.8 KiB
C
Raw Permalink Normal View History

2023-07-19 15:09:41 +00:00
/* date = July 9th 2023 8:04 pm */
2023-06-19 17:12:26 +00:00
#ifndef VN_TOKENIZER_H
#define VN_TOKENIZER_H
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Token Types
// Every kind of token the tokenizer distinguishes. Enumerators are numbered
// implicitly from 0 in declaration order (TokenKind_None == 0), so inserting or
// reordering entries changes the numeric value of everything after them.
// NOTE(review): the *Begin / *End enumerators bracket the symbol and keyword
// kinds; presumably they are range markers (for checks such as
// SymbolsBegin < Kind < SymbolsEnd) rather than kinds of real tokens - confirm
// against the tokenizer implementation.
enum token_kind
{
    TokenKind_None,
    
    // sixten: labels
    TokenKind_Identifier,
    TokenKind_Numeric,
    TokenKind_StringLiteral,
    
    // sixten: symbols
    TokenKind_SymbolsBegin,
    // sixten: (single char)
    TokenKind_CurlyOpen,
    TokenKind_CurlyClose,
    TokenKind_ParenthesisOpen,
    TokenKind_ParenthesisClose,
    TokenKind_Comma,
    TokenKind_Dot,
    TokenKind_DollarSign,
    TokenKind_Semicolon,
    TokenKind_Plus,
    TokenKind_Minus,
    TokenKind_Star,
    TokenKind_Slash,
    // sixten: (one or two chars)
    TokenKind_Bang,
    TokenKind_BangEqual,
    TokenKind_Equal,
    TokenKind_EqualEqual,
    TokenKind_Greater,
    TokenKind_GreaterEqual,
    TokenKind_Less,
    TokenKind_LessEqual,
    TokenKind_SymbolsEnd,
    
    // sixten: keywords
    TokenKind_KeywordsBegin,
    TokenKind_Branch,
    TokenKind_Else,
    TokenKind_False,
    TokenKind_If,
    TokenKind_Then,
    TokenKind_While,
    TokenKind_Jump,
    TokenKind_Proc,
    TokenKind_True,
    TokenKind_Let,
    TokenKind_Call,
    TokenKind_Wait,
    TokenKind_End,
    TokenKind_KeywordsEnd,
    
    // sixten: whitespace
    TokenKind_Whitespace,
    TokenKind_Newline,
    TokenKind_Comment,
    
    // sixten: invalid kinds
    TokenKind_BrokenComment,
    TokenKind_BrokenStringLiteral,
    TokenKind_BadCharacter,
};
// Signature of a token-kind predicate: takes a token_kind, returns a b32.
// The T_Is* helpers in this header have this signature, and T_TokenizeFromText
// accepts a pointer to such a function as its ExcludeFilter parameter.
typedef b32 tokenizer_filter_function(token_kind Kind);
2023-06-19 17:12:26 +00:00
struct token
{
2023-12-23 07:27:22 +00:00
token_kind Kind;
range1_s64 Range;
s64 Line;
2023-06-19 17:12:26 +00:00
};
2024-01-20 11:18:57 +00:00
// Empty-initialized token: every member is zero, so Kind is TokenKind_None
// (the first enumerator of token_kind).
// NOTE(review): `global` and `read_only` are project macros defined elsewhere;
// presumably they give this object internal linkage and read-only storage - confirm.
global read_only token T_EmptyToken = {};
2023-07-19 15:09:41 +00:00
struct token_chunk_node
2023-06-19 17:12:26 +00:00
{
2023-12-23 07:27:22 +00:00
token *Tokens;
s64 MaxCount;
s64 Count;
token_chunk_node *Next;
2023-07-19 15:09:41 +00:00
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
struct token_chunk_list
2023-06-19 17:12:26 +00:00
{
2023-12-23 07:27:22 +00:00
token_chunk_node *First;
token_chunk_node *Last;
s64 Count;
2023-07-19 15:09:41 +00:00
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
struct token_array
2023-06-19 17:12:26 +00:00
{
2023-12-23 07:27:22 +00:00
token *Tokens;
s64 Count;
2023-07-19 15:09:41 +00:00
};
2023-06-19 17:12:26 +00:00
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Tokenizer Message Types
// Kind of a tokenizer message. Enumerators are numbered implicitly from 0 in
// declaration order, so T_MessageKind_Invalid is the value of a zero-initialized
// kind and Note < Warning < Error numerically.
enum tokenizer_message_kind
{
    T_MessageKind_Invalid,
    T_MessageKind_Note,
    T_MessageKind_Warning,
    T_MessageKind_Error,
};
struct tokenizer_message
{
2023-12-23 07:27:22 +00:00
tokenizer_message *Next;
tokenizer_message_kind Kind;
s64 Offset;
string String;
2023-07-19 15:09:41 +00:00
};
// Singly linked list of tokenizer_message, tracked by its first and last entry.
struct tokenizer_message_list
{
    tokenizer_message *First; // head of the list
    tokenizer_message *Last;  // tail of the list
    s64 Count;                // NOTE(review): presumably the number of messages in the list - confirm against T_MessageListPush
};
////////////////////////////////
//~ sixten: Text -> Token Types
struct tokenize_result
{
2023-12-23 07:27:22 +00:00
token_array Tokens;
tokenizer_message_list Messages;
2023-07-19 15:09:41 +00:00
};
2023-08-22 03:19:51 +00:00
////////////////////////////////
//~ sixten: Token Filter Helper Functions
// Token filter: true only for TokenKind_Comment.
inline b32 T_IsComment(token_kind Kind)
{
    b32 Result = (Kind == TokenKind_Comment);
    return(Result);
}
// Token filter: true for TokenKind_Whitespace and TokenKind_Newline.
inline b32 T_IsWhitespace(token_kind Kind)
{
    b32 Result = (Kind == TokenKind_Whitespace || Kind == TokenKind_Newline);
    return(Result);
}
// Token filter: true for comment tokens and for whitespace/newline tokens.
inline b32 T_IsIrregular(token_kind Kind)
{
    b32 Result = (T_IsComment(Kind) || T_IsWhitespace(Kind));
    return(Result);
}
// Token filter: the negation of T_IsIrregular.
inline b32 T_IsRegular(token_kind Kind)
{
    b32 Result = !T_IsIrregular(Kind);
    return(Result);
}
// Token filter: true for TokenKind_None and for the three broken/bad kinds
// (BrokenComment, BrokenStringLiteral, BadCharacter); false for everything else.
inline b32 T_IsInvalid(token_kind Kind)
{
    b32 Result = false;
    switch(Kind)
    {
        case TokenKind_None:
        case TokenKind_BrokenComment:
        case TokenKind_BrokenStringLiteral:
        case TokenKind_BadCharacter:
        {
            Result = true;
        } break;
        
        default:
        {
        } break;
    }
    return(Result);
}
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Token Type Functions
// Declarations only; the definitions are not part of this header.

// NOTE(review): presumably returns the part of Text that Token.Range covers - confirm against the definition.
static string T_StringFromToken(string Text, token Token);
// NOTE(review): presumably appends Token to List, taking memory from Arena and using
// MaxTokenCountPerNode as the capacity of newly created nodes - confirm against the definition.
static void T_TokenChunkListPush(arena *Arena, token_chunk_list *List, token Token, s64 MaxTokenCountPerNode);
// NOTE(review): presumably copies the tokens of List into a single token_array
// allocated from Arena - confirm against the definition.
static token_array T_TokenArrayFromChunkList(arena *Arena, token_chunk_list *List);
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Tokenizer Message Functions
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably appends a tokenizer_message built from Kind, Offset and
// String to List, taking memory from Arena - confirm against the definition.
static void T_MessageListPush(arena *Arena, tokenizer_message_list *List, tokenizer_message_kind Kind, s64 Offset, string String);
2023-07-19 15:09:41 +00:00
////////////////////////////////
//~ sixten: Text -> Token Functions
// Declaration only; the definition is not part of this header.
// Returns a tokenize_result (token array + message list) for Text.
// ExcludeFilter may be omitted, in which case it is 0 (no filter function).
// NOTE(review): presumably tokens whose kind makes ExcludeFilter return non-zero are
// left out of the result - confirm against the definition.
// The default argument means this declaration is only valid when compiled as C++.
static tokenize_result T_TokenizeFromText(arena *Arena, string Text, tokenizer_filter_function *ExcludeFilter = 0);
2023-06-19 17:12:26 +00:00
#endif //VN_TOKENIZER_H