// vn/code/vn_tokenizer.h (164 lines, 3.8 KiB, C)

/* date = July 9th 2023 8:04 pm */
#ifndef VN_TOKENIZER_H
#define VN_TOKENIZER_H
////////////////////////////////
//~ sixten: Token Types
enum token_kind
{
TokenKind_None,
// sixten: labels
TokenKind_Identifier,
TokenKind_Numeric,
TokenKind_StringLiteral,
// sixten: symbols
TokenKind_SymbolsBegin,
// sixten: (single char)
TokenKind_CurlyOpen,
TokenKind_CurlyClose,
TokenKind_ParenthesisOpen,
TokenKind_ParenthesisClose,
TokenKind_Comma,
TokenKind_Dot,
TokenKind_DollarSign,
TokenKind_Semicolon,
TokenKind_Plus,
TokenKind_Minus,
TokenKind_Star,
TokenKind_Slash,
// sixten: (one or two chars)
TokenKind_Bang,
TokenKind_BangEqual,
TokenKind_Equal,
TokenKind_EqualEqual,
TokenKind_Greater,
TokenKind_GreaterEqual,
TokenKind_Less,
TokenKind_LessEqual,
TokenKind_SymbolsEnd,
// sixten: keywords
TokenKind_KeywordsBegin,
TokenKind_Branch,
TokenKind_Else,
TokenKind_False,
TokenKind_If,
TokenKind_Then,
TokenKind_While,
TokenKind_Jump,
TokenKind_Proc,
TokenKind_True,
TokenKind_Let,
TokenKind_Call,
TokenKind_Wait,
TokenKind_End,
TokenKind_KeywordsEnd,
// sixten: whitespace
TokenKind_Whitespace,
TokenKind_Newline,
TokenKind_Comment,
// sixten: invalid kinds
TokenKind_BrokenComment,
TokenKind_BrokenStringLiteral,
TokenKind_BadCharacter,
};
// Signature of a predicate over token kinds. T_TokenizeFromText() accepts a
// pointer to such a function as its ExcludeFilter argument, and the T_Is*
// helpers in this header all match this signature.
typedef b32 tokenizer_filter_function(token_kind Kind);
// A single token: its kind plus where it sits in the source text.
struct token
{
token_kind Kind; // which kind of token this is
range1_s64 Range; // presumably the span of the token within the tokenized text (TODO confirm units and inclusivity)
s64 Line; // presumably the source line the token is on (TODO confirm whether 0- or 1-based)
};
// A token with every member zero-initialized, so its Kind is TokenKind_None
// (the first enumerator). `global` and `read_only` are project macros defined
// outside this header. Presumably used as a placeholder when no real token is
// available (TODO confirm against callers).
global read_only token T_EmptyToken = {};
// One node of a singly linked list of token chunks (see token_chunk_list).
struct token_chunk_node
{
token *Tokens; // storage for this chunk's tokens
s64 MaxCount; // presumably the capacity of Tokens (TODO confirm in T_TokenChunkListPush)
s64 Count; // presumably how many entries of Tokens are in use (TODO confirm)
token_chunk_node *Next; // next node in the list
};
// Linked list of token_chunk_node, with head and tail pointers.
struct token_chunk_list
{
token_chunk_node *First; // head of the list
token_chunk_node *Last; // tail of the list
s64 Count; // NOTE(review): likely the total token count across all nodes rather than the node count; verify in T_TokenChunkListPush
};
// A pointer-plus-count view of tokens; this is the form tokenize_result
// carries and the return type of T_TokenArrayFromChunkList().
struct token_array
{
token *Tokens; // first token
s64 Count; // presumably the number of tokens reachable from Tokens (TODO confirm)
};
////////////////////////////////
//~ sixten: Tokenizer Message Types
// Severity of a tokenizer_message.
enum tokenizer_message_kind
{
// First enumerator, so its value is 0: a zero-initialized message has this kind.
T_MessageKind_Invalid,
T_MessageKind_Note,
T_MessageKind_Warning,
T_MessageKind_Error,
};
// One diagnostic produced during tokenization; also a node of a singly linked
// list (see tokenizer_message_list).
struct tokenizer_message
{
tokenizer_message *Next; // next message in the list
tokenizer_message_kind Kind; // severity of the message
s64 Offset; // presumably the position in the source text the message refers to (TODO confirm units)
string String; // the message text
};
// Linked list of tokenizer_message, with head and tail pointers.
struct tokenizer_message_list
{
tokenizer_message *First; // head of the list
tokenizer_message *Last; // tail of the list
s64 Count; // presumably the number of messages in the list (TODO confirm in T_MessageListPush)
};
////////////////////////////////
//~ sixten: Text -> Token Types
// Return value of T_TokenizeFromText(): the tokens together with any messages
// recorded along the way.
struct tokenize_result
{
token_array Tokens; // the resulting tokens
tokenizer_message_list Messages; // notes, warnings and errors
};
////////////////////////////////
//~ sixten: Token Filter Helper Functions
// Non-zero when Kind is TokenKind_Comment, zero otherwise.
inline b32 T_IsComment(token_kind Kind)
{
    b32 Result = (Kind == TokenKind_Comment);
    return(Result);
}
// Non-zero when Kind is TokenKind_Whitespace or TokenKind_Newline, zero otherwise.
inline b32 T_IsWhitespace(token_kind Kind)
{
    b32 Result = 0;
    if(Kind == TokenKind_Whitespace)
    {
        Result = 1;
    }
    else if(Kind == TokenKind_Newline)
    {
        Result = 1;
    }
    return(Result);
}
// Non-zero when Kind is a comment or whitespace kind, i.e. when either
// T_IsComment() or T_IsWhitespace() accepts it.
inline b32 T_IsIrregular(token_kind Kind)
{
    // Comments are tested first, matching the original evaluation order.
    if(T_IsComment(Kind))
    {
        return(1);
    }
    return(T_IsWhitespace(Kind) != 0);
}
// Exact complement of T_IsIrregular(): non-zero for every kind that is
// neither a comment nor whitespace.
inline b32 T_IsRegular(token_kind Kind)
{
    b32 Irregular = T_IsIrregular(Kind);
    return(Irregular == 0);
}
// Non-zero when Kind is TokenKind_None or one of the three error kinds
// (BrokenComment, BrokenStringLiteral, BadCharacter), zero otherwise.
inline b32 T_IsInvalid(token_kind Kind)
{
    b32 IsNone = (Kind == TokenKind_None);
    b32 IsBroken = (Kind == TokenKind_BrokenComment ||
                    Kind == TokenKind_BrokenStringLiteral);
    b32 IsBadCharacter = (Kind == TokenKind_BadCharacter);
    return(IsNone || IsBroken || IsBadCharacter);
}
////////////////////////////////
//~ sixten: Token Type Functions
// Produces a string for Token given the text it was tokenized from.
// NOTE(review): presumably the slice of Text covered by Token.Range; the
// definition is not in this header, so confirm there.
static string T_StringFromToken(string Text, token Token);
// Adds Token to List.
// NOTE(review): presumably a new chunk node holding up to MaxTokenCountPerNode
// tokens is allocated from Arena when the last node is full; confirm against
// the definition, which is not in this header.
static void T_TokenChunkListPush(arena *Arena, token_chunk_list *List, token Token, s64 MaxTokenCountPerNode);
// Builds a token_array from the tokens held in List.
// NOTE(review): presumably copies every chunk, in order, into one contiguous
// allocation taken from Arena; confirm against the definition.
static token_array T_TokenArrayFromChunkList(arena *Arena, token_chunk_list *List);
////////////////////////////////
//~ sixten: Tokenizer Message Functions
// Adds a message with the given Kind, Offset and String to List.
// NOTE(review): presumably the tokenizer_message node is allocated from
// Arena; whether String is copied or only referenced is not visible here.
static void T_MessageListPush(arena *Arena, tokenizer_message_list *List, tokenizer_message_kind Kind, s64 Offset, string String);
////////////////////////////////
//~ sixten: Text -> Token Functions
// Tokenizes Text and returns the tokens together with any messages.
// ExcludeFilter may be omitted, in which case it defaults to 0 (a null pointer).
// NOTE(review): presumably tokens whose kind makes ExcludeFilter return
// non-zero are left out of the result, and result storage comes from Arena;
// confirm against the definition, which is not in this header.
static tokenize_result T_TokenizeFromText(arena *Arena, string Text, tokenizer_filter_function *ExcludeFilter = 0);
#endif //VN_TOKENIZER_H