vn/code/core/core_string.cpp

795 lines
19 KiB
C++

#define STB_SPRINTF_IMPLEMENTATION
#include "third_party/stb_sprintf.h"
//~ sixten: Char functions
// Returns nonzero when C is a space, newline, tab or carriage return.
inline b32 IsWhitespace(char C)
{
    b32 Result = false;
    switch(C)
    {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
        {
            Result = true;
        } break;

        default:
        {
        } break;
    }
    return(Result);
}
// Returns nonzero when C is an ASCII decimal digit ('0'..'9').
inline b32 IsDigit(char C)
{
    return(('0' <= C) && (C <= '9'));
}
// Returns nonzero when C is an ASCII letter ('A'..'Z' or 'a'..'z').
inline b32 IsLetter(char C)
{
    b32 IsUpper = (('A' <= C) && (C <= 'Z'));
    b32 IsLower = (('a' <= C) && (C <= 'z'));
    return(IsUpper || IsLower);
}
//~ sixten: String functions
//- sixten: Basic constructors
// Wraps an existing byte buffer in a string view; no memory is copied.
inline string MakeString(u8 *Data, s64 Count)
{
    string Result = {};
    Result.Count = Count;
    Result.Data = Data;
    return(Result);
}
// Wraps a null-terminated C string in a string view. The length is measured
// with StringLength and excludes the terminator; no memory is copied.
inline string MakeString(char *CString)
{
    s64 Length = StringLength(CString);
    string Result = {};
    Result.Count = Length;
    Result.Data = (u8 *)CString;
    return(Result);
}
//- sixten: Equality
// Returns true when A and B have the same length and identical bytes.
static b32 AreEqual(string A, string B)
{
    if(A.Count != B.Count)
    {
        return(false);
    }

    for(s64 At = 0; At < A.Count; At += 1)
    {
        if(A.Data[At] != B.Data[At])
        {
            return(false);
        }
    }

    return(true);
}
//- sixten: Substring
// Returns a view into String starting at Range.Min and spanning
// DimOfRange(Range) bytes. The range is not checked against String.Count.
static string Substring(string String, range1_s64 Range)
{
    u8 *Start = String.Data + Range.Min;
    s64 Length = DimOfRange(Range);
    return(MakeString(Start, Length));
}
// Returns a view of the first Count bytes of String.
static string Prefix(string String, s64 Count)
{
    return(Substring(String, Range1S64(0, Count)));
}
// Returns a view of the last Count bytes of String.
static string Suffix(string String, s64 Count)
{
    s64 End = String.Count;
    s64 Start = End - Count;
    return(Substring(String, Range1S64(Start, End)));
}
//- sixten: Hashing
// Hashes the bytes of String: each byte is added to the running value, which
// is then scrambled with three shift-xor steps (<<13, >>7, <<17).
static u64 HashString(string String)
{
    u64 Hash = 5731;
    u8 *Byte = String.Data;
    for(s64 Remaining = String.Count; Remaining > 0; Remaining -= 1, Byte += 1)
    {
        Hash = Hash + *Byte;
        Hash = Hash ^ (Hash << 13);
        Hash = Hash ^ (Hash >> 7);
        Hash = Hash ^ (Hash << 17);
    }
    return(Hash);
}
//- sixten: Searching
// Returns the index of the first byte in String equal to Char, or -1 if
// there is none.
static s64 FirstIndexOf(string String, char Char)
{
    s64 Found = -1;
    for(s64 At = 0; (At < String.Count) && (Found < 0); At += 1)
    {
        if(String.Data[At] == Char)
        {
            Found = At;
        }
    }
    return(Found);
}
// Returns the index of the last byte in String equal to Char, or -1 if
// there is none.
static s64 LastIndexOf(string String, char Char)
{
    s64 Found = -1;
    for(s64 At = String.Count - 1; (At >= 0) && (Found < 0); At -= 1)
    {
        if(String.Data[At] == Char)
        {
            Found = At;
        }
    }
    return(Found);
}
// Returns the byte index of the first occurrence of Sub within String, or -1
// when Sub does not occur (including when Sub is longer than String).
// An empty Sub matches at index 0.
static s64 FirstIndexOf(string String, string Sub)
{
    s64 Result = -1;
    // The last position at which Sub still fits is String.Count - Sub.Count,
    // and that position must be part of the scan (a match at the very end of
    // String would otherwise be missed).
    s64 LastStart = String.Count - Sub.Count;
    for(s64 Index = 0;
        Index <= LastStart;
        ++Index)
    {
        string ToCheck = Substring(String, Range1S64(Index, Index + Sub.Count));
        if(AreEqual(ToCheck, Sub))
        {
            Result = Index;
            break;
        }
    }
    return(Result);
}
// Returns the byte index of the last occurrence of Sub within String, or -1
// when Sub does not occur (including when Sub is longer than String).
// An empty Sub matches at index String.Count.
static s64 LastIndexOf(string String, string Sub)
{
    s64 Result = -1;
    // Start at the last position where Sub still fits, so that a match at the
    // very end of String is found.
    s64 LastStart = String.Count - Sub.Count;
    for(s64 Index = LastStart;
        Index >= 0;
        --Index)
    {
        string ToCheck = Substring(String, Range1S64(Index, Index + Sub.Count));
        if(AreEqual(ToCheck, Sub))
        {
            Result = Index;
            break;
        }
    }
    return(Result);
}
//- sixten: Allocation
// Copies the bytes of String into memory pushed on Arena and returns a view
// of the copy. No terminator is appended.
static string PushString(memory_arena *Arena, string String)
{
    s64 Count = String.Count;
    u8 *Memory = PushArrayNoClear(Arena, u8, Count);
    Copy(Memory, String.Data, Count);

    string Result;
    Result.Data = Memory;
    Result.Count = Count;
    return(Result);
}
// Formats Format/Arguments (stb_sprintf syntax) into memory pushed on Arena.
// The returned string's Count excludes the null terminator that is written
// right after the formatted bytes.
static string PushFormatVariadic(memory_arena *Arena, char *Format, va_list Arguments)
{
    string Result;

    // First pass: measure the output. A copy of the argument list is used so
    // that Arguments is still untouched for the second pass.
    va_list ArgumentsCopy;
    va_copy(ArgumentsCopy, Arguments);
    Result.Count = stbsp_vsnprintf(0, 0, Format, ArgumentsCopy);
    va_end(ArgumentsCopy); // every va_copy must be paired with va_end

    // Second pass: format into the freshly pushed buffer.
    Result.Data = PushArrayNoClear(Arena, u8, Result.Count + 1);
    Result.Data[Result.Count] = 0;
    stbsp_vsnprintf((char *)Result.Data, (s32)Result.Count + 1, Format, Arguments);
    return(Result);
}
// Variadic front end for PushFormatVariadic: formats the arguments into a
// string pushed on Arena.
static string PushFormat(memory_arena *Arena, char *Format, ...)
{
    va_list Args;
    va_start(Args, Format);
    string Formatted = PushFormatVariadic(Arena, Format, Args);
    va_end(Args);
    return(Formatted);
}
// Copies a null-terminated C string onto Arena; the terminator itself is not
// part of the copy.
static string PushCString(memory_arena *Arena, char *CString)
{
    return(PushString(Arena, MakeString(CString)));
}
//- sixten: Conversion
// Parses a base-10 integer with an optional leading '-'.
// Every remaining byte is asserted to be a digit. An empty string (or a lone
// "-") yields 0. Overflow is not detected.
static s64 ConvertStringToS64(string String)
{
    s64 Result = 0;
    b32 IsNegative = false;
    s64 Index = 0;
    // Only look at the first byte when there is one; an empty string has no
    // readable data.
    if((String.Count > 0) && (String.Data[0] == '-'))
    {
        IsNegative = true;
        ++Index;
    }
    for(;Index < String.Count; ++Index)
    {
        u8 Char = String.Data[Index];
        Assert(IsDigit(Char));
        Result = Result*10 + (Char-'0');
    }
    if(IsNegative)
    {
        Result = -Result;
    }
    return(Result);
}
// Formats Value as base-10 text in memory pushed on Arena. Negative values
// get a leading '-'. The returned Count excludes the null terminator that is
// stored after the last digit.
static string ConvertS64ToString(memory_arena *Arena, s64 Value)
{
    b32 IsNegative = (Value < 0);

    // Work on the unsigned magnitude so that the most negative s64 (whose
    // negation does not fit in s64) is handled as well.
    u64 Magnitude = IsNegative ? ((u64)0 - (u64)Value) : (u64)Value;

    // Count digits with integer division; a floating-point logarithm can come
    // out one digit short at exact powers of ten.
    s64 DigitCount = 0;
    u64 Remaining = Magnitude;
    do
    {
        DigitCount += 1;
        Remaining /= 10;
    } while(Remaining != 0);

    s64 TotalBufferCount = DigitCount + (IsNegative ? 1 : 0);
    string String = {TotalBufferCount, PushArray(Arena, u8, TotalBufferCount + 1)};
    String.Data[TotalBufferCount] = 0;
    if(IsNegative)
    {
        String.Data[0] = '-';
    }

    // Digits are produced least-significant first, so fill from the back.
    for(s64 Index = 0;
        Index < DigitCount;
        ++Index)
    {
        String.Data[TotalBufferCount - 1 - Index] = (u8)('0' + (Magnitude % 10));
        Magnitude /= 10;
    }
    return(String);
}
// Encodes Codepoint with UTF8FromCodepoint into a zeroed 5-byte scratch
// buffer and copies the resulting C string onto Arena.
static string StringFromCodepoint(memory_arena *Arena, u32 Codepoint)
{
    char Encoded[5] = {};
    UTF8FromCodepoint((u8 *)Encoded, Codepoint);
    return(PushCString(Arena, Encoded));
}
//- sixten: Replacing
// Returns a copy of Text, pushed on Arena, with every byte equal to ToRemove
// dropped. A null terminator is stored after the copy (not counted in Count).
static string RemoveAll(memory_arena *Arena, string Text, char ToRemove)
{
    // First pass: how many bytes survive?
    s64 KeptCount = 0;
    for(s64 At = 0; At < Text.Count; At += 1)
    {
        if(Text.Data[At] != ToRemove)
        {
            KeptCount += 1;
        }
    }

    // Second pass: copy the surviving bytes in order.
    u8 *Buffer = PushArrayNoClear(Arena, u8, KeptCount + 1);
    u8 *Cursor = Buffer;
    for(s64 At = 0; At < Text.Count; At += 1)
    {
        u8 Byte = Text.Data[At];
        if(Byte != ToRemove)
        {
            *Cursor++ = Byte;
        }
    }
    Buffer[KeptCount] = 0;

    return(MakeString(Buffer, KeptCount));
}
//- sixten: "C Style" strings
// Returns the number of chars before the null terminator of String.
static s64 StringLength(char *String)
{
    char *At = String;
    while(*At != 0)
    {
        At += 1;
    }
    return((s64)(At - String));
}
//~ sixten: String list
// Appends String to the end of List using a node pushed on Arena and adds its
// length to the list's running total. The string bytes are not copied.
static void AppendString(string_list *List, string String, memory_arena *Arena)
{
    string_node *NewNode = PushStruct(Arena, string_node);
    NewNode->String = String;
    DLLInsertLast(List->First, List->Last, NewNode);
    List->TotalCount += String.Count;
}
// Concatenates every string in List, in order, into one buffer pushed on
// Arena. The buffer is null terminated; Count excludes the terminator.
static string JoinStringList(string_list *List, memory_arena *Arena)
{
    s64 Total = List->TotalCount;
    u8 *Buffer = PushArray(Arena, u8, Total + 1);
    Buffer[Total] = 0;

    u8 *Cursor = Buffer;
    for(string_node *Node = List->First; Node != 0; Node = Node->Next)
    {
        u8 *Source = Node->String.Data;
        s64 SourceCount = Node->String.Count;
        for(s64 At = 0; At < SourceCount; At += 1)
        {
            *Cursor++ = Source[At];
        }
    }

    return(MakeString(Buffer, Total));
}
/////////////////////////////////////
//~ sixten: String Chunk Functions
// Returns a zero-initialised chunk list whose chunk size is ChunkSize.
static string_chunk_list MakeStringChunkList(s64 ChunkSize)
{
    string_chunk_list List = {};
    List.ChunkSize = ChunkSize;
    return(List);
}
// Flattens the chunk list into one contiguous, null-terminated string pushed
// on Arena. Chunks are read in list order; each contributes up to
// List->ChunkSize bytes until List->TotalCount bytes have been gathered.
static string JoinStringChunkList(memory_arena *Arena, string_chunk_list *List)
{
    string Result = {};
    Result.Count = List->TotalCount;
    Result.Data = PushArrayNoClear(Arena, u8, List->TotalCount + 1);

    s64 WriteOffset = 0;
    for(string_node *Node = List->First;
        (Node != 0) && (WriteOffset < List->TotalCount);
        Node = Node->Next)
    {
        // Advance by exactly what was copied so the write position cannot
        // drift from the source data, and stop once everything is gathered so
        // spare chunks never produce a negative copy size.
        s64 ToCopy = Min(List->TotalCount - WriteOffset, List->ChunkSize);
        Copy(Result.Data + WriteOffset, Node->String.Data, ToCopy);
        WriteOffset += ToCopy;
    }

    // The buffer was pushed without clearing, so the terminator byte reserved
    // above has to be written explicitly.
    Result.Data[Result.Count] = 0;
    return(Result);
}
// sixten(TODO): Incomplete, remove maybe?
// Replaces the bytes covered by Range in the chunk list with Text.
// Only the growing case (new total >= old total) moves any data; the
// shrinking branch is empty. New chunks are taken from the list's free list
// when available, otherwise pushed on Arena. List->TotalCount is updated to
// the new total in every case.
static void ReplaceRange(memory_arena *Arena, string_chunk_list *List, string Text, range1_s64 Range)
{
// NOTE(review): 0ULL is unsigned; if Max is the usual ternary macro the comparison is done
// in unsigned arithmetic and a negative difference would not clamp to zero - confirm.
s64 NewTotalCount = Max(0ULL, List->TotalCount - DimOfRange(Range)) + Text.Count;
//- sixten: do we need to allocate more chunks?
if(List->ChunkSize*List->ChunkCount < NewTotalCount)
{
// NOTE(review): divides by List->ChunkSize; a chunk size of zero is not guarded against here.
s64 ChunksToAlloc = (NewTotalCount - List->ChunkSize*List->ChunkCount)/List->ChunkSize + 1;
for(s64 Index = 0; Index < ChunksToAlloc; Index += 1)
{
if(DLLIsEmpty(List->FirstFree))
{
// No recycled node available: push a new node and its chunk storage (neither is cleared).
string_node *Node = PushStructNoClear(Arena, string_node);
Node->String.Count = 0;
Node->String.Data = PushArrayNoClear(Arena, u8, List->ChunkSize);
DLLInsertLast(List->First, List->Last, Node);
}
else
{
// Recycle the first free node, keeping the chunk storage it already has.
string_node *Node = List->FirstFree;
Node->String.Count = 0;
DLLRemove(List->FirstFree, List->LastFree, Node);
DLLInsertLast(List->First, List->Last, Node);
}
}
List->ChunkCount += ChunksToAlloc;
}
s64 CountDelta = NewTotalCount - List->TotalCount;
// sixten: I cannot be bothered enough to figure out the correct implementation for this. However, if I do this - remember that you can rearrange
// the ordering of the linked list, instead of actually copying over the bytes for the majority of this.
// The code below only supports size changes smaller than one chunk.
Assert(AbsoluteValue(CountDelta) < List->ChunkSize);
//- sixten: find the first and last affected nodes
s64 FirstAffectedNodeIndex = Range.Min/List->ChunkSize;
s64 LastAffectedNodeIndex = Range.Max/List->ChunkSize;
string_node *FirstAffectedNode = List->First;
for(s64 WalkIndex = 0; WalkIndex < FirstAffectedNodeIndex; WalkIndex += 1)
{
FirstAffectedNode = FirstAffectedNode->Next;
}
string_node *LastAffectedNode = FirstAffectedNode;
for(s64 WalkIndex = 0; WalkIndex < LastAffectedNodeIndex-FirstAffectedNodeIndex; WalkIndex += 1)
{
LastAffectedNode = LastAffectedNode->Next;
}
if(CountDelta >= 0)
{
//- sixten: insertion - make room and then copy the data
s64 WriteOffset = Range.Min%List->ChunkSize;
// Walk backwards from the last chunk, shifting bytes towards the end by CountDelta.
// NOTE(review): WriteOffset is applied to every chunk in this walk, not only the first affected one - confirm intent.
for(string_node *Node = List->Last; Node != 0; Node = Node->Prev)
{
CopyReverse(Node->String.Data+CountDelta+WriteOffset, Node->String.Data+WriteOffset, List->ChunkSize-CountDelta-WriteOffset);
if(Node == LastAffectedNode)
{
break;
}
else
{
// Pull the last CountDelta bytes of the previous chunk into the front of this one.
Copy(Node->String.Data, Node->Prev->String.Data+List->ChunkSize-CountDelta, CountDelta);
}
}
// Write Text into the affected chunks, front to back.
// NOTE(review): SourceOffset advances by a whole ChunkSize while WriteOffset stays fixed - confirm for multi-chunk text.
s64 SourceOffset = 0;
for(string_node *Node = FirstAffectedNode; Node != 0; Node = Node->Next)
{
Copy(Node->String.Data+WriteOffset, Text.Data+SourceOffset, Min(List->ChunkSize-WriteOffset, Text.Count-SourceOffset));
SourceOffset += List->ChunkSize;
if(Node == LastAffectedNode)
{
break;
}
}
}
else if(CountDelta < 0)
{
//- sixten: deletion
// Not implemented: no bytes are moved when the text shrinks.
}
List->TotalCount = NewTotalCount;
}
//~ sixten: Unicode
// Sequence length in bytes, indexed by the top five bits of a leading byte
// (Byte >> 3). A length of 0 marks a byte that cannot start a sequence.
read_only u8 UTF8Lengths[] =
{
    1, 1, 1, 1, 1, 1, 1, 1, // 00000 - 00111
    1, 1, 1, 1, 1, 1, 1, 1, // 01000 - 01111
    0, 0, 0, 0, 0, 0, 0, 0, // 10000 - 10111
    2, 2, 2, 2,             // 11000 - 11011
    3, 3,                   // 11100 - 11101
    4,                      // 11110
    0,                      // 11111
};
// Decodes one codepoint from the start of Data, where Count is the number of
// readable bytes. Returns a zeroed result when Count <= 0. When the leading
// byte is not a valid start byte, or the sequence would run past Count, the
// result is '#' with Size 1. Continuation bytes are not validated.
static string_decode DecodeUTF8Codepoint(u8 *Data, s64 Count)
{
    // Both tables are indexed by sequence length (1..4); slot 0 is unused.
    u8 LeadMask[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
    u8 TrailingShift[] = {0, 18, 12, 6, 0};

    string_decode Result = {};
    if(Count <= 0)
    {
        return(Result);
    }

    Result.Codepoint = '#';
    Result.Size = 1;

    u8 Lead = Data[0];
    u8 Length = UTF8Lengths[Lead >> 3];
    if((Length == 0) || (Length > Count))
    {
        return(Result);
    }

    // Assemble as if the sequence were four bytes long, then shift away the
    // slots that a shorter sequence did not fill.
    u32 Bits = (Lead & LeadMask[Length]) << 18;
    for(s32 Unit = 1; Unit < Length; Unit += 1)
    {
        Bits |= ((Data[Unit] & 0x3F) << (18 - 6*Unit));
    }

    Result.Codepoint = Bits >> TrailingShift[Length];
    Result.Size = Length;
    return(Result);
}
// Encodes Codepoint as UTF-8 into Dest and returns the number of bytes
// written (1-4). Dest may be null, in which case only the size is computed.
// Codepoints of 1 << 21 and above are written as a single '#'.
static u32 EncodeUTF8Codepoint(u8 *Dest, u32 Codepoint)
{
    u32 Size = 0;
    u8 DummyDest[4];
    Dest = Dest?Dest:DummyDest;
    // Only 7-bit values fit in a single byte; 0x80-0xFF need the two-byte
    // form, otherwise the output is not valid UTF-8.
    if(Codepoint < (1 << 7))
    {
        Dest[0] = (u8)Codepoint;
        Size = 1;
    }
    else if(Codepoint < (1 << 11))
    {
        Dest[0] = (u8)(0xC0|(Codepoint >> 6));
        Dest[1] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 2;
    }
    else if(Codepoint < (1 << 16))
    {
        Dest[0] = (u8)(0xE0|(Codepoint >> 12));
        Dest[1] = (u8)(0x80|((Codepoint >> 6) & 0x3F));
        Dest[2] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 3;
    }
    else if(Codepoint < (1 << 21))
    {
        Dest[0] = (u8)(0xF0|(Codepoint >> 18));
        Dest[1] = (u8)(0x80|((Codepoint >> 12) & 0x3F));
        Dest[2] = (u8)(0x80|((Codepoint >> 6) & 0x3F));
        Dest[3] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 4;
    }
    else
    {
        Dest[0] = '#';
        Size = 1;
    }
    return(Size);
}
// Decodes one codepoint from UTF-16 data. Data is read as 16-bit code units
// in native byte order; the u8 pointer type is kept for existing callers.
// NOTE(review): Count is treated as a number of 16-bit units, matching the
// Size this function returns (1 or 2 units) - confirm against callers.
// Returns a zeroed result when Count <= 0, and '#' with Size 1 for an
// unpaired or out-of-order surrogate.
static string_decode DecodeUTF16Codepoint(u8 *Data, s64 Count)
{
    string_decode Result = {};
    if(Count > 0)
    {
        // The surrogate tests below need full 16-bit values; a single byte can
        // never reach 0xD800.
        u16 *Units = (u16 *)Data;
        u16 Lead = Units[0];

        Result.Codepoint = '#';
        Result.Size = 1;
        if(Lead < 0xD800 || 0xDFFF < Lead)
        {
            Result.Codepoint = Lead;
        }
        else if(Count >= 2)
        {
            u16 Trail = Units[1];
            if(Lead < 0xDC00 &&
               0xDC00 <= Trail && Trail < 0xE000)
            {
                // A surrogate pair carries 20 bits on top of the 0x10000 base.
                u32 High = (u32)(Lead - 0xD800);
                u32 Low = (u32)(Trail - 0xDC00);
                Result.Codepoint = 0x10000 + ((High << 10) | Low);
                Result.Size = 2;
            }
        }
    }
    return(Result);
}
// Encodes Codepoint as UTF-16 into Dest and returns the number of 16-bit
// units written (1 or 2). Dest may be null, in which case only the size is
// computed. Values below 0x10000 are stored as-is; everything else is split
// into a surrogate pair.
static u32 EncodeUTF16Codepoint(u16 *Dest, u32 Codepoint)
{
    u16 Scratch[2];
    u16 *Out = Dest ? Dest : Scratch;

    u32 UnitCount = 0;
    if(Codepoint >= 0x10000)
    {
        u32 Offset = Codepoint - 0x10000;
        Out[0] = (Offset >> 10) + 0xD800;
        Out[1] = (Offset & 0x3FF) + 0xDC00;
        UnitCount = 2;
    }
    else
    {
        Out[0] = Codepoint;
        UnitCount = 1;
    }
    return(UnitCount);
}
// Walks String codepoint by codepoint, stepping while Offset is greater than
// 1 and bytes remain, and returns the byte index reached.
static s64 UTF8IndexFromOffset(string String, s64 Offset)
{
    s64 ByteIndex = 0;
    while((ByteIndex < String.Count) && (Offset > 1))
    {
        string_decode Decode = DecodeUTF8Codepoint(String.Data + ByteIndex, String.Count - ByteIndex);
        ByteIndex += Decode.Size;
        Offset -= 1;
    }
    return(ByteIndex);
}
// Counts how many codepoints are decoded from the first
// Min(Index, String.Count) bytes of String.
static s64 UTF8OffsetFromIndex(string String, s64 Index)
{
    s64 Limit = Min(Index, String.Count);
    s64 ByteIndex = 0;
    s64 CodepointCount = 0;
    while(ByteIndex < Limit)
    {
        CodepointCount += 1;
        ByteIndex += DecodeUTF8Codepoint(String.Data + ByteIndex, Limit - ByteIndex).Size;
    }
    return(CodepointCount);
}
// Encodes Codepoint as UTF-8 into Out and returns the number of bytes written
// (1-4). Out must have room for four bytes. Codepoints above 0x10FFFF are
// written as a single '?'.
static s64 UTF8FromCodepoint(u8 *Out, u32 Codepoint)
{
    s64 Length = 0;
    if(Codepoint <= 0x7F)
    {
        Out[0] = (u8)Codepoint;
        Length = 1;
    }
    else if(Codepoint <= 0x7FF)
    {
        Out[0] = (u8)(0xC0 | ((Codepoint >> 6) & 0x1F));
        Out[1] = (u8)(0x80 | ( Codepoint       & 0x3F));
        Length = 2;
    }
    else if(Codepoint <= 0xFFFF)
    {
        Out[0] = (u8)(0xE0 | ((Codepoint >> 12) & 0x0F));
        Out[1] = (u8)(0x80 | ((Codepoint >>  6) & 0x3F));
        Out[2] = (u8)(0x80 | ( Codepoint        & 0x3F));
        Length = 3;
    }
    else if(Codepoint <= 0x10FFFF)
    {
        // The lead byte carries bits 18-20; bits 12-17 go in the second byte.
        Out[0] = (u8)(0xF0 | ((Codepoint >> 18) & 0x07));
        Out[1] = (u8)(0x80 | ((Codepoint >> 12) & 0x3F));
        Out[2] = (u8)(0x80 | ((Codepoint >>  6) & 0x3F));
        Out[3] = (u8)(0x80 | ( Codepoint        & 0x3F));
        Length = 4;
    }
    else
    {
        Out[0] = '?';
        Length = 1;
    }
    return(Length);
}
//~ sixten: Text point
// Converts a byte offset into a 1-based line/column pair. Each '\n' starts a
// new line at column 1, '\r' does not advance the column, and every other
// byte advances it by one. Offsets past the end are clamped to String.Count.
static text_point TextPointFromOffset(string String, s64 Offset)
{
    text_point Point = {1, 1};
    s64 End = Min(String.Count, Offset);
    for(s64 At = 0; At < End; At += 1)
    {
        u8 Char = String.Data[At];
        if(Char == '\n')
        {
            Point.Line += 1;
            Point.Column = 1;
        }
        else if(Char != '\r')
        {
            Point.Column += 1;
        }
    }
    return(Point);
}
// Converts a 1-based line/column pair into a byte offset in String. If the
// column lies past the end of its line, the offset of that line's '\n' (or
// the end of the string) is returned.
static s64 OffsetFromTextPoint(string String, text_point Point)
{
    s64 LinesToSkip = Point.Line - 1;
    s64 ColumnsToSkip = Point.Column - 1;
    s64 Offset = 0;

    // Skip whole lines until the requested line starts.
    while((Offset < String.Count) && (LinesToSkip > 0))
    {
        if(String.Data[Offset] == '\n')
        {
            LinesToSkip -= 1;
        }
        Offset += 1;
    }

    // Step along the line, stopping early at a newline. Every byte counts as
    // one column; the extra column for tabs is currently disabled.
    while((Offset < String.Count) && (ColumnsToSkip > 0) && (String.Data[Offset] != '\n'))
    {
        ColumnsToSkip -= 1;
        Offset += 1;
    }

    return(Offset);
}
// Builds a text range from two points, ordered so that the earlier point
// (by line, then by column) comes first.
static text_range TextRange(text_point A, text_point B)
{
    b32 AIsAfterB = ((A.Line > B.Line) ||
                     ((A.Line == B.Line) && (A.Column > B.Column)));
    text_point Earlier = AIsAfterB ? B : A;
    text_point Later = AIsAfterB ? A : B;
    text_range Result = {Earlier, Later};
    return(Result);
}
////////////////////////////////
//~ sixten: 1D Interval List & Array Functions
// Appends Range to List using a node pushed (uncleared) on Arena and bumps
// the list's count.
static void Range1S64ListPush(memory_arena *Arena, range1_s64_list *List, range1_s64 Range)
{
    range1_s64_node *NewNode = PushStructNoClear(Arena, range1_s64_node);
    NewNode->Range = Range;
    List->Count += 1;
    QueuePush(List->First, List->Last, NewNode);
}
// Copies the ranges of List, in order, into an array pushed on Arena.
static range1_s64_array Range1S64ArrayFromList(memory_arena *Arena, range1_s64_list *List)
{
    range1_s64_array Array = {};
    Array.Count = List->Count;
    Array.Ranges = PushArray(Arena, range1_s64, List->Count);

    range1_s64 *Dest = Array.Ranges;
    for(range1_s64_node *Node = List->First; Node != 0; Node = Node->Next)
    {
        *Dest = Node->Range;
        Dest += 1;
    }
    return(Array);
}
// Converts a 1-based line/column pair into a byte offset using per-line
// ranges. The line is clamped to [1, Lines.Count] and the column to
// [1, DimOfRange(line range)]. String itself is not read.
static s64 OffsetFromTextPoint(string String, range1_s64_array Lines, text_point Point)
{
    s64 LineNumber = Clamp(Point.Line, 1, Lines.Count);
    range1_s64 LineRange = Lines.Ranges[LineNumber - 1];
    s64 ColumnNumber = Clamp(Point.Column, 1, DimOfRange(LineRange));
    return(LineRange.Min + (ColumnNumber - 1));
}