vn/code/core/core_string.cpp

#define STB_SPRINTF_IMPLEMENTATION
#include "third_party/stb_sprintf.h"

//~ sixten: Char functions

// Returns non-zero when C is a space, newline, tab or carriage return.
inline b32 IsWhitespace(char C)
{
    b32 Result = false;
    switch(C)
    {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
        {
            Result = true;
        } break;

        default:
        {
        } break;
    }
    return(Result);
}

// Returns non-zero when C is one of the ASCII characters '0' through '9'.
inline b32 IsDigit(char C)
{
    b32 AtLeastZero = ('0' <= C);
    b32 AtMostNine = (C <= '9');
    return(AtLeastZero && AtMostNine);
}

// Returns non-zero when C is an ASCII letter, upper or lower case.
inline b32 IsLetter(char C)
{
    b32 IsUpper = ('A' <= C) && (C <= 'Z');
    b32 IsLower = ('a' <= C) && (C <= 'z');
    return(IsUpper || IsLower);
}


//~ sixten: String functions

//- sixten: Basic constructors

// Wraps an existing byte buffer in a string; no bytes are copied.
inline string MakeString(u8 *Data, s64 Count)
{
    string Result = {};
    Result.Count = Count;
    Result.Data = Data;
    return(Result);
}

// Wraps a null-terminated C string in a string; the terminator is not counted
// and no bytes are copied.
inline string MakeString(char *CString)
{
    s64 Length = StringLength(CString);

    string Result = {};
    Result.Count = Length;
    Result.Data = (u8 *)CString;
    return(Result);
}

//- sixten: Equality

// Returns non-zero when A and B have the same length and identical bytes.
static b32 AreEqual(string A, string B)
{
    //- strings of different length can never match
    b32 Result = (A.Count == B.Count);

    //- compare byte by byte, stopping at the first mismatch
    for(s64 Index = 0; Result && (Index < A.Count); ++Index)
    {
        Result = (A.Data[Index] == B.Data[Index]);
    }

    return(Result);
}

//- sixten: Substring

// Returns a view into String that starts at Range.Min and spans DimOfRange(Range) bytes.
// The range is not clamped against String.Count and no bytes are copied.
static string Substring(string String, range1_s64 Range)
{
    u8 *Begin = String.Data + Range.Min;
    s64 Count = DimOfRange(Range);
    return(MakeString(Begin, Count));
}

// Returns a view of the first Count bytes of String.
static string Prefix(string String, s64 Count)
{
    return(Substring(String, Range1S64(0, Count)));
}

// Returns a view of the last Count bytes of String.
static string Suffix(string String, s64 Count)
{
    s64 End = String.Count;
    s64 Begin = End - Count;
    return(Substring(String, Range1S64(Begin, End)));
}

//- sixten: Hashing

// Computes a 64-bit hash of the bytes in String: every byte is added to the
// running value, which is then mixed with three xor-shift steps.
static u64 HashString(string String)
{
    u64 Hash = 5731;

    u8 *Byte = String.Data;
    s64 Remaining = String.Count;
    while(Remaining-- > 0)
    {
        Hash += *Byte++;
        Hash ^= Hash << 13;
        Hash ^= Hash >> 7;
        Hash ^= Hash << 17;
    }

    return(Hash);
}

//- sixten: Searching

// Returns the index of the first byte in String equal to Char, or -1 if there is none.
static s64 FirstIndexOf(string String, char Char)
{
    s64 Result = -1;

    s64 Index = 0;
    while((Result < 0) && (Index < String.Count))
    {
        if(String.Data[Index] == Char)
        {
            Result = Index;
        }
        Index += 1;
    }

    return(Result);
}

// Returns the index of the last byte in String equal to Char, or -1 if there is none.
static s64 LastIndexOf(string String, char Char)
{
    s64 Result = -1;

    //- walk backwards from the final byte
    s64 Index = String.Count;
    while(Index-- > 0)
    {
        if(String.Data[Index] == Char)
        {
            Result = Index;
            break;
        }
    }

    return(Result);
}

// Returns the byte index of the first occurrence of Sub inside String, or -1
// if Sub does not occur (including when Sub is longer than String).
static s64 FirstIndexOf(string String, string Sub)
{
    s64 Result = -1;

    //- the last position at which Sub still fits inside String; this position
    //  must be tested too, otherwise a match at the very end is missed
    s64 LastStart = String.Count - Sub.Count;
    for(s64 Index = 0;
        Index <= LastStart;
        ++Index)
    {
        string ToCheck = Substring(String, Range1S64(Index, Index + Sub.Count));
        if(AreEqual(ToCheck, Sub))
        {
            Result = Index;
            break;
        }
    }

    return(Result);
}

// Returns the byte index of the last occurrence of Sub inside String, or -1
// if Sub does not occur (including when Sub is longer than String).
static s64 LastIndexOf(string String, string Sub)
{
    s64 Result = -1;

    //- start at the last position where Sub still fits, so that a match at the
    //  very end of String is found; when Sub is longer the start is negative
    //  and the loop does not run
    for(s64 Index = String.Count - Sub.Count;
        Index >= 0;
        --Index)
    {
        string ToCheck = Substring(String, Range1S64(Index, Index + Sub.Count));
        if(AreEqual(ToCheck, Sub))
        {
            Result = Index;
            break;
        }
    }

    return(Result);
}

//- sixten: Allocation

// Copies the bytes of String into memory pushed on Arena and returns a string
// that refers to the copy. No terminator byte is appended.
static string PushString(memory_arena *Arena, string String)
{
    s64 Count = String.Count;
    u8 *Data = PushArrayNoClear(Arena, u8, Count);
    Copy(Data, String.Data, Count);

    string Result;
    Result.Data = Data;
    Result.Count = Count;
    return(Result);
}

// Formats Format with Arguments into memory pushed on Arena.
// The text is formatted twice: once without a buffer to measure it, and once
// into the allocation. The returned string excludes the trailing null byte that
// is stored after it. Arguments is consumed; the caller still owns its va_end.
static string PushFormatVariadic(memory_arena *Arena, char *Format, va_list Arguments)
{
    //- measure with a copy, since a va_list cannot be walked twice
    va_list ArgumentsCopy;
    va_copy(ArgumentsCopy, Arguments);

    string Result;
    Result.Count = stbsp_vsnprintf(0, 0, Format, ArgumentsCopy);

    //- every va_copy must be paired with a va_end
    va_end(ArgumentsCopy);

    Result.Data = PushArrayNoClear(Arena, u8, Result.Count + 1);
    Result.Data[Result.Count] = 0;

    stbsp_vsnprintf((char *)Result.Data, (s32)Result.Count + 1, Format, Arguments);

    return(Result);
}

// printf-style convenience wrapper: gathers the variadic arguments and forwards
// them to PushFormatVariadic, returning the formatted string pushed on Arena.
static string PushFormat(memory_arena *Arena, char *Format, ...)
{
    string Result;

    va_list Arguments;
    va_start(Arguments, Format);
    Result = PushFormatVariadic(Arena, Format, Arguments);
    va_end(Arguments);

    return(Result);
}

// Copies the characters of a null-terminated C string (terminator excluded)
// into memory pushed on Arena.
static string PushCString(memory_arena *Arena, char *CString)
{
    return(PushString(Arena, MakeString(CString)));
}

//- sixten: Conversion

// Parses a base-10 integer with an optional leading '-' from String.
// Every other character is asserted to be a digit. An empty string yields 0.
// Overflow is not detected.
static s64 ConvertStringToS64(string String)
{
    s64 Result = 0;
    b32 IsNegative = false;

    //- only look at the first byte when there is one, so that an empty
    //  string does not read out of bounds
    s64 Index = 0;
    if((String.Count > 0) && (String.Data[0] == '-'))
    {
        IsNegative = true;
        ++Index;
    }

    for(;Index < String.Count; ++Index)
    {
        u8 Char = String.Data[Index];
        Assert(IsDigit(Char));
        Result = Result*10 + (Char-'0');
    }

    if(IsNegative)
    {
        Result = -Result;
    }

    return(Result);
}

// Writes the base-10 representation of Value into memory pushed on Arena.
// The returned string excludes the null byte stored after the last digit.
static string ConvertS64ToString(memory_arena *Arena, s64 Value)
{
    b32 IsNegative = (Value < 0);

    //- work on the unsigned magnitude: negating the most negative s64 as a
    //  signed value would overflow, the unsigned subtraction is well defined
    unsigned long long Magnitude = (unsigned long long)Value;
    if(IsNegative)
    {
        Magnitude = 0ULL - Magnitude;
    }

    //- count digits with integer division; a floating point logarithm loses
    //  precision for large values and can be off by one
    s64 DigitCount = 1;
    for(unsigned long long Remaining = Magnitude/10;
        Remaining != 0;
        Remaining /= 10)
    {
        DigitCount += 1;
    }

    s64 TotalBufferCount = DigitCount + IsNegative;

    string String = {TotalBufferCount, PushArray(Arena, u8, TotalBufferCount + 1)};
    String.Data[TotalBufferCount] = 0;

    if(IsNegative)
    {
        String.Data[0] = '-';
    }

    //- emit digits from least to most significant, filling the buffer backwards
    for(s64 Index = 0;
        Index < DigitCount;
        ++Index)
    {
        String.Data[TotalBufferCount - 1 - Index] = (u8)('0' + (Magnitude % 10));
        Magnitude /= 10;
    }

    return(String);
}

// Encodes Codepoint as UTF-8 and returns it as a string pushed on Arena.
// The encoded bytes are handed over as a C string, so the result ends at the
// first zero byte in the scratch buffer.
static string StringFromCodepoint(memory_arena *Arena, u32 Codepoint)
{
    //- one byte more than the longest encoding written by UTF8FromCodepoint,
    //  zeroed so the buffer is always null-terminated
    u8 Encoded[5] = {};
    UTF8FromCodepoint(Encoded, Codepoint);

    return(PushCString(Arena, (char *)Encoded));
}

//- sixten: Replacing

// Returns a copy of Text, pushed on Arena, with every byte equal to ToRemove
// left out. A null byte is stored after the result but not counted in it.
static string RemoveAll(memory_arena *Arena, string Text, char ToRemove)
{
    //- first pass: count the bytes that will be dropped
    s64 Occurrences = 0;
    for(s64 ReadIndex = 0; ReadIndex < Text.Count; ReadIndex += 1)
    {
        if(Text.Data[ReadIndex] == ToRemove)
        {
            Occurrences += 1;
        }
    }

    //- allocate the exact size needed, plus the terminator
    s64 Count = Text.Count - Occurrences;
    u8 *Data = PushArrayNoClear(Arena, u8, Count + 1);
    Data[Count] = 0;

    //- second pass: copy over the bytes that are kept
    u8 *Out = Data;
    for(s64 ReadIndex = 0; ReadIndex < Text.Count; ReadIndex += 1)
    {
        u8 Byte = Text.Data[ReadIndex];
        if(Byte != ToRemove)
        {
            *Out++ = Byte;
        }
    }

    return(MakeString(Data, Count));
}

//- sixten: "C Style" strings

// Returns the number of characters before the null terminator of String.
static s64 StringLength(char *String)
{
    char *End = String;
    while(*End)
    {
        ++End;
    }

    return((s64)(End - String));
}


//~ sixten: String list

// Appends String to the end of List. Only the list node is allocated (from
// Arena); the string bytes themselves are referenced, not copied.
static void AppendString(string_list *List, string String, memory_arena *Arena)
{
    //- keep the running byte total up to date for JoinStringList
    List->TotalCount += String.Count;

    string_node *NewNode = PushStruct(Arena, string_node);
    NewNode->String = String;
    DLLInsertLast(List->First, List->Last, NewNode);
}

// Concatenates every string in List, in order, into one buffer pushed on Arena.
// A null byte is stored after the result but not counted in it.
static string JoinStringList(string_list *List, memory_arena *Arena)
{
    s64 TotalCount = List->TotalCount;

    u8 *Buffer = PushArray(Arena, u8, TotalCount + 1);
    Buffer[TotalCount] = 0;

    //- append each node's bytes at the running write position
    u8 *Out = Buffer;
    for(string_node *Node = List->First; Node != 0; Node = Node->Next)
    {
        string Source = Node->String;
        for(s64 Index = 0; Index < Source.Count; ++Index)
        {
            *Out++ = Source.Data[Index];
        }
    }

    return(MakeString(Buffer, TotalCount));
}

/////////////////////////////////////
//~ sixten: String Chunk Functions
// Creates an empty chunk list; everything is zeroed except the chunk size.
static string_chunk_list MakeStringChunkList(s64 ChunkSize)
{
    string_chunk_list List = {};
    List.ChunkSize = ChunkSize;

    return(List);
}

// Flattens the chunks of List into one contiguous buffer pushed on Arena.
// At most List->TotalCount bytes are copied, List->ChunkSize bytes per chunk.
// A null byte is stored after the result but not counted in it.
static string JoinStringChunkList(memory_arena *Arena, string_chunk_list *List)
{
    string Result = {};
    Result.Count = List->TotalCount;
    Result.Data = PushArrayNoClear(Arena, u8, List->TotalCount + 1);

    //- the buffer is not cleared, so the terminator byte must be written explicitly
    Result.Data[Result.Count] = 0;

    s64 Index = 0;
    s64 CountRemaining = List->TotalCount;

    //- stop once everything is copied: spare chunks at the end of the list
    //  would otherwise lead to a copy with a negative count
    for(string_node *Node = List->First;
        (Node != 0) && (CountRemaining > 0);
        Node = Node->Next)
    {
        s64 CountToCopy = Min(CountRemaining, List->ChunkSize);
        Copy(Result.Data + Index, Node->String.Data, CountToCopy);

        //- advance by what was actually copied, so the write position cannot
        //  drift from the data when a node's own Count is out of date
        CountRemaining -= CountToCopy;
        Index += CountToCopy;
    }
    return(Result);
}

// sixten(TODO): Incomplete, remove maybe?
// Replaces the bytes covered by Range in the chunk list with Text, allocating
// additional chunks from Arena (or reusing nodes from the free list) when the
// new total does not fit. Only the insertion path (new total >= old total) is
// implemented; the deletion branch is empty. List->TotalCount is updated in
// both cases.
static void ReplaceRange(memory_arena *Arena, string_chunk_list *List, string Text, range1_s64 Range)
{
    // NOTE(review): 0ULL is unsigned, so if Max is a plain comparison macro the
    // signed operand is converted to unsigned and a negative difference is
    // probably not clamped to zero here - confirm.
    s64 NewTotalCount = Max(0ULL, List->TotalCount - DimOfRange(Range)) + Text.Count;

    //- sixten: do we need to allocate more chunks?
    if(List->ChunkSize*List->ChunkCount < NewTotalCount)
    {
        // one more chunk than the integer division yields, so the result is always at least 1
        s64 ChunksToAlloc = (NewTotalCount - List->ChunkSize*List->ChunkCount)/List->ChunkSize + 1;
        for(s64 Index = 0; Index < ChunksToAlloc; Index += 1)
        {
            if(DLLIsEmpty(List->FirstFree))
            {
                // no recycled node available: push a new node and its chunk buffer
                string_node *Node = PushStructNoClear(Arena, string_node);
                Node->String.Count = 0;
                Node->String.Data = PushArrayNoClear(Arena, u8, List->ChunkSize);
                DLLInsertLast(List->First, List->Last, Node);
            }
            else
            {
                // move a node from the free list to the end of the active list
                string_node *Node = List->FirstFree;
                Node->String.Count = 0;
                DLLRemove(List->FirstFree, List->LastFree, Node);
                DLLInsertLast(List->First, List->Last, Node);
            }
        }

        List->ChunkCount += ChunksToAlloc;
    }

    // positive when the list grows, negative when it shrinks
    s64 CountDelta = NewTotalCount - List->TotalCount;

    // sixten: I cannot be bothered enough to figure out the correct implementation for this. However, if I do this - remember that you can rearrange
    // the ordering of the linked list, instead of actually copying over the bytes for the majority of this.
    Assert(AbsoluteValue(CountDelta) < List->ChunkSize);

    //- sixten: find the first and last affected nodes
    s64 FirstAffectedNodeIndex = Range.Min/List->ChunkSize;
    s64 LastAffectedNodeIndex = Range.Max/List->ChunkSize;
    string_node *FirstAffectedNode = List->First;
    for(s64 WalkIndex = 0; WalkIndex < FirstAffectedNodeIndex; WalkIndex += 1)
    {
        FirstAffectedNode = FirstAffectedNode->Next;
    }
    string_node *LastAffectedNode = FirstAffectedNode;
    for(s64 WalkIndex = 0; WalkIndex < LastAffectedNodeIndex-FirstAffectedNodeIndex; WalkIndex += 1)
    {
        LastAffectedNode = LastAffectedNode->Next;
    }

    if(CountDelta >= 0)
    {
        //- sixten: insertion - make room and then copy the data
        s64 WriteOffset = Range.Min%List->ChunkSize;
        // walk backwards from the last chunk to the last affected chunk, moving
        // bytes CountDelta positions towards the end of each chunk
        for(string_node *Node = List->Last; Node != 0; Node = Node->Prev)
        {
            CopyReverse(Node->String.Data+CountDelta+WriteOffset, Node->String.Data+WriteOffset, List->ChunkSize-CountDelta-WriteOffset);

            if(Node == LastAffectedNode)
            {
                break;
            }
            else
            {
                // pull the last CountDelta bytes of the previous chunk into the start of this one
                Copy(Node->String.Data, Node->Prev->String.Data+List->ChunkSize-CountDelta, CountDelta);
            }
        }
        // write Text into the affected chunks, from the first to the last affected node
        s64 SourceOffset = 0;
        for(string_node *Node = FirstAffectedNode; Node != 0; Node = Node->Next)
        {
            Copy(Node->String.Data+WriteOffset, Text.Data+SourceOffset, Min(List->ChunkSize-WriteOffset, Text.Count-SourceOffset));
            SourceOffset += List->ChunkSize;
            if(Node == LastAffectedNode)
            {
                break;
            }
        }
    }
    else if(CountDelta < 0)
    {
        //- sixten: deletion
        // not implemented: no bytes are moved when the list shrinks
    }

    List->TotalCount = NewTotalCount;
}


//~ sixten: Unicode

// Byte length of a UTF-8 sequence, indexed by the top five bits of its first
// byte (Byte>>3). A length of 0 marks a byte that cannot start a sequence.
read_only u8 UTF8Lengths[] =
{
    1, 1, 1, 1, 1, 1, 1, 1, // 00000 .. 00111
    1, 1, 1, 1, 1, 1, 1, 1, // 01000 .. 01111
    0, 0, 0, 0, 0, 0, 0, 0, // 10000 .. 10111
    2, 2, 2, 2,             // 11000 .. 11011
    3, 3,                   // 11100 .. 11101
    4,                      // 11110
    0,                      // 11111
};

// Decodes one UTF-8 sequence from the start of Data, which holds Count bytes.
// Returns the codepoint and the number of bytes consumed. A first byte that
// cannot start a sequence, or a sequence that does not fit in Count, decodes
// to '#' with a size of 1. With Count <= 0 the result is zeroed.
// Continuation bytes are not validated.
static string_decode DecodeUTF8Codepoint(u8 *Data, s64 Count)
{
    string_decode Result = {};
    if(Count > 0)
    {
        u8 LeadByte = Data[0];
        u8 Length = UTF8Lengths[LeadByte>>3];
        if((Length == 0) || (Length > Count))
        {
            //- invalid or truncated sequence: emit the placeholder and skip one byte
            Result.Codepoint = '#';
            Result.Size = 1;
        }
        else
        {
            //- payload bits of the first byte, selected by sequence length
            u8 LeadMasks[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
            u32 Codepoint = (LeadByte & LeadMasks[Length]);

            //- each following byte contributes its low six bits
            for(s64 Index = 1; Index < Length; Index += 1)
            {
                Codepoint = (Codepoint << 6) | (Data[Index] & 0x3F);
            }

            Result.Codepoint = Codepoint;
            Result.Size = Length;
        }
    }
    return(Result);
}

// Encodes Codepoint as UTF-8 into Dest and returns the number of bytes written
// (1 to 4). Dest may be null, in which case only the size is computed.
// Codepoints of 1<<21 and above are replaced by a single '#'.
static u32 EncodeUTF8Codepoint(u8 *Dest, u32 Codepoint)
{
    u32 Size = 0;
    u8 DummyDest[4];
    Dest = Dest?Dest:DummyDest;

    //- only seven bits fit in a single byte; 0x80..0xFF need two bytes,
    //  writing them raw would produce invalid UTF-8
    if(Codepoint < (1 << 7))
    {
        Dest[0] = (u8)Codepoint;
        Size = 1;
    }
    else if (Codepoint < (1 << 11))
    {
        Dest[0] = (u8)(0xC0|(Codepoint >> 6));
        Dest[1] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 2;
    }
    else if (Codepoint < (1 << 16))
    {
        Dest[0] = (u8)(0xE0|(Codepoint >> 12));
        Dest[1] = (u8)(0x80|((Codepoint >> 6) & 0x3F));
        Dest[2] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 3;
    }
    else if (Codepoint < (1 << 21))
    {
        Dest[0] = (u8)(0xF0|(Codepoint >> 18));
        Dest[1] = (u8)(0x80|((Codepoint >> 12) & 0x3F));
        Dest[2] = (u8)(0x80|((Codepoint >> 6) & 0x3F));
        Dest[3] = (u8)(0x80|(Codepoint & 0x3F));
        Size = 4;
    }
    else
    {
        Dest[0] = '#';
        Size = 1;
    }
    return(Size);
}

// Decodes one UTF-16 codepoint from the start of Data, which holds Count units.
// Returns the codepoint and the number of units consumed. Anything that cannot
// be decoded (no input, unpaired or truncated surrogate) yields '#' with size 1.
//
// NOTE(review): Data is declared as u8 *, so a unit can never reach 0xD800 and
// the surrogate branch below is unreachable as written. EncodeUTF16Codepoint
// writes u16 units, so this parameter presumably should be u16 * as well -
// confirm against the header and the callers before changing the signature.
static string_decode DecodeUTF16Codepoint(u8 *Data, s64 Count)
{
    string_decode Result = {'#', 1};

    //- do not touch Data when there is nothing to read
    if(Count > 0)
    {
        if(Data[0] < 0xD800 || 0xDFFF < Data[0])
        {
            //- not a surrogate: the unit is the codepoint
            Result.Codepoint = Data[0];
            Result.Size = 1;
        }
        else if(Count >= 2)
        {
            if(0xD800 <= Data[0] && Data[0] < 0xDC00 &&
               0xDC00 <= Data[1] && Data[1] < 0xE000)
            {
                //- a surrogate pair stores the codepoint minus 0x10000, so the
                //  bias has to be added back when decoding
                Result.Codepoint = (((Data[0] - 0xD800)<<10)|(Data[1]-0xDC00)) + 0x10000;
                Result.Size = 2;
            }
        }
    }
    return(Result);
}

// Encodes Codepoint as UTF-16 into Dest and returns the number of 16-bit units
// written (1 or 2). Dest may be null, in which case only the size is computed.
static u32 EncodeUTF16Codepoint(u16 *Dest, u32 Codepoint)
{
    u16 Scratch[2];
    u16 *Out = Dest ? Dest : Scratch;

    u32 Size;
    if(Codepoint >= 0x10000)
    {
        //- surrogate pair: high ten bits, then low ten bits, of the biased value
        u32 Biased = Codepoint - 0x10000;
        Out[0] = (Biased >> 10) + 0xD800;
        Out[1] = (Biased & 0x3FF) + 0xDC00;
        Size = 2;
    }
    else
    {
        //- fits in a single unit
        Out[0] = Codepoint;
        Size = 1;
    }
    return(Size);
}

// Converts a codepoint offset into a byte index within String by decoding
// codepoints from the start. The result never exceeds the end of the data
// by more than the size of the last decoded codepoint.
// NOTE(review): the walk stops while Offset is still 1, so offsets 0 and 1 both
// map to byte index 0 - presumably offsets are 1-based here; confirm with callers.
static s64 UTF8IndexFromOffset(string String, s64 Offset)
{
    s64 ByteIndex = 0;
    while((ByteIndex < String.Count) && (Offset > 1))
    {
        ByteIndex += DecodeUTF8Codepoint(String.Data + ByteIndex, String.Count - ByteIndex).Size;
        Offset -= 1;
    }
    return(ByteIndex);
}

// Counts how many codepoints are decoded from the first Index bytes of String
// (Index is limited to String.Count).
static s64 UTF8OffsetFromIndex(string String, s64 Index)
{
    s64 Limit = Min(Index, String.Count);

    s64 CodepointCount = 0;
    s64 ByteIndex = 0;
    while(ByteIndex < Limit)
    {
        ByteIndex += DecodeUTF8Codepoint(String.Data + ByteIndex, Limit - ByteIndex).Size;
        CodepointCount += 1;
    }
    return(CodepointCount);
}

// Encodes Codepoint as UTF-8 into Out and returns the number of bytes written
// (1 to 4). Out must have room for four bytes. Codepoints above 0x10FFFF are
// replaced by a single '?'.
static s64 UTF8FromCodepoint(u8 *Out, u32 Codepoint)
{
    s64 Length = 0;
    if(Codepoint <= 0x7F)
    {
        Out[0] = (u8)Codepoint;
        Length = 1;
    }
    else if(Codepoint <= 0x7FF)
    {
        Out[0] = (u8)(0xC0 | ((Codepoint >> 6)  & 0x1F));
        Out[1] = (u8)(0x80 | ( Codepoint        & 0x3F));
        Length = 2;
    }
    else if(Codepoint <= 0xFFFF)
    {
        Out[0] = (u8)(0xE0 | ((Codepoint >> 12) & 0x0F));
        Out[1] = (u8)(0x80 | ((Codepoint >> 6)  & 0x3F));
        Out[2] = (u8)(0x80 | ( Codepoint        & 0x3F));
        Length = 3;
    }
    else if(Codepoint <= 0x10FFFF)
    {
        //- the first byte carries bits 18..20; taking them from bit 12 would
        //  duplicate the bits already stored in the second byte
        Out[0] = (u8)(0xF0 | ((Codepoint >> 18) & 0x07));
        Out[1] = (u8)(0x80 | ((Codepoint >> 12) & 0x3F));
        Out[2] = (u8)(0x80 | ((Codepoint >> 6)  & 0x3F));
        Out[3] = (u8)(0x80 | ( Codepoint        & 0x3F));
        Length = 4;
    }
    else
    {
        Out[0] = '?';
        Length = 1;
    }

    return(Length);
}

//~ sixten: Text point

// Converts a byte offset into a 1-based text point by scanning String from the
// start: '\n' begins a new line and resets the column, '\r' is ignored and any
// other byte advances the column by one.
static text_point TextPointFromOffset(string String, s64 Offset)
{
    text_point Point = {1, 1};
    for(s64 Index = 0; (Index < Offset) && (Index < String.Count); ++Index)
    {
        u8 Char = String.Data[Index];
        if(Char == '\n')
        {
            Point.Line += 1;
            Point.Column = 1;
        }
        else if(Char != '\r')
        {
            Point.Column += 1;
        }
    }

    return(Point);
}

// Converts a 1-based text point into a byte offset by scanning String from the
// start. The result stops at the end of the target line, or at the end of
// String, when the point lies outside of the text.
static s64 OffsetFromTextPoint(string String, text_point Point)
{
    s64 Offset = 0;

    //- switch to zero-based line and column counts
    Point.Line -= 1;
    Point.Column -= 1;

    //- skip past one newline for every line that precedes the target line
    while((Offset < String.Count) && (Point.Line > 0))
    {
        if(String.Data[Offset] == '\n')
        {
            Point.Line -= 1;
        }
        Offset += 1;
    }

    //- advance along the line, one column per byte
    while((Offset < String.Count) && (Point.Column > 0))
    {
        //- reaching a newline means the requested column was out of bounds
        if(String.Data[Offset] == '\n')
        {
            break;
        }
        //- tab handling is currently disabled, every byte counts as one column
#if 0
        if(String.Data[Offset] == '\t')
        {
            Point.Column -= 1;
        }
#endif
        Point.Column -= 1;
        Offset += 1;
    }

    return(Offset);
}

// Builds a text range from two points, ordered so that the earlier point
// (by line, then by column) comes first.
static text_range TextRange(text_point A, text_point B)
{
    text_point First = A;
    text_point Second = B;

    b32 AIsAfterB = (A.Line > B.Line) || ((A.Line == B.Line) && (A.Column > B.Column));
    if(AIsAfterB)
    {
        First = B;
        Second = A;
    }

    text_range Result = {First, Second};
    return(Result);
}

////////////////////////////////
//~ sixten: 1D Interval List & Array Functions
// Appends Range to List, allocating the list node from Arena.
static void Range1S64ListPush(memory_arena *Arena, range1_s64_list *List, range1_s64 Range)
{
    List->Count += 1;

    //- the node is not cleared, so the range is filled in before it is linked
    range1_s64_node *NewNode = PushStructNoClear(Arena, range1_s64_node);
    NewNode->Range = Range;
    QueuePush(List->First, List->Last, NewNode);
}

// Copies the ranges stored in List, in order, into a flat array pushed on Arena.
static range1_s64_array Range1S64ArrayFromList(memory_arena *Arena, range1_s64_list *List)
{
    range1_s64_array Result = {};
    Result.Count = List->Count;
    Result.Ranges = PushArray(Arena, range1_s64, List->Count);

    range1_s64 *Out = Result.Ranges;
    for(range1_s64_node *Node = List->First; Node != 0; Node = Node->Next)
    {
        *Out++ = Node->Range;
    }
    return(Result);
}

// Converts a 1-based text point into a byte offset using precomputed line
// ranges rather than scanning String. Line and column are passed through Clamp
// (presumably Clamp(Value, Low, High)) before being used as indices.
static s64 OffsetFromTextPoint(string String, range1_s64_array Lines, text_point Point)
{
    s64 Line = Clamp(Point.Line, 1, Lines.Count);
    range1_s64 LineRange = Lines.Ranges[Line - 1];

    s64 Column = Clamp(Point.Column, 1, DimOfRange(LineRange));
    return(LineRange.Min + (Column - 1));
}