//
// File: hzString.cpp
//
// Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
// The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
// Software Foundation, either version 3 of the License, or any later version.
//
// The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
#include <iostream>
#include <stdarg.h>
#include <execinfo.h>
#include "hzChars.h"
#include "hzProcess.h"
#include "hzTextproc.h"
#include "hzString.h"
#include "hzTmplMapS.h"
/*
** String addresses are unsigned 32-bit numbers with the most significant 16-bits indicating the block and the least significant 16-bits indicating the position within the block.
*/
#define HZ_STR_OVERSIZE 16379 // 16384 (minus 5 bytes for copy count, len indicator and NULL terminator). Above this size, strings are allocated from the heap.
#define HZ_STRADDR_BLKID 0xffff0000 // Block part of address
#define HZ_STRING_SBSPACE 65536 // Total size of the string space blocks in multiples of 4 bytes
/*
** Definitions
*/
struct _strBloc
{
// Small string space superblock. _strAlloc carves string spaces out of m_Space in whole 32-bit units, always starting at the position given by m_Usage. A string address
// holds the block number in its upper 16 bits and the unit offset into m_Space in its lower 16 bits.
uint32_t m_blkSelf ; // Block number (1-based: _strAlloc sets it to the superblock count once the block has been added)
uint32_t m_Usage ; // Space used in 32-bit units (so position of free space)
uint32_t m_Space[HZ_STRING_SBSPACE] ; // Areas for string spaces (HZ_STRING_SBSPACE units of 4 bytes)
} ;
class _strItem
{
// hzString control area (start of string).
public:
uchar m_copy ; // Copy count (min value 1)
uchar m_size[1] ; // Start of serial integer size (1 to 3 bytes)
char m_resv[2] ; // Start of data in the case of bytes < 128 bytes
_strItem (void) { m_copy = 0 ; m_size[0] = 0 ; m_resv[0] = m_resv[1] = 0 ; }
void _setSize (uint32_t nSize)
{
if (nSize < 128)
m_size[0] = nSize ;
else if (nSize < HZ_STR_OVERSIZE)
{
m_size[0] = 128 + (nSize / 256) ;
m_size[1] = nSize % 256 ;
}
else if (nSize < HZSTRING_MAXLEN)
{
m_size[0] = 192 + (nSize / 0xffff) ;
m_size[1] = (nSize & 0xff00) >> 8 ;
m_size[2] = nSize & 0xff ;
}
else
{
// Invalid size
m_size[0] = m_size[1] = m_size[2] = 0xff ;
}
}
uint32_t _getSize (void)
{
if (m_size[0] & 0x80)
{
if (m_size[0] & 0x40)
return ((m_size[0] & 0x3f) << 16) + (m_size[1] << 8) + m_size[2] ;
return ((m_size[0] & 0x3f) << 8) + m_size[1] ;
}
return m_size[0] ;
}
/*
uint32_t _factor (void)
{
// Actual length in bytes including copy count, serial integer length, the string value and the NULL terminator.
if (m_size[0] & 0x80)
{
if (m_size[0] & 0x40)
return ((m_size[0] & 0x3f) << 16) + (m_size[1] << 8) + m_size[2] + 5 ;
return ((m_size[0] & 0x3f) << 8) + m_size[1] + 4 ;
}
return m_size[0] + 3 ;
}
*/
char* _data (void)
{
// Note that in setting a string, _setSize must be done first before using the pointer returned by this function to set the string value
if (m_size[0] & 0x80)
{
if (m_size[0] & 0x40)
return m_resv + 2 ;
return m_resv + 1 ;
}
return m_resv ;
}
} ;
class _strRegime
{
    // Bookkeeping for all string space: the superblocks used for small strings, the free lists of released small string spaces, and the map of heap allocated oversize
    // strings. A single instance is created by _strAlloc on first use.
public:
    hzArray <_strBloc*>         m_Super ;       // Array of superblocks
    hzMapS  <uint32_t,uint32_t> m_Flists ;      // Freelists - Size (in 4-byte units) to address of first free string space
    hzMapS  <uint32_t,void*>    m_Heap ;        // Heap allocated oversized strings - address to memory
    _strBloc*                   m_pTopBlock ;   // Latest small string space superblock (only one from which new string spaces can be allocated)

    // Locks
    //hzLockRWD m_lockSmall[32] ;               // Small string allocation locks
    hzLockRWD   m_lockSbloc ;                   // Lock for allocating superblocks
    hzLockRWD   m_lockOsize ;                   // Lock for allocating/freeing of oversize strings
    uint32_t    m_nOver ;                       // Population of oversize strings

    _strRegime ()
    {
        m_pTopBlock = 0 ;
        m_nOver = 0 ;       // Was previously left uninitialised
    }
} ;
/*
** Variables
*/
global hzSet<hzString> _hzGlobal_setStrings ; // Global string repository
static _strRegime* s_pStrRegime ; // The one and only string regime (created by _strAlloc on first use)
global const hzString _hzGlobal_nullString ; // Null string
global const hzString _hz_null_string ; // Null string
global const hzString _hzString_TooLong = "-- String Too Long --" ; // To be returned when limit exceeded
global const hzString _hzString_Fault = "-- String Fault --" ; // To be returned when string corrupted
static char _hz_NullBuffer [8] ; // NOTE(review): not referenced in the visible part of this file - confirm it is still needed
void* _strXlate (uint32_t strAddr)
{
//_hzfunc(__func__) ;
_strBloc* pBloc ; // Pointer to superblock
uint32_t* pSeg ; // Data segment
uint32_t blocNo ; // Offset within vector of blocks
uint32_t slotNo ; // String slot within block
if (!strAddr)
return 0 ;
slotNo = strAddr & 0xffff ;
blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ;
if (!blocNo)
{
// Oversized designation
return s_pStrRegime->m_Heap[strAddr] ;
}
// String not oversized
if (blocNo > s_pStrRegime->m_Super.Count())
hzexit(E_CORRUPT, "Cannot xlate address %u:%u. No such superblock (%u issued)\n", blocNo, slotNo, s_pStrRegime->m_Super.Count()) ;
pBloc = s_pStrRegime->m_Super[blocNo-1] ;
if (!pBloc)
hzexit(E_CORRUPT, "strXlate: No block found for address %u:%u. Total of %u superblocks issued)\n", blocNo, slotNo, s_pStrRegime->m_Super.Count()) ;
pSeg = pBloc->m_Space + slotNo ;
return (void*) pSeg ;
}
uint32_t _strAlloc (uint32_t nSize)
{
// Allocate string space. The actual space allocated will be sufficient for a string of length nSize, plus the copy count, length indicator and NULL terminator.
//
// Argument: nSize Required string length (excluding copy count, length indicator and null terminator)
//
// Returns: Pointer to the required string space. If is a fatal condition if this cannot be obtained.
_hzfunc(__func__) ;
_strBloc* pBloc = 0 ; // Pointer to superblock
uint32_t* pSlot = 0 ; // Pointer to freelist slot
hzLockS* pLock ; // Cast to hzLock (of first 4 bytes of free space)
void* pVoid ; // Address allocated as void*
uint32_t* pSeg = 0 ; // Pointer to segment
uint32_t strAddr = 0 ; // Address of string (block + slot)
uint32_t nUnit ; // Number of 4/8-byte units required
uint32_t blocNo ; // Offset within vector of blocks
uint32_t slotNo ; // String slot within block
if (!s_pStrRegime)
s_pStrRegime = new _strRegime() ;
if (!nSize)
hzexit(E_NODATA, "Cannot allocate a zero size string") ;
if (nSize < 128)
nSize += 3 ;
else if (nSize < HZ_STR_OVERSIZE)
nSize += 4 ;
else
nSize += 5 ;
// Deal with oversized allocations
if (nSize >= HZ_STR_OVERSIZE)
{
nUnit = (nSize/8) + (nSize%8 ? 1:0) ;
strAddr = s_pStrRegime->m_Heap.Count() + 1 ;
pVoid = (void*) new char[nUnit * 8] ;
s_pStrRegime->m_Heap.Insert(strAddr, pVoid) ;
return strAddr ;
}
// String not oversized
nUnit = (nSize/4) + (nSize%4 ? 1:0) ;
if (nUnit < 2)
nUnit = 2 ;
// Is there a free list for the exact size?
/*
if (s_pStrRegime->m_Flists.Exists(nUnit))
{
strAddr = s_pStrRegime->m_Flists[nUnit] ;
slotNo = strAddr & 0xffff ;
blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ;
pBloc = s_pStrRegime->m_Super[blocNo] ;
if (!pBloc)
hzexit(E_CORRUPT, "Case 1 Illegal String Address %u:%u", blocNo, slotNo) ;
pSlot = pBloc->m_Space + slotNo ;
if (pSlot[0])
goto top ;
pLock = (hzLockS*) pSlot ;
pLock->Lock() ;
s_pStrRegime->m_Flists[nUnit] = pSlot[1] ; //->m_fleNext ;
pLock->Unlock() ;
memset(pSlot, 0, nUnit * 4) ;
return strAddr ;
}
*/
top:
// No free slots so create another superblock
if (_hzGlobal_MT)
s_pStrRegime->m_lockSbloc.LockWrite() ;
if (!s_pStrRegime->m_pTopBlock || ((s_pStrRegime->m_pTopBlock->m_Usage + nUnit) >= HZ_STRING_SBSPACE))
{
// Assign any remaining free space on the highest block to the small freelist of the size
// Then create a new highest block
s_pStrRegime->m_pTopBlock = pBloc = new _strBloc() ;
memset(pBloc, 0, sizeof(_strBloc)) ;
s_pStrRegime->m_Super.Add(pBloc) ;
pBloc->m_blkSelf = s_pStrRegime->m_Super.Count() ;
pBloc->m_Usage = 0 ;
printf("CREATED SUPERBLOCK %u at %p\n", pBloc->m_blkSelf, pBloc) ;
}
// Assign from the superblock free space
pSeg = s_pStrRegime->m_pTopBlock->m_Space + s_pStrRegime->m_pTopBlock->m_Usage ;
strAddr = ((s_pStrRegime->m_pTopBlock->m_blkSelf) << 16) + s_pStrRegime->m_pTopBlock->m_Usage ;
s_pStrRegime->m_pTopBlock->m_Usage += nUnit ;
memset(pSeg, 0, nUnit * 4) ;
if (_hzGlobal_MT)
s_pStrRegime->m_lockSbloc.Unlock() ;
//printf("Allocated %d (%u) bytes at %u:%u\n", nSize, nUnit * 4, (strAddr&0xffff0000)>>16, (strAddr&0xffff)) ;
return strAddr ;
}
void _strFree (uint32_t strAddr, uint32_t nSize)
{
// Places object in freelist if it is of one of the precribed sizes, otherwise it frees it from the OS managed heap
//
// Arguments: 1) pMemobj A pointer to what is assumed to be string space to be freed
// 2) nSize The size of the string space
//
// Returns: None
_hzfunc(__func__) ;
_strItem* pItem ; // Start of string space
uint32_t* pSlot ; // Start of string space (as freelist entry)
uint32_t nUnit ; // Number of 4-byte units in string being freed
uint32_t blocNo ; // Offset within vector of blocks
uint32_t slotNo ; // String slot within block
if (!strAddr)
hzexit(E_CORRUPT, "NULL address") ;
slotNo = strAddr & 0xffff ;
blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ;
if (!nSize)
hzexit(E_CORRUPT, "WARNING freeing zero size item %u:%u", blocNo, slotNo) ;
pItem = (_strItem*) _strXlate(strAddr) ;
if (!blocNo || nSize >= HZ_STR_OVERSIZE)
{
// Heap
printf("Deleting heap item %u:%u (size %d)\n", blocNo, slotNo, nSize) ;
pItem = (_strItem*) s_pStrRegime->m_Heap[strAddr-1] ;
delete pItem ;
s_pStrRegime->m_Heap.Delete(strAddr) ;
return ;
}
//return ;
//printf("Deleting item %u:%u (size %d) %s\n", blocNo, slotNo, nSize, pItem->_data()) ;
// Take account of the copy count (1 byte), the length indicator (1 to 3 bytes), and the NULL terminator
if (nSize < 128)
nSize += 3 ;
else if (nSize < 16384)
nSize += 4 ;
else
nSize += 5 ;
nUnit = (nSize/4) + (nSize%4 ? 1:0) ;
if (nUnit < 2)
nUnit = 2 ;
if (_hzGlobal_MT)
s_pStrRegime->m_lockSbloc.LockWrite() ;
pSlot = (uint32_t*) pItem ;
pSlot[0] = 0 ;
if (!s_pStrRegime->m_Flists.Exists(nUnit))
{
//pSlot->m_fleNext = 0 ;
s_pStrRegime->m_Flists.Insert(nUnit, strAddr) ;
}
else
{
//pSlot->m_fleNext = s_pStrRegime->m_Flists[nUnit] ;
pSlot[1] = s_pStrRegime->m_Flists[nUnit] ;
s_pStrRegime->m_Flists[nUnit] = strAddr ;
}
if (_hzGlobal_MT)
s_pStrRegime->m_lockSbloc.Unlock() ;
}
/*
** hzString Constructors/Destructors
*/
hzString::hzString (void)
{
    // Construct an empty (null) string and count the instance in the global memory statistics.

    _hzfunc("hzString::hzString") ;

    _hzGlobal_Memstats.m_numStrings++ ;
    m_addr = 0 ;
}
hzString::hzString (const char* pStr)
{
    // Construct a string from a null terminated char string. The instance starts out empty and the value is then set by the char* assignment operator.
    //
    // Argument: pStr  The null terminated value

    _hzfunc("hzString::hzString(char*)") ;

    _hzGlobal_Memstats.m_numStrings++ ;
    m_addr = 0 ;

    *this = pStr ;
}
hzString::hzString (const hzString& op)
{
    // Copy constructor. The instance starts out empty and the value is then set by the hzString assignment operator.
    //
    // Argument: op  The string to copy

    _hzfunc("hzString::hzString(copy)") ;

    _hzGlobal_Memstats.m_numStrings++ ;
    m_addr = 0 ;

    *this = op ;
}
hzString::~hzString (void)
{
    // Destructor. Gives up this instance's hold on any string space via Clear() and removes the instance from the global memory statistics.

    _hzfunc("hzString::~hzString") ;

    if (m_addr != 0)
        Clear() ;

    _hzGlobal_Memstats.m_numStrings-- ;
}
/*
** hzString private methods
*/
int32_t hzString::_cmp (const hzString& s) const
{
// Case sensitive compare based on strcmp
//
// Argument: s The test string
//
// Returns: -1 If this string is less than the test string
// 1 If ths string is greater than the test string
// 0 If the two strings are equal
_hzfunc("hzString::_cmp(hzString&)") ;
_strItem* thisCtl ; // This string control area
_strItem* suppCtl ; // Supplied string control area
if (!m_addr) return s.m_addr ? -1 : 0 ;
if (!s.m_addr) return m_addr ? 1 : 0 ;
if (m_addr == s.m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
suppCtl = (_strItem*) _strXlate(s.m_addr) ;
return strcmp(thisCtl->_data(), suppCtl->_data()) ;
}
int32_t hzString::_cmpI (const hzString& s) const
{
// Case insensitive compare based on strcasecmp
//
// Arguments: 1) s The test string
//
// Returns: -1 If this string is less than the test string
// 1 If ths string is greater than the test string
// 0 If the two strings are equal
_hzfunc("hzString::_cmpI(hzString&)") ;
_strItem* thisCtl ; // This string control area
_strItem* suppCtl ; // Supplied string control area
if (!m_addr) return s.m_addr ? -1 : 0 ;
if (!s.m_addr) return m_addr ? 1 : 0 ;
if (m_addr == s.m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
suppCtl = (_strItem*) _strXlate(s.m_addr) ;
return strcasecmp(thisCtl->_data(), suppCtl->_data()) ;
}
int32_t hzString::_cmp (const char* s) const
{
// Case sensitive compare based on strcmp
//
// Arguments: 1) s The null terminated test string
//
// Returns: -1 If this string is less than the test string
// 1 If ths string is greater than the test string
// 0 If the two strings are equal
_hzfunc("hzString::_cmp(char*)") ;
_strItem* thisCtl ; // This string control area
if (!m_addr) return (!s || !s[0]) ? 0 : -1 ;
if (!s || !s[0]) return m_addr ? 1 : 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return strcmp(thisCtl->_data(), s) ;
}
int32_t hzString::_cmpI (const char* s) const
{
// Case insensitive compare based on strcasecmp
//
// Arguments: 1) s The null terminated test string
//
// Returns: -1 If this string is less than the test string
// 1 If ths string is greater than the test string
// 0 If the two strings are equal
_hzfunc("hzString::_cmpI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return (!s || !s[0]) ? 0 : -1 ;
if (!s || !s[0]) return m_addr ? 1 : 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return strcasecmp(thisCtl->_data(), s) ;
}
int32_t hzString::_cmpF (const hzString& tS) const
{
    // Fast compare. Strings are ordered first by length and only then by content, so the result is NOT a lexical ordering. This method should be considered only for
    // collections where only uniqueness of entries is important.
    //
    // The content comparison is confined to the bytes of the string value. The earlier 32-bit chunk comparison read up to three bytes beyond the value (and, depending
    // on the length, beyond the string space itself), so its result could depend on whatever occupied the adjacent memory. It also read 32-bit values from addresses
    // that are not 4-byte aligned.
    //
    // Argument: tS  The test string
    //
    // Returns: -1  If this string is less than the test string
    //          1   If this string is greater than the test string
    //          0   If the two strings are equal

    _hzfunc("hzString::_cmpF") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   suppCtl ;   // Supplied string's control area
    uint32_t    nThis ;     // Length of this string
    uint32_t    nSupp ;     // Length of supplied string
    int         res ;       // Result of content comparison

    // Same string space (or both empty) means equal
    if (m_addr == tS.m_addr)
        return 0 ;
    if (!m_addr)
        return -1 ;
    if (!tS.m_addr)
        return 1 ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;
    suppCtl = (_strItem*) _strXlate(tS.m_addr) ;

    nThis = thisCtl->_getSize() ;
    nSupp = suppCtl->_getSize() ;

    if (nThis < nSupp)  return -1 ;
    if (nThis > nSupp)  return 1 ;

    res = memcmp(thisCtl->_data(), suppCtl->_data(), nThis) ;
    return res < 0 ? -1 : res > 0 ? 1 : 0 ;
}
bool hzString::_feq (const hzString& tS) const
{
    // Determine equality by fast compare technique: same string space, else same length and same bytes.
    //
    // The content comparison is confined to the bytes of the string value. The earlier 32-bit chunk comparison read up to three bytes beyond the value, which for some
    // lengths lie in the next string space, so two equal strings could be reported as different.
    //
    // Argument: tS  The test string
    //
    // Returns: True   If this string is equal to the supplied test string
    //          False  Otherwise

    _hzfunc("hzString::_feq") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   suppCtl ;   // Supplied string's control area
    uint32_t    nLen ;      // Common length

    // Same string space, or both empty
    if (m_addr == tS.m_addr)
        return true ;

    // Exactly one side empty
    if (!m_addr || !tS.m_addr)
        return false ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;
    suppCtl = (_strItem*) _strXlate(tS.m_addr) ;

    nLen = thisCtl->_getSize() ;
    if (nLen != suppCtl->_getSize())
        return false ;

    return memcmp(thisCtl->_data(), suppCtl->_data(), nLen) == 0 ;
}
/*
** hzString internal methods
*/
void hzString::_inc_copy (void) const
{
_strItem* thisCtl ; // This string's control area
thisCtl = (_strItem*) _strXlate(m_addr) ;
if (thisCtl->m_copy && thisCtl->m_copy < 50)
thisCtl->m_copy++ ;
}
void hzString::_dec_copy (void) const
{
_strItem* thisCtl ; // This string's control area
thisCtl = (_strItem*) _strXlate(m_addr) ;
if (thisCtl->m_copy <= 1)
printf("WARNING: STR _dec_copy would zero copy count\n") ;
else
thisCtl->m_copy-- ;
}
/*
** hzString public methods
*/
uint32_t hzString::Length (void) const
{
// Returns length of string, not including a null terminator
//
// Arguments: None
// Returns: Data length
_hzfunc("hzString::Length") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return thisCtl->_getSize() ;
}
uint32_t hzString::Copies (void) const
{
// Returns numbers of copies for diagnostics
//
// Arguments: None
// Returns: Data length
_hzfunc("hzString::Copies") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return thisCtl->m_copy ;
}
const char* hzString::operator* (void) const
{
    // Give access to the string value as a null terminated string.
    //
    // Arguments: None
    // Returns: Pointer to the value, or NULL if the string is empty

    _hzfunc("hzString::operator*") ;

    // NOTE(review): A member function is never validly called on a null instance, so compilers may remove this test
    if (!this)
        hzexit(E_CORRUPT, "No instance") ;

    return m_addr ? ((_strItem*) _strXlate(m_addr))->_data() : 0 ;
}
hzString::operator const char* (void) const
{
    // Conversion to a null terminated string.
    //
    // Arguments: None
    // Returns: Pointer to the value, or NULL if the string is empty

    _hzfunc("hzString::operator const char*") ;

    // NOTE(review): A member function is never validly called on a null instance, so compilers may remove this test
    if (!this)
        hzexit(E_CORRUPT, "No instance") ;

    return m_addr ? ((_strItem*) _strXlate(m_addr))->_data() : 0 ;
}
void hzString::Clear (void)
{
// Clear this string.
//
// The end result will be this string has a null pointer and the string space that was being pointed to, has its copy count reduced by 1. If this means the
// copy count falls to zero, then the string space shall be freed.
//
// Clears the string. Note that if other string instances share the same internal string space (have equal contents) then all that occurs is a decrement of
// the copy count. This leaves the string value intact maintaining the integrity of the other string instances. Only if there are no other string instances
// sharing the internal string space (copy count is zero) is the internal string space is released (deleted). In both cases the local internal string space
// pointer is then set to null. Subsequent setting of this hzString instance will then allocate fresh memory.
//
// Arguments: None
// Returns: None
_hzfunc("hzString::Clear()") ;
_strItem* thisCtl ; // This string's control area
uint32_t nLen ; // Length of string
if (m_addr)
{
thisCtl = (_strItem*) _strXlate(m_addr) ;
nLen = thisCtl->_getSize() ;
if (!nLen)
hzexit(E_CORRUPT, "Zero string size %u:%u", (m_addr&0xffff0000)>>16, m_addr&0xffff) ;
if (thisCtl->m_copy == 0)
{
printf("WARN - Zero copis of string size %u:%u (%s)", (m_addr&0xffff0000)>>16, m_addr&0xffff, thisCtl->_data()) ;
m_addr = 0 ;
return ;
}
if (thisCtl->m_copy && thisCtl->m_copy < 50)
{
if (_hzGlobal_MT)
{
if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
_strFree(m_addr, nLen) ;
}
else
{
thisCtl->m_copy-- ;
if (!thisCtl->m_copy)
_strFree(m_addr, nLen) ;
}
}
m_addr = 0 ;
}
}
// Index operator
const char hzString::operator[] (uint32_t nIndex) const
{
    // Returns the (char) value of the Nth position in the string value.
    //
    // A zero is returned if the string is empty or if the index is at or beyond the end of the value. The index equal to the length is now answered directly rather
    // than by reading the terminator byte, so the result does not depend on the terminator being in place.
    //
    // Argument: nIndex  Position of char to be returned
    //
    // Returns: Value of the Nth byte in the string if N is less than the length of the string, 0 otherwise

    _hzfunc("hzString::operator[]") ;

    _strItem*   thisCtl ;   // This string's control area

    if (!m_addr)
        return 0 ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;

    if (nIndex >= thisCtl->_getSize())
        return 0 ;

    return thisCtl->_data()[nIndex] ;
}
// Stream operator
#if 0
std::istream& operator>> (std::istream& is, hzString& str)
{
// Category: Data Input
//
// This facilitates appendage of the string value with data from a stream
//
// Arguments: 1) is Input stream
// 2) obj String to be populated by the read operation
//
// Returns: Reference to this string instance
_hzfunc("hzString::operator>>") ;
std::string s ; // STL string
std::getline(is, s) ;
if (!s.length())
std::getline(is, s) ;
str = s.c_str() ;
return is ;
}
#endif
std::ostream& operator<< (std::ostream& os, const hzString& obj)
{
    // Category: Data Output
    //
    // Write the string value to an output stream. Nothing is written if the string is empty.
    //
    // Arguments: 1) os   Output stream
    //            2) obj  String to be written out
    //
    // Returns: Reference to the output stream

    _hzfunc("hzString::operator<<") ;

    const char* pValue = *obj ;

    if (pValue)
        os << pValue ;

    return os ;
}
hzString& hzString::ToLower (void)
{
    // Convert string to all lower case.
    //
    // If the value contains no characters in the range A-Z the string is left as it is. Otherwise the converted value is written to newly allocated string space, this
    // instance gives up its hold on the original string space (which is freed if this was the last hold), and this instance adopts the new string space. Other
    // instances sharing the original string space are not affected.
    //
    // Arguments: None
    // Returns: Reference to this string instance

    _hzfunc("hzString::ToLower") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       i ;             // Iterator (original value)
    char*       j ;             // Iterator (new value)
    uint32_t    nLen ;          // Length of string
    uint32_t    count ;         // Char counter
    uint32_t    destAddr ;      // New string address
    bool        bChange ;       // Flag to indicate if operation changes content
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    nLen = thisCtl->_getSize() ;

    // Is change needed?
    bChange = false ;
    for (i = thisCtl->_data(), count = nLen ; count ; count--, i++)
    {
        if (*i >= 'A' && *i <= 'Z')
            { bChange = true ; break ; }
    }
    if (!bChange)
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    // Populate the new string space. The cast to unsigned char keeps bytes of 0x80 and above within the range tolower() is defined for.
    i = thisCtl->_data() ;
    j = destCtl->_data() ;
    for (count = nLen ; count ; count--)
        *j++ = (char) tolower((unsigned char) *i++) ;
    *j = 0 ;

    // Tidy up. Previously the original string space was only ever freed in multithreaded mode, so single threaded programs leaked it.
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, nLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::ToUpper (void)
{
    // Convert string to all upper case.
    //
    // If the value contains no characters in the range a-z the string is left as it is. Otherwise the converted value is written to newly allocated string space, this
    // instance gives up its hold on the original string space (which is freed if this was the last hold), and this instance adopts the new string space. Other
    // instances sharing the original string space are not affected.
    //
    // Arguments: None
    // Returns: Reference to this string instance

    _hzfunc("hzString::ToUpper") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       i ;             // Iterator (original value)
    char*       j ;             // Iterator (new value)
    uint32_t    nLen ;          // Length of string
    uint32_t    count ;         // Char counter
    uint32_t    destAddr ;      // New string address
    bool        bChange ;       // Flag to indicate if operation changes content
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    nLen = thisCtl->_getSize() ;

    // Is change needed?
    bChange = false ;
    for (i = thisCtl->_data(), count = nLen ; count ; count--, i++)
    {
        if (*i >= 'a' && *i <= 'z')
            { bChange = true ; break ; }
    }
    if (!bChange)
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    // Populate the new string space. The cast to unsigned char keeps bytes of 0x80 and above within the range toupper() is defined for.
    i = thisCtl->_data() ;
    j = destCtl->_data() ;
    for (count = nLen ; count ; count--)
        *j++ = (char) toupper((unsigned char) *i++) ;
    *j = 0 ;

    // Tidy up. In multithreaded mode the decision to free is taken from the result of the atomic decrement itself. Reading the count again afterwards could let two
    // threads both see zero and both free the string space.
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, nLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::UrlEncode (bool bResv)
{
    // Performs URL-Encoding on the current string content.
    //
    // Each char that needs encoding is replaced by a percent sign followed by two lower case hex digits. Percent signs already in the value are passed through as
    // they are (the string may already be encoded), as are chars accepted by IsUrlnorm. Chars accepted by IsUrlresv are encoded only if bResv is true. If no char
    // needs encoding the string is left as it is.
    //
    // Argument: bResv  If false only chars that are neither normal nor reserved URL chars are encoded. If true the reserved URL chars are encoded as well.
    //
    // Returns: Reference to this string instance

    _hzfunc("hzString::UrlEncode") ;

    static const char   hexDigits[] = "0123456789abcdef" ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       i ;             // For iteration (original value)
    char*       j ;             // For iteration (new value)
    uint32_t    oldLen ;        // Length of original string
    uint32_t    newLen ;        // Length of encoded string
    uint32_t    destAddr ;      // New string address
    uchar       val ;           // Byte being encoded
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Is change needed? Each char to be encoded will occupy 3 chars in the new string
    oldLen = newLen = thisCtl->_getSize() ;
    for (i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT || IsUrlnorm(*i))
            continue ;
        if (IsUrlresv(*i) && !bResv)
            continue ;
        newLen += 2 ;
    }

    if (newLen == oldLen)
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(newLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(newLen) ;
    destCtl->m_copy = 1 ;

    j = destCtl->_data() ;
    for (i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT || IsUrlnorm(*i) || (IsUrlresv(*i) && !bResv))
        {
            *j++ = *i ;
            continue ;
        }

        // The byte is taken as unsigned so that values of 0x80 and above give exactly two hex digits. Converting a negative char straight to a 32-bit value gave
        // eight digits, which overran both the conversion buffer and the space counted for the char.
        val = (uchar) *i ;
        *j++ = CHAR_PERCENT ;
        *j++ = hexDigits[val >> 4] ;
        *j++ = hexDigits[val & 0x0f] ;
    }
    *j = 0 ;

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, oldLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::UrlDecode (void)
{
    // Performs URL-Decoding on the current string content. Each percent sign that is followed by two hex chars is replaced by the single byte the hex chars stand
    // for. If the current string does not contain such sequences, the string will be unchanged.
    //
    // Arguments: None
    // Returns: Reference to this string instance

    _hzfunc("hzString::UrlDecode") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       i ;             // Pointer into old string data
    char*       j ;             // Pointer into new string data
    uint32_t    oldLen ;        // Length of original string
    uint32_t    destAddr ;      // New string space address
    uint32_t    newLen = 0 ;    // Length of new string
    uint32_t    val ;           // Hex value
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    oldLen = thisCtl->_getSize() ;

    // Is change needed? Not unless there are incidences of a percent followed by two hex chars. The null terminator fails IsHex so the lookahead stays in the string.
    for (i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT && IsHex(i[1]) && IsHex(i[2]))
            i += 2 ;
        newLen++ ;
    }

    if (newLen == oldLen)
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(newLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(newLen) ;
    destCtl->m_copy = 1 ;

    // Create new string
    for (j = destCtl->_data(), i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT && IsHex(i[1]) && IsHex(i[2]))
        {
            i++ ;
            val = (*i >='0' && *i <='9' ? *i-'0' : *i >= 'A' && *i<= 'F' ? *i+10-'A' : *i >= 'a' && *i<='f' ? *i+10-'a' : 0) ;
            val *= 16 ;
            i++ ;
            val += (*i >='0' && *i <='9' ? *i-'0' : *i >= 'A' && *i<= 'F' ? *i+10-'A' : *i >= 'a' && *i<='f' ? *i+10-'a' : 0) ;
            *j++ = (char) val ;
            continue ;
        }

        *j++ = *i ;
    }
    *j = 0 ;

    // Tidy up. In multithreaded mode the copy count is reduced atomically, as it is in Clear().
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, oldLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::Reverse (void)
{
    // Reverse the string content. The reversed value is written to newly allocated string space, so other instances sharing the original string space are not
    // affected.
    //
    // Arguments: None
    // Returns: Reference to this string in all cases

    _hzfunc("hzString::Reverse") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       pTgt ;          // Target internal value
    char*       pSrc ;          // Source internal value
    uint32_t    destAddr ;      // New string address
    uint32_t    nLen ;          // Length
    uint32_t    n ;             // Position in target
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Allocate new space
    nLen = thisCtl->_getSize() ;
    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    // Create new string value as the reverse of the original. Every position of the target is written. The earlier loop stopped where its ascending and descending
    // indexes met, which left the second half of the target empty.
    pSrc = thisCtl->_data() ;
    pTgt = destCtl->_data() ;
    for (n = 0 ; n < nLen ; n++)
        pTgt[n] = pSrc[nLen - 1 - n] ;
    pTgt[nLen] = 0 ;

    // Tidy up. In multithreaded mode the copy count is reduced atomically, as it is in Clear().
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, nLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::Truncate (uint32_t limit)
{
    // Truncate the string to at most the given number of bytes. A limit at or beyond the current length leaves the string unchanged. A limit of zero empties the
    // string. The truncated value is written to newly allocated string space, so other instances sharing the original string space are not affected.
    //
    // Argument: limit  Sets max length for the string and truncates beyond this.
    //
    // Returns: Reference to this string in all cases

    _hzfunc("hzString::Truncate") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New control area
    uint32_t    oldLen ;        // Length of original string
    uint32_t    destAddr = 0 ;  // New string address (stays 0 for a full truncation)
    bool        bFree ;         // True if the original string space is to be freed

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    oldLen = thisCtl->_getSize() ;

    // Do nothing if the size limit is not below the length of the string value
    if (limit >= oldLen)
        return *this ;

    if (limit)
    {
        // Partial truncation: copy the leading part to new string space and terminate it explicitly
        destAddr = _strAlloc(limit) ;
        destCtl = (_strItem*) _strXlate(destAddr) ;
        destCtl->_setSize(limit) ;
        memcpy(destCtl->_data(), thisCtl->_data(), limit) ;
        destCtl->_data()[limit] = 0 ;
        destCtl->m_copy = 1 ;
    }

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, oldLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString& hzString::TruncateUpto (const char* patern)
{
    // Category: Text Processing
    //
    // Conditional, content based truncation. If the supplied pattern occurs in the string, the string is truncated to everything before the first instance of the
    // pattern. If the pattern is null or empty, the string is empty, or the pattern does not occur, the string is unchanged.
    //
    // Argument: patern  The pattern at which the string will be truncated.
    //
    // Returns: Reference to this string in all cases

    _hzfunc("hzString::TruncateUpto") ;

    // Nothing to search for, or nothing to search in
    if (!patern || !patern[0] || !m_addr)
        return *this ;

    _strItem*   pCtl = (_strItem*) _strXlate(m_addr) ;
    const char* pStart = (char*) pCtl->_data() ;
    const char* pHit = strstr(pStart, patern) ;

    if (!pHit)
        return *this ;

    // The number of bytes ahead of the pattern is the new length
    return Truncate((uint32_t) (pHit - pStart)) ;
}
hzString& hzString::TruncateBeyond (const char* patern)
{
    // Category: Text Processing
    //
    // Conditional, content based truncation. If the supplied pattern occurs in the string, the new value is everything beyond (and not including) the first instance
    // of the pattern. If nothing follows the pattern the string becomes empty. If the pattern is null or empty or does not occur, the string is unchanged.
    //
    // The new string space is sized by the length of the part being kept. It was previously sized by the length of the part ahead of the pattern, which overran the
    // new space whenever the kept part was the longer, and requested a zero size allocation (a fatal error) when the pattern was at the start of the string.
    //
    // Argument: patern  The pattern beyond which the string content is kept.
    //
    // Returns: Reference to this string in all cases

    _hzfunc("hzString::TruncateBeyond") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New control area
    char*       pFound ;        // First instance of pattern in the string value
    char*       pTail ;         // Start of the part to be kept
    uint32_t    oldLen ;        // Length of original string
    uint32_t    destAddr = 0 ;  // New string address (stays 0 if nothing follows the pattern)
    uint32_t    newLen ;        // New string length
    bool        bFree ;         // True if the original string space is to be freed

    // If no pattern supplied, do nothing
    if (!patern || !patern[0])
        return *this ;

    // If NULL return
    if (!m_addr)
        return *this ;

    // Test if change required
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    oldLen = thisCtl->_getSize() ;
    pFound = strstr(thisCtl->_data(), patern) ;
    if (!pFound)
        return *this ;

    pTail = pFound + strlen(patern) ;
    newLen = (uint32_t) strlen(pTail) ;

    if (newLen)
    {
        // Allocate new space and copy the kept part
        destAddr = _strAlloc(newLen) ;
        destCtl = (_strItem*) _strXlate(destAddr) ;
        destCtl->_setSize(newLen) ;
        destCtl->m_copy = 1 ;
        memcpy(destCtl->_data(), pTail, newLen) ;
        destCtl->_data()[newLen] = 0 ;
    }

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bFree = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bFree = (--thisCtl->m_copy == 0) ;

        if (bFree)
            _strFree(m_addr, oldLen) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&	hzString::DelWhiteLead	(void)
{
	// Strip leading whitespace. Bytes at the front of the value that compare <= CHAR_SPACE are dropped. A null string, or one with no such leading bytes, is
	// left alone. If every byte qualifies the string is cleared. Otherwise the remainder is copied to a newly allocated string space.
	//
	// NOTE(review): the test is a plain char comparison, so where char is signed, bytes of 0x80 and above would presumably also count as whitespace. Confirm
	// this is intended for multi-byte content.
	//
	// Arguments:	None
	// Returns:		Reference to this string in all cases

	_hzfunc("hzString::DelWhiteLead") ;

	if (!m_addr)
		return *this ;

	_strItem*	srcCtl = (_strItem*) _strXlate(m_addr) ;	// Control area of the current value
	uint32_t	total = Length() ;							// Length of the current value
	const char*	pVal = (char*) srcCtl->_data() ;			// Current value
	uint32_t	nSkip = 0 ;									// Number of leading bytes to drop

	while (nSkip < total && pVal[nSkip] <= CHAR_SPACE)
		nSkip++ ;

	// No leading whitespace, no change
	if (!nSkip)
		return *this ;

	// All whitespace, nothing would remain
	if (nSkip == total)
	{
		Clear() ;
		return *this ;
	}

	// Build the shortened value in new string space
	uint32_t	keep = total - nSkip ;				// Size of the value once the leading bytes are gone
	uint32_t	destAddr = _strAlloc(keep) ;		// Address of the new string space

	if (!destAddr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", keep) ;

	_strItem*	destCtl = (_strItem*) _strXlate(destAddr) ;	// Control area of the new value

	destCtl->_setSize(keep) ;
	memcpy(destCtl->_data(), srcCtl->_data() + nSkip, keep) ;
	destCtl->m_copy = 1 ;

	// Let go of the old value, freeing it if this was the last holder
	if (srcCtl->m_copy && srcCtl->m_copy < 50)
	{
		bool	bRelease ;	// Set if the copy count has reached zero

		if (_hzGlobal_MT)
			bRelease = (__sync_add_and_fetch(&(srcCtl->m_copy), -1) == 0) ;
		else
			bRelease = (--srcCtl->m_copy == 0) ;

		if (bRelease)
			_strFree(m_addr, srcCtl->_getSize()) ;
	}

	m_addr = destAddr ;
	return *this ;
}
hzString&	hzString::DelWhiteTrail	(void)
{
	// Strip trailing whitespace. Bytes at the end of the value that compare <= CHAR_SPACE are dropped. A null string, or one with no such trailing bytes,
	// is left alone. If every byte qualifies the string is cleared. Otherwise the retained part is copied to a newly allocated string space and the old
	// value is released by means of Clear().
	//
	// Arguments:	None
	// Returns:		Reference to this string in all cases

	_hzfunc("hzString::DelWhiteTrail") ;

	if (!m_addr)
		return *this ;

	_strItem*	srcCtl = (_strItem*) _strXlate(m_addr) ;	// Control area of the current value
	const char*	pVal = (char*) srcCtl->_data() ;			// Current value
	uint32_t	total = Length() ;							// Length of the current value
	uint32_t	keep = total ;								// Number of bytes to retain

	// Walk back from the end while whitespace is found
	while (keep > 0 && pVal[keep - 1] <= CHAR_SPACE)
		keep-- ;

	// No trailing whitespace, no change
	if (keep == total)
		return *this ;

	// All whitespace, nothing would remain
	if (!keep)
	{
		Clear() ;
		return *this ;
	}

	// Build the shortened value in new string space
	uint32_t	destAddr = _strAlloc(keep) ;	// Address of the new string space

	if (!destAddr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", keep) ;

	_strItem*	destCtl = (_strItem*) _strXlate(destAddr) ;	// Control area of the new value

	destCtl->_setSize(keep) ;
	memcpy(destCtl->_data(), srcCtl->_data(), keep) ;
	destCtl->m_copy = 1 ;

	// Release the old value, then adopt the new one
	Clear() ;
	m_addr = destAddr ;
	return *this ;
}
hzString&	hzString::TopAndTail	(void)
{
	// Text substitution
	//
	// Strip whitespace from both ends of this string by applying DelWhiteLead() and then DelWhiteTrail().
	//
	// Arguments:	None
	// Returns:		Reference to this string in all cases

	_hzfunc("hzString::TopAndTail") ;

	// Both calls operate on, and return a reference to, this instance
	return DelWhiteLead().DelWhiteTrail() ;
}
hzString&	hzString::Replace	(const char* strA, const char* strB)
{
	// Text substitution
	//
	// Replace within this string, all instances of strA with strB. Instances are found scanning left to right and do not overlap. This string is unchanged
	// if it is null, if strA is null or empty, or if strA does not exist within it.
	//
	// Arguments:	1)	strA	Pattern to be substituted out
	//				2)	strB	Pattern to be used instead. If null or empty, instances of strA are simply removed.
	//
	// Returns:	Reference to this string in all cases

	_hzfunc("hzString::Replace") ;

	hzChain		Z ;			// Working chain buffer
	_strItem*	thisCtl ;	// This string's control area
	const char*	i ;			// Pointer into string data
	uint32_t	nLen ;		// Length of strA

	// An empty pattern can never match, so treat it the same as no pattern
	if (!strA || !strA[0])
		return *this ;
	if (!m_addr)
		return *this ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	nLen = strlen(strA) ;
	i = (char*) thisCtl->_data() ;

	// Avoid building the chain at all if there is nothing to replace
	if (!strstr(i, strA))
		return *this ;

	for (; *i ;)
	{
		// strncmp stops at the null terminator, so a test near the end of the value never reads beyond it
		if (*i == strA[0] && !strncmp(i, strA, nLen))
		{
			if (strB && strB[0])
				Z << strB ;
			i += nLen ;
			continue ;
		}

		Z.AddByte(*i) ;
		i++ ;
	}

	// At least one instance was replaced (strstr found one), so adopt the new value
	Clear() ;
	operator=(Z) ;
	return *this ;
}
hzEcode	hzString::SetValue	(const char* cpStr, uint32_t nLen)
{
	// Set this string to the first nLen bytes of a char sequence that need not be null terminated. Any existing value is cleared first.
	//
	// Arguments:	1)	cpStr	Start of the char sequence
	//				2)	nLen	Number of bytes to take
	//
	// Returns:	E_OK	If the value was set, or cpStr is null or starts with a null byte (string left empty)
	//			E_RANGE	If nLen is zero or exceeds HZSTRING_MAXLEN. In this case the string is set to _hzString_TooLong.

	_hzfunc("hzString::SetValue(a)") ;

	Clear() ;

	// No source, so the string stays empty
	if (!cpStr || !cpStr[0])
		return E_OK ;

	// Reject lengths outside the permitted range
	const bool	bInRange = (nLen != 0 && !(nLen > HZSTRING_MAXLEN)) ;

	if (!bInRange)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	_strItem*	pNew = (_strItem*) _strXlate(m_addr) ;	// Control area of the new string space

	pNew->_setSize(nLen) ;
	memcpy(pNew->_data(), cpStr, nLen) ;
	pNew->m_copy = 1 ;

	return E_OK ;
}
hzEcode	hzString::SetValue	(const char* cpStr, const char* cpTerm)
{
	// Set this string to the bytes starting at cpStr and ending just before the address cpTerm, or just before the first null byte if that comes sooner.
	// Any existing value is cleared first.
	//
	// Note that cpTerm is compared as an address. It is not searched for as a pattern.
	//
	// Arguments:	1)	cpStr	Start of the char sequence
	//				2)	cpTerm	Address at which copying stops (the byte at this address is not copied)
	//
	// Returns:	E_OK	If the value was set, or there was nothing to copy (string left empty)
	//			E_RANGE	If the length exceeds HZSTRING_MAXLEN. In this case the string is set to _hzString_TooLong.

	_hzfunc("hzString::SetValue(b)") ;

	_strItem*	destCtl ;	// New string space
	const char*	i ;			// Source string iterator
	uint32_t	nLen ;		// Length to be allocated

	Clear() ;

	if (!cpStr || !cpStr[0])
		return E_OK ;

	for (nLen = 0, i = cpStr ; *i && i != cpTerm ; i++, nLen++) ;

	// cpTerm coincides with cpStr: the result is the empty string, so do not attempt a zero-length allocation
	if (!nLen)
		return E_OK ;

	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	destCtl = (_strItem*) _strXlate(m_addr) ;
	destCtl->_setSize(nLen) ;
	memcpy(destCtl->_data(), cpStr, nLen) ;
	destCtl->_data()[nLen] = 0 ;	// The source is not terminated at this point, so terminate explicitly
	destCtl->m_copy = 1 ;

	return E_OK ;
}
hzEcode	hzString::SetValue	(const char* cpStr, char termchar)
{
	// Set this string to the bytes starting at cpStr and ending just before the first instance of the terminator char, or just before the first null byte
	// if that comes sooner. Any existing value is cleared first.
	//
	// Arguments:	1)	cpStr		Start of the char sequence
	//				2)	termchar	The terminator char
	//
	// Returns:	E_RANGE	If the length exceeds HZSTRING_MAXLEN. In this case the string is set to _hzString_TooLong.
	//			E_OK	If the value was set, or there was nothing to copy (string left empty)

	_hzfunc("hzString::SetValue(c)") ;

	_strItem*	destCtl ;	// New string space
	const char*	i ;			// Source string iterator
	uint32_t	nLen ;		// Length to be allocated

	Clear() ;

	if (!cpStr || !cpStr[0])
		return E_OK ;

	for (nLen = 0, i = cpStr ; *i && *i != termchar ; i++, nLen++) ;

	// Source starts with the terminator: the result is the empty string, so do not attempt a zero-length allocation
	if (!nLen)
		return E_OK ;

	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	destCtl = (_strItem*) _strXlate(m_addr) ;
	destCtl->_setSize(nLen) ;
	memcpy(destCtl->_data(), cpStr, nLen) ;
	destCtl->_data()[nLen] = 0 ;	// The source is not terminated at this point, so terminate explicitly
	destCtl->m_copy = 1 ;

	return E_OK ;
}
hzString	hzString::SubString	(uint32_t nPosn, uint32_t nBytes) const
{
	// Purpose:	Produce a substring of this string. An empty string is returned if this string is null or the requested position is at or beyond the end
	//			of this string. A shorter string is returned if the requested length goes beyond the end of this string.
	//
	// Arguments:	1)	nPosn	Starting offset within this string.
	//				2)	nBytes	Length from here. A value of 0 indicates remainder of this string.
	//
	// Returns:	Instance of hzString by value being the substring result

	_hzfunc("hzString::SubString") ;

	_strItem*	thisCtl ;		// This string's control area
	_strItem*	destCtl ;		// Result string control area
	hzString	Dest ;			// Target string
	uint32_t	nLen ;			// Length of this string
	uint32_t	nRemainder ;	// Remainder of original past the stated position

	if (!m_addr)
		return Dest ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	nLen = Length() ;

	// Test the position before subtracting. These are unsigned values, so subtracting first would wrap round for positions beyond the end.
	if (nPosn >= nLen)
		return Dest ;
	nRemainder = nLen - nPosn ;

	if (nBytes == 0 || nBytes > nRemainder)
		nBytes = nRemainder ;

	Dest.m_addr = _strAlloc(nBytes) ;
	if (!Dest.m_addr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", nBytes) ;
	destCtl = (_strItem*) _strXlate(Dest.m_addr) ;

	// Copy from the stated position
	destCtl->_setSize(nBytes) ;
	memcpy(destCtl->_data(), thisCtl->_data() + nPosn, nBytes) ;
	destCtl->_data()[nBytes] = 0 ;	// A substring ending short of the original's end has no terminator in the source, so terminate explicitly
	destCtl->m_copy = 1 ;

	return Dest ;
}
// Find first/last instance of a test char in this string (I denotes case insensitive)
int32_t hzString::First (const char c) const
{
_hzfunc("hzString::First") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrFirst(thisCtl->_data(), c) ;
}
int32_t hzString::FirstI (const char c) const
{
_hzfunc("hzString::FirstI") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrFirstI(thisCtl->_data(), c) ;
}
int32_t hzString::Last (const char c) const
{
_hzfunc("hzString::Last") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLast(thisCtl->_data(), c) ;
}
int32_t hzString::LastI (const char c) const
{
_hzfunc("hzString::LastI") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLastI(thisCtl->_data(), c) ;
}
// Find first/last instance of a test cstr in this string (I denotes case insensitive)
int32_t hzString::First (const char* str) const
{
_hzfunc("hzString::First(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrFirst(thisCtl->_data(), str) ;
}
int32_t hzString::FirstI (const char* str) const
{
_hzfunc("hzString::FirstI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrFirstI(thisCtl->_data(), str) ;
}
int32_t hzString::Last (const char* str) const
{
_hzfunc("hzString::Last(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLast(thisCtl->_data(), str) ;
}
int32_t hzString::LastI (const char* str) const
{
_hzfunc("hzString::LastI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLastI(thisCtl->_data(), str) ;
}
// Find first/last instance of a test string in this string (I denotes case insensitive)
int32_t hzString::First (const hzString& S) const
{
_hzfunc("hzString::First(hzStr)") ;
_strItem* thisCtl ; // This string's control area
_strItem* suppCtl ; // This string's control area
const char* test = 0 ; // Supplied string value
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
if (S.m_addr)
{
suppCtl = (_strItem*) _strXlate(S.m_addr) ;
test = suppCtl->_data() ;
}
return CstrFirst(thisCtl->_data(), test) ;
}
int32_t	hzString::FirstI	(const hzString& S) const
{
	// Look for the first instance of the supplied string's value in this string, by passing both values to CstrFirstI() (the case insensitive variant).
	//
	// Arguments:	1)	S	The string whose value is to be looked for
	//
	// Returns:	-1 if this string is null, otherwise the result of CstrFirstI()

	_hzfunc("hzString::FirstI(hzStr)") ;	// Was misspelt "FirstiI", which mislabelled this function in traces

	_strItem*	thisCtl ;	// This string's control area

	if (!m_addr)
		return -1 ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	return CstrFirstI(thisCtl->_data(), *S) ;
}
int32_t hzString::Last (const hzString& S) const
{
_hzfunc("hzString::Last(hzStr)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLast(thisCtl->_data(), *S) ;
}
int32_t hzString::LastI (const hzString& S) const
{
_hzfunc("hzString::LastI(hzStr)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrLastI(thisCtl->_data(), *S) ;
}
bool	hzString::Contains	(const char c) const
{
	// Test if the string contains one or more instances of a test char. Every byte of the value is examined, from the first to the last. The terminator
	// is not part of the value, so a test char of 0 is never reported as contained.
	//
	// Arguments:	1)	c	The char to test for
	//
	// Returns:	True	If the test char exists within the string
	//			False	otherwise (including when this string is null)

	_hzfunc("hzString::Contains(char)") ;

	if (!m_addr)
		return false ;

	_strItem*	thisCtl ;	// This string's control area
	const char*	i ;			// Pointer into string data

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	i = (char*) thisCtl->_data() ;

	// Scan offsets 0 to Length()-1. The previous loop tested offsets Length() down to 1, so missed the first char and tested the terminator instead.
	return memchr(i, c, Length()) != 0 ;
}
bool hzString::Contains (const char* cpNeedle) const
{
// Test if the string contains a char string (case sensitive)
//
// Arguments: 1) cpStr The char sequence to test for
//
// Returns: True If this string contains the supplied test sequence
// False Otherwise
_hzfunc("hzString::Contains(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!cpNeedle || !cpNeedle[0])
return true ;
if (!m_addr)
return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return strstr((char*) thisCtl->_data(), cpNeedle) ? true : false ;
}
bool hzString::ContainsI (const char* cpNeedle) const
{
// Test if the string contains a char string (case insensitive)
//
// Arguments: 1) cpStr The char sequence to test for
//
// Returns: True If this string is lexically eqivelent to the supplied cstr
// False Otherwise
_hzfunc("hzString::ContainsI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!cpNeedle || !cpNeedle[0])
return true ;
if (!m_addr)
return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrContainsI((char*) thisCtl->_data(), cpNeedle) ? true : false ;
}
bool hzString::Equiv (const char* cpStr) const
{
// Test if this string is equal to the operand char string (on a case insensitive basis)
//
// Arguments: 1) cpStr The char sequence to test for
//
// Returns: True If this string is lexically eqivelent to the supplied cstr
// False Otherwise
_hzfunc("hzString::Equiv") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr)
return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return CstrCompareI(cpStr, (char*) thisCtl->_data()) == 0 ? true : false ;
}
/*
** Assignment operators
*/
hzString&	hzString::operator=	(const hzString& op)
{
	// Purpose:	Make this string share the value of the operand.
	//
	// Nothing happens if both strings already hold the same internal address. Otherwise any existing value of this string is cleared and, if the operand
	// has a value, the operand's copy count is incremented (unless it has already reached 50) and this string takes on the operand's address.
	//
	// NOTE(review): the copy count is incremented with a plain ++ whether or not _hzGlobal_MT is set; an atomic increment is present only as commented-out
	// code in the original. Confirm whether that is intentional.
	//
	// Argument:	op	Reference to the operand hzString instance.
	//
	// Returns:	Reference to this string instance

	_hzfunc("hzString::operator=(hzString&)") ;

	if (!this)
		hzerr(E_CORRUPT, "No instance") ;

	if (m_addr != op.m_addr)
	{
		// Let go of the current value, if any
		if (m_addr)
			Clear() ;

		// Share the operand's value, if it has one
		if (op.m_addr)
		{
			_strItem*	srcCtl = (_strItem*) _strXlate(op.m_addr) ;	// Operand's control area

			if (srcCtl->m_copy < 50)
				srcCtl->m_copy++ ;

			m_addr = op.m_addr ;
		}
	}

	return *this ;
}
hzString&	hzString::operator=	(const hzChain& C)
{
	// Set this string to the content of the supplied chain. Any existing value is cleared first. An empty chain leaves the string empty. A chain whose
	// size is HZSTRING_MAXLEN or more is not copied; the string is set to _hzString_TooLong instead.
	//
	// Arguments:	1)	C	The operand chain
	// Returns:		Reference to this string instance

	_hzfunc("hzString::op=(hzChain&)") ;

	if (m_addr)
		Clear() ;

	// Empty chain, empty string
	if (!C.Size())
		return *this ;

	// Chain too large to hold as a string
	if (C.Size() >= HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	// Create new internal structure
	m_addr = _strAlloc(C.Size()) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Could not allocate internal buffer") ;

	_strItem*	pNew = (_strItem*) _strXlate(m_addr) ;	// Control area of the new string space
	chIter		zi ;									// Chain iterator
	char*		pOut ;									// Next byte to write in the new value

	pNew->_setSize(C.Size()) ;
	pOut = pNew->_data() ;

	// Copy the chain content a byte at a time, then terminate
	zi = C ;
	while (!zi.eof())
	{
		*pOut = *zi ;
		pOut++ ;
		zi++ ;
	}
	*pOut = 0 ;
	pNew->m_copy = 1 ;

	return *this ;
}
hzString&	hzString::operator=	(const char* cpStr)
{
	// Set the value of this hzString instance to the operand null terminated char string. Note that it is possible for the operand to point into this
	// string's own value. For this reason the existing value is not cleared until the new buffer has been allocated and populated.
	//
	// A null or empty operand clears the string. An operand longer than HZSTRING_MAXLEN sets the string to _hzString_TooLong.
	//
	// Arguments:	1)	cpStr	The operand null terminated char string.
	// Returns:		Reference to this string instance

	_hzfunc("hzString::op=(const char*)") ;

	_strItem*	destCtl ;	// New string space
	uint32_t	destAddr ;	// Address of new string space
	uint32_t	nLen ;		// Required length of new string

	if (!cpStr || !cpStr[0])
	{
		Clear() ;
		return *this ;
	}

	nLen = strlen(cpStr) ;
	if (nLen > HZSTRING_MAXLEN)
	{
		Clear() ;
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	// Build the new value first, while the operand is certain to be intact
	destAddr = _strAlloc(nLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes for value [%s]", nLen, cpStr) ;

	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nLen) ;
	strcpy(destCtl->_data(), cpStr) ;
	destCtl->m_copy = 1 ;

	// Only now release the old value (which the operand may have been part of) and adopt the new one
	Clear() ;
	m_addr = destAddr ;

	return *this ;
}
hzString&	hzString::operator=	(const uchar* cpStr)
{
	// Set the value of this hzString instance to the operand null terminated unsigned char string. Note that it is possible for the operand to point into
	// this string's own value. For this reason the existing value is not cleared until the new buffer has been allocated and populated.
	//
	// A null or empty operand clears the string. An operand longer than HZSTRING_MAXLEN sets the string to _hzString_TooLong.
	//
	// Arguments:	1)	cpStr	The operand null terminated unsigned char string.
	// Returns:		Reference to this string instance

	_hzfunc("hzString::op=(const char*)") ;

	_strItem*	destCtl ;	// New string space
	uint32_t	destAddr ;	// Address of new string space
	uint32_t	nLen ;		// Required length of new string

	if (!cpStr || !cpStr[0])
	{
		Clear() ;
		return *this ;
	}

	nLen = strlen((char*) cpStr) ;
	if (nLen > HZSTRING_MAXLEN)
	{
		Clear() ;
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	// Build the new value first, while the operand is certain to be intact
	destAddr = _strAlloc(nLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes for value [%s]", nLen, cpStr) ;

	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nLen) ;
	strcpy((char*) destCtl->_data(), (char*) cpStr) ;
	destCtl->m_copy = 1 ;

	// Only now release the old value (which the operand may have been part of) and adopt the new one
	Clear() ;
	m_addr = destAddr ;

	return *this ;
}
// FnGrp: operator+
// Category: Text Processing
//
// Concatenate two strings to form a new string. Neither of the two input strings is affected in the process.
//
// Variants: 1) Add two strings
// 2) Add a string and a cstr
// 3) Add a cstr and a string
//
// Note there is no char* plus char* operator.
//
// Arguments: 1) a The first string (as char* or hzString)
// 2) b The second string (as char* or hzString)
//
// Returns: Instance of new hzString by value
hzString	operator+	(const hzString a, const hzString b)
{
	// Variant 1: string plus string. The result starts as a and has b appended.

	_hzfunc("friend hzString operator+(1)") ;

	hzString	sum ;	// Result of the concatenation

	(sum = a) += b ;
	return sum ;
}
hzString	operator+	(const hzString a, const char* cpStr)
{
	// Variant 2: string plus cstr. The result starts as a and has cpStr appended.

	_hzfunc("friend hzString operator+(2)") ;

	hzString	sum ;	// Result of the concatenation

	(sum = a) += cpStr ;
	return sum ;
}
hzString	operator+	(const char* cpStr, const hzString S)
{
	// Variant 3: cstr plus string. The result starts as cpStr and has S appended.

	_hzfunc("friend hzString operator+(3)") ;

	hzString	sum ;	// Result of the concatenation

	(sum = cpStr) += S ;
	return sum ;
}
/*
** Appending operators
*/
hzString&	hzString::operator+=	(const hzString& op)
{
	// Append the operand string to the contents of this. The combined value is always built in newly allocated string space, after which this instance
	// gives up its hold on the old value. A null operand leaves this string unchanged.
	//
	// NOTE(review): the copy count of the old value is decremented with a plain -- whether or not _hzGlobal_MT is set; an atomic decrement is present only
	// as commented-out code in the original. Confirm whether that is intentional.
	//
	// Arguments:	1)	op	Operand string
	// Returns:		Reference to this string in all cases

	_hzfunc("hzString::op+=(hzString&)") ;

	_strItem*	thisCtl = 0 ;	// This string control area
	_strItem*	suppCtl ;		// Supplied string control area
	_strItem*	destCtl ;		// Result string space
	uint32_t	destAddr ;		// New string address
	uint32_t	crlen ;			// Len of this string
	uint32_t	nulen ;			// Len of combined string

	// If operand is empty do nothing
	if (!op.m_addr)
		return *this ;
	suppCtl = (_strItem*) _strXlate(op.m_addr) ;

	crlen = 0 ;
	if (m_addr)
	{
		thisCtl = (_strItem*) _strXlate(m_addr) ;
		crlen = thisCtl->_getSize() ;
	}

	// Calculate required length
	nulen = crlen + suppCtl->_getSize() ;

	// Allocate and populate a new buffer. The allocation is checked before use, as it is elsewhere in this class.
	destAddr = _strAlloc(nulen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", nulen) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nulen) ;

	if (crlen)
		memcpy(destCtl->_data(), thisCtl->_data(), crlen) ;
	memcpy(destCtl->_data() + crlen, suppCtl->_data(), suppCtl->_getSize() + 1) ;	// +1 brings the operand's null terminator across
	destCtl->m_copy = 1 ;

	// Tidy up: drop this instance's hold on the old value and free it if that was the last one
	if (thisCtl && thisCtl->m_copy && thisCtl->m_copy < 50)
	{
		thisCtl->m_copy-- ;
		if (!thisCtl->m_copy)
			_strFree(m_addr, thisCtl->_getSize()) ;
	}

	m_addr = destAddr ;
	return *this ;
}
hzString&	hzString::operator+=	(const char* cpStr)
{
	// Append the operand char string to the contents of this. If this string is null it is simply assigned the operand. Otherwise the combined value is
	// built in newly allocated string space, after which this instance gives up its hold on the old value. A null or empty operand leaves this string
	// unchanged.
	//
	// Arguments:	1)	cpStr	Operand null terminated char string
	// Returns:		Reference to this string

	_hzfunc("hzString::op+=(const char*)") ;

	_strItem*	thisCtl ;	// This string's control area
	_strItem*	destCtl ;	// Result string space
	uint32_t	destAddr ;	// New string address
	uint32_t	strLen ;	// Len of operand string
	uint32_t	oldLen ;	// Len of this string

	// If operand is empty do nothing
	if (!cpStr || !cpStr[0])
		return *this ;

	// If this string is empty, make it equal to the supplied string
	if (!m_addr)
		{ operator=(cpStr) ; return *this ; }

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	oldLen = thisCtl->_getSize() ;
	strLen = strlen(cpStr) ;

	// Allocate and populate a new buffer
	destAddr = _strAlloc(oldLen + strLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", oldLen + strLen) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(oldLen + strLen) ;

	if (oldLen)
		memcpy(destCtl->_data(), thisCtl->_data(), oldLen) ;
	memcpy(destCtl->_data() + oldLen, cpStr, strLen + 1) ;	// +1 brings the null terminator across, as op+=(hzString&) does
	destCtl->m_copy = 1 ;

	// Tidy up: drop this instance's hold on the old value and free it if that was the last one. Previously the old value was only ever freed in
	// multi-threaded mode, so it leaked in single-threaded mode.
	if (thisCtl->m_copy && thisCtl->m_copy < 50)
	{
		if (!_hzGlobal_MT)
		{
			thisCtl->m_copy-- ;
			if (!thisCtl->m_copy)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
		else
		{
			if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
	}

	m_addr = destAddr ;
	return *this ;
}
int32_t	StringCompare	(const hzString& A, const hzString& B)
{
	// Category:	Text Processing
	//
	// Compare the values of two hzString instances byte for byte (case sensitive). The values are obtained by dereferencing the strings. A null or empty
	// B is treated as an empty value, as is a null A.
	//
	// Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	// Returns:	<0	If A is lexically less than B
	//			>0	If A is lexically more than B
	//			0	If A and B are equal.

	_hzfunc(__func__) ;

	const char*	pA = *A ;						// Value of A
	const char*	pB = *B ;						// Value of B
	const bool	bEmptyB = (!pB || !pB[0]) ;		// True if B has no content

	// A has no value: equal only if B has none either
	if (!pA)
	{
		if (bEmptyB)
			return 0 ;
		return -*pB ;
	}

	// B has no content: result rests on the first byte of A
	if (bEmptyB)
		return *pA ;

	// Skip the common lead, then the first point of difference decides
	while (*pA && *pB && *pA == *pB)
	{
		pA++ ;
		pB++ ;
	}

	return *pA - *pB ;
}
int32_t	StringCompareI	(const hzString& A, const hzString& B)
{
	// Category:	Text Processing
	//
	// Compare the values of two hzString instances ignoring case, by applying _tolower() to each byte before comparison. The values are obtained by
	// dereferencing the strings. A null or empty B is treated as an empty value, as is a null A.
	//
	// Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	// Returns:	<0	If A is lexically less than B
	//			>0	If A is lexically more than B
	//			0	If A and B are equivalent.

	_hzfunc(__func__) ;

	const char*	pA = *A ;						// Value of A
	const char*	pB = *B ;						// Value of B
	const bool	bEmptyB = (!pB || !pB[0]) ;		// True if B has no content

	// A has no value: equivalent only if B has none either
	if (!pA)
	{
		if (bEmptyB)
			return 0 ;
		return -_tolower(*pB) ;
	}

	// B has no content: result rests on the first byte of A
	if (bEmptyB)
		return _tolower(*pA) ;

	// Skip the common lead, then the first point of difference decides
	while (*pA && *pB && _tolower(*pA) == _tolower(*pB))
	{
		pA++ ;
		pB++ ;
	}

	return _tolower(*pA) - _tolower(*pB) ;
}
int32_t	StringCompareW	(const hzString& A, const hzString& B)
{
	// Category:	Text Processing
	//
	// Compare the values of two hzString instances by handing them to CstrCompareW() (the whitespace-ignoring comparison).
	//
	// Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	// Returns:	<0	If A is lexically less than B
	//			>0	If A is lexically more than B
	//			0	If A and B are equivalent.

	_hzfunc(__func__) ;

	const int32_t	nCmp = CstrCompareW(*A, *B) ;	// Outcome of the comparison

	return nCmp ;
}
int32_t	StringCompareF	(const hzString& a, const hzString& b)
{
	// Category:	Text Processing
	//
	// Fast String Compare.
	//
	// A compare function would normally be expected to establish the true lexical order of two strings. In sets and maps that are only used for lookup,
	// and are never exported in lexical order, this is not necessary. There, comparison can be sped up considerably by treating string values as arrays
	// of 64 bit integers rather than arrays of bytes. This function simply hands the comparison to hzString::CompareF().
	//
	// Arguments:	1)	a	The 1st string
	//				2)	b	The 2nd string
	//
	// Returns:	+1	If a > b
	//			-1	If a < b
	//			0	If a and b are equal.

	_hzfunc(__func__) ;

	const int32_t	nCmp = a.CompareF(b) ;	// Outcome of the comparison

	return nCmp ;
}