// // File: hzString.cpp // // Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com) // // The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free // Software Foundation, either version 3 of the License, or any later version. // // The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR // A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses. //
#include <iostream>

#include <stdarg.h>
#include <string.h>
#include <execinfo.h>

#include "hzChars.h"
#include "hzProcess.h"
#include "hzTextproc.h"
#include "hzString.h"
#include "hzTmplMapS.h"
/* ** String addresses are unsigned 32-bit numbers with the most significant 16-bits indicating the block and the least significant 16-bits indicating the position within the block. */
#define HZ_STR_OVERSIZE 16379 // 16384 (minus 5 bytes for copy count, len indicator and NULL terminator). Above this size, strings are allocated from the heap. #define HZ_STRADDR_BLKID 0xffff0000 // Block part of address #define HZ_STRING_SBSPACE 65536 // Total size of the string space blocks in multiples of 4 bytes
/* ** Definitions */
struct  _strBloc
{
    // Small string space superblock. String spaces are carved sequentially out of m_Space by _strAlloc, in whole 4-byte units.

    uint32_t    m_blkSelf ;                     // Block number (1-based position in the regime's m_Super array); forms the upper 16 bits of string addresses
    uint32_t    m_Usage ;                       // Number of 4-byte units used so far (so the position of the free space)
    uint32_t    m_Space[HZ_STRING_SBSPACE] ;    // Area from which string spaces are allocated
} ;
class   _strItem
{
    // hzString control area (start of string space).
    //
    // The string space is laid out as: 1 byte copy count, a 1 to 3 byte length indicator, the string data, then the NULL terminator. The members m_size and
    // m_resv are deliberately declared short: the 2nd and 3rd bytes of the length indicator, and the data itself, overlay the bytes that follow them in the
    // allocated string space.
    //
    // Length indicator encoding (as read by _getSize):
    //  0xxxxxxx                    Length 0 - 127 held in 1 byte
    //  10xxxxxx + 1 byte           14-bit length held in 2 bytes
    //  11xxxxxx + 2 bytes          22-bit length held in 3 bytes

public:
    uchar   m_copy ;        // Copy count (min value 1)
    uchar   m_size[1] ;     // Start of serial integer size (1 to 3 bytes)
    char    m_resv[2] ;     // Start of data in the case of strings < 128 bytes

    _strItem    (void)
    {
        m_copy = 0 ;
        m_size[0] = 0 ;
        m_resv[0] = m_resv[1] = 0 ;
    }

    void    _setSize    (uint32_t nSize)
    {
        // Write the length indicator. This must be done before _data() is used to populate the string value, as the data offset depends on it.
        //
        // Argument:    nSize   The string length (excluding copy count, length indicator and null terminator)

        if (nSize < 128)
            m_size[0] = nSize ;
        else if (nSize < HZ_STR_OVERSIZE)
        {
            m_size[0] = 128 + (nSize / 256) ;
            m_size[1] = nSize % 256 ;
        }
        else if (nSize < HZSTRING_MAXLEN && (nSize >> 16) <= 0x3f)
        {
            // The top byte must carry bits 16-21 of the length so that _getSize(), which shifts it left by 16, recovers the same value. (Dividing by 0xffff
            // instead gave a wrong top byte for lengths such as 65535.)
            m_size[0] = 192 + (nSize >> 16) ;
            m_size[1] = (nSize & 0xff00) >> 8 ;
            m_size[2] = nSize & 0xff ;
        }
        else
        {
            // Invalid size
            m_size[0] = m_size[1] = m_size[2] = 0xff ;
        }
    }

    uint32_t    _getSize    (void)
    {
        // Decode the length indicator.
        //
        // Returns: The string length (excluding copy count, length indicator and null terminator)

        if (m_size[0] & 0x80)
        {
            if (m_size[0] & 0x40)
                return ((m_size[0] & 0x3f) << 16) + (m_size[1] << 8) + m_size[2] ;
            return ((m_size[0] & 0x3f) << 8) + m_size[1] ;
        }
        return m_size[0] ;
    }

    /*
    uint32_t    _factor (void)
    {
        // Actual length in bytes including copy count, serial integer length, the string value and the NULL terminator.
        if (m_size[0] & 0x80)
        {
            if (m_size[0] & 0x40)
                return ((m_size[0] & 0x3f) << 16) + (m_size[1] << 8) + m_size[2] + 5 ;
            return ((m_size[0] & 0x3f) << 8) + m_size[1] + 4 ;
        }
        return m_size[0] + 3 ;
    }
    */

    char*   _data   (void)
    {
        // Returns a pointer to the first byte of the string value, which sits directly after the length indicator.
        //
        // Note that in setting a string, _setSize must be done first before using the pointer returned by this function to set the string value

        if (m_size[0] & 0x80)
        {
            if (m_size[0] & 0x40)
                return m_resv + 2 ;
            return m_resv + 1 ;
        }
        return m_resv ;
    }
} ;
class _strRegime { public: hzArray <_strBloc*> m_Super ; // Array of superblocks hzMapS <uint32_t,uint32_t> m_Flists ; // Freelists - Size:Freelist hzMapS <uint32_t,void*> m_Heap ; // Heap allocated oversized strings
_strBloc* m_pTopBlock ; // Latest small string space superblock (only one from which new string spaces can be allocated)
// Locks //hzLockRWD m_lockSmall[32] ; // Small string allocation locks hzLockRWD m_lockSbloc ; // Lock for allocating superblocks hzLockRWD m_lockOsize ; // Lock for allocating/freeing of oversize strings
uint32_t m_nOver ; // Population of oversize strings
_strRegime () { m_pTopBlock = 0 ; } } ;
/* ** Variables */
global  hzSet<hzString> _hzGlobal_setStrings ;      // Global string repository

static  _strRegime*     s_pStrRegime ;              // The one and only string regime (created on the first call to _strAlloc)

global  const hzString  _hzGlobal_nullString ;      // Null string
global  const hzString  _hz_null_string ;           // Null string
global  const hzString  _hzString_TooLong = "-- String Too Long --" ;   // To be returned when limit exceeded
global  const hzString  _hzString_Fault = "-- String Fault --" ;        // To be returned when string corrupted

static  char    _hz_NullBuffer [8] ;                // NOTE(review): not referenced in the visible part of this file - confirm use
void* _strXlate (uint32_t strAddr) { //_hzfunc(__func__) ;
_strBloc* pBloc ; // Pointer to superblock uint32_t* pSeg ; // Data segment uint32_t blocNo ; // Offset within vector of blocks uint32_t slotNo ; // String slot within block
if (!strAddr) return 0 ;
slotNo = strAddr & 0xffff ; blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ;
if (!blocNo) { // Oversized designation return s_pStrRegime->m_Heap[strAddr] ; }
// String not oversized if (blocNo > s_pStrRegime->m_Super.Count()) hzexit(E_CORRUPT, "Cannot xlate address %u:%u. No such superblock (%u issued)\n", blocNo, slotNo, s_pStrRegime->m_Super.Count()) ;
pBloc = s_pStrRegime->m_Super[blocNo-1] ; if (!pBloc) hzexit(E_CORRUPT, "strXlate: No block found for address %u:%u. Total of %u superblocks issued)\n", blocNo, slotNo, s_pStrRegime->m_Super.Count()) ;
pSeg = pBloc->m_Space + slotNo ; return (void*) pSeg ; }
uint32_t _strAlloc (uint32_t nSize) { // Allocate string space. The actual space allocated will be sufficient for a string of length nSize, plus the copy count, length indicator and NULL terminator. // // Argument: nSize Required string length (excluding copy count, length indicator and null terminator) // // Returns: Pointer to the required string space. If is a fatal condition if this cannot be obtained.
_hzfunc(__func__) ;
_strBloc* pBloc = 0 ; // Pointer to superblock uint32_t* pSlot = 0 ; // Pointer to freelist slot hzLockS* pLock ; // Cast to hzLock (of first 4 bytes of free space) void* pVoid ; // Address allocated as void* uint32_t* pSeg = 0 ; // Pointer to segment uint32_t strAddr = 0 ; // Address of string (block + slot) uint32_t nUnit ; // Number of 4/8-byte units required uint32_t blocNo ; // Offset within vector of blocks uint32_t slotNo ; // String slot within block
if (!s_pStrRegime) s_pStrRegime = new _strRegime() ; if (!nSize) hzexit(E_NODATA, "Cannot allocate a zero size string") ;
if (nSize < 128) nSize += 3 ; else if (nSize < HZ_STR_OVERSIZE) nSize += 4 ; else nSize += 5 ;
// Deal with oversized allocations if (nSize >= HZ_STR_OVERSIZE) { nUnit = (nSize/8) + (nSize%8 ? 1:0) ; strAddr = s_pStrRegime->m_Heap.Count() + 1 ; pVoid = (void*) new char[nUnit * 8] ; s_pStrRegime->m_Heap.Insert(strAddr, pVoid) ; return strAddr ; }
// String not oversized nUnit = (nSize/4) + (nSize%4 ? 1:0) ; if (nUnit < 2) nUnit = 2 ;
// Is there a free list for the exact size? /* if (s_pStrRegime->m_Flists.Exists(nUnit)) { strAddr = s_pStrRegime->m_Flists[nUnit] ; slotNo = strAddr & 0xffff ; blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ; pBloc = s_pStrRegime->m_Super[blocNo] ;
if (!pBloc) hzexit(E_CORRUPT, "Case 1 Illegal String Address %u:%u", blocNo, slotNo) ;
pSlot = pBloc->m_Space + slotNo ; if (pSlot[0]) goto top ; pLock = (hzLockS*) pSlot ; pLock->Lock() ;
s_pStrRegime->m_Flists[nUnit] = pSlot[1] ; //->m_fleNext ;
pLock->Unlock() ; memset(pSlot, 0, nUnit * 4) ; return strAddr ; } */
top: // No free slots so create another superblock if (_hzGlobal_MT) s_pStrRegime->m_lockSbloc.LockWrite() ;
if (!s_pStrRegime->m_pTopBlock || ((s_pStrRegime->m_pTopBlock->m_Usage + nUnit) >= HZ_STRING_SBSPACE)) { // Assign any remaining free space on the highest block to the small freelist of the size
// Then create a new highest block s_pStrRegime->m_pTopBlock = pBloc = new _strBloc() ; memset(pBloc, 0, sizeof(_strBloc)) ;
s_pStrRegime->m_Super.Add(pBloc) ; pBloc->m_blkSelf = s_pStrRegime->m_Super.Count() ; pBloc->m_Usage = 0 ;
printf("CREATED SUPERBLOCK %u at %p\n", pBloc->m_blkSelf, pBloc) ; }
// Assign from the superblock free space pSeg = s_pStrRegime->m_pTopBlock->m_Space + s_pStrRegime->m_pTopBlock->m_Usage ; strAddr = ((s_pStrRegime->m_pTopBlock->m_blkSelf) << 16) + s_pStrRegime->m_pTopBlock->m_Usage ;
s_pStrRegime->m_pTopBlock->m_Usage += nUnit ; memset(pSeg, 0, nUnit * 4) ;
if (_hzGlobal_MT) s_pStrRegime->m_lockSbloc.Unlock() ;
//printf("Allocated %d (%u) bytes at %u:%u\n", nSize, nUnit * 4, (strAddr&0xffff0000)>>16, (strAddr&0xffff)) ; return strAddr ; }
void    _strFree    (uint32_t strAddr, uint32_t nSize)
{
    // Release string space. Oversized strings (address with a zero block part) are deleted from the heap and removed from the heap map. Small string spaces
    // are pushed onto the front of the freelist for their size (in 4-byte units), with the first unit zeroed and the second unit holding the address of the
    // next free space of that size (zero if none).
    //
    // Arguments:   1) strAddr  The address of the string space to be freed
    //              2) nSize    The length of the string (excluding copy count, length indicator and null terminator)
    //
    // Returns:     None. A zero address, a zero size, or a size/address mismatch is reported via hzexit as E_CORRUPT.

    _hzfunc(__func__) ;

    _strItem*   pItem ;     // Start of string space
    uint32_t*   pSlot ;     // Start of string space (as freelist entry)
    uint32_t    nUnit ;     // Number of 4-byte units in string being freed
    uint32_t    blocNo ;    // Block part of address
    uint32_t    slotNo ;    // String slot within block

    if (!strAddr)
        hzexit(E_CORRUPT, "NULL address") ;

    slotNo = strAddr & 0xffff ;
    blocNo = (strAddr & HZ_STRADDR_BLKID) >> 16 ;

    if (!nSize)
        hzexit(E_CORRUPT, "WARNING freeing zero size item %u:%u", blocNo, slotNo) ;

    pItem = (_strItem*) _strXlate(strAddr) ;

    if (!blocNo)
    {
        // Heap. The pointer translated above is the one registered under strAddr by _strAlloc (looking up strAddr-1 named a different entry). The memory
        // was obtained with new char[] so must be released with delete [] on a char pointer.
        printf("Deleting heap item %u:%u (size %d)\n", blocNo, slotNo, nSize) ;
        if (!pItem)
            hzexit(E_CORRUPT, "No heap item for address %u", strAddr) ;
        delete [] (char*) pItem ;
        s_pStrRegime->m_Heap.Delete(strAddr) ;
        return ;
    }

    // _strAlloc never places a string of this size in a superblock, so the address and size disagree
    if (nSize >= HZ_STR_OVERSIZE)
        hzexit(E_CORRUPT, "Oversize string (%u bytes) at small string address %u:%u", nSize, blocNo, slotNo) ;

    // Take account of the copy count (1 byte), the length indicator (1 or 2 bytes at this size), and the NULL terminator
    nSize += (nSize < 128) ? 3 : 4 ;

    nUnit = (nSize/4) + (nSize%4 ? 1:0) ;
    if (nUnit < 2)
        nUnit = 2 ;

    if (_hzGlobal_MT)
        s_pStrRegime->m_lockSbloc.LockWrite() ;

    pSlot = (uint32_t*) pItem ;
    pSlot[0] = 0 ;

    if (!s_pStrRegime->m_Flists.Exists(nUnit))
    {
        // First free space of this size: terminate the list rather than leave old string bytes in the link
        pSlot[1] = 0 ;
        s_pStrRegime->m_Flists.Insert(nUnit, strAddr) ;
    }
    else
    {
        pSlot[1] = s_pStrRegime->m_Flists[nUnit] ;
        s_pStrRegime->m_Flists[nUnit] = strAddr ;
    }

    if (_hzGlobal_MT)
        s_pStrRegime->m_lockSbloc.Unlock() ;
}
/* ** hzString Constructors/Destructors */
hzString::hzString  (void)
{
    // Construct an empty string (no string space) and add it to the global count of string instances.

    _hzfunc("hzString::hzString") ;

    ++_hzGlobal_Memstats.m_numStrings ;
    m_addr = 0 ;
}
hzString::hzString  (const char* pStr)
{
    // Construct a string from a null terminated character string. The instance starts empty and is then set by the assignment operator.
    //
    // Argument:    pStr    The value to assign

    _hzfunc("hzString::hzString(char*)") ;

    ++_hzGlobal_Memstats.m_numStrings ;
    m_addr = 0 ;
    *this = pStr ;
}
hzString::hzString  (const hzString& op)
{
    // Copy constructor. The instance starts empty and is then set from the operand by the assignment operator.
    //
    // Argument:    op      The string to copy

    _hzfunc("hzString::hzString(copy)") ;

    ++_hzGlobal_Memstats.m_numStrings ;
    m_addr = 0 ;
    *this = op ;
}
hzString::~hzString (void)
{
    // Destructor. Releases this instance's hold on its string space (if it has one) and removes the instance from the global count.

    _hzfunc("hzString::~hzString") ;

    if (m_addr != 0)
        Clear() ;

    --_hzGlobal_Memstats.m_numStrings ;
}
/* ** hzString private methods */
int32_t hzString::_cmp (const hzString& s) const { // Case sensitive compare based on strcmp // // Argument: s The test string // // Returns: -1 If this string is less than the test string // 1 If ths string is greater than the test string // 0 If the two strings are equal
_hzfunc("hzString::_cmp(hzString&)") ;
_strItem* thisCtl ; // This string control area _strItem* suppCtl ; // Supplied string control area
if (!m_addr) return s.m_addr ? -1 : 0 ; if (!s.m_addr) return m_addr ? 1 : 0 ; if (m_addr == s.m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; suppCtl = (_strItem*) _strXlate(s.m_addr) ;
return strcmp(thisCtl->_data(), suppCtl->_data()) ; }
int32_t hzString::_cmpI (const hzString& s) const { // Case insensitive compare based on strcasecmp // // Arguments: 1) s The test string // // Returns: -1 If this string is less than the test string // 1 If ths string is greater than the test string // 0 If the two strings are equal
_hzfunc("hzString::_cmpI(hzString&)") ;
_strItem* thisCtl ; // This string control area _strItem* suppCtl ; // Supplied string control area
if (!m_addr) return s.m_addr ? -1 : 0 ; if (!s.m_addr) return m_addr ? 1 : 0 ; if (m_addr == s.m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; suppCtl = (_strItem*) _strXlate(s.m_addr) ;
return strcasecmp(thisCtl->_data(), suppCtl->_data()) ; }
int32_t hzString::_cmp (const char* s) const { // Case sensitive compare based on strcmp // // Arguments: 1) s The null terminated test string // // Returns: -1 If this string is less than the test string // 1 If ths string is greater than the test string // 0 If the two strings are equal
_hzfunc("hzString::_cmp(char*)") ;
_strItem* thisCtl ; // This string control area
if (!m_addr) return (!s || !s[0]) ? 0 : -1 ; if (!s || !s[0]) return m_addr ? 1 : 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return strcmp(thisCtl->_data(), s) ; }
int32_t hzString::_cmpI (const char* s) const { // Case insensitive compare based on strcasecmp // // Arguments: 1) s The null terminated test string // // Returns: -1 If this string is less than the test string // 1 If ths string is greater than the test string // 0 If the two strings are equal
_hzfunc("hzString::_cmpI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return (!s || !s[0]) ? 0 : -1 ; if (!s || !s[0]) return m_addr ? 1 : 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return strcasecmp(thisCtl->_data(), s) ; }
int32_t hzString::_cmpF (const hzString& tS) const
{
    // Fast compare. Strings are ordered first by length, then by comparing the values as a series of 32-bit chunks rather than bytes. This is only the
    // equivalent of a lexical comparison in big-endian architectures, so this method should be considered only for collections where uniqueness of entries
    // is what matters rather than lexical order.
    //
    // The final chunk is zero padded rather than read directly from the string space. The string data does not start on a 4-byte boundary, so reading the
    // last chunk in place could run past the end of the string space (into the neighbouring string) when the length is 1 more than a multiple of 4.
    //
    // Argument:    tS  The test string
    //
    // Returns:     -1  If this string is less than the test string
    //              1   If this string is greater than the test string
    //              0   If the two strings are equal

    _hzfunc("hzString::_cmpF") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   suppCtl ;   // Supplied string's control area
    const char* pA ;        // Pointer into data this
    const char* pB ;        // Pointer into data supplied
    uint32_t    nLen ;      // Common length
    uint32_t    nPart ;     // Bytes in current chunk (1 to 4)
    uint32_t    x ;         // Offset (4 byte increments)
    int32_t     a ;         // Current chunk of this
    int32_t     b ;         // Current chunk of supplied

    if (!m_addr)
        return tS.m_addr ? -1 : 0 ;
    if (!tS.m_addr)
        return 1 ;
    if (m_addr == tS.m_addr)
        return 0 ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;
    suppCtl = (_strItem*) _strXlate(tS.m_addr) ;

    nLen = thisCtl->_getSize() ;
    if (nLen < suppCtl->_getSize())
        return -1 ;
    if (nLen > suppCtl->_getSize())
        return 1 ;

    pA = thisCtl->_data() ;
    pB = suppCtl->_data() ;

    for (x = 0 ; x < nLen ; x += 4)
    {
        nPart = nLen - x ;
        if (nPart > 4)
            nPart = 4 ;

        // Copy, rather than cast, so the access is aligned and never goes beyond the string value
        a = b = 0 ;
        memcpy(&a, pA + x, nPart) ;
        memcpy(&b, pB + x, nPart) ;

        if (a < b)
            return -1 ;
        if (a > b)
            return 1 ;
    }

    return 0 ;
}
bool    hzString::_feq  (const hzString& tS) const
{
    // Determine equality by fast compare technique: same string space, else same length and same bytes. This works in both big and little endian
    // architectures.
    //
    // Only the bytes of the string values are compared. Comparing in 4-byte chunks directly from the string space could read beyond the string space (into
    // the neighbouring string) when the length is 1 more than a multiple of 4, making equal strings appear unequal.
    //
    // Argument:    tS  The test string
    //
    // Returns:     True    If this string is lexically equal to the supplied test string
    //              False   Otherwise

    _hzfunc("hzString::_feq") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   suppCtl ;   // Supplied string's control area
    uint32_t    nLen ;      // Common length

    if (m_addr == tS.m_addr)
        return true ;

    // The addresses differ, so if either is empty the strings cannot be equal
    if (!m_addr || !tS.m_addr)
        return false ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;
    suppCtl = (_strItem*) _strXlate(tS.m_addr) ;

    nLen = thisCtl->_getSize() ;
    if (nLen != suppCtl->_getSize())
        return false ;

    return memcmp(thisCtl->_data(), suppCtl->_data(), nLen) == 0 ;
}
/* ** hzString internal methods */
void hzString::_inc_copy (void) const { _strItem* thisCtl ; // This string's control area
thisCtl = (_strItem*) _strXlate(m_addr) ; if (thisCtl->m_copy && thisCtl->m_copy < 50) thisCtl->m_copy++ ; }
void hzString::_dec_copy (void) const { _strItem* thisCtl ; // This string's control area
thisCtl = (_strItem*) _strXlate(m_addr) ;
if (thisCtl->m_copy <= 1) printf("WARNING: STR _dec_copy would zero copy count\n") ; else thisCtl->m_copy-- ; }
/* ** hzString public methods */
uint32_t hzString::Length (void) const { // Returns length of string, not including a null terminator // // Arguments: None // Returns: Data length
_hzfunc("hzString::Length") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return thisCtl->_getSize() ; }
uint32_t hzString::Copies (void) const { // Returns numbers of copies for diagnostics // // Arguments: None // Returns: Data length
_hzfunc("hzString::Copies") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ;
return thisCtl->m_copy ; }
const char* hzString::operator* (void) const
{
    // Returns the string value as a null terminated string.
    //
    // Arguments:   None
    // Returns:     Pointer to the string value, or NULL if the string is empty

    _hzfunc("hzString::operator*") ;

    if (!this)
        hzexit(E_CORRUPT, "No instance") ;

    return m_addr ? ((_strItem*) _strXlate(m_addr))->_data() : 0 ;
}
hzString::operator const char*  (void) const
{
    // Conversion to a null terminated string.
    //
    // Arguments:   None
    // Returns:     Pointer to the string value, or NULL if the string is empty

    _hzfunc("hzString::operator const char*") ;

    if (!this)
        hzexit(E_CORRUPT, "No instance") ;

    return m_addr ? ((_strItem*) _strXlate(m_addr))->_data() : 0 ;
}
void hzString::Clear (void) { // Clear this string. // // The end result will be this string has a null pointer and the string space that was being pointed to, has its copy count reduced by 1. If this means the // copy count falls to zero, then the string space shall be freed. // // Clears the string. Note that if other string instances share the same internal string space (have equal contents) then all that occurs is a decrement of // the copy count. This leaves the string value intact maintaining the integrity of the other string instances. Only if there are no other string instances // sharing the internal string space (copy count is zero) is the internal string space is released (deleted). In both cases the local internal string space // pointer is then set to null. Subsequent setting of this hzString instance will then allocate fresh memory. // // Arguments: None // Returns: None _hzfunc("hzString::Clear()") ;
_strItem* thisCtl ; // This string's control area uint32_t nLen ; // Length of string
if (m_addr) { thisCtl = (_strItem*) _strXlate(m_addr) ; nLen = thisCtl->_getSize() ; if (!nLen) hzexit(E_CORRUPT, "Zero string size %u:%u", (m_addr&0xffff0000)>>16, m_addr&0xffff) ;
if (thisCtl->m_copy == 0) { printf("WARN - Zero copis of string size %u:%u (%s)", (m_addr&0xffff0000)>>16, m_addr&0xffff, thisCtl->_data()) ; m_addr = 0 ; return ; }
if (thisCtl->m_copy && thisCtl->m_copy < 50) { if (_hzGlobal_MT) { if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) _strFree(m_addr, nLen) ; } else { thisCtl->m_copy-- ; if (!thisCtl->m_copy) _strFree(m_addr, nLen) ; } }
m_addr = 0 ; } }
// Index operator const char hzString::operator[] (uint32_t nIndex) const { // Returns the (char) value of the Nth position in the buffer. // // A zero is returned if N is less than zero or overshoots the text part of the buffer // // Arguments: 1) nIndex Position of char to be returned // // Returns: 0+ Value of the Nth byte in the string if N is less than the length of the string
_hzfunc("hzString::operator[]") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return 0 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; if (nIndex > thisCtl->_getSize()) return 0 ;
return thisCtl->_data()[nIndex] ; }
// Stream operator
#if 0
std::istream&   operator>>  (std::istream& is, hzString& str)
{
    // Category:    Data Input
    //
    // NOTE: This function is currently compiled out (#if 0).
    //
    // Sets the string to the next line read from the stream. If the first line read is empty, one further line is read and used instead.
    //
    // Arguments:   1) is   Input stream
    //              2) str  String to be populated by the read operation
    //
    // Returns:     Reference to the input stream

    _hzfunc("hzString::operator>>") ;

    std::string s ;     // STL string

    std::getline(is, s) ;
    if (!s.length())
        std::getline(is, s) ;
    str = s.c_str() ;
    return is ;
}
#endif
std::ostream&   operator<<  (std::ostream& os, const hzString& obj)
{
    // Category:    Data Output
    //
    // Writes the string value to a stream. Nothing is written for an empty string.
    //
    // Arguments:   1) os   Output stream
    //              2) obj  String to be written out
    //
    // Returns:     Reference to the output stream

    _hzfunc("hzString::operator<<") ;

    const char* pValue = *obj ;

    if (pValue)
        os << pValue ;
    return os ;
}
hzString&   hzString::ToLower   (void)
{
    // Convert string to all lower case.
    //
    // The existing string space is never altered, as it may be shared with other string instances. If the value contains any upper case letters (A-Z) a
    // new string space is populated with the converted value, this instance's hold on the old string space is released (freeing it if this was the last
    // instance using it), and this instance is pointed at the new space. Otherwise the string is left as it is.
    //
    // Arguments:   None
    // Returns:     Reference to this string instance

    _hzfunc("hzString::ToLower") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   destCtl ;   // New internal structure if required
    char*       i ;         // Iterator
    char*       j ;         // Iterator
    uint32_t    count ;     // Char counter
    uint32_t    destAddr ;  // New string address if required
    bool        bChange ;   // Flag to indicate if operation changes content
    bool        bLast ;     // True if this was the last instance using the old string space

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Is change needed?
    bChange = false ;
    for (i = (char*) thisCtl->_data(), count = thisCtl->_getSize() ; count ; count--, i++)
    {
        if (*i >= 'A' && *i <= 'Z')
            { bChange = true ; break ; }
    }

    if (!bChange)
        return *this ;

    // Allocate new space
    count = thisCtl->_getSize() ;
    destAddr = _strAlloc(count) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(count) ;
    destCtl->m_copy = 1 ;

    i = (char*) thisCtl->_data() ;
    j = (char*) destCtl->_data() ;

    // The new string space is populated. The cast keeps bytes of 128 and above within the range tolower() is defined for.
    for (; count ; count--)
        *j++ = tolower((unsigned char) *i++) ;
    *j = 0 ;

    // Tidy up. The old space is freed in both single and multi-threaded mode if the count reaches zero.
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bLast = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bLast = (--thisCtl->m_copy == 0) ;

        if (bLast)
            _strFree(m_addr, thisCtl->_getSize()) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::ToUpper   (void)
{
    // Convert string to all upper case.
    //
    // The existing string space is never altered, as it may be shared with other string instances. If the value contains any lower case letters (a-z) a
    // new string space is populated with the converted value, this instance's hold on the old string space is released (freeing it if this was the last
    // instance using it), and this instance is pointed at the new space. Otherwise the string is left as it is.
    //
    // Arguments:   None
    // Returns:     Reference to this string instance

    _hzfunc("hzString::ToUpper") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   destCtl ;   // New internal structure if required
    char*       i ;         // Iterator
    char*       j ;         // Iterator
    uint32_t    nLen ;      // Length
    uint32_t    destAddr ;  // New string address if required
    bool        bChange ;   // Flag to indicate if operation changes content
    bool        bLast ;     // True if this was the last instance using the old string space

    // If NULL return
    if (!m_addr)
        return *this ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Is change needed?
    bChange = false ;
    for (i = (char*) thisCtl->_data(), nLen = thisCtl->_getSize() ; nLen ; i++, nLen--)
    {
        if (*i >= 'a' && *i <= 'z')
            { bChange = true ; break ; }
    }

    if (!bChange)
        return *this ;

    // Allocate new space
    nLen = thisCtl->_getSize() ;
    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    i = (char*) thisCtl->_data() ;
    j = (char*) destCtl->_data() ;

    // The new string space is populated. The cast keeps bytes of 128 and above within the range toupper() is defined for.
    for (; nLen ; nLen--)
        *j++ = toupper((unsigned char) *i++) ;
    *j = 0 ;

    // Tidy up. In multi-threaded mode the decision to free is taken on the result of the atomic decrement, not on a later re-read of the count.
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (_hzGlobal_MT)
            bLast = (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0) ;
        else
            bLast = (--thisCtl->m_copy == 0) ;

        if (bLast)
            _strFree(m_addr, thisCtl->_getSize()) ;
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::UrlEncode (bool bResv)
{
    // Performs URL-Encoding on the current string content.
    //
    // Each char to be encoded is replaced by a percent sign followed by its value as two lower case hex digits. Percent signs already in the value are
    // passed through unchanged (the string may already be encoded). The transformation is only carried out if at least one char needs encoding, in which
    // case a new string space is populated and this instance's hold on the old one is released.
    //
    // Argument:    bResv   If false, chars for which IsUrlresv() is true are passed through unchanged. If true they are encoded. Chars for which
    //                      IsUrlnorm() is true are never encoded, and all remaining chars (other than percent) are always encoded.
    //
    // Returns:     Reference to this string instance

    _hzfunc("hzString::UrlEncode") ;

    static const char   hexDigits[] = "0123456789abcdef" ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   destCtl ;   // New internal structure if required
    char*       i ;         // For iteration
    char*       j ;         // For iteration
    uint32_t    nLen ;      // Length of encoded string
    uint32_t    destAddr ;  // New string address if required
    uint32_t    val ;       // Byte value of char being encoded (0 - 255)

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Is change needed? Count chars that are to be converted as these will occupy 3 chars in the new string
    nLen = thisCtl->_getSize() ;
    for (i = (char*) thisCtl->_data() ; *i ; i++)
    {
        // If the char is a % (string may already be encoded), pass this by
        if (*i == CHAR_PERCENT)
            continue ;

        // If the char is a normal URL char, pass
        if (IsUrlnorm(*i))
            continue ;

        // If the char is a reserved URL char, pass only if bResv is false
        if (IsUrlresv(*i))
        {
            if (bResv)
                nLen += 2 ;
            continue ;
        }

        // Any other char must be encoded
        nLen += 2 ;
    }

    if (nLen == thisCtl->_getSize())
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    j = (char*) destCtl->_data() ;

    for (i = (char*) thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT)
            *j++ = *i ;
        else if (IsUrlnorm(*i))
            *j++ = *i ;
        else if (IsUrlresv(*i) && !bResv)
            *j++ = *i ;
        else
        {
            // Take the byte value as unsigned so that chars of 128 and above yield exactly two hex digits. (Converting a negative char straight to a 32-bit
            // value produced eight digits, which overran the conversion buffer.)
            val = (unsigned char) *i ;
            *j++ = CHAR_PERCENT ;
            *j++ = hexDigits[(val >> 4) & 0x0f] ;
            *j++ = hexDigits[val & 0x0f] ;
        }
    }
    *j = 0 ;

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (!_hzGlobal_MT)
        {
            thisCtl->m_copy-- ;
            if (!thisCtl->m_copy)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
        else
        {
            if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::UrlDecode (void)
{
    // Performs URL-Decoding on the current string content. Each percent sign that is followed by two hex digits is replaced, together with the digits, by
    // the single char of that value. If the current string does not contain any such sequences, the string will be unchanged. Otherwise a new string space
    // is populated with the decoded value and this instance's hold on the old one is released.
    //
    // NOTE(review): A sequence of %00 decodes to a zero byte within the value - confirm whether callers can supply this.
    //
    // Arguments:   None
    // Returns:     Reference to this string instance

    _hzfunc("hzString::UrlDecode") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New string's control area
    char*       i ;             // Pointer into old string data
    char*       j ;             // Pointer into new string data
    uint32_t    destAddr ;      // New string space address
    uint32_t    newLen = 0 ;    // Length of new string
    uint32_t    val ;           // Hex value

    // If NULL return
    if (!m_addr)
        return *this ;
    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Is change needed? Not unless there are incidences of a percent followed by two hex chars
    for (i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT && IsHex(i[1]) && IsHex(i[2]))
            i += 2 ;
        newLen++ ;
    }

    if (newLen == thisCtl->_getSize())
        return *this ;

    // Allocate new space
    destAddr = _strAlloc(newLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(newLen) ;
    destCtl->m_copy = 1 ;

    // Create new string
    for (j = destCtl->_data(), i = thisCtl->_data() ; *i ; i++)
    {
        if (*i == CHAR_PERCENT)
        {
            if (IsHex(i[1]) && IsHex(i[2]))
            {
                i++ ;
                val = (*i >='0' && *i <='9' ? *i-'0' : *i >= 'A' && *i<= 'F' ? *i+10-'A' : *i >= 'a' && *i<='f' ? *i+10-'a' : 0) ;
                val *= 16 ;
                i++ ;
                val += (*i >='0' && *i <='9' ? *i-'0' : *i >= 'A' && *i<= 'F' ? *i+10-'A' : *i >= 'a' && *i<='f' ? *i+10-'a' : 0) ;
                *j++ = (char) val ;
                continue ;
            }
        }

        *j++ = *i ;
    }
    *j = 0 ;

    // Tidy up. In multi-threaded mode the copy count is decremented atomically, as it is in Clear().
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (!_hzGlobal_MT)
        {
            thisCtl->m_copy-- ;
            if (!thisCtl->m_copy)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
        else
        {
            if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::Reverse   (void)
{
    // Reverse the string content. The existing string space is never altered, as it may be shared with other string instances. Instead a new string space
    // is populated with the reversed value, this instance's hold on the old space is released, and this instance is pointed at the new space.
    //
    // Arguments:   None
    // Returns:     Reference to this string in all cases

    _hzfunc("hzString::Reverse") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   destCtl ;   // New internal structure
    char*       pTgt ;      // Target internal value
    char*       pSrc ;      // Source internal value
    uint32_t    destAddr ;  // New string address
    uint32_t    nLen ;      // Length
    uint32_t    n ;         // Position in target

    // If NULL return
    if (!m_addr)
        return *this ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Allocate new space
    nLen = thisCtl->_getSize() ;

    destAddr = _strAlloc(nLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(nLen) ;
    destCtl->m_copy = 1 ;

    // Create new string value as the reverse of the original. Every position of the target is written (stopping where ascending and descending positions
    // meet, as is done for an in-place swap, would leave the second half of the target unset).
    pSrc = thisCtl->_data() ;
    pTgt = destCtl->_data() ;

    for (n = 0 ; n < nLen ; n++)
        pTgt[n] = pSrc[nLen - 1 - n] ;
    pTgt[nLen] = 0 ;

    // Tidy up. In multi-threaded mode the copy count is decremented atomically, as it is in Clear().
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (!_hzGlobal_MT)
        {
            thisCtl->m_copy-- ;
            if (!thisCtl->m_copy)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
        else
        {
            if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::Truncate  (uint32_t limit)
{
    // Truncate the string to the given length. The string is unchanged if it is empty or is already no longer than the limit. A limit of zero leaves the
    // string empty. Otherwise a new string space is populated with the first limit bytes of the value. In both of the latter cases this instance's hold on
    // the old string space is released.
    //
    // Argument:    limit   Sets max length for the string and truncates beyond this.
    // Returns:     Reference to this string in all cases

    _hzfunc("hzString::Truncate") ;

    _strItem*   thisCtl ;       // This string's control area
    _strItem*   destCtl ;       // New control area
    char*       pTgt ;          // New string value
    uint32_t    destAddr = 0 ;  // New string address if required

    // If NULL return
    if (!m_addr)
        return *this ;

    thisCtl = (_strItem*) _strXlate(m_addr) ;

    // Do nothing if the size limit exceeds length of string value
    if (limit >= thisCtl->_getSize())
        return *this ;

    // With a zero limit (full truncation) no new space is needed and the address becomes null
    if (limit)
    {
        // Perform the (partial) truncation
        destAddr = _strAlloc(limit) ;
        destCtl = (_strItem*) _strXlate(destAddr) ;
        destCtl->_setSize(limit) ;
        destCtl->m_copy = 1 ;

        // Terminate explicitly rather than depend on the new space having been zero filled
        pTgt = destCtl->_data() ;
        memcpy(pTgt, thisCtl->_data(), limit) ;
        pTgt[limit] = 0 ;
    }

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (!_hzGlobal_MT)
        {
            thisCtl->m_copy-- ;
            if (!thisCtl->m_copy)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
        else
        {
            if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&   hzString::TruncateUpto  (const char* patern)
{
    // Category:    Text Processing
    //
    // Conditional, content based truncation. If the supplied pattern exists in the string, the new string will be everything up to but not including the
    // first instance of the pattern (so the string becomes empty if the pattern is found at the very start). If the pattern is empty or does not exist in
    // the string, the string content will be unchanged.
    //
    // Argument:    patern  The pattern at which the string will be truncated.
    // Returns:     Reference to this string in all cases

    _hzfunc("hzString::TruncateUpto") ;

    // Nothing to do without both a pattern and a string value
    if (!patern || !patern[0])
        return *this ;
    if (!m_addr)
        return *this ;

    _strItem*   pCtl = (_strItem*) _strXlate(m_addr) ;
    const char* pStart = (char*) pCtl->_data() ;
    const char* pFound = strstr(pStart, patern) ;

    if (!pFound)
        return *this ;

    // The offset of the pattern instance is the number of bytes to keep
    return Truncate((uint32_t) (pFound - pStart)) ;
}
hzString&   hzString::TruncateBeyond    (const char* patern)
{
    // Category:    Text Processing
    //
    // Conditional, content based truncation. If the supplied pattern exists in the string, the new string will be everything beyond but not including the
    // first instance of the pattern. If nothing follows the pattern the string becomes empty. If the pattern is empty or does not exist in the string, the
    // string content will be unchanged.
    //
    // Argument:    patern  The pattern beyond which the string value is kept.
    // Returns:     Reference to this string in all cases

    _hzfunc("hzString::TruncateBeyond") ;

    _strItem*   thisCtl ;   // This string's control area
    _strItem*   destCtl ;   // New control area
    char*       pFound ;    // Start of the pattern instance in the old string data
    char*       pTail ;     // Start of what follows the pattern instance
    char*       pTgt ;      // New string data
    uint32_t    destAddr ;  // New string address if required
    uint32_t    newLen ;    // New string length

    // If no pattern supplied, do nothing
    if (!patern || !patern[0])
        return *this ;

    // If NULL return
    if (!m_addr)
        return *this ;

    // Test if change required
    thisCtl = (_strItem*) _strXlate(m_addr) ;
    pFound = strstr((char*) thisCtl->_data(), patern) ;
    if (!pFound)
        return *this ;

    // The new value is what follows the pattern, so it is the length of that (not the length of what precedes the pattern) that must be allocated
    pTail = pFound + strlen(patern) ;
    newLen = strlen(pTail) ;

    if (!newLen)
    {
        // Nothing follows the pattern. A zero size string space cannot be allocated so the string is simply cleared.
        Clear() ;
        return *this ;
    }

    // Allocate new space
    destAddr = _strAlloc(newLen) ;
    destCtl = (_strItem*) _strXlate(destAddr) ;
    destCtl->_setSize(newLen) ;
    destCtl->m_copy = 1 ;

    pTgt = destCtl->_data() ;
    memcpy(pTgt, pTail, newLen) ;
    pTgt[newLen] = 0 ;

    // Tidy up
    if (thisCtl->m_copy && thisCtl->m_copy < 50)
    {
        if (!_hzGlobal_MT)
        {
            thisCtl->m_copy-- ;
            if (!thisCtl->m_copy)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
        else
        {
            if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
                _strFree(m_addr, thisCtl->_getSize()) ;
        }
    }

    m_addr = destAddr ;
    return *this ;
}
hzString&	hzString::DelWhiteLead	(void)
{
	//	Removes leading whitespace from the string. Any byte whose unsigned value is no greater than CHAR_SPACE counts as whitespace. The comparison is
	//	made on unsigned bytes so that bytes of 0x80 and above (e.g. those of multi-byte UTF-8 sequences) are never mistaken for whitespace where plain
	//	char is signed.
	//
	//	Arguments:	None
	//	Returns:	Reference to this string in all cases

	_hzfunc("hzString::DelWhiteLead") ;

	_strItem*		thisCtl ;		//	This string's control area
	_strItem*		destCtl ;		//	New string space
	const uchar*	i ;				//	Pointer into string data (unsigned view)
	uint32_t		destAddr ;		//	New string address if required
	uint32_t		wc ;			//	Count of leading whitespace chars
	uint32_t		nLen ;			//	Length of original string
	uint32_t		nusize ;		//	The size the string will be once leading whitespace removed

	//	If NULL return
	if (!m_addr)
		return *this ;

	//	Check that change is needed
	thisCtl = (_strItem*) _strXlate(m_addr) ;
	nLen = Length() ;
	i = (const uchar*) thisCtl->_data() ;

	for (wc = 0 ; wc < nLen && i[wc] <= CHAR_SPACE ; wc++) ;

	if (!wc)
		return *this ;

	//	Must alter content. If the string is whitespace throughout it becomes empty
	nusize = nLen - wc ;
	if (!nusize)
	{
		Clear() ;
		return *this ;
	}

	destAddr = _strAlloc(nusize) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", nusize) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nusize) ;
	memcpy(destCtl->_data(), thisCtl->_data() + wc, nusize) ;
	destCtl->_data()[nusize] = 0 ;		//	Terminate explicitly rather than rely on the state of the new space
	destCtl->m_copy = 1 ;

	//	Tidy up: release this instance's hold on the old string space
	if (thisCtl->m_copy && thisCtl->m_copy < 50)
	{
		if (!_hzGlobal_MT)
		{
			thisCtl->m_copy-- ;
			if (!thisCtl->m_copy)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
		else
		{
			if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
	}

	m_addr = destAddr ;
	return *this ;
}
hzString&	hzString::DelWhiteTrail	(void)
{
	//	Removes trailing whitespace from the string. Any byte whose unsigned value is no greater than CHAR_SPACE counts as whitespace. The comparison is
	//	made on unsigned bytes so that bytes of 0x80 and above (e.g. those of multi-byte UTF-8 sequences) are never mistaken for whitespace where plain
	//	char is signed.
	//
	//	Arguments:	None
	//	Returns:	Reference to this string in all cases

	_hzfunc("hzString::DelWhiteTrail") ;

	_strItem*		thisCtl ;		//	This string's control area
	_strItem*		destCtl ;		//	New string space
	const uchar*	i ;				//	Pointer into string data (unsigned view)
	uint32_t		destAddr ;		//	New string address if required
	uint32_t		nLen ;			//	Length of original string
	uint32_t		nusize ;		//	The size the string will be once trailing whitespace removed

	if (!m_addr)
		return *this ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	i = (const uchar*) thisCtl->_data() ;
	nLen = Length() ;

	//	Walk back from the end until a non-whitespace byte is met
	for (nusize = nLen ; nusize > 0 && i[nusize - 1] <= CHAR_SPACE ; nusize--) ;

	if (nusize == nLen)
		return *this ;

	//	Must alter content. If the string is whitespace throughout it becomes empty
	if (!nusize)
	{
		Clear() ;
		return *this ;
	}

	destAddr = _strAlloc(nusize) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", nusize) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nusize) ;
	memcpy(destCtl->_data(), thisCtl->_data(), nusize) ;
	destCtl->_data()[nusize] = 0 ;		//	The cut is mid-string so the terminator must be written here
	destCtl->m_copy = 1 ;

	//	Release the old value, then adopt the new string space
	Clear() ;
	m_addr = destAddr ;

	return *this ;
}
hzString&	hzString::TopAndTail	(void)
{
	//	Text substitution
	//
	//	Strips whitespace from both ends of this string by applying DelWhiteLead() followed by DelWhiteTrail(). A string with no leading or trailing
	//	whitespace is left unchanged.
	//
	//	Arguments:	None
	//	Returns:	Reference to this string in all cases

	_hzfunc("hzString::TopAndTail") ;

	//	Both calls return a reference to this instance, so they may be chained
	return DelWhiteLead().DelWhiteTrail() ;
}
hzString&	hzString::Replace	(const char* strA, const char* strB)
{
	//	Text substitution
	//
	//	Replace within this string, all instances of strA with strB. This string is unchanged if strA is NULL, empty, or does not exist within it. If
	//	strB is NULL or empty, the instances of strA are simply removed.
	//
	//	Arguments:	1)	strA	Pattern to be substituted out
	//				2)	strB	Pattern to be used instead
	//
	//	Returns:	Reference to this string in all cases

	_hzfunc("hzString::Replace") ;

	hzChain		Z ;					//	Working chain buffer
	_strItem*	thisCtl ;			//	This string's control area
	const char*	i ;					//	Pointer into string data
	const char*	end ;				//	Position of the null terminator of the string data
	uint32_t	nLen ;				//	Length of strA
	bool		bFound = false ;	//	Indicates string to be replaced is found

	//	An empty pattern can never be substituted
	if (!strA || !strA[0])
		return *this ;
	if (!m_addr)
		return *this ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	nLen = strlen(strA) ;
	i = (char*) thisCtl->_data() ;

	//	Avoid building the working buffer if there is nothing to replace
	if (!strstr(i, strA))
		return *this ;

	end = i + strlen(i) ;

	for (; i < end ;)
	{
		//	Only compare where at least nLen bytes remain, so the comparison never reads beyond the terminator
		if ((uint32_t) (end - i) >= nLen && !memcmp(i, strA, nLen))
		{
			bFound = true ;
			if (strB && strB[0])
				Z << strB ;
			i += nLen ;
			continue ;
		}

		Z.AddByte(*i) ;
		i++ ;
	}

	if (bFound)
	{
		Clear() ;
		operator=(Z) ;
	}

	return *this ;
}
hzEcode	hzString::SetValue	(const char* cpStr, uint32_t nLen)
{
	//	Set this string to the first nLen bytes of a char string that need not be null terminated. Any existing value is cleared first. A NULL source,
	//	or one whose first byte is null, leaves the string empty.
	//
	//	Arguments:	1)	cpStr	The char* pointer
	//				2)	nLen	The length
	//
	//	Returns:	E_OK	If operation successful
	//				E_RANGE	If length is zero or too long (the string is then set to _hzString_TooLong)

	_hzfunc("hzString::SetValue(a)") ;

	Clear() ;

	if (!cpStr || !cpStr[0])
		return E_OK ;

	//	Reject lengths outside the permitted range
	if (nLen == 0 || nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	//	Populate the new string space
	_strItem*	pCtl = (_strItem*) _strXlate(m_addr) ;

	pCtl->_setSize(nLen) ;
	memcpy(pCtl->_data(), cpStr, nLen) ;
	pCtl->m_copy = 1 ;

	return E_OK ;
}
hzEcode	hzString::SetValue	(const char* cpStr, const char* cpTerm)
{
	//	Set this string to the bytes of cpStr that lie before cpTerm, or before the null terminator of cpStr if that comes first. Any existing value is
	//	cleared first.
	//
	//	NOTE(review): the scan compares the ADDRESS of each source byte with cpTerm, so cpTerm acts as an end position within cpStr, not as a sequence of
	//	chars to be matched. Confirm which of the two is intended by callers.
	//
	//	Arguments:	1)	cpStr	The start of the string value
	//				2)	cpTerm	The position at which the value ends
	//
	//	Returns:	E_OK	If operation successful (including the case where the value is empty)
	//				E_RANGE	If the length is too long (the string is then set to _hzString_TooLong)

	_hzfunc("hzString::SetValue(b)") ;

	_strItem*	destCtl ;		//	New string space
	const char*	i ;				//	Source string iterator
	uint32_t	nLen ;			//	Length to be allocated

	Clear() ;
	if (!cpStr || !cpStr[0])
		return E_OK ;

	for (nLen = 0, i = cpStr ; *i && i != cpTerm ; i++, nLen++) ;

	//	A zero length value (cpTerm equal to cpStr) is simply the empty string. Do not request a zero byte allocation
	if (!nLen)
		return E_OK ;

	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	destCtl = (_strItem*) _strXlate(m_addr) ;
	destCtl->_setSize(nLen) ;
	memcpy(destCtl->_data(), cpStr, nLen) ;
	destCtl->_data()[nLen] = 0 ;		//	The source is not null terminated at this point so terminate explicitly
	destCtl->m_copy = 1 ;

	return E_OK ;
}
hzEcode	hzString::SetValue	(const char* cpStr, char termchar)
{
	//	Set this string to the bytes of cpStr that precede the first instance of termchar, or the whole of cpStr if termchar does not occur. Any existing
	//	value is cleared first.
	//
	//	Arguments:	1)	cpStr		The source string value
	//				2)	termchar	The terminator char
	//
	//	Returns:	E_RANGE	If the supplied terminated string exceeds HZSTRING_MAXLEN characters (the string is then set to _hzString_TooLong)
	//				E_OK	If the string is set (including the case where the value is empty)

	_hzfunc("hzString::SetValue(c)") ;

	_strItem*	destCtl ;		//	New string space
	const char*	i ;				//	Source string iterator
	uint32_t	nLen ;			//	Length to be allocated

	Clear() ;
	if (!cpStr || !cpStr[0])
		return E_OK ;

	for (nLen = 0, i = cpStr ; *i && *i != termchar ; i++, nLen++) ;

	//	A source that starts with the terminator gives the empty string. Do not request a zero byte allocation
	if (!nLen)
		return E_OK ;

	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return E_RANGE ;
	}

	m_addr = _strAlloc(nLen) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nLen) ;

	destCtl = (_strItem*) _strXlate(m_addr) ;
	destCtl->_setSize(nLen) ;
	memcpy(destCtl->_data(), cpStr, nLen) ;
	destCtl->_data()[nLen] = 0 ;		//	The source is not null terminated at this point so terminate explicitly
	destCtl->m_copy = 1 ;

	return E_OK ;
}
hzString	hzString::SubString	(uint32_t nPosn, uint32_t nBytes) const
{
	//	Purpose:	Populate a string with a substring of this string. Return an empty string if the requested position is at or beyond the end of this
	//				string. Return a partial string if the length requested goes beyond the end of this string.
	//
	//	The position is tested against the length BEFORE the remainder is calculated. Both are unsigned, so subtracting a position that exceeds the length
	//	would wrap to a very large remainder.
	//
	//	Arguments:	1)	nPosn	Starting offset within this string.
	//				2)	nBytes	Length from here. A value of 0 indicates remainder of this string.
	//
	//	Returns:	Instance of hzString by value being the substring result

	_hzfunc("hzString::SubString") ;

	_strItem*	thisCtl ;		//	This string's control area
	_strItem*	destCtl ;		//	Result string control area
	hzString	Dest ;			//	Target string
	uint32_t	nLen ;			//	Length of this string
	uint32_t	nRemainder ;	//	Remainder of original past the stated position

	if (!m_addr)
		return Dest ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	nLen = Length() ;
	if (nPosn >= nLen)
		return Dest ;
	nRemainder = nLen - nPosn ;

	if (nBytes == 0 || nBytes > nRemainder)
		nBytes = nRemainder ;

	Dest.m_addr = _strAlloc(nBytes) ;
	if (!Dest.m_addr)
		hzexit(E_MEMORY, "Buffer of (%d) bytes", nBytes) ;
	destCtl = (_strItem*) _strXlate(Dest.m_addr) ;

	//	Copy from the stated position
	destCtl->_setSize(nBytes) ;
	memcpy(destCtl->_data(), thisCtl->_data() + nPosn, nBytes) ;
	destCtl->_data()[nBytes] = 0 ;		//	The copy may end mid-string so the terminator must be written here
	destCtl->m_copy = 1 ;

	return Dest ;
}
// Find first/last instance of a test char in this string (I denotes case insensitive) int32_t hzString::First (const char c) const { _hzfunc("hzString::First") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrFirst(thisCtl->_data(), c) ; }
int32_t hzString::FirstI (const char c) const { _hzfunc("hzString::FirstI") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrFirstI(thisCtl->_data(), c) ; }
int32_t hzString::Last (const char c) const { _hzfunc("hzString::Last") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLast(thisCtl->_data(), c) ; }
int32_t hzString::LastI (const char c) const { _hzfunc("hzString::LastI") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLastI(thisCtl->_data(), c) ; }
// Find first/last instance of a test cstr in this string (I denotes case insensitive) int32_t hzString::First (const char* str) const { _hzfunc("hzString::First(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrFirst(thisCtl->_data(), str) ; }
int32_t hzString::FirstI (const char* str) const { _hzfunc("hzString::FirstI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrFirstI(thisCtl->_data(), str) ; }
int32_t hzString::Last (const char* str) const { _hzfunc("hzString::Last(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLast(thisCtl->_data(), str) ; }
int32_t hzString::LastI (const char* str) const { _hzfunc("hzString::LastI(char*)") ; _strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLastI(thisCtl->_data(), str) ; }
// Find first/last instance of a test string in this string (I denotes case insensitive) int32_t hzString::First (const hzString& S) const { _hzfunc("hzString::First(hzStr)") ;
_strItem* thisCtl ; // This string's control area _strItem* suppCtl ; // This string's control area const char* test = 0 ; // Supplied string value
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; if (S.m_addr) { suppCtl = (_strItem*) _strXlate(S.m_addr) ; test = suppCtl->_data() ; }
return CstrFirst(thisCtl->_data(), test) ; }
int32_t	hzString::FirstI	(const hzString& S) const
{
	//	Hands the string data and the value of the supplied string to CstrFirstI().
	//
	//	Arguments:	1)	S	The test string
	//
	//	Returns:	-1 if this string is empty, otherwise the value returned by CstrFirstI()

	//	The function name registered here previously read "FirstiI", which misreported this function
	_hzfunc("hzString::FirstI(hzStr)") ;

	_strItem*	thisCtl ;	//	This string's control area

	if (!m_addr)
		return -1 ;

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	return CstrFirstI(thisCtl->_data(), *S) ;
}
int32_t hzString::Last (const hzString& S) const { _hzfunc("hzString::Last(hzStr)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLast(thisCtl->_data(), *S) ; }
int32_t hzString::LastI (const hzString& S) const { _hzfunc("hzString::LastI(hzStr)") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return -1 ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrLastI(thisCtl->_data(), *S) ; }
bool	hzString::Contains	(const char c) const
{
	//	Test if the string contains one or more instances of a test char
	//
	//	Every byte of the string, from offset 0 to offset Length()-1, is examined. The byte at offset Length() is not part of the content and is not
	//	examined, so a test char of 0 is only found if it occurs within the content.
	//
	//	Arguments:	1)	c	The char to test for
	//
	//	Returns:	True	If the test char exists within the string
	//				False	otherwise

	_hzfunc("hzString::Contains(char)") ;

	if (!m_addr)
		return false ;

	_strItem*	thisCtl ;	//	This string's control area
	const char*	i ;			//	Pointer into string data
	uint32_t	len ;		//	Length of this string
	uint32_t	n ;			//	Offset iterator

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	i = (char*) thisCtl->_data() ;
	len = Length() ;

	for (n = 0 ; n < len ; n++)
	{
		if (i[n] == c)
			return true ;
	}

	return false ;
}
bool hzString::Contains (const char* cpNeedle) const { // Test if the string contains a char string (case sensitive) // // Arguments: 1) cpStr The char sequence to test for // // Returns: True If this string contains the supplied test sequence // False Otherwise
_hzfunc("hzString::Contains(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!cpNeedle || !cpNeedle[0]) return true ; if (!m_addr) return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return strstr((char*) thisCtl->_data(), cpNeedle) ? true : false ; }
bool hzString::ContainsI (const char* cpNeedle) const { // Test if the string contains a char string (case insensitive) // // Arguments: 1) cpStr The char sequence to test for // // Returns: True If this string is lexically eqivelent to the supplied cstr // False Otherwise
_hzfunc("hzString::ContainsI(char*)") ;
_strItem* thisCtl ; // This string's control area
if (!cpNeedle || !cpNeedle[0]) return true ; if (!m_addr) return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrContainsI((char*) thisCtl->_data(), cpNeedle) ? true : false ; }
bool hzString::Equiv (const char* cpStr) const { // Test if this string is equal to the operand char string (on a case insensitive basis) // // Arguments: 1) cpStr The char sequence to test for // // Returns: True If this string is lexically eqivelent to the supplied cstr // False Otherwise
_hzfunc("hzString::Equiv") ;
_strItem* thisCtl ; // This string's control area
if (!m_addr) return false ;
thisCtl = (_strItem*) _strXlate(m_addr) ; return CstrCompareI(cpStr, (char*) thisCtl->_data()) == 0 ? true : false ; }
/* ** Assignment operators */
hzString&	hzString::operator=	(const hzString& op)
{
	//	Purpose:	Set this string equal to the operand.
	//
	//	If the internal address of this string instance is already equal to that of the operand, this function does nothing. Otherwise this string is
	//	cleared, the copy count of the operand string is incremented, then the internal address is set to that of the operand.
	//
	//	The copy count is only incremented while it is below 50. In multi-threaded mode (_hzGlobal_MT set) the increment is atomic and applies to the
	//	single byte copy count only, matching the atomic decrement used where string space is released elsewhere in this file.
	//
	//	Argument:	op	Reference to the operand hzString instance.
	//
	//	Returns:	Reference to this string instance

	_hzfunc("hzString::operator=(hzString&)") ;

	_strItem*	suppCtl ;	//	Supplied string's control area

	if (!this)
		hzerr(E_CORRUPT, "No instance") ;

	//	If this string's internal address and that of the operand already refer to the same string space, do nothing
	if (m_addr == op.m_addr)
		return *this ;

	//	If this string has a value, clear it.
	if (m_addr)
		Clear() ;

	//	If the operand has content, increment the copy count and make this string address equal to that of the supplied.
	if (op.m_addr)
	{
		suppCtl = (_strItem*) _strXlate(op.m_addr) ;

		if (suppCtl->m_copy < 50)
		{
			if (_hzGlobal_MT)
				__sync_add_and_fetch(&(suppCtl->m_copy), 1) ;
			else
				suppCtl->m_copy++ ;
		}
		m_addr = op.m_addr ;
	}

	return *this ;
}
hzString&	hzString::operator=	(const hzChain& C)
{
	//	Set this string to the content of the supplied chain. Any existing value is cleared first. An empty chain leaves the string empty. If the chain
	//	holds HZSTRING_MAXLEN bytes or more, the string is set to _hzString_TooLong instead of the chain content.
	//
	//	Arguments:	1)	C	The operand chain
	//
	//	Returns:	Reference to this string instance

	_hzfunc("hzString::op=(hzChain&)") ;

	if (m_addr)
		Clear() ;

	//	Empty chain, empty string
	if (!C.Size())
		return *this ;

	//	Oversize chain
	if (C.Size() >= HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	//	Create new internal structure
	m_addr = _strAlloc(C.Size()) ;
	if (!m_addr)
		hzexit(E_MEMORY, "Could not allocate internal buffer") ;

	_strItem*	pCtl = (_strItem*) _strXlate(m_addr) ;	//	New string space
	pCtl->_setSize(C.Size()) ;

	//	Transfer the chain content a byte at a time, then terminate
	char*	pOut = pCtl->_data() ;		//	New string space populator
	chIter	ci ;						//	Chain iterator

	ci = C ;
	while (!ci.eof())
	{
		*pOut = *ci ;
		pOut++ ;
		ci++ ;
	}
	*pOut = 0 ;

	pCtl->m_copy = 1 ;
	return *this ;
}
hzString&	hzString::operator=	(const char* cpStr)
{
	//	Set the value of this hzString instance to the operand null terminated char string. Note that it is possible for the operand to have come from
	//	the string itself. For this reason the existing value is not cleared until the new buffer has been allocated and populated. Clearing first could
	//	release the very memory the operand points into.
	//
	//	A NULL or empty operand clears the string. An operand longer than HZSTRING_MAXLEN sets the string to _hzString_TooLong.
	//
	//	Arguments:	1)	cpStr	The operand null terminated char string.
	//
	//	Returns:	Reference to this string instance

	_hzfunc("hzString::op=(const char*)") ;

	_strItem*	destCtl ;	//	New string space
	uint32_t	destAddr ;	//	Address of new string space
	uint32_t	nLen ;		//	Required length of new string

	if (!cpStr || !cpStr[0])
	{
		Clear() ;
		return *this ;
	}

	nLen = strlen(cpStr) ;
	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	//	Build the new value while the old one (and so possibly the operand) is still intact
	destAddr = _strAlloc(nLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes for value [%s]", nLen, cpStr) ;

	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nLen) ;
	strcpy(destCtl->_data(), cpStr) ;
	destCtl->m_copy = 1 ;

	//	Now release the old value and adopt the new
	Clear() ;
	m_addr = destAddr ;

	return *this ;
}
hzString&	hzString::operator=	(const uchar* cpStr)
{
	//	Set the value of this hzString instance to the operand null terminated unsigned char string. Note that it is possible for the operand to have
	//	come from the string itself. For this reason the existing value is not cleared until the new buffer has been allocated and populated. Clearing
	//	first could release the very memory the operand points into.
	//
	//	A NULL or empty operand clears the string. An operand longer than HZSTRING_MAXLEN sets the string to _hzString_TooLong.
	//
	//	Arguments:	1)	cpStr	The operand null terminated unsigned char string.
	//
	//	Returns:	Reference to this string instance

	//	The name registered here previously duplicated that of the (const char*) variant
	_hzfunc("hzString::op=(const uchar*)") ;

	_strItem*	destCtl ;	//	New string space
	uint32_t	destAddr ;	//	Address of new string space
	uint32_t	nLen ;		//	Required length of new string

	if (!cpStr || !cpStr[0])
	{
		Clear() ;
		return *this ;
	}

	nLen = strlen((char*) cpStr) ;
	if (nLen > HZSTRING_MAXLEN)
	{
		operator=(_hzString_TooLong) ;
		return *this ;
	}

	//	Build the new value while the old one (and so possibly the operand) is still intact
	destAddr = _strAlloc(nLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes for value [%s]", nLen, cpStr) ;

	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nLen) ;
	strcpy((char*) destCtl->_data(), (char*) cpStr) ;
	destCtl->m_copy = 1 ;

	//	Now release the old value and adopt the new
	Clear() ;
	m_addr = destAddr ;

	return *this ;
}
//	FnGrp:		operator+
//	Category:	Text Processing
//
//	Add two strings forming a new string. Neither of the two input strings is affected in the process.
//
//	Variants:	1)	Add two strings
//				2)	Add a string and a cstr
//				3)	Add a cstr and a string
//
//	Note there is no char* plus char* operator.
//
//	Arguments:	1)	a	The first string (as char* or hzString)
//				2)	b	The second string (as char* or hzString)
//
//	Returns:	Instance of new hzString by value
hzString	operator+	(const hzString a, const hzString b)
{
	//	Concatenate two hzString instances: the result is first assigned the value of a, then has b appended.

	_hzfunc("friend hzString operator+(1)") ;

	hzString	sum ;	//	Return string

	(sum = a) += b ;
	return sum ;
}
hzString	operator+	(const hzString a, const char* cpStr)
{
	//	Concatenate a hzString and a cstr: the result is first assigned the value of a, then has cpStr appended.

	_hzfunc("friend hzString operator+(2)") ;

	hzString	sum ;	//	Return string

	(sum = a) += cpStr ;
	return sum ;
}
hzString	operator+	(const char* cpStr, const hzString S)
{
	//	Concatenate a cstr and a hzString: the result is first assigned the value of cpStr, then has S appended.

	_hzfunc("friend hzString operator+(3)") ;

	hzString	sum ;	//	Return string

	(sum = cpStr) += S ;
	return sum ;
}
/* ** Appending operators */
hzString&	hzString::operator+=	(const hzString& op)
{
	//	Append the operand string to the contents of this.
	//
	//	A new string space is always allocated and populated before this instance's hold on the old one is released, so the operand may safely be this
	//	string itself. The release follows the same form as elsewhere in this file: the copy count is decremented (atomically in multi-threaded mode)
	//	and the old space is freed when the count reaches zero.
	//
	//	Arguments:	1)	op	Operand string
	//
	//	Returns:	Reference to this string in all cases

	_hzfunc("hzString::op+=(hzString&)") ;

	_strItem*	thisCtl = 0 ;	//	This string control area
	_strItem*	suppCtl ;		//	Supplied string control area
	_strItem*	destCtl ;		//	Result string space
	uint32_t	destAddr ;		//	New string address
	uint32_t	crlen ;			//	Len of this string
	uint32_t	nulen ;			//	Len of combined string

	//	If operand is empty do nothing
	if (!op.m_addr)
		return *this ;

	suppCtl = (_strItem*) _strXlate(op.m_addr) ;

	crlen = 0 ;
	if (m_addr)
	{
		thisCtl = (_strItem*) _strXlate(m_addr) ;
		crlen = thisCtl->_getSize() ;
	}

	//	Calculate required length
	nulen = crlen + suppCtl->_getSize() ;

	//	Allocate and populate a new buffer
	destAddr = _strAlloc(nulen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", nulen) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(nulen) ;

	if (crlen)
		memcpy(destCtl->_data(), thisCtl->_data(), crlen) ;

	//	The +1 brings the operand's null terminator across
	memcpy(destCtl->_data() + crlen, suppCtl->_data(), suppCtl->_getSize() + 1) ;
	destCtl->m_copy = 1 ;

	//	Tidy up: release this instance's hold on the old string space
	if (thisCtl && thisCtl->m_copy && thisCtl->m_copy < 50)
	{
		if (!_hzGlobal_MT)
		{
			thisCtl->m_copy-- ;
			if (!thisCtl->m_copy)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
		else
		{
			if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
	}

	m_addr = destAddr ;
	return *this ;
}
hzString&	hzString::operator+=	(const char* cpStr)
{
	//	Append the operand char string to the contents of this.
	//
	//	A new string space is allocated and populated before this instance's hold on the old one is released, so the operand may point into this
	//	string's own data. On release, the old space is freed when its copy count reaches zero in BOTH single and multi-threaded mode. The free must not
	//	be attached to the multi-threaded branch alone, or single-threaded appends leak the old space.
	//
	//	Arguments:	1)	cpStr	Operand null terminated char string
	//
	//	Returns:	Reference to this string

	_hzfunc("hzString::op+=(const char*)") ;

	_strItem*	thisCtl = 0 ;	//	This string's control area
	_strItem*	destCtl ;		//	Result string space
	uint32_t	destAddr ;		//	New string address
	uint32_t	strLen ;		//	Len of operand string
	uint32_t	oldLen = 0 ;	//	Len of this string

	//	If operand is empty do nothing
	if (!cpStr || !cpStr[0])
		return *this ;

	//	If this string is empty, make it equal to the supplied string
	if (!m_addr)
	{
		operator=(cpStr) ;
		return *this ;
	}

	thisCtl = (_strItem*) _strXlate(m_addr) ;
	oldLen = thisCtl->_getSize() ;
	strLen = strlen(cpStr) ;

	//	Allocate and populate a new buffer
	destAddr = _strAlloc(oldLen + strLen) ;
	if (!destAddr)
		hzexit(E_MEMORY, "Cannot allocate string of %d bytes", oldLen + strLen) ;
	destCtl = (_strItem*) _strXlate(destAddr) ;
	destCtl->_setSize(oldLen + strLen) ;

	if (oldLen)
		memcpy(destCtl->_data(), thisCtl->_data(), oldLen) ;

	//	The +1 brings the operand's null terminator across
	memcpy(destCtl->_data() + oldLen, cpStr, strLen + 1) ;
	destCtl->m_copy = 1 ;

	//	Tidy up: release this instance's hold on the old string space
	if (thisCtl->m_copy && thisCtl->m_copy < 50)
	{
		if (!_hzGlobal_MT)
		{
			thisCtl->m_copy-- ;
			if (!thisCtl->m_copy)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
		else
		{
			if (__sync_add_and_fetch(&(thisCtl->m_copy), -1) == 0)
				_strFree(m_addr, thisCtl->_getSize()) ;
		}
	}

	m_addr = destAddr ;
	return *this ;
}
int32_t	StringCompare	(const hzString& A, const hzString& B)
{
	//	Category:	Text Processing
	//
	//	Compare two hzString instances, case sensitive. The values are walked in step until they differ or either ends, and the difference between the
	//	two chars at that point is returned.
	//
	//	Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	//	Returns:	<0	If A is lexically less than B
	//				>0	If A is lexically more than B
	//				0	If A and B are equal.

	_hzfunc(__func__) ;

	const char*	pA = *A ;	//	Pointer to string A value
	const char*	pB = *B ;	//	Pointer to string B value

	//	Deal with either value being absent
	if (!pA)
	{
		if (!pB || !pB[0])
			return 0 ;
		return -*pB ;
	}

	if (!pB || !pB[0])
		return *pA ;

	//	Advance over the common leading part
	while (*pA && *pB && *pA == *pB)
	{
		pA++ ;
		pB++ ;
	}

	return *pA - *pB ;
}
int32_t	StringCompareI	(const hzString& A, const hzString& B)
{
	//	Category:	Text Processing
	//
	//	Compare two hzString instances, ignoring case. The values are walked in step, each char being passed through _tolower() before comparison, until
	//	they differ or either ends. The difference between the two converted chars at that point is returned.
	//
	//	Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	//	Returns:	<0	If A is lexically less than B
	//				>0	If A is lexically more than B
	//				0	If A and B are equivalent.

	_hzfunc(__func__) ;

	const char*	pA = *A ;	//	Pointer to string A value
	const char*	pB = *B ;	//	Pointer to string B value

	//	Deal with either value being absent
	if (!pA)
	{
		if (!pB || !pB[0])
			return 0 ;
		return -_tolower(*pB) ;
	}

	if (!pB || !pB[0])
		return _tolower(*pA) ;

	//	Advance over the common leading part
	while (*pA && *pB && _tolower(*pA) == _tolower(*pB))
	{
		pA++ ;
		pB++ ;
	}

	return _tolower(*pA) - _tolower(*pB) ;
}
int32_t	StringCompareW	(const hzString& A, const hzString& B)
{
	//	Category:	Text Processing
	//
	//	Compares two hzString instances but ignores whitespace. The comparison itself is carried out by CstrCompareW() on the two string values.
	//
	//	Arguments:	1)	A	First test string
	//				2)	B	Second test string
	//
	//	Returns:	<0	If A is lexically less than B
	//				>0	If A is lexically more than B
	//				0	If A and B are equivalent.

	_hzfunc(__func__) ;

	const char*	pA = *A ;	//	Pointer to string A value
	const char*	pB = *B ;	//	Pointer to string B value

	return CstrCompareW(pA, pB) ;
}
int32_t	StringCompareF	(const hzString& a, const hzString& b)
{
	//	Category:	Text Processing
	//
	//	Fast String Compare.
	//
	//	A string compare function is normally expected to determine correctly whether one string is greater or less than another. That is not necessary
	//	in sets and maps where the only concern is lookup and there is no intent to export the content in lexical order. Comparison can then be sped up
	//	considerably by treating string values as arrays of 64 bit integers rather than arrays of bytes. This function simply defers to the CompareF()
	//	member of the first string.
	//
	//	Arguments:	1)	a	The 1st string
	//				2)	b	The 2nd string
	//
	//	Returns:	+1	If a > b
	//				-1	If a < b
	//				0	If a and b are equal.

	_hzfunc(__func__) ;

	const int32_t	nResult = a.CompareF(b) ;	//	Outcome of the fast compare

	return nResult ;
}