// // File: hzIdxCh.cpp // // Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com) // // The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free // Software Foundation, either version 3 of the License, or any later version. // // The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR // A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses. //
#include <fstream> #include <iostream>
#include <stdarg.h>
#include "hzChars.h" #include "hzTextproc.h" #include "hzIpaddr.h" #include "hzTmplSet.h" #include "hzProcess.h"
using namespace std ;
/* ** Definitions */
class hzEChain { // Category: Data // // SUPPORT class for _cache_blk // // hzEChain (Entity or Element chain), is specifically designed to operate as element container in indexed chain implementations, and is not intended for general use. hzEChain // supports rudimentary data operations as follows:- // // INSERT Insert the supplied new element content, into the element chain at the supplied address // UPDATE Replace the element content found at the supplied address, with the supplied new element content // DELETE Delete the element found at the supplied address, from the chain // FETCH Fetch the element found at the supplied address // // This class treats elements as binary datum from which nothing but datum size can be discerned. Element constitution is defined by the calling process, which must be able to // compare elements, so that the element chain can be kept in value order. To this end, the calling process must locate the target element ahead of an INSERT, UPDATE or DELETE // operation. // // Element addresses are expressed as a pair of numbers: the chain block address; and the element position within the block. The target block is found first, by binary chop on // the chain of blocks, then the element within the block is found, usually by iteration. // // Indexed chain implimentations are described in article 4.3 ISAM and Indexed Chain, in the HadronZoo Library Manual.
// Prevent copy construction hzEChain (hzEChain& op) {}
public: struct _ech_bloc { uint32_t m_Buckets[8] ; // Up to 8 buckets uint32_t m_Aux[8] ; // Up to 8 buckets uint32_t m_Next ; // Superior adjacent uint32_t m_Prev ; // Inferior adjacent uint32_t m_Addr ; // Address of block uint16_t m_nCapacity ; // Capacity of allocated buckets uint16_t m_nUsed ; // Total bytes used uint16_t m_First ; // First element in block uint16_t m_Flags ; } ;
hzArray<void*> m_Blox ; // Chain of ... hzArray<_ech_bloc> m_Blocks ; // Chain of blocks, in order of creation hzSet<uint32_t> m_BlkIdx ; // Ids of blocks in value order
hzEcode Insert (const hzChain& Z, uint32_t blkNo, uint32_t blkPosn) ; hzEcode Update (uint32_t blkNo, uint32_t blkPosn, const hzChain& Z) ; hzEcode Delete (uint32_t blkNo, uint32_t blkPosn) ; hzEcode Fetch (hzChain& Z, uint32_t blkNo, uint32_t blkPosn) ;
void Clear (void) ;
// Constructors/Destructors hzEChain (void) ; ~hzEChain (void) ; } ;
#define EBUKSIZE 252 // Overflow bucket size #define EBLKSIZE 1004 // Standard block size #define EBLKMAX 1760 // 1 full block plus up to 3 overflow buckets
struct _ebuk { // Category: Data // // Small bucket for chain block overflows
uint32_t m_nNext ; // Next _ebuk or _eblk address. If top bit set, next block is an _ebuk, otherwise it is an _eblk. uchar m_Data[252] ; // Up to 252 bytes of data } ;
struct _eblk { // Category: Data // // Chain block
uint32_t m_nAddr ; // Actual address in the main array of blocks uint32_t m_nPosn ; // Position within value-ordered chain uint32_t m_nNext ; // Actual address of next block in chain. This must have a position equal to this block's postition + 1 uint32_t m_nPrev ; // Actual address of prev block in chain. This must have a position equal to this block's postition - 1 uint16_t m_nUsage ; // bytes used uint16_t m_nFirst ; // Offset to first element char m_Data[EBLKSIZE] ; // data area
_eblk (void) { // Constructor
m_nAddr = m_nPosn = m_nNext = m_nPrev = 0 ; m_nUsage = 0 ; m_nFirst = 0xffff ; memset(m_Data, 0, EBLKSIZE) ; } } ;
/* ** Memory management regime for chains */
static hzLockRWD s_chain_mutex("hzEChain mutex") ; // Protects the memory allocation
//static _eblk* s_eblk_free ; // Free list pointer
/* ** Element Chain Functions */
hzEcode hzEChain::Insert (const hzChain& Z, uint32_t blkNo, uint32_t blkPosn) { // Insert a new element (the contents of the supplied hzChain), at the supplied position within the supplied block. // // This action will expand the block which may result in block spliting. // // Arguments: 1) Z New element // 2) blkNo The chain block id // 3) blkPosn Position within block // // Returns: E_NODATA If the new element is empty // E_NOTFOUND If the block is more than one ahead of the total // E_RANGE If the element does not exist within the block // E_CORRUPT If no elements begin within the block // E_OK Operation successful
if (blkNo > m_Blocks.Count()) return E_NOTFOUND ;
return E_OK ; }
hzEcode hzEChain::Update (uint32_t blkNo, uint32_t blkPosn, const hzChain& Z) { // Replace the element at the supplied position within the supplied block, with the contents of the supplied hzChain. // // This action will expand or contract the block if the replacement element is larger or smaller than the original. An expansion may result in block spliting. A contraction in // the merger of two adjacent blocks. // // Arguments: 1) blkNo The chain block id // 2) blkPosn Position within block // 3) Z Replacement element // // Returns: E_NOTFOUND If the block does not exist // E_RANGE If the element does not exist within the block // E_CORRUPT If no elements begin within the block // E_OK Operation successful
return E_OK ; }
hzEcode hzEChain::Delete (uint32_t blkNo, uint32_t blkPosn) { // Delete the element at the supplied position within the supplied block. This may result in block deletion and in the merger of two adjacent blocks. // // Arguments: 1) Z Element recepticle // 2) blkNo The chain block id // 3) blkPosn Position within block // // Returns: E_NOTFOUND If the block does not exist // E_RANGE If the element does not exist within the block // E_CORRUPT If no elements begin within the block // E_OK Operation successful
return E_OK ; }
hzEcode hzEChain::Fetch (hzChain& Z, uint32_t blkNo, uint32_t blkPosn) { // Fetch into the supplied chain, the element at the supplied position within the supplied block // // Arguments: 1) Z Element recepticle // 2) blkNo The chain block id // 3) blkPosn Position within block // // Returns: E_NOTFOUND If the block does not exist // E_RANGE If the element does not exist within the block // E_CORRUPT If no elements begin within the block // E_OK Operation successful
return E_OK ; }
#if 0 _eblk* _eblk_alloc (void) { // Allocate a chain block. Either draw from a list of free blocks or create a new block // // Arguments: 1) id The chain id // // Returns: Pointer to a usable z-block
_hzfunc("_eblk_alloc") ;
_eblk* zp = 0 ; // Pointer to a real block (8 bytes)
s_chain_mutex.LockWrite() ;
if (s_eblk_free) { // Set allocation address to the freelist, set the freelist to the next in the list. zp = s_eblk_free ; s_eblk_free = zp->next ;
_hzGlobal_Memstats.m_numChainBF-- ; } else { // No free blocks so allocate a new one zp = new _eblk() ;
if (zp) _hzGlobal_Memstats.m_numChainBlks++ ; }
// Return the address s_chain_mutex.Unlock() ;
if (!zp) Fatal("No chain block allocated\n") ;
// Clear the free block's values to ready it for use zp->clear() ; //zp->m_Id = id ;
return zp ; }
/* ** Section 1B: hzEChain public functions */
void hzEChain::Clear (void) { // Clears all content held by the hzEChain. // // Arguments: None // Returns: None
_hzfunc("hzEChain::Clear") ; }
hzEChain::hzEChain (void) { // Construct an empty hzEChain instance. Increment the global count of currently allocated hzEChain instances for memory use reporting purposes.
_hzGlobal_Memstats.m_numChain++ ; mx = 0 ; }
extern uint32_t xnumlen32 (uint32_t v) ; extern uint32_t xnumlen64 (uint64_t v) ;
extern uint32_t _numlen32 (int32_t v) ; extern uint32_t _numlen32 (uint32_t v) ;
extern uint32_t _numlen64 (int64_t v) ; extern uint32_t _numlen64 (uint64_t v) ;
// End of hzEChain Append functions
hzEChain& hzEChain::operator+= (const hzString& s) { // Append chain with supplied string // // Arguments: 1) s The supplied string // // Returns: Reference to this chain
_hzfunc("hzEChain::operator+=(hzString&)") ;
//const char* i ; //uint32_t len ;
if (!this) hzexit(E_CORRUPT, "No instance") ;
//len = s.Length() ; //i = *s ;
if (!s) return *this ;
Append(*s, s.Length()) ; return *this ; }
hzEChain& hzEChain::operator+= (const char* s) { // Append the current chain with the supplied char string operand. // // Arguments: 1) s The null terminated string to add to the chain // // Returns: Reference to this chain
_hzfunc("hzEChain::operator+=(char*)") ;
_eblk* curBlk ; // Block pointer _eblk* newBlk ; // Working block pointer const char* i ; // Supplied string iterator
if (!s || !s[0]) return *this ;
// If nothing in chain, create the first block if (!mx) mx = new _chain() ; if (!mx->m_Begin) mx->m_Begin = mx->m_End = _eblk_alloc() ;
curBlk = (_eblk*) mx->m_End ; if (!curBlk) Fatal("Chain %p has no end block\n", this) ;
for (i = s ; *i ; i++) { if (curBlk->xize == EBLKSIZE) { // Out of space - make new block
newBlk = _eblk_alloc() ; if (!newBlk) Fatal("No allocation (case 2)\n") ;
newBlk->prev = (_eblk*) mx->m_End ; curBlk->next = newBlk ; mx->m_End = newBlk ;
curBlk = newBlk ; }
curBlk->m_Data[curBlk->xize] = *i ; curBlk->xize++ ; mx->m_nSize++ ; }
return *this ; }
hzEChain& hzEChain::operator<< (const hzEChain& C) { // Append chain with supplied chain // // Arguments: 1) C The chain to add to this chain // Returns: Reference to this chain
_hzfunc("hzEChain::operator<<(hzEChain&)") ;
return operator+=(C) ; }
hzEChain& hzEChain::operator<< (const hzEmaddr& e) { // Append chain with supplied Email address // // Arguments: 1) C The chain to add to this chain // Returns: Reference to this chain
_hzfunc("hzEChain::operator<<(hzEmaddr&)") ;
return operator+=(*e) ; }
hzEChain& hzEChain::operator<< (const hzIpaddr& i) { // Append chain with supplied IP address // // Arguments: 1) i The IP address to add to this chain // Returns: Reference to this chain
_hzfunc("hzEChain::operator<<(hzIpaddr&)") ;
return operator+=(i.Txt()) ; }
/* ** Section 2: hzEChain Iterator functions */
char hzEChain::Iter::operator* (void) const { // Return the value of the char currently pointed to by the iterator // // Arguments: None // // Returns: >0 The current character of the chain iterator // 0 If the chain iterator is at the end of the chan or the chain is empty
_hzfunc("hzEChain::Iter::operator*") ;
_eblk* zp ; // Block pointer
if (!m_block) return m_cDefault ;
zp = (_eblk*) m_block ; if (!zp) Fatal("Cannot access block %d\n", m_block) ;
if (!zp->next && m_nOset == zp->xize) return m_cDefault ;
if (m_nOset >= zp->xize) Fatal("Have block %d next %d oset %d size %d\n", zp->next, m_block, m_nOset, zp->xize) ;
return zp->m_Data[m_nOset] ; }
bool hzEChain::Iter::eof (void) const { // Rteurns true if the iterator is at EOF. Returns false otherwise. // // Arguments: None // // Returns: True If the iterator is at EOF // False If the iterator is still valid
_hzfunc("hzEChain::Iter::eof") ;
_eblk* zp ; // Block pointer
if (!m_block) return true ;
zp = (_eblk*) m_block ; if (!zp) Fatal("Cannot access block %d\n", m_block) ;
if (zp->next) return false ; return m_nOset < zp->xize ? false : true ; }
bool hzEChain::Iter::Equal (const char c) const { // Determines if the supplied char (arg 1) is the same as the current char in the chain iterator. // // - The comparison is case-sensitive. // - Returns true/false // // Arguments: 1) c The test character // // Returns: True If the supplied char is equal to that of the current iterator char // False Otherwise
_hzfunc("hzEChain::Iter::Equal(char)") ;
_eblk* zp ; // Block pointer
if (!m_block) return false ;
zp = (_eblk*) m_block ; if (!zp) Fatal("Cannot access block %d\n", m_block) ;
return zp->m_Data[m_nOset] == c ? true : false ; }
bool hzEChain::Iter::Equal (const char* s) const { // Determine if the supplied char sequence (arg 1) matches that at the current point in the chain iterator, return true if it does, false // otherwise. The comparison is case-sensitive and the function does not alter (advance or retard) the iterator. // // Arguments: 1) s The test char string // // Returns: True If the supplied cstr matches that found at the current iterator position // False Otherwise
_hzfunc("hzEChain::Iter::Equal(char*)") ; _eblk* zp ; // Chain block pointer const char* i = s ; // Test string iterator uint32_t nOset ; // Offset within block
// Dismiss the case where the iterator is null if (!m_block) return s && s[0] ? false : true ;
zp = (_eblk*) m_block ; if (!zp) Fatal("Cannot access block %d\n", m_block) ;
// Dismiss the case where the operand is null if (!i) return zp->m_Data[nOset] ? false : true ;
// Test for equality for (nOset = m_nOset ; *i ; i++) { if (!zp) return false ;
if (*i != zp->m_Data[nOset]) return false ;
nOset++ ; if (nOset >= zp->xize) { nOset = 0 ; zp = zp->next ; } }
return true ; }
bool hzEChain::Iter::Equiv (const char* s) const { // Determines if the supplied char sequence (arg 1) is found at the current char in the chain iterator. Note this function does not advance // the iterator. The comparison is case-insensitive. // // Arguments: 1) s The test char string // // Returns: True If the supplied cstr matches that found at the current iterator position // False Otherwise
_hzfunc("hzEChain::Iter::Equiv") ; _eblk* zp ; // Chain block pointer const char* i = s ; // Test string iterator uint32_t nOset ; // Offset within block
// Dismiss the case where the iterator is null if (!m_block) return s && s[0] ? false : true ;
zp = (_eblk*) m_block ; if (!zp) Fatal("Cannot access block %d\n", m_block) ;
// Dismiss the case where the operand is null if (!i) return zp->m_Data[nOset] ? false : true ;
for (nOset = m_nOset ; *i ; i++) { if (!zp) return false ;
if (_tolower(*i) != _tolower(zp->m_Data[nOset])) return false ;
nOset++ ; if (nOset >= zp->xize) { nOset = 0 ; zp = zp->next ; } }
return true ; }
uint32_t hzEChain::Iter::Advance (uint32_t nInc) { // Increments the current chain iterator by the requested length. Will set the iterator to the end of the chain if the requested // increment is too great. // // Arguments: 1) nInc Number of bytes to advance // // Returns: Number of bytes advanced
_hzfunc("hzEChain::Iter::Advance") ;
_eblk* zp ; // Chain block pointer uint32_t nCan ; // Advance possibible within current block uint32_t nAdv = 0 ; // Number of chars actully advanced
if (nInc < 0) return 0 ;
zp = (_eblk*) m_block ; if (!zp) return 0 ;
for (; nAdv < nInc ;) { nCan = zp->xize - m_nOset ; if ((nCan + nAdv) > nInc) nCan = nInc - nAdv ;
m_nOset += nCan ; nAdv += nCan ;
if (m_nOset == zp->xize) { if (!zp->next) break ; m_nOset = 0 ; m_block = zp = zp->next ; } }
return nAdv ; }
hzEChain::Iter& hzEChain::Iter::operator++ (void) { // Increments the current chain iterator if it can be incremented ie is not at the end of the chain. Note that the void argument means this // is the 'post evaluation version' called when the code is 'iter++' rather than '++iter'. // // Arguments: None // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator++") ;
_eblk* zp ; // Chain block pointer
if (m_block) { zp = (_eblk*) m_block ;
if (zp->m_Data[m_nOset] == CHAR_NL) { m_nCol = 0 ; m_nLine++ ; }
if (zp->m_Data[m_nOset] == CHAR_TAB) m_nCol += (4-(m_nCol%4)) ; else m_nCol++ ;
if (m_nOset < zp->xize) m_nOset++ ;
if (m_nOset >= zp->xize) { if (zp->next) { m_block = zp->next ; m_nOset = 0 ; } } }
return *this ; }
hzEChain::Iter& hzEChain::Iter::operator++ (int) { // Increments the current chain iterator if it can be incremented ie is not at the end of the chain. Note that the void argument means this // is the 'pre evaluation version' called when the code is '++iter' rather than 'iter++'. // // Arguments: Nominal int argument. No actual argument. // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator++") ;
_eblk* zp ; // Chain block pointer
if (m_block) { zp = (_eblk*) m_block ;
if (zp->m_Data[m_nOset] == CHAR_NL) { m_nCol = 0 ; m_nLine++ ; }
if (zp->m_Data[m_nOset] == CHAR_TAB) m_nCol += (4-(m_nCol%4)) ; else m_nCol++ ;
if (m_nOset < zp->xize) m_nOset++ ;
if (m_nOset >= zp->xize) { if (zp->next) { m_block = zp->next ; m_nOset = 0 ; } } }
return *this ; }
hzEChain::Iter& hzEChain::Iter::operator-- (void) { // Decrements the current chain iterator if it can be incremented (is not at the end of the chain) // // Arguments: None // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator--") ;
_eblk* zp ; // Chain block pointer
if (m_block) { zp = (_eblk*) m_block ;
m_nOset-- ; if (m_nOset < 0) { m_nOset = 0 ; if (zp->prev) { zp = zp->prev ; if (zp) m_nOset = zp->xize - 1 ; } } }
return *this ; }
hzEChain::Iter& hzEChain::Iter::operator-- (int) { // Decrements the current chain iterator if it can be incremented (is not at the end of the chain) // // Arguments: Nominal int argument. No actual argument. // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator--(int)") ;
_eblk* zp ; // Chain block pointer
if (m_block) { zp = (_eblk*) m_block ;
m_nOset-- ; if (m_nOset < 0) { m_nOset = 0 ; if (zp->prev) { zp = zp->prev ; if (zp) m_nOset = zp->xize - 1 ; } } }
return *this ; }
hzEChain::Iter& hzEChain::Iter::operator+= (uint32_t nInc) { // Increments the current chain iterator by the requested length. Will set the iterator to the end of the chain if the requested increment is too great. // // Arguments: 1) nInc The number of bytes to increment the chain iterator by // // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator+=") ;
_eblk* zp ; // Chain block pointer
zp = (_eblk*) m_block ;
for (; zp && nInc > 0 ;) { if (zp->m_Data[m_nOset] == CHAR_NL) { m_nCol = 0 ; m_nLine++ ; }
if (zp->m_Data[m_nOset] == CHAR_TAB) m_nCol += (4-(m_nCol%4)) ; else m_nCol++ ;
if (m_nOset < zp->xize) m_nOset++ ;
if (m_nOset >= zp->xize) { if (zp->next) { m_nOset = 0 ; m_block = zp = zp->next ; } } nInc-- ; }
return *this ; }
hzEChain::Iter& hzEChain::Iter::operator-= (uint32_t nDec) { // Retards the current chain iterator by the requested length. Will set the iterator to the start of the chain if the requested decrement is too great. // // Arguments: 1) nInc The number of bytes to decrement the chain iterator by // // Returns: Reference to this chain iterator
_hzfunc("hzEChain::Iter::operator-=") ;
_eblk* zp ; // Chain block pointer
zp = (_eblk*) m_block ;
for (; zp && nDec ;) { if (m_nOset >= nDec) { // If we can decrement N places and still be on the same block, then just decrement the offset m_nOset -= nDec ; break ; }
nDec -= m_nOset ;
if (!zp->prev) { m_nOset = 0 ; break ; }
m_block = zp = zp->prev ;
if (zp) m_nOset = (zp->xize - 1) ; else m_nOset = 0 ; }
return *this ; }
char hzEChain::Iter::operator[] (uint32_t nOset) const { // Returns the value of the character pointed to by the iterator plus the supplied offset. The offsets should preferably be small for reasons of efficiency // as the method winds through the requested number of places each time. The envisiaged use is in tokenization or encoding where the percent sign followed // by a percent sign is a percent but a percent sign followed by two hexidecimal characters is an 8-bit ASCII character. It is a convient means of a look // ahead only. // // Arguments: 1) nOset Look ahead offset // // Returns: >0 The character at the relative position // 0 If the current position plus the offset exeeds the chain length.
_hzfunc("hzEChain::Iter::operator[]") ;
hzEChain::Iter ci ; // Chain iterator
ci = *this ; ci += nOset ; return *ci ; }
hzEChain::Iter& hzEChain::Iter::Skipwhite (void) { // Advance interator to next non-whitespace char. Unless the iterator is currently pointing at a whitespace char, it does nothing. // // Arguments: None // Returns: Reference to this iterator instance
_hzfunc("hzEChain::Iter::Skipwhite") ;
_eblk* zp ; // Chain block pointer
zp = (_eblk*) m_block ;
if (zp && m_nOset < zp->xize) { for (; zp && IsWhite(zp->m_Data[m_nOset]) ;) { if (zp->m_Data[m_nOset] == CHAR_NL) { m_nCol = 0 ; m_nLine++ ; }
if (zp->m_Data[m_nOset] == CHAR_TAB) m_nCol += (4-(m_nCol%4)) ; else m_nCol++ ;
m_nOset++ ; if (m_nOset >= zp->xize) { if (zp->next) { m_block = zp->next ; m_nOset = 0 ; zp = zp->next ; } } } }
return *this ; }
uint32_t hzEChain::Iter::Write (void* pBuf, uint32_t maxBytes) { // Write out to the supplied buffer, from the current position, upto maxBytes. Do not increment the iterator. // // Arguments: 1) pBuf The output buffer // 2) maxBytes The maximum number to write (size of buffer) // // Returns: Number of bytes written to the buffer
_hzfunc("hzEChain::Iter::Write") ;
_eblk* zp ; // Working block pointer char* i ; // Input iterator uint32_t nOset ; // Current offset uint32_t nAvail ; // Max number of bytes that can be written from current block uint32_t nWritten = 0 ; // Limiter
if (maxBytes < 0) return -1 ;
zp = (_eblk*) m_block ; if (!zp) return -1 ; nOset = m_nOset ;
i = (char*) pBuf ; for (; nWritten < maxBytes ;) { if (nOset == zp->xize) { // At end of current block, move to next zp = zp->next ; if (!zp) break ; nOset = 0 ; }
nAvail = zp->xize - nOset ;
if ((nWritten + nAvail) > maxBytes) nAvail = maxBytes - nWritten ;
// Add bytes to current block memcpy(i, zp->m_Data + nOset, nAvail) ; nOset += nAvail ; i += nAvail ; nWritten += nAvail ; }
return nWritten ; } #endif