// // File: hzIdset.cpp // // Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com) // // The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free // Software Foundation, either version 3 of the License, or any later version. // // The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR // A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses. //
// The complete set of encodings currently in use are as follows:- // // 0xxxxxxx 1-byte single-increment. Range 1 to 128. // // 100xxxxx 2-byte single-increment. Range 129 to 8,320. // // 101xxxxx 3-byte single-increment. Range 8,321 to 2,105,472. // // 110xxxxx 4-byte single-increment. Range 2,105,473 to 538,976,384. // // 11100001 Single-increment, absoluete value. // // 11100010 Invert Segment. From now to end of segmenmt, numbers are the ids the segment does not have. // // 11100011 Invert Ongoing. From now until further notice, numbers are the ids the segment does not have. // // 11100100 Invert Stop. Stop inverting (can only negate an invert ongoing directive). // // 11100101 Bitmap Variable Size: The next N bytes are a straight bitmap. // // 11100110 Bitmap Full: Full 512 bit segment. This must specify the segment number (3 bytes). Total size of EIU is 68 bytes as there is the control byte and 64 data bytes. // // 11100111 Bit-tree: These have a root index byte, each bit of which indicates an index byte in the second level. Upto 8 index bytes in the second level indicate up to 64 data // bytes in the final level. Each bit in each data byte indicates a number allowing the bitmap to store up to 512 numbers. These are only used for full segments and so // like Bitmap-Full, must specify the segment number.
#include <iostream>
#include "hzChars.h" #include "hzDatabase.h"
using namespace std ;
/* ** Variables */
#define IDSET_SEGSIZE 512 // Range of value segments
static char s_BitArray [256] = { // Array to speed up bit counting per byte
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0 - 15 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 16 - 31 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 32 - 47 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 48 - 63 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 64 - 79 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 80 - 95 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 96 - 111 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 112 - 127 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 128 - 143 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 144 - 159 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 160 - 175 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 176 - 191 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 192 - 207 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 208 - 223 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 224 - 239 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 // 240 - 255 } ;
/* ** SECTION 1: Bitmap non member functions */
uint32_t CountBits (const void* buf, uint32_t nBytes) { // Category: Bitwise // // Count the total number of bits set in the supplied buffer, taken to be of the supplied size. // // Arguments: 1) buf The const void* buffer // 2) nBytes The number of bytes // // Returns: Number of bits found to be set
const uchar* uber ; // Buffer as a series of chars uint32_t nCount ; // Buffer iterator uint32_t nBits ; // Cumulative bit counter
if (!buf) return -1 ; uber = (uchar*) buf ;
for (nBits = nCount = 0 ; nCount < nBytes ; nCount++) nBits += s_BitArray[uber[nCount]] ;
return nBits ; }
void SetBits (uchar* pBitbuf, uint32_t nOset, bool bValue) { // Category: Bitwise // // Set or clear the bit in the buffer at the given offset // // Arguments: 1) pBitbuf The buffer // 2) nOset The offset (is divided by 8) // 2) bValue The offset (is divided by 8) // // Returns: True If the requested bit number is set within the segment // False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("__func__") ;
if (pBitbuf) { if (bValue) { switch (nOset % 8) { case 0: pBitbuf[nOset/8] |= 0x80 ; break ; case 1: pBitbuf[nOset/8] |= 0x40 ; break ; case 2: pBitbuf[nOset/8] |= 0x20 ; break ; case 3: pBitbuf[nOset/8] |= 0x10 ; break ; case 4: pBitbuf[nOset/8] |= 0x08 ; break ; case 5: pBitbuf[nOset/8] |= 0x04 ; break ; case 6: pBitbuf[nOset/8] |= 0x02 ; break ; case 7: pBitbuf[nOset/8] |= 0x01 ; break ; } } else { switch (nOset % 8) { case 0: pBitbuf[nOset/8] &= ~0x80 ; break ; case 1: pBitbuf[nOset/8] &= ~0x40 ; break ; case 2: pBitbuf[nOset/8] &= ~0x20 ; break ; case 3: pBitbuf[nOset/8] &= ~0x10 ; break ; case 4: pBitbuf[nOset/8] &= ~0x08 ; break ; case 5: pBitbuf[nOset/8] &= ~0x04 ; break ; case 6: pBitbuf[nOset/8] &= ~0x02 ; break ; case 7: pBitbuf[nOset/8] &= ~0x01 ; break ; } } } }
bool GetBits (const uchar* pBitbuf, uint32_t nOset) { // Category: Bitwise // // Determine if the bit in the buffer at the given offset is set // // Arguments: 1) pBitbuf The buffer // 2) nOset The offset (is divided by 8) // // Returns: True If the requested bit number is set within the segment // False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("__func__") ;
if (!pBitbuf) return false ;
switch (nOset%8) { case 0: return pBitbuf[nOset/8] & 0x80 ? true : false ; case 1: return pBitbuf[nOset/8] & 0x40 ? true : false ; case 2: return pBitbuf[nOset/8] & 0x20 ? true : false ; case 3: return pBitbuf[nOset/8] & 0x10 ? true : false ; case 4: return pBitbuf[nOset/8] & 0x08 ? true : false ; case 5: return pBitbuf[nOset/8] & 0x04 ? true : false ; case 6: return pBitbuf[nOset/8] & 0x02 ? true : false ; case 7: return pBitbuf[nOset/8] & 0x01 ? true : false ; }
return false ; }
/* ** SECTION 2: Idset segments */
class _idset_seg { // The _idset_seg (bitmap segment) class comprises a count, a segment number and 32 bytes of storage. Currently this storage space is used as a bitmap to hold upto 512 values.
uint64_t m_work[8] ; // 512-bit working buffer uint32_t m_count ; // Number of bits set uint32_t m_segno ; // Segment number
// Non-implimented copy constructor _idset_seg (const _idset_seg& op) ;
// Non-implimented Assignment operator _idset_seg& operator= (const _idset_seg& op) ;
public: _idset_seg (void) { m_count = m_segno = 0 ; } ~_idset_seg (void) {}
// Metadata uint32_t Count (void) const { return m_count ; } // Return population count uint32_t Segno (void) const { return m_segno ; } // Return segment number //bool Perm (void) const { return m_count & 0x8000 ? false : true ; }
// Clear all bits in the segment void Clear (void) { m_count = m_segno = 0 ; memset(m_work, 0, 64) ; }
// Add an element to the segment uint32_t Insert (const hzSet<uint32_t>& idset, uint32_t& nPosn) ; hzEcode Insert (uint32_t nElement) ;
// Remove an element from the segment hzEcode Delete (uint32_t nElement) ; uint32_t Delete (const hzSet<uint32_t>& idset, uint32_t& nPosn) ;
// Where a segment has a single value bool IsSet (uint32_t oset) const ;
// Fetch a number of elements from the segment int32_t _fetch (hzVect<uint32_t>& Result, uint32_t nBase, uint32_t nStart, uint32_t nReq) const ;
// Boolean operators _idset_seg& operator|= (const _idset_seg& ss) ; _idset_seg& operator&= (const _idset_seg& ss) ; } ;
hzEcode _idset_seg::Insert (uint32_t nId) { uchar* ptr ; // Cast to m_Work uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ; if (nRange != m_segno) return E_RANGE ;
ptr = (uchar*) m_work ; SetBits(ptr, nId % IDSET_SEGSIZE, true) ; return E_OK ; }
hzEcode _idset_seg::Delete (uint32_t nId) { uchar* ptr ; // Cast to m_Work uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ; if (nRange != m_segno) return E_RANGE ;
ptr = (uchar*) m_work ; SetBits(ptr, nId % IDSET_SEGSIZE, false) ; return E_OK ; }
bool _idset_seg::IsSet (uint32_t nId) const { uchar* ptr ; // Cast to m_Work uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ; if (nRange != m_segno) return false ; ptr = (uchar*) m_work ; return GetBits(ptr, nId % IDSET_SEGSIZE) ; }
_idset_seg& _idset_seg::operator|= (const _idset_seg& ss) { m_work[0] |= ss.m_work[0] ; m_work[1] |= ss.m_work[1] ; m_work[2] |= ss.m_work[2] ; m_work[3] |= ss.m_work[3] ; m_work[4] |= ss.m_work[4] ; m_work[5] |= ss.m_work[5] ; m_work[6] |= ss.m_work[6] ; m_work[7] |= ss.m_work[7] ; return *this ; }
_idset_seg& _idset_seg::operator&= (const _idset_seg& ss) { m_work[0] &= ss.m_work[0] ; m_work[1] &= ss.m_work[1] ; m_work[2] &= ss.m_work[2] ; m_work[3] &= ss.m_work[3] ; m_work[4] &= ss.m_work[4] ; m_work[5] &= ss.m_work[5] ; m_work[6] &= ss.m_work[6] ; m_work[7] &= ss.m_work[7] ; return *this ; }
/* ** SECTION 3: _idsNode member functions */
struct _idsNode { // Idset segment node: This stores however many complete segments will fit within a 4K disk block.
hzXbuf m_Data ; // Up to 4K data uint32_t m_Start ; // The lowest value segment in the node uint32_t m_End ; // The higest value segment in the node uint32_t m_Top ; // The higest id in the node uint32_t m_nPop ; // Number of ids in the node
// Set funcions int32_t Insert (uint32_t) ; int32_t Delete (uint32_t) ;
// Get funcions bool IsSet (uint32_t nIndex) const ;
// Boolean operators _idsNode& operator|= (const _idsNode& ss) ; _idsNode& operator&= (const _idsNode& ss) ;
// Node metadata uint32_t Size (void) const { return m_Data.Size() ; } uint32_t Count (void) const { return m_nPop ; }
// Import/Export hzEcode Show (hzChain& C) const ;
// Constructor/Destructor _idsNode (void) { m_Start = m_End = m_Top = m_nPop = 0 ; } ~_idsNode (void) { m_Data.Clear() ; } } ;
void _writeSingleEUI (hzXbuf& X, uint32_t nIncr) { // Write a single increment EUI. This action is usually conducted in ascending sequence. // // Arguments: 1) X The data buffer // 2) nSofar The last number reached // 3) nId The new id // // Returns: Number of ids found
uint32_t byte ; // Byte to write
if (nIncr < 0x80) X.AddByte(nIncr) ; else if (nIncr < 0x2000) { byte = (nIncr & 0x1f00) >> 8 ; byte |= 0x80 ; X.AddByte(byte) ; byte = nIncr & 0xff ; X.AddByte(byte) ; } else if (nIncr < 0x200000) { byte = (nIncr & 0x1f0000) >> 16 ; byte |= 0xA0 ; X.AddByte(byte) ; byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ; byte = nIncr & 0xff ; X.AddByte(byte) ; } else if (nIncr < 0x20000000) { byte = (nIncr & 0x1f0000) >> 16 ; byte |= 0xC0 ; byte = (nIncr & 0xff0000) >> 16 ; X.AddByte(byte) ; byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ; byte = nIncr & 0xff ; X.AddByte(byte) ;
} else { X.AddByte(0xE1) ; byte = (nIncr & 0xff000000) >> 24 ; X.AddByte(byte) ; byte = (nIncr & 0xff0000) >> 16 ; X.AddByte(byte) ; byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ; byte = nIncr & 0xff ; X.AddByte(byte) ; } }
uint32_t _readEUI (_idset_seg& seg, uint32_t& nSofar, xbufIter& zi) { // Reads a single EUI and in so doing, add to or repopulate the supplied value segment. In all cases advance the supplied hzXbuf iterator. // // Arguments: 1) seg The 512-bit segment buffer // 2) zi The iterator // // Returns: Number of ids found
_hzfunc("_readEUI") ;
uint32_t byte ; // Iterator current byte value uint32_t incr ; // Increment uint32_t nFound ; // Number of ids found
nFound = 0 ; if (zi.eof()) return 0 ;
byte = *zi ;
if (!(byte & 0x80)) { // 0xxxxxxx: 1-byte single-increment. Range 1 to 128. nFound = 1 ; nSofar += (byte & 0x7f) ; zi++ ; return 1 ; }
switch (byte & 0xE0) { case 0x80: // 100xxxxx 2-byte single-increment. Range 129 to 8,320. incr = ((byte & 0x1f) << 8) ; zi++ ; incr += *zi ; nFound = 1 ; nSofar += incr ; break ;
case 0xA0: // 101xxxxx 3-byte single-increment. Range 8,321 to 2,105,472. incr = ((byte & 0x1f) << 16) ; zi++ ; incr += (*zi << 8) ; zi++ ; incr += *zi ; nFound = 1 ; nSofar += incr ; break ;
case 0xC0: // 110xxxxx 4-byte single-increment. Range 2,105,473 to 538,976,384. incr = ((byte & 0x1f) << 24) ; zi++ ; incr += (*zi << 16) ; zi++ ; incr += (*zi << 8) ; zi++ ; incr += *zi ; nFound = 1 ; nSofar += incr ; break ;
case 0xE0: // The whole bytes is control
switch (byte & 0x0F) { case 0x01: // 11100001 Single-increment, absoluete value. zi++ ; incr = (*zi << 24) ; zi++ ; incr += (*zi << 16) ; zi++ ; incr += (*zi << 8) ; zi++ ; incr += *zi ; nFound = 1 ; nSofar += incr ; break ;
case 0x02: // 11100010 Invert Segment. From now to end of segmenmt, numbers are the ids the segment does not have. break ;
case 0x03: // 11100011 Invert Ongoing. From now until further notice, numbers are the ids the segment does not have. break ;
case 0x04: // 11100100 Invert Stop. Stop inverting (can only negate an invert ongoing directive). break ;
case 0x05: // 11100101 Bitmap Variable Size: The next N bytes are a straight bitmap. break ;
case 0x06: // 11100110 Bitmap Full: Full 512 bit segment as a straight bitmap. This must specify the segment number (3 bytes), so the total size of EIU is 68 bytes. break ;
case 0x07: // 11100111 Bit-tree: This has a root index byte, each bit of which indicates an index byte in the 2nd level. Upto 8 index bytes in the 2nd level, indicate up // to 64 data bytes in the final level - which represents a full segment. break ; } break ; }
return nFound ; }
int32_t _idsNode::Insert (uint32_t nId) { // Insert an element into the node. // // Scenarios are:- // - Virgin node // - Supplied id exceeds the higest id in the node // - Supplied id is not the higest // // Argument: nId The id // // Returns: Number of items inserted (0 if the id already exists)
_hzfunc("_idsNode::Insert(int)") ;
_idset_seg seg ; // Segment hzXbuf newBuf ; // New buffer xbufIter zi ; // Iterator xbufIter xi ; // Iterator marker uint32_t nRange ; // Value segment number uint32_t nSofar ; // Value so far as a result of iteration uint32_t nPrev ; // Last nSofar uint32_t nFound ; // Number of ids in EUI
nRange = nId/IDSET_SEGSIZE ;
if (!m_Data.Size()) { // Virgin node _writeSingleEUI(m_Data, nId) ; m_Start = m_End = nRange ; m_Top = nId ; m_nPop = 1 ; return 1 ; }
if (nId > m_Top) { _writeSingleEUI(m_Data, nId - m_Top) ; m_Top = nId ; m_nPop++ ; return 1 ; }
// Scan m_Data until nId is found or exceeded. If found just return. If exceeded we add - so copy from m_Data to newBuf up to the point just before nId is exceeded, then add // nId, then copy the rest. m_Data is then replaced by newBuf.
zi = m_Data ; nSofar = 0 ;
for (; !zi.eof() ;) { xi = zi ; nPrev = nSofar ; nFound = _readEUI(seg, nSofar, zi) ;
if (nFound == 1) { if (nSofar == nId) return 0 ; if (nSofar < nId) continue ;
// nId exceeds nSofar. break ; } }
// Copy up to point just before the EUI that exceeded the new id for (zi = m_Data ; zi != xi ; zi++) newBuf.AddByte(*zi) ;
// Write the new id as the difference between the new id and the last id it exceeded _writeSingleEUI(newBuf, nId - nPrev) ;
// Write the id that exceeded the new _writeSingleEUI(newBuf, nSofar - nId) ;
// Re-read the last EUI? zi = xi ; nFound = _readEUI(seg, nSofar, zi) ; for (; !zi.eof() ; zi++) newBuf.AddByte(*zi) ;
m_Data.Clear() ; m_Data = newBuf ; m_nPop++ ; return 1 ; }
int32_t _idsNode::Delete (uint32_t nId) { // Delete an id from the node // // Argument: nId The id // // Returns: The number of ids deleted (0 if id not found)
_hzfunc("_idsNode::Delete") ;
_idset_seg seg ; // Segment hzXbuf newBuf ; // New buffer xbufIter zi ; // Iterator xbufIter xi ; // Iterator marker xbufIter ji ; // Iterator marker uint32_t nSofar ; // Value so far as a result of iteration uint32_t nNext ; // Last nSofar uint32_t nFound ; // Number of ids in EUI
if (!m_Data.Size()) return 0 ; if (nId > m_Top) return 0 ;
// Scan m_Data until nId is found or exceeded. If exceeded, just return 0. If found we delete - so copy from m_Data to newBuf up to the point where the id is found, then copy // the next id across as an adjusted increment, then just copy the remainder. m_Data is then replaced by newBuf.
zi = m_Data ; nSofar = 0 ;
for (; !zi.eof() ;) { xi = zi ; nFound = _readEUI(seg, nSofar, zi) ;
if (nFound == 1) { if (nSofar > nId) return 0 ; if (nSofar < nId) continue ;
// Id found, get next nFound = _readEUI(seg, nNext, zi) ; ji = zi ; break ; } }
// Copy up to point just before the EUI that exceeded the new id for (zi = m_Data ; zi != xi ; zi++) newBuf.AddByte(*zi) ;
// Write remainder for (; !ji.eof() ; ji++) newBuf.AddByte(*ji) ;
m_Data.Clear() ; m_Data = newBuf ; m_nPop++ ; return 1 ; }
bool _idsNode::IsSet (uint32_t nId) const { // Iterate the node // // Argument: nIndex The id to test for // // Returns: True If the requested bit number is set within the segment // False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("_idsNode::IsSet") ;
_idset_seg seg ; // Segment xbufIter zi ; // Node iterator uint32_t range ; // Range (id/512) uint32_t nSofar ; // Value so far as a result of iteration uint32_t nFound ; // Number of ids in EUI
zi = m_Data ; range = nId/IDSET_SEGSIZE ; nSofar = m_Start ;
for (; !zi.eof() ;) { nFound = _readEUI(seg, nSofar, zi) ;
if (!nFound) return false ;
if (nFound == 1) { if (nSofar < nId) continue ; if (nSofar > nId) return false ; return true ; } if (seg.Segno() == range) { if (seg.IsSet(nId % IDSET_SEGSIZE)) return true ; } }
return false ; }
// Boolean operators _idsNode& _idsNode::operator|= (const _idsNode& ss) { // ORs this segment with the operand // // Arguments: 1) ss The other segment // // Returns: Reference to this segment
_hzfunc("_idsNode::operator|=") ;
// Anything to do? if (!ss.Count()) return *this ;
// OK do it
return *this ; }
_idsNode& _idsNode::operator&= (const _idsNode& ss) { // ANDs this segment with the operand // // Arguments: 1) ss The other segment // // Returns: Reference to this segment
_hzfunc("_idsNode::operator&=") ;
// Anything to do? if (!Count()) return *this ;
if (!ss.Count()) m_Data.Clear() ;
// OK do it return *this ; }
hzEcode _idsNode::Show (hzChain& C) const { // Export node in 'human readable' form to the supplied chain. // // The EUIs are written out in a 'human readable' format, with each byte of the data expanded out to a pair of hexadecimal digits and the whole enclosed in square brackets. // // Arguments: 1) C Aggregation chain // // Returns: E_CORRUPT If anything is wrong with the segment // E_OK If the operation was successful
_hzfunc("_idsNode::Show") ;
_idset_seg seg ; // Value segment xbufIter zi ; // Iterator xbufIter xi ; // Iterator uint32_t byte ; // Used to extract most and then least significant 4 bits from encoded segment byte uint32_t nSofar ; // Value so far as a result of iteration uint32_t nFound ; // Number of ids in EUI
if (!m_Data.Size()) return E_NODATA ;
xi = zi = m_Data ; for (; !zi.eof() ;) { xi = zi ; nFound = _readEUI(seg, nSofar, zi) ;
if (!nFound) break ;
C.AddByte(CHAR_SQOPEN) ; for (; xi != zi ; xi++) { byte = (*xi & 0xf0) >> 4 ; C.AddByte(byte > 9 ? byte - 10 + 'a' : byte + '0') ;
byte = *xi & 0x0f ; C.AddByte(byte > 9 ? byte - 10 + 'a' : byte + '0') ; } C.Printf("]\t%d\n", nSofar) ; }
C.AddByte(CHAR_SQCLOSE) ; return E_OK ; }
/* ** SECTION 4: hdbIdset Member Functions */
hdbIdset::hdbIdset (void) { mx = 0 ; _hzGlobal_Memstats.m_numBitmaps++ ; }
hdbIdset::~hdbIdset (void) { Clear() ; _hzGlobal_Memstats.m_numBitmaps-- ; }
_idsNode* hdbIdset::_findNode (uint32_t nId) const { hzVect<_idsNode*>* pNodes ; // Vector of nodes _idsNode* pNode ; // Node pointer uint32_t nRange ; // Range of value segment uint32_t n ; // Loop counter
if (!mx) return 0 ; if (!mx->m_pData) return 0 ;
if (!mx->m_bVect) return (_idsNode*) mx->m_pData ;
// Nodes in vector. Find node by range pNodes = (hzVect<_idsNode*>*) mx->m_pData ; nRange = nId/IDSET_SEGSIZE ;
for (n = 0 ; n < pNodes->Count() ; n++) { pNode = pNodes->operator[](n) ;
if (nRange > pNode->m_End) continue ; break ; }
return pNode ; }
void hdbIdset::Clear (void) { // Clears the bitmap (removes all segments) // // Arguments: None // Returns: None
hzVect<_idsNode*>* pNodes ; // Vector of nodes _idsNode* pNode ; // Node pointer uint32_t n ; // Loop iterator
if (mx) { if (mx->m_pData) { if (mx->m_bVect) { pNodes = (hzVect<_idsNode*>*) mx->m_pData ; for (n = 0 ; n < pNodes->Count() ; n++) { pNode = pNodes->operator[](n) ; pNode->m_Data.Clear() ; delete pNode ; }
pNodes->Clear() ; delete pNodes ; } else { pNode = (_idsNode*) mx->m_pData ; pNode->m_Data.Clear() ; delete pNode ; }
mx->m_pData = 0 ; }
delete mx ; } }
uint32_t hdbIdset::NoNodes (void) const { // Reports the number of _idsNodes //
// Arguments: None // Returns: Number of _idsNodes
if (!mx) return 0 ;
if (mx->m_pData) return 0 ;
if (!mx->m_bVect) return 1 ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes
pNodes = (hzVect<_idsNode*>*) mx->m_pData ; return pNodes->Count() ; }
hdbIdset& hdbIdset::operator= (const hdbIdset& op) { // Assign a hdbIdset instance to another. This will only facilitate a soft copy. // // Argument: op The supplied bitmap // // Returns: Reference to this bitmap instance in all cases.
_hzfunc("hdbIdset::operator=") ;
if (mx == op.mx) return *this ;
Clear() ; mx = op.mx ; if (mx) mx->m_nCopies++ ;
return *this ; }
bool hdbIdset::operator[] (uint32_t nId) const { // Purpose: Deterermine if the supplied test id, exists within the idset // // Argument: The test id // // Returns: True if the test id exists within the idset, false otherwise
_hzfunc("hdbIdset::operator[]") ;
_idsNode* pNode ; // Target node uint32_t range ; // Target segment number
if (!mx) return false ;
range = nId / IDSET_SEGSIZE ;
pNode = _findNode(nId) ; if (!pNode) return false ;
return pNode->IsSet(nId) ; }
uint32_t hdbIdset::Insert (uint32_t nId) { // Locate node, insert into node. If no nodes exist, the first is create. // // Argument: nd The document id to insert // // Returns: 1 If the supplied id did not exist in the bitmap prior to this call // 0 If the supplied id already existed in the bitmap
_hzfunc("hdbIdset::Insert") ;
_idsNode* pNode ; // Target segment int32_t rv ; // Count of number added (1)
if (!mx) mx = new _idset_ca() ; if (!mx->m_pData) mx->m_pData = new _idsNode() ;
pNode = _findNode(nId) ; if (!pNode) return 0 ;
rv = pNode->Insert(nId) ; if (rv == 1) mx->m_nPop += 1 ;
// Check if node needs to split if (pNode->m_Data.Size() > 8192) { }
return rv ; }
hzEcode hdbIdset::Delete (uint32_t nId) { // Delete a document/object id from the bitmap // // Argument: nId The document/object id // // Returns: E_NOTFOUND If the document/object id was not in the bitmap // E_OK If it was and now isn't
_hzfunc("hdbIdset::Delete") ;
_idsNode* pNode ; // Target segment int32_t rv ; // Count of number added (1)
if (!mx) return E_NOTFOUND ; if (!mx->m_pData) return E_NOTFOUND ;
pNode = _findNode(nId) ; if (!pNode) return E_NOTFOUND ;
rv = pNode->Delete(nId) ; if (rv == 1) mx->m_nPop -= 1 ;
// Check if node needs to merge return E_OK ; }
void hdbIdset::Show (hzChain& Z) const { // Export the idset. Be aware this can produce chains of several megabytes. // // Argument: Z The aggregation chain // // Returns: None
_hzfunc("hdbIdset::Show") ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes _idsNode* pNode ; // Node pointer uint32_t n ; // Loop iterator
// Is there anything to do? if (mx) { if (mx->m_pData) { if (mx->m_bVect) { pNodes = (hzVect<_idsNode*>*) mx->m_pData ; for (n = 0 ; n < pNodes->Count() ; n++) { pNode = pNodes->operator[](n) ; pNode->Show(Z) ; } } else { pNode = (_idsNode*) mx->m_pData ; pNode->Show(Z) ; } } } }
uint32_t hdbIdset::Fetch (hzVect<uint32_t>& Result, uint32_t nStart, uint32_t nReq) const { // Purpose: Fetch ids from the idset into an id buffer // // Note that the presumed scenario is that the idset will be a search result, which will not have a vector of nodes. The function is written however, to cope with a vector. // // Arguments: 1) Result The id buffer (type uint32_t) // 2) nStart The start position (ignore all ids before this) // 3) nReq The number to fetch // // Returns: Number of ids fetched
_hzfunc("hdbIdset::Fetch") ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes _idsNode* pNode ; // Segment (to process) _idset_seg seg ; // Value segment xbufIter zi ; // Iterator into low pop idset nodes uint32_t numNodes ; // Number of nodes uint32_t nSofar ; // Value so far as a result of iteration uint32_t nFound ; // Number of ids in EUI uint32_t n ; // Iterator
//uint32_t nPosn = 0 ; // Position (in the bitmap so far) attained last segment //uint32_t nMax = 0 ; // Maximum id this segment //uint32_t segNo ; // Segment number //uint32_t segAddr ; // Segment address //uint32_t nSegStart ; // Starting position within a segment //uint32_t x ; // For iterating resut of segment fetch //uint32_t v ; // For value
Result.Clear() ;
if (!mx) return 0 ; if (!mx->m_pData) return 0 ; if (nStart >= mx->m_nPop) return 0 ;
if (mx->m_bVect) { pNodes = (hzVect<_idsNode*>*) mx->m_pData ; numNodes = pNodes->Count() ; } else { pNodes = 0 ; numNodes = 1 ; }
for (n = 0 ; n < numNodes ; n++) { pNode = mx->m_bVect ? pNodes->operator[](n) : (_idsNode*) mx->m_pData ;
zi = pNode->m_Data ;
// While not yet at start for (;;) { nFound = _readEUI(seg, nSofar, zi) ; } }
return Result.Count() ; }
// Operators hdbIdset& hdbIdset::operator|= (const hdbIdset& op) { // Run through the nodes in the operand idset. Iterate each node using _readEUI, and insert everything found into the applicable node in this idset. // // Arguments: 1) The supplied bitmap // // Returns: Reference to this bitmap instance in all cases.
_hzfunc("hdbIdset::operator&=") ;
hzVect<_idsNode*>* pNodesA ; // Vector of nodes (this) hzVect<_idsNode*>* pNodesB ; // Vector of nodes (operand) _idsNode* pNodeA ; // Node (this) _idsNode* pNodeB ; // Node (operand) _idset_seg segA ; // Value segment (this) _idset_seg segB ; // Value segment (operand) xbufIter ziA ; // EUI data iterator (this) xbufIter ziB ; // EUI data iterator (operand) uint32_t numNodes ; // Number of nodes uint32_t nA ; // Iterator uint32_t nB ; // Iterator uint32_t nSofar ; // Value so far as a result of iteration uint32_t nFound ; // Number of ids in EUI
// Is there anything to AND with? if (!op.Count()) { Clear() ; return *this ; }
// If this map is empty an AND operation cannot populate it - just return if (!Count()) return *this ;
if (op.mx->m_bVect) { pNodesB = (hzVect<_idsNode*>*) op.mx->m_pData ; numNodes = pNodesB->Count() ; } else { pNodesB = 0 ; numNodes = 1 ; }
// Go through operand idset nodes for (nB = 0 ; nB < numNodes ; nB++) { pNodeB = op.mx->m_bVect ? pNodesB->operator[](nB) : (_idsNode*) op.mx->m_pData ;
ziB = pNodeB->m_Data ;
for (; !ziB.eof() ;) { nFound = _readEUI(segB, nSofar, ziB) ; } }
return *this ; }
hdbIdset& hdbIdset::operator&= (const hdbIdset& op) { // AND this idset with the operand idset. Place the result in this idset. // // Argument: The operand idset // // Returns: Reference to this idset in all cases.
_hzfunc("hdbIdset::operator&=") ;
hzVect<_idsNode*>* pNodesA ; // Vector of nodes (this) hzVect<_idsNode*>* pNodesB ; // Vector of nodes (operand) _idsNode* pNodeA ; // Node (this) _idsNode* pNodeB ; // Node (operand) _idset_seg segA ; // Value segment (this) _idset_seg segB ; // Value segment (operand) xbufIter ziA ; // EUI data iterator (this) xbufIter ziB ; // EUI data iterator (operand) uint32_t numNodes ; // Number of nodes uint32_t nA ; // Iterator uint32_t nB ; // Iterator uint32_t nSofar ; // Value so far as a result of iteration uint32_t nFound ; // Number of ids in EUI
// Is there anything to AND with? if (!op.Count()) { Clear() ; return *this ; }
// If this map is empty an AND operation cannot populate it - just return if (!Count()) return *this ;
if (op.mx->m_bVect) { pNodesB = (hzVect<_idsNode*>*) op.mx->m_pData ; numNodes = pNodesB->Count() ; } else { pNodesB = 0 ; numNodes = 1 ; }
// Go through operand idset nodes for (nB = 0 ; nB < numNodes ; nB++) { pNodeB = op.mx->m_bVect ? pNodesB->operator[](nB) : (_idsNode*) op.mx->m_pData ;
ziB = pNodeB->m_Data ;
for (; !ziB.eof() ;) { nFound = _readEUI(segB, nSofar, ziB) ; } }
return *this ; }