//
// File: hzIdset.cpp
//
// Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
// The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
// Software Foundation, either version 3 of the License, or any later version.
//
// The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
// The complete set of encodings currently in use are as follows:-
//
// 0xxxxxxx 1-byte single-increment. Range 1 to 128.
//
// 100xxxxx 2-byte single-increment. Range 129 to 8,320.
//
// 101xxxxx 3-byte single-increment. Range 8,321 to 2,105,472.
//
// 110xxxxx 4-byte single-increment. Range 2,105,473 to 538,976,384.
//
// 11100001 Single-increment, absoluete value.
//
// 11100010 Invert Segment. From now to end of segmenmt, numbers are the ids the segment does not have.
//
// 11100011 Invert Ongoing. From now until further notice, numbers are the ids the segment does not have.
//
// 11100100 Invert Stop. Stop inverting (can only negate an invert ongoing directive).
//
// 11100101 Bitmap Variable Size: The next N bytes are a straight bitmap.
//
// 11100110 Bitmap Full: Full 512 bit segment. This must specify the segment number (3 bytes). Total size of EIU is 68 bytes as there is the control byte and 64 data bytes.
//
// 11100111 Bit-tree: These have a root index byte, each bit of which indicates an index byte in the second level. Upto 8 index bytes in the second level indicate up to 64 data
// bytes in the final level. Each bit in each data byte indicates a number allowing the bitmap to store up to 512 numbers. These are only used for full segments and so
// like Bitmap-Full, must specify the segment number.
#include <iostream>
#include "hzChars.h"
#include "hzDatabase.h"
using namespace std ;
/*
** Variables
*/
#define IDSET_SEGSIZE 512 // Range of value segments
static char s_BitArray [256] =
{
// Array to speed up bit counting per byte
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0 - 15
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 16 - 31
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 32 - 47
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 48 - 63
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 64 - 79
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 80 - 95
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 96 - 111
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 112 - 127
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 128 - 143
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 144 - 159
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 160 - 175
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 176 - 191
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 192 - 207
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 208 - 223
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 224 - 239
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 // 240 - 255
} ;
/*
** SECTION 1: Bitmap non member functions
*/
uint32_t CountBits (const void* buf, uint32_t nBytes)
{
// Category: Bitwise
//
// Count the total number of bits set in the supplied buffer, taken to be of the supplied size.
//
// Arguments: 1) buf The const void* buffer
// 2) nBytes The number of bytes
//
// Returns: Number of bits found to be set
const uchar* uber ; // Buffer as a series of chars
uint32_t nCount ; // Buffer iterator
uint32_t nBits ; // Cumulative bit counter
if (!buf)
return -1 ;
uber = (uchar*) buf ;
for (nBits = nCount = 0 ; nCount < nBytes ; nCount++)
nBits += s_BitArray[uber[nCount]] ;
return nBits ;
}
void SetBits (uchar* pBitbuf, uint32_t nOset, bool bValue)
{
// Category: Bitwise
//
// Set or clear the bit in the buffer at the given offset
//
// Arguments: 1) pBitbuf The buffer
// 2) nOset The offset (is divided by 8)
// 2) bValue The offset (is divided by 8)
//
// Returns: True If the requested bit number is set within the segment
// False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("__func__") ;
if (pBitbuf)
{
if (bValue)
{
switch (nOset % 8)
{
case 0: pBitbuf[nOset/8] |= 0x80 ; break ;
case 1: pBitbuf[nOset/8] |= 0x40 ; break ;
case 2: pBitbuf[nOset/8] |= 0x20 ; break ;
case 3: pBitbuf[nOset/8] |= 0x10 ; break ;
case 4: pBitbuf[nOset/8] |= 0x08 ; break ;
case 5: pBitbuf[nOset/8] |= 0x04 ; break ;
case 6: pBitbuf[nOset/8] |= 0x02 ; break ;
case 7: pBitbuf[nOset/8] |= 0x01 ; break ;
}
}
else
{
switch (nOset % 8)
{
case 0: pBitbuf[nOset/8] &= ~0x80 ; break ;
case 1: pBitbuf[nOset/8] &= ~0x40 ; break ;
case 2: pBitbuf[nOset/8] &= ~0x20 ; break ;
case 3: pBitbuf[nOset/8] &= ~0x10 ; break ;
case 4: pBitbuf[nOset/8] &= ~0x08 ; break ;
case 5: pBitbuf[nOset/8] &= ~0x04 ; break ;
case 6: pBitbuf[nOset/8] &= ~0x02 ; break ;
case 7: pBitbuf[nOset/8] &= ~0x01 ; break ;
}
}
}
}
bool GetBits (const uchar* pBitbuf, uint32_t nOset)
{
// Category: Bitwise
//
// Determine if the bit in the buffer at the given offset is set
//
// Arguments: 1) pBitbuf The buffer
// 2) nOset The offset (is divided by 8)
//
// Returns: True If the requested bit number is set within the segment
// False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("__func__") ;
if (!pBitbuf)
return false ;
switch (nOset%8)
{
case 0: return pBitbuf[nOset/8] & 0x80 ? true : false ;
case 1: return pBitbuf[nOset/8] & 0x40 ? true : false ;
case 2: return pBitbuf[nOset/8] & 0x20 ? true : false ;
case 3: return pBitbuf[nOset/8] & 0x10 ? true : false ;
case 4: return pBitbuf[nOset/8] & 0x08 ? true : false ;
case 5: return pBitbuf[nOset/8] & 0x04 ? true : false ;
case 6: return pBitbuf[nOset/8] & 0x02 ? true : false ;
case 7: return pBitbuf[nOset/8] & 0x01 ? true : false ;
}
return false ;
}
/*
** SECTION 2: Idset segments
*/
class _idset_seg
{
// The _idset_seg (bitmap segment) class comprises a count, a segment number and 32 bytes of storage. Currently this storage space is used as a bitmap to hold upto 512 values.
uint64_t m_work[8] ; // 512-bit working buffer
uint32_t m_count ; // Number of bits set
uint32_t m_segno ; // Segment number
// Non-implimented copy constructor
_idset_seg (const _idset_seg& op) ;
// Non-implimented Assignment operator
_idset_seg& operator= (const _idset_seg& op) ;
public:
_idset_seg (void) { m_count = m_segno = 0 ; }
~_idset_seg (void) {}
// Metadata
uint32_t Count (void) const { return m_count ; } // Return population count
uint32_t Segno (void) const { return m_segno ; } // Return segment number
//bool Perm (void) const { return m_count & 0x8000 ? false : true ; }
// Clear all bits in the segment
void Clear (void)
{
m_count = m_segno = 0 ;
memset(m_work, 0, 64) ;
}
// Add an element to the segment
uint32_t Insert (const hzSet<uint32_t>& idset, uint32_t& nPosn) ;
hzEcode Insert (uint32_t nElement) ;
// Remove an element from the segment
hzEcode Delete (uint32_t nElement) ;
uint32_t Delete (const hzSet<uint32_t>& idset, uint32_t& nPosn) ;
// Where a segment has a single value
bool IsSet (uint32_t oset) const ;
// Fetch a number of elements from the segment
int32_t _fetch (hzVect<uint32_t>& Result, uint32_t nBase, uint32_t nStart, uint32_t nReq) const ;
// Boolean operators
_idset_seg& operator|= (const _idset_seg& ss) ;
_idset_seg& operator&= (const _idset_seg& ss) ;
} ;
hzEcode _idset_seg::Insert (uint32_t nId)
{
uchar* ptr ; // Cast to m_Work
uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ;
if (nRange != m_segno)
return E_RANGE ;
ptr = (uchar*) m_work ;
SetBits(ptr, nId % IDSET_SEGSIZE, true) ;
return E_OK ;
}
hzEcode _idset_seg::Delete (uint32_t nId)
{
uchar* ptr ; // Cast to m_Work
uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ;
if (nRange != m_segno)
return E_RANGE ;
ptr = (uchar*) m_work ;
SetBits(ptr, nId % IDSET_SEGSIZE, false) ;
return E_OK ;
}
bool _idset_seg::IsSet (uint32_t nId) const
{
uchar* ptr ; // Cast to m_Work
uint32_t nRange ; // Value range
nRange = nId/IDSET_SEGSIZE ;
if (nRange != m_segno)
return false ;
ptr = (uchar*) m_work ;
return GetBits(ptr, nId % IDSET_SEGSIZE) ;
}
_idset_seg& _idset_seg::operator|= (const _idset_seg& ss)
{
m_work[0] |= ss.m_work[0] ;
m_work[1] |= ss.m_work[1] ;
m_work[2] |= ss.m_work[2] ;
m_work[3] |= ss.m_work[3] ;
m_work[4] |= ss.m_work[4] ;
m_work[5] |= ss.m_work[5] ;
m_work[6] |= ss.m_work[6] ;
m_work[7] |= ss.m_work[7] ;
return *this ;
}
_idset_seg& _idset_seg::operator&= (const _idset_seg& ss)
{
m_work[0] &= ss.m_work[0] ;
m_work[1] &= ss.m_work[1] ;
m_work[2] &= ss.m_work[2] ;
m_work[3] &= ss.m_work[3] ;
m_work[4] &= ss.m_work[4] ;
m_work[5] &= ss.m_work[5] ;
m_work[6] &= ss.m_work[6] ;
m_work[7] &= ss.m_work[7] ;
return *this ;
}
/*
** SECTION 3: _idsNode member functions
*/
struct _idsNode
{
// Idset segment node: This stores however many complete segments will fit within a 4K disk block.
hzXbuf m_Data ; // Up to 4K data
uint32_t m_Start ; // The lowest value segment in the node
uint32_t m_End ; // The higest value segment in the node
uint32_t m_Top ; // The higest id in the node
uint32_t m_nPop ; // Number of ids in the node
// Set funcions
int32_t Insert (uint32_t) ;
int32_t Delete (uint32_t) ;
// Get funcions
bool IsSet (uint32_t nIndex) const ;
// Boolean operators
_idsNode& operator|= (const _idsNode& ss) ;
_idsNode& operator&= (const _idsNode& ss) ;
// Node metadata
uint32_t Size (void) const { return m_Data.Size() ; }
uint32_t Count (void) const { return m_nPop ; }
// Import/Export
hzEcode Show (hzChain& C) const ;
// Constructor/Destructor
_idsNode (void) { m_Start = m_End = m_Top = m_nPop = 0 ; }
~_idsNode (void) { m_Data.Clear() ; }
} ;
void _writeSingleEUI (hzXbuf& X, uint32_t nIncr)
{
// Write a single increment EUI. This action is usually conducted in ascending sequence.
//
// Arguments: 1) X The data buffer
// 2) nSofar The last number reached
// 3) nId The new id
//
// Returns: Number of ids found
uint32_t byte ; // Byte to write
if (nIncr < 0x80)
X.AddByte(nIncr) ;
else if (nIncr < 0x2000)
{
byte = (nIncr & 0x1f00) >> 8 ;
byte |= 0x80 ;
X.AddByte(byte) ;
byte = nIncr & 0xff ;
X.AddByte(byte) ;
}
else if (nIncr < 0x200000)
{
byte = (nIncr & 0x1f0000) >> 16 ;
byte |= 0xA0 ;
X.AddByte(byte) ;
byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ;
byte = nIncr & 0xff ; X.AddByte(byte) ;
}
else if (nIncr < 0x20000000)
{
byte = (nIncr & 0x1f0000) >> 16 ;
byte |= 0xC0 ;
byte = (nIncr & 0xff0000) >> 16 ; X.AddByte(byte) ;
byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ;
byte = nIncr & 0xff ; X.AddByte(byte) ;
}
else
{
X.AddByte(0xE1) ;
byte = (nIncr & 0xff000000) >> 24 ; X.AddByte(byte) ;
byte = (nIncr & 0xff0000) >> 16 ; X.AddByte(byte) ;
byte = (nIncr & 0xff00) >> 8 ; X.AddByte(byte) ;
byte = nIncr & 0xff ; X.AddByte(byte) ;
}
}
uint32_t _readEUI (_idset_seg& seg, uint32_t& nSofar, xbufIter& zi)
{
// Reads a single EUI and in so doing, add to or repopulate the supplied value segment. In all cases advance the supplied hzXbuf iterator.
//
// Arguments: 1) seg The 512-bit segment buffer
// 2) zi The iterator
//
// Returns: Number of ids found
_hzfunc("_readEUI") ;
uint32_t byte ; // Iterator current byte value
uint32_t incr ; // Increment
uint32_t nFound ; // Number of ids found
nFound = 0 ;
if (zi.eof())
return 0 ;
byte = *zi ;
if (!(byte & 0x80))
{
// 0xxxxxxx: 1-byte single-increment. Range 1 to 128.
nFound = 1 ;
nSofar += (byte & 0x7f) ;
zi++ ;
return 1 ;
}
switch (byte & 0xE0)
{
case 0x80: // 100xxxxx 2-byte single-increment. Range 129 to 8,320.
incr = ((byte & 0x1f) << 8) ;
zi++ ;
incr += *zi ;
nFound = 1 ;
nSofar += incr ;
break ;
case 0xA0: // 101xxxxx 3-byte single-increment. Range 8,321 to 2,105,472.
incr = ((byte & 0x1f) << 16) ;
zi++ ; incr += (*zi << 8) ;
zi++ ; incr += *zi ;
nFound = 1 ;
nSofar += incr ;
break ;
case 0xC0: // 110xxxxx 4-byte single-increment. Range 2,105,473 to 538,976,384.
incr = ((byte & 0x1f) << 24) ;
zi++ ; incr += (*zi << 16) ;
zi++ ; incr += (*zi << 8) ;
zi++ ; incr += *zi ;
nFound = 1 ;
nSofar += incr ;
break ;
case 0xE0: // The whole bytes is control
switch (byte & 0x0F)
{
case 0x01: // 11100001 Single-increment, absoluete value.
zi++ ; incr = (*zi << 24) ;
zi++ ; incr += (*zi << 16) ;
zi++ ; incr += (*zi << 8) ;
zi++ ; incr += *zi ;
nFound = 1 ;
nSofar += incr ;
break ;
case 0x02: // 11100010 Invert Segment. From now to end of segmenmt, numbers are the ids the segment does not have.
break ;
case 0x03: // 11100011 Invert Ongoing. From now until further notice, numbers are the ids the segment does not have.
break ;
case 0x04: // 11100100 Invert Stop. Stop inverting (can only negate an invert ongoing directive).
break ;
case 0x05: // 11100101 Bitmap Variable Size: The next N bytes are a straight bitmap.
break ;
case 0x06: // 11100110 Bitmap Full: Full 512 bit segment as a straight bitmap. This must specify the segment number (3 bytes), so the total size of EIU is 68 bytes.
break ;
case 0x07: // 11100111 Bit-tree: This has a root index byte, each bit of which indicates an index byte in the 2nd level. Upto 8 index bytes in the 2nd level, indicate up
// to 64 data bytes in the final level - which represents a full segment.
break ;
}
break ;
}
return nFound ;
}
int32_t _idsNode::Insert (uint32_t nId)
{
// Insert an element into the node.
//
// Scenarios are:-
// - Virgin node
// - Supplied id exceeds the higest id in the node
// - Supplied id is not the higest
//
// Argument: nId The id
//
// Returns: Number of items inserted (0 if the id already exists)
_hzfunc("_idsNode::Insert(int)") ;
_idset_seg seg ; // Segment
hzXbuf newBuf ; // New buffer
xbufIter zi ; // Iterator
xbufIter xi ; // Iterator marker
uint32_t nRange ; // Value segment number
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nPrev ; // Last nSofar
uint32_t nFound ; // Number of ids in EUI
nRange = nId/IDSET_SEGSIZE ;
if (!m_Data.Size())
{
// Virgin node
_writeSingleEUI(m_Data, nId) ;
m_Start = m_End = nRange ;
m_Top = nId ;
m_nPop = 1 ;
return 1 ;
}
if (nId > m_Top)
{
_writeSingleEUI(m_Data, nId - m_Top) ;
m_Top = nId ;
m_nPop++ ;
return 1 ;
}
// Scan m_Data until nId is found or exceeded. If found just return. If exceeded we add - so copy from m_Data to newBuf up to the point just before nId is exceeded, then add
// nId, then copy the rest. m_Data is then replaced by newBuf.
zi = m_Data ;
nSofar = 0 ;
for (; !zi.eof() ;)
{
xi = zi ;
nPrev = nSofar ;
nFound = _readEUI(seg, nSofar, zi) ;
if (nFound == 1)
{
if (nSofar == nId)
return 0 ;
if (nSofar < nId)
continue ;
// nId exceeds nSofar.
break ;
}
}
// Copy up to point just before the EUI that exceeded the new id
for (zi = m_Data ; zi != xi ; zi++)
newBuf.AddByte(*zi) ;
// Write the new id as the difference between the new id and the last id it exceeded
_writeSingleEUI(newBuf, nId - nPrev) ;
// Write the id that exceeded the new
_writeSingleEUI(newBuf, nSofar - nId) ;
// Re-read the last EUI?
zi = xi ;
nFound = _readEUI(seg, nSofar, zi) ;
for (; !zi.eof() ; zi++)
newBuf.AddByte(*zi) ;
m_Data.Clear() ;
m_Data = newBuf ;
m_nPop++ ;
return 1 ;
}
int32_t _idsNode::Delete (uint32_t nId)
{
// Delete an id from the node
//
// Argument: nId The id
//
// Returns: The number of ids deleted (0 if id not found)
_hzfunc("_idsNode::Delete") ;
_idset_seg seg ; // Segment
hzXbuf newBuf ; // New buffer
xbufIter zi ; // Iterator
xbufIter xi ; // Iterator marker
xbufIter ji ; // Iterator marker
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nNext ; // Last nSofar
uint32_t nFound ; // Number of ids in EUI
if (!m_Data.Size()) return 0 ;
if (nId > m_Top) return 0 ;
// Scan m_Data until nId is found or exceeded. If exceeded, just return 0. If found we delete - so copy from m_Data to newBuf up to the point where the id is found, then copy
// the next id across as an adjusted increment, then just copy the remainder. m_Data is then replaced by newBuf.
zi = m_Data ;
nSofar = 0 ;
for (; !zi.eof() ;)
{
xi = zi ;
nFound = _readEUI(seg, nSofar, zi) ;
if (nFound == 1)
{
if (nSofar > nId)
return 0 ;
if (nSofar < nId)
continue ;
// Id found, get next
nFound = _readEUI(seg, nNext, zi) ;
ji = zi ;
break ;
}
}
// Copy up to point just before the EUI that exceeded the new id
for (zi = m_Data ; zi != xi ; zi++)
newBuf.AddByte(*zi) ;
// Write remainder
for (; !ji.eof() ; ji++)
newBuf.AddByte(*ji) ;
m_Data.Clear() ;
m_Data = newBuf ;
m_nPop++ ;
return 1 ;
}
bool _idsNode::IsSet (uint32_t nId) const
{
// Iterate the node
//
// Argument: nIndex The id to test for
//
// Returns: True If the requested bit number is set within the segment
// False If the requested bit number is not set within the segment or it exceeds the segment range (0 - 4095)
_hzfunc("_idsNode::IsSet") ;
_idset_seg seg ; // Segment
xbufIter zi ; // Node iterator
uint32_t range ; // Range (id/512)
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nFound ; // Number of ids in EUI
zi = m_Data ;
range = nId/IDSET_SEGSIZE ;
nSofar = m_Start ;
for (; !zi.eof() ;)
{
nFound = _readEUI(seg, nSofar, zi) ;
if (!nFound)
return false ;
if (nFound == 1)
{
if (nSofar < nId)
continue ;
if (nSofar > nId)
return false ;
return true ;
}
if (seg.Segno() == range)
{
if (seg.IsSet(nId % IDSET_SEGSIZE))
return true ;
}
}
return false ;
}
// Boolean operators
_idsNode& _idsNode::operator|= (const _idsNode& ss)
{
// ORs this segment with the operand
//
// Arguments: 1) ss The other segment
//
// Returns: Reference to this segment
_hzfunc("_idsNode::operator|=") ;
// Anything to do?
if (!ss.Count())
return *this ;
// OK do it
return *this ;
}
_idsNode& _idsNode::operator&= (const _idsNode& ss)
{
// ANDs this segment with the operand
//
// Arguments: 1) ss The other segment
//
// Returns: Reference to this segment
_hzfunc("_idsNode::operator&=") ;
// Anything to do?
if (!Count())
return *this ;
if (!ss.Count())
m_Data.Clear() ;
// OK do it
return *this ;
}
hzEcode _idsNode::Show (hzChain& C) const
{
// Export node in 'human readable' form to the supplied chain.
//
// The EUIs are written out in a 'human readable' format, with each byte of the data expanded out to a pair of hexadecimal digits and the whole enclosed in square brackets.
//
// Arguments: 1) C Aggregation chain
//
// Returns: E_CORRUPT If anything is wrong with the segment
// E_OK If the operation was successful
_hzfunc("_idsNode::Show") ;
_idset_seg seg ; // Value segment
xbufIter zi ; // Iterator
xbufIter xi ; // Iterator
uint32_t byte ; // Used to extract most and then least significant 4 bits from encoded segment byte
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nFound ; // Number of ids in EUI
if (!m_Data.Size())
return E_NODATA ;
xi = zi = m_Data ;
for (; !zi.eof() ;)
{
xi = zi ;
nFound = _readEUI(seg, nSofar, zi) ;
if (!nFound)
break ;
C.AddByte(CHAR_SQOPEN) ;
for (; xi != zi ; xi++)
{
byte = (*xi & 0xf0) >> 4 ;
C.AddByte(byte > 9 ? byte - 10 + 'a' : byte + '0') ;
byte = *xi & 0x0f ;
C.AddByte(byte > 9 ? byte - 10 + 'a' : byte + '0') ;
}
C.Printf("]\t%d\n", nSofar) ;
}
C.AddByte(CHAR_SQCLOSE) ;
return E_OK ;
}
/*
** SECTION 4: hdbIdset Member Functions
*/
hdbIdset::hdbIdset (void)
{
mx = 0 ;
_hzGlobal_Memstats.m_numBitmaps++ ;
}
hdbIdset::~hdbIdset (void)
{
Clear() ;
_hzGlobal_Memstats.m_numBitmaps-- ;
}
_idsNode* hdbIdset::_findNode (uint32_t nId) const
{
hzVect<_idsNode*>* pNodes ; // Vector of nodes
_idsNode* pNode ; // Node pointer
uint32_t nRange ; // Range of value segment
uint32_t n ; // Loop counter
if (!mx) return 0 ;
if (!mx->m_pData) return 0 ;
if (!mx->m_bVect)
return (_idsNode*) mx->m_pData ;
// Nodes in vector. Find node by range
pNodes = (hzVect<_idsNode*>*) mx->m_pData ;
nRange = nId/IDSET_SEGSIZE ;
for (n = 0 ; n < pNodes->Count() ; n++)
{
pNode = pNodes->operator[](n) ;
if (nRange > pNode->m_End)
continue ;
break ;
}
return pNode ;
}
void hdbIdset::Clear (void)
{
// Clears the bitmap (removes all segments)
//
// Arguments: None
// Returns: None
hzVect<_idsNode*>* pNodes ; // Vector of nodes
_idsNode* pNode ; // Node pointer
uint32_t n ; // Loop iterator
if (mx)
{
if (mx->m_pData)
{
if (mx->m_bVect)
{
pNodes = (hzVect<_idsNode*>*) mx->m_pData ;
for (n = 0 ; n < pNodes->Count() ; n++)
{
pNode = pNodes->operator[](n) ;
pNode->m_Data.Clear() ;
delete pNode ;
}
pNodes->Clear() ;
delete pNodes ;
}
else
{
pNode = (_idsNode*) mx->m_pData ;
pNode->m_Data.Clear() ;
delete pNode ;
}
mx->m_pData = 0 ;
}
delete mx ;
}
}
uint32_t hdbIdset::NoNodes (void) const
{
// Reports the number of _idsNodes
//
// Arguments: None
// Returns: Number of _idsNodes
if (!mx)
return 0 ;
if (mx->m_pData)
return 0 ;
if (!mx->m_bVect)
return 1 ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes
pNodes = (hzVect<_idsNode*>*) mx->m_pData ;
return pNodes->Count() ;
}
hdbIdset& hdbIdset::operator= (const hdbIdset& op)
{
// Assign a hdbIdset instance to another. This will only facilitate a soft copy.
//
// Argument: op The supplied bitmap
//
// Returns: Reference to this bitmap instance in all cases.
_hzfunc("hdbIdset::operator=") ;
if (mx == op.mx)
return *this ;
Clear() ;
mx = op.mx ;
if (mx)
mx->m_nCopies++ ;
return *this ;
}
bool hdbIdset::operator[] (uint32_t nId) const
{
// Purpose: Deterermine if the supplied test id, exists within the idset
//
// Argument: The test id
//
// Returns: True if the test id exists within the idset, false otherwise
_hzfunc("hdbIdset::operator[]") ;
_idsNode* pNode ; // Target node
uint32_t range ; // Target segment number
if (!mx)
return false ;
range = nId / IDSET_SEGSIZE ;
pNode = _findNode(nId) ;
if (!pNode)
return false ;
return pNode->IsSet(nId) ;
}
uint32_t hdbIdset::Insert (uint32_t nId)
{
// Locate node, insert into node. If no nodes exist, the first is create.
//
// Argument: nd The document id to insert
//
// Returns: 1 If the supplied id did not exist in the bitmap prior to this call
// 0 If the supplied id already existed in the bitmap
_hzfunc("hdbIdset::Insert") ;
_idsNode* pNode ; // Target segment
int32_t rv ; // Count of number added (1)
if (!mx)
mx = new _idset_ca() ;
if (!mx->m_pData)
mx->m_pData = new _idsNode() ;
pNode = _findNode(nId) ;
if (!pNode)
return 0 ;
rv = pNode->Insert(nId) ;
if (rv == 1)
mx->m_nPop += 1 ;
// Check if node needs to split
if (pNode->m_Data.Size() > 8192)
{
}
return rv ;
}
hzEcode hdbIdset::Delete (uint32_t nId)
{
// Delete a document/object id from the bitmap
//
// Argument: nId The document/object id
//
// Returns: E_NOTFOUND If the document/object id was not in the bitmap
// E_OK If it was and now isn't
_hzfunc("hdbIdset::Delete") ;
_idsNode* pNode ; // Target segment
int32_t rv ; // Count of number added (1)
if (!mx) return E_NOTFOUND ;
if (!mx->m_pData) return E_NOTFOUND ;
pNode = _findNode(nId) ;
if (!pNode)
return E_NOTFOUND ;
rv = pNode->Delete(nId) ;
if (rv == 1)
mx->m_nPop -= 1 ;
// Check if node needs to merge
return E_OK ;
}
void hdbIdset::Show (hzChain& Z) const
{
// Export the idset. Be aware this can produce chains of several megabytes.
//
// Argument: Z The aggregation chain
//
// Returns: None
_hzfunc("hdbIdset::Show") ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes
_idsNode* pNode ; // Node pointer
uint32_t n ; // Loop iterator
// Is there anything to do?
if (mx)
{
if (mx->m_pData)
{
if (mx->m_bVect)
{
pNodes = (hzVect<_idsNode*>*) mx->m_pData ;
for (n = 0 ; n < pNodes->Count() ; n++)
{
pNode = pNodes->operator[](n) ;
pNode->Show(Z) ;
}
}
else
{
pNode = (_idsNode*) mx->m_pData ;
pNode->Show(Z) ;
}
}
}
}
uint32_t hdbIdset::Fetch (hzVect<uint32_t>& Result, uint32_t nStart, uint32_t nReq) const
{
// Purpose: Fetch ids from the idset into an id buffer
//
// Note that the presumed scenario is that the idset will be a search result, which will not have a vector of nodes. The function is written however, to cope with a vector.
//
// Arguments: 1) Result The id buffer (type uint32_t)
// 2) nStart The start position (ignore all ids before this)
// 3) nReq The number to fetch
//
// Returns: Number of ids fetched
_hzfunc("hdbIdset::Fetch") ;
hzVect<_idsNode*>* pNodes ; // Vector of nodes
_idsNode* pNode ; // Segment (to process)
_idset_seg seg ; // Value segment
xbufIter zi ; // Iterator into low pop idset nodes
uint32_t numNodes ; // Number of nodes
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nFound ; // Number of ids in EUI
uint32_t n ; // Iterator
//uint32_t nPosn = 0 ; // Position (in the bitmap so far) attained last segment
//uint32_t nMax = 0 ; // Maximum id this segment
//uint32_t segNo ; // Segment number
//uint32_t segAddr ; // Segment address
//uint32_t nSegStart ; // Starting position within a segment
//uint32_t x ; // For iterating resut of segment fetch
//uint32_t v ; // For value
Result.Clear() ;
if (!mx)
return 0 ;
if (!mx->m_pData)
return 0 ;
if (nStart >= mx->m_nPop)
return 0 ;
if (mx->m_bVect)
{
pNodes = (hzVect<_idsNode*>*) mx->m_pData ;
numNodes = pNodes->Count() ;
}
else
{
pNodes = 0 ;
numNodes = 1 ;
}
for (n = 0 ; n < numNodes ; n++)
{
pNode = mx->m_bVect ? pNodes->operator[](n) : (_idsNode*) mx->m_pData ;
zi = pNode->m_Data ;
// While not yet at start
for (;;)
{
nFound = _readEUI(seg, nSofar, zi) ;
}
}
return Result.Count() ;
}
// Operators
hdbIdset& hdbIdset::operator|= (const hdbIdset& op)
{
// Run through the nodes in the operand idset. Iterate each node using _readEUI, and insert everything found into the applicable node in this idset.
//
// Arguments: 1) The supplied bitmap
//
// Returns: Reference to this bitmap instance in all cases.
_hzfunc("hdbIdset::operator&=") ;
hzVect<_idsNode*>* pNodesA ; // Vector of nodes (this)
hzVect<_idsNode*>* pNodesB ; // Vector of nodes (operand)
_idsNode* pNodeA ; // Node (this)
_idsNode* pNodeB ; // Node (operand)
_idset_seg segA ; // Value segment (this)
_idset_seg segB ; // Value segment (operand)
xbufIter ziA ; // EUI data iterator (this)
xbufIter ziB ; // EUI data iterator (operand)
uint32_t numNodes ; // Number of nodes
uint32_t nA ; // Iterator
uint32_t nB ; // Iterator
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nFound ; // Number of ids in EUI
// Is there anything to AND with?
if (!op.Count())
{ Clear() ; return *this ; }
// If this map is empty an AND operation cannot populate it - just return
if (!Count())
return *this ;
if (op.mx->m_bVect)
{
pNodesB = (hzVect<_idsNode*>*) op.mx->m_pData ;
numNodes = pNodesB->Count() ;
}
else
{
pNodesB = 0 ;
numNodes = 1 ;
}
// Go through operand idset nodes
for (nB = 0 ; nB < numNodes ; nB++)
{
pNodeB = op.mx->m_bVect ? pNodesB->operator[](nB) : (_idsNode*) op.mx->m_pData ;
ziB = pNodeB->m_Data ;
for (; !ziB.eof() ;)
{
nFound = _readEUI(segB, nSofar, ziB) ;
}
}
return *this ;
}
hdbIdset& hdbIdset::operator&= (const hdbIdset& op)
{
// AND this idset with the operand idset. Place the result in this idset.
//
// Argument: The operand idset
//
// Returns: Reference to this idset in all cases.
_hzfunc("hdbIdset::operator&=") ;
hzVect<_idsNode*>* pNodesA ; // Vector of nodes (this)
hzVect<_idsNode*>* pNodesB ; // Vector of nodes (operand)
_idsNode* pNodeA ; // Node (this)
_idsNode* pNodeB ; // Node (operand)
_idset_seg segA ; // Value segment (this)
_idset_seg segB ; // Value segment (operand)
xbufIter ziA ; // EUI data iterator (this)
xbufIter ziB ; // EUI data iterator (operand)
uint32_t numNodes ; // Number of nodes
uint32_t nA ; // Iterator
uint32_t nB ; // Iterator
uint32_t nSofar ; // Value so far as a result of iteration
uint32_t nFound ; // Number of ids in EUI
// Is there anything to AND with?
if (!op.Count())
{ Clear() ; return *this ; }
// If this map is empty an AND operation cannot populate it - just return
if (!Count())
return *this ;
if (op.mx->m_bVect)
{
pNodesB = (hzVect<_idsNode*>*) op.mx->m_pData ;
numNodes = pNodesB->Count() ;
}
else
{
pNodesB = 0 ;
numNodes = 1 ;
}
// Go through operand idset nodes
for (nB = 0 ; nB < numNodes ; nB++)
{
pNodeB = op.mx->m_bVect ? pNodesB->operator[](nB) : (_idsNode*) op.mx->m_pData ;
ziB = pNodeB->m_Data ;
for (; !ziB.eof() ;)
{
nFound = _readEUI(segB, nSofar, ziB) ;
}
}
return *this ;
}