//
// File: hdbIndex.cpp
//
// Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
// The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
// Software Foundation, either version 3 of the License, or any later version.
//
// The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
//
// This file implements the following classes which are part of the HadronZoo Database Suite
//
// 1) _hz_sqle_expr For implementation of searches based on SQL-esce
// 2) hdbIndexEnum Set of bitmaps, one per enum value
// 3) hdbIndexUkey
// 4) hdbIndexText
//
#include <iostream>
#include <fstream>
#include <cstdio>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "hzBasedefs.h"
#include "hzString.h"
#include "hzChars.h"
#include "hzChain.h"
#include "hzDate.h"
#include "hzTextproc.h"
#include "hzTokens.h"
#include "hzCodec.h"
#include "hzDocument.h"
#include "hzDirectory.h"
#include "hzDatabase.h"
#include "hzProcess.h"
using namespace std ;
/*
** SECTION 1: SQL-esce
**
** SQL-esce is implemented using four classes as follows:-
**
** 1) _hz_sqle_term - This is the base class for a SQL-esce term allowing a term to be of two types as follows:-
**
** a) _hz_sqle_unit - This is the SQL-esce term of the form member-condOp-value together with the hdbIdset needed to store the evaluation result.
** b) _hz_sqle_form - (formula) This comprises a boolean operator and a pair of _hz_sqle_unit or _hz_sqle_form operands - i.e. the remainder of the expression.
**
** 2) _hz_sqle_expr - The root of the expression.
*/
enum hzVconOp
{
	// Category: Expression
	//
	// Binary operators that may be applied to a pair of ID-sets. The enumerators are numbered consecutively from zero.
	BIN_OP_SET_NULL		= 0,	// No operation
	BIN_OP_SET_ASSIGN	= 1,	// =
	BIN_OP_SET_EQUAL	= 2,	// ==
	BIN_OP_SET_PLUS		= 3,	// +
	BIN_OP_SET_MINUS	= 4,	// -
	BIN_OP_SET_AND		= 5,	// &&
	BIN_OP_SET_OR		= 6		// ||
} ;
class _hz_sqle_term
{
	// Category: Expression
	//
	// Abstract base class for SQL-esce terms (see synopsis 'SQL-esce'). A term is either a single unit (key lookup) or a formula (pair of terms joined by
	// a binary operator). Terms are allocated with new and held by base class pointer, so the destructor is virtual to make deletion via such a pointer
	// well defined.
public:
	virtual ~_hz_sqle_term (void) {}

	// Set or clear the unary NOT flag on the term
	virtual hzEcode SetUop (bool bNot) = 0 ;

	// Evaluate the term against the supplied free text index, placing the matching ids in Result
	virtual hzEcode Evaluate (hdbIdset& Result, hdbIndexText* pIndex) = 0 ;
} ;
class _hz_sqle_unit : public _hz_sqle_term
{
	// Category: Expression
	//
	// Leaf term of a SQL-esce expression: a single key (word or value) to be looked up in the index (see synopsis 'SQL-esce')
protected:
	hzString	m_Key ;		// Word or value with which set is associated
	bool		m_bNot ;	// Apply the ! unary operator when evaluating

public:
	_hz_sqle_unit (void) : m_bNot(false) {}
	~_hz_sqle_unit (void) {}

	// Assign the key, from either a string object or a null terminated C string
	void	SetValue	(const hzString& s)	{ m_Key = s ; }
	void	SetValue	(const char* s)		{ m_Key = s ; }

	// Access the key
	hzString&	Key	(void)	{ return m_Key ; }

	// Record whether the unary NOT operator applies to this term. Always returns E_OK.
	hzEcode	SetUop	(bool bNot)
	{
		m_bNot = bNot ;
		return E_OK ;
	}

	// Look the key up on the supplied index
	hzEcode	Evaluate	(hdbIdset& Result, hdbIndexText* pIndex) ;

	//void Show (hzLogger& xlog) ;
} ;
/*
** The hzIdsetExp class provides the method of parenthesis
*/
class _hz_sqle_form : public _hz_sqle_term
{
// Category: Expression
//
// A single SQL-esce expression of the form term-boolean_op-remainder_of_expression (see synopsis 'SQL-esce')
protected:
_hz_sqle_term* m_pA ; // First term in SQL-esce expression
_hz_sqle_term* m_pB ; // Second term in SQL-esce expression
hzVconOp m_eBinary ; // Binary operator to apply to the two terms
bool m_bNot ; // Flag to direct negation of the result
public:
_hz_sqle_form (void)
{
m_pA = 0 ;
m_pB = 0 ;
m_eBinary = BIN_OP_SET_NULL ;
m_bNot = false ;
}
~_hz_sqle_form (void)
{
}
hzEcode SetUop (bool bNot) { m_bNot = bNot ; return E_OK ; }
hzEcode AddOperand (_hz_sqle_term* pOperand)
{
// Add operand to the SQL term
if (!m_pA)
{ m_pA = pOperand ; return E_OK ; }
if (!m_pB)
{ m_pB = pOperand ; return E_OK ; }
return E_DUPLICATE ;
}
hzEcode SetBop (hzVconOp eOperator)
{
// Set SQL term-pair operator
m_eBinary = eOperator ;
return E_OK ;
}
hzEcode Evaluate (hdbIdset& Result, hdbIndexText* pIndex) ;
} ;
class _hz_sqle_expr
{
// Category: Expression
//
// This is the class which holds and parses the expression part of the SQL-esce statement (see synopsis 'SQL-esce')
protected:
hzVect<hzToken> m_Tokens ; // Tokens of the SQL-esce expression
_hz_sqle_term* m_pRoot ; // Root term of the SQL-esce expression
uint32_t m_tokIter ; // Token iterator
uint32_t m_nParLevel ; // Current tree level
_hz_sqle_term* _proctoks (void) ;
public:
hdbIndexText* m_pIndex ; // Freetext index
_hz_sqle_expr (void)
{
m_pRoot = 0 ;
m_tokIter = 0 ;
m_nParLevel = 0 ;
}
~_hz_sqle_expr (void) {}
hzEcode Evaluate (hdbIdset& result) ;
hzEcode Parse (hdbIdset& result, const hzString& pExpression) ;
} ;
/*
** _hz_sqle_unit Functions
*/
hzEcode _hz_sqle_unit::Evaluate (hdbIdset& Result, hdbIndexText* pIndex)
{
	// Evaluate (search for) this single SQL-Esce term within the supplied index and place the result in the supplied bitmap.
	//
	// Arguments:	1) Result	The bitmap (of document ids) to be populated by the search operation.
	//				2) pIndex	The index to search for this term in.
	//
	// Returns:	E_NODATA	No search term available (the key is empty)
	//			E_CORRUPT	The index Select() call reported an error
	//			E_OK		The operation was successful (even if nothing was found)
	//
	// A null index pointer is reported through hzexit(E_ARGUMENT, ...) rather than by return code.
	//
	// Note that m_bNot is not consulted here, so a unary NOT set on the term has no effect on the result.

	_hzfunc("_hz_sqle_unit::Evaluate") ;

	hzEcode	rc = E_OK ;		// Return code

	if (!m_Key.Length())
		return hzerr(E_NODATA, "Empty _cant") ;

	if (!pIndex)
		hzexit(E_ARGUMENT, "No index supplied") ;

	rc = pIndex->Select(Result, m_Key) ;
	if (rc != E_OK)
		return hzerr(E_CORRUPT, "Evaluating _cant [%s] failed in select\n", *m_Key) ;

	threadLog("Eval of %s - finds %d records\n", *m_Key, Result.Count()) ;
	return rc ;
}
hzEcode _hz_sqle_form::Evaluate (hdbIdset& Result, hdbIndexText* pIndex)
{
	// Evaluate this composite SQL-Esce term within the supplied index and place the result in the supplied bitmap. Both operands are evaluated and then
	// combined according to the binary operator. Only the AND and OR operators are acted upon; for any other operator the result is left empty.
	//
	// Arguments:	1) Result	The bitmap (of document ids) to be populated by the search operation. Cleared on entry.
	//				2) pIndex	The index to search for this term in.
	//
	// Returns:	E_CORRUPT	If either operand is missing
	//			E_OK		The operation was successful (even if nothing was found)
	//
	// If the evaluation of either operand fails, the error code from that operand is returned. A null index pointer is reported through
	// hzexit(E_CORRUPT, ...) rather than by return code.

	_hzfunc("_hz_sqle_form::Evaluate") ;

	hdbIdset	B ;		// Working intermediate bitmap (result of second operand)
	hzEcode		rc ;	// Return code

	Result.Clear() ;

	if (!pIndex)
		hzexit(E_CORRUPT, "No index supplied") ;

	// Both cases below dereference the operands so refuse to run without them
	if (!m_pA || !m_pB)
		return hzerr(E_CORRUPT, "Formula is missing an operand") ;

	threadLog("Applying formula\n") ;

	switch	(m_eBinary)
	{
	case BIN_OP_SET_AND:

		threadLog("Applying formula (AND) - ") ;
		rc = m_pA->Evaluate(Result, pIndex) ;
		if (rc != E_OK)
			return hzerr(rc, "Case 1 (AND) Corruption in index") ;

		// The second operand only matters if the first found something, as the intersection would otherwise be empty anyway
		if (Result.Count() > 0)
		{
			threadLog("%d recs with ", Result.Count()) ;
			rc = m_pB->Evaluate(B, pIndex) ;
			if (rc != E_OK)
				return hzerr(rc, "Case 2 (AND) Corruption in index") ;

			threadLog("%d recs ", B.Count()) ;
			Result &= B ;
			threadLog(" -> %d records total\n", Result.Count()) ;
		}
		break ;

	case BIN_OP_SET_OR:

		threadLog("Applying formula (OR) - ") ;
		rc = m_pA->Evaluate(Result, pIndex) ;
		if (rc != E_OK)
			return hzerr(rc, "Case 1 (OR) eveluation failed") ;

		threadLog("%d recs with ", Result.Count()) ;

		// The return code must be captured here or the test that follows can never detect a failure of the second operand
		rc = m_pB->Evaluate(B, pIndex) ;
		if (rc != E_OK)
			return hzerr(rc, "Case 2 (OR) evaluation failed") ;

		threadLog("%d recs ", B.Count()) ;
		Result |= B ;
		threadLog(" -> %d records total\n", Result.Count()) ;
		break ;

	default:
		// No other operator is implemented. The result stays empty.
		break ;
	}

	threadLog("Found %d records in %d nodes for formula\n", Result.Count(), Result.NoNodes()) ;
	return E_OK ;
}
/*
** _hz_sqle_expr Functions
*/
hzEcode _hz_sqle_expr::Evaluate (hdbIdset& result)
{
	// Run the parsed SQL-Esce expression against the index held in m_pIndex, placing the matching ids in the supplied bitmap.
	//
	// Arguments:	1) result	The bitmap (of document ids) to be populated by the search operation.
	//
	// Returns:	E_OK if there is no root term (nothing to evaluate), otherwise whatever the Evaluate() of the root term returns.
	//
	// A null m_pIndex is reported through hzexit(E_CORRUPT, ...) rather than by return code.

	_hzfunc("_hz_sqle_expr::Evaluate") ;

	// Result.Clear() ;

	if (!m_pIndex)
		hzexit(E_CORRUPT, "No index supplied") ;

	if (m_pRoot)
	{
		threadLog("Calling Eval\n") ;
		return m_pRoot->Evaluate(result, m_pIndex) ;
	}

	threadLog("No root!\n") ;
	return E_OK ;
}
_hz_sqle_term* _hz_sqle_expr::_proctoks (void)
{
	// Recursively convert text tokens into a tree of _hz_sqle_term instances. This is a support function to Parse().
	//
	// Starting at the current token (m_tokIter) the function reads a first operand, which is either a single key or, where the token is an opening
	// bracket, whatever a recursive call returns. It then reads an operator. If the next token is neither an operator nor a bracket, an AND is
	// implied. Where there is an operator the remainder of the expression is obtained by a recursive call and becomes the second operand.
	//
	// Arguments:	None
	//
	// Returns:	Pointer to the term (single unit or formula) if the tokens could be processed
	//			NULL if there are no tokens left, or there is a syntax error
	//
	// Notes:	1) A leading '!' is recorded on the first operand by SetUop(). The Evaluate() functions in this file do not consult that flag.
	//			2) Where NULL is returned after an operand has been allocated, that operand is not released.

	_hzfunc("_hz_sqle_expr::_proctoks") ;

	_hz_sqle_form*	pFormula = 0 ;			// Pointer to rest of expression
	_hz_sqle_unit*	pConstant = 0 ;			// Pointer to a constant (if applicable)
	_hz_sqle_term*	pOperand1 = 0 ;			// Pointer to 1st operand
	_hz_sqle_term*	pOperand2 = 0 ;			// Pointer to 2nd operand
	hzString		noToken ;				// Never assigned, stands for 'no token' once the tokens are exhausted
	hzString		tval ;					// Temp string - token value
	hzVconOp		eOp = BIN_OP_SET_NULL ;	// Applicable binary operator
	bool			bNot = false ;			// Unary NOT was given for the first operand

	threadLog("Now on level %d\n", m_nParLevel) ;

	if (m_tokIter >= m_Tokens.Count())
	{
		threadLog("No more tokens - returning null\n") ;
		return 0 ;
	}

	tval = m_Tokens[m_tokIter].Value() ;
	threadLog("token is %s\n", *tval) ;

	/*
	**	Occupy first operand
	*/

	// Unary operators are permitted while expecting operand
	if (tval == "!")
	{
		threadLog("Setting unary for first operand\n") ;
		bNot = true ;
		m_tokIter++ ;
		if (m_tokIter >= m_Tokens.Count())
		{
			hzerr(E_SYNTAX, "Expected an operand or '(' to follow unary") ;
			return 0 ;
		}
		tval = m_Tokens[m_tokIter].Value() ;
	}

	// Binary operators are not permitted here though
	if (tval == "&&" || tval == "||")
	{
		hzerr(E_SYNTAX, "Expected an operand or '('. Got an operator") ;
		return 0 ;
	}

	if (tval == "(")
	{
		// The operand can be a formula, recurse if it is. The level is restored afterwards so it reflects the actual nesting depth.
		m_nParLevel++ ;
		m_tokIter++ ;
		pOperand1 = _proctoks() ;
		m_nParLevel-- ;
	}
	else
	{
		// Token is not a unary or an open bracket. We assume token is a const
		threadLog("Making a const of %s\n", *tval) ;
		pConstant = new _hz_sqle_unit() ;
		pConstant->SetValue(tval) ;
		pOperand1 = pConstant ;
		m_tokIter++ ;
	}

	// Read the token after the first operand, but only if there is one. Indexing beyond the last token is avoided.
	if (m_tokIter < m_Tokens.Count())
		tval = m_Tokens[m_tokIter].Value() ;
	else
		tval = noToken ;

	// Pass on the unary operator instead of discarding it
	if (bNot && pOperand1)
		pOperand1->SetUop(true) ;

	/*
	**	Obtain the operator
	*/

	if (m_tokIter < m_Tokens.Count())
	{
		// we now expect a binary operator or a terminating )
		if (tval == ")")
		{
			m_tokIter++ ;
			return pOperand1 ;
		}

		// Test for operator
		if (tval == "&&" || tval == "&")
		{
			eOp = BIN_OP_SET_AND ;
			m_tokIter++ ;
			if (m_tokIter < m_Tokens.Count())
				tval = m_Tokens[m_tokIter].Value() ;
			else
				tval = noToken ;
		}
		else if (tval == "||" || tval == "|")
		{
			eOp = BIN_OP_SET_OR ;
			m_tokIter++ ;
			if (m_tokIter < m_Tokens.Count())
				tval = m_Tokens[m_tokIter].Value() ;
			else
				tval = noToken ;
		}
		else if (tval == "(")
		{
			hzerr(E_SYNTAX, "Line %d: Expected an operator", m_Tokens[m_tokIter].LineNo()) ;
			return 0 ;
		}
		else
		{
			// Token is not an operator and not an open or a close. We assume it is another _cant and thus there is an implied AND
			// operator. The token is not consumed as it is the start of the second operand.
			threadLog("Asserting operator as AND\n") ;
			eOp = BIN_OP_SET_AND ;
		}
	}

	/*
	**	Recurse to get second operand
	*/

	if (tval)
	{
		m_nParLevel++ ;
		pOperand2 = _proctoks() ;
		m_nParLevel-- ;
	}

	/*
	**	Decide if we return a const or a formula
	*/

	if (!pOperand1)
	{
		threadLog("returning null (no first operand)\n") ;
		return 0 ;
	}

	if (eOp == BIN_OP_SET_NULL)
	{
		threadLog("Returning single operand\n") ;
		return pOperand1 ;
	}

	if (!pOperand2)
	{
		// An operator was given (or implied) but nothing follows it
		threadLog("Returning null (no second operand)\n") ;
		return 0 ;
	}

	// Allocate a formula node
	threadLog("Returning double operand\n") ;
	pFormula = new _hz_sqle_form() ;
	pFormula->AddOperand(pOperand1) ;
	pFormula->AddOperand(pOperand2) ;
	pFormula->SetBop(eOp) ;

	return pFormula ;
}
hzEcode _hz_sqle_expr::Parse (hdbIdset& result, const hzString& srchExp)
{
	// Parse the supplied expression. This is first a matter of tokenization, then a call to the recursive _proctoks() (process tokens) function to
	// build the tree of terms. After parsing the expression is ready for evaluation. Note that parsing does not evaluate anything.
	//
	// Arguments:	1) result	The result bitmap (not used by this function)
	//				2) srchExp	The search expression
	//
	// Returns:	E_ARGUMENT	If no expression was supplied
	//			E_FORMAT	If no tokens were identified in the supplied expression, or the tokens did not amount to a valid expression
	//			E_OK		If no errors occurred

	_hzfunc("_hz_sqle_expr::Parse") ;

	if (!srchExp)
		return E_ARGUMENT ;

	// Start from the first token at level zero, so that a repeat call does not resume from where an earlier parse stopped
	m_tokIter = 0 ;
	m_nParLevel = 0 ;

	// tokenize the expression
	TokenizeString(m_Tokens, *srchExp, TOK_MO_BOOL) ;
	if (m_Tokens.Count() == 0)
	{
		threadLog("No tokens found in expression\n") ;
		return E_FORMAT ;
	}
	threadLog("Parsing %d tokens\n", m_Tokens.Count()) ;

	m_pRoot = _proctoks() ;
	if (!m_pRoot)
		return E_FORMAT ;

	return E_OK ;
}
/*
** SECTION 2: hdbIndexEnum Functions
*/
void hdbIndexEnum::Halt (void)
{
	// Shut the enumerated index down: release every bitmap held in the bitmap collection, then empty the collection itself.
	//
	// Arguments:	None
	// Returns:		None

	uint32_t	slot = 0 ;	// Position within the bitmap collection

	while (slot < m_Maps.Count())
	{
		delete m_Maps.GetObj(slot) ;
		slot++ ;
	}

	m_Maps.Clear() ;
}
hzEcode hdbIndexEnum::Insert (uint32_t objId, const hzAtom& Key)
{
	// Purpose:	Add an object/key combination, by inserting the object id into the bitmap for the enum value.
	//
	// Arguments:	1) objId	The object id (row number)
	//				2) Key		The key (converted to an unsigned enum value)
	//
	// Returns:	E_RANGE		If the supplied value is zero or exceeds the number of bitmaps held.
	//			E_CORRUPT	If no bitmap could be retrieved for the value.
	//			E_OK		If the operation was successful.
	//
	// If the bitmap insert itself fails, the error code it reports is returned.

	_hzfunc("hdbIndexEnum::Insert") ;

	hdbIdset*	pS ;			// Applicable bitmap to insert object id
	uint32_t	val ;			// Enum value from supplied atom
	hzEcode		rc = E_OK ;		// Return code

	val = (uint32_t) Key ;
	if (val == 0 || val > m_Maps.Count())
		return E_RANGE ;

	// As in Select(), do not assume a bitmap exists just because the value is in range
	pS = m_Maps[val] ;
	if (!pS)
		return hzerr(E_CORRUPT, "Bitmap stated as existing could not be retrieved from the map") ;

	if ((rc = pS->Insert(objId)) != E_OK)
		return hzerr(rc, "(case key exists) Bitmap could not insert object") ;

	return rc ;
}
hzEcode hdbIndexEnum::Delete (uint32_t objId, const hzAtom& Key)
{
	// Purpose:	Delete an object/key combination, by removing the object id from the bitmap for the enum value.
	//
	// Arguments:	1) objId	The object id (row number)
	//				2) Key		The key (converted to an unsigned enum value)
	//
	// Returns:	E_RANGE		If the supplied value is zero or exceeds the number of bitmaps held.
	//			E_CORRUPT	If no bitmap could be retrieved for the value.
	//			E_OK		If the operation was successful.

	_hzfunc("hdbIndexEnum::Delete") ;

	hdbIdset*	pS ;			// Applicable bitmap to delete object id from
	uint32_t	val ;			// Enum value from supplied atom
	hzEcode		rc = E_OK ;		// Return code

	val = (uint32_t) Key ;
	if (val == 0 || val > m_Maps.Count())
		return E_RANGE ;

	// As in Select(), do not assume a bitmap exists just because the value is in range
	pS = m_Maps[val] ;
	if (!pS)
		return hzerr(E_CORRUPT, "Bitmap stated as existing could not be retrieved from the map") ;

	pS->Delete(objId) ;

	// The bitmap is retained even when it becomes empty
	//	if (!pS->Count())
	//		m_Maps.Delete(K) ;

	return rc ;
}
hzEcode hdbIndexEnum::Select (hdbIdset& Result, const hzAtom& Key)
{
	// Purpose:	Copy into the supplied set, the ids of all objects having the enum value given by the key.
	//
	// Arguments:	1) Result	The set to populate. Cleared on entry.
	//				2) Key		The key (converted to an unsigned enum value)
	//
	// Returns:	E_RANGE		If the value is zero or exceeds the number of bitmaps held
	//			E_CORRUPT	If the bitmap could not be retrieved, or the copy does not have the same population as the original
	//			E_OK		If the operation was successful.

	_hzfunc("hdbIndexEnum::Select") ;

	Result.Clear() ;

	const uint32_t	enumVal = (uint32_t) Key ;	// Enum value from supplied atom

	// Querry this?
	if (enumVal <= 0 || enumVal > m_Maps.Count())
		return E_RANGE ;

	hdbIdset* const	pBitmap = m_Maps[enumVal] ;	// Bitmap held for the value
	if (!pBitmap)
		return hzerr(E_CORRUPT, "Bitmap stated as existing could not be retrieved from the map") ;

	// Copy, then verify the copy
	Result = *pBitmap ;
	if (Result.Count() == pBitmap->Count())
		return E_OK ;

	return hzerr(E_CORRUPT, "Index bound bitmap has %d records, copy has %d records", pBitmap->Count(), Result.Count()) ;
}
hzEcode hdbIndexEnum::Dump (const hzString& Filename, bool bFull)
{
	// Write a human readable listing of the index to file. For each bitmap held, a header line states the position of the bitmap and its population.
	// In a full dump the header is followed by the object ids themselves, in rows of up to ten.
	//
	// Arguments:	1) Filename	Full path of file to dump to
	//				2) bFull	Do a full dump (true) or just the header lines (false)
	//
	// Returns:	E_ARGUMENT	If the filename is not supplied
	//			E_OPENFAIL	If the supplied filename cannot be opened
	//			E_WRITEFAIL	If there was a write error
	//			E_OK		If the index was dumped to file
	//
	// Note that the stream is only tested for write errors after the ids of a bitmap have been written in a full dump.

	_hzfunc("hdbIndexEnum::Dump") ;

	hzVect<uint32_t>	Results ;		// Results of fetch
	ofstream			os ;			// Output stream
	hdbIdset			proc ;			// Processing bitmap
	hdbIdset*			pI ;			// The Id set assoc with key
	uint32_t			ev ;			// Enum value
	uint32_t			nRecs ;			// Total number of records in the bitmap
	uint32_t			nFetched ;		// Number of records fetched in call to Fetch()
	uint32_t			nStart ;		// Starter for Fetch
	uint32_t			nPosn ;			// Iterator of fetched ids
	char				cvLine [120] ;	// For output

	if (!Filename)
		return hzerr(E_ARGUMENT, "No filename supplied") ;

	os.open(*Filename) ;
	if (os.fail())
		return hzerr(E_OPENFAIL, "Could not open index dump file (%s)", *Filename) ;

	os << "Index Dump\n" ;

	for (ev = 0 ; ev < m_Maps.Count() ; ev++)
	{
		pI = m_Maps.GetObj(ev) ;
		if (!pI)
		{
			snprintf(cvLine, sizeof(cvLine), "Enum-Val %u (null list)\n", ev) ;
			os << cvLine ;
			continue ;
		}

		nRecs = pI->Count() ;
		if (!nRecs)
		{
			snprintf(cvLine, sizeof(cvLine), "Enum-Val %u (empty list)\n", ev) ;
			os << cvLine ;
			continue ;
		}

		snprintf(cvLine, sizeof(cvLine), "Enum-Val %u (%u objects)\n", ev, nRecs) ;
		os << cvLine ;

		if (!bFull)
			continue ;

		// Extract ids from the bitmap, ten at a time
		proc = *pI ;

		for (nStart = 0 ; nStart < nRecs ; nStart += 10)
		{
			nFetched = proc.Fetch(Results, nStart, 10) ;
			if (!nFetched)
				break ;

			// Print only the ids actually fetched. The last row of a bitmap will usually have fewer than ten.
			if (nFetched > 10)
				nFetched = 10 ;

			for (nPosn = 0 ; nPosn < nFetched ; nPosn++)
			{
				snprintf(cvLine, sizeof(cvLine), " %10u", Results[nPosn]) ;
				os << cvLine ;
			}
			os << "\n" ;
		}

		if (os.fail())
		{
			os.close() ;
			hzerr(E_WRITEFAIL, "Could not write to index dump file (%s)", *Filename) ;
			return E_WRITEFAIL ;
		}
	}

	os << "Index Dump End\n" ;
	os.close() ;

	return E_OK ;
}
/*
** hdbIndexUkey Functions
*/
hzEcode hdbIndexUkey::Init (const hdbObjRepos* pRepos, const hzString& mbrName, hdbBasetype dtype)
{
	// A hdbIndexUkey is a 1:1 map from unique key to the id (uint32_t) of the data object having that key. The key type of the map has to agree with the
	// data type of the member the index applies to, so initialization amounts to allocating a map keyed by the supplied data type. The index is named
	// as the repository name and member name, separated by a double colon.
	//
	// Arguments:	1) pRepos	The repository (supplies the first part of the index name)
	//				2) mbrName	Name of the member to which the index applies
	//				3) dtype	The data type of the member to which the index applies
	//
	// Returns:	E_SEQUENCE	If the index has already been initialized
	//			E_TYPE		If the supplied data type is not one for which a map can be allocated
	//			E_OK		If the operation was successful
	//
	// A null repository pointer is reported through hzexit(E_ARGUMENT, ...) rather than by return code. Note that the name is set before the data type
	// is examined, so it is set even where E_TYPE is returned.

	_hzfunc("hdbIndexUkey::Init") ;

	if (m_bInit)
		return hzerr(E_SEQUENCE, "%s already initialized", *m_Name) ;

	if (!pRepos)
		hzexit(E_ARGUMENT, "No repository supplied") ;

	//m_pRepos = pRepos ;
	m_Name = pRepos->txtName() ;
	m_Name += "::" ;
	m_Name += mbrName ;

	// Allocate the map for the type. Unsupported types leave the index uninitialized.
	switch	(dtype)
	{
	//case BASETYPE_DIGEST:	m_keys.pMd5 = new hzMapS <hzMD5,uint32_t> ;		break ;

	// Textual types
	case BASETYPE_STRING:	m_keys.pStr = new hzMapS <hzString,uint32_t> ;		break ;
	case BASETYPE_DOMAIN:	m_keys.pDom = new hzMapS <hzDomain,uint32_t> ;		break ;
	case BASETYPE_EMADDR:	m_keys.pEma = new hzMapS <hzEmaddr,uint32_t> ;		break ;
	case BASETYPE_URL:		m_keys.pUrl = new hzMapS <hzUrl,uint32_t> ;			break ;
	case BASETYPE_IPADDR:	m_keys.pIpa = new hzMapS <hzIpaddr,uint32_t> ;		break ;

	// Dates and times
	case BASETYPE_TIME:		m_keys.pTime = new hzMapS <hzTime,uint32_t> ;		break ;
	case BASETYPE_SDATE:	m_keys.pSD = new hzMapS <hzSDate,uint32_t> ;		break ;
	case BASETYPE_XDATE:	m_keys.pXD = new hzMapS <hzXDate,uint32_t> ;		break ;

	// Integers
	case BASETYPE_INT32:	m_keys.pSI32 = new hzMapS <int32_t,uint32_t> ;		break ;
	case BASETYPE_UINT32:	m_keys.pUI32 = new hzMapS <uint32_t,uint32_t> ;		break ;
	case BASETYPE_INT64:	m_keys.pSI64 = new hzMapS <int64_t,uint32_t> ;		break ;
	case BASETYPE_UINT64:	m_keys.pUI64 = new hzMapS <uint64_t,uint32_t> ;		break ;

	default:
		return E_TYPE ;
	}

	m_eBasetype = dtype ;
	m_bInit = true ;
	return E_OK ;
}
void hdbIndexUkey::Halt (void)
{
// Intended to save the index to disk and close files. This is currently a stub with an empty body, so calling it does nothing.
//
// Arguments: None
// Returns: None
// STUB
}
hzEcode hdbIndexUkey::Insert (const hzAtom& atom, uint32_t objId)
{
	// Add an atomic-value/object-id pair to the unique key index. The value goes into the map that matches the data type of the index. Domain, email
	// address and string values are first inserted into the respective global set, and it is the value read back from that set which is used as key.
	//
	// Arguments:	1) atom		The atomic value
	//				2) objId	The object identifier
	//
	// Returns:	E_NOINIT	If the index has not been initialized
	//			E_NODATA	If the atomic value is not set (see note)
	//			E_TYPE		If the atomic value is not of the expected data type
	//			E_OK		If the operation was successful
	//
	// If the insert into the map fails, the error code from the map is returned.
	//
	// Note that as hdbIndexUkey can only be applied to data members with a minimum and maximum population of 1, the member cannot be NULL and therefore a
	// NULL value to insert is an error.

	_hzfunc("hdbIndexUkey::Insert") ;

	hzChain		delta ;			// For writing delta
	hzString	strKey ;		// String value if applicable
	hzDomain	domKey ;		// Domain name if applicable
	hzEmaddr	emaKey ;		// Email address if applicable
	hzUrl		url ;			// URL
	hzEcode		rc = E_OK ;		// Return code

	if (!m_bInit)
		return E_NOINIT ;
	if (atom.IsNull())
		return E_NODATA ;
	if (atom.Type() != m_eBasetype)
		return hzerr(E_TYPE, "Index type %s - supplied value type %s", Basetype2Txt(m_eBasetype), Basetype2Txt(atom.Type())) ;

	switch	(m_eBasetype)
	{
	// Types whose key is the copy held in a global set
	case BASETYPE_STRING:
		_hzGlobal_setStrings.Insert(atom.Str()) ;
		strKey = _hzGlobal_setStrings[atom.Str()] ;
		rc = m_keys.pStr->Insert(strKey, objId) ;
		break ;

	case BASETYPE_DOMAIN:
		_hzGlobal_setDomains.Insert(atom.Domain()) ;
		domKey = _hzGlobal_setDomains[atom.Domain()] ;
		rc = m_keys.pDom->Insert(domKey, objId) ;
		break ;

	case BASETYPE_EMADDR:
		_hzGlobal_setEmaddrs.Insert(atom.Emaddr()) ;
		emaKey = _hzGlobal_setEmaddrs[atom.Emaddr()] ;
		rc = m_keys.pEma->Insert(emaKey, objId) ;
		break ;

	// Types whose key is the value as supplied
	case BASETYPE_URL:		rc = m_keys.pUrl->Insert(atom.Url(), objId) ;		break ;
	case BASETYPE_IPADDR:	rc = m_keys.pIpa->Insert(atom.Ipaddr(), objId) ;	break ;
	case BASETYPE_TIME:		rc = m_keys.pTime->Insert(atom.Time(), objId) ;		break ;
	case BASETYPE_SDATE:	rc = m_keys.pSD->Insert(atom.SDate(), objId) ;		break ;
	case BASETYPE_XDATE:	rc = m_keys.pXD->Insert(atom.XDate(), objId) ;		break ;
	case BASETYPE_INT32:	rc = m_keys.pSI32->Insert(atom.Int32(), objId) ;	break ;
	case BASETYPE_UINT32:	rc = m_keys.pUI32->Insert(atom.Unt32(), objId) ;	break ;
	case BASETYPE_INT64:	rc = m_keys.pSI64->Insert(atom.Int64(), objId) ;	break ;
	case BASETYPE_UINT64:	rc = m_keys.pUI64->Insert(atom.Unt64(), objId) ;	break ;
	}

	if (rc != E_OK)
		return rc ;

	// Formulate the delta. The write of it is commented out, so the text is presently discarded.
	delta.Printf("@%u:%s\n", objId, atom.Show()) ;
	//m_osDelta << Z ;

	return rc ;
}
hzEcode hdbIndexUkey::Delete (const hzAtom& key)
{
	// Remove the key/object pair named by the supplied key, from the index. The delete is applied to the map that Init() allocated for the data
	// type of the index, which is the same map that Insert() populates.
	//
	// Arguments:	1) key	Reference to atom with the lookup value.
	//
	// Returns:	E_NOINIT	If the index has not been initialized
	//			E_NODATA	If the supplied atom has no value
	//			E_TYPE		If the atom data type does not match that of the member to which this index applies, or is not a type the index supports
	//			E_OK		If the delete was applied, or the value extracted from the atom was empty
	//
	// Any other code is as reported by the Delete() of the map.

	_hzfunc("hdbIndexUkey::Delete") ;

	hzString	str ;			// String
	hzDomain	dom ;			// Domain name if applicable
	hzEmaddr	ema ;			// Email addr if applicable
	hzUrl		url ;			// URL if applicable
	hzEcode		rc = E_OK ;		// Return code

	if (!m_bInit)		return E_NOINIT ;
	if (key.IsNull())	return E_NODATA ;

	if (key.Type() != m_eBasetype)
		return hzerr(E_TYPE, "Index type %s - supplied value type %s", Basetype2Txt(m_eBasetype), Basetype2Txt(key.Type())) ;

	// Do lookup based on type
	switch	(m_eBasetype)
	{
	// NOTE(review): in the next three cases the entry in the global set is cleared as well. If other objects share that entry they
	// would be affected - confirm this is intended.
	case BASETYPE_DOMAIN:	dom = key.Domain() ;
							if (dom)
							{
								_hzGlobal_setDomains[dom].Clear() ;
								rc = m_keys.pDom->Delete(dom) ;
							}
							break ;

	case BASETYPE_EMADDR:	ema = key.Emaddr() ;
							if (ema)
							{
								_hzGlobal_setEmaddrs[ema].Clear() ;
								rc = m_keys.pEma->Delete(ema) ;
							}
							break ;

	case BASETYPE_STRING:	str = key.Str() ;
							if (str)
							{
								_hzGlobal_setStrings[str].Clear() ;
								rc = m_keys.pStr->Delete(str) ;
							}
							break ;

	case BASETYPE_URL:		url = key.Url() ;
							if (url)
							{
								//_hzGlobal_setStrings.Delete(str) ;
								rc = m_keys.pUrl->Delete(url) ;
							}
							break ;

	// Each remaining type has a map of its own, so the delete must go via the map pointer for that type and not via that of an integer of like size
	case BASETYPE_XDATE:	rc = m_keys.pXD->Delete(key.XDate()) ;		break ;
	case BASETYPE_INT64:	rc = m_keys.pSI64->Delete(key.Int64()) ;	break ;
	case BASETYPE_UINT64:	rc = m_keys.pUI64->Delete(key.Unt64()) ;	break ;
	case BASETYPE_IPADDR:	rc = m_keys.pIpa->Delete(key.Ipaddr()) ;	break ;
	case BASETYPE_TIME:		rc = m_keys.pTime->Delete(key.Time()) ;		break ;
	case BASETYPE_SDATE:	rc = m_keys.pSD->Delete(key.SDate()) ;		break ;
	case BASETYPE_INT32:	rc = m_keys.pSI32->Delete(key.Int32()) ;	break ;
	case BASETYPE_UINT32:	rc = m_keys.pUI32->Delete(key.Unt32()) ;	break ;

	default:
		// Init() allocates no map for any other type
		rc = E_TYPE ;
		break ;
	}

	return rc ;
}
hzEcode hdbIndexUkey::Select (uint32_t& Result, const hzAtom& key)
{
	// Find the single object matching the supplied key - if it exists. The lookup is applied to the map that Init() allocated for the data type of
	// the index, which is the same map that Insert() populates.
	//
	// Arguments:	1) Result	Reference to object id, set by this function. This is set to 0 if the key is not found.
	//				2) key		Atom with the lookup value
	//
	// Returns:	E_NOINIT	If the index is not initialized
	//			E_NODATA	If no key is supplied
	//			E_TYPE		If the atom data type does not match that of the member to which this index applies
	//			E_OK		If no errors occurred (whether or not the key was found)

	_hzfunc("hdbIndexUkey::Select") ;

	hzString	str ;			// String value
	hzDomain	dom ;			// Domain name if applicable
	hzEmaddr	ema ;			// Email addr if applicable
	hzUrl		url ;			// URL if applicable
	hzEcode		rc = E_OK ;		// Return code

	Result = 0 ;

	if (!m_bInit)
		return E_NOINIT ;

	if (key.IsNull())
		return hzerr(E_NODATA, "No key supplied") ;

	if (key.Type() != m_eBasetype)
		return hzerr(E_TYPE, "Index type %s - supplied value type %s", Basetype2Txt(m_eBasetype), Basetype2Txt(key.Type())) ;

	threadLog("Selecting [%s, %s]\n", key.Show(), *key.Str()) ;

	switch	(m_eBasetype)
	{
	case BASETYPE_DOMAIN:	dom = _hzGlobal_setDomains[key.Domain()] ;
							if (dom)
							{
								if (m_keys.pDom->Exists(dom))
									Result = m_keys.pDom->operator[](dom) ;
							}
							break ;

	case BASETYPE_EMADDR:	ema = _hzGlobal_setEmaddrs[key.Emaddr()] ;
							if (ema)
							{
								if (m_keys.pEma->Exists(ema))
									Result = m_keys.pEma->operator[](ema) ;
							}
							break ;

	case BASETYPE_URL:		url = key.Url() ;
							//tmpStr = _hzGlobal_setStrings[key.Url().Whole()] ;
							if (url)
							{
								if (m_keys.pUrl->Exists(url))
									Result = m_keys.pUrl->operator[](url) ;
							}
							break ;

	case BASETYPE_STRING:	str = _hzGlobal_setStrings[key.Str()] ;
							threadLog("Selecting [%s]\n", *str) ;

							// Fall back to the supplied value if the global set has no entry for it
							if (!str)
								str = key.Str() ;
							if (str)
							{
								if (m_keys.pStr->Exists(str))
								{
									Result = m_keys.pStr->operator[](str) ;
									threadLog("Selected %u\n", Result) ;
								}
								else
									threadLog("Select failed\n") ;
							}
							break ;

	// Each remaining type has a map of its own, so the lookup must go via the map pointer for that type and not via that of an integer of like size
	case BASETYPE_XDATE:	if (m_keys.pXD->Exists(key.XDate()))
								Result = m_keys.pXD->operator[](key.XDate()) ;
							break ;

	case BASETYPE_INT64:	if (m_keys.pSI64->Exists(key.Int64()))
								Result = m_keys.pSI64->operator[](key.Int64()) ;
							break ;

	case BASETYPE_UINT64:	if (m_keys.pUI64->Exists(key.Unt64()))
								Result = m_keys.pUI64->operator[](key.Unt64()) ;
							break ;

	case BASETYPE_IPADDR:	if (m_keys.pIpa->Exists(key.Ipaddr()))
								Result = m_keys.pIpa->operator[](key.Ipaddr()) ;
							break ;

	case BASETYPE_TIME:		if (m_keys.pTime->Exists(key.Time()))
								Result = m_keys.pTime->operator[](key.Time()) ;
							break ;

	case BASETYPE_SDATE:	if (m_keys.pSD->Exists(key.SDate()))
								Result = m_keys.pSD->operator[](key.SDate()) ;
							break ;

	case BASETYPE_INT32:	if (m_keys.pSI32->Exists(key.Int32()))
								Result = m_keys.pSI32->operator[](key.Int32()) ;
							break ;

	case BASETYPE_UINT32:	if (m_keys.pUI32->Exists(key.Unt32()))
								Result = m_keys.pUI32->operator[](key.Unt32()) ;
							break ;

	default:
		// Init() allocates no map for any other type. Result stays 0.
		break ;
	}

	return rc ;
}
/*
** SECTION 4: hdbIndexText Functions
*/
hzEcode hdbIndexText::Init (const hzString& name, const hzString& opdir, const hzString& backup, uint32_t cacheMode)
{
	// Initialize the free text index with a name, an operational directory and an optional backup directory. These parameters are intended for
	// the initialization of an underlying hzIsam, but that call is commented out so none of the arguments are currently used.
	//
	// Returns:	E_OK in the present implementation, as nothing is done that could set the return code to anything else.

	_hzfunc("hdbIndexText::Init") ;

	hzEcode	rc = E_OK ;		// Return code

	//rc = m_Isam.Init(name, opdir, backup, cacheMode) ;

	if (rc == E_OK)
		return rc ;

	return hzerr(rc, "Failed on account of ISAM init") ;
}
hzEcode hdbIndexText::Halt (void)
{
// Halt the operation of the free text index. This is intended to halt the underlying hzIsam, but that call is commented out so the function
// currently does nothing.
//
// Arguments: None
// Returns: E_OK Always
//m_Isam.Halt() ;
return E_OK ;
}
hzEcode hdbIndexText::Insert (const hzString& word, uint32_t docId)
{
	// Locate or insert the word and its associated bitmap, and set the document id in the bitmap. Words are held in lower case, so the lookup is
	// case insensitive.
	//
	// Arguments:	1) word		The word
	//				2) docId	The document id
	//
	// Returns:	E_OK	If the document id was recorded against the word
	//
	// Any other code is as reported by the bitmap or by the map of words, whichever failed.

	_hzfunc("hdbIndexText::Insert(int)") ;

	hdbIdset	bm ;			// The bitmap associated with the word
	hzString	lcword ;		// The word but all in lower case
	hzEcode		rc = E_OK ;		// Error code

	lcword = word ;
	lcword.ToLower() ;

	if (m_Keys.Exists(lcword))
	{
		// Word already indexed, so add the id to the bitmap it has. A failure to do so is reported rather than ignored.
		rc = m_Keys[lcword].Insert(docId) ;
		if (rc != E_OK)
			hzerr(rc, "Failed to insert doc_id of %d into bitmap. Error=%s\n", docId, Err2Txt(rc)) ;
		return rc ;
	}

	// New word, so start a bitmap for it
	rc = bm.Insert(docId) ;
	if (rc != E_OK)
	{
		hzerr(rc, "Failed to insert doc_id of %d into bitmap. Error=%s\n", docId, Err2Txt(rc)) ;
		return rc ;
	}

	rc = m_Keys.Insert(lcword, bm) ;
	if (rc != E_OK)
		hzerr(rc, "Failed to add word %s to index. Error=%s\n", *lcword, Err2Txt(rc)) ;

	return rc ;
}
#if 0
hzEcode hdbIndexText::Insert (const hzString& word, const hzSet<uint32_t>& idset)
hzEcode hdbIndexText::InsSeg (const hzString& word, const hzBitseg& seg, uint32_t segNo)
#endif
hzEcode hdbIndexText::Delete (const hzString& word, uint32_t docId)
{
	// Locate the bitmap for the word and delete the document id from it. Words are held in lower case, so the lookup is case insensitive.
	//
	// Arguments:	1) word		The word
	//				2) docId	The document id
	//
	// Returns:	E_NOTFOUND	If the word is not in the index
	//			E_OK		If the word was found and the delete applied to its bitmap

	hzString	lcword ;	// The word but all in lower case

	lcword = word ;
	lcword.ToLower() ;

	if (!m_Keys.Exists(lcword))
		return E_NOTFOUND ;

	// Apply the delete to the bitmap held by the index, as Insert() does, not to a local copy of it
	m_Keys[lcword].Delete(docId) ;
	return E_OK ;
}
#if 0
hzEcode hdbIndexText::Delete (const hzString& word, const hzSet<uint32_t>& idset)
hzEcode hdbIndexText::DelSeg (const hzString& word, uint32_t segNo)
#endif
hzEcode hdbIndexText::Clear (void)
{
// Clear all contents of the free text index. Only the map of keys is cleared, as the call to clear the ISAM is commented out.
//
// Arguments: None
// Returns: E_OK Always
//m_Isam.Clear() ;
m_Keys.Clear() ;
return E_OK ;
}
hzEcode hdbIndexText::Select (hdbIdset& Result, const hzString& word)
{
	// Fetch the set of document ids recorded against a word. The word is looked up in lower case. If the word is not in the index the result is
	// left empty.
	//
	// Arguments:	1) Result	The set to populate. Cleared on entry.
	//				2) word		The word to look up
	//
	// Returns:	E_OK	Always

	hzString	lookup ;	// The word but all in lower case

	lookup = word ;
	lookup.ToLower() ;

	Result.Clear() ;

	if (!m_Keys.Exists(lookup))
		return E_OK ;

	Result = m_Keys[lookup] ;
	return E_OK ;
}
hzEcode hdbIndexText::Eval (hdbIdset& result, const hzString& criteria)
{
	// Search the freetext index using the supplied criteria and place the resulting set of document ids in the supplied bitmap. The criteria must
	// comprise at least one whole word and may comprise a boolean expression.
	//
	// Arguments:	1) result	The bitmap to be populated
	//				2) criteria	The search expression
	//
	// Returns:	The error code from parsing the criteria if that fails, otherwise the return code of the evaluation.

	_hz_sqle_expr	exp ;	// Working SQL expression
	hzEcode			rc ;	// Return code

	rc = exp.Parse(result, criteria) ;
	if (rc == E_OK)
	{
		// The expression evaluates against this index
		exp.m_pIndex = this ;
		rc = exp.Evaluate(result) ;
	}

	return rc ;
}
hzEcode hdbIndexText::Export (const hzString& filepath, bool bFull)
{
	// Output list of words and associated idsets to the supplied filepath. The output is human readable for diagnostic purposes. Each word has
	// a header line stating its position, the number of nodes in its bitmap and the number of ids. In a full export the header is followed by the
	// ids themselves, tab separated, in lines of up to 20. The file ends with totals.
	//
	// Arguments:	1) filepath	Full path of file to dump to
	//				2) bFull	Do a full dump (true) or just the header lines (false)
	//
	// Returns:	E_ARGUMENT	If the filename is not supplied
	//			E_OPENFAIL	If the supplied filename cannot be opened
	//			E_WRITEFAIL	If there was a write error
	//			E_OK		If the index was dumped to file

	_hzfunc("hdbIndexText::Export") ;

	hzVect<uint32_t>	res ;		// Results of bitmap Fetch
	ofstream			os ;		// Output file
	hzChain				Z ;			// For constructing bitmap export
	hdbIdset			bm ;		// Current word bitmap
	hzString			word ;		// Current word
	uint32_t			nIndex ;	// Word/bitmap iterator
	uint32_t			nStart ;	// Starting position for bitmap Fetch
	uint32_t			nFetched ;	// Number of ids fetched
	uint32_t			nPosn ;		// Fetch result iterator
	uint32_t			nSegs = 0 ;	// Segment counter
	uint32_t			nInst = 0 ;	// Incidence counter

	if (!filepath)
		return hzerr(E_ARGUMENT, "No pathname for Index Export") ;

	os.open(*filepath) ;
	if (os.fail())
		return hzerr(E_OPENFAIL, "Cannot open %s", *filepath) ;

	threadLog("Index has:-\n") ;

	for (nIndex = 0 ; nIndex < m_Keys.Count() ; nIndex++)
	{
		word = m_Keys.GetKey(nIndex) ;
		bm = m_Keys.GetObj(nIndex) ;

		nSegs += bm.NoNodes() ;
		nInst += bm.Count() ;

		Z.Printf("%u %s: s=%u c=%u\n", nIndex, *word, bm.NoNodes(), bm.Count()) ;

		if (bFull)
		{
			for (nStart = 0 ; nStart < bm.Count() ; nStart += 20)
			{
				// The fetch has to start at the batch position. The result iterator must not be used for this as it is
				// not set at this point in the first pass.
				nFetched = bm.Fetch(res, nStart, 20) ;
				if (!nFetched)
					break ;

				for (nPosn = 0 ; nPosn < nFetched ; nPosn++)
					Z.Printf("\t%u", res[nPosn]) ;

				// End the line so that the header of the next word starts a line of its own
				Z.Printf("\n") ;
			}
		}

		os << Z ;
		Z.Clear() ;

		if (os.fail())
		{
			os.close() ;
			hzerr(E_WRITEFAIL, "Write fail to %s", *filepath) ;
			return E_WRITEFAIL ;
		}
	}

	Z.Printf("\tWords (maps): %d\n", m_Keys.Count()) ;
	Z.Printf("\tSegments: %d\n", nSegs) ;
	Z.Printf("\tInstances: %d\n", nInst) ;
	os << Z ;
	os.close() ;
	Z.Clear() ;

	return E_OK ;
}