// // File: hzDatabase.h // // Legal Notice: This file is part of the HadronZoo C++ Class Library. // // Copyright 2025 HadronZoo Project (http://www.hadronzoo.com) // // The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free // Software Foundation, either version 3 of the License, or any later version. // // The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR // A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses. //
#ifndef hzDatabase_h #define hzDatabase_h
/* ** Other includes */
#include "hzBasedefs.h" #include "hzChain.h" #include "hzXbuf.h" #include "hzDate.h" #include "hzIpaddr.h" #include "hzEmaddr.h" #include "hzUrl.h" #include "hzCodec.h" #include "hzMimetype.h" #include "hzDocument.h" #include "hzProcess.h" #include "hzTmplArray.h" #include "hzTmplVect.h" #include "hzTmplMapS.h" #include "hzTmplMapM.h" #include "hzTmplSet.h"
/* ** Definitions */
#define HZ_MAXOBJECT 16000000 // Largest possible binary object (NOTE(review): presumably a size in bytes - confirm)
/* ** SECTION 1: Classes for data structure/definition. */
enum hdbBasetype
{
    //  Category: Data Definition
    //
    //  The legal HadronZoo base data types, arranged in five groups. The numeric values are banded by group (0x01nn, 0x02nn, 0x1001, 0x2000 and 0x4000).
    //
    //  A group 1 or group 2 value fully identifies the data type. Groups 3, 4 and 5 are general types only: the actual data type must be named to be fully
    //  qualified.

    BASETYPE_UNDEF      = 0,        //  Null data type, belongs to no group

    //  Group 1: Fundamental C++ types (fixed size)
    BASETYPE_CPP_UNDEF  = 0x0100,   //  C++ type, not yet defined
    BASETYPE_DIGEST     = 0x0101,   //  128-bit MD5 hash value
    BASETYPE_DOUBLE     = 0x0102,   //  64-bit floating point value
    BASETYPE_INT64      = 0x0103,   //  64-bit signed integer
    BASETYPE_INT32      = 0x0104,   //  32-bit signed integer
    BASETYPE_INT16      = 0x0105,   //  16-bit signed integer
    BASETYPE_BYTE       = 0x0106,   //  8-bit signed integer
    BASETYPE_UINT64     = 0x0107,   //  64-bit positive integer
    BASETYPE_UINT32     = 0x0108,   //  32-bit positive integer
    BASETYPE_UINT16     = 0x0109,   //  16-bit positive integer
    BASETYPE_UBYTE      = 0x010A,   //  8-bit positive integer
    BASETYPE_BOOL       = 0x010B,   //  True or false. Cannot be empty and cannot have multiple values

    //  Group 2: HadronZoo defined types (fixed size)
    BASETYPE_HZO_UNDEF  = 0x0200,   //  HadronZoo in-built type, not yet defined
    BASETYPE_TBOOL      = 0x0201,   //  True, false or don't know (empty). Cannot have multiple values
    BASETYPE_DOMAIN     = 0x0202,   //  Internet domain
    BASETYPE_EMADDR     = 0x0203,   //  Email address
    BASETYPE_URL        = 0x0204,   //  Universal Resource Locator
    BASETYPE_IPADDR     = 0x0205,   //  IP address
    BASETYPE_TIME       = 0x0206,   //  Number of seconds since midnight (4 bytes)
    BASETYPE_SDATE      = 0x0207,   //  Number of days since Jan 1st year 0000
    BASETYPE_XDATE      = 0x0208,   //  Full date and time
    BASETYPE_PHONE      = 0x0209,   //  International phone number
    BASETYPE_STRING     = 0x020A,   //  Any string, treated as a single value
    BASETYPE_TEXT       = 0x020B,   //  Any string, treated as a series of words and indexable. No multiple values.
    BASETYPE_BINARY     = 0x020C,   //  Binary object, assumed to be un-indexable (e.g. image). No multiple values. Disk only.
    BASETYPE_TXTDOC     = 0x020D,   //  Document from which text can be extracted and so indexed. No multiple values. Disk only.

    //  Group 3: Application defined data enumerations
    BASETYPE_ENUM       = 0x1001,   //  Data enumeration

    //  Group 4: Application defined special text types
    BASETYPE_APPDEF     = 0x2000,   //  String subject to special interpretation by the application (e.g. serial numbers)

    //  Group 5: Application defined data class
    BASETYPE_CLASS      = 0x4000    //  Subclass (instances stored as part of the host class instance)
} ;
// Conversions between hdbBasetype values and text; both functions are only declared here.
// NOTE(review): presumably Str2Basetype maps a type name to its hdbBasetype and Basetype2Txt gives the text form of a hdbBasetype - confirm against the definitions.
hdbBasetype Str2Basetype (const hzString& type) ;
const char* Basetype2Txt (hdbBasetype dt) ;
class hdbDatatype { // Category: Data Definition // // hdbDatatype unifies five quite different groups of data types that members of data classes can assume. There is a derivative for each group as follows:- // // 1) hdbCpptype C++ fundamental data types // 2) hdbHzotype HadronZoo in-built data types // 3) hdbRgxtype Application specific data types validated by a regular expression // 4) hdbEnum Application specific data enumeration (validation list) // 5) hdbClass Application specific data class // // The hdbDatatype ensures each data type has a name and a base data type. All data types known to an application are held in _hzGlobal_Datatypes, declared // as a hzMapS<hzString,hdbDatatype*>. This maps data type name to data type and ensures the data type name is unique across all data types, not just those // within the same group. // // The data type used to initialize a data class member or other data entity must always be supplied as a pointer to one of the derived classes. Use of the // base class is meaningless. For this reason the hdbDatatype constructor is protected. // // Note that InitDatabase() pre-loads both the C++ fundamental and the HadronZoo in-built data types. Any application specific data types are added as they // are encountered, usually in the application config files.
protected: hdbDatatype (void) { m_Basetype = BASETYPE_UNDEF ; }
hzString m_Typename ; // Type name hdbBasetype m_Basetype ; // Real (internal) data type //uint32_t m_Resv ; // Reserved
public: virtual ~hdbDatatype (void) {}
hzEcode SetTypename (const hzString& type) { if (m_Typename) return E_SEQUENCE ; m_Typename = type ; return E_OK ; }
hzEcode SetBasetype (hdbBasetype bt) { if (m_Basetype != BASETYPE_UNDEF && m_Basetype != BASETYPE_CPP_UNDEF && m_Basetype != BASETYPE_HZO_UNDEF) return E_SEQUENCE ; m_Basetype = bt ; return E_OK ; }
hdbBasetype Basetype (void) const { return m_Basetype ; } const hzString strType (void) const { return m_Typename ; } const hzString strName (void) const { return m_Typename ; } const char* txtType (void) const { return *m_Typename ; } const char* txtName (void) const { return *m_Typename ; } } ;
class hdbCpptype : public hdbDatatype
{
    //  Category: Data Definition
    //
    //  Data type that stands for a fundamental C++ type. On construction the base type is the group 1 placeholder BASETYPE_CPP_UNDEF.

public:
    hdbCpptype (void)
    {
        m_Basetype = BASETYPE_CPP_UNDEF ;
    }

    ~hdbCpptype (void)
    {
    }
} ;
class hdbHzotype : public hdbDatatype
{
    //  Category: Data Definition
    //
    //  Data type that is in-built to the HadronZoo library. On construction the base type is the group 2 placeholder BASETYPE_HZO_UNDEF.

public:
    _mut hzString   m_valJS ;   //  Validation JavaScript (for front-end presentation)

    hdbHzotype (void)
    {
        m_Basetype = BASETYPE_HZO_UNDEF ;
    }

    ~hdbHzotype (void)
    {
    }
} ;
class hdbRgxtype : public hdbDatatype
{
    //  Category: Data Definition
    //
    //  Application defined text data type whose values are controlled by a regular expression. The base type is set to BASETYPE_APPDEF on construction.

public:
    hzString    m_valJS ;   //  Validation JavaScript (for front-end presentation)
    hzString    m_Regex ;   //  Regular expression

    hdbRgxtype (void)
    {
        m_Basetype = BASETYPE_APPDEF ;
    }

    ~hdbRgxtype (void)
    {
    }
} ;
class hdbEnum : public hdbDatatype { // Category: Data Definition // // The hdbEnum class implements HadronZoo ENUM data types. These are validation lists, manifest in Dissemino webapps as HTML selectors, or sets of radio buttons or checkboxes. // Under the current Dissemino regime, these selector devices either allow a single selection or a multiple selection. There are no in-betweens. // // Because ENUMs are manifest as selector devices, large ENUM populations would be impractical, so the limit was set at 256 values. This limit allows a single value selector, // i.e. a data object member of the applicable ENUM type, to be held internally as a single byte. Multiple values are held internally as either a straight bitmap (hdbObject), // or as a string encoded as an idset (hdbObjRepos cache).
// Prevent copies hdbEnum (const hdbEnum&) ; hdbEnum& operator= (const hdbEnum&) ;
hzArray <uint32_t> m_Numbers ; // Internal value numbers hzArray <hzString> m_Strings ; // String values public: uint32_t m_Default ; // Position of default item (commonly 0) uint32_t m_nMax ; // Length of longest item
hdbEnum (void) { m_Basetype = BASETYPE_ENUM ; m_Default = 0 ; m_nMax = 0 ; }
~hdbEnum (void) { }
hzEcode Init (const hzString& enumName) ; hzEcode AddItem (const hzString& strValue) ; hzEcode AddItem (const hzString& strValue, uint32_t numValue) ;
uint32_t GetNum (const hzString& S) const ; hzString GetStr (uint32_t num) const { return m_Strings[num] ; } uint32_t Count (void) const { return m_Numbers.Count() ; } } ;
/* ** Application Delta Profile */
// Reserved system class ids #define HZ_ADP_CLS_SUBSCRIBER 1 // Subscriber class ID #define HZ_ADP_CLS_SITEINDEX 2 // Site index class ID #define HZ_ADP_CLS_FIN_CRCY 3 // Financial Currency class ID #define HZ_ADP_CLS_FIN_CAT 4 // Financial Category class ID #define HZ_ADP_CLS_FIN_ACC 5 // Financial Account class ID #define HZ_ADP_CLS_FIN_TRNS 6 // Financial Transaction class ID
// Other class and member ids #define HZ_ADP_CLS_RNG_USER 21 // User data classes have IDs from 21 to 50 #define HZ_ADP_CLS_RNG_APPL 51 // Application data classes have IDs from at 51 to 1000 #define HZ_ADP_CLS_RNG_CONTEXT 1001 // Subclass combinations have IDs starting at 1001 #define HZ_ADP_MBR_RNG_SYSTEM 1 // System data classes have members from 1 to 500 #define HZ_ADP_MBR_RNG_USER 501 // User data classes have members from 501 to 1000 #define HZ_ADP_MBR_RNG_APPL 1001 // Application data classes have members startig at 1001
// Forward declarations of the types hdbADP refers to by pointer
class hdbDatatype ;
class hdbClass ;
class hdbMember ;
class hdbEnum ;
class hdbObjRepos ;
class hdbBinRepos ;
class hdbIndexText ;
class hdbRgxtype ;
class hdbADP { // Category: Database // // hdbADP - Application Delta Profile // // The ADP is the entire set of database entities and their delta assignments for an application. Most programs will only have a single ADP but in the case of Dissemino, there // can be multiple ADP instances. Dissemino is routinely configured to listen on the standard ports for HTTP and HTTPS on behalf of multiple hosts (web applications), most of // which require some form of database functionality. Any that do, get an ADP. Because of this, there is no global ADP and programs must declare ADP instances as required.
hzMapM <hzString,const hdbClass*> m_mapSubs ; // Cumulative map of all subclasses hzMapS <hzString,const hdbEnum*> m_mapEnums ; // Total data enums (selectors) hzMapS <hzString,const hdbClass*> m_mapClasses ; // All defined data classes hzMapS <hzString,const hdbDatatype*> m_mapDatatypes ; // Complete set of names of all legal data types including enums and classes
hzMapS <hzString,hdbObjRepos*> m_mapRepositories ; // All declared data repositories hzMapS <hzString,hdbBinRepos*> m_mapBinRepos ; // All hdbBinRepos instances
hzMapS <uint16_t,const hdbClass*> m_mapClsCtxDtId ; // Map of data class delta ids to data classes hzMapS <hzString,uint16_t> m_mapClsCtxName ; // Map of classname and classname.membername (if of a data class), to data class delta ids hzMapS <uint16_t,const hdbMember*> m_mapMembers ; // Map of member IDs to members (all classes)
hzArray <hdbObjRepos*> m_arrRepositories ; // Repositories by Delta id hdbIndexText* m_pSiteindex ; // Webapp site index (optional)
// Operational Params hzString m_appName ; // Application name hzString m_Datadir ; // Directory of binary core regime and repository deltas
// ID Allocation Sequences uint32_t m_nsqClsUsr ; // Data class sequence for user classes (11-40) uint32_t m_nsqClsCfg ; // Data class sequence for classes declared within the application configs (41-1000) uint32_t m_nsqClsCtx ; // Data class sequence for subclass combinations (1001 upwards) uint32_t m_nsqMbrSys ; // Data class member sequence for in-built classes (1-500) uint32_t m_nsqMbrUsr ; // Data class member sequence for user classes (501-1000) uint32_t m_nsqMbrCfg ; // Data class member sequence for all other config classes (1001 upwards)
hzEcode _rdClass (hzXmlNode* pN) ;
// Prevent copies hdbADP (const hdbADP&) ; hdbADP& operator= (const hdbADP&) ;
public: // Subscriber Repository, Data Class and Members hdbClass* m_pClassSubscriber ; // Subscriber class const hdbMember* m_pMbr_Subscriber_username ; // Subscriber member username const hdbMember* m_pMbr_Subscriber_userpass ; // Subscriber member userpass const hdbMember* m_pMbr_Subscriber_email ; // Subscriber member email address const hdbMember* m_pMbr_Subscriber_UID ; // Subscriber member object id in applicable user class (if any) const hdbMember* m_pMbr_Subscriber_type ; // Subscriber member type (user class, if applicable) hdbObjRepos* m_pReposSubscriber ; // Subscriber repository
// Constructor and destructors hdbADP (void) { m_pClassSubscriber = 0 ; m_pMbr_Subscriber_username = 0 ; m_pMbr_Subscriber_userpass = 0 ; m_pMbr_Subscriber_email = 0 ; m_pMbr_Subscriber_UID = 0 ; m_pReposSubscriber = 0 ;
m_pSiteindex = 0 ;
//m_nsqClsSys = 1 ; // System data classes have IDs starting at 1 m_nsqClsUsr = HZ_ADP_CLS_RNG_USER ; m_nsqClsCfg = HZ_ADP_CLS_RNG_APPL ; m_nsqClsCtx = HZ_ADP_CLS_RNG_CONTEXT ; m_nsqMbrSys = HZ_ADP_MBR_RNG_SYSTEM ; m_nsqMbrUsr = HZ_ADP_MBR_RNG_USER ; m_nsqMbrCfg = HZ_ADP_MBR_RNG_APPL ; } ~hdbADP (void) {}
// Init fuction - adds in-built data types hzEcode InitStandard (const hzString& appName) ; hzEcode InitSubscribers (const hzString& dataDir) ; hzEcode InitSiteIndex (const hzString& dataDir) ; hzEcode InitFinancials (const hzString& dataDir) ; hzEcode InitBlockedIPs (const hzString& dataDir) ; //hzEcode InitSearch (void) ;
// Insert new data type hzEcode RegisterDataClass (const hdbClass* pClass) ; hzEcode RegisterComposite (hzString& context, const hdbClass* pClass) ; hzEcode RegisterMember (const hdbMember* pMbr) ; hzEcode RegisterDataEnum (const hdbEnum* pEnum) ; hzEcode RegisterRegexType (const hdbRgxtype* pRgx) ;
// Register repositories hzEcode RegisterObjRepos (hdbObjRepos* pRepos) ; hzEcode RegisterBinRepos (hdbBinRepos* pRepos) ;
// Obtain data class by data class delta const hdbDatatype* GetDatatype (const hzString& tname) const { return m_mapDatatypes[tname] ; } const hdbClass* GetPureClass (const hzString& cname) const { return m_mapClasses[cname] ; } const hdbClass* GetDataClass (uint32_t clsId) const { return m_mapClsCtxDtId[clsId] ; } const hdbEnum* GetDataEnum (const hzString& ename) const { return m_mapEnums[ename] ; } const hdbEnum* GetDataEnum (uint32_t n) const { return m_mapEnums.GetObj(n) ; } uint32_t CountDataClass (void) const { return m_mapClasses.Count() ; } uint32_t CountDataEnum (void) const { return m_mapEnums.Count() ; }
// Get object repository hdbObjRepos* GetObjRepos (const hzString& rname) const { return m_mapRepositories[rname] ; } hdbObjRepos* GetObjRepos (uint32_t n) const { return m_mapRepositories.GetObj(n) ; } uint32_t CountObjRepos (void) const { return m_mapRepositories.Count() ; } uint32_t CountAllMembers (void) const { return m_mapMembers.Count() ; }
// Get binary repository hdbBinRepos* GetBinRepos (const hzString& dsName) const { return m_mapBinRepos[dsName] ; } uint32_t CountBinRepos (void) const { return m_mapBinRepos.Count() ; }
hzEcode NoteSub (const hzString& clsName, const hdbClass* pSub) { return m_mapSubs.Insert(clsName, pSub) ; } bool IsSubClass (const hdbClass* pMain, const hdbClass* pSub) ;
// Obtain class delta id from either the pure classname or in the case of composite members, classname.membername uint16_t GetDataClassID (const hzString& clsName) const { return m_mapClsCtxName[clsName] ; }
hzEcode Export (void) ; hzEcode Import (const hzString& appName) ; void Report (hzChain& Z) ;
hzEcode DeltaInit (const char* dir, const char* app, const char* arg, const char* ver, bool bMustHave) ; } ;
/* ** SECTION 1: Classes for data structure/definition. */
enum hdbIniStat
{
    //  Category: Initialization
    //
    //  Initialization states of hdbClass (data class) and hdbObjRepos (data object repository). The states exist so that an initialization sequence can be
    //  imposed on these classes.
    //
    //  Data class sequence: A hdbClass is constructed in state HDB_CLASS_INIT_NONE. hdbClass::InitStart() moves the state to HDB_CLASS_INIT_PROG. There is
    //  then a call to InitMember() for each class member. InitMember() requires state HDB_CLASS_INIT_PROG (otherwise execution terminates), and leaves the
    //  state as it is. Lastly InitDone(), which also requires HDB_CLASS_INIT_PROG (otherwise execution terminates), sets the state to HDB_CLASS_INIT_DONE
    //  and the process is complete.
    //
    //  Repository sequence: A repository is likewise constructed in state HDB_CLASS_INIT_NONE. hdbObjRepos::InitStart() expects a predefined data class
    //  (fully initialized), so all the class members are known. InitMbrIndex() and InitMbrStore() may then be called to add an index or to define how a
    //  member is stored. As with data classes, InitDone() completes the sequence. The repository can then be opened, operated upon and closed.

    HDB_CLASS_INIT_NONE,    //  No initialization has begun
    HDB_CLASS_INIT_PROG,    //  hdbClass::InitStart() has been called: class definition is in progress and members can be added
    HDB_CLASS_INIT_DONE,    //  hdbClass::InitDone() has been called: class definition is complete and no new members can be added
    HDB_REPOS_INIT_PROG,    //  Repository InitStart() has been called: initialization steps relating to members can be effected
    HDB_REPOS_INIT_DONE,    //  Repository InitDone() has been called: no further initialization is permitted
    HDB_REPOS_OPEN          //  Repository is open to transactions. On close the state reverts to HDB_REPOS_INIT_DONE
} ;
class hdsFldspec ; // Dissemino guest class: the field specification that hdbMember holds a pointer to (forward declaration only)
enum hdbClsDgn
{
    //  Data Class Designation. A data class is one of the following:-

    HDB_CLASS_DESIG_SYS,    //  In-built data class
    HDB_CLASS_DESIG_USR,    //  User data class
    HDB_CLASS_DESIG_CFG     //  General data class
} ;
enum hdbPopCtl
{
    //  Population restraints on the values of a data class member, as required by hdbMember::Init(). In the specified values the high nibble gives the
    //  cardinality (0x10 single, 0x20 array) and bit 0 is set where a value is compulsory.

    HDB_MBR_POP_UNSPECIFIED         = 0x00,     //  Population restraints not specified
    HDB_MBR_POP_SINGLE_OPTIONAL     = 0x10,     //  Single value, may be left empty
    HDB_MBR_POP_SINGLE_COMPULSORY   = 0x11,     //  Single value, must be set
    HDB_MBR_POP_ARRAY_OPTIONAL      = 0x20,     //  Array of values, may be empty
    HDB_MBR_POP_ARRAY_COMPULSORY    = 0x21      //  Array of values, at least one value required
} ;
// Bit masks applied to a hdbPopCtl value by the hdbMember Optional/Compulsory and Multiple/Singular tests
#define HDB_MBR_MASK_COMPULSORY 0x01 // Set in the two COMPULSORY hdbPopCtl values
#define HDB_MBR_MASK_ARRAY 0x20 // Set in the two ARRAY hdbPopCtl values
// Text for a population restraint value; declared only, defined elsewhere.
// NOTE(review): presumably returns a static, human readable name for popCtl - confirm against the definition.
const char* PopCtl2Txt (hdbPopCtl popCtl) ;
class hdbMember { // Data class member // // At the app level, the important data class member properties are the host data class, the name (unique within the data class), the data type, and the population constraints // that apply to member values (as described by enum hdbPopCtl). // // Please note the following:- // // - hdbMember instances are created during the initialization of data classes (hdbClass instances), and should NOT be created outside of this context. // // - The recommended practice is to declare global pointers to members, with names of the form g_pMbr_classname_membername or similar, and to set these to the members directly // after data class initialization. This is done because the hdbObject Get and Set functions expect member pointers, rather than member names. // // - As data class members cannot exist without a data class, it would make sense to nest the hdbMember definition within the hdbClass definition. That it is not is a historic // matter, which is unlikely to change. It is easier and neater to use 'const hdbMember' in declarations, rather than 'const hdbClass::Member' or similar. If hdbMember were a // subclass of hdbClass, it would not eliminate the need for hdbObject Get and Set functions to check if the supplied member pointer was initialized to the applicable class.
private: const hdbClass* m_pClass ; // Data class to which this member belongs const hdbDatatype* m_pType ; // Data type _mac hdsFldspec* m_pSpec ; // Dissemino field specification for rendering HTML (Aid to webapp forms, not used for HDB purposes) hzString m_Name ; // Member name, must be unique within the data class _mut uint16_t m_MemberUID ; // Unique member delta ID. _mut int16_t m_nOsetStd ; // Offset to member space within hdbObject core - calculated by hdbClass::InitDone() _mut int16_t m_nOsetAux ; // Offset to aux member space within hdbObject core (used only by BINARY members for binary datum repository address) uint16_t m_nPosn ; // Actual position within class (set on Init() on being added to class) uchar m_popCtl ; // Population restraints uchar m_sizeCore ; // Size of core slot uchar m_sizeAux ; // Size of core slot uchar m_sizeDatum ; // Size of datum in bytes, as per the data type
// Restrict copying and assignment to other instances hdbMember (const hdbMember&) ; hdbMember& operator= (const hdbMember&) ;
public: _mut hzString m_Desc ; // For application specific purposes, data members may have an optional description _mut hzString m_dsmTabSubject ; // Dissemino tab section subject (not for internal db consideration)
hdbMember (void) ; ~hdbMember (void) {}
// Set functions hzEcode Init (const hdbClass* pClass, const hdbDatatype* pType, const hzString& mbrName, uint32_t nPosn, hdbPopCtl popCtl = HDB_MBR_POP_UNSPECIFIED) ; hzEcode SetSpec (const hdsFldspec* pSpec) const ; hzEcode _setId (uint32_t id) const ; hzEcode _setOset (int32_t nOset) const ; hzEcode _setAux (int32_t nOset) const ;
// Get functions const hdbClass* Class (void) const { return m_pClass ; } const hdbDatatype* Datatype (void) const { return m_pType ; } const hdsFldspec* GetSpec (void) const { return m_pSpec ; }
hdbBasetype Basetype (void) const { return m_pType ? m_pType->Basetype() : BASETYPE_UNDEF ; } bool IsClass (void) const { return !m_pType ? false : m_pType->Basetype() == BASETYPE_CLASS ? true : false ; } bool Optional (void) const { return m_popCtl & HDB_MBR_MASK_COMPULSORY ? false : true ; } bool Compulsory (void) const { return m_popCtl & HDB_MBR_MASK_COMPULSORY ? true : false ; } bool Multiple (void) const { return m_popCtl & HDB_MBR_MASK_ARRAY ? true : false ; } bool Singular (void) const { return m_popCtl & HDB_MBR_MASK_ARRAY ? false : true ; } hdbPopCtl PopCtl (void) const { return (hdbPopCtl) m_popCtl ; } hzString strName (void) const { return m_Name ; } hzString strDesc (void) const { return m_Desc ; } const char* txtName (void) const { return *m_Name ; } const char* txtDesc (void) const { return *m_Desc ; }
uint32_t Posn (void) const { return m_nPosn ; } int32_t OsetStd (void) const { return m_nOsetStd ; } int32_t OsetAux (void) const { return m_nOsetAux ; } uint32_t SizeCore (void) const { return (uint32_t) m_sizeCore ; } uint32_t SizeAux (void) const { return (uint32_t) m_sizeAux ; } uint32_t SizeDatum (void) const { return (uint32_t) m_sizeDatum ; } uint16_t DeltaId (void) const { return m_MemberUID ; }
// Compare bool operator== (const hdbMember& op) const ; bool operator!= (const hdbMember& op) const { return !operator==(op) ; } } ;
class hdbClass : public hdbDatatype { // Category: Data Definition // // hdbClass defines a data class as a set of data class members with each having a name (unique within the class), a predefined data type, and value population restraints. As // data classes are themselves data types, the data type a member may be of, includes previously defined data classes. Thus, within a data object of a main class, members can // hold data objects of a subclass. By this means, hdbClass supports hierarchy. // // Defining a data class is a matter of initializing a hdbClass instance, which involves a sequence of function calls. The sequence begins with a call to InitStart(), then for // each member a call to InitMember(), then finally, a call to InitDone(). If this sequence is violated, the program terminates. // // Data Class Member Delta IDs: // // Note that data classes are assigned a unique class id and a delta id, and are generally referred to by the latter which will depend on the context of the class. If the data // class has no subclass members, the two ids will always be the same. However where a class is used as a subclass, its delta id will differ from the pure id. This // regime is described in the HadronZoo Library Overview.
private: hzMapS <hzString,hdbMember*> m_mapMembers ; // Members by member name hzArray <hdbMember*> m_arrMembers ; // Members in order of incidence
hdbADP* m_pADP ; // Host ADP _mut hzString m_Desc ; // XML Description hdbIniStat m_eClassInit ; // Init state uint16_t m_eDesignation ; // Class designated as system, user or general _mut uint16_t m_ClassUID ; // Unique class id uint16_t m_nCoreLen ; // Total core size needed in hdbObject (includes bytes to host litmus bits) uint16_t m_nLitmusBits ; // Total number of litmus bits needed to indicate the presence of member values uint16_t m_nLitmusSize ; // Bytes needed to accommodate the litmus bits (1 byte per 8 bits or part thereof) uchar m_nBinaries ; // Number of members with BINARY/TXTDOC types uchar m_nArrays ; // Number of members that expect arrays
// Prevent copies hdbClass (hdbClass& op) ; hdbClass& operator= (hdbClass& op) ;
void _clear (void) ;
public: hzString m_Category ; // This has no database purpose but is helpful for Dissemino
hdbClass (hdbADP& adp, hdbClsDgn designation) ; ~hdbClass (void) ;
// Get Member inlines const hdbMember* GetMember (const hzString& mbr) const { return m_mapMembers[mbr] ; } const hdbMember* GetMember (uint32_t mbrNo) const { return mbrNo < m_arrMembers.Count() ? m_arrMembers[mbrNo] : 0 ; }
uint32_t MemberNo (const hzString& mbr) const { const hdbMember* pMem ;
pMem = m_mapMembers[mbr] ; return pMem ? pMem->Posn() : 0xffffffff ; }
// Init from DB method. hzEcode InitStart (const hzString& name) ; hzEcode InitMember (const hzString& name, const hdbDatatype* type, hdbPopCtl popCtl) ; hzEcode InitMember (const hzString& name, const hzString& type, hdbPopCtl popCtl) ; hzEcode InitDone (void) ; hzEcode _setId (uint32_t id) const ;
// Diagnostics const hzString& Desc (void) const { return m_Desc ; } void DescClass (hzChain& Z, uint32_t nIndent) const ; hzEcode DescCheck (hzChain& report, hzChain& desc) const ;
// Get functions const hdbADP* GetADP (void) const { return m_pADP ; }
hdbClsDgn Designation (void) const { return (hdbClsDgn) m_eDesignation ; } uint32_t CoreLen (void) const { return m_nCoreLen ; } uint32_t NonBool (void) const { return m_arrMembers.Count() ; } uint32_t MbrCount (void) const { return m_arrMembers.Count() ; } uint32_t LitmusBits (void) const { return m_nLitmusBits ; } uint32_t LitmusSize (void) const { return m_nLitmusSize ; } uint32_t ClassId (void) const { return m_ClassUID ; } bool HasBinaries (void) const { return m_nBinaries? true : false ; } bool HasArrays (void) const { return m_nArrays? true : false ; } bool IsInit (void) const { return m_eClassInit == HDB_CLASS_INIT_DONE ? true : false ; }
// Test sub class bool operator== (const hdbClass& op) const ; bool operator!= (const hdbClass& op) const { return !operator==(op) ; } } ;
//#define clsMbr hdbClass::Member
/* ** Data Class Instances: hdbObject and hzAtom */
// Atom status #define ATOM_CLEAR 0 // Atom is clear #define ATOM_SET 1 // Atom is set #define ATOM_ERROR 2 // Atom is invalid
class hzAtom { // Category: Database // // As described in the Library Overview (2.3.h1 Data Classes), hzAtom holds a single datum of any atomic HadronZoo data type and so serves as a universal means of passing and // holding atomic values. // // hzAtom comprises an atomval union with members of types char*, double, 64, 32, 16 and 8-bit signed and unsigned integers and a bool - together with a hzBasetype indicator, // a hzString, a hzChain and control flags. An atomval on its own cannot state which of its members applies and nor can it disambiguate zero, hence the control flags. // // The hzString is used to directly hold the value where the type is BASETYPE_STRING and where the type is BASETYPE_DOMAIN, BASETYPE_EMADDR or BASETYPE_URL, to hold the value // as a string form. The hzString also enables hzAtom to avail the text form of any of the non-binary types held by atomval. The hzChain holds datum of binary data types.
_atomval m_Data ; // Actual data or pointer to document/binary/start of list hdbBasetype m_eType ; // Data type adopted uint16_t m_eStatus ; // Set on assignment, either NULL, OK or some error //uchar m_bCast ; // Set on assignment to string like values uint16_t m_Resv ; // Reserved
// Prevent copies // hzAtom (const hzAtom&) {} // hzAtom& operator= (const hzAtom&) { return *this ; }
public: hzAtom (void) { // Standard constructor. The atom will be empty and be of unknown type. m_Data.m_uInt64 = 0 ; m_eType = BASETYPE_UNDEF; m_eStatus = ATOM_CLEAR ; //m_bCast = 0 ; }
hzAtom (const hzAtom& op) { // Copy constructor. The atom will be of the type and value of the operand. m_Data = op.m_Data ; m_eType = op.m_eType ; m_eStatus = op.m_eStatus ; //m_bCast = false ; }
~hzAtom (void) { // Destructor. This will delete any strings the atom had retained. Clear() ; }
hdbBasetype Type (void) const { return (hdbBasetype) m_eType ; } // Get internal type of hzAtom
bool IsSet (void) const { return m_eStatus == ATOM_CLEAR ? false : true ; } bool IsNull (void) const { return m_eStatus == ATOM_CLEAR ? true : false ; }
// Get functions const char* Show (void) const ; const hzMD5 MD5 (void) const ; const hzChain Chain (void) const ; const hzString Str (void) const ; const char* Cstr (void) const ; const hzDomain Domain (void) const ; const hzEmaddr Emaddr (void) const ; const hzUrl Url (void) const ; const hzXDate XDate (void) const ; const hzSDate SDate (void) const ; const hzTime Time (void) const ; const hzIpaddr Ipaddr (void) const ;
_atomval Datum (void) const { return m_Data ; } const void* Binary (void) const { return &m_Data ; } double Double (void) const { return m_Data.m_Double ; } uint64_t Unt64 (void) const { return m_Data.m_uInt64 ; } uint32_t Unt32 (void) const { return m_Data.m_uInt32 ; } uint16_t Unt16 (void) const { return m_Data.m_uInt16 ; } int64_t Int64 (void) const { return m_Data.m_sInt64 ; } int32_t Int32 (void) const { return m_Data.m_sInt32 ; } int16_t Int16 (void) const { return m_Data.m_sInt16 ; } char Byte (void) const { return m_Data.m_sByte ; } uchar UByte (void) const { return m_Data.m_uByte ; } bool Bool (void) const { return m_Data.m_Bool ; }
// Casting operators (HadronZoo data types) operator const char* (void) const { return Cstr() ; } operator const hzChain (void) const { return Chain() ; } operator const hzIpaddr (void) const { return Ipaddr() ; } operator const hzDomain (void) const { return Domain() ; } operator const hzEmaddr (void) const { return Emaddr() ; } operator const hzUrl (void) const { return Url() ; } operator const hzXDate (void) const { return XDate() ; } operator const hzSDate (void) const { return SDate() ; } operator const hzTime (void) const { return Time() ; }
// Casting operators (fundamental data types) operator double (void) const { return m_Data.m_Double ; } operator int64_t (void) const { return m_Data.m_sInt64 ; } operator uint64_t (void) const { return m_Data.m_uInt64 ; } operator int32_t (void) const { return m_Data.m_sInt32 ; } operator uint32_t (void) const { return m_Data.m_uInt32 ; } operator int16_t (void) const { return m_Data.m_sInt16 ; } operator uint16_t (void) const { return m_Data.m_uInt16 ; } operator char (void) const { return m_Data.m_sByte ; } operator uchar (void) const { return m_Data.m_uByte ; }
/* ** Setting hzAtom values */
// Set value by datatype and free format string hzEcode SetValue (hdbBasetype eType, const hzString& S) ; hzEcode SetValue (hdbBasetype eType, const _atomval& av) ; hzEcode SetNumber (const char* s) ; hzEcode SetNumber (const hzString& s) { return SetNumber(*s) ; }
// Set values by types hzAtom& operator= (const hzAtom& a) ; hzAtom& operator= (const hzMD5& md5) ; hzAtom& operator= (const hzChain& Z) ; hzAtom& operator= (const hzString& s) ; hzAtom& operator= (const hzIpaddr& v) ; hzAtom& operator= (const hzDomain& v) ; hzAtom& operator= (const hzEmaddr& v) ; hzAtom& operator= (const hzUrl& v) ; hzAtom& operator= (const hzXDate& v) ; hzAtom& operator= (const hzSDate& v) ; hzAtom& operator= (const hzTime& v) ; hzAtom& operator= (double v) ; hzAtom& operator= (int64_t v) ; hzAtom& operator= (uint64_t v) ; hzAtom& operator= (int32_t v) ; hzAtom& operator= (uint32_t v) ; hzAtom& operator= (int16_t v) ; hzAtom& operator= (uint16_t v) ; hzAtom& operator= (char v) ; hzAtom& operator= (uchar v) ; hzAtom& operator= (bool b) ;
// Set status //void SetStatus (atomState eState) { m_eStatus = eState ; } uint32_t Status (void) const { return m_eStatus ; }
// Set value to null hzAtom& Clear (void) ;
// Stream integration friend std::ostream& operator<< (std::ostream& os, const hzAtom& obj) ; } ;
/*
**	Litmus bits. These are used in EDOs (encoded data objects) to state whether a member (a) has a value, and (b) has an array of values. In hdbObject, additional litmus
**	bits are used with members of string-like data types to state the data origin: whether the string value address held in the member data space was allocated by a string
**	repository, or by the allocation regime of the applicable native C++ class. Values of such members will be native (i.e. hzString, hzDomain, hzEmaddr or hzUrl) when set
**	by a SetMbrValue() function (data ingress), but in 'HDB form' when fetched from the repository.
*/
//	Litmus flag values
#define LITMUS_NULL		0x00	//	No member data
#define LITMUS_SET		0x01	//	Member has value
#define LITMUS_AUX		0x02	//	Used as the value in TBOOL members
#define LITMUS_ERR		0x04	//	Error on set
class _obj_data ; // Forward declaration. Support class for hdbObject, used as an internal object container. Defined in hdbObject.cpp
class hdbObject { // Category: Database // // hdbObject is a single data object container. Once a hdbObject instance has been initialized to a data class, it can hold a single object of that class. hdbObject is central // to the data object repository program interface. In repository FETCH operations, hdbObject acts as a data object recepticle. In INSERT and MODIFY operations, hdbObject acts // as the data object donor. // // The hdbObject GET value and SET value functions can only be applied to ATOMIC members of the class to which the hdbObject instance is initialized. Where a data class member // introduces a subclass, the only way to access a subclass object and address its members, is via a separate hdbObject instance that is initialized to the subclass. Subclass // objects are fetched by calling FetchObject() on the applicable host class member. Modified subclass objects are returned to the host class member by calling UpdateObject() // on the same host class member. Entirely new subclass objects are calling AddObject() on the applicable host class member. // // Data objects members can be:- // // 1) A single BOOL or TBOOL // 2) A single numeric value or a pointer to an array of numeric values // 3) A single string value or a pointer to an array of string values // 4) A single ENUM value OR a set of ENUM values encoded as a bitmap. // 5) A single binary datum address OR a pointer into a hzChain that hold the actual binary datum. // 6) A single subclass object address or a pointer to an array of subclass object addresses (see note). // // Within hdbObject members are assigned a slot in a uchar buffer (m_pCore), either to store a single value, or the address of the first value in a list of values. The size of // the slot depends on the member data type and whether the member can have multiple values. Please note the following:- // // - BOOL and TBOOL members are always single value as they represent attributes the host class object either does, or does not have. 
Both are assigned a single core byte. // // - Numeric members are not usually configured to accept multiple values but it is permitted for 32 and 64-bit signed and unsigned numbers, and for doubles. In the single // value case, numeric members are assigned core slots of their data size (8, 4, 2 or 1 bytes). In the multiple value case, a 4-byte aligned core slot is assigned to hold // the address of the first value in the list. // // - STRING and string-like members are 32-bit entities whose values are stored within hdbObject by casting a 4-byte space, aligned to a 4-byte boundary, to the applicable // C++ class (e.g. hzString). In all cases however, 4 bytes are assigned in the core, as both the value space (single value), and the address (multiple value), are 4-bytes // in size. // // - ENUM members are given a 1 byte slot if only one value is allowed (max pop of ENUM data type 256), and 4 bytes if multiple values are allowed. As stated above, if the // ENUM population is <= 32, the ENUM is stored as a 4-byte bitmap, otherwise the 4-byte value is the string address of an ecoded idset. // // - BINARY members are always single value. As binary datum cannot be indexed, BINARY members must be part of a class (or subclass), that has at least one member that can // be indexed, in order for the binary datum to be found. BINARY members are assigned two core slots. An 8-byte aligned slot which is cast to a hzChain to hold the value, // and a 4-byte aligned slot to hold the datum id, that the chain content either already does or will have in the binary datum repository. // // - Subclass members are always multiple as there would be no point to them if they were limited to a single subclass instance. Members can hold a lone subclass object or // even be empty, but they cannot be limited to a single subclass object. Subclass members are assigned 4-byte aligned core slots, being the address of the first subclass // object in the list. 
// // Slots are assigned in decending order of size, with 16 byte slots (type DIGEST) first, followed by 8-byte slots, then 4, then 2, then 1. This ensures the 8 and 4 byte slots // are aligned to 8 and 4 byte boundaries. This is particularly critical for string and string-like members, as the slots are cast to hzString or other C++ string like class. // // Writing code to move data between data objects and program variables requires knowledge of the data class and its members, as is the case with all forms of structured data. // hdbObject has various 'Get and Set' member functions which are overloaded to cover the whole set of HadronZoo data types. These expect a reference to a program variable and // a member pointer. The compiler will not be able to check for type mismatches between program variables and member, so these functions return E_TYPE in this event. There is // also a generic Get and Set which works with atoms, but these likewise cannot be type checked by the compiler. Get array and Set array functions are not currently provided // but are being considered for applicable data types. // // Calling Set() on BINARY and TXTDOC members, sets the member to the actual binary datum supplied in the hzChain. However, when hdbObject is populated by a repository FETCH, // only the binary datum id is loaded. This avoids expensive seek operations for binary datum that are quite often not needed by the task in hand. Calling Get() on the member // explicitly, will pull the binary datum from the binary datum repository - providing there has been an initial FETCH on a data object repository. // // Data objects can be transmitted between services as JSON objects (the industry standard method). Within data object repositories, data objects are manifest as EDOs (encoded // data objects). hdbObject has import/export methods for both formats. JSON and EDO formats are descrbed in the synopsis "Data Encoding". 
// // Note that hdbObject is often initialized to a data object repository, from which the data class can be inferred. This is necessary if the hdbObject is to be used to extract // or to insert binary datum from/to BINARY or TXTDOC members, but not otherwise.
const hdbClass* m_pClass ; // Object class const hdbObjRepos* m_pRepos ; // Associated data object repository _mut _obj_data* m_pRoot ; // Internal object data hzString m_Key ; // Unique name within app uint16_t m_ReposId ; // Repository id uint16_t m_ClassId ; // Host class delta id uint32_t m_Resv ; // Reserved
// Private support functions hzEcode _export_json_r (hzChain& json, const hdbClass* pClass, uint32_t nLevel) const ;
// Prevent copies hdbObject (const hdbObject&) {} hdbObject& operator= (const hdbObject&) { return *this ; }
public: hdbObject (void) ; // Constructor ~hdbObject (void) ; // Destructor
hzEcode Init (const hdbClass* pClass) ; hzEcode Init (const hdbObjRepos* pRepos) ; hzEcode SetName (const hzString& objKey) ; hzEcode SetRepos (const hdbObjRepos* pRepos) ;
hdbObject* Node (uint32_t nIndex) const ; hzEcode Clear (void) ;
void SetObjId (uint32_t objId) const ;
const hdbClass* Class (void) const { return m_pClass ; } const hzString ObjKey (void) const { return m_Key ; }
const char* Classname (void) const { return m_pClass ? m_pClass->txtType() : 0 ; } uint32_t ReposId (void) const { return m_ReposId ; } uint32_t GetObjId (void) const ; //bool IsNull (void) const ;
// Set atomic member value, prefered method. Note when the member being set is a list, this will just add the value hzEcode SetBool (const hdbMember* pMbr, bool value) ; hzEcode SetBinary (const hdbMember* pMbr, const hzChain& value) ; hzEcode SetValue (const hdbMember* pMbr, const hzAtom& atom) ;
hzEcode SetMbrValue (const hdbMember* pMbr, const hzMD5& digest) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzString& str) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzDomain& dom) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzEmaddr& ema) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzUrl& url) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzIpaddr& ipa) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzXDate& date) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzSDate& date) ; hzEcode SetMbrValue (const hdbMember* pMbr, const hzTime& date) ; hzEcode SetMbrValue (const hdbMember* pMbr, uint64_t val) ; hzEcode SetMbrValue (const hdbMember* pMbr, int64_t val) ; hzEcode SetMbrValue (const hdbMember* pMbr, uint32_t val) ; hzEcode SetMbrValue (const hdbMember* pMbr, int32_t val) ;
// Get atomic member value hzEcode GetBool (bool& result, const hdbMember* pMbr) const ; hzEcode GetBinary (hzChain& val, const hdbMember* pMbr) ; hzEcode GetValue (hzAtom& atom, const hdbMember* pMbr, uint32_t oset = 0) const ; hzEcode GetMbrValue (hzMD5& digest, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzString& str, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzDomain& dom, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzEmaddr& ema, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzUrl& url, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzIpaddr& ipa, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzXDate& date, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzSDate& date, const hdbMember* pMbr) const ; hzEcode GetMbrValue (hzTime& date, const hdbMember* pMbr) const ; hzEcode GetMbrValue (uint64_t& val, const hdbMember* pMbr) const ; hzEcode GetMbrValue (uint32_t& val, const hdbMember* pMbr) const ;
// FETCH, UPDATE and ADD Subclass Objects hzEcode GetObject (hdbObject& obj, const hdbMember* pMbr, uint32_t oset) const ; hzEcode SetObject (const hdbMember* pMbr, const hdbObject& obj) ;
// Commit all binary member values to binary datum repository hzEcode CommitBinaries (hdbBinRepos* pRepos) const ;
// Import and Export hzEcode ImportJSON (const hzChain& J) ; hzEcode ExportJSON (hzChain& J) const ; hzEcode ImportEDO (const hzChain& J) ; hzEcode ExportEDO (hzChain& J) const ; hzEcode ImportDelta (const hzChain& J) ; hzEcode ExportDelta (hzChain& J) const ;
// Diagnostics and Integrity Checks hzEcode Integrity (void) ; } ;
/* ** SECTION 3: Data Object Repositories */
class hdbBinRepos { // Category: Database // // hdbBinRepos: Binary Datum Repository
class _datum_hd { // Datum header in index file public: hzXDate m_DTStamp ; // Date time stamp uint64_t m_Addr ; // Address within superblock uint32_t m_Size ; // Datum size in bytes uint32_t m_Prev ; // Previous datum id (0 if new) uint32_t m_Appnote1 ; // Value specified by application (if any) uint32_t m_Appnote2 ; // Value specified by application (if any) } ;
// Operational parameters std::ofstream m_WrI ; // Index file output stream for Insert/Update std::ofstream m_WrD ; // Data file output stream for Insert/Update _mut std::ifstream m_RdI ; // Data file input stream for Fetch _mut std::ifstream m_RdD ; // Data file input stream for Fetch
hdbADP* m_pADP ; // Host ADP uint64_t m_nSize ; // Size of data file hzString m_Name ; // Name of object store file hzString m_Workdir ; // Directory where object store file is hzString m_FileData ; // Name of row data file hzString m_FileIndx ; // Name of index file (addresses and sizes of rows) _mut hzLockS m_LockIrd ; // Lock on index file read hzLockS m_LockIwr ; // Lock on index file write _mut hzLockS m_LockDrd ; // Lock on data file read hzLockS m_LockDwr ; // Lock on data file write uint32_t m_nSeqId ; // Highest datum id thusfar issued uint32_t m_nPopulation ; // Population uint32_t m_nInitState ; // Initialization state
// Write function void _deltaWrite (void) ;
// Prevent copies hdbBinRepos (const hdbBinRepos&) ; hdbBinRepos& operator= (const hdbBinRepos&) ;
public: _mut hzChain m_Error ; // Error or report string
hdbBinRepos (hdbADP& adp) ; ~hdbBinRepos (void) ;
// Get functions hzString strName (void) const { return m_Name ; } const char* txtName (void) const { return *m_Name ; } bool IState (void) const { return m_nInitState ; } uint32_t Count (void) const { return m_nPopulation ; } uint32_t SeqId (void) const { return m_nSeqId ; }
// Init & Halt hzEcode Init (const hzString& name, const hzString& opdir) ; hzEcode Open (void) ; hzEcode Close (void) ; hzEcode Integ (hzLogger& pLog) ;
// Data operations hzEcode Insert (uint32_t& datumId, const hzChain& datum) ; hzEcode Insert (uint32_t& datumId, const hzChain& datum, uint32_t an1, uint32_t an2) ; hzEcode Update (uint32_t& datumId, const hzChain& datum) ; hzEcode Update (uint32_t& datumId, const hzChain& datum, uint32_t an1, uint32_t an2) ; hzEcode Delete (uint32_t datumId) ; hzEcode Fetch (hzChain& datum, uint32_t datumId) const ; } ;
/* ** Idset class */
class _idsNode ; // Forward declaration of the idset node type that hdbIdset refers to by pointer. Defined in hdbIdset.cpp
class hdbIdset { // Category: Index // // hdbIdset holds a set of ids. // // Idsets can be configured to be persistent or volatile. Persistent idsets are used inter-alia in HDB repositories and indexes, while volatile idsets are used inter-alia, for // holding and processing search results. // // Volatile idsets are fully memory resident, which simplifies matters considerably. Depending on the population, the memory resident component either has the form of a single // segment, or a map of segments. // // Persistent idsets also have a memory resident component. It is of limited size, is backed by a delta file, and has the primary objective of forestalling whole block writes. // Due to the 4K block size and the compactness of idsets, a single block will typically contain thousands of ids. It makes no sense to write out a whole block each time an id // is added or deleted.
struct _idset_ca { // Bitmap internal structure to facilitate soft copy
void* m_pData ; // Data area pointer: to either a single _idsNode, or to a vector of _idsNode pointers uint16_t m_bVect ; // 1 if a vector is used, 0 if single _idsNode uint16_t m_nCopies ; // Copy count uint32_t m_nPop ; // Total idset popuation
_idset_ca (void) { m_pData = 0 ; m_bVect = 0 ; m_nPop = 0 ; m_nCopies = 0 ; } } ;
_idset_ca* mx ; // Internal instance
_idsNode* _findNode (uint32_t nId) const ;
// Prevent copy constructor hdbIdset (const hdbIdset& op) {}
public: hdbIdset (void) ; ~hdbIdset (void) ;
uint32_t Count (void) const { return mx ? mx->m_nPop : 0 ; } uint32_t NoNodes (void) const ; // { return mx ? mx->m_pNodes.Count() : 0 ; }
void Clear (void) ; uint32_t Insert (uint32_t nId) ; hzEcode Delete (uint32_t nId) ; uint32_t Fetch (hzVect<uint32_t>& Result, uint32_t nStart, uint32_t nReq) const ;
// Assignment hdbIdset& operator= (const hdbIdset& op) ;
// Boolean operators hdbIdset& operator|= (const hdbIdset& op) ; hdbIdset& operator&= (const hdbIdset& op) ;
// Get operators bool operator[] (uint32_t docId) const ;
// Diagnostics void Show (hzChain& C) const ;
// Stream operators friend std::istream& operator>> (std::istream& is, hdbIdset& ids) ; friend std::ostream& operator<< (std::ostream& os, const hdbIdset& ids) ; } ;
/* ** Idset Prototypes */
//	Bit buffer helper prototypes (definitions live elsewhere). NOTE(review): going by the names, CountBits() counts the set bits in the first nNoBytes of pvBuffer, while
//	SetBits()/GetBits() write/read the bit at offset nOset of pBitbuf - confirm against the implementation.
uint32_t	CountBits	(const void* pvBuffer, uint32_t nNoBytes) ;
void		SetBits		(char* pBitbuf, uint32_t nOset, bool bValue) ;
bool		GetBits		(const char* pBitbuf, uint32_t nOset) ;
/* ** Class based Repositories */
class hdbIndex ; // Forward declaration of the repository index base class (defined below), needed because hdbObjRepos holds hdbIndex pointers
enum	hdbReposMode
{
	//	States the storage strategy of a data object repository.

	HDB_REPOS_NULL	= 0,	//	Invalid mode
	HDB_REPOS_HARD	= 1,	//	Persistent media with whole object deltas committed to binary datum repository
	HDB_REPOS_CACHE	= 2,	//	Use cache only (small volumes, delta file only)
	HDB_REPOS_DUAL	= 3		//	Use both cache and persistent media
} ;
class hdbObjRepos { // Category: Database // // HDB Data Object Repository // // The class definition sets out the form objects of the class will have, but not how such objects are collected. What object members will be indexed, and whether objects must // be unique in some way, are matters for the repository. To this end there is an initialization sequence. The repository member function InitStart() takes the predefined data // class, names the repository and thus the data file, and it names the working directory. After calling InitStart() and before calling InitDone() to conclude initialization, // you may call:- // // 1) InitMbrIndex() To add an index to a mamber - as long as the member is indexable member and does not already have an index. // 2) InitMbrStore() Both OBJECT repositories store binary member data in a BINARY repository. This fn names which one. // // As this is part of overal program initialization, both these functions terminate execution if called on members with incompatible types. // // Note that Fetch() fetches the data object into a hdbObject recepticle but in the case of BINARY members, only the address of the binary datum value is loaded, not the value // itself. This saves time in the common case where a BINARY member value is not needed by the data process. The value is loaded into the object by hdbObject::GetBinary(), and // to support this, the hdbObjRepos class has a GetBinary() function (see below).
class _c_blk { // virtual _cache block. Stores a limited number of EDO's. Implemented as a hzXbuf.
public: hzXbuf m_edo_space ; // EDO buffer uint32_t m_nLo ; // Lowest EDO in block (by id) uint32_t m_nHi ; // Highest EDO in block
_c_blk (void) { m_nLo = m_nHi = 0 ; } } ;
class _cache { // Where memory resident EDO storage is required, the EDOs are stored in virtual blocks (_cache_chain_blk). These primarily comprise a number of full size buckets, but to // avoid wasteful gaps, can also contain smaller buckets.
hzVect<_c_blk*> m_Chain ; // Chain of EDO blocks in order of ascending object id const hdbClass* m_pClass ; // The class of objets uint32_t m_nEDO ; // Total number of EDOs in cache uint32_t m_nTopId ; // Hisgest object id issued
_c_blk* _findBlock (uint32_t objId) ;
public: void Clear (void) ; hzEcode Init (const hdbClass* pClass) ; hzEcode CommitEDO (const hzChain& edo, uint32_t objId, bool bLoad) ; hzEcode FetchEDO (hzChain& edo, uint32_t objId) ;
hzEcode GetVal (_atomval& value, uint32_t objId, uint32_t mbrNo) const ; hzEcode GetBool (bool& bValue, uint32_t objId, uint32_t mbrNo) const ;
uint32_t Count (void) { return m_nEDO ; }
// Diagnostics void Show (hzChain& C, bool bDetail=false) const ; } ;
hzArray<_cache*> m_ClassCaches ; // Control block for each class
hzMapS <uint16_t,const hdbObjRepos*> m_mapRepos ; // Map of members to external data object repositories (if applicable) hzMapS <uint16_t,hdbIndex*> m_mapIndex ; // Map of members to indexes
std::ofstream m_osDelta ; // Output stream to delta file hdbADP* m_pADP ; // Host ADP hdbBinRepos* m_pBR_Delta ; // hdbBinRepos for storing objects as whole object deltas hdbBinRepos* m_pBR_Datum ; // hdbBinRepos for storing datum on behalf of TEXT, TXTDOC or BINARY members const hdbClass* m_pClass ; // Native data object class hzString m_Name ; // The unique repository name. The name serves as a base for the various repository data files hzString m_Workdir ; // The working directory. All repository data files go here. hzString m_nameBR_Delta ; // Delta binary repository name (m_Name + _br_delta) hzString m_nameBR_Datum ; // Datum binary repository name (m_Name + _br_datum) hzString m_pathCD ; // Name of cache delta file if applicable (m_Name + _cache.delta) uint32_t m_nSeqId ; // Highest object id thus far issued uint32_t m_nPopulation ; // Population uint16_t m_DeltaId ; // Repository id (as per the ADP) uchar m_bBinaries ; // Repository native has one or more BINARY/TXTDOC members (and thus a named binary datum repos for storing member datum) uchar m_Resv ; // Reserved hdbIniStat m_eReposInit ; // Controls initialization
_cache* m_pMain ; // Main data RAM table (full width needed for all members)
void _initerr (const hzFuncname& _fn, uint32_t nExpect) ; void _deltaWrite (void) ;
// Support functions hzEcode _updateIdx (const hdbObject& obj) ; hzEcode _loadCache (void) ; hzEcode _loadDeltas (void) ;
// Force use of constructor with hdbADP arg hdbObjRepos (void) ;
// Prevent copying hdbObjRepos (const hdbObjRepos&) ; hdbObjRepos& operator= (const hdbObjRepos&) ;
public: hdbObjRepos (hdbADP& adp) ; ~hdbObjRepos (void) ;
// Init functions hzEcode InitStart (const hdbClass* pObjClass, const hzString& reposName, const hzString& workdir, hdbReposMode eMode) ; hzEcode InitMbrIndex (const hdbMember* pMbr, bool bUnique) ; hzEcode InitMbrIndex (const hzString& memberName, bool bUnique) ; hzEcode InitMbrRepos (const hzString& memberName, const hzString& reposName) ; hzEcode InitDone (void) ;
// Database operations hzEcode Open (void) ; // Ready repository for data operations hzEcode Insert (uint32_t& objId, const hdbObject& obj) ; // Standard insert operation, fails if members defined as unique are duplicated hzEcode Update (hdbObject& obj, uint32_t objId) ; // Standard modify operation, fails only if the stated object id does not exist hzEcode Fetch (hdbObject& obj, uint32_t objId) const ; // Loads an object from the cache into the supplied object container hzEcode Select (hdbIdset& result, const char* exp) const ; // Select using a SQL-esce criteria hzEcode Delete (uint32_t objId) ; // Standard delete operation, fails only if the stated object id does not exist hzEcode Clear (void) ; // empty the Cache hzEcode GetBinary (hzChain& Z, const hdbMember* pMbr, uint32_t datumId) const ; // Loads the actual value of a BINARY member (not loaded by Fetch())
// Obtain member specific repositories (if any apply) const hdbObjRepos* ObjRepos (const hdbMember* pMbr) const ; const hdbBinRepos* BinRepos (void) const { return m_pBR_Datum ; }
// Obtain info about class of objects being stored const hdbClass* Class (void) const { return m_pClass ; } const hdbMember* GetMember (const hzString name) { return m_pClass ? m_pClass->GetMember(name) : 0 ; }
// Single object lookup. bool Exists (uint32_t objId) ; // Check existance of object by object id hzEcode Exists (uint32_t& objId, const hdbMember* pMbr, const hzAtom& val) ; // Lookup object by member and value. This expects only one object.
// General Get funtions hdbIniStat InitState (void) const { return m_eReposInit ; } bool IsInit (void) const { return m_eReposInit >= HDB_CLASS_INIT_DONE ? true : false ; } hzString strName (void) const { return m_Name ; } const char* txtName (void) const { return *m_Name ; } const char* Classname (void) const { return m_pClass ? m_pClass->txtName() : 0 ; } uint32_t DeltaId (void) const { return m_DeltaId ; } uint32_t Count (void) const { return m_nPopulation ; } uint32_t SeqId (void) const { return m_nSeqId ; }
void DescRepos (hzChain& Z, uint32_t nIndent) const ; } ;
/* ** SECTION 4: Indexation */
//	ISAM cache settings
#define ISAM_CACHE_OFF		0	//	Don't cache
#define ISAM_CACHE_MIN		1	//	Cache only indicator keys in lower blocks
#define ISAM_CACHE_IDX		2	//	Cache lower blocks (all higher block indicators)
#define ISAM_CACHE_ALL		3	//	Cache all blocks
enum	hdbIdxtype
{
	//	Category:	Index
	//
	//	Identifies the concrete index class. This is what the Whatami functions of the hdbIndex class family return.

	HZINDEX_NULL	= 0,	//	No index
	HZINDEX_ENUM	= 1,	//	Index is hdbIndexEnum
	HZINDEX_UKEY	= 2,	//	Index is hdbIndexUkey
	HZINDEX_TEXT	= 3		//	Index is hdbIndexText
} ;
enum	hzSqlOp
{
	//	Category:	Index
	//
	//	SQL comparison operators, grouped by the member types they apply to.

	//	Universal SQL operators (Applies to all member types)
	HZSQL_EQUAL		= 0,	//	Member equal to an operand

	//	Arithmetic SQL operators (Applies to dates and numeric members)
	HZSQL_LT		= 1,	//	Member less than operand
	HZSQL_GT		= 2,	//	Member greater than operand
	HZSQL_LTEQ		= 3,	//	Member less than or equal to operand
	HZSQL_GTEQ		= 4,	//	Member greater than or equal to operand
	HZSQL_BETWEEN	= 5,	//	Member is greater than lower operand, less than higher operand
	HZSQL_RANGE		= 6,	//	Member is >= lower operand, <= higher operand

	//	String SQL operators
	HZSQL_CONTAINS	= 7		//	Member contains operand
} ;
class hdbIsamfile { // Category: Index // // hdbIsamfile is a memory assisted disk based ordered collection class, designed to store large volumes of data either as a 1:1 or 1:many key-object map or as a key-only set. // Both keys and objects are strings and are limited to 256 bytes. The collection is ordered by lexical value. It is anticipated that in most cases, both keys and objects will // be small single values such as domain names. However, the strings can also be serialized objects. Epistula for example, uses a hdbIsamfile to store short form messages. // // The keys or key-object pairs are held within a single large data file, in logical data blocks formed of a variable number of disk blocks. The index is a memory resident map // which maps the LOWEST key from from each logical block to the logical block address, this being the position of its first disk block in the data file. The index is rendered // persistent by means of a separate index file, which is operated on an always-append basis. The index file is completely read in during initialization. // // With an SSD, there is no concern regarding read operations but write operations must be minimized to conserve the life of the device. With a standard hard disk all I/O is a // performance issue. Either way, write operations in particular must be as few as possible. Clearly retrieval of an item (key or key-object pair), will read at least one disk // block unless some form of cache is deployed and the item of interest happens to be in it. It is also clear that until an inserted item is committed to disk, it can be lost // in the event of a program crash. However as files are buffered, not all write operations are 'hard'. Data is only written to a disk block when the buffer pointer equates to // a block boundary OR a seek operation redeploys the buffer to another block, necessitating the saving of the current buffer content to the current disk block. 
If all inserts // were written to the end of the data file, as the items are small most would not result in a hard write. As the whole point of an ISAM is that items are in key order in the // data file, the advantage of file buffering is lost. // // To overcome this difficulty, hdbIsamfile has an optional auxillary memory resident map for pending items which is backed up to file that is always appeneded. This auxillary // will map items to the logical block addresses where they would be inserted. Then at a suitable point, the pending items are which directly maps keys .... //
hzMapM<hzString,uint32_t> m_Index ; // Operaional map
std::ofstream m_WrI ; // Index file output stream for Insert/Update std::ofstream m_WrD ; // Data file output stream for Insert/Update std::ifstream m_RdD ; // Data file input stream for Fetch
hzString m_Workdir ; // Working directory hzString m_fileDelta ; // Name of delta file hzString m_fileStore ; // Name of data file hzString m_Name ; // Basename uint32_t m_nElements ; // Total number of elements (keys or key-object pairs) uint32_t m_nBlocks ; // Number of file system blocks in data file uint16_t m_nKeyLimit ; // Max size of key (max 256 bytes) uint16_t m_nObjLimit ; // Max size of object (max 256 bytes) uint16_t m_nBlkSize ; // Logical data block size uint16_t m_nInitState ; // Initialization state char m_Buf[HZ_BLOCKSIZE] ; // Operational buffer
public: hzChain m_Error ; // Error report hzEcode m_Cond ; // Error condition
hdbIsamfile (void) ; ~hdbIsamfile (void) ;
hzEcode Init (hdbADP& adp, const hzString& workdir, const hzString& name, uint32_t keySize, uint32_t objSize, uint32_t blkSize=4) ; hzEcode Open (void) ; hzEcode Close (void) ;
hzEcode Insert (const hzString& key, const hzString& obj) ; bool Exists (const hzString& key) ; hzEcode Fetch (hzArray<hzPair>& result, const hzString& keyA, const hzString& keyB) ; hzEcode Delete (const hzString& key) ; } ;
class hdbIndex { // Category: Index // // The hdbIndex pure virtual base class exists only to unify the different type of indexes.
#if 0 union _idx_set { hzMapS <uint32_t,hdbIdset>* m_Maps ; // For Enums, 16-bit and 8-bit values only. These values have a narrow range so each value is likely to occur in many objects. // Hence each value is represented by a bitmap to hold the object id of objects that hold the value.
hzMapS <hzString,hdbIdset>* m_Keys ; // Free text index. Map of words to list of objects (records) containing the words hzMapS <hzString,uint32_t>* pStr ; // String index hzMapS <uint64_t,uint32_t>* pLu ; // 64-but index hzMapS <uint32_t,uint32_t>* pSu ; // 32-bit index } ; #endif
protected: //const hdbObjRepos* m_pRepos ; // Repository //const hdbMember* m_pMbr ; // Class member
hzString m_Name ; // Name of index (usually that of the applicable class member)
hdbIndex() {}
public: virtual ~hdbIndex (void) {}
virtual hdbIdxtype Whatami (void) = 0 ; } ;
class hdbIndexEnum : public hdbIndex { // Category: Index // // hdbIndexEnum applies specifically to ENUM data class members and is currently the only indexation method available for such members. It consists of a 1:1 map of ENUM values // to Idsets. // // Member values are usually set in HTML forms, where the ENUM is manifest as a set of selector options, a set of radio buttons or a set of check-boxes. In this scenario, ENUM // populations are generally small. This not always the case however. The maximum posible population is set to 65,355 values. ENUM members either store a single ENUM value in // which case the value will occupy 1 or 2 bytes in the repository, OR it will store multiple ENUM values in encoded form.
hzMapS <uint32_t,hdbIdset*> m_Maps ; // Map of keys to lists of objects matching the key (held as bitmaps)
// Prevent copy construction hdbIndexEnum (hdbIndexEnum& op) {}
// Prevent direct copies hdbIndexEnum& operator= (hdbIndexEnum& op) { return *this ; }
public: hdbIndexEnum (void) { }
~hdbIndexEnum (void) { Halt() ; }
void Halt (void) ; hzEcode Insert (uint32_t nObjectId, const hzAtom& eVal) ; hzEcode Delete (uint32_t nObjectId, const hzAtom& eVal) ; hzEcode Select (hdbIdset& Result, const hzAtom& eVal) ;
// Diagnostics hzEcode Dump (const hzString& cpFilename, bool bFull) ;
// Get functions uint32_t Count (void) { return m_Maps.Count() ; } hdbIdxtype Whatami (void) { return HZINDEX_ENUM ; } } ;
class hdbIndexUkey : public hdbIndex { // Category: Index // // hdbIndexUkey (unique key index), ensures objects in a repository are unique on the member to which the index applies. hdbIndexUkey can be applied to members with any string // like or numeric data type with a maximum population of 1. hdbIndexUkey is implimented as direct, 1:1 mappings between values and object ids. Thus, for every value found in // the index, there will be a single object in the repository that will have that value for the applicable data class member. hdbIndexUkey is generally applied to inherently // unique data, such as usernames and email addresses. // // Note that as hdbIndexUkey can be applied to members with either string like or numeric data types, the mapping regime must be able to cope with this. By // means of a union of map pointers, the maping can be between a key of string or a 64-bit or 32-bit number - and the 32-bit object id.
union _ptrset { // Covers all data types applicable to unique key indexes
//hzMapS <hzString,uint32_t>* pStr ; // String index (not currently used) //hzMapS <uint64_t,uint32_t>* pLu ; // 64-but index //hzMapS <uint32_t,uint32_t>* pSu ; // 32-bit index
hzMapS <hzMD5,uint32_t>* pMd5 ; hzMapS <hzString,uint32_t>* pStr ; hzMapS <hzDomain,uint32_t>* pDom ; hzMapS <hzEmaddr,uint32_t>* pEma ; hzMapS <hzUrl,uint32_t>* pUrl ; hzMapS <hzIpaddr,uint32_t>* pIpa ; hzMapS <hzTime,uint32_t>* pTime ; hzMapS <hzSDate,uint32_t>* pSD ; hzMapS <hzXDate,uint32_t>* pXD ; hzMapS <int64_t,uint32_t>* pSI64 ; hzMapS <uint64_t,uint32_t>* pUI64 ; hzMapS <int32_t,uint32_t>* pSI32 ; hzMapS <uint32_t,uint32_t>* pUI32 ; } ;
_ptrset m_keys ; // The key hdbBasetype m_eBasetype ; // The type bool m_bInit ; // Init state
public: hdbIndexUkey (void) { m_keys.pStr = 0 ; m_eBasetype = BASETYPE_UNDEF ; m_bInit = false ; }
~hdbIndexUkey (void) { }
hzEcode Init (const hdbObjRepos* pRepos, const hzString& mbrName, hdbBasetype eType) ; void Halt (void) ;
hzEcode Insert (const hzAtom& A, uint32_t objId) ; hzEcode Delete (const hzAtom& A) ; hzEcode Select (uint32_t& objId, const hzAtom& key) ;
hdbBasetype Basetype (void) { return m_eBasetype ; } hdbIdxtype Whatami (void) { return HZINDEX_UKEY ; } } ;
class	hdbIndexText : public hdbIndex
{
	//	Category:	Index
	//
	//	Free text index, applicable to members of BASETYPE_TEXT and BASETYPE_TXTDOC. Every word that occurs at least once in any document gets its own idset, which makes this
	//	form of indexation particularly profligate. Even a modest document collection can cover a good proportion of the words in a language, and a collection may span several
	//	languages, so a million or so unique words (and hence idsets) would not be unusual.

private:
	hzMapS	<hzString,hdbIdset>	m_Keys ;	//	Word -> idset of the objects (records) containing the word

public:
	//	Setup, shutdown and reset. Implemented elsewhere; the meaning of opdir, backup and cacheMode is defined by the implementation.
	hzEcode	Init	(const hzString& name, const hzString& opdir, const hzString& backup, uint32_t cacheMode) ;
	hzEcode	Halt	() ;
	hzEcode	Clear	() ;

	//	Index maintenance: pair a word with a document id. Implemented elsewhere.
	hzEcode	Insert	(const hzString& Word, uint32_t docId) ;
	hzEcode	Delete	(const hzString& Word, uint32_t docId) ;

	//	Lookup. Select takes a single word and Eval takes a criteria string; both deliver their outcome in Result. The criteria syntax is defined by the implementation.
	hzEcode	Select	(hdbIdset& Result, const hzString& Word) ;
	hzEcode	Eval	(hdbIdset& Result, const hzString& Criteria) ;

	//	Diagnostics: export the index to the given file path. bFull defaults to true; its exact effect is defined by the implementation.
	hzEcode	Export	(const hzString& filepath, bool bFull = true) ;

	//	Accessors
	uint32_t			Count	()	{ return m_Keys.Count() ; }	//	Number of entries in the word map
	virtual	hdbIdxtype	Whatami	()	{ return HZINDEX_TEXT ; }	//	Identifies this index as a free text index
} ;
/* ** External variables for database package */
//	Declarations only: the objects named below are defined elsewhere. Each datatype_* variable is a pointer to a const descriptor, one per fully qualified data type in
//	Group 1 (hdbCpptype) and Group 2 (hdbHzotype).

extern	hzSet	<hzString>	_hzGlobal_setStrings ;	//	Global string repository

//	Group 1 Datatypes: C++ Fundamentals
extern	const hdbCpptype*	datatype_DOUBLE ;		//	64 bit floating point value
extern	const hdbCpptype*	datatype_INT64 ;		//	64-bit Signed integer
extern	const hdbCpptype*	datatype_INT32 ;		//	32-bit Signed integer
extern	const hdbCpptype*	datatype_INT16 ;		//	16-bit Signed integer
extern	const hdbCpptype*	datatype_BYTE ;			//	8-bit Signed integer
extern	const hdbCpptype*	datatype_UINT64 ;		//	64-bit Positive integer
extern	const hdbCpptype*	datatype_UINT32 ;		//	32-bit Positive integer
extern	const hdbCpptype*	datatype_UINT16 ;		//	16-bit Positive integer
extern	const hdbCpptype*	datatype_UBYTE ;		//	8-bit Positive integer
extern	const hdbCpptype*	datatype_BOOL ;			//	Either true or false

//	Group 2 Datatypes: HadronZoo Defined types (fixed size)
extern	const hdbHzotype*	datatype_DOMAIN ;		//	Internet Domain
extern	const hdbHzotype*	datatype_EMADDR ;		//	Email Address
extern	const hdbHzotype*	datatype_URL ;			//	Universal Resource Locator
extern	const hdbHzotype*	datatype_PHONE ;		//	Phone number (limited alphabet, standard form, likely to be unique to data object)
extern	const hdbHzotype*	datatype_IPADDR ;		//	IP Address
extern	const hdbHzotype*	datatype_TIME ;			//	No of seconds since midnight (4 bytes)
extern	const hdbHzotype*	datatype_SDATE ;		//	No of days since Jan 1st year 0000
extern	const hdbHzotype*	datatype_XDATE ;		//	Full date & time
extern	const hdbHzotype*	datatype_STRING ;		//	Any string, treated as a single value
extern	const hdbHzotype*	datatype_TEXT ;			//	Any string, treated as a series of words, stored on disk, frequent change
extern	const hdbHzotype*	datatype_BINARY ;		//	File assumed to be un-indexable (eg image). Stored on disk, infrequent change.
extern	const hdbHzotype*	datatype_TXTDOC ;		//	Document from which text can be extracted/indexed. Stored on disk, infrequent change.
/* ** Prototypes */
// hzEcode InitDatabase (const hzString& stringsFile = 0) ; // hzEcode ExportADP (void) ;
#endif // hzDatabase_h