// // File: hzDocXml.cpp // // Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com) // // The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free // Software Foundation, either version 3 of the License, or any later version. // // The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR // A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses. //
// // Generic XML parsing //
#include <fstream>
#include <sys/stat.h>
#include "hzChars.h" #include "hzChain.h" #include "hzDirectory.h" #include "hzDocument.h" #include "hzTextproc.h" #include "hzProcess.h"
using namespace std ;
hzDocXml::hzDocXml (void) { m_pRoot = 0 ; m_FileEpoch = 0 ; m_bXmlesce = 0 ; _hzGlobal_Memstats.m_numDocxml++ ; }
hzDocXml::~hzDocXml (void) { Clear() ; _hzGlobal_Memstats.m_numDocxml-- ; }
int32_t hzDocXml::_proctagopen (hzXmlNode** ppChild, hzXmlNode* pParent, chIter& ci) { // Determine if the current char (in the working chain) amounts to the start of an XML tag instance (node). If is is then a new // node is allocated and the pointer to this returned to the caller (hzDocXml::Load()) via the first argument. If the current // char in the chain does not amount to the start of the tag .. // // Arguments: 1) ppChild Populated with a pointer for a new node if allocatedi (we are at an opening tag) // 2) pParent The pointer to the current node (which will be new node's parent) // 3) ci The chain iterator being processed // // Returns: -1 We are not at an opening tag or the format is errant // 0 If the tag is valid and left open. The iterator will be advanced to the closing '>' char. // 1 If the tag is valid but self closed. The iterator will be advanced to the closing '>' char.
_hzfunc("_proctagopen (XML tags)") ;
hzChain W ; // For building tokens chIter zi ; // For iterating tag chIter xi ; // Reference iterator hzXmlNode* pNewnode ; // Pointer to node //hzNumPair attr ; // Attribute name/value pair hzPair attr ; // Attribute name/value pair hzString Name ; // Tag/node or attr name hzString Value ; // Attr value hzString S ; // Temp string uint32_t nCol ; // Column bool bOpen = false ; // Opening tag indicator bool bXE = false ; // XMLesce mode
if (!ppChild) Fatal("(Non app fn): No child node recepticle\n") ; *ppChild = 0 ;
if (*ci != '<') return -1 ; nCol = ci.Col() ; zi = ci ; zi++ ;
// Get tag name which must start with an alphanum but then may contain colon, minus or uscore if (*zi <= CHAR_SPACE) return -1 ; if (!IsAlphanum(*zi)) return -1 ;
for (xi = zi ;; xi++) { if (IsAlphanum(*xi) || *xi == CHAR_COLON || *xi == CHAR_MINUS || *xi == CHAR_USCORE) W.AddByte(*xi) ; else break ; }
Name = W ; W.Clear() ; zi = xi ;
// Now we have the name we can see if the tag is on the m_Xmlesce list. If it is we set a flag. Later this flag will be used to // suppress the formation of subnodes where tags which could be legal HTML tags are encountered.
if (m_Xmlesce.Count()) { if (m_Xmlesce.Exists(Name)) bXE = true ; }
/* ** If at tag end */
if (*zi == '>') { pNewnode = new hzXmlNode() ; pNewnode = pNewnode->Init(this, pParent, Name, zi.Line(), nCol, bXE) ;
ci = zi ; *ppChild = pNewnode ; if (pParent) pParent->AddNode(pNewnode) ; return 0 ; }
if (zi == " />") zi++ ; if (zi == "/>") { // Self closing tag without any parameters (null tags, legal though, eg <br/>) pNewnode = new hzXmlNode() ; pNewnode = pNewnode->Init(this, pParent, Name, zi.Line(), nCol, bXE) ;
zi++ ; ci = zi ; *ppChild = pNewnode ; if (pParent) pParent->AddNode(pNewnode) ; return 1 ; }
/* ** Get tag params if any */
// If no whitespace there are no params and tag is not a tag if (!IsWhite(*zi)) { threadLog("Line %d: tag %s expected whitespace before attr list\n", zi.Line(), *Name) ; return -1 ; }
// Allocate node for tag pNewnode = new hzXmlNode() ; pNewnode = pNewnode->Init(this, pParent, Name, zi.Line(), nCol, bXE) ;
zi.Skipwhite() ; for (;;) { // Get param name (note that these can contain colons as well as a-z, A-Z and 0-9 for (xi = zi ; !xi.eof() ; xi++) { if (IsAlphanum(*xi) || *xi == CHAR_COLON || *xi == CHAR_PERIOD || *xi == CHAR_MINUS || *xi == CHAR_USCORE) W.AddByte(*xi) ; else break ; }
Name = W ; W.Clear() ;
if (*xi != CHAR_EQUAL) { threadLog("Line %d: param name (%s) not followed by a '='\n", zi.Line(), *Name) ; return -1 ; }
zi = xi ; zi++ ;
// Get param value if (*zi == CHAR_DQUOTE) { zi++ ; for (xi = zi ; *xi && *xi != CHAR_DQUOTE ; xi++) W.AddByte(*xi) ;
if (*xi != CHAR_DQUOTE) { threadLog("Line %d: unmatched double-quote\n", zi.Line()) ; return -1 ; }
Value = W ; W.Clear() ; zi = xi ; zi++ ; } else { if (!IsAlphanum(*zi)) { threadLog("Line %d: Non-alphanumeric parameter value\n", zi.Line()) ; return -1 ; }
for (xi = zi ; !xi.eof() && IsAlphanum(*xi) ; xi++) W.AddByte(*xi) ;
Value = W ; W.Clear() ; zi = xi ; }
if (!m_Dict.Exists(Name)) m_Dict.Insert(Name) ; if (!m_Dict.Exists(Value)) m_Dict.Insert(Value) ;
attr.name = m_Dict[Name] ; attr.value = m_Dict[Value] ; m_NodeAttrs.Insert(pNewnode->GetUid(), attr) ;
// Check chars. At this point we could have "/>", ">", " />", " >" or whitespace followed by another param
zi.Skipwhite() ;
if (zi == "/>") { zi++ ; break ; } if (zi == '>') { bOpen = true ; break ; }
if (!IsAlphanum(*zi)) { threadLog("Line %d: Illegal char (%c,%u) in tag (name=%s, value=%s)\n", zi.Line(), *zi, *zi, *Name, *Value) ; return -1 ; } }
// if (attributes.Count()) // pNewnode->_setnodeattrs(attributes) ;
if (bOpen) *ppChild = pNewnode ; if (pParent) pParent->AddNode(pNewnode) ;
ci = zi ; return bOpen ? 0:1 ; }
bool _istagclose (hzString& S, hzChain::Iter& ci) { // Determine if the current location in the string corresponds to the end of an XML tag. // // Arguments: 1) S A hzString reference for the tagname // 2) ci The chain iterator into the XML source // // Returns: True If the XML pointer is at the end of the tag // False Otherwise.
hzChain W ; // For building tokens chIter zi ; // For iterating tag
zi = ci ; if (zi != "</") return false ;
for (zi += 2 ; IsAlphanum(*zi) || *zi == CHAR_COLON || *zi == CHAR_MINUS || *zi == CHAR_USCORE ; zi++) W.AddByte(*zi) ;
if (*zi != CHAR_MORE) return false ;
ci += 2 ; S = W ; ci = zi ; return true ; }
/* ** hzAttrset members */
hzAttrset& hzAttrset::operator= (hzHtmElem* pNode) { _hzfunc("hzAttrset::operator=(htm)") ;
m_NodeUid = pNode->GetUid() ;
if (pNode) m_pHostDoc = pNode->GetHostDoc() ;
if (!m_pHostDoc) { m_Current = m_Start = m_Final = -1 ; //m_Pair.m_A = m_Pair.m_B = 0 ; m_Pair.name = m_Pair.value = (char*) 0 ; } else { m_Current = m_Start = m_pHostDoc->m_NodeAttrs.First(pNode->GetUid()) ; if (m_Start >= 0) m_Final = m_pHostDoc->m_NodeAttrs.Last(pNode->GetUid()) ;
m_Pair = m_pHostDoc->m_NodeAttrs.GetObj(m_Current) ; }
return *this ; }
hzAttrset& hzAttrset::operator= (hzXmlNode* pNode) { _hzfunc("hzAttrset::operator=(xml)") ;
// Set to nothing (no attrs) m_Current = m_Start = m_Final = -1 ; //m_Pair.m_A = m_Pair.m_B = 0 ; m_Pair.name = m_Pair.value = (char*) 0 ;
m_NodeUid = pNode->GetUid() ;
if (pNode) { m_pHostDoc = pNode->GetHostDoc() ;
if (m_pHostDoc) { m_Current = m_Start = m_pHostDoc->m_NodeAttrs.First(pNode->GetUid()) ; if (m_Start >= 0) m_Final = m_pHostDoc->m_NodeAttrs.Last(pNode->GetUid()) ;
m_Pair = m_pHostDoc->m_NodeAttrs.GetObj(m_Current) ; } }
return *this ; }
void hzAttrset::Advance (void) { // Advance the iterator
if (m_Current == m_Final) //m_Pair.m_A = m_Pair.m_B = 0 ; m_Pair.name = m_Pair.value = (char*) 0 ; else { m_Current++ ; m_Pair = m_pHostDoc->m_NodeAttrs.GetObj(m_Current) ; } }
bool hzAttrset::NameEQ (const char* cstr) const { // Confirm or deny that the current attribute name equals the supplied null terminated string // // Argument: cstr The test name // // Returns: True If the supplied test string matches the name // false otherwise
if (!m_pHostDoc) return false ;
//return CstrCompare(m_pHostDoc->Xlate(m_Pair.m_A), cstr) == 0 ? true : false ; return m_Pair.name == cstr ? true : false ; }
bool hzAttrset::ValEQ (const char* cstr) const { // Confirm or deny that the current attribute value equals the supplied null terminated string // // Argument: cstr The test string // // Returns: True If the supplied test string matches the name // false otherwise
if (!m_pHostDoc) return false ;
//return CstrCompare(m_pHostDoc->Xlate(m_Pair.m_B), cstr) == 0 ? true : false ; return m_Pair.value == cstr ? true : false ; }
const char* hzAttrset::Name (void) const { // Return the attribute name
//if (!m_pHostDoc) return 0 ; //if (m_Pair.m_A < 1) return 0 ;
//return m_pHostDoc->Xlate(m_Pair.m_A) ; return *m_Pair.name ; }
const char* hzAttrset::Value (void) const { // Return the attribute name
//if (!m_pHostDoc) return 0 ; //if (m_Pair.m_B < 1) return 0 ;
//return m_pHostDoc->Xlate(m_Pair.m_B) ; return *m_Pair.value ; }
/* ** hzXmlNode members */
//hzXmlNode* hzXmlNode::Init (hzDocXml* pHostDoc, hzXmlNode* pParent, uint32_t snName, uint32_t nLineNo, uint32_t nCol, bool bXmlesce) hzXmlNode* hzXmlNode::Init (hzDocXml* pHostDoc, hzXmlNode* pParent, const hzString& name, uint32_t nLineNo, uint32_t nCol, bool bXmlesce) { // Initialize a hzXmlNode, insert it into the host document's array of nodes and return the in-situ pointer // // Arguments: 1) pHostDoc The host document // 2) pParent The parent node (to this) // 3) snName The string number for the node name // 4) nLineNo The config file line number // 5) nCol The config file column position // 6) bXmlesce Flag for XML-esce rules // // Returns: Pointer to node in-situ
_hzfunc("hzXmlNode::Init") ;
hzXmlNode* pInSitu ; // Final address in host document array of nodes
if (!pHostDoc) hzexit(E_ARGUMENT, "No host document supplied") ;
m_pHostDoc = pHostDoc ;
if (!pParent) { m_Parent = 0 ; m_nLevel = 0 ; } else { m_Parent = pParent->m_Uid ; m_nLevel = pParent->m_nLevel + 1 ; }
m_bXmlesce = bXmlesce ? 1 : 0 ;
//m_snName = snName ; m_Name = name ; m_nLine = m_nAnti = nLineNo ; m_nCol = nCol ;
m_Uid = pHostDoc->m_arrNodes.Count() + 1 ; pHostDoc->m_arrNodes.Add(*this) ;
pInSitu = pHostDoc->m_arrNodes.InSitu(m_Uid-1) ;
return pInSitu ; }
const char* hzXmlNode::txtName (void) const { _hzfunc("hzXmlNode::txtName") ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
//return m_pHostDoc->Xlate(m_snName) ; return *m_Name ; }
const char* hzXmlNode::txtPtxt (void) const { _hzfunc("hzXmlNode::txtPtxt") ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
//return m_pHostDoc->Xlate(m_snPtxt) ; return *m_Ptxt ; }
bool hzXmlNode::NameEQ (const char* testval) const { _hzfunc("hzXmlNode::NameEQ") ;
//const char* i ; // Node name
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
//i = m_pHostDoc->Xlate(m_snName) ; //i = *m_Name ; if (!m_Name) hzexit(E_NOINIT, "Node has no tagname") ;
return m_Name == testval ? true : false ; //return CstrCompare(i, testval) == 0 ? true : false ;
//return !strcmp(i, testval) ? true : false ; }
#if 0 const char* hzXmlNode::Xlate (uint32_t strNo) const { _hzfunc("hzXmlNode::Xlate") ;
if (!this) hzexit(E_NOINIT, "No Node instance") ; if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
return m_pHostDoc->Xlate(strNo) ; } #endif
hzXmlNode* hzXmlNode::GetFirstChild (void) const { _hzfunc("hzXmlNode::GetFirstChild") ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
if (!m_Children) return 0 ;
return m_pHostDoc->m_arrNodes.InSitu(m_Children-1) ; }
hzXmlNode* hzXmlNode::Sibling (void) const { _hzfunc("hzXmlNode::Sibling") ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
if (!m_Sibling) return 0 ;
return m_pHostDoc->m_arrNodes.InSitu(m_Sibling-1) ; }
hzXmlNode* hzXmlNode::Parent (void) const { _hzfunc("hzXmlNode::Parent") ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
if (!m_Parent) return 0 ;
return m_pHostDoc->m_arrNodes.InSitu(m_Parent-1) ; }
hzEcode hzXmlNode::AddNode (hzXmlNode* pNode) { // Add the supplied node to the calling node's list of child node. // // This is achieved by either making m_pChildren equal to the supplied node (where the calling node does not yet have chidren) or by // seeking to the end of the list and then appending (each of the existing children will have its m_pSibling pointer set to the next // child). // // Arguments: 1) pNode The node to be added to this node's list of child nodes // // Returns: E_ARGUMENT If no node is supplied // E_DUPLICATE If the supplied node is this node // E_OK If the node is added as a child
_hzfunc("hzXmlNode::AddNode") ;
hzXmlNode* tmp ; // XML node pointer //uint32_t nodeNo ; // Node number
if (!pNode) return hzerr(E_ARGUMENT, "Attempt to add a null node to %s", txtName()) ; if (pNode == this) return hzerr(E_DUPLICATE, "Attempt to add a node (%d) to itself (%d, %s)", pNode->m_Uid, m_Uid, txtName()) ;
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
m_pHostDoc->m_NodesPar.Insert(m_Uid, pNode->m_Uid) ;
if (!m_Children) m_Children = pNode->GetUid() ; else { for (tmp = GetFirstChild() ; tmp->m_Sibling ; tmp = tmp->Sibling()) { if (pNode == tmp) return hzerr(E_DUPLICATE, "Attempt to add an already existing node to %s", txtName()) ; } // for (nodeNo = m_Children ; nodeNo ; nodeNo = tmp->m_Sibling) // { // tmp = m_pHostDoc->m_arrNodes.InSitu(nodeNo) ; // if (!tmp->m_Sibling) // break ; // if (pNode == tmp) // return hzerr(E_DUPLICATE, "Attempt to add an already existing node to %s", txtName()) ; // } tmp->m_Sibling = pNode->m_Uid ; }
return E_OK ; }
hzEcode hzXmlNode::SetPretext (hzChain& Z) { // Set the XML node's pretext value to that of the supplied chain. // // Arguments: 1) Z The hzChain containing the pretext value // // Returns: E_OVERFLOW If the chain content exceeds the maximum allowed size for a hzString // E_OK If the operation is successful
_hzfunc("hzXmlNode::SetPretext") ;
hzString P ; // Pretext value hzEcode rc = E_OK ; // Return code
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
if (Z.Size() > HZSTRING_MAXLEN) { P = "overflow" ; rc = E_OVERFLOW ; } else P = Z ;
if (!m_pHostDoc->m_Dict.Exists(P)) m_pHostDoc->m_Dict.Insert(P) ;
m_Ptxt = m_pHostDoc->m_Dict[P] ; return rc ; }
void hzXmlNode::SetContent (hzChain& Z) { // SetContent is called by hzDocXml::Load() once the node content has been established by encountering the anti-tag. Technically, the content of a node is // the opening tag, the anti-tag and absoultely everything in-between. That is not how HadronZoo chooses to see it. The content does not include eiher the // tag or anti-tag and text content is stipped of leading and trailing whitespace. This unifies the approach between single and multi-line tags. // // Argument: Z The chain containing the content // // Returns: None
_hzfunc("hzXmlNode::SetContent") ;
hzChain X ; // Result chain chIter zi ; // Input chain iterator chIter xi ; // Input chain forward iterator
// Strip leading whitespace //for (zi = Z ; !zi.eof() && *zi <= CHAR_SPACE ; zi++) ;
// Strip leading newlines and tabs from tag content - but not spaces for (zi = Z ; !zi.eof() && *zi < CHAR_SPACE ; zi++) ;
for (; !zi.eof() ; zi++) { if (*zi == CHAR_NL) { // Run to end of whitespace for (xi = zi, xi++ ; !xi.eof() && *xi <= CHAR_SPACE ; xi++) ;
// Terminate if no more non-whitespace chars if (xi.eof()) break ; } X.AddByte(*zi) ; }
// if (X.Size() > HZSTRING_MAXLEN) // { m_fixContent.Clear() ; m_tmpContent = X ; } // else // { m_tmpContent.Clear() ; m_fixContent = X ; }
m_fixContent = X ; }
void hzXmlNode::SetCDATA (hzChain& Z) { // Set content supplied as CDATA // // Argument: Z The chain containing the content // // Returns: None
m_fixContent = Z ; }
hzXmlNode* hzXmlNode::_findsubnode (bool& bMatch, const hzString& name, const hzString& attr, const hzString& value) { // Test if this node has the correct name and if not, attemp to find a subnode that does. If no match on name return NULL, else return the node. Once a node matching the name // has been found, it is tested for any supplied attr and value criteria. If these match (or are not supplied), bMatch is set true. In all other scenarios bMatch is set false. // // Arguments: 1) bMatch Set to indicate if result is a complete match // 2) name Required name // 3) attr Required attribute names (if any) // 4) value Required value (if any) // // Returns: Pointer to subnode
hzAttrset ai ; // Attribute iterator hzXmlNode* result ; // Subnode found hzXmlNode* pN ; // XML node pointer //const char* anam ; // Converted attribute name //const char* aval ; // Converted attribute value
bMatch = false ; result = 0 ;
if (name == txtName()) { // Node found result = this ;
ai = this ; if (ai.Valid()) { // Check for supplied attribute and test for (; ai.Valid() ; ai.Advance()) { //anam = ai.Name() ; aval = ai.Value() ;
//if (attr == anam) if (attr == ai.Name()) { if (!value) { bMatch = true ; break ; }
//if (value == aval) if (value == ai.Value()) { bMatch = true ; break ; } } } } else { if (!value || m_fixContent == value) bMatch = true ; }
if (bMatch) return result ; }
// We are not on the required node so call this function on all subnodes
result = 0 ; for (pN = GetFirstChild() ; pN ; pN = pN->Sibling()) { result = pN->_findsubnode(bMatch, name, attr, value) ;
if (bMatch) break ; }
return result ; }
uint32_t hzXmlNode::_testnode (hzVect<hzXmlNode*>& tmpResult, const char* srchExp, uint32_t& nLimit) { // Support funcion for FindSubnodes() // // Split up first part of criteria (up to first period or null terminator), to a node/tag name and if present, a content speciifer // (="some_value"), an attribute name (->"attr_name") an attribute content specifer. // // We now apply the test to the current node and when required, to the children. We do not operate where nodes are at a higher // level than the limit. This is because the FindSubnodes function is looking for the set of nodes matching the criteria that are // found at the lowest level // // Arguments: 1) result A reference to a vector of node pointers that will be populated by this operation // 2) srchExp The search expression that decendent nodes of this node, must match to be included in the result // 3) nLimit Limit the number of levels to decend // // Returns: Total subnodes found
hzAttrset ai ; // Attribute iterator hzXmlNode* pNode ; // Node to be returned const char* i ; // Criterion iterator const char* j ; // Criterion iterator const char* anam ; // Attribute name const char* aval ; // Attribute value const char* cpNext = 0 ; // Next part of search expression if present hzString nname ; // Required name of node hzString pname ; // Required name of node parameter (attribute) hzString nvalue ; // Required value of node hzString pvalue ; // Required value of parameter uint32_t nSize ; // Size of string uint32_t nTotal ; // Total nodes found matching criteria uint32_t nA ; // Attribute iterator bool bFound ; // Does this node pass this part of criteria?
// If we are already at too high a level, return if (nLimit && (m_nLevel > nLimit)) return 0 ;
/* ** Derive the required node name and if applicable, node value, attribute name and attribute value, from the supplied criteria. */
// Get required name of node for (nSize = 0, i = j = srchExp ; *i ; nSize++, i++) { if (*i == CHAR_PERIOD) { i++ ; cpNext = i ; break ; } if (*i == CHAR_EQUAL || (i[0] == CHAR_MINUS && i[1] == CHAR_MORE)) break ; }
nname.SetValue(j, nSize) ;
// Get required value of node if applicable if (*i == CHAR_EQUAL) { // We are setting a value requirement for the node for (nSize = 0, i += 2, j = i ; *i != CHAR_DQUOTE ; nSize++, i++) ; i++ ;
if (*i == CHAR_PERIOD) { i++ ; cpNext = i ; }
nvalue.SetValue(j, nSize) ; }
// Get required name of node parameter if applicable if (i[0] == CHAR_MINUS && i[1] == CHAR_MORE) { for (nSize = 0, i += 2, j = i ; *i ; nSize++, i++) { if (*i == CHAR_PERIOD) { i++ ; cpNext = i ; break ; } if (*i == CHAR_EQUAL) break ; }
pname.SetValue(j, nSize) ;
// Get required value of parameter if applicable if (*i == CHAR_EQUAL) { // We are setting a value requirement for the node for (nSize = 0, i += 2, j = i ; *i != CHAR_SQUOTE ; nSize++, i++) ; i++ ;
if (*i == CHAR_PERIOD) { i++ ; cpNext = i ; }
pvalue.SetValue(j, nSize) ; } }
// Now we have the first part of the criteria, we test to see if this node meets this. If it does we still have to establish if // the remainder of the criteria (if it exists) is satisfied.
bFound = false ;
//if (nname == m_pHostDoc->Xlate(m_snName)) if (nname == m_Name) { // We are on the specified node so if the value is not right, any named attribute does not exist or it does but with the // wrong value, we return a zero (to end the examination of this branch of nodes)
bFound = true ;
if (nvalue) { if (nvalue != m_fixContent) return 0 ; }
if (bFound && pname) { // See if we can find a parameter of pname on this node for (ai = this ; ai.Valid() ; ai.Advance()) { anam = ai.Name() ; aval = ai.Value() ;
if (pname == anam) { if (pvalue) { if (pvalue != aval) continue ; } break ; } }
if (nA == m_nAttrs) return 0 ; } }
if (bFound) { // Now we have passed the first part of the criteria, we can add this node to the results if there is no furthur criteria. But // if there is, we have to establish if the remainder of the criteria is satisfied. This will nessesitate a recursive call of // this function for each and every child of this node with the criteria pointer advanced. Only if at least one of these calls // succeeds (returns a positive integer for nodes added to the result), can this call succeed.
if (!cpNext) { // if (plog) // plog->Out("\tMatched. Adding %s at level %d and position %d to array\n", *Lineage(), m_nLevel, tmpResult.Count()) ;
nLimit = m_nLevel ; tmpResult.Add(this) ; return 1 ; }
// Test children on the further criteria nTotal = 0 ; for (pNode = GetFirstChild() ; pNode ; pNode = pNode->Sibling()) { if (!pNode->IsAncestor(this)) Fatal("Case 2: Proported child fails to be ancestor of this\n") ;
if (nLimit && (pNode->m_nLevel > nLimit)) continue ;
nTotal += pNode->_testnode(tmpResult, cpNext, nLimit) ; } return nTotal ; }
// This node does not have the required name and so does not meet the first part of the criteria. However a child might meet the // criteria so we try each in turn.
nTotal = 0 ; for (pNode = GetFirstChild() ; pNode ; pNode = pNode->Sibling()) { if (!pNode->IsAncestor(this)) Fatal("Case 3: Proported child fails to be ancestor of this\n") ;
if (nLimit && (pNode->m_nLevel > nLimit)) continue ;
nTotal += pNode->_testnode(tmpResult, srchExp, nLimit) ; }
return nTotal ; }
void hzXmlNode::FindSubnodes (hzVect<hzXmlNode*>& result, const char* srchExp) { // From the current node (the node used to call this member function), find all sub-nodes matching the supplied criteria. // // Warning! This function has been known to cause a lot of confusion. // // This function does not simply locate nodes that are children of the calling node whose name matches the supplied criteria. The // aim is to locate descenant nodes, however far down the tree they are. // // Arguments: 1) result A reference to a vector of nodes that will be populated by this operation // 2) srchExp The search expression that decendent nodes of this node, must match to be included in the result // // Returns: None
_hzfunc("hzXmlNode::FindSubnodes") ;
uint32_t nLimit = 0 ; // Level limit
if (!m_pHostDoc) hzexit(E_NOINIT, "Node has no host document") ;
result.Clear() ; _testnode(result, srchExp, nLimit) ; }
hzXmlNode* hzXmlNode::FindSubnode (const char* srchExp) { // From the current node (the node used to call this member function), find all sub-nodes matching the supplied criteria. // // The critieria will be that required to uniquely identify a tag and optionally that required to specify a tag attribute and optionally that // required to specify a value for the tag or attribute. The convention is to use the :: symbol between tag levels where these are needed and // the -> symbol to name the tag attribute and the = symbol to specify a value. Criteria can thus be of the forms:- // // 1) tagname // 2) level0tagname...levelNtagname // 3) ....tagname="some value" // 4) ....tagname->attribute // 5) ....tagname->attribute="some value" // // The criteria must only serve to identify a single tag or tag attribute. All nodes matching matching this criteria are then compiled into the // supplied hzVect. Note that only the nodes will be placed in the vector and not the attributes. The application will have to use the hzXmlNode // member fuctions for accessing attributes and there values if these are required. // // Arguments: 1) srchExp The search expression that decendent nodes of this node, must match to be included in the result // // Returns: Pointer to subnode matching supplied criteria // NULL If no subnode matches
hzXmlNode* pResult ; // Node pointer char* cpBuf ; // Buffer for breaking up criteria const char* i ; // Char iterator const char* x ; // Char iterator char* j ; // Buffer populator hzString tagname ; // The part of the criteria needed to name the tag hzString attrname ; // The part of the criteria needed to name the tag attribute hzString value ; // The part of the criteria needed to specify node or node attribute values bool bMatch ; // Node has passed criteria
/* ** Create a temp buffer */
cpBuf = new char[strlen(srchExp) + 1] ;
/* ** We first check for criteria of the form tagname.tagname ..... ** If we do have this we have to recurse this function */
x = strchr(srchExp, CHAR_PERIOD) ; if (x) { i = srchExp ; j = cpBuf ;
for (; *i && *i != CHAR_PERIOD ;) *j++ = *i++ ; *j = 0 ; tagname = cpBuf ;
pResult = _findsubnode(bMatch, tagname, attrname, value) ; delete cpBuf ; if (pResult && bMatch) return pResult->FindSubnode(x + 1) ; return 0 ; }
/* ** Objain the tagname, any attribute name and value from the criteria */
i = srchExp ; j = cpBuf ;
for (; *i ; i++) { if (*i == CHAR_EQUAL || (*i == '-' && i[1] == '>')) break ; *j++ = *i ; } *j = 0 ; tagname = j = cpBuf ;
if (i[0] == '-' && i[1] == '>') { for (i += 2 ; *i ; i++) { if (*i == CHAR_EQUAL) break ; *j++ = *i ; } *j = 0 ; attrname = j = cpBuf ; }
if (*i == CHAR_EQUAL) { for (i++ ; *i && *i != CHAR_SQUOTE ; i++) ; for (i++ ; *i ; i++) { if (*i == CHAR_SQUOTE) break ; *j++ = *i ; } *j = 0 ; value = cpBuf ; }
delete cpBuf ;
/* ** Find the subnode by recursive search */
pResult = _findsubnode(bMatch, tagname, attrname, value) ;
if (pResult && bMatch) return pResult ; return 0 ; }
void hzXmlNode::Export_r (hzDocXml* pDoc, hzChain& Z, uint32_t& relLine) { // Export the combined value of this node to the supplied chain. Please note this is a recursive process so the chain is not cleared. Note also the caller // must provide a reference to a 32-bit unsigned value for tracking the relative line number. This number should be set to the line number of the original // node. // // This will incorporate the node name and attributes, the direct content of the node including subtags. // // This function was introduced for the purpose of establishing an MD5 value for the node. This is used in large XML config files in which many resources // are configured, to determine which resources have changed and so need to be reloaded. // // Arguments: 1) val The chain populated with this node's combined value // 2) relLine Relative line number // // Returns: None
_hzfunc("hzXmlNode::Export_r") ;
hzAttrset ai ; // Attribute iterator hzXmlNode* pSub ; // Subnodes const char* anam ; // Attribute name const char* aval ; // Attribute value uint32_t n ; // Level iterator hzEcode rc = E_OK ; // Return code
if (!pDoc) hzexit(E_CORRUPT, "No host document supplied") ;
// Write out the opening of the tag if (m_nLine > relLine) { Z.AddByte(CHAR_NL) ; for (n = m_nCol ; n >= 4 ; n -= 4) Z.AddByte(CHAR_TAB) ; relLine = m_nLine ; }
//name = pDoc->Xlate(m_snName) ; //Z.Printf("<%s", name) ; Z.Printf("<%s", *m_Name) ;
for (ai = this ; ai.Valid() ; ai.Advance()) { anam = ai.Name() ; aval = ai.Value() ;
Z.Printf(" %s=\"%s\"", anam, aval) ; }
//if (!m_Children && !m_tmpContent.Size() && !m_fixContent) if (!m_Children && !m_fixContent) { Z << "/>" ; return ; }
Z << ">" ;
// Visit child nodes if any if (m_Children) { for (pSub = GetFirstChild() ; rc == E_OK && pSub ; pSub = pSub->Sibling()) { Z << pSub->txtPtxt() ; pSub->Export_r(pDoc, Z, relLine) ; } }
// Then do content // if (m_tmpContent.Size() || m_fixContent) if (m_fixContent) { if (m_nAnti > m_nLine) { Z.AddByte(CHAR_NL) ; for (n = m_nCol ; n > 4 ; n -= 4) Z.AddByte(CHAR_TAB) ; relLine = m_nAnti ; }
// if (m_tmpContent.Size()) // Z << m_tmpContent ; // else Z << m_fixContent ; }
// Write out the closing of the tag if (m_nAnti > m_nLine) { Z.AddByte(CHAR_NL) ; for (n = m_nCol ; n >= 4 ; n -= 4) Z.AddByte(CHAR_TAB) ; relLine = m_nAnti ; } Z.Printf("</%s>", *m_Name) ; }
void hzXmlNode::Export (hzChain& Z) { // Export the combined value of this node to the supplied chain. This function is non-recursive allowing it to clear the chain at the outset. Because nodes // have child nodes, the process is recursive so this function calls Export_r to effect the export. // // This will incorporate the node name and attributes, the direct content of the node including subtags. // // Thsis function was introduced for the purpose of establishing an MD5 value for the node. This is used in large XML config files in which many resources // are configured, to determine which resources have changed and so need to be reloaded. // // Arguments: 1) val The chain populated with this node's combined value // 2) relLine Relative line number // // Returns: None
_hzfunc("hzXmlNode::Export") ;
uint32_t relLine ; // Line management
Z.Clear() ; relLine = m_nLine ;
Export_r(m_pHostDoc, Z, relLine) ; }
hzEcode hzXmlNode::SelectSubnodes (hzVect<hzXmlNode*>& result, hzMapM<hzString,hzXmlNode*>& allsubnodes, const char* criteria) { // Select from a map of subnodes (from a call to hzXmlNode::MapAllSubnodes) according to the supplied criteria. // // The criteria is of the form tagname...tagname->param=value in which the parameter value could be missing, both // the parameter and the value coule be missing and the tagname may be singular. The tagname (if multiple) will be // applied in reverse order. A node will be selected only if it's name matches the last tagname and it has a parent // whose name matches the last but one tagname and a grandparent whose name matches the last but two tagname and so // on until there are no more tagnames to apply. // // 1) tagname // 2) level0tagname...levelNtagname // 3) ....tagname="some value" // 4) ....tagname->attribute // 5) ....tagname->attribute="some value" // // The criteria must only serve to identify a single tag or tag attribute. All nodes matching // matching this criteria are then compiled into the supplied hzVect. Note that only the // nodes will be placed in the vector and not the attributes. The application will have to // use the hzXmlNode member fuctions for accessing attributes and there values if these are // required. // // Arguments: 1) result Vector of selected nodes // 2) subnodes Map of subnodes from which to select into result // 3) criteria Selection criteria // // Returns: E_NODATA If there are no subnodes // E_OK If subnodes are selected
_hzfunc("hzXmlNode::SelectSubnodes") ;
hzVect<hzString> ar ; // List of tagnames to be applied in reverse
hzXmlNode* pnode ; // Node pointer const char* i ; // Char iterator const char* j ; // Buffer populator hzString S ; // The partial tagname uint32_t nIndex ; // Iterator uint32_t nLo ; // Iterator to first matching node uint32_t nHi ; // Iterator to first matching node uint32_t nX ; // Iterator between first and last
result.Clear() ;
// Check input data if (!allsubnodes.Count()) return E_NODATA ;
// If no filter if (!criteria || !criteria[0]) { for (nIndex = 0 ; nIndex < allsubnodes.Count() ; nIndex++) { pnode = allsubnodes.GetObj(nIndex) ; result.Add(pnode) ; } return E_OK ; }
// Obtain the list of tagnames from the criteria. for (j = i = criteria ; *i ; i++) { if (i[0] == CHAR_PERIOD) { // Make a name string and continue S.SetValue(j, i) ; ar.Add(S) ; j = i + 1 ; continue ; }
if (i[0] == CHAR_EQUAL || (i[0] == CHAR_MINUS && i[1] == CHAR_MORE)) { // Make a name string and break S.SetValue(j, i) ; ar.Add(S) ; j = 0 ; break ; } } if (j) { S = j ; ar.Add(S) ; }
// Locate all nodes with name of last tagname // Apply list of tagnames in reverse for (nIndex = ar.Count() ; nIndex ; nIndex--) { S = ar[nIndex-1] ; }
nLo = allsubnodes.First(S) ; if (nLo < 0) return E_NODATA ; nHi = allsubnodes.Last(S) ;
for (nX = nLo ; nX <= nHi ; nX++) { pnode = allsubnodes.GetObj(nX) ; if (pnode->txtName() != S) continue ;
// We now have a node with name match on last tagname. If there are no more tagnames we add this to the list if (ar.Count() == 1) { result.Add(pnode) ; continue ; } }
return E_OK ; }
bool hzXmlNode::IsAncestor (hzXmlNode* candidate) { // Is the candidate node an ancestor of this node? // // Arguments: 1) candidate The XML node to be tested as an ancestor of this node // // Returns: True If the candidate node is an ancestor of this // False Otherwise
hzXmlNode* pNode ; // XML node pointer
// This node cannot have a non-ancestor. if (!candidate) return false ;
// If this node is at the same or lower level than the candidate then the candidate cannot be an ancestor of this node. if (m_nLevel <= candidate->m_nLevel) return false ;
// Starting at the this node's level we work back to the candidate node's level. for (pNode = this ; pNode->m_nLevel > candidate->m_nLevel ; pNode = pNode->Parent()) ;
// Now pNode is on the same level as the candidate. If the pNode and the candidate are the same node, the candidate is an ancestor // of this node.
return pNode == candidate ? true : false ; }
hzString hzXmlNode::Filename (void) const { // Category: Diagnostics // // This provides the name of the XML file used as the source of XML this XML node is a part. This is useful in diagnosing config errors as config // files can be spread accross more than one XML file (eg SiteServer configs). // // Arguments: None // Returns: Instance of hzString by value being document name
hzString x ; // Target string
if (m_pHostDoc) return m_pHostDoc->Filename() ; else return x ; }
const char* hzXmlNode::Fname (void) const { // Category: Diagnostics // // This provides the name of the XML file used as the source of XML this XML node is a part. This is useful in diagnosing config errors as config // files can be spread accross more than one XML file (eg SiteServer configs). // // Arguments: None // Returns: Filename of host document if this is known, 0 otherwise.
return m_pHostDoc ? m_pHostDoc->Fname() : 0 ; }
void hzDocXml::listnodes (void) { // Category: Diagnostics // // List all nodes in this XML document for diagnostic purposes // // Arguments: None // Returns: None
hzChain Z ; // Chain to build output hzAttrset ai ; // Attribute iterator hzLogger* pLog ; // Logger for the thread hzXmlNode* pNode ; // XML node const char* anam ; // Name derived from string number const char* aval ; // Attribute value uint32_t nN ; // XML Document node iterator
pLog = GetThreadLogger() ; if (!pLog) return ;
Z.Printf("LISTING %d NODES for file %s\n", m_arrNodes.Count(), *m_Filename) ;
for (nN = 0 ; nN < m_arrNodes.Count() ; nN++) { pNode = m_arrNodes.InSitu(nN) ;
Z.Printf("Parent %u ID %u Level %d Firstchild %u sibling %u Line %d", pNode->ParentId(), pNode->Uid(), pNode->Level(), pNode->FirstChildId(), pNode->SiblingId(), pNode->Line()) ;
ai = pNode ; if (!ai.Valid()) Z.Printf("<%s>\n", pNode->txtName()) ; else { Z.Printf("<%s ", pNode->txtName()) ; for (; ai.Valid() ; ai.Advance()) { anam = ai.Name() ; aval = ai.Value() ;
Z.Printf(" %s='%s'", anam, aval) ; } Z << ">\n" ; } }
Z.Printf("END LIST NODES\n") ;
pLog->Log(Z) ; }
hzXmlNode* hzDocXml::GetNode (uint32_t nodeId) const { // Find node by node id (position in document) // // Argument: nodeId // // Returns: Node pointer or NULL
if (!nodeId) return 0 ;
if (nodeId > m_arrNodes.Count()) return 0 ;
return m_arrNodes.InSitu(nodeId-1) ; }
bool _testHtag (hzString& tagval, hzChain::Iter& ci) { // Determine if we are at a HTML tag as opposed to a XML tag. This is important because under some circumstances, HTML tags can be legally form part of the // content of an XML tag. In such circumstances it is important incidence of HTML tags do not give rise to an XML node. // // The function tests if the supplied iterator is at the start of a HTML tag/antitag. If it is then the iterator is advanced to the end of the tag/antitag // and the tag/antitag (complete with any attributes) is returned as a string. If the test fails the iterator is not advanced and the returned string left // empty. // // Arguments: 1) tagval Reference to string populated in the even of a HTML tag // 2) ci Input chain iterator // // Returns: True If the iterator is at the start of a legal HTML tag // False Otherwise
hzChain W ; // For building complete tag chIter zi ; // Chain iterator hzHtagtype tt ; // HTML tag type char* i ; // For testing tag value hzString S ; // Possible HTML tag/antitag uint32_t len ; // For limiting test value char quote = 0 ; // Quote state (either single or double) char buf [20] ; // For compiling test value
tagval.Clear() ; zi = ci ;
if (zi.eof()) return false ; if (*zi != CHAR_LESS) return false ;
zi++ ; if (*zi == CHAR_FWSLASH) zi++ ;
for (i = buf, len = 0 ; len < 18 && !zi.eof() && IsAlpha(*zi) ; *i++ = *zi, len++, zi++) ; *i = 0 ; S = buf ;
tt = Txt2Tagtype(S) ; if (tt == HTAG_NULL) return false ;
// We do have a HTML tag so we need to populate tagval with the complete tag for (zi = ci ; !zi.eof() ; zi++) { W.AddByte(*zi) ;
if (quote) { if (*zi == quote) quote = 0 ; continue ; }
if (*zi == CHAR_MORE) break ;
if (*zi == CHAR_SQUOTE) quote = CHAR_SQUOTE ; if (*zi == CHAR_DQUOTE) quote = CHAR_DQUOTE ; }
if (*zi != CHAR_MORE) return false ;
tagval = W ; return true ; }
hzEcode hzDocXml::Load (hzChain& Z) { // Loads an XML document supplied as a chain into a tree of XML nodes // // Arguments: 1) Z The chain containing a full XML document // // Returns: E_FORMAT If this XML document does not conform to XML // E_OK If this XML document loaded successfully
_hzfunc("hzDocXml::Load") ;
hzList<hzString>::Iter exI ; // Excluded tags iterator if required
std::ifstream is ; // Input stream hzChain::Iter ci ; // Chain iterator hzChain::Iter xi ; // Chain iterator for inner loop
hzChain nodeContent ; // Chain for building node content hzXmlNode* pCN = 0 ; // Current XML node hzXmlNode* pNN ; // New XML node hzString Test ; // To test if current tag is being closed hzString tagval ; // Used by _ishtmltag() to test if we are currently at a HTML tag uint32_t tagstate ; // Type of tag (-1 error, 0 no tag, 1 open tag 2 closed tag) bool bNewline ; // True if we are at start of a line hzEcode rc = E_OK ; // Return code
m_Error.Clear() ;
// Can encounter chars with top bit set before the XML document gets going. Don't know why but if they occur, bypass them. ci = Z ; for (; *ci & 0x80 ; ci++) ;
// If the is an XML header, processes it if (ci == "<?xml") { // Skip to the doctype for (ci++ ; *ci != CHAR_MORE ; ci++) ; for (ci++ ; *ci <= CHAR_SPACE ; ci++) ; }
// If the is an XML doctype, processes it if (ci.Equiv("<!doctype")) { // For now just skip doctype
for (ci += 9 ; *ci && *ci <= CHAR_SPACE ; ci++) ;
tagstate = 1 ; for (; tagstate && *ci ; ci++) { if (*ci == CHAR_LESS) tagstate++ ; if (*ci == CHAR_MORE) tagstate-- ; } }
/* ** Process document */
bNewline = true ;
for (; !ci.eof() ;) { // Handle newlines. Exclude lines begining with # and remove whitespace from start of line if (*ci == CHAR_CR) ci++ ;
if (*ci == CHAR_NL) { bNewline = true ; ci++ ; continue ; }
if (bNewline) { // Add the newline to the node content if there is a node and there is already some content! if (pCN && nodeContent.Size()) nodeContent.AddByte(CHAR_NL) ;
bNewline = false ; }
if (!pCN) { // If there is no current tag (the initial condition), the only acceptable char is the opening '<' of a tag.
if (*ci != CHAR_LESS) { m_Error.Printf("File %s Line %d: Encountered char (%c:%d) outside scope of any tag\n", *m_Filename, ci.Line(), *ci, *ci) ; rc = E_SYNTAX ; break ; } }
if (*ci == CHAR_LESS) { // Remove HTML type comments if (ci == "<!--") { for (ci += 4 ; !ci.eof() ; ci++) { if (*ci == CHAR_MINUS && ci == "-->") break ; } if (ci.eof()) { m_Error.Printf("File %s Line %d: HTML comment block begins which is not terminated\n", *m_Filename, ci.Line()) ; rc = E_SYNTAX ; break ; } ci += 3 ; continue ; }
// Handle <![CDATA[...]]> block by converting the innards to straight data if (ci == "<![CDATA[") { xi = ci ; for (xi += 9 ; !xi.eof() ; xi++) { if (xi == "]]>") { xi += 3 ; ci = xi ; break ; } nodeContent.AddByte(*xi) ; } // Bypass the entity conversions pCN->SetCDATA(nodeContent) ; nodeContent.Clear() ; continue ; }
// If bXmlesce is set, treat any legal HTML tag or antitag as node content if (m_bXmlesce) { // Call _testHtag to see if the < marks the start of a HTML tag/antitag. If it does it is added to the content of the current node. if (_testHtag(tagval, ci)) { nodeContent << *tagval ; ci += tagval.Length() ; continue ; } }
if (pCN && pCN->IsXmlesce()) { // The current tag allows HTML tags as content if (_testHtag(tagval, ci)) { threadLog("m_bXmlesce is on node=%s\n", *tagval) ; nodeContent << *tagval ; ci += tagval.Length() ; continue ; } }
/* ** Handle tag open */
// threadLog("Calling proctagopen with par at %p\n", pCN) ;
tagstate = _proctagopen(&pNN, pCN, ci) ;
if (tagstate < 0) { m_Error.Printf("File %s Line %d: Bad tag format\n", *m_Filename, ci.Line()) ; break ; }
if (tagstate == 0) { // At a tag open. Write out any content gathered for the current tag (if any), and set this as pretext in the new node. Then set the current node to the new node // (this will be reverted when the new node is closed)
if (pNN) { if (nodeContent.Size()) pNN->SetPretext(nodeContent) ; nodeContent.Clear() ;
pCN = pNN ; if (!m_pRoot) { // Add root and set root parent to this document m_pRoot = pNN ; } m_NodesName.Insert(pNN->txtName(), pNN->GetUid()) ; } ci++ ; continue ; }
if (tagstate == 1) { // At a self closing tag. This is a valid new node and will need adding to the document, together with its pretext. However the current node is not set to this new // node as it would be in the open tag case.
if (pNN) { if (nodeContent.Size()) pNN->SetPretext(nodeContent) ; nodeContent.Clear() ;
if (!m_pRoot) { // Add root and set root parent to this document m_pRoot = pNN ; } m_NodesName.Insert(pNN->txtName(), pNN->GetUid()) ; } ci++ ; continue ; } rc = E_OK ;
/* ** Handle tag close */
if (_istagclose(Test, ci)) { if (Test != pCN->txtName()) { m_Error.Printf("File %s Line %d: Mismatched XML tags: Current <%s> line %d closing <%s>", *m_Filename, ci.Line(), pCN->txtName(), pCN->Line(), *Test) ; rc = E_FORMAT ; break ; }
if (nodeContent.Size()) pCN->SetContent(nodeContent) ; nodeContent.Clear() ; pCN->_setanti(ci.Line()) ; pCN = pCN->Parent() ; if (pCN == 0) break ; ci++ ; continue ; } }
if (pCN == 0) break ;
// Remove tabs if (*ci == CHAR_TAB) nodeContent.AddByte(CHAR_SPACE) ; else nodeContent.AddByte(*ci) ; ci++ ; }
if (pCN) { m_Error.Printf("File %s Line %d: End of file encountered whilst inside tag definition\n", *m_Filename, ci.Line()) ; rc = E_FORMAT ; }
return rc ; }
hzEcode hzDocXml::Load (const char* fpath) { // Loads an XML document into a tree of XML nodes // // Arguments: 1) fpath Pathname of XML document file // // Returns: E_ARGUMENT If the file path is not supplied // E_NOTFOUND If the file path does not exist // E_NODATA If the XML file is empty // E_OPENFAIL If the XML file cannot be read // E_FORMAT If the XML file contains malformed tags // E_OK If the XML file is successfully loaded
_hzfunc("hzDocXml::Load") ;
ifstream is ; // Input stream hzChain Z ; // Chain for holding file content hzEcode rc ; // Return code
rc = OpenInputStrm(is, fpath) ; if (rc != E_OK) return rc ;
Z << is ; is.close() ; is.clear() ;
m_Filename = fpath ; rc = Load(Z) ;
return rc ; }
void hzDocXml::Clear (void) { // Deletes all nodes from XML tree // // Arguments: None // Returns: None
_hzfunc("hzDocXml::Clear") ;
m_pRoot = 0 ; m_arrNodes.Clear() ; m_NodesName.Clear() ; m_NodesPar.Clear() ; m_NodeAttrs.Clear() ; m_Xmlesce.Clear() ; m_Dict.Clear() ; }
hzEcode hzDocXml::FindNodes (hzVect<hzXmlNode*>& Nodes, const char* srchExp) { // Find all nodes within an XML document meeting the supplied search expression. // // The critieria MUST as a minimum, specify the name of the XML nodes sought. The criteria can optionally specify an attribute the node must contain and can // go on to require that the attribute has a particular value. In addition, node ancestry may be specified. // // The notation convention is to use the :: symbol between tags to show ancestry, the -> symbol to name the tag attribute and the = symbol to specify a tag // attribute value. Criteria can thus be of the forms:- // // 1) tagname // 2) level_N-1_tagname::level_N_tagname // 3) ...tagname->attribute_name // 4) ...tagname->attribute_name=attribute_value // // All nodes matching this criteria are added to the supplied vector and appear in their order of incidence in the XML document. // // Arguments: 1) Nodes Vector of nodes selected // 2) srchExp Selection criteria // // Returns: E_ARGUMENT If no selection criteria is supplied // E_NOTFOUND If no nodes were selected // E_OK If nodes were selected
_hzfunc("hzDocXml::FindNodes") ;
hzAttrset ai ; // Attribute iterator hzXmlNode* pN ; // Node pointer const char* anam ; // Name derived from string number const char* aval ; // Attribute value const char* i ; // Char iterator char* j ; // Buffer populator char* cpBuf ; // Buffer for breaking up criteria hzString tagname ; // The part of the criteria needed to name the tag hzString attrname ; // The part of the criteria needed to name the tag attribute hzString value ; // The part of the criteria needed to specify node or node attribute values uint32_t nodeNo ; // Node number uint32_t nLo ; // First instance of tagname in m_AllNodes uint32_t nHi ; // Last instance of tagname in m_AllNodes uint32_t nIndex ; // Node iterator bool bInclude ; // Node has passed criteria
Nodes.Clear() ;
if (!srchExp || !srchExp[0]) return E_ARGUMENT ;
/* ** Objain the tagname, any attribute name and value from the criteria */
j = cpBuf = new char[strlen(srchExp) + 1] ; for (i = srchExp ; *i ; *j++ = *i++) { if (*i == CHAR_EQUAL || (*i == '-' && i[1] == '>')) break ; } *j = 0 ; tagname = j = cpBuf ;
if (i[0] == '-' && i[1] == '>') { for (i += 2 ; *i ; *j++ = *i++) { if (*i == CHAR_EQUAL) break ; } *j = 0 ; attrname = j = cpBuf ; }
if (*i == CHAR_EQUAL) { for (i++ ; *i && *i != CHAR_DQUOTE ; i++) ; for (i++ ; *i && *i != CHAR_DQUOTE ; *j++ = *i++) ; *j = 0 ; value = cpBuf ; }
delete cpBuf ;
/* ** First obtain the nodes */
nLo = m_NodesName.First(tagname) ; if (nLo < 0) { threadLog("Cannot locate a tag of [%s] in tree\n", *tagname) ; return E_NOTFOUND ; } nHi = m_NodesName.Last(tagname) ;
for (nIndex = nLo ; nIndex <= nHi ; nIndex++) { nodeNo = m_NodesName.GetObj(nIndex) ; pN = m_arrNodes.InSitu(nodeNo-1) ;
bInclude = false ; if (attrname) { // To qualify, the node must have an attribute named attrname. If there is also a specified // value, then the attribute must be of this value.
// pAttr = pN->GetAttributes() ; // for (nA = 0 ; nA < pN->GetNoAttrs() ; nA++) // { // anam = Xlate(pAttr[nA].snName) ; // aval = Xlate(pAttr[nA].snValue) ;
for (ai = pN ; ai.Valid() ; ai.Advance()) { anam = ai.Name() ; aval = ai.Value() ;
if (attrname == anam) { // We have the attribute, but if there is a value this must match as well
if (!value || value == aval) bInclude = true ; break ; } } } else { if (!value || pN->m_fixContent == value) bInclude = true ; }
if (bInclude) Nodes.Add(pN) ; }
return E_OK ; }
hzString hzDocXml::GetValue (hzXmlNode* pRoot, hzString& Nodename, hzString& Info) { // Using a supplied starting node (arg 1) to define a sub-tree of the current document's tree of XML nodes (tags), obtain the set of nodes // whose name matches the supplied node-name (arg 2). Then, depending on the value of the supplied control string (arg 3), build the string // to be returned by value, as one of the following:- // // 1) Info="aggr". Aggregate the content of all the matching nodes. // 2) Info="node". Take the content from the first matching node. // 3) Other value. Take this value as the attribute name. The result will then be the attribute value (if found) of the first matching // node. // // Arguments: 1) pRoot Starting node // 2) Nodename Name nodes must have to be processed // 3) Info Processing directive // // Returns: Instance of hzString by value containing the requested sub-tree
hzVect<hzXmlNode*> nodelist ; // List of subnodes of (node supplied in arg 1) matching m_Slct
hzChain X ; // For aggregating content from a series of like nodes hzAttrset ai ; // Attribute iterator hzXmlNode* pN ; // Node pointer const char* anam ; // Attribute value hzString S ; // Output value (tag value garnered) uint32_t nIndex ; // Iterator for nodelist
if (!pRoot) return S ;
pRoot->FindSubnodes(nodelist, *Nodename) ;
if (!nodelist.Count()) return S ;
pN = nodelist[0] ;
if (Info == "aggr") { // We need a series of nodes meeting the criteria defined in m_Slct, not just a single node
for (nIndex = 0 ; nIndex < nodelist.Count() ; nIndex++) { pN = nodelist[nIndex] ; X << pN->m_fixContent ; } S = X ; } else if (Info == "node") S = pN->m_fixContent ; else { if (memcmp(*Info, "->", 2) == 0) { for (ai = pN ; ai.Valid() ; ai.Advance()) { anam = ai.Name() ;
if (!strcmp(anam, *Info + 2)) { S = ai.Value() ; break ; } } } }
return S ; }
hzEcode hzDocXml::Export (hzChain& Z) { // Exports the in-memory XML document as XML. Place resulting text in the supplied chain // // Arguments: 1) Z Chain to be populated by this operation as the XML form of this document // // Returns: E_NODATA If this XML document is empty // E_OK If the document is exported
_hzfunc("hzDocXml::Export") ;
uint32_t relLine = 0 ; // For line management
Z.Clear() ;
if (!m_pRoot) { threadLog("Empty document\n") ; return E_NODATA ; }
if (m_Info.m_urlReq) Z.Printf("URL (req): %s\n", *m_Info.m_urlReq) ; if (*m_Info.m_urlAct) Z.Printf("URL (act): %s\n", *m_Info.m_urlAct) ;
m_pRoot->Export_r(this, Z, relLine) ; return E_OK ; }
hzEcode hzDocXml::Export (const hzString& fpath) { // Exports the in-memory XML document as XML. Place resulting text in the supplied filepath // // Arguments: 1) fpath Pathname of exported XML file // // Returns: E_ARGUMENT If not export pathname is supplied // E_NODATA If this XML document is empty // E_OPENFAIL If the export file cannot be opened for writing // E_OK If this document is exported
_hzfunc("hzDocXml::Export(file)") ;
ofstream os ; // Output stream hzChain Z ; // Chain for output construction hzEcode rc ; // Return code
if (!fpath) return hzwarn(E_ARGUMENT, "Document un-named") ; if (!m_pRoot) return hzwarn(E_NODATA, "Document empty") ;
os.open(*fpath) ; if (os.fail()) return hzerr(E_OPENFAIL, "Could not open file %s\n", *fpath) ;
rc = Export(Z) ; if (rc == E_OK) os << Z ; os.close() ; return rc ; }