Loads an XML document supplied as a chain into a tree of XML nodes
| Return Type | Function name | Arguments |
|---|---|---|
| hzEcode | hzDocXml::Load | (hzChain&) |
Declared in file: hzDocument.h
Defined in file : hzDocXml.cpp
Function Logic:
Function body:
hzEcode hzDocXml::Load (hzChain& Z)
{
	// Loads an XML document supplied as a chain into a tree of XML nodes.
	//
	// The chain is scanned once. An optional "<?xml ...>" header and an optional "<!doctype ...>" declaration are skipped, then the body is processed
	// character by character: HTML style comments are dropped, CDATA blocks are passed to the current node via SetCDATA(), tags are opened and closed
	// via _proctagopen() and _istagclose(), and everything else is accumulated as content of the current node (tabs become spaces).
	//
	// Arguments:	1)	Z	The chain containing a full XML document
	//
	// Returns:	E_FORMAT	If this XML document does not conform to XML (m_Error then holds a description)
	//			E_OK		If this XML document loaded successfully

	_hzfunc("hzDocXml::Load") ;

	hzChain::Iter	ci ;				//	Chain iterator
	hzChain::Iter	xi ;				//	Chain iterator for inner loop (CDATA scanning)
	hzChain			nodeContent ;		//	Chain for building node content
	hzXmlNode*		pCN = 0 ;			//	Current XML node
	hzXmlNode*		pNN = 0 ;			//	New XML node (set by _proctagopen)
	hzString		Test ;				//	Name of tag being closed, as reported by _istagclose()
	hzString		tagval ;			//	HTML tag text as reported by _testHtag()
	int32_t			tagstate ;			//	Must be signed: a negative result from _proctagopen is treated as an error, 0 as an open tag, 1 as a self closing tag.
										//	Also used as a nesting counter while skipping the doctype.
	bool			bNewline ;			//	True if we are at start of a line
	bool			bClosed ;			//	True once the terminator of a CDATA block has been found
	hzEcode			rc = E_OK ;			//	Return code

	m_Error.Clear() ;

	//	Can encounter chars with top bit set before the XML document gets going. Don't know why but if they occur, bypass them.
	ci = Z ;
	for (; *ci & 0x80 ; ci++) ;

	//	If there is an XML header, skip past it
	if (ci == "<?xml")
	{
		//	Skip to the closing '>' of the header and then over any whitespace that follows. Both loops stop at end of input so that a truncated
		//	header cannot cause an endless loop.
		for (ci++ ; !ci.eof() && *ci != CHAR_MORE ; ci++) ;
		for (ci++ ; !ci.eof() && *ci <= CHAR_SPACE ; ci++) ;
	}

	//	If there is an XML doctype, skip past it
	if (ci.Equiv("<!doctype"))
	{
		//	For now just skip doctype. The counter tracks nesting of '<' and '>' so that an internal subset is skipped as well.
		for (ci += 9 ; *ci && *ci <= CHAR_SPACE ; ci++) ;

		tagstate = 1 ;
		for (; tagstate && *ci ; ci++)
		{
			if (*ci == CHAR_LESS)	tagstate++ ;
			if (*ci == CHAR_MORE)	tagstate-- ;
		}
	}

	/*
	**	Process document
	*/

	bNewline = true ;

	for (; !ci.eof() ;)
	{
		//	Handle newlines. A CR is dropped, a NL is only noted at this point.
		if (*ci == CHAR_CR)
			ci++ ;
		if (*ci == CHAR_NL)
			{ bNewline = true ; ci++ ; continue ; }

		if (bNewline)
		{
			//	Add the newline to the node content if there is a node and there is already some content!
			if (pCN && nodeContent.Size())
				nodeContent.AddByte(CHAR_NL) ;
			bNewline = false ;
		}

		if (!pCN)
		{
			//	If there is no current tag (the initial condition), the only acceptable char is the opening '<' of a tag.
			if (*ci != CHAR_LESS)
			{
				m_Error.Printf("File %s Line %d: Encountered char (%c:%d) outside scope of any tag\n", *m_Filename, ci.Line(), *ci, *ci) ;
				rc = E_FORMAT ;
				break ;
			}
		}

		if (*ci == CHAR_LESS)
		{
			//	Remove HTML type comments
			if (ci == "<!--")
			{
				for (ci += 4 ; !ci.eof() ; ci++)
				{
					if (*ci == CHAR_MINUS && ci == "-->")
						break ;
				}

				if (ci.eof())
				{
					m_Error.Printf("File %s Line %d: HTML comment block begins which is not terminated\n", *m_Filename, ci.Line()) ;
					rc = E_FORMAT ;
					break ;
				}

				ci += 3 ;
				continue ;
			}

			//	Handle <![CDATA[...]]> block by converting the innards to straight data
			if (ci == "<![CDATA[")
			{
				//	A CDATA block needs a node to belong to. Without this check the SetCDATA call below would dereference a null pointer.
				if (!pCN)
				{
					m_Error.Printf("File %s Line %d: CDATA block encountered outside scope of any tag\n", *m_Filename, ci.Line()) ;
					rc = E_FORMAT ;
					break ;
				}

				bClosed = false ;
				xi = ci ;
				for (xi += 9 ; !xi.eof() ; xi++)
				{
					if (xi == "]]>")
						{ xi += 3 ; ci = xi ; bClosed = true ; break ; }
					nodeContent.AddByte(*xi) ;
				}

				//	If the terminator was never found ci has not moved, so carrying on would re-enter this branch forever.
				if (!bClosed)
				{
					m_Error.Printf("File %s Line %d: CDATA block begins which is not terminated\n", *m_Filename, ci.Line()) ;
					rc = E_FORMAT ;
					break ;
				}

				//	Bypass the entity conversions
				pCN->SetCDATA(nodeContent) ;
				nodeContent.Clear() ;
				continue ;
			}

			//	If bXmlesce is set, treat any legal HTML tag or antitag as node content
			if (m_bXmlesce)
			{
				//	Call _testHtag to see if the < marks the start of a HTML tag/antitag. If it does it is added to the content of the current node.
				if (_testHtag(tagval, ci))
				{
					nodeContent << *tagval ;
					ci += tagval.Length() ;
					continue ;
				}
			}

			if (pCN && pCN->IsXmlesce())
			{
				//	The current tag allows HTML tags as content
				if (_testHtag(tagval, ci))
				{
					threadLog("m_bXmlesce is on node=%s\n", *tagval) ;
					nodeContent << *tagval ;
					ci += tagval.Length() ;
					continue ;
				}
			}

			/*
			**	Handle tag open
			*/

			tagstate = _proctagopen(&pNN, pCN, ci) ;

			if (tagstate < 0)
			{
				//	Report the failure to the caller as well as in m_Error
				m_Error.Printf("File %s Line %d: Bad tag format\n", *m_Filename, ci.Line()) ;
				rc = E_FORMAT ;
				break ;
			}

			if (tagstate == 0 || tagstate == 1)
			{
				//	At a tag open (0) or a self closing tag (1). Either way this is a valid new node: write out any content gathered so far as pretext of
				//	the new node and register the node. Only in the open tag case does the new node become the current node (this is reverted when the new
				//	node is closed).
				if (pNN)
				{
					if (nodeContent.Size())
						pNN->SetPretext(nodeContent) ;
					nodeContent.Clear() ;

					if (tagstate == 0)
						pCN = pNN ;

					//	The first node encountered becomes the document root
					if (!m_pRoot)
						m_pRoot = pNN ;

					m_NodesName.Insert(pNN->txtName(), pNN->GetUid()) ;
				}

				ci++ ;
				continue ;
			}

			/*
			**	Handle tag close
			*/

			if (_istagclose(Test, ci))
			{
				//	A closing tag with no node open has nothing to close, and the name comparison below would dereference a null pointer.
				if (!pCN)
				{
					m_Error.Printf("File %s Line %d: Closing tag </%s> encountered outside scope of any tag\n", *m_Filename, ci.Line(), *Test) ;
					rc = E_FORMAT ;
					break ;
				}

				if (Test != pCN->txtName())
				{
					m_Error.Printf("File %s Line %d: Mismatched XML tags: Current <%s> line %d closing <%s>", *m_Filename, ci.Line(), pCN->txtName(), pCN->Line(), *Test) ;
					rc = E_FORMAT ;
					break ;
				}

				if (nodeContent.Size())
					pCN->SetContent(nodeContent) ;
				nodeContent.Clear() ;

				pCN->_setanti(ci.Line()) ;
				pCN = pCN->Parent() ;

				//	Stepping up from a node without a parent means the root has been closed, so the document is complete
				if (pCN == 0)
					break ;

				ci++ ;
				continue ;
			}
		}

		//	A '<' that was neither a tag open nor a tag close, with no node to hold it as content: stop processing
		if (pCN == 0)
			break ;

		//	Ordinary content. Tabs are replaced by spaces.
		if (*ci == CHAR_TAB)
			nodeContent.AddByte(CHAR_SPACE) ;
		else
			nodeContent.AddByte(*ci) ;
		ci++ ;
	}

	//	If no error has been reported yet but a node is still open, the input ran out before the node was closed. The test on rc stops this message
	//	being added on top of a more specific error reported above.
	if (rc == E_OK && pCN)
	{
		m_Error.Printf("File %s Line %d: End of file encountered whilst inside tag definition\n", *m_Filename, ci.Line()) ;
		rc = E_FORMAT ;
	}

	return rc ;
}