Loads an XML document supplied as a chain into a tree of XML nodes

Return Type | Function name | Arguments
hzEcode | hzDocXml::Load | (hzChain&)

Declared in file: hzDocument.h
Defined in file : hzDocXml.cpp

Function Logic:

0:START 1:items ci 2:unknown 3:unknown 4:unknown 5:unknown 6:unknown 7:unknown 8:tagstate 9:unknown 10:unknown 11:items 12:unknown 13:items 14:bNewline 15:unknown 16:unknown 17:items 18:unknown 19:bNewline items 20:unknown 21:unknown 22:items 23:bNewline 24:unknown 25:unknown 26:items rc 27:unknown 28:unknown 29:unknown 30:unknown 31:unknown 32:items rc 33:ci 34:unknown 35:xi 36:unknown 37:unknown 38:xi ci 39:items 40:items items 41:unknown 42:unknown 43:items ci 44:unknown 45:unknown 46:items items ci 47:tagstate 48:unknown 49:items 50:unknown 51:unknown 52:unknown 53:items 54:items pCN 55:unknown 56:m_pRoot 57:items 58:items 59:unknown 60:unknown 61:unknown 62:items 63:items 64:unknown 65:m_pRoot 66:items 67:items 68:rc 69:unknown 70:unknown 71:items rc 72:unknown 73:items 74:items items pCN 75:unknown 76:items 77:unknown 78:unknown 79:items 80:items 81:items 82:unknown 83:items rc 84:Return rc

Function body:

hzEcode hzDocXml::Load (hzChain& Z)
{
    //  Loads an XML document supplied as a chain into a tree of XML nodes.
    //
    //  Leading bytes with the top bit set are skipped, then an optional <?xml ...> header and an optional <!doctype ...> declaration are skipped. The rest of the chain is
    //  scanned one character at a time: comments are dropped, CDATA blocks are handed to the current node, opening and self closing tags create new nodes by means of
    //  _proctagopen(), closing tags finalise the current node, and every other character is accumulated as content of the current node.
    //
    //  Arguments:  1)  Z   The chain containing a full XML document
    //
    //  Returns:    E_FORMAT    If this XML document does not conform to XML (a description is written to m_Error)
    //              E_OK        If this XML document loaded successfully

    _hzfunc("hzDocXml::Load") ;

    hzChain::Iter   ci ;                //  Chain iterator (current parse position)
    hzChain::Iter   xi ;                //  Chain iterator for scanning ahead inside a CDATA block
    hzChain         nodeContent ;       //  Chain for building node content
    hzXmlNode*      pCN = 0 ;           //  Current XML node
    hzXmlNode*      pNN = 0 ;           //  New XML node (set by _proctagopen)
    hzString        Test ;              //  Tag name populated by _istagclose()
    hzString        tagval ;            //  HTML tag text populated by _testHtag()
    uint32_t        tagstate ;          //  Result of _proctagopen(): this function treats 0 as an opening tag and 1 as a self closing tag
    uint32_t        depth ;             //  Angle bracket nesting level while skipping the doctype
    bool            bNewline ;          //  True if we are at start of a line
    bool            bClosed ;           //  True once the terminator of a CDATA block has been found
    hzEcode         rc = E_OK ;         //  Return code

    m_Error.Clear() ;

    //  Can encounter chars with top bit set before the XML document gets going. Don't know why but if they occur, bypass them.
    ci = Z ;
    for (; *ci & 0x80 ; ci++) ;

    //  If there is an XML header, skip it and any whitespace that follows. The end-of-chain tests stop a truncated header from looping forever.
    if (ci == "<?xml")
    {
        for (; !ci.eof() && *ci != CHAR_MORE ; ci++) ;
        if (!ci.eof())
            ci++ ;
        for (; !ci.eof() && *ci <= CHAR_SPACE ; ci++) ;
    }

    //  If there is an XML doctype, skip it. The doctype may contain nested <...> so count angle brackets until the opening one is balanced.
    if (ci.Equiv("<!doctype"))
    {
        for (ci += 9 ; *ci && *ci <= CHAR_SPACE ; ci++) ;

        depth = 1 ;
        for (; depth && *ci ; ci++)
        {
            if (*ci == CHAR_LESS)   depth++ ;
            if (*ci == CHAR_MORE)   depth-- ;
        }
    }

    /*
    **  Process document
    */

    bNewline = true ;

    for (; !ci.eof() ;)
    {
        //  Handle line ends. A carriage return is dropped and a newline is not copied directly: it only sets bNewline.
        if (*ci == CHAR_CR)
            ci++ ;
        if (*ci == CHAR_NL)
            { bNewline = true ; ci++ ; continue ; }

        if (bNewline)
        {
            //  Add the newline to the node content if there is a node and there is already some content!
            if (pCN && nodeContent.Size())
                nodeContent.AddByte(CHAR_NL) ;
            bNewline = false ;
        }

        //  If there is no current tag (the initial condition), the only acceptable char is the opening '<' of a tag.
        if (!pCN && *ci != CHAR_LESS)
        {
            m_Error.Printf("File %s Line %d: Encountered char (%c:%d) outside scope of any tag\n", *m_Filename, ci.Line(), *ci, *ci) ;
            rc = E_FORMAT ;
            break ;
        }

        if (*ci == CHAR_LESS)
        {
            //  Remove HTML type comments
            if (ci == "<!--")
            {
                for (ci += 4 ; !ci.eof() ; ci++)
                {
                    if (*ci == CHAR_MINUS && ci == "-->")
                        break ;
                }

                if (ci.eof())
                {
                    m_Error.Printf("File %s Line %d: HTML comment block begins which is not terminated\n", *m_Filename, ci.Line()) ;
                    rc = E_FORMAT ;
                    break ;
                }

                ci += 3 ;
                continue ;
            }

            //  Handle <![CDATA[...]]> block by converting the innards to straight data
            if (ci == "<![CDATA[")
            {
                //  The block is stored on the current node, so there has to be one
                if (!pCN)
                {
                    m_Error.Printf("File %s Line %d: CDATA block encountered outside scope of any tag\n", *m_Filename, ci.Line()) ;
                    rc = E_FORMAT ;
                    break ;
                }

                //  Copy bytes up to the terminator. ci is only moved once the terminator has been found.
                bClosed = false ;
                xi = ci ;
                for (xi += 9 ; !xi.eof() ; xi++)
                {
                    if (xi == "]]>")
                        { xi += 3 ; ci = xi ; bClosed = true ; break ; }
                    nodeContent.AddByte(*xi) ;
                }

                //  Without the terminator ci has not moved, so carrying on would process the same block again indefinitely
                if (!bClosed)
                {
                    m_Error.Printf("File %s Line %d: CDATA block begins which is not terminated\n", *m_Filename, ci.Line()) ;
                    rc = E_FORMAT ;
                    break ;
                }

                //  Bypass the entity conversions. Note any content gathered before the block is passed along with it.
                pCN->SetCDATA(nodeContent) ;
                nodeContent.Clear() ;
                continue ;
            }

            //  If bXmlesce is set, treat any legal HTML tag or antitag as node content
            if (m_bXmlesce)
            {
                //  Call _testHtag to see if the < marks the start of a HTML tag/antitag. If it does it is added to the content of the current node.
                if (_testHtag(tagval, ci))
                {
                    nodeContent << *tagval ;
                    ci += tagval.Length() ;
                    continue ;
                }
            }

            if (pCN && pCN->IsXmlesce())
            {
                //  The current tag allows HTML tags as content
                if (_testHtag(tagval, ci))
                {
                    threadLog("m_bXmlesce is on node=%s\n", *tagval) ;
                    nodeContent << *tagval ;
                    ci += tagval.Length() ;
                    continue ;
                }
            }

            /*
            **  Handle tag open
            */

            tagstate = _proctagopen(&pNN, pCN, ci) ;

            //  NOTE(review): tagstate is unsigned so this test can never be true. It is left unsigned on purpose: closing tags reach _istagclose() below only because
            //  this branch is not taken, and whether _proctagopen() returns a negative value for a closing tag cannot be seen from here. Confirm before making it signed.
            if (tagstate < 0)
            {
                m_Error.Printf("File %s Line %d: Bad tag format\n", *m_Filename, ci.Line()) ;
                rc = E_FORMAT ;
                break ;
            }

            if (tagstate == 0 || tagstate == 1)
            {
                //  At an opening tag (0) or a self closing tag (1). Either way any content gathered so far becomes the pretext of the new node and the new node is registered by
                //  name. Only an opening tag makes the new node the current node (this is reverted when the new node is closed).
                if (pNN)
                {
                    if (nodeContent.Size())
                        pNN->SetPretext(nodeContent) ;
                    nodeContent.Clear() ;

                    if (tagstate == 0)
                        pCN = pNN ;

                    //  The first node created becomes the document root
                    if (!m_pRoot)
                        m_pRoot = pNN ;

                    m_NodesName.Insert(pNN->txtName(), pNN->GetUid()) ;
                }

                ci++ ;
                continue ;
            }

            /*
            **  Handle tag close
            */

            if (_istagclose(Test, ci))
            {
                //  A closing tag is only meaningful if there is a tag open
                if (!pCN)
                {
                    m_Error.Printf("File %s Line %d: Closing tag <%s> encountered outside scope of any tag\n", *m_Filename, ci.Line(), *Test) ;
                    rc = E_FORMAT ;
                    break ;
                }

                if (Test != pCN->txtName())
                {
                    m_Error.Printf("File %s Line %d: Mismatched XML tags: Current <%s> line %d closing <%s>", *m_Filename, ci.Line(), pCN->txtName(), pCN->Line(), *Test) ;
                    rc = E_FORMAT ;
                    break ;
                }

                //  Commit the gathered content and step back up to the parent. Stepping up from a node with no parent ends the parse.
                if (nodeContent.Size())
                    pCN->SetContent(nodeContent) ;
                nodeContent.Clear() ;
                pCN->_setanti(ci.Line()) ;

                pCN = pCN->Parent() ;
                if (pCN == 0)
                    break ;

                ci++ ;
                continue ;
            }
        }

        //  Reached with no current node only when a '<' outside any tag was not handled by any case above. Parsing stops and rc is left as it is.
        if (pCN == 0)
            break ;

        //  Ordinary content char: copy it to the node content, replacing a tab with a space
        if (*ci == CHAR_TAB)
            nodeContent.AddByte(CHAR_SPACE) ;
        else
            nodeContent.AddByte(*ci) ;
        ci++ ;
    }

    //  A node still open with no error so far means the loop ran to the end of the chain. If an error has already been reported do not add a second, misleading one.
    if (rc == E_OK && pCN)
    {
        m_Error.Printf("File %s Line %d: End of file encountered whilst inside tag definition\n", *m_Filename, ci.Line()) ;
        rc = E_FORMAT ;
    }

    return rc ;
}