Populate the hzDocHtml object with HTML source code in the supplied chain. Two scenarios are permitted - Full or Partial - as follows: 1) Full: If the HTML source has <html> as its first tag it will be considered a full page and tested as such. It will be expected to have the standard sub-tags of <head> and <body> and their corresponding anti-tags. If either of these is missing or in error (malformed, or containing unexpected or malformed tags) the HTML source code is deemed to be syntactically in error and the load fails. 2) Partial: If the opening tag of the HTML source code is not the <html> tag, it is viable only if it would be viable as an HTML fragment that could be seamlessly inserted into the <body> part of a whole HTML page. That is to say, all its tags must be legal sub-tags of <body> and not of <head>, and neither the <body> nor the <head> tag or anti-tag may be present. In either case, tags are loaded into a tree of nodes (tags). The nodes/tags may be searched for and examined. Note: Unlike XML, where tags are named so that content in the tree can be searched directly, the nodes in HTML are not named and so cannot be definitively referenced (they only have a type). Some other process must apply application-specific criteria to read meaning into the data.

Return Type | Function name | Arguments
hzEcode | hzDocHtml::Load | (hzChain&)

Declared in file: hzDocument.h
Defined in file : hzDocHtml.cpp

Function Logic:

0:START 1:items rc 2:unknown 3:Return rc 4:m_Content 5:unknown 6:items 7:zi items 8:unknown 9:quote 10:unknown 11:unknown 12:unknown 13:quote 14:unknown 15:items 16:unknown 17:quote 18:items 19:unknown 20:unknown 21:m_pRoot 22:unknown 23:items 24:Return E_FORMAT 25:unknown 26:items zi items pCN items items 27:unknown 28:unknown 29:m_pHead 30:unknown 31:items 32:Return E_FORMAT 33:items 34:unknown 35:items 36:Return E_FORMAT 37:pCN 38:unknown 39:unknown 40:unknown 41:unknown 42:unknown 43:items 44:items items 45:nLine 46:unknown 47:unknown 48:unknown 49:zi 50:unknown 51:zi 52:unknown 53:unknown 54:unknown 55:zi 56:unknown 57:zi 58:unknown 59:unknown 60:unknown 61:zi 62:unknown 63:unknown 64:unknown 65:zi 66:items 67:unknown 68:unknown 69:unknown 70:zi 71:unknown 72:items rc 73:items items limit items items bAnti 74:unknown 75:items bAnti items 76:nColon 77:unknown 78:unknown 79:unknown 80:items 81:items items 82:tagword items 83:unknown 84:items 85:unknown 86:cDelim 87:unknown 88:unknown 89:items 90:unknown 91:unknown 92:unknown 93:items 94:wholetag items 95:unknown 96:items zi 97:items 98:unknown 99:unknown 100:tf tf tf items items items 101:unknown 102:unknown 103:items 104:items 105:items zi 106:tf 107:unknown 108:unknown 109:items 110:items 111:items zi 112:unknown 113:unknown 114:pCN 115:unknown 116:rc items 117:zi.Equiv( 118:pCN 119:unknown 120:rc items 121:zi.Equiv( 122:pCN 123:unknown 124:rc items 125:zi.Equiv( 126:pCN 127:unknown 128:rc items 129:zi.Equiv( 130:pCN 131:unknown 132:rc items 133:zi.Equiv( 134:pCN 135:unknown 136:rc items 137:unknown 138:m_Base 139:ai 140:unknown 141:m_Base 142:rc items 143:unknown 144:unknown 145:zi 146:unknown 147:items zi 148:zi 149:unknown 150:pCN 151:items 152:unknown 153:pCN items 154:unknown 155:unknown 156:unknown 157:pCN items 158:items 159:unknown 160:unknown 161:m_pBody 162:unknown 163:items 164:Return E_FORMAT 165:items 166:unknown 167:items 168:Return E_FORMAT 169:pCN 170:unknown 171:unknown 172:unknown 173:unknown 
174:items 175:items 176:unknown 177:items 178:items 179:nLine 180:unknown 181:unknown 182:unknown 183:zi 184:unknown 185:items rc 186:unknown 187:unknown 188:unknown 189:zi 190:unknown 191:items rc 192:unknown 193:unknown 194:unknown 195:zi 196:unknown 197:items rc 198:unknown 199:unknown 200:unknown 201:zi 202:unknown 203:zi 204:unknown 205:items rc 206:unknown 207:unknown 208:unknown 209:zi 210:unknown 211:zi 212:unknown 213:items rc 214:unknown 215:unknown 216:unknown 217:zi 218:unknown 219:items rc 220:items items limit items items bAnti 221:unknown 222:items bAnti items 223:nColon 224:unknown 225:unknown 226:unknown 227:items 228:items items 229:tagword items 230:unknown 231:items 232:unknown 233:cDelim 234:unknown 235:unknown 236:items 237:unknown 238:unknown 239:unknown 240:items 241:wholetag items 242:unknown 243:items zi 244:items 245:unknown 246:unknown 247:tf tf tf items items items 248:tf 249:unknown 250:unknown 251:items 252:items 253:items zi 254:unknown 255:unknown 256:zi 257:unknown 258:unknown 259:items 260:unknown 261:items 262:unknown 263:unknown 264:items 265:items zi 266:unknown 267:items zi 268:unknown 269:unknown 270:unknown 271:tmp zi 272:unknown 273:items rc 274:pNN pNN 275:unknown 276:items 277:Return E_FORMAT 278:pCN zi 279:unknown 280:pCN 281:unknown 282:unknown 283:items zi 284:unknown 285:unknown 286:items 287:unknown 288:items 289:unknown 290:unknown 291:items 292:items zi 293:zi 294:unknown 295:pCN 296:items 297:unknown 298:pCN items 299:unknown 300:unknown 301:unknown 302:pCN items 303:items 304:unknown 305:items 306:unknown 307:pX 308:unknown 309:pCurForm pForm items 310:unknown 311:unknown 312:unknown 313:P 314:unknown 315:anam aval 316:unknown 317:P 318:items 319:unknown 320:pCurForm 321:items 322:Return rc

Function body:

hzEcode hzDocHtml::Load (hzChain& Z)
{
   //  Populate the hzDocHtml object with HTML source code in the supplied chain.
   //  
   //  Two scenarios are permitted - Full or Partial as follows:-
   //   1) Full:  If the HTML source has the <html> as its first tag it will be considered as a full page and tested as such.
   //       It will be expected to have the standard sub-tags of <head> and <body> and their corresponding anti-tags.
   //       If either of these are missing or in error (malformed or containing unexpected or malformed tags) the HTML
   //       source code is deemed to be syntactically in error and the load fails.
   //  
   //   2) Partial: If no <html> tag is found, the source is viable only if it would be viable as a HTML fragment that could
   //       be seamlessly inserted into the <body> part of a whole HTML page. This is to say that all its tags must be
   //       legal sub-tags of <body> and not of <head> and nor must the <body> or <head> tag or anti-tag be present.
   //  
   //  In either case, tags are loaded into a tree of nodes (tags). The nodes/tags may be searched for and examined. 
   //  
   //  Arguments: 1) Z The chain containing the HTML document
   //  
   //  Returns: The error code from _htmPreproc() if the pre-processor rejects the HTML
   //     E_FORMAT If a mandatory tag (<html> root, <head>, <body>) is missing or could not be processed by _proctag(), if any
   //        other tag could not be processed, or if a skipped construct (script, comment etc) is never terminated
   //     E_OK  If the HTML was loaded successfully
   //  
   //  Note: Unlike XML where tags are named so that content in the tree can be searched directly, the nodes in HTML are not
   //  named and so cannot be definitely referenced (they only have type). Some other process must apply application specific
   //  criteria to read meaning into the data.
   _hzfunc("hzDocHtml::Load") ;
   hzChain         T ;             //  For building the tagword
   hzChain         W ;             //  For building the whole tag text
   chIter          zi ;            //  Chain iterator
   chIter          tmp ;           //  Look-ahead iterator (script skipping)
   chIter          limit ;         //  End of tag marker - Protection against malformed tags (NLA style)
   hzHtmElem*      pCN = 0;        //  Current HTML node
   hzHtmElem*      pNN ;           //  New HTML node
   hzHtmElem*      pX ;            //  HTML node for iteration/diagnostics
   hzHtmElem*      pCurForm = 0;   //  The <form> node whose fields are currently being collected
   hzAttrset       ai ;            //  Attribute iterator
   hzHtmForm*      pForm = 0;      //  Form found in page
   hzPair          P ;             //  Name value pair (for forms and fields)
   hzString        tagword ;       //  Just the tagname
   hzString        wholetag ;      //  The entire tag text from the '<' onwards
   hzString        anam ;          //  Attribute name
   hzString        aval ;          //  Attribute value
   hzHtagform      tf ;            //  Tag form
   uint32_t        nX ;            //  General iteration
   uint32_t        nColon ;        //  Does the tagname contain a colon (3rd party tag)
   uint32_t        nLine ;         //  Line number for errors
   uint32_t        quote ;         //  Are we in a quoted string
   bool            bAnti ;         //  Tag is an anti-tag
   bool            bFound ;        //  Terminator of a skipped construct was found
   int32_t         cDelim ;        //  Delimiting char (single/double quote)
   hzEcode         rc = E_OK ;     //  return code
   Clear() ;
   //  Pre-process the HTML
   rc = _htmPreproc(Z) ;
   if (rc != E_OK)
       return rc ;
   m_Content = Z ;
   //  Make sure the HTML tags are loaded into the lookup table
   if (!s_htagNam.Count())
       InitHtml() ;
   //  Init the iterator
   zi = Z ;
   zi.Skipwhite() ;
   //  Bypass the doctype if present. A '>' within a double quoted string does not terminate the doctype.
   if (zi.Equiv("<!DOCTYPE"))
   {
       quote = 0;
       for (zi += 9; !zi.eof() ; zi++)
       {
           if (quote)
           {
               if (*zi == CHAR_DQUOTE)
                   quote = 0;
               continue ;
           }
           if (*zi == CHAR_MORE)
               { zi++ ; break ; }
           if (*zi == CHAR_DQUOTE)
               quote = 1;
       }
       zi.Skipwhite() ;
   }
   //  Look for the opening <html>
   for (; !zi.eof() ; zi++)
   {
       if (zi.Equiv("<html"))
       {
           m_pRoot = _proctag(0,zi, HTAG_HTML) ;
           if (!m_pRoot)
               { threadLog("Could not establist root node (the <html> tag)\n") ; return E_FORMAT ; }
           break ;
       }
   }
   if (!m_pRoot)
   {
       //  No <html> tag anywhere: restart from the top and treat the source as a fragment under a HTAG_NULL pseudo node.
       //  Note tagword is still empty at this point so the pseudo node is given an empty name.
       threadLog("No valid contents found before expected <html> tag - assuming a partial page\n") ;
       zi = Z ;
       zi.Skipwhite() ;
       pCN = new hzHtmElem() ;
       pCN->Init(this, 0,tagword, HTAG_NULL, m_vecTags.Count(), zi.Line()) ;
       m_vecTags.Add(pCN) ;
   }
   else
   {
       //  A <html> tag has been found so this is a full page. Look for <head> next
       for (; !zi.eof() ;)
       {
           if (zi.Equiv("<head"))
           {
               m_pHead = _proctag(m_pRoot, zi, HTAG_HEAD) ;
               if (!m_pHead)
                   { threadLog("Could not process <head> tag\n") ; return E_FORMAT ; }
               break ;
           }
           zi++ ;
       }
       if (!m_pHead)
           { threadLog("Expected a <head> tag\n") ; return E_FORMAT ; }
       pCN = m_pHead ;
       //  Now get the subtags of <head>. The loop ends on the </head> anti-tag, on error, on end of input or when the
       //  current node becomes null.
       for (; rc == E_OK && pCN && !zi.eof() ;)
       {
           //  Handle tag content
           if (*zi != CHAR_LESS)
           {
               //  Ignore the remainder of the line after a "//" sequence
               if (zi == "//")
               {
                   for (zi += 2; !zi.eof() && *zi != CHAR_NL ; zi++) ;
                   continue ;
               }
               //  If not part of a construct, just aggregate the char to the current tag's content, stripping leading whitespace
               if (*zi <= CHAR_SPACE && pCN->m_tmpContent.Size() == 0)
                   { zi++ ; continue ; }
               pCN->m_tmpContent.AddByte(*zi) ;
               zi++ ;
               continue ;
           }
           //  Ignore conditional comments and ordinary comments. Note these cannot be nested. Within <head> an unterminated
           //  comment is not reported; it simply consumes the rest of the input.
           nLine = zi.Line() ;
           if (zi == "<!--[if")
           {
               for (zi += 7; !zi.eof() ; zi++)
               {
                   if (zi == "<![endif]>")     { zi += 10;break ; }
                   if (zi == "<![endif]-->")   { zi += 12;break ; }
               }
               continue ;
           }
           if (zi == "<![if")
           {
               for (zi += 5; !zi.eof() ; zi++)
               {
                   if (zi == "<![endif]>")     { zi += 10;break ; }
                   if (zi == "<![endif]-->")   { zi += 12;break ; }
               }
               continue ;
           }
           if (zi == "<!--")
           {
               for (zi += 4; !zi.eof() ; zi++)
               {
                   if (zi == "-->")
                       { zi += 3; break ; }
               }
               continue ;
           }
           //  Handle <![CDATA[...]]> block by converting the innards to straight data (apparently CDATA now legal in HTML)
           if (zi == "<![CDATA[")
           {
               for (zi += 9; !zi.eof() ; zi++)
               {
                   if (zi == "]]>")
                       { zi += 3; break ; }
                   pCN->m_tmpContent.AddByte(*zi) ;
               }
               continue ;
           }
           //  Eliminate <noscript> tags from header (we don't use them). The found flag (rather than an eof test) is used so
           //  that a block closed by the very last bytes of the input is not misreported as unclosed.
           if (zi == "<noscript")
           {
               bFound = false ;
               for (zi += 9; !zi.eof() ; zi++)
               {
                   if (zi == "</noscript>")
                       { zi += 11; bFound = true ; break ; }
               }
               if (!bFound)
                   { threadLog("Unclosed <noscript> block\n") ; rc = E_FORMAT ; break ; }
               continue ;
           }
           //  At this point we have the '<' start of tag char. Establish whole and tagword of possible HTML tag
           wholetag.Clear() ;
           tagword.Clear() ;
           limit = zi ;
           limit++ ;
           W.AddByte(CHAR_LESS) ;
           bAnti = false ;
           if (*limit == CHAR_FWSLASH)
               { W.AddByte(CHAR_FWSLASH) ; bAnti = true ; limit++ ; }
           //  The tagword is the run of alphanumerics and colons directly after the '<' or '</'
           nColon = 0;
           for (; !limit.eof() ; limit++)
           {
               if (*limit == CHAR_COLON || IsAlphanum(*limit))
               {
                   if (*limit == CHAR_COLON)
                       nColon++ ;
                   T.AddByte(*limit) ;
                   W.AddByte(*limit) ;
                   continue ;
               }
               break ;
           }
           tagword = T ;
           T.Clear() ;
           //  Advance limit to the closing '>'. Quoted strings are stepped over (honouring backslash-escaped delimiters) so a
           //  '>' inside an attribute value does not end the tag. Only the opening quote of such a string is copied to W.
           for (; !limit.eof() ;)
           {
               W.AddByte(*limit) ;
               if (*limit == CHAR_DQUOTE || *limit == CHAR_SQUOTE)
               {
                   cDelim = *limit ;
                   for (limit++ ; !limit.eof() ; limit++)
                   {
                       if (*limit == CHAR_BKSLASH)
                       {
                           limit++ ;
                           if (*limit == cDelim)
                               continue ;
                       }
                       if (*limit == cDelim)
                           break ;
                   }
               }
               if (*limit == CHAR_MORE)
                   break ;
               limit++ ;
           }
           wholetag = W ;
           W.Clear() ;
           if (*limit != CHAR_MORE)
           {
               threadLog("Malformed tag (%s)\n", *wholetag) ;
               zi = limit ;
               continue ;
           }
           limit++ ;
           //  NOTE(review): unlike the body section below, the tagword is not lower-cased here - confirm this is intended.
           if (nColon)
           {
               if (!s_htagNam.Exists(tagword))
               {
                   //  NOTE(review): tf.type is not assigned here so the entry is inserted with whatever type tf last held.
                   tf.klas = HTCLASS_3RD ;
                   tf.rule = HTRULE_OPTION ;
                   tf.name = tagword ;
                   s_htagTyp.Insert(tf.type, tf) ;
                   s_htagNam.Insert(tf.name, tf) ;
                   threadLog("Inserted 3rd party HTML tag %s\n", *tagword) ;
               }
           }
           //  Unrecognized tags are just made part of the content of the currently applicable tag
           if (!s_htagNam.Exists(tagword))
           {
               if (bAnti)
                   threadLog("Line %d case 1 Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
               else
                   threadLog("Line %d Case 1 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
               pCN->m_tmpContent << wholetag ;
               zi = limit ;
               continue ;
           }
           tf = s_htagNam[tagword] ;
           if (tf.type == HTAG_NULL)
           {
               if (bAnti)
                   threadLog("Line %d case 2 Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
               else
                   threadLog("Line %d Case 2 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
               pCN->m_tmpContent << wholetag ;
               zi = limit ;
               continue ;
           }
           //  Opening tags: only the tags listed here are accepted within <head>. zi is not moved on here, so _proctag() is
           //  presumably responsible for advancing it past the tag - TODO confirm.
           if (bAnti == false)
           {
               if (zi.Equiv("<title>"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_TITLE) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <title> tags\n", zi.Line()) ; }
               }
               else if (zi.Equiv("<meta"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_META) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <meta> tags\n", zi.Line()) ; }
               }
               else if (zi.Equiv("<style"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_STYLE) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <style> tags\n", zi.Line()) ; }
               }
               else if (zi.Equiv("<script"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_SCRIPT) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <script> tags\n", zi.Line()) ; }
               }
               else if (zi.Equiv("<link"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_LINK) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <link> tags\n", zi.Line()) ; }
               }
               else if (zi.Equiv("<base"))
               {
                   pCN = _proctag(m_pHead, zi, HTAG_BASE) ;
                   if (!pCN)
                       { rc = E_FORMAT ; threadLog("Line %d Could not process <base> tags\n", zi.Line()) ; }
                   else if (pCN->m_tmpContent.Size())
                   {
                       //  Set m_Base from the tag content where there is any
                       m_Base = pCN->m_tmpContent ;
                   }
                   else
                   {
                       //  Otherwise set m_Base to the value of the first attribute
                       ai = pCN ;
                       if (ai.Value())
                           m_Base = ai.Value() ;
                   }
               }
               else
                   { rc = E_FORMAT ; threadLog("Line %d Could not process <%s> tag within <head>\n", zi.Line(), *tagword) ; }
               continue ;
           }
           //  Handle antitag
           if (bAnti)
           {
               if (zi.Equiv("</head>"))
                   { zi += 7; break ; }
               //  Inactive (text rendering only) anti-tags
               if (tf.klas == HTCLASS_TXT)
                   { pCN->m_tmpContent << wholetag ; zi = limit ; continue ; }
               zi = limit ;
               if (pCN->Type() == tf.type || tf.rule == HTRULE_SINGLE)
                   pCN = pCN->Parent() ;
               else
               {
                   threadLog("case 1 Tag mis-match. Current highest tag is <%s id=%d, level=%d> but on line %d we have an anti-tag for %s\n",
                       *Tagtype2Txt(pCN->Type()), pCN->GetUid(), pCN->Level(), zi.Line(), *Tagtype2Txt(tf.type)) ;
                   //  A </tr> arriving while a table cell is current: unwind to the nearest enclosing row
                   if (pCN->Type() == HTAG_TBL_CEL && tf.type == HTAG_TR)
                   {
                       for (pX = pCN ; pX ; pX = pX->Parent())
                       {
                           if (pX->Type() == tf.type)
                           {
                               pCN = pX ;
                               threadLog("Corrected by decending to level %d\n", pCN->Level()) ;
                               break ;
                           }
                       }
                   }
               }
               continue ;
           }
           //  Not reached (both the tag and anti-tag branches above always continue or break). Kept as a safety net.
           zi++ ;
       }
       //  Advance to the <body> tag
       for (; !zi.eof() ;)
       {
           if (zi.Equiv("<body"))
           {
               m_pBody = _proctag(m_pRoot, zi, HTAG_BODY) ;
               if (!m_pBody)
                   { threadLog("Expected an actual body\n") ; return E_FORMAT ; }
               break ;
           }
           zi++ ;
       }
       if (!m_pBody)
           { threadLog("Expected a <body> tag\n") ; return E_FORMAT ; }
       pCN = m_pBody ;
   }
   //  
   //  Process document body. Here everything is either a tag, an anti-tag or it is tag-content. Both tags and antitags begin with a '<' so the
   //  raw HTML is iterated and whenever the < is found, it is tested for a known tag/antitag. In the general case of "<tag>content</tag>", the
   //  process is to call _proctag() to parse the tag, garner the attributes and to create a new element (which the current element is then set
   //  to). Bytes after the tag are aggregated to the current element's content until the antitag occurs (at which point the current element is
   //  then set back to the parent tag).
   //  
   //  The exceptions to the general case:-
   //  
   //  1) Paragraph tags can be left open (antitag omitted). These tags are closed by the parent antitag or by another paragraph tag.
   //  
   //  2) Print control tags which are completely ignored. These can never become the current tag so any content they have is aggregated to
   //   their parent tag.
   //  
   //  3) Links which do become current, but will have their content aggregated to the parent tag.
   //  
   //  For the skipped constructs below a found flag is used rather than an eof test, so that a construct which is properly
   //  closed by the very last bytes of the input is not misreported as unterminated.
   //  
   for (; pCN && !zi.eof() ;)
   {
       //  Handle tag content
       if (*zi != CHAR_LESS)
       {
           if (pCN->Type() != HTAG_ANCHOR)
           {
               //  Strip leading whitespace from the content
               if (*zi <= CHAR_SPACE && pCN->m_tmpContent.Size() == 0)
                   { zi++ ; continue ; }
               pCN->m_tmpContent.AddByte(*zi) ;
           }
           else
           {
               //  Anchor content goes to the anchor's parent
               if (pCN->Parent())
                   pCN->Parent()->m_tmpContent.AddByte(*zi) ;
           }
           zi++ ;
           continue ;
       }
       //  Ignore deleted text within <strike></strike> tags
       nLine = zi.Line() ;
       if (zi == "<strike>")
       {
           bFound = false ;
           for (zi += 8; !zi.eof() ; zi++)
           {
               if (zi == "</strike>")
                   { zi += 9; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Unclosed <strike> block\n") ; rc = E_FORMAT ; break ; }
           continue ;
       }
       //  Ignore social media widgets
       if (zi == "<fb:like>")
       {
           bFound = false ;
           for (zi += 9; !zi.eof() ; zi++)
           {
               if (zi == "</fb:like>")
                   { zi += 10; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Facebook special\n") ; rc = E_FORMAT ; break ; }
           continue ;
       }
       if (zi == "<g:plusone>")
       {
           bFound = false ;
           for (zi += 11;!zi.eof() ; zi++)
           {
               if (zi == "</g:plusone>")
                   { zi += 12; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Google special\n") ; rc = E_FORMAT ; break ; }
           continue ;
       }
       //  Ignore conditional comments and ordinary comments
       if (zi == "<!--[if")
       {
           bFound = false ;
           for (zi += 7; !zi.eof() ; zi++)
           {
               if (zi == "<![endif]>")     { zi += 10; bFound = true ; break ; }
               if (zi == "<![endif]-->")   { zi += 12; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Unterminated <!--[if cond]..> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
           continue ;
       }
       if (zi == "<![if")
       {
           bFound = false ;
           for (zi += 5; !zi.eof() ; zi++)
           {
               if (zi == "<![endif]>")     { zi += 10; bFound = true ; break ; }
               if (zi == "<![endif]-->")   { zi += 12; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Unterminated <![if cond]..> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
           continue ;
       }
       if (zi == "<!--")
       {
           bFound = false ;
           for (zi += 4; !zi.eof() ; zi++)
           {
               if (zi == "-->")
                   { zi += 3; bFound = true ; break ; }
           }
           if (!bFound)
               { threadLog("Unterminated <!--> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
           continue ;
       }
       //  At this point we have the '<' start of tag char. Establish whole and tagword of possible HTML tag
       wholetag.Clear() ;
       tagword.Clear() ;
       limit = zi ;
       limit++ ;
       W.AddByte(CHAR_LESS) ;
       bAnti = false ;
       if (*limit == CHAR_FWSLASH)
           { W.AddByte(CHAR_FWSLASH) ; bAnti = true ; limit++ ; }
       //  The tagword is the run of alphanumerics and colons directly after the '<' or '</'
       nColon = 0;
       for (; !limit.eof() ; limit++)
       {
           if (*limit == CHAR_COLON || IsAlphanum(*limit))
           {
               if (*limit == CHAR_COLON)
                   nColon++ ;
               T.AddByte(*limit) ;
               W.AddByte(*limit) ;
               continue ;
           }
           break ;
       }
       tagword = T ;
       T.Clear() ;
       //  Advance limit to the closing '>', stepping over quoted strings (honouring backslash-escaped delimiters)
       for (; !limit.eof() ;)
       {
           W.AddByte(*limit) ;
           if (*limit == CHAR_DQUOTE || *limit == CHAR_SQUOTE)
           {
               cDelim = *limit ;
               for (limit++ ; !limit.eof() ; limit++)
               {
                   if (*limit == CHAR_BKSLASH)
                   {
                       limit++ ;
                       if (*limit == cDelim)
                           continue ;
                   }
                   if (*limit == cDelim)
                       break ;
               }
           }
           if (*limit == CHAR_MORE)
               break ;
           limit++ ;
       }
       wholetag = W ;
       W.Clear() ;
       if (*limit != CHAR_MORE)
       {
           threadLog("Malformed tag (%s)\n", *wholetag) ;
           zi = limit ;
           continue ;
       }
       //  Note limit is left on the '>' here (it is not stepped past it as it is in the <head> section)
       tagword.ToLower() ;
       if (nColon)
       {
           if (!s_htagNam.Exists(tagword))
           {
               //  NOTE(review): tf.type is not assigned here so the entry is inserted with whatever type tf last held.
               tf.klas=HTCLASS_3RD ;
               tf.rule=HTRULE_OPTION ;
               tf.name = tagword ;
               s_htagTyp.Insert(tf.type, tf) ;
               s_htagNam.Insert(tf.name, tf) ;
               threadLog("Inserted 3rd party HTML tag %s\n", *tagword) ;
           }
       }
       tf = s_htagNam[tagword] ;
       if (tf.type == HTAG_NULL)
       {
           //  Unrecognized tags are just made part of the content of the currently applicable tag
           if (bAnti)
               threadLog("Line %d Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
           else
               threadLog("Line %d Case 3 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
           pCN->m_tmpContent << wholetag ;
           zi = limit ;
           continue ;
       }
       if (bAnti == false)
       {
           //  Ignore graphic tags
           if (tf.klas == HTCLASS_IMG)
               { zi = limit ; continue ; }
           //  Ignore self-closed 'system' tags
           if (tf.klas == HTCLASS_SYS)
           {
               if (tf.type == HTAG_EMBED)
                   pCN->m_tmpContent << "<embed/>" ;
               if (tf.type == HTAG_NOEMBED)
                   pCN->m_tmpContent << "<noembed/>" ;
               //  Step zi past the first '>' so the line number logged is that of the end of the tag
               for (; !zi.eof() ; zi++)
               {
                   if (*zi == CHAR_MORE)
                       { zi++ ; break ; }
               }
               threadLog("Line %d Bypassed system tag <%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
               zi = limit ;
               continue ;
           }
           //  Handle HTCLASS_TXT 'in-content' tags. We just copy these through, complete with tag, antitag and content, to the content of the
           //  current tag.
           if (tf.klas == HTCLASS_TXT)
           {
               pCN->m_tmpContent << wholetag ; zi = limit ;
               continue ;
           }
           //  Eliminate scripts (may revisit). The search uses a separate iterator so the found flag must be tested: zi itself
           //  only moves when the closing tag is found, so testing zi for eof would loop forever on an unclosed script.
           if (zi.Equiv("<script"))
           {
               bFound = false ;
               for (tmp = zi ; !tmp.eof() ; tmp++)
               {
                   if (tmp.Equiv("</script>"))
                       { tmp += 9; zi = tmp ; bFound = true ; break ; }
               }
               if (!bFound)
                   { threadLog("Unclosed script tag\n") ; rc = E_FORMAT ; break ; }
               continue ;
           }
           //  Process all other tags into nodes. The new node becomes the current node.
           pNN = _proctag(pCN, zi, tf.type) ;
           if (!pNN)
           {
               threadLog("No node allocated for tag <%s>\n", *Tagtype2Txt(tf.type)) ;
               return E_FORMAT ;
           }
           pCN = pNN ;
           zi = limit ;
           //  Handle the <input> tag. As this is its own anti-tag it has no content, only parameters. We need to include the tag
           //  in the tree as it is active, but we need to effect the anti-tag aspect as well (so the level is not raised)
           if (tf.type == HTAG_INPUT)
               pCN = pCN->Parent() ;
           continue ;
       }
       //  Handle anti-tags
       if (bAnti)
       {
           //  Inactive (text rendering only) anti-tags
           if (tf.klas == HTCLASS_TXT)
               { pCN->m_tmpContent << wholetag ; zi = limit ; continue ; }
           //  Ignore self-closed 'system' tags
           if (tf.klas == HTCLASS_SYS)
           {
               if (tf.type == HTAG_EMBED)
                   pCN->m_tmpContent << "</embed>" ;
               if (tf.type == HTAG_NOEMBED)
                   pCN->m_tmpContent << "</noembed>" ;
               //  Step zi past the first '>' so the line number logged is that of the end of the tag
               for (; !zi.eof() ; zi++)
               {
                   if (*zi == CHAR_MORE)
                       { zi++ ; break ; }
               }
               threadLog("Line %d Bypassed system anti-tag <%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
               zi = limit ;
               continue ;
           }
           zi = limit ;
           if (pCN->Type() == tf.type || tf.rule == HTRULE_SINGLE)
               pCN = pCN->Parent() ;
           else
           {
               threadLog("case 2 Tag mis-match. Current highest tag is <%s id=%d, level=%d> but on line %d we have an anti-tag for %s\n",
                   *Tagtype2Txt(pCN->Type()), pCN->GetUid(), pCN->Level(), zi.Line(), *Tagtype2Txt(tf.type)) ;
               //  A </tr> arriving while a table cell is current: unwind to the nearest enclosing row
               if (pCN->Type() == HTAG_TBL_CEL && tf.type == HTAG_TR)
               {
                   for (pX = pCN ; pX ; pX = pX->Parent())
                   {
                       if (pX->Type() == tf.type)
                       {
                           pCN = pX ;
                           threadLog("Corrected by decending to level %d\n", pCN->Level()) ;
                           break ;
                       }
                   }
               }
           }
           continue ;
       }
       //  Not reached (both the tag and anti-tag branches above always continue, break or return)
       threadLog("HANDLING ABD %s (%s)\n", *tagword, *wholetag) ;
   }
   if (pCN)
       threadLog("End of file encountered whilst inside tag definition\n") ;
   //  Move thru the tags in their order of appearance. Place forms in the list of forms and place form field tags with their
   //  host forms.
   for (nX = 0; nX < m_vecTags.Count() ; nX++)
   {
       pX = m_vecTags[nX] ;
       if (pX->Type() == HTAG_FORM)
       {
           //  Add the form to m_Forms and set this to the current form
           pCurForm = pX ;
           pForm = new hzHtmForm() ;
           m_Forms.Add(pForm) ;
           continue ;
       }
       if (pCurForm)
       {
           if (pX->Type() == HTAG_INPUT)
           {
               //  Add this field to the current form, provided it lies before the form's anti-tag
               if (pX->Line() < pCurForm->Anti())
               {
                   P.name = pX->Name() ;
                   //  Reset the value first so a field without a value attribute does not inherit that of the previous field
                   aval.Clear() ;
                   P.value = aval ;
                   for (ai = pX ; ai.Valid() ; ai.Advance())
                   {
                       anam = ai.Name() ; aval = ai.Value() ;
                       if (anam == "value")
                           { P.value = aval ; break ; }
                   }
                   pForm->fields.Add(P) ;
               }
               continue ;
           }
           //  Any other tag beyond the form's anti-tag means we have left the form
           if (pX->Line() > pCurForm->Anti())
               pCurForm = 0;
       }
   }
   threadLog("END OF LOAD page has %d links\n", m_vecLinks.Count()) ;
   return rc ;
}