//
//  File:   hzDocHtml.cpp
//
//  Legal Notice:   This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
//  The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
//  Software Foundation, either version 3 of the License, or any later version.
//
//  The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
//  A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
//
//  Management of HTML documents
//
#include <fstream>
#include <sys/stat.h>
#include "hzChars.h"
#include "hzTextproc.h"
#include "hzDirectory.h"
#include "hzDocument.h"
#include "hzProcess.h"
using namespace std ;
/*
**  Variables
*/
static  hzMapS<hzString,hzHtagform>     s_htagNam ;     //  All HTML tag forms keyed by tag name. Populated by InitHtml()
static  hzMapS<hzHtagtype,hzHtagform>   s_htagTyp ;     //  All HTML tag forms keyed by tag type (enum). Populated by InitHtml()
static  hzHtagform  s_tagformDuff ;                     //  Null tag form, handed back by the lookup functions when they reject their input before consulting the maps
static  uint32_t    s_htagPop ;                         //  Set by InitHtml() to the population of s_htagNam. A non-zero value indicates the tags have been set up.
/*
**  SECTION 1:  HTML Tag Types
*/
hzEcode InitHtml    (void)
{
    //  Category:   Data Initialization
    //
    //  Populate the map of tag names to tag forms (s_htagNam) and the map of tag types to tag forms (s_htagTyp) - see hzHtagform definition. This facilitates
    //  HTML tag lookup for such purposes as the import and processing of HTML documents.
    //
    //  A single working hzHtagform instance is filled in for each tag (class, rule, type and name) and then inserted into both maps. On completion s_htagPop
    //  is set to the population of the name map, which is what marks the tables as loaded for subsequent calls.
    //
    //  Arguments:  None
    //
    //  Returns:    E_SETONCE   If the HTML maps are already populated
    //              E_OK        If the operation was successful
    //
    //  NOTE(review):   Several elements that HTML defines as void (meta, base, basefont, link, area, embed, frame, source, track) are registered below with
    //                  HTRULE_PAIRED. Whether that is intended depends on how the rule is consumed by the parser, which is not visible here - confirm.
    //
    //  NOTE(review):   The SVG names are stored in mixed case (e.g. "animateMotion") whereas Txt2Tagtype() and TagLookup(chIter&) convert the search string to
    //                  lower case before the lookup. If the map comparison is case sensitive those entries cannot be found by these functions - confirm.
    _hzfunc(__func__) ;
    //  Guard against repeat initialization
    if (s_htagPop)
        return E_SETONCE ;
    hzHtagform  t ;     //  Full tag info for insertion
    //  Default (invalid). The name is assigned from a null pointer and the entry is placed in both maps like any other tag
    t.klas=HTCLASS_NUL; t.rule=HTRULE_NULL; t.type=HTAG_NULL;           t.name=(char*)0;            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Page structure tags
    t.klas=HTCLASS_HDR; t.rule=HTRULE_SINGLE; t.type=HTAG_DOCTYPE;      t.name="!DOCTYPE";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_HTML;         t.name="html";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_HEAD;         t.name="head";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_TITLE;        t.name="title";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_META;         t.name="meta";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_BODY;         t.name="body";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_BASE;         t.name="base";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_BASEFONT;     t.name="basefont";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_STYLE;        t.name="style";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Programming tags
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_SCRIPT;       t.name="script";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_NOFRAMES;     t.name="noframes";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_NOSCRIPT;     t.name="noscript";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_HDR; t.rule=HTRULE_PAIRED; t.type=HTAG_APPLET;       t.name="applet";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Frames
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_FRAME;        t.name="frame";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_FRAMESET;     t.name="frameset";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_IFRAME;       t.name="iframe";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_SINGLE; t.type=HTAG_PARAM;        t.name="param";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  System tags
    t.klas=HTCLASS_SYS; t.rule=HTRULE_PAIRED; t.type=HTAG_EMBED;        t.name="embed";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_SYS; t.rule=HTRULE_PAIRED; t.type=HTAG_NOEMBED;      t.name="noembed";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Font control or text tags - no content
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_BOLD;         t.name="b";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_ULINE;        t.name="u";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HATG_ITALIC;       t.name="i";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_EM;           t.name="em";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_STRONG;       t.name="strong";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_CENTER;       t.name="center";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_FONT;         t.name="font";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_BIG;          t.name="big";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_SMALL;        t.name="small";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_SINGLE; t.type=HATG_BR;           t.name="br";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_SINGLE; t.type=HTAG_HR;           t.name="hr";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Text description tags
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_HEADER;       t.name="header";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_FOOTER;       t.name="footer";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_SECTION;      t.name="section";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_ARTICLE;      t.name="article";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_ASIDE;        t.name="aside";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DETAILS;      t.name="details";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_SUMMARY;      t.name="summary";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DIALOG;       t.name="dialog";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Text grouping tags
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_STRIKE;       t.name="strike";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_S;            t.name="s";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_DEL;          t.name="del";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_INS;          t.name="ins";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_KBD;          t.name="kbd";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_TXT; t.rule=HTRULE_PAIRED; t.type=HTAG_SPAN;         t.name="span";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Text control tags
    t.klas=HTCLASS_DAT; t.rule=HTRULE_OPTION; t.type=HTAG_PARAG;        t.name="p";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_QUOTATION;    t.name="q";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H1;           t.name="h1";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H2;           t.name="h2";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H3;           t.name="h3";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H4;           t.name="h4";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H5;           t.name="h5";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_H6;           t.name="h6";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HATG_TT;           t.name="tt";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_CODE;         t.name="code";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_SAMP;         t.name="samp";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_CITE;         t.name="cite";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_CAPTION;      t.name="caption";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_VAR;          t.name="var";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_PRE;          t.name="pre";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_BQ;           t.name="bq";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_BLOCKQUOTE;   t.name="blockquote";        s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAB_BDO;          t.name="bdo";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_SUBSCRIPT;    t.name="sub";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_SUPERSCRIPT;  t.name="sup";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Data/layout tags
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TABLE;        t.name="table";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_OPTION; t.type=HTAG_TCOL;         t.name="col";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_OPTION; t.type=HTAG_TCOLGRP;      t.name="colgroup";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TH;           t.name="th";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TR;           t.name="tr";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TBL_CEL;      t.name="td";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DIV;          t.name="div";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TBODY;        t.name="tbody";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_THEAD;        t.name="thead";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TFOOT;        t.name="tfoot";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_FIELDSET;     t.name="fieldset";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_LEGEND;       t.name="legend";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_MENU;         t.name="menu";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DT;           t.name="dt";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DD;           t.name="dd";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DFN;          t.name="dfn";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DIR;          t.name="dir";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_DLIST;        t.name="dl";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_OLIST;        t.name="ol";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_ULIST;        t.name="ul";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_ITEM;         t.name="li";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_HGROUP;       t.name="hgroup";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_DAT; t.rule=HTRULE_PAIRED; t.type=HTAG_TIME;         t.name="time";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Link tags
    t.klas=HTCLASS_LNK; t.rule=HTRULE_PAIRED; t.type=HTAG_ANCHOR;       t.name="a";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_LNK; t.rule=HTRULE_PAIRED; t.type=HTAG_NAV;          t.name="nav";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_LNK; t.rule=HTRULE_PAIRED; t.type=HTAG_LINK;         t.name="link";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Input/form tags
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_FORM;         t.name="form";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_SINGLE; t.type=HTAG_INPUT;        t.name="input";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_TEXTAREA;     t.name="textarea";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_SELECT;       t.name="select";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_OPTGROUP;     t.name="optgroup";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_OPTION;       t.name="option";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_BUTTON;       t.name="button";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INP; t.rule=HTRULE_PAIRED; t.type=HTAG_LABEL;        t.name="label";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Information tags
    t.klas=HTCLASS_INF; t.rule=HTRULE_PAIRED; t.type=HTAG_ABBR;         t.name="abbr";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INF; t.rule=HTRULE_PAIRED; t.type=HTAG_ACRONYM;      t.name="acronym";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_INF; t.rule=HTRULE_PAIRED; t.type=HTAG_ADDRESS;      t.name="address";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Image tags
    t.klas=HTCLASS_IMG; t.rule=HTRULE_SINGLE; t.type=HTAG_IMG;          t.name="img";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_MAP;          t.name="map";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_AREA;         t.name="area";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_OBJECT;       t.name="object";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_MARQUEE;      t.name="marquee";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_CANVAS;       t.name="canvas";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_FIGURE;       t.name="figure";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_FIGCAPTION;   t.name="figcaption" ;       s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Image SVG tags. The class and rule are set once here and carried over by the working instance for every SVG entry, only type and name change per line.
    t.klas=HTCLASS_IMG;
    t.rule=HTRULE_PAIRED;
    t.type=HTAG_SVG;                   t.name="svg";                    s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_ANIMATE;           t.name="animate";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_ANIMATEMOTION;     t.name="animateMotion";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_ANIMATEXFORM;      t.name="animateTransform";       s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_CIRCLE;            t.name="circle";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_CLIPPATH;          t.name="clipPath";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DEFS;              t.name="defs";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DESC;              t.name="desc";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DISCARD;           t.name="discard";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_ELLIPSE;           t.name="ellipse";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_BLEND;             t.name="feBlend";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_COLORMATRIX;       t.name="feColorMatrix";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_COMPONENTXFER;     t.name="feComponentTransfer";    s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_COMPOSITE;         t.name="feComposite";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_CONVOLVEMATRIX;    t.name="feConvolveMatrix";       s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DIFFUSELIGHTING;   t.name="feDiffuseLighting";      s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DISPLACEMENTMAP;   t.name="feDisplacementMap";      s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DISTANTLIGHT;      t.name="feDistantLight";         s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DROPSHADOW;        t.name="feDropShadow";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FLOOD;             t.name="feFlood";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FUNC_A;            t.name="feFuncA";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FUNC_B;            t.name="feFuncB";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FUNC_G;            t.name="feFuncG";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FUNC_R;            t.name="feFuncR";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_DEGAUSS;           t.name="feGaussianBlur";         s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_IMAGE;             t.name="feImage";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MERGE;             t.name="feMerge";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MERGENODE;         t.name="feMergeNode";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MORPHOLOGY;        t.name="feMorphology";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_OFFSET;            t.name="feOffset";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_POINTLIGHT;        t.name="fePointLight";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_SPECLIGHT;         t.name="feSpecularLighting";     s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_SPOTLIGHT;         t.name="feSpotLight";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  NOTE(review): the type below is spelt ..._TITLE but is paired with the name "feTile". Presumably the enum was meant to read ..._TILE - confirm against the hzHtagtype declaration.
    t.type=HTAG_SVG_TITLE;             t.name="feTile";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_TURBULENCE;        t.name="feTurbulence";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FILTER;            t.name="filter";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_FOREIGNOBJECT;     t.name="foreignObject";          s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_GENERIC;           t.name="g";                      s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_HATCH;             t.name="hatch";                  s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_HATCHPATH;         t.name="hatchpath";              s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_LINE;              t.name="line";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_lINEARGRADIENT;    t.name="linearGradient";         s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MARKER;            t.name="marker";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MASK;              t.name="mask";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_METADATA;          t.name="metadata";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_MPATH;             t.name="mpath";                  s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_PATH;              t.name="path";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_PATTERN;           t.name="pattern";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_POLYGON;           t.name="polygon";                s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_POLYLINE;          t.name="polyline";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_RADIALGRADIENT;    t.name="radialGradient";         s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_RECT;              t.name="rect";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_SET;               t.name="set";                    s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_STOP;              t.name="stop";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_SWITCH;            t.name="switch";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_SYMBOL;            t.name="symbol";                 s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_TEXT;              t.name="text";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_TEXTPATH;          t.name="textPath";               s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_TSPAN;             t.name="tspan";                  s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_USE;               t.name="use";                    s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.type=HTAG_SVG_VIEW;              t.name="view";                   s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Audio/Video Tags
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_AUDIO;        t.name="audio";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_SOURCE;       t.name="source";            s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_TRACK;        t.name="track";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    t.klas=HTCLASS_IMG; t.rule=HTRULE_PAIRED; t.type=HTAG_VIDEO;        t.name="video";             s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Third party tags
    t.klas=HTCLASS_3RD; t.rule=HTRULE_SINGLE; t.type=HTAG_FBLIKE;       t.name="fb:like";           s_htagTyp.Insert(t.type,t); s_htagNam.Insert(t.name,t);
    //  Record the population of the name map. This being non-zero is what marks the tables as loaded
    s_htagPop = s_htagNam.Count() ;
    return E_OK ;
}
const char* Doctype2Txt (hzDoctype dtype)
{
    //  Category:   Diagnostics
    //
    //  Convert hzDoctype enum to text for diagnostics
    //
    //  Arguments:  1)  dtype   The enumerated document type (either HTML or XML)
    //
    //  Returns:    Pointer to the doctype text form. Values outside the range of known document types yield the text "DOCTYPE_UNDEFINED".
    //
    //  Note:       The table is indexed directly by the enum value, so it assumes the hzDoctype values run 0, 1, 2 in the order listed (to be confirmed against
    //              the hzDoctype declaration).
    static  const char* strings [] =
    {
        "DOCTYPE_UNDEFINED",
        "DOCTYPE_HTML",
        "DOCTYPE_XML",
        ""
    } ;
    //  DOCTYPE_XML is the last valid value and has its own entry in the table, so only values strictly beyond it are rejected. Testing with >= here would
    //  report XML documents as undefined.
    if (dtype < 0 || dtype > DOCTYPE_XML)
        return strings[0] ;
    return strings[dtype] ;
}
    
hzString    Tagtype2Txt (hzHtagtype type)
{
    //  Category:   Diagnostics
    //
    //  Give the name of a HTML tag, as a string, for the supplied HTML tag type (enum).
    //
    //  Arguments:  1)  type    The enumerated HTML tag type
    //
    //  Returns:    Instance of hzString by value. This is the name held by the null tag form if the type is below HTAG_NULL or is not less than the population
    //              of the type map, otherwise it is the name of whatever the type map holds for the type.

    //  Ensure the tag tables are loaded before they are consulted
    if (!s_htagNam.Count())
        InitHtml() ;

    //  Only values within the span of the type map are looked up
    if (type >= HTAG_NULL && (uint32_t) type < s_htagTyp.Count())
        return s_htagTyp[type].name ;

    return s_tagformDuff.name ;
}
hzHtagtype  Txt2Tagtype (const hzString& htag)
{
    //  Category:   Config
    //
    //  Give the HTML tag type (enum) for the supplied tag name. The name is converted to lower case before the lookup, the supplied string itself is left
    //  untouched as the conversion is applied to a copy.
    //
    //  Arguments:  1)  htag    A string presumed to be one of the allowed HTML5 tags
    //
    //  Returns:    Enumerated hzHtagtype, being the type held by whatever entry the name map yields for the lower case name.
    _hzfunc(__func__) ;

    //  Ensure the tag tables are loaded before they are consulted
    if (!s_htagPop)
        InitHtml() ;

    hzString    key = htag ;    //  Lower case copy of the tag name, used as the search key

    key.ToLower() ;
    return s_htagNam[key].type ;
}
const hzHtagform&   TagLookup   (const hzString& htag)
{
    //  Category:   Internet
    //
    //  Find the hzHtagform (tag function class) for the named tag. The name is used as supplied, no case conversion is applied by this function.
    //
    //  Arguments:  1)  htag    A string presumed to be one of the allowed HTML5 tags
    //
    //  Returns:    Reference to the tag form the name map yields for the tag name

    //  Ensure the tag tables are loaded before they are consulted
    if (s_htagNam.Count() == 0)
        InitHtml() ;

    const hzHtagform&   form = s_htagNam[htag] ;    //  Map entry for the supplied name

    return form ;
}
const hzHtagform&   TagLookup   (chIter& ci)
{
    //  Category:   Internet
    //
    //  Determine if the supplied chain iterator is at the start of a legal HTML tag or anti-tag. The sequence must open with '<', optionally followed
    //  by '/'. Everything from there up to the first space or '>' is taken as the tag name, which is converted to lower case and looked up in the map
    //  of tag names.
    //
    //  The supplied iterator is only copied, it is not advanced.
    //
    //  Arguments:  1)  ci  A chain iterator to be tested to see if it is at the beginning of an allowed HTML5 tag
    //
    //  Returns:    Reference to the null tag form if the iterator is not at a '<' or the chain ends before the name is terminated by a space or '>'
    //              Reference to whatever tag form the name map supplies for the name otherwise
    hzChain     W ;         //  Working chain
    chIter      xi ;        //  Internal chain iterator
    hzString    word ;      //  Individual word
    //  If tagmap not loaded, load it
    if (!s_htagNam.Count())
        InitHtml() ;
    xi = ci ;
    if (*xi != CHAR_LESS)
        return s_tagformDuff ;
    xi++ ;
    if (*xi == CHAR_FWSLASH)
        xi++ ;
    //  Gather the name. The scan is bounded by the end of the chain, without this a sequence that has neither a space nor a '>' after the '<' would
    //  never leave the loop.
    for (; !xi.eof() ; xi++)
    {
        if (*xi == CHAR_SPACE)  break ;
        if (*xi == CHAR_MORE)   break ;
        W.AddByte(*xi) ;
    }
    //  An unterminated sequence is not a tag
    if (xi.eof())
        return s_tagformDuff ;
    word = W ;
    word.ToLower() ;
    return s_htagNam[word] ;
}
/*
**  Tag cleanup
*/
hzHtagInd   AtHtmlTag   (hzString& tagseq, chIter& ci)
{
    //  Category:   Text Processing
    //
    //  Determines if the supplied chain iterator marks the start of a sequence that amounts to a known HTML tag or anti-tag. The sequence must open with
    //  '<', optionally followed by '/', and then a name that begins with a letter and continues with letters or digits (so that names such as h1 are read
    //  in full). The name is looked up, in lower case, in the map of tag names. If this finds a match the sequence is gathered up to and including the
    //  closing '>' and placed in the supplied string. In all other cases the supplied string is left empty.
    //
    //  The supplied iterator is only copied, it is not advanced.
    //
    //  Arguments:  1)  tagseq  Cleared on entry. If a tag is found, this string reference will be populated by it (angle brackets included).
    //              2)  ci      The test chain iterator
    //
    //  Returns:    HTAG_IND_NULL   If the sequence is not a known HTML tag or anti-tag, or has no closing '>'.
    //              HTAG_IND_OPEN   If the sequence is a HTML tag.
    //              HTAG_IND_ANTI   If the sequence is a HTML anti-tag.
    //              HTAG_IND_SELF   If a "/>" is met (outside double quotes) while gathering the sequence (eg <br/>).
    _hzfunc(__func__) ;
    hzChain     W ;             //  For building tagname, then the whole tag sequence
    chIter      zi ;            //  Used to iterate whole tag sequence.
    hzHtagform  tf ;            //  The tag form for the found tag (if any).
    hzString    tagname ;       //  The tag name
    hzHtagInd   retval ;        //  Return value (open tag, anti-tag or self closing tag)
    //  If tagmap not loaded, load it
    if (!s_htagNam.Count())
        InitHtml() ;
    //  Clear the supplied tag and set chain iter
    tagseq.Clear() ;
    zi = ci ;
    if (*zi != CHAR_LESS)
        return HTAG_IND_NULL ;
    zi++ ;
    if (*zi == CHAR_FWSLASH)
        { retval = HTAG_IND_ANTI ; zi++ ; }
    else
        retval = HTAG_IND_OPEN ;
    //  The name must start with a letter but may go on to contain digits. Accepting letters only would cut h1 to h6 down to "h", which is not a tag.
    if (zi.eof() || !IsAlpha(*zi))
        return HTAG_IND_NULL ;
    for (; !zi.eof() && IsAlphanum(*zi) ; zi++)
        W.AddByte(*zi) ;
    tagname = W ;
    W.Clear() ;
    tagname.ToLower() ;
    tf = s_htagNam[tagname] ;
    if (tf.type == HTAG_NULL)
        return HTAG_IND_NULL ;
    //  We have a HTML tag so build the complete tag for populating tagseq
    for (zi = ci ; !zi.eof() ; zi++)
    {
        W.AddByte(*zi) ;
        if (*zi == CHAR_DQUOTE)
        {
            //  Copy the double quoted value. A '>' within the quotes does not end the tag.
            for (zi++ ; !zi.eof() ; zi++)
            {
                W.AddByte(*zi) ;
                if (*zi == CHAR_BKSLASH)
                {
                    //  Copy the char after the backslash as well, unless the chain ends at the backslash
                    zi++ ;
                    if (zi.eof())
                        break ;
                    W.AddByte(*zi) ;
                }
                if (*zi == CHAR_DQUOTE)
                    break ;
            }
            continue ;
        }
        if (*zi == CHAR_FWSLASH)
        {
            if (zi == "/>")
                { retval = HTAG_IND_SELF ; zi++ ; W.AddByte(*zi) ; }
        }
        if (*zi == CHAR_MORE)
            break ;
    }
    //  No closing '>' means no tag
    if (*zi != CHAR_MORE)
        return HTAG_IND_NULL ;
    tagseq = W ;
    return retval ;
}
void    XmlCleanHtags   (hzChain& output, const hzChain& input)
{
    //  Category:   Text Processing
    //
    //  Copy the input to the output, replacing every '<' with &lt; and every '>' with &gt;. An '&' is replaced with &amp; unless AtEntity() reports that
    //  the iterator is at an entity, in which case the '&' is copied as it is (the rest of the entity then follows by the normal byte copy).
    //
    //  Note the output chain is appended to, it is not cleared first.
    //
    //  Arguments:  1)  output  The cleaned output
    //              2)  input   The unclean input
    //
    //  Returns:    None
    chIter      zi ;        //  Chain iterator
    uint32_t    ent ;       //  Entity value (needed by call to AtEntity)
    uint32_t    entLen ;    //  Entity length (needed by call to AtEntity)
    for (zi = input ; !zi.eof() ; zi++)
    {
        if (*zi == CHAR_LESS)
            output << "&lt;" ;
        else if (*zi == CHAR_MORE)
            output << "&gt;" ;
        else if (*zi == CHAR_AMPSAND)
        {
            //  Only a bare ampersand is escaped, so that existing entities are not escaped a second time
            if (AtEntity(ent, entLen, zi))
                output.AddByte(*zi) ;
            else
                output << "&amp;" ;
        }
        else
            output.AddByte(*zi) ;
    }
}
hzEcode hzDocument::Init    (const hzUrl& url)
{
    //  Associate the document with a URL. This can only be done once, a repeat call is reported as an error and leaves the existing URL in place.
    //
    //  Arguments:  1)  url     The URL of the document
    //
    //  Returns:    E_INITDUP   If the document is already associated with a URL
    //              E_OK        If the document URL is set
    _hzfunc("hzDocument::Init") ;
    //  No URL held as yet, so accept the one supplied
    if (!*m_Info.m_urlReq)
    {
        m_Info.m_urlReq = url ;
        return E_OK ;
    }
    //  A URL is already held. The report differs depending on whether the caller supplied the same URL again or a different one.
    if (m_Info.m_urlReq == url)
        hzerr(E_INITDUP, "Duplicate call. Address already set to %s\n", *m_Info.m_urlReq) ;
    else
        hzerr(E_INITDUP, "Duplicate call. Addr=%s, arg=%s\n", *m_Info.m_urlReq, *url) ;
    return E_INITDUP ;
}
hzDocHtml::hzDocHtml   (void)
{
    //  Construct an empty HTML document. The m_numDochtm counter in _hzGlobal_Memstats is incremented and the root, head and body node pointers are set
    //  to null.
    ++_hzGlobal_Memstats.m_numDochtm ;
    m_pBody = 0 ;
    m_pHead = 0 ;
    m_pRoot = 0 ;
}
hzDocHtml::~hzDocHtml  (void)
{
    //  Destroy the HTML document. The m_numDochtm counter in _hzGlobal_Memstats is decremented and then Clear() is called.
    --_hzGlobal_Memstats.m_numDochtm ;
    Clear() ;
}
hzHtmElem*  hzDocHtml::_proctag (hzHtmElem* pParent, chIter& ci, hzHtagtype type)
{
    //  Process an opening HTML tag. This assumes the chain iterator is currently at a '<' char and that this is the start of a HTML tag. A '<' that is
    //  not followed by a letter is rejected, so anti-tags are not accepted here. A new node is created for the tag and the attributes found in the tag
    //  are entered in m_NodeAttrs under the node's id. Where the node is a link or anchor, a href value starting "mailto:" is added to m_Emails and any
    //  other href value (except those starting with '#') is resolved to a URL and, if valid, added to the links found in the page.
    //
    //  If successful, the iterator will be advanced beyond the terminating '>' and any whitespace that follows it. If unsuccessful, the iterator will be
    //  left unchanged. Note however that the new node is created and added to m_vecTags as soon as the tag name has been read. If the tag is rejected
    //  after that point, the node, and any attributes already entered for it, stay where they are.
    //
    //  Arguments:  1)  pParent The parent node
    //              2)  ci      The iterator.
    //              3)  type    The tag type the new node is initialized with. HTAG_TABLE results in a hzHtmTbl node, any other type in a hzHtmElem.
    //
    //  Returns:    Pointer to a new hzHtmElem if the operation was successful
    //              NULL if function could not identify a tag
    //
    //  Scope:      Private to the hzDocHtml class.
    _hzfunc("hzDocHtml::_proctag") ;
    hzChain         theTag ;        //  The tag name as read from the source
    hzChain         Z ;             //  For building param names and values
    hzAttrset       ai ;            //  Attribute iterator (not used in this function)
    chIter          end ;           //  End of tag marker
    chIter          xi ;            //  Main operating chain iterator
    chIter          yi ;            //  Shadow chain iterator (not used in this function)
    hzHtmElem*      pX ;            //  Grandparent element
    hzHtmElem*      pNewnode ;      //  Tag found (new copy created)
    hzUrl           link_url ;      //  URL for links
    hzPair          attr ;          //  Attribute name/value pair
    hzString        tnam ;          //  Tag name
    hzString        attrName ;      //  Attr name
    hzString        attrValue ;     //  Attr value
    hzString        S ;             //  Temporary string
    uint32_t        nLine ;         //  Line number of tag
    hzHtagtype      ptype ;         //  Parent tag's type
    bool    bError = false ;        //  Parentage error flag. Nothing in this function sets it.
    //  Check validity of call
    if (ci.eof())
        { threadLog("Invalid iterator\n") ; return 0 ; }
    nLine = ci.Line() ;
    if (*ci != '<')
        { threadLog("Line %d Wrong call\n", nLine) ; return 0 ; }
    //  Placeholder. No tag type has any special treatment at this point.
    switch  (type)
    {
    case HTAG_META:         //  Examining a <META ...> tag
    case HTAG_STYLE:        //  Examining a <STYLE ..> tag
    case HTAG_SCRIPT:       //  Examining a <SCRIPT .> tag
    case HTAG_LINK:         //  Examining a <LINK ...> tag
    case HTAG_HTML:         //  Examining a <HTML ...> tag (header)
    case HTAG_BODY:         //  Examining a <BODY ...> tag (body)
        break ;
    default:
        break ;
    }
    //  Pre-process the tag and get tag name
    xi = ci ;
    xi++ ;
    if (!IsAlpha(*xi))
        { threadLog("Line %d Non-tag (< followed by non-alpha %d)\n", nLine, *xi) ; return 0 ; }
    for (; !xi.eof() && (*xi == CHAR_COLON || IsAlphanum(*xi)) ; xi++)
        theTag.AddByte(*xi) ;
    if (!theTag.Size())
        { threadLog("Line %d Tag un-named\n", nLine) ; return 0 ; }
    tnam = theTag ;
    //  Create the node for the tag (the caller states the tag type, the name is not looked up here) and add it to the document's vector of tags
    if (type == HTAG_TABLE)
        pNewnode = new hzHtmTbl() ;
    else
        pNewnode = new hzHtmElem() ;
    pNewnode->Init(this, pParent, tnam, type, m_vecTags.Count(), ci.Line()) ;
    m_vecTags.Add(pNewnode) ;
    //  Collect tag attributes if any
    for (; !xi.eof() ;)
    {
        if (IsWhite(*xi))
            { xi++ ; continue ; }
        if (*xi == CHAR_FWSLASH)
        {
            //  Self closing tag. Move on to the '>' of the "/>"
            if (xi == "/>")
                { pNewnode->_setanti(xi.Line()) ; xi++ ; end = xi ; break ; }
        }
        if (*xi == CHAR_MORE)
            { end = xi ; break ; }
        //  Not at end of tag, so should have attr=value sequence (otherwise error)
        if (!IsAlpha(*xi))
            { threadLog("Line %d Error. Unexpected char is [%c]\n", nLine, *xi) ; xi++ ; continue ; }
        Z.Clear() ;
        for (; !xi.eof() && (IsUrlnorm(*xi) || *xi == CHAR_COLON || *xi == CHAR_PERIOD || *xi == CHAR_MINUS || *xi == CHAR_USCORE) ; xi++)
            Z.AddByte(*xi) ;
        attrName = Z ;
        Z.Clear() ;
        attrValue = (char*)0 ;
        for (; !xi.eof() && IsWhite(*xi) ; xi++) ;
        if (*xi != CHAR_EQUAL)
        {
            //  Tag attribute does not have a value assignment part (="some_val"). This is treated as an error although there are some sloppy
            //  exceptions, eg 'allowfullscreen' in the <iframe> tag. For these the attribute name doubles as the value.
            if (pNewnode->Type() == HTAG_IFRAME || pNewnode->Type() == HTAG_TIME)
            {
                attrValue = attrName ;
                attr.name = attrName ;
                attr.value = attrValue ;
                
                m_NodeAttrs.Insert(pNewnode->GetUid(), attr) ;
                continue ;
            }
            threadLog("Line %d Tag %s param %s not assigned\n", nLine, *tnam, *attrName) ;
            return 0 ;
        }
        //  Get attribute value. This may be in double quotes, in single quotes, or unquoted
        for (xi++ ; !xi.eof() && IsWhite(*xi) ; xi++) ;
        Z.Clear() ;
        if (*xi == CHAR_DQUOTE)
        {
            for (xi++ ; !xi.eof() && *xi != CHAR_DQUOTE ; xi++)
                Z.AddByte(*xi) ;
            if (xi.eof())
                { threadLog("Line %d Double-quote non-closure disqualifies tag\n", nLine) ; return 0 ; }
            xi++ ;
        }
        else if (*xi == CHAR_SQUOTE)
        {
            for (xi++ ; !xi.eof() && *xi != CHAR_SQUOTE ; xi++)
                Z.AddByte(*xi) ;
            if (xi.eof())
                { threadLog("Line %d Single-quote non-closure disqualifies tag\n", nLine) ; return 0 ; }
            xi++ ;
        }
        else
        {
            for (; !xi.eof() && IsUrlresv(*xi) ; xi++)
                Z.AddByte(*xi) ;
        }
        attrValue = Z ;
        //  If the tag is a link/anchor and attr is named 'href' then add link to the list of links found in the page
        if ((pNewnode->Type() == HTAG_LINK || pNewnode->Type() == HTAG_ANCHOR) && attrName.Equiv("href"))
        {
            if (!attrValue)
            {
                S = theTag ;
                threadLog("Line %d null link in tag %s\n", nLine, *S) ;
            }
            else
            {
                //  Links starting with '#' are ignored
                if (attrValue[0] != CHAR_HASH)
                {
                    //  Is the link a mailto ? The test is by strncmp, which stops at the string terminator. A memcmp over 7 bytes would read beyond
                    //  the end of a value that is shorter than "mailto:".
                    if (strncmp(*attrValue, "mailto:", 7) == 0)
                    {
                        S = *attrValue + 7 ;
                        m_Emails.Insert(S) ;
                    }
                    else
                    {
                        //  Add the link. The URL is formed from the base (if set and the link starts with '/'), else from the document's domain (if
                        //  there is one), else from the link value alone.
                        if (m_Base && attrValue[0] == CHAR_FWSLASH)
                        {
                            link_url.SetValue(m_Base, attrValue) ;
                            if (!link_url)
                                threadLog("not a link case 1: %s\n", *attrValue) ;
                        }
                        else if (m_Info.Domain())
                        {
                            link_url.SetValue(m_Info.Domain(), attrValue) ;
                            if (!link_url)
                                threadLog("not a link case 2: %s\n", *attrValue) ;
                        }
                        else
                        {
                            link_url = attrValue ;
                            if (!link_url)
                                threadLog("not a link case 3: %s\n", *attrValue) ;
                        }
                        if (!link_url.Domain())
                            threadLog("not a link case 4: %s\n", *link_url) ;
                        //  Only a href that produced a valid URL is recorded as an attribute of the node. Each distinct URL is listed once.
                        if (link_url)
                        {
                            S = *link_url ;
                            attr.name = attrName ;
                            attr.value = attrValue ;
                            m_NodeAttrs.Insert(pNewnode->GetUid(), attr) ;
                            if (!m_setLinks.Exists(link_url))
                            {
                                m_setLinks.Insert(link_url) ;
                                m_vecLinks.Add(link_url) ;
                            }
                        }
                    }
                }
            }
        }
        else
        {
            attr.name = attrName ;
            attr.value = attrValue ;
            m_NodeAttrs.Insert(pNewnode->GetUid(), attr) ;
        }
    }
    if (xi.eof())
        { threadLog("Line %d A. non-closure disqualifies tag\n", nLine) ; return 0 ; }
    if (*xi != CHAR_MORE)
        { S = theTag ; threadLog("Line %d C. malformed tag <%s> pnam=%s, attrValue=%s [%c]\n", nLine, *S, *attrName, *attrValue, *xi) ; return 0 ; }
    //  Step over the '>' and any whitespace following it
    for (xi++ ; !xi.eof() && IsWhite(*xi) ; xi++) ;
    end = xi ;
    //  Check for correct parentage
    if (pParent)
    {
        //  Some tag-type rules
        ptype = pParent->Type() ;
        if (type == HTAG_TBL_CEL)
        {
            if (ptype == HTAG_TBL_CEL)
            {
                //  This is where the author has forgotten to close a <td> and is now adding the next <td> in the row. We
                //  seek back to the <tr> (the true parent).
                threadLog("WARNING: Missing </td> anti-tag\n") ;
                pX = pParent->Parent() ;
                if (pX)
                {
                    ptype = pX->Type() ;
                    //  NOTE(review): this condition is always true, as ptype cannot equal both values at once. Also the change to pParent has no
                    //  effect as things stand, since the new node was initialized with the original parent above and pParent is not used again.
                    //  Intent to be confirmed before changing either.
                    if (ptype != HTAG_TH || ptype != HTAG_TR)
                        pParent = pX ;
                }
            }
        }
        if (bError)
            threadLog("WARNING: New <%s> tag has parent of <%s>\n", *Tagtype2Txt(type), *Tagtype2Txt(ptype)) ;
    }
    //  Success, so only now is the caller's iterator moved
    ci = end ;
    return pNewnode ;
}
hzEcode hzDocHtml::_htmPreproc  (hzChain& Z)
{
    //  Remove comments and non applicable conditional comments from HTML. The supplied chain is copied to a working chain with the following left out:-
    //
    //      1)  Normal comments (<!-- to -->) and the <!--> sequence.
    //      2)  Conditional comments that apply to IE (IE, lt IE, lte IE, gt IE, gte IE), together with their content.
    //      3)  The opening and closing markers of all other conditional comments. The content of these is kept.
    //
    //  If the working chain ends up a different size to the supplied chain, the supplied chain is replaced by it.
    //
    //  Arguments:  1)  Z   Reference to chain to be pre-processed
    //
    //  Returns:    E_FORMAT    If the opening of a conditional comment has no closing '>' or if a normal comment is unterminated
    //              E_OK        If the HTML was successfully processed
    _hzfunc("hzDocHtml::_htmPreproc") ;
    chIter      zi ;        //  Iterator of input
    hzChain     X ;         //  Target chain
    hzChain     word ;      //  Diagnostics chain
    uint32_t    nLine ;     //  Line on which a normal comment starts
    bool        bIn ;       //  In a conditional comment
    bool        bTerm ;     //  Normal comment was terminated
    if (Z.Size() == 0)
        return E_OK ;
    for (zi = Z ; !zi.eof() ;)
    {
        //  Anything not starting with '<' is straight content
        if (*zi != CHAR_LESS)
            { X.AddByte(*zi) ; zi++ ; continue ; }
        if (zi == "<!-->")
            { zi += 5 ; continue ; }
        //  Test for the opening of a conditional comment
        bIn = false ;
        if (zi == "<!--[if")
            { bIn = true ; zi += 7 ; }
        if (zi == "<![if")
            { bIn = true ; zi += 5 ; }
        if (bIn)
        {
            for (; !zi.eof() && *zi <= CHAR_SPACE ;)
                zi++ ;
            if (zi == "!IE")
            {
                //  Specific non-IE comment. Content herein must be allowed through.
                for (zi += 2 ; !zi.eof() && *zi != CHAR_MORE ; zi++) ;
                if (zi.eof())
                {
                    threadLog("Unterminated conditional comment (line %d)\n", zi.Line()) ;
                    return E_FORMAT ;
                }
                zi++ ;
                if (zi == "-->")
                    zi += 3 ;
                //  Copy the content up to the endif marker, which is itself dropped
                for (; !zi.eof() ; zi++)
                {
                    if (*zi == CHAR_LESS)
                    {
                        if (zi == "<![endif]>")         { zi += 10 ; break ; }
                        if (zi == "<![endif]-->")       { zi += 12 ; break ; }
                        if (zi == "<![endif]>-->")      { zi += 13 ; break ; }
                        if (zi == "<!--<![endif]-->")   { zi += 16 ; break ; }
                    }
                    word.AddByte(*zi) ;
                    X.AddByte(*zi) ;
                }
                word.Clear() ;
                continue ;
            }
            if (zi == "!(")
                zi += 2 ;
            if (zi == "lte IE" || zi == "lt IE" || zi == "gte IE" || zi == "gt IE" || zi == "IE")
            {
                //  We are not and never will be IE so ignore conditional comment. Everything up to and including the endif marker is dropped.
                for (zi += 2 ; !zi.eof() ; zi++)
                {
                    if (zi == "<![endif]>")     { zi += 10 ; break ; }
                    if (zi == "<![endif]-->")   { zi += 12 ; break ; }
                }
                continue ;
            }
            //  Include non IE stuff. The rest of the opening marker is skipped, then the content is copied up to the endif marker.
            for (zi += 2 ; !zi.eof() && *zi != CHAR_MORE ; zi++) ;
            if (zi.eof())
            {
                threadLog("Unterminated conditional comment (line %d)\n", zi.Line()) ;
                return E_FORMAT ;
            }
            zi++ ;
            if (zi == "<!-->")
                zi += 5 ;
            if (zi == "-->")
                zi += 3 ;
            for (; !zi.eof() ; zi++)
            {
                if (*zi == CHAR_LESS)
                {
                    if (zi == "<![endif]>")         { zi += 10 ; break ; }
                    if (zi == "<![endif]-->")       { zi += 12 ; break ; }
                    if (zi == "<![endif]>-->")      { zi += 13 ; break ; }
                    if (zi == "<!--<![endif]-->")   { zi += 16 ; break ; }
                }
                word.AddByte(*zi) ;
                X.AddByte(*zi) ;
            }
            word.Clear() ;
            continue ;
        }
        //  Ignore deleted text within comment (<!-- and -->) tags. Note these cannot be nested
        if (zi == "<!--")
        {
            //  Whether the comment was closed is recorded explicitly. The iterator being at the end of the chain cannot be used as the test, since
            //  that is also where it will be when a properly closed comment is the last thing in the document.
            nLine = zi.Line() ;
            bTerm = false ;
            for (zi += 4 ; !zi.eof() ; zi++)
            {
                if (zi == "-->")
                    { zi += 3 ; bTerm = true ; break ; }
            }
            if (!bTerm)
            {
                threadLog("_htmPreproc. Unterminated normal comment starting on line %d\n", nLine) ;
                return E_FORMAT ;
            }
            continue ;
        }
        //  A '<' that starts none of the above is straight content
        X.AddByte(*zi) ;
        zi++ ;
    }
    //  Same size means nothing was removed, so leave the supplied chain alone
    if (X.Size() == Z.Size())
        return E_OK ;
    Z.Clear() ;
    Z = X ;
    return E_OK ;
}
hzEcode hzDocHtml::Load (hzChain& Z)
{
    //  Populate the hzDocHtml object with HTML source code in the supplied chain.
    //
    //  Two scenarios are permitted - Full or Partial as follows:-
    //      1)  Full:       If the HTML source has <html> as its first tag it will be considered as a full page and tested as such.
    //                      It will be expected to have the standard sub-tags of <head> and <body> and their corresponding anti-tags.
    //                      If either of these is missing or in error (malformed or containing unexpected or malformed tags) the HTML
    //                      source code is deemed to be syntactically in error and the load fails.
    //
    //      2)  Partial:    If the opening tag of the HTML source code is not the <html> tag it is viable only if it would be viable as
    //                      a HTML fragment that could be seamlessly inserted into the <body> part of a whole HTML page. This is to say
    //                      that all its tags must be legal sub-tags of <body> and not of <head>, and neither the <body> nor the <head>
    //                      tag or anti-tag may be present.
    //
    //  In either case, tags are loaded into a tree of nodes (tags). The nodes/tags may be searched for and examined. 
    //
    //  Arguments:  1)  Z   The chain containing the HTML document
    //
    //  Returns:    E_FORMAT    If the HTML was rejected by the HTML pre-processor _htmPreproc() OR if any tags could not be processed by _proctag()
    //              E_OK        If the HTML was loaded successfully
    //
    //  Note:   Unlike XML where tags are named so that content in the tree can be searched directly, the nodes in HTML are not named
    //  and so cannot be definitively referenced (they only have type). Some other process must apply application specific criteria
    //  to read meaning into the data.
    _hzfunc("hzDocHtml::Load") ;
    hzChain         nc ;            //  Node content
    hzChain         T ;             //  For token building
    hzChain         W ;             //  For token building
    chIter          zi ;            //  Chain iterator
    chIter          tw_start ;      //  Start of tagword marker
    chIter          tmp ;           //  Start of tagword marker
    chIter          limit ;         //  End of tag marker - Protection against malformed tags (NLA style)
    hzHtmElem*      pCN = 0 ;       //  Current HTML node
    hzHtmElem*      pNN ;           //  New HTML node
    hzHtmElem*      pX ;            //  HTML node for diagnostics
    hzHtmElem*      pCurForm = 0 ;  //  HTML node for diagnostics
    hzAttrset       ai ;            //  Attribute iterator
    hzHtmForm*      pForm = 0 ;     //  Form found in page
    hzPair          P ;             //  Name value pair (for forms and fields)
    hzString        strval ;        //  To test if current tag is being closed
    hzString        tagword ;       //  From MakeTag - just the tagname.
    hzString        wholetag ;      //  From MakeTag - the entire opening sequence if applicable
    hzString        anam ;          //  Attribute name
    hzString        aval ;          //  Attribute value
    hzHtagform      tf ;            //  Tag form
    uint32_t        nX ;            //  For nesting levels/general iteration
    uint32_t        nColon ;        //  Does the tagname contain a colon (3rd party tag)
    uint32_t        nLine ;         //  Line number for errors
    uint32_t        quote ;         //  Are we in a quoted string
    bool            bAnti ;         //  Tag is an anti-tag
    int32_t         cDelim ;        //  Delimiting char (single/double quote)
    hzEcode         rc = E_OK ;     //  return code
    Clear() ;
    //m_Error.Clear() ;
    //  Pre-process the HTML
    rc = _htmPreproc(Z) ;
    if (rc != E_OK)
        return rc ;
    m_Content = Z ;
    //  Make sure the HTML tags are loading into the lookup table
    if (!s_htagNam.Count())
        InitHtml() ;
    //  Init the iterator
    zi = Z ;
    zi.Skipwhite() ;
    //  Bypass the doctype if present
    if (zi.Equiv("<!DOCTYPE"))
    {
        quote = 0 ;
        for (zi += 9 ; !zi.eof() ; zi++)
        {
            if (quote)
            {
                if (*zi == CHAR_DQUOTE)
                    quote = 0 ;
                continue ;
            }
            if (*zi == CHAR_MORE)
                { zi++ ; break ; }
            if (*zi == CHAR_DQUOTE)
                quote = 1 ;
        }
        zi.Skipwhite() ;
    }
    //  Look for the opening <html>
    for (; !zi.eof() ; zi++)
    {
        if (zi.Equiv("<html"))
        {
            m_pRoot = _proctag(0, zi, HTAG_HTML) ;
            if (!m_pRoot)
                { threadLog("Could not establist root node (the <html> tag)\n") ; return E_FORMAT ; }
            break ;
        }
    }
    if (!m_pRoot)
    {
        threadLog("No valid contents found before expected <html> tag - assuming a partial page\n") ;
        zi = Z ;
        zi.Skipwhite() ;
        pCN = new hzHtmElem() ;
        pCN->Init(this, 0, tagword, HTAG_NULL, m_vecTags.Count(), zi.Line()) ;
        m_vecTags.Add(pCN) ;
    }
    else
    {
        //  A <html> tag has been found so this is a full page. Look for <head> next
        for (; !zi.eof() ;)
        {
            if (zi.Equiv("<head"))
            {
                m_pHead = _proctag(m_pRoot, zi, HTAG_HEAD) ;
                if (!m_pHead)
                    { threadLog("Could not process <head> tag\n") ; return E_FORMAT ; }
                break ;
            }
            zi++ ;
        }
        if (!m_pHead)
            { threadLog("Expected a <head> tag\n") ; return E_FORMAT ; }
        pCN = m_pHead ;
        //  Now get the subtags of <head>
        for (; rc == E_OK && pCN && !zi.eof() ;)
        {
            //  Handle tag content
            if (*zi != CHAR_LESS)
            {
                //  Ignore certain constructs
                if (zi == "//")
                {
                    for (zi += 2 ; !zi.eof() && *zi != CHAR_NL ; zi++) ;
                    continue ;
                }
                //  If not part of a construct, just agregate the char to the current tag's content, striping leading whitespace
                if (*zi <= CHAR_SPACE && pCN->m_tmpContent.Size() == 0)
                    { zi++ ; continue ; }
                pCN->m_tmpContent.AddByte(*zi) ;
                zi++ ;
                continue ;
            }
            //  Ignore deleted text within comment (<!-- and -->) tags. Note these cannot be nested
            nLine = zi.Line() ;
            if (zi == "<!--[if")
            {
                for (zi += 7 ; !zi.eof() ; zi++)
                {
                    if (zi == "<![endif]>")     { zi += 10 ; break ; }
                    if (zi == "<![endif]-->")   { zi += 12 ; break ; }
                }
                continue ;
            }
            if (zi == "<![if")
            {
                for (zi += 5 ; !zi.eof() ; zi++)
                {
                    if (zi == "<![endif]>")     { zi += 10 ; break ; }
                    if (zi == "<![endif]-->")   { zi += 12 ; break ; }
                }
                continue ;
            }
            if (zi == "<!--")
            {
                for (zi += 4 ; !zi.eof() ; zi++)
                {
                    if (zi == "-->")
                        { zi += 3 ; break ; }
                }
                continue ;
            }
            //  Handle <![CDATA[...]]> block by converting the innards to straight data (apparently CDATA now legal in HTML)
            if (zi == "<![CDATA[")
            {
                for (zi += 9 ; !zi.eof() ; zi++)
                {
                    if (zi == "]]>")
                        { zi += 3 ; break ; }
                    pCN->m_tmpContent.AddByte(*zi) ;
                }
                continue ;
            }
            //  Eliminate <noscript> tags from header (we don't use them)
            if (zi == "<noscript")
            {
                for (zi += 9 ; !zi.eof() ; zi++)
                {
                    if (zi == "</noscript>")
                        { zi += 11 ; break ; }
                }
                if (zi.eof())
                    { threadLog("Unclosed <noscript> block\n") ; rc = E_FORMAT ; break ; }
                continue ;
            }
            //  At this point we have the '<' start of tag char. Establish whole and tagword of possible HTML tag
            wholetag.Clear() ;
            tagword.Clear() ;
    
            limit = zi ;
            limit++ ;
            W.AddByte(CHAR_LESS) ;
            bAnti = false ;
            if (*limit == CHAR_FWSLASH)
                { W.AddByte(CHAR_FWSLASH) ; bAnti = true ; limit++ ; }
            nColon = 0 ;
            for (tw_start = limit ; !limit.eof() ; limit++)
            {
                if (*limit == CHAR_COLON || IsAlphanum(*limit))
                {
                    if (*limit == CHAR_COLON)
                        nColon++ ;
                    T.AddByte(*limit) ;
                    W.AddByte(*limit) ;
                    continue ;
                }
                break ;
            }
            tagword = T ;
            T.Clear() ;
        
            for (; !limit.eof() ;)
            {
                W.AddByte(*limit) ;
                if (*limit == CHAR_DQUOTE || *limit == CHAR_SQUOTE)
                {
                    cDelim = *limit ;
                    for (limit++ ; !limit.eof() ; limit++)
                    {
                        if (*limit == CHAR_BKSLASH)
                        {
                            limit++ ;
                            if (*limit == cDelim)
                                continue ;
                        }
                        if (*limit == cDelim)
                            break ;
                    }
                }
                if (*limit == CHAR_MORE)
                    break ;
                limit++ ;
            }
            wholetag = W ;
            W.Clear() ;
            if (*limit != CHAR_MORE)
            {
                threadLog("Malformed tag (%s)\n", *wholetag) ;
                zi = limit ;
                continue ;
            }
            limit++ ;
            //tagword.ToLower() ;
            if (nColon)
            {
                if (!s_htagNam.Exists(tagword))
                {
                    tf.klas = HTCLASS_3RD ;
                    tf.rule = HTRULE_OPTION ;
                    tf.name = tagword ;
                    s_htagTyp.Insert(tf.type, tf) ;
                    s_htagNam.Insert(tf.name, tf) ;
                    threadLog("Inserted 3rd party HTML tag %s\n", *tagword) ;
                }
            }
            if (!s_htagNam.Exists(tagword))
            {
                if (bAnti)
                    threadLog("Line %d case 1 Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
                else
                    threadLog("Line %d Case 1 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
                pCN->m_tmpContent << wholetag ;
                zi = limit ;
                continue ;
            }
            tf = s_htagNam[tagword] ;
            if (tf.type == HTAG_NULL)
            {
                if (bAnti)
                    threadLog("Line %d case 2 Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
                else
                    threadLog("Line %d Case 2 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
                pCN->m_tmpContent << wholetag ;
                zi = limit ;
                continue ;
            }
            //  Obtain tag name
            if (bAnti == false)
            {
                if (zi.Equiv("<title>"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_TITLE) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <meta> tags\n", zi.Line()) ; }
                }
                else if (zi.Equiv("<meta"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_META) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <meta> tags\n", zi.Line()) ; }
                }
                else if (zi.Equiv("<style"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_STYLE) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <style> tags\n", zi.Line()) ; }
                }
                else if (zi.Equiv("<script"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_SCRIPT) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <script> tags\n", zi.Line()) ; }
                }
                else if (zi.Equiv("<link"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_LINK) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <link> tags\n", zi.Line()) ; }
                }
                else if (zi.Equiv("<base"))
                {
                    pCN = _proctag(m_pHead, zi, HTAG_BASE) ;
                    if (!pCN)
                        { rc = E_FORMAT ; threadLog("Line %d Could not process <link> tags\n", zi.Line()) ; }
                    //  Set m_Base
                    if (pCN->m_tmpContent.Size())
                        m_Base = pCN->m_tmpContent ;
                    else
                    {
                        //  set the m_Base to the first param
                        ai = pCN ;
                        if (ai.Value())
                            m_Base = ai.Value() ;
                        //  pAttr = pCN->GetFirstAttr() ;
                        //  if (pAttr)
                        //      m_Base = pAttr->value ;
                    }
                }
                else
                    { rc = E_FORMAT ; threadLog("Line %d Could not process <%s> tag within <head>\n", zi.Line(), *tagword) ; }
                continue ;
            }
            //  Handle antitag
            if (bAnti)
            {
                if (zi.Equiv("</head>"))
                    { zi += 7 ; break ; }
                //  Inactive (text rendering only) anti-tags
                if (tf.klas == HTCLASS_TXT)
                    { pCN->m_tmpContent << wholetag ; zi = limit ; continue ; }
                //  { zi = limit ; continue ; }
                zi = limit ;
                if (pCN->Type() == tf.type || tf.rule == HTRULE_SINGLE)
                    pCN = pCN->Parent() ;
                else
                {
                    threadLog("case 1 Tag mis-match. Current highest tag is <%s id=%d, level=%d> but on line %d we have an anti-tag for %s\n",
                        *Tagtype2Txt(pCN->Type()), pCN->GetUid(), pCN->Level(), zi.Line(), *Tagtype2Txt(tf.type)) ;
                    if (tf.rule == HTRULE_SINGLE)
                    {
                        //pCN = pX ;
                        pCN = pCN->Parent() ;
                        threadLog("Case 2 Corrected by allowing last tag as anti-tag\n") ;
                    }
                    if (pCN->Type() == HTAG_TBL_CEL && tf.type == HTAG_TR)
                    {
                        for (pX = pCN ; pX ; pX = pX->Parent())
                        {
                            if (pX->Type() == tf.type)
                            {
                                pCN = pX ;
                                threadLog("Corrected by decending to level %d\n", pCN->Level()) ;
                                break ;
                            }
                        }
                    }
                }
                continue ;
            }
            //  If none of the above just advance
            zi++ ;
        }
        //  Advance to the <body> tag
        for (; !zi.eof() ;)
        {
            if (zi.Equiv("<body"))
            {
                m_pBody = _proctag(m_pRoot, zi, HTAG_BODY) ;
                if (!m_pBody)
                    { threadLog("Expected an actual body\n") ; return E_FORMAT ; }
                break ;
            }
            zi++ ;
        }
        if (!m_pBody)
            { threadLog("Expected a <body> tag\n") ; return E_FORMAT ; }
        pCN = m_pBody ;
    }
    //
    //  Process document body. Here everything is either a tag, an anti-tag or it is tag-content. Both tags and antitags begin with a '<' so the
    //  raw HTML is iterated and whenever the < is found, it is tested for a known tag/antitag. In the general case of "<tag>content</tag>", the
    //  process is to call _procTag() to parse the tag, garner the attributes and to create a new element (which the current element is then set
    //  to). Bytes after the tag are agregated to the current element's content until the antitag occurs (at which point the current element is
    //  then set back to the parent tag).
    //
    //  The exceptions to the general case:-
    //
    //  1)  Paragraph tags can be left open (antitag omited). These tags are closed by the parent antitag or by another paragraph tag.
    //
    //  2)  Print control tags which are completely ignored. These can never become the current tag so any content they have is aggregated to
    //      their parent tag.
    //
    //  3)  Links which do become current, but will have thier content aggregated to the parent tag.
    //
    for (; pCN && !zi.eof() ;)
    {
        //  Handle tag content
        if (*zi != CHAR_LESS)
        {
            if (pCN->Type() != HTAG_ANCHOR)
            {
                if (*zi <= CHAR_SPACE && pCN->m_tmpContent.Size() == 0)
                    { zi++ ; continue ; }
                pCN->m_tmpContent.AddByte(*zi) ;
            }
            else
            {
                if (pCN->Parent())
                    pCN->Parent()->m_tmpContent.AddByte(*zi) ;
            }
            zi++ ;
            continue ;
        }
        //  Ignore deleted text within <strike></strike> tags
        nLine = zi.Line() ;
        if (zi == "<strike>")
        {
            for (zi += 8 ; !zi.eof() ; zi++)
            {
                if (zi == "</strike>")
                    { zi += 9 ; break ; }
            }
            if (zi.eof())
                { threadLog("Unclosed comment block\n") ; rc = E_FORMAT ; break ; }
            continue ;
        }
        if (zi == "<fb:like>")
        {
            for (zi += 9 ; !zi.eof() ; zi++)
            {
                if (zi == "</fb:like>")
                    { zi += 10 ; break ; }
            }
            if (zi.eof())
                { threadLog("Facebook special\n") ; rc = E_FORMAT ; break ; }
            continue ;
        }
        if (zi == "<g:plusone>")
        {
            for (zi += 11 ; !zi.eof() ; zi++)
            {
                if (zi == "</g:plusone>")
                    { zi += 12 ; break ; }
            }
            if (zi.eof())
                { threadLog("Google special\n") ; rc = E_FORMAT ; break ; }
            continue ;
        }
        //  Ignore deleted text within comment (<!-- and -->) tags
        if (zi == "<!--[if")
        {
            for (zi += 7 ; !zi.eof() ; zi++)
            {
                if (zi == "<![endif]>")     { zi += 10 ; break ; }
                if (zi == "<![endif]-->")   { zi += 12 ; break ; }
            }
            if (zi.eof())
                { threadLog("Unterminated <!--[if cond]..> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
            continue ;
        }
        if (zi == "<![if")
        {
            for (zi += 5 ; !zi.eof() ; zi++)
            {
                if (zi == "<![endif]>")     { zi += 10 ; break ; }
                if (zi == "<![endif]-->")   { zi += 12 ; break ; }
            }
            if (zi.eof())
                { threadLog("Unterminated <![if cond]..> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
            continue ;
        }
        if (zi == "<!--")
        {
            for (zi += 4 ; !zi.eof() ; zi++)
            {
                if (zi == "-->")
                    { zi += 3 ; break ; }
            }
            if (zi.eof())
                { threadLog("Unterminated <!--> tag starting line %d\n", nLine) ; rc = E_FORMAT ; break ; }
            continue ;
        }
        /*
        **  At this point we have the '<' start of tag char. Establish whole and tagword of possible HTML tag
        */
        wholetag.Clear() ;
        tagword.Clear() ;
    
        limit = zi ;
        limit++ ;
        W.AddByte(CHAR_LESS) ;
        bAnti = false ;
        if (*limit == CHAR_FWSLASH)
            { W.AddByte(CHAR_FWSLASH) ; bAnti = true ; limit++ ; }
        nColon = 0 ;
        for (tw_start = limit ; !limit.eof() ; limit++)
        {
            if (*limit == CHAR_COLON || IsAlphanum(*limit))
            {
                if (*limit == CHAR_COLON)
                    nColon++ ;
                T.AddByte(*limit) ;
                W.AddByte(*limit) ;
                continue ;
            }
            break ;
        }
        tagword = T ;
        T.Clear() ;
        
        for (; !limit.eof() ;)
        {
            W.AddByte(*limit) ;
            if (*limit == CHAR_DQUOTE || *limit == CHAR_SQUOTE)
            {
                cDelim = *limit ;
                for (limit++ ; !limit.eof() ; limit++)
                {
                    if (*limit == CHAR_BKSLASH)
                    {
                        limit++ ;
                        if (*limit == cDelim)
                            continue ;
                    }
                    if (*limit == cDelim)
                        break ;
                }
            }
            if (*limit == CHAR_MORE)
                break ;
            limit++ ;
        }
        wholetag = W ;
        W.Clear() ;
        if (*limit != CHAR_MORE)
        {
            threadLog("Malformed tag (%s)\n", *wholetag) ;
            zi = limit ;
            continue ;
        }
        tagword.ToLower() ;
        if (nColon)
        {
            if (!s_htagNam.Exists(tagword))
            {
                tf.klas=HTCLASS_3RD ;
                tf.rule=HTRULE_OPTION ;
                tf.name = tagword ;
                s_htagTyp.Insert(tf.type, tf) ;
                s_htagNam.Insert(tf.name, tf) ;
                threadLog("Inserted 3rd party HTML tag %s\n", *tagword) ;
            }
        }
        //  if (bAnti)
        //      threadLog("Case 2 line %d Doing antitag %s\n", zi.Line(), *tagword) ;
        //  else
        //      threadLog("Case 2 line %d Doing tag %s\n", zi.Line(), *tagword) ;
        tf = s_htagNam[tagword] ;
        if (tf.type == HTAG_NULL)
        {
            //  Unrecognized tags are just made part of the content of the currently applicable tag
            if (bAnti)
                threadLog("Line %d Unknown lookup anti-tag </%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
            else
                threadLog("Line %d Case 3 Unknown lookup tag <%s> (%d bytes)\n", zi.Line(), *tagword, wholetag.Length()) ;
            pCN->m_tmpContent << wholetag ;
            zi = limit ;
            continue ;
        }
        if (bAnti == false)
        {
            //  Ignore graphic tags
            if (tf.klas == HTCLASS_IMG)
                { zi = limit ; continue ; }
            //  Ignore self-closed 'system' tags
            if (tf.klas == HTCLASS_SYS)
            {
                if (tf.type == HTAG_EMBED)
                    pCN->m_tmpContent << "<embed/>" ;
                if (tf.type == HTAG_NOEMBED)
                    pCN->m_tmpContent << "<noembed/>" ;
                for (; !zi.eof() ; zi++)
                {
                    if (*zi == CHAR_MORE)
                        { zi++ ; break ; }
                }
                threadLog("Line %d Bypassed system tag <%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
                zi = limit ;
                continue ;
            }
            //  Handle HTCLASS_TXT 'in-content' tags. We just copy these through, complete with tag, antitag and content, to the content of the
            //  current tag. However these tags should still be placed in the m_mapTags and m_vecTags member.
            if (tf.klas == HTCLASS_TXT) // || tf.type == HTAG_ANCHOR)
            {
                pCN->m_tmpContent << wholetag ; zi = limit ;
                continue ;
            }
            //  If we are suppressing anchors, we only want the content of a <a href=...>...</a> sequence.
            //if (m_bOpflags & HDOC_SUPPRESS_LINKS && tf.klas == HTCLASS_LNK && tf.type == HTAG_ANCHOR)
            //if (bFlags & HDOC_ONLOAD_LINKS && tf.klas == HTCLASS_LNK && tf.type == HTAG_ANCHOR)
            //  { zi = limit ; continue ; }
            //  Eliminate scripts (may revisit)
            if (zi.Equiv("<script"))
            {
                //  plog->Out("%s. ignoring a script tag ...\n", __FUNCTION__) ;
                for (tmp = zi ; !tmp.eof() ; tmp++)
                {
                    if (tmp.Equiv("</script>"))
                        { tmp += 9 ; zi = tmp ; break ; }
                }
                if (zi.eof())
                    { threadLog("Unclosed script tag\n") ; rc = E_FORMAT ; break ; }
                continue ;
            }
            /*
            **  Process 'data structure' tags into nodes. These are tables (with there rows and columns) but also menus
            **  and ordered and unordered lists.
            */
            pNN = 0 ;
            pNN = _proctag(pCN, zi, tf.type) ;
            if (!pNN)
            {
                threadLog("No node allocated for tag <%s>\n", *Tagtype2Txt(tf.type)) ;
                return E_FORMAT ;
            }
            pCN = pNN ;
            zi = limit ;
            /*
            **  Handle the <input> tag. As this is it's own anti-tag it has no content, only parameters. We need to include the tag
            **  in the tree as it is active, but we need to effect the anti-tag aspect as well (so the level is not raised)
            */
            if (tf.type == HTAG_INPUT)
                pCN = pCN->Parent() ;
            continue ;
        }
        //  Handle anti-tags
        if (bAnti)
        {
            //  Inactive (text rendering only) anti-tags
            if (tf.klas == HTCLASS_TXT) // || tf.type == HTAG_ANCHOR)
                { pCN->m_tmpContent << wholetag ; zi = limit ; continue ; }
            //  Ignore self-closed 'system' tags
            if (tf.klas == HTCLASS_SYS)
            {
                if (tf.type == HTAG_EMBED)
                    pCN->m_tmpContent << "</embed>" ;
                if (tf.type == HTAG_NOEMBED)
                    pCN->m_tmpContent << "</noembed>" ;
                for (; !zi.eof() ; zi++)
                {
                    if (*zi == CHAR_MORE)
                        { zi++ ; break ; }
                }
                threadLog("Line %d Bypassed system anti-tag <%s> (%s)\n", zi.Line(), *tagword, *wholetag) ;
                zi = limit ;
                continue ;
            }
            zi = limit ;
            if (pCN->Type() == tf.type || tf.rule == HTRULE_SINGLE)
                pCN = pCN->Parent() ;
            else
            {
                threadLog("case 2 Tag mis-match. Current highest tag is <%s id=%d, level=%d> but on line %d we have an anti-tag for %s\n",
                    *Tagtype2Txt(pCN->Type()), pCN->GetUid(), pCN->Level(), zi.Line(), *Tagtype2Txt(tf.type)) ;
                if (tf.rule == HTRULE_SINGLE)
                {
                    //pCN = pX ;
                    pCN = pCN->Parent() ;
                    threadLog("Case 1 Corrected by allowing last tag as anti-tag\n") ;
                }
                if (pCN->Type() == HTAG_TBL_CEL && tf.type == HTAG_TR)
                {
                    for (pX = pCN ; pX ; pX = pX->Parent())
                    {
                        if (pX->Type() == tf.type)
                        {
                            pCN = pX ;
                            threadLog("Corrected by decending to level %d\n", pCN->Level()) ;
                            break ;
                        }
                    }
                }
            }
            continue ;
        }
        threadLog("HANDLING ABD %s (%s)\n", *tagword, *wholetag) ;
    }
    if (pCN)
        threadLog("End of file encountered whilst inside tag definition\n") ;
    //  Move thru the tags in thier order of appearence and reduce where appropriate, the tag content held in chains to strings. Place forms in
    //  the list of forms and place form field tags with thier host forms.
    for (nX = 0 ; nX < m_vecTags.Count() ; nX++)
    {
        pX = m_vecTags[nX] ;
        if (pX->Type() == HTAG_FORM)
        {
            //  Add the form to to m_Forms and set this to the current form
            pCurForm = pX ;
            pForm = new hzHtmForm() ;
            m_Forms.Add(pForm) ;
            continue ;
        }
        if (pCurForm)
        {
            if (pX->Type() == HTAG_INPUT)
            {
                //  Add this field to the current form (report error if not in a current form)
                if (pX->Line() < pCurForm->Anti())
                {
                    P.name = pX->Name() ;
                    //  for (pAttr = pX->GetFirstAttr() ; pAttr ; pAttr = pAttr->next)
                    //  {
                    //      if (pAttr->name == "value")
                    //          { P.value = pAttr->value ; break ; }
                    //  }
                    for (ai = pX ; ai.Valid() ; ai.Advance())
                    {
                        anam = ai.Name() ; aval = ai.Value() ;
                        if (anam == "value")
                            { P.value = aval ; break ; }
                    }
                    pForm->fields.Add(P) ;
                }
                continue ;
            }
            if (pX->Line() > pCurForm->Anti())
                pCurForm = 0 ;
        }
    }
    threadLog("END OF LOAD page has %d links\n", m_vecLinks.Count()) ;
    return rc ;
}
hzEcode hzDocHtml::Load (const char* fpath)
{
    //  Load an HTML document from a file. The whole file is read into a working chain, which is then handed to the chain based Load() function for parsing
    //  into the tree of HTML elements.
    //
    //  Arguments:  1)  fpath   Source file of HTML document
    //
    //  Returns:    Any error code from OpenInputStrm() if the file could not be opened. This function does not test the path itself. The codes listed
    //              historically for this case are E_ARGUMENT, E_NOTFOUND, E_NODATA and E_OPENFAIL (to be confirmed against OpenInputStrm).
    //              Otherwise the result of the chain based Load(), e.g. E_FORMAT if a format error caused the load to fail
    //              E_OK        If the operation is successful

    _hzfunc("hzDocHtml::Load") ;    //  Was mis-labelled as hzDocXml::Load, which mis-attributed this function in diagnostics

    ifstream    is ;    //  Input stream
    hzChain     Z ;     //  Chain for holding file content
    hzEcode     rc ;    //  Return code

    //  Load document into a working chain
    rc = OpenInputStrm(is, fpath) ;
    if (rc != E_OK)
        return rc ;

    Z << is ;
    is.close() ;

    //  Parse the content
    return Load(Z) ;
}
hzHtmElem*  hzHtmElem::GetFirstChild    (void) const
{
    //  Return the first child of this element. The m_Children member is used as a one-based position in the host document's node array, with zero meaning
    //  that the element has no children.
    //
    //  Arguments:  None
    //
    //  Returns:    Pointer to the first child element
    //              NULL if this element has no children

    _hzfunc("hzHtmElem::GetFirstChild") ;

    //  An element cannot be resolved without its host document
    if (!m_pHostDoc)
        hzexit(E_NOINIT, "Element %s: Node has no host document", *m_Name) ;

    if (m_Children)
        return m_pHostDoc->m_arrNodes.InSitu(m_Children-1) ;

    return 0 ;
}
hzHtmElem*  hzHtmElem::Sibling  (void) const
{
    //  Return the next sibling of this element. The m_Sibling member is used as a one-based position in the host document's node array, with zero meaning
    //  that there is no further sibling.
    //
    //  Arguments:  None
    //
    //  Returns:    Pointer to the sibling element
    //              NULL if this element has no sibling

    _hzfunc("hzHtmElem::Sibling") ;

    //  An element cannot be resolved without its host document
    if (!m_pHostDoc)
        hzexit(E_NOINIT, "Element %s: Node has no host document", *m_Name) ;

    if (m_Sibling)
        return m_pHostDoc->m_arrNodes.InSitu(m_Sibling-1) ;

    return 0 ;
}
hzHtmElem*  hzHtmElem::Parent   (void) const
{
    //  Return the parent of this element. The m_Parent member is used as a one-based position in the host document's node array, with zero meaning that
    //  the element has no parent.
    //
    //  Arguments:  None
    //
    //  Returns:    Pointer to the parent element
    //              NULL if this element has no parent

    _hzfunc("hzHtmElem::Parent") ;

    //  An element cannot be resolved without its host document
    if (!m_pHostDoc)
        hzexit(E_NOINIT, "Element %s: Node has no host document", *m_Name) ;

    if (m_Parent)
        return m_pHostDoc->m_arrNodes.InSitu(m_Parent-1) ;

    return 0 ;
}
hzDocHtml*  hzHtmElem::GetTree  (void)
{
    //  Return the HTML document whose tree of HTML elements this hzHtmElem is a part. We start at the current node and follow the parentage back until a
    //  node of level zero is reached. The parent of that node, cast to a hzDocHtml pointer, is what is returned.
    //
    //  Arguments:  None
    //
    //  Returns:    Pointer to the parent of the level zero node of the tree to which the current node (element) belongs, as a hzDocHtml pointer
    //
    //  Note:       Fatal() is called if this node, or any node met on the way up (including the level zero node), has no parent. This assumes that
    //              Fatal() does not return.

    hzHtmElem*  pN ;    //  Current tree node

    if (!m_Parent)
        Fatal("hzHtmElem::GetTree. 1. Tag %s (line %d, level %d) has no parent\n", *m_Name, m_nLine, m_nLevel) ;

    //  Climb to the level zero node. The parent check has to be made on every step: Formerly a stray semicolon after the for statement left the loop with
    //  an empty body, so a node with a level but no parent led to a NULL pointer being dereferenced by the loop condition.
    for (pN = this ; pN->m_nLevel ; pN = pN->Parent())
    {
        if (!pN->m_Parent)
            Fatal("hzHtmElem::GetTree. 2. Tag %s (line %d, level %d) has no parent\n", *pN->m_Name, pN->m_nLine, pN->m_nLevel) ;
    }

    //  The level zero node must itself have a parent (this is the one check the former code actually performed)
    if (!pN->m_Parent)
        Fatal("hzHtmElem::GetTree. 2. Tag %s (line %d, level %d) has no parent\n", *pN->m_Name, pN->m_nLine, pN->m_nLevel) ;

    return (hzDocHtml*) pN->Parent() ;
}
uint32_t    hzHtmElem::_testnode    (hzVect<hzHtmElem*>& tmpResult, const char* srchExp, uint32_t& nLimit, uint32_t nLevel, bool bLog)
{
    //  Recursive support function to the non-recursive FindSubnodes function.
    //
    //  The first part of the search expression is split up as follows:-
    //
    //      1)  The node/tag name:      The leading run of alphabetic characters.
    //      2)  A child name:           Optional. A period followed by a run of alphabetic characters. Where present, the expression from the child name
    //                                  onwards is what is applied to the children of a node that matches the node name.
    //      3)  An attribute name:      Optional. The sequence -> followed by the attribute name.
    //      4)  A required value:       Optional. The sequence =" followed by the value and terminated by a double quote. The value is compared to the
    //                                  attribute value and so only has an effect where an attribute name is given.
    //
    //  We now apply the test to the current node and when required, to the children. We do not operate where nodes are at a higher level than the limit.
    //  This is because the FindSubnodes function is looking for the set of nodes matching the search expression that are found at the lowest level. The
    //  limit is set to the level of a node at the point it is added to the result.
    //
    //  Arguments:  1)  tmpResult   Vector of HTML elements this function will add to
    //              2)  srchExp     HTML element selection criteria
    //              3)  nLimit      Depth limit for probing of child nodes (zero for no limit, set by this function upon a match)
    //              4)  nLevel      Depth level of this call (passed on incremented to recursive calls, not otherwise used)
    //              5)  bLog        Print log flag
    //
    //  Returns:    Number of elements added during this call on this element

    _hzfunc("hzHtmElem::_testnode") ;

    hzChain         Z ;                 //  For extracting search expression components
    hzHtmElem*      pNode ;             //  Node to be returned
    const char*     i ;                 //  Search expression iterator
    const char*     cpNext = 0 ;        //  Next part of search expression if present
    hzAttrset       ai ;                //  Attribute iterator
    hzString        cont ;              //  Convert element's content to temp string
    hzString        reqNode_name ;      //  Required name of node
    hzString        reqChild_name ;     //  Required name of node child
    hzString        reqNode_cont ;      //  Required content of node
    hzString        reqAttr_name ;      //  Required name of attribute
    hzString        reqAttr_value ;     //  Required value of attribute
    hzString        anam ;              //  Attribute name
    hzString        aval ;              //  Attribute value
    uint32_t        nTotal ;            //  Total nodes found matching search expression
    bool            bFound ;            //  Does this node pass this part of search expression

    //  Without a search expression nothing can match
    if (!srchExp)
    {
        if (bLog)
            threadLog("No search expression supplied\n") ;
        return 0 ;
    }

    //  If we are already at too high a level, return
    if (nLimit && (m_nLevel > nLimit))
    {
        if (bLog)
            threadLog("\t-> Out of range, returning 0\n") ;
        return 0 ;
    }

    //  Get required name of node
    for (i = srchExp ; IsAlpha(*i) ; i++)
        Z.AddByte(*i) ;
    reqNode_name = Z ;
    Z.Clear() ;

    //  Get name of child node if applicable. Note cpNext is set to the start of the child name, so that the child name becomes the node name in the
    //  recursive calls on the children.
    if (*i == CHAR_PERIOD)
    {
        i++ ;
        if (!IsAlpha(*i))
        {
            if (bLog)
                threadLog("Malformed criteria (%s)\n", srchExp) ;
            return 0 ;
        }
        cpNext = i ;
        for (; IsAlpha(*i) ; i++)
            Z.AddByte(*i) ;
        reqChild_name = Z ;
        Z.Clear() ;
    }

    //  Get name of attribute if applicable
    if (i[0] == CHAR_MINUS && i[1] == CHAR_MORE)
    {
        for (i += 2 ; IsUrlnorm(*i) ; i++)
            Z.AddByte(*i) ;
        reqAttr_name = Z ;
        Z.Clear() ;
    }

    //  An equal sign introduces the required attribute value. The equal sign and the character after it (expected to be the opening double quote), are
    //  skipped and the value runs to the closing double quote. The null terminator is tested at every step: Formerly an expression ending at the equal
    //  sign or lacking the closing quote, caused a read beyond the end of the expression.
    if (*i == CHAR_EQUAL)
    {
        i++ ;
        if (*i)
            i++ ;
        for (; *i && *i != CHAR_DQUOTE ; i++)
            Z.AddByte(*i) ;
        reqAttr_value = Z ;
        Z.Clear() ;
    }

    /*
    if (bLog)
    {
        threadLog("On-node [%s] (%d) Testing node with reqNode_name=%s, reqChild_name=%s, reqAttr_name=%s, reqAttr_value=%s level=%d, slct=%s\n",
            *Lineage(), m_nLevel, *reqNode_name, *reqChild_name, *reqAttr_name, *reqAttr_value, nLevel, srchExp) ;
        for (pNode = m_Children ; pNode ; pNode = pNode->m_Sibling)
            threadLog("\t-> child: %s\n", *pNode->m_Name) ;
    }
    */

    //  Now we have the first part of the search expression, we test to see if this node meets this. If it does we still have to establish if
    //  the remainder of the search expression (if it exists) is satisfied.
    bFound = false ;
    if (m_Name == reqNode_name)
    {
        //  We are on the specified node so if the value is not right, any named attribute does not exist or it does but with the
        //  wrong value, we return a zero (to end the examination of this branch of nodes)
        bFound = true ;

        if (!reqChild_name)
        {
            //  No child node has been specified so this node must be the last to check

            //  NOTE(review): reqNode_cont is never populated by the parsing above, so this content test does not currently come into play.
            if (reqNode_cont)
            {
                cont = m_tmpContent ;
                if (reqNode_cont != cont)
                    return 0 ;
            }

            if (reqAttr_name)
            {
                //  See if we can find an attribute of the required name (and value if one is specified) on this node
                for (ai = this ; ai.Valid() ; ai.Advance())
                {
                    anam = ai.Name() ; aval = ai.Value() ;

                    if (bLog)
                        threadLog("Compare attr names (%s to param->name of %s)\n", *reqAttr_name, *anam) ;

                    if (anam == reqAttr_name)
                    {
                        if (bLog)
                            threadLog("Found a attr name match ") ;

                        if (reqAttr_value)
                        {
                            if (reqAttr_value != aval)
                            {
                                if (bLog)
                                    threadLog("but not a pvalue match (%s not param->val of %s)\n", *reqAttr_value, *aval) ;
                                continue ;
                            }
                        }

                        if (bLog)
                            threadLog(" - bingo\n") ;
                        break ;
                    }
                }

                //  If the attributes are exhausted there was no match, so the node fails. Formerly this test was absent and the node was accepted
                //  whatever attributes it had.
                if (!ai.Valid())
                {
                    if (bLog)
                        threadLog("Oops - run out of params\n") ;
                    return 0 ;
                }
            }
        }
    }

    if (bFound)
    {
        /*
        **  Now we have passed the first part of the search expression, we can add this node to the results if there is no further search expression. But
        **  if there is, we have to establish if the remainder of the search expression is satisfied. This will necessitate a recursive call of
        **  this function for each and every child of this node with the search expression pointer advanced. Only if at least one of these calls
        **  succeeds (returns a positive integer for nodes added to the result), can this call succeed.
        */

        if (!cpNext)
        {
            //threadLog("\tMatched. Adding %s at level %d and position %d to array\n", *Lineage(), m_nLevel, tmpResult.Count()) ;
            nLimit = m_nLevel ;
            tmpResult.Add(this) ;
            return 1 ;
        }

        //  Test children on the further search expression
        nTotal = 0 ;
        for (pNode = GetFirstChild() ; pNode ; pNode = pNode->Sibling())
        {
            //  if (!pNode->IsAncestor(this))
            //      Fatal("Case 2: Proported child failes to be ancestor of this\n") ;

            if (nLimit && (pNode->m_nLevel > nLimit))
                continue ;

            nTotal += pNode->_testnode(tmpResult, cpNext, nLimit, nLevel + 1, bLog) ;
        }

        return nTotal ;
    }

    /*
    **  This node does not have the required name and so does not meet the first part of the search expression. However a child might meet the
    **  search expression so we try each child that has the required name in turn.
    */

    nTotal = 0 ;
    for (pNode = GetFirstChild() ; pNode ; pNode = pNode->Sibling())
    {
        if (nLimit && (pNode->m_nLevel > nLimit))
            continue ;

        if (pNode->Name() == reqNode_name)
            nTotal += pNode->_testnode(tmpResult, srchExp, nLimit, nLevel + 1, bLog) ;
    }

    return nTotal ;
}
void    hzHtmElem::FindSubnodes (hzVect<hzHtmElem*>& result, const char* srchExp, bool bLog)
{
    //  Starting at this node (the node used to call this member function), gather the nodes that satisfy the supplied search expression. The search itself is
    //  carried out by the recursive _testnode function, so matching nodes may lie at any depth below this node and not only among its immediate children.
    //
    //  Any previous content of the result vector is discarded before the search begins. It is a fatal error for this node not to belong to a document tree.
    //
    //  NOTE(review): The syntax of the search expression is whatever _testnode parses. The earlier version of this header described it as one or more
    //  name-value pairs as listed below. This should be confirmed against _testnode:-
    //
    //      1)  name="some_name";       - Only applies if the element is given an id which is often not the case
    //      2)  type="html_tagtype";    - The element is of the right type, eg <table>
    //      3)  class="class_value";    - The element has the given class value
    //      4)  pname="param_name";     - The element has the parameter
    //      5)  pvalue="param_value";   - The element has the parameter value
    //      6)  cont="content_value";   - The element has contents of the given value
    //
    //  Arguments:  1)  result      The vector that receives the nodes found
    //              2)  srchExp     Search expression
    //              3)  bLog        Set if detailed logging is required (passed through to _testnode)
    //
    //  Returns:    None

    hzDocHtml* const    pOwner = GetTree() ;    //  The document tree holding this node
    uint32_t            levelCap = 0 ;          //  Level limit handed to _testnode. Zero means no limit has been set

    //  A node without a tree cannot be searched
    if (!pOwner)
        Fatal("No tree - aborting\n") ;

    //  Start with an empty result and let _testnode do the recursion from nesting level 0
    result.Clear() ;
    _testnode(result, srchExp, levelCap, 0, bLog) ;
}
uint32_t    hzDocHtml::ExtractLinksBasic    (hzVect<hzUrl>& links, const hzSet<hzString>& domains, const hzString& form)
{
    //  Filter the links already held by this document (m_vecLinks) and place those that pass in the supplied vector. The vector is emptied first.
    //
    //  A link passes if it is non-empty, if its domain is in the supplied set of domains (only tested when the set is populated) and if it satisfies the
    //  supplied form (only tested when a form is given). With neither domains nor form supplied, every non-empty link is passed.
    //
    //  This function only filters m_vecLinks. It does not examine the page content.
    //
    //  Arguments:  1)  links:      The vector that receives the URLs (links) passing the filters
    //              2)  domains:    The set of domains that links must belong to in order to be included
    //              3)  form:       The search criteria, if any, applied to the link by FormCheckCstr
    //
    //  Returns:    Number of links that meet the supplied criteria

    hzUrl       candidate ;     //  Link currently under test
    uint32_t    pos ;           //  Position within m_vecLinks

    links.Clear() ;

    for (pos = 0 ; pos < m_vecLinks.Count() ; pos++)
    {
        candidate = m_vecLinks[pos] ;

        //  Empty links are ignored (there should not be any)
        if (!candidate)
            continue ;

        //  With a list of acceptable domains in force (usually the website domain only), links to other domains are ignored
        if (domains.Count() && !domains.Exists(candidate.Domain()))
            continue ;

        //  With criteria in force, the link must satisfy them
        if (form && !FormCheckCstr(*candidate, *form))
            continue ;

        links.Add(candidate) ;
    }

    return links.Count() ;
}
uint32_t    hzDocHtml::ExtractLinksContent  (hzMapS<hzUrl,hzString>& links, const hzSet<hzString>& domains, const hzString& criteria)
{
    //  Build a map of link URL to link content from the anchor tags of this document. The supplied map is emptied first.
    //
    //  Every tag in m_vecTags of type HTAG_ANCHOR is examined and each of its href attributes is considered as a link. A link is inserted in the map, together
    //  with the content of the anchor tag, if it is non-empty, if its domain is in the supplied set of domains (only tested when the set is populated) and if
    //  it satisfies the supplied criteria (only tested when criteria are given). With neither domains nor criteria supplied, every non-empty link is inserted.
    //
    //  Arguments:  1) links:       The map of URLs (links) to link content that this function populates
    //              2) domains:     The set of domains that links must belong to in order to be included
    //              3) criteria:    The search criteria, if any, applied to the link by FormCheckCstr
    //
    //  Returns:    Number of entries in the map once all anchor tags have been examined

    hzHtmElem*      pAnchor ;   //  Tag currently under examination
    hzAttrset       attrs ;     //  Attribute iterator
    hzString        attrName ;  //  Name of current attribute
    hzString        text ;      //  Content of the anchor tag
    hzUrl           target ;    //  URL held by the href attribute
    uint32_t        pos ;       //  Position within m_vecTags

    links.Clear() ;

    for (pos = 0 ; pos < m_vecTags.Count() ; pos++)
    {
        pAnchor = m_vecTags[pos] ;

        //  Only anchor tags carry links
        if (pAnchor->Type() != HTAG_ANCHOR)
            continue ;

        for (attrs = pAnchor ; attrs.Valid() ; attrs.Advance())
        {
            //  Only the href attribute is of interest
            attrName = attrs.Name() ;
            if (!attrName.Equiv("href"))
                continue ;

            target = attrs.Value() ;

            //  Empty links are ignored (there should not be any)
            if (!target)
                continue ;

            //  With a list of acceptable domains in force (usually the website domain only), links to other domains are ignored
            if (domains.Count() && !domains.Exists(target.Domain()))
                continue ;

            //  With limiting criteria in force, the link must satisfy them
            if (criteria && !FormCheckCstr(*target, *criteria))
                continue ;

            text = pAnchor->m_tmpContent ;
            links.Insert(target, text) ;
        }
    }

    return links.Count() ;
}
hzEcode hzDocHtml::Import   (const hzString& path)
{
    //  Read the named file into a chain and hand the chain to Load() to be turned into a tree of HTML nodes.
    //
    //  Arguments:  1)  path    The full pathname of the file to load
    //
    //  Returns:    The error code from OpenInputStrm if the file could not be opened, otherwise the outcome of Load(). The codes previously documented for
    //              this function (NOTE(review): these originate in OpenInputStrm and Load, neither of which is defined here - confirm) are:-
    //
    //              E_ARGUMENT  If no file path is supplied
    //              E_NOTFOUND  If the file does not exist
    //              E_NODATA    If the file is empty
    //              E_OPENFAIL  If the file cannot be read
    //              E_FORMAT    If a format error caused the file load to fail
    //              E_OK        If the operation is successful

    _hzfunc("hzDocHtml::Import") ;

    ifstream    input ;     //  Input stream
    hzChain     content ;   //  Chain for holding file content
    hzEcode     rc ;        //  Return code

    //  Path checking and opening of the file are both left to OpenInputStrm
    rc = OpenInputStrm(input, path) ;
    if (rc != E_OK)
        return rc ;

    //  Take in the whole file, then build the document from it
    content << input ;
    input.close() ;

    rc = Load(content) ;
    return rc ;
}
void    hzDocHtml::_report  (hzLogger& xlog, hzHtmElem* node)
{
    //  Category:   Diagnostics
    //
    //  Recursive support function for the non-recursive hzDocHtml::Report. For the supplied node this writes, in order: the opening tag with its attributes,
    //  the report of every child node, the content of the node (if any is left after trimming) and the closing tag. Each line is prefixed by the level of the
    //  node and indented by one step per level.
    //
    //  Arguments:  1)  xlog    The logfile to write report to
    //              2)  node    The starting node
    //
    //  Returns:    None

    hzChain         text ;      //  Node content as it will be reported
    chIter          ci ;        //  Content iterator
    hzAttrset       attrs ;     //  Attribute iterator
    hzHtmElem*      pChild ;    //  Child nodes
    int             depth ;     //  Indent countdown

    if (!node)
    {
        xlog.Out("hzDocHtml::_report: ERROR No HTML element suppled\n") ;
        return ;
    }

    //  Opening tag: level, indent, tag name and then the attributes as name="value"
    xlog.Out("%2d: ", node->Level()) ;
    depth = node->Level() ;
    while (depth)
    {
        xlog << ". " ;
        depth-- ;
    }

    xlog.Out("<%s", *Tagtype2Txt(node->Type())) ;
    for (attrs = node ; attrs.Valid() ; attrs.Advance())
        xlog.Out(" %s=\"%s\"", attrs.Name(), attrs.Value()) ;
    xlog << ">\n" ;

    //  Child nodes are reported before the content of this node
    for (pChild = node->GetFirstChild() ; pChild ; pChild = pChild->Sibling())
        _report(xlog, pChild) ;

    //  Content of this node
    if (node->m_tmpContent.Size())
    {
        //  Step over leading chars that are no greater than a space
        ci = node->m_tmpContent ;
        while (!ci.eof() && *ci <= CHAR_SPACE)
            ci++ ;

        //  Copy the remainder but leave out CR/NL sequences
        while (!ci.eof())
        {
            if (ci == "\r\n")
                ci++ ;                  //  Step over the CR. The advance below steps over the NL
            else
                text.AddByte(*ci) ;
            ci++ ;
        }

        if (text.Size())
        {
            xlog.Out("%2d: ", node->Level()) ;
            depth = node->Level() ;
            while (depth)
            {
                xlog << "  " ;
                depth-- ;
            }
            xlog << "[" << text << "]\n" ;
        }
    }

    //  Closing tag: level, indent and tag name
    xlog.Out("%2d: ", node->Level()) ;
    depth = node->Level() ;
    while (depth)
    {
        xlog << ". " ;
        depth-- ;
    }
    xlog.Out("</%s>\n", *Tagtype2Txt(node->Type())) ;
}
void    hzDocHtml::Report   (hzLogger& xlog)
{
    //  Write a two part diagnostic report of this document to the supplied logfile. The first part is a flat list with one line per tag in m_vecTags, giving
    //  the ids of the tag, its parent, its first child and its next sibling (0 where there is none), then the level, tag type and content. The second part is
    //  the tree of nodes from the root down, as written by _report.
    //
    //  Arguments:  1)  xlog    The logfile to write report to
    //  Returns:    None

    _hzfunc("hzDocHtml::Report") ;

    hzHtmElem*  pTag ;          //  Current tag
    hzHtmElem*  pUp ;           //  Parent of current tag
    hzHtmElem*  pDown ;         //  First child of current tag
    hzHtmElem*  pNext ;         //  Next sibling of current tag
    hzString    text ;          //  Tag content holder
    uint32_t    pos ;           //  Position within m_vecTags

    //  Part 1: Flat list of tags
    if (!m_vecTags.Count())
        xlog.Out("PAGE is EMPTY - No nodes in Vector\n") ;

    for (pos = 0 ; pos < m_vecTags.Count() ; pos++)
    {
        pTag = m_vecTags[pos] ;
        pUp = pTag->Parent() ;
        pDown = pTag->GetFirstChild() ;
        pNext = pTag->Sibling() ;
        text = pTag->m_tmpContent ;

        xlog.Out("id=%d par=%d subs=%d nxt=%d lev=%d: %s [%s]\n",
            pTag->GetUid(),
            pUp ? pUp->GetUid() : 0,
            pDown ? pDown->GetUid() : 0,
            pNext ? pNext->GetUid() : 0,
            pTag->Level(),
            *Tagtype2Txt(pTag->Type()),
            *text) ;
    }

    //  Part 2: Tree of nodes plus content
    if (m_pRoot)
        _report(xlog, m_pRoot) ;
    else
        xlog.Out("PAGE is EMPTY - No subnodes of root\n") ;
}
hzEcode hzDocHtml::_xport   (hzChain& Z, hzHtmElem* node)
{
    //  Recursive support function for hzDocHtml::Export. For the supplied node this appends to the supplied chain, in order: the opening tag with its
    //  attributes, the content of the node (if any is left after trimming), the export of every child node and the closing tag. Each line is prefixed by the
    //  level of the node and indented by one step per level.
    //
    //  Arguments:  1)  Z       The output chain
    //              2)  node    The current node
    //
    //  Returns:    E_ARGUMENT  If no HTML element is supplied
    //              E_OK        If the operation was successful

    hzChain         text ;      //  Node content as it will be exported
    chIter          ci ;        //  Content iterator
    hzHtmElem*      pChild ;    //  Child nodes
    hzAttrset       attrs ;     //  Attribute iterator
    int             depth ;     //  Indent countdown

    if (!node)
        return E_ARGUMENT ;

    //  Opening tag: level, indent, tag name and then the attributes as name="value"
    Z.Printf("%2d: ", node->Level()) ;
    depth = node->Level() ;
    while (depth)
    {
        Z << ". " ;
        depth-- ;
    }

    Z.Printf("<%s", *Tagtype2Txt(node->Type())) ;
    for (attrs = node ; attrs.Valid() ; attrs.Advance())
        Z.Printf(" %s=\"%s\"", attrs.Name(), attrs.Value()) ;
    Z << ">\n" ;

    //  Content of this node
    if (node->m_tmpContent.Size())
    {
        //  Step over leading chars that are no greater than a space
        ci = node->m_tmpContent ;
        while (!ci.eof() && *ci <= CHAR_SPACE)
            ci++ ;

        //  Copy the remainder but leave out CR/NL sequences
        while (!ci.eof())
        {
            if (ci == "\r\n")
                ci++ ;                  //  Step over the CR. The advance below steps over the NL
            else
                text.AddByte(*ci) ;
            ci++ ;
        }

        if (text.Size())
        {
            Z.Printf("%2d: ", node->Level()) ;
            depth = node->Level() ;
            while (depth)
            {
                Z << "  " ;
                depth-- ;
            }
            Z.AddByte('[') ;
            Z << text ;
            Z.AddByte(']') ;
            Z.AddByte(CHAR_NL) ;
        }
    }

    //  Child nodes are exported after the content of this node. The outcome of these calls is not examined
    for (pChild = node->GetFirstChild() ; pChild ; pChild = pChild->Sibling())
        _xport(Z, pChild) ;

    //  Closing tag: level, indent and tag name
    Z.Printf("%2d: ", node->Level()) ;
    depth = node->Level() ;
    while (depth)
    {
        Z << ". " ;
        depth-- ;
    }
    Z.Printf("</%s>\n", *Tagtype2Txt(node->Type())) ;

    return E_OK ;
}
hzEcode hzDocHtml::Export   (const hzString& filepath)
{
    //  Exports a HTML page to a file named as per the supplied file path.
    //
    //  The file starts with the requested and actual URLs of the page, each on a line of its own and each only written if set. This is followed by the tree
    //  of nodes as produced by _xport or, where the document has no root node but does have raw content, by the raw content.
    //
    //  Arguments:  1)  filepath    The file to export the HTML document to
    //
    //  Returns:    E_ARGUMENT  If no export file path is supplied
    //              E_NODATA    If the document has neither a root node nor any raw content
    //              E_OPENFAIL  If the supplied file path could not be opened for writing
    //              E_WRITEFAIL If a write failure occurs during export
    //              E_OK        If the export ran to completion

    _hzfunc("hzDocHtml::Export") ;

    ofstream    os ;        //  Output stream
    hzChain     Z ;         //  Working chain for output construction
    hzEcode     rc = E_OK ; //  Return code

    if (!filepath)
        return hzerr(E_ARGUMENT, "No pathname supplied") ;

    if (!m_pRoot && !m_Content.Size())
        return hzerr(E_NODATA, "Empty page (no root node). Nothing written to file %s\n", *filepath) ;

    //  Open the export file
    os.clear() ;
    os.open(*filepath) ;
    if (os.fail())
        return hzerr(E_OPENFAIL, "Could not open file %s\n", *filepath) ;

    //  Write the URL header. Both URLs are tested as objects. The actual URL was previously tested by way of the pointer obtained from operator*, which is
    //  not a test of the URL being set unless that pointer is null for an unset URL.
    if (m_Info.m_urlReq)
        Z.Printf("URL (req): %s\n", *m_Info.m_urlReq) ;
    if (m_Info.m_urlAct)
        Z.Printf("URL (act): %s\n", *m_Info.m_urlAct) ;

    os << Z ;
    if (os.fail())
        rc = E_WRITEFAIL ;
    Z.Clear() ;

    //  Write the body: The node tree if there is one, otherwise the raw content
    if (rc == E_OK)
    {
        if (m_pRoot)
            rc = _xport(Z, m_pRoot) ;
        else
            Z = m_Content ;

        os << Z ;
        if (os.fail())
            rc = E_WRITEFAIL ;
    }

    os.close() ;
    return rc ;
}
void    hzDocHtml::Clear    (void)
{
    //  Recursively clear the tree of nodes
    //
    //  Arguments:  None
    //  Returns:    None
    hzHtmElem*  pNode ;     //  Node pointer
    uint32_t    nIndex ;    //  Document tags iterator
    for (nIndex = 0 ; nIndex < m_vecTags.Count() ; nIndex++)
    {
        pNode = m_vecTags[nIndex] ;
        delete pNode ;
    }
    m_vecTags.Clear() ;
    m_vecLinks.Clear() ;
    m_setLinks.Clear() ;
    m_Emails.Clear() ;
    m_pRoot = 0 ;
    m_pHead = 0 ;
    m_pBody = 0 ;
}
hzEcode hzDocHtml::FindElements (hzVect<hzHtmElem*>& elements, hzString& htag, hzString& attrName, hzString& attrValue)
{
    //  Find all elements in a page with the given tag name and/or attribute and value.
    //
    //  The elements examined are those held in m_mapTags under the supplied tag name or, if no tag name is supplied, all elements in m_mapTags. An element
    //  is selected as follows:-
    //
    //      - If neither an attribute name nor an attribute value is supplied, the element is selected without regard to its attributes.
    //      - If an attribute name is supplied, the element must have an attribute of that name and if an attribute value is also supplied, that attribute
    //        must have that value.
    //      - If only an attribute value is supplied, the element must have an attribute (of any name) with that value.
    //
    //  The positions within m_mapTags are held as signed integers. They were formerly unsigned which meant the test for a tag name not being found
    //  (Lo < 0) could never succeed and, with an empty map, the upper limit wrapped round so that the loop over the elements never terminated.
    //
    //  Arguments:  1)  elements    Elements found matching on tag name and on attribute name and value if supplied. The vector is emptied first.
    //              2)  htag        The tag name. If supplied only elements with this tag name are considered.
    //              3)  attrName    The attribute name. This is optional but if supplied, will require elements to have an attribute of the supplied name
    //              4)  attrValue   The attribute value. Also optional but if supplied, will require elements to have an attribute of the supplied value
    //
    //  Returns:    E_NOTFOUND  If the document has no tags or has no tags of the supplied tag name
    //              E_OK        If the elements were examined (it still may have found no elements)

    hzHtmElem*      pElement ;  //  HTML node
    hzAttrset       ai ;        //  Attribute iterator
    hzString        anam ;      //  Attribute name
    hzString        aval ;      //  Attribute value
    int32_t         Lo ;        //  First element in m_mapTags to investigate
    int32_t         Hi ;        //  Last element in m_mapTags to investigate
    int32_t         nIndex ;    //  Element iterator
    bool            bOk ;       //  OK to insert the element

    elements.Clear() ;

    //  Nothing to investigate in a document without tags
    if (!m_mapTags.Count())
        return E_NOTFOUND ;

    if (htag)
    {
        //  A tagname has been supplied so limit the investigation to tags with the tagname
        Lo = m_mapTags.First(htag) ;
        if (Lo < 0)
            return E_NOTFOUND ;
        Hi = m_mapTags.Last(htag) ;
    }
    else
    {
        Lo = 0 ;
        Hi = (int32_t) m_mapTags.Count() - 1 ;
    }

    //  Investigate elements
    for (nIndex = Lo ; nIndex <= Hi ; nIndex++)
    {
        pElement = m_mapTags.GetObj(nIndex) ;

        if (!attrName && !attrValue)
        {
            //  No attribute requirement so the element qualifies on its tag name alone
            bOk = true ;
        }
        else
        {
            //  The element must have an attribute that meets every requirement supplied
            bOk = false ;
            for (ai = pElement ; ai.Valid() ; ai.Advance())
            {
                anam = ai.Name() ;
                aval = ai.Value() ;

                if (attrName && anam != attrName)
                    continue ;
                if (attrValue && aval != attrValue)
                    continue ;

                bOk = true ;
                break ;
            }
        }

        if (bOk)
            elements.Add(pElement) ;
    }

    return E_OK ;
}
hzEcode hzDocHtml::FindElements (hzVect<hzHtmElem*>& elements, const char* srchExp)
{
    //  Find all tags meeting the supplied search expression and place pointers to the tags in the supplied results vector.
    //
    //  The search expression as parsed by this function, has up to three parts as follows:-
    //
    //      1)  tag.tag.tag     - One or more alphanumeric tag names separated by periods. The last names the tag sought while those before it name the
    //                            ancestry required, outermost first. So table.tr.td seeks <td> tags whose parent is a <tr> whose parent is a <table>.
    //      2)  ->attr_name     - Optional. The tag must have an attribute of this name.
    //      3)  ='attr_value'   - Optional. The tag must have an attribute of this value (the same attribute if a name is also given). The char directly
    //                            after the equal sign is taken to be the opening quote and is skipped without being checked. The value runs to the next
    //                            single quote.
    //
    //  If the expression gives no tag name, all tags in the document are tested against the attribute requirements.
    //
    //  The positions and the ancestry counter are held as signed integers. They were formerly unsigned which meant the tests for a tag name not being
    //  found (Lo < 0) and for the ancestry being fully matched (v < 0) could never succeed, while the ancestry loop could run on to index the list of tag
    //  names out of range.
    //
    //  Arguments:  1)  elements    The vector of elements found. The vector is emptied first.
    //              2)  srchExp     Search expression
    //
    //  Returns:    E_ARGUMENT  If no search expression is supplied
    //              E_FORMAT    If the search expression is malformed
    //              E_OK        If the search ran (it still may have found no elements)

    _hzfunc("hzDocHtml::FindElements") ;

    hzVect<hzString>    list ;      //  List of tagnames forming required node ancestry
    hzVect<hzHtmElem*>  found ;     //  Nodes matching on tag name and ancestry

    hzChain         Z ;             //  For extracting tagnames etc
    hzHtmElem*      pN ;            //  Element
    hzHtmElem*      pK ;            //  Element ancestor
    hzAttrset       ai ;            //  Attribute iterator
    const char*     i ;             //  For processing criteria
    hzString        tnam ;          //  Tagname
    hzString        reqAttr_name ;  //  Attribute name (if any)
    hzString        reqAttr_value ; //  Attribute value (if any)
    int32_t         Lo ;            //  1st element to investigate
    int32_t         Hi ;            //  Last element to investigate
    int32_t         x ;             //  Element iterator
    int32_t         v ;             //  Ancestry iterator (goes negative once all ancestors have matched)
    uint32_t        n ;             //  General iterator
    uint32_t        anc ;           //  Ancestry level

    elements.Clear() ;

    if (!srchExp)
        return hzerr(E_ARGUMENT, "No search expression supplied\n") ;

    //  Get the tag name or names. An absent first name is allowed (search all tags) but not if followed by a period
    for (i = srchExp ; IsAlphanum(*i) ; i++)
        Z.AddByte(*i) ;
    tnam = Z ;
    Z.Clear() ;

    if (tnam)
        list.Add(tnam) ;
    else
    {
        if (*i == CHAR_PERIOD)
            return hzerr(E_FORMAT, "Malformed criteria (%s)\n", srchExp) ;
    }

    for (; *i == CHAR_PERIOD ;)
    {
        i++ ;
        if (!IsAlpha(*i))
            return hzerr(E_FORMAT, "Malformed criteria (%s)\n", srchExp) ;

        for (; IsAlphanum(*i) ; i++)
            Z.AddByte(*i) ;
        tnam = Z ;
        Z.Clear() ;
        list.Add(tnam) ;
    }

    //  Get name of attribute if applicable
    if (i[0] == CHAR_MINUS && i[1] == CHAR_MORE)
    {
        for (i += 2 ; IsUrlnorm(*i) ; i++)
            Z.AddByte(*i) ;
        reqAttr_name = Z ;
        Z.Clear() ;
    }

    //  An equal sign specifies the value an attribute must have for the tag to qualify. The scan must not run past the end of the expression
    if (*i == CHAR_EQUAL)
    {
        if (!i[1])
            return hzerr(E_FORMAT, "Malformed criteria (%s)\n", srchExp) ;

        for (i += 2 ; *i && *i != CHAR_SQUOTE ; i++)
            Z.AddByte(*i) ;
        if (*i != CHAR_SQUOTE)
            return hzerr(E_FORMAT, "Malformed criteria (%s)\n", srchExp) ;

        reqAttr_value = Z ;
        Z.Clear() ;
    }

    anc = list.Count() ;
    if (!anc)
    {
        //  No tag name given so all the nodes are candidates
        for (n = 0 ; n < m_mapTags.Count() ; n++)
        {
            pN = m_mapTags.GetObj(n) ;
            found.Add(pN) ;
        }
    }
    else
    {
        //  Look up the last tag in the m_mapTags
        anc-- ;
        tnam = list[anc] ;

        Lo = m_mapTags.First(tnam) ;
        if (Lo < 0)
            return E_OK ;
        Hi = m_mapTags.Last(tnam) ;

        threadLog("node (%d - %d) %s a=%s v=%s", Lo, Hi, *tnam, *reqAttr_name, *reqAttr_value) ;

        for (x = Lo ; x <= Hi ; x++)
        {
            pN = m_mapTags.GetObj(x) ;

            if (!anc)
            {
                //  No ancestry required
                found.Add(pN) ;
                continue ;
            }

            //  Progress thru ancestry, nearest ancestor first. The loop ends early on a name mismatch or on running out of ancestors
            pK = pN->Parent() ;
            for (v = (int32_t) anc - 1 ; pK && v >= 0 ; pK = pK->Parent(), v--)
            {
                threadLog("<- %s ", *pK->Name()) ;
                if (pK->Name() != list[v])
                    break ;
            }

            //  Only if every name in the ancestry was matched will v have gone negative
            if (v < 0)
            {
                found.Add(pN) ;
                threadLog("OK ") ;
            }
        }
    }

    //  Check all found nodes for attribute criteria
    for (n = 0 ; n < found.Count() ; n++)
    {
        pN = found[n] ;

        if (!reqAttr_name && !reqAttr_value)
        {
            elements.Add(pN) ;
            continue ;
        }

        //  The node qualifies on the first attribute that meets every requirement given
        for (ai = pN ; ai.Valid() ; ai.Advance())
        {
            if (reqAttr_name && reqAttr_name != ai.Name())
            {
                threadLog("-1 ") ;
                continue ;
            }

            if (reqAttr_value && reqAttr_value != ai.Value())
            {
                threadLog("-2 ") ;
                continue ;
            }

            elements.Add(pN) ;
            threadLog("+ ") ;
            break ;
        }
    }
    threadLog("done\n") ;

    return E_OK ;
}
hzEcode hzDocHtml::_selectTag   (hzSet<hzHtmElem*>& parents, hzSet<hzHtmElem*>& elements, const hzString& tagspec)
{
    //  Finds the set of tags meeting the supplied tag specifier.
    //
    //  The tag specifier takes the form <tagname> or <tagname attr='value' ...>. After the opening '<' comes the alphanumeric tag name. This may be followed
    //  by any number of attribute requirements, each introduced by a space and each of the form name='value' (attribute must have the value) or name=*
    //  (attribute must be present, whatever its value). The specifier must end with a closing '>'.
    //
    //  A tag of the right name is selected if:-
    //
    //      - The set of parents is empty or one of the tag's ancestors (at any distance) is in the set of parents, and
    //      - The number of the tag's attributes that meet an attribute requirement is equal to the number of attribute requirements.
    //
    //  The positions within m_mapTags are held as signed integers. They were formerly unsigned which meant the test for the tag name not being found
    //  (Lo < 0) could never succeed, so an unknown tag name went on to loop over positions that did not exist.
    //
    //  Arguments:  1)  parents     Set of parent tags
    //              2)  elements    Set of selected tags. This is emptied first.
    //              3)  tagspec     Tag selection criteria
    //
    //  Returns:    E_SYNTAX    If the tag is malformed or illegal
    //              E_OK        If the tag is correct, even if no instances are found

    _hzfunc("hzDocHtml::_selectTag") ;

    hzMapS<hzString,hzString>   pairs ;     //  List of attrs and attr values the tag must possess (if any)

    hzChain         word ;          //  Word extraction
    hzAttrset       ai ;            //  Attribute iterator
    hzHtmElem*      pE ;            //  HTML element (tag)
    hzHtmElem*      pAnc ;          //  Ancestor of HTML element
    const char*     i ;             //  For processing term
    hzString        tagname ;       //  Name of tag sought
    hzString        pnam ;          //  Name of attr sought
    hzString        pval ;          //  Value of attr sought
    hzString        anam ;          //  Attribute name
    hzString        aval ;          //  Attribute value
    uint32_t        nP ;            //  Name-value pair iterator
    int32_t         Lo ;            //  First incidence of tagname (negative if none)
    int32_t         Hi ;            //  Last incidence of tagname
    int32_t         nIndex ;        //  Tag iterator
    uint32_t        nFound ;        //  Number of attribute requirements met
    bool            bFound ;        //  Ancestry test
    hzEcode         rc = E_OK ;     //  Return code

    elements.Clear() ;

    /*
    **  Get tag name from the search criteria
    */

    //  An empty specifier cannot begin with the opening '<'
    if (!tagspec)
        return hzerr(E_SYNTAX, "Term does not begin with an opening '<' char") ;

    i = *tagspec ;
    if (i[0] != CHAR_LESS)
        return hzerr(E_SYNTAX, "Term does not begin with an opening '<' char") ;

    for (i++ ; IsAlphanum(*i) ; i++)
        word.AddByte(*i) ;
    tagname = word ;
    word.Clear() ;

    if (!tagname)
        return hzerr(E_SYNTAX, "No tagname supplied") ;

    /*
    **  Get attribute requirements from the search criteria
    */

    for (; *i == CHAR_SPACE ;)
    {
        //  Step over the space and any further whitespace
        for (i++ ; *i && *i <= CHAR_SPACE ; i++) ;

        pnam = pval = (char*) 0 ;

        for (; IsAlphanum(*i) ; i++)
            word.AddByte(*i) ;
        pnam = word ;
        word.Clear() ;

        if (!pnam)
            { rc = E_SYNTAX ; threadLog("Attr name not supplied\n") ; break ; }

        if (*i != CHAR_EQUAL)
            { rc = E_SYNTAX ; threadLog("Attr name not followed by an assignment operator\n") ; break ; }
        i++ ;

        //  An asterisk in place of a quoted value means any value will do
        if (*i == CHAR_ASTERISK)
        {
            i++ ;
            pval = "*" ;
            pairs.Insert(pnam, pval) ;
            continue ;
        }

        if (*i != CHAR_SQUOTE)
            { rc = E_SYNTAX ; threadLog("Attr has no opening single quote\n") ; break ; }

        for (i++ ; *i && *i != CHAR_SQUOTE ; i++)
            word.AddByte(*i) ;

        if (*i != CHAR_SQUOTE)
            { rc = E_SYNTAX ; threadLog("Attr has no closing single quote\n") ; break ; }
        i++ ;

        pval = word ;
        word.Clear() ;
        pairs.Insert(pnam, pval) ;
    }

    if (rc != E_OK)
        return rc ;

    if (*i != CHAR_MORE)
        { threadLog("Term does not end with a closing '>' char\n") ; return E_SYNTAX ; }

    threadLog("Examining %d tags for tagnam=%s\n", m_vecTags.Count(), *tagname) ;
    for (nP = 0 ; nP < pairs.Count() ; nP++)
    {
        pnam = pairs.GetKey(nP) ;
        pval = pairs.GetObj(nP) ;

        threadLog(" - with %s=%s\n", *pnam, *pval) ;
    }

    /*
    **  Get all tags in document with the tagname. It is not a failure if none found.
    */

    Lo = m_mapTags.First(tagname) ;
    if (Lo < 0)
        threadLog("No matching tags for <%s>\n", *tagname) ;
    else
    {
        Hi = m_mapTags.Last(tagname) ;

        for (nIndex = Lo ; nIndex <= Hi ; nIndex++)
        {
            pE = m_mapTags.GetObj(nIndex) ;

            //  Exclude elements that do not have one of the parents as an ancestor
            if (parents.Count())
            {
                bFound = false ;
                for (pAnc = pE->Parent() ; pAnc ; pAnc = pAnc->Parent())
                {
                    if (parents.Exists(pAnc))
                    {
                        bFound = true ;
                        threadLog("Found parent of %p\n", pAnc) ;
                        break ;
                    }
                    threadLog("No such parent as %p\n", pAnc) ;
                }

                if (!bFound)
                    continue ;
            }

            //  No attribute/value pairs specified so the tag is added to the list
            if (!pairs.Count())
                { elements.Insert(pE) ; continue ; }

            //  Count the attributes of the tag that meet an attribute requirement
            nFound = 0 ;
            for (ai = pE ; ai.Valid() ; ai.Advance())
            {
                anam = ai.Name() ; aval = ai.Value() ;

                if (!pairs.Exists(anam))
                    continue ;

                pval = pairs[anam] ;
                if (pval == "*")
                    { nFound++ ; continue ; }

                if (pval == aval)
                    nFound++ ;
            }

            //  If there is a match on every attribute/value pair specified, add to the list
            if (nFound == pairs.Count())
                elements.Insert(pE) ;
        }
    }

    threadLog("Found %d tags for tagspec=[%s]\n", elements.Count(), *tagspec) ;
    return rc ;
}
hzEcode hzDocHtml::_selectTerm  (hzSet<hzHtmElem*>& elements, const hzString& term)
{
    //  A 'term' within the context of HTML document tag selection, can be a specification of a single tag or it can specify multiple tags. In the latter case
    //  the tag specifiers are joined by the plus sign (the term is split by SplitCSV with CHAR_PLUS as the delimiter) and hierarchy is implied.
    //
    //  Selection works on the basis of more detail, more tests. For example, the term <div> will populate the set of elements found with every <div> tag in the
    //  document. The term <div class=*> will only find div tags with an attribute of 'class' while the term <div class='body'> will only find div tags that
    //  have an attribute of class whose value is 'body'. It should be noted however, that tags are selected if they have what is asked for in the term. There
    //  is not presently, any means to exclude tags if they have something we don't want them to have.
    //
    //  A hierarchical term such as <div class='body'>+<p> will find every paragraph tag in the document that has as an ancestor, a div with an attribute of
    //  class whose value is 'body' (_selectTag tests all ancestors of a tag against the set of parents, not just the immediate parent). If no div tags meet
    //  that criteria nothing will be selected.
    //
    //  Note that multiple tag terms are implemented by multiple calls to _selectTag, with the tags selected by one call serving as the set of parents for the
    //  next. As _selectTag treats an empty set of parents as there being no ancestry requirement at all, the calls stop as soon as one of them selects
    //  nothing. Formerly this was tested only after the first call, so a third or later tag specifier could select tags from anywhere in the document if the
    //  specifier before it had selected nothing.
    //
    //  Arguments:  1)  elements    Set of elements selected by this function
    //              2)  term        Tag selection criteria
    //
    //  Returns:    E_SYNTAX    If the term has no tag specifiers or a tag specifier is malformed or illegal
    //              E_OK        If the term is correct, even if no instances are found

    _hzfunc("hzDocHtml::_selectTerm") ;

    hzSet<hzHtmElem*>   parents ;   //  Parents
    hzArray<hzString>   ar ;        //  Array of tag specifiers

    uint32_t    x ;     //  For populating parents
    uint32_t    t ;     //  Tag specifier iterator
    hzEcode     rc ;    //  Return code

    SplitCSV(ar, *term, CHAR_PLUS) ;
    if (!ar.Count())
        return hzerr(E_SYNTAX, "No tag specifiers found in term") ;

    threadLog("Term is %s (%d) components\n", *term, ar.Count()) ;
    for (t = 0 ; t < ar.Count() ; t++)
    {
        threadLog("Term component %d: %s\n", t, *ar[t]) ;
    }

    //  The first tag specifier is applied with no parents listed. If it is the only one, that is the job done
    rc = _selectTag(parents, elements, ar[0]) ;
    if (ar.Count() == 1)
        return rc ;

    //  There is more than one tag specifier. Call _selectTag repeatedly with the elements found by the last call acting as the list of valid parents for
    //  the next call. Stop on error or as soon as nothing is selected, as nothing can descend from an empty set of parents.
    for (t = 1 ; rc == E_OK && elements.Count() && t < ar.Count() ; t++)
    {
        //  Parents is the last tag's haul
        parents.Clear() ;
        for (x = 0 ; x < elements.Count() ; x++)
            parents.Insert(elements.GetObj(x)) ;

        rc = _selectTag(parents, elements, ar[t]) ;
    }

    threadLog("Found %d tags for term=[%s]\n", elements.Count(), *term) ;
    return rc ;
}
static  hzEcode selectExpOperand    (const char*& i, hzString& exp, hzString& term)
{
    //  Support function for hzDocHtml::_selectExp. Reads a single operand of a search expression, starting at the supplied cursor.
    //
    //  An operand is either a parenthesised sub-expression, in which case the text between the outermost parenthesis is placed in 'exp', or a series of one or
    //  more <...> tag specifiers joined by '+', in which case the whole series (including the <> and + characters) is placed in 'term'. Only one of 'exp' and
    //  'term' is ever assigned by a call.
    //
    //  Arguments:  1)  i       Cursor into the expression. Must be at the first char of the operand. Left just beyond the operand on success.
    //              2)  exp     Receives the content of a parenthesised sub-expression
    //              3)  term    Receives a series of tag specifiers
    //
    //  Returns:    E_SYNTAX    If the operand starts with neither '(' nor '<', or if a parenthesis or <> block is not closed
    //              E_OK        If an operand was read

    _hzfunc("selectExpOperand") ;

    hzChain     word ;      //  Operand accumulator
    uint32_t    level ;     //  Parenthesis nesting level

    if (*i == '(')
    {
        //  Copy everything up to, but not including, the matching close parenthesis
        level = 1 ;
        for (i++ ; level && *i >= CHAR_SPACE ; i++)
        {
            if (*i == '(')  level++ ;
            if (*i == ')')  level-- ;
            if (level)
                word.AddByte(*i) ;
        }

        //  The scan stopped before the parenthesis balanced, so the expression is malformed
        if (level)
        {
            threadLog("Unbalanced parenthesis\n") ;
            return E_SYNTAX ;
        }

        exp = word ;
        return E_OK ;
    }

    if (*i == CHAR_LESS)
    {
        for (; *i == CHAR_LESS ;)
        {
            //  Copy the tag specifier. The test for the terminator stops the scan running off the end of the expression if the '>' is missing.
            for (; *i && *i != CHAR_MORE ; i++)
                word.AddByte(*i) ;

            if (!*i)
            {
                threadLog("Expected a closing '>'\n") ;
                return E_SYNTAX ;
            }

            word.AddByte(CHAR_MORE) ;
            i++ ;

            //  A '+' directly after the '>' joins this specifier to the next
            if (*i == CHAR_PLUS)
                { word.AddByte(CHAR_PLUS) ; i++ ; }
        }

        term = word ;
        return E_OK ;
    }

    threadLog("Expected an opening '<'\n") ;
    return E_SYNTAX ;
}

hzEcode hzDocHtml::_selectExp   (hzSet<hzHtmElem*>& elements, const hzString& srchExp)
{
    //  Recursive support function for hzDocHtml::FindElements (see below)
    //
    //  Breaks up the expression into either a single operand or 'operand op operand', where op is 'or' or 'and'. An operand is either a term (a series of one
    //  or more <...> tag specifiers joined by '+') or a sub-expression enclosed in parenthesis. Terms are resolved by _selectTerm and sub-expressions by a
    //  recursive call to this function. With 'or' the result is every element found by either operand, with 'and' only the elements found by both.
    //
    //  Note that nothing after the second operand is examined.
    //
    //  Arguments:  1)  elements    The set of elements selected. Elements are inserted, the set is not cleared first.
    //              2)  srchExp     Search expression
    //
    //  Returns:    E_SYNTAX    If the expression is empty or malformed
    //              E_OK        If the operation was successful (it still may have found no elements)
    //
    //  Any other error returned by _selectTerm or by the recursive call is passed back as is.

    _hzfunc("hzDocHtml::_selectExp") ;

    hzSet<hzHtmElem*>   setA ;      //  Element set for first operand
    hzSet<hzHtmElem*>   setB ;      //  Element set for second operand

    hzHtmElem*      pE ;            //  HTML element
    const char*     i ;             //  For processing criteria
    hzString        termA ;         //  First operand, if a term
    hzString        termB ;         //  Second operand, if a term
    hzString        expA ;          //  First operand, if a parenthesised sub-expression
    hzString        expB ;          //  Second operand, if a parenthesised sub-expression
    uint32_t        op ;            //  1 for OR and 2 for AND
    uint32_t        n ;             //  Counter
    hzEcode         rc = E_OK ;     //  Return code

    if (!srchExp)
    {
        threadLog("Empty expression\n") ;
        return E_SYNTAX ;
    }

    /*
    **  Get 1st operand
    */

    for (i = *srchExp ; *i && *i <= CHAR_SPACE ; i++) ;

    rc = selectExpOperand(i, expA, termA) ;
    if (rc != E_OK)
        return rc ;

    //  Step over whitespace now so that trailing whitespace after a lone operand is not mistaken for the start of an operator
    for (; *i && *i <= CHAR_SPACE ; i++) ;

    if (*i == 0)
    {
        //  No further operands so select directly into the caller's set
        threadLog("Calling _selectTerm with a single exp [%s] term [%s]\n", *srchExp, *termA) ;

        //  Only one of expA and termA is populated. A sub-expression must be recursed on as expA (termA is empty in that case).
        if (expA)
            rc = _selectExp(elements, expA) ;
        if (termA)
            rc = _selectTerm(elements, termA) ;

        threadLog("case 1 Found %d tags for term=[%s]\n", elements.Count(), *srchExp) ;
        return rc ;
    }

    /*
    **  Get operator
    */

    //  NOTE(review): this relies on CstrCompareI() matching the operator as a prefix of the remaining text - confirm against its definition
    if (!CstrCompareI(i, "or"))
        { i += 2 ; op = 1 ; }
    else if (!CstrCompareI(i, "and"))
        { i += 3 ; op = 2 ; }
    else
        { threadLog("Illegal operator [%s]\n", i) ; return E_SYNTAX ; }

    /*
    **  Get second operand
    */

    for (; *i && *i <= CHAR_SPACE ; i++) ;

    rc = selectExpOperand(i, expB, termB) ;
    if (rc != E_OK)
        return rc ;

    /*
    **  Evaluate both operands. A failure on either is returned at once, so that it cannot be masked by the outcome of the other.
    */

    threadLog("Calling _selectTerm with terms [%s:%s] and [%s:%s]\n", *expA, *termA, *expB, *termB) ;

    if (expA)
        rc = _selectExp(setA, expA) ;
    if (termA)
        rc = _selectTerm(setA, termA) ;
    if (rc != E_OK)
        return rc ;

    if (expB)
        rc = _selectExp(setB, expB) ;
    if (termB)
        rc = _selectTerm(setB, termB) ;
    if (rc != E_OK)
        return rc ;

    /*
    **  Apply operator
    */

    if (op == 1)
    {
        //  Union of the two sets
        threadLog("OR'ing\n") ;
        for (n = 0 ; n < setA.Count() ; n++)
            elements.Insert(setA.GetObj(n)) ;
        for (n = 0 ; n < setB.Count() ; n++)
            elements.Insert(setB.GetObj(n)) ;
        threadLog("(total %d)\n", elements.Count()) ;
    }
    else
    {
        //  Intersection of the two sets
        threadLog("AND'ing\n") ;
        for (n = 0 ; n < setA.Count() ; n++)
        {
            pE = setA.GetObj(n) ;
            if (setB.Exists(pE))
                elements.Insert(pE) ;
        }
    }

    threadLog("Found %d tags for term=[%s]\n", elements.Count(), *srchExp) ;
    return rc ;
}
hzEcode hzDocHtml::FindElements (hzVect<hzHtmElem*>& elements, const hzString& srchExp)
{
    //  Select elements from this document according to the supplied search expression
    //
    //  Webpages (HTML documents) commonly contain a lot of superfluous matter whilst confining most information content to a limited set of elements (tags). If
    //  it is known which element(s) contain what information (eg title, author, body content etc), FindElements can be used to select these element(s) and from
    //  there, data can be efficiently extracted.
    //
    //  Arguments:  1)  elements    The vector of HTML elements to be populated by this query. The vector is cleared first, then populated in ascending order
    //                              of element uid (presumably the order of incidence in the HTML document - this depends on how uids are assigned).
    //              2)  srchExp     The criteria as a boolean expression of one or more terms, where each term specifies how elements are to be selected.
    //                              An empty expression selects nothing and is not an error.
    //
    //  Returns:    E_SYNTAX    If the expression is malformed
    //              E_OK        If the operation was successful (it still may have found no elements)
    //
    //  Any other error returned by _selectExp is passed back as is.
    //
    //  Support functions:
    //
    //  FindElements() itself calls the private member function _selectExp to do the selecting. This places selected elements in a hzSet of element pointers
    //  (this ensures tags are only counted once). FindElements() then re-orders the elements from the hzSet, by uid, into the hzVect.
    //
    //  _selectExp  (hzSet<hzHtmElem*>& elements, const hzString& exp) simply breaks up the expression into a term or 'term op expression' and calls
    //  the second support function _selectTerm() to find the set of tags for each term.
    //
    //  _selectTerm (hzSet<hzHtmElem*>& elements, const hzString& exp) deals only with terms designed to specify elements. Each term consists of one or
    //  more tag specifiers, which when multiple, are separated by a + sign. A single tag specifier will identify a list of one or more tags within
    //  the document. Subsequent tag specifiers will do the same but will limit the search to descendants of the tags found under the previous tag
    //  specifier. The _selectTerm() calls the third support function _selectTag() on each tag specifier in turn, to actually do the selecting.
    //
    //  _selectTag  (hzSet<hzHtmElem*>& parents, hzSet<hzHtmElem*>& elements, const hzString& exp) uses a single tag specifier to select tags from the
    //  HTML document and then if a list of parents (previously found tags) is supplied the selected tags are tested to ensure they have an ancestor
    //  among the list of parents.
    //
    //  Each tag specifier will be encased in a <> block and be of the general form <tagname attr1='value1' attr2='value2' ...> where either the tag
    //  name or at least one attribute must exist. If an attribute is specified the tag must match on the attribute to be selected. Wildcards can be
    //  used as well.

    _hzfunc("hzDocHtml::FindElements") ;

    hzMapS<uint32_t,hzHtmElem*> ord ;   //  Results keyed (and so ordered) by element uid
    hzSet<hzHtmElem*>           res ;   //  Results as selected

    hzHtmElem*  pE ;    //  The HTML element (tag)
    uint32_t    x ;     //  Result set iterator
    hzEcode     rc ;    //  Return code

    elements.Clear() ;

    if (!srchExp)
        return E_OK ;

    //  Run the expression
    rc = _selectExp(res, srchExp) ;
    if (rc != E_OK)
    {
        threadLog("Failed\n") ;
        return rc ;
    }

    //  Re-key the results by uid
    for (x = 0 ; x < res.Count() ; x++)
    {
        pE = res.GetObj(x) ;
        if (pE)
            ord.Insert(pE->GetUid(), pE) ;
    }

    //  Export in uid order. The limit is the population of the map being read, not that of the original result set, as the two are not guaranteed to agree.
    for (x = 0 ; x < ord.Count() ; x++)
        elements.Add(ord.GetObj(x)) ;

    threadLog("Got %d elements\n", ord.Count()) ;
    return E_OK ;
}
/*
**  Section 2:  hzHtmElem members
*/
hzEcode hzHtmElem::Init (hzDocHtml* pRoot, hzHtmElem* pParent, hzString& tagname, hzHtagtype type, uint32_t id, uint32_t line)
{
    //  Initialize a HTML element (tag): Set the tag name, tag type, id and line number (within the HTML in question), clear the child and sibling links and
    //  then, if a parent element is supplied, record the parent and append this element to the parent's list of children.
    //
    //  Arguments:  1)  pRoot   Pointer to the HTML document root (only tested for being non-null here)
    //              2)  pParent Pointer to the parent element of this (may be null, in which case this element is at level 0)
    //              3)  tagname The name of this tag
    //              4)  type    HTML Tag type
    //              5)  id      Numeric identifier
    //              6)  line    Line number of tag in the source HTML file
    //
    //  Returns:    E_ARGUMENT  If no root is supplied
    //              E_DUPLICATE If the parent rejects this element as a child (the parent is this element, or already has it as a child)
    //              E_OK        If the HTML element was initialized

    _hzfunc("hzHtmElem::Init") ;

    if (!pRoot)
        return hzerr(E_ARGUMENT, "No root supplied") ;

    //  Establish this element's own values BEFORE linking to the parent. The parent's _addnode() records the new child by calling GetUid() on it, so the id
    //  has to be in place by then (assuming GetUid() reports m_Uid).
    m_Name = tagname ;
    m_Type = type ;
    m_Uid = id ;
    m_nLine = line ;
    m_Children = 0 ;
    m_Sibling = 0 ;

    if (!pParent)
    {
        m_Parent = 0 ;
        m_nLevel = 0 ;
        return E_OK ;
    }

    m_Parent = pParent->GetUid() ;
    m_nLevel = pParent->m_nLevel + 1 ;

    //  Pass back the outcome of the linkage rather than discarding it
    return pParent->_addnode(this) ;
}
hzEcode hzHtmElem::_addnode (hzHtmElem* pNode)
{
    //  Adds an element as a subnode of this. Subnodes are always appended.
    //
    //  Children are held as a chain: This element holds the uid of its first child (m_Children) and each child holds the uid of the next (m_Sibling). A new
    //  child is added by walking to the end of the chain and setting the last child's m_Sibling.
    //
    //  Arguments:  1)  pNode   Element to add as child of this element
    //
    //  Returns:    E_ARGUMENT  If no element is supplied
    //              E_DUPLICATE If the supplied element is actually this element, or is already a child of this element
    //              E_OK        If the element is added as child

    _hzfunc("hzHtmElem::_addnode") ;

    hzHtmElem*  p_temp ;    //  Current node pointer

    if (!pNode)
        return hzerr(E_ARGUMENT, "Attempt to add a null node") ;
    if (pNode == this)
        return hzerr(E_DUPLICATE, "Attempt to add a node to itself (%s)", *m_Name) ;

    if (!m_Children)
        m_Children = pNode->GetUid() ;
    else
    {
        //  Walk to the last child, rejecting the node if it is met on the way.
        //  NOTE(review): As before, this assumes GetFirstChild() and Sibling() return a valid element whenever the corresponding uid is non-zero.
        for (p_temp = GetFirstChild() ; p_temp->m_Sibling ; p_temp = p_temp->Sibling())
        {
            if (pNode == p_temp)
                return hzerr(E_DUPLICATE, "Attempt to add an already existing node to %s", *m_Name) ;
        }

        //  The loop exits on the last child without having tested it. Without this test, re-adding the last child would make it its own sibling and so
        //  turn the chain into a loop.
        if (pNode == p_temp)
            return hzerr(E_DUPLICATE, "Attempt to add an already existing node to %s", *m_Name) ;

        p_temp->m_Sibling = pNode->GetUid() ;
    }

    m_nSubnodes++ ;
    return E_OK ;
}
/*
**  Section 2A: hzHtmTbl members
*/
uint32_t    hzHtmTbl::Colcount  (void)
{
    //  Establishes the number of column headers.
    //
    //  If a non-zero count is already held in m_nCols, that is returned. Otherwise the column header (HTAG_TH) tags directly under the table's first child
    //  are counted and the count is held in m_nCols. If the table has no children, or the first child has no header tags, the count is 0 and the tags
    //  will be counted again on the next call.
    //
    //  Arguments:  None
    //  Returns:    Number of column headers

    hzHtmElem*  pE ;        //  First child of the table (expected to be the row of headers)
    hzHtmElem*  pC ;        //  Columns

    if (m_nCols)
        return m_nCols ;

    //  An empty table has no first child to examine
    pE = GetFirstChild() ;
    if (!pE)
        return m_nCols ;

    for (pC = pE->GetFirstChild() ; pC ; pC = pC->Sibling())
    {
        if (pC->Type() == HTAG_TH)
            m_nCols++ ;
    }

    return m_nCols ;
}
uint32_t    hzHtmTbl::Rowcount  (void)
{
    //  Returns the number of rows. This will not include the row of headers.
    //
    //  The row count is derived from the number of subnodes of the table. If the table has column headers (as established by Colcount), one subnode is taken
    //  to be the row of headers and is excluded from the count.
    //
    //  Arguments:  None
    //  Returns:    Number of rows in the table

    if (!m_nSubnodes)
    {
        //threadLog("Table is empty\n") ;
        return 0 ;
    }

    //  Count the columns if this has yet to be done. Where an earlier call has found no headers (rows known, columns still 0), don't count again.
    if (!m_nCols && !m_nRows)
        Colcount() ;

    //  Always derive the row count here. Previously this was skipped whenever the column count was already known, so that calling Colcount() ahead of this
    //  function left the row count at whatever it was before.
    if (!m_nCols)
        m_nRows = m_nSubnodes ;
    else
        m_nRows = m_nSubnodes - 1 ;

    return m_nRows ;
}
hzString    hzHtmTbl::GetColl   (uint32_t nCol)
{
    //  Return the value (string) of the requested column header
    //
    //  In the case of a table, the only allowed sub-nodes are <tr> nodes. The columns for the table are all under the table's first <tr> sub-node as <th> nodes.
    //  Only the <th> (HTAG_TH) nodes are counted when locating the column, in line with Colcount().
    //
    //  Arguments:  1)  nCol    The column number (0 for the first column header)
    //
    //  Returns:    Instance of hzString by value - the content of the column header, or an empty string if there is no such column

    hzHtmElem*  pE ;        //  First child of the table (expected to be the row of headers)
    hzHtmElem*  pC ;        //  Columns
    hzString    S ;         //  Target string
    uint32_t    nIndex ;    //  Column iterator

    if (!m_Children)
        return S ;

    pE = GetFirstChild() ;
    if (!pE)
        return S ;

    nIndex = 0 ;
    for (pC = pE->GetFirstChild() ; pC ; pC = pC->Sibling())
    {
        //  Skip anything in the header row that is not a column header
        if (pC->Type() != HTAG_TH)
            continue ;

        if (nIndex == nCol)
        {
            S = pC->m_tmpContent ;
            break ;
        }
        nIndex++ ;
    }

    return S ;
}
hzString    hzHtmTbl::GetCell   (uint32_t nRow, uint32_t nCol)
{
    //  Return the cell from the supplied row and column.
    //
    //  Method is to move thru the table's <tr> subnodes to get to the row, then move thru that row's <td> (or equivalent) tags to get to the column within the
    //  row (the cell). The table's first subnode is taken to be the row of headers and is skipped, so row 0 is the table's second subnode.
    //
    //  Arguments:  1)  nRow    The row number (0 for the first row after the headers)
    //              2)  nCol    The column number (0 for the first cell in the row)
    //
    //  Returns:    Instance of hzString by value - of the table cell. This will be empty if the row or column does not exist. Note that if the table has no
    //              child nodes or no column count has been established, the string returned is a message to that effect rather than cell content.

    hzHtmElem*  pR ;        //  Table row tags
    hzHtmElem*  pC ;        //  Columns
    hzString    S ;         //  Target string
    uint32_t    row ;       //  Row counter
    uint32_t    col ;       //  Column counter

    if (!m_Children)    { S = "No child nodes" ; return S ; }
    if (!m_nCols)       { S = "No columns" ; return S ; }

    //  Step over the row of headers. This was formerly done by starting an unsigned row counter at -1, but that wraps to the maximum value and so stopped
    //  the search before it began.
    pR = GetFirstChild() ;
    if (pR)
        pR = pR->Sibling() ;

    //  Advance to the requested row
    for (row = 0 ; pR && row < nRow ; row++)
        pR = pR->Sibling() ;
    if (!pR)
        return S ;

    //  Advance to the requested cell within the row
    pC = pR->GetFirstChild() ;
    for (col = 0 ; pC && col < nCol ; col++)
        pC = pC->Sibling() ;

    if (pC)
        S = pC->m_tmpContent ;

    return S ;
}
/*
**  Non-member functions
*/
hzDoctype   DeriveDoctype   (hzChain& Z)
{
    //  Category:   Text Processing
    //
    //  Rudimentary check to determine if the document is HTML or XML. The input is scanned for the first '<' and the text found there is tested, in order,
    //  for an <html tag, a <!DOCTYPE declaration (of either html or xml) and an <?xml declaration.
    //
    //  Argument:   Z   Input document
    //
    //  Returns:    DOCTYPE_HTML        If the document is deemed to be HTML
    //              DOCTYPE_XML         If the document is deemed to be XML
    //              DOCTYPE_UNDEFINED   If none of the tests were passed

    chIter  cursor ;    //  Chain iterator

    //  Move to the first opening angle bracket, if any
    cursor = Z ;
    while (!cursor.eof() && *cursor != CHAR_LESS)
        cursor++ ;

    if (cursor.Equiv("<html"))
        return DOCTYPE_HTML ;

    if (cursor.Equiv("<!DOCTYPE "))
    {
        //  Step over the 10 chars of the declaration opening to reach the declared type
        cursor += 10 ;

        if (cursor.Equiv("html"))
            return DOCTYPE_HTML ;
        if (cursor.Equiv("xml"))
            return DOCTYPE_XML ;
    }

    return cursor.Equiv("<?xml") ? DOCTYPE_XML : DOCTYPE_UNDEFINED ;
}