//
// File: hzHttpClient.cpp
//
// Legal Notice: This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
// The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
// Software Foundation, either version 3 of the License, or any later version.
//
// The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <netdb.h>
#include <sys/stat.h>
#include "hzChars.h"
#include "hzTextproc.h"
#include "hzDirectory.h"
#include "hzCodec.h"
#include "hzHttpClient.h"
#include "hzProcess.h"
using namespace std ;
/*
** Prototypes
*/
uint32_t _extractHttpHeader (hzString& Param, hzString& Value, hzChain::Iter& ci, bool bConvert) ;
/*
** Section 1: hzHttpClient member functions
*/
hzEcode hzHttpClient::Connect (const hzUrl& url)
{
_hzfunc("hzHttpClient::Connect") ;
hzEcode rc ; // Return code
if (url.IsSSL())
rc = m_Webhost.ConnectSSL(url.Domain(), url.Port()) ;
else
rc = m_Webhost.ConnectStd(url.Domain(), url.Port()) ;
if (rc != E_OK)
m_Error.Printf("Could not connect to domain [%s] on port %d (error=%s)\n", *url.Domain(), url.Port(), Err2Txt(rc)) ;
else
{
rc = m_Webhost.SetSendTimeout(30) ;
if (rc != E_OK)
m_Error.Printf("Could not set send_timeout on connection to domain [%s] on port %d (error=%s)\n", *url.Domain(), url.Port(), Err2Txt(rc)) ;
else
{
rc = m_Webhost.SetRecvTimeout(30) ;
if (rc != E_OK)
m_Error.Printf("Could not set recv_timeout on connection to domain [%s] on port %d (error=%s)\n", *url.Domain(), url.Port(), Err2Txt(rc)) ;
}
}
return rc ;
}
hzEcode hzHttpClient::Close (void)
{
_hzfunc("hzHttpClient::Close") ;
m_Webhost.Close() ;
return E_OK ;
}
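// Example: a minimal connect-and-fetch sketch (illustrative only, not part of the library). It assumes hzHttpClient is default constructible
// and that hzUrl can be assigned from a string literal; the URL shown is hypothetical. TestPage() (defined below) requires that the connection
// has already been made.
//
//	hzHttpClient	client ;	// HTTP client instance
//	hzChain			page ;		// Raw response bytes
//	hzUrl			url ;		// Target URL
//
//	url = "http://www.example.com/index.html" ;
//	if (client.Connect(url) == E_OK)
//	{
//		client.TestPage(page, url) ;
//		client.Close() ;
//	}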
uint32_t _extractHttpHeader (hzString& Param, hzString& Value, hzChain::Iter& ci, bool bConvert)
{
// Support function to extract the parameter name and value from an HTTP header line (either that of a request or a response). HTTP header lines are of the form param_name: param_value
// and are terminated by a CR/NL
//
// Arguments: 1) Param The hzString to store the parameter name.
// 2) Value The hzString to store the parameter value.
// 3) ci A reference to the chain iterator processing the HTTP request.
// 4) bConvert Flag to convert a percent sign followed by two hex digits into a single character value
//
// Returns: Number of characters processed.
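//
// For illustration: given the header line "Content-Type: text/html\r\n", this function would set Param to "Content-Type", Value to "text/html"
// and return 25 (the 23 characters up to the CR/NL plus the CR/NL itself).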
_hzfunc("_extractHttpHeader") ;
chIter xi ; // For iterating line
hzChain temp ; // For building param and then value
uint32_t nCount = 0 ; // Returned length of HTTP header line
uint32_t nHex ; // Hex value
char cvHex[4] ; // Hex value buffer
Param.Clear() ;
Value.Clear() ;
cvHex[2] = 0 ;
xi = ci ;
for (; !xi.eof() ;)
{
if (*xi == CHAR_PERCENT)
{
if (bConvert)
{
xi++ ; cvHex[0] = *xi ;
xi++ ; cvHex[1] = *xi ;
xi++ ;
nCount += 3 ;
if (IsHexnum(nHex, cvHex))
temp.AddByte(nHex) ;
continue ;
}
}
if (*xi == CHAR_COLON && !Param)
{
xi++ ;
nCount++ ;
Param = temp ;
temp.Clear() ;
if (*xi == CHAR_SPACE)
for (; !xi.eof() && (*xi == CHAR_SPACE || *xi == CHAR_TAB) ; xi++, nCount++) ;
}
if (xi == "\r\n")
{ xi += 2 ; nCount += 2 ; break ; }
if (*xi == CHAR_NL)
{ xi++ ; nCount++ ; break ; }
if (*xi < CHAR_SPACE)
threadLog("Illegal char (%u) in HTTP Header\n", (uchar) *xi) ;
if (*xi == CHAR_PLUS)
temp.AddByte(CHAR_SPACE) ;
else
temp.AddByte(*xi) ;
xi++ ;
nCount++ ;
}
Value = temp ;
return nCount ;
}
hzEcode hzHttpClient::_procHttpResponse (HttpRC& hRet, const hzUrl& url)
{
// Support function for the hzHttpClient member functions GetPage() and PostForm(). The purpose is to gather the server response to
// an earlier HTTP GET, POST or HEAD request.
//
// Arguments: 1) hRet HTTP return code
// 2) url The URL
//
// Returns: E_NOSOCKET If the external server has closed the connection
// E_NODATA If nothing was received
// E_FORMAT If the response was malformed
// E_OK If the response was received without error
_hzfunc("hzHttpClient::_procHttpResponse") ;
chIter zi ; // To iterate the returned page
chIter hi ; // To re-iterate lines of interest in the header of the returned page
chIter ti ; // Temp iterator
hzChain Z ; // Receive buffer (raw bytes from the socket)
hzChain X ; // Temp buffer
hzCookie cookie ; // Cookie (to be checked against supplied map of cookies)
hzString S ; // Temp string
hzString param ; // Header parameter name
hzString value ; // Header parameter value
uint32_t nRecv ; // Bytes received
uint32_t nExpect = 0 ; // Size of current chunk
uint32_t nLen = 0 ; // Length of current header line
uint32_t nLine ; // Line number (of header)
uint32_t nTry ; // Number of tries
uint32_t nCount ; // Number of bytes counted off from those expected
bool duHast = false ; // Have read a chunking directive or have a content len
bool bTerm = false ; // Terminate chunking (only set upon a 0 value on a line by itself)
hzEcode sRet = E_OK ; // Return code
char numBuf[4] ; // For HTTP return code
// Clear variables
m_CacheCtrl = (char*) 0 ;
m_Pragma = (char*) 0 ;
m_Redirect = (char*) 0 ;
m_KeepAlive = (char*) 0 ;
m_ContentType = (char*) 0 ;
m_XferEncoding = (char*) 0 ;
m_ContEncoding = (char*) 0 ;
m_Etag = (char*) 0 ;
m_bConnection = false ;
m_nContentLen = 0 ;
m_Content.Clear() ;
m_Header.Clear() ;
// First, garner the header from the response
for (nTry = 0 ; nTry < 4 && !m_Header.Size() ; nTry++)
{
sRet = m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (sRet != E_OK)
{
if (sRet == E_NOSOCKET)
m_Error.Printf("Connection closed by server\n") ;
else
m_Error.Printf("Could not recv bytes (nbytes=%d) from page %s (error=%s)\n", nRecv, *url.Resource(), Err2Txt(sRet)) ;
break ;
}
if (!nRecv)
{
m_Error.Printf("Got no response, retrying ...\n") ;
sleep(1) ;
continue ;
}
Z.Append(m_buf, nRecv) ;
// Test for presence of \r\n\r\n to mark end of header
for (zi = Z ; !zi.eof() ; zi++)
{
if (*zi != CHAR_CR)
continue ;
if (zi == "\r\n\r\n")
{
// Bytes before the header's end are now copied from temp chain Z to the header
for (ti = Z ; ti != zi ; ti++)
m_Header.AddByte(*ti) ;
zi += 4 ;
break ;
}
}
}
if (nTry == 4)
{ m_Error.Printf("Given up!\n") ; return E_NODATA ; }
if (!m_Header.Size())
{ m_Error.Printf("Given up! Header is empty\n") ; return E_NODATA ; }
/*
** Examine header
*/
// First part is the HTTP return code
memset(numBuf, 0, 4) ;
hi = m_Header ;
if (hi == "HTTP/")
{
for (hi += 5 ; !hi.eof() && *hi > CHAR_SPACE ; hi++) ;
}
else
{
m_Error.Printf("case 1: 1st line of server response should be HTTP/{version} followed by a 3 digit HTML return code\n") ;
m_Error.Printf("got %d bytes of header namely:-\n[", m_Header.Size()) ;
m_Error << m_Header ;
m_Error << "]\n" ;
return E_FORMAT ;
}
m_Error << "Response\n" << m_Header << "\n--------------------------\n" ;
hi++ ; numBuf[0] = *hi ;
hi++ ; numBuf[1] = *hi ;
hi++ ; numBuf[2] = *hi ;
hi++ ; numBuf[3] = 0 ;
if (*hi != CHAR_SPACE || !IsDigit(numBuf[0]) || !IsDigit(numBuf[1]) || !IsDigit(numBuf[2]))
{
m_Error.Printf("case 2: 1st line of server response should be HTTP/1.1 followed by a 3 digit HTML return code - got [%s]\n\n", numBuf) ;
return E_FORMAT ;
}
hRet = (HttpRC) atoi(numBuf) ;
for (hi++ ; !hi.eof() && *hi != CHAR_NL ; hi++) ;
hi++ ;
// Next part is the header lines
for (nLine = 1 ; !hi.eof() ; nLine++, hi += nLen)
{
nLen = _extractHttpHeader(param, value, hi, false) ;
if (nLen == 0)
{
for (hi++ ; !hi.eof() && *hi != CHAR_NL ; hi++) ;
hi++ ;
m_Error.Printf("Line %d of header rejected (param=%s, value=%s)\n", nLine, *param, *value) ;
continue ;
}
if (param.Equiv("Date")) { m_Accessed = value ; continue ; }
if (param.Equiv("Expires")) { m_Expires = value ; continue ; }
if (param.Equiv("Last-Modified")) { m_Modified = value ; continue ; }
if (param.Equiv("Cache-Control")) { m_CacheCtrl = value ; continue ; }
if (param.Equiv("Pragma")) { m_Pragma = value ; continue ; }
if (param.Equiv("Location")) { m_Redirect = value ; continue ; }
if (param.Equiv("Keep-Alive")) { m_KeepAlive = value ; continue ; }
if (param.Equiv("Connection")) { m_bConnection = value == "close" ? false : true ; continue ; }
if (param.Equiv("Content-Type")) { m_ContentType = value ; continue ; }
if (param.Equiv("Content-Encoding")) { m_ContEncoding = value ; continue ; }
if (param.Equiv("Transfer-Encoding")) { m_XferEncoding = value ; continue ; }
if (param.Equiv("Alternate-Protocol")) { m_AltProto = value ; continue ; }
if (param.Equiv("ETag")) { m_Etag = value ; continue ; }
if (param.Equiv("Set-Cookie"))
{
// Get the cookie value
ti = hi ;
for (ti += 12 ; !ti.eof() && *ti != CHAR_EQUAL ; ti++)
X.AddByte(*ti) ;
cookie.m_Name = X ;
X.Clear() ;
for (ti++ ; !ti.eof() && *ti != CHAR_SCOLON ; ti++)
X.AddByte(*ti) ;
cookie.m_Value = X ;
//cookie.m_Value.FnameDecode() ;
X.Clear() ;
// Get the path
for (ti++ ; !ti.eof() && *ti == CHAR_SPACE ; ti++) ;
if (ti == "path=")
{
for (ti += 5 ; !ti.eof() && *ti > CHAR_SPACE ; ti++)
X.AddByte(*ti) ;
cookie.m_Path = X ;
X.Clear() ;
}
// Get special directives (eg HttpOnly)
for (ti++ ; !ti.eof() && *ti == CHAR_SPACE ; ti++) ;
if (ti == "HttpOnly")
cookie.m_Flags |= COOKIE_HTTPONLY ;
m_Cookies.Insert(cookie.m_Name, cookie) ;
cookie.Clear() ;
continue ;
}
if (param.Equiv("Content-Length"))
{
if (*value && value[0])
{
duHast = true ;
m_nContentLen = atoi(*value) ;
}
continue ;
}
}
/*
** Next, garner the body from the response
*/
m_Error.Printf("Getting body. xfer=%s, expect=%d, clen=%d\n", *m_XferEncoding, duHast?1:0, m_nContentLen) ;
if (!duHast)
{
// In chunked encoding the first part (directly after the header and the terminating \r\n\r\n) will be a hex number followed
// by a \r\n (on a line by itself). This hex number gives the size of the following chunk. At the end of the chunk will be
// another hex number on a line by itself. Only when this number is zero are we at the end of the page.
//
// While reading the chunk size and chunk, we will most probably reach the end of the buffer and have to do a read operation
// on the socket.
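//
// For illustration, a chunked body of the form "4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n" assembles to the nine-byte content "Wikipedia", the
// final zero-length chunk marking the end of the page.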
m_Error.Printf("Encoding is chunked\n") ;
nExpect = nCount = 0 ;
bTerm = false ;
for (; !bTerm ;)
{
// If we are at the end of the buffer, read more
for (; zi.eof() ;)
{
// If out of data, get more
m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (nRecv <= 0)
break ;
m_Error.Printf("Read buffer %d bytes\n", nRecv) ;
Z.Clear() ;
Z.Append(m_buf, nRecv) ;
for (zi = Z ; nExpect && !zi.eof() ; nExpect--, zi++)
m_Content.AddByte(*zi) ;
if (!nExpect)
break ;
}
if (!nExpect)
{
// We are on the 'chunk size' directive. This will be of the form \r\nXXX\r\n where X is a hex number
// Get rid of any \r\n sequences that are beyond the expected chars and before the chunk size directive
for (; !zi.eof() && (*zi == CHAR_CR || *zi == CHAR_NL) ; zi++) ;
if (zi.eof())
{
// If out of input data, get more
m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (nRecv)
{
m_Error.Printf("Read extras %d bytes\n", nRecv) ;
Z.Clear() ;
Z.Append(m_buf, nRecv) ;
for (zi = Z ; !zi.eof() && (*zi == CHAR_CR || *zi == CHAR_NL) ; zi++) ;
}
}
duHast = false ;
for (;;)
{
if (zi.eof())
{
m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (nRecv)
{
m_Error.Printf("Read extras %d bytes\n", nRecv) ;
Z.Clear() ;
Z.Append(m_buf, nRecv) ;
zi = Z ;
}
}
// Read the chunk size
if (*zi >= '0' && *zi <= '9') { duHast = true ; nExpect *= 16 ; nExpect += (*zi - '0') ; zi++ ; continue ; }
if (*zi >= 'A' && *zi <= 'F') { duHast = true ; nExpect *= 16 ; nExpect += (*zi-'A'+10) ; zi++ ; continue ; }
if (*zi >= 'a' && *zi <= 'f') { duHast = true ; nExpect *= 16 ; nExpect += (*zi-'a'+10) ; zi++ ; continue ; }
if (zi == "\r\n")
{ zi += 2 ; break ; }
if (*zi == CHAR_CR)
{ zi++ ; continue ; }
if (*zi == CHAR_NL)
{ zi++ ; break ; }
sRet = E_FORMAT ;
m_Error.Printf("Unexpected char (%d) in chunking directive - from page %s\n", *zi, *url.Resource()) ;
break ;
}
if (!duHast)
{
m_Error.Printf("Chunk notice missing\n") ;
sRet = E_FORMAT ;
}
if (sRet != E_OK)
break ;
if (nExpect == 0)
bTerm = true ;
//m_Error.Printf("Chunk notice %d bytes\n", nExpect) ;
if (nExpect)
{
// Play out rest of buffer but make sure we don't exceed the chunk size
for (; !zi.eof() && nExpect ; zi++, nExpect--)
m_Content.AddByte(*zi) ;
}
else
{
// At end of page, just play out rest of buffer
for (; !zi.eof() ; zi++) ;
//m_Content.AddByte(*zi) ;
}
m_Error.Printf("Chunk complete. Expect = %d\n", nExpect) ;
}
}
}
else
{
// Not chunked - just read until stated Content-Length is reached
if (m_nContentLen)
{
for (; !zi.eof() ; zi++)
m_Content.AddByte(*zi) ;
Z.Clear() ;
for (; m_Content.Size() < m_nContentLen ;)
{
sRet = m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (sRet != E_OK)
{
m_Error.Printf("(1) Could not recv bytes from page %s (error=%s)\n", *url.Resource(), Err2Txt(sRet)) ;
break ;
}
if (nRecv == 0)
{
sRet = m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (sRet != E_OK)
{
m_Error.Printf("(2) Could not recv bytes from page %s (error=%s)\n", *url.Resource(), Err2Txt(sRet)) ;
break ;
}
}
if (nRecv <= 0)
{
m_Error.Printf("Breaking after recv %d of %d bytes\n", m_Content.Size(), m_nContentLen) ;
break ;
}
m_Content.Append(m_buf, nRecv) ;
}
if (m_Content.Size() < m_nContentLen)
{
if (m_Content.Size() == (m_nContentLen - 4))
m_Error.Printf("Allowing 4-byte shortfall\n") ;
else
sRet = E_READFAIL ;
}
}
}
if (hRet == 200)
{
if (!m_Content.Size())
{
m_Error.Printf("No content (xfer_encoding=%s content_size=%d)\n", *m_XferEncoding, m_nContentLen) ;
sRet = E_NODATA ;
}
}
if (sRet == E_OK && m_ContEncoding)
{
// Must apply appropriate decoding to content
if (m_ContEncoding == "gzip")
{
X = m_Content ;
m_Content.Clear() ;
m_Error.Printf("doing gunzip\n") ;
sRet = Gunzip(m_Content, X) ;
if (sRet != E_OK)
m_Error.Printf("Gunzip failed\n") ;
}
}
m_Error.Printf("URL [%s] Header %d bytes, Content %d bytes (%d)\n\n", *url, m_Header.Size(), m_Content.Size(), m_nContentLen) ;
if (m_Content.Size() < 2000)
{
m_Error << "Content:\n" ;
m_Error << m_Content ;
m_Error << "------------------------\n" ;
}
return sRet ;
}
hzEcode hzHttpClient::TestPage (hzChain& Z, const hzUrl& url)
{
// Get an HTTP page from a website but do not process it in any way. This is for speed testing only.
//
// Note: The website (server) must already be connected to.
// No account is taken of redirected pages.
//
// Arguments: 1) Z The chain into which page content is to be received
// 2) url The URL of the page
//
// Returns: E_ARGUMENT If no URL was specified
// E_NODATA If nothing was received
// E_OK If the response was received without error
_hzfunc("hzHttpClient::Testpage") ;
chIter zi ; // To iterate the returned page
chIter hi ; // To re-iterate lines of interest in the header of the returned page
chIter ti ; // Temp iterator
hzChain X ; // Temp buffer
hzCookie cookie ; // Cookie (drawn from supplied map of cookies)
hzString S ; // Temp string
hzString param ; // Header parameter name
hzString value ; // Header parameter value
hzString encoding ; // Page content is encoded, eg gzip
uint32_t nRecv ; // Bytes received
uint32_t nTry ; // Number of tries
hzEcode rc = E_OK ; // Return code
// Clear buffers
Z.Clear() ;
m_Header.Clear() ;
m_Content.Clear() ;
if (!url.Domain())
{ m_Error.Printf("TestPage: No host to locate\n") ; return E_ARGUMENT ; }
/*
** Formulate HTTP request
*/
m_Request.Clear() ;
if (url.Resource())
m_Request << "GET " << url.Resource() << " HTTP/1.1\r\n" ;
else
m_Request << "GET / HTTP/1.1\r\n" ;
m_Request <<
"Accept: */*\r\n"
"Accept-Language: en-gb\r\n" ;
if (m_AuthBasic)
m_Request << "Authorization: Basic " << m_AuthBasic << "\r\n" ;
m_Request << "User-Agent: HadronZoo/0.8 Linux 2.6.18\r\n" ;
m_Request << "Host: " << url.Domain() << "\r\n" ;
if (m_Referer)
m_Request << "Referer: " << m_Referer << "\r\n" ;
m_Request << "Connection: Keep-Alive\r\n\r\n" ;
/*
** Send request
*/
m_Error << " Sending [" << m_Request << "] to domain " << url.Domain() << "\n" ;
rc = m_Webhost.Send(m_Request) ;
if (rc != E_OK)
{
m_Error.Printf("Could not send request to domain [%s] (error=%s)\n", *url.Domain(), Err2Txt(rc)) ;
return rc ;
}
// Garner response
for (nTry = 0 ; nTry < 4 && !m_Header.Size() ; nTry++)
{
rc = m_Webhost.Recv(m_buf, nRecv, HZ_MAXPACKET) ;
if (rc != E_OK)
{
if (rc == E_NOSOCKET)
m_Error.Printf("Connection closed by server\n") ;
else
m_Error.Printf("Could not recv bytes (nbytes=%d) from page %s (error=%s)\n", nRecv, *url.Resource(), Err2Txt(rc)) ;
break ;
}
if (!nRecv)
{
m_Error.Printf("Got no response, retrying ...\n") ;
sleep(1) ;
continue ;
}
Z.Append(m_buf, nRecv) ;
}
if (rc != E_OK)
{
m_Error.Printf("Could not process response from [%s] (error=%s)\n", *url, Err2Txt(rc)) ;
return rc ;
}
m_Referer = url ;
return rc ;
}
hzEcode hzHttpClient::_getpage (HttpRC& hRet, const hzUrl& url, const hzString& etag)
{
// Get an HTTP page from a website but do not follow redirects. This is a support function for GetPage()
//
// Arguments: 1) hRet The HTTP return code from server
// 2) url The URL
// 3) etag Entity tag
//
// Returns: E_ARGUMENT If the URL is not supplied or no domain specified
// E_NOSOCKET If the external server has closed the connection
// E_NODATA If nothing was received
// E_FORMAT If the response was malformed
// E_OK If the response was received without error
_hzfunc("hzHttpClient::_getpage") ;
chIter zi ; // To iterate the returned page
chIter hi ; // To re-iterate lines of interest in the header of the returned page
chIter ti ; // Temp iterator
hzChain Z ; // Request buffer
hzChain X ; // Temp buffer
hzCookie cookie ; // Cookie (drawn from supplied map of cookies)
hzString S ; // Temp string
hzString param ; // Header parameter name
hzString value ; // Header parameter value
hzString encoding ; // Page content is encoded, eg gzip
uint32_t x = 0 ; // Cookie iterator
bool bFirstCookie ; // Set once a cookie has been written, so subsequent cookies are preceded by "; "
hzEcode rc = E_OK ;
// Clear buffers
m_Header.Clear() ;
m_Content.Clear() ;
if (!url.Domain())
{ m_Error.Printf("No host to locate\n") ; return E_ARGUMENT ; }
/*
** Formulate HTTP request
*/
m_Request.Clear() ;
if (url.Resource())
m_Request << "GET " << url.Resource() << " HTTP/1.1\r\n" ;
else
m_Request << "GET / HTTP/1.1\r\n" ;
m_Request << "Accept: */*\r\n" ;
//m_Request << "Accept-Encoding: gzip\r\n" ;
m_Request << "Accept-Language: en-gb\r\n" ;
if (m_Cookies.Count())
{
m_Request << "Cookie: " ;
bFirstCookie = false ;
for (x = 0 ; x < m_Cookies.Count() ; x++)
{
cookie = m_Cookies.GetObj(x) ;
if (bFirstCookie)
m_Request << "; " ;
m_Request.Printf("%s=%s", *cookie.m_Name, *cookie.m_Value) ;
bFirstCookie = true ;
}
m_Request << "\r\n" ;
}
if (etag)
m_Request << "If-None-Match: " << etag << "\r\n" ;
if (m_AuthBasic)
m_Request << "Authorization: Basic " << m_AuthBasic << "\r\n" ;
m_Request << "User-Agent: HadronZoo/0.8 Linux 2.6.18\r\n" ;
m_Request << "Host: " << url.Domain() << "\r\n" ;
if (m_Referer)
m_Request << "Referer: " << m_Referer << "\r\n" ;
m_Request << "Connection: keepalive\r\n\r\n" ;
// Connect to server
if (url.IsSSL())
rc = m_Webhost.ConnectSSL(url.Domain(), url.Port()) ;
else
rc = m_Webhost.ConnectStd(url.Domain(), url.Port()) ;
if (rc != E_OK)
{
m_Error.Printf("Could not connect to domain [%s] on port %d (error=%s)\n", *url.Domain(), url.Port(), Err2Txt(rc)) ;
return rc ;
}
// Send request
m_Error << " Sending [" << m_Request << "] to domain " << url.Domain() << "\n" ;
rc = m_Webhost.Send(m_Request) ;
if (rc != E_OK)
{
m_Error.Printf("Could not send request to domain [%s] (error=%s)\n", *url.Domain(), Err2Txt(rc)) ;
return rc ;
}
// Garner response
rc = _procHttpResponse(hRet, url) ;
if (rc != E_OK)
{
m_Error.Printf("Could not process response from [%s] (error=%s)\n", *url, Err2Txt(rc)) ;
return rc ;
}
m_Referer = url ;
m_Webhost.Close() ;
return rc ;
}
hzEcode hzHttpClient::GetPage (HttpRC& hRet, const hzUrl& url, const hzString& etag)
{
// Get an HTTP page from a website. Note that the whole page is retrieved or abandoned before this function returns. Some servers send pages with
// the header 'Transfer-Encoding: chunked' instead of the 'Content-Length:' header. This is done because the size of the page is not known at the
// start of transmission. The body part of the message is sent in chunks with the chunk size given (in hex on a line by itself) at the start of
// each chunk. Because of the existence of the chunked approach, this function has to handle it, but it is currently not possible for applications
// to take advantage of it in the intended way. Instead, applications calling this function have to wait until it returns with a complete page, however
// long!
//
// Note that no assumptions can be made about packets that are sent, except that since the connection is TCP they will arrive in order. The header
// may comprise a number of whole packets, or a packet may straddle the end of the header and the start of the content.
//
// Arguments: 1) hRet HTTP return code from the server.
// 2) url The URL of the page to retrieve.
// 3) etag Page entity tag (as maintained by hzWebhost instance)
//
// Returns: E_ARGUMENT If the URL is not supplied or no domain specified
// E_NOSOCKET If the external server has closed the connection
// E_NODATA If nothing was received
// E_FORMAT If the response was malformed
// E_OK If the response was received without error
_hzfunc("hzHttpClient::GetPage") ;
hzUrl dest ; // Actual URL for downloading - may be result of a redirection
hzString dom ; // This is set first to the called URL's domain but afterwards to any redirected domain
hzString etag2 ; // Set as null for the benefit of _getpage() in the case of redirection
hzEcode rc = E_OK ; // Return code
// Considered a top-level function so we clear the error chain
m_Error.Clear() ;
m_Error.Printf("GETTING PAGE %s\n", *url) ;
dest = url ;
m_rtRequest = RealtimeNano() ;
rc = _getpage(hRet, dest, etag) ;
m_rtResponse = RealtimeNano() ;
if (rc != E_OK)
{
m_Error.Printf("ABORTED (_getpage failure)\n") ;
return rc ;
}
for (; hRet == HTTPMSG_REDIRECT_PERM || hRet == HTTPMSG_REDIRECT_TEMP ;)
{
//Clear() ;
if (!m_Redirect)
{ m_Error.Printf("Oops - no URL to redirect to\n") ; break ; }
else
{
if (m_Redirect[0] == CHAR_FWSLASH)
{ dom = dest.Domain() ; dest.SetValue(dom, m_Redirect) ; }
else
dest = m_Redirect ;
m_Error.Printf("redirecting to %s\n", *dest) ;
rc = _getpage(hRet, dest, etag2) ;
if (rc != E_OK)
{
m_Error.Printf("Redirect FAILED (error=%s)\n", Err2Txt(rc)) ;
return rc ;
}
}
}
// Obtain document type. If HTML then also get links
m_Error.Printf("Got response %d (size %d bytes)\n", hRet, m_Content.Size()) ;
return rc ;
}
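// Example: a conditional fetch sketch (illustrative only; the URL and entity tag values are hypothetical, and hzString is assumed assignable
// from a string literal as elsewhere in this file). When an etag is supplied, _getpage() adds an If-None-Match header, so an unchanged page is
// typically answered with HTTP 304 and a previously stored copy can be reused.
//
//	hzHttpClient	client ;
//	hzUrl			url ;
//	hzString		etag = "\"686897696a7c876b7e\"" ;
//	HttpRC			hRet ;
//
//	url = "http://www.example.com/news.html" ;
//	if (client.GetPage(hRet, url, etag) == E_OK)
//	{
//		if (hRet == 304)
//			;	// Not modified - reuse the stored copy
//		else if (hRet == HTTPMSG_OK)
//			;	// Fresh content received
//	}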
hzEcode hzHttpClient::_postform (HttpRC& hRet, const hzUrl& url, hzVect<hzString>& hdrs, const hzChain& formData)
{
// Support function for hzHttpClient::PostForm(). Compiles the HTTP request and adds the supplied form. The functionality herein would just
// appear in PostForm() except for the need to cope with redirection. This requires that the request ...
//
// Arguments: 1) hRet Reference to HTTP return code, set by this operation
// 2) url The URL to post the form to
// 3) hdrs Vector of additional HTTP headers
// 4) formData The actual form data
//
// Returns: E_ARGUMENT If the URL is not supplied or no domain specified
// E_NOSOCKET If the external server has closed the connection
// E_NODATA If nothing was received
// E_FORMAT If the response was malformed
// E_OK If the form was posted and the response was received without error
_hzfunc("hzHttpClient::PostForm") ;
hzCookie cookie ; // Cookie (drawn from supplied map of cookies)
hzString dom ; // Domain part of URL
hzString res ; // Resource part of URL
uint32_t nPort ; // Port (from URL)
uint32_t nIndex ; // Form data iterator
bool bFirstCookie ; // Set once a cookie has been written, so subsequent cookies are preceded by "; "
hzEcode rc ; // Return code
m_Request.Clear() ;
dom = url.Domain() ;
res = url.Resource() ;
nPort = url.Port() ;
if (url.IsSSL())
m_Request.Printf("POST https://%s%s HTTP/1.1\r\n", *dom, *res) ;
else
m_Request.Printf("POST http://%s%s HTTP/1.1\r\n", *dom, *res) ;
m_Request << "Host: " << dom << "\r\n" ;
m_Request << "User-Agent: HadronZoo/0.8 Linux 2.6.18\r\n" ;
m_Request << "Accept: */*\r\n" ;
m_Request << "Accept-Language: en-gb,en;q=0.5\r\n" ;
//m_Request << "Accept-Encoding: gzip, deflate\r\n" ;
m_Request << "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n" ;
if (m_Referer)
m_Request << "Referer: " << m_Referer << "\r\n" ;
m_Request.Printf("Content-Length: %d\r\n", formData.Size()) ;
if (m_Cookies.Count())
{
m_Request << "Cookie: " ;
bFirstCookie = false ;
for (nIndex = 0 ; nIndex < m_Cookies.Count() ; nIndex++)
{
cookie = m_Cookies.GetObj(nIndex) ;
if (bFirstCookie)
m_Request << "; " ;
m_Request.Printf("%s=%s", *cookie.m_Name, *cookie.m_Value) ;
bFirstCookie = true ;
}
m_Request << "\r\n" ;
}
if (hdrs.Count())
{
for (nIndex = 0 ; nIndex < hdrs.Count() ; nIndex++)
//m_Request << hdrs.Element(nIndex) ;
m_Request << hdrs[nIndex] ;
}
m_Request << "Connection: keep-alive\r\n" ;
m_Request << "Pragma: no-cache\r\n" ;
m_Request << "Cache-Control: no-cache\r\n\r\n" ;
m_Request << formData ;
// Connect to server
if (url.IsSSL())
rc = m_Webhost.ConnectSSL(dom, nPort) ;
else
rc = m_Webhost.ConnectStd(dom, nPort) ;
if (rc != E_OK)
{
m_Error.Printf("Could not connect to %s on port %d\n", *dom, nPort) ;
return rc ;
}
m_Error.Printf("Connected to %s on port %d\n[\n", *dom, nPort) ;
m_Error << m_Request ;
m_Error << "\n-------------------------\n\n" ;
rc = m_Webhost.Send(m_Request) ;
if (rc != E_OK)
m_Error.Printf("Could not send request (error=%s)\n", Err2Txt(rc)) ;
else
{
rc = _procHttpResponse(hRet, url) ;
if (rc != E_OK)
m_Error.Printf("Could not get response (error=%s)\n", Err2Txt(rc)) ;
}
return rc ;
}
hzEcode hzHttpClient::PostForm (HttpRC& hRet, const hzUrl& url, hzVect<hzString>& hdrs, const hzList<hzPair>& formData)
{
// Post a form to the server. Note that this will normally result in an HTTP response. This response must be processed in the same
// way as any other (i.e. values are extracted from lines in the HTTP header).
//
// Arguments: 1) hRet HTTP return code
// 2) url The URL
// 3) hdrs Lines in HTTP header
// 4) formData The form data to be submitted
//
// Returns: E_ARGUMENT If the URL is not supplied or no domain specified
// E_NOSOCKET If the external server has closed the connection
// E_NODATA If no form data was supplied
// E_FORMAT If the response was malformed
// E_OK If the form was posted and the response was received without error
_hzfunc("hzHttpClient::PostForm") ;
hzList<hzPair>::Iter iD ; // Form data iterator
hzChain F ; // Form data in submissible form
hzCookie cookie ; // Cookie (drawn from supplied map of cookies)
hzPair P ; // Form data field
hzUrl dest ; // Url may change due to redirection
hzString dom ; // Domain part of URL
hzString res ; // Resource part of URL
hzString etag ; // Null etag passed to _getpage() on redirects
hzEcode rc ; // Return code
// Considered a top-level function so we clear the error chain
m_Error.Clear() ;
m_Error.Printf("POSTING FORM %s\n", *url) ;
//Clear() ;
m_Header.Clear() ;
m_Content.Clear() ;
m_Request.Clear() ;
if (!formData.Count())
return E_NODATA ;
for (iD = formData ; iD.Valid() ; iD++)
{
P = iD.Element() ;
if (F.Size())
F.AddByte(CHAR_AMPSAND) ;
F << P.name ;
F.AddByte(CHAR_EQUAL) ;
P.value.UrlEncode() ;
F << P.value ;
}
dest = url ;
rc = _postform(hRet, dest, hdrs, F) ;
if (rc != E_OK)
{
m_Error.Printf("FAILED (error=%s)\n", Err2Txt(rc)) ;
return rc ;
}
for (; hRet == HTTPMSG_REDIRECT_PERM || hRet == HTTPMSG_REDIRECT_TEMP ;)
{
if (!m_Redirect)
{ m_Error.Printf("Oops - no URL to redirect to\n") ; break ; }
else
{
if (m_Redirect[0] == CHAR_FWSLASH)
{ dom = dest.Domain() ; dest.SetValue(dom, m_Redirect) ; }
else
dest = m_Redirect ;
m_Error.Printf("redirecting to %s\n", *dest) ;
etag = (char*) 0 ;
rc = _getpage(hRet, dest, etag) ;
if (rc != E_OK)
{
m_Error.Printf("Redirect FAILED (error=%s)\n", Err2Txt(rc)) ;
break ;
}
}
}
return rc ;
}
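// Example: a form submission sketch (illustrative only; the field names, values and URL are hypothetical). PostForm() URL-encodes each value,
// submits the fields as application/x-www-form-urlencoded and, per the loop above, follows any redirect in the response with a GET.
//
//	hzHttpClient		client ;
//	hzVect<hzString>	hdrs ;		// No additional headers
//	hzList<hzPair>		form ;		// Form fields
//	hzPair				P ;
//	hzUrl				url ;
//	HttpRC				hRet ;
//
//	P.name = "username" ;	P.value = "alice" ;		form.Add(P) ;
//	P.name = "password" ;	P.value = "secret" ;	form.Add(P) ;
//	url = "https://www.example.com/login" ;
//	if (client.PostForm(hRet, url, hdrs, form) == E_OK && hRet == HTTPMSG_OK)
//		;	// Login response header and content are now held by the client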
hzEcode hzHttpClient::PostAjax (HttpRC& hRet, const hzUrl& url, hzVect<hzString>& hdrs, const hzList<hzPair>& formData)
{
// Post a form to the server but do not seek an HTTP response.
//
// Arguments: 1) hRet HTTP return code
// 2) url The URL
// 3) hdrs Lines in HTTP header
// 4) formData The form data to be submitted
//
// Returns: E_ARGUMENT If the URL is not supplied or no domain specified
// E_NOSOCKET If the external server has closed the connection
// E_NODATA If no form data was supplied
// E_FORMAT If the response was malformed
// E_OK If the AJAX request was sent without error
_hzfunc("hzHttpClient::PostAjax") ;
hzList<hzPair>::Iter iD ; // Form data iterator
hzChain F ; // Form data in submissible form
hzCookie cookie ; // Cookie (drawn from supplied map of cookies)
hzPair P ; // Form data field
hzString dom ; // Domain part of URL
hzString res ; // Resource part of URL
hzString S ; // Temp string (holds the request for logging)
uint32_t nPort ; // Port (from URL)
uint32_t nIndex ; // Form data iterator
hzEcode rc ; // Return code
//Clear() ;
m_Header.Clear() ;
m_Content.Clear() ;
m_Request.Clear() ;
if (!formData.Count())
return E_NODATA ;
for (iD = formData ; iD.Valid() ; iD++)
{
P = iD.Element() ;
if (F.Size())
F.AddByte(CHAR_AMPSAND) ;
F << P.name ;
F.AddByte(CHAR_EQUAL) ;
F << P.value ;
}
dom = url.Domain() ;
res = url.Resource() ;
nPort = url.Port() ;
if (url.IsSSL())
m_Request.Printf("POST https://%s%s HTTP/1.1\r\n", *dom, *res) ;
else
m_Request.Printf("POST http://%s%s HTTP/1.1\r\n", *dom, *res) ;
//m_Request << "POST " << "http://" << dom << res << " HTTP/1.1\r\n" ;
m_Request << "Accept: text/*\r\n" ;
m_Request << "Accept-Language: en-gb\r\n" ;
//m_Request << "Accept-Encoding:\r\n" ;
//m_Request << "Accept-Encoding: gzip, deflate\r\n" ;
for (nIndex = 0 ; nIndex < m_Cookies.Count() ; nIndex++)
{
cookie = m_Cookies.GetObj(nIndex) ;
if (cookie.m_Flags & COOKIE_HTTPONLY)
continue ;
m_Request.Printf("Cookie: %s=%s\r\n", *cookie.m_Name, *cookie.m_Value) ;
}
//m_Request << "User-Agent: HadronZoo/0.8 (compatible; MSIE 6.0;)\r\n" ;
m_Request << "User-Agent: HadronZoo/0.8 Linux 2.6.18\r\n" ;
m_Request.Printf("Content-Length: %d\r\n", F.Size()) ;
m_Request << "Host: " << dom << "\r\n" ;
if (hdrs.Count())
{
for (nIndex = 0 ; nIndex < hdrs.Count() ; nIndex++)
//m_Request << hdrs.Element(nIndex) ;
m_Request << hdrs[nIndex] ;
}
m_Request << "Connection: close\r\n\r\n" ;
m_Request << F ;
S = m_Request ;
threadLog("Sending [\n%s]\n", *S) ;
// Connect to server
if (url.IsSSL())
rc = m_Webhost.ConnectSSL(dom, nPort) ;
else
rc = m_Webhost.ConnectStd(dom, nPort) ;
if (rc != E_OK)
return rc ;
// Send request
rc = m_Webhost.Send(m_Request) ;
return rc ;
}
/*
** Section 2, Subsect-A: hzWebhost private functions
*/
void hzWebhost::_clear (void)
{
// Clears the hzWebhost for shutdown or for re-initialization for syncing another website
//
// Arguments: None
// Returns: None
_hzfunc("hzWebhost::_clear") ;
hzDocMeta* pMark ; // Document info
uint32_t nIndex ; // History iterator
m_Offsite.Clear() ;
m_Domains.Clear() ;
m_Roots.Clear() ;
m_Feeds.Clear() ;
m_Emails.Clear() ;
m_Banned.Clear() ;
for (nIndex = 0 ; nIndex < m_mapHist.Count() ; nIndex++)
{
pMark = m_mapHist.GetObj(nIndex) ;
delete pMark ;
}
m_mapHist.Clear() ;
m_vecHist.Clear() ;
}
hzEcode hzWebhost::_loadstatus (void)
{
// Load visit status file (called upon startup). This way we do not re-fetch pages that have already been loaded unless they are out of date.
//
// Arguments: None
//
// Returns: E_NOINIT If the repository for the webhost has not previously been defined
// E_OPENFAIL If the visit status file could not be opened
// E_OK If the visit status file is read in or was empty
_hzfunc("hzWebhost::_loadstatus") ;
hzDocXml X ; // The manifest as XML document
hzWebCMD wc ; // Current web command
hzAttrset ai ; // Attribute iterator
hzDocMeta* pMark ; // Link meta data
hzXmlNode* pRoot ; // Root XML node
hzXmlNode* pN1 ; // Level 1 XML node
hzXmlNode* pN2 ; // Level 2 XML node
hzXmlNode* pN3 ; // Level 3 XML node
_pageList* pgl ; // Page list instance
hzPair p ; // Pair from formdata
hzUrl url ; // in-page link
hzString vs_fname ; // Visit status file
hzString anam ; // Attribute name
hzString aval ; // Attribute value
hzEcode rc = E_OK ; // Return
m_mapHist.Clear() ;
m_vecHist.Clear() ;
if (!m_Repos)
return hzerr(E_NOINIT, "No repository specified. Cannot determine data state") ;
vs_fname = m_Repos + "/manifest" ;
rc = TestFile(vs_fname) ;
if (rc == E_NOTFOUND)
{ threadLog("No status file found. Repository in virgin state\n") ; return E_OK ; }
if (rc != E_OK)
{ threadLog("manifest file lookup error (%s)\n", Err2Txt(rc)) ; return rc ; }
rc = X.Load(vs_fname) ;
if (rc != E_OK)
{ threadLog("Could not open Visit Status File %s for writing\n", *vs_fname) ; return E_OPENFAIL ; }
pRoot = X.GetRoot() ;
for (pN1 = pRoot->GetFirstChild() ; rc == E_OK && pN1 ; pN1 = pN1->Sibling())
{
if (pN1->NameEQ("pagelists"))
{
for (pN2 = pN1->GetFirstChild() ; rc == E_OK && pN2 ; pN2 = pN2->Sibling())
{
if (pN2->NameEQ("pagelist"))
{
ai = pN2 ;
if (ai.Valid())
{
anam = ai.Name() ; aval = ai.Value() ;
pgl = new _pageList() ;
if (anam == "name")
pgl->name = aval ;
}
for (pN3 = pN2->GetFirstChild() ; rc == E_OK && pN3 ; pN3 = pN3->Sibling())
{
if (pN3->NameEQ("page"))
{
ai = pN3 ;
if (ai.Valid())
{
anam = ai.Name() ; aval = ai.Value() ;
if (anam == "url")
pgl->links.Add(aval) ;
}
}
}
}
}
}
if (pN1->NameEQ("commands"))
{
ai = pN1 ;
if (ai.Valid())
{
anam = ai.Name() ; aval = ai.Value() ;
if (anam == "sofar")
m_Sofar = atoi(*aval) ;
}
for (pN2 = pN1->GetFirstChild() ; rc == E_OK && pN2 ; pN2 = pN2->Sibling())
{
if (pN2->NameEQ("command"))
continue ;
for (ai = pN2 ; ai.Valid() ; ai.Advance())
{
anam = ai.Name() ; aval = ai.Value() ;
if (anam == "url") wc.m_Url = aval ;
else if (anam == "crit") wc.m_Crit = aval ;
else if (anam == "slct") wc.m_Slct = aval ;
else if (anam == "inps") wc.m_Inputs = aval ;
else if (anam == "outs") wc.m_Output = aval ;
}
pN3 = pN2->GetFirstChild() ;
if (pN3 && pN3->NameEQ("form"))
{
for (ai = pN3 ; ai.Valid() ; ai.Advance())
{
anam = ai.Name() ; aval = ai.Value() ;
p.name = anam ;
p.value = aval ;
wc.m_Formdata.Add(p) ;
}
}
}
}
if (pN1->NameEQ("history"))
{
for (pN2 = pN1->GetFirstChild() ; rc == E_OK && pN2 ; pN2 = pN2->Sibling())
{
if (pN2->NameEQ("page"))
{
pMark = new hzDocMeta() ;
for (ai = pN2 ; ai.Valid() ; ai.Advance())
{
anam = ai.Name() ; aval = ai.Value() ;
if (anam == "urlReq") pMark->m_urlReq = aval ;
else if (anam == "urlAct") pMark->m_urlAct = aval ;
else if (anam == "title") pMark->m_Title = aval ;
else if (anam == "desc") pMark->m_Desc = aval ;
else if (anam == "fname") pMark->m_Filename = aval ;
else if (anam == "etag") pMark->m_Etag = aval ;
else if (anam == "dtDnl") pMark->m_Download.SetDateTime(aval) ;
else if (anam == "dtMod") pMark->m_Modified.SetDateTime(aval) ;
else if (anam == "dtExp") pMark->m_Expires.SetDateTime(aval) ;
else if (anam == "type") pMark->m_Doctype = (hzDoctype) atoi(*aval) ;
else
threadLog("Unexpected page attribute %s=%s\n", *anam, *aval) ;
}
m_vecHist.Add(pMark) ;
}
}
}
}
return rc ;
}
hzEcode hzWebhost::_savestatus (void)
{
// Write out visit status file. This keeps a record of which URLs have already been downloaded and to which files, and the expiry
// date (after which the page will have to be fetched again)
//
// Arguments: None
//
// Returns: E_NOINIT If the repository for the webhost has not previously been defined
// E_OPENFAIL If the visit status file could not be opened
// E_OK If the visit status file was written
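//
// For illustration, the manifest written below contains the following sections (attribute values are examples only):
//
//	<cookies>    - one <cookie sig=".." name=".." path=".."/> per stored cookie
//	<pagelists>  - one <pagelist name=".."> per named page list, each holding <page url=".."/> entries
//	<commands>   - one <command type=".." url=".."/> per web command, plus the 'sofar' progress counter
//	<history>    - one entry per downloaded document (urlReq, urlAct, fname, etag and dates)
//	<trace>      - any accumulated trace text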
_hzfunc("hzWebhost::_status") ;
hzList<hzUrl>::Iter li ; // Links iterator (for pagelists)
hzList<hzWebCMD>::Iter ci ; // Iterator for web commands
hzList<hzPair>::Iter pi ; // Iterator for web commands
ofstream os ; // Output stream
hzWebCMD wc ; // Current web command
hzCookie cook ; // Cookie instance
hzChain Z ; // For building status file
_pageList* pgl ; // Pagelist
hzDocMeta* pMark ; // Document meta data
hzPair p ; // Pair from formdata
hzString vs_fname ; // Visit status file
hzString S ; // Tmp string
hzUrl url ; // Link
uint32_t nIndex ; // History iterator
uint32_t x ; // Cookie and pagelist iterator
hzEcode rc = E_OK ; // Return
if (!m_Repos)
return hzerr(E_NOINIT, "No repository specified. Cannot determine data state") ;
vs_fname = m_Repos + "/manifest" ;
os.open(*vs_fname) ;
if (os.fail())
{
threadLog("Could not open Visit Status File %s for writing\n", *vs_fname) ;
return E_OPENFAIL ;
}
threadLog("savestat: case 1\n") ;
if (m_Cookies.Count())
{
Z << "<cookies>\n" ;
for (x = 0 ; x < m_Cookies.Count() ; x++)
{
cook = m_Cookies.GetObj(x) ;
Z.Printf("\t<cookie sig=\"%s\" name=\"%s\" path=\"%s\" flg=\"%d\" expire=\"%s\"/>\n",
*cook.m_Value, *cook.m_Name, *cook.m_Path, cook.m_Flags, *cook.m_Expires) ;
}
Z << "</cookies>\n" ;
}
threadLog("savestat: case 2\n") ;
if (m_Pagelists.Count())
{
Z << "<pagelists>\n" ;
for (x = 0 ; x < m_Pagelists.Count() ; x++)
{
pgl = m_Pagelists.GetObj(x) ;
Z.Printf("\t<pagelist name=\"%s\">\n", *pgl->name) ;
if (pgl->links.Count())
{
for (li = pgl->links ; li.Valid() ; li++)
{
url = li.Element() ;
Z.Printf("\t\t<page url=\"%s\">\n", *url.Whole()) ;
}
}
Z << "\t</pagelist>\n" ;
}
Z << "</pagelists>\n" ;
}
threadLog("savestat: case 3\n") ;
/*
** Do command list and status
*/
Z.Printf("<commands sofar=\"%d\">\n", m_Sofar) ;
for (ci = m_Commands ; ci.Valid() ; ci++)
{
wc = ci.Element() ;
if (wc.m_Cmd == WEBCMD_LOAD_PAGE) Z << "\t<command type=\"WEBCMD_LOAD_PAGE\"" ;
if (wc.m_Cmd == WEBCMD_LOAD_LIST) Z << "\t<command type=\"WEBCMD_LOAD_LIST\"" ;
if (wc.m_Cmd == WEBCMD_SLCT_PAGE) Z << "\t<command type=\"WEBCMD_SLCT_PAGE\"" ;
if (wc.m_Cmd == WEBCMD_SLCT_LIST) Z << "\t<command type=\"WEBCMD_SLCT_LIST\"" ;
if (wc.m_Cmd == WEBCMD_RGET) Z << "\t<command type=\"WEBCMD_RGET\"" ;
if (wc.m_Cmd == WEBCMD_POST) Z << "\t<command type=\"WEBCMD_POST\"" ;
if (wc.m_Cmd == WEBCMD_RSS) Z << "\t<command type=\"WEBCMD_RSS\"" ;
if (wc.m_Url) Z.Printf(" url=\"%s\"", *wc.m_Url) ;
if (wc.m_Crit) Z.Printf(" crit=\"%s\"", *wc.m_Crit) ;
if (wc.m_Slct) Z.Printf(" slct=\"%s\"", *wc.m_Slct) ;
if (wc.m_Inputs) Z.Printf(" inps=\"%s\"", *wc.m_Inputs) ;
if (wc.m_Output) Z.Printf(" outs=\"%s\"", *wc.m_Output) ;
if (!wc.m_Formdata.Count())
Z << " />\n" ;
else
{
Z << ">\n" ;
Z << "\t\t<form " ;
for (pi = wc.m_Formdata ; pi.Valid() ; pi++)
{
p = pi.Element() ;
Z.Printf(" %s=\"%s\"", *p.name, *p.value) ;
}
Z << " />\n" ;
Z << "\t</command>\n" ;
}
}
Z << "</commands>\n" ;
threadLog("savestat: case 4\n") ;
/*
** Do History
*/
Z << "<history>\n" ;
for (nIndex = 0 ; nIndex < m_vecHist.Count() ; nIndex++)
{
pMark = m_vecHist[nIndex] ;
Z.Printf("\t<webpage id=\"%d\" type=\"%d\"", pMark->m_Id, (uint32_t) pMark->m_Doctype) ;
if (pMark->m_urlReq) Z.Printf("\n\t\turlReq=\"%s\"", *pMark->m_urlReq) ;
if (pMark->m_urlAct) Z.Printf("\n\t\turlAct=\"%s\"", *pMark->m_urlAct) ;
if (pMark->m_Title) Z.Printf("\n\t\ttitle=\"%s\"", *pMark->m_Title) ;
if (pMark->m_Desc) Z.Printf("\n\t\tdesc=\"%s\"", *pMark->m_Desc) ;
if (pMark->m_Filename) Z.Printf("\n\t\tfname=\"%s\"", *pMark->m_Filename) ;
if (pMark->m_Etag) Z.Printf("\n\t\e-tag=\"%s\"", *pMark->m_Etag) ;
if (pMark->m_Download.IsSet()) Z.Printf("\n\t\tdtDnl=\"%s\"", *pMark->m_Download) ;
if (pMark->m_Modified.IsSet()) Z.Printf("\n\t\tdtMod=\"%s\"", *pMark->m_Modified) ;
if (pMark->m_Expires.IsSet()) Z.Printf("\n\t\tdtExp=\"%s\"", *pMark->m_Expires) ;
Z << "/>\n" ;
}
Z << "</history>\n" ;
threadLog("savestat: case 5\n") ;
if (m_Trace.Size())
{
Z << "<trace>\n" ;
Z << m_Trace ;
Z << "</trace>\n" ;
}
threadLog("savestat: case 6\n") ;
Z << "</manifest>\n" ;
//Rat4Html(Z) ;
os << Z ;
os.close() ;
return rc ;
}
hzEcode hzWebhost::AddRoot (hzUrl& url, hzString& criteria)
{
// Adds a root URL for the target website
//
// Arguments: 1) url The root URL of the website
// 2) criteria The resource we want as the entry point
//
// Returns: E_ARGUMENT If the URL is not specified
// E_OK If the root is added
_hzfunc("hzWebhost::AddRoot") ;
hzPair X ; // URL/Search criteria pair
if (!url)
return E_ARGUMENT ;
X.name = url.Whole() ;
X.value = criteria ;
m_Roots.Add(X) ;
return E_OK ;
}
hzEcode hzWebhost::AddRSS (hzUrl& rss)
{
// Adds an RSS feed URL for the target website
//
// Arguments: 1) rss The URL of the website's RSS feed
//
// Returns: E_ARGUMENT If the URL is not specified
// E_OK If the root is added
_hzfunc("hzWebhost::AddRSS") ;
m_Feeds.Add(rss) ;
return E_OK ;
}
#define SITEPARAM_USE_FIRST_COOKIE 0x01 // Use the first cookie provided for the rest of session
#define SITEPARAM_USE_LOGIN_COOKIE 0x02 // Use the cookie in the login response for the rest of session
hzEcode hzWebhost::AuthBasic (const char* username, const char* password)
{
// Sets the basic authentication string for the website (if the site uses this method). Once set all requests to the target website will be
// submitted with this string in the HTTP header.
//
// Arguments: 1) username The user account username
// 2) password The user account password
//
// Returns: E_ARGUMENT If either the username or password is not supplied
// E_OK If the root is added
_hzfunc("hzWebhost::AuthBasic") ;
hzChain Enc ; // The encrypted sequence
hzChain Raw ; // The raw sequence
if (!username || !username[0] || !password || !password[0])
{
threadLog("Must supply both a username and password\n") ;
return E_ARGUMENT ;
}
Raw << username ;
Raw.AddByte(CHAR_COLON) ;
Raw << password ;
Base64Encode(Enc, Raw) ;
HC.m_AuthBasic = m_AuthBasic = Enc ;
return E_OK ;
}
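// Example: AuthBasic("user", "pass") base64-encodes "user:pass" to "dXNlcjpwYXNz", so every subsequent request to the site carries the header
// "Authorization: Basic dXNlcjpwYXNz" (credentials shown are illustrative only).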
hzEcode hzWebhost::Login (void)
{
// Execute the login process. This is always a case of downloading each page listed in m_Authsteps (if any) and then posting to the URL given in m_Authpage (if provided) with
// the name-value pairs listed in m_Authform.
//
// Arguments: None
//
// Returns: E_NOTFOUND If the login page was not located
// E_WRITEFAIL If the form received was not written to the repository
// E_OK If the login form was posted (not the same thing as a successful login)
_hzfunc("hzWebhost::Login") ;
hzList<hzUrl>::Iter ias ; // Iterator for URLs in m_Authsteps
hzList<hzPair>::Iter inv ; // Iterator for name-value pairs in m_Authform
hzVect<hzString> hdrs ; // Extra headers, needed for submit form (not generally applicable)
ofstream os ; // For exporting to file
hzDocument* pDoc ; // Downloaded document
hzPair P ; // Name-value pair instance
hzUrl url ; // URL instance
hzString S ; // Temp string
hzString etag ; // For GetPage() call
HttpRC hRet ; // HTTP return code
bool bAuthpage = false ; // Set to true if the login form (if used) is correctly listed in m_Authsteps
hzEcode rc = E_OK ; // Return code
threadLog("Starting Login Sequence\n") ;
// Verify whether we have to log on and, if so, that the parameters are in place to support the login
if (m_Opflags & HZ_WEBSYNC_AUTH_BASIC)
{ threadLog("Basis Authentication. No login process required\n") ; return E_OK ; }
if (!(m_Opflags & (HZ_WEBSYNC_AUTH_POST | HZ_WEBSYNC_AUTH_GET)))
{
threadLog("No Authentication method\n") ;
if (!m_Authsteps.Count() && !m_Authform.Count())
{ threadLog("No Authentication steps or form submission. No login process required\n") ; return E_OK ; }
}
// Download all pages listed in m_Authsteps (note the download must happen even if the page is in the history because we need the cookies)
for (ias = m_Authsteps ; rc == E_OK && ias.Valid() ; ias++)
{
url = ias.Element() ;
if (url == m_Authpage)
bAuthpage = true ;
rc = HC.GetPage(hRet, url, etag) ;
if (rc != E_OK)
{ rc = E_NOTFOUND ; threadLog("Could not download %s\n", *url) ; }
}
if (rc != E_OK)
return rc ;
if (!bAuthpage && m_Authpage)
{
pDoc = Download(m_Authpage) ;
if (!pDoc)
{ threadLog("Could not download %s\n", *url) ; return E_NOTFOUND ; }
}
// Now if there is a login form, post this now
if (m_Authform.Count())
{
// Write out login form to file
if (m_Repos)
{
S = m_Repos + "/login_form" ;
os.open(*S) ;
if (os.fail())
{ threadLog("Cannot write out header file %s\n", *S) ; return E_WRITEFAIL ; }
os << HC.m_Header ;
os << "\r\n\r\n" ;
os << HC.m_Content ;
os.close() ;
os.clear() ;
}
// Post the form
rc = HC.PostForm(hRet, m_Authpage, hdrs, m_Authform) ;
if (rc != E_OK)
{ threadLog("Could not post form to %s\n", *m_Authpage) ; return rc ; }
// Write out the login response
if (m_Repos)
{
S = m_Repos + "/login_response" ;
os.open(*S) ;
if (os.fail())
{ threadLog("Cannot write out header file %s\n", *S) ; return E_WRITEFAIL ; }
os << HC.m_Header ;
os << "\r\n\r\n" ;
os << HC.m_Content ;
os.close() ;
}
}
return rc ;
}
void hzWebhost::Logout (void)
{
// Execute the logout process.
//
// Arguments: None
// Returns: None
_hzfunc("hzWebhost::Logout") ;
// STUB
}
hzEcode hzWebhost::Sync (void)
{
// Run the series of hzWebCMD directives to sync key pages from a website to a repository
//
// Arguments: None
//
// Returns: E_NOINIT If no repository, no domain or no homepage has been specified
// E_NOTFOUND If the login page was not located
// E_WRITEFAIL If the login form received was not written to the repository
// E_OPENFAIL If the visit status file could not be opened
// E_OK If the scrape operation was successful
_hzfunc("hzWebhost::Sync") ;
hzMapS <hzUrl,hzDocument*> cur ; // Currently loaded documents
hzMapS <hzString,hzString> fvals ; // Form values to be submitted
hzVect <hzHtmElem*> elems ; // Elements selected by the web selector command
hzList <hzWebCMD>::Iter ci ; // Iterator for web commands
hzList <hzPair>::Iter pi ; // Iterator for form data
hzList <hzUrl>::Iter si ; // Iterator for pagelist
hzList <hzHtmForm*>::Iter fi ; // Iterator for forms
hzSet <hzUrl> set_ctrl ; // Initial links from processing config params
hzVect <hzUrl> pglinks ; // Links encountered within a given pages
hzVect <hzUrl> allinks ; // Links encountered within a given pages
hzVect <hzString> hdrs ; // Extra headers, needed for submit form
hzList <hzPair> flist ; // Filtered list of form values
ofstream os ; // For writing form response
_pageList* pgl = 0 ; // Primary pagelist instance
_pageList* pgl2 = 0 ; // Secondary pagelist instance
hzWebCMD wc ; // Current web command
hzDocument* pDoc ; // Downloaded document
hzDocHtml* pHdoc ; // Set if downloaded document is a HTML page.
hzHtmElem* pElem ; // HTML element (tag) lifted from page
hzHtmForm* pForm ; // Form found in page
hzPair P ; // Name value pair
hzXDate now ; // Date/time now (for checking if pages have expired)
hzAttrset ai ; // HTML element attribute iterator
hzString anam ; // Attribute name
hzString aval ; // Attribute value
hzString S ; // Temp string
hzUrl url ; // Temp link
uint32_t nStart ; // Links iterator
uint32_t nLimit ; // Links iterator
uint32_t nCount ; // Links iterator
uint32_t n ; // Aggregation iterator
HttpRC hRet = HTTPMSG_OK ; // HTTP return code
hzEcode rc ; // Return code
threadLog("Called hzWebhost::Sync\n") ;
// Check if repository and list of command is set up
if (!m_Repos)
{ threadLog("Website is not properly initialized (no repository)\n") ; return E_NOINIT ; }
if (!m_Commands.Count())
{ threadLog("Website is not properly initialized (no commands)\n") ; return E_NOINIT ; }
// Read in any existing manifest file
rc = _loadstatus() ;
if (rc != E_OK)
{ threadLog("Error on loading status - aborting\n") ; return rc ; }
// If resuming execution, start where we left off
for (n = 0, ci = m_Commands ; n < m_Sofar ; n++, ci++) ;
// Execute commands in order
for (; rc == E_OK && hRet == HTTPMSG_OK && ci.Valid() ; ci++)
{
pDoc = 0 ;
wc = ci.Element() ;
switch (wc.m_Cmd)
{
case WEBCMD_LOAD_PAGE: // Get a page (no conditions)
if (!wc.m_Url)
{ threadLog("Invalid loadPage command - no URL\n") ; rc = E_NOINIT ; break ; }
threadLog("Doing WEBCMD_LOAD_PAGE\n") ;
pDoc = Download(wc.m_Url) ;
if (!pDoc)
{ threadLog("case 1. Could not fetch page %s\n", *wc.m_Url) ; rc = E_NOTFOUND ; break ; }
cur.Insert(wc.m_Url, pDoc) ;
if (pDoc->Whatami() == DOCTYPE_HTML)
{
pHdoc = (hzDocHtml*) pDoc ;
if (pHdoc->m_Forms.Count())
{
// Add the forms to the m_Forms map in the hzWebhost instance
for (fi = pHdoc->m_Forms ; fi.Valid() ; fi++)
{
pForm = fi.Element() ;
m_Forms.Insert(pForm->name, pForm) ;
}
}
}
break ;
case WEBCMD_LOAD_LIST: // Get a list of pages (list supplied in command)
threadLog("Doing WEBCMD_LOAD_LIST\n") ;
if (!wc.m_Inputs)
{ threadLog(" - Invalid loadList command - no list of links named\n") ; rc = E_NOTFOUND ; break ; }
if (!m_Pagelists.Exists(wc.m_Inputs))
{ threadLog(" - No such list of links as %s\n", *wc.m_Inputs) ; rc = E_NOTFOUND ; break ; }
pgl = m_Pagelists[wc.m_Inputs] ;
for (si = pgl->links ; si.Valid() ; si++)
{
url = si.Element() ;
pDoc = Download(url) ;
if (!pDoc)
{ threadLog(" - case 3. Could not fetch page %s\n", *url) ; rc = E_NOTFOUND ; }
else
threadLog(" - Fetched page %s\n", *url) ;
}
threadLog("Ending WEBCMD_LOAD_LIST (%s)\n", *wc.m_Inputs) ;
break ;
case WEBCMD_SLCT_PAGE: // Select links from a page
threadLog("Doing WEBCMD_SLCT_PAGE\n") ;
if (wc.m_Url && wc.m_Inputs) { rc = E_NOINIT ; threadLog("Invalid request. Both a URL and an Input set specified\n") ; }
if (!wc.m_Url && !wc.m_Inputs) { rc = E_NOINIT ; threadLog("Invalid request. No URL or Input set specified\n") ; }
if (!wc.m_Output) { rc = E_NOINIT ; threadLog("Invalid linkSlct command - no name for output list\n") ; }
if (!wc.m_Slct && !wc.m_Crit) { rc = E_NOINIT ; threadLog("Invalid linkSlct command - no node selection or globing criteria\n") ; }
if (rc != E_OK)
break ;
if (cur.Exists(wc.m_Url))
pDoc = cur[wc.m_Url] ;
else
pDoc = Download(wc.m_Url) ;
if (!pDoc)
{ rc = E_NOTFOUND ; threadLog("case 2. Could not fetch page %s\n", *wc.m_Url) ; break ; }
pgl = new _pageList() ;
pgl->name = wc.m_Output ;
if (pDoc->Whatami() != DOCTYPE_HTML)
threadLog("Not a HTML document\n") ;
else
{
pHdoc = (hzDocHtml*) pDoc ;
for (n = 0 ; n < pHdoc->m_vecTags.Count() ; n++)
{
pElem = pHdoc->m_vecTags[n] ;
threadLog("VEC TAG %d <%s ", n, *pElem->Name()) ;
for (ai = pElem ; ai.Valid() ; ai.Advance())
{
threadLog(" %s=%s", ai.Name(), ai.Value()) ;
}
threadLog(" />\n") ;
}
rc = pHdoc->FindElements(elems, wc.m_Slct) ;
for (n = 0 ; n < elems.Count() ; n++)
{
pElem = elems[n] ;
threadLog("%s. GOT <%s ", *pElem->Name()) ;
for (ai = pElem ; ai.Valid() ; ai.Advance())
{
anam = ai.Name() ; aval = ai.Value() ;
threadLog(" %s=%s", *anam, *aval) ;
if (anam == "href")
{
url = aval ;
pgl->links.Add(url) ;
}
}
threadLog(" />\n") ;
}
}
threadLog("Inserting pagelist %s of %d items\n", *pgl->name, pgl->links.Count()) ;
m_Pagelists.Insert(pgl->name, pgl) ;
break ;
case WEBCMD_SLCT_LIST: // Select links from a set of pages (supplied as a set of links)
threadLog("Doing WEBCMD_SLCT_LIST (%s)\n", *wc.m_Url) ;
if (!wc.m_Inputs)
{ threadLog("Invalid slctList command - no source list of links\n") ; rc = E_NOINIT ; break ; }
if (!wc.m_Output)
{ rc = E_NOINIT ; threadLog("Invalid slctList command - no name for output list\n") ; }
if (!wc.m_Slct && !wc.m_Crit)
{ rc = E_NOINIT ; threadLog("Invalid slctList command - no node selection or globing criteria\n") ; }
if (rc != E_OK)
break ;
pgl2 = new _pageList() ;
pgl2->name = wc.m_Output ;
// Begin
pgl = m_Pagelists[wc.m_Inputs] ;
if (!pgl)
{ rc = E_CORRUPT ; threadLog("Pagelist of %s not found\n", *wc.m_Inputs) ; break ; }
for (si = pgl->links ; si.Valid() ; si++)
{
url = si.Element() ;
if (cur.Exists(url))
pDoc = cur[url] ;
else
pDoc = Download(url) ;
if (!pDoc)
{ rc = E_NOTFOUND ; threadLog("case 2.2 Could not fetch page %s\n", *url) ; break ; }
if (pDoc->Whatami() == DOCTYPE_HTML)
{
pHdoc = (hzDocHtml*) pDoc ;
rc = pHdoc->FindElements(elems, wc.m_Slct) ;
for (n = 0 ; n < elems.Count() ; n++)
{
pElem = elems[n] ;
threadLog("%s. GOT <%s ", *pElem->Name()) ;
for (ai = pElem ; ai.Valid() ; ai.Advance())
{
anam = ai.Name() ; aval = ai.Value() ;
threadLog(" %s=%s", *anam, *aval) ;
if (anam == "href")
{
url = aval ;
pgl2->links.Add(url) ;
}
}
threadLog(" />\n") ;
}
}
}
threadLog("Case 2. Inserting pagelist %s of %d items\n", *pgl2->name, pgl2->links.Count()) ;
m_Pagelists.Insert(pgl2->name, pgl2) ;
break ;
case WEBCMD_RGET: // Get a root page
threadLog("Doing WEBCMD_RGET\n") ;
threadLog("Page=%s Crit=%s\n", *wc.m_Url, *wc.m_Crit) ;
// Get root page first
pDoc = Download(wc.m_Url) ;
if (!pDoc)
threadLog("case 4. Could not fetch page %s\n", *wc.m_Url) ;
else
{
if (pDoc->Whatami() != DOCTYPE_HTML)
threadLog("Page %s not HTML\n", *wc.m_Url) ;
else
{
pHdoc = (hzDocHtml*) pDoc ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, wc.m_Crit) ;
}
delete pDoc ;
}
// Now aggregate the vector of links from the page to a vector of all links from all pages. Use a set to avoid repeats.
for (n = 0 ; n < pglinks.Count() ; n++)
{
url = pglinks[n] ;
if (!set_ctrl.Exists(url))
allinks.Add(url) ;
}
// Starting at the site root and for each page, grab all links and go to each link in turn
threadLog("STAGE TWO Have %d links in history, %d links in 'all-links'\n", m_vecHist.Count(), allinks.Count()) ;
for (nStart = 0 ; nStart < allinks.Count() ; nStart = nCount)
{
now.SysDateTime() ;
pglinks.Clear() ;
for (nCount = nStart, nLimit = allinks.Count() ; nCount < nLimit ; nCount++)
{
url = allinks[nCount] ;
threadLog("Cosidering link %s - ", *url.Whole()) ;
if (m_mapHist.Exists(url)) { threadLog("historic\n") ; continue ; }
if (url == m_Authexit) { threadLog("exit-page\n") ; continue ; }
if (!m_Domains.Exists(url.Domain())) { threadLog("URL %s outside domain\n", *url) ; continue ; }
// Page not yet visited so we visit it, put it in the list of pages visited and get the links. Some of these links may add to
// the list of links.
threadLog("Fetching\n") ;
pDoc = Download(url) ;
if (!pDoc)
threadLog("case 2. Could not fetch page %s\n", *url) ;
else
{
if (pDoc->Whatami() == DOCTYPE_HTML)
{
pHdoc = (hzDocHtml*) pDoc ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, wc.m_Crit) ;
// Re-aggregate the all-links vector
for (n = 0 ; n < pglinks.Count() ; n++)
{
url = pglinks[n] ;
if (!set_ctrl.Exists(url))
allinks.Add(url) ;
}
}
delete pDoc ;
}
}
}
break ;
case WEBCMD_POST: // Post a form. The form should have been previously downloaded and will be looked for by name
threadLog("Doing WEBCMD_POST\n") ;
pForm = m_Forms[wc.m_Output] ;
if (!pForm)
threadLog("Warning: No such form as [%s]\n", *wc.m_Output) ;
// Take the command's formdata and use it to populate the form's set of fields
/*
for (pi = pForm->fields ; pi.Valid() ; pi++)
{ P = pi.Element() ; fvals.Insert(P.name, P.value) ; }
for (pi = wc.m_Formdata ; pi.Valid() ; pi++)
{ P = pi.Element() ; fvals.Insert(P.name, P.value) ; }
for (n = 0 ; n < fvals.Count() ; n++)
{
P.name = fvals.GetKey(n) ;
P.value = fvals.GetObj(n) ;
flist.Add(P) ;
}
*/
rc = HC.PostForm(hRet, wc.m_Url, hdrs, wc.m_Formdata) ;
if (rc != E_OK)
{ threadLog("Could not post form to %s\n", *wc.m_Url) ; return rc ; }
if (hRet != HTTPMSG_OK)
{ threadLog("Invalid response to post form (to %s)\n", *wc.m_Url) ; return rc ; }
// Write out the login response
if (m_Repos)
{
url = wc.m_Url ;
S = m_Repos + "/" + url.Filename() ;
S += ".response" ;
os.open(*S) ;
if (os.fail())
{ threadLog("Cannot write out header file %s\n", *S) ; return E_WRITEFAIL ; }
os << HC.m_Header ;
os << "\r\n\r\n" ;
os << HC.m_Content ;
os.close() ;
}
break ;
case WEBCMD_RSS: // Get an RSS feed
threadLog("Doing WEBCMD_RSS\n") ;
// If XML selectors for RSS feed are not initialized, set them here
if (!m_tagItem.m_Slct) { m_tagItem.m_Filt = (char*) 0 ; m_tagItem.m_Info = "node" ; m_tagItem.m_Slct = "item" ; }
if (!m_tagUqid.m_Slct) { m_tagUqid.m_Filt = (char*) 0 ; m_tagUqid.m_Info = "node" ; m_tagUqid.m_Slct = "guid" ; }
if (!m_tagLink.m_Slct) { m_tagLink.m_Filt = (char*) 0 ; m_tagLink.m_Info = "node" ; m_tagLink.m_Slct = "link" ; }
if (!m_tagDesc.m_Slct) { m_tagDesc.m_Filt = (char*) 0 ; m_tagDesc.m_Info = "node" ; m_tagDesc.m_Slct = "description" ; }
if (!m_tagDate.m_Slct) { m_tagDate.m_Filt = (char*) 0 ; m_tagDate.m_Info = "node" ; m_tagDate.m_Slct = "pubDate" ; }
// Get the feed
rc = getRss_r(hRet, wc.m_Url, 0) ;
threadLog("Processed items\n") ;
break ;
}
}
// Write out manifest file
rc = _savestatus() ;
// Clear documents
for (n = 0 ; n < m_Pagelists.Count() ; n++)
{
pgl = m_Pagelists.GetObj(n) ;
delete pgl ;
}
for (n = 0 ; n < cur.Count() ; n++)
{
pDoc = cur.GetObj(n) ;
delete pDoc ;
}
return rc ;
}
hzEcode hzWebhost::Scrape (void)
{
// In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created and existing pages can be
// modified on an ad-hoc basis. A scrape captures the current state of the website or a limited portion of it to file.
//
// The scraping process runs through a set of known links for the website, downloading the page for each in turn. Each downloaded page is then examined for
// links. Links to domains other than the one in question are ignored. Links to such things as images are also ignored. Remaining links not found in the set
// of known links are added to this set. The process terminates when all the links have been attempted.
//
// The set of known links will need to comprise the site's home-page and a login page if this exists and if it is not the same as the home page. These will
// usually be enough to 'bootstrap' the rest of the site.
//
// Arguments: None
//
// Returns: E_NOINIT If no repository, no domain or no homepage has been specified
// E_NOTFOUND If the login page was not located
// E_WRITEFAIL If the login form received could not be written to the repository
// E_OPENFAIL If the visit status file could not be opened
// E_OK If the scrape operation was successful
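//
// Illustrative usage (a minimal sketch only; the member assignments shown are assumptions about caller-side setup, not confirmed API):
//
//		hzWebhost site ;
//		site.m_Repos    = "/var/cache/example_site" ;	// Hypothetical repository directory
//		site.m_Homepage = "http://www.example.com/" ;	// Home page used to bootstrap the link set
//		// ... add at least one root command to site.m_Roots ...
//		if (site.Scrape() != E_OK)
//			threadLog("Scrape of example site failed\n") ;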
_hzfunc("hzWebhost::Scrape") ;
hzMapS<hzString,hzString> formData ; // Set of name value pairs
hzVect<hzString> hdrs ; // Extra headers, needed for submit form
hzList<hzPair>::Iter ci ; // Root commands iterator
hzSet<hzUrl> set_ctrl ; // Initial links from processing config params
hzVect<hzUrl> pglinks ; // Links encountered within a given pages
hzVect<hzUrl> allinks ; // Links encountered within a given pages
hzVect<hzUrl> todo ; // Links encountered in the pages in ctrl
ifstream is ; // For reading in visit status file
ofstream os ; // For writing out visit status file at end of scrape
hzDocMeta mark ; // Document meta data
hzChain Response ; // Response from form submission
hzDocument* pDoc ; // Downloaded document
hzDocHtml* pHdoc ; // Set if downloaded document is a HTML page.
hzPair X ; // Root command instance
hzXDate now ; // Date/time now (for checking if pages have expired)
hzUrl url ; // Temp link
hzString vs_fname ; // Visit status filename
hzString pagepath ; // Filepath for file to store downloaded page
hzString S ; // Temp string
hzString etag ; // Temp string
uint32_t nStart ; // Start of the current pass through the all-links vector
uint32_t nLimit ; // Number of links at the start of the current pass
uint32_t nCount ; // Links iterator within the current pass
uint32_t n ; // Aggregation iterator
hzEcode rc = E_OK ; // Return code
threadLog("Called hzWebhost::Scrape\n") ;
// Check if repository is set up (website is initialized)
if (!m_Repos)
{ threadLog("Website is not properly initialized (no repository)\n") ; return E_NOINIT ; }
// Is there anything to do?
if (!m_Roots.Count())
{ threadLog("Website has no starting point (URL) for a WEB SCRAPE.\n") ; return E_NOINIT ; }
// Get the home page
// if (*m_Homepage)
if (m_Homepage)
{
//etag = 0 ;
//etag = (char*) 0 ;
pDoc = Download(m_Homepage) ;
if (!pDoc)
{ threadLog("Could not download page %s\n", *m_Homepage) ; return E_NOINIT ; }
m_docHome = pDoc ;
threadLog("HOMEPAGE SUCCESS\n") ;
}
// Login
rc = Login() ;
if (rc != E_OK)
{ threadLog("Login failed\n") ; return rc ; }
threadLog("Login SUCCESS\n") ;
// Run the root commands to obtain the set of roots. A root command may have either a URL or a 'link criteria' or both. If only a
// URL is present, this URL and ALL links found within it are added to the list of pages to process. If only a link criteria is
// present, the links found in the HOME page and the LOGIN RESPONSE page are tested against the criteria. If they match the link
// is added to the list of pages to process. If both a URL and a link criteria are present then the URL and any matching links found
// within it are added to the list of pages to process.
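// For example (illustrative values only), a root command of name="homepage" with value="/news/" would take links matching "/news/"
// from the home page, while name="http://www.example.com/archive" with value="/2014/" would fetch the archive page and take its
// matching links. The exact matching semantics are those of ExtractLinksBasic.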
threadLog("Have %d root commands\n", m_Roots.Count()) ;
for (ci = m_Roots ; ci.Valid() ; ci++)
{
X = ci.Element() ;
threadLog("Page=%s Crit=%s\n", *X.name, *X.value) ;
// Get the page
if (X.name == "homepage")
{
// No page to get, just compare the criteria to the home page
pHdoc = (hzDocHtml*) m_docHome ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, X.value) ;
}
else if (X.name == "loginResponse")
{
// No page to get, just compare the criteria to the login response
pHdoc = (hzDocHtml*) m_resAuth ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, X.value) ;
}
else
{
url = X.name ;
if (!url)
{ threadLog("Root command invalid page %s\n", *X.name) ; continue ; }
etag = (char*) 0 ;
pDoc = Download(url) ;
if (!pDoc)
threadLog("case 1. Could not fetch page %s\n", *url) ;
else
{
if (pDoc->Whatami() != DOCTYPE_HTML)
threadLog("Page %s not HTML\n", *url) ;
else
{
pHdoc = (hzDocHtml*) pDoc ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, X.value) ;
threadLog("Got page content, extracted %d links\n", pglinks.Count()) ;
}
delete pDoc ;
}
}
// Now aggregate the vector of links from the page to a vector of all links from all pages. Use a set to avoid repeats.
for (n = 0 ; n < pglinks.Count() ; n++)
{
url = pglinks[n] ;
if (!set_ctrl.Exists(url))
allinks.Add(url) ;
}
}
/*
** Starting at the site root and for each page, grab all links and go to each link in turn
*/
threadLog("STAGE TWO Have %d links in history, %d links in 'all-links'\n", m_vecHist.Count(), allinks.Count()) ;
for (nStart = 0 ; nStart < allinks.Count() ; nStart = nCount)
{
now.SysDateTime() ;
todo.Clear() ;
for (nCount = nStart, nLimit = allinks.Count() ; nCount < nLimit ; nCount++)
{
url = allinks[nCount] ;
threadLog("Cosidering link %s - ", *url.Whole()) ;
if (m_mapHist.Exists(url)) { threadLog("historic\n") ; continue ; }
if (url == m_Authexit) { threadLog("exit-page\n") ; continue ; }
if (!m_Domains.Exists(url.Domain())) { threadLog("URL %s outside domain\n", *url) ; continue ; }
// Page not yet visited so we visit it, put it in the list of pages visited and extract its links. Some of these links may
// add to the overall list of links.
threadLog("Fetching\n") ;
pDoc = Download(url) ;
threadLog("Fetched page %p\n", pDoc) ;
if (!pDoc)
threadLog("case 2. Could not fetch page %s\n", *url) ;
else
{
if (pDoc->Whatami() == DOCTYPE_HTML)
{
pHdoc = (hzDocHtml*) pDoc ;
pHdoc->ExtractLinksBasic(pglinks, m_Domains, X.value) ;
// Re-aggregate the all-links vector
for (n = 0 ; n < pglinks.Count() ; n++)
{
url = pglinks[n] ;
if (!set_ctrl.Exists(url))
allinks.Add(url) ;
}
}
delete pDoc ;
}
}
/*
for (nAdded = nX = 0 ; nX < todo.Count() ; nX++)
{
//url = todo.GetObj(nX) ;
url = todo[nX] ; //.GetObj(nX) ;
if (set_ctrl.Exists(url))
continue ;
nAdded++ ;
set_ctrl.Insert(url) ;
}
todo.Clear() ;
if (!nAdded)
break ;
*/
}
// Write out manifest file
rc = _savestatus() ;
return rc ;
}
hzEcode hzWebhost::getRss_r (HttpRC& hRet, const hzUrl& feed, uint32_t nLevel)
{
// Recursive fetch of RSS documents. The supplied URL is downloaded and loaded into an XML document. There it is tested to ensure it is an
// XML document. The RSS feed is assumed to contain only links. These links may be to HTML pages or other (sub RSS feeds). The HTML pages
// are end points of the process. They are downloaded but any links they may contain are recorded but not followed. The sub-RSS feeds are
// then processed by recursive call to this function.
//
// Arguments: 1) hRet Set by this operation
// 2) feed The RSS URL
// 3) nLevel RSS Hierarchy
//
// Returns: E_NODATA If the download failed
// E_TYPE If the downloaded material does not appear to be XML
// E_FORMAT If the downloaded material could not be loaded into an XML document
// E_ARGUMENT If the RSS tags are not defined
// E_NOTFOUND If no tags were found in the RSS
// E_OK If the RSS data was collected
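//
// For reference, a typical RSS 2.0 <item> matched by the default selectors (item, guid, link, description, pubDate) looks like
// the following (illustrative values):
//
//		<item>
//			<title>Example headline</title>
//			<link>http://www.example.com/articles/1234</link>
//			<guid>http://www.example.com/articles/1234</guid>
//			<description>Short summary of the article</description>
//			<pubDate>Mon, 06 Sep 2014 09:00:00 GMT</pubDate>
//		</item>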
_hzfunc("hzWebhost::getRss_r") ;
hzVect<hzXmlNode*> linx ; // Links found in (this) RSS feed page
hzVect<hzUrl> todo ; // Links found in RSS feed page (additions to this are controlled by the set above)
hzDocXml X ; // For loading of RSS feed pages and extraction of links
hzXmlNode* pN1 ; // Nodes (containing <item>)
hzXmlNode* pN2 ; // Nodes (containing <item> subnodes of title, link, description)
hzDocMeta* pMark ; // Document meta data
hzDocument* pDoc ; // Document found at URL (could be XML or HTML)
hzUrl page ; // Temp link
hzString desc ; // RSS article description
hzString dstr ; // RSS article date
hzString uqid ; // Unique ID of RSS item
hzString title ; // RSS article title
uint32_t nIndex ; // Links iterator
hzEcode rc = E_OK ; // Return code
// Fetch the current RSS document
pDoc = Download(feed) ;
if (!pDoc)
{ threadLog("Could not fetch URL %s\n", *feed) ; return E_NODATA ; }
// If not an XML document then it is just a page. Nothing further.
if (pDoc->Whatami() != DOCTYPE_XML)
{ threadLog("case 1. Fetched feed (%s) is not of doctype XML\n", *feed) ; return E_TYPE ; }
nLevel++ ;
// Load current RSS document into XML document tree
rc = X.Load(HC.m_Content) ;
if (rc != E_OK)
return hzerr(rc, "Could not load feed %s", *feed) ;
// The page is an RSS document so select the <item> tags
rc = X.FindNodes(linx, m_tagItem.m_Slct) ;
threadLog("Found %d <item> tags in feed %s\n", linx.Count(), *feed) ;
if (rc != E_OK)
return rc ;
for (nIndex = 0 ; nIndex < linx.Count() ; nIndex++)
{
threadLog("case 1\n") ;
pN1 = linx[nIndex] ;
title = (char*) 0 ; desc = (char*) 0 ; page = (char*) 0 ; uqid = (char*) 0 ; dstr = (char*) 0 ;
for (pN2 = pN1->GetFirstChild() ; pN2 ; pN2 = pN2->Sibling())
{
threadLog("case 2\n") ;
if (pN2->NameEQ(*m_tagTitl.m_Slct)) { title = pN2->m_fixContent ; continue ; }
if (pN2->NameEQ(*m_tagDesc.m_Slct)) { desc = pN2->m_fixContent ; continue ; }
if (pN2->NameEQ(*m_tagLink.m_Slct)) { page = pN2->m_fixContent ; continue ; }
if (pN2->NameEQ(*m_tagUqid.m_Slct)) { uqid = pN2->m_fixContent ; continue ; }
if (pN2->NameEQ(*m_tagDate.m_Slct)) { dstr = pN2->m_fixContent ; continue ; }
}
threadLog("case 3\n") ;
if (!page)
{ threadLog("case 1: title=%s; link=null uqid=%s\n", *title, *uqid) ; page = uqid ; }
if (!page)
{ threadLog("case 2: title=%s; link=null uqid=%s\n", *title, *uqid) ; continue ; }
threadLog("title=%s; link=%s\n", *title, *page) ;
if (m_mapHist.Exists(page))
threadLog("Exists in history, page %s\n", *page) ;
else
{
pMark = new hzDocMeta() ;
pMark->m_Title = title ;
pMark->m_Desc = desc ;
pMark->m_urlReq = page ;
if (dstr)
pMark->m_Modified.SetDateTime(*dstr) ;
//todo.Insert(page) ;
todo.Add(page) ;
threadLog("Adding to history, page %s\n", *page) ;
}
}
// Fetch all the new links found above by recursive call
for (nIndex = 0 ; nIndex < todo.Count() ; nIndex++)
{
page = todo[nIndex] ;
//pMark = m_mapHist[page] ;
threadLog("Processing %s\n", *page) ;
rc = getRss_r(hRet, page, nLevel) ;
}
return rc ;
}
hzEcode hzWebhost::GetRSS (void)
{
// In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created
// and existing pages can be modified on an ad-hoc basis. The RSS feeds allow greater ease when syncing an external website to the
// local machine. By periodically reading one or more RSS feeds one can obtain a set of links which can generally be taken as the
// set of pages deemed 'current' by the website. By comparing these links to a history file of already fetched links, new pages
// can be added to a repository as they appear on the site. The RSS feeds are just XML files containing links.
//
// This function will obtain all the RSS feeds from the site, garner all the links from them and then download any pages from the
// links that are not already in the site history. The feeds themselves are not saved as these will be fetched again.
//
// Arguments: None
//
// Returns: E_NOINIT If the repository for the webhost has not previously been defined
// E_OPENFAIL If the visit status file could not be opened
// E_NODATA If the download failed
// E_TYPE If the downloaded material does not appear to be XML
// E_FORMAT If the downloaded material could not be loaded into an XML document
// E_ARGUMENT If the RSS tags are not defined
// E_NOTFOUND If no tags were found in the RSS
// E_OK If the RSS data was collected
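//
// Illustrative usage (a sketch only; how m_Feeds is populated is an assumption and the Add() call may differ from the real hzList API):
//
//		hzWebhost site ;
//		site.m_Repos = "/var/cache/example_site" ;						// Hypothetical repository directory
//		site.m_Feeds.Add(hzUrl("http://www.example.com/rss.xml")) ;	// Register one feed (assumed list insert method)
//		hzEcode rc = site.GetRSS() ;
//		if (rc != E_OK)
//			threadLog("RSS sync failed (error=%s)\n", Err2Txt(rc)) ;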
_hzfunc("hzWebhost::GetRSS") ;
hzList<hzUrl>::Iter fi ; // RSS feeds iterator
hzUrl feed ; // Temp link
HttpRC hRet ; // HTTP return code
hzEcode rc = E_OK ; // Return code
threadLog("Called\n") ;
// Login
rc = Login() ;
if (rc != E_OK)
{ threadLog("Login failed\n") ; return rc ; }
// Is there anything to do? There must be at least one RSS feed defined
if (!m_Feeds.Count())
{ threadLog("Website has no starting point (URL) for an RSS feed.\n") ; return E_NOINIT ; }
// If XML selectors for RSS feed are not initialized, set them here
if (!m_tagItem.m_Slct) { m_tagItem.m_Filt = (char*) 0 ; m_tagItem.m_Info = "node" ; m_tagItem.m_Slct = "item" ; }
if (!m_tagUqid.m_Slct) { m_tagUqid.m_Filt = (char*) 0 ; m_tagUqid.m_Info = "node" ; m_tagUqid.m_Slct = "guid" ; }
if (!m_tagLink.m_Slct) { m_tagLink.m_Filt = (char*) 0 ; m_tagLink.m_Info = "node" ; m_tagLink.m_Slct = "link" ; }
if (!m_tagDesc.m_Slct) { m_tagDesc.m_Filt = (char*) 0 ; m_tagDesc.m_Info = "node" ; m_tagDesc.m_Slct = "description" ; }
if (!m_tagDate.m_Slct) { m_tagDate.m_Filt = (char*) 0 ; m_tagDate.m_Info = "node" ; m_tagDate.m_Slct = "pubDate" ; }
/*
** Fetch all the feed XML documents from the RSS source(s)
*/
for (fi = m_Feeds ; fi.Valid() ; fi++)
{
feed = fi.Element() ;
// Get the feed
rc = getRss_r(hRet, feed, 0) ;
threadLog("Processed items\n") ;
}
// Write out visit status file
rc = _savestatus() ;
return rc ;
}
hzDocument* hzWebhost::Download (const hzUrl& url)
{
// Fetch the page found at the supplied URL and return as a document (either XML or HTML).
//
// Note that if the page has already been downloaded (is in the site's history) then it is only downloaded again if its time to
// live has expired. If the page is not downloaded again then this function rebuilds the document from the cached content.
//
// Arguments: 1) url The URL of the file/resource to download
//
// Returns: Pointer to newly allocated document. Must be deleted after use.
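//
// Illustrative usage (a sketch only, assuming an already initialized hzWebhost instance named 'site'):
//
//		hzDocument* pDoc = site.Download(hzUrl("http://www.example.com/index.html")) ;
//		if (pDoc)
//		{
//			if (pDoc->Whatami() == DOCTYPE_HTML)
//			{
//				hzDocHtml* pHdoc = (hzDocHtml*) pDoc ;
//				// ... extract links or other content from pHdoc ...
//			}
//			delete pDoc ;	// Caller owns the returned document
//		}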
_hzfunc("hzWebhost::Download") ;
static uint32_t nlast = 0 ; // Last point reached (for download resumption)
ofstream os ; // To write out page contents
hzDocument* pDoc = 0 ; // Document downloaded
hzDocXml* pXdoc = 0 ; // XML Document downloaded
hzDocHtml* pHdoc = 0 ; // HTML Document downloaded
hzDocMeta* pMark ; // Document meta data
hzXDate now ; // Date & Time now
hzString S ; // Temp string
HttpRC hc ; // HTTP server return code
hzEcode rc ; // Return code
bool bHist = false ; // Set if url is already in history and downloaded again because of being out of date
char numbuf [16] ; // Working buffer for the document id suffix
/*
** Check URL, insert in visited links if not already there
*/
if (!url)
{ threadLog("No supplied address\n") ; return 0 ; }
threadLog("FETCHING PAGE: %s\n", *url) ;
now.SysDateTime() ;
if (!(m_Opflags & WEBFLG_FORCE))
{
if (m_mapHist.Exists(url))
{
// The requested URL exists in the repository already, so rebuild the document from the cached content instead of downloading again
pMark = m_mapHist[url] ;
bHist = true ;
threadLog("Page %s is historic\n", *url) ;
// Create a document of the right type (XML or HTML)
if (pMark->m_Doctype == DOCTYPE_HTML)
pDoc = pHdoc = new hzDocHtml() ;
else if (pMark->m_Doctype == DOCTYPE_XML)
pDoc = pXdoc = new hzDocXml() ;
else
pDoc = pHdoc = new hzDocHtml() ;
pDoc->SetMeta(*pMark) ;
// Check if expiry is known and if so if it has expired
if (pMark->m_Expires.IsSet())
{
if (pMark->m_Expires < now)
{
// Set the markers and return
if (pMark->m_Doctype == DOCTYPE_XML)
{
// XML
pDoc = pXdoc = new hzDocXml() ;
pDoc->SetMeta(*pMark) ;
rc = pDoc->Load(HC.m_Content) ;
}
else
{
// HTML
pDoc = pHdoc = new hzDocHtml() ;
pDoc->SetMeta(*pMark) ;
rc = pDoc->Load(HC.m_Content) ;
}
threadLog("DOWNLOAD PREVIOUS (error=%s)\n\n", Err2Txt(rc)) ;
return pDoc ;
}
}
// At this point either the expiry date is unknown or it has not yet been reached. Load from the cached content
if (!HC.m_Content.Size())
{
threadLog("Case 1 Bloody thing is empty!\n") ;
return 0 ;
}
rc = pDoc->Load(HC.m_Content) ;
if (rc != E_OK)
threadLog("LOAD failed (error=%s)\n\n", Err2Txt(rc)) ;
return pDoc ;
}
}
// The requested URL is not in the history. Create the document meta for it and download it.
S = url.Filename() ;
pMark = new hzDocMeta() ;
pMark->m_urlReq = url ;
pMark->m_urlAct = url ;
pMark->m_Id = m_mapHist.Count() ;
sprintf(numbuf, "/%04d", pMark->m_Id) ;
pMark->m_Filename = m_Repos + numbuf + S ;
/*
** Get page content and process it into a tree
*/
threadLog("GETTIG PAGE: %s\n", *url) ;
rc = HC.GetPage(hc, url, pMark->m_Etag) ;
if (rc != E_OK)
{
threadLog("FAILED (error=%s) synopsis\n", Err2Txt(rc)) ;
threadLog(HC.m_Error) ;
return 0 ;
}
if (HC.m_Redirect)
pMark->m_urlAct = HC.m_Redirect ;
pMark->m_Modified = HC.m_Modified ;
threadLog("HTTP Return code = %d, cookie (value %s, path %s)\n", (uint32_t) hc, *m_CookieSess, *m_CookiePath) ;
/*
** Write out header to .hdr file and content to .con file
*/
if (m_Repos)
{
os.open(*pMark->m_Filename) ;
if (os.fail())
threadLog("Cannot write out header file %s\n", *pMark->m_Filename) ;
else
{
os << HC.m_Content ;
os.close() ;
}
os.clear() ;
}
/*
** Add the page but only process pages that are of a known HTML type .htm, .html, .shtml, .xhtml etc
*/
threadLog("PROCESSING Content: %d bytes\n", HC.m_Content.Size()) ;
if (!HC.m_Content.Size())
{
threadLog("Case 2 Bloody thing is empty!\n") ;
return 0 ;
}
pMark->m_Doctype = DeriveDoctype(HC.m_Content) ;
rc = E_NODATA ;
if (pMark->m_Doctype == DOCTYPE_XML)
{
// XML
pDoc = pXdoc = new hzDocXml() ;
pXdoc->Init(url) ;
rc = pXdoc->Load(HC.m_Content) ;
}
else
{
// HTML
pDoc = pHdoc = new hzDocHtml() ;
pHdoc->Init(url) ;
rc = pHdoc->Load(HC.m_Content) ;
if (rc != E_OK)
threadLog("Case 2 Bloody thing failed (error=%s)!\n", Err2Txt(rc)) ;
}
if (rc != E_OK)
{
threadLog("Load page failed error=%s\n", Err2Txt(rc)) ;
//delete pDoc ;
//return 0 ;
}
pDoc->SetMeta(*pMark) ;
// Place the URL in the site's history
m_mapHist.Insert(pMark->m_urlReq, pMark) ;
threadLog("Inserted URL %s\n", *pMark->m_urlReq) ;
if (pMark->m_urlAct != pMark->m_urlReq)
{
m_mapHist.Insert(pMark->m_urlAct, pMark) ;
threadLog("Inserted URL %s\n", *pMark->m_urlAct) ;
}
if (!bHist)
m_vecHist.Add(pMark) ;
if (pXdoc)
threadLog("DOWNLOAD SUCCESS XML Page %s. Now have %d (%d) items in history\n\n", *url, m_mapHist.Count(), nlast) ;
if (pHdoc)
threadLog("DOWNLOAD SUCCESS Page %s has %d links. Now have %d (%d) items in history\n\n", *url, pHdoc->m_vecLinks.Count(), m_mapHist.Count(), nlast) ;
threadLog(HC.m_Error) ;
return pDoc ;
}