<pre>//
//  File:   hzUrl.cpp
//
//  Legal Notice:   This file is part of the HadronZoo C++ Class Library. Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
//  The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
//  Software Foundation, either version 3 of the License, or any later version.
//
//  The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
//  A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//</pre>
			<pre>//
//  Implementation of the hzUrl class.
//</pre>
			<pre>#include &lt;iostream&gt;</pre>
			<pre>#include &lt;stdarg.h&gt;</pre>
			<pre>#include "hzChars.h"
#include "hzTextproc.h"
#include "hzProcess.h"
#include "hzSSR.h"</pre>
			<pre>#define URL_FACTOR  9   //  This is added to the URL string size to accommodate the copy count, the lengths of the protocol, domain, port and resource components, the
                        //  port number (the control fields that precede m_data in _url_space) as well as the null terminator.</pre>
			<pre>#define URI_FACTOR  12  //  This is added to the URI string size to accommodate the copy count and URI components as defined in _uri_space below</pre>
			<pre>class   _uri_space
{
    //  Internal structure for URI is scheme:[//authority]path[?query][#fragment] where the authority = [userinfo@]host[:port]. Note that while the authority part can include user
    //  information, in most cases only the host is present (and sometimes the port)
    //
    //  The members are public, as they are in _url_space, because the structure is only of use when laid over raw storage and accessed field by field. Without the access
    //  specifier everything (constructor included) would be private and the class could not be used at all.</pre>
			<pre>public:
    uint16_t    m_Scheme ;      //  URI Scheme code
    uint16_t    m_nPort ;       //  Port number
    uint16_t    m_lenQuery ;    //  Length of query component
    uchar       m_copy ;        //  Copy counter
    uchar       m_lenUser ;     //  Length of authority part userinfo (usually 0)
    uchar       m_lenHost ;     //  Length of authority host
    uchar       m_lenPath ;     //  Length of path
    uchar       m_lenFrag ;     //  Length of fragment
    char        m_data[5] ;     //  First part of data</pre>
			<pre>    //  All counters and lengths start at zero. The data area is left untouched.
    _uri_space  (void) : m_Scheme(0), m_nPort(0), m_lenQuery(0), m_copy(0), m_lenUser(0), m_lenHost(0), m_lenPath(0), m_lenFrag(0)  {}</pre>
			<pre>    //  Sum of the component lengths plus URI_FACTOR
    uint32_t    Length  (void) const    { return URI_FACTOR + m_lenQuery + m_lenUser + m_lenHost + m_lenPath + m_lenFrag ; }
} ;</pre>
			<pre>class   _url_space
{
    //  Control area placed at the start of the storage of a URL so that copies of a hzUrl can share one allocation (soft copies). The URL text itself starts at m_data
    //  and is laid out as protocol, domain, port and resource, the lengths of which are held here.    URI = scheme:[//authority]path[?query][#fragment]</pre>
			<pre>public:
    uchar       m_copy ;        //  Copy counter
    uchar       m_lenProt ;     //  Length of protocol component
    uchar       m_lenDom ;      //  Length of domain component
    uchar       m_lenPort ;     //  Length of port component
    uint16_t    m_lenRes ;      //  Length of resource component
    uint16_t    m_port ;        //  Port number
    char        m_data[8] ;     //  First part of data</pre>
			<pre>    //  Zero the counter, the lengths and the port and make the data an empty string
    _url_space  (void) : m_copy(0), m_lenProt(0), m_lenDom(0), m_lenPort(0), m_lenRes(0), m_port(0)
    {
        m_data[0] = 0 ;
    }
} ;</pre>
			<pre>/*
**  Global constants
*/</pre>
			<pre>global  const hzUrl _hz_null_hzUrl ;    //  Null URL (a default constructed hzUrl instance)</pre>
			<pre>/*
**  Small String Regime
*/</pre>
			<pre>//  Defined elsewhere. Every hzUrl method in this file obtains, translates (Xlate) and releases its URL space through this object.
extern  hzSSR   g_ssrInet ;</pre>
			<pre>/*
**  hzUrl public methods
*/</pre>
			<pre>void    hzUrl::Clear    (void)
{
    //  Release this instance's hold on its URL space and set the instance to null. The space is shared between copies of the URL by means of the copy counter, so it is
    //  only handed back to g_ssrInet when the counter reaches zero.
    //
    //  The allocation size is worked out BEFORE the counter is decremented and, when _hzGlobal_MT is set, the decision to free is taken on the value returned by the
    //  atomic decrement itself. Re-reading m_copy after the decrement would let two instances releasing at the same time both observe zero (double free), or let one
    //  of them read a space the other has already freed.
    //
    //  Arguments:  None
    //  Returns:    None</pre>
			<pre>    _url_space* thisCtl ;   //  This URL space
    uint32_t    nSize ;     //  Size of the allocation (same sum as was passed to Alloc)
    uint32_t    nLeft ;     //  Copy count remaining once this instance has let go</pre>
			<pre>    if (!m_addr)
        return ;</pre>
			<pre>    thisCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    nSize = thisCtl-&gt;m_lenRes + thisCtl-&gt;m_lenProt + thisCtl-&gt;m_lenDom + thisCtl-&gt;m_lenPort + URL_FACTOR ;</pre>
			<pre>    if (_hzGlobal_MT)
        nLeft = __sync_add_and_fetch(&amp;(thisCtl-&gt;m_copy), -1) ;
    else
        nLeft = --thisCtl-&gt;m_copy ;</pre>
			<pre>    if (!nLeft)
        g_ssrInet.Free(m_addr, nSize) ;</pre>
			<pre>    m_addr = 0 ;
}</pre>
			<pre>void    hzUrl::_inc_copy    (void) const
{
    //  Add one to the copy counter of the URL space, unless the counter has already reached 100. Note there is no test for a null URL here.
    //
    //  Arguments:  None
    //  Returns:    None</pre>
			<pre>    _url_space* pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;     //  Control area of this URL</pre>
			<pre>    if (pCtl-&gt;m_copy &gt;= 100)
        return ;</pre>
			<pre>    pCtl-&gt;m_copy++ ;
}</pre>
			<pre>void    hzUrl::_dec_copy    (void) const
{
    //  Take one from the copy counter of the URL space. The counter is never taken to zero by this function: if it stands at 1 a warning is logged and the counter is
    //  left alone. Note there is no test for a null URL here.
    //
    //  Arguments:  None
    //  Returns:    None</pre>
			<pre>    _url_space* pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;     //  Control area of this URL</pre>
			<pre>    if (pCtl-&gt;m_copy != 1)
    {
        pCtl-&gt;m_copy-- ;
        return ;
    }</pre>
			<pre>    threadLog("WARNING: URL _dec_copy would zero copy count\n") ;
}</pre>
			<pre>/*
bool    hzUrl::valid    (void) const
{
    _url_space* pCtrl ;     //  This string's control area
    _ssrFLE*    pSlot ;     //  Item cast to _ssrFLE (to self point on free and to check it is not already free)</pre>
			<pre>    pCtrl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    pSlot = (_ssrFLE*) pCtrl ;</pre>
			<pre>    if (!pCtrl-&gt;m_copy || !pCtrl-&gt;m_lhs || !pCtrl-&gt;m_rhs || pSlot-&gt;m_fleSelf == m_addr)
        return false ;
    return true ;
}
*/</pre>
			<pre>uint32_t    hzUrl::Length   (void) const
{
    //  Give the length of the whole URL text, this being the sum of the protocol, domain, port and resource component lengths.
    //
    //  Arguments:  None
    //  Returns:    Number being length of whole URL string (0 if the URL is null)</pre>
			<pre>    _url_space* pCtl ;      //  Control area of this URL
    uint32_t    nTotal ;    //  Sum of component lengths</pre>
			<pre>    if (m_addr)
    {
        pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
        nTotal = pCtl-&gt;m_lenRes + pCtl-&gt;m_lenProt + pCtl-&gt;m_lenDom + pCtl-&gt;m_lenPort ;
        return nTotal ;
    }</pre>
			<pre>    return 0 ;
}</pre>
			<pre>uint32_t    hzUrl::Port     (void) const
{
    //  Give the port number held in the URL space. This is set on assignment, either from an explicit port or from the default for the scheme.
    //
    //  Arguments:  None
    //
    //  Returns:    Number being the port number of the URL (0 if the URL is null)</pre>
			<pre>    _url_space* pCtl ;      //  Control area of this URL</pre>
			<pre>    if (m_addr)
    {
        pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
        return pCtl-&gt;m_port ;
    }</pre>
			<pre>    return 0 ;
}</pre>
			<pre>bool    hzUrl::IsSSL    (void) const
{
    //  Report if a connection to this URL would use SSL. The test is purely on the port number.
    //
    //  Arguments:  None
    //
    //  Returns:    True    If the port is set to 443
    //              False   Otherwise</pre>
			<pre>    return Port() == 443 ;
}</pre>
			<pre>hzString    hzUrl::Whole    (void) const
{
    //  Give the whole URL text as a string.
    //
    //  Arguments:  None
    //  Returns:    Instance of hzString by value being whole URL (empty if the URL is null)</pre>
			<pre>    _url_space*     pCtl ;      //  Control area of this URL
    hzString        whole ;     //  Whole URL (result)</pre>
			<pre>    if (!m_addr)
        return whole ;</pre>
			<pre>    pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    whole = pCtl-&gt;m_data ;
    return whole ;
}</pre>
			<pre>hzString    hzUrl::Domain   (void) const
{
    //  Give the domain name on its own. In the URL text the domain follows the protocol component, so it starts m_lenProt bytes in and is m_lenDom bytes long.
    //
    //  Arguments:  None
    //  Returns:    Instance of hzString by value being domain part of the URL (empty if the URL is null)</pre>
			<pre>    _url_space*     pCtl ;      //  Control area of this URL
    hzString        dom ;       //  Domain (result)</pre>
			<pre>    if (!m_addr)
        return dom ;</pre>
			<pre>    pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    dom.SetValue(pCtl-&gt;m_data + pCtl-&gt;m_lenProt, (uint32_t) pCtl-&gt;m_lenDom) ;
    return dom ;
}</pre>
			<pre>hzString    hzUrl::Resource (void) const
{
    //  Give the resource component on its own. The resource is the last component of the URL text so it is everything from the end of the port component up to the
    //  null terminator.
    //
    //  Arguments:  None
    //  Returns:    Instance of hzString by value being resource part of the URL (empty if the URL is null)</pre>
			<pre>    _url_space*     pCtl ;      //  Control area of this URL
    hzString        res ;       //  Resource (result)</pre>
			<pre>    if (!m_addr)
        return res ;</pre>
			<pre>    pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    res = pCtl-&gt;m_data + pCtl-&gt;m_lenProt + pCtl-&gt;m_lenDom + pCtl-&gt;m_lenPort ;
    return res ;
}</pre>
			<pre>const char* hzUrl::operator*    (void) const
{
    //  Give direct access to the URL text held in the URL space (a null terminated string)
    //
    //  Arguments:  None
    //  Returns:    Pointer to value as null terminated string, or a null pointer if the URL is null</pre>
			<pre>    _url_space* pCtl ;      //  Control area of this URL</pre>
			<pre>    if (m_addr)
    {
        pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
        return pCtl-&gt;m_data ;
    }</pre>
			<pre>    return 0 ;
}</pre>
			<pre>hzUrl::operator const char* (void) const
{
    //  Conversion to a character string: gives direct access to the URL text held in the URL space (a null terminated string)
    //
    //  Arguments:  None
    //  Returns:    Pointer to value as null terminated string, or a null pointer if the URL is null</pre>
			<pre>    _url_space* pCtl ;      //  Control area of this URL</pre>
			<pre>    if (m_addr)
    {
        pCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
        return pCtl-&gt;m_data ;
    }</pre>
			<pre>    return 0 ;
}</pre>
			<pre>hzUrl&amp;  hzUrl::SetValue     (const hzString domain, const hzString resource, bool bSecure, uint32_t nPort)
{
    //  This is used as an alternative to the assignment operator, specifically to deal with such URLs as links in webpages. These may be full URLs starting with the
    //  scheme (http:// or https://), in which case the link is assigned as it stands and the supplied domain is ignored. Otherwise the link is taken to be a resource
    //  (with or without the leading /) and the URL is built as scheme, supplied domain, optional port and resource. Anything from the first ? in the resource onwards
    //  is dropped before the URL is built.
    //
    //  The URL is left null if the domain or resource is empty, or if a component will not fit its length field in the URL space (domain over 255 bytes, resource
    //  over 65535 bytes, port over 65535).
    //
    //  Arguments:  1)  domain      The domain name part of the URL
    //              2)  resource    The resource part
    //              3)  bSecure     Use SSL so https:// instead of http://
    //              4)  nPort       Port number if any (0 means the default for the scheme, 443 or 80)
    //
    //  Returns:    Reference to this URL instance</pre>
			<pre>    _url_space*     thisCtl ;       //  This URL space
    char*           pData ;         //  Start of URL text within the URL space
    const char*     pRI ;           //  Resource iterator
    hzString        res ;           //  Resource (allowing a truncate after ?)
    uint32_t        lenTotal ;      //  Length of whole URL
    uint32_t        lenProt ;       //  Length of protocol part
    uint32_t        lenDom ;        //  Length of domain part
    uint32_t        lenRes ;        //  Length of resource part (always includes the leading /)
    uint32_t        lenPort = 0 ;   //  Length of port part (colon plus digits)
    bool            bPort = false ; //  Port specified indicator</pre>
			<pre>    /*
    **  Test arguments
    */</pre>
			<pre>    Clear() ;</pre>
			<pre>    if (!domain)    return *this ;
    if (!resource)  return *this ;
    if (nPort &gt; 65535)
        return *this ;</pre>
			<pre>    res = resource ;
    res.TruncateUpto("?") ;
    pRI = *res ;</pre>
			<pre>    //  A link that carries its own scheme is a complete URL. strncmp is used as it stops at the null terminator, so a resource shorter than the scheme is not read
    //  beyond its end. The test on pRI allows for the truncation having left nothing (it is assumed the string could then give a null pointer).
    if (pRI &amp;&amp; (!strncmp(pRI, "http://", 7) || !strncmp(pRI, "https://", 8)))
    {
        operator=(res) ;
        return *this ;
    }</pre>
			<pre>    //  If the port is not set it is determined by the protocol (http is 80 or https is 443).</pre>
			<pre>    if (!nPort)
        nPort = bSecure ? 443 : 80 ;
    else
    {
        //  Don't include the :port_no notation unless we have a non-standard port</pre>
			<pre>        if (bSecure)
            bPort = nPort == 443 ? false : true ;
        else
            bPort = nPort == 80 ? false : true ;
    }</pre>
			<pre>    if (bPort)
        lenPort = nPort &gt; 9999 ? 6 : nPort &gt; 999 ? 5 : nPort &gt; 99 ? 4 : nPort &gt; 9 ? 3 : 2 ;</pre>
			<pre>    lenProt = bSecure ? 8 : 7 ;
    lenDom = domain.Length() ;</pre>
			<pre>    //  The leading / is written separately below, so step over it if the resource has one
    if (pRI &amp;&amp; pRI[0] == CHAR_FWSLASH)
    {
        lenRes = res.Length() ;
        pRI++ ;
    }
    else
        lenRes = res.Length() + 1 ;</pre>
			<pre>    //  The lengths are held in one byte (domain) and two byte (resource) fields. A length that does not fit would be stored truncated and Clear() would then free
    //  the wrong size.
    if (lenDom &gt; 255 || lenRes &gt; 65535)
        return *this ;</pre>
			<pre>    /*
    **  Compile finished URL
    */</pre>
			<pre>    lenTotal = lenProt + lenDom + lenPort + lenRes ;
    m_addr = g_ssrInet.Alloc(lenTotal + URL_FACTOR) ;
    thisCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    pData = thisCtl-&gt;m_data ;</pre>
			<pre>    thisCtl-&gt;m_copy = 1 ;               //  One holder (this instance)
    thisCtl-&gt;m_lenProt = lenProt ;      //  Length of protocol component
    thisCtl-&gt;m_lenDom = lenDom ;        //  Length of domain component
    thisCtl-&gt;m_lenPort = lenPort ;      //  Length of port component
    thisCtl-&gt;m_lenRes = lenRes ;        //  Length of resource component
    thisCtl-&gt;m_port = nPort ;           //  Port number</pre>
			<pre>    memcpy(pData, bSecure ? "https://" : "http://", lenProt) ;
    memcpy(pData + lenProt, *domain, lenDom) ;</pre>
			<pre>    //  The port text goes directly after the domain. It is written relative to m_data rather than by a fixed offset from the start of the URL space, and is bounded
    //  to the colon and digits counted in lenPort plus a terminator (which the / below overwrites).
    if (bPort)
        snprintf(pData + lenProt + lenDom, lenPort + 1, ":&#37;u", nPort) ;</pre>
			<pre>    pData[lenProt + lenDom + lenPort] = CHAR_FWSLASH ;
    if (lenRes &gt; 1)
        memcpy(pData + lenProt + lenDom + lenPort + 1, pRI, lenRes - 1) ;
    pData[lenTotal] = 0 ;</pre>
			<pre>    return *this ;
}</pre>
			<pre>//  Encoding:   If the internal buffer exists the first eight bytes will have the following meanings:-
//
//      m_buf[0] - Copy count
//      m_buf[1] - Length of protocol component
//      m_buf[2] - Length of domain component
//      m_buf[3] - Length of port component
//      m_buf[4] - MSB of length of resource component
//      m_buf[5] - LSB of length of resource component
//      m_buf[6] - MSB of port
//      m_buf[7] - LSB of port</pre>
			<pre>hzUrl&amp;  hzUrl::operator=    (const char* url)
{
    //  Assign the URL by character string of the form [scheme://]domain[:port][/resource]. Leading whitespace is skipped and the resource ends at the first ? or
    //  whitespace/control character. The supported schemes are http, https, ws, wss and ftp. Without a scheme the port defaults to 80.
    //
    //  The text of each component is copied as it appears in the supplied string, so the lengths recorded in the URL space always agree with what is stored. An empty
    //  port (a colon with no digits) leaves the port number at the default for the scheme.
    //
    //  On any failure the URL is left null. Failures are: unsupported scheme, whitespace in the domain, no period in the domain, no alphanumerics in the domain, port
    //  over 65535, unexpected character after the domain/port, or a component too long for its length field in the URL space.
    //
    //  Arguments:  1)  url The whole URL as string
    //
    //  Returns:    Reference to this URL instance</pre>
			<pre>    _hzfunc("hzUrl::operator=") ;</pre>
			<pre>    _url_space* thisCtl ;           //  This URL space
    char*       pData ;             //  Start of URL text within the URL space
    const char* pSep ;              //  Position of scheme separator (://) if any
    const char* pProt ;             //  Start of protocol component
    const char* pDom ;              //  Start of domain component
    const char* pPort = 0 ;         //  Start of port component (the colon) if present
    const char* pRes ;              //  Start of resource component
    const char* _ptr ;              //  Char iterator</pre>
			<pre>    uint32_t    lenProt = 0 ;       //  Length of protocol indicator (eg http://)
    uint32_t    lenDom = 0 ;        //  Length of domain name
    uint32_t    lenPort = 0 ;       //  Length of port indicator if present (colon plus digits)
    uint32_t    lenRes = 0 ;        //  Length of resource
    uint32_t    lenTotal ;          //  Length of whole string</pre>
			<pre>    uint32_t    nPort = 80 ;        //  Port number
    uint32_t    nAlphas = 0 ;       //  Number of alphanum chars (must be at least one)
    uint32_t    nPeriod = 0 ;       //  Number of periods (must be at least one)
    uint32_t    nWhite = 0 ;        //  Number of whitespace chars</pre>
			<pre>    Clear() ;
    if (!url || !url[0])
        return *this ;</pre>
			<pre>    //  Strip leading spaces to find real start. The terminator must be tested for as it too compares as less than a space, and without the test a string of nothing
    //  but whitespace would be scanned beyond its end.
    for (_ptr = url ; *_ptr &amp;&amp; *_ptr &lt;= CHAR_SPACE ; _ptr++) ;
    if (!*_ptr)
        return *this ;</pre>
			<pre>    /*
    **  Handle protocol component - Step over the scheme to get to domain. A scheme is only deemed present if the :// comes before any forward slash, so that a // or
    **  a :// further along (in the resource) does not get a URL without a scheme rejected.
    */</pre>
			<pre>    pProt = _ptr ;
    pSep = strstr(_ptr, "://") ;</pre>
			<pre>    if (pSep &amp;&amp; !memchr(_ptr, CHAR_FWSLASH, pSep - _ptr))
    {
        //  strncmp stops at the null terminator so a short string is not read beyond its end
        if      (!strncmp(_ptr, "http://", 7))  { nPort = 80 ; lenProt = 7 ; }
        else if (!strncmp(_ptr, "https://", 8)) { nPort = 443 ; lenProt = 8 ; }
        else if (!strncmp(_ptr, "ws://", 5))    { nPort = 80 ; lenProt = 5 ; }
        else if (!strncmp(_ptr, "wss://", 6))   { nPort = 443 ; lenProt = 6 ; }
        else if (!strncmp(_ptr, "ftp://", 6))   { nPort = 21 ; lenProt = 6 ; }
        else
        {
            hzerr(E_FORMAT, "Scheme not supported") ;
            return *this ;
        }</pre>
			<pre>        _ptr += lenProt ;
    }</pre>
			<pre>    pDom = _ptr ;</pre>
			<pre>    /*
    **  Handle domain component: Read up to the end of the domain name. This could be the end of the test string or it
    **  could be a forward slash or a colon (for the port number)
    */</pre>
			<pre>    for (; *_ptr ; lenDom++, _ptr++)
    {
        if (*_ptr == CHAR_COLON)    break ;
        if (*_ptr == CHAR_FWSLASH)  break ;</pre>
			<pre>        if (*_ptr == CHAR_PERIOD)   { nPeriod++ ; continue ; }
        if (*_ptr == CHAR_MINUS)    { nAlphas++ ; continue ; }
        if (*_ptr &lt;= CHAR_SPACE)    { nWhite++ ; continue ; }</pre>
			<pre>        nAlphas++ ;
    }</pre>
			<pre>    //  Deal with failures
    if (nWhite)     { hzerr(E_FORMAT, "Has whitespace\n") ; return *this ; }
    if (!nPeriod)   { hzerr(E_FORMAT, "No periods\n") ; return *this ; }
    if (!nAlphas)   { hzerr(E_FORMAT, "No alphas\n") ; return *this ; }</pre>
			<pre>    //  The domain length is held in a single byte in the URL space
    if (lenDom &gt; 255)
    {
        hzerr(E_FORMAT, "Domain too long\n") ;
        return *this ;
    }</pre>
			<pre>    /*
    **  Handle port component: Deal with case where domain string is terminated by a port indicator
    */</pre>
			<pre>    if (*_ptr == CHAR_COLON)
    {
        pPort = _ptr ;
        lenPort = 1 ;
        _ptr++ ;</pre>
			<pre>        if (IsDigit(*_ptr))
        {
            //  The range is tested on every digit so that a long run of digits cannot wrap the value back into range
            for (nPort = 0 ; IsDigit(*_ptr) ; lenPort++, _ptr++)
            {
                nPort *= 10 ;
                nPort += (*_ptr - '0') ;</pre>
			<pre>                if (nPort &gt;= 65536)
                {
                    hzerr(E_FORMAT, "Bad port\n") ;
                    return *this ;
                }
            }
        }</pre>
			<pre>        //  The port length is held in a single byte in the URL space (only leading zeros could get it this far)
        if (lenPort &gt; 255)
        {
            hzerr(E_FORMAT, "Bad port\n") ;
            return *this ;
        }
    }</pre>
			<pre>    //  Should now have a terminator (below a space) or the / marking start of resource. Anything else, a space included, leaves the URL null.
    if (*_ptr &gt;= CHAR_SPACE &amp;&amp; *_ptr != CHAR_FWSLASH)
        return *this ;</pre>
			<pre>    /*
    **  Handle resource component: Read up to end of resource
    */</pre>
			<pre>    pRes = _ptr ;
    for (; *_ptr &gt; ' ' &amp;&amp; *_ptr != CHAR_QUERY ; lenRes++, _ptr++) ;</pre>
			<pre>    //  The resource length is held in two bytes in the URL space
    if (lenRes &gt; 65535)
    {
        hzerr(E_FORMAT, "Resource too long\n") ;
        return *this ;
    }</pre>
			<pre>    /*
    **  Compile finished URL. Nothing can fail from here on, so the URL space is never left allocated but unfilled.
    */</pre>
			<pre>    lenTotal = lenProt + lenDom + lenPort + lenRes ;
    m_addr = g_ssrInet.Alloc(lenTotal + URL_FACTOR) ;
    thisCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    pData = thisCtl-&gt;m_data ;</pre>
			<pre>    //  Do control part
    thisCtl-&gt;m_copy = 1 ;               //  One holder (this instance)
    thisCtl-&gt;m_lenProt = lenProt ;      //  Length of protocol component
    thisCtl-&gt;m_lenDom = lenDom ;        //  Length of domain component
    thisCtl-&gt;m_lenPort = lenPort ;      //  Length of port component
    thisCtl-&gt;m_lenRes = lenRes ;        //  Length of resource component
    thisCtl-&gt;m_port = nPort ;           //  Port number</pre>
			<pre>    //  Do protocol part (absent if the URL was given without a scheme)
    if (lenProt)
        memcpy(pData, pProt, lenProt) ;</pre>
			<pre>    //  Do domain part
    memcpy(pData + lenProt, pDom, lenDom) ;</pre>
			<pre>    //  Do port part. This is copied as supplied (rather than printed from the number) so that exactly lenPort bytes are written.
    if (lenPort)
        memcpy(pData + lenProt + lenDom, pPort, lenPort) ;</pre>
			<pre>    //  Do resource part
    memcpy(pData + lenProt + lenDom + lenPort, pRes, lenRes) ;
    pData[lenTotal] = 0 ;</pre>
			<pre>    return *this ;
}</pre>
			<pre>hzUrl&amp;  hzUrl::operator=    (const hzString&amp; S)
{
    //  Assign the URL by string instance. The work is done by the character string form of the assignment operator.
    //
    //  Arguments:  1)  S   The string value
    //  Returns:    Reference to this URL instance</pre>
			<pre>    const char* cpVal ;     //  Value of the supplied string</pre>
			<pre>    Clear() ;
    cpVal = *S ;
    return operator=(cpVal) ;
}</pre>
			<pre>hzUrl&amp;  hzUrl::operator=    (const hzUrl&amp; url)
{
    //  Assign the URL by copying another hzUrl instance. This is a soft copy: this instance is pointed at the URL space of the supplied instance and the copy counter
    //  of that space is incremented.
    //
    //  If both instances already refer to the same URL space (which includes assigning an instance to itself) there is nothing to do. Without this test, assigning an
    //  instance to itself would call Clear() on the operand as well and so lose the value (and free the space if this was the only holder).
    //
    //  NOTE: The copy counter is a single byte and no limit is applied here - TODO confirm that this cannot be reached in practice.
    //
    //  Argument:   url The URL value
    //
    //  Returns:    Reference to this URL instance</pre>
			<pre>    _url_space* suppCtl ;       //  Supplied URL space</pre>
			<pre>    if (m_addr == url.m_addr)
        return *this ;</pre>
			<pre>    Clear() ;
    if (!url.m_addr)
        return *this ;</pre>
			<pre>    suppCtl = (_url_space*) g_ssrInet.Xlate(url.m_addr) ;</pre>
			<pre>    if (_hzGlobal_MT)
        __sync_add_and_fetch(&amp;(suppCtl-&gt;m_copy), 1) ;
    else
        suppCtl-&gt;m_copy++ ;</pre>
			<pre>    m_addr = url.m_addr ;
    return *this ;
}</pre>
			<pre>/*
**  Compare operators
*/</pre>
			<pre>bool    hzUrl::operator==   (const hzUrl&amp; testUrl) const
{
    //  Test for equality between this URL and a supplied test value. Two instances sharing a URL space (or both null) are equal without further ado. Otherwise the
    //  component lengths and port are compared first as a cheap way of ruling out a match, before the text is compared.
    //
    //  Argument:   testUrl The test URL
    //
    //  Returns:    True    If this hzUrl is equal to the operand hzUrl
    //              False   Otherwise</pre>
			<pre>    _url_space* pLhs ;      //  This URL space
    _url_space* pRhs ;      //  Supplied URL space</pre>
			<pre>    if (m_addr == testUrl.m_addr)
        return true ;</pre>
			<pre>    //  The addresses differ so if either is null, the other is not
    if (!m_addr || !testUrl.m_addr)
        return false ;</pre>
			<pre>    pLhs = (_url_space*) g_ssrInet.Xlate(m_addr) ;
    pRhs = (_url_space*) g_ssrInet.Xlate(testUrl.m_addr) ;</pre>
			<pre>    if (pLhs-&gt;m_lenRes != pRhs-&gt;m_lenRes || pLhs-&gt;m_port != pRhs-&gt;m_port || pLhs-&gt;m_lenProt != pRhs-&gt;m_lenProt
            || pLhs-&gt;m_lenDom != pRhs-&gt;m_lenDom || pLhs-&gt;m_lenPort != pRhs-&gt;m_lenPort)
        return false ;</pre>
			<pre>    return strcmp(pLhs-&gt;m_data, pRhs-&gt;m_data) == 0 ;
}</pre>
			<pre>bool    hzUrl::operator&lt;    (const hzUrl&amp; testUrl) const
{
    //  Test for this URL being less than the supplied test URL. A null URL is less than any URL that is not null. Otherwise the comparison is that of the whole URL
    //  text of each as strings.
    //
    //  Argument:   testUrl The test URL
    //
    //  Returns:    True    If this hzUrl is less than the operand hzUrl
    //              False   Otherwise</pre>
			<pre>    //  Same URL space (or both null) means equal
    if (m_addr == testUrl.m_addr)
        return false ;</pre>
			<pre>    //  The addresses differ so at most one of the two is null
    if (!m_addr)
        return true ;
    if (!testUrl.m_addr)
        return false ;</pre>
			<pre>    if (Whole() &lt; testUrl.Whole())
        return true ;
    return false ;
}</pre>
			<pre>bool    hzUrl::operator&lt;=   (const hzUrl&amp; testUrl) const
{
    //  Test for this URL being lexically less than or equal to the supplied test URL. Two instances sharing a URL space (or both null) are equal, so the test holds.
    //  A null URL is less than any URL that is not null. Otherwise the comparison is that of the whole URL text of each as strings.
    //
    //  Argument:   testUrl The test URL
    //
    //  Returns:    True    If this hzUrl is less than or equal to the operand hzUrl
    //              False   Otherwise</pre>
			<pre>    if (m_addr == testUrl.m_addr)   return true ;
    if (!m_addr &amp;&amp; testUrl.m_addr)  return true ;
    if (m_addr &amp;&amp; !testUrl.m_addr)  return false ;</pre>
			<pre>    return Whole() &lt;= testUrl.Whole() ? true : false ;
}</pre>
			<pre>bool    hzUrl::operator&gt;    (const hzUrl&amp; testUrl) const
{
    //  Test for this URL being lexically greater than the supplied test URL. A URL that is not null is greater than a null URL. Otherwise the comparison is that of
    //  the whole URL text of each as strings.
    //
    //  Argument:   testUrl The test URL
    //
    //  Returns:    True    If this hzUrl is greater than the operand hzUrl
    //              False   Otherwise</pre>
			<pre>    //  Same URL space (or both null) means equal
    if (m_addr == testUrl.m_addr)
        return false ;</pre>
			<pre>    //  The addresses differ so at most one of the two is null
    if (!m_addr)
        return false ;
    if (!testUrl.m_addr)
        return true ;</pre>
			<pre>    if (Whole() &gt; testUrl.Whole())
        return true ;
    return false ;
}</pre>
			<pre>bool    hzUrl::operator&gt;=   (const hzUrl&amp; testUrl) const
{
    //  Test for this URL being lexically greater than or equal to the supplied test URL. Two instances sharing a URL space (or both null) are equal, so the test
    //  holds. A URL that is not null is greater than a null URL. Otherwise the comparison is that of the whole URL text of each as strings.
    //
    //  Argument:   testUrl The test URL
    //
    //  Returns:    True    If this hzUrl is greater than or equal to the operand hzUrl
    //              False   Otherwise</pre>
			<pre>    if (m_addr == testUrl.m_addr)   return true ;
    if (!m_addr &amp;&amp; testUrl.m_addr)  return false ;
    if (m_addr &amp;&amp; !testUrl.m_addr)  return true ;</pre>
			<pre>    return Whole() &gt;= testUrl.Whole() ? true : false ;
}</pre>
			<pre>bool    hzUrl::operator==   (const hzString&amp; S) const
{
    //  Test if this hzUrl is equal to the supplied string. An empty string and a null URL are equal to each other and to nothing else. Otherwise the whole URL text
    //  is compared to the string.
    //
    //  Argument:   S   The test string
    //
    //  Returns:    True    If the URL text is the same as the string, or both are empty
    //              False   Otherwise</pre>
			<pre>    if (!S || !m_addr)
        return (!S &amp;&amp; !m_addr) ? true : false ;</pre>
			<pre>    if (Whole() == S)
        return true ;
    return false ;
}</pre>
			<pre>bool    hzUrl::operator!=   (const hzString&amp; S) const
{
    //  Test if this hzUrl differs from the supplied string. An empty string and a null URL do not differ from each other but each differs from everything else.
    //  Otherwise the whole URL text is compared to the string.
    //
    //  Argument:   S   The test string
    //
    //  Returns:    True    If the URL text is not the same as the string
    //              False   If it is the same, or both are empty</pre>
			<pre>    if (!S || !m_addr)
        return (!S &amp;&amp; !m_addr) ? false : true ;</pre>
			<pre>    if (Whole() == S)
        return false ;
    return true ;
}</pre>
			<pre>bool    hzUrl::operator==   (const char* cpStr) const
{
    //  Test if this hzUrl is equal to the supplied character string. A null or empty character string and a null URL are equal to each other and to nothing else.
    //  Otherwise the whole URL text is compared to the character string.
    //
    //  Argument:   cpStr   The test character string
    //
    //  Returns:    True    If the URL text is the same as the character string, or both are empty
    //              False   Otherwise</pre>
			<pre>    bool    bEmpty ;    //  True if the supplied character string is null or empty</pre>
			<pre>    bEmpty = (!cpStr || !cpStr[0]) ;</pre>
			<pre>    if (bEmpty || !m_addr)
        return (bEmpty &amp;&amp; !m_addr) ? true : false ;</pre>
			<pre>    if (Whole() == cpStr)
        return true ;
    return false ;
}</pre>
			<pre>bool    hzUrl::operator!=   (const char* cpStr) const
{
    //  Test if this hzUrl differs from the supplied character string. A null or empty character string and a null URL do not differ from each other but each differs
    //  from everything else. Otherwise the whole URL text is compared to the character string.
    //
    //  Argument:   cpStr   The test character string
    //
    //  Returns:    True    If the URL text is not the same as the character string
    //              False   If it is the same, or both are empty</pre>
			<pre>    bool    bEmpty ;    //  True if the supplied character string is null or empty</pre>
			<pre>    bEmpty = (!cpStr || !cpStr[0]) ;</pre>
			<pre>    if (bEmpty || !m_addr)
        return (bEmpty &amp;&amp; !m_addr) ? false : true ;</pre>
			<pre>    if (Whole() == cpStr)
        return false ;
    return true ;
}</pre>
			<pre>std::ostream&amp;   operator&lt;&lt;  (std::ostream&amp; os, const hzUrl&amp; obj)
{
    //  Category:   Data Output
    //
    //  Write the whole URL text to the output stream. Nothing is written if the URL is null.
    //
    //  Arguments:  1)  os  The output stream
    //              2)  obj The URL to write
    //
    //  Returns:    Reference to the supplied output stream</pre>
			<pre>    if (!*obj)
        return os ;</pre>
			<pre>    os &lt;&lt; *obj ;
    return os ;
}</pre>
			<pre>/*
**  Section 2:  Independent URL test functions
*/</pre>
			<pre>//  Tests if a string is of the form of a URL (Uniform Resource Locator). To qualify, a string must at least amount to an Internet domain
//  name. This may optionally be preceded by either a 'http://' or 'https://' sequence and may be optionally followed by a port indicator
//  and/or a forward slash. If the forward slash is present it can appear on its own or be followed by a resource specifier and may be
//  further followed by a resource qualifier (a query). Or the forward slash can be followed by the resource qualifier directly.
//
//  Dealing with each of these entities in turn:-
//  1)  An Internet domain is a string of characters from the set [a-z], [A-Z], [0-9], [_,-,.] which must contain at least one period and
//      must not start or end with any punctuation character (including the period)
//
//  2)  A port indicator specifies the IP port to connect to. It is a colon followed by the port number expressed as a decimal.
// 
//  3)  A resource specifier is one or more strings separated by a forward slash representing 'directories' off of the domain's 'root'. The
//      allowed chars are [a-z], [A-Z], [0-9], [_,-] and hash (#) if that appears in the last string.
//
//  4)  A resource qualifier is very similar to the resource specifier except that it uses a query (?) to mark the start of a search
//  parameter that will be expressed in the form (some_name=some_value).</pre>
			<pre>bool    IsUrl   (const char* url)
{
    //  Category:   Text processing
    //
    //  Test whether the supplied cstr is acceptable as a URL. The string is assigned to a temporary hzUrl and the verdict is obtained by applying operator! to that hzUrl.
    //
    //  Arguments:  1)  url     The candidate URL as a cstr
    //
    //  Returns:    True    If the hzUrl assigned from the string does not test as empty
    //              False   Otherwise</pre>
			<pre>    _hzfunc("IsUrl") ;</pre>
			<pre>    hzUrl   candidate ;     //  Temporary URL populated from the supplied string</pre>
			<pre>    candidate = url ;
    if (!candidate)
        return false ;
    return true ;
}</pre>
			<pre>bool    IsUrl   (hzUrl&amp; url, uint32_t&amp; nLen, chIter&amp; ci)
{
    //  Category:   Text processing
    //
    //  Determine if the supplied chain-iterator is at the start of a valid URL. Any leading chars at or below the space char are skipped before the test begins. The
    //  supplied iterator is copied and the copy is used for the scan, so the caller's iterator is not moved by this function.
    //
    //  Note that only the scheme (if present) and the domain name are accumulated into the token from which the hzUrl is formed. A port, a resource path and a query are
    //  checked for form but are not copied into the token, so nLen is the length of the scheme and domain only.
    //
    //  Arguments:  1)  url     A hzUrl reference; Cleared on entry and populated by chain content if that content is of the form of a URL
    //              2)  nLen    The string length used to make the URL. This is usually needed by the calling function to advance the chain
    //                          iterator in the event that a URL is found. It is only meaningful when true is returned.
    //              3)  ci      The chain iterator into the content being tested.
    //
    //  Returns:    True    If the chain iterator is at the start of a valid URL
    //              False   Otherwise</pre>
			<pre>    _hzfunc("IsUrl") ;</pre>
			<pre>    hzChain     W ;                 //  For building tokens
    chIter      xi ;                //  Iterator
    hzString    S ;                 //  Token as a string
    uint32_t    nPeriod = 0 ;       //  Number of periods
    uint32_t    nPeriodCont = 0 ;   //  Number of contiguous periods
    uint32_t    nAlpha = 0 ;        //  Number of URL chars that are not periods
    uint32_t    nPort = 0 ;         //  Port number</pre>
			<pre>    url.Clear() ;
    nLen = 0 ;
    if (ci.eof())
        return false ;</pre>
			<pre>    //  Strip leading spaces
    for (xi = ci ; !xi.eof() &amp;&amp; *xi &lt;= CHAR_SPACE ; xi++) ;</pre>
			<pre>    //  Remove http:// or https://. The two are mutually exclusive so the second is only tested if the first did not match (and so did not move the iterator).
    if (*xi == 'h')
    {
        if (xi == "http://")
            { W &lt;&lt; "http://" ; nLen = 7 ; xi += 7 ; }
        else if (xi == "https://")
            { W &lt;&lt; "https://" ; nLen = 8 ; xi += 8 ; }
    }</pre>
			<pre>    //  Read up to the end of the domain name. This could be the end of the test string or it could be a forward slash or a colon (for
    //  the port number). This part cannot legally end with a period but it could have a period on the end if the URL was the last word
    //  in a sentence for example.</pre>
			<pre>    for (; !xi.eof() &amp;&amp; IsUrlnorm(*xi) ; xi++)
    {
        W.AddByte(*xi) ;</pre>
			<pre>        if (*xi == CHAR_PERIOD)
        {
            //  Two periods in a row cannot occur in a domain name
            nPeriod++ ;
            nPeriodCont++ ;
            if (nPeriodCont == 2)
                return false ;
        }
        else
        {
            nPeriodCont = 0 ;
            nAlpha++ ;
        }
    }</pre>
			<pre>    //  NOTE(review): This demands at least two periods, whereas the notes preceding these functions say a domain needs at least one. Left as found - confirm intent.
    if (nAlpha &lt; 3 || nPeriod &lt; 2)
        return false ;</pre>
			<pre>    //  Check for port number
    if (*xi == CHAR_COLON)
    {
        xi++ ;
        if (!IsDigit(*xi))
            return false ;
        for (nPort = 0 ; !xi.eof() &amp;&amp; IsDigit(*xi) ; xi++)
        {
            nPort *= 10 ; nPort += (*xi - '0') ;</pre>
			<pre>            //  The highest valid port is 65535. Testing within the loop also stops a long run of digits from wrapping the 32-bit value back into range.
            if (nPort &gt; 0xFFFF)
                return false ;
        }
    }</pre>
			<pre>    //  The URL may end here with any allowed incident punctuation char or space - or it may continue with a slash
    if (*xi == CHAR_FWSLASH)
    {
        for (xi++ ; !xi.eof() &amp;&amp; IsUrlnorm(*xi) ; xi++)
        {
            if (*xi == CHAR_PERIOD)
                nPeriod++ ;
            else
                nAlpha++ ;
        }</pre>
			<pre>        if (*xi == CHAR_QUERY)
        {
            for (xi++ ; !xi.eof() &amp;&amp; IsUrlresv(*xi) ; xi++)
            {
                //  A percent sign must be followed by exactly two hex digits
                if (*xi == CHAR_PERCENT)
                {
                    xi++ ;
                    if (!IsHex(*xi))
                        return false ;
                    xi++ ;
                    if (!IsHex(*xi))
                        return false ;
                    nLen += 2 ;
                }
            }
        }
    }</pre>
			<pre>    //  NOTE(review): The iterator is stepped back over a trailing period here but is not read again below, as the token is taken from W rather than from the
    //  iterator range (see the disabled GetString call). Left as found.
    if (*xi &lt;= CHAR_SPACE)
    {
        xi-- ;
        if (*xi == CHAR_PERIOD)
            xi-- ;
    }</pre>
			<pre>    //ci.GetString(S, xi) ;
    S = W ;
    url = *S ;</pre>
			<pre>    //  The hzUrl has the final say on whether the token is acceptable
    if (!url.Whole())
        return false ;</pre>
			<pre>    //  Success. Previously this path returned false, meaning the function could never report a URL.
    nLen = S.Length() ;
    return true ;
}</pre>
			<pre>hzString    hzUrl::Filename (void) const
{
    //  Convert a URL to a string suitable for a filename as used in webscraping. The following conversions occur:-
    //
    //  1)  The sequence http:// is converted to h: (but only if it occurs at the start)
    //  2)  The sequence https:// is converted to s: (but only if it occurs at the start)
    //  3)  Upper case letters are converted to lower case. Lower case letters and digits are copied as they are.
    //  4)  The forward slash is converted to an @
    //  5)  The ampersand is converted to a colon
    //  6)  The underscore, period, percent, equals, query, plus and minus chars are copied as they are
    //  7)  Any other char is converted to a percent sign followed by two lower case hexadecimal digits (&#37;xx form)
    //
    //  NOTE(review): Earlier notes on this function required the encoding to be reversible and required percent chars in the input to be converted to &#37;hh form. The code
    //  does neither: upper case is folded to lower case and the percent sign is passed through unchanged. The encoding is left as found so that filenames already generated
    //  continue to match - confirm which behaviour is intended.
    //
    //  Arguments:  None
    //  Returns:    Instance of hzString being the URL in filename form. This is empty if the hzUrl is empty.</pre>
			<pre>    _hzfunc("hzUrl::Filename") ;</pre>
			<pre>    hzChain         Z ;         //  Used to construct the (longer) encoded string value
    _url_space*     thisCtl ;   //  This URL space
    uchar*          i ;         //  For iteration
    hzString        S ;         //  Return string
    uint32_t        val ;       //  For casting
    char            buf [4] ;   //  For hex-conversion (two hex digits and the null terminator)</pre>
			<pre>    if (!m_addr)
        return S ;
    thisCtl = (_url_space*) g_ssrInet.Xlate(m_addr) ;</pre>
			<pre>    //  Replace a leading scheme by its short form. The two tests are mutually exclusive and both are made against the start of the string. Previously the https test ran
    //  after the pointer had already been moved past a http:// prefix, so it compared bytes beyond the intended position. strncmp stops at the null terminator, so a value
    //  shorter than the scheme is not read beyond its end.
    i = (uchar*) thisCtl-&gt;m_data ;
    if (!strncmp((const char*) i, "https://", 8))
        { i += 8 ; Z &lt;&lt; "s:" ; }
    else if (!strncmp((const char*) i, "http://", 7))
        { i += 7 ; Z &lt;&lt; "h:" ; }</pre>
			<pre>    //  Encode the remainder of the URL one char at a time
    for (; *i ; i++)
    {
        if (*i &gt;= 'A' &amp;&amp; *i &lt;= 'Z')
            { Z.AddByte(conv2lower(*i)) ; continue ; }</pre>
			<pre>        if (*i &gt;= 'a' &amp;&amp; *i &lt;= 'z') { Z.AddByte(*i) ; continue ; }
        if (*i &gt;= '0' &amp;&amp; *i &lt;= '9') { Z.AddByte(*i) ; continue ; }</pre>
			<pre>        //  Chars that are substituted by another single char
        if (*i == CHAR_FWSLASH)     { Z.AddByte(CHAR_AT) ; continue ; }
        if (*i == CHAR_AMPSAND)     { Z.AddByte(CHAR_COLON) ; continue ; }</pre>
			<pre>        //  Punctuation that is passed through unchanged
        if (*i == CHAR_USCORE || *i == CHAR_PERIOD || *i == CHAR_PERCENT || *i == CHAR_EQUAL || *i == CHAR_QUERY || *i == CHAR_PLUS ||
            *i == CHAR_MINUS)
        {
            Z.AddByte(*i) ;
            continue ;
        }</pre>
			<pre>        //  Everything else becomes a percent sign and two hex digits
        Z.AddByte(CHAR_PERCENT) ;
        val = (uchar) *i ;
        sprintf(buf, "&#37;02x", val) ;
        Z.AddByte(buf[0]) ;
        Z.AddByte(buf[1]) ;
    }</pre>
			<pre>    S = Z ;
    return S ;
}</pre>