Fetch the page found at the supplied URL and return it as a document (either XML or HTML). Note that if the page has already been downloaded (is in the site's history) then it is only downloaded again if its time to live has expired. If the page is not downloaded then this function will reload it from file. Returns a pointer to a newly allocated document, which must be deleted after use.
| Return Type | Function name | Arguments |
|---|---|---|
| hzDocument* | hzWebhost::Download | (hzUrl&) |
Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp
Function Logic:
Function body:
hzDocument* hzWebhost::Download (hzUrl& url)
{
	//	Fetch the page found at the supplied URL and return it as a document (either XML or HTML).
	//
	//	Note that if the page has already been downloaded (is in the site's history) then it is only downloaded again if its time to
	//	live has expired. If the page is not downloaded then this function will reload it from file.
	//
	//	Arguments:	1)	url	The URL of the file/resource to download
	//
	//	Returns:	Pointer to newly allocated document. Must be deleted after use. Null on failure or empty content.

	_hzfunc("hzWebhost::Download") ;

	static uint32_t nlast = 0 ;	//	Last point reached (for download resumption). NOTE(review): logged below but never advanced - confirm intent.

	ofstream	os ;		//	To write out page contents
	hzDocument*	pDoc = 0 ;	//	Document downloaded
	hzDocXml*	pXdoc = 0 ;	//	XML Document downloaded
	hzDocHtml*	pHdoc = 0 ;	//	HTML Document downloaded
	hzDocMeta*	pMark ;		//	Document meta data
	hzXDate		now ;		//	Date & Time now
	hzString	S ;		//	Temp string
	HttpRC		hc ;		//	HTTP server return code
	hzEcode		rc ;		//	Return code
	bool		bHist = false ;	//	Set if url is already in history and downloaded again because of being out of date
	char		numbuf [16] ;	//	Working buffer (was 8 bytes: "/%04d" overflows once m_Id exceeds six digits)

	/*
	**	Check URL, insert in visited links if not already there
	*/

	if (!url)
		{ threadLog("No supplied address\n") ; return 0 ; }
	threadLog("FETCHING PAGE: %s\n", *url) ;

	now.SysDateTime() ;

	if (!(m_Opflags & WEBFLG_FORCE))
	{
		if (m_mapHist.Exists(url))
		{
			//	The requested URL exists in the repository already. We check if it has expired and if not we terminate with OK
			pMark = m_mapHist[url] ;
			bHist = true ;
			threadLog("Page %s is historic\n", *url) ;

			//	Create a document of the right type (XML or HTML). Unknown doctypes default to HTML.
			if (pMark->m_Doctype == DOCTYPE_XML)
				pDoc = pXdoc = new hzDocXml() ;
			else
				pDoc = pHdoc = new hzDocHtml() ;
			pDoc->SetMeta(*pMark) ;

			//	Check if expiry is known and if so if it has expired
			if (pMark->m_Expires.IsSet())
			{
				if (pMark->m_Expires < now)
				{
					//	Expired: load the held content into the document allocated above and return it.
					//	(Previously a second document of the identical type was allocated here, leaking the first.)
					rc = pDoc->Load(HC.m_Content) ;
					threadLog("DOWNLOAD PREVIOUS (error=%s)\n\n", Err2Txt(rc)) ;
					return pDoc ;
				}
			}

			//	At this point either the expiry date is unknown or it is known and has not expired. Load from held content.
			if (!HC.m_Content.Size())
			{
				threadLog("Case 1 Bloody thing is empty!\n") ;
				delete pDoc ;	//	was leaked here before returning null
				return 0 ;
			}

			rc = pDoc->Load(HC.m_Content) ;
			if (rc != E_OK)
				threadLog("LOAD failed (error=%s)\n\n", Err2Txt(rc)) ;
			return pDoc ;
		}
	}

	//	The requested URL is not in the history. Create the document meta for it and download it.
	S = url.Filename() ;

	pMark = new hzDocMeta() ;
	pMark->m_urlReq = url ;
	pMark->m_urlAct = url ;
	pMark->m_Id = m_mapHist.Count() ;

	//	%04u matches the unsigned m_Id (was %d with a uint32_t argument); snprintf bounds the write.
	snprintf(numbuf, sizeof(numbuf), "/%04u", pMark->m_Id) ;
	pMark->m_Filename = m_Repos + numbuf + S ;

	/*
	**	Get page content and process it into a tree
	*/

	threadLog("GETTING PAGE: %s\n", *url) ;
	rc = HC.GetPage(hc, url, pMark->m_Etag) ;
	if (rc != E_OK)
	{
		threadLog("FAILED (error=%s) synopsis\n", Err2Txt(rc)) ;
		threadLog(HC.m_Error) ;
		delete pMark ;	//	not yet inserted in history: free to avoid leak
		return 0 ;
	}

	if (HC.m_Redirect)
		pMark->m_urlAct = HC.m_Redirect ;
	pMark->m_Modified = HC.m_Modified ;

	threadLog("HTTP Return code = %d, cookie (value %s, path %s)\n", (uint32_t) hc, *m_CookieSess, *m_CookiePath) ;

	/*
	**	Write out header to .hdr file and content to .con file
	*/

	if (m_Repos)
	{
		os.open(*pMark->m_Filename) ;
		if (os.fail())
			threadLog("Cannot write out header file %s\n", *pMark->m_Filename) ;
		else
		{
			os << HC.m_Content ;
			os.close() ;
		}
		os.clear() ;
	}

	/*
	**	Add the page but only process pages that are of a known HTML type .htm, .html, .shtml, .xhtml etc
	*/

	threadLog("PROCESSING Content: %d bytes\n", HC.m_Content.Size()) ;
	if (!HC.m_Content.Size())
	{
		threadLog("Case 2 Bloody thing is empty!\n") ;
		delete pMark ;	//	not yet inserted in history: free to avoid leak
		return 0 ;
	}

	pMark->m_Doctype = DeriveDoctype(HC.m_Content) ;

	rc = E_NODATA ;
	if (pMark->m_Doctype == DOCTYPE_XML)
	{
		//	XML
		pDoc = pXdoc = new hzDocXml() ;
		pXdoc->Init(url) ;
		rc = pXdoc->Load(HC.m_Content) ;
	}
	else
	{
		//	HTML (unknown doctypes are treated as HTML)
		pDoc = pHdoc = new hzDocHtml() ;
		pHdoc->Init(url) ;
		rc = pHdoc->Load(HC.m_Content) ;
		if (rc != E_OK)
			threadLog("Case 2 Bloody thing failed (error=%s)!\n", Err2Txt(rc)) ;
	}

	if (rc != E_OK)
	{
		//	Load failed: log and fall through - the partially-loaded document is still recorded and returned with its meta set.
		threadLog("Load page failed error=%s\n", Err2Txt(rc)) ;
	}

	pDoc->SetMeta(*pMark) ;

	//	Place the URL in the site's history, under both requested and actual URL if a redirect occurred
	m_mapHist.Insert(pMark->m_urlReq, pMark) ;
	threadLog("Inserted URL %s\n", *pMark->m_urlReq) ;
	if (pMark->m_urlAct != pMark->m_urlReq)
	{
		m_mapHist.Insert(pMark->m_urlAct, pMark) ;
		threadLog("Inserted URL %s\n", *pMark->m_urlAct) ;
	}
	if (!bHist)
		m_vecHist.Add(pMark) ;

	if (pXdoc)
		threadLog("DOWNLOAD SUCCESS XML Page %s. Now have %d (%d) items in history\n\n", *url, m_mapHist.Count(), nlast) ;
	if (pHdoc)
		threadLog("DOWNLOAD SUCCESS Page %s has %d links. Now have %d (%d) items in history\n\n", *url, pHdoc->m_vecLinks.Count(), m_mapHist.Count(), nlast) ;

	threadLog(HC.m_Error) ;
	return pDoc ;
}