Return Type | Function name | Arguments
hzDocument* | hzWebhost::Download | (const hzUrl&)

Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp

Function Logic:

0:START 1:!url 2:Return 0 3:hzXDate::SysDateTime 4:!(m_Opflags&WEBFLG_FORCE) 5:m_mapHist.Exists(url) 6:pMark bHist 7:pMark->m_Doctype==DOCTYPE_HTML 8:pHdoc pDoc 9:pMark->m_Doctype==DOCTYPE_XML 10:pXdoc pDoc 11:pHdoc pDoc 12:hzDocument::SetMeta hzXDate::IsSet 13:pMark->m_Expires.IsSet() 14:pMark->m_Expires 15:pMark->m_Doctype==DOCTYPE_XML 16:pXdoc pDoc hzDocument::SetMeta hzDocument::Load rc 17:pHdoc pDoc hzDocument::SetMeta hzDocument::Load rc 18:Err2Txt 19:Return pDoc 20:!HC.m_Content.Size() 21:Return 0 22:hzDocument::Load rc 23:rc!=E_OK 24:Err2Txt 25:Return pDoc 26:hzUrl::Filename S pMark pMark pMark hzMapS::Count pMark sprintf pMark hzHttpClient::GetPage rc 27:rc!=E_OK 28:Err2Txt 29:Return 0 30:HC.m_Redirect 31:pMark 32:pMark 33:m_Repos 34:ofstream::open ofstream::fail 35:os.fail() 36:items close 37:ofstream::clear 38:hzChain::Size hzChain::Size 39:!HC.m_Content.Size() 40:Return 0 41:DeriveDoctype pMark rc 42:pMark->m_Doctype==DOCTYPE_XML 43:pXdoc pDoc hzDocument::Init hzDocXml::Load rc 44:pHdoc pDoc hzDocument::Init hzDocHtml::Load rc 45:rc!=E_OK 46:Err2Txt 47:rc!=E_OK 48:Err2Txt 49:hzDocument::SetMeta hzMapS::Insert 50:pMark->m_urlAct!=pMark->m_urlReq 51:hzMapS::Insert 52:!bHist 53:hzVect::Add 54:pXdoc 55:hzMapS::Count 56:pHdoc 57:hzVect::Count hzMapS::Count 58:Return pDoc

Function body:

/*
**  hzWebhost::Download
**
**  Produce a document object (XML or HTML) for the supplied URL.
**
**  Unless WEBFLG_FORCE is set in m_Opflags, a URL already present in m_mapHist is served from whatever content the HTTP client (HC) currently
**  holds, using the stored meta data. Otherwise the page is fetched with HC.GetPage(), optionally written to the repository directory (m_Repos),
**  loaded into a new hzDocXml or hzDocHtml according to DeriveDoctype(), and recorded in m_mapHist/m_vecHist.
**
**  Argument:   url     The address of the page to fetch
**
**  Returns:    Pointer to a newly allocated document. Note the document is returned even if its Load() reported an error (the error is logged).
**              0 if the URL is empty, the fetch failed or there was no content to process.
**
**  NOTE(review): nothing in this function retains or frees the returned document, so ownership presumably passes to the caller - confirm.
*/
hzDocument* hzWebhost::Download (const hzUrl& url)
{
    _hzfunc("hzWebhost::Download") ;

    static uint32_t nlast = 0;      //  Never modified in this function; only reported in the success log lines

    ofstream    os ;                //  Output stream for saving the page to the repository
    hzDocument* pDoc = 0;           //  Document to be returned (base class pointer)
    hzDocXml*   pXdoc = 0;          //  Set if the fetched document is XML
    hzDocHtml*  pHdoc = 0;          //  Set if the fetched document is HTML
    hzDocMeta*  pMark ;             //  Document meta data (history entry)
    hzXDate     now ;               //  Time of this call, for the expiry test
    hzString    S ;                 //  Filename part of the URL
    HttpRC      hc ;                //  HTTP return code from the fetch
    hzEcode     rc ;                //  Return code from fetch/load operations
    bool        bExpired ;          //  True if the history entry has an expiry date that has passed
    char        numbuf [16];        //  Holds "/NNNN" - large enough for any 32-bit value so the id can never overrun it

    //  Check the URL
    if (!url)
        { threadLog("No supplied address\n") ; return 0; }

    threadLog("FETCHING PAGE: %s\n", *url) ;
    now.SysDateTime() ;

    //  Unless a fetch is being forced, serve pages already in the history from the content held by the HTTP client
    if (!(m_Opflags & WEBFLG_FORCE))
    {
        if (m_mapHist.Exists(url))
        {
            pMark = m_mapHist[url] ;
            threadLog("Page %s is historic\n", *url) ;

            bExpired = false ;
            if (pMark->m_Expires.IsSet())
            {
                if (pMark->m_Expires < now)
                    bExpired = true ;
            }

            //  The empty-content test applies only to entries that have not expired. It is made before the document is allocated so that
            //  nothing is leaked on this return.
            if (!bExpired)
            {
                if (!HC.m_Content.Size())
                {
                    threadLog("Case 1 Bloody thing is empty!\n") ;
                    return 0;
                }
            }

            //  Allocate exactly one document of the recorded type (anything that is not XML is treated as HTML). Previously an expired
            //  entry allocated a second document of the same type and the first was orphaned.
            if (pMark->m_Doctype == DOCTYPE_XML)
                pDoc = new hzDocXml() ;
            else
                pDoc = new hzDocHtml() ;
            pDoc->SetMeta(*pMark) ;

            //  NOTE(review): an expired entry is still loaded from HC.m_Content rather than being fetched again - confirm this is intended.
            rc = pDoc->Load(HC.m_Content) ;

            if (bExpired)
                threadLog("DOWNLOAD PREVIOUS (error=%s)\n\n", Err2Txt(rc)) ;
            else
            {
                if (rc != E_OK)
                    threadLog("LOAD failed (error=%s)\n\n", Err2Txt(rc)) ;
            }
            return pDoc ;
        }
    }

    //  Not in the history (or fetch forced): Create the meta data for the page
    //  NOTE(review): with WEBFLG_FORCE set the URL may already be in m_mapHist; what Insert() then does with the older entry is not visible here.
    S = url.Filename() ;

    pMark = new hzDocMeta() ;
    pMark->m_urlReq = url ;
    pMark->m_urlAct = url ;
    pMark->m_Id = m_mapHist.Count() ;

    //  Bounded formatting: the old 8 byte buffer with sprintf would overrun once the id needed more than six digits
    snprintf(numbuf, sizeof(numbuf), "/%04d", pMark->m_Id) ;
    pMark->m_Filename = m_Repos + numbuf + S ;

    //  Get page content
    threadLog("GETTIG PAGE: %s\n", *url) ;
    rc = HC.GetPage(hc, url, pMark->m_Etag) ;
    if (rc != E_OK)
    {
        threadLog("FAILED (error=%s) synopsis\n", Err2Txt(rc)) ;
        threadLog(HC.m_Error) ;

        //  The meta data has not been placed in the history so it must be freed here
        delete pMark ;
        return 0;
    }

    //  If redirected, the actual URL differs from the one requested
    if (HC.m_Redirect)
        pMark->m_urlAct = HC.m_Redirect ;
    pMark->m_Modified = HC.m_Modified ;

    threadLog("HTTP Return code = %d, cookie (value %s, path %s)\n", (uint32_t) hc, *m_CookieSess, *m_CookiePath) ;

    //  If a repository is set, write the page content out to the file named in the meta data. Failure to write is logged but is not fatal.
    if (m_Repos)
    {
        os.open(*pMark->m_Filename) ;
        if (os.fail())
            threadLog("Cannot write out header file %s\n", *pMark->m_Filename) ;
        else
        {
            os << HC.m_Content ;
            os.close() ;
        }
        os.clear() ;
    }

    //  Process the content into a document
    threadLog("PROCESSING Content: %d bytes\n", HC.m_Content.Size()) ;
    if (!HC.m_Content.Size())
    {
        threadLog("Case 2 Bloody thing is empty!\n") ;

        //  As above, the meta data is not yet in the history
        delete pMark ;
        return 0;
    }

    pMark->m_Doctype = DeriveDoctype(HC.m_Content) ;
    rc = E_NODATA ;

    if (pMark->m_Doctype == DOCTYPE_XML)
    {
        pDoc = pXdoc = new hzDocXml() ;
        pXdoc->Init(url) ;
        rc = pXdoc->Load(HC.m_Content) ;
    }
    else
    {
        pDoc = pHdoc = new hzDocHtml() ;
        pHdoc->Init(url) ;
        rc = pHdoc->Load(HC.m_Content) ;
        if (rc != E_OK)
            threadLog("Case 2 Bloody thing failed (error=%s)!\n", Err2Txt(rc)) ;
    }

    //  A load failure is logged but the document is still recorded and returned
    if (rc != E_OK)
    {
        threadLog("Load page failed error=%s\n", Err2Txt(rc)) ;
    }

    pDoc->SetMeta(*pMark) ;

    //  Record the page in the history under the requested URL and, if redirected, under the actual URL as well
    m_mapHist.Insert(pMark->m_urlReq, pMark) ;
    threadLog("Inserted URL %s\n", *pMark->m_urlReq) ;

    if (pMark->m_urlAct != pMark->m_urlReq)
    {
        m_mapHist.Insert(pMark->m_urlAct, pMark) ;
        threadLog("Inserted URL %s\n", *pMark->m_urlAct) ;
    }

    //  Historic pages always return before this point, so the entry is always new here (the former bHist flag could never be true)
    m_vecHist.Add(pMark) ;

    if (pXdoc)
        threadLog("DOWNLOAD SUCCESS XML Page %s. Now have %d (%d) items in history\n\n", *url, m_mapHist.Count(), nlast) ;
    if (pHdoc)
        threadLog("DOWNLOAD SUCCESS Page %s has %d links. Now have %d (%d) items in history\n\n", *url, pHdoc->m_vecLinks.Count(), m_mapHist.Count(), nlast) ;

    threadLog(HC.m_Error) ;
    return pDoc ;
}