| Return Type | Function Name | Arguments |
|---|---|---|
| hzDocument* | hzWebhost::Download | (const hzUrl&) |
Declared in file: hzHttpClient.h
Defined in file: hzHttpClient.cpp
Function Logic:
Download fetches the page at the supplied URL and returns it as a document tree (hzDocHtml or hzDocXml), or NULL on failure. An empty URL is rejected immediately. Unless the WEBFLG_FORCE flag is set in m_Opflags, the history map m_mapHist is consulted first and a page already present in the history is re-loaded from the HTTP client's content buffer and returned without a fresh fetch. Otherwise a new hzDocMeta entry is created, the page is fetched with HC.GetPage(), the content is written to the repository if m_Repos is set, the document type is derived and the content is loaded into an XML or HTML document, and the metadata is recorded in the history under both the requested and the actual (post-redirect) URL. A minimal calling sketch is given after the function body below.
Function body:
hzDocument* hzWebhost::Download (const hzUrl& url)
{
    _hzfunc("hzWebhost::Download") ;

    static uint32_t nlast = 0 ;         //  Reported in the final history-count log message

    ofstream        os ;                //  Stream for writing fetched content to the repository
    hzDocument*     pDoc = 0 ;          //  Document to be returned
    hzDocXml*       pXdoc = 0 ;         //  XML form of the document, where applicable
    hzDocHtml*      pHdoc = 0 ;         //  HTML form of the document, where applicable
    hzDocMeta*      pMark ;             //  Document metadata (history entry)
    hzXDate         now ;               //  Current system date and time
    hzString        S ;                 //  Filename part of the requested URL
    HttpRC          hc ;                //  HTTP return code from the fetch
    hzEcode         rc ;                //  Return code from called functions
    bool            bHist = false ;     //  Set if the URL is already in the history map
    char            numbuf [8] ;        //  Numeric path component of the repository filename
    /*
    **  Check URL, insert in visited links if not already there
    */

    if (!url)
        { threadLog("No supplied address\n") ; return 0 ; }

    threadLog("FETCHING PAGE: %s\n", *url) ;
    now.SysDateTime() ;
    //  Unless a fresh download is forced, check if the URL is already in the history
    if (!(m_Opflags & WEBFLG_FORCE))
    {
        if (m_mapHist.Exists(url))
        {
            pMark = m_mapHist[url] ;
            bHist = true ;
            threadLog("Page %s is historic\n", *url) ;

            //  Allocate a document of the recorded type (defaults to HTML)
            if (pMark->m_Doctype == DOCTYPE_HTML)
                pDoc = pHdoc = new hzDocHtml() ;
            else if (pMark->m_Doctype == DOCTYPE_XML)
                pDoc = pXdoc = new hzDocXml() ;
            else
                pDoc = pHdoc = new hzDocHtml() ;
            pDoc->SetMeta(*pMark) ;

            if (pMark->m_Expires.IsSet())
            {
                //  The historic copy has expired: reload the client's current content and return
                if (pMark->m_Expires < now)
                {
                    rc = pDoc->Load(HC.m_Content) ;
                    threadLog("DOWNLOAD PREVIOUS (error=%s)\n\n", Err2Txt(rc)) ;
                    return pDoc ;
                }
            }

            if (!HC.m_Content.Size())
            {
                threadLog("Case 1 Bloody thing is empty!\n") ;
                return 0 ;
            }

            rc = pDoc->Load(HC.m_Content) ;
            if (rc != E_OK)
                threadLog("LOAD failed (error=%s)\n\n", Err2Txt(rc)) ;
            return pDoc ;
        }
    }
    //  Not historic (or a fresh download is forced): create a new metadata entry
    S = url.Filename() ;

    pMark = new hzDocMeta() ;
    pMark->m_urlReq = url ;
    pMark->m_urlAct = url ;
    pMark->m_Id = m_mapHist.Count() ;

    sprintf(numbuf, "/%04d", pMark->m_Id) ;
    pMark->m_Filename = m_Repos + numbuf + S ;

    /*
    **  Get page content and process it into a tree
    */

    threadLog("GETTING PAGE: %s\n", *url) ;
    rc = HC.GetPage(hc, url, pMark->m_Etag) ;

    if (rc != E_OK)
    {
        threadLog("FAILED (error=%s) synopsis\n", Err2Txt(rc)) ;
        threadLog(HC.m_Error) ;
        return 0 ;
    }

    if (HC.m_Redirect)
        pMark->m_urlAct = HC.m_Redirect ;
    pMark->m_Modified = HC.m_Modified ;

    threadLog("HTTP Return code = %d, cookie (value %s, path %s)\n", (uint32_t) hc, *m_CookieSess, *m_CookiePath) ;
    /*
    **  Write out header to .hdr file and content to .con file
    */

    if (m_Repos)
    {
        os.open(*pMark->m_Filename) ;
        if (os.fail())
            threadLog("Cannot write out header file %s\n", *pMark->m_Filename) ;
        else
        {
            os << HC.m_Content ;
            os.close() ;
        }
        os.clear() ;
    }
    /*
    **  Add the page, but only process pages of a known HTML type (.htm, .html, .shtml, .xhtml etc)
    */

    threadLog("PROCESSING Content: %d bytes\n", HC.m_Content.Size()) ;

    if (!HC.m_Content.Size())
    {
        threadLog("Case 2 Bloody thing is empty!\n") ;
        return 0 ;
    }

    //  Derive the document type and load the content into a document tree
    pMark->m_Doctype = DeriveDoctype(HC.m_Content) ;

    rc = E_NODATA ;
    if (pMark->m_Doctype == DOCTYPE_XML)
    {
        pDoc = pXdoc = new hzDocXml() ;
        pXdoc->Init(url) ;
        rc = pXdoc->Load(HC.m_Content) ;
    }
    else
    {
        pDoc = pHdoc = new hzDocHtml() ;
        pHdoc->Init(url) ;
        rc = pHdoc->Load(HC.m_Content) ;

        if (rc != E_OK)
            threadLog("Case 2 Bloody thing failed (error=%s)!\n", Err2Txt(rc)) ;
    }

    if (rc != E_OK)
    {
        threadLog("Load page failed error=%s\n", Err2Txt(rc)) ;
    }
    pDoc->SetMeta(*pMark) ;

    //  Record the page in the history under both the requested and the actual (post-redirect) URL
    m_mapHist.Insert(pMark->m_urlReq, pMark) ;
    threadLog("Inserted URL %s\n", *pMark->m_urlReq) ;

    if (pMark->m_urlAct != pMark->m_urlReq)
    {
        m_mapHist.Insert(pMark->m_urlAct, pMark) ;
        threadLog("Inserted URL %s\n", *pMark->m_urlAct) ;
    }

    if (!bHist)
        m_vecHist.Add(pMark) ;

    if (pXdoc)
        threadLog("DOWNLOAD SUCCESS XML Page %s. Now have %d (%d) items in history\n\n", *url, m_mapHist.Count(), nlast) ;
    if (pHdoc)
        threadLog("DOWNLOAD SUCCESS Page %s has %d links. Now have %d (%d) items in history\n\n", *url, pHdoc->m_vecLinks.Count(), m_mapHist.Count(), nlast) ;

    threadLog(HC.m_Error) ;
    return pDoc ;
}
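
Example usage:
The sketch below shows how a caller might invoke hzWebhost::Download. It is illustrative only: it assumes hzUrl can be assigned from a C-string, that the hzWebhost instance has been configured elsewhere (repository path, option flags, cookies), and that hzDocument is polymorphic so the returned pointer can be down-cast to hzDocHtml. The helper function and these assumptions are not taken from the source above.

#include "hzHttpClient.h"

//  Illustrative helper: fetch one page and report how many links it yielded.
//  The hzWebhost instance is assumed to be fully configured before the call.
void FetchOne (hzWebhost& host, const char* addr)
{
    hzUrl       url ;       //  Target address
    hzDocument* pDoc ;      //  Document returned by Download (null on failure)
    hzDocHtml*  pHdoc ;     //  HTML view of the document, if applicable

    url = addr ;            //  Assumption: hzUrl accepts assignment from a C-string

    pDoc = host.Download(url) ;
    if (!pDoc)
    {
        threadLog("Could not download %s\n", *url) ;
        return ;
    }

    //  Assumption: hzDocument has virtual members, so dynamic_cast is legal
    pHdoc = dynamic_cast<hzDocHtml*>(pDoc) ;
    if (pHdoc)
        threadLog("Page %s has %d links\n", *url, pHdoc->m_vecLinks.Count()) ;
}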