Get an HTTP page from a website. Note that the whole page is either retrieved or abandoned before this function returns. Some servers send pages with the header 'Transfer-Encoding: chunked' instead of the 'Content-Length:' header. This is done because the size of the page is not known at the start of transmission. The body part of the message is sent in chunks, with the chunk size given (in hex, on a line by itself) at the start of each chunk. Because of the existence of the chunked approach, this function has to handle it, but it is currently not possible for applications to take advantage of it in the intended way. Instead, applications calling this function have to wait until it returns with a complete page, however long that takes! Note that no assumptions can be made about the packets that are sent, except that since the connection is TCP, they will arrive in order. The header may be composed of a number of whole packets, or a packet may straddle the end of the header and the start of the contents.
| Return Type | Function name | Arguments |
|---|---|---|
| hzEcode | hzHttpClient::GetPage | (HttpRC&,hzUrl&,hzString&) |
Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp
Function Logic:
Function body:
hzEcode hzHttpClient::GetPage (HttpRC& hRet, hzUrl& url, hzString& etag)
{
	//	Get an HTTP page from a website. Note that the whole page is either retrieved or abandoned before this function returns. Some servers send
	//	pages with the header 'Transfer-Encoding: chunked' instead of the 'Content-Length:' header. This is done because the size of the page is not
	//	known at the start of transmission. The body part of the message is sent in chunks, with the chunk size given (in hex, on a line by itself)
	//	at the start of each chunk. Because of the existence of the chunked approach, this function has to handle it, but it is currently not
	//	possible for applications to take advantage of it in the intended way. Instead, applications calling this function have to wait until it
	//	returns with a complete page, however long that takes!
	//
	//	Note that no assumptions can be made about the packets that are sent, except that since the connection is TCP, they will arrive in order.
	//	The header may be composed of a number of whole packets, or a packet may straddle the end of the header and the start of the contents.
	//
	//	If the server answers with a permanent or temporary redirect, the redirection is followed (repeatedly if need be) until a non-redirect
	//	response arrives, an error occurs, or the redirect limit is exceeded.
	//
	//	Arguments:	1)	hRet	HTTP return code from the server.
	//				2)	url		The URL of the page to retrieve.
	//				3)	etag	Page entity tag (as maintained by hzWebhost instance)
	//
	//	Returns:	E_ARGUMENT	If the URL is not supplied or no domain specified
	//				E_NOSOCKET	If the external server has closed the connection
	//				E_NODATA	If nothing was received
	//				E_FORMAT	If the response was malformed. This includes a redirect response that carries no redirect URL, and a chain of
	//							redirections longer than this function is prepared to follow.
	//				E_OK		If the response was received without error
	//
	//	Any other error code reported by _getpage() is passed back unchanged.

	_hzfunc("hzHttpClient::GetPage") ;

	//	Upper bound on the number of redirections followed for one call. Without a bound, a server (or pair of servers) that redirects in a cycle
	//	would keep this function looping forever.
	//	NOTE(review): the limit of 20 and the use of E_FORMAT for this case are choices made here - confirm against project conventions.
	const int	maxRedirects = 20 ;

	hzUrl		dest ;				//	Actual URL for downloading - may be result of a redirection
	hzString	dom ;				//	Domain of the URL last fetched, used to resolve a relative (leading '/') redirect
	hzString	etag2 ;				//	Left unset and passed to _getpage() in place of the caller's etag when following a redirection
	hzEcode		rc = E_OK ;			//	Return code
	int			nRedirects = 0 ;	//	Number of redirections followed so far

	//	Considered a top-level function so we clear the error chain
	m_Error.Clear() ;
	m_Error.Printf("GETTING PAGE %s\n", *url) ;

	//	Initial request. The request/response timestamps bracket this first fetch only; they are not updated by later redirections.
	dest = url ;
	m_rtRequest = RealtimeNano() ;
	rc = _getpage(hRet, dest, etag) ;
	m_rtResponse = RealtimeNano() ;

	if (rc != E_OK)
	{
		m_Error.Printf("ABORTED (_getpage failure)\n") ;
		return rc ;
	}

	//	Follow redirections until the server returns something other than a redirect
	while (hRet == HTTPMSG_REDIRECT_PERM || hRet == HTTPMSG_REDIRECT_TEMP)
	{
		if (!m_Redirect)
		{
			//	A redirect status without a target cannot be followed. Nothing in this loop would change hRet, so we must leave here rather
			//	than iterate again (the loop would otherwise never terminate).
			m_Error.Printf("Oops - no URL to redirect to\n") ;
			return E_FORMAT ;
		}

		nRedirects++ ;
		if (nRedirects > maxRedirects)
		{
			m_Error.Printf("Redirect FAILED (more than %d redirections)\n", maxRedirects) ;
			return E_FORMAT ;
		}

		if (m_Redirect[0] == CHAR_FWSLASH)
		{
			//	Redirect target is a path only, so it applies to the domain of the URL just fetched
			dom = dest.Domain() ;
			dest.SetValue(dom, m_Redirect) ;
		}
		else
			dest = m_Redirect ;

		m_Error.Printf("redirecting to %s\n", *dest) ;

		rc = _getpage(hRet, dest, etag2) ;
		if (rc != E_OK)
		{
			m_Error.Printf("Redirect FAILED (error=%s)\n", Err2Txt(rc)) ;
			return rc ;
		}
	}

	m_Error.Printf("Got response %d (size %d bytes)\n", hRet, m_Content.Size()) ;
	return rc ;
}