Get an HTTP page from a website. Note that the whole page is either retrieved or abandoned before this function returns. Some servers send pages with the header 'Transfer-Encoding: chunked' instead of the 'Content-Length:' header. This is done because the size of the page is not known at the start of transmission. The body of the message is sent in chunks, with the chunk size given (in hex, on a line by itself) at the start of each chunk. Because the chunked approach exists, this function has to handle it, but it is currently not possible for applications to take advantage of it in the intended way. Instead, applications calling this function have to wait until it returns with a complete page, however long that takes! Note that no assumptions can be made about the packets that are sent, except that since the connection is TCP they will arrive in order. The header may comprise a number of whole packets, or a single packet may straddle the end of the header and the start of the content.

Return Type | Function name | Arguments
hzEcode | hzHttpClient::GetPage | (HttpRC&, hzUrl&, hzString&)

Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp

Function Logic:

0:START 1:items items dest m_rtRequest rc m_rtResponse 2:unknown 3:items 4:Return rc 5:unknown 6:unknown 7:items 8:unknown 9:dom items 10:dest 11:items rc 12:unknown 13:items 14:Return rc 15:items 16:Return rc

Function body:

hzEcode hzHttpClient::GetPage (HttpRC& hRet, hzUrl& url, hzString& etag)
{
   //  Get an HTTP page from a website. Note that the whole page is either retrieved or abandoned before this function returns. Some servers send
   //  pages with the header 'Transfer-Encoding: chunked' instead of the 'Content-Length:' header. This is done because the size of the page is not
   //  known at the start of transmission. The body of the message is sent in chunks, with the chunk size given (in hex, on a line by itself) at the
   //  start of each chunk. Because the chunked approach exists this function has to handle it, but it is currently not possible for applications to
   //  take advantage of it in the intended way. Instead applications calling this function have to wait until it returns with a complete page,
   //  however long that takes.
   //  
   //  Note that no assumptions can be made about the packets that are sent, except that since the connection is TCP they will arrive in order. The
   //  header may comprise a number of whole packets, or a single packet may straddle the end of the header and the start of the content.
   //  
   //  The actual fetch is delegated to _getpage(). While the server answers with a permanent or temporary redirect, the redirect target held in
   //  m_Redirect is followed: a target starting with '/' is resolved against the domain of the URL just fetched, anything else is used as the
   //  complete new URL. Redirected requests are sent with an empty entity tag rather than the caller's etag. At most maxRedirects hops are followed
   //  so that a cyclic redirect chain cannot stall the caller; if the limit is reached the loop stops and hRet is left holding the last redirect
   //  code.
   //  
   //  Arguments: 1) hRet  HTTP return code from the server (set by _getpage).
   //     2) url  The URL of the page to retrieve.
   //     3) etag  Page entity tag (as maintained by hzWebhost instance)
   //  
   //  Returns: E_ARGUMENT If the URL is not supplied or no domain specified
   //     E_NOSOCKET If the external server has closed the connection
   //     E_NODATA If nothing was received
   //     E_FORMAT If the response was malformed (including a redirect response that names no target)
   //     E_OK  If the response was received without error
   //  
   //  NOTE(review): apart from the missing-redirect-target case, every error code is simply passed through from _getpage(), which is not visible
   //  here - confirm the list above against that function.

   _hzfunc("hzHttpClient::GetPage") ;

   const int   maxRedirects = 20 ;     //  Upper bound on redirect hops followed for a single call

   hzUrl       dest ;                  //  Actual URL for downloading - may be result of a redirection
   hzString    dom ;                   //  Domain of the URL just fetched, used to resolve a redirect target that starts with '/'
   hzString    etag2 ;                 //  Left empty: redirected requests are sent to _getpage() without an entity tag
   hzEcode     rc = E_OK ;             //  Return code
   int         nRedirects = 0 ;        //  Number of redirect hops followed so far

   //  Considered a top-level function so we clear the error chain
   m_Error.Clear() ;
   m_Error.Printf("GETTING PAGE %s\n", *url) ;

   //  Initial request. The request/response timestamps bracket this first exchange only.
   dest = url ;
   m_rtRequest = RealtimeNano() ;
   rc = _getpage(hRet, dest, etag) ;
   m_rtResponse = RealtimeNano() ;

   if (rc != E_OK)
   {
       m_Error.Printf("ABORTED (_getpage failure)\n") ;
       return rc ;
   }

   //  Follow redirects until the server answers with something other than a redirect
   for (; hRet == HTTPMSG_REDIRECT_PERM || hRet == HTTPMSG_REDIRECT_TEMP ;)
   {
       if (!m_Redirect)
       {
           //  A redirect without a target can never make progress. Previously this only logged and the loop then span forever because hRet was
           //  unchanged. Treat it as a malformed response instead.
           m_Error.Printf("Oops - no URL to redirect to\n") ;
           return E_FORMAT ;
       }

       if (nRedirects >= maxRedirects)
       {
           //  Probable redirect cycle. Stop here; hRet still holds the redirect code so the caller can see what happened.
           m_Error.Printf("Too many redirects (limit %d) - giving up at %s\n", maxRedirects, *dest) ;
           break ;
       }
       nRedirects++ ;

       if (m_Redirect[0]== CHAR_FWSLASH)
       {
           //  Target is a path only, so keep the domain of the URL that issued the redirect
           dom = dest.Domain() ;
           dest.SetValue(dom, m_Redirect) ;
       }
       else
           dest = m_Redirect ;

       m_Error.Printf("redirecting to %s\n", *dest) ;
       rc = _getpage(hRet, dest, etag2) ;

       if (rc != E_OK)
       {
           m_Error.Printf("Redirect FAILED (error=%s)\n", Err2Txt(rc)) ;
           return rc ;
       }
   }

   m_Error.Printf("Got response %d (size %d bytes)\n", hRet, m_Content.Size()) ;
   return rc ;
}