In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created and existing pages can be modified on an ad-hoc basis. The RSS feeds allow greater ease when syncing an external website to the local machine. By periodically reading one or more RSS feeds one can obtain a set of links which can generally be taken as the set of pages deemed 'current' by the website. By comparing these links to a history file of already fetched links, new pages can be added to a repository as they appear on the site. The RSS feeds are just XML files containing links. This function will obtain all the RSS feeds from the site, garner all the links from them and then download any pages from the links that are not already in the site history. The feeds themselves are not saved as these will be fetched again. Arguments: None

Return type: hzEcode
Function name: hzWebhost::GetRSS
Arguments: (void)

Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp

Function Logic:

0:START 1:items rc 2:unknown 3:items 4:Return rc 5:unknown 6:items 7:Return E_NOINIT 8:unknown 9:m_tagItem m_tagItem m_tagItem 10:unknown 11:m_tagUqid m_tagUqid m_tagUqid 12:unknown 13:m_tagLink m_tagLink m_tagLink 14:unknown 15:m_tagDesc m_tagDesc m_tagDesc 16:unknown 17:m_tagDate m_tagDate m_tagDate 18:unknown 19:feed rc items 20:rc 21:Return rc

Function body:

hzEcode hzWebhost::GetRSS (void)
{
   //  In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created
   //  and existing pages can be modified on an ad-hoc basis. The RSS feeds allow greater ease when syncing an external website to the
   //  local machine. By periodically reading one or more RSS feeds one can obtain a set of links which can generally be taken as the
   //  set of pages deemed 'current' by the website. By comparing these links to a history file of already fetched links, new pages
   //  can be added to a repository as they appear on the site. The RSS feeds are just XML files containing links.
   //  
   //  This function will obtain all the RSS feeds from the site, garner all the links from them and then download any pages from the
   //  links that are not already in the site history. The feeds themselves are not saved as these will be fetched again.
   //  
   //  Arguments: None
   //  
   //  Returns: E_NOINIT If the repository for the webhost has not previously been defined
   //     E_OPENFAIL If the visit status file could not be opened
   //     E_NODATA If the download failed
   //     E_TYPE  If the downloaded material does not appear to be XML
   //     E_FORMAT If the downloaded material could not be loaded into an XML document
   //     E_ARGUMENT If the RSS tags are not defined
   //     E_NOTFOUND If no tags were found in the RSS
   //     E_OK  If the RSS data was collected

   _hzfunc("hzWebhost::GetRSS") ;

   hzList<hzUrl>::Iter fi ;        //  RSS feeds iterator
   hzUrl       feed ;              //  Temp link
   HttpRC      hRet ;              //  HTML return code
   hzEcode     rc = E_OK ;         //  Return code
   hzEcode     feedErr = E_OK ;    //  First error encountered while fetching feeds (preserved across the loop)

   threadLog("Called\n") ;

   //  Login first: even if the session already exists we need the cookie for the feed fetches
   rc = Login() ;
   if (rc != E_OK)
       { threadLog("Login failed\n") ; return rc ; }

   //  Get the home page if one applies. Do this regardless of whether we already have it because we need the cookie
   if (!m_Feeds.Count())
       { threadLog("Website has no starting point (URL) for an RSS feed.\n") ; return E_NOINIT ; }

   //  If XML selectors for RSS feed are not initialized, default them to the standard RSS 2.0 tag names
   if (!m_tagItem.m_Slct)  { m_tagItem.m_Filt = (char*) 0; m_tagItem.m_Info = "node" ; m_tagItem.m_Slct = "item" ; }
   if (!m_tagUqid.m_Slct)  { m_tagUqid.m_Filt = (char*) 0; m_tagUqid.m_Info = "node" ; m_tagUqid.m_Slct = "guid" ; }
   if (!m_tagLink.m_Slct)  { m_tagLink.m_Filt = (char*) 0; m_tagLink.m_Info = "node" ; m_tagLink.m_Slct = "link" ; }
   if (!m_tagDesc.m_Slct)  { m_tagDesc.m_Filt = (char*) 0; m_tagDesc.m_Info = "node" ; m_tagDesc.m_Slct = "description" ; }
   if (!m_tagDate.m_Slct)  { m_tagDate.m_Filt = (char*) 0; m_tagDate.m_Info = "node" ; m_tagDate.m_Slct = "pubDate" ; }

   /*
   **  Fetch all the feed XML documents from the RSS source(s). A failure on one feed is logged and
   **  remembered but does not abort processing of the remaining feeds. Previously the per-feed error
   **  code was overwritten on each iteration and then clobbered by _savestatus(), so the documented
   **  error returns (E_NODATA, E_TYPE, E_FORMAT, etc.) could never reach the caller.
   */
   for (fi = m_Feeds ; fi.Valid() ; fi++)
   {
       feed = fi.Element() ;

       //  Get the feed
       rc = getRss_r(hRet, feed, 0) ;
       if (rc != E_OK)
       {
           threadLog("Feed fetch failed\n") ;
           if (feedErr == E_OK)
               feedErr = rc ;      //  Remember only the FIRST failure so it can be reported to the caller
       }
       threadLog("Processed items\n") ;
   }

   //  Write out visit status file. A status-file failure takes precedence as it affects all future runs.
   rc = _savestatus() ;
   if (rc != E_OK)
       return rc ;

   return feedErr ;
}