In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created and existing pages can be modified on an ad-hoc basis. The RSS feeds allow greater ease when syncing an external website to the local machine. By periodically reading one or more RSS feeds one can obtain a set of links which can generally be taken as the set of pages deemed 'current' by the website. By comparing these links to a history file of already fetched links, new pages can be added to a repository as they appear on the site. The RSS feeds are just XML files containing links. This function will obtain all the RSS feeds from the site, garner all the links from them and then download any pages from the links that are not already in the site history. The feeds themselves are not saved as these will be fetched again. Arguments: None
| Return Type | Function name | Arguments |
|---|---|---|
| hzEcode | hzWebhost::GetRSS | (void) |
Declared in file: hzHttpClient.h
Defined in file : hzHttpClient.cpp
Function Logic:
Function body:
hzEcode hzWebhost::GetRSS (void)
{
	// In general a website can be thought of as a source of 'rolling' news updates in which old pages are deleted, new pages created
	// and existing pages can be modified on an ad-hoc basis. The RSS feeds allow greater ease when syncing an external website to the
	// local machine. By periodically reading one or more RSS feeds one can obtain a set of links which can generally be taken as the
	// set of pages deemed 'current' by the website. By comparing these links to a history file of already fetched links, new pages
	// can be added to a repository as they appear on the site. The RSS feeds are just XML files containing links.
	//
	// This function will obtain all the RSS feeds from the site, garner all the links from them and then download any pages from the
	// links that are not already in the site history. The feeds themselves are not saved as these will be fetched again.
	//
	// A failure on one feed does not abort the run: the remaining feeds are still processed and the visit status file is still
	// written, but the first feed error encountered is returned in preference to the save-status result so callers are not left
	// believing the sync was complete.
	//
	// Arguments:	None
	//
	// Returns:	E_NOINIT	If the repository for the webhost has not previously been defined
	//		E_OPENFAIL	If the visit status file could not be opened
	//		E_NODATA	If the download failed
	//		E_TYPE		If the downloaded material does not appear to be XML
	//		E_FORMAT	If the downloaded material could not be loaded into an XML document
	//		E_ARGUMENT	If the RSS tags are not defined
	//		E_NOTFOUND	If no tags were found in the RSS
	//		E_OK		If the RSS data was collected

	_hzfunc("hzWebhost::GetRSS") ;

	hzList<hzUrl>::Iter	fi ;			// RSS feeds iterator
	hzUrl		feed ;				// Temp link
	HttpRC		hRet ;				// HTML return code
	hzEcode		rc = E_OK ;			// Return code
	hzEcode		errFeed = E_OK ;		// First feed failure encountered, if any

	threadLog("Called\n") ;

	// Login first: some sites require a session cookie before they will serve the feeds
	rc = Login() ;
	if (rc != E_OK)
		{ threadLog("Login failed\n") ; return rc ; }

	// Without at least one feed URL there is nothing to sync
	if (!m_Feeds.Count())
		{ threadLog("Website has no starting point (URL) for an RSS feed.\n") ; return E_NOINIT ; }

	// If XML selectors for RSS feed are not initialized, apply the standard RSS 2.0 element names here
	if (!m_tagItem.m_Slct)	{ m_tagItem.m_Filt = (char*) 0; m_tagItem.m_Info = "node" ; m_tagItem.m_Slct = "item" ; }
	if (!m_tagUqid.m_Slct)	{ m_tagUqid.m_Filt = (char*) 0; m_tagUqid.m_Info = "node" ; m_tagUqid.m_Slct = "guid" ; }
	if (!m_tagLink.m_Slct)	{ m_tagLink.m_Filt = (char*) 0; m_tagLink.m_Info = "node" ; m_tagLink.m_Slct = "link" ; }
	if (!m_tagDesc.m_Slct)	{ m_tagDesc.m_Filt = (char*) 0; m_tagDesc.m_Info = "node" ; m_tagDesc.m_Slct = "description" ; }
	if (!m_tagDate.m_Slct)	{ m_tagDate.m_Filt = (char*) 0; m_tagDate.m_Info = "node" ; m_tagDate.m_Slct = "pubDate" ; }

	/*
	**	Fetch all the feed XML documents from the RSS source(s)
	*/

	for (fi = m_Feeds ; fi.Valid() ; fi++)
	{
		feed = fi.Element() ;

		// Get the feed. On failure, remember the first error but carry on with the remaining feeds
		rc = getRss_r(hRet, feed, 0) ;
		if (rc != E_OK)
		{
			threadLog("Feed fetch/processing failed\n") ;
			if (errFeed == E_OK)
				errFeed = rc ;
		}

		threadLog("Processed items\n") ;
	}

	// Write out visit status file regardless, so any pages successfully fetched are recorded
	rc = _savestatus() ;

	// Report the first feed failure in preference to the save-status result
	return errFeed != E_OK ? errFeed : rc ;
}