Find all links on a page lying within a set of acceptable domains and matching any supplied criteria. These are aggregated to the supplied map of link URLs to link content. If no domains or criteria are supplied, all the links in the page will be aggregated. Note that the links in a page are established in the Load() function. This function merely filters them. It does not read the page content.

Return Type | Function name | Arguments
uint32_t | hzDocHtml::ExtractLinksContent | (hzMapS<hzUrl,hzString>&, hzSet<hzString>&, hzString&)

Declared in file: hzDocument.h
Defined in file : hzDocHtml.cpp

Function Logic:

0:START 1:items 2:unknown 3:pElement 4:unknown 5:unknown 6:anam 7:unknown 8:link 9:unknown 10:unknown 11:unknown 12:unknown 13:unknown 14:S items 15:Return links.Count()

Function body:

uint32_t hzDocHtml::ExtractLinksContent (hzMapS<hzUrl,hzString>& links)hzSet<hzString>& domains, hzString& criteria, 
{
   //  Find all links on a page lying within a set of acceptable domains and matching any supplied criteria. These are aggregated to the supplied map of link
   //  URLs to link content. If no domains or criteria are supplied, all the links in the page will be aggregated.
   //  
   //  Note the links in a page are established in the Load() function. This function meerly filters them. It does not read the page content.
   //  
   //  Arguments: 1) links: The vector or set of URLs (links) found in the document
   //     2) domains: The set of domains that links must belong to in order to be included
   //     3) form: The search criteria is any
   //  
   //  Returns: Number of links that meet the supplied criteria
   hzHtmElem*      pElement ;  //  HTML node
   hzAttrset       ai ;        //  Attribute iterator
   hzString        anam ;      //  Attribute name
   hzString        S ;         //  Content of link node
   hzUrl           link ;      //  URL of link
   uint32_t        nIndex ;    //  Links iterator
   links.Clear() ;
   for (nIndex = 0; nIndex < m_vecTags.Count() ; nIndex++)
   {
       pElement = m_vecTags[nIndex] ;
       if (pElement->Type() != HTAG_ANCHOR)
           continue ;
       //  for (pm = pElement->GetFirstAttr() ; pm ; pm = pm->next)
       for (ai = pElement ; ai.Valid() ; ai.Advance())
       {
           anam = ai.Name() ;
           if (anam.Equiv("href"))
           {
               link = ai.Value() ;
               //  Ignore empty links (should not be any)
               if (!link)
                   continue ;
               //  Ignore links to domains not on the list of acceptable domains (usually the website domain only)
               if (domains.Count())
               {
                   if (!domains.Exists(link.Domain()))
                       continue ;
               }
               //  Enforce limiting criteria
               if (criteria)
               {
                   if (!FormCheckCstr(*link, *criteria))
                       continue ;
               }
               S = pElement->m_tmpContent ;
               links.Insert(link, S) ;
           }
       }
   }
   return links.Count() ;
}