Go through all pages found in the config files and index them. Arguments: None.
| Return Type | Function name | Arguments |
|---|---|---|
| hzEcode | hdsApp::IndexPages | (void) |
Declared in file: hzDissemino.h
Defined in file: hdsResource.cpp
Function Logic:
Function body:
hzEcode	hdsApp::IndexPages	(void)
{
	//	Go through all pages found in the config files and index them.
	//
	//	Each entry in m_ResourcesName that is an hdsPage with non-empty body text has its title, description and body text concatenated (separated by
	//	newlines) into a single chain. The chain is tokenized (TOK_MO_WHITE mode) and every token with a value is inserted into m_PageIndex against the
	//	entry's position in m_ResourcesName.
	//
	//	Processing stops at the first page that either fails to tokenize or fails an index insert, so that the error is reported to the caller rather
	//	than being overwritten by the outcome of a later page.
	//
	//	Arguments:	None
	//
	//	Returns:	E_FORMAT	If any page could not be tokenized (as stated by the original note; the value is whatever TokenizeChain returns)
	//				E_OK		If the pages were indexed
	//				Otherwise the error code returned by the failed m_PageIndex.Insert call

	_hzfunc("hdsApp::IndexPages") ;

	hzVect<hzToken>	toks ;			//	Token list for the current page
	hzChain			pageVal ;		//	Title, description and body text of the current page, tokenized to get the words to index
	hzToken			T ;				//	Current token
	hdsResource*	pRes ;			//	Resource under consideration
	hdsPage*		pPage ;			//	Resource as a page (null if the resource is not a page)
	uint32_t		nD ;			//	Document number (position of the resource in m_ResourcesName)
	uint32_t		nCount ;		//	Token iterator
	uint32_t		nDone ;			//	Count of successful inserts for the current page
	hzEcode			rc = E_OK ;		//	Return code

	for (nD = 0 ; nD < m_ResourcesName.Count() ; nD++)
	{
		//	Only pages with body text are indexed
		pRes = m_ResourcesName.GetObj(nD) ;
		pPage = dynamic_cast<hdsPage*>(pRes) ;
		if (!pPage)
			continue ;
		if (!pPage->m_Bodytext.Size())
			continue ;

		//	Assemble the indexable content: the page title, the description and the body text, newline separated so that the last word of one part
		//	cannot merge with the first word of the next.
		pageVal.Clear() ;
		pageVal << pPage->m_Title ;
		pageVal.AddByte(CHAR_NL) ;
		pageVal << pPage->m_Desc ;
		pageVal.AddByte(CHAR_NL) ;
		pageVal << pPage->m_Bodytext ;

		//	NOTE(review): toks is reused across pages without an explicit clear here - presumably TokenizeChain resets it; confirm.
		rc = TokenizeChain(toks, pageVal, TOK_MO_WHITE) ;
		if (rc != E_OK)
		{
			m_pLog->Out("Abandoning indexation of page %s (%s)\n", *pPage->m_Url, *pPage->m_Title) ;
			break ;
		}

		//	Insert every token that has a value. nDone is only advanced once the insert has succeeded so that the log reports real inserts.
		nDone = 0 ;
		for (nCount = 0 ; nCount < toks.Count() ; nCount++)
		{
			T = toks[nCount] ;
			if (!T.Value())
				continue ;

			rc = m_PageIndex.Insert(T.Value(), nD) ;
			if (rc != E_OK)
				break ;
			nDone++ ;
		}

		m_pLog->Out("Indexing page %s (%s), %d of %d tokens\n", *pPage->m_Url, *pPage->m_Title, nDone, toks.Count()) ;

		//	An insert failure must not be masked by the next page's tokenization result, so stop here and return it.
		if (rc != E_OK)
		{
			m_pLog->Out("Index insert failed on page %s (%s), abandoning indexation\n", *pPage->m_Url, *pPage->m_Title) ;
			break ;
		}
	}

	return rc ;
}