Perform a text extraction. The scenario will be that the input will be a file bound to the event (an uploaded file in a form submission). The m_Input member will name the field carrying the file. ...

Return Type | Function name | Arguments
hzEcode | hdsExec::Extract | (hzChain&, hzHttpEvent*)

Declared in file: hzDissemino.h
Defined in file : hdsExec.cpp

Function Logic:

0:START 1:m_Input m_Target items i 2:unknown 3:items 4:Return E_NOTFOUND 5:i 6:fldname 7:unknown 8:hf items 9:unknown 10:items 11:unknown 12:unknown 13:unknown 14:items 15:unknown 16:unknown 17:items 18:unknown 19:items 20:items 21:unknown 22:items 23:rc items 24:unknown 25:items 26:unknown 27:pN E items E 28:items items 29:Return rc

Function body:

hzEcode hdsExec::Extract (hzChain& errorReport, hzHttpEvent* pE)
{
    //  Perform a text extraction. The input is a file bound to the event (an uploaded file in a form submission). The first exec parameter names
    //  the field carrying the file and must be of the form "%e:fieldname". The second exec parameter names the event variable that receives the
    //  extracted text.
    //
    //  Only uploads whose mime type is HMTYPE_APP_OPEN_DOCX are processed. For these, the upload is passed through Gunzip, the <w:document> ...
    //  </w:document> section is isolated and loaded as XML, and the content of every <w:t> node is written to the output wrapped in <p> tags.
    //
    //  Arguments: 1) errorReport The error report chain (receives progress and diagnostic messages)
    //             2) pE          The HTTP event pointer
    //
    //  Returns:   E_NOTFOUND If the input parameter is empty or does not begin with "%e:"
    //             E_OK       If the named upload is absent or is not a DOCX file (nothing is extracted), or if the extraction succeeded
    //             Otherwise  The status returned by the XML load of the <w:document> section

    _hzfunc("hdsExec::Extract") ;

    hzVect<hzXmlNode*>  vx ;            //  Nodes containing text
    hzDocXml            xdoc ;          //  Loader for the isolated <w:document> section
    hzHttpFile          hf ;            //  File meta data from event
    hzChain             Z ;             //  Unzipped docx file
    hzChain             D ;             //  Part of docx within and including <w:document> tags
    hzChain             E ;             //  For export of XML doc
    chIter              zi ;            //  For iteration (aim to ignore large parts of Microsoft formats)
    hzXmlNode*          pN ;            //  Node pointer
    const char*         i ;             //  Raw text of the input parameter
    hzString            inputSpec ;     //  Exec parameter naming the source ("%e:fieldname")
    hzString            targetVar ;     //  Exec parameter naming the event variable to populate
    hzString            fldname ;       //  Name of field carrying the submitted file
    uint32_t            n ;             //  XML node counter
    hzEcode             rc = E_OK ;     //  Return code (initialised so that every exit path returns a defined value)

    inputSpec = m_pApp->m_ExecParams[m_FstParam] ;
    targetVar = m_pApp->m_ExecParams[m_FstParam+1] ;
    errorReport.Printf("Extracting from %s to %s\n", *inputSpec, *targetVar) ;

    //  The input must be an event field reference. strncmp stops at the terminator so a short parameter is not over-read, and the null test
    //  covers an empty parameter.
    i = *inputSpec ;
    if (!i || strncmp(i, "%e:", 3))
    {
        errorReport.Printf("Not an identifiable input\n") ;
        return E_NOTFOUND ;
    }
    fldname = i + 3 ;

    if (!pE->m_Uploads.Exists(fldname))
    {
        errorReport.Printf("No uploaded file found for field %s\n", *fldname) ;
        return rc ;
    }

    hf = pE->m_Uploads[fldname] ;
    errorReport.Printf("Found input (fld %s file %s of %d bytes) mime=%d\n", *hf.m_fldname, *hf.m_filename, hf.m_file.Size(), hf.m_mime) ;

    if (hf.m_mime != HMTYPE_APP_OPEN_DOCX)
    {
        errorReport.Printf("Upload is not a DOCX file, nothing extracted\n") ;
        return rc ;
    }

    //  Report a decompression failure rather than discarding it. Processing continues so the target variable is still set (to an empty chain),
    //  as it was before this status was checked.
    rc = Gunzip(Z, hf.m_file) ;
    if (rc != E_OK)
        errorReport.Printf("Gunzip failed err=%s\n", Err2Txt(rc)) ;

    //  Copy the <w:document> ... </w:document> section from Z to D, placing a newline before every '<' and after every '>' so each tag sits on
    //  its own line.
    for (zi = Z ; !zi.eof() ; zi++)
    {
        if (*zi != CHAR_LESS)
            continue ;
        if (!(zi == "<w:document"))
            continue ;

        D << "<w:document" ;

        //  11 is the length of "<w:document"
        for (zi += 11 ; !zi.eof() ; zi++)
        {
            if (*zi == CHAR_LESS)
            {
                D.AddByte(CHAR_NL) ;
                if (zi == "</w:document>")
                {
                    D << "</w:document>\n" ;
                    break ;
                }
            }

            D.AddByte(*zi) ;
            if (*zi == CHAR_MORE)
                D.AddByte(CHAR_NL) ;
        }

        //  Only the first <w:document> section is of interest
        break ;
    }

    rc = xdoc.Load(D) ;
    errorReport.Printf("%s. XML load status err=%s\n", __func__, Err2Txt(rc)) ;

    if (rc == E_OK)
    {
        //  Each <w:t> node holds a run of text; emit each as a paragraph
        xdoc.FindNodes(vx, "w:t") ;
        for (n = 0 ; n < vx.Count() ; n++)
        {
            pN = vx[n] ;
            E += "<p>\n" ;
            E << pN->m_fixContent ;
            E += "\n</p>\n" ;
        }
    }

    //  The target variable is set even when the load failed, in which case E is empty
    errorReport.Printf("Setting var %s with chain of %d bytes\n", *targetVar, E.Size()) ;
    pE->SetVarChain(targetVar, E) ;

    return rc ;
}