Determine if the supplied chain-iterator is at the start of a valid URL. Note this does not bypass leading whitespace and it allows a terminating period if this is followed by either whitespace, a non-URL character or end of file.

Return Type | Function name | Arguments
bool | IsUrl | (hzUrl&, uint32_t&, hzChain::Iter&)

Declared and defined in file: hzUrl.cpp

Function Logic:

0:START 1:items nLen 2:unknown 3:Return false 4:unknown 5:unknown 6:unknown 7:items nLen xi 8:unknown 9:items nLen xi 10:unknown 11:items 12:unknown 13:items items 14:unknown 15:Return false 16:nPeriodCont items 17:unknown 18:Return false 19:unknown 20:items 21:unknown 22:Return false 23:unknown 24:nPort * nPort 25:unknown 26:Return false 27:unknown 28:unknown 29:unknown 30:items 31:items 32:unknown 33:unknown 34:unknown 35:items 36:unknown 37:Return false 38:items 39:unknown 40:Return false 41:nLen 42:unknown 43:items 44:unknown 45:items 46:S url 47:unknown 48:Return false 49:nLen 50:Return false

Function body:

bool IsUrl (hzUrl& url)uint32_t& nLen, hzChain::Iter& ci, 
{
   //  Category: Text processing
   //  
   //  Determine if the supplied chain-iterator is at the start of a valid URL. Note this does not bypass leading whitespace and it
   //  allows a terminating period if this is followed by either whitespace, a non-URL character or end of file.
   //  
   //  Arguments: 1) url  A hzUrl reference; Populated by chain content if that content is of the form of a URL
   //     2) nLen The string length used to make the URL. This is usually needed by the calling function to advance the chain
   //        iterator in the event that a URL is found
   //     3) ci  The chain iterator into the content being tested.
   //  
   //  Returns: True If the chain iterator is at the start of a valid URL
   //     False Otherwise
   _hzfunc("IsUrl") ;
   hzChain     W ;                 //  For building tokens
   chIter      xi ;                //  Iterator
   hzString    S ;                 //  Token as a string
   uint32_t    nPeriod = 0;        //  Number of periods
   uint32_t    nPeriodCont = 0;    //  Number of contiguous periods
   uint32_t    nAlpha = 0;     //  Number of periods
   uint32_t    nPort = 0;          //  Port number
   url.Clear() ;
   nLen = 0;
   if (ci.eof())
       return false ;
   //  Strip leading spaces
   for (xi = ci ; !xi.eof() && *xi <&eq; CHAR_SPACE ; xi++) ;
   //  Remove http:// or https://
   if (*xi == ''h'')
   {
       if (xi == "http://")
           { W << "http://" ; nLen = 7; xi += 7; }
       if (xi == "https://")
           { W << "https://" ; nLen = 8; xi += 8; }
   }
   //  Read up to the end of the domain name. This could be the end of the test string or it could be a forward slash or a colon (for
   //  the port number). This part cannot legally end with a period but it could have a period on the end if the URL was the last word
   //  in a sentence for example.
   for (; !xi.eof() && IsUrlnorm(*xi) ; xi++)
   {
       W.AddByte(*xi) ;
       if (*xi == CHAR_PERIOD)
       {
           nPeriod++ ;
           nPeriodCont++ ;
           if (nPeriodCont == 2)
               return false ;
       }
       else
       {
           nPeriodCont = 0;
           nAlpha++ ;
       }
   }
   if (nAlpha < 3|| nPeriod < 2)
       return false ;
   //  Check for port number
   if (*xi == CHAR_COLON)
   {
       xi++ ;
       if (!IsDigit(*xi))
           return false ;
       for (nPort = 0; !xi.eof() && IsDigit(*xi) ; xi++)
       {
           nPort *= 10;nPort += (*xi - ''0'');
       }
       if (nPort > 0x10000)
           return false ;
   }
   //  The URL may end here with any allowed incident punctuation char or space - or it may continue with a slash
   if (*xi == CHAR_FWSLASH)
   {
       for (xi++ ; !xi.eof() && IsUrlnorm(*xi) ; xi++)
       {
           if (*xi == CHAR_PERIOD)
               nPeriod++ ;
           else
               nAlpha++ ;
       }
       if (*xi == CHAR_QUERY)
       {
           for (xi++ ; !xi.eof() && IsUrlresv(*xi) ; xi++)
           {
               if (*xi == CHAR_PERCENT)
               {
                   xi++ ;
                   if (!IsHex(*xi))
                       return false ;
                   xi++ ;
                   if (!IsHex(*xi))
                       return false ;
                   nLen += 2;
               }
           }
       }
   }
   if (*xi <&eq; CHAR_SPACE)
   {
       xi-- ;
       if (*xi == CHAR_PERIOD)
           xi-- ;
   }
   //  ci.GetString(S, xi) ;
   S = W ;
   url = *S ;
   if (!url.Whole())
       return false ;
   nLen = S.Length() ;
   return false ;
}