Determine if the supplied chain-iterator is at the start of a valid URL. Note this does not bypass leading whitespace and it allows a terminating period if this is followed by either whitespace, a non-URL character or end of file.
| Return Type | Function name | Arguments |
|---|---|---|
| bool | IsUrl | (hzUrl&, uint32_t&, hzChain::Iter&) |
Declared and defined in file: hzUrl.cpp
Function Logic:
Function body:
bool IsUrl (hzUrl& url)uint32_t& nLen, hzChain::Iter& ci,
{
// Category: Text processing
//
// Determine if the supplied chain-iterator is at the start of a valid URL. Note this does not bypass leading whitespace and it
// allows a terminating period if this is followed by either whitespace, a non-URL character or end of file.
//
// Arguments: 1) url A hzUrl reference; Populated by chain content if that content is of the form of a URL
// 2) nLen The string length used to make the URL. This is usually needed by the calling function to advance the chain
// iterator in the event that a URL is found
// 3) ci The chain iterator into the content being tested.
//
// Returns: True If the chain iterator is at the start of a valid URL
// False Otherwise
_hzfunc("IsUrl") ;
hzChain W ; // For building tokens
chIter xi ; // Iterator
hzString S ; // Token as a string
uint32_t nPeriod = 0; // Number of periods
uint32_t nPeriodCont = 0; // Number of contiguous periods
uint32_t nAlpha = 0; // Number of periods
uint32_t nPort = 0; // Port number
url.Clear() ;
nLen = 0;
if (ci.eof())
return false ;
// Strip leading spaces
for (xi = ci ; !xi.eof() && *xi <&eq; CHAR_SPACE ; xi++) ;
// Remove http:// or https://
if (*xi == ''h'')
{
if (xi == "http://")
{ W << "http://" ; nLen = 7; xi += 7; }
if (xi == "https://")
{ W << "https://" ; nLen = 8; xi += 8; }
}
// Read up to the end of the domain name. This could be the end of the test string or it could be a forward slash or a colon (for
// the port number). This part cannot legally end with a period but it could have a period on the end if the URL was the last word
// in a sentence for example.
for (; !xi.eof() && IsUrlnorm(*xi) ; xi++)
{
W.AddByte(*xi) ;
if (*xi == CHAR_PERIOD)
{
nPeriod++ ;
nPeriodCont++ ;
if (nPeriodCont == 2)
return false ;
}
else
{
nPeriodCont = 0;
nAlpha++ ;
}
}
if (nAlpha < 3|| nPeriod < 2)
return false ;
// Check for port number
if (*xi == CHAR_COLON)
{
xi++ ;
if (!IsDigit(*xi))
return false ;
for (nPort = 0; !xi.eof() && IsDigit(*xi) ; xi++)
{
nPort *= 10;nPort += (*xi - ''0'');
}
if (nPort > 0x10000)
return false ;
}
// The URL may end here with any allowed incident punctuation char or space - or it may continue with a slash
if (*xi == CHAR_FWSLASH)
{
for (xi++ ; !xi.eof() && IsUrlnorm(*xi) ; xi++)
{
if (*xi == CHAR_PERIOD)
nPeriod++ ;
else
nAlpha++ ;
}
if (*xi == CHAR_QUERY)
{
for (xi++ ; !xi.eof() && IsUrlresv(*xi) ; xi++)
{
if (*xi == CHAR_PERCENT)
{
xi++ ;
if (!IsHex(*xi))
return false ;
xi++ ;
if (!IsHex(*xi))
return false ;
nLen += 2;
}
}
}
}
if (*xi <&eq; CHAR_SPACE)
{
xi-- ;
if (*xi == CHAR_PERIOD)
xi-- ;
}
// ci.GetString(S, xi) ;
S = W ;
url = *S ;
if (!url.Whole())
return false ;
nLen = S.Length() ;
return false ;
}