Convert a URL to a string suitable for a filename as used in webscraping. The following conversions occur: 1) The sequence http:// is converted to h: (but only if it occurs at the start). 2) The sequence https:// is converted to s: (but only if it occurs at the start). 3) The slash is converted to an @. 4) The @ (which should not exist) and any other non-URL or non-filename char is converted into %xx form. Note that no assumptions can be made about the input except that it may contain chars unsuitable for filenames (e.g. the forward slash). The encoding must therefore be reversible. This function assumes the chars a-z, A-Z, 0-9, the period and the underscore are the only valid filename chars. Any other char will be converted to a set of chars consisting of a percent sign and two hexadecimal digits. This means that when it comes to decoding, such a set will be converted to a single char. This would be fine if we could assume that no input would ever have such a sequence, but alas we cannot assume this. It is necessary therefore to convert percent chars in the input to a %hh set even if they are blatantly part of such a set already! Arguments: None
| Return Type | Function name | Arguments |
|---|---|---|
| hzString | hzUrl::Filename | (void) |
Declared in file: hzUrl.h
Defined in file : hzUrl.cpp
Function Logic:
Function body:
hzString hzUrl::Filename (void)
{
	// Convert this URL to a string suitable for use as a filename (as used in webscraping).
	//
	// The conversions applied by the code below are:-
	//
	//	1) A leading http:// is replaced by h:
	//	2) A leading https:// is replaced by s:
	//	3) A-Z is passed through conv2lower(); a-z and 0-9 are copied unchanged
	//	4) The forward slash is converted to an @
	//	5) The ampersand is converted to a colon
	//	6) The underscore, period, percent, equals, question mark, plus and minus are copied unchanged
	//	7) Every other byte (a literal @ or colon included) becomes a percent sign followed by two lower case hex digits
	//
	// NOTE(review): The original description of this function states that the encoding must be reversible and that percent chars in
	// the input are themselves converted to a %hh set. The code does not do this: the percent sign is copied unchanged (rule 6) and
	// upper case is folded (rule 3), so two different URLs can produce the same filename. The behaviour is deliberately left as it
	// was because callers may rely on the names currently generated - confirm which of the two is intended.
	//
	// Arguments:	None
	// Returns:		Instance of hzString being the URL in filename form. A default-constructed hzString if m_addr is not set.

	_hzfunc("hzUrl::Filename") ;

	hzChain		Z ;			// Used to construct the (longer) encoded string value
	_url_space*	thisCtl ;	// This URL space
	uchar*		i ;			// For iteration
	hzString	S ;			// Return string
	char		buf [4] ;	// For hex-conversion (two digits plus terminator)

	if (!m_addr)
		return S ;

	thisCtl = _urlXlate(m_addr) ;
	i = (uchar*) thisCtl->m_data ;

	// Abbreviate the scheme. The two tests are mutually exclusive and each is made against the start of the URL. strncmp is used
	// rather than memcmp so that a URL shorter than the prefix is never read beyond its terminator.
	if (!strncmp((const char*) i, "https://", 8))
		{ i += 8 ; Z << "s:" ; }
	else if (!strncmp((const char*) i, "http://", 7))
		{ i += 7 ; Z << "h:" ; }

	// Encode the remainder one byte at a time
	for (; *i ; i++)
	{
		if (*i >= 'A' && *i <= 'Z')	{ Z.AddByte(conv2lower(*i)) ; continue ; }
		if (*i >= 'a' && *i <= 'z')	{ Z.AddByte(*i) ; continue ; }
		if (*i >= '0' && *i <= '9')	{ Z.AddByte(*i) ; continue ; }

		if (*i == CHAR_FWSLASH)		{ Z.AddByte(CHAR_AT) ; continue ; }
		if (*i == CHAR_AMPSAND)		{ Z.AddByte(CHAR_COLON) ; continue ; }

		if (*i == CHAR_USCORE || *i == CHAR_PERIOD || *i == CHAR_PERCENT || *i == CHAR_EQUAL || *i == CHAR_QUERY || *i == CHAR_PLUS
			|| *i == CHAR_MINUS)
		{
			Z.AddByte(*i) ;
			continue ;
		}

		// Anything else is written as %hh. A uchar never exceeds 0xff so the output is always exactly two digits.
		Z.AddByte(CHAR_PERCENT) ;
		snprintf(buf, sizeof(buf), "%02x", (unsigned int) *i) ;
		Z.AddByte(buf[0]) ;
		Z.AddByte(buf[1]) ;
	}

	S = Z ;
	return S ;
}