Purpose: Compare two strings on a word basis. This ignores case and considers both input strings as a series of alphanumeric words separated by whitespace. Adjacent whitespace characters are treated as a single space with leading & trailing whitespace ignored. All punctuation characters and all characters beyond the lower ASCII are ignored.
| Return Type | Function name | Arguments |
|---|---|---|
| int32_t | CstrCompareW | (const char* a, const char* b) |
Declared in file: hzTextproc.h
Defined in file : hzTextproc.cpp
Function Logic:
Function body:
int32_t CstrCompareW (const char* a)const char* b,
{
// Category: Text Processing
//
// Purpose: Compare two strings on a word basis. This ignores case and considers both input strings as a series of alphanumeric
// words separated by whitespace. Adjacent whitespace characters are treated as a single space with leading & trailing
// whitespace ignored. All punctuation characters and all characters beyond the lower ASCII are ignored.
//
// Arguments: 1) a First string
// 2) b Second string
//
// Returns: +1 If a is lexically more than b
// -1 If a is lexically less than b
// 0 If a and b are lexically equivelent
const char* pA = a ; // String A iterator
const char* pB = b ; // String B iterator
uint32_t entA ; // Value of char (be it ASCII, entity or unicode sequence)
uint32_t entB ; // Same for second input
uint32_t len ; // Length of sequence from which ent is derived
if (pA)
for (pA++ ; *pA && *pA <&eq; CHAR_SPACE ; pA++) ;
if (pB)
for (pB++ ; *pB && *pB <&eq; CHAR_SPACE ; pB++) ;
for (;;)
{
entA = entB = len = 0;
// Get next value from A
if (pA)
{
if (*pA && *pA <&eq; CHAR_SPACE)
{
for (pA++ ; *pA <&eq; CHAR_SPACE ; pA++) ;
entA = CHAR_SPACE ;
}
else
{
for (; *pA ;)
{
if (IsEntity(entA, len, pA))
pA += len ;
else if (IsUnicodeSeq(entA, len, pA))
pA += len ;
else
entA = *pA++ ;
if (IsAlphanum(entA))
{ entA = conv2lower(entA) ; break ; }
}
}
if (*pA == 0&& entA == CHAR_SPACE)
entA = 0;
}
if (pB)
{
if (*pB && *pB <&eq; CHAR_SPACE)
{
for (pB++ ; *pB && *pB <&eq; CHAR_SPACE ; pB++) ;
entB = CHAR_SPACE ;
}
else
{
for (; *pB ;)
{
if (IsEntity(entB, len, pB))
pB += len ;
else if (IsUnicodeSeq(entB, len, pB))
pB += len ;
else
entB = *pB++ ;
if (IsAlphanum(entB))
{ entB = conv2lower(entB) ; break ; }
}
}
if (*pB == 0&& entB == CHAR_SPACE)
entB = 0;
}
if (entA != entB)
return entA - entB ;
if (!entA)
break ;
}
return 0;
}