Purpose: Compare two strings on a word basis. This ignores case and considers both input strings as a series of alphanumeric words separated by whitespace. Adjacent whitespace characters are treated as a single space with leading & trailing whitespace ignored. All punctuation characters and all characters beyond the lower ASCII are ignored.

Return Type | Function name | Arguments
int32_t | CstrCompareW | (const char*, const char*)

Declared in file: hzTextproc.h
Defined in file : hzTextproc.cpp

Function Logic:

0:START 1:unknown 2:unknown 3:unknown 4:unknown 5:unknown 6:len 7:entB 8:entA 9:unknown 10:unknown 11:unknown 12:entA 13:unknown 14:unknown 15:pA 16:IsUnicodeSeq(entA,len,pA) 17:pA 18:entA 19:unknown 20:entA 21:unknown 22:entA 23:unknown 24:unknown 25:unknown 26:entB 27:unknown 28:unknown 29:pB 30:IsUnicodeSeq(entB,len,pB) 31:pB 32:entB 33:unknown 34:entB 35:unknown 36:entB 37:unknown 38:Return entA-entB 39:unknown 40:Return 0

Function body:

int32_t CstrCompareW (const char* a)const char* b, 
{
   //  Category: Text Processing
   //  
   //  Purpose: Compare two strings on a word basis. This ignores case and considers both input strings as a series of alphanumeric
   //     words separated by whitespace. Adjacent whitespace characters are treated as a single space with leading & trailing
   //     whitespace ignored. All punctuation characters and all characters beyond the lower ASCII are ignored.
   //  
   //  Arguments: 1) a First string
   //     2) b Second string
   //  
   //  Returns: +1 If a is lexically more than b
   //     -1 If a is lexically less than b
   //     0 If a and b are lexically equivelent
   const char* pA = a ;    //  String A iterator
   const char* pB = b ;    //  String B iterator
   uint32_t    entA ;      //  Value of char (be it ASCII, entity or unicode sequence)
   uint32_t    entB ;      //  Same for second input
   uint32_t    len ;       //  Length of sequence from which ent is derived
   if (pA)
       for (pA++ ; *pA && *pA <&eq; CHAR_SPACE ; pA++) ;
   if (pB)
       for (pB++ ; *pB && *pB <&eq; CHAR_SPACE ; pB++) ;
   for (;;)
   {
       entA = entB = len = 0;
       //  Get next value from A
       if (pA)
       {
           if (*pA && *pA <&eq; CHAR_SPACE)
           {
               for (pA++ ; *pA <&eq; CHAR_SPACE ; pA++) ;
               entA = CHAR_SPACE ;
           }
           else
           {
               for (; *pA ;)
               {
                   if (IsEntity(entA, len, pA))
                       pA += len ;
                   else if (IsUnicodeSeq(entA, len, pA))
                       pA += len ;
                   else
                       entA = *pA++ ;
                   if (IsAlphanum(entA))
                       { entA = conv2lower(entA) ; break ; }
               }
           }
           if (*pA == 0&& entA == CHAR_SPACE)
               entA = 0;
       }
       if (pB)
       {
           if (*pB && *pB <&eq; CHAR_SPACE)
           {
               for (pB++ ; *pB && *pB <&eq; CHAR_SPACE ; pB++) ;
               entB = CHAR_SPACE ;
           }
           else
           {
               for (; *pB ;)
               {
                   if (IsEntity(entB, len, pB))
                       pB += len ;
                   else if (IsUnicodeSeq(entB, len, pB))
                       pB += len ;
                   else
                       entB = *pB++ ;
                   if (IsAlphanum(entB))
                       { entB = conv2lower(entB) ; break ; }
               }
           }
           if (*pB == 0&& entB == CHAR_SPACE)
               entB = 0;
       }
       if (entA != entB)
           return entA - entB ;
       if (!entA)
           break ;
   }
   return 0;
}