Determines if the supplied chain iterator is at the beginning of a unicode sequence. If it is, then both the value and the size of the sequence are determined. Note that to be a multi-byte UTF-8 sequence, the first byte must be one of: a) 110xxxxx for a 2-byte sequence, with the next byte being of the form 10xxxxxx; b) 1110xxxx for a 3-byte sequence, with the next 2 bytes being of the form 10xxxxxx; c) 11110xxx for a 4-byte sequence, with the next 3 bytes being of the form 10xxxxxx; d) 111110xx for a 5-byte sequence, with the next 4 bytes being of the form 10xxxxxx; e) 1111110x for a 6-byte sequence, with the next 5 bytes being of the form 10xxxxxx.

Return Type | Function name | Arguments
bool | AtUnicodeSeq | (uint32_t&, uint32_t&, hzChain::Iter&)

Declared and defined in file: hzTextproc.cpp

Function Logic:

0:START 1:uVal nLen 2:unknown 3:Return false 4:unknown 5:Return false 6:xi ubuf items ubuf 7:unknown 8:nLen uVal 9:Return false 10:unknown 11:unknown 12:nLen ubuf ubuf ( uVal 13:Return true 14:Return false 15:unknown 16:items ubuf 17:unknown 18:nLen ubuf ubuf ubuf ( ( uVal 19:Return true 20:Return false 21:unknown 22:items ubuf items ubuf 23:unknown 24:ubuf ubuf ubuf ubuf ( ( 25:( 26:uVal nLen 27:Return true 28:Return false 29:unknown 30:items ubuf items ubuf items ubuf 31:unknown 32:ubuf ubuf ubuf ubuf ubuf ( ( ( 33:( 34:uVal nLen 35:Return true 36:Return false 37:unknown 38:items ubuf items ubuf items ubuf items ubuf 39:unknown 40:ubuf ubuf ubuf ubuf ubuf ubuf ( ( ( ( 41:( 42:uVal nLen 43:Return true 44:Return false

Function body:

bool AtUnicodeSeq (uint32_t& uVal)uint32_t& nLen, hzChain::Iter& zi, 
{
   //  Category: Text Presentation
   //  
   //  Determines if the supplied chain iterator is at the begining of a unicode sequence. If it is then both the value and size are determined.
   //  
   //  Note to be a UTF-8 sequence, the first byte must be either:-
   //   a) 110xxxxx for a 2-byte sequence with the next byte being of the form 10xxxxxx 
   //   b) 1110xxxx for a 3-byte sequence with the next 2 bytes being of the form 10xxxxxx
   //   c) 11110xxx for a 4-byte sequence with the next 3 bytes being of the form 10xxxxxx
   //   d) 111110xx for a 5-byte sequence with the next 4 bytes being of the form 10xxxxxx
   //   e) 1111110x for a 6-byte sequence with the next 5 bytes being of the form 10xxxxxx
   //  
   //  Arguments: 1) uVal The unicode value (set by this function)
   //     2) nLen The length of the unicode sequence encountered (set by this function)
   //     3) zi  The chain or char sting iterator
   //  
   //  Returns: True If the supplied chain iterator is at the begining of a unicode sequence
   //     False Otherwise
   _hzfunc(__func__) ;
   chIter  xi ;        //  Input chain iterator
   uchar   ubuf[8];    //  Unicode buffer
   uVal = 0;
   nLen = 0;
   if (zi.eof())
       return false ;
   if (!(*zi & 0x80))
       return false ;
   //  Get first two bytes
   xi = zi ;
   ubuf[0]= (uchar) *xi ;
   xi++ ;
   ubuf[1]= (uchar) *xi ;
   //  If 2nd byte is not 0x80 or greater then we have a single byte unicode sequence
   if (!(ubuf[1]& 0x80))
       { nLen = 1; uVal = ubuf[0]; return false ; }
   //  If this is a utf-8 sequence then between 1 and 5 subsequent bytes will have a value between 128 and 191. If this is
   //  the case we compute a value for the sequence and then look up this value for a printable form.
   if ((ubuf[0]& 0xE0)==0xC0)
   {
       //  First 2 bits set, 3rd bit clear; Utf-8 sequence is this and next byte
       if ((ubuf[1]& 0xC0)==0x80)
           { nLen = 2; uVal = ((ubuf[0]& 0x1F)<<6)+(ubuf[1]&0x3F);returntrue; }
       return false ;
   }
   if ((ubuf[0]& 0xF0)==0xE0)
   {
       //  First 3 bits set, 4th bit clear; Utf-8 sequence is this and next 2 bytes
       xi++ ; ubuf[2]= (uchar) *xi ;
       if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80)
           { nLen = 3; uVal = ((ubuf[0]& 0x0F)<<12)+((ubuf[1]&0x3F)<<6)+(ubuf[2]&0x3F);returntrue;}
       return false ;
   }
   if ((ubuf[0]& 0xF8)==0xF0)
   {
       //  First 4 bits set, 5th bit clear; Utf-8 sequence is this and next 3 bytes
       xi++ ; ubuf[2]= (uchar) *xi ;
       xi++ ; ubuf[3]= (uchar) *xi ;
       if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80)
       {
           uVal = ((ubuf[0]& 0x07)<<18)+((ubuf[1]&0x3F)<<12)+((ubuf[2]&0x3F)<<6)+(ubuf[3]&0x3F);
           nLen = 4;
           return true ;
       }
       return false ;
   }
   if ((ubuf[0]& 0xFC)==0xF8)
   {
       //  First 5 bits set, 6th bit clear; Utf-8 sequence is this and next 4 bytes
       xi++ ; ubuf[2]= (uchar) *xi ;
       xi++ ; ubuf[3]= (uchar) *xi ;
       xi++ ; ubuf[4]= (uchar) *xi ;
       if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80&&(ubuf[4]&0xC0)==0x80)
       {
           uVal = ((ubuf[0]& 0x03)<<24)+((ubuf[1]&0x3F)<<18)+((ubuf[2]&0x3F)<<12)+((ubuf[3]&0x3F)<<6)+(ubuf[4]&0x3F);
           nLen = 5;
           return true ;
       }
       return false ;
   }
   if ((ubuf[0]& 0xFE)==0xFC)
   {
       //  First 6 bits set, 7th bit clear; Utf-8 sequence is this and next 5 bytes
       xi++ ; ubuf[2]= (uchar) *xi ;
       xi++ ; ubuf[3]= (uchar) *xi ;
       xi++ ; ubuf[4]= (uchar) *xi ;
       xi++ ; ubuf[5]= (uchar) *xi ;
       if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80&&(ubuf[4]&0xC0)==0x80&&(ubuf[5]&0xC0))
       {
           uVal = ((ubuf[0]& 0x01)<<30)+((ubuf[1]&0x3F)<<24)+((ubuf[2]&0x3F)<<18)+((ubuf[3]&0x3F)<<12)
               + ((ubuf[4]& 0x3F)<<6)+(ubuf[5]& 0x3F);
           nLen = 6;
           return true ;
       }
   }
   return false ;
}