Determines if the supplied chain iterator is at the beginning of a Unicode (UTF-8) sequence. If it is, then both the value and the size are determined. Note that to be a UTF-8 sequence, the first byte must be one of: a) 110xxxxx for a 2-byte sequence, with the next byte being of the form 10xxxxxx; b) 1110xxxx for a 3-byte sequence, with the next 2 bytes being of the form 10xxxxxx; c) 11110xxx for a 4-byte sequence, with the next 3 bytes being of the form 10xxxxxx; d) 111110xx for a 5-byte sequence, with the next 4 bytes being of the form 10xxxxxx; e) 1111110x for a 6-byte sequence, with the next 5 bytes being of the form 10xxxxxx.
| Return Type | Function name | Arguments |
|---|---|---|
| bool | AtUnicodeSeq | (uint32_t&, uint32_t&, hzChain::Iter&) |
Declared and defined in file: hzTextproc.cpp
Function Logic:
Function body:
bool AtUnicodeSeq (uint32_t& uVal)uint32_t& nLen, hzChain::Iter& zi,
{
// Category: Text Presentation
//
// Determines if the supplied chain iterator is at the begining of a unicode sequence. If it is then both the value and size are determined.
//
// Note to be a UTF-8 sequence, the first byte must be either:-
// a) 110xxxxx for a 2-byte sequence with the next byte being of the form 10xxxxxx
// b) 1110xxxx for a 3-byte sequence with the next 2 bytes being of the form 10xxxxxx
// c) 11110xxx for a 4-byte sequence with the next 3 bytes being of the form 10xxxxxx
// d) 111110xx for a 5-byte sequence with the next 4 bytes being of the form 10xxxxxx
// e) 1111110x for a 6-byte sequence with the next 5 bytes being of the form 10xxxxxx
//
// Arguments: 1) uVal The unicode value (set by this function)
// 2) nLen The length of the unicode sequence encountered (set by this function)
// 3) zi The chain or char sting iterator
//
// Returns: True If the supplied chain iterator is at the begining of a unicode sequence
// False Otherwise
_hzfunc(__func__) ;
chIter xi ; // Input chain iterator
uchar ubuf[8]; // Unicode buffer
uVal = 0;
nLen = 0;
if (zi.eof())
return false ;
if (!(*zi & 0x80))
return false ;
// Get first two bytes
xi = zi ;
ubuf[0]= (uchar) *xi ;
xi++ ;
ubuf[1]= (uchar) *xi ;
// If 2nd byte is not 0x80 or greater then we have a single byte unicode sequence
if (!(ubuf[1]& 0x80))
{ nLen = 1; uVal = ubuf[0]; return false ; }
// If this is a utf-8 sequence then between 1 and 5 subsequent bytes will have a value between 128 and 191. If this is
// the case we compute a value for the sequence and then look up this value for a printable form.
if ((ubuf[0]& 0xE0)==0xC0)
{
// First 2 bits set, 3rd bit clear; Utf-8 sequence is this and next byte
if ((ubuf[1]& 0xC0)==0x80)
{ nLen = 2; uVal = ((ubuf[0]& 0x1F)<<6)+(ubuf[1]&0x3F);returntrue; }
return false ;
}
if ((ubuf[0]& 0xF0)==0xE0)
{
// First 3 bits set, 4th bit clear; Utf-8 sequence is this and next 2 bytes
xi++ ; ubuf[2]= (uchar) *xi ;
if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80)
{ nLen = 3; uVal = ((ubuf[0]& 0x0F)<<12)+((ubuf[1]&0x3F)<<6)+(ubuf[2]&0x3F);returntrue;}
return false ;
}
if ((ubuf[0]& 0xF8)==0xF0)
{
// First 4 bits set, 5th bit clear; Utf-8 sequence is this and next 3 bytes
xi++ ; ubuf[2]= (uchar) *xi ;
xi++ ; ubuf[3]= (uchar) *xi ;
if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80)
{
uVal = ((ubuf[0]& 0x07)<<18)+((ubuf[1]&0x3F)<<12)+((ubuf[2]&0x3F)<<6)+(ubuf[3]&0x3F);
nLen = 4;
return true ;
}
return false ;
}
if ((ubuf[0]& 0xFC)==0xF8)
{
// First 5 bits set, 6th bit clear; Utf-8 sequence is this and next 4 bytes
xi++ ; ubuf[2]= (uchar) *xi ;
xi++ ; ubuf[3]= (uchar) *xi ;
xi++ ; ubuf[4]= (uchar) *xi ;
if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80&&(ubuf[4]&0xC0)==0x80)
{
uVal = ((ubuf[0]& 0x03)<<24)+((ubuf[1]&0x3F)<<18)+((ubuf[2]&0x3F)<<12)+((ubuf[3]&0x3F)<<6)+(ubuf[4]&0x3F);
nLen = 5;
return true ;
}
return false ;
}
if ((ubuf[0]& 0xFE)==0xFC)
{
// First 6 bits set, 7th bit clear; Utf-8 sequence is this and next 5 bytes
xi++ ; ubuf[2]= (uchar) *xi ;
xi++ ; ubuf[3]= (uchar) *xi ;
xi++ ; ubuf[4]= (uchar) *xi ;
xi++ ; ubuf[5]= (uchar) *xi ;
if ((ubuf[1]& 0xC0)==0x80&&(ubuf[2]&0xC0)==0x80&&(ubuf[3]&0xC0)==0x80&&(ubuf[4]&0xC0)==0x80&&(ubuf[5]&0xC0))
{
uVal = ((ubuf[0]& 0x01)<<30)+((ubuf[1]&0x3F)<<24)+((ubuf[2]&0x3F)<<18)+((ubuf[3]&0x3F)<<12)
+ ((ubuf[4]& 0x3F)<<6)+(ubuf[5]& 0x3F);
nLen = 6;
return true ;
}
}
return false ;
}