//
//  File:   hzChars.h
//
//  Legal Notice: This file is part of the HadronZoo C++ Class Library.
//
//  Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
//  The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
//  Software Foundation, either version 3 of the License, or any later version.
//
//  The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
//  A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
//
//  See synopsis    Treatment of Character Types
//
//  The characters of the ASCII table fall into distinct groups which are important for processing of text. The most obvious of these are:-
//
//  1)  Whitespace  (eg SPACE, TAB, CR and NL)
//  2)  Digits      (0-9)
//  3)  Alpha       (a-z and A-Z)
//  4)  Punctuation (eg period, comma, colon, semi-colon, apostrophe)
//  5)  Symbols     (eg dollar and pound sign)
//  6)  Binary      (unprintable characters)
//
//  To cope with processing of such things as URLs, email addresses and standard form numbers, additional groups were introduced as follows:-
//
//  7)  Alphanum    Either a digit or an alpha
//  8)  Hex         Allowed chars in a hexadecimal integer (Either 0-9 or a-f or A-F)
//  9)  Numchar     Chars used in standard form number (0-9, period, hat)
//  10) Hyphen      Chars that can serve as hyphens
//  11) Url         Chars allowed in a URL
//
//  HadronZoo provides a global array of character types indexed by the ASCII value of the characters. This is called chartype although this is
//  rarely referenced by applications. Instead applications generally use the inline functions with prefixes of 'is' (followed by char type) or
//  'Is' (for chars as int32_t).
#ifndef hzChars_h
#define hzChars_h
#include "hzBasedefs.h"
//  ASCII control codes (0 - 31). Each code is given under its Control-<letter> name and, where one exists, under its standard mnemonic.
//  Every expansion is wrapped in parentheses so that the macro behaves as a single operand in any expression (e.g. 'sizeof CHAR_NULL').
#define CHAR_NULL       ((char)  0)     //  Null terminator
#define CHAR_CTRLA      ((char)  1)     //  Control-A
#define CHAR_SOH        ((char)  1)     //  Start of Heading
#define CHAR_CTRLB      ((char)  2)     //  Control-B
#define CHAR_STX        ((char)  2)     //  Start of Text
#define CHAR_CTRLC      ((char)  3)     //  Control-C
#define CHAR_ETX        ((char)  3)     //  End of Text
#define CHAR_CTRLD      ((char)  4)     //  Control-D
#define CHAR_EOT        ((char)  4)     //  End of Transmission
#define CHAR_CTRLE      ((char)  5)     //  Control-E
#define CHAR_ENQ        ((char)  5)     //  Enquiry
#define CHAR_CTRLF      ((char)  6)     //  Control-F
#define CHAR_ACK        ((char)  6)     //  Acknowledgement
#define CHAR_CTRLG      ((char)  7)     //  Control-G
#define CHAR_BEL        ((char)  7)     //  Bell
#define CHAR_CTRLH      ((char)  8)     //  Control-H
#define CHAR_BS         ((char)  8)     //  Backspace
#define CHAR_CTRLI      ((char)  9)     //  Control-I
#define CHAR_TAB        ((char)  9)     //  Tab
#define CHAR_CTRLJ      ((char) 10)     //  Control-J
#define CHAR_NL         ((char) 10)     //  Newline
#define NEWLINE         ((char) 10)     //  Newline (same value as CHAR_NL)
#define CHAR_CTRLK      ((char) 11)     //  Control-K
#define CHAR_VT         ((char) 11)     //  Vertical Tab
#define CHAR_CTRLL      ((char) 12)     //  Control-L
#define CHAR_FF         ((char) 12)     //  Form Feed
#define CHAR_CTRLM      ((char) 13)     //  Control-M
#define CHAR_CR         ((char) 13)     //  Carriage return
#define CHAR_CTRLN      ((char) 14)     //  Control-N
#define CHAR_SO         ((char) 14)     //  Shift Out
#define CHAR_CTRLO      ((char) 15)     //  Control-O
#define CHAR_SI         ((char) 15)     //  Shift In
#define CHAR_CTRLP      ((char) 16)     //  Control-P
#define CHAR_DLE        ((char) 16)     //  Data Link Escape
#define CHAR_CTRLQ      ((char) 17)     //  Control-Q
#define CHAR_DC1        ((char) 17)     //  Device Control 1
#define CHAR_XON        ((char) 17)     //  XON (Device Control 1)
#define CHAR_CTRLR      ((char) 18)     //  Control-R
#define CHAR_DC2        ((char) 18)     //  Device Control 2
#define CHAR_CTRLS      ((char) 19)     //  Control-S
#define CHAR_DC3        ((char) 19)     //  Device Control 3
#define CHAR_XOFF       ((char) 19)     //  XOFF (Device Control 3)
#define CHAR_CTRLT      ((char) 20)     //  Control-T
#define CHAR_DC4        ((char) 20)     //  Device Control 4
#define CHAR_CTRLU      ((char) 21)     //  Control-U
#define CHAR_NAK        ((char) 21)     //  Negative Acknowledgement
#define CHAR_CTRLV      ((char) 22)     //  Control-V
#define CHAR_SYN        ((char) 22)     //  Synchronous Idle
#define CHAR_CTRLW      ((char) 23)     //  Control-W
#define CHAR_ETB        ((char) 23)     //  End of Transmission Block
#define CHAR_CTRLX      ((char) 24)     //  Control-X
#define CHAR_CAN        ((char) 24)     //  Cancel
#define CHAR_CTRLY      ((char) 25)     //  Control-Y
#define CHAR_EM         ((char) 25)     //  End of Medium
#define CHAR_CTRLZ      ((char) 26)     //  Control-Z
#define CHAR_SUB        ((char) 26)     //  Substitute
#define CHAR_ESC        ((char) 27)     //  Escape
#define CHAR_FS         ((char) 28)     //  File Separator
#define CHAR_GS         ((char) 29)     //  Group Separator
#define CHAR_RS         ((char) 30)     //  Record Separator
#define CHAR_US         ((char) 31)     //  Unit Separator
//  Space, punctuation/symbol characters and decimal digits (ASCII 32 - 64).
//  Every expansion is wrapped in parentheses so that the macro behaves as a single operand in any expression (e.g. 'sizeof CHAR_SPACE').
#define CHAR_SPACE      ((char) 32)     //  Space
#define CHAR_PLING      ((char) 33)     //  Exclamation mark (logical NOT symbol)
#define CHAR_DQUOTE     ((char) 34)     //  Double quotation mark
#define CHAR_HASH       ((char) 35)     //  Hash symbol
#define CHAR_DOLLAR     ((char) 36)     //  Dollar sign
#define CHAR_PERCENT    ((char) 37)     //  Percent sign
#define CHAR_AMPSAND    ((char) 38)     //  Ampersand
#define CHAR_SQUOTE     ((char) 39)     //  Single quotation mark
#define CHAR_PAROPEN    ((char) 40)     //  Opening round brace
#define CHAR_PARCLOSE   ((char) 41)     //  Closing round brace
#define CHAR_ASTERISK   ((char) 42)     //  Asterisk/Multiply sign
#define CHAR_PLUS       ((char) 43)     //  Plus sign
#define CHAR_COMMA      ((char) 44)     //  Comma
#define CHAR_MINUS      ((char) 45)     //  Minus sign
#define CHAR_PERIOD     ((char) 46)     //  Period symbol
#define CHAR_FWSLASH    ((char) 47)     //  Forward slash
#define CHAR_0          ((char) 48)     //  Digit 0
#define CHAR_1          ((char) 49)     //  Digit 1
#define CHAR_2          ((char) 50)     //  Digit 2
#define CHAR_3          ((char) 51)     //  Digit 3
#define CHAR_4          ((char) 52)     //  Digit 4
#define CHAR_5          ((char) 53)     //  Digit 5
#define CHAR_6          ((char) 54)     //  Digit 6
#define CHAR_7          ((char) 55)     //  Digit 7
#define CHAR_8          ((char) 56)     //  Digit 8
#define CHAR_9          ((char) 57)     //  Digit 9
#define CHAR_COLON      ((char) 58)     //  Colon
#define CHAR_SCOLON     ((char) 59)     //  Semi-colon
#define CHAR_LESS       ((char) 60)     //  Less-than symbol
#define CHAR_EQUAL      ((char) 61)     //  Equality symbol
#define CHAR_MORE       ((char) 62)     //  Greater-than symbol
#define CHAR_QUERY      ((char) 63)     //  Question mark
#define CHAR_AT         ((char) 64)     //  The @ char
//  Upper case letters and the symbols that follow them in the ASCII table (65 - 96).
//  Every expansion is wrapped in parentheses so that the macro behaves as a single operand in any expression (e.g. 'sizeof CHAR_UC_A').
#define CHAR_UC_A       ((char) 65)     //  Upper case A
#define CHAR_UC_B       ((char) 66)     //  Upper case B
#define CHAR_UC_C       ((char) 67)     //  Upper case C
#define CHAR_UC_D       ((char) 68)     //  Upper case D
#define CHAR_UC_E       ((char) 69)     //  Upper case E
#define CHAR_UC_F       ((char) 70)     //  Upper case F
#define CHAR_UC_G       ((char) 71)     //  Upper case G
#define CHAR_UC_H       ((char) 72)     //  Upper case H
#define CHAR_UC_I       ((char) 73)     //  Upper case I
#define CHAR_UC_J       ((char) 74)     //  Upper case J
#define CHAR_UC_K       ((char) 75)     //  Upper case K
#define CHAR_UC_L       ((char) 76)     //  Upper case L
#define CHAR_UC_M       ((char) 77)     //  Upper case M
#define CHAR_UC_N       ((char) 78)     //  Upper case N
#define CHAR_UC_O       ((char) 79)     //  Upper case O
#define CHAR_UC_P       ((char) 80)     //  Upper case P
#define CHAR_UC_Q       ((char) 81)     //  Upper case Q
#define CHAR_UC_R       ((char) 82)     //  Upper case R
#define CHAR_UC_S       ((char) 83)     //  Upper case S
#define CHAR_UC_T       ((char) 84)     //  Upper case T
#define CHAR_UC_U       ((char) 85)     //  Upper case U
#define CHAR_UC_V       ((char) 86)     //  Upper case V
#define CHAR_UC_W       ((char) 87)     //  Upper case W
#define CHAR_UC_X       ((char) 88)     //  Upper case X
#define CHAR_UC_Y       ((char) 89)     //  Upper case Y
#define CHAR_UC_Z       ((char) 90)     //  Upper case Z
#define CHAR_SQOPEN     ((char) 91)     //  Opening square brace
#define CHAR_BKSLASH    ((char) 92)     //  Back slash
#define CHAR_SQCLOSE    ((char) 93)     //  Closing square brace
#define CHAR_HAT        ((char) 94)     //  Hat char (to power of)
#define CHAR_USCORE     ((char) 95)     //  Underscore
#define CHAR_BKQUOTE    ((char) 96)     //  Back quote (Opening single quote)
//  Lower case letters and the remaining ASCII characters (97 - 127).
//  Every expansion is wrapped in parentheses so that the macro behaves as a single operand in any expression (e.g. 'sizeof CHAR_LC_A').
#define CHAR_LC_A       ((char) 97)     //  Lower case a
#define CHAR_LC_B       ((char) 98)     //  Lower case b
#define CHAR_LC_C       ((char) 99)     //  Lower case c
#define CHAR_LC_D       ((char)100)     //  Lower case d
#define CHAR_LC_E       ((char)101)     //  Lower case e
#define CHAR_LC_F       ((char)102)     //  Lower case f
#define CHAR_LC_G       ((char)103)     //  Lower case g
#define CHAR_LC_H       ((char)104)     //  Lower case h
#define CHAR_LC_I       ((char)105)     //  Lower case i
#define CHAR_LC_J       ((char)106)     //  Lower case j
#define CHAR_LC_K       ((char)107)     //  Lower case k
#define CHAR_LC_L       ((char)108)     //  Lower case l
#define CHAR_LC_M       ((char)109)     //  Lower case m
#define CHAR_LC_N       ((char)110)     //  Lower case n
#define CHAR_LC_O       ((char)111)     //  Lower case o
#define CHAR_LC_P       ((char)112)     //  Lower case p
#define CHAR_LC_Q       ((char)113)     //  Lower case q
#define CHAR_LC_R       ((char)114)     //  Lower case r
#define CHAR_LC_S       ((char)115)     //  Lower case s
#define CHAR_LC_T       ((char)116)     //  Lower case t
#define CHAR_LC_U       ((char)117)     //  Lower case u
#define CHAR_LC_V       ((char)118)     //  Lower case v
#define CHAR_LC_W       ((char)119)     //  Lower case w
#define CHAR_LC_X       ((char)120)     //  Lower case x
#define CHAR_LC_Y       ((char)121)     //  Lower case y
#define CHAR_LC_Z       ((char)122)     //  Lower case z
#define CHAR_CUROPEN    ((char)123)     //  Opening curly brace
#define CHAR_OR         ((char)124)     //  Vertical bar (logical OR symbol)
#define CHAR_CURCLOSE   ((char)125)     //  Closing curly brace
#define CHAR_TILDA      ((char)126)     //  Tilde
#define CHAR_DEL        ((char)127)     //  Delete
#define CHAR_BLOCK      ((char)127)     //  Block char (same value as CHAR_DEL)
/*
**  Char Types
**
**  Each CTYPE_ value is a distinct single bit, so the values can be OR-ed together into one mask. The chartype array holds one
**  such 16-bit entry for each of the 256 possible byte values.
*/
#define CTYPE_NOTYPE    0x0000      //  undefined char type
#define CTYPE_BINARY    0x0001      //  unprintable, ctl etc
#define CTYPE_WHITE     0x0002      //  space, tab or newline
#define CTYPE_DIGIT     0x0004      //  0 - 9 only
#define CTYPE_HEXDIGIT  0x0008      //  0 - 9 and (a - f and A - F)
#define CTYPE_ALPHA     0x0010      //  a - z and A - Z
#define CTYPE_HYPEN     0x0020      //  any symbol used as hyphen (original spelling, kept for existing code)
#define CTYPE_HYPHEN    CTYPE_HYPEN //  correctly spelled alias of CTYPE_HYPEN
#define CTYPE_PUNCT     0x0040      //  any punctuation char
#define CTYPE_SYMB      0x0080      //  symbols, eg math operators
#define CTYPE_NUMCHAR   0x0100      //  any char used in a standard form number
#define CTYPE_URL_NORM  0x0200      //  Allowed in a URL (unreserved)
#define CTYPE_URL_RESV  0x0400      //  Allowed in a URL (reserved)
#define CTYPE_HTX_TAG   0x0800      //  Any char allowed as a html/xml tag name
extern  int16_t chartype[256] ;     //  Character types by value (declared here, defined elsewhere; valid indices are 0 - 255)
/*
**  Prototypes
*/

//  Character classification tests. Each takes the character as an int32_t and returns a bool.

//  Tests if the char is non-printable
bool IsBinary(int32_t c);

//  Tests if the char is whitespace
bool IsWhite(int32_t c);

//  Tests if the char is 0-9
bool IsDigit(int32_t c);

//  Tests if the char is 0-9, a-f, A-F
bool IsHex(int32_t c);

//  Tests if the char is a-z or A-Z
bool IsAlpha(int32_t c);

//  Tests if the char is a hyphen
bool IsHyphen(int32_t c);

//  Tests if the char is a-z, A-Z or 0-9
bool IsAlphanum(int32_t c);

//  Tests if the char is a punctuation char
bool IsPunct(int32_t c);

//  Tests if the char is a symbol (eg $)
bool IsSymb(int32_t c);

//  Tests if the char is allowed in a standard form number. So 0-9, period, 'e', +, -
bool IsNumchar(int32_t c);

//  Tests if the char is allowed as part of a base URL (subdomain.domain/)
bool IsUrlnorm(int32_t c);

//  Tests if the char is allowed as part of an extended URL (subdomain.domain/url-extn)
bool IsUrlresv(int32_t c);

//  Tests if the char is allowed as part of a tag-name
bool IsTagchar(int32_t c);
//  NOTE: Everything between this '#if 0' and its matching '#endif' is removed by the preprocessor, so none of the prototypes
//  below are declared to code that includes this header.
//  NOTE(review): presumably these declarations now live in another header or are obsolete - confirm before re-enabling or deleting.
#if 0
//  Endian conversion
//  The _getvNbyte group takes an output value by reference (v) and a pointer to constant bytes (i). 32-bit outputs are used for
//  the 1 to 4 byte variants and 64-bit outputs for the 5 to 8 byte variants.
void    _getv1byte  (uint32_t& v, const uchar* i) ;
void    _getv2byte  (uint32_t& v, const uchar* i) ;
void    _getv3byte  (uint32_t& v, const uchar* i) ;
void    _getv4byte  (uint32_t& v, const uchar* i) ;
void    _getv5byte  (uint64_t& v, const uchar* i) ;
void    _getv6byte  (uint64_t& v, const uchar* i) ;
void    _getv7byte  (uint64_t& v, const uchar* i) ;
void    _getv8byte  (uint64_t& v, const uchar* i) ;
//  The _setvNbyte group takes a pointer to writable bytes (s) and a value (n). Apart from the 1 byte variant, each is overloaded
//  for an unsigned and a signed value type.
void    _setv1byte  (uchar* s, char n) ;
void    _setv2byte  (uchar* s, uint16_t n) ;
void    _setv2byte  (uchar* s, int16_t n) ;
void    _setv3byte  (uchar* s, uint32_t n) ;
void    _setv3byte  (uchar* s, int32_t n) ;
void    _setv4byte  (uchar* s, uint32_t n) ;
void    _setv4byte  (uchar* s, int32_t n) ;
void    _setv5byte  (uchar* s, uint64_t n) ;
void    _setv5byte  (uchar* s, int64_t n) ;
void    _setv6byte  (uchar* s, uint64_t n) ;
void    _setv6byte  (uchar* s, int64_t n) ;
void    _setv7byte  (uchar* s, uint64_t n) ;
void    _setv7byte  (uchar* s, int64_t n) ;
void    _setv8byte  (uchar* s, uint64_t n) ;
void    _setv8byte  (uchar* s, int64_t n) ;
#endif
#endif  //  hzChars_h