//
//  File:   hzTokens.h
//
//  Legal Notice: This file is part of the HadronZoo C++ Class Library.
//
//  Copyright 2025 HadronZoo Project (http://www.hadronzoo.com)
//
//  The HadronZoo C++ Class Library is free software: You can redistribute it, and/or modify it under the terms of the GNU Lesser General Public License, as published by the Free
//  Software Foundation, either version 3 of the License, or any later version.
//
//  The HadronZoo C++ Class Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
//  A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License along with the HadronZoo C++ Class Library. If not, see http://www.gnu.org/licenses.
//
//
//  See synopsis    Tokenization
//
//  This package facilitates tokenization of strings into tokens classified as one of the following types:-
//
//  1)  Alphanumeric. The token may have chars A-Z, a-z, 0-9 or _
//  2)  Operator. The token is an arithmetic or logical operator.
//  3)  Value. This can be either of the following:-
//      1)  An integer. (has only chars 0-9)
//      2)  A Double. (has form of standard number)
//      3)  A String Value. (Must be enclosed in quotes)
//  4)  Separator. Punctuation. Always a single char.
//
//  The operation is as follows:-
//
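//  1)  Declare an hzVect<hzToken> to receive the tokens
//  2)  Call TokenizeChain(), TokenizeString() or TokenizeFile(), passing the vector, the text to be tokenized and the tokenization mode (hzTokMode)
//  3)  Move thru the resulting hzVect of tokens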
//
#ifndef hzTokens_h
#define hzTokens_h
#include "hzTmplVect.h"
//  Size limit associated with tokens. NOTE(review): presumably the maximum length of a single token in chars. It is not used within this header - confirm against the tokenizer
//  implementation.
#define MAXTOKENSIZE    256
/*
**  Special char types for tokenization purposes
*/
//  Each non-zero value below is a distinct single bit, so presumably a char can carry several of these classifications at once as a bitmask (confirm against the char table in the
//  tokenizer implementation).
#define NOTYPE      0x0000      //  undefined char type (no bits set)
#define BINARY      0x0001      //  unprintable, ctl etc
#define WHITE       0x0002      //  space, tab or newline
#define DIGIT       0x0004      //  0 - 9 only
#define HEXDIGIT    0x0008      //  hexadecimal digit. NOTE(review): the previous comment read '0 - 9 only', duplicating DIGIT. Presumably this covers 0 - 9, a - f and A - F - confirm
#define ALPHA       0x0010      //  a - z and A - Z
#define HYPEN       0x0020      //  any symbol used as hyphen (the name is a misspelling of HYPHEN, retained so existing users of the macro still compile)
#define PUNCT       0x0040      //  any punctuation char
#define SYMB        0x0080      //  symbols, eg math operators
#define NUMCHAR     0x0100      //  any char used in a number
/*
**  Token types
*/
enum    hzTokenType
{
    //  Category:   Text Processing
    //
    //  Classification of a token. The enumerators are numbered consecutively from zero, with the values stated explicitly.

    TOKEN_ALPHANUM  = 0,        //  Alphanumeric token: a string of [a-z], [A-Z] or [0-9]
    TOKEN_OPERATOR  = 1,        //  One of the standard operators
    TOKEN_SEPARATOR = 2,        //  A token that acts as a separator
    TOKEN_INTEGER   = 3,        //  A string of [0-9]
    TOKEN_NUMBER    = 4,        //  A number eg 10, 10.8, 1.08e-2 etc
    TOKEN_HEXVALUE  = 5,        //  An integer expressed in hex, must begin with '0x'
    TOKEN_DOUBLE    = 6,        //  A number of standard form
    TOKEN_STRING    = 7,        //  A string
    TOKEN_COMMENT   = 8,        //  A comment. Either /* ... */ or //
    TOKEN_UNDEFINED = 9         //  None of the above
} ;
enum    hzTokMode
{
    //  Category:   Text Processing
    //
    //  The tokenization regime to apply. The enumerators are numbered consecutively from zero, with the values stated explicitly.

    TOK_MO_WHITE    = 0,        //  Words are whatever is separated by whitespace
    TOK_MO_FTEXT    = 1,        //  Words are split out according to free text rules
    TOK_MO_BOOL     = 2,        //  Boolean expression: tokens are operators and operands
    TOK_MO_CPP      = 3         //  C++: tokens are classes, functions, operators and operands
} ;
/*
**  hzToken class
*/
class   hzToken
{
    //  Category:   Text Processing
    //
    //  Tokens are the meaningful units derived from the input text.
private:
    hzString    m_Tok ;         //  The token
    uint32_t    m_nLine ;       //  The line it was on in the file
    hzTokenType m_eType ;       //  The token type
public:
    hzToken     (void)
    {
        m_nLine = 0 ;
        m_eType = TOKEN_UNDEFINED ;
    }
    ~hzToken    (void)
    {
    }
    //  Set functions
    hzEcode Init    (const char* txt, uint32_t line, hzTokenType type)      { m_Tok = txt ; m_nLine = line ; m_eType = type ; return E_OK ; }
    hzEcode Init    (const hzString& txt, uint32_t line, hzTokenType type)  { m_Tok = txt ; m_nLine = line ; m_eType = type ; return E_OK ; }
    //  Get functions
    const hzString& Value   (void) const    { return m_Tok ; }
    uint32_t        LineNo  (void) const    { return m_nLine ; }
    hzTokenType     Type    (void) const    { return m_eType ; }
    //  Operators
    bool    operator=   (const hzToken& op)
    {
        m_Tok = op.m_Tok ;
        m_nLine = op.m_nLine ;
        m_eType = op.m_eType ;
        return m_Tok.Length() ? true : false ;
    }
    bool    operator==  (const hzToken& op)
    {
        if (m_nLine != op.m_nLine)  return false ;
        if (m_eType != op.m_eType)  return false ;
        return m_Tok == op.m_Tok ;
    }
    bool    operator==  (const char* pStr)  { return m_Tok == pStr ? true : false ; }
    bool    operator!=  (const char* pStr)  { return m_Tok == pStr ? false : true ; }
    bool    operator!   (void)  { return m_Tok ? false : true ; }
} ;
/*
**  Prototypes
*/
//  Tokenization entry points. Each takes the vector of tokens (Toklist), a text source and the tokenization regime to apply (eMode), and returns an hzEcode. The three differ in
//  the text source:-
//
//      TokenizeChain   The hzChain C
//      TokenizeString  The char string pBuf (presumably null terminated - confirm)
//      TokenizeFile    Presumably the content of the file at filepath - confirm
//
//  NOTE(review): The functions are defined elsewhere so the above is drawn from the signatures only. Whether Toklist is cleared or appended to, and which error codes may be
//  returned, cannot be determined from this header.
hzEcode TokenizeChain   (hzVect<hzToken>& Toklist, hzChain& C, hzTokMode eMode) ;
hzEcode TokenizeString  (hzVect<hzToken>& Toklist, const char* pBuf, hzTokMode eMode) ;
hzEcode TokenizeFile    (hzVect<hzToken>& Toklist, const char* filepath, hzTokMode eMode) ;
#endif  //  hzTokens_h