d4/d79/lexer_8cpp_source.html

/*

 * lexer.cpp

 *

 *  Created on: Feb 14, 2021

 *      Author: betten

 */


#include "foundations.h"


using namespace std;


namespace orbiter {

namespace layer1_foundations {

namespace expression_parser {


/**
 * Default constructor.
 *
 * Leaves the lexer in a neutral state: both cursor pointers and the
 * current terminal node T are null, the last token type is NONE and
 * the numeric value is zero. word_ is a std::string and is
 * default-constructed (empty).
 */
lexer::lexer()
    : pWord_(nullptr),
      pWordStart_(nullptr),
      type_(NONE),        // no token has been parsed yet
      value_(0.),
      T(nullptr)
{
}


/**
 * Writes the symbolic name of a token type to a stream.
 *
 * The name is produced by token_as_string(). For a token type that
 * function does not recognize, the local buffer stays empty and an
 * empty string is written.
 *
 * @param ost  output stream to write to
 * @param t    token type to print
 */
void lexer::print_token(std::ostream &ost, TokenType t)
{
    std::string label;

    token_as_string(label, t);
    ost << label;
}


void lexer::token_as_string(std::string &s, TokenType t)

{

    if (t == NONE) {

        s.assign("NONE");

    }

    else if (t == NAME) {

        s.assign("NAME");

    }

    else if (t == NUMBER) {

        s.assign("NUMBER");

    }

    else if (t == END) {

        s.assign("END");

    }

    else if (t == PLUS) {

        s.assign("PLUS");

    }

    else if (t == MINUS) {

        s.assign("MINUS");

    }

    else if (t == MULTIPLY) {

        s.assign("MULTIPLY");

    }

    else if (t == DIVIDE) {

        s.assign("DIVIDE");

    }

    else if (t == ASSIGN) {

        s.assign("ASSIGN");

    }

    else if (t == LHPAREN) {

        s.assign("LHPAREN");

    }

    else if (t == RHPAREN) {

        s.assign("RHPAREN");

    }

    else if (t == COMMA) {

        s.assign("COMMA");

    }

    else if (t == NOT) {

        s.assign("NOT");

    }

    else if (t == LT) {

        s.assign("LT");

    }

    else if (t == GT) {

        s.assign("GT");

    }

    else if (t == LE) {

        s.assign("LE");

    }

    else if (t == GE) {

        s.assign("GE");

    }

    else if (t == EQ) {

        s.assign("EQ");

    }

    else if (t == NE) {

        s.assign("NE");

    }

    else if (t == AND) {

        s.assign("AND");

    }

    else if (t == OR) {

        s.assign("OR");

    }

    else if (t == ASSIGN_ADD) {

        s.assign("ASSIGN_ADD");

    }

    else if (t == ASSIGN_SUB) {

        s.assign("ASSIGN_SUB");

    }

    else if (t == ASSIGN_MUL) {

        s.assign("ASSIGN_MUL");

    }

    else if (t == ASSIGN_DIV) {

        s.assign("ASSIGN_DIV");

    }

}


/**
 * Scans the next token from the input at pWord_ and makes it current.
 *
 * Leading whitespace is skipped, pWordStart_ is set to the first
 * character of the token and pWord_ is advanced past it. On success
 * word_ holds the token text, type_ holds the returned token type and
 * T points to a freshly allocated terminal node describing the token
 * (a double node for NUMBER, a text node for everything else). The
 * previous node held in T is deleted on every path.
 *
 * Recognized tokens, in the order they are tried:
 *  - end of input (NUL terminator)            -> END
 *  - numeric literal, optionally signed       -> NUMBER (value_ is set)
 *  - <= >= == != += -= *= /=                  -> LE GE EQ NE ASSIGN_*
 *  - && and ||                                -> AND, OR
 *  - = < > + - / * ( ) , !                    -> TokenType(character)
 *  - letter followed by letters/digits/'_'    -> NAME
 *
 * @param verbose_level  if >= 1, each token is echoed to std::cout
 * @param ignoreSign     if true, a leading '+' or '-' is never taken as
 *                       part of a numeric literal; it is returned as an
 *                       operator token instead
 * @return the type of the token just scanned (also stored in type_)
 * @throws std::runtime_error if called again after END was already
 *         returned, on a malformed numeric literal, or on a character
 *         that cannot start any token
 */
TokenType lexer::GetToken (int verbose_level, const bool ignoreSign)
{
    int f_v = (verbose_level >= 1);

    word_.erase (0, std::string::npos);

    // skip spaces
    // The <cctype> classifiers are only defined for values representable
    // as unsigned char (or EOF), so plain char is converted before each call.
    while (*pWord_ && isspace (static_cast <unsigned char> (*pWord_)))
        ++pWord_;

    pWordStart_ = pWord_;   // remember where word_ starts *now*

    // look out for unterminated statements and things
    if (*pWord_ == 0 &&  // we have EOF
        type_ == END)    // after already detecting it
        throw std::runtime_error ("Unexpected end of expression.");

    unsigned char cFirstCharacter = *pWord_;        // first character in new word_

    if (cFirstCharacter == 0)    // stop at end of file
    {
        word_ = "<end of expression>";
        if (f_v) {
            std::cout << "token END " << word_ << std::endl;
        }
        create_text_token(word_);
        return type_ = END;
    }

    unsigned char cNextCharacter = *(pWord_ + 1);   // 2nd character in new word_

    // look for number
    // can be: + or - followed by a decimal point
    // or: + or - followed by a digit
    // or: starting with a digit
    // or: decimal point followed by a digit
    const bool f_signed_number =
        !ignoreSign &&
        (cFirstCharacter == '+' || cFirstCharacter == '-') &&
        (isdigit (cNextCharacter) || cNextCharacter == '.');

    // allow decimal numbers without a leading 0. e.g. ".5"
    // Dennis Jones 01-30-2009
    const bool f_dot_number =
        (cFirstCharacter == '.' && isdigit (cNextCharacter));

    if (f_signed_number || isdigit (cFirstCharacter) || f_dot_number)
    {
        // skip sign for now
        if (cFirstCharacter == '+' || cFirstCharacter == '-')
            pWord_++;

        while (isdigit (static_cast <unsigned char> (*pWord_)) || *pWord_ == '.')
            pWord_++;

        // allow for 1.53158e+15
        if (*pWord_ == 'e' || *pWord_ == 'E')
        {
            pWord_++; // skip 'e'
            if (*pWord_ == '+' || *pWord_ == '-')
                pWord_++; // skip sign after e
            while (isdigit (static_cast <unsigned char> (*pWord_)))  // now digits after e
                pWord_++;
        }

        word_ = std::string (pWordStart_, pWord_ - pWordStart_);

        // parse std::string into double value
        std::istringstream is (word_);
        is >> value_;

        // NOTE(review): this only rejects a conversion that failed before
        // reaching the end of word_. A word such as "1.2.3" appears to pass
        // with just its leading part converted - confirm whether stricter
        // validation is wanted.
        if (is.fail () && !is.eof ())
            throw std::runtime_error ("Bad numeric literal: " + word_);

        if (f_v) {
            std::cout << "token NUMBER " << value_ << std::endl;
        }

        create_double_token(value_);
        return type_ = NUMBER;
    }   // end of number found

    // special test for 2-character sequences: <= >= == !=
    // also +=, -=, /=, *=
    if (cNextCharacter == '=')
    {
        switch (cFirstCharacter)
        {
        // comparisons
        case '=': type_ = EQ;   break;
        case '<': type_ = LE;   break;
        case '>': type_ = GE;   break;
        case '!': type_ = NE;   break;
        // assignments
        case '+': type_ = ASSIGN_ADD;   break;
        case '-': type_ = ASSIGN_SUB;   break;
        case '*': type_ = ASSIGN_MUL;   break;
        case '/': type_ = ASSIGN_DIV;   break;
        // none of the above
        default:  type_ = NONE; break;
        } // end of switch on cFirstCharacter

        if (type_ != NONE)
        {
            word_ = std::string (pWordStart_, 2);
            pWord_ += 2;   // skip both characters
            if (f_v) {
                std::cout << "token operator ";
                print_token(std::cout, type_);
                std::cout << std::endl;
            }
            create_text_token(word_);
            return type_;
        } // end of found one
    } // end of *=

    switch (cFirstCharacter)
    {
    case '&':
        if (cNextCharacter == '&')    // &&
        {
            word_ = std::string (pWordStart_, 2);
            pWord_ += 2;   // skip both characters
            if (f_v) {
                std::cout << "token AND " << std::endl;
            }
            // go through the helper so the previous node in T is released
            create_text_token(word_);
            return type_ = AND;
        }
        break;  // a lone '&' is rejected below as an unexpected character

    case '|':
        if (cNextCharacter == '|')    // ||
        {
            word_ = std::string (pWordStart_, 2);
            pWord_ += 2;   // skip both characters
            if (f_v) {
                std::cout << "token OR " << std::endl;
            }
            // go through the helper so the previous node in T is released
            create_text_token(word_);
            return type_ = OR;
        }
        break;  // a lone '|' is rejected below as an unexpected character

    // single-character symbols
    case '=':
    case '<':
    case '>':
    case '+':
    case '-':
    case '/':
    case '*':
    case '(':
    case ')':
    case ',':
    case '!':
        word_ = std::string (pWordStart_, 1);
        ++pWord_;   // skip it
        // the token type of a single-character symbol is its character code
        type_ = static_cast <TokenType> (cFirstCharacter);
        if (f_v) {
            std::cout << "token operator ";
            print_token(std::cout, type_);
            std::cout << std::endl;
        }
        create_text_token(word_);
        return type_;

    default:
        break;
    } // end of switch on cFirstCharacter

    if (!isalpha (cFirstCharacter))
    {
        if (cFirstCharacter < ' ')
        {
            // control characters are reported by their numeric code
            std::ostringstream s;
            s << "Unexpected character (decimal " << int (cFirstCharacter) << ")";
            throw std::runtime_error (s.str ());
        }
        else
            throw std::runtime_error ("Unexpected character: " + std::string (1, cFirstCharacter));
    }

    // we have a word (starting with A-Z) - pull it out
    while (isalnum (static_cast <unsigned char> (*pWord_)) || *pWord_ == '_')
        ++pWord_;

    word_ = std::string (pWordStart_, pWord_ - pWordStart_);

    if (f_v) {
        std::cout << "token NAME " << word_ << std::endl;
    }
    create_text_token(word_);
    return type_ = NAME;
}


/**
 * Replaces the current terminal node T with a new text node.
 *
 * The node previously held in T (if any) is deleted. The new node has
 * f_text set and value_text set to a copy of txt.
 *
 * @param txt  text of the token
 */
void lexer::create_text_token(std::string &txt)
{
    // deleting a null pointer is a no-op, so no guard is required
    delete T;

    T = new syntax_tree_node_terminal;
    T->f_text = true;
    T->value_text = txt;
}


/**
 * Replaces the current terminal node T with a new numeric node.
 *
 * The node previously held in T (if any) is deleted. The new node has
 * f_double set and value_double set to dbl.
 *
 * @param dbl  numeric value of the token
 */
void lexer::create_double_token(double dbl)
{
    // deleting a null pointer is a no-op, so no guard is required
    delete T;

    T = new syntax_tree_node_terminal;
    T->value_double = dbl;
    T->f_double = true;
}


/**
 * Verifies that the current token has the expected type.
 *
 * Single-character symbols are reported as the character itself, as
 * GetToken() derives their token type directly from the character.
 * Any other token type (two-character operators and the remaining
 * kinds) cannot be shown as one character, so its symbolic name from
 * token_as_string() is used. If no name is available, the character
 * form is used as a fallback.
 *
 * @param wanted  token type the caller requires type_ to be
 * @throws std::runtime_error with the message "'<token>' expected."
 *         when type_ differs from wanted
 */
void lexer::CheckToken (TokenType wanted)
{
    if (type_ == wanted)
        return;

    std::ostringstream s;
    bool f_as_character;

    switch (static_cast <int> (wanted))
    {
    // the single-character symbols produced by GetToken()
    case '=':
    case '<':
    case '>':
    case '+':
    case '-':
    case '/':
    case '*':
    case '(':
    case ')':
    case ',':
    case '!':
        f_as_character = true;
        break;
    default:
        f_as_character = false;
        break;
    }

    std::string name;

    if (!f_as_character)
        token_as_string(name, wanted);

    if (name.empty ())
        s << "'" << static_cast <char> (wanted) << "' expected.";
    else
        s << "'" << name << "' expected.";

    throw std::runtime_error (s.str ());
}


}}}


orbiter::layer1_foundations::expression_parser::lexer::CheckToken
void CheckToken(TokenType wanted)
Definition: lexer.cpp:341

orbiter::layer1_foundations::expression_parser::lexer::value_
double value_
Definition: expression_parser.h:164

orbiter::layer1_foundations::expression_parser::lexer::GetToken
TokenType GetToken(int verbose_level, const bool ignoreSign=false)
Definition: lexer.cpp:124

orbiter::layer1_foundations::expression_parser::lexer::pWordStart_
const char * pWordStart_
Definition: expression_parser.h:160

orbiter::layer1_foundations::expression_parser::lexer::print_token
void print_token(std::ostream &ost, TokenType t)
Definition: lexer.cpp:36

orbiter::layer1_foundations::expression_parser::lexer::pWord_
const char * pWord_
Definition: expression_parser.h:159

orbiter::layer1_foundations::expression_parser::lexer::word_
std::string word_
Definition: expression_parser.h:163

orbiter::layer1_foundations::expression_parser::lexer::type_
TokenType type_
Definition: expression_parser.h:162

orbiter::layer1_foundations::expression_parser::lexer::token_as_string
void token_as_string(std::string &s, TokenType t)
Definition: lexer.cpp:44

orbiter::layer1_foundations::expression_parser::lexer::create_text_token
void create_text_token(std::string &txt)
Definition: lexer.cpp:315

orbiter::layer1_foundations::expression_parser::lexer::T
syntax_tree_node_terminal * T
Definition: expression_parser.h:165

orbiter::layer1_foundations::expression_parser::lexer::lexer
lexer()
Definition: lexer.cpp:22

orbiter::layer1_foundations::expression_parser::lexer::create_double_token
void create_double_token(double dbl)
Definition: lexer.cpp:327

orbiter::layer1_foundations::expression_parser::syntax_tree_node_terminal
terminal node in the syntax tree of an expression
Definition: expression_parser.h:185

orbiter::layer1_foundations::expression_parser::syntax_tree_node_terminal::f_double
int f_double
Definition: expression_parser.h:188

orbiter::layer1_foundations::expression_parser::syntax_tree_node_terminal::value_text
std::string value_text
Definition: expression_parser.h:192

orbiter::layer1_foundations::expression_parser::syntax_tree_node_terminal::value_double
double value_double
Definition: expression_parser.h:191

orbiter::layer1_foundations::expression_parser::syntax_tree_node_terminal::f_text
int f_text
Definition: expression_parser.h:189

foundations.h

orbiter::layer1_foundations::TokenType
TokenType
Definition: foundations.h:792

orbiter::layer1_foundations::NOT
@ NOT
Definition: foundations.h:805

orbiter::layer1_foundations::OR
@ OR
Definition: foundations.h:815

orbiter::layer1_foundations::NAME
@ NAME
Definition: foundations.h:794

orbiter::layer1_foundations::RHPAREN
@ RHPAREN
Definition: foundations.h:803

orbiter::layer1_foundations::COMMA
@ COMMA
Definition: foundations.h:804

orbiter::layer1_foundations::AND
@ AND
Definition: foundations.h:814

orbiter::layer1_foundations::LT
@ LT
Definition: foundations.h:808

orbiter::layer1_foundations::GE
@ GE
Definition: foundations.h:811

orbiter::layer1_foundations::ASSIGN_SUB
@ ASSIGN_SUB
Definition: foundations.h:820

orbiter::layer1_foundations::GT
@ GT
Definition: foundations.h:809

orbiter::layer1_foundations::ASSIGN_ADD
@ ASSIGN_ADD
Definition: foundations.h:819

orbiter::layer1_foundations::MINUS
@ MINUS
Definition: foundations.h:798

orbiter::layer1_foundations::NONE
@ NONE
Definition: foundations.h:793

orbiter::layer1_foundations::PLUS
@ PLUS
Definition: foundations.h:797

orbiter::layer1_foundations::ASSIGN_MUL
@ ASSIGN_MUL
Definition: foundations.h:821

orbiter::layer1_foundations::LHPAREN
@ LHPAREN
Definition: foundations.h:802

orbiter::layer1_foundations::DIVIDE
@ DIVIDE
Definition: foundations.h:800

orbiter::layer1_foundations::END
@ END
Definition: foundations.h:796

orbiter::layer1_foundations::ASSIGN
@ ASSIGN
Definition: foundations.h:801

orbiter::layer1_foundations::ASSIGN_DIV
@ ASSIGN_DIV
Definition: foundations.h:822

orbiter::layer1_foundations::LE
@ LE
Definition: foundations.h:810

orbiter::layer1_foundations::NUMBER
@ NUMBER
Definition: foundations.h:795

orbiter::layer1_foundations::NE
@ NE
Definition: foundations.h:813

orbiter::layer1_foundations::MULTIPLY
@ MULTIPLY
Definition: foundations.h:799

orbiter::layer1_foundations::EQ
@ EQ
Definition: foundations.h:812

orbiter
the orbiter library for the classification of combinatorial objects
Definition: classification.h:18