Orbiter 2022
Combinatorial Objects
lexer.cpp
Go to the documentation of this file.
1/*
2 * lexer.cpp
3 *
4 * Created on: Feb 14, 2021
5 * Author: betten
6 */
7
8
9
10#include "foundations.h"
11
12using namespace std;
13
14
15namespace orbiter {
16namespace layer1_foundations {
17namespace expression_parser {
18
19
20
21
23{
 // NOTE(review): the signature line (embedded line 22) is missing from this
 // listing; presumably this is the lexer constructor -- confirm against the
 // real file.
 // Puts the scanner into its empty state: no input attached, no token read.
24 //std::string program_;
25
 // Scan cursor and start-of-word marker both start out null.
26 pWord_ = 0L;
27 pWordStart_ = 0L;
28 // last token parsed
29 type_ = NONE;
30 //std::string word_;
 // Numeric value of the last NUMBER token.
31 value_ = 0.;
 // No token node yet; T is tested against null before being deleted in
 // create_text_token.
32 T = 0L;
33}
34
35
36void lexer::print_token(std::ostream &ost, TokenType t)
37{
38 std::string s;
39
40 token_as_string(s, t);
41 ost << s;
42}
43
44void lexer::token_as_string(std::string &s, TokenType t)
45{
46 if (t == NONE) {
47 s.assign("NONE");
48 }
49 else if (t == NAME) {
50 s.assign("NAME");
51 }
52 else if (t == NUMBER) {
53 s.assign("NUMBER");
54 }
55 else if (t == END) {
56 s.assign("END");
57 }
58 else if (t == PLUS) {
59 s.assign("PLUS");
60 }
61 else if (t == MINUS) {
62 s.assign("MINUS");
63 }
64 else if (t == MULTIPLY) {
65 s.assign("MULTIPLY");
66 }
67 else if (t == DIVIDE) {
68 s.assign("DIVIDE");
69 }
70 else if (t == ASSIGN) {
71 s.assign("ASSIGN");
72 }
73 else if (t == LHPAREN) {
74 s.assign("LHPAREN");
75 }
76 else if (t == RHPAREN) {
77 s.assign("RHPAREN");
78 }
79 else if (t == COMMA) {
80 s.assign("COMMA");
81 }
82 else if (t == NOT) {
83 s.assign("NOT");
84 }
85 else if (t == LT) {
86 s.assign("LT");
87 }
88 else if (t == GT) {
89 s.assign("GT");
90 }
91 else if (t == LE) {
92 s.assign("LE");
93 }
94 else if (t == GE) {
95 s.assign("GE");
96 }
97 else if (t == EQ) {
98 s.assign("EQ");
99 }
100 else if (t == NE) {
101 s.assign("NE");
102 }
103 else if (t == AND) {
104 s.assign("AND");
105 }
106 else if (t == OR) {
107 s.assign("OR");
108 }
109 else if (t == ASSIGN_ADD) {
110 s.assign("ASSIGN_ADD");
111 }
112 else if (t == ASSIGN_SUB) {
113 s.assign("ASSIGN_SUB");
114 }
115 else if (t == ASSIGN_MUL) {
116 s.assign("ASSIGN_MUL");
117 }
118 else if (t == ASSIGN_DIV) {
119 s.assign("ASSIGN_DIV");
120 }
121}
122
123
// Scans the next token starting at pWord_ and returns its type.
//
// Side effects visible in this function: word_ receives the token text,
// pWordStart_/pWord_ are moved to the start/end of the token, type_ is set to
// the returned type, and value_ is set for NUMBER tokens.
//
// verbose_level: when >= 1, each recognised token is traced to std::cout.
// ignoreSign:    when true, a leading '+' or '-' is never folded into a
//                following number; it is scanned as an operator instead.
//
// Throws std::runtime_error on: a second read past the end of the input, a
// numeric literal that cannot be extracted, or a character that starts no
// token.
//
// NOTE(review): the embedded line numbers of this listing skip 150, 198, 231,
// 245, 258, 285 and 310, so one statement per gap may be missing from view
// (possibly the code that populates T) -- confirm against the real file.
// NOTE(review): if pWord_ points to plain char, the isspace/isdigit/isalnum
// calls on *pWord_ below receive a possibly negative value, which <cctype>
// does not allow -- confirm the pointer's type.
124TokenType lexer::GetToken (int verbose_level, const bool ignoreSign)
125{
126 int f_v = (verbose_level >= 1);
127
128
 // start from an empty token text
129 word_.erase (0, std::string::npos);
130
131 // skip spaces
132 while (*pWord_ && isspace (*pWord_))
133 ++pWord_;
134
135 pWordStart_ = pWord_; // remember where word_ starts *now*
136
137 // look out for unterminated statements and things
138 if (*pWord_ == 0 && // we have EOF
139 type_ == END) // after already detecting it
140 throw std::runtime_error ("Unexpected end of expression.");
141
142 unsigned char cFirstCharacter = *pWord_; // first character in new word_
143
 // First time the terminator is reached: report END with a placeholder text.
144 if (cFirstCharacter == 0) // stop at end of file
145 {
146 word_ = "<end of expression>";
147 if (f_v) {
148 std::cout << "token END " << word_ << std::endl;
149 }
151 return type_ = END;
152 }
153
 // Safe to read: cFirstCharacter is not the terminator, so pWord_ + 1 is
 // at worst the terminator itself.
154 unsigned char cNextCharacter = *(pWord_ + 1); // 2nd character in new word_
155
156 // look for number
157 // can be: + or - followed by a decimal point
158 // or: + or - followed by a digit
159 // or: starting with a digit
160 // or: decimal point followed by a digit
161 if ((!ignoreSign &&
162 (cFirstCharacter == '+' || cFirstCharacter == '-') &&
163 (isdigit (cNextCharacter) || cNextCharacter == '.')
164 )
165 || isdigit (cFirstCharacter)
166 // allow decimal numbers without a leading 0. e.g. ".5"
167 // Dennis Jones 01-30-2009
168 || (cFirstCharacter == '.' && isdigit (cNextCharacter)) )
169 {
170 // skip sign for now
171 if ((cFirstCharacter == '+' || cFirstCharacter == '-'))
172 pWord_++;
 // The scan accepts any run of digits and '.', including several '.'.
173 while (isdigit (*pWord_) || *pWord_ == '.')
174 pWord_++;
175
176 // allow for 1.53158e+15
177 if (*pWord_ == 'e' || *pWord_ == 'E')
178 {
179 pWord_++; // skip 'e'
180 if ((*pWord_ == '+' || *pWord_ == '-'))
181 pWord_++; // skip sign after e
182 while (isdigit (*pWord_)) // now digits after e
183 pWord_++;
184 }
185
 // word_ holds the whole scanned literal, sign included.
186 word_ = std::string (pWordStart_, pWord_ - pWordStart_);
187
188 std::istringstream is (word_);
189 // parse std::string into double value
190 is >> value_;
191
 // NOTE(review): only an extraction failure that stops before the end of
 // word_ is rejected here; characters left unread after a successful
 // extraction (e.g. a second '.') are not checked -- confirm this is
 // intended.
192 if (is.fail () && !is.eof ())
193 throw std::runtime_error ("Bad numeric literal: " + word_);
194 if (f_v) {
195 std::cout << "token NUMBER " << value_ << std::endl;
196 }
197
199 return type_ = NUMBER;
200 } // end of number found
201
202 // special test for 2-character sequences: <= >= == !=
203 // also +=, -=, /=, *=
204 if (cNextCharacter == '=')
205 {
206 switch (cFirstCharacter)
207 {
208 // comparisons
209 case '=': type_ = EQ; break;
210 case '<': type_ = LE; break;
211 case '>': type_ = GE; break;
212 case '!': type_ = NE; break;
213 // assignments
214 case '+': type_ = ASSIGN_ADD; break;
215 case '-': type_ = ASSIGN_SUB; break;
216 case '*': type_ = ASSIGN_MUL; break;
217 case '/': type_ = ASSIGN_DIV; break;
218 // none of the above
219 default: type_ = NONE; break;
220 } // end of switch on cFirstCharacter
221
 // NONE means the first character does not combine with '='; fall through
 // to the tests below.
222 if (type_ != NONE)
223 {
224 word_ = std::string (pWordStart_, 2);
225 pWord_ += 2; // skip both characters
226 if (f_v) {
227 std::cout << "token operator ";
228 print_token(std::cout, type_);
229 std::cout << std::endl;
230 }
232 return type_;
233 } // end of found one
234 } // end of *=
235
236 switch (cFirstCharacter)
237 {
 // A lone '&' (or '|') breaks out of the switch and is rejected below as
 // an unexpected character.
238 case '&': if (cNextCharacter == '&') // &&
239 {
240 word_ = std::string (pWordStart_, 2);
241 pWord_ += 2; // skip both characters
242 if (f_v) {
243 std::cout << "token AND " << std::endl;
244 }
 // NOTE(review): T is dereferenced here, but no assignment to T is
 // visible in this function (embedded line 245 is missing) -- confirm T
 // is valid at this point.
246 T->f_text = true;
247 T->value_text.assign(word_);
248 return type_ = AND;
249 }
250 break;
251 case '|': if (cNextCharacter == '|') // ||
252 {
253 word_ = std::string (pWordStart_, 2);
254 pWord_ += 2; // skip both characters
255 if (f_v) {
256 std::cout << "token OR " << std::endl;
257 }
 // NOTE(review): same concern as in the && branch (embedded line 258 is
 // missing).
259 T->f_text = true;
260 T->value_text.assign(word_);
261 return type_ = OR;
262 }
263 break;
264 // single-character symbols
265 case '=':
266 case '<':
267 case '>':
268 case '+':
269 case '-':
270 case '/':
271 case '*':
272 case '(':
273 case ')':
274 case ',':
275 case '!':
276 word_ = std::string (pWordStart_, 1);
277 ++pWord_; // skip it
 // The token type is the character code itself, converted to TokenType.
278 type_ = TokenType (cFirstCharacter);
279 if (f_v) {
280 std::cout << "token operator ";
281 print_token(std::cout, type_);
282 std::cout << std::endl;
283 }
284
286 return type_;
287 } // end of switch on cFirstCharacter
288
 // Anything that is not a letter at this point cannot start a token.
 // Control characters are reported by decimal code, the rest literally.
289 if (!isalpha (cFirstCharacter))
290 {
291 if (cFirstCharacter < ' ')
292 {
293 std::ostringstream s;
294 s << "Unexpected character (decimal " << int (cFirstCharacter) << ")";
295 throw std::runtime_error (s.str ());
296 }
297 else
298 throw std::runtime_error ("Unexpected character: " + std::string (1, cFirstCharacter));
299 }
300
301 // we have a word (starting with A-Z) - pull it out
 // The name continues over letters, digits and underscores.
302 while (isalnum (*pWord_) || *pWord_ == '_')
303 ++pWord_;
304
305 word_ = std::string (pWordStart_, pWord_ - pWordStart_);
306
307 if (f_v) {
308 std::cout << "token NAME " << word_ << std::endl;
309 }
311 return type_ = NAME;
312
313}
314
// Marks the token node T as a text token and stores a copy of txt in it.
// Any node previously held in T is deleted first.
315void lexer::create_text_token(std::string &txt)
316{
317 if (T) {
318 delete T;
319 }
 // NOTE(review): embedded line 320 is missing from this listing. As shown, T
 // is dereferenced below right after being deleted (or while still null);
 // presumably the missing line assigns a fresh node to T -- confirm against
 // the real file.
321 T->f_text = true;
322 T->value_text.assign(txt);
323 //std::cout << "lexer::create_text_token text=" << txt << std::endl;
324
325}
326
328{
 // NOTE(review): the signature line (embedded line 327) is missing from this
 // listing; the commented-out trace below suggests this is
 // lexer::create_double_token with a parameter named dbl -- confirm.
 // Marks the token node T as a numeric token holding dbl, after deleting any
 // node previously held in T.
329 if (T) {
330 delete T;
331 }
 // NOTE(review): embedded line 332 is missing from this listing. As shown, T
 // is dereferenced below right after being deleted (or while still null);
 // presumably the missing line assigns a fresh node to T -- confirm.
333 T->f_double = true;
334 T->value_double = dbl;
335 //std::cout << "lexer::create_double_token value=" << dbl << std::endl;
336
337}
338
339
340
342{
 // NOTE(review): the signature line (embedded line 341) is missing from this
 // listing, so the function name and the declaration of `wanted` are not
 // visible here -- confirm against the real file.
 // Throws std::runtime_error unless the most recently scanned token type
 // (type_) equals wanted.
343 if (type_ != wanted)
344 {
345 std::ostringstream s;
 // NOTE(review): the cast to char yields a readable symbol only when the
 // numeric value of wanted is a printable character code -- confirm
 // against the TokenType definition.
346 s << "'" << static_cast <char> (wanted) << "' expected.";
347 throw std::runtime_error (s.str ());
348 }
349}
350
351
352
353
354
355}}}
356
357
358
TokenType GetToken(int verbose_level, const bool ignoreSign=false)
Definition: lexer.cpp:124
void print_token(std::ostream &ost, TokenType t)
Definition: lexer.cpp:36
void token_as_string(std::string &s, TokenType t)
Definition: lexer.cpp:44
the orbiter library for the classification of combinatorial objects