-
Notifications
You must be signed in to change notification settings - Fork 0
/
Tokenizer.py
69 lines (56 loc) · 1.97 KB
/
Tokenizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import TokenType
import TokenizeState
from token import Token
class Tokenizer:
    """Split an arithmetic expression string into a list of Token objects.

    Recognised lexemes are the operators ``+ - * /``, the parentheses
    ``( )`` and runs of digits. Any other character is skipped.
    """

    def IsOp(self, chr):
        """Return True if ``chr`` is one of the operators ``+ - * /``."""
        return chr in ('+', '-', '*', '/')

    def FindOpType(self, firstOperator):
        """Return the TokenType constant for an operator character.

        Returns ``TokenType.UNKNOWN`` when ``firstOperator`` is not one of
        ``+ - * /``.
        """
        if firstOperator == '+':
            return TokenType.ADD
        if firstOperator == '-':
            return TokenType.SUBTRACT
        if firstOperator == '*':
            return TokenType.MULTIPLY
        if firstOperator == '/':
            return TokenType.DIVIDE
        return TokenType.UNKNOWN

    def IsParen(self, chr):
        """Return True if ``chr`` is an opening or closing parenthesis."""
        return chr in ('(', ')')

    def FindParenType(self, chr):
        """Return the TokenType constant for a parenthesis character.

        Returns ``TokenType.UNKNOWN`` when ``chr`` is neither ``(`` nor ``)``.
        """
        if chr == '(':
            return TokenType.LEFT_PAREN
        if chr == ')':
            return TokenType.RIGHT_PAREN
        return TokenType.UNKNOWN

    def _ScanNumber(self, source, start):
        """Return the index just past the run of digits beginning at ``start``.

        The scan stops at the first non-digit character or at the end of
        ``source``, whichever comes first.
        """
        end = start
        while end < len(source) and source[end].isdigit():
            end += 1
        return end

    def Tokenize(self, source):
        """Convert ``source`` into a list of tokens, in order of appearance.

        Operators and parentheses each become a single-character token
        tagged with the matching TokenType constant. A run of consecutive
        digits becomes one number token, including a run that ends the
        input. Characters that are not operators, parentheses or digits
        are ignored.
        """
        tokens = []
        index = 0
        length = len(source)
        while index < length:
            char = source[index]
            if self.IsOp(char):
                tokens.append(Token(str(char), self.FindOpType(char)))
            elif self.IsParen(char):
                tokens.append(Token(str(char), self.FindParenType(char)))
            elif char.isdigit():
                # Consume the whole digit run at once so that a number at
                # the very end of the input is emitted too.
                end = self._ScanNumber(source, index)
                # NOTE(review): number tokens are tagged with
                # TokenizeState.NUMBER rather than a TokenType member;
                # kept unchanged -- confirm which constant consumers expect.
                tokens.append(
                    Token("".join(source[index:end]), TokenizeState.NUMBER)
                )
                # Resume at the first character after the number.
                index = end
                continue
            index += 1
        return tokens