def test_basic(self):
    # DFA over {0, 1}: accepts lexemes matching (0|10)*11+ (final state E)
    # or runs of spaces (final state B, suppressed); anything else is ERROR.
    string = 'test 11a 110 test 111'
    start_state = 'A'
    tokenize_events = {
        'B': tokenize_ignore,
    }
    transitions = {
        'A': (
            (r'[^01 ]', 'H'),
            (' ', 'B'),
            ('0', 'C'),
            ('1', 'G'),
        ),
        'B': (
            (' ', 'B'),
        ),
        'C': (
            (r'[^01 ]', 'H'),
            ('0', 'C'),
            ('1', 'D'),
        ),
        'D': (
            (r'[^01 ]', 'H'),
            ('0', 'C'),
            ('1', 'E'),
        ),
        'E': (
            (r'[^01 ]', 'H'),
            ('0', 'F'),
            ('1', 'E'),
        ),
        'F': (
            (r'[^01 ]', 'H'),
            (r'[01]', 'F'),  # trap state: loop on any further 0 or 1
        ),
        'G': (
            (r'[^01 ]', 'H'),
            ('0', 'C'),
            ('1', 'E'),
        ),
        'H': (
            (r'[^ ]', 'H'),
        ),
    }
    final_states = ('B', 'E')
    result = (
        ('ERROR', 'test'),
        ('ERROR', '11a'),
        ('ERROR', '110'),
        ('ERROR', 'test'),
        ('E', '111'),
    )
    tokens = lex(string, start_state, transitions, final_states, tokenize_events)
    self.assertEqual(tokens, result)
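# For reference, a minimal maximal-munch driver consistent with how the tests
# in this file call lex(). Everything here is inferred from those calls and
# their expected results; the real fsm_lexer.lex may differ. Assumptions:
# each transition label is a regex tried against one character, a lexeme is
# closed when no transition fires, final states emit (state, lexeme) unless a
# tokenize_events callback rewrites or drops the pair (tokenize_ignore
# returning None drops it), and non-final states emit ('ERROR', lexeme).
import re


def tokenize_ignore(state, lexeme):
    return None  # drop the token entirely (used above for runs of spaces)


def lex(string, start_state, transitions, final_states, tokenize_events):
    def emit(state, lexeme):
        if state in final_states:
            event = tokenize_events.get(state)
            return event(state, lexeme) if event else (state, lexeme)
        return ('ERROR', lexeme)

    tokens = []
    state, lexeme, i = start_state, '', 0
    while i < len(string):
        ch = string[i]
        for pattern, target in transitions.get(state, ()):
            if re.match(pattern, ch):
                state, lexeme, i = target, lexeme + ch, i + 1
                break
        else:
            # No transition fires: close the current lexeme and rescan ch
            # from the start state (or consume it alone if it never matches).
            token = emit(state, lexeme) if lexeme else ('ERROR', ch)
            if token is not None:
                tokens.append(token)
            if not lexeme:
                i += 1
            state, lexeme = start_state, ''
    if lexeme:
        token = emit(state, lexeme)
        if token is not None:
            tokens.append(token)
    return tuple(tokens)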
def test_lexer_24_hour(self):
    # Hours run 00-23 and minutes 00-59, so '2400' and '1260' are NOT_TOKEN.
    string = '0000 at 2400 1233 1260'
    tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions,
                 pa1.lexer_F, pa1.tokenize_events)
    result = (
        ('24HOUR_TIME', '0000'),
        ('AT', ''),
        ('NOT_TOKEN', '2400'),
        ('24HOUR_TIME', '1233'),
        ('NOT_TOKEN', '1260'),
    )
    self.assertEqual(tokens, result)
def test_lexer_informal_dash(self):
    string = '1. to - 24. 13; 11'
    tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions,
                 pa1.lexer_F, pa1.tokenize_events)
    result = (
        ('INFORMAL_TIME', '1'),
        ('TO', ' to '),
        ('DASH', '-'),
        ('NOT_TOKEN', '24'),
        ('NOT_TOKEN', '13'),
        ('INFORMAL_TIME', '11'),
    )
    self.assertEqual(tokens, result)
def test_lexer_12_hour(self):
    # '12:00am' (no space before lowercase 'am') and '24:00' (hour out of
    # the 01-12 range) must lex as NOT_TOKEN.
    string = '11:00 12:00am 12:00 AM 24:00 08:22 PM.'
    tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions,
                 pa1.lexer_F, pa1.tokenize_events)
    result = (
        ('12HOUR_TIME', '11:00'),
        ('NOT_TOKEN', '12:00am'),
        ('12HOUR_TIME', '12:00'),
        ('AM_PM', ' AM'),
        ('NOT_TOKEN', '24:00'),
        ('12HOUR_TIME', '08:22'),
        ('AM_PM', ' PM'),
    )
    self.assertEqual(tokens, result)
        (r'TO|DASH', 'B'),
    ),
    'E': (),
    'F': (
        (r'TO|DASH', 'G'),
    ),
    'G': (
        ('INFORMAL_TIME', 'H'),
    ),
    'H': (
        ('AM_PM', 'I'),
    ),
    'I': (),
    'J': (
        ('AM_PM', 'I'),
        (r'TO|DASH', 'K'),
    ),
    'K': (
        ('12HOUR_TIME', 'L'),
    ),
    'L': (
        ('AM_PM', 'I'),
    ),
}

parser_F = ('C', 'H', 'I', 'J', 'L')


if __name__ == '__main__':
    tokens = lex(string, lexer_q0, lexer_transitions, lexer_F, tokenize_events)
    print(parse(tokens, parser_q0, parser_transitions, parser_F))
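# A plausible parse() matching the call above: the same table-driven DFA
# machinery run over token names instead of characters, accepting when the
# token sequence ends in a final state. This is a sketch inferred from usage
# and from patterns like r'TO|DASH'; it is not the real pa1 implementation.
import re


def parse(tokens, start_state, transitions, final_states):
    state = start_state
    for name, _value in tokens:
        for pattern, target in transitions.get(state, ()):
            if re.fullmatch(pattern, name):
                state = target
                break
        else:
            return False  # no transition for this token name: reject
    return state in final_states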
def test_lexer_not_token(self):
    string = 'test 12:2a'
    tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions,
                 pa1.lexer_F, pa1.tokenize_events)
    self.assertEqual(tokens, (('NOT_TOKEN', 'test'), ('NOT_TOKEN', '12:2a')))
    @property
    def column(self):
        return self.linecol[1]

    @property
    def linecol(self):
        # lookup() is 0-based; report 1-based line and column numbers.
        line, column = self.lexinfo.lookup(self.position)
        return (line + 1, column + 1)

    def __repr__(self):
        return 'Token({})'.format(self.name)


from fsm_lexer import lex

with open('test.g') as f:
    data = f.read()

lexinfo = LexicalInfo()
tokens = []
for ofs, token, value in lex(data):
    if token in ('NL', 'REGEXP'):
        # Record the absolute offset of every newline inside the token so
        # that line/column lookups stay correct across multi-line tokens.
        for i, ch in enumerate(value):
            if ch == '\n':
                lexinfo.mark_line(ofs + i)
    tokens.append(Token(token, value, ofs, lexinfo))

for t in tokens:
    print('{0:02}:{1:02} -- {2} = {3}'.format(t.line, t.column, t.name, repr(t.value)))
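# A sketch of the LexicalInfo helper assumed by the driver above: it records
# the absolute offset of every newline via mark_line() and maps an offset to
# a 0-based (line, column) pair via lookup() using a binary search. Only the
# two method names and their call sites come from the source; the
# implementation itself is a guess consistent with that usage.
import bisect


class LexicalInfo:
    def __init__(self):
        self.newline_offsets = []  # offsets of '\n' characters, kept sorted

    def mark_line(self, offset):
        bisect.insort(self.newline_offsets, offset)

    def lookup(self, position):
        # Line index = number of newlines strictly before `position`.
        line = bisect.bisect_left(self.newline_offsets, position)
        start = self.newline_offsets[line - 1] + 1 if line else 0
        return (line, position - start)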