    def test_basic(self):
        string = 'test 11a 110 test 111'

        start_state = 'A'

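        # Lexemes that finish in state 'B' (runs of spaces) are dropped by tokenize_ignore.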
        tokenize_events = {
            'B': tokenize_ignore
        }

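        # Transition patterns are single-character regexes; 'H' acts as the dead/error
        # state for anything that is not a binary digit or a space.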
        transitions = {
            'A': (
                (r'[^01 ]', 'H'),
                (' ', 'B'),
                ('0', 'C'),
                ('1', 'G'),
            ),
            'B': (
                (' ', 'B'),
            ),
            'C': (
                (r'[^01 ]', 'H'),
                ('0', 'C'),
                ('1', 'D'),
            ),
            'D': (
                (r'[^01 ]', 'H'),
                ('0', 'C'),
                ('1', 'E'),
            ),
            'E': (
                (r'[^01 ]', 'H'),
                ('0', 'F'),
                ('1', 'E'),
            ),
            'F': (
                # dead state: any further binary digit keeps the lexeme in F
                (r'[^01 ]', 'H'),
                (r'[01]', 'F'),
            ),
            'G': (
                (r'[^01 ]', 'H'),
                ('0', 'C'),
                ('1', 'E'),
            ),
            'H': (
                # error sink: anything but a space stays here
                (r'[^ ]', 'H'),
            ),
        }

        final_states = ('B', 'E')

        result = (('ERROR', 'test'), ('ERROR', '11a'), ('ERROR', '110'), ('ERROR', 'test'), ('E', '111'))

        tokens = lex(string, start_state, transitions, final_states, tokenize_events)
        self.assertEqual(tokens, result)

    def test_lexer_24_hour(self):
        string = '0000 at 2400 1233 1260'
        tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions, pa1.lexer_F, pa1.tokenize_events)
        result = (
            ('24HOUR_TIME', '0000'),
            ('AT', ''),
            ('NOT_TOKEN', '2400'),
            ('24HOUR_TIME', '1233'),
            ('NOT_TOKEN', '1260'),
        )
        self.assertEqual(tokens, result)

    def test_lexer_informal_dash(self):
        string = '1. to - 24. 13; 11'
        tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions, pa1.lexer_F, pa1.tokenize_events)
        result = (
            ('INFORMAL_TIME', '1'),
            ('TO', ' to '),
            ('DASH', '-'),
            ('NOT_TOKEN', '24'),
            ('NOT_TOKEN', '13'),
            ('INFORMAL_TIME', '11'),
        )
        self.assertEqual(tokens, result)

    def test_lexer_12_hour(self):
        string = '11:00 12:00am 12:00 AM 24:00 08:22 PM.'
        tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions, pa1.lexer_F, pa1.tokenize_events)
        result = (
            ('12HOUR_TIME', '11:00'),
            ('NOT_TOKEN', '12:00am'),
            ('12HOUR_TIME', '12:00'),
            ('AM_PM', ' AM'),
            ('NOT_TOKEN', '24:00'),
            ('12HOUR_TIME', '08:22'),
            ('AM_PM', ' PM'),
        )
        self.assertEqual(tokens, result)
        (r'TO|DASH', 'B'),
    ),
    'E': (),
    'F': (
        (r'TO|DASH', 'G'),
    ),
    'G': (
        ('INFORMAL_TIME', 'H'),
    ),
    'H': (
        ('AM_PM', 'I'),
    ),
    'I': (),
    'J': (
        ('AM_PM', 'I'),
        (r'TO|DASH', 'K'),
    ),
    'K': (
        ('12HOUR_TIME', 'L'),
    ),
    'L': (
        ('AM_PM', 'I'),
    ),
}

parser_F = ('C', 'H', 'I', 'J', 'L')

if __name__ == '__main__':
    tokens = lex(string, lexer_q0, lexer_transitions, lexer_F, tokenize_events)
    print(parse(tokens, parser_q0, parser_transitions, parser_F))

    def test_lexer_not_token(self):
        string = 'test 12:2a'
        tokens = lex(string, pa1.lexer_q0, pa1.lexer_transitions, pa1.lexer_F, pa1.tokenize_events)
        self.assertEqual(tokens, (('NOT_TOKEN', 'test'), ('NOT_TOKEN', '12:2a')))
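
The lex() function exercised by these tests is not included in this excerpt. The sketch below is only an illustration of the kind of FSM driver the calls imply, assuming that transition patterns are matched one character at a time, that a chunk ending in a non-final state is reported as ('ERROR', lexeme), and that a tokenize_events handler such as tokenize_ignore can drop or rename a finished token; it is not the module's actual implementation. The parse() call in the __main__ block above presumably runs the same kind of loop over token names (patterns such as r'TO|DASH') rather than characters.

import re

def lex_sketch(string, start_state, transitions, final_states, tokenize_events):
    """Illustrative FSM tokenizer; the interface is guessed from the tests above."""
    tokens = []
    # walk whitespace-delimited chunks plus the whitespace runs between them
    for lexeme in re.findall(r'\S+| +', string):
        state = start_state
        for char in lexeme:
            for pattern, target in transitions.get(state, ()):
                if re.match(pattern, char):    # every pattern describes a single character
                    state = target
                    break
        token = (state, lexeme) if state in final_states else ('ERROR', lexeme)
        handler = tokenize_events.get(state)
        if handler:
            token = handler(token)             # e.g. tokenize_ignore returning None drops it
        if token is not None:
            tokens.append(token)
    return tuple(tokens)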
Example #7
    @property
    def column(self):
        return self.linecol[1]

    @property
    def linecol(self):
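        # lookup() gives zero-based indices; report line/column as 1-based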
        line, column = self.lexinfo.lookup(self.position)
        return (line + 1, column + 1)

    def __repr__(self):
        return 'Token({})'.format(self.name)


from fsm_lexer import lex

data = open('test.g').read()

lexinfo = LexicalInfo()
tokens = []

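# Register a line break for every newline found inside an NL or REGEXP lexeme so
# that token offsets can later be mapped to line/column positions.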
for ofs, token, value in lex(data):
    if token in ('NL', 'REGEXP'):
        for i in range(value.count('\n')):
            lexinfo.mark_line(ofs + i)

    tokens.append(Token(token, value, ofs, lexinfo))

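# Print each token's 1-based line:column position together with its name and value.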
for t in tokens:
    print('{0:02}:{1:02} -- {2} = {3}'.format(t.line, t.column, t.name,
                                              repr(t.value)))
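
The LexicalInfo class used above is also missing from this excerpt. A minimal, hypothetical stand-in that satisfies the interface the loop relies on (mark_line() recording where a line break occurs, lookup() mapping an absolute offset to a zero-based (line, column) pair) might look like this; it is a guess at the missing class, not the original implementation.

import bisect

class LexicalInfo:
    """Hypothetical sketch of the helper used above, not the original class."""

    def __init__(self):
        self.line_starts = [0]    # offset at which each recorded line begins

    def mark_line(self, offset):
        # assume `offset` is where a newline character sits, so the next
        # line starts one position later
        bisect.insort(self.line_starts, offset + 1)

    def lookup(self, position):
        # zero-based (line, column); Token.linecol above adds 1 to each
        line = bisect.bisect_right(self.line_starts, position) - 1
        return line, position - self.line_starts[line]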