def test_skip_whitespace(self):
    """Lex a statement with ``skip_whitespace=True`` and check token positions."""
    text = 'crop \t ( 20, 30, 40, 10 ) ;'

    # (identifier, regex) pairs kept as data so the rule set reads as a table.
    rule_specs = [
        ('identifier', r'[a-zA-Z_][a-zA-Z_]*'),
        ('lpar', r'\('),
        ('number', r'[1-9][0-9]*'),
        ('rpar', r'\)'),
        ('comma', r','),
        ('semi', r';'),
    ]
    lexer = lexery.Lexer(
        rules=[
            lexery.Rule(identifier=name, pattern=re.compile(regex))
            for name, regex in rule_specs
        ],
        skip_whitespace=True)

    tokens = lexer.lex(text=text)

    # (identifier, content, column) triples; every token sits on line 0.
    token_specs = [
        ('identifier', 'crop', 0),
        ('lpar', '(', 9),
        ('number', '20', 11),
        ('comma', ',', 13),
        ('number', '30', 15),
        ('comma', ',', 17),
        ('number', '40', 19),
        ('comma', ',', 21),
        ('number', '10', 23),
        ('rpar', ')', 26),
        ('semi', ';', 28),
    ]
    expected = [[
        lexery.Token(name, content, column, 0)
        for name, content, column in token_specs
    ]]

    self.assertEqual(expected, tokens)
def test_that_it_works(self):
    """Lex a two-statement script whose lines include an empty one."""
    text = ('crop ( 20, 30, 40, 10 ) ;\n'
            '\n'
            'resize(40, 10);')

    # Rule table: identifier -> regex; spaces are explicit tokens here.
    rule_specs = [
        ('identifier', r'[a-zA-Z_][a-zA-Z_]*'),
        ('lpar', r'\('),
        ('number', r'[1-9][0-9]*'),
        ('rpar', r'\)'),
        ('comma', r','),
        ('semi', r';'),
        ('space', r' '),
    ]
    lexer = lexery.Lexer(rules=[
        lexery.Rule(identifier=name, pattern=re.compile(regex))
        for name, regex in rule_specs
    ])

    tokens = lexer.lex(text=text)

    # Tokens of the first statement, all on line 0.
    first_line = [
        lexery.Token(name, content, column, 0) for name, content, column in [
            ('identifier', 'crop', 0),
            ('space', ' ', 4),
            ('lpar', '(', 5),
            ('space', ' ', 6),
            ('number', '20', 7),
            ('comma', ',', 9),
            ('space', ' ', 10),
            ('number', '30', 11),
            ('comma', ',', 13),
            ('space', ' ', 14),
            ('number', '40', 15),
            ('comma', ',', 17),
            ('space', ' ', 18),
            ('number', '10', 19),
            ('space', ' ', 21),
            ('rpar', ')', 22),
            ('space', ' ', 23),
            ('semi', ';', 24),
        ]
    ]

    # Tokens of the second statement, on line 2; line 1 is empty.
    third_line = [
        lexery.Token(name, content, column, 2) for name, content, column in [
            ('identifier', 'resize', 0),
            ('lpar', '(', 6),
            ('number', '40', 7),
            ('comma', ',', 9),
            ('space', ' ', 10),
            ('number', '10', 11),
            ('rpar', ')', 13),
            ('semi', ';', 14),
        ]
    ]

    self.assertEqual([first_line, [], third_line], tokens)
def test_unmatched_identifier(self):
    """Check that unmatchable text becomes tokens with the given identifier."""
    text = 'crop {} ( 20, 30, 40, 10 ) ; {}\n{}'

    # Rule table: identifier -> regex; '{' and '}' are covered by no rule,
    # so they must surface as 'unmatched' tokens.
    rule_specs = [
        ('identifier', r'[a-zA-Z_][a-zA-Z_]*'),
        ('lpar', r'\('),
        ('number', r'[1-9][0-9]*'),
        ('rpar', r'\)'),
        ('comma', r','),
        ('semi', r';'),
        ('space', r' '),
    ]
    lexer = lexery.Lexer(
        rules=[
            lexery.Rule(identifier=name, pattern=re.compile(regex))
            for name, regex in rule_specs
        ],
        unmatched_identifier="unmatched")

    tokens = lexer.lex(text=text)

    # Tokens of the first line, all on line 0.
    first_line = [
        lexery.Token(name, content, column, 0) for name, content, column in [
            ('identifier', 'crop', 0),
            ('space', ' ', 4),
            ('unmatched', '{}', 5),
            ('space', ' ', 7),
            ('lpar', '(', 8),
            ('space', ' ', 9),
            ('number', '20', 10),
            ('comma', ',', 12),
            ('space', ' ', 13),
            ('number', '30', 14),
            ('comma', ',', 16),
            ('space', ' ', 17),
            ('number', '40', 18),
            ('comma', ',', 20),
            ('space', ' ', 21),
            ('number', '10', 22),
            ('space', ' ', 24),
            ('rpar', ')', 25),
            ('space', ' ', 26),
            ('semi', ';', 27),
            ('space', ' ', 28),
            ('unmatched', '{}', 29),
        ]
    ]

    # The second line consists solely of an unmatched '{}'.
    second_line = [lexery.Token('unmatched', '{}', 0, 1)]

    self.assertListEqual([first_line, second_line], tokens)
import re
from typing import Optional, List, Pattern, MutableMapping, Union, Tuple, Iterable  # pylint: disable=unused-import

import lexery

# Lexer for strftime-like format strings: one rule per recognized directive,
# plus 'text' which consumes any single character other than '%', '*' or '?'.
LEXER = lexery.Lexer(
    rules=[
        lexery.Rule(identifier=identifier, pattern=re.compile(regex))
        for identifier, regex in [
            ('*', r'\*'),
            ('?', r'\?'),
            ('%d', r'%d'),
            ('%-d', r'%-d'),
            ('%m', r'%m'),
            ('%-m', r'%-m'),
            ('%y', r'%y'),
            ('%Y', r'%Y'),
            ('%H', r'%H'),
            ('%-H', r'%-H'),
            ('%M', r'%M'),
            ('%-M', r'%-M'),
            ('%S', r'%S'),
            ('%-S', r'%-S'),
            ('%f', r'%f'),
            ('%%', r'%%'),
            ('text', r'[^%*?]'),
        ]
    ])
# yapf: enable
"""Parse and convert strftime directives.""" import collections import re from typing import ( # pylint: disable=unused-import List, MutableMapping, Optional) import lexery _LEXER = lexery.Lexer(rules=[ lexery.Rule(identifier='directive', pattern=re.compile(r'%[a-zA-Z%]')), lexery.Rule(identifier='text', pattern=re.compile(r'[^%]+')) ], skip_whitespace=False) # Supported strftime directives. # # Mapry can't support all the directives since the format needs # to be parsed with different libraries. # # For example, see what Go ``time`` package can do: # http://fuckinggodateformat.com/ SUPPORTED_DIRECTIVES = { "%a", # The abbreviated weekday name ("Sun") "%A", # The full weekday name ("Sunday") "%b", # The abbreviated month name ("Jan") "%B", # The full month name ("January") "%d", # Day of the month (01..31) "%e", # Day of the month with a leading blank instead of zero ( 1..31) "%m", # Month of the year (01..12) "%y", # Year without a century (00..99) "%Y", # Year with century