def __init__(self, parser_state, parent):
    '''
    Create a builder for a character set that reports back to `parent`.

    `parser_state` is passed to the base class and supplies the alphabet
    for the (initially empty) `Character` being assembled.
    '''
    super(CharacterBuilder, self).__init__(parser_state)
    # Parsing flags: nothing queued, no range pending, and inversion not
    # yet decided (None rather than False).
    self._range = False
    self._queue = None
    self._invert = None
    # The set under construction and the builder that will receive it.
    self._charset = Character([], alphabet=parser_state.alphabet)
    self._parent = parent
def append_character(self, character, escaped=False):
    '''
    Consume one character and return the builder for the next one.

    Unescaped special characters either hand over to a dedicated builder
    (backslash, `{`, `(`, `[`, and the repeat characters `+?*` when
    something is available to repeat) or add a node to the current
    sequence (`.`, `^`, `$`) or start a new alternative (`|`).  Any other
    non-`None` character that is escaped, or that the parser state reports
    as significant, is added as a literal.

    Returns the new builder when control is handed over, otherwise `self`.
    '''
    state = self._parser_state
    text = state.alphabet.expression_to_str(character)

    if not escaped:
        # Characters that open a nested construct.
        if text == '\\':
            return ComplexEscapeBuilder(state, self)
        if text == '{':
            return CountBuilder(state, self, character)
        if text == '(':
            return GroupEscapeBuilder(state, self)
        if text == '[':
            return CharacterBuilder(state, self)

        # Characters that map directly onto a single node.
        if text == '.':
            dot_all = state.flags & ParserState.DOT_ALL
            self._sequence.append(Dot(dot_all))
            return self
        if text == '^':
            multiline = state.flags & ParserState.MULTILINE
            self._sequence.append(StartOfLine(multiline))
            return self
        if text == '$':
            multiline = state.flags & ParserState.MULTILINE
            self._sequence.append(EndOfLine(multiline))
            return self
        if text == '|':
            self.__start_new_alternative()
            return self

    if character is not None:
        # A repeat applies to (and removes) the last node of the sequence.
        if self._sequence and not escaped and text in '+?*':
            return RepeatBuilder(state, self, self._sequence.pop(),
                                 character)

        if escaped or state.significant(character):
            (is_pair, value) = state.alphabet.expression_to_charset(
                character, state.flags)
            if is_pair:
                # Two alternatives: match either one as a single character.
                intervals = [(value[0], value[0]), (value[1], value[1])]
                node = Character(intervals, state.alphabet)
            else:
                node = String(value)
            self._sequence.append(node)

    return self
class CharacterBuilder(Builder):
    '''
    Parse a character range - expressions of the form [...].
    These can include character classes (\\s for example), which we handle
    in the alphabet as functions rather than character code ranges, so the
    final graph node can be quite complex.
    '''

    def __init__(self, parser_state, parent):
        '''
        Start an empty set that will be appended to `parent`'s sequence
        when the closing `]` is read.
        '''
        super(CharacterBuilder, self).__init__(parser_state)
        self._parent = parent
        self._charset = Character([], alphabet=parser_state.alphabet)
        # None until the first character has been handled, then True/False.
        self._invert = None
        # Last plain character read; held back because it may start a range.
        self._queue = None
        # True after a '-' that may join the queued character to the next.
        self._range = False

    def append_character(self, character, escaped=False):
        '''
        Add the next character of the set.

        `character` is the next item of the pattern; `escaped` is expected
        to be true when it was preceded by a backslash (presumably set by
        the escape builder when it hands the character back).

        Returns the builder that should receive the following character:
        a `SimpleEscapeBuilder` after an unescaped backslash, the parent
        builder once the closing `]` has been processed, otherwise `self`.

        Raises `RxpyError` for an incomplete range or for input that fits
        none of the cases handled below.
        '''

        def unpack(char):
            '''Return the pair of values the alphabet gives for `char`.'''
            (is_charset, value) = \
                self._parser_state.alphabet.expression_to_charset(
                    char, self._parser_state.flags)
            if not is_charset:
                value = (char, char)
            return value

        def append(character=character):
            '''Helper function to avoid repetition below - adds character.'''
            if self._range:
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                # Close the pending range: queued character .. this one,
                # once for each member of the unpacked pairs.
                (alo, ahi) = unpack(self._queue)
                (blo, bhi) = unpack(character)
                self._charset.append_interval((alo, blo))
                self._charset.append_interval((ahi, bhi))
                self._queue = None
                self._range = False
            else:
                # The queued character did not start a range, so store it
                # as single-character intervals and queue the new one.
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        char_str = self._parser_state.alphabet.expression_to_str(character)
        # Only an *unescaped* leading '^' negates the set.  The backslash
        # branch returns before _invert is settled, so without the
        # `not escaped` test an escaped caret in first position ([\^a])
        # would be taken as negation instead of a literal '^'.
        if self._invert is None and not escaped and char_str == '^':
            self._invert = True
        elif not escaped and char_str == '\\':
            return SimpleEscapeBuilder(self._parser_state, self)
        elif escaped and char_str in 'dD':
            self._charset.append_class(self._parser_state.alphabet.digit,
                                       character, char_str == 'D')
        elif escaped and char_str in 'wW':
            self._charset.append_class(self._parser_state.alphabet.word,
                                       character, char_str == 'W')
        elif escaped and char_str in 'sS':
            self._charset.append_class(self._parser_state.alphabet.space,
                                       character, char_str == 'S')
        # not charset allows first character to be unescaped - or ]
        elif character is not None and \
                ((not self._charset and not self._queue)
                 or escaped or char_str not in "-]"):
            append()
        elif char_str == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif char_str == ']':
            if self._queue:
                if self._range:
                    self._range = False
                    # convert open range to '-'
                    append('-')
                # None flushes whatever is still queued into the set.
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyError('Syntax error in character set')
        # after first character this must be known
        if self._invert is None:
            self._invert = False
        return self
class CharacterBuilder(Builder):
    '''
    Parse a character range - expressions of the form [...].
    These can include character classes (\\s for example), which we handle
    in the alphabet as functions rather than character code ranges, so the
    final graph node can be quite complex.
    '''

    def __init__(self, parser_state, parent):
        '''
        Start an empty set that will be appended to `parent`'s sequence
        when the closing `]` is read.
        '''
        super(CharacterBuilder, self).__init__(parser_state)
        self._parent = parent
        self._charset = Character([], alphabet=parser_state.alphabet)
        # None until the first character has been handled, then True/False.
        self._invert = None
        # Last plain character read; held back because it may start a range.
        self._queue = None
        # True after a '-' that may join the queued character to the next.
        self._range = False

    def append_character(self, character, escaped=False):
        '''
        Add the next character of the set.

        `character` is the next item of the pattern; `escaped` is expected
        to be true when it was preceded by a backslash (presumably set by
        the escape builder when it hands the character back).

        Returns the builder that should receive the following character:
        a `SimpleEscapeBuilder` after an unescaped backslash, the parent
        builder once the closing `]` has been processed, otherwise `self`.

        Raises `RxpyError` for an incomplete range or for input that fits
        none of the cases handled below.
        '''

        def unpack(char):
            '''Return the pair of values the alphabet gives for `char`.'''
            (is_charset, value) = \
                self._parser_state.alphabet.expression_to_charset(
                    char, self._parser_state.flags)
            if not is_charset:
                value = (char, char)
            return value

        def append(character=character):
            '''Helper function to avoid repetition below - adds character.'''
            if self._range:
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                # Close the pending range: queued character .. this one,
                # once for each member of the unpacked pairs.
                (alo, ahi) = unpack(self._queue)
                (blo, bhi) = unpack(character)
                self._charset.append_interval((alo, blo))
                self._charset.append_interval((ahi, bhi))
                self._queue = None
                self._range = False
            else:
                # The queued character did not start a range, so store it
                # as single-character intervals and queue the new one.
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        char_str = self._parser_state.alphabet.expression_to_str(character)
        # Only an *unescaped* leading '^' negates the set.  The backslash
        # branch returns before _invert is settled, so without the
        # `not escaped` test an escaped caret in first position ([\^a])
        # would be taken as negation instead of a literal '^'.
        if self._invert is None and not escaped and char_str == '^':
            self._invert = True
        elif not escaped and char_str == '\\':
            return SimpleEscapeBuilder(self._parser_state, self)
        elif escaped and char_str in 'dD':
            self._charset.append_class(self._parser_state.alphabet.digit,
                                       character, char_str == 'D')
        elif escaped and char_str in 'wW':
            self._charset.append_class(self._parser_state.alphabet.word,
                                       character, char_str == 'W')
        elif escaped and char_str in 'sS':
            self._charset.append_class(self._parser_state.alphabet.space,
                                       character, char_str == 'S')
        # not charset allows first character to be unescaped - or ]
        elif character is not None and \
                ((not self._charset and not self._queue)
                 or escaped or char_str not in "-]"):
            append()
        elif char_str == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif char_str == ']':
            if self._queue:
                if self._range:
                    self._range = False
                    # convert open range to '-'
                    append('-')
                # None flushes whatever is still queued into the set.
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyError('Syntax error in character set')
        # after first character this must be known
        if self._invert is None:
            self._invert = False
        return self
def test_contains(self):
    '''
    Check membership of [0], [1] and [2] in `Character` sets built from
    several interval lists over the `Digits` alphabet.
    '''
    # Each row: the intervals, then whether [0], [1], [2] should be found.
    expectations = (
        ((('1', '1'),), (False, True, False)),
        ((('0', '1'),), (True, True, False)),
        ((('0', '2'),), (True, True, True)),
        ((('0', '1'), ('1', '2')), (True, True, True)),
        ((('0', '0'), ('2', '2')), (True, False, True)),
    )
    for intervals, wanted in expectations:
        for digit, present in enumerate(wanted):
            # Build a fresh set from a fresh list for every single check.
            charset = Character(list(intervals), Digits())
            found = [digit] in charset
            assert found == present
def do_test_str(self, intervals, target):
    '''
    Assert that `str()` of a `Character` built from `intervals` over the
    `Digits` alphabet equals `target`; the actual text is reported on
    failure.
    '''
    charset = Character(intervals, alphabet=Digits())
    result = str(charset)
    assert result == target, result
def do_test_str(self, intervals, target):
    '''
    Assert that `str()` of a `Character` built from `intervals` over the
    `String` alphabet equals `target`; the actual text is reported on
    failure.
    '''
    charset = Character(intervals, String())
    result = str(charset)
    assert result == target, result
def test_contains(self):
    '''
    Check membership of 'a', 'b' and 'c' in `Character` sets built from
    several interval lists over the `String` alphabet.
    '''
    # Each row: the intervals, then whether 'a', 'b', 'c' should be found.
    expectations = (
        ((('b', 'b'),), (False, True, False)),
        ((('a', 'b'),), (True, True, False)),
        ((('a', 'c'),), (True, True, True)),
        ((('a', 'b'), ('b', 'c')), (True, True, True)),
        ((('a', 'a'), ('c', 'c')), (True, False, True)),
    )
    for intervals, wanted in expectations:
        for letter, present in zip('abc', wanted):
            # Build a fresh set from a fresh list for every single check.
            charset = Character(list(intervals), String())
            found = letter in charset
            assert found == present