Пример #1
0
 def __init__(self, parser_state, parent):
     '''
     Create a builder holding an empty `Character` set over the parser
     state's alphabet, remembering `parent`.
     '''
     super(CharacterBuilder, self).__init__(parser_state)
     # nothing has been read yet: inversion undecided, no character
     # waiting in the queue, and no range in progress
     self._invert, self._queue, self._range = None, None, False
     self._charset = Character([], alphabet=parser_state.alphabet)
     self._parent = parent
Пример #2
0
 def __init__(self, parser_state, parent):
     '''
     Set up an empty character set for `parser_state`'s alphabet and
     record `parent` alongside the initial parsing flags.
     '''
     super(CharacterBuilder, self).__init__(parser_state)
     empty_set = Character([], alphabet=parser_state.alphabet)
     self._charset = empty_set
     self._parent = parent
     # initial flags: inversion not yet decided, nothing queued, no range
     self._range = False
     self._queue = None
     self._invert = None
Пример #3
0
 def append_character(self, character, escaped=False):
     '''
     Consume one character of the expression.

     An unescaped metacharacter either hands over to a more specific
     builder (which is returned) or adds the matching node to the current
     sequence.  Any other character that is escaped or significant is
     added as a literal.  Unless another builder takes over, this builder
     is returned.
     '''
     state = self._parser_state
     char_str = state.alphabet.expression_to_str(character)
     # an escaped character is never syntax: comparing None against the
     # metacharacters below can not succeed
     meta = None if escaped else char_str

     # constructs that are parsed by their own builder
     if meta == '\\':
         return ComplexEscapeBuilder(state, self)
     if meta == '{':
         return CountBuilder(state, self, character)
     if meta == '(':
         return GroupEscapeBuilder(state, self)
     if meta == '[':
         return CharacterBuilder(state, self)

     if meta == '.':
         self._sequence.append(Dot(state.flags & ParserState.DOT_ALL))
     elif meta == '^':
         self._sequence.append(
             StartOfLine(state.flags & ParserState.MULTILINE))
     elif meta == '$':
         self._sequence.append(
             EndOfLine(state.flags & ParserState.MULTILINE))
     elif meta == '|':
         self.__start_new_alternative()
     elif character is not None:
         if self._sequence and not escaped and char_str in '+?*':
             # the repeat takes over the most recent node, which is
             # removed from the sequence and passed to the new builder
             return RepeatBuilder(state, self,
                                  self._sequence.pop(), character)
         if escaped or state.significant(character):
             alphabet = state.alphabet
             (is_pair, value) = alphabet.expression_to_charset(
                 character, state.flags)
             if is_pair:
                 # two alternatives for one character, each added as a
                 # single-point interval
                 first, second = value[0], value[1]
                 self._sequence.append(
                     Character([(first, first), (second, second)],
                               alphabet))
             else:
                 self._sequence.append(String(value))
     return self
Пример #4
0
class CharacterBuilder(Builder):
    '''
    Parse a character range - expressions of the form [...].
    These can include character classes (\\s for example), which we handle
    in the alphabet as functions rather than character code ranges, so the
    final graph node can be quite complex.

    Characters are held back one step in `_queue` so that a following '-'
    can turn them into the start of a range.
    '''

    def __init__(self, parser_state, parent):
        '''
        Start an empty set; `parent` receives the finished node when the
        closing ']' is read.
        '''
        super(CharacterBuilder, self).__init__(parser_state)
        self._parent = parent
        self._charset = Character([], alphabet=parser_state.alphabet)
        # None until the first character has been examined, then a bool
        self._invert = None
        # last plain character read, not yet added to the set
        self._queue = None
        # True after a '-' that may be the middle of a range
        self._range = False

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        `escaped` marks a character that must not be read as set syntax
        ('^', '-', ']', backslash); escaped d/D, w/W and s/S add the
        corresponding alphabet class instead.

        Returns the builder for the following character: a
        `SimpleEscapeBuilder` after an unescaped backslash, the parent
        after the closing ']', otherwise this builder.

        Raises `RxpyError` for a range with no start and for input that
        matches no rule.
        '''

        def append(character=character):
            '''Helper function to avoid repetition below - adds character.'''

            def unpack(character):
                '''Generate a `CharSet` or a character pair.'''
                (is_charset, value) = \
                    self._parser_state.alphabet.expression_to_charset(
                        character, self._parser_state.flags)
                if not is_charset:
                    value = (character, character)
                return value

            if self._range:
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                else:
                    # both ends may unpack to a pair of alternatives, so a
                    # range is added once per alternative
                    (alo, ahi) = unpack(self._queue)
                    (blo, bhi) = unpack(character)
                    self._charset.append_interval((alo, blo))
                    self._charset.append_interval((ahi, bhi))
                    self._queue = None
                    self._range = False
            else:
                # flush the previously queued character (if any) as single
                # points, then hold the new one back in case '-' follows
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        char_str = self._parser_state.alphabet.expression_to_str(character)
        # only an unescaped leading '^' negates the set.  The test on
        # `escaped` is needed because an initial backslash returns below
        # before `_invert` is fixed, so an escaped '^' would otherwise be
        # read as negation instead of as a literal.
        if self._invert is None and not escaped and char_str == '^':
            self._invert = True
        elif not escaped and char_str == '\\':
            return SimpleEscapeBuilder(self._parser_state, self)
        elif escaped and char_str in 'dD':
            self._charset.append_class(self._parser_state.alphabet.digit,
                                       character, char_str=='D')
        elif escaped and char_str in 'wW':
            self._charset.append_class(self._parser_state.alphabet.word,
                                       character, char_str=='W')
        elif escaped and char_str in 'sS':
            self._charset.append_class(self._parser_state.alphabet.space,
                                       character, char_str=='S')
        # not charset allows first character to be unescaped - or ]
        elif character is not None and \
                ((not self._charset and not self._queue)
                 or escaped or char_str not in "-]"):
            append()
        elif char_str == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif char_str == ']':
            if self._range:
                # a '-' with nothing after it is a literal '-'.  This also
                # covers an empty queue (after a class or a completed
                # range), where the '-' used to be silently dropped.
                self._range = False
                append('-')
            if self._queue:
                # flush the last queued character into the set
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyError('Syntax error in character set')

        # after first character this must be known
        if self._invert is None:
            self._invert = False

        return self
Пример #5
0
class CharacterBuilder(Builder):
    '''
    Parse a character range - expressions of the form [...].
    These can include character classes (\\s for example), which we handle
    in the alphabet as functions rather than character code ranges, so the
    final graph node can be quite complex.

    Each plain character waits in `_queue` for one step, so that a '-'
    read next can make it the lower end of a range.
    '''
    def __init__(self, parser_state, parent):
        '''
        Create an empty set whose finished node is appended to `parent`'s
        sequence once ']' is read.
        '''
        super(CharacterBuilder, self).__init__(parser_state)
        self._parent = parent
        self._charset = Character([], alphabet=parser_state.alphabet)
        # undecided (None) until the first character has been handled
        self._invert = None
        # most recent plain character, still to be added to the set
        self._queue = None
        # set by a '-' that might join two characters into a range
        self._range = False

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        With `escaped` set the character is never treated as set syntax;
        escaped d/D, w/W and s/S append the matching alphabet class.

        The return value is the builder to use for the next character:
        a `SimpleEscapeBuilder` following an unescaped backslash, the
        parent following ']', and this builder in all other cases.

        `RxpyError` is raised when a range has no starting character or
        when the character fits none of the rules.
        '''
        def append(character=character):
            '''Helper function to avoid repetition below - adds character.'''
            def unpack(character):
                '''Generate a `CharSet` or a character pair.'''
                (is_charset, value) = \
                    self._parser_state.alphabet.expression_to_charset(
                        character, self._parser_state.flags)
                if not is_charset:
                    value = (character, character)
                return value

            if self._range:
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                else:
                    # each end can unpack to two alternatives; add the
                    # range for the first and for the second of each
                    (alo, ahi) = unpack(self._queue)
                    (blo, bhi) = unpack(character)
                    self._charset.append_interval((alo, blo))
                    self._charset.append_interval((ahi, bhi))
                    self._queue = None
                    self._range = False
            else:
                # store whatever was queued as single points and queue
                # the new character in its place
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        char_str = self._parser_state.alphabet.expression_to_str(character)
        # negation needs an *unescaped* '^' in first position.  A leading
        # backslash returns early (below) with `_invert` still None, so
        # without the `escaped` test the escaped '^' that follows would
        # invert the set rather than be added to it.
        if self._invert is None and not escaped and char_str == '^':
            self._invert = True
        elif not escaped and char_str == '\\':
            return SimpleEscapeBuilder(self._parser_state, self)
        elif escaped and char_str in 'dD':
            self._charset.append_class(self._parser_state.alphabet.digit,
                                       character, char_str == 'D')
        elif escaped and char_str in 'wW':
            self._charset.append_class(self._parser_state.alphabet.word,
                                       character, char_str == 'W')
        elif escaped and char_str in 'sS':
            self._charset.append_class(self._parser_state.alphabet.space,
                                       character, char_str == 'S')
        # not charset allows first character to be unescaped - or ]
        elif character is not None and \
                ((not self._charset and not self._queue)
                 or escaped or char_str not in "-]"):
            append()
        elif char_str == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif char_str == ']':
            if self._range:
                # convert open range to '-'.  Done even when nothing is
                # queued (e.g. after a class or a finished range) so that
                # a trailing '-' is kept instead of being lost.
                self._range = False
                append('-')
            if self._queue:
                # move the final queued character into the set
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyError('Syntax error in character set')

        # after first character this must be known
        if self._invert is None:
            self._invert = False

        return self
Пример #6
0
 def test_contains(self):
     '''
     Check whether [0], [1] and [2] are contained in character sets built
     from several interval lists over the `Digits` alphabet.
     '''
     # (intervals, expected membership of [0], [1] and [2] in that order)
     cases = (
         ((('1', '1'),), (False, True, False)),
         ((('0', '1'),), (True, True, False)),
         ((('0', '2'),), (True, True, True)),
         ((('0', '1'), ('1', '2')), (True, True, True)),
         ((('0', '0'), ('2', '2')), (True, False, True)),
     )
     for intervals, expected in cases:
         for digit, present in enumerate(expected):
             # a fresh interval list and set for every single check
             charset = Character(list(intervals), Digits())
             assert ([digit] in charset) == present
Пример #7
0
 def do_test_str(self, intervals, target):
     '''
     Assert that a `Character` built from `intervals` over the `Digits`
     alphabet has the string form `target`.
     '''
     charset = Character(intervals, alphabet=Digits())
     result = str(charset)
     # the actual text is the assertion message, to show on failure
     assert result == target, result
Пример #8
0
 def do_test_str(self, intervals, target):
     '''
     Assert that a `Character` built from `intervals`, with `String()` as
     its alphabet, has the string form `target`.
     '''
     charset = Character(intervals, String())
     result = str(charset)
     # the actual text is the assertion message, to show on failure
     assert result == target, result
Пример #9
0
 def test_contains(self):
     '''
     Check whether 'a', 'b' and 'c' are contained in character sets built
     from several interval lists, with `String()` as the alphabet.
     '''
     # (intervals, expected membership of 'a', 'b' and 'c' in that order)
     cases = (
         ((('b', 'b'),), (False, True, False)),
         ((('a', 'b'),), (True, True, False)),
         ((('a', 'c'),), (True, True, True)),
         ((('a', 'b'), ('b', 'c')), (True, True, True)),
         ((('a', 'a'), ('c', 'c')), (True, False, True)),
     )
     for intervals, expected in cases:
         for letter, present in zip('abc', expected):
             # a fresh interval list and set for every single check
             charset = Character(list(intervals), String())
             assert (letter in charset) == present