Пример #1
0
 def _state0(self, char, tokenList):
     """
     Normal state: classify a single character of the expression.

     Operators, parentheses, the dot and plain characters each append
     one token to *tokenList* and the method then returns None. An
     opening bracket starts buffering a character class and returns 2,
     an opening brace returns 9 and a backslash returns 1.

     :param char: The character to handle (a str, or an int when the
         input is a bytes object)
     :param tokenList: List receiving the produced tokens
     :raises TokenizeError: on a closing bracket or brace met here
     """
     def emit(kind, value):
         # Every token produced in this state is stamped with self._pos.
         tokenList.append(self.Token(kind, value, self._pos))

     if char in chars('*'):
         emit(self.TOK_EXPONENT, ExponentToken(0, None))
         return None
     if char in chars('+'):
         emit(self.TOK_EXPONENT, ExponentToken(1, None))
         return None
     if char in chars('.'):
         emit(self.TOK_CLASS, AnyCharacterClass())
         return None
     if char in chars('('):
         emit(self.TOK_LPAREN, char)
         return None
     if char in chars(')'):
         emit(self.TOK_RPAREN, char)
         return None
     if char in chars('|'):
         emit(self.TOK_UNION, char)
         return None

     # Character class opener: the buffer type follows the input type
     # (text buffer for a str, byte buffer for an int taken from bytes).
     if char == '[':
         self._currentClass = io.StringIO()
         self._currentClass.write(char)
         return 2
     if char == b'['[0]:
         self._currentClass = io.BytesIO()
         self._currentClass.write(bytes([char]))
         return 2

     if char in chars('{'):
         return 9
     if char in chars(']') + chars('}'):
         raise TokenizeError('Unexpected token "%s"' % str(char))
     if char in chars('\\'):
         return 1

     # Anything else stands for itself.
     emit(self.TOK_CLASS, LitteralCharacterClass(char))
     return None
Пример #2
0
    @staticmethod
    def ignore(char):
        """
        Tell whether a character of the input stream must be skipped.

        Override this to ignore other characters. The default is to
        ignore spaces and tabs. The method takes no instance argument
        but is invoked as ``self.ignore(char)``, hence the static
        declaration.

        :param char: The character to test
        :return: True if *char* should be ignored
        """
        return char in chars(' ') + chars('\t')
Пример #3
0
 def _state2(self, char, tokenList):
     """
     Character-class state: buffer characters until the closing bracket.

     A backslash returns 3 without buffering anything. Any other
     character is written to the current class buffer; the closing
     bracket additionally appends a TOK_CLASS token built from the
     buffered text, drops the buffer and returns 0.

     :param char: The character to handle (str, or int for bytes input)
     :param tokenList: List receiving the produced tokens
     """
     if char in chars('\\'):
         return 3

     closing = char in chars(']')
     # Ints come from iterating over bytes and must be re-wrapped
     # before they can be written to the byte buffer.
     self._currentClass.write(bytes([char]) if isinstance(char, int) else char)
     if not closing:
         return None

     classToken = self.Token(self.TOK_CLASS, RegexCharacterClass(self._currentClass.getvalue()), self._pos)
     tokenList.append(classToken)
     self._currentClass = None
     return 0
Пример #4
0
 def _state10(self, char, tokenList):
     """
     Exponent state: accumulate the decimal digits of the start value.

     A dash stores the accumulated value as the range start and
     returns 11. A closing brace appends an exponent token whose two
     bounds are both the accumulated value and returns 0. Any other
     character must be a digit and is folded into the value.

     :param char: The character to handle
     :param tokenList: List receiving the produced tokens
     :raises InvalidExponentError: if *char* is not a digit
     """
     if char in chars('-'):
         self._startExponent = self._exponentValue
         return 11

     if char in chars('}'):
         count = self._exponentValue
         tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(count, count), self._pos))
         return 0

     try:
         digit = intValue(char)
     except ValueError:
         raise InvalidExponentError('Invalid character "%s"' % char)
     # Shift the digits read so far and append the new one.
     self._exponentValue = self._exponentValue * 10 + digit
     return None
Пример #5
0
 def _parse(self, string, pos):
     """
     Lex *string* starting at index *pos*.

     While a consumer is installed, each character is handed to it;
     otherwise ignored characters are skipped and everything else goes
     through ``_findMatch``, which returns the next index to examine.

     :param string: The input to tokenize
     :param pos: Index of the first character to examine
     :return: The index reached once the loop stops
     """
     while pos < len(string):
         char = string[pos]
         try:
             if self.consumer() is not None:
                 try:
                     tok = self.consumer().feed(char)
                 except SkipToken:
                     # The consumer gave up on its token: just drop it.
                     self.setConsumer(None)
                 else:
                     if tok is not None:
                         # The consumer is done; a None token type
                         # means there is nothing to emit.
                         self.setConsumer(None)
                         if tok[0] is not None:
                             self.newToken(self.Token(*tok, self.position()))
                 pos += 1
             elif self.ignore(char):
                 pos += 1
             else:
                 pos = self._findMatch(string, pos)
         finally:
             # Line/column bookkeeping happens for the character read at
             # the top of the iteration, even when an exception escapes.
             if char in chars('\n'):
                 self.advanceLine()
             else:
                 self.advanceColumn()
     return pos
Пример #6
0
 def _state11(self, char, tokenList): # pylint: disable=W0613
     """
     Exponent state: read the first digit of the range end.

     :param char: The character to handle
     :param tokenList: Unused in this state
     :return: 12 once the first digit has been stored
     :raises InvalidExponentError: if the range is closed right away
         or if *char* is not a digit
     """
     if char in chars('}'):
         raise InvalidExponentError('Missing range end')

     try:
         digit = intValue(char)
     except ValueError:
         raise InvalidExponentError('Invalid character "%s"' % char)
     else:
         # Start the end value afresh with this first digit.
         self._exponentValue = digit
         return 12
Пример #7
0
 def _state12(self, char, tokenList):
     """
     Exponent state: accumulate the remaining digits of the range end.

     A closing brace checks that the range is not reversed, appends
     the exponent token and returns 0. Any other character must be a
     digit and is folded into the end value.

     :param char: The character to handle
     :param tokenList: List receiving the produced tokens
     :raises InvalidExponentError: if *char* is not a digit, or if the
         range start is greater than its end
     """
     if char not in chars('}'):
         try:
             digit = intValue(char)
         except ValueError:
             raise InvalidExponentError('Invalid character "%s"' % char)
         # Shift the digits read so far and append the new one.
         self._exponentValue = self._exponentValue * 10 + digit
         return None

     first, last = self._startExponent, self._exponentValue
     if first > last:
         raise InvalidExponentError('Invalid exponent range %d-%d' % (first, last))
     tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(first, last), self._pos))
     return 0
Пример #8
0
    def feed(self, char):
        """
        Push one input character through the lexer. Once the whole
        input has been fed, call this a last time with EOF as argument.
        """

        # Queue the character with the position it was read at, i.e.
        # before the line/column counters move past it.
        self._input.append((char, self.position()))
        advance = self.advanceLine if char in chars('\n') else self.advanceColumn
        advance()

        # Process queued characters until the queue is empty, handing
        # every token produced along the way to newToken().
        while self._input:
            queuedChar, queuedPos = self._input.pop(0)
            for token in self._feed(queuedChar, queuedPos):
                self.newToken(token)
Пример #9
0
 def test_bytes(self):
     """Each int obtained by iterating over b'*' is found in chars('*')."""
     for byte in b'*':
         # assertIn reports both operands on failure, unlike assertTrue.
         self.assertIn(byte, chars('*'))
Пример #10
0
 def test_str(self):
     """The one-character string '*' is found in chars('*')."""
     # assertIn reports both operands on failure, unlike assertTrue.
     self.assertIn('*', chars('*'))
Пример #11
0
    async def _asyncFeed(self, char, charPos): # pylint: disable=R0912,R0915
        """
        Asynchronous variant of ``ProgressiveLexer._feed``: process one
        input character and hand any resulting token to ``yield_``.

        :param char: The character to process, or EOF at end of input
        :param charPos: Position to record for *char*; when None, the
            lexer's current position is used instead
        :raises LexerError: if the input cannot be matched; the lexer
            is restarted before the error propagates
        """
        # Unfortunately this is copy/pasted from ProgressiveLexer._feed to add the async stuff...
        # Position bookkeeping comes first, whatever happens next.
        if char in chars('\n'):
            self.advanceLine()
        else:
            self.advanceColumn()

        # While a consumer is installed it receives every character and
        # the regular expressions below are bypassed entirely.
        if self.consumer() is not None:
            tok = await self.consumer().feed(char)
            if tok is not None:
                # The consumer is done; nothing is emitted when tok[0] is None.
                self.setConsumer(None)
                if tok[0] is not None:
                    await yield_(self.Token(*tok, self.position()))
            return

        try:
            if char is EOF:
                # NOTE(review): state 0 presumably means "no match in
                # progress" (it becomes 1 below once a character is
                # accepted) -- confirm against restartLexer().
                if self._state == 0:
                    self.restartLexer()
                    await yield_(EOF)
                    return
                # NOTE(review): max() raises ValueError on an empty
                # sequence; confirm _currentState cannot be empty here.
                self._maxPos = max(self._maxPos, max(pos[0] for regex, callback, defaultType, pos in self._currentState))
                if self._maxPos == 0 and self._currentMatch:
                    # Nothing matched: report the first pending character.
                    raise LexerError(self._currentMatch[0][0], *self._currentMatch[0][1])
                # Keep only the candidates reaching the longest position.
                self._matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self._currentState if pos[0] == self._maxPos])
                self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
            else:
                # Ignored characters are only dropped between tokens.
                if self._state == 0 and self.ignore(char):
                    return
                self._state = 1

                # Feed the character to every candidate still alive.
                newState = list()
                for regex, callback, defaultType, pos in self._currentState:
                    try:
                        if regex.feed(char):
                            # Record the match length including this character.
                            pos[0] = len(self._currentMatch) + 1
                    except DeadState:
                        # This regex is out; keep its last recorded
                        # match position, if it has one.
                        if pos[0]:
                            self._matches.append((pos[0], callback))
                            self._maxPos = max(self._maxPos, pos[0])
                    else:
                        newState.append((regex, callback, defaultType, pos))

                # When every survivor reports a dead end, record them
                # all at the current length and stop tracking them.
                if all([regex.isDeadEnd() for regex, callback, defaultType, pos in newState]):
                    for regex, callback, defaultType, pos in newState:
                        self._matches.append((len(self._currentMatch) + 1, callback))
                        self._maxPos = max(self._maxPos, len(self._currentMatch) + 1)
                    newState = list()

                # Keep only the matches reaching the longest position.
                self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
                self._currentState = newState

                self._currentMatch.append((char, self.position() if charPos is None else charPos))
                # Candidates remain: wait for more input.
                if self._currentState:
                    return

                # No candidate left and no position was ever recorded.
                if self._maxPos == 0:
                    raise LexerError(char, *self.position())
        except LexerError:
            # Reset the lexer before letting the error propagate.
            self.restartLexer()
            raise

        tok = self._finalizeMatch()
        if tok is not None:
            await yield_(tok)

        # At end of input, reset and emit EOF after the last token.
        if char is EOF:
            self.restartLexer()
            await yield_(EOF)
Пример #12
0
 def __contains__(self, char):
     """
     Membership test: every character except a newline is contained.

     :param char: The character to test
     :return: False if *char* is found in ``chars('\\n')``, else True
     """
     isNewline = char in chars('\n')
     return not isNewline