Example #1
    def __init__(self):

        """
        Class initializer.
        """

        # Call the superclass initializer

        Parser.__init__(self)

        # Set the containers we're going to recognize

        self._containers = [('(', ')'), ('{', '}'), ('[', ']')]

        # Define our token pattern matches

        tmat = self._token_matches
        tmat.append(TokenMatch(re.compile(r"(/\*)(?s).*?(\*/)"),
                               [CommentToken]))
        tmat.append(TokenMatch(re.compile(r"//.*"),
                               [CommentToken]))
        tmat.append(TokenMatch(re.compile(r"#(?:(?!/\*|//).)*"),
                               [PreprocToken]))
        tmat.append(TokenMatch(re.compile(r'''(["/']).*?(?<!\\)(\\\\)*\1'''),
                               [StringToken]))
        tmat.append(TokenMatch(re.compile(r"[a-zA-Z_][\w]*"),
                               [IdentifierToken]))
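
As a quick check of what the comment patterns above actually match, here is a minimal standalone sketch using only re; the Parser, TokenMatch, and token-class names above are assumed to come from the surrounding project and are not needed for this:

import re

# The block and line comment patterns from the list above, tried in
# isolation against a sample line of C.
block_comment = re.compile(r"(?s)(/\*).*?(\*/)")
line_comment = re.compile(r"//.*")

src = "int x = 1; /* counter */ // trailing note"
print(block_comment.search(src).group())  # /* counter */
print(line_comment.search(src).group())   # // trailing note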
Example #2
    def _get_token(self, tclass, value):

        # Override the superclass method to distinguish
        # keywords from other plain identifiers. If we
        # don't find a keyword (or if we're not dealing
        # with an identifier) then just call the superclass
        # method

        if tclass == FirstIdentifierToken:

            # Note: This routine will highlight keywords and
            # builtins even if they're not intended to be such,
            # e.g. if a keyword is used as a function argument
            # it will be highlighted as a keyword. This is probably
            # desired behaviour, since that's not a good practice,
            # and vi seems to do the same.

            if value in keyword.kwlist:
                return KeywordToken(value)
            elif value in dir(__builtin__):
                return BuiltinToken(value)
            else:
                return IdentifierToken(value)

        return Parser._get_token(self, tclass, value)
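
The same classification logic can be tried outside the class; a minimal sketch, assuming Python 3 (where the Python 2 __builtin__ module used above is named builtins), with a hypothetical classify helper of our own:

import keyword
import builtins  # Python 3 name for the Python 2 __builtin__ module

# Hypothetical standalone helper mirroring the branch above; not part
# of the original parser.
def classify(name):
    if name in keyword.kwlist:
        return "keyword"
    if name in dir(builtins):
        return "builtin"
    return "identifier"

print(classify("while"))  # keyword
print(classify("len"))    # builtin
print(classify("foo"))    # identifier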
Example #3
    def _get_token(self, tclass, value):

        # Override the superclass method to distinguish
        # keywords from other plain identifiers. If we
        # don't find a keyword (or if we're not dealing
        # with an identifier) then just call the superclass
        # method

        ckeyws = ["auto", "break", "case", "char", "const", "continue",
                  "default", "do", "double", "else", "enum", "extern",
                  "float", "for", "goto", "if", "int", "long", "register",
                  "return", "short", "signed", "sizeof", "static",
                  "struct", "switch", "typedef", "union", "unsigned",
                  "void", "volatile", "while"]
        if tclass == IdentifierToken:
            if value in ckeyws:
                return KeywordToken(value)

        return Parser._get_token(self, tclass, value)
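
Since this list is rebuilt and scanned linearly on every call to _get_token(), a module-level frozenset would be the more idiomatic choice; a minimal sketch (the C_KEYWORDS name is ours, not the project's):

# Hypothetical module-level constant; the original builds the list
# inside _get_token() and scans it linearly on every call.
C_KEYWORDS = frozenset([
    "auto", "break", "case", "char", "const", "continue", "default",
    "do", "double", "else", "enum", "extern", "float", "for", "goto",
    "if", "int", "long", "register", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
])

print("while" in C_KEYWORDS)  # True, O(1) membership test
print("main" in C_KEYWORDS)   # False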
Example #4
    def __init__(self):

        """
        Class initializer.
        """

        # Call the superclass initializer

        Parser.__init__(self)

        # Set the containers we're going to recognize

        self._containers = [('(', ')'), ('{', '}'), ('[', ']')]

        # Define our token pattern matches

        tmat = self._token_matches

        # Search order matters here: patterns are matched in the
        # order in which they appear in this list. Look for comments
        # first, as they override everything, then multi-line
        # strings, then regular strings, then backticks. Backticks
        # are deprecated and were removed in Python 3, but are
        # retained here for backwards compatibility.

        tmat.append(TokenMatch(re.compile(r"#.*"),
                               [CommentToken]))
        tmat.append(TokenMatch(re.compile(r"r?([\"|\']{3})[^\1]*?" +
                                          r"(?<!\\)(\\\\)*\1"),
                               [MLStringToken]))
        tmat.append(TokenMatch(re.compile(r'''r?(["|']).*?(?<!\\)(\\\\)*\1'''),
                               [StringToken]))
        tmat.append(TokenMatch(re.compile(r"r?([`]).*?(?<!\\)(\\\\)*\1"),
                               [BacktickToken]))

        # Decorators and definitions go next. Note that we include
        # periods within the match for a decorator, unlike for regular
        # identifiers

        tmat.append(TokenMatch(re.compile(r"@[a-zA-Z_][\w\.]*"),
                               [DecoratorToken]))
        tmat.append(TokenMatch(re.compile(r"(def)(\s+)([a-zA-Z_][\w]*)"),
                               [KeywordToken,
                                WhitespaceToken,
                                DefinitionToken]))
        tmat.append(TokenMatch(re.compile(r"(class)(\s+)([a-zA-Z_][\w]*)"),
                               [KeywordToken,
                                WhitespaceToken,
                                DefinitionToken]))

        # Match regular identifiers, next. First check if an identifier
        # is preceded by a period, since if it is, we should not treat
        # it as a builtin or a keyword. If it's not, then label it a
        # FirstIdentifierToken, and _get_token() will replace it with a
        # KeywordToken or a BuiltinToken, if necessary.

        tmat.append(TokenMatch(re.compile(r"(\.)([a-zA-Z_][\w]*)"),
                               [SeparatorToken, IdentifierToken]))
        tmat.append(TokenMatch(re.compile(r"[a-zA-Z_][\w]*"),
                               [FirstIdentifierToken]))

        # Match numbers. Start with floats which begin with a digit
        # rather than a period, then floats which begin with a period.
        # Note that we cannot make the digits both before and after
        # the period optional in the same regular expression, or a
        # plain period would match as a float.
        #
        # Then match integers. Include an optional trailing 'j' to
        # catch complex numbers. Note that, currently, the real and
        # imaginary parts of a complex number are captured as two
        # separate numbers with an operator between them, which is
        # probably not ideal, and may be changed in the future.

        tmat.append(TokenMatch(re.compile(r"[0-9]+[\.][0-9]*((e|E)[\+\-]" +
                                          r"?[0-9]+)?(J|j)?"),
                               [FloatToken]))
        tmat.append(TokenMatch(re.compile(r"[\.][0-9]+((e|E)[\+\-]?" +
                                          r"[0-9]+)?(j|J)?"),
                               [FloatToken]))
        tmat.append(TokenMatch(re.compile(r"(0x)?[0-9]+(L|l)?(J|j)?"),
                               [IntegerToken]))

        # Look for assignment delimiter tokens, except the
        # regular '=' operator

        tmat.append(TokenMatch(re.compile(r"(\+=|\-=|\*=|/=|%=|//=|\*\*=)"),
                               [DelimiterToken]))

        # Look for multi-character operators

        tmat.append(TokenMatch(re.compile(r"(\*\*|<<|>>|<=|>=|<>|==|!=|//)"),
                               [OperatorToken]))

        # Look for the '=' operator only after matching any
        # multi-character operators; in particular, we would never
        # match the '==' operator if we looked for the '='
        # operator first

        tmat.append(TokenMatch(re.compile(r"="),
                               [DelimiterToken]))

        # Look for single character operators

        tmat.append(TokenMatch(re.compile(r"[\+\*\-\/%~&\^\|<>]"),
                               [OperatorToken]))

        # Finally, look for single character separators

        tmat.append(TokenMatch(re.compile(r"[,:\.]"),
                               [SeparatorToken]))
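
To illustrate why the ordering matters, here is a minimal standalone sketch that tries a trimmed-down version of the pattern list in sequence; the scanning helper is our simplification for illustration, not the project's actual dispatch code:

import re

# Patterns in priority order, mirroring the list above (trimmed to
# four entries).
patterns = [
    ("FLOAT", re.compile(r"[0-9]+\.[0-9]*((e|E)[\+\-]?[0-9]+)?(J|j)?")),
    ("INTEGER", re.compile(r"[0-9]+")),
    ("OPERATOR", re.compile(r"(\*\*|<<|>>|<=|>=|<>|==|!=|//)")),
    ("DELIMITER", re.compile(r"=")),
]

def first_match(text):
    # Return the first pattern that matches at the start of the text.
    for name, pat in patterns:
        m = pat.match(text)
        if m:
            return name, m.group()
    return None, ""

print(first_match("=="))    # ('OPERATOR', '==') -- never two '=' delimiters
print(first_match("3.14"))  # ('FLOAT', '3.14') -- not INTEGER '3' then '.14'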