Example No. 1
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
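
A minimal usage sketch for the lexer above, assuming HamlLexer is importable from a standard Pygments install (in this code base it ships vendored under testflows._core.contrib.pygments, so the import path is an assumption to adjust):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import HamlLexer  # assumed import path; adjust for the vendored copy

haml_source = "%p.greeting= 'Hello, ' + name\n"

# '%p' and '.greeting' are handled by the 'tag'/'css' rules above, and the
# '= ...' remainder is delegated to RubyLexer by the 'eval-or-plain' rule.
print(highlight(haml_source, HamlLexer(), TerminalFormatter()))
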
Example No. 2
class ActionScript3Lexer(RegexLexer):
    """
    For ActionScript 3 source code.

    .. versionadded:: 0.11
    """

    name = 'ActionScript 3'
    aliases = ['as3', 'actionscript3']
    filenames = ['*.as']
    mimetypes = ['application/x-actionscript3', 'text/x-actionscript3',
                 'text/actionscript3']

    identifier = r'[$a-zA-Z_]\w*'
    typeidentifier = identifier + r'(?:\.<\w+>)?'

    flags = re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(function\s+)(' + identifier + r')(\s*)(\()',
             bygroups(Keyword.Declaration, Name.Function, Text, Operator),
             'funcparams'),
            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' +
             typeidentifier + r')',
             bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text,
                      Keyword.Type)),
            (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)',
             bygroups(Keyword, Text, Name.Namespace, Text)),
            (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()',
             bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex),
            (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)),
            (r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
             r'throw|try|catch|with|new|typeof|arguments|instanceof|this|'
             r'switch|import|include|as|is)\b',
             Keyword),
            (r'(class|public|final|internal|native|override|private|protected|'
             r'static|import|extends|implements|interface|intrinsic|return|super|'
             r'dynamic|function|const|get|namespace|package|set)\b',
             Keyword.Declaration),
            (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b',
             Keyword.Constant),
            (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|'
             r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|'
             r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|'
             r'unescape)\b', Name.Function),
            (identifier, Name),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[~^*!%&<>|+=:;,/?\\{}\[\]().-]+', Operator),
        ],
        'funcparams': [
            (r'\s+', Text),
            (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' +
             typeidentifier + r'|\*)(\s*)',
             bygroups(Text, Punctuation, Name, Text, Operator, Text,
                      Keyword.Type, Text), 'defval'),
            (r'\)', Operator, 'type')
        ],
        'type': [
            (r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)',
             bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'),
            (r'\s+', Text, '#pop:2'),
            default('#pop:2')
        ],
        'defval': [
            (r'(=)(\s*)([^(),]+)(\s*)(,?)',
             bygroups(Operator, Text, using(this), Text, Operator), '#pop'),
            (r',', Operator, '#pop'),
            default('#pop')
        ]
    }

    def analyse_text(text):
        if re.match(r'\w+\s*:\s*\w', text):
            return 0.3
        return 0
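
The analyse_text hook above is what pygments.lexers.guess_lexer() consults when no file name is available. A small sketch of calling it directly, with score values per the code above:

from pygments.lexers import ActionScript3Lexer  # assumed import path

# analyse_text() returns 0.3 when the text starts with an identifier followed
# by ':' and another word character (e.g. a typed declaration), else 0.
print(ActionScript3Lexer.analyse_text('count:int = 0;'))   # 0.3
print(ActionScript3Lexer.analyse_text('trace("hello");'))  # 0.0
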
Example No. 3
class CypherLexer(RegexLexer):
    """
    For the `Cypher Query Language
    <https://neo4j.com/docs/developer-manual/3.3/cypher/>`_.

    Covers the Cypher version shipped with Neo4j 3.3.

    .. versionadded:: 2.0
    """
    name = 'Cypher'
    aliases = ['cypher']
    filenames = ['*.cyp', '*.cypher']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            include('comment'),
            include('keywords'),
            include('clauses'),
            include('relations'),
            include('strings'),
            include('whitespace'),
            include('barewords'),
        ],
        'comment': [
            (r'^.*//.*\n', Comment.Single),
        ],
        'keywords': [
            (r'(create|order|match|limit|set|skip|start|return|with|where|'
             r'delete|foreach|not|by|true|false)\b', Keyword),
        ],
        'clauses': [
            # based on https://neo4j.com/docs/cypher-refcard/3.3/
            (r'(all|any|as|asc|ascending|assert|call|case|create|'
             r'create\s+index|create\s+unique|delete|desc|descending|'
             r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|'
             r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|'
             r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|'
             r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|'
             r'remove|return|set|skip|single|start|starts\s+with|then|union|'
             r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|'
             r'with)\b', Keyword),
        ],
        'relations': [
            (r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)),
            (r'(<-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'(-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)),
            (r'-->|<--|\[|\]', Operator),
            (r'<|>|<>|=|<=|=>|\(|\)|\||:|,|;', Punctuation),
            (r'[.*{}]', Punctuation),
        ],
        'strings': [
            (r'"(?:\\[tbnrf\'"\\]|[^\\"])*"', String),
            (r'`(?:``|[^`])+`', Name.Variable),
        ],
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'barewords': [
            (r'[a-z]\w*', Name),
            (r'\d+', Number),
        ],
    }
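
A quick sketch of running the lexer (assuming CypherLexer is importable from a standard Pygments install), showing how the 'relations' rules break an arrow pattern apart and re-lex its contents via using(this):

from pygments.lexers import CypherLexer  # assumed import path

query = 'MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name, b.name;'

# '-[' and ']->' come out as Operator tokens, while the ':KNOWS' in between is
# fed back through the lexer itself by using(this).
for token_type, value in CypherLexer().get_tokens(query):
    print(token_type, repr(value))
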
Example No. 4
class AsymptoteLexer(RegexLexer):
    """
    For `Asymptote <http://asymptote.sf.net/>`_ source code.

    .. versionadded:: 1.2
    """
    name = 'Asymptote'
    aliases = ['asy', 'asymptote']
    filenames = ['*.asy']
    mimetypes = ['text/x-asymptote']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/\*.*?\*/)+'

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
        ],
        'statements': [
            # simple string (TeX friendly)
            (r'"(\\\\|\\"|[^"])*"', String),
            # C style string (with character escapes)
            (r"'", String, 'string'),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
            (r'(and|controls|tension|atleast|curl|if|else|while|for|do|'
             r'return|break|continue|struct|typedef|new|access|import|'
             r'unravel|from|include|quote|static|public|private|restricted|'
             r'this|explicit|true|false|null|cycle|newframe|operator)\b',
             Keyword),
            # Since an asy-type-name can also be an asy-function-name,
            # the following rule only treats it as Keyword.Type when it is
            # followed by whitespace and a letter (the "\s+[a-zA-Z]" lookahead).
            # Of course this is not perfect!
            (r'(Braid|FitResult|Label|Legend|TreeNode|abscissa|arc|arrowhead|'
             r'binarytree|binarytreeNode|block|bool|bool3|bounds|bqe|circle|'
             r'conic|coord|coordsys|cputime|ellipse|file|filltype|frame|grid3|'
             r'guide|horner|hsv|hyperbola|indexedTransform|int|inversion|key|'
             r'light|line|linefit|marginT|marker|mass|object|pair|parabola|path|'
             r'path3|pen|picture|point|position|projection|real|revolution|'
             r'scaleT|scientific|segment|side|slice|splitface|string|surface|'
             r'tensionSpecifier|ticklocate|ticksgridT|tickvalues|transform|'
             r'transformation|tree|triangle|trilinear|triple|vector|'
             r'vertex|void)(?=\s+[a-zA-Z])', Keyword.Type),
            # Now the asy-type-names that are not also asy-function-names
            # (except user-defined ones!).
            # Perhaps useless.
            (r'(Braid|FitResult|TreeNode|abscissa|arrowhead|block|bool|bool3|'
             r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|'
             r'picture|position|real|revolution|slice|splitface|ticksgridT|'
             r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type),
            (r'[a-zA-Z_]\w*:(?!:)', Name.Label),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|\*))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'(' + _ws + r')(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r"'", String, '#pop'),
            (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'\n', String),
            (r"[^\\'\n]+", String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\n', String),
            (r'\\', String),  # stray backslash
        ],
    }

    def get_tokens_unprocessed(self, text):
        from testflows._core.contrib.pygments.lexers._asy_builtins import ASYFUNCNAME, ASYVARNAME
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in ASYFUNCNAME:
                token = Name.Function
            elif token is Name and value in ASYVARNAME:
                token = Name.Variable
            yield index, token, value
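
The get_tokens_unprocessed() override above post-processes the regex output, promoting plain Name tokens that appear in the builtin name sets. A short sketch of the effect (assuming AsymptoteLexer is importable from a standard Pygments install):

from pygments.lexers import AsymptoteLexer  # assumed import path
from pygments.token import Name

asy_source = 'draw(unitcircle);\n'

# 'draw' and 'unitcircle' are matched as plain Name by the regex rules; the
# override then turns them into Name.Function / Name.Variable if they are
# listed in ASYFUNCNAME / ASYVARNAME.
for token_type, value in AsymptoteLexer().get_tokens(asy_source):
    if token_type in (Name.Function, Name.Variable):
        print(token_type, value)
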
Example No. 5
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      .. versionadded:: 0.8
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none':
        r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
                 r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|as|async|await|base|break|by|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|let|lock|new|null|on|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
                 r'descending|from|group|into|orderby|select|thenby|where|'
                 r'join|equals)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
                 r'sbyte|short|string|uint|ulong|ushort|var)\b\??',
                 Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'),
            ]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
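
A usage sketch for the unicodelevel option described in the docstring (assuming CSharpLexer is importable from a standard Pygments install):

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import CSharpLexer  # assumed import path

csharp_source = 'class Program { static void Main() { var größe = 1; } }\n'

# 'none' restricts identifiers to ASCII (fastest), 'basic' is the default,
# and 'full' accepts every identifier character the C# spec allows, at the
# cost of much larger regexes.
fast_lexer = CSharpLexer(unicodelevel='none')
full_lexer = CSharpLexer(unicodelevel='full')
print(highlight(csharp_source, full_lexer, HtmlFormatter()))
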
Example No. 6
class TypoScriptLexer(RegexLexer):
    """
    Lexer for TypoScript code.

    http://docs.typo3.org/typo3cms/TyposcriptReference/

    .. versionadded:: 2.2
    """

    name = 'TypoScript'
    aliases = ['typoscript']
    filenames = ['*.typoscript']
    mimetypes = ['text/x-typoscript']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'root': [
            include('comment'),
            include('constant'),
            include('html'),
            include('label'),
            include('whitespace'),
            include('keywords'),
            include('punctuation'),
            include('operator'),
            include('structure'),
            include('literal'),
            include('other'),
        ],
        'keywords': [
            # Conditions
            (r'(?i)(\[)(browser|compatVersion|dayofmonth|dayofweek|dayofyear|'
             r'device|ELSE|END|GLOBAL|globalString|globalVar|hostname|hour|IP|'
             r'language|loginUser|loginuser|minute|month|page|PIDinRootline|'
             r'PIDupinRootline|system|treeLevel|useragent|userFunc|usergroup|'
             r'version)([^\]]*)(\])',
             bygroups(String.Symbol, Name.Constant, Text, String.Symbol)),
            # Functions
            (r'(?=[\w\-])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|'
             r'filelink|if|imageLinkWrap|imgResource|makelinks|numRows|numberFormat|'
             r'parseFunc|replacement|round|select|split|stdWrap|strPad|tableStyle|'
             r'tags|textStyle|typolink)(?![\w\-])', Name.Function),
            # Toplevel objects and _*
            (r'(?:(=?\s*<?\s+|^\s*))(cObj|field|config|content|constants|FEData|'
             r'file|frameset|includeLibs|lib|page|plugin|register|resources|sitemap|'
             r'sitetitle|styles|temp|tt_[^:.\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|'
             r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-])',
             bygroups(Operator, Name.Builtin)),
            # Content objects
            (r'(?=[\w\-])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|'
             r'CTABLE|EDITPANEL|FILE|FILES|FLUIDTEMPLATE|FORM|HMENU|HRULER|HTML|'
             r'IMAGE|IMGTEXT|IMG_RESOURCE|LOAD_REGISTER|MEDIA|MULTIMEDIA|OTABLE|'
             r'PAGE|QTOBJECT|RECORDS|RESTORE_REGISTER|SEARCHRESULT|SVG|SWFOBJECT|'
             r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-])', Name.Class),
            # Menu states
            (r'(?=[\w\-])(ACTIFSUBRO|ACTIFSUB|ACTRO|ACT|CURIFSUBRO|CURIFSUB|CURRO|'
             r'CUR|IFSUBRO|IFSUB|NO|SPC|USERDEF1RO|USERDEF1|USERDEF2RO|USERDEF2|'
             r'USRRO|USR)', Name.Class),
            # Menu objects
            (r'(?=[\w\-])(GMENU_FOLDOUT|GMENU_LAYERS|GMENU|IMGMENUITEM|IMGMENU|'
             r'JSMENUITEM|JSMENU|TMENUITEM|TMENU_LAYERS|TMENU)', Name.Class),
            # PHP objects
            (r'(?=[\w\-])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class),
            (r'(?=[\w\-])(userFunc)(?![\w\-])', Name.Function),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'html': [
            (r'<\S[^\n>]*>', using(TypoScriptHtmlDataLexer)),
            (r'&[^;\n]*;', String),
            (r'(?s)(_CSS_DEFAULT_STYLE)(\s*)(\()(.*(?=\n\)))',
             bygroups(Name.Class, Text, String.Symbol,
                      using(TypoScriptCssDataLexer))),
        ],
        'literal': [
            (r'0x[0-9A-Fa-f]+t?', Number.Hex),
            # (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?\s*(?:[^=])', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'(###\w+###)', Name.Constant),
        ],
        'label': [
            # Language label or extension resource FILE:... or LLL:... or EXT:...
            (r'(EXT|FILE|LLL):[^}\n"]*', String),
            # Path to a resource
            (r'(?![^\w\-])([\w\-]+(?:/[\w\-]+)+/?)(\S*\n)',
             bygroups(String, String)),
        ],
        'punctuation': [
            (r'[,.]', Punctuation),
        ],
        'operator': [
            (r'[<>,:=.*%+|]', Operator),
        ],
        'structure': [
            # Brackets and braces
            (r'[{}()\[\]\\]', String.Symbol),
        ],
        'constant': [
            # Constant: {$some.constant}
            (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
             bygroups(String.Symbol, Operator, Name.Constant, Name.Constant,
                      String.Symbol)),  # constant
            # Constant: {register:somevalue}
            (r'(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})',
             bygroups(String.Symbol, Name.Constant, Operator, Name.Constant,
                      String.Symbol)),  # constant
            # Hex color: #ff0077
            (r'(#[a-fA-F0-9]{6}\b|#[a-fA-F0-9]{3}\b)', String.Char)
        ],
        'comment': [
            (r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)',
             Comment),
            (r'/\*(?:(?!\*/).)*\*/', Comment),
            (r'(\s*#\s*\n)', Comment),
        ],
        'other': [
            (r'[\w"\-!/&;]+', Text),
        ],
    }
Example No. 7
class JsgfLexer(RegexLexer):
    """
    For `JSpeech Grammar Format <https://www.w3.org/TR/jsgf/>`_
    grammars.

    .. versionadded:: 2.2
    """
    name = 'JSGF'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            include('comments'),
            include('non-comments'),
        ],
        'comments': [
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*', Comment.Single),
        ],
        'non-comments': [
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Text),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            (r'(<)(NULL|VOID)(>)',
             bygroups(Punctuation, Name.Builtin, Punctuation)),
            (r'<', Punctuation, 'rulename'),
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Text),
            (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text,
                                            Punctuation)),
            (r'[^.>]+', Name.Constant),
        ],
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
             bygroups(Comment.Multiline, Comment.Special,
                      using(this, state='example'))),
            (r'(^\s*\*?\s*)(@\S*)', bygroups(Comment.Multiline,
                                             Comment.Special)),
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],
        'example': [
            (r'\n\s*\*', Comment.Multiline),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
Example No. 8
class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.
    """

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this),
                                                   Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)', bygroups(using(this),
                                            Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            (r'(L?)(")', bygroups(String.Affix, String), 'string'),
            (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            (r'\*/', Error),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (words(
                ('asm', 'auto', 'break', 'case', 'const', 'continue',
                 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto',
                 'if', 'register', 'restricted', 'return', 'sizeof', 'static',
                 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'),
                suffix=r'\b'), Keyword),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread', 'typename'),
                   suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall',
                    'cdecl', 'fastcall', 'int32', 'declspec', 'finally',
                    'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned',
                    'raise', 'noop', 'identifier', 'forceinline', 'assume'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)',
             bygroups(Name.Label, Text, Punctuation)),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;{]*)(\{)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation),
                'function'),
            # function declarations
            (
                r'((?:[\w*\s])+?(?:\s|[*]))'  # return arguments
                r'([a-zA-Z_]\w*)'  # method name
                r'(\s*\([^;]*?\))'  # signature
                r'([^;]*)(;)',
                bygroups(using(this), Name.Function, using(this), using(this),
                         Punctuation)),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'(include)(' + _ws1 + r')([^\n]+)',
             bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t',
        'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR',
        'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'
    }
    c99_types = {
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t',
        'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t',
        'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t',
        'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t',
        'uintptr_t', 'intmax_t', 'uintmax_t'
    }
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t',
        'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t',
        'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t',
        'timer_t', 'uid_t'
    }

    def __init__(self, **options):
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting',
                                               True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.platformhighlighting = get_bool_opt(options,
                                                 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value
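
The boolean options read in __init__() control the token remapping done in get_tokens_unprocessed() above. A short sketch using CLexer, a concrete CFamilyLexer subclass in standard Pygments (import path assumed):

from pygments.lexers import CLexer  # concrete CFamilyLexer subclass; assumed import path

c_source = 'size_t n = sizeof(buf);\n'

# With the defaults, names found in stdlib_types / c99_types / linux_types are
# promoted from Name to Keyword.Type; stdlibhighlighting=False switches the
# stdlib part of that remapping off.
for lexer in (CLexer(), CLexer(stdlibhighlighting=False)):
    print([tok for tok in lexer.get_tokens(c_source) if tok[1] == 'size_t'])
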
Example No. 9
class UniconLexer(RegexLexer):
    """
    For Unicon source code.

    .. versionadded:: 2.4
    """

    name = 'Unicon'
    aliases = ['unicon']
    filenames = ['*.icn']
    mimetypes = ['text/unicon']

    flags = re.MULTILINE

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'#.*?\n', Comment.Single),
            (r'[^\S\n]+', Text),
            (r'class|method|procedure', Keyword.Declaration, 'subprogram'),
            (r'(record)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'(#line|\$C|\$Cend|\$define|\$else|\$endif|\$error|\$ifdef|'
             r'\$ifndef|\$include|\$line|\$undef)\b', Keyword.PreProc),
            (r'(&null|&fail)\b', Keyword.Constant),
            (r'&allocated|&ascii|&clock|&collections|&column|&col|&control|'
             r'&cset|&current|&dateline|&date|&digits|&dump|'
             r'&errno|&errornumber|&errortext|&errorvalue|&error|&errout|'
             r'&eventcode|&eventvalue|&eventsource|&e|'
             r'&features|&file|&host|&input|&interval|&lcase|&letters|'
             r'&level|&line|&ldrag|&lpress|&lrelease|'
             r'&main|&mdrag|&meta|&mpress|&mrelease|&now|&output|'
             r'&phi|&pick|&pi|&pos|&progname|'
             r'&random|&rdrag|&regions|&resize|&row|&rpress|&rrelease|'
             r'&shift|&source|&storage|&subject|'
             r'&time|&trace|&ucase|&version|'
             r'&window|&x|&y', Keyword.Reserved),
            (r'(by|of|not|to)\b', Keyword.Reserved),
            (r'(global|local|static|abstract)\b', Keyword.Reserved),
            (r'package|link|import', Keyword.Declaration),
            (words((
                'break', 'case', 'create', 'critical', 'default', 'end', 'all',
                'do', 'else', 'every', 'fail', 'if', 'import', 'initial',
                'initially', 'invocable', 'next',
                'repeat', 'return', 'suspend',
                'then', 'thread', 'until', 'while'), prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
            (words((
                'Abort', 'abs', 'acos', 'Active', 'Alert', 'any', 'Any', 'Arb',
                'Arbno', 'args', 'array', 'asin', 'atan', 'atanh', 'Attrib',
                'Bal', 'bal', 'Bg', 'Break', 'Breakx',
                'callout', 'center', 'char', 'chdir', 'chmod', 'chown', 'chroot',
                'classname', 'Clip', 'Clone', 'close', 'cofail', 'collect',
                'Color', 'ColorValue', 'condvar', 'constructor', 'copy',
                'CopyArea', 'cos', 'Couple', 'crypt', 'cset', 'ctime',
                'dbcolumns', 'dbdriver', 'dbkeys', 'dblimits', 'dbproduct',
                'dbtables', 'delay', 'delete', 'detab', 'display', 'DrawArc',
                'DrawCircle', 'DrawCube', 'DrawCurve', 'DrawCylinder',
                'DrawDisk', 'DrawImage', 'DrawLine', 'DrawPoint', 'DrawPolygon',
                'DrawRectangle', 'DrawSegment', 'DrawSphere', 'DrawString',
                'DrawTorus', 'dtor',
                'entab', 'EraseArea', 'errorclear', 'Event', 'eventmask',
                'EvGet', 'EvSend', 'exec', 'exit', 'exp', 'Eye',
                'Fail', 'fcntl', 'fdup', 'Fence', 'fetch', 'Fg', 'fieldnames',
                'filepair', 'FillArc', 'FillCircle', 'FillPolygon',
                'FillRectangle', 'find', 'flock', 'flush', 'Font', 'fork',
                'FreeColor', 'FreeSpace', 'function',
                'get', 'getch', 'getche', 'getegid', 'getenv', 'geteuid',
                'getgid', 'getgr', 'gethost', 'getpgrp', 'getpid', 'getppid',
                'getpw', 'getrusage', 'getserv', 'GetSpace', 'gettimeofday',
                'getuid', 'globalnames', 'GotoRC', 'GotoXY', 'gtime', 'hardlink',
                'iand', 'icom', 'IdentityMatrix', 'image', 'InPort', 'insert',
                'Int86', 'integer', 'ioctl', 'ior', 'ishift', 'istate', 'ixor',
                'kbhit', 'key', 'keyword', 'kill',
                'left', 'Len', 'list', 'load', 'loadfunc', 'localnames',
                'lock', 'log', 'Lower', 'lstat',
                'many', 'map', 'match', 'MatrixMode', 'max', 'member',
                'membernames', 'methodnames', 'methods', 'min', 'mkdir', 'move',
                'MultMatrix', 'mutex',
                'name', 'NewColor', 'Normals', 'NotAny', 'numeric',
                'open', 'opencl', 'oprec', 'ord', 'OutPort',
                'PaletteChars', 'PaletteColor', 'PaletteKey', 'paramnames',
                'parent', 'Pattern', 'Peek', 'Pending', 'pipe', 'Pixel',
                'PlayAudio', 'Poke', 'pop', 'PopMatrix', 'Pos', 'pos',
                'proc', 'pull', 'push', 'PushMatrix', 'PushRotate', 'PushScale',
                'PushTranslate', 'put',
                'QueryPointer',
                'Raise', 'read', 'ReadImage', 'readlink', 'reads', 'ready',
                'real', 'receive', 'Refresh', 'Rem', 'remove', 'rename',
                'repl', 'reverse', 'right', 'rmdir', 'Rotate', 'Rpos',
                'Rtab', 'rtod', 'runerr',
                'save', 'Scale', 'seek', 'select', 'send', 'seq',
                'serial', 'set', 'setenv', 'setgid', 'setgrent',
                'sethostent', 'setpgrp', 'setpwent', 'setservent',
                'setuid', 'signal', 'sin', 'sort', 'sortf', 'Span',
                'spawn', 'sql', 'sqrt', 'stat', 'staticnames', 'stop',
                'StopAudio', 'string', 'structure', 'Succeed', 'Swi',
                'symlink', 'sys_errstr', 'system', 'syswrite',
                'Tab', 'tab', 'table', 'tan',
                'Texcoord', 'Texture', 'TextWidth', 'Translate',
                'trap', 'trim', 'truncate', 'trylock', 'type',
                'umask', 'Uncouple', 'unlock', 'upto', 'utime',
                'variable', 'VAttrib',
                'wait', 'WAttrib', 'WDefault', 'WFlush', 'where',
                'WinAssociate', 'WinButton', 'WinColorDialog', 'WindowContents',
                'WinEditRegion', 'WinFontDialog', 'WinMenuBar', 'WinOpenDialog',
                'WinPlayMedia', 'WinSaveDialog', 'WinScrollBar', 'WinSelectDialog',
                'write', 'WriteImage', 'writes', 'WSection',
                'WSync'), prefix=r'\b', suffix=r'\b'),
             Name.Function),
            include('numbers'),
            (r'<@|<<@|>@|>>@|\.>|\->', Operator),
            (r'\*\*|\+\+|\-\-|\.|\=|\~\=|<\=|>\=|\=\=|\~\=\=|<<|<<\=|>>|>>\=', Operator),
            (r':\=|:\=:|\->|<\->|\+:\=|\|', Operator),
            (r'\=\=\=|\~\=\=\=', Operator),
            (r'"(?:[^\\"]|\\.)*"', String),
            (r"'(?:[^\\']|\\.)*'", String.Character),
            (r'[*<>+=/&!?@~\\-]', Operator),
            (r'\^', Operator),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"([\[\]])", Punctuation),
            (r"(<>|=>|[()|:;,.'`]|[{}]|[%]|[&?])", Punctuation),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'\b([+-]?([2-9]|[12][0-9]|3[0-6])[rR][0-9a-zA-Z]+)\b', Number.Hex),
            (r'[+-]?[0-9]*\.([0-9]*)([Ee][+-]?[0-9]*)?', Number.Float),
            (r'\b([+-]?[0-9]+[KMGTPkmgtp]?)\b', Number.Integer),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        'type_def': [
            (r'\(', Punctuation, 'formal_part'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'\w+', Name.Variable),
            (r',', Punctuation),
            (r'(:string|:integer|:real)\b', Keyword.Reserved),
            include('root'),
        ],
    }
Example No. 10
class BatchLexer(RegexLexer):
    """
    Lexer for the DOS/Windows Batch file format.

    .. versionadded:: 0.7
    """
    name = 'Batchfile'
    aliases = ['bat', 'batch', 'dosbatch', 'winbatch']
    filenames = ['*.bat', '*.cmd']
    mimetypes = ['application/x-dos-batch']

    flags = re.MULTILINE | re.IGNORECASE

    _nl = r'\n\x1a'
    _punct = r'&<>|'
    _ws = r'\t\v\f\r ,;=\xa0'
    _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws)
    _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' %
                           (_nl, _ws, _nl, _punct))
    _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl)
    _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws
    _label = r'(?:(?:[^%s%s%s+:^]|\^[%s]?[\w\W])*)' % (_nl, _punct, _ws, _nl)
    _label_compound = (r'(?:(?:[^%s%s%s+:^)]|\^[%s]?[^)])*)' %
                       (_nl, _punct, _ws, _nl))
    _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator
    _opword = r'(?:equ|geq|gtr|leq|lss|neq)'
    _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl)
    _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|'
                 r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|'
                 r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|'
                 r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:'
                 r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' %
                 (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl))
    _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s])+)' % (_nl, _nl, _punct, _ws)
    _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s)])+)' % (_nl, _nl,
                                                                  _punct, _ws)
    _token = r'(?:[%s]+|%s)' % (_punct, _core_token)
    _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound)
    _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
               (_punct, _string, _variable, _core_token))

    def _make_begin_state(compound,
                          _core_token=_core_token,
                          _core_token_compound=_core_token_compound,
                          _keyword_terminator=_keyword_terminator,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _space=_space,
                          _start_label=_start_label,
                          _stoken=_stoken,
                          _token_terminator=_token_terminator,
                          _variable=_variable,
                          _ws=_ws):
        rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                            ')' if compound else '')
        rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
        rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
        set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
        suffix = ''
        if compound:
            _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
            _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
            suffix = '/compound'
        return [
            ((r'\)', Punctuation, '#pop') if compound else
             (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
              Comment.Single)),
            (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
            (_space, using(this, state='text')),
            include('redirect%s' % suffix), (r'[%s]+' % _nl, Text),
            (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation),
            (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
             r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
             (_nl, _token_terminator, _space,
              _core_token_compound if compound else _core_token, _nl, _nl),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
             (_keyword_terminator, rest, _nl, _nl, rest),
             bygroups(Keyword, using(this,
                                     state='text')), 'follow%s' % suffix),
            (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                    'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                    'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                    'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                    'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                    'title', 'type', 'ver', 'verify', 'vol'),
                   suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
            (r'(call)(%s?)(:)' % _space,
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'call%s' % suffix),
            (r'call%s' % _keyword_terminator, Keyword),
            (r'(for%s(?!\^))(%s)(/f%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/f', 'for')),
            (r'(for%s(?!\^))(%s)(/l%s)' %
             (_token_terminator, _space, _token_terminator),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), ('for/l', 'for')),
            (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
            (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Punctuation), 'label%s' % suffix),
            (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
             (_token_terminator, _space, _token_terminator, _space,
              _token_terminator, _space),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), Keyword,
                      using(this, state='text')), ('(?', 'if')),
            (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
             (_token_terminator, _space, _stoken, _keyword_terminator,
              rest_of_line_compound if compound else rest_of_line),
             Comment.Single, 'follow%s' % suffix),
            (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
             bygroups(Keyword, using(this, state='text'),
                      Keyword), 'arithmetic%s' % suffix),
            (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
             r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
             (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
              ')' if compound else '', _nl, _nl),
             bygroups(Keyword, using(this, state='text'), Keyword,
                      using(this, state='text'), using(this, state='variable'),
                      Punctuation), 'follow%s' % suffix),
            default('follow%s' % suffix)
        ]

    def _make_follow_state(compound,
                           _label=_label,
                           _label_compound=_label_compound,
                           _nl=_nl,
                           _space=_space,
                           _start_label=_start_label,
                           _token=_token,
                           _token_compound=_token_compound,
                           _ws=_ws):
        suffix = '/compound' if compound else ''
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [
            (r'%s([%s]*)(%s)(.*)' %
             (_start_label, _ws, _label_compound if compound else _label),
             bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)),
            include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'),
            (r'\|\|?|&&?', Punctuation, '#pop'),
            include('text')
        ]
        return state

    def _make_arithmetic_state(compound,
                               _nl=_nl,
                               _punct=_punct,
                               _string=_string,
                               _variable=_variable,
                               _ws=_ws):
        op = r'=+\-*/!~'
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state += [(r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex),
                  (r'\d+', Number.Integer), (r'[(),]+', Punctuation),
                  (r'([%s]|%%|\^\^)+' % op, Operator),
                  (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' %
                   (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws,
                    r'[^)]' if compound else r'[\w\W]'),
                   using(this, state='variable')),
                  (r'(?=[\x00|&])', Text, '#pop'),
                  include('follow')]
        return state

    def _make_call_state(compound,
                         _label=_label,
                         _label_compound=_label_compound):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append((r'(:?)(%s)' % (_label_compound if compound else _label),
                      bygroups(Punctuation, Name.Label), '#pop'))
        return state

    def _make_label_state(compound,
                          _label=_label,
                          _label_compound=_label_compound,
                          _nl=_nl,
                          _punct=_punct,
                          _string=_string,
                          _variable=_variable):
        state = []
        if compound:
            state.append((r'(?=\))', Text, '#pop'))
        state.append(
            (r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' %
             (_label_compound if compound else _label, _string, _variable, _nl,
              r'[^)]' if compound else r'[\w\W]', _nl, _punct,
              r')' if compound else ''), bygroups(Name.Label,
                                                  Comment.Single), '#pop'))
        return state

    def _make_redirect_state(compound,
                             _core_token_compound=_core_token_compound,
                             _nl=_nl,
                             _punct=_punct,
                             _stoken=_stoken,
                             _string=_string,
                             _space=_space,
                             _variable=_variable,
                             _ws=_ws):
        stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                           (_punct, _string, _variable, _core_token_compound))
        return [
            (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' %
             (_nl, _ws, _nl, _ws),
             bygroups(Number.Integer, Punctuation, Text, Number.Integer)),
            (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
             (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken),
             bygroups(Number.Integer, Punctuation, using(this, state='text')))
        ]

    tokens = {
        'root':
        _make_begin_state(False),
        'follow':
        _make_follow_state(False),
        'arithmetic':
        _make_arithmetic_state(False),
        'call':
        _make_call_state(False),
        'label':
        _make_label_state(False),
        'redirect':
        _make_redirect_state(False),
        'root/compound':
        _make_begin_state(True),
        'follow/compound':
        _make_follow_state(True),
        'arithmetic/compound':
        _make_arithmetic_state(True),
        'call/compound':
        _make_call_state(True),
        'label/compound':
        _make_label_state(True),
        'redirect/compound':
        _make_redirect_state(True),
        'variable-or-escape':
        [(_variable, Name.Variable),
         (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape)],
        'string': [(r'"', String.Double, '#pop'), (_variable, Name.Variable),
                   (r'\^!|%%', String.Escape),
                   (r'[^"%%^%s]+|[%%^]' % _nl, String.Double),
                   default('#pop')],
        'sqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Single)],
        'bqstring':
        [include('variable-or-escape'), (r'[^%]+|%', String.Backtick)],
        'text': [(r'"', String.Double, 'string'),
                 include('variable-or-escape'),
                 (r'[^"%%^%s%s%s\d)]+|.' % (_nl, _punct, _ws), Text)],
        'variable': [(r'"', String.Double, 'string'),
                     include('variable-or-escape'),
                     (r'[^"%%^%s]+|.' % _nl, Name.Variable)],
        'for': [(r'(%s)(in)(%s)(\()' % (_space, _space),
                 bygroups(using(this, state='text'), Keyword,
                          using(this, state='text'), Punctuation), '#pop'),
                include('follow')],
        'for2': [(r'\)', Punctuation),
                 (r'(%s)(do%s)' % (_space, _token_terminator),
                  bygroups(using(this, state='text'), Keyword), '#pop'),
                 (r'[%s]+' % _nl, Text),
                 include('follow')],
        'for/f':
        [(r'(")((?:%s|[^"])*?")([%s%s]*)(\))' % (_variable, _nl, _ws),
          bygroups(String.Double, using(this, state='string'), Text,
                   Punctuation)),
         (r'"', String.Double, ('#pop', 'for2', 'string')),
         (r"('(?:%%%%|%s|[\w\W])*?')([%s%s]*)(\))" % (_variable, _nl, _ws),
          bygroups(using(this, state='sqstring'), Text, Punctuation)),
         (r'(`(?:%%%%|%s|[\w\W])*?`)([%s%s]*)(\))' % (_variable, _nl, _ws),
          bygroups(using(this, state='bqstring'), Text, Punctuation)),
         include('for2')],
        'for/l': [(r'-?\d+', Number.Integer),
                  include('for2')],
        'if': [
            (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' %
             (_token_terminator, _space),
             bygroups(Keyword, using(this, state='text'),
                      Number.Integer), '#pop'),
            (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text'),
                      using(this, state='variable')), '#pop'),
            (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken),
             bygroups(Keyword, using(this, state='text')), '#pop'),
            (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number),
             bygroups(using(this, state='arithmetic'), Operator.Word,
                      using(this, state='arithmetic')), '#pop'),
            (_stoken, using(this, state='text'), ('#pop', 'if2')),
        ],
        'if2': [(r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken),
                 bygroups(using(this, state='text'), Operator,
                          using(this, state='text')), '#pop'),
                (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken),
                 bygroups(using(this, state='text'), Operator.Word,
                          using(this, state='text')), '#pop')],
        '(?': [(_space, using(this, state='text')),
               (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')),
               default('#pop')],
        'else?': [(_space, using(this, state='text')),
                  (r'else%s' % _token_terminator, Keyword, '#pop'),
                  default('#pop')]
    }
Example no. 11
def _make_begin_state(compound,
                       _core_token=_core_token,
                       _core_token_compound=_core_token_compound,
                       _keyword_terminator=_keyword_terminator,
                       _nl=_nl,
                       _punct=_punct,
                       _string=_string,
                       _space=_space,
                       _start_label=_start_label,
                       _stoken=_stoken,
                       _token_terminator=_token_terminator,
                       _variable=_variable,
                       _ws=_ws):
     rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct,
                                         ')' if compound else '')
     rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl)
     rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl)
     set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl
     suffix = ''
     if compound:
         _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator
         _token_terminator = r'(?:(?=\))|%s)' % _token_terminator
         suffix = '/compound'
     return [
         ((r'\)', Punctuation, '#pop') if compound else
          (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line),
           Comment.Single)),
         (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix),
         (_space, using(this, state='text')),
         include('redirect%s' % suffix), (r'[%s]+' % _nl, Text),
         (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation),
         (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|'
          r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' %
          (_nl, _token_terminator, _space,
           _core_token_compound if compound else _core_token, _nl, _nl),
          bygroups(Keyword, using(this,
                                  state='text')), 'follow%s' % suffix),
         (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' %
          (_keyword_terminator, rest, _nl, _nl, rest),
          bygroups(Keyword, using(this,
                                  state='text')), 'follow%s' % suffix),
         (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy',
                 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase',
                 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move',
                 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren',
                 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time',
                 'title', 'type', 'ver', 'verify', 'vol'),
                suffix=_keyword_terminator), Keyword, 'follow%s' % suffix),
         (r'(call)(%s?)(:)' % _space,
          bygroups(Keyword, using(this, state='text'),
                   Punctuation), 'call%s' % suffix),
         (r'call%s' % _keyword_terminator, Keyword),
         (r'(for%s(?!\^))(%s)(/f%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), ('for/f', 'for')),
         (r'(for%s(?!\^))(%s)(/l%s)' %
          (_token_terminator, _space, _token_terminator),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), ('for/l', 'for')),
         (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')),
         (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space),
          bygroups(Keyword, using(this, state='text'),
                   Punctuation), 'label%s' % suffix),
         (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' %
          (_token_terminator, _space, _token_terminator, _space,
           _token_terminator, _space),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), Keyword,
                   using(this, state='text')), ('(?', 'if')),
         (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' %
          (_token_terminator, _space, _stoken, _keyword_terminator,
           rest_of_line_compound if compound else rest_of_line),
          Comment.Single, 'follow%s' % suffix),
         (r'(set%s)%s(/a)' % (_keyword_terminator, set_space),
          bygroups(Keyword, using(this, state='text'),
                   Keyword), 'arithmetic%s' % suffix),
         (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|'
          r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' %
          (_keyword_terminator, set_space, set_space, _nl, _nl, _punct,
           ')' if compound else '', _nl, _nl),
          bygroups(Keyword, using(this, state='text'), Keyword,
                   using(this, state='text'), using(this, state='variable'),
                   Punctuation), 'follow%s' % suffix),
         default('follow%s' % suffix)
     ]
Example no. 12
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
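
A short usage sketch for the lexer above (hedged: it assumes the vendored package exposes the same lookup helpers as upstream Pygments; the import path is an assumption, not part of the original example):

from testflows._core.contrib.pygments.lexers import get_lexer_for_filename

# Filename-based lookup resolves *.pug / *.jade through the filenames list above.
lexer = get_lexer_for_filename('views/index.jade')
print(lexer.name)  # expected: 'Pug'
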
Example no. 13
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment, 'html-comment-block'), '#pop'),
            (r'-#' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'),
            (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation,
                                                using(ScalaLexer)), '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator,
                                                 'filter-block'), '#pop'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
Example no. 14
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS are highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag,
                      Text), ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)', bygroups(Punctuation, Text,
                                             Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator,
                                               Text), 'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text,
                                       Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
        ],
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if html_doctype_matches(text):
            return 0.5
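
A minimal highlighting sketch for HtmlLexer (hedged: it assumes the vendored package exposes the usual highlight() entry point and formatters; those import paths are assumptions, not shown above):

from testflows._core.contrib.pygments import highlight
from testflows._core.contrib.pygments.formatters import TerminalFormatter

# The 'script-content' state above hands the script body to JavascriptLexer,
# so the embedded JavaScript is highlighted as JavaScript, not as plain text.
doc = '<p>hello</p>\n<script>var x = 1;</script>\n'
print(highlight(doc, HtmlLexer(), TerminalFormatter()))
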
Example no. 15
class VCLLexer(RegexLexer):
    """
    For Varnish Configuration Language (VCL).

    .. versionadded:: 2.2
    """
    name = 'VCL'
    aliases = ['vcl']
    filenames = ['*.vcl']
    mimetypes = ['text/x-vclsrc']

    def analyse_text(text):
        # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
        # that this is VCL
        if text.startswith('vcl 4.0;'):
            return 1.0
        # Skip over comments and blank lines
        # This is accurate enough that returning 0.9 is reasonable.
        # Almost no VCL files start without some comments.
        elif '\nvcl 4.0;' in text[:1000]:
            return 0.9

    tokens = {
        'probe': [
            include('whitespace'),
            include('comments'),
            (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'acl': [
            include('whitespace'),
            include('comments'),
            (r'[!/]+', Operator),
            (r';', Punctuation),
            (r'\d+', Number),
            (r'\}', Punctuation, '#pop'),
        ],
        'backend': [
            include('whitespace'),
            (r'(\.probe)(\s*=\s*)(\w+)(;)',
             bygroups(Name.Attribute, Operator, Name.Variable.Global,
                      Punctuation)),
            (r'(\.probe)(\s*=\s*)(\{)',
             bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
            (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)',
             bygroups(Name.Attribute, Operator, using(this), Punctuation)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'statements': [
            (r'(\d\.)?\d+[sdwhmy]', Literal.Date),
            (r'(\d\.)?\d+ms', Literal.Date),
            (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
             r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
             r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
            (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
             r'miss|fetch|restart)\b', Name.Constant),
            (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b',
             Name.Variable),
            (words(
                ('obj.status', 'req.hash_always_miss', 'beresp.backend',
                 'req.esi_level', 'req.can_gzip', 'beresp.ttl',
                 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity',
                 'req.hash_ignore_busy', 'obj.reason', 'req.xid',
                 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age',
                 'local.ip', 'beresp.uncacheable', 'req.method',
                 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts',
                 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid',
                 'bereq.between_bytes_timeout', 'req.esi',
                 'bereq.first_byte_timeout', 'bereq.method',
                 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status',
                 'beresp.do_gunzip', 'beresp.storage_hint',
                 'resp.is_streaming', 'beresp.do_stream', 'req_top.method',
                 'bereq.backend', 'beresp.backend.name', 'beresp.status',
                 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason',
                 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi',
                 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname',
                 'remote.ip', 'req.backend_hint', 'server.identity',
                 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip',
                 'bereq.uncacheable'),
                suffix=r'\b'), Name.Variable),
            (r'[!%&+*\-,/<.}{>=|~]+', Operator),
            (r'[();]', Punctuation),
            (r'[,]+', Punctuation),
            (words(('hash_data', 'regsub', 'regsuball', 'if', 'else', 'elsif',
                    'elif', 'synth', 'synthetic', 'ban', 'return', 'set',
                    'unset', 'import', 'include', 'new', 'rollback', 'call'),
                   suffix=r'\b'), Keyword),
            (r'storage\.\w+\.\w+\b', Name.Variable),
            (words(('true', 'false')), Name.Builtin),
            (r'\d+\b', Number),
            (r'(backend)(\s+\w+)(\s*\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
            (r'(probe\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
            (r'(acl\s)(\s*\w+\s)(\{)',
             bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
            (r'(vcl )(4.0)(;)$',
             bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
            (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
             bygroups(Keyword, Name.Function, Punctuation)),
            (r'([a-zA-Z_]\w*)'
             r'(\.)'
             r'([a-zA-Z_]\w*)'
             r'(\s*\(.*\))',
             bygroups(Name.Function, Punctuation, Name.Function, using(this))),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'[^"\n]+', String),  # all other characters
        ],
        'multistring': [
            (r'[^"}]', String),
            (r'"\}', String, '#pop'),
            (r'["}]', String),
        ],
        'whitespace': [
            (r'L?"', String, 'string'),
            (r'\{"', String, 'multistring'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('statements'),
            (r'\s+', Text),
        ],
    }
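
A quick sketch of how the analyse_text() heuristic above gets used: guess_lexer() scores candidate lexers with analyse_text, so a leading 'vcl 4.0;' line should make VCL win (the import path is an assumption based on the vendored package layout):

from testflows._core.contrib.pygments.lexers import guess_lexer

# 'vcl 4.0;' on the very first line scores 1.0 in the analyse_text above.
snippet = 'vcl 4.0;\n\nbackend default {\n    .host = "127.0.0.1";\n}\n'
print(guess_lexer(snippet).name)  # expected: 'VCL'
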
Example no. 16
class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from testflows._core.contrib.pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' %
                         (re.escape(unicode_delimiters), re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text,
                      Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this,
                                                         state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this,
                                                         state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (
                r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
                bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                         # has two lines
                p1 * 2 + 1 == p2 and            # they are the same length
                text[p1 + 1] in '-=' and        # the next line both starts and ends with
                text[p1 + 1] == text[p2 - 1]):  # ...a sufficiently high header
            return 0.5
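
A small sketch of the handlecodeblocks option documented above: with the default, the directive body is re-lexed with the named language's lexer, while handlecodeblocks=False keeps it as a plain String run (a sketch, not part of the original example):

# Note the trailing blank line: the sourcecode-directive regex above expects
# at least one more (possibly empty) line after the first code line.
rst = '.. code-block:: python\n\n    print("hi")\n\n'

nested = list(RstLexer().get_tokens(rst))                       # body re-lexed as Python
plain = list(RstLexer(handlecodeblocks=False).get_tokens(rst))  # body emitted as String
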
Example no. 17
class SlimLexer(ExtendedRegexLexer):
    """
    For Slim markup.

    .. versionadded:: 2.0
    """

    name = 'Slim'
    aliases = ['slim']
    filenames = ['*.slim']
    mimetypes = ['text/x-slim']

    flags = re.IGNORECASE
    _dot = r'(?: \|\n(?=.* \|)|.)'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],
        'eval-or-plain': [
            (r'([ \t]*==?)(.*\n)', bygroups(Punctuation,
                                            using(RubyLexer)), 'root'),
            (r'[ \t]+[\w:-]+(?==)', Name.Attribute, 'html-attributes'),
            default('plain'),
        ],
        'content': [
            include('css'),
            (r'[\w:-]+:[ \t]*\n', Text, 'plain'),
            (r'(-)(.*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'),
            (r'\|' + _dot + r'*\n', _starts_block(Text, 'plain'), '#pop'),
            (r'/' + _dot + r'*\n',
             _starts_block(Comment.Preproc, 'slim-comment-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            include('eval-or-plain'),
        ],
        'tag': [
            include('css'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            (r'[ \t]+\n', Punctuation, '#pop:2'),
            include('eval-or-plain'),
        ],
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(.*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
        'html-attributes': [
            (r'=', Punctuation),
            (r'"[^"]+"', using(RubyLexer), 'tag'),
            (r'\'[^\']+\'', using(RubyLexer), 'tag'),
            (r'\w+', Text, 'tag'),
        ],
        'slim-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
    }
Example no. 18
class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from testflows._core.contrib.pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph,
                                                      Text)),
            # bold
            # warning: the following rule eats internal tags, e.g. in **foo _bar_ baz** "bar" is not italics
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))',
             bygroups(Text, Generic.Strong, None, Text)),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
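
A similar sketch for the Markdown lexer: a fenced block with a language tag is routed through _handle_codeblock above, which re-lexes the body when a matching lexer is found (a sketch under that assumption, not part of the original example):

md = 'Intro text\n\n```python\nprint("hi")\n```\n'
for token, value in MarkdownLexer().get_tokens(md):
    print(token, repr(value))
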
Example no. 19
class ModelicaLexer(RegexLexer):
    """
    For `Modelica <http://www.modelica.org/>`_ source code.

    .. versionadded:: 1.1
    """
    name = 'Modelica'
    aliases = ['modelica']
    filenames = ['*.mo']
    mimetypes = ['text/x-modelica']

    flags = re.DOTALL | re.MULTILINE

    _name = r"(?:'(?:[^\\']|\\.)+'|[a-zA-Z_]\w*)"

    tokens = {
        'whitespace': [(u'[\\s\ufeff]+', Text),
                       (r'//[^\n]*\n?', Comment.Single),
                       (r'/\*.*?\*/', Comment.Multiline)],
        'root': [
            include('whitespace'), (r'"', String.Double, 'string'),
            (r'[()\[\]{},;]+', Punctuation),
            (r'\.?[*^/+-]|\.|<>|[<>:=]=?', Operator),
            (r'\d+(\.?\d*[eE][-+]?\d+|\.\d*)', Number.Float),
            (r'\d+', Number.Integer),
            (r'(abs|acos|actualStream|array|asin|assert|AssertionLevel|atan|'
             r'atan2|backSample|Boolean|cardinality|cat|ceil|change|Clock|'
             r'Connections|cos|cosh|cross|delay|diagonal|div|edge|exp|'
             r'ExternalObject|fill|floor|getInstanceName|hold|homotopy|'
             r'identity|inStream|integer|Integer|interval|inverse|isPresent|'
             r'linspace|log|log10|matrix|max|min|mod|ndims|noClock|noEvent|'
             r'ones|outerProduct|pre|previous|product|Real|reinit|rem|rooted|'
             r'sample|scalar|semiLinear|shiftSample|sign|sin|sinh|size|skew|'
             r'smooth|spatialDistribution|sqrt|StateSelect|String|subSample|'
             r'sum|superSample|symmetric|tan|tanh|terminal|terminate|time|'
             r'transpose|vector|zeros)\b', Name.Builtin),
            (r'(algorithm|annotation|break|connect|constant|constrainedby|der|'
             r'discrete|each|else|elseif|elsewhen|encapsulated|enumeration|'
             r'equation|exit|expandable|extends|external|firstTick|final|flow|for|if|'
             r'import|impure|in|initial|inner|input|interval|loop|nondiscrete|outer|'
             r'output|parameter|partial|protected|public|pure|redeclare|'
             r'replaceable|return|stream|then|when|while)\b',
             Keyword.Reserved), (r'(and|not|or)\b', Operator.Word),
            (r'(block|class|connector|end|function|model|operator|package|'
             r'record|type)\b', Keyword.Reserved, 'class'),
            (r'(false|true)\b', Keyword.Constant),
            (r'within\b', Keyword.Reserved, 'package-prefix'), (_name, Name)
        ],
        'class': [
            include('whitespace'), (r'(function|record)\b', Keyword.Reserved),
            (r'(if|for|when|while)\b', Keyword.Reserved, '#pop'),
            (_name, Name.Class, '#pop'),
            default('#pop')
        ],
        'package-prefix': [
            include('whitespace'), (_name, Name.Namespace, '#pop'),
            default('#pop')
        ],
        'string': [(r'"', String.Double, '#pop'),
                   (r'\\[\'"?\\abfnrtv]', String.Escape),
                   (r'(?i)<\s*html\s*>([^\\"]|\\.)+?(<\s*/\s*html\s*>|(?="))',
                    using(HtmlLexer)), (r'<|\\?[^"\\<]+', String.Double)]
    }
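
One Modelica-specific detail worth a sketch: the 'string' state above re-lexes <html>...</html> content inside string literals with HtmlLexer, which is how HTML in Documentation annotations gets highlighted (a sketch, not part of the original example):

# HTML embedded in a Modelica string is delegated to HtmlLexer by the rule above.
mo = 'annotation (Documentation(info="<html><p>Docs</p></html>"));\n'
for token, value in ModelicaLexer().get_tokens(mo):
    print(token, repr(value))
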
Example no. 20
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            (r'[^\S\n]+', Text),
            (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'task|protected', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)',
             bygroups(Keyword.Reserved, Text, Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (words(
                ('Address', 'Byte', 'Boolean', 'Character', 'Controlled',
                 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type',
                 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer',
                 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive',
                 'Reference_Type', 'Short_Float', 'Short_Integer',
                 'Short_Short_Float', 'Short_Short_Integer', 'String',
                 'Wide_Character', 'Wide_String'),
                suffix=r'\b'), Keyword.Type),
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'generic|private', Keyword.Declaration),
            (r'package', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (words(
                ('abort', 'abs', 'abstract', 'accept', 'access', 'aliased',
                 'all', 'array', 'at', 'begin', 'body', 'case', 'constant',
                 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif',
                 'end', 'entry', 'exception', 'exit', 'interface', 'for',
                 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of',
                 'or', 'others', 'out', 'overriding', 'pragma', 'protected',
                 'raise', 'range', 'record', 'renames', 'requeue', 'return',
                 'reverse', 'select', 'separate', 'subtype', 'synchronized',
                 'task', 'tagged', 'terminate', 'then', 'type', 'until',
                 'when', 'while', 'xor'),
                prefix=r'\b',
                suffix=r'\b'), Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        'end': [
            ('(if|case|record|loop|select)', Keyword.Reserved),
            (r'"[^"]+"|[\w.]+', Name.Function),
            (r'\s+', Text),
            (';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'with|and|use', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, ('record_def')),
            (r'(null record)(;)', bygroups(Keyword.Reserved,
                                           Punctuation), '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)',
             bygroups(Keyword.Type, Text, Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            include('root'),
        ],
        'package': [
            ('body', Keyword.Declaration),
            (r'is\s+new|renames', Keyword.Reserved),
            ('is', Keyword.Reserved, '#pop'),
            (';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }
Example no. 21
class FantomLexer(RegexLexer):
    """
    For Fantom source code.

    .. versionadded:: 1.5
    """
    name = 'Fantom'
    aliases = ['fan']
    filenames = ['*.fan']
    mimetypes = ['application/x-fantom']

    # often used regexes
    def s(str):
        return Template(str).substitute(
            dict(
                pod=r'[\"\w\.]+',
                eos=r'\n|;',
                id=r'[a-zA-Z_]\w*',
                # all chars which can be part of type definition. Starts with
                # either letter, or [ (maps), or | (funcs)
                type=r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]|\->?]*?',
            ))

    tokens = {
        'comments': [
            (r'(?s)/\*.*?\*/', Comment.Multiline),  # Multiline
            (r'//.*?\n', Comment.Single),  # Single line
            # TODO: highlight references in fandocs
            (r'\*\*.*?\n', Comment.Special),  # Fandoc
            (r'#.*\n', Comment.Single)  # Shell-style
        ],
        'literals': [
            (r'\b-?[\d_]+(ns|ms|sec|min|hr|day)', Number),  # Duration
            (r'\b-?[\d_]*\.[\d_]+(ns|ms|sec|min|hr|day)',
             Number),  # Duration with dot
            (r'\b-?(\d+)?\.\d+(f|F|d|D)?', Number.Float),  # Float/Decimal
            (r'\b-?0x[0-9a-fA-F_]+', Number.Hex),  # Hex
            (r'\b-?[\d_]+', Number.Integer),  # Int
            (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),  # Char
            (r'"', Punctuation, 'insideStr'),  # Opening quote
            (r'`', Punctuation, 'insideUri'),  # Opening accent
            (r'\b(true|false|null)\b', Keyword.Constant),  # Bool & null
            (
                r'(?:(\w+)(::))?(\w+)(<\|)(.*?)(\|>)',  # DSL
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         String, Punctuation)),
            (
                r'(?:(\w+)(::))?(\w+)?(#)(\w+)?',  # Type/slot literal
                bygroups(Name.Namespace, Punctuation, Name.Class, Punctuation,
                         Name.Function)),
            (r'\[,\]', Literal),  # Empty list
            (
                s(r'($type)(\[,\])'),  # Typed empty list
                bygroups(using(this, state='inType'), Literal)),
            (r'\[:\]', Literal),  # Empty Map
            (s(r'($type)(\[:\])'),
             bygroups(using(this, state='inType'), Literal)),
        ],
        'insideStr': [
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'"', Punctuation, '#pop'),  # Closing quot
            (r'.', String)  # String content
        ],
        'insideUri': [  # TODO: remove copy/paste str/uri
            (r'\\\\', String.Escape),  # Escaped backslash
            (r'\\"', String.Escape),  # Escaped "
            (r'\\`', String.Escape),  # Escaped `
            (r'\$\w+', String.Interpol),  # Subst var
            (r'\$\{.*?\}', String.Interpol),  # Subst expr
            (r'`', Punctuation, '#pop'),  # Closing tick
            (r'.', String.Backtick)  # URI content
        ],
        'protectionKeywords': [
            (r'\b(public|protected|private|internal)\b', Keyword),
        ],
        'typeKeywords': [
            (r'\b(abstract|final|const|native|facet|enum)\b', Keyword),
        ],
        'methodKeywords': [
            (r'\b(abstract|native|once|override|static|virtual|final)\b',
             Keyword),
        ],
        'fieldKeywords':
        [(r'\b(abstract|const|final|native|override|static|virtual|'
          r'readonly)\b', Keyword)],
        'otherKeywords': [
            (words(('try', 'catch', 'throw', 'finally', 'for', 'if', 'else',
                    'while', 'as', 'is', 'isnot', 'switch', 'case', 'default',
                    'continue', 'break', 'do', 'return', 'get', 'set'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword),
            (r'\b(it|this|super)\b', Name.Builtin.Pseudo),
        ],
        'operators':
        [(r'\+\+|\-\-|\+|\-|\*|/|\|\||&&|<=>|<=|<|>=|>|=|!|\[|\]', Operator)],
        'inType': [
            (r'[\[\]|\->:?]', Punctuation),
            (s(r'$id'), Name.Class),
            default('#pop'),
        ],
        'root': [
            include('comments'),
            include('protectionKeywords'),
            include('typeKeywords'),
            include('methodKeywords'),
            include('fieldKeywords'),
            include('literals'),
            include('otherKeywords'),
            include('operators'),
            (r'using\b', Keyword.Namespace, 'using'),  # Using stmt
            (r'@\w+', Name.Decorator, 'facet'),  # Symbol
            (r'(class|mixin)(\s+)(\w+)', bygroups(Keyword, Text, Name.Class),
             'inheritance'),  # Inheritance list

            # Type var := val
            (s(r'($type)([ \t]+)($id)(\s*)(:=)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Operator)),

            # var := val
            (s(r'($id)(\s*)(:=)'), bygroups(Name.Variable, Text, Operator)),

            # .someId( or ->someId( ###
            (s(r'(\.|(?:\->))($id)(\s*)(\()'),
             bygroups(Operator, Name.Function, Text,
                      Punctuation), 'insideParen'),

            # .someId  or ->someId
            (s(r'(\.|(?:\->))($id)'), bygroups(Operator, Name.Function)),

            # new makeXXX (
            (r'(new)(\s+)(make\w*)(\s*)(\()',
             bygroups(Keyword, Text, Name.Function, Text,
                      Punctuation), 'insideMethodDeclArgs'),

            # Type name (
            (
                s(r'($type)([ \t]+)'  # Return type and whitespace
                  r'($id)(\s*)(\()'),  # method name + open brace
                bygroups(using(this, state='inType'), Text, Name.Function,
                         Text, Punctuation),
                'insideMethodDeclArgs'),

            # ArgType argName,
            (s(r'($type)(\s+)($id)(\s*)(,)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # ArgType argName)
            # Covered in 'insideParen' state

            # ArgType argName -> ArgType|
            (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'),
             bygroups(using(this, state='inType'), Text,
                      Name.Variable, Text, Punctuation, Text,
                      using(this, state='inType'), Punctuation)),

            # ArgType argName|
            (s(r'($type)(\s+)($id)(\s*)(\|)'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation)),

            # Type var
            (s(r'($type)([ \t]+)($id)'),
             bygroups(using(this, state='inType'), Text, Name.Variable)),
            (r'\(', Punctuation, 'insideParen'),
            (r'\{', Punctuation, 'insideBrace'),
            (r'.', Text)
        ],
        'insideParen': [
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
        'insideMethodDeclArgs': [
            (r'\)', Punctuation, '#pop'),
            (s(r'($type)(\s+)($id)(\s*)(\))'),
             bygroups(using(this, state='inType'), Text, Name.Variable, Text,
                      Punctuation), '#pop'),
            include('root'),
        ],
        'insideBrace': [
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],
        'inheritance': [
            (r'\s+', Text),  # Whitespace
            (r':|,', Punctuation),
            (r'(?:(\w+)(::))?(\w+)',
             bygroups(Name.Namespace, Punctuation, Name.Class)),
            (r'\{', Punctuation, '#pop')
        ],
        'using': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(\[)(\w+)(\])',
             bygroups(Punctuation, Comment.Special, Punctuation)),  # ffi
            (r'(\")?([\w.]+)(\")?',
             bygroups(Punctuation, Name.Namespace, Punctuation)),  # podname
            (r'::', Punctuation, 'usingClass'),
            default('#pop')
        ],
        'usingClass': [
            (r'[ \t]+', Text),  # consume whitespaces
            (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text,
                                         Name.Class), '#pop:2'),
            (r'[\w$]+', Name.Class),
            default('#pop:2')  # jump out to root state
        ],
        'facet': [(r'\s+', Text), (r'\{', Punctuation, 'facetFields'),
                  default('#pop')],
        'facetFields': [
            include('comments'),
            include('literals'),
            include('operators'), (r'\s+', Text),
            (r'(\s*)(\w+)(\s*)(=)', bygroups(Text, Name, Text, Operator)),
            (r'\}', Punctuation, '#pop'), (r'.', Text)
        ],
    }
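
The s() helper above only substitutes the shared sub-patterns into a template string; a rough illustration of what one rule expands to, using string.Template directly with the id pattern copied from s():

from string import Template

_id = r'[a-zA-Z_]\w*'
print(Template(r'($id)(\s*)(:=)').substitute(dict(id=_id)))
# -> ([a-zA-Z_]\w*)(\s*)(:=)   i.e. the "var := val" rule in 'root' above
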
Example no. 22
class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (
                r'(' + r'|'.join((  # keep host code in largest possible chunks
                    r'[^%\'"/#]+',  # exclude unsafe characters
                    r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                    r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression

                    # / is safe now that we've handled regex and javadoc comments
                    r'/',
                )) + r')+',
                Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)',
             bygroups(Punctuation, using(RagelLexer), Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (
                r'(' +
                r'|'.join((  # keep ragel code in largest possible chunks.
                    r'(' + r'|'.join((
                        r'[^}\'"\[/#]',  # exclude unsafe characters
                        r'\}(?=[^%]|$)',  # } is okay as long as it's not followed by %
                        r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                        r'[^\\]\\[{}]',  # ...and } is okay if it's escaped

                        # allow / if it's preceded with one of these symbols
                        # (ragel EOF actions)
                        r'(>|\$|%|<|@|<>)/',

                        # specifically allow regex followed immediately by *
                        # so it doesn't get mistaken for a comment
                        r'/(?!\*)(\\\\|\\/|[^/])*/\*',

                        # allow / as long as it's not followed by another / or by a *
                        r'/(?=[^/*]|$)',

                        # We want to match as many of these as we can in one block.
                        # Not sure if we need the + sign here,
                        # does it help performance?
                    )) + r')+',

                    # strings and comments may safely contain unsafe characters
                    r'"(\\\\|\\"|[^"])*"',  # double quote string
                    r"'(\\\\|\\'|[^'])*'",  # single quote string
                    r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
                    r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                    r'//.*$\n?',  # single line comment
                    r'\#.*$\n?',  # ruby/ragel comment
                )) + r')+',
                using(RagelLexer)),
            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text
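
A short sketch of what the embedded lexer produces: host-language code comes out as the Other token, while the %%{ ... }%% block is re-lexed with RagelLexer by the 'multi-line-fsm' rules above (a sketch, not part of the original example):

# Only the Ragel block is highlighted in detail; the host C code stays Other.
src = 'int main() {\n%%{\n    machine hello;\n    main := "hi";\n}%%\n    return 0;\n}\n'
for token, value in RagelEmbeddedLexer().get_tokens(src):
    print(token, repr(value))
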
Example no. 23
class NemerleLexer(RegexLexer):
    """
    For `Nemerle <http://nemerle.org>`_ source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

    .. versionadded:: 1.5
    """

    name = 'Nemerle'
    aliases = ['nemerle']
    filenames = ['*.n']
    mimetypes = ['text/x-nemerle']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    levels = {
        'none':
        r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full':
        ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
         '])' + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd',
                                     'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (
                    r'^([ \t]*(?:' + cs_ident +
                    r'(?:\[\])?\s+)+?)'  # return type
                    r'(' + cs_ident + ')'  # method name
                    r'(\s*)(\()',  # signature start
                    bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'\$\s*"', String, 'splice-string'),
                (r'\$\s*<#', String, 'splice-string2'),
                (r'<#', String, 'recursive-string'),
                (r'(<\[)\s*(' + cs_ident + ':)?', Keyword),
                (r'\]\>', Keyword),

                # quasiquotation only
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name,
                                       Punctuation), 'splice-string-content'),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                                                       Keyword)),
                (r'(abstract|and|as|base|catch|def|delegate|'
                 r'enum|event|extern|false|finally|'
                 r'fun|implements|interface|internal|'
                 r'is|macro|match|matches|module|mutable|new|'
                 r'null|out|override|params|partial|private|'
                 r'protected|public|ref|sealed|static|'
                 r'syntax|this|throw|true|try|type|typeof|'
                 r'virtual|volatile|when|where|with|'
                 r'assert|assert2|async|break|checked|continue|do|else|'
                 r'ensures|for|foreach|if|late|lock|new|nolate|'
                 r'otherwise|regexp|repeat|requires|return|surroundwith|'
                 r'unchecked|unless|using|while|yield)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort|void|array|list)\b\??',
                 Keyword.Type),
                (r'(:>?)\s*(' + cs_ident + r'\??)',
                 bygroups(Punctuation, Keyword.Type)),
                (r'(class|struct|variant|module)(\s+)',
                 bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword,
                                                     Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [(cs_ident, Name.Class, '#pop')],
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ],
            'splice-string':
            [(r'[^"$]', String), (r'\$' + cs_ident, Name),
             (r'(\$)(\()', bygroups(Name,
                                    Punctuation), 'splice-string-content'),
             (r'\\"', String), (r'"', String, '#pop')],
            'splice-string2':
            [(r'[^#<>$]', String), (r'\$' + cs_ident, Name),
             (r'(\$)(\()', bygroups(Name,
                                    Punctuation), 'splice-string-content'),
             (r'<#', String, '#push'), (r'#>', String, '#pop')],
            'recursive-string': [(r'[^#<>]', String), (r'<#', String, '#push'),
                                 (r'#>', String, '#pop')],
            'splice-string-content':
            [(r'if|match', Keyword),
             (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation), (cs_ident, Name),
             (r'\d+', Number), (r'\(', Punctuation, '#push'),
             (r'\)', Punctuation, '#pop')]
        }

    def __init__(self, **options):
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
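
Because the `unicodelevel` option selects one of the three token tables built in the loop above, it changes which identifier regexes get compiled. Below is a minimal sketch of passing the option; the `highlight`, `TerminalFormatter`, and top-level `NemerleLexer` import paths are assumptions mirroring upstream Pygments.

# Minimal sketch, assuming the vendored package mirrors the upstream
# Pygments API and re-exports NemerleLexer at the lexers package level.
from testflows._core.contrib.pygments import highlight
from testflows._core.contrib.pygments.formatters import TerminalFormatter
from testflows._core.contrib.pygments.lexers import NemerleLexer

code = 'def add(x : int, y : int) : int { x + y }'

# 'none' keeps identifiers ASCII-only and skips the large Unicode character
# classes, which the docstring notes is the fastest level.
lexer = NemerleLexer(unicodelevel='none')
print(highlight(code, lexer, TerminalFormatter()))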
Exemplo n.º 24
0
class ThriftLexer(RegexLexer):
    """
    For `Thrift <https://thrift.apache.org/>`__ interface definitions.

    .. versionadded:: 2.1
    """
    name = 'Thrift'
    aliases = ['thrift']
    filenames = ['*.thrift']
    mimetypes = ['application/x-thrift']

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            (r'"', String.Double, combined('stringescape', 'dqs')),
            (r'\'', String.Single, combined('stringescape', 'sqs')),
            (r'(namespace)(\s+)', bygroups(Keyword.Namespace,
                                           Text.Whitespace), 'namespace'),
            (r'(enum|union|struct|service|exception)(\s+)',
             bygroups(Keyword.Declaration, Text.Whitespace), 'class'),
            (
                r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)'  # return arguments
                r'((?:[^\W\d]|\$)[\w$]*)'  # method name
                r'(\s*)(\()',  # signature start
                bygroups(using(this), Name.Function, Text, Operator)),
            include('keywords'),
            include('numbers'),
            (r'[&=]', Operator),
            (r'[:;,{}()<>\[\]]', Punctuation),
            (r'[a-zA-Z_](\.\w|\w)*', Name),
        ],
        'whitespace': [
            (r'\n', Text.Whitespace),
            (r'\s+', Text.Whitespace),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'//.*?\n', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
        ],
        'stringescape': [
            (r'\\([\\nrt"\'])', String.Escape),
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'[^\\"\n]+', String.Double),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r'[^\\\'\n]+', String.Single),
        ],
        'namespace': [
            (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'keywords': [
            (r'(async|oneway|extends|throws|required|optional)\b', Keyword),
            (r'(true|false)\b', Keyword.Constant),
            (r'(const|typedef)\b', Keyword.Declaration),
            (words(('cpp_namespace', 'cpp_include', 'cpp_type', 'java_package',
                    'cocoa_prefix', 'csharp_namespace', 'delphi_namespace',
                    'php_namespace', 'py_module', 'perl_package',
                    'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix',
                    'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace',
                    'xsd_attrs', 'include'),
                   suffix=r'\b'), Keyword.Namespace),
            (words(
                ('void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
                 'string', 'binary', 'map', 'list', 'set', 'slist', 'senum'),
                suffix=r'\b'), Keyword.Type),
            (words(
                ('BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__',
                 '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
                 'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin',
                 'break', 'case', 'catch', 'class', 'clone', 'continue',
                 'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic',
                 'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare',
                 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile',
                 'ensure', 'except', 'exec', 'finally', 'float', 'for',
                 'foreach', 'function', 'global', 'goto', 'if', 'implements',
                 'import', 'in', 'inline', 'instanceof', 'interface', 'is',
                 'lambda', 'module', 'native', 'new', 'next', 'nil', 'not',
                 'or', 'pass', 'public', 'print', 'private', 'protected',
                 'raise', 'redo', 'rescue', 'retry', 'register', 'return',
                 'self', 'sizeof', 'static', 'super', 'switch', 'synchronized',
                 'then', 'this', 'throw', 'transient', 'try', 'undef',
                 'unless', 'unsigned', 'until', 'use', 'var', 'virtual',
                 'volatile', 'when', 'while', 'with', 'xor', 'yield'),
                prefix=r'\b',
                suffix=r'\b'), Keyword.Reserved),
        ],
        'numbers': [
            (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)',
             Number.Float),
            (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
    }
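
A short sketch of feeding a Thrift IDL fragment through the lexer above; only the ThriftLexer name is taken from the listing, and the import path is an assumption mirroring upstream Pygments.

# Hypothetical usage sketch; the IDL fragment is illustrative only.
from testflows._core.contrib.pygments.lexers import ThriftLexer

idl = '''namespace py tutorial

struct Point {
  1: required double x,
  2: required double y,
}
'''

for token_type, value in ThriftLexer().get_tokens(idl):
    if value.strip():
        print(token_type, repr(value))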
Exemplo n.º 25
0
class AdlLexer(AtomsLexer):
    """
    Lexer for ADL syntax.

    .. versionadded:: 2.1
    """

    name = 'ADL'
    aliases = ['adl']
    filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx']

    tokens = {
        'whitespace': [
            # blank line ends
            (r'\s*\n', Text),
            # comment-only line
            (r'^[ \t]*--.*$', Comment),
        ],
        'odin_section': [
            # repeating the following two rules from the root state enables
            # multi-line strings that start in the first column to be handled
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)),
            (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)),
            # template overlay delimiter
            (r'^----------*\n', Text, '#pop'),
            (r'^.*\n', String),
            default('#pop'),
        ],
        'cadl_section': [
            (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)),
            default('#pop'),
        ],
        'rules_section': [
            (r'^[ \t]+.*\n', using(CadlLexer)),
            default('#pop'),
        ],
        'metadata': [
            (r'\)', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'([Tt]rue|[Ff]alse)', Literal),
            # numbers and version ids
            (r'\d+(\.\d+)*', Literal),
            # Guids
            (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal),
            (r'\w+', Name.Class),
            (r'"', String, 'string'),
            (r'=', Operator),
            (r'[ \t]+', Text),
            default('#pop'),
        ],
        'root': [
            (r'^(archetype|template_overlay|operational_template|template|'
             r'speciali[sz]e)', Generic.Heading),
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading, 'odin_section'),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'),
            include('archetype_id'),
            (r'[ \t]*\(', Punctuation, 'metadata'),
            include('whitespace'),
        ],
    }
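
The `root` state above dispatches section headings such as `language` and `definition` into the ODIN and cADL sub-states. A small sketch follows, with the import paths assumed to mirror upstream Pygments and the archetype text purely illustrative.

# Hypothetical usage sketch of rendering a tiny ADL fragment to HTML.
from testflows._core.contrib.pygments import highlight
from testflows._core.contrib.pygments.formatters import HtmlFormatter
from testflows._core.contrib.pygments.lexers import AdlLexer

adl = '''archetype (adl_version=1.4)
    openEHR-EHR-OBSERVATION.demo.v1

language
    original_language = <[ISO_639-1::en]>
'''

print(highlight(adl, AdlLexer(), HtmlFormatter()))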
Exemplo n.º 26
0
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True``, the lexer starts highlighting with
        PHP code (i.e. no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module, which includes functions
        that are known to PHP but are undocumented.

        To get a list of allowed modules, have a look at the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from testflows._core.contrib.pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns;
    # PHP uses a backslash as a namespace separator.
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [(r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other),
                 (r'<', Other)],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String,
                      String.Delimiter, Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here; it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally)\b', Keyword),
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            (r'\d+', Number.Integer),
            (r'0b[01]+', Number.Bin),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct',
                '__destruct',
                '__call',
                '__callStatic',
                '__get',
                '__set',
                '__isset',
                '__unset',
                '__sleep',
                '__wakeup',
                '__toString',
                '__invoke',
                '__set_state',
                '__clone',
                '__debugInfo',
            ),
                   suffix=r'\b'), Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__',
                '__FILE__',
                '__DIR__',
                '__FUNCTION__',
                '__CLASS__',
                '__TRAIT__',
                '__METHOD__',
                '__NAMESPACE__',
            ),
                   suffix=r'\b'), Name.Constant),
        ],
        'classname': [(_ident_inner, Name.Class, '#pop')],
        'functionname': [
            include('magicfuncs'), (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        'string':
        [(r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double),
         (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
         (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
          String.Interpol),
         (r'(\{\$\{)(.*?)(\}\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\{)(\$.*?)(\})',
          bygroups(String.Interpol, using(this, _startinline=True),
                   String.Interpol)),
         (r'(\$\{)(\S+)(\})',
          bygroups(String.Interpol, Name.Variable, String.Interpol)),
         (r'[${\\]', String.Double)],
    }

    def __init__(self, **options):
        self.funcnamehighlighting = get_bool_opt(options,
                                                 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(options, 'disabledmodules',
                                            ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from testflows._core.contrib.pygments.lexers._php_builtins import MODULES
            for key, value in iteritems(MODULES):
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    def analyse_text(text):
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
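
A sketch of the two options described in the docstring: `startinline` enters the `php` state immediately so no leading `<?php` is needed, and `funcnamehighlighting` lets `get_tokens_unprocessed` promote known builtin names from Name.Other to Name.Builtin. The import path is an assumption mirroring upstream Pygments.

# Hypothetical usage sketch; strtoupper is expected (not guaranteed) to be
# promoted to Name.Builtin because funcnamehighlighting defaults to True.
from testflows._core.contrib.pygments.lexers import PhpLexer

lexer = PhpLexer(startinline=True, disabledmodules=['unknown'])

for token_type, value in lexer.get_tokens('$greeting = strtoupper("hi");'):
    if value.strip():
        print(token_type, repr(value))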
Exemplo n.º 27
0
    class GeneratedObjectiveCVariant(baselexer):
        """
        Implements Objective-C syntax on top of an existing C family lexer.
        """

        tokens = {
            'statements': [
                (r'@"', String, 'string'),
                (r'@(YES|NO)', Number),
                (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
                (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
                (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
                (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
                (r'@0[0-7]+[Ll]?', Number.Oct),
                (r'@\d+[Ll]?', Number.Integer),
                (r'@\(', Literal, 'literal_number'),
                (r'@\[', Literal, 'literal_array'),
                (r'@\{', Literal, 'literal_dictionary'),
                (words((
                    '@selector', '@private', '@protected', '@public', '@encode',
                    '@synchronized', '@try', '@throw', '@catch', '@finally',
                    '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer',
                    '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong',
                    'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic',
                    'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in',
                    'out', 'inout', 'release', 'class', '@dynamic', '@optional',
                    '@required', '@autoreleasepool', '@import'), suffix=r'\b'),
                 Keyword),
                (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL',
                        'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'),
                 Keyword.Type),
                (r'@(true|false|YES|NO)\n', Name.Builtin),
                (r'(YES|NO|nil|self|super)\b', Name.Builtin),
                # Carbon types
                (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type),
                # Carbon built-ins
                (r'(TRUE|FALSE)\b', Name.Builtin),
                (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_classname')),
                (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
                 ('#pop', 'oc_forward_classname')),
                # @ can also prefix other expressions like @{...} or @(...)
                (r'@', Punctuation),
                inherit,
            ],
            'oc_classname': [
                # interface definition that inherits
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Class, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?',
                 bygroups(Name.Class, Text, Name.Class), '#pop'),
                # interface definition for a category
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)',
                 bygroups(Name.Class, Text, Name.Label, Text, Punctuation),
                 ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))',
                 bygroups(Name.Class, Text, Name.Label), '#pop'),
                # simple interface / implementation
                (r'([a-zA-Z$_][\w$]*)(\s*)(\{)',
                 bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')),
                (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop')
            ],
            'oc_forward_classname': [
                (r'([a-zA-Z$_][\w$]*)(\s*,\s*)',
                 bygroups(Name.Class, Text), 'oc_forward_classname'),
                (r'([a-zA-Z$_][\w$]*)(\s*;?)',
                 bygroups(Name.Class, Text), '#pop')
            ],
            'oc_ivars': [
                include('whitespace'),
                include('statements'),
                (';', Punctuation),
                (r'\{', Punctuation, '#push'),
                (r'\}', Punctuation, '#pop'),
            ],
            'root': [
                # methods
                (r'^([-+])(\s*)'                         # method marker
                 r'(\(.*?\))?(\s*)'                      # return type
                 r'([a-zA-Z$_][\w$]*:?)',        # begin of method name
                 bygroups(Punctuation, Text, using(this),
                          Text, Name.Function),
                 'method'),
                inherit,
            ],
            'method': [
                include('whitespace'),
                # TODO unsure if ellipses are allowed elsewhere, see
                # discussion in Issue 789
                (r',', Punctuation),
                (r'\.\.\.', Punctuation),
                (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)',
                 bygroups(using(this), Text, Name.Variable)),
                (r'[a-zA-Z$_][\w$]*:', Name.Function),
                (';', Punctuation, '#pop'),
                (r'\{', Punctuation, 'function'),
                default('#pop'),
            ],
            'literal_number': [
                (r'\(', Punctuation, 'literal_number_inner'),
                (r'\)', Literal, '#pop'),
                include('statement'),
            ],
            'literal_number_inner': [
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_array': [
                (r'\[', Punctuation, 'literal_array_inner'),
                (r'\]', Literal, '#pop'),
                include('statement'),
            ],
            'literal_array_inner': [
                (r'\[', Punctuation, '#push'),
                (r'\]', Punctuation, '#pop'),
                include('statement'),
            ],
            'literal_dictionary': [
                (r'\}', Literal, '#pop'),
                include('statement'),
            ],
        }

        def analyse_text(text):
            if _oc_keywords.search(text):
                return 1.0
            elif '@"' in text:  # strings
                return 0.8
            elif re.search('@[0-9]+', text):
                return 0.7
            elif _oc_message.search(text):
                return 0.8
            return 0

        def get_tokens_unprocessed(self, text):
            from testflows._core.contrib.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                COCOA_PROTOCOLS, COCOA_PRIMITIVES

            for index, token, value in \
                    baselexer.get_tokens_unprocessed(self, text):
                if token is Name or token is Name.Class:
                    if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                       or value in COCOA_PRIMITIVES:
                        token = Name.Builtin.Pseudo

                yield index, token, value
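
This class is produced by a factory on top of an existing C-family lexer (`baselexer`), so it is not instantiated directly. In upstream Pygments the same pattern yields ObjectiveCLexer; the sketch below assumes the vendored copy exposes that concrete class too, which this listing does not confirm.

# Hypothetical usage sketch; ObjectiveCLexer is assumed to be the concrete
# class built from this template, mirroring upstream Pygments.
from testflows._core.contrib.pygments import highlight
from testflows._core.contrib.pygments.formatters import TerminalFormatter
from testflows._core.contrib.pygments.lexers import ObjectiveCLexer

objc = '''@interface Greeter : NSObject
- (void)sayHello;
@end
'''

print(highlight(objc, ObjectiveCLexer(), TerminalFormatter()))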
Exemplo n.º 28
0
class RPMSpecLexer(RegexLexer):
    """
    For RPM ``.spec`` files.

    .. versionadded:: 1.6
    """

    name = 'RPMSpec'
    aliases = ['spec']
    filenames = ['*.spec']
    mimetypes = ['text/x-rpm-spec']

    _directives = ('(?:package|prep|build|install|clean|check|pre[a-z]*|'
                   'post[a-z]*|trigger[a-z]*|files)')

    tokens = {
        'root': [
            (r'#.*\n', Comment),
            include('basic'),
        ],
        'description': [
            (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator,
                                                       Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'changelog': [
            (r'\*.*\n', Generic.Subheading),
            (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator,
                                                       Text), '#pop'),
            (r'\n', Text),
            (r'.', Text),
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            include('interpol'),
            (r'.', String.Double),
        ],
        'basic': [
            include('macro'),
            (r'(?i)^(Name|Version|Release|Epoch|Summary|Group|License|Packager|'
             r'Vendor|Icon|URL|Distribution|Prefix|Patch[0-9]*|Source[0-9]*|'
             r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Suggests|Provides|Conflicts|'
             r'Build[a-z]+|[a-z]+Arch|Auto[a-z]+)(:)(.*)$',
             bygroups(Generic.Heading, Punctuation, using(this))),
            (r'^%description', Name.Decorator, 'description'),
            (r'^%changelog', Name.Decorator, 'changelog'),
            (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text)),
            (r'%(attr|defattr|dir|doc(?:dir)?|setup|config(?:ure)?|'
             r'make(?:install)|ghost|patch[0-9]+|find_lang|exclude|verify)',
             Keyword),
            include('interpol'),
            (r"'.*?'", String.Single),
            (r'"', String.Double, 'string'),
            (r'.', Text),
        ],
        'macro': [
            (r'%define.*\n', Comment.Preproc),
            (r'%\{\!\?.*%define.*\}', Comment.Preproc),
            (r'(%(?:if(?:n?arch)?|else(?:if)?|endif))(.*)$',
             bygroups(Comment.Preproc, Text)),
        ],
        'interpol': [
            (r'%\{?__[a-z_]+\}?', Name.Function),
            (r'%\{?_([a-z_]+dir|[a-z_]+path|prefix)\}?', Keyword.Pseudo),
            (r'%\{\?\w+\}', Name.Variable),
            (r'\$\{?RPM_[A-Z0-9_]+\}?', Name.Variable.Global),
            (r'%\{[a-zA-Z]\w+\}', Keyword.Constant),
        ]
    }
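
A minimal sketch of running the lexer over a tiny spec fragment; the `interpol` rules above should pick up macros such as `%{?dist}`, and the tag rule should treat `Name:` and `Version:` as headings. The import path is an assumption mirroring upstream Pygments, and the spec text is illustrative only.

# Hypothetical usage sketch.
from testflows._core.contrib.pygments.lexers import RPMSpecLexer

spec = '''Name: hello
Version: 1.0
Release: 1%{?dist}

%description
A trivial example package.
'''

for token_type, value in RPMSpecLexer().get_tokens(spec):
    if value.strip():
        print(token_type, repr(value))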
Exemplo n.º 29
0
class AmplLexer(RegexLexer):
    """
    For `AMPL <http://ampl.com/>`_ source code.

    .. versionadded:: 2.2
    """
    name = 'Ampl'
    aliases = ['ampl']
    filenames = ['*.run']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text.Whitespace),
            (r'#.*?\n', Comment.Single),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (words(
                ('call', 'cd', 'close', 'commands', 'data', 'delete',
                 'display', 'drop', 'end', 'environ', 'exit', 'expand',
                 'include', 'load', 'model', 'objective', 'option', 'problem',
                 'purge', 'quit', 'redeclare', 'reload', 'remove', 'reset',
                 'restore', 'shell', 'show', 'solexpand', 'solution', 'solve',
                 'update', 'unload', 'xref', 'coeff', 'coef', 'cover', 'obj',
                 'interval', 'default', 'from', 'to', 'to_come', 'net_in',
                 'net_out', 'dimen', 'dimension', 'check', 'complements',
                 'write', 'function', 'pipe', 'format', 'if', 'then', 'else',
                 'in', 'while', 'repeat', 'for'),
                suffix=r'\b'), Keyword.Reserved),
            (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|OUT|LOCAL)',
             Keyword.Type),
            (r'\".*?\"', String.Double),
            (r'\'.*?\'', String.Single),
            (r'[()\[\]{},;:]+', Punctuation),
            (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|'
             r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|'
             r'ub|urc|uslack|val)',
             bygroups(Name.Variable, Punctuation, Keyword.Reserved)),
            (r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|subj to|'
             r'node|table|suffix|read table|write table)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Name.Variable)),
            (r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)',
             bygroups(Keyword.Declaration, Text, Punctuation, Text,
                      Name.Variable, Text, Punctuation, Text, Name.Variable)),
            (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)',
             bygroups(Keyword.Declaration, Text, using(this), Text,
                      Name.Variable)),
            (words(('abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan',
                    'atan2', 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor',
                    'log', 'log10', 'max', 'min', 'precision', 'round', 'sin',
                    'sinh', 'sqrt', 'tan', 'tanh', 'time', 'trunc', 'Beta',
                    'Cauchy', 'Exponential', 'Gamma', 'Irand224', 'Normal',
                    'Normal01', 'Poisson', 'Uniform', 'Uniform01', 'num',
                    'num0', 'ichar', 'char', 'length', 'substr', 'sprintf',
                    'match', 'sub', 'gsub', 'print', 'printf', 'next', 'nextw',
                    'prev', 'prevw', 'first', 'last', 'ord', 'ord0', 'card',
                    'arity', 'indexarity'),
                   prefix=r'\b',
                   suffix=r'\b'), Name.Builtin),
            (r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)',
             Operator),
            (words(('or', 'exists', 'forall', 'and', 'in', 'not', 'within',
                    'union', 'diff', 'difference', 'symdiff', 'inter',
                    'intersect', 'intersection', 'cross', 'setof', 'by',
                    'less', 'sum', 'prod', 'product', 'div', 'mod'),
                   suffix=r'\b'), Keyword.Reserved
             ),  # Operator.Name but not enough emphasized with that
            (r'(\d+\.(?!\.)\d*|\.(?!\.)\d+)([eE][+-]?\d+)?', Number.Float),
            (r'\d+([eE][+-]?\d+)?', Number.Integer),
            (r'[+-]?Infinity', Number.Integer),
            (r'(\w+|(\.(?!\.)))', Text)
        ]
    }
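
A short sketch with a few lines of AMPL, exercising the declaration rules (`var`, `maximize`, `subject to`) and the suffix rule that splits names like `x.val`. The import path is an assumption mirroring upstream Pygments and the model text is illustrative only.

# Hypothetical usage sketch.
from testflows._core.contrib.pygments.lexers import AmplLexer

model = '''var x >= 0;
maximize profit: 3*x;
subject to cap: x <= 10;
display x.val;
'''

for token_type, value in AmplLexer().get_tokens(model):
    if value.strip():
        print(token_type, repr(value))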
Exemplo n.º 30
0
class CsoundOrchestraLexer(CsoundLexer):
    """
    For `Csound <https://csound.com>`_ orchestras.

    .. versionadded:: 2.1
    """

    name = 'Csound Orchestra'
    aliases = ['csound', 'csound-orc']
    filenames = ['*.orc', '*.udo']

    user_defined_opcodes = set()

    def opcode_name_callback(lexer, match):
        opcode = match.group(0)
        lexer.user_defined_opcodes.add(opcode)
        yield match.start(), Name.Function, opcode

    def name_callback(lexer, match):
        type_annotation_token = Keyword.Type

        name = match.group(1)
        if name in OPCODES or name in DEPRECATED_OPCODES:
            yield match.start(), Name.Builtin, name
        elif name in lexer.user_defined_opcodes:
            yield match.start(), Name.Function, name
        else:
            type_annotation_token = Name
            name_match = re.search(r'^(g?[afikSw])(\w+)', name)
            if name_match:
                yield name_match.start(1), Keyword.Type, name_match.group(1)
                yield name_match.start(2), Name, name_match.group(2)
            else:
                yield match.start(), Name, name

        if match.group(2):
            yield match.start(2), Punctuation, match.group(2)
            yield match.start(3), type_annotation_token, match.group(3)

    tokens = {
        'root': [
            (r'\n', Text),

            (r'^([ \t]*)(\w+)(:)(?:[ \t]+|$)', bygroups(Text, Name.Label, Punctuation)),

            include('whitespace and macro uses'),
            include('preprocessor directives'),

            (r'\binstr\b', Keyword.Declaration, 'instrument numbers and identifiers'),
            (r'\bopcode\b', Keyword.Declaration, 'after opcode keyword'),
            (r'\b(?:end(?:in|op))\b', Keyword.Declaration),

            include('partial statements')
        ],

        'partial statements': [
            (r'\b(?:0dbfs|A4|k(?:r|smps)|nchnls(?:_i)?|sr)\b', Name.Variable.Global),

            include('numbers'),

            (r'\+=|-=|\*=|/=|<<|>>|<=|>=|==|!=|&&|\|\||[~¬]|[=!+\-*/^%&|<>#?:]', Operator),
            (r'[(),\[\]]', Punctuation),

            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'braced string'),

            (words((
                'do', 'else', 'elseif', 'endif', 'enduntil', 'fi', 'if', 'ithen', 'kthen',
                'od', 'then', 'until', 'while',
                ), prefix=r'\b', suffix=r'\b'), Keyword),
            (words(('return', 'rireturn'), prefix=r'\b', suffix=r'\b'), Keyword.Pseudo),

            (r'\b[ik]?goto\b', Keyword, 'goto label'),
            (r'\b(r(?:einit|igoto)|tigoto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             'goto label'),
            (r'\b(c(?:g|in?|k|nk?)goto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument')),
            (r'\b(timout)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument', 'goto argument')),
            (r'\b(loop_[gl][et])(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument', 'goto argument', 'goto argument')),

            (r'\bprintk?s\b', Name.Builtin, 'prints opcode'),
            (r'\b(?:readscore|scoreline(?:_i)?)\b', Name.Builtin, 'Csound score opcode'),
            (r'\bpyl?run[it]?\b', Name.Builtin, 'Python opcode'),
            (r'\blua_(?:exec|opdef)\b', Name.Builtin, 'Lua opcode'),
            (r'\bp\d+\b', Name.Variable.Instance),
            (r'\b([A-Z_a-z]\w*)(?:(:)([A-Za-z]))?\b', name_callback)
        ],

        'instrument numbers and identifiers': [
            include('whitespace and macro uses'),
            (r'\d+|[A-Z_a-z]\w*', Name.Function),
            (r'[+,]', Punctuation),
            (r'\n', Text, '#pop')
        ],

        'after opcode keyword': [
            include('whitespace and macro uses'),
            (r'[A-Z_a-z]\w*', opcode_name_callback, ('#pop', 'opcode type signatures')),
            (r'\n', Text, '#pop')
        ],
        'opcode type signatures': [
            include('whitespace and macro uses'),

            # https://github.com/csound/csound/search?q=XIDENT+path%3AEngine+filename%3Acsound_orc.lex
            (r'0|[afijkKoOpPStV\[\]]+', Keyword.Type),

            (r',', Punctuation),
            (r'\n', Text, '#pop')
        ],

        'quoted string': [
            (r'"', String, '#pop'),
            (r'[^\\"$%)]+', String),
            include('macro uses'),
            include('escape sequences'),
            include('format specifiers'),
            (r'[\\$%)]', String)
        ],
        'braced string': [
            (r'\}\}', String, '#pop'),
            (r'(?:[^\\%)}]|\}(?!\}))+', String),
            include('escape sequences'),
            include('format specifiers'),
            (r'[\\%)]', String)
        ],
        'escape sequences': [
            # https://github.com/csound/csound/search?q=unquote_string+path%3AEngine+filename%3Acsound_orc_compile.c
            (r'\\(?:[\\abnrt"]|[0-7]{1,3})', String.Escape)
        ],
        # Format specifiers are highlighted in all strings, even though only
        #   fprintks        https://csound.com/docs/manual/fprintks.html
        #   fprints         https://csound.com/docs/manual/fprints.html
        #   printf/printf_i https://csound.com/docs/manual/printf.html
        #   printks         https://csound.com/docs/manual/printks.html
        #   prints          https://csound.com/docs/manual/prints.html
        #   sprintf         https://csound.com/docs/manual/sprintf.html
        #   sprintfk        https://csound.com/docs/manual/sprintfk.html
        # work with strings that contain format specifiers. In addition, these
        # opcodes’ handling of format specifiers is inconsistent:
        #   - fprintks, fprints, printks, and prints do accept %a and %A
        #     specifiers, but can’t accept %s specifiers.
        #   - printf, printf_i, sprintf, and sprintfk don’t accept %a and %A
        #     specifiers, but can accept %s specifiers.
        # See https://github.com/csound/csound/issues/747 for more information.
        'format specifiers': [
            (r'%[#0\- +]*\d*(?:\.\d+)?[diuoxXfFeEgGaAcs]', String.Interpol),
            (r'%%', String.Escape)
        ],

        'goto argument': [
            include('whitespace and macro uses'),
            (r',', Punctuation, '#pop'),
            include('partial statements')
        ],
        'goto label': [
            include('whitespace and macro uses'),
            (r'\w+', Name.Label, '#pop'),
            default('#pop')
        ],

        'prints opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'prints quoted string'),
            default('#pop')
        ],
        'prints quoted string': [
            (r'\\\\[aAbBnNrRtT]', String.Escape),
            (r'%[!nNrRtT]|[~^]{1,2}', String.Escape),
            include('quoted string')
        ],

        'Csound score opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Csound score'),
            (r'\n', Text, '#pop')
        ],
        'Csound score': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer))
        ],

        'Python opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Python'),
            (r'\n', Text, '#pop')
        ],
        'Python': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(PythonLexer))
        ],

        'Lua opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Lua'),
            (r'\n', Text, '#pop')
        ],
        'Lua': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(LuaLexer))
        ]
    }
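
The two callbacks at the top of this class keep per-lexer state: `opcode_name_callback` records user-defined opcode names, and `name_callback` later classifies those uses as Name.Function, known opcodes as Name.Builtin, and splits type prefixes such as `a` or `k` off other names. A minimal sketch follows, with the import path assumed to mirror upstream Pygments.

# Hypothetical usage sketch; per name_callback above, 'oscili' should come
# out as Name.Builtin and the 'a' prefix of 'aSig' as Keyword.Type.
from testflows._core.contrib.pygments.lexers import CsoundOrchestraLexer

orc = '''instr 1
  aSig oscili 0.5, 440
  out aSig
endin
'''

for token_type, value in CsoundOrchestraLexer().get_tokens(orc):
    if value.strip():
        print(token_type, repr(value))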